-
Notifications
You must be signed in to change notification settings - Fork 8
/
utils.jl
653 lines (544 loc) · 25.2 KB
/
utils.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
### USEFUL BUT NOT EXPORTED FUNCTIONS
"""
    replace_words(text::AbstractString, words::Vector{<:AbstractString}; replacement::AbstractString="ABC")
    replace_words(text::AbstractString, word::AbstractString; replacement::AbstractString="ABC")
    replace_words(text::AbstractString, pattern::Regex, replacement::AbstractString)

Replace all occurrences of words in `words` with `replacement` in `text`. Useful to quickly remove specific names or entities from a text.

Words are matched literally (regex metacharacters inside them are escaped), case-insensitively,
and only between word boundaries. Empty words are ignored; if no word is left to match,
`text` is returned unchanged.

# Arguments
- `text::AbstractString`: The text to be processed.
- `words::Vector{<:AbstractString}`: A vector of words to be replaced (a single word is accepted as well).
- `replacement::AbstractString="ABC"`: The replacement string to be used. Defaults to "ABC".

# Returns
- The processed text.

# Example
```julia
text = "Disney is a great company"
replace_words(text, ["Disney", "Snow White", "Mickey Mouse"])
# Output: "ABC is a great company"
```
"""
function replace_words(text::AbstractString, words::Vector{<:AbstractString};
        replacement::AbstractString = "ABC")
    # An empty alternation would match at every word boundary, so drop empty words
    # and bail out early when nothing is left to replace
    candidates = filter(!isempty, words)
    isempty(candidates) && return String(text)
    # Escape regex metacharacters so that each word is matched literally
    escaped = [replace(word, r"[\\^$.|?*+()\[\]{}]" => s"\\\0") for word in candidates]
    alternation = join(escaped, "\\b|\\b")
    pattern = Regex("\\b" * alternation * "\\b", "i")
    return replace_words(text, pattern, replacement)
end

# low-level method: the caller provides the (already compiled) pattern
function replace_words(text::AbstractString, pattern::Regex, replacement::AbstractString)
    return replace(text, pattern => replacement)
end

# dispatch for single word
function replace_words(text::AbstractString,
        word::AbstractString;
        replacement::AbstractString = "ABC")
    return replace_words(text, [word]; replacement)
end
"""
    recursive_splitter(text::AbstractString; separator::AbstractString=" ", max_length::Int=35000) -> Vector{String}

Split a given string `text` into chunks of a specified maximum length `max_length`.
This is particularly useful for splitting larger documents or texts into smaller segments, suitable for models or systems with smaller context windows.

There is a method for dispatching on multiple separators,
`recursive_splitter(text::String, separators::Vector{String}; max_length::Int=35000) -> Vector{String}` that mimics the logic of Langchain's `RecursiveCharacterTextSplitter`.

# Arguments
- `text::AbstractString`: The text to be split.
- `separator::AbstractString=" "`: The separator used to split the text into minichunks. Defaults to a space character.
- `max_length::Int=35000`: The maximum length of each chunk (in characters). Defaults to 35,000 characters, which should fit within 16K context window.

# Returns
`Vector{String}`: A vector of strings, each representing a chunk of the original text. Chunks are smaller than or equal to `max_length`
whenever the `separator` allows it (a single minichunk longer than `max_length` is kept whole).

# Notes
- The function ensures that each chunk is as close to `max_length` as possible without exceeding it.
- If the `text` already fits within `max_length` (this includes an empty `text`), it is returned as the only chunk.
- The `separator` is re-added to the text chunks after splitting, preserving the original structure of the text as closely as possible.

# Examples

Splitting text with the default separator (" "):
```julia
text = "Hello world. How are you?"
chunks = recursive_splitter(text; max_length=13)
length(chunks) # Output: 2
```

Using a custom separator and custom `max_length`
```julia
text = "Hello,World," ^ 2900 # length 34800 chars
chunks = recursive_splitter(text; separator=",", max_length=10000) # for 4K context window
length(chunks) # Output: 4
```
"""
function recursive_splitter(text::AbstractString;
        separator::AbstractString = " ",
        max_length::Int = 35000)
    ## shortcut (always return `Vector{String}`, even for eg `SubString` inputs)
    length(text) <= max_length && return [String(text)]
    ## split by separator
    minichunks = split(text, separator)
    sep_length = length(separator)
    n_minichunks = length(minichunks)
    chunks = String[]
    current_chunk = IOBuffer()
    current_length = 0
    for (i, minichunk) in enumerate(minichunks)
        is_last = i == n_minichunks
        minichunk_length = length(minichunk)
        # the separator is re-added after every minichunk except the last one
        sep_length_ = is_last ? 0 : sep_length
        # Check if the current chunk is full
        if current_length + minichunk_length + sep_length_ > max_length
            # Save chunk, excluding the current mini chunk
            save_chunk = String(take!(current_chunk))
            isempty(save_chunk) || push!(chunks, save_chunk)
            current_length = 0
        end
        write(current_chunk, minichunk)
        current_length += minichunk_length
        if !is_last
            write(current_chunk, separator)
            current_length += sep_length
        end
    end
    # Add the last chunk if it's not empty
    final_chunk = String(take!(current_chunk))
    isempty(final_chunk) || push!(chunks, final_chunk)
    return chunks
end
# Positional convenience form for a single separator: forwards `separator` and
# `max_length` to the keyword-based method
function recursive_splitter(text::String,
        separator::String,
        max_length::Int = 35000)
    return recursive_splitter(text; separator = separator, max_length = max_length)
end
"""
    recursive_splitter(text::AbstractString, separators::Vector{String}; max_length::Int=35000) -> Vector{String}

Split a given string `text` into chunks recursively using a series of separators, with each chunk having a maximum length of `max_length` (if it's achievable given the `separators` provided).
This function is useful for splitting large documents or texts into smaller segments that are more manageable for processing, particularly for models or systems with limited context windows.

It was previously known as `split_by_length`.

This is similar to Langchain's [`RecursiveCharacterTextSplitter`](https://python.langchain.com/docs/modules/data_connection/document_transformers/recursive_text_splitter).
To achieve the same behavior, use `separators=["\\n\\n", "\\n", " ", ""]`.

# Arguments
- `text::AbstractString`: The text to be split.
- `separators::Vector{String}`: An ordered list of separators used to split the text. The function iteratively applies these separators to split the text. Recommend to use `["\\n\\n", ". ", "\\n", " "]`
- `max_length::Int`: The maximum length of each chunk. Defaults to 35,000 characters. This length is considered after each iteration of splitting, ensuring chunks fit within specified constraints.

# Returns
`Vector{String}`: A vector of strings, where each string is a chunk of the original text that is smaller than or equal to `max_length`
(as long as the provided `separators` make that achievable).

# Usage Tips
- I tend to prefer splitting on sentences (`". "`) before splitting on newline characters (`"\\n"`) to preserve the structure of the text.
- What's the difference between `separators=["\\n"," ",""]` and `separators=["\\n"," "]`?
  The former will split down to character level (`""`), so it will always achieve the `max_length` but it will split words (bad for context!)
  I prefer to instead set slightly smaller `max_length` but not split words.

# How It Works
- The function processes the text iteratively with each separator in the provided order. It then measures the length of each chunk and splits it further if it exceeds the `max_length`.
  If the chunk is "short enough", the subsequent separators are not applied to it.
- Each chunk is as close to `max_length` as possible (unless we cannot split it any further, eg, if the splitters are "too big" / there are not enough of them)
- Splitting of each chunk is delegated to the single-separator method, which returns text that already fits within `max_length` as a single chunk.
- Separators are re-added to the text chunks after splitting, preserving the original structure of the text as closely as possible. Apply `strip` if you do not need them.
- The function provides `separators` as the second argument to distinguish itself from its single-separator counterpart dispatch.
- An empty `separators` vector is rejected with an `AssertionError`.

# Examples

Splitting text using multiple separators:
```julia
text = "Paragraph 1\\n\\nParagraph 2. Sentence 1. Sentence 2.\\nParagraph 3"
separators = ["\\n\\n", ". ", "\\n"] # split by paragraphs, sentences, and newlines (not by words)
chunks = recursive_splitter(text, separators, max_length=20)
```

Splitting text using multiple separators - with splitting on words:
```julia
text = "Paragraph 1\\n\\nParagraph 2. Sentence 1. Sentence 2.\\nParagraph 3"
separators = ["\\n\\n", ". ", "\\n", " "] # split by paragraphs, sentences, and newlines, words
chunks = recursive_splitter(text, separators, max_length=10)
```

Using a single separator:
```julia
text = "Hello,World," ^ 2900 # length 34800 characters
chunks = recursive_splitter(text, [","], max_length=10000)
```

To achieve the same behavior as Langchain's `RecursiveCharacterTextSplitter`, use `separators=["\\n\\n", "\\n", " ", ""]`.
```julia
text = "Paragraph 1\\n\\nParagraph 2. Sentence 1. Sentence 2.\\nParagraph 3"
separators = ["\\n\\n", "\\n", " ", ""]
chunks = recursive_splitter(text, separators, max_length=10)
```
"""
function recursive_splitter(
        text::AbstractString, separators::Vector{String};
        max_length::Int = 35000)
    @assert !isempty(separators) "`separators` can't be empty"
    # Materialize other string types (eg, `SubString`) before delegating, so that any
    # `AbstractString` accepted by this signature can be handled by the single-separator method
    chunks = recursive_splitter(
        String(text); separator = first(separators), max_length = max_length)
    ## Iteratively split by the remaining separators
    # (chunks that already fit into `max_length` are passed through by the single-separator method)
    for separator in Iterators.drop(separators, 1)
        chunks = mapreduce(text_ -> recursive_splitter(text_; max_length, separator),
            vcat,
            chunks)
    end
    return chunks
end
# Alias to keep compatibility: `split_by_length` is the previous name of `recursive_splitter`,
# so both names refer to the same function (and all of its methods)
const split_by_length = recursive_splitter
"""
    wrap_string(str::AbstractString,
        text_width::Int = 20;
        newline::Union{AbstractString, AbstractChar} = '\\n')

Breaks a string into lines of a given `text_width` (measured in characters).
Optionally, you can specify the `newline` character or string to use.

# Notes
- The text is split only on spaces, so newlines already present in `str` are preserved.
- The space joining two words counts towards the line width.
- Words longer than `text_width` are hyphenated: `text_width - 1` characters (but at least one) followed by `-` and `newline`.

# Example:

```julia
wrap_string("Certainly, here's a function in Julia that will wrap a string according to the specifications:", 10) |> print
```
"""
function wrap_string(str::AbstractString,
        text_width::Int = 20;
        newline::Union{AbstractString, AbstractChar} = '\n')
    ## split only on spaces to make sure it doesn't remove newlines already in the text!
    words = split(str, " ")
    # number of characters of an over-long word that go in front of the hyphen;
    # at least 1, so the hyphenation loop below always makes progress
    piece_width = max(text_width - 1, 1)
    output = IOBuffer()
    current_line_length = 0
    for word in words
        word_length = length(word)
        # the joining space takes one column on a non-empty line
        space_length = current_line_length > 0 ? 1 : 0
        if current_line_length + space_length + word_length > text_width
            if current_line_length > 0
                write(output, newline)
                current_line_length = 0
            end
            while word_length > text_width
                # `first`/`chop` count characters, so multi-byte (Unicode) text is safe
                write(output, first(word, piece_width), "-$newline")
                word = chop(word; head = piece_width, tail = 0)
                word_length -= piece_width
            end
        end
        if current_line_length > 0
            write(output, ' ')
            current_line_length += 1
        end
        write(output, word)
        current_line_length += word_length
    end
    return String(take!(output))
end
"""
    length_longest_common_subsequence(itr1, itr2)

Compute the length of the longest common subsequence between two sequences (ie, the higher the number, the better the match).

Source: https://cn.julialang.org/LeetCode.jl/dev/democards/problems/problems/1143.longest-common-subsequence/

# Arguments
- `itr1`: The first sequence, eg, a String.
- `itr2`: The second sequence, eg, a String.

# Returns
The length of the longest common subsequence.

# Notes
- Both inputs are collected into vectors first, so strings with multi-byte (Unicode) characters are compared character by character.

# Examples
```julia
text1 = "abc-abc----"
text2 = "___ab_c__abc"
length_longest_common_subsequence(text1, text2)
# Output: 6 (-> "abcabc")
```

It can be used to fuzzy match strings and find the similarity between them (Tip: normalize the match)
```julia
commands = ["product recommendation", "emotions", "specific product advice", "checkout advice"]
query = "Which product can you recommend for me?"
let pos = argmax(length_longest_common_subsequence.(Ref(query), commands))
    dist = length_longest_common_subsequence(query, commands[pos])
    norm = dist / min(length(query), length(commands[pos]))
    @info "The closest command to the query: \"\$(query)\" is: \"\$(commands[pos])\" (distance: \$(dist), normalized: \$(norm))"
end
```

But it might be easier to use directly the convenience wrapper `distance_longest_common_subsequence`!
"""
function length_longest_common_subsequence(itr1, itr2)
    # Collect to get element-wise (not byte-wise) random access, eg, for Unicode strings
    seq1, seq2 = collect(itr1), collect(itr2)
    m, n = length(seq1), length(seq2)
    (m == 0 || n == 0) && return 0
    # Only the previous and the current row of the DP table are needed
    prev = zeros(Int, n + 1)
    curr = zeros(Int, n + 1)
    for i in 1:m
        for j in 1:n
            curr[j + 1] = (seq1[i] == seq2[j]) ? (prev[j] + 1) :
                          max(prev[j + 1], curr[j])
        end
        # the first entry of both rows is never written to, so it stays 0 after the swap
        prev, curr = curr, prev
    end
    return prev[n + 1]
end
"""
    distance_longest_common_subsequence(
        input1::AbstractString, input2::AbstractString)

    distance_longest_common_subsequence(
        input1::AbstractString, input2::AbstractVector{<:AbstractString})

Measures distance between two strings using the length of the longest common subsequence (ie, the lower the number, the better the match). Perfect match is `distance = 0.0`

Convenience wrapper around `length_longest_common_subsequence` to normalize the distances to 0-1 range.
There is also a dispatch for comparing a string vs an array of strings.

# Notes
- Use `argmin` and `minimum` to find the position of the closest match and the distance, respectively.
- Matching with an empty string will always return 1.0 (worst match), even if the other string is empty as well (safety mechanism to avoid division by zero).
- The similarity is normalized by the length of the shorter of the two inputs.

# Arguments
- `input1::AbstractString`: The first string to compare.
- `input2::AbstractString`: The second string to compare.

# Example

You can also use it to find the closest context for some AI generated summary/story:

```julia
context = ["The enigmatic stranger vanished as swiftly as a wisp of smoke, leaving behind a trail of unanswered questions.",
    "Beneath the shimmering moonlight, the ocean whispered secrets only the stars could hear.",
    "The ancient tree stood as a silent guardian, its gnarled branches reaching for the heavens.",
    "The melody danced through the air, painting a vibrant tapestry of emotions.",
    "Time flowed like a relentless river, carrying away memories and leaving imprints in its wake."]

story = \"\"\"
    Beneath the shimmering moonlight, the ocean whispered secrets only the stars could hear.

    Under the celestial tapestry, the vast ocean whispered its secrets to the indifferent stars. Each ripple, a murmured confidence, each wave, a whispered lament. The glittering celestial bodies listened in silent complicity, their enigmatic gaze reflecting the ocean's unspoken truths. The cosmic dance between the sea and the sky, a symphony of shared secrets, forever echoing in the ethereal expanse.
    \"\"\"

dist = distance_longest_common_subsequence(story, context)
@info "The closest context to the query: \"\$(first(story,20))...\" is: \"\$(context[argmin(dist)])\" (distance: \$(minimum(dist)))"
```
"""
function distance_longest_common_subsequence(
        input1::AbstractString, input2::AbstractString)
    # Any empty input is the worst possible match (this also avoids division by zero)
    (isempty(input1) || isempty(input2)) && return 1.0
    lcs_length = length_longest_common_subsequence(input1, input2)
    normalizer = min(length(input1), length(input2))
    # it's a distance, so 1.0 is the worst match, 0.0 is the best match (=no distance)
    return 1.0 - lcs_length / normalizer
end
# Dispatch for arrays (eg, context): one distance per element of `input2`,
# each measured against the same `input1`
function distance_longest_common_subsequence(
        input1::AbstractString, input2::AbstractVector{<:AbstractString})
    return broadcast(distance_longest_common_subsequence, Ref(input1), input2)
end
### INTERNAL FUNCTIONS - DO NOT USE DIRECTLY
# helper to extract handlebar variables (eg, `{{var}}`) from a prompt string
# Returns the variable names as a `Vector{Symbol}` in order of appearance (duplicates are kept)
function _extract_handlebar_variables(s::AbstractString)
    return Symbol[Symbol(m[1]) for m in eachmatch(r"\{\{([^\}]+)\}\}", s)]
end

# create a method for Vector{Dict} in UserMessageWithImage to extract handlebar variables for Dict keys
# Only the values stored under the "text" key are scanned; the result is the vector of unique
# per-entry variable vectors (ie, `Vector{Vector{Symbol}}`, not flattened).
# Note: the signature uses `Vector{<:Dict{...}}` because Julia's type parameters are invariant,
# so `Vector{Dict{String, <:AbstractString}}` would not match a plain `Vector{Dict{String, String}}`
function _extract_handlebar_variables(vect::Vector{<:Dict{String, <:AbstractString}})
    return unique([_extract_handlebar_variables(v)
                   for d in vect for (k, v) in d if k == "text"])
end
"""
    call_cost(prompt_tokens::Int, completion_tokens::Int, model::String;
        cost_of_token_prompt::Number = get(MODEL_REGISTRY,
            model,
            (; cost_of_token_prompt = 0.0)).cost_of_token_prompt,
        cost_of_token_generation::Number = get(MODEL_REGISTRY, model,
            (; cost_of_token_generation = 0.0)).cost_of_token_generation)

    call_cost(msg, model::String)

Calculate the cost of a call based on the number of tokens in the message and the cost per token.

# Arguments
- `prompt_tokens::Int`: The number of tokens used in the prompt.
- `completion_tokens::Int`: The number of tokens used in the completion.
- `model::String`: The name of the model to use for determining token costs. If the model
  is not found in `MODEL_REGISTRY`, default costs are used.
- `cost_of_token_prompt::Number`: The cost per prompt token. Defaults to the cost in `MODEL_REGISTRY`
  for the given model, or 0.0 if the model is not found.
- `cost_of_token_generation::Number`: The cost per generation token. Defaults to the cost in
  `MODEL_REGISTRY` for the given model, or 0.0 if the model is not found.

# Returns
- `Number`: The total cost of the call.

# Examples

```julia
# Assuming MODEL_REGISTRY is set up with appropriate costs
MODEL_REGISTRY = Dict(
    "model1" => (cost_of_token_prompt = 0.05, cost_of_token_generation = 0.10),
    "model2" => (cost_of_token_prompt = 0.07, cost_of_token_generation = 0.02)
)

cost1 = call_cost(10, 20, "model1")

# from message
msg1 = AIMessage(;tokens=[10, 20]) # 10 prompt tokens, 20 generation tokens
cost1 = call_cost(msg1, "model1")
# cost1 = 10 * 0.05 + 20 * 0.10 = 2.5

# Using custom token costs
cost2 = call_cost(10, 20, "model3"; cost_of_token_prompt = 0.08, cost_of_token_generation = 0.12)
# cost2 = 10 * 0.08 + 20 * 0.12 = 3.2
```
"""
function call_cost(prompt_tokens::Int, completion_tokens::Int, model::String;
        cost_of_token_prompt::Number = get(MODEL_REGISTRY,
            model,
            (; cost_of_token_prompt = 0.0)).cost_of_token_prompt,
        cost_of_token_generation::Number = get(MODEL_REGISTRY, model,
            (; cost_of_token_generation = 0.0)).cost_of_token_generation)
    # price the prompt and the completion separately, then add them up
    prompt_cost = prompt_tokens * cost_of_token_prompt
    generation_cost = completion_tokens * cost_of_token_generation
    return prompt_cost + generation_cost
end
# Cost of a single message: use the cost recorded on the message (`msg.cost`) when available,
# otherwise compute it from `msg.tokens` (prompt tokens first, completion tokens second)
function call_cost(msg, model::String)
    isnothing(msg.cost) || return msg.cost
    return call_cost(msg.tokens[1], msg.tokens[2], model)
end

## dispatch for array -> take unique messages only (eg, for multiple samples we count only once)
# Messages sharing a `run_id` are counted once; messages without a `run_id` (`nothing`)
# cannot be deduplicated and are always counted
function call_cost(conv::AbstractVector, model::String)
    sum_ = 0.0
    visited_runs = Set{Int}()
    for msg in conv
        run_id = msg.run_id
        if isnothing(run_id)
            # `nothing` must not be pushed into the `Set{Int}` (it would throw)
            sum_ += call_cost(msg, model)
        elseif run_id ∉ visited_runs
            sum_ += call_cost(msg, model)
            push!(visited_runs, run_id)
        end
    end
    return sum_
end
"""
    call_cost_alternative(count_images, model;
        image_quality::Union{AbstractString, Nothing} = nothing,
        image_size::Union{AbstractString, Nothing} = nothing)

Alternative cost calculation. Used to calculate cost of image generation with DALL-E 3 and similar.

The per-image price is looked up in `ALTERNATIVE_GENERATION_COSTS` under `model`, then `image_quality`, then `image_size`,
and multiplied by `count_images`. If the model is not registered, if `image_quality` or `image_size` is not provided,
or if no price is found for the combination, the price per image is 0.0.
"""
function call_cost_alternative(
        count_images, model; image_quality::Union{AbstractString, Nothing} = nothing,
        image_size::Union{AbstractString, Nothing} = nothing)
    default_img_cost = 0.0 # per image
    has_pricing = haskey(ALTERNATIVE_GENERATION_COSTS, model) &&
                  !isnothing(image_quality) && !isnothing(image_size)
    # without full pricing information fall back to the default per-image cost
    has_pricing || return default_img_cost * count_images
    # walk the nested lookup: model -> quality -> size
    model_costs = get(ALTERNATIVE_GENERATION_COSTS, model, Dict())
    quality_costs = get(model_costs, image_quality, Dict())
    cost_per_image = get(quality_costs, image_size, default_img_cost)
    return cost_per_image * count_images
end
# helper to produce summary message of how many tokens were used and for how much
# (the cost part is left out when the cost is zero)
function _report_stats(msg,
        model::String)
    cost = call_cost(msg, model)
    cost_str = if iszero(cost)
        ""
    else
        " @ Cost: \$$(round(cost; digits=4))"
    end
    return "Tokens: $(sum(msg.tokens))$(cost_str) in $(round(msg.elapsed;digits=1)) seconds"
end
## dispatch for array -> take last message
# (only the last message of the conversation is reported)
function _report_stats(msg::AbstractVector,
        model::String)
    last_msg = last(msg)
    return _report_stats(last_msg, model)
end
# Loads and encodes the provided image path as a base64 string
# Returns the bare base64 payload if `base64_only=true`, otherwise a data URL
# (`data:image/<suffix>;base64,<payload>`) where `<suffix>` is the text after the last "." in the path
function _encode_local_image(image_path::AbstractString; base64_only::Bool = false)
    @assert isfile(image_path) "`image_path` must be a valid path to an image file. File: $image_path not found."
    base64_image = base64encode(read(image_path))
    base64_only && return base64_image
    image_suffix = last(split(image_path, "."))
    return "data:image/$image_suffix;base64,$(base64_image)"
end
# Vector method: encodes every path, forwarding `base64_only` to the single-path method
function _encode_local_image(image_path::Vector{<:AbstractString};
        base64_only::Bool = false)
    encode_one = path -> _encode_local_image(path; base64_only)
    return broadcast(encode_one, image_path)
end
# No image path provided -> nothing to encode. `base64_only` is accepted (and ignored) so that
# callers can forward the same keyword as for the other `_encode_local_image` methods
_encode_local_image(::Nothing; base64_only::Bool = false) = String[]
# Used for image_url in aiscan to provide a consistent output type:
# a single string is wrapped in a one-element vector, a vector is passed through as-is
function _string_to_vector(s::AbstractString)
    return [s]
end
function _string_to_vector(v::Vector{<:AbstractString})
    return v
end
### Conversation Management
"""
    push_conversation!(conv_history, conversation::AbstractVector, max_history::Union{Int, Nothing})

Add a new conversation to the conversation history and resize the history if necessary.

This function appends a conversation to the `conv_history`, which is a vector of conversations. Each conversation is represented as a vector of `AbstractMessage` objects.
After adding the new conversation, the history is resized (via `resize_conversation!`) according to the `max_history` parameter to ensure that the size of the history does not exceed the specified limit.

## Arguments
- `conv_history`: A vector that stores the history of conversations. Typically, this is `PT.CONV_HISTORY`.
- `conversation`: The new conversation to be added. It should be a vector of `AbstractMessage` objects.
- `max_history`: The maximum number of conversations to retain in the history. If `Nothing`, the history is not resized.

## Returns
The updated conversation history.

## Example
```julia
new_conversation = aigenerate("Hello World"; return_all = true)
push_conversation!(PT.CONV_HISTORY, new_conversation, 10)
```

This is done automatically by the ai"" macros.
"""
function push_conversation!(conv_history::Vector{<:Vector{<:Any}},
        conversation::AbstractVector,
        max_history::Union{Int, Nothing})
    # `push!` returns the mutated history, which is then trimmed in place
    resize_conversation!(push!(conv_history, conversation), max_history)
    return conv_history
end
"""
    resize_conversation!(conv_history, max_history::Union{Int, Nothing})

Resize the conversation history to a specified maximum length.

This function trims the `conv_history` to ensure that its size does not exceed `max_history`. It removes the oldest conversations first if the length of `conv_history` is greater than `max_history`.

## Arguments
- `conv_history`: A vector that stores the history of conversations. Typically, this is `PT.CONV_HISTORY`.
- `max_history`: The maximum number of conversations to retain in the history. If `Nothing`, the history is not resized.

## Returns
The resized conversation history (the same object as `conv_history`, also when `max_history` is `nothing`).

## Throws
- `ArgumentError` if `max_history` is negative (the history is left untouched in that case).

## Example
```julia
resize_conversation!(PT.CONV_HISTORY, PT.MAX_HISTORY_LENGTH)
```

After the function call, `conv_history` will contain at most `PT.MAX_HISTORY_LENGTH` of the most recent conversations.

This is done automatically by the ai"" macros.
"""
function resize_conversation!(conv_history,
        max_history::Union{Int, Nothing})
    # `nothing` means "no limit": nothing to trim, but still return the history
    isnothing(max_history) && return conv_history
    # validate upfront, so that an invalid limit cannot wipe the history before failing
    max_history >= 0 ||
        throw(ArgumentError("`max_history` must be non-negative, got $max_history"))
    # drop the oldest conversations first
    while length(conv_history) > max_history
        popfirst!(conv_history)
    end
    return conv_history
end
"""
    @timeout(seconds, expr_to_run, expr_when_fails)

Simple macro to run an expression with a timeout of `seconds`. If the `expr_to_run` fails to finish in `seconds` seconds, `expr_when_fails` is returned.

# Notes
- `expr_to_run` is executed in a separate task; when the timer fires before the task is done, an `InterruptException` is thrown into that task.
- Any exception raised while waiting for the task (not only the timeout) results in `expr_when_fails`.
- The timer is closed as soon as the result is available, so it does not linger until `seconds` have elapsed.

# Example
```julia
x = @timeout 1 begin
    sleep(1.1)
    println("done")
    1
end "failed"

```
"""
macro timeout(seconds, expr_to_run, expr_when_fails)
    quote
        tsk = @task $(esc(expr_to_run))
        schedule(tsk)
        # watchdog: interrupt the task if it is still running when the time is up
        watchdog = Timer($(esc(seconds))) do timer
            istaskdone(tsk) || Base.throwto(tsk, InterruptException())
        end
        try
            fetch(tsk)
        catch _
            $(esc(expr_when_fails))
        finally
            # release the timer once we have a result (closing a fired timer is harmless)
            close(watchdog)
        end
    end
end
"""
Utility for rendering the conversation (vector of messages) as markdown. REQUIRES the Markdown package to load the extension! See also `pprint`

Only the generic function is declared here (it has no methods at this point).
"""
function preview end
"""
Utility for pretty printing PromptingTools types in REPL.

Only the generic function is declared here; methods are added separately.
"""
function pprint end
# show fallback: anything without a dedicated `pprint` method is printed with `show`
# (`text_width` is accepted for signature compatibility, but not used by the fallback)
pprint(io::IO, anything::Any; text_width::Int = displaysize(io)[2]) = show(io, anything)

# convenience method without `io`: prints to `stdout`, forwarding all keyword arguments
function pprint(anything::Any;
        text_width = displaysize(stdout)[2], kwargs...)
    return pprint(stdout, anything; text_width = text_width, kwargs...)
end
"""
    auth_header(api_key::Union{Nothing, AbstractString};
        bearer::Bool = true,
        x_api_key::Bool = false,
        extra_headers::AbstractVector = Vector{
            Pair{String, String},
        }[],
        kwargs...)

Creates the authentication headers for any API request. Assumes that the communication is done in JSON format.

# Arguments
- `api_key::Union{Nothing, AbstractString}`: The API key to be used for authentication. If `Nothing`, no authentication is used.
- `bearer::Bool`: Provide the API key in the `Authorization: Bearer ABC` format. Defaults to `true`.
- `x_api_key::Bool`: Provide the API key in the `x-api-key: ABC` header. Defaults to `false`.
- `extra_headers::AbstractVector`: Additional headers that are appended after the `Content-Type` and `Accept` headers.

# Returns
- A vector of header pairs. The authentication header (if any) comes first, followed by `Content-Type`, `Accept` and the `extra_headers`.

# Throws
- `AssertionError` unless exactly one of `bearer` and `x_api_key` is selected.
- `ArgumentError` if `api_key` is an empty string.
"""
function auth_header(api_key::Union{Nothing, AbstractString};
        bearer::Bool = true,
        x_api_key::Bool = false,
        extra_headers::AbstractVector = Vector{
            Pair{String, String},
        }[],
        kwargs...)
    @assert !(bearer && x_api_key) "Cannot use both `bearer` and `x_api_key`. Select only one format."
    @assert (bearer||x_api_key) "At least one of `bearer` and `x_api_key` must be selected."
    has_key = !isnothing(api_key)
    if has_key && isempty(api_key)
        throw(ArgumentError("`api_key` cannot be empty"))
    end
    headers = [
        "Content-Type" => "application/json",
        "Accept" => "application/json",
        extra_headers...
    ]
    # the authentication header goes to the front of the list
    if has_key
        bearer && pushfirst!(headers, "Authorization" => "Bearer $api_key")
        x_api_key && pushfirst!(headers, "x-api-key" => "$api_key")
    end
    return headers
end