In [1]:
%cd ../src

/Users/shanekercheval/repos/sik-llms/src


In [2]:
# Default model names shared by the examples below; edit these to run the
# notebook against different OpenAI / Anthropic models.
OPENAI_MODEL = 'gpt-5-mini'
CLAUDE_MODEL = 'claude-sonnet-4-latest'

# Clients

In [3]:
# Clients that are "registered" (via `@Client.register`) can be built with
# `create_client`, which selects the client from the model name.
from sik_llms import create_client

client = create_client(model_name=OPENAI_MODEL, temperature=0.1)
client

<sik_llms.openai.OpenAI at 0x1128fa7b0>

In [4]:
# Alternatively, instantiate the provider-specific client class directly.
from sik_llms import OpenAI

client = OpenAI(model_name=OPENAI_MODEL, temperature=0.1)
client

<sik_llms.openai.OpenAI at 0x112f88cd0>

In [5]:
# The same direct instantiation works for the Anthropic client class.
from sik_llms import Anthropic

client = Anthropic(model_name=CLAUDE_MODEL, temperature=0.1)
client

<sik_llms.anthropic.Anthropic at 0x112fe5a90>

# Chat

In [6]:
from sik_llms import create_client, user_message, TextChunkEvent

client = create_client(model_name=OPENAI_MODEL, temperature=0.1)

# `user_message` wraps the text in a role/content message (see the repr below);
# this `client` and `message` are reused by the following cells.
message = user_message("What is the capital of France?")
message

{'role': 'user', 'content': 'What is the capital of France?'}

### Run Synchronously via `__call__`

In [7]:
# Calling the client object itself (`__call__`) runs the request synchronously.
response = client(messages=[message])
print(response)  # full response object: token counts, costs, duration, and text
print(response.response)  # only the generated text

input_tokens=13 output_tokens=16 input_cost=3.25e-06 output_cost=3.2e-05 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=1.3067527090006479 response='The capital of France is Paris.'
The capital of France is Paris.


### Run Asynchronously via `run_async`

In [8]:
# `run_async` is the awaitable counterpart of calling the client directly;
# the notebook kernel allows `await` at the top level of a cell.
response = await client.run_async(messages=[message])
print(response)  # full response object
print(response.response)  # only the generated text

input_tokens=13 output_tokens=80 input_cost=3.25e-06 output_cost=0.00015999999999999999 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=1.446224584000447 response='The capital of France is Paris.'
The capital of France is Paris.


### Stream Asynchronously via `stream`

In [9]:
responses = []
async for response in client.stream(messages=[message]):
    if isinstance(response, TextChunkEvent):
        print(response.content, end="")
    responses.append(response)

The capital of France is Paris.

In [10]:
# Events collected from the stream above: the `TextChunkEvent`s followed by a
# final `TextResponse` carrying usage/cost information (see the output below).
responses

[TextChunkEvent(content='The', logprob=None),
 TextChunkEvent(content=' capital', logprob=None),
 TextChunkEvent(content=' of', logprob=None),
 TextChunkEvent(content=' France', logprob=None),
 TextChunkEvent(content=' is', logprob=None),
 TextChunkEvent(content=' Paris', logprob=None),
 TextChunkEvent(content='.', logprob=None),
 TextResponse(input_tokens=13, output_tokens=80, input_cost=3.25e-06, output_cost=0.00015999999999999999, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=1.5054410000011558, response='The capital of France is Paris.')]

### Generate multiple responses from a single input via `sample`

In [13]:
# `sample` returns `n` separate responses for the same input messages
# (a list of `TextResponse` objects, per the output below).
# NOTE(review): the high temperature is presumably there so the samples differ.
client = create_client(model_name='gpt-4o-mini', temperature=1.5)
responses = await client.sample(
    messages=[user_message('Generate a random number. Return only the number')],
    n=3,
)
print([r.response for r in responses])
responses

['42', '78546', '47']


[TextResponse(input_tokens=16, output_tokens=1, input_cost=2.4e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.5566354580005282, response='42'),
 TextResponse(input_tokens=16, output_tokens=2, input_cost=2.4e-06, output_cost=1.2e-06, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.6124812919988472, response='78546'),
 TextResponse(input_tokens=16, output_tokens=1, input_cost=2.4e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.5687354159999813, response='47')]

### Generate multiple responses from multiple inputs via `generate_multiple`

In [14]:
# `messages` is a list of conversations (each itself a list of messages);
# one response per conversation is returned, as the output below shows.
client = create_client(model_name=OPENAI_MODEL)
responses = await client.generate_multiple(
    messages=[
        [user_message("What is the capital of France? Return only the city name.")],
        [user_message("What is the capital of Italy? Return only the city name.")],
    ],
)
print([r.response for r in responses])
responses

['Paris', 'Rome']


[TextResponse(input_tokens=19, output_tokens=74, input_cost=4.749999999999999e-06, output_cost=0.000148, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=2.909112332999939, response='Paris'),
 TextResponse(input_tokens=19, output_tokens=74, input_cost=4.749999999999999e-06, output_cost=0.000148, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=1.7156433749987627, response='Rome')]

### Generate `sample_n` responses from multiple inputs via `generate_multiple`

In [15]:
# With `sample_n`, `generate_multiple` returns a list of lists: one inner list
# holding `sample_n` responses for each input conversation (see output below).
client = create_client(model_name='gpt-4o-mini', temperature=1.5)
responses_set = await client.generate_multiple(
    messages=[
        [user_message("Pick a random number between 1 and 100. Return only the number.")],
        [user_message("Pick a random number between 100 and 200. Return only the number.")],
    ],
    sample_n=5,
)
# Print the generated text of each sample, one line per input conversation.
for responses in responses_set:
    print([r.response for r in responses])
responses_set

['47', '37', '42', '73', '57']
['147', '143', '163', '157', '157']


[[TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.4945137500017154, response='47'),
  TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.6733545419992879, response='37'),
  TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.7809303750000254, response='42'),
  TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.493597624999893, response='73'),
  TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, out

---

# OpenAI Functions/Tools

In [18]:
from sik_llms import (
    create_client, user_message,
    Tool, Parameter, RegisteredClients,
)

# Describe the tool the model may choose: a name, a description, and the
# parameters it accepts.
# NOTE(review): no `func` is attached here, so presumably the tool is only
# predicted by the model and never executed in this example - confirm.
weather_tool = Tool(
    name='get_weather',
    description="Get the weather for a location.",
    parameters=[
        Parameter(
            name='location',
            param_type=str,
            required=True,
            description='The city and country for weather info.',
        ),
    ],
)

# `client_type` explicitly selects the OpenAI tools client; `model_name` still
# chooses which model is called.
client = create_client(
    client_type=RegisteredClients.OPENAI_TOOLS,
    model_name=OPENAI_MODEL,
    tools=[weather_tool],
)

message = user_message("What is the weather in Paris?")
response = await client.run_async(messages=[message])
# or `response = client(messages=[message])` for synchronous execution
print(response)
print('---')
# The predicted call: tool name, arguments, and the provider's call id.
print(response.tool_prediction)

input_tokens=141 output_tokens=153 input_cost=3.5249999999999996e-05 output_cost=0.000306 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=2.9399389999998675 tool_prediction=ToolPrediction(name='get_weather', arguments={'location': 'Paris, France'}, call_id='call_Togxm6Iw3m3dhxGHrLEoKeMc') message=None
---
name='get_weather' arguments={'location': 'Paris, France'} call_id='call_Togxm6Iw3m3dhxGHrLEoKeMc'


---

# Claude Functions/Tools

In [19]:
from sik_llms import (
    create_client, user_message,
    Tool, Parameter, RegisteredClients,
)

# Same tool description as in the OpenAI example; it is repeated so this
# section can be run on its own.
weather_tool = Tool(
    name='get_weather',
    description="Get the weather for a location.",
    parameters=[
        Parameter(
            name='location',
            param_type=str,
            required=True,
            description='The city and country for weather info.',
        ),
    ],
)

# `client_type` explicitly selects the Anthropic tools client.
client = create_client(
    client_type=RegisteredClients.ANTHROPIC_TOOLS,
    model_name=CLAUDE_MODEL,
    tools=[weather_tool],
)

message = user_message("What is the weather in Paris?")
response = await client.run_async(messages=[message])
# or `response = client(messages=[message])` for synchronous execution
print(response)
print('---')
# The predicted call: tool name, arguments, and the provider's call id.
print(response.tool_prediction)

input_tokens=402 output_tokens=40 input_cost=0.001206 output_cost=0.0006000000000000001 cache_write_tokens=0 cache_read_tokens=0 cache_write_cost=0.0 cache_read_cost=0.0 duration_seconds=2.3683890410011372 tool_prediction=ToolPrediction(name='get_weather', arguments={'location': 'Paris, France'}, call_id='toolu_01GoWnSxbeS9bTUZKBN1FQue') message=None
---
name='get_weather' arguments={'location': 'Paris, France'} call_id='toolu_01GoWnSxbeS9bTUZKBN1FQue'


---

# Structured Outputs via OpenAI

In [20]:
from pydantic import BaseModel
from sik_llms import create_client, system_message, user_message


# Pydantic model describing the structure the reply should be parsed into.
class CalendarEvent(BaseModel):  # noqa: D101
    name: str
    date: str
    participants: list[str]

# Passing the model class as `response_format` yields a response whose `parsed`
# attribute is a `CalendarEvent` instance (see the output below).
client = create_client(
    model_name=OPENAI_MODEL,
    response_format=CalendarEvent,
)
messages=[
    system_message("Extract the event information."),
    user_message("Alice and Bob are going to a science fair on Friday."),
]
response = await client.run_async(messages=messages)
# or `response = client(messages=messages)` for synchronous execution
print(response)
print('---')
print(response.parsed)

input_tokens=90 output_tokens=350 input_cost=2.2499999999999998e-05 output_cost=0.0007 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=4.061760750000758 parsed=CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob']) refusal=None
---
name='Science Fair' date='Friday' participants=['Alice', 'Bob']


---

# Structured Outputs via Anthropic

In [21]:
from pydantic import BaseModel
from sik_llms import create_client, system_message, user_message


# Same model as in the OpenAI example; it is redefined so this section can be
# run on its own.
class CalendarEvent(BaseModel):  # noqa: D101
    name: str
    date: str
    participants: list[str]

# Only the model name differs from the OpenAI example; the response again
# exposes the parsed `CalendarEvent` via `parsed` (see the output below).
client = create_client(
    model_name=CLAUDE_MODEL,
    response_format=CalendarEvent,
)
messages=[
    system_message("Extract the event information."),
    user_message("Alice and Bob are going to a science fair on Friday."),
]
response = await client.run_async(messages=messages)
# or `response = client(messages=messages)` for synchronous execution
print(response)
print('---')
print(response.parsed)

input_tokens=442 output_tokens=78 input_cost=0.0013260000000000001 output_cost=0.00117 cache_write_tokens=None cache_read_tokens=None cache_write_cost=None cache_read_cost=None duration_seconds=2.2905782089983404 parsed=CalendarEvent(name='Science fair', date='Friday', participants=['Alice', 'Bob']) refusal=None
---
name='Science fair' date='Friday' participants=['Alice', 'Bob']


---

# Reasoning via OpenAI

In [22]:
from sik_llms import (
    create_client, user_message,
    TextChunkEvent, TextResponse, ReasoningEffort,
)

# A reasoning model is requested by name, with the effort level passed via
# `reasoning_effort`.
client = create_client(
    model_name='o3-mini',
    reasoning_effort=ReasoningEffort.MEDIUM,
)
messages=[user_message("What is 1 + 2 + (3 * 4) + (5 * 6)?")]
# Will hold the final `TextResponse` (usage/cost summary) once the stream ends.
summary = None
async for response in client.stream(messages=messages):
    if isinstance(response, TextChunkEvent):
        # Echo the answer text as it streams in.
        print(response.content, end="")
    elif isinstance(response, TextResponse):
        summary = response
    else:
        # Fail loudly on any event type this example does not handle.
        raise ValueError(f"Unexpected response type: {response}")

To solve the expression 1 + 2 + (3 * 4) + (5 * 6), first calculate the multiplications inside the parentheses:

• 3 * 4 = 12
• 5 * 6 = 30

Now substitute these values back into the expression:

1 + 2 + 12 + 30

Then, proceed with the addition:

1 + 2 = 3  
3 + 12 = 15  
15 + 30 = 45

So, the final answer is 45.

In [23]:
summary

TextResponse(input_tokens=27, output_tokens=254, input_cost=2.97e-05, output_cost=0.0011176, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=2.4380913749992033, response='To solve the expression 1 + 2 + (3 * 4) + (5 * 6), first calculate the multiplications inside the parentheses:\n\n• 3 * 4 = 12\n• 5 * 6 = 30\n\nNow substitute these values back into the expression:\n\n1 + 2 + 12 + 30\n\nThen, proceed with the addition:\n\n1 + 2 = 3  \n3 + 12 = 15  \n15 + 30 = 45\n\nSo, the final answer is 45.')

---

# Reasoning via Claude

In [24]:
from sik_llms import (
    create_client, user_message,
    TextChunkEvent, ThinkingChunkEvent,
    TextResponse, ReasoningEffort,
)

client = create_client(
    model_name=CLAUDE_MODEL,
    reasoning_effort=ReasoningEffort.MEDIUM,
)
messages=[user_message("What is 1 + 2 + (3 * 4) + (5 * 6)?")]
summary = None

current_type = None
async for response in client.stream(messages=messages):
    is_text_chunk = isinstance(response, TextChunkEvent)
    is_thinking_chunk = isinstance(response, ThinkingChunkEvent)
    is_summary = isinstance(response, TextResponse)

    if is_text_chunk or is_thinking_chunk:
        if type(response) is not current_type:
            print(f"\n\n[{'THINKING' if is_thinking_chunk else 'TEXT'}]")
            current_type = type(response)
        print(response.content, end="")
    elif isinstance(response, TextResponse):
        summary = response
    else:
        raise ValueError(f"Unexpected response type: {response}")



[THINKING]
I need to calculate this step by step, following the order of operations (PEMDAS/BODMAS).

The expression is: 1 + 2 + (3 * 4) + (5 * 6)

First, I'll handle the operations in parentheses:
- (3 * 4) = 12
- (5 * 6) = 30

Now the expression becomes: 1 + 2 + 12 + 30

Now I can add from left to right:
1 + 2 = 3
3 + 12 = 15
15 + 30 = 45

[TEXT]
I'll solve this step by step, following the order of operations.

1 + 2 + (3 * 4) + (5 * 6)

First, I'll calculate the multiplications in parentheses:
- (3 * 4) = 12
- (5 * 6) = 30

Now the expression becomes:
1 + 2 + 12 + 30

Adding from left to right:
1 + 2 + 12 + 30 = 45

The answer is **45**.

In [None]:
# Final `TextResponse` captured by the streaming loop in the previous cell
# (stays `None` if the stream never yielded one).
summary

---

# ReasoningAgent

In [25]:
import json
from sik_llms.models_base import (
    Tool, Parameter, ThinkingEvent, ToolPredictionEvent,
    ToolResultEvent, TextChunkEvent, ErrorEvent, TextResponse,
)
from sik_llms.reasoning_agent import ReasoningAgent

####
# Define the tool functions
####
async def calculator(expression: str) -> str:
    """Evaluate a simple arithmetic expression and return the result as text.

    Only digits, ``+ - * / ( ) .`` and spaces are accepted; any other character
    produces an error string instead of an evaluation. Exceptions raised while
    validating or evaluating are reported as ``"Error: ..."`` strings rather
    than propagated to the caller.
    """
    permitted = frozenset('0123456789+-*/() .')
    try:
        if any(char not in permitted for char in expression):
            return "Error: Invalid characters in expression"
        # NOTE(review): `eval` is guarded only by the character whitelist above;
        # inputs such as `9**9**9` still pass it and are expensive to evaluate.
        result = eval(expression)
        return str(result)
    except Exception as exc:
        return f"Error: {exc!s}"


async def weather(location: str, units: str) -> str:
    """Mock weather tool - returns fake data."""
    # Return mock weather data
    weather_data = {
        'New York': '68',
        'San Francisco': '62',
        'Miami': '85',
        'Chicago': '55',
        'Los Angeles': '75',
    }
    for city in weather_data:  # noqa: PLC0206
        if city.lower() in location.lower():
            temp = weather_data[city]
            if units == 'C':
                # C = (°F - 32) x (5/9)
                temp = round((temp - 32) * 5 / 9)
            return {location: f"{temp}°{units}"}
    return None

####
# Define tool objects
####
# Each `Tool` pairs a description of the callable (name, description,
# parameters) with the coroutine passed as `func`.
calculator_tool = Tool(
    name='calculator',
    description="Perform mathematical calculations",
    parameters=[
        Parameter(
            name='expression',
            param_type=str,
            required=True,
            description="The mathematical expression to evaluate (e.g., '2 + 2', '5 * 10')",
        ),
    ],
    func=calculator,
)

weather_tool = Tool(
    name="get_weather",
    description="Get the current weather for a location",
    parameters=[
        Parameter(
            name="location",
            param_type=str,
            required=True,
            description="The name of the city (e.g., 'San Francisco', 'New York', 'London')",
        ),
        Parameter(
            name='units',
            param_type=str,
            required=True,
            description="The units for temperature",
            # Restricts the value to the two units the `weather` function formats.
            valid_values=['F', 'C'],
        ),
    ],
    func=weather,
)

# Create the reasoning agent
agent = ReasoningAgent(
    model_name=OPENAI_MODEL,  # You can change this to other models
    tools=[calculator_tool, weather_tool],
    max_iterations=10,
    temperature=0,
)

question = "I'm planning a trip to New York and Miami. What's the weather like in both cities? Also, if I have a budget of $2400 for a 6-day trip, how much can I spend per day?"  # noqa: E501
# Run the agent and collect the results
messages = [{"role": "user", "content": question}]

print(f"[QUESTION]: {question}\n")

current_iteration = 0

async for event in agent.stream(messages):
    if isinstance(event, ThinkingEvent):
        if hasattr(event, 'iteration') and event.iteration != current_iteration:
            current_iteration = event.iteration
            print(f"\n--- Iteration {current_iteration}\n")
        if event.content:
            print(f"\n[THINKING]:\n{event.content}")

    elif isinstance(event, ToolPredictionEvent):
        print("\n[TOOL PREDICTION]:")
        print(f"Tool: {event.name}`")
        print(f"Parameters: \n```json\n{json.dumps(event.arguments, indent=2)}\n```")

    elif isinstance(event, ToolResultEvent):
        print("\n[TOOL RESULT]:")
        print(f"Tool: {event.name}`")
        print(f"Result: {event.result}")

    elif isinstance(event, ErrorEvent):
        print("\n[ERROR]:")
        print(f"Error: {event.content}")

    elif isinstance(event, TextChunkEvent):
        # For streaming text generation
        if current_iteration  >= 0:  # Only print once for the header
            print("\n[FINAL RESPONSE]:")
            current_iteration = -1  # Set to an impossible iteration to avoid repeating

        print(event.content, end="")

    elif isinstance(event, TextResponse):
        # Print nothing here as we've already streamed the response
        pass

# Just for notebook display cleanliness
print("\n\n---\n\n")

[QUESTION]: I'm planning a trip to New York and Miami. What's the weather like in both cities? Also, if I have a budget of $2400 for a 6-day trip, how much can I spend per day?


--- Iteration 1


[THINKING]:
I need the current weather for New York. I'll call the weather tool to get up-to-date conditions.

[TOOL PREDICTION]:
Tool: get_weather`
Parameters: 
```json
{
  "location": "New York",
  "units": "F"
}
```

[TOOL RESULT]:
Tool: get_weather`
Result: {'New York': '68°F'}

--- Iteration 2


[THINKING]:
I have New York's current temperature (68°F). Next I should get Miami's current weather (use same units for consistency) and then compute the per-day budget for the 6-day trip ($2400 ÷ 6). I'll call the weather tool for Miami now.

[TOOL PREDICTION]:
Tool: get_weather`
Parameters: 
```json
{
  "location": "Miami",
  "units": "F"
}
```

[TOOL RESULT]:
Tool: get_weather`
Result: {'Miami': '85°F'}

--- Iteration 3


[THINKING]:
I have the current temperatures from the tool: New York 68°F

In [27]:
# `event` is still bound to the last event yielded by the agent stream in the
# previous cell.
# NOTE(review): this assumes the stream ended on the final `TextResponse`
# summary; run the previous cell first and confirm it finished without error.
print(f"Input Tokens: {event.input_tokens}")
print(f"Output Tokens: {event.output_tokens}")
print(f"Total Cost: {event.total_cost}")
print(f"Duration: {event.duration_seconds:.2f} seconds")

Input Tokens: 3389
Output Tokens: 3447
Total Cost: 0.007741249999999999
Duration: 41.64 seconds


---

# Prompt Caching

## Anthropic - `cache_control` parameter in `system_message`

In [28]:
from faker import Faker

# Generate a large block of random filler text to use as the cacheable content.
# NOTE(review): no seed is set, so the text differs on every run; presumably
# that is what guarantees a cache miss on the first request below - confirm.
cache_content = Faker().text(max_nb_chars=15_000)
# Preview the first 200 characters only.
cache_content[0:200]

'She civil staff but time majority onto claim. Economy if message physical someone choice off.\nAround left gas drop drop matter. Sound compare charge nature drug support art why. Pass administration on'

In [29]:
####
# This example is modified from anthropic's prompt-caching.ipynb notebook
# https://github.com/anthropics/anthropic-cookbook/blob/main/misc/prompt_caching.ipynb
####
from sik_llms import Anthropic
from sik_llms.models_base import system_message, user_message, assistant_message

client = Anthropic(
    model_name=CLAUDE_MODEL,
    temperature=0.1,
)
# The large system message is marked as cacheable via `cache_control`;
# `system_messages` is reused unchanged by the follow-up cell.
system_messages = [
    system_message("You are a helpful assistant."),
    system_message(
        cache_content,
        cache_control={'type': 'ephemeral'},
    ),
]
messages = [
    *system_messages,
    user_message("What is the first word of the cached text?"),
]

# first run should result in a cache-miss & write
response = await client.run_async(messages=messages)
print(response.response)
print('---')
print(f"Total Cost: {response.total_cost}")
print('---')
# Token accounting: a cache write shows up in `cache_write_tokens`.
print(f"Input Tokens: {response.input_tokens}")
print(f"Output Tokens: {response.output_tokens}")
print(f"Cache Write Tokens: {response.cache_write_tokens}")
print(f"Cache Read Tokens: {response.cache_read_tokens}")
print(f"Total Tokens: {response.total_tokens}")

The first word of the cached text is "She".
---
Total Cost: 0.01113975
---
Input Tokens: 17
Output Tokens: 14
Cache Write Tokens: 2901
Cache Read Tokens: 0
Total Tokens: 2932


In [30]:
# second run should result in a cache-hit & read
# The conversation repeats the same system messages (including the cacheable
# block) and appends the previous answer plus a follow-up question.
messages = [
    *system_messages,
    user_message("What is the first word of the cached text?"),
    assistant_message(response.response),
    user_message("What is the second word of the cached text?"),
]
response = await client.run_async(messages=messages)
print(response.response)
print('---')
print(f"Total Cost: {response.total_cost}")
print('---')
# Token accounting: a cache hit shows up in `cache_read_tokens`.
print(f"Input Tokens: {response.input_tokens}")
print(f"Output Tokens: {response.output_tokens}")
print(f"Cache Write Tokens: {response.cache_write_tokens}")
print(f"Cache Read Tokens: {response.cache_read_tokens}")
print(f"Total Tokens: {response.total_tokens}")

The second word of the cached text is "civil".
---
Total Cost: 0.0012123
---
Input Tokens: 44
Output Tokens: 14
Cache Write Tokens: 0
Cache Read Tokens: 2901
Total Tokens: 2959


## Anthropic - `cache_content` parameter in `__init__`

In [31]:
from faker import Faker

# Fresh random filler text for this section, replacing the `cache_content`
# generated for the previous example.
# NOTE(review): no seed is set, so the text differs on every run; presumably
# that is what guarantees a cache miss on the first request below - confirm.
cache_content = Faker().text(max_nb_chars=15_000)
# Preview the first 200 characters only.
cache_content[0:200]

'First thank research site tell. Whatever exist special defense knowledge effort.\nApproach citizen whether admit issue party hotel moment. Quite return stage tell sit affect plan.\nAlso cup including st'

In [32]:
####
# This example is modified from anthropic's prompt-caching.ipynb notebook
# https://github.com/anthropics/anthropic-cookbook/blob/main/misc/prompt_caching.ipynb
####
from sik_llms import Anthropic
from sik_llms.models_base import system_message, user_message, assistant_message

# Here the cacheable text is handed to the client via `cache_content` instead
# of being added as a system message with `cache_control`.
client = Anthropic(
    model_name=CLAUDE_MODEL,
    temperature=0.1,
    cache_content=cache_content,
)
# The messages themselves no longer contain the large text block.
messages = [
    system_message("You are a helpful assistant."),
    user_message("What is the first word of the cached text?"),
]

# first run should result in a cache-miss & write
response = await client.run_async(messages=messages)
print(response.response)
print('---')
print(f"Total Cost: {response.total_cost}")
print('---')
# Token accounting: a cache write shows up in `cache_write_tokens`.
print(f"Input Tokens: {response.input_tokens}")
print(f"Output Tokens: {response.output_tokens}")
print(f"Cache Write Tokens: {response.cache_write_tokens}")
print(f"Cache Read Tokens: {response.cache_read_tokens}")
print(f"Total Tokens: {response.total_tokens}")

The first word of the cached text is "First".
---
Total Cost: 0.011057250000000001
---
Input Tokens: 17
Output Tokens: 14
Cache Write Tokens: 2879
Cache Read Tokens: 0
Total Tokens: 2910


In [33]:
# second run should result in a cache-hit & read
# Same client (and therefore same `cache_content`) as the previous cell; the
# conversation appends the previous answer plus a follow-up question.
messages = [
    system_message("You are a helpful assistant."),
    user_message("What is the first word of the cached text?"),
    assistant_message(response.response),
    user_message("What is the second word of the cached text?"),
]
response = await client.run_async(messages=messages)
print(response.response)
print('---')
print(f"Total Cost: {response.total_cost}")
print('---')
# Token accounting: a cache hit shows up in `cache_read_tokens`.
print(f"Input Tokens: {response.input_tokens}")
print(f"Output Tokens: {response.output_tokens}")
print(f"Cache Write Tokens: {response.cache_write_tokens}")
print(f"Cache Read Tokens: {response.cache_read_tokens}")
print(f"Total Tokens: {response.total_tokens}")

The second word of the cached text is "thank".
---
Total Cost: 0.0012057
---
Input Tokens: 44
Output Tokens: 14
Cache Write Tokens: 0
Cache Read Tokens: 2879
Total Tokens: 2937


---

# Misc Examples

## Bedrock via OpenAI API

In [None]:
import os
from sik_llms import OpenAI, user_message
from dotenv import load_dotenv
# Load variables from a local `.env` file (if present) into the environment.
load_dotenv()

# Point the OpenAI client at a different server by overriding the URL and key.
# NOTE(review): `os.getenv` returns None when a variable is unset, so the
# client would be constructed with None values - make sure BEDROCK_API_URL and
# BEDROCK_API_KEY are defined before running this cell.
client = OpenAI(
    server_url=os.getenv('BEDROCK_API_URL'),
    api_key=os.getenv('BEDROCK_API_KEY'),
    model_name='anthropic.claude-3-haiku-20240307-v1:0',
    user='bedrock-requires-user?',
)
response = client(messages=[user_message("What is the capital of France?")])
print(response.response)

---

## OpenAI Log Probs

In [4]:
import math
from sik_llms import OpenAI, user_message, TextChunkEvent
from dotenv import load_dotenv
load_dotenv()

client = OpenAI(
    model_name='gpt-4o-mini',  # logprobs don't seem to be supported by gpt-5 models
    logprobs=True,
)
messages = [
    user_message("Write a haiku about the ocean."),
]
async for response in client.stream(messages=messages):
    if isinstance(response, TextChunkEvent):
        log_prob = response.logprob
        prob = math.exp(log_prob)
        print(f"{response.content} (logprob: {log_prob:.2f}, prob: {prob:.2f})")

W (logprob: -0.44, prob: 0.65)
aves (logprob: 0.00, prob: 1.00)
 kiss (logprob: -1.04, prob: 0.35)
 sandy (logprob: -1.17, prob: 0.31)
 shores (logprob: -0.35, prob: 0.71)
, (logprob: -0.00, prob: 1.00)
  
 (logprob: -0.00, prob: 1.00)
Wh (logprob: -0.10, prob: 0.91)
ispers (logprob: -0.00, prob: 1.00)
 of (logprob: -0.00, prob: 1.00)
 the (logprob: -0.08, prob: 0.92)
 deep (logprob: -0.05, prob: 0.95)
 blue (logprob: -1.35, prob: 0.26)
 call (logprob: -0.21, prob: 0.81)
, (logprob: -0.01, prob: 0.99)
  
 (logprob: -0.00, prob: 1.00)
End (logprob: -0.28, prob: 0.76)
less (logprob: -0.00, prob: 1.00)
 tales (logprob: -3.03, prob: 0.05)
 unfold (logprob: -0.23, prob: 0.80)
. (logprob: -0.00, prob: 1.00)
   (logprob: -2.58, prob: 0.08)


---

## Images

In [7]:
from sik_llms import create_client, user_message, ImageContent
from dotenv import load_dotenv
# Load variables from a local `.env` file (if present) into the environment.
load_dotenv()

# Build the image part of the message from a public URL.
image = ImageContent.from_url(
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/320px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
)

client = create_client(model_name=OPENAI_MODEL)
# `user_message` is given a list here, mixing plain text with the image content.
response = client(messages=[
    user_message([
        "What's in this image? Describe it briefly.",
        image,
    ]),
])
print(response)
print('---')
print(response.response)

input_tokens=100 output_tokens=121 input_cost=2.4999999999999998e-05 output_cost=0.000242 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=1.8928221669993945 response='A wooden boardwalk or footpath runs straight through a wide green meadow or marsh of tall grasses, leading toward the horizon. The scene is under a bright blue sky with scattered white clouds, giving a peaceful, open‑landscape feel.'
---
A wooden boardwalk or footpath runs straight through a wide green meadow or marsh of tall grasses, leading toward the horizon. The scene is under a bright blue sky with scattered white clouds, giving a peaceful, open‑landscape feel.


---

## Web Search w/ Claude

In [8]:
from sik_llms import create_client, user_message
from dotenv import load_dotenv
# Load variables from a local `.env` file (if present) into the environment.
load_dotenv()

# `web_search=True` is passed through `create_client` to enable web search for
# this client; the answer below cites search results.
client = create_client(model_name=CLAUDE_MODEL, web_search=True)
response = client(messages=[user_message("What the current net worth of Michael Jordan?")])
print(response)
print('---')
print(response.response)

input_tokens=2039 output_tokens=542 input_cost=0.006117 output_cost=0.00813 cache_write_tokens=0 cache_read_tokens=0 cache_write_cost=0.0 cache_read_cost=0.0 duration_seconds=12.750808541000879 response="Based on the search results, Michael Jordan's net worth in 2025 is estimated at $3.5 billion according to Forbes, making him the wealthiest former professional athlete in the world.\n\nThis massive fortune comes from several key sources:\n\n**Jordan Brand Royalties**: Jordan's contract was renegotiated into a 5% royalty on wholesale, giving him an estimated $350 million cash payout for 2024 alone. In 2024 Jordan Brand sold about $7 billion, 14% of Nike's total revenue.\n\n**Charlotte Hornets Sale**: In 2023, he sold his majority stake for $3 billion, nearly tripling his investment from when he bought a majority stake in the team for $175 million back in 2010.\n\n**NBA Career Earnings**: Jordan earned $90 million in salary during his NBA career, which while substantial, represents only 

---