In [1]:
# Move the kernel's working directory to the repo's `src` folder (presumably so that
# `sik_llms` resolves from the local checkout — confirm).
# NOTE(review): the path is relative, so re-running this cell without restarting the
# kernel resolves `../src` from the new location instead of the original one.
%cd ../src

/Users/shanekercheval/repos/sik-llms/src


In [2]:
# Default model identifiers reused by the example cells below; edit here to switch models.
OPENAI_MODEL = 'gpt-5-mini'
CLAUDE_MODEL = 'claude-sonnet-4'

# Clients

In [3]:
# For "registered" clients (via `@Client.register`), the client
# can be created with `create_client` by passing in the model name.
from sik_llms import create_client

# Look the client class up by model name; the trailing bare name displays its repr.
client = create_client(model_name=OPENAI_MODEL, temperature=0.1)
client

<sik_llms.openai.OpenAI at 0x1054ac8d0>

In [4]:
# Or, the client can be directly instantiated
from sik_llms import OpenAI
# Construct the OpenAI client class directly instead of going through `create_client`.
client = OpenAI(model_name=OPENAI_MODEL, temperature=0.1)
client

<sik_llms.openai.OpenAI at 0x110e8df90>

In [5]:
# The Anthropic client class can be instantiated directly in the same way.
from sik_llms import Anthropic

client = Anthropic(model_name=CLAUDE_MODEL, temperature=0.1)
client

<sik_llms.anthropic.Anthropic at 0x110e8fb50>

# Chat

In [6]:
from sik_llms import create_client, user_message, TextChunkEvent

# OpenAI-backed client shared by the chat examples that follow.
client = create_client(model_name=OPENAI_MODEL, temperature=0.1)

# `user_message` builds the role/content dict displayed as this cell's output.
message = user_message("What is the capital of France?")
message

{'role': 'user', 'content': 'What is the capital of France?'}

### Run Synchronously via `__call__`

In [7]:
# Calling the client object itself performs the request synchronously.
response = client(messages=[message])
# Printing the whole object shows token counts, costs and duration next to the text ...
print(response)
# ... while `.response` holds only the generated text.
print(response.response)

input_tokens=13 output_tokens=16 input_cost=3.25e-06 output_cost=3.2e-05 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=1.6645826250314713 response='The capital of France is Paris.'
The capital of France is Paris.


### Run Asynchronously via `run_async`

In [8]:
# Same request as the synchronous example, awaited via `run_async`
# (top-level `await` works because the notebook runs inside an event loop).
response = await client.run_async(messages=[message])
print(response)
print(response.response)

input_tokens=13 output_tokens=16 input_cost=3.25e-06 output_cost=3.2e-05 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=1.5292387079680339 response='The capital of France is Paris.'
The capital of France is Paris.


### Stream Asynchronously via `stream`

In [9]:
responses = []
async for response in client.stream(messages=[message]):
    if isinstance(response, TextChunkEvent):
        print(response.content, end="")
    responses.append(response)

The capital of France is Paris.

In [10]:
# Everything the stream yielded: the individual text chunks followed by one final TextResponse.
responses

[TextChunkEvent(content='The', logprob=None),
 TextChunkEvent(content=' capital', logprob=None),
 TextChunkEvent(content=' of', logprob=None),
 TextChunkEvent(content=' France', logprob=None),
 TextChunkEvent(content=' is', logprob=None),
 TextChunkEvent(content=' Paris', logprob=None),
 TextChunkEvent(content='.', logprob=None),
 TextResponse(input_tokens=13, output_tokens=80, input_cost=3.25e-06, output_cost=0.00015999999999999999, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=1.6412498749559745, response='The capital of France is Paris.')]

### Generate multiple responses from a single input via `sample`

In [11]:
# NOTE(review): the model is hard-coded to 'gpt-4o-mini' rather than OPENAI_MODEL, and a
# high temperature is used — presumably so that the sampled answers differ; confirm.
client = create_client(model_name='gpt-4o-mini', temperature=1.5)
# `sample` sends the same messages `n` times and returns one response per draw.
responses = await client.sample(
    messages=[user_message('Generate a random number. Return only the number')],
    n=3,
)
print([r.response for r in responses])
responses

['45', '53724', '42']


[TextResponse(input_tokens=16, output_tokens=1, input_cost=2.4e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.4291734170401469, response='45'),
 TextResponse(input_tokens=16, output_tokens=2, input_cost=2.4e-06, output_cost=1.2e-06, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.4908954999409616, response='53724'),
 TextResponse(input_tokens=16, output_tokens=1, input_cost=2.4e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.4522558340104297, response='42')]

### Generate multiple responses from multiple inputs via `generate_multiple`

In [12]:
client = create_client(model_name=OPENAI_MODEL)
# `messages` is a list of conversations (each itself a list of messages);
# one response is returned per conversation, as the recorded output shows.
responses = await client.generate_multiple(
    messages=[
        [user_message("What is the capital of France? Return only the city name.")],
        [user_message("What is the capital of Italy? Return only the city name.")],
    ],
)
print([r.response for r in responses])
responses

['Paris', 'Rome']


[TextResponse(input_tokens=19, output_tokens=10, input_cost=4.749999999999999e-06, output_cost=1.9999999999999998e-05, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=1.358596707927063, response='Paris'),
 TextResponse(input_tokens=19, output_tokens=74, input_cost=4.749999999999999e-06, output_cost=0.000148, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=1.9585146250901744, response='Rome')]

### Generate `sample_n` responses from multiple inputs via `generate_multiple`

In [13]:
# NOTE(review): hard-coded 'gpt-4o-mini' with a high temperature — presumably so the
# samples vary; confirm.
client = create_client(model_name='gpt-4o-mini', temperature=1.5)
# With `sample_n`, each conversation yields a list of responses, so the result is a
# list of lists (one inner list per conversation).
responses_set = await client.generate_multiple(
    messages=[
        [user_message("Pick a random number between 1 and 100. Return only the number.")],
        [user_message("Pick a random number between 100 and 200. Return only the number.")],
    ],
    sample_n=5,
)
# One printed line per conversation, listing the text of its samples.
for responses in responses_set:
    print([r.response for r in responses])
responses_set

['37', '42', '57', '47', '72']
['157', '157', '147', '147', '153']


[[TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.4226528749568388, response='37'),
  TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.40700400003697723, response='42'),
  TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.390481291920878, response='57'),
  TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, output_cost=6e-07, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=0.38550245901569724, response='47'),
  TextResponse(input_tokens=23, output_tokens=1, input_cost=3.45e-06, o

---

# OpenAI Functions/Tools

In [14]:
from sik_llms import (
    create_client, user_message,
    Tool, Parameter, RegisteredClients,
)

# Describe the tool the model may call. No `func` is attached here; the response below
# carries the predicted call in `tool_prediction`.
weather_tool = Tool(
    name='get_weather',
    description="Get the weather for a location.",
    parameters=[
        Parameter(
            name='location',
            param_type=str,
            required=True,
            description='The city and country for weather info.',
        ),
    ],
)

# `client_type` selects the tools-specific OpenAI client explicitly.
client = create_client(
    client_type=RegisteredClients.OPENAI_TOOLS,
    model_name=OPENAI_MODEL,
    tools=[weather_tool],
)

message = user_message("What is the weather in Paris?")
response = await client.run_async(messages=[message])
# or `response = client(messages=[message])` for synchronous execution
print(response)
print('---')
# The prediction exposes the tool name, the arguments chosen by the model, and a call id.
print(response.tool_prediction)

input_tokens=141 output_tokens=89 input_cost=3.5249999999999996e-05 output_cost=0.000178 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=1.85469154198654 tool_prediction=ToolPrediction(name='get_weather', arguments={'location': 'Paris, France'}, call_id='call_xvtWMQSARmodw5bee1elKPfv') message=None
---
name='get_weather' arguments={'location': 'Paris, France'} call_id='call_xvtWMQSARmodw5bee1elKPfv'


---

# Claude Functions/Tools

In [15]:
from sik_llms import (
    create_client, user_message,
    Tool, Parameter, RegisteredClients,
)

# Same tool description as in the OpenAI example; no `func` is attached, and the response
# below carries the predicted call in `tool_prediction`.
weather_tool = Tool(
    name='get_weather',
    description="Get the weather for a location.",
    parameters=[
        Parameter(
            name='location',
            param_type=str,
            required=True,
            description='The city and country for weather info.',
        ),
    ],
)

# `client_type` selects the tools-specific Anthropic client explicitly.
client = create_client(
    client_type=RegisteredClients.ANTHROPIC_TOOLS,
    model_name=CLAUDE_MODEL,
    tools=[weather_tool],
)

message = user_message("What is the weather in Paris?")
response = await client.run_async(messages=[message])
# or `response = client(messages=[message])` for synchronous execution
print(response)
print('---')
# The prediction exposes the tool name, the arguments chosen by the model, and a call id.
print(response.tool_prediction)

input_tokens=402 output_tokens=40 input_cost=0.001206 output_cost=0.0006000000000000001 cache_write_tokens=0 cache_read_tokens=0 cache_write_cost=0.0 cache_read_cost=0.0 duration_seconds=1.1365794580196962 tool_prediction=ToolPrediction(name='get_weather', arguments={'location': 'Paris, France'}, call_id='toolu_01SZbbKwFk2JeCgiRhwwrj6m') message=None
---
name='get_weather' arguments={'location': 'Paris, France'} call_id='toolu_01SZbbKwFk2JeCgiRhwwrj6m'


---

# Structured Outputs via OpenAI

In [16]:
from pydantic import BaseModel
from sik_llms import create_client, system_message, user_message


class CalendarEvent(BaseModel):  # noqa: D101
    name: str
    date: str
    participants: list[str]

client = create_client(
    model_name=OPENAI_MODEL,
    response_format=CalendarEvent,
)
messages=[
    system_message("Extract the event information."),
    user_message("Alice and Bob are going to a science fair on Friday."),
]
response = await client.run_async(messages=messages)
# or `response = client(messages=messages)` for synchronous execution
print(response)
print('---')
print(response.parsed)

input_tokens=90 output_tokens=222 input_cost=2.2499999999999998e-05 output_cost=0.000444 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=4.257336625014432 parsed=CalendarEvent(name='Science fair', date='Friday', participants=['Alice', 'Bob']) refusal=None
---
name='Science fair' date='Friday' participants=['Alice', 'Bob']


---

# Structured Outputs via Anthropic

In [17]:
from pydantic import BaseModel
from sik_llms import create_client, system_message, user_message


class CalendarEvent(BaseModel):  # noqa: D101
    name: str
    date: str
    participants: list[str]

client = create_client(
    model_name=CLAUDE_MODEL,
    response_format=CalendarEvent,
)
messages=[
    system_message("Extract the event information."),
    user_message("Alice and Bob are going to a science fair on Friday."),
]
response = await client.run_async(messages=messages)
# or `response = client(messages=messages)` for synchronous execution
print(response)
print('---')
print(response.parsed)

input_tokens=442 output_tokens=78 input_cost=0.0013260000000000001 output_cost=0.00117 cache_write_tokens=None cache_read_tokens=None cache_write_cost=None cache_read_cost=None duration_seconds=1.6880054170032963 parsed=CalendarEvent(name='Science fair', date='Friday', participants=['Alice', 'Bob']) refusal=None
---
name='Science fair' date='Friday' participants=['Alice', 'Bob']


---

# Reasoning via OpenAI

In [18]:
from sik_llms import (
    create_client, user_message,
    TextChunkEvent, TextResponse, ReasoningEffort,
)

client = create_client(
    model_name='o3-mini',
    reasoning_effort=ReasoningEffort.MEDIUM,
)
messages=[user_message("What is 1 + 2 + (3 * 4) + (5 * 6)?")]
summary = None
async for response in client.stream(messages=messages):
    if isinstance(response, TextChunkEvent):
        print(response.content, end="")
    elif isinstance(response, TextResponse):
        summary = response
    else:
        raise ValueError(f"Unexpected response type: {response}")

Let's break it down step by step:

1. Evaluate the multiplications:
   - 3 * 4 = 12
   - 5 * 6 = 30

2. Now substitute back into the expression:
   1 + 2 + 12 + 30

3. Then add everything together:
   1 + 2 = 3  
   3 + 12 = 15  
   15 + 30 = 45

So, the answer is 45.

In [19]:
# Final TextResponse captured by the streaming loop above (usage, cost and full text).
summary

TextResponse(input_tokens=27, output_tokens=246, input_cost=2.97e-05, output_cost=0.0010824, cache_write_tokens=None, cache_read_tokens=0, cache_write_cost=None, cache_read_cost=0.0, duration_seconds=2.1931866669328883, response="Let's break it down step by step:\n\n1. Evaluate the multiplications:\n   - 3 * 4 = 12\n   - 5 * 6 = 30\n\n2. Now substitute back into the expression:\n   1 + 2 + 12 + 30\n\n3. Then add everything together:\n   1 + 2 = 3  \n   3 + 12 = 15  \n   15 + 30 = 45\n\nSo, the answer is 45.")

---

# Reasoning via Claude

In [20]:
from sik_llms import (
    create_client, user_message,
    TextChunkEvent, ThinkingChunkEvent,
    TextResponse, ReasoningEffort,
)

client = create_client(
    model_name=CLAUDE_MODEL,
    reasoning_effort=ReasoningEffort.MEDIUM,
)
messages=[user_message("What is 1 + 2 + (3 * 4) + (5 * 6)?")]
summary = None

current_type = None
async for response in client.stream(messages=messages):
    is_text_chunk = isinstance(response, TextChunkEvent)
    is_thinking_chunk = isinstance(response, ThinkingChunkEvent)
    is_summary = isinstance(response, TextResponse)

    if is_text_chunk or is_thinking_chunk:
        if type(response) is not current_type:
            print(f"\n\n[{'THINKING' if is_thinking_chunk else 'TEXT'}]")
            current_type = type(response)
        print(response.content, end="")
    elif isinstance(response, TextResponse):
        summary = response
    else:
        raise ValueError(f"Unexpected response type: {response}")



[THINKING]
I need to solve this step by step, following the order of operations (PEMDAS/BODMAS).

The expression is: 1 + 2 + (3 * 4) + (5 * 6)

First, I'll handle the operations in parentheses:
- (3 * 4) = 12
- (5 * 6) = 30

Now the expression becomes: 1 + 2 + 12 + 30

Now I can add from left to right:
- 1 + 2 = 3
- 3 + 12 = 15
- 15 + 30 = 45

[TEXT]
I'll solve this step by step, following the order of operations.

1 + 2 + (3 * 4) + (5 * 6)

First, I'll calculate the multiplication in parentheses:
- (3 * 4) = 12
- (5 * 6) = 30

Now the expression becomes:
1 + 2 + 12 + 30

Adding from left to right:
1 + 2 + 12 + 30 = 45

The answer is **45**.

In [21]:
# Final TextResponse captured by the streaming loop above; in the recorded output its
# `response` text contains the thinking text followed directly by the answer text.
summary

TextResponse(input_tokens=60, output_tokens=293, input_cost=0.00018, output_cost=0.0043950000000000005, cache_write_tokens=0, cache_read_tokens=0, cache_write_cost=0.0, cache_read_cost=0.0, duration_seconds=3.489773334003985, response="I need to solve this step by step, following the order of operations (PEMDAS/BODMAS).\n\nThe expression is: 1 + 2 + (3 * 4) + (5 * 6)\n\nFirst, I'll handle the operations in parentheses:\n- (3 * 4) = 12\n- (5 * 6) = 30\n\nNow the expression becomes: 1 + 2 + 12 + 30\n\nNow I can add from left to right:\n- 1 + 2 = 3\n- 3 + 12 = 15\n- 15 + 30 = 45I'll solve this step by step, following the order of operations.\n\n1 + 2 + (3 * 4) + (5 * 6)\n\nFirst, I'll calculate the multiplication in parentheses:\n- (3 * 4) = 12\n- (5 * 6) = 30\n\nNow the expression becomes:\n1 + 2 + 12 + 30\n\nAdding from left to right:\n1 + 2 + 12 + 30 = 45\n\nThe answer is **45**.")

---

# ReasoningAgent

In [22]:
import json
from sik_llms.models_base import (
    Tool, Parameter, ThinkingEvent, ToolPredictionEvent,
    ToolResultEvent, TextChunkEvent, ErrorEvent, TextResponse,
)
from sik_llms.reasoning_agent import ReasoningAgent

####
# Define the tool functions
####
async def calculator(expression: str) -> str:
    """
    Evaluate a plain arithmetic expression and return the result as text.

    The expression is produced by the model, so it is treated as untrusted input.
    It is parsed with `ast` and evaluated by a small interpreter that only understands
    numeric literals, parentheses, unary signs and the operators `+ - * / // **`.
    `eval` is deliberately not used: the character whitelist alone still admits
    expressions such as `9**9**9**9` that would hang the kernel.

    Args:
        expression: Arithmetic expression, e.g. `'2 + 2'` or `'2400 / 6'`.

    Returns:
        `str()` of the computed value, or a message starting with `'Error: '` when the
        expression contains disallowed characters, cannot be parsed, or fails to evaluate.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Upper bound on the estimated size (in bits) of an integer power result.
    max_power_bits = 10_000

    def evaluate(node):
        """Recursively evaluate a whitelisted AST node; raise ValueError on anything else."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and right > 0
                and abs(left).bit_length() * right > max_power_bits
            ):
                # Integer powers grow without limit; refuse before computing them.
                raise ValueError("Exponent too large")
            return binary_ops[type(node.op)](left, right)
        raise ValueError("Unsupported expression")

    try:
        # Only allow simple arithmetic for safety
        allowed_chars = set('0123456789+-*/() .')
        if not all(c in allowed_chars for c in expression):
            return "Error: Invalid characters in expression"
        # `eval` ignored surrounding spaces; strip them so parsing behaves the same way.
        tree = ast.parse(expression.strip(), filename='<string>', mode='eval')
        return str(evaluate(tree))
    except Exception as e:
        return f"Error: {e!s}"


async def weather(location: str, units: str) -> "dict[str, str] | None":
    """
    Mock weather tool - returns fake data.

    Args:
        location: Free-text location; matched case-insensitively against the known
            city names by substring.
        units: `'F'` to report the stored Fahrenheit value, `'C'` to convert it to
            Celsius. Any other value is appended to the stored value unchanged.

    Returns:
        A single-entry dict mapping `location` (as passed in) to the formatted
        temperature, e.g. `{'New York': '68°F'}`, or `None` when no known city matches.
    """
    # Fahrenheit readings are stored as numbers so the Celsius conversion below can do
    # arithmetic on them (string values made `temp - 32` raise TypeError).
    weather_data = {
        'New York': 68,
        'San Francisco': 62,
        'Miami': 85,
        'Chicago': 55,
        'Los Angeles': 75,
    }
    requested = location.lower()
    for city, temp in weather_data.items():
        if city.lower() in requested:
            if units == 'C':
                # C = (°F - 32) x (5/9)
                temp = round((temp - 32) * 5 / 9)
            return {location: f"{temp}°{units}"}
    return None

####
# Define tool objects
####
# Tool wrapper around `calculator`: one required string parameter, `expression`.
calculator_tool = Tool(
    name='calculator',
    description="Perform mathematical calculations",
    parameters=[
        Parameter(
            name='expression',
            param_type=str,
            required=True,
            description="The mathematical expression to evaluate (e.g., '2 + 2', '5 * 10')",
        ),
    ],
    func=calculator,
)

# Tool wrapper around `weather`: the parameter names match the function's arguments,
# and `units` is restricted to 'F' or 'C' via `valid_values`.
weather_tool = Tool(
    name="get_weather",
    description="Get the current weather for a location",
    parameters=[
        Parameter(
            name="location",
            param_type=str,
            required=True,
            description="The name of the city (e.g., 'San Francisco', 'New York', 'London')",
        ),
        Parameter(
            name='units',
            param_type=str,
            required=True,
            description="The units for temperature",
            valid_values=['F', 'C'],
        ),
    ],
    func=weather,
)

# Create the reasoning agent
agent = ReasoningAgent(
    model_name=OPENAI_MODEL,  # You can change this to other models
    tools=[calculator_tool, weather_tool],
    max_iterations=10,
    temperature=0,
)

question = "I'm planning a trip to New York and Miami. What's the weather like in both cities? Also, if I have a budget of $2400 for a 6-day trip, how much can I spend per day?"  # noqa: E501
# Run the agent and collect the results
messages = [{"role": "user", "content": question}]

print(f"[QUESTION]: {question}\n")

current_iteration = 0

async for event in agent.stream(messages):
    if isinstance(event, ThinkingEvent):
        if hasattr(event, 'iteration') and event.iteration != current_iteration:
            current_iteration = event.iteration
            print(f"\n--- Iteration {current_iteration}\n")
        if event.content:
            print(f"\n[THINKING]:\n{event.content}")

    elif isinstance(event, ToolPredictionEvent):
        print("\n[TOOL PREDICTION]:")
        print(f"Tool: {event.name}`")
        print(f"Parameters: \n```json\n{json.dumps(event.arguments, indent=2)}\n```")

    elif isinstance(event, ToolResultEvent):
        print("\n[TOOL RESULT]:")
        print(f"Tool: {event.name}`")
        print(f"Result: {event.result}")

    elif isinstance(event, ErrorEvent):
        print("\n[ERROR]:")
        print(f"Error: {event.content}")

    elif isinstance(event, TextChunkEvent):
        # For streaming text generation
        if current_iteration  >= 0:  # Only print once for the header
            print("\n[FINAL RESPONSE]:")
            current_iteration = -1  # Set to an impossible iteration to avoid repeating

        print(event.content, end="")

    elif isinstance(event, TextResponse):
        # Print nothing here as we've already streamed the response
        pass

# Just for notebook display cleanliness
print("\n\n---\n\n")

[QUESTION]: I'm planning a trip to New York and Miami. What's the weather like in both cities? Also, if I have a budget of $2400 for a 6-day trip, how much can I spend per day?


--- Iteration 1


[THINKING]:
I need the current weather for New York and Miami. I'll first fetch the weather for New York using the weather tool.

[TOOL PREDICTION]:
Tool: get_weather`
Parameters: 
```json
{
  "location": "New York",
  "units": "F"
}
```

[TOOL RESULT]:
Tool: get_weather`
Result: {'New York': '68°F'}

--- Iteration 2


[THINKING]:
The get_weather result shows New York is 68°F — a mild, comfortable temperature. Next I should fetch the current weather for Miami so I can report both cities. After that I'll compute the daily budget from $2400 for 6 days ($2400 ÷ 6).

[TOOL PREDICTION]:
Tool: get_weather`
Parameters: 
```json
{
  "location": "Miami",
  "units": "F"
}
```

[TOOL RESULT]:
Tool: get_weather`
Result: {'Miami': '85°F'}

--- Iteration 3


[THINKING]:
The get_weather results are New York

In [23]:
# NOTE(review): `event` is the loop variable left over from the agent stream in the
# previous cell, i.e. whatever was yielded last. This cell presumably expects that to be
# the final TextResponse carrying usage totals — confirm, and re-run the previous cell first.
print(f"Input Tokens: {event.input_tokens}")
print(f"Output Tokens: {event.output_tokens}")
print(f"Total Cost: {event.total_cost}")
print(f"Duration: {event.duration_seconds:.2f} seconds")

Input Tokens: 3402
Output Tokens: 2736
Total Cost: 0.006322499999999999
Duration: 43.08 seconds


---

# Prompt Caching

## Anthropic - `cache_control` parameter in `system_message`

In [24]:
from faker import Faker

# Generate a long block of random filler text to act as the cacheable prompt content.
# No seed is set, so the text differs on every run (presumably deliberate, so that the
# first request below is a cache miss — confirm).
cache_content = Faker().text(max_nb_chars=15_000)
# Preview the first 200 characters.
cache_content[0:200]

'Begin whom area in along clear scientist. Tough effort open civil fish side.\nWhether road ask be employee. Hot direction work game where a color. Energy strong go someone strong.\nFeel may national. Sp'

In [25]:
####
# This example is modified from anthropic's prompt-caching.ipynb notebook
# https://github.com/anthropics/anthropic-cookbook/blob/main/misc/prompt_caching.ipynb
####
from sik_llms import Anthropic
from sik_llms.models_base import system_message, user_message, assistant_message

# Anthropic client reused by this cell and the follow-up request in the next cell.
client = Anthropic(
    model_name=CLAUDE_MODEL,
    temperature=0.1,
)
# Two system messages: a short instruction, then the large text block marked for
# caching through `cache_control`.
system_messages = [
    system_message("You are a helpful assistant."),
    system_message(
        cache_content,
        cache_control={'type': 'ephemeral'},
    ),
]
messages = [
    *system_messages,
    user_message("What is the first word of the cached text?"),
]

# first run should result in a cache-miss & write
response = await client.run_async(messages=messages)
print(response.response)
print('---')
print(f"Total Cost: {response.total_cost}")
print('---')
# Token accounting; in the recorded output the large block is reported under
# cache-write tokens and cache-read tokens are 0.
print(f"Input Tokens: {response.input_tokens}")
print(f"Output Tokens: {response.output_tokens}")
print(f"Cache Write Tokens: {response.cache_write_tokens}")
print(f"Cache Read Tokens: {response.cache_read_tokens}")
print(f"Total Tokens: {response.total_tokens}")

The first word of the cached text is "Begin".
---
Total Cost: 0.011061000000000001
---
Input Tokens: 17
Output Tokens: 14
Cache Write Tokens: 2880
Cache Read Tokens: 0
Total Tokens: 2911


In [26]:
# second run should result in a cache-hit & read
# The same system messages are resent, followed by the previous exchange and a new question.
messages = [
    *system_messages,
    user_message("What is the first word of the cached text?"),
    assistant_message(response.response),
    user_message("What is the second word of the cached text?"),
]
response = await client.run_async(messages=messages)
print(response.response)
print('---')
print(f"Total Cost: {response.total_cost}")
print('---')
# In the recorded output the large block now appears under cache-read tokens instead
# of cache-write tokens.
print(f"Input Tokens: {response.input_tokens}")
print(f"Output Tokens: {response.output_tokens}")
print(f"Cache Write Tokens: {response.cache_write_tokens}")
print(f"Cache Read Tokens: {response.cache_read_tokens}")
print(f"Total Tokens: {response.total_tokens}")

The second word of the cached text is "whom".
---
Total Cost: 0.001206
---
Input Tokens: 44
Output Tokens: 14
Cache Write Tokens: 0
Cache Read Tokens: 2880
Total Tokens: 2938


## Anthropic - `cache_content` parameter in `__init__`

In [27]:
from faker import Faker

# Generate a new block of random filler text for the second caching example.
# No seed is set, so the text differs on every run (presumably deliberate, so that the
# first request below is a cache miss — confirm).
cache_content = Faker().text(max_nb_chars=15_000)
# Preview the first 200 characters.
cache_content[0:200]

'Whose send space him part. Imagine back offer could threat chair western picture. Close reach prepare budget road practice guess.\nThank down sometimes factor.\nLife health remain great edge culture. Me'

In [28]:
####
# This example is modified from anthropic's prompt-caching.ipynb notebook
# https://github.com/anthropics/anthropic-cookbook/blob/main/misc/prompt_caching.ipynb
####
from sik_llms import Anthropic
from sik_llms.models_base import system_message, user_message, assistant_message

# Here the cacheable text is handed to the client constructor via `cache_content`
# instead of being sent as a system message with `cache_control`.
client = Anthropic(
    model_name=CLAUDE_MODEL,
    temperature=0.1,
    cache_content=cache_content,
)
# Only the short instruction and the question are passed as messages.
messages = [
    system_message("You are a helpful assistant."),
    user_message("What is the first word of the cached text?"),
]

# first run should result in a cache-miss & write
# NOTE(review): in the recorded output the model says it has no "cached text" and names
# "Thank" as the first word, although the generated text starts with "Whose". Caching
# itself is reported (cache-write tokens > 0), but how the content is presented to the
# model is worth verifying.
response = await client.run_async(messages=messages)
print(response.response)
print('---')
print(f"Total Cost: {response.total_cost}")
print('---')
print(f"Input Tokens: {response.input_tokens}")
print(f"Output Tokens: {response.output_tokens}")
print(f"Cache Write Tokens: {response.cache_write_tokens}")
print(f"Cache Read Tokens: {response.cache_read_tokens}")
print(f"Total Tokens: {response.total_tokens}")

I don't have any cached text to reference. The message you sent appears to be a collection of random sentences and phrases, but there isn't a specific "cached text" that I'm storing or referencing from our conversation.

If you're asking about the first word of the text you just sent me, that would be "Thank" (from "Thank down sometimes factor.").

Could you clarify what you mean by "cached text"? I'd be happy to help once I understand what specific text you're referring to.
---
Total Cost: 0.012538500000000001
---
Input Tokens: 17
Output Tokens: 112
Cache Write Tokens: 2882
Cache Read Tokens: 0
Total Tokens: 3011


In [29]:
# second run should result in a cache-hit & read
# The cached text is still supplied by the client (configured in the previous cell);
# the messages only carry the instruction, the previous exchange and a new question.
messages = [
    system_message("You are a helpful assistant."),
    user_message("What is the first word of the cached text?"),
    assistant_message(response.response),
    user_message("What is the second word of the cached text?"),
]
response = await client.run_async(messages=messages)
print(response.response)
print('---')
print(f"Total Cost: {response.total_cost}")
print('---')
# In the recorded output the large block now appears under cache-read tokens.
print(f"Input Tokens: {response.input_tokens}")
print(f"Output Tokens: {response.output_tokens}")
print(f"Cache Write Tokens: {response.cache_write_tokens}")
print(f"Cache Read Tokens: {response.cache_read_tokens}")
print(f"Total Tokens: {response.total_tokens}")

I don't have any cached text stored from our conversation. As I mentioned in my previous response, I don't maintain a cache of text that I can reference.

If you're referring to the text in your first message, the second word would be "down" (from the phrase "Thank down sometimes factor.").

However, I want to clarify that I'm not storing or caching any text - I'm simply reading and responding to what you've written in your messages. Could you help me understand what you mean by "cached text" so I can better assist you?
---
Total Cost: 0.0031206000000000003
---
Input Tokens: 142
Output Tokens: 122
Cache Write Tokens: 0
Cache Read Tokens: 2882
Total Tokens: 3146


---

# Misc Examples

## Bedrock via OpenAI API

In [None]:
import os
from sik_llms import OpenAI, user_message
from dotenv import load_dotenv
# Load environment variables (presumably from a local `.env` file) before reading them.
load_dotenv()

# Point the OpenAI client at a different server: the URL and key are read from the
# environment (`os.getenv` returns None when a variable is unset).
# NOTE(review): this cell has no execution count or recorded output, and the `user`
# value looks like a placeholder — confirm what the endpoint actually requires.
client = OpenAI(
    server_url=os.getenv('BEDROCK_API_URL'),
    api_key=os.getenv('BEDROCK_API_KEY'),
    model_name='anthropic.claude-3-haiku-20240307-v1:0',
    user='bedrock-requires-user?',
)
response = client(messages=[user_message("What is the capital of France?")])
print(response.response)

---

## OpenAI Log Probs

In [31]:
import math
from sik_llms import OpenAI, user_message, TextChunkEvent
from dotenv import load_dotenv
load_dotenv()

client = OpenAI(
    model_name='gpt-4o-mini',  # logprobs don't seem to be supported by gpt-5 models
    logprobs=True,
)
messages = [
    user_message("Write a haiku about the ocean."),
]
async for response in client.stream(messages=messages):
    if isinstance(response, TextChunkEvent):
        log_prob = response.logprob
        prob = math.exp(log_prob)
        print(f"{response.content} (logprob: {log_prob:.2f}, prob: {prob:.2f})")

W (logprob: -0.61, prob: 0.54)
aves (logprob: 0.00, prob: 1.00)
 whisper (logprob: -0.65, prob: 0.52)
 secrets (logprob: -0.06, prob: 0.94)
, (logprob: -0.00, prob: 1.00)
  
 (logprob: -0.00, prob: 1.00)
End (logprob: -0.64, prob: 0.53)
less (logprob: -0.00, prob: 1.00)
 blue (logprob: -0.16, prob: 0.85)
 ca (logprob: -3.16, prob: 0.04)
resses (logprob: -0.05, prob: 0.95)
 shore (logprob: -0.21, prob: 0.81)
, (logprob: -0.02, prob: 0.98)
  
 (logprob: -0.00, prob: 1.00)
Time (logprob: -0.97, prob: 0.38)
 flows (logprob: -1.29, prob: 0.27)
 like (logprob: -1.59, prob: 0.20)
 the (logprob: -0.08, prob: 0.92)
 tide (logprob: -0.02, prob: 0.98)
. (logprob: -0.00, prob: 1.00)
   (logprob: -1.91, prob: 0.15)


---

## Images

In [32]:
from sik_llms import create_client, user_message, ImageContent
from dotenv import load_dotenv
# Load environment variables (presumably API keys from a local `.env` file).
load_dotenv()

# Build image content from a remote URL.
image = ImageContent.from_url(
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/320px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
)

client = create_client(model_name=OPENAI_MODEL)
# A user message may be given as a list that mixes plain text with image content.
response = client(messages=[
    user_message([
        "What's in this image? Describe it briefly.",
        image,
    ]),
])
print(response)
print('---')
print(response.response)

input_tokens=100 output_tokens=116 input_cost=2.4999999999999998e-05 output_cost=0.000232 cache_write_tokens=None cache_read_tokens=0 cache_write_cost=None cache_read_cost=0.0 duration_seconds=2.346937666996382 response='A wooden boardwalk or footpath runs straight through tall green grasses across a flat wetland or meadow, leading toward a distant line of trees. Above is a wide blue sky streaked with wispy white clouds.'
---
A wooden boardwalk or footpath runs straight through tall green grasses across a flat wetland or meadow, leading toward a distant line of trees. Above is a wide blue sky streaked with wispy white clouds.


---

## Web Search w/ Claude

In [33]:
from sik_llms import create_client, user_message
from dotenv import load_dotenv
# Load environment variables (presumably API keys from a local `.env` file).
load_dotenv()

# `web_search=True` turns on web search for this client; the recorded answer refers to
# search results.
client = create_client(model_name=CLAUDE_MODEL, web_search=True)
response = client(messages=[user_message("What the current net worth of Michael Jordan?")])
print(response)
print('---')
print(response.response)

input_tokens=2039 output_tokens=535 input_cost=0.006117 output_cost=0.008025000000000001 cache_write_tokens=0 cache_read_tokens=0 cache_write_cost=0.0 cache_read_cost=0.0 duration_seconds=12.173501041019335 response="Based on my search results, Michael Jordan's current net worth is estimated at $3.5 billion in 2025. This makes him the wealthiest former professional athlete in the world.\n\nThe sources of his massive wealth include:\n\n**Jordan Brand Partnership with Nike**: Jordan's contract was renegotiated into a 5% royalty on wholesale, giving him an estimated $350 million cash payout for 2024 alone. Jordan earned an estimated $300 million in 2024 without setting foot on the court, which topped the $260 million Cristiano Ronaldo earned as the world's highest-paid active athlete.\n\n**Charlotte Hornets Sale**: In 2023, he sold his majority stake for $3 billion, nearly tripling his investment from when he bought the franchise in 2010 for a reported $275 million.\n\n**NBA Career**: Jor

---