In [None]:
from rich import print as rprint

### Basic LLM call


#### Sync


In [None]:
from tinyloop.inference.litellm import LLM

# Build the client once; the same instance is queried right below.
llm = LLM(temperature=0.1, model="openai/gpt-3.5-turbo")

# Synchronous request: calling the client object directly sends the prompt.
greeting = "Hello, how are you?"
response = llm(prompt=greeting)
rprint(response)

In [None]:
# Display whatever `llm.get_history()` returns; the bare name on the last
# line is what the notebook renders as the cell output.
history = llm.get_history()
history

#### Async


In [None]:
from tinyloop.inference.litellm import LLM

llm = LLM(model="openai/gpt-3.5-turbo", temperature=0.1)

response = await llm.acall(prompt="Hello, how are you?")
rprint(response)

In [None]:
# Display whatever `llm.get_history()` returns for the client created in the
# async example; the bare name on the last line is the cell output.
history = llm.get_history()
history

### Streaming


#### Async


In [None]:
from tinyloop.inference.litellm import LLM
from rich import print as rprint

llm = LLM(model="openai/gpt-3.5-turbo", temperature=0.1)

response = await llm.acall(prompt="Hello, how are you?", stream=True)
async for chunk in response:
    rprint(chunk)

In [None]:
# Display whatever `llm.get_history()` returns after the streamed call;
# the bare name on the last line is the cell output.
history = llm.get_history()
history

### Structured outputs


#### Sync


In [None]:
from tinyloop.inference.litellm import LLM
from pydantic import BaseModel

# One event as requested from the model: a name, a date kept as plain text,
# and the list of participants.
class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]


# Top-level container passed as `response_format`: a list of events.
class EventsList(BaseModel):
    events: list[CalendarEvent]


llm = LLM(temperature=0.1, model="openrouter/qwen/qwen-2.5-72b-instruct")

# Synchronous request; `response_format` receives the model class itself.
events_prompt = "List 5 important events in the XIX century"
response = llm(response_format=EventsList, prompt=events_prompt)
rprint(response)

#### Async


In [None]:
from tinyloop.inference.litellm import LLM

llm = LLM(model="openai/gpt-3.5-turbo", temperature=0.1)

response = await llm.acall(prompt="Hello, how are you?")
rprint(response)

In [None]:
from tinyloop.inference.litellm import LLM
from pydantic import BaseModel

llm = LLM(
    model="anthropic/claude-3-5-haiku-20241022",
    temperature=0.1,
)


class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]


class EventsList(BaseModel):
    events: list[CalendarEvent]


response = await llm.acall(
    prompt="List 5 important events in the XIX century", response_format=EventsList
)
rprint(response)

In [None]:
from tinyloop.inference.litellm import LLM
from pydantic import BaseModel

llm = LLM(
    model="openrouter/google/gemini-2.5-flash",
    temperature=0.1,
)


class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]


class EventsList(BaseModel):
    events: list[CalendarEvent]


response = await llm.acall(
    prompt="List 5 important events in the XIX century", response_format=EventsList
)
rprint(response)

### Vision


#### Sync


##### From PIL


In [None]:
from tinyloop.inference.litellm import LLM
from PIL import Image as PILImage
from tinyloop.features.vision import Image

llm = LLM(temperature=0.1, model="openai/gpt-4.1-nano")

# Open the local JPG with Pillow first, then wrap the PIL object
# in a tinyloop Image.
pil_image = PILImage.open("sutton.jpg")
image = Image.from_PIL(pil_image)

# Synchronous request with the image attached through `images`.
describe_prompt = "Describe the image"
response = llm(images=[image], prompt=describe_prompt)
rprint(response)

##### From File


In [None]:
from tinyloop.inference.litellm import LLM
from PIL import Image as PILImage
from tinyloop.features.vision import Image

llm = LLM(
    model="anthropic/claude-3-7-sonnet-20250219",
    temperature=0.1,
)

# Create tinyloop Image directly from a local file path (no PIL step here)
image = Image.from_file("sutton.jpg")


# Synchronous request with the image attached through `images`
response = llm(prompt="Describe the image", images=[image])

rprint(response)

##### From URL


In [None]:
from tinyloop.inference.litellm import LLM
from PIL import Image as PILImage
from tinyloop.features.vision import Image
import litellm

llm = LLM(
    model="openrouter/google/gemini-2.5-pro",
    temperature=0.1,
)
# NOTE: LiteLLM debug logging is deliberately not enabled in this cell. It is a
# process-wide switch, so it would also change the output of every cell run
# afterwards, and verbose request logs should not end up in saved outputs.

# Create tinyloop Image from a remote URL
url = "https://images.ctfassets.net/cnu0m8re1exe/2xdqQSvfebktASbHvILYH5/fcc91130ad1ff329765595b669549d8d/Meet-Jumping-Spider-Adorable-Arachnid.jpg?fm=jpg&fl=progressive&w=660&h=433&fit=fill"
image = Image.from_url(url)


# Synchronous request with the image attached through `images`
response = llm(prompt="Describe the image", images=[image])

rprint(response)

#### Async


##### From PIL


In [None]:
from tinyloop.inference.litellm import LLM
from PIL import Image as PILImage
from tinyloop.features.vision import Image
from rich import print as rprint

llm = LLM(
    model="openrouter/google/gemini-2.5-flash",
    temperature=0.1,
)


# Read a local JPG file as PIL image
pil_image = PILImage.open("sutton.jpg")

# Create tinyloop Image from PIL image
image = Image.from_PIL(pil_image)


response = await llm.acall(prompt="Describe the image", images=[image])

rprint(response)

##### From File


In [None]:
from tinyloop.inference.litellm import LLM
from PIL import Image as PILImage
from tinyloop.features.vision import Image

llm = LLM(
    model="openai/gpt-4.1-nano",
    temperature=0.1,
)

# Create tinyloop Image from PIL image
image = Image.from_file("sutton.jpg")


response = await llm.acall(prompt="Describe the image", images=[image])

rprint(response)

##### From URL


In [None]:
from tinyloop.inference.litellm import LLM
from PIL import Image as PILImage
from tinyloop.features.vision import Image

llm = LLM(
    model="anthropic/claude-3-7-sonnet-20250219",
    temperature=0.1,
)

# Create tinyloop Image from PIL image
url = "https://images.ctfassets.net/cnu0m8re1exe/2xdqQSvfebktASbHvILYH5/fcc91130ad1ff329765595b669549d8d/Meet-Jumping-Spider-Adorable-Arachnid.jpg?fm=jpg&fl=progressive&w=660&h=433&fit=fill"
image = Image.from_url(url)


response = await llm.acall(prompt="Describe the image", images=[image])

rprint(response)

### Function calling


#### Sync


In [None]:
from tinyloop.inference.litellm import LLM
import json
from tinyloop.features.function_calling import Tool


def get_current_weather(location: str, unit: str):
    """Get the current weather in a given location

    Parameters
    ----------
    location : str
        The city and state, e.g. San Francisco, CA
    unit : str {'celsius', 'fahrenheit'}
        Temperature unit

    Returns
    -------
    str
        a sentence indicating the weather
    """
    # NOTE(review): the docstring is left exactly as it was — `Tool` presumably
    # derives the tool description from it; confirm before rewording.
    # Demo stub: one canned answer for Boston, a generic one for anything else.
    # `unit` is accepted but never read.
    is_boston = location == "Boston, MA"
    return "The weather is 12F" if is_boston else f"Weather in {location} is sunny"


llm = LLM(
    model="openai/gpt-4.1",
    temperature=0.1,
)

# Wrap the plain function so it can be offered to the model and called locally.
weather_tool = Tool(get_current_weather)

inference = llm(
    prompt="What is the weather in Boston, MA?",
    tools=[weather_tool],
)

# The model may answer in plain text instead of requesting a tool; in that case
# `tool_calls` is None, so fall back to an empty list rather than iterating None.
tool_calls = inference.raw_response.choices[0].message.tool_calls or []
if not tool_calls:
    print("Model answered without requesting a tool call.")

for tool_call in tool_calls:
    tool_name = tool_call.function.name
    # Arguments arrive as a JSON string and must be decoded before use.
    tool_args = json.loads(tool_call.function.arguments)
    print(f"Tool: {tool_name}")
    print(f"Args: {tool_args}")
    print(weather_tool(**tool_args))

#### Async


In [None]:
from tinyloop.inference.litellm import LLM
import json
from tinyloop.features.function_calling import Tool


def get_current_weather(location: str, unit: str):
    """Get the current weather in a given location

    Parameters
    ----------
    location : str
        The city and state, e.g. San Francisco, CA
    unit : str {'celsius', 'fahrenheit'}
        Temperature unit

    Returns
    -------
    str
        a sentence indicating the weather
    """
    # NOTE(review): the docstring is left exactly as it was — `Tool` presumably
    # derives the tool description from it; confirm before rewording.
    # Demo stub: one canned answer for Boston, a generic one for anything else.
    # `unit` is accepted but never read.
    is_boston = location == "Boston, MA"
    return "The weather is 12F" if is_boston else f"Weather in {location} is sunny"


llm = LLM(
    model="openrouter/google/gemini-2.5-flash",
    temperature=0.1,
)

weather_tool = Tool(get_current_weather)

inference = await llm.ainvoke(
    prompt="What is the weather in Boston, MA (in fahrenheit)?",
    tools=[weather_tool],
)

for tool_call in inference.raw_response.choices[0].message.tool_calls:
    tool_name = tool_call.function.name
    tool_args = json.loads(tool_call.function.arguments)
    print(f"Tool: {tool_name}")
    print(f"Args: {tool_args}")
    print(weather_tool(**tool_args))

#### Streaming


In [1]:
from tinyloop.inference.litellm import LLM
from rich import print as rprint
from tinyloop.features.function_calling import Tool
import litellm


# def track_cost_callback(kwargs, completion_response, start_time, end_time):
#     cost = kwargs["response_cost"]
#     print(f"tloop_final_cost={cost:.6f}")


# litellm.success_callback = [track_cost_callback]


def get_current_weather(location: str, unit: str):
    """Get the current weather in a given location

    Parameters
    ----------
    location : str
        The city and state, e.g. San Francisco, CA
    unit : str {'celsius', 'fahrenheit'}
        Temperature unit

    Returns
    -------
    str
        a sentence indicating the weather
    """
    # NOTE(review): the docstring is left exactly as it was — `Tool` presumably
    # derives the tool description from it; confirm before rewording.
    # Demo stub: one canned answer for Boston, a generic one for anything else.
    # `unit` is accepted but never read.
    is_boston = location == "Boston, MA"
    return "The weather is 12F" if is_boston else f"Weather in {location} is sunny"


llm = LLM(
    model="anthropic/claude-sonnet-4-20250514",
    temperature=0.1,
)

weather_tool = Tool(get_current_weather)

inference = await llm.ainvoke(
    prompt="What is the weather in Boston, MA (in fahrenheit)?",
    tools=[weather_tool],
    stream=True,
    parallel_tool_calls=False,
)

async for chunk in inference:
    rprint(chunk)

# for tool_call in inference.raw_response.choices[0].message.tool_calls:
#     tool_name = tool_call.function.name
#     tool_args = json.loads(tool_call.function.arguments)
#     print(f"Tool: {tool_name}")
#     print(f"Args: {tool_args}")
#     print(weather_tool(**tool_args))

Failed to parse tool call arguments: {"l


Failed to parse tool call arguments: {"location": "B


Failed to parse tool call arguments: {"location": "Boston,


Failed to parse tool call arguments: {"location": "Boston, MA"


Failed to parse tool call arguments: {"location": "Boston, MA", "unit


Failed to parse tool call arguments: {"location": "Boston, MA", "unit": "fahrenh


tloop_final_cost=0.002502
captured_cost: 0.002502


  PydanticSerializationUnexpectedValue(Expected 9 fields but got 5: Expected `Message` - serialized value may not be as expected [input_value=Message(content="I'll get...er_specific_fields=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [input_value=Choices(finish_reason='to...r_specific_fields=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_python(


In [2]:
from tinyloop.inference.litellm import LLM
from rich import print as rprint
from tinyloop.features.function_calling import Tool
import litellm


# def track_cost_callback(kwargs, completion_response, start_time, end_time):
#     cost = kwargs["response_cost"]
#     print(f"tloop_final_cost={cost:.6f}")


# litellm.success_callback = [track_cost_callback]


def get_current_weather(location: str, unit: str):
    """Get the current weather in a given location

    Parameters
    ----------
    location : str
        The city and state, e.g. San Francisco, CA
    unit : str {'celsius', 'fahrenheit'}
        Temperature unit

    Returns
    -------
    str
        a sentence indicating the weather
    """
    # NOTE(review): the docstring is left exactly as it was — `Tool` presumably
    # derives the tool description from it; confirm before rewording.
    # Demo stub: one canned answer for Boston, a generic one for anything else.
    # `unit` is accepted but never read.
    is_boston = location == "Boston, MA"
    return "The weather is 12F" if is_boston else f"Weather in {location} is sunny"


llm = LLM(
    model="openai/gpt-4.1",
    temperature=0.1,
)

weather_tool = Tool(get_current_weather)

inference = await llm.ainvoke(
    prompt="What is the weather in Boston, MA (in fahrenheit)?",
    tools=[weather_tool],
    stream=True,
    parallel_tool_calls=False,
)

async for chunk in inference:
    rprint(chunk)

# for tool_call in inference.raw_response.choices[0].message.tool_calls:
#     tool_name = tool_call.function.name
#     tool_args = json.loads(tool_call.function.arguments)
#     print(f"Tool: {tool_name}")
#     print(f"Args: {tool_args}")
#     print(weather_tool(**tool_args))

Failed to parse tool call arguments: {"


Failed to parse tool call arguments: {"location


Failed to parse tool call arguments: {"location":"


Failed to parse tool call arguments: {"location":"Boston


Failed to parse tool call arguments: {"location":"Boston,


Failed to parse tool call arguments: {"location":"Boston, MA


Failed to parse tool call arguments: {"location":"Boston, MA","


Failed to parse tool call arguments: {"location":"Boston, MA","unit


Failed to parse tool call arguments: {"location":"Boston, MA","unit":"


Failed to parse tool call arguments: {"location":"Boston, MA","unit":"fahren


Failed to parse tool call arguments: {"location":"Boston, MA","unit":"fahrenheit


  PydanticSerializationUnexpectedValue(Expected 9 fields but got 5: Expected `Message` - serialized value may not be as expected [input_value=Message(content=None, rol...er_specific_fields=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [input_value=Choices(finish_reason='st...r_specific_fields=None)), input_type=Choices])"
Cost capture timed out after 2.0s, using 0.0


captured_cost: 0.0


In [None]:
from tinyloop.inference.litellm import LLM
from rich import print as rprint
from tinyloop.features.function_calling import Tool
from litellm import acompletion


# def track_cost_callback(kwargs, completion_response, start_time, end_time):
#     cost = kwargs["response_cost"]
#     print(f"tloop_final_cost={cost:.6f}")


# litellm.success_callback = [track_cost_callback]


def get_current_weather(location: str, unit: str):
    """Get the current weather in a given location

    Parameters
    ----------
    location : str
        The city and state, e.g. San Francisco, CA
    unit : str {'celsius', 'fahrenheit'}
        Temperature unit

    Returns
    -------
    str
        a sentence indicating the weather
    """
    # NOTE(review): the docstring is left exactly as it was — `Tool` presumably
    # derives the tool description from it; confirm before rewording.
    # Demo stub: one canned answer for Boston, a generic one for anything else.
    # `unit` is accepted but never read.
    is_boston = location == "Boston, MA"
    return "The weather is 12F" if is_boston else f"Weather in {location} is sunny"


llm = LLM(
    model="anthropic/claude-sonnet-4-20250514",
    temperature=0.1,
)

weather_tool = Tool(get_current_weather)

inference = await acompletion(
    model="anthropic/claude-sonnet-4-20250514",
    messages=[
        {
            "role": "user",
            "content": "What is the weather in Boston, MA (in fahrenheit)?",
        }
    ],
    tools=[weather_tool.definition],
    stream=True,
    parallel_tool_calls=False,
)

async for chunk in inference:
    print(chunk)

# for tool_call in inference.raw_response.choices[0].message.tool_calls:
#     tool_name = tool_call.function.name
#     tool_args = json.loads(tool_call.function.arguments)
#     print(f"Tool: {tool_name}")
#     print(f"Args: {tool_args}")
#     print(weather_tool(**tool_args))

In [None]:
from tinyloop.inference.litellm import LLM
from rich import print as rprint
from tinyloop.features.function_calling import Tool
from litellm import acompletion


# def track_cost_callback(kwargs, completion_response, start_time, end_time):
#     cost = kwargs["response_cost"]
#     print(f"tloop_final_cost={cost:.6f}")


# litellm.success_callback = [track_cost_callback]


def get_current_weather(location: str, unit: str):
    """Get the current weather in a given location

    Parameters
    ----------
    location : str
        The city and state, e.g. San Francisco, CA
    unit : str {'celsius', 'fahrenheit'}
        Temperature unit

    Returns
    -------
    str
        a sentence indicating the weather
    """
    # NOTE(review): the docstring is left exactly as it was — `Tool` presumably
    # derives the tool description from it; confirm before rewording.
    # Demo stub: one canned answer for Boston, a generic one for anything else.
    # `unit` is accepted but never read.
    is_boston = location == "Boston, MA"
    return "The weather is 12F" if is_boston else f"Weather in {location} is sunny"


llm = LLM(
    model="anthropic/claude-sonnet-4-20250514",
    temperature=0.1,
)

weather_tool = Tool(get_current_weather)

inference = await acompletion(
    model="openai/gpt-4.1",
    messages=[
        {
            "role": "user",
            "content": "What is the weather in Boston, MA (in fahrenheit)?",
        }
    ],
    tools=[weather_tool.definition],
    stream=True,
    parallel_tool_calls=False,
)

async for chunk in inference:
    print(chunk)

# for tool_call in inference.raw_response.choices[0].message.tool_calls:
#     tool_name = tool_call.function.name
#     tool_args = json.loads(tool_call.function.arguments)
#     print(f"Tool: {tool_name}")
#     print(f"Args: {tool_args}")
#     print(weather_tool(**tool_args))