In [1]:
import os
import warnings

# Keep the tutorial output readable.
# GRPC_VERBOSITY=NONE is gRPC's switch for turning its own log lines off;
# presumably this targets the gRPC-based Google client used later — confirm.
os.environ.update(GRPC_VERBOSITY="NONE")

# Hide every Python warning for the rest of the session.
warnings.simplefilter("ignore")

# Prerequisites

Please make sure your environment variables and dependencies are ready to use LLM services. The names of the environment variables are arbitrary because langrila modules accept those names as arguments.

In [2]:
from dotenv import load_dotenv

# Read environment variables from a dotenv file. The path is relative, so it
# depends on the directory the notebook kernel was started in.
# Presumably this file defines the variables whose names are passed to the
# clients later (OPENAI_API_KEY, AZURE_API_KEY, ...) — verify against your setup.
load_dotenv("../../.env_api")

True

# Import modules

The main modules are the `{Provider}Client` modules and the `Agent` module. The `{Provider}Client` module is inherited by each module for a specific LLM provider such as OpenAI, Google, or Anthropic.

In [3]:
from langrila import Agent
from langrila.anthropic import AnthropicClient
from langrila.aws import BedrockClient
from langrila.google import GoogleClient
from langrila.openai import OpenAIClient

# Basic usage

Instantiating client modules.

In [4]:
# Clients receive the *names* of environment variables, never the secret
# values themselves.

# --- OpenAI: direct API ---
openai_client = OpenAIClient(api_key_env_name="OPENAI_API_KEY")

# --- OpenAI: Azure-hosted, same client class with api_type="azure" ---
azure_openai_client = OpenAIClient(
    api_type="azure",
    api_key_env_name="AZURE_API_KEY",
    azure_endpoint_env_name="AZURE_ENDPOINT",
    azure_deployment_env_name="AZURE_DEPLOYMENT_ID",
    azure_api_version="2024-11-01-preview",
)

# --- Gemini: Google AI Studio (API key) ---
google_dev_client = GoogleClient(api_key_env_name="GEMINI_API_KEY")

# --- Gemini: Google Cloud Vertex AI (project + location instead of an API key) ---
vertexai_client = GoogleClient(
    location="us-central1",
    project_id_env_name="GOOGLE_CLOUD_PROJECT",
    api_type="vertexai",
)

# --- Claude: Anthropic API ---
anthropic_client = AnthropicClient(api_key_env_name="ANTHROPIC_API_KEY")

# AWS credential variable names shared by both Bedrock-backed clients below.
_aws_credential_env_names = {
    "aws_access_key_env_name": "AWS_ACCESS_KEY",
    "aws_secret_key_env_name": "AWS_SECRET_KEY",
}

# --- Claude: Amazon Bedrock, same client class with api_type="bedrock" ---
claude_bedrock_client = AnthropicClient(
    api_type="bedrock",
    aws_region="us-east-1",
    **_aws_credential_env_names,
)

# --- Amazon Bedrock Converse API (note the keyword: region_name, not aws_region) ---
bedrock_client = BedrockClient(
    region_name="us-east-1",
    **_aws_credential_env_names,
)

You can pass any client instance to `Agent`. Almost all arguments are the same as in the provider's SDK. Arguments of the `Agent` class are ultimately passed to the `{Provider}Client` module almost as they are, and `{Provider}Client` is just a simple wrapper module around each LLM provider's API. So please look at the API reference of each provider if you want to know which arguments can be accepted by `Agent`.

In [5]:
# Model identifiers that are used by more than one agent below.
_GPT_MODEL = "gpt-4o-mini-2024-07-18"
_GEMINI_MODEL = "gemini-2.0-flash-exp"

# OpenAI-family agents: identical settings, different client.
openai_agent = Agent(model=_GPT_MODEL, temperature=0.0, client=openai_client)
azure_openai_agent = Agent(model=_GPT_MODEL, temperature=0.0, client=azure_openai_client)

# Gemini agents: AI Studio and Vertex AI share the same model id.
google_agent = Agent(model=_GEMINI_MODEL, temperature=0.0, client=google_dev_client)
vertexai_agent = Agent(model=_GEMINI_MODEL, temperature=0.0, client=vertexai_client)

# Claude agents additionally set max_tokens (a required field of Anthropic's
# Messages API). The Bedrock variant uses the Bedrock-style model identifier.
claude_agent = Agent(
    model="claude-3-5-sonnet-20240620",
    max_tokens=500,
    temperature=0.0,
    client=anthropic_client,
)
claude_bedrock_agent = Agent(
    model="anthropic.claude-3-sonnet-20240229-v1:0",
    max_tokens=500,
    temperature=0.0,
    client=claude_bedrock_client,
)

# The Converse API agent names its model with `modelId` rather than `model`.
bedrock_agent = Agent(modelId="us.amazon.nova-lite-v1:0", client=bedrock_client)

# Generate text

## Prompt

You can provide your prompt in two ways: pass a string directly, or create a `Prompt` instance.

In [6]:
from langrila import Prompt, TextPrompt

In [7]:
# 1. A bare string used directly as the prompt
prompt1 = "Hello. How are you today?"

# 2. A Prompt whose contents is that same raw string
prompt2 = Prompt(contents=prompt1, role="user")

# 3. A Prompt whose contents is an explicit list of TextPrompt parts
prompt3 = Prompt(contents=[TextPrompt(text=prompt1)], role="user")

In pattern 2, the string specified as the `contents` argument is automatically converted to a list of `TextPrompt`.

In [8]:
prompt2 == prompt3

True

In [9]:
prompt2

Prompt(type='Prompt', role='user', contents=[TextPrompt(text='Hello. How are you today?')], name=None)

In [10]:
prompt3

Prompt(type='Prompt', role='user', contents=[TextPrompt(text='Hello. How are you today?')], name=None)

## Synchronous, non-streaming text generation

In [11]:
# Synchronous, non-streaming call with the plain-string prompt.
response = openai_agent.generate_text(prompt=prompt1)

# Equivalent calls using the Prompt instances instead of the raw string:
# response = openai_agent.generate_text(prompt=prompt2)
# response = openai_agent.generate_text(prompt=prompt3)

[32m[2025-01-14 09:59:39][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:39][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 09:59:41][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")][0m


The response object is a pydantic model that contains the formatted response from the provider API, the usage, and the raw response.

In [12]:
response

Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bff78c9d0>, raw=ChatCompletion(id='chatcmpl-ApPZMWkw8rPWUQyqyarq6qdf3hbtv', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1736816380, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_72ed7ab54c', usage=CompletionUsage(completion_tokens=30, prompt_tokens=14, total_tokens=44, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_to

In [13]:
# `contents` is a list of response parts; the single part here is a
# TextResponse, whose generated string lives in `.text`.
print(response.contents[0].text)

Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?


The `Agent` class accumulates usage across all API calls and interactions with sub-agents. Each sub-agent has its own name, and the root agent is named "root". You can access this accumulated usage via `response.usage`. `response.usage` is a dict-like object that provides the `items()` and `__getitem__()` methods.

In [14]:
response.usage.items()

dict_items([('root', Usage(model_name='gpt-4o-mini-2024-07-18', prompt_tokens=14, output_tokens=30))])

In [15]:
response.usage["root"]

Usage(model_name='gpt-4o-mini-2024-07-18', prompt_tokens=14, output_tokens=30)

You can access the raw response from the API, which is helpful for debugging.

In [16]:
response.raw

ChatCompletion(id='chatcmpl-ApPZMWkw8rPWUQyqyarq6qdf3hbtv', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1736816380, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_72ed7ab54c', usage=CompletionUsage(completion_tokens=30, prompt_tokens=14, total_tokens=44, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

The other clients work the same way.

Azure OpenAI

In [17]:
# Same call as for OpenAI, routed through the Azure-configured client.
# NOTE(review): the captured output reports model='gpt-4o-2024-05-13' although
# the agent was built with model="gpt-4o-mini-2024-07-18" — presumably Azure
# answers with whatever model backs the configured deployment; confirm.
response = azure_openai_agent.generate_text(prompt=prompt1)
response

[32m[2025-01-14 09:59:42][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:42][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 09:59:43][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bff78fc10>, raw=ChatCompletion(id='chatcmpl-ApPZOdgO2K8xWx4bBA4BsButU7KkF', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1736816382, model='gpt-4o-2024-05-13', object='chat.completion', service_tier=None, system_fingerprint='fp_65792305

In [18]:
response.raw

ChatCompletion(id='chatcmpl-ApPZOdgO2K8xWx4bBA4BsButU7KkF', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1736816382, model='gpt-4o-2024-05-13', object='chat.completion', service_tier=None, system_fingerprint='fp_65792305e4', usage=CompletionUsage(completion_tokens=30, prompt_tokens=14, total_tokens=44, completion_tokens_details=None, prompt_tokens_details=None), prompt_filter_results=[{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbre

Gemini on Google AI Studio

In [19]:
# Gemini through the API-key (Google AI Studio) client; the last line echoes
# the Response object.
response = google_agent.generate_text(prompt=prompt1)
response

[32m[2025-01-14 09:59:43][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:43][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 09:59:45][0m [34m[1mDEBUG | Response: [TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bff0fe8c0>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding

In [20]:
response.raw

GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=[SafetyRating(blocked=None, category='HARM_CATEGORY_HATE_SPEECH', probability='NEGLIGIBLE', probability_score=None, severity=None, severity_score=None), SafetyRating(blocked=None, category='HARM_CATEGORY_DANGEROUS_CONTENT', probability='NEGLIGIBLE', probability_score=None, severity=None, severity_score=None), SafetyRat

Gemini on VertexAI

In [21]:
# Same Gemini model, this time through the Vertex AI client.
response = vertexai_agent.generate_text(prompt=prompt1)
response

[32m[2025-01-14 09:59:45][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:45][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 09:59:47][0m [34m[1mDEBUG | Response: [TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7c002fba00>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding

In [22]:
response.raw

GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=None)], model_version='gemini-2.0-flash-exp', prompt_feedback=None, usage_metadata=GenerateContentResponseUsageMetadata(cached_content_token_count=None, candidates_token_count=58, prompt_token_count=7, total_token_count=65), automatic_function_calling_history=[], parsed=None)

Claude of Anthropic

In [23]:
# Claude through the Anthropic API client (agent configured with max_tokens=500).
response = claude_agent.generate_text(prompt=prompt1)
response

[32m[2025-01-14 09:59:47][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:47][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 09:59:49][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bff0ff2b0>, raw=Message(id='msg_01TY7BUXPMqaVoZ3HAnFWwBp', content=[TextBlock(text="Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?", type='text')], model='claude-3-5-sonnet-20240620', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=14, output_tokens=43, cache_creation_input_tokens=0, cache_read_input_tokens=0)), name=None, is_last_chunk=None, prompt=[{'role': 'user', 'content': [{'text': 'Hello. How are you today?', 'type': 'text'}]}])

In [24]:
response.raw

Message(id='msg_01TY7BUXPMqaVoZ3HAnFWwBp', content=[TextBlock(text="Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?", type='text')], model='claude-3-5-sonnet-20240620', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=14, output_tokens=43, cache_creation_input_tokens=0, cache_read_input_tokens=0))

Claude on Amazon Bedrock

In [25]:
# Claude served by Amazon Bedrock, using the AnthropicClient built with
# api_type="bedrock".
response = claude_bedrock_agent.generate_text(prompt=prompt1)
response

[32m[2025-01-14 09:59:49][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:49][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 09:59:51][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7c1bfeed70>, raw=Message(id='msg_bdrk_011YqoEwaaP6iqa4zYJsiGVe', content=[TextBlock(text="Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?", type='text')], model='claude-3-sonnet-20240229', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=14, output_tokens=45)), name=None, is_last_chunk=None, prompt=[{'role': 'user', 'content': [{'text': 'Hello. How are you today?', 'type': 'text'}]}])

In [26]:
response.raw

Message(id='msg_bdrk_011YqoEwaaP6iqa4zYJsiGVe', content=[TextBlock(text="Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?", type='text')], model='claude-3-sonnet-20240229', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=14, output_tokens=45))

Converse API model such as Amazon Nova

In [27]:
# Amazon Nova through the Bedrock Converse API client. In the captured output
# `raw` is a plain dict here, not an SDK object as with the other providers.
response = bedrock_agent.generate_text(prompt=prompt1)
response

[32m[2025-01-14 09:59:51][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:51][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 09:59:52][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm doing well, thank you for asking. How can I assist you today? If you have any questions or need support with something, feel free to let me know.")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm doing well, thank you for asking. How can I assist you today? If you have any questions or need support with something, feel free to let me know.")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfeba7fa0>, raw={'ResponseMetadata': {'RequestId': '8150b086-9690-4a64-b402-c92afe42a3a9', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Tue, 14 Jan 2025 00:59:52 GMT', 'content-type': 'application/json', 'content-length': '336', 'connection': 'keep-alive', 'x-amzn-requestid': '8150b086-9690-4a64-b402-c92afe42a3a9'}, 'RetryAttempts': 0}, 'output': {'message': {'role': 'assistant', 'content': [{'text': "Hello! I'm doing well, thank you for asking. How can I assist you today? If you have any questions or need support with something, feel free to let me know."}]}}, 'stopReason': 'end_turn', 'usage': {'inputTokens': 7, 'outputTokens': 38, 'totalTokens': 45}, 'metrics': {'latencyMs': 455}}, name=None, is_la

In [28]:
response.raw

{'ResponseMetadata': {'RequestId': '8150b086-9690-4a64-b402-c92afe42a3a9',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Tue, 14 Jan 2025 00:59:52 GMT',
   'content-type': 'application/json',
   'content-length': '336',
   'connection': 'keep-alive',
   'x-amzn-requestid': '8150b086-9690-4a64-b402-c92afe42a3a9'},
  'RetryAttempts': 0},
 'output': {'message': {'role': 'assistant',
   'content': [{'text': "Hello! I'm doing well, thank you for asking. How can I assist you today? If you have any questions or need support with something, feel free to let me know."}]}},
 'stopReason': 'end_turn',
 'usage': {'inputTokens': 7, 'outputTokens': 38, 'totalTokens': 45},
 'metrics': {'latencyMs': 455}}

## Constructor params vs generation params

Except for a few parameters, almost all parameters can be specified in both the constructor and the generation methods (such as `generate_text()`). Constructor params of the `Agent` class can be overridden by the arguments of the `generate_text` method (as well as the `stream_text(_async)` methods, as shown later).

In [29]:
# Variant A: every generation setting is fixed once, on the Agent itself ...
agent = Agent(
    client=google_dev_client,
    model="gemini-2.0-flash-exp",
    temperature=0.0,
    max_output_tokens=500,
)

# ... so the call only needs the prompt.
agent.generate_text(prompt="Hello!")

[32m[2025-01-14 09:59:53][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello!')][0m
[32m[2025-01-14 09:59:53][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 09:59:54][0m [34m[1mDEBUG | Response: [TextResponse(text='Hello there! How can I help you today?')][0m


Response(type='Response', role='assistant', contents=[TextResponse(text='Hello there! How can I help you today?')], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfeba7d00>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='Hello there! How can I help you today?\n')], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=[SafetyRating(blocked=None, category='HARM_CATEGORY_HATE_SPEECH', probability='NEGLIGIBLE', probability_score=None, severity=None, severity_score=None), SafetyRating(blocked=None, category='HARM_CATEGORY_DANGEROUS_CONTENT', probability='NEGLIGIBLE', probability_score=None, severity=None, severity_score=None), SafetyRating(blocked=Non

is the same as:

In [30]:
# Variant B: the Agent only knows its client ...
agent = Agent(
    client=google_dev_client,
)

# ... and the model and generation settings are supplied per call instead.
agent.generate_text(
    prompt="Hello!", model="gemini-2.0-flash-exp", temperature=0.0, max_output_tokens=500
)

[32m[2025-01-14 09:59:54][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello!')][0m
[32m[2025-01-14 09:59:54][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 09:59:55][0m [34m[1mDEBUG | Response: [TextResponse(text='Hello there! How can I help you today?')][0m


Response(type='Response', role='assistant', contents=[TextResponse(text='Hello there! How can I help you today?')], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfeba7880>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='Hello there! How can I help you today?\n')], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=[SafetyRating(blocked=None, category='HARM_CATEGORY_HATE_SPEECH', probability='NEGLIGIBLE', probability_score=None, severity=None, severity_score=None), SafetyRating(blocked=None, category='HARM_CATEGORY_DANGEROUS_CONTENT', probability='NEGLIGIBLE', probability_score=None, severity=None, severity_score=None), SafetyRating(blocked=Non

Passing arguments at initialization is useful for reusing the same parameters in multiple places. In particular, a multi-agent system invokes multiple agents at the same time, so it requires many parameters to be static. On the other hand, passing arguments at generation time lets you control the settings each time you call the API. This is helpful in the single-agent case.

Here is an example that specifies the same parameter both when initializing the agent and when generating the response. (As mentioned above, the latter takes priority.)

In [31]:
# Constructor-level default: ask for a single completion (n=1).
agent = Agent(
    client=openai_client,
    model="gpt-4o-mini-2024-07-18",
    n=1,
)

In [32]:
# Call-level n=2 takes precedence over the constructor's n=1: the captured
# output contains two TextResponse items.
response = agent.generate_text(prompt="Hello! How are you?", n=2)

[32m[2025-01-14 09:59:55][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello! How are you?')][0m
[32m[2025-01-14 09:59:55][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 09:59:56][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?"), TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")][0m


In [33]:
response.contents

[TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?"),
 TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")]

In this case, the `n` parameter is specified in both the constructor and the `generate_text` method, but `n=2`, the value passed to `generate_text`, is the one that is used.

## Asynchronous, non-streaming text generation

The `generate_text_async` method allows you to receive text asynchronously.

OpenAI

In [34]:
# Top-level `await` works here because the notebook kernel already runs an
# event loop (IPython autoawait); in a plain script, wrap the call in
# asyncio.run(...) instead.
response = await openai_agent.generate_text_async(prompt=prompt1)
response

[32m[2025-01-14 09:59:56][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:56][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 09:59:57][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfebb2770>, raw=ChatCompletion(id='chatcmpl-ApPZdlYcDa3zaoiMhqMVbjY8JNya8', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1736816397, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_bd83329f63', usage=CompletionUsage(completion_tokens=30, prompt_tokens=14, total_tokens=44, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_to

Azure OpenAI

In [35]:
# Async counterpart of the Azure OpenAI call; same agent, awaited.
response = await azure_openai_agent.generate_text_async(prompt=prompt1)
response

[32m[2025-01-14 09:59:57][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:57][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 09:59:59][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bff94f850>, raw=ChatCompletion(id='chatcmpl-ApPZeh9n2ZNYTCFveamTamHosvcWP', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1736816398, model='gpt-4o-2024-05-13', object='chat.completion', service_tier=None, system_fingerprint='fp_f3927aa0

Gemini on Google AI Studio

In [36]:
# Async counterpart of the Gemini call through the API-key (AI Studio) client.
response = await google_agent.generate_text_async(prompt=prompt1)
response

[32m[2025-01-14 09:59:59][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 09:59:59][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:00][0m [34m[1mDEBUG | Response: [TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bff0fc7f0>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding

Gemini on VertexAI

In [37]:
# Async counterpart of the Gemini call through the Vertex AI client.
response = await vertexai_agent.generate_text_async(prompt=prompt1)
response

[32m[2025-01-14 10:00:00][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:00][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 10:00:02][0m [34m[1mDEBUG | Response: [TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bff9a2230>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding

Claude on Anthropic

In [38]:
# Same asynchronous call, this time through the Claude agent.
response = await claude_agent.generate_text_async(prompt=prompt1)
# Display the returned Response (text contents, usage, and the provider's raw message).
response

[32m[2025-01-14 10:00:02][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:02][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:03][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfebec8e0>, raw=Message(id='msg_01AUALzZJsryUg9npp9ADBiW', content=[TextBlock(text="Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?", type='text')], model='claude-3-5-sonnet-20240620', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=14, output_tokens=43, cache_creation_input_tokens=0, cache_read_input_tokens=0)), name=None, is_last_chunk=None, prompt=[{'role': 'user', 'content': [{'text': 'Hello. How are you today?', 'type': 'text'}]}])

Claude on Amazon Bedrock

In [39]:
# Same asynchronous call, this time through the Claude-on-Bedrock agent.
response = await claude_bedrock_agent.generate_text_async(prompt=prompt1)
# Display the returned Response as the cell output.
response

[32m[2025-01-14 10:00:03][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:03][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:05][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfeb5d270>, raw=Message(id='msg_bdrk_01JPoQZiYZLfa2fDkwW8DLif', content=[TextBlock(text="Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?", type='text')], model='claude-3-sonnet-20240229', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=14, output_tokens=45)), name=None, is_last_chunk=None, prompt=[{'role': 'user', 'content': [{'text': 'Hello. How are you today?', 'type': 'text'}]}])

## Synchronous streaming text generation

OpenAI

In [40]:
# `stream_text` returns an iterable of chunks; it is consumed by the loop below.
streamed_response = openai_agent.stream_text(prompt=prompt1)

# Each chunk carries the text accumulated so far, so every printed line repeats
# the previous one plus the newly streamed piece.
# `chunk` stays bound to the last chunk after the loop and is inspected in the next cell.
for chunk in streamed_response:
    print(chunk.contents[0].text, flush=True)

[32m[2025-01-14 10:00:05][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:05][0m [1mINFO | root: Generating text[0m


Hello
Hello!
Hello! I'm
Hello! I'm just
Hello! I'm just a
Hello! I'm just a program
Hello! I'm just a program,
Hello! I'm just a program, so
Hello! I'm just a program, so I
Hello! I'm just a program, so I don't
Hello! I'm just a program, so I don't have
Hello! I'm just a program, so I don't have feelings
Hello! I'm just a program, so I don't have feelings,
Hello! I'm just a program, so I don't have feelings, but
Hello! I'm just a program, so I don't have feelings, but I'm
Hello! I'm just a program, so I don't have feelings, but I'm here
Hello! I'm just a program, so I don't have feelings, but I'm here and
Hello! I'm just a program, so I don't have feelings, but I'm here and ready
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you
Hello! I'm just a program, so I don't have feelings, but I'm

[32m[2025-01-14 10:00:06][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")][0m


Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?


Each chunk is the combination of the current and past chunks, and the `stream_text` method is designed so that its final result is the same as the response from the `generate_text` method.

In [41]:
# The loop variable still holds the final chunk: full text, with `is_last_chunk=True`.
chunk

Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfebb3160>, raw=<openai.Stream object at 0x7f7bff78f010>, name=None, is_last_chunk=True, prompt=None)

In [42]:
# Token usage recorded on the final chunk, listed as (name, Usage) pairs.
chunk.usage.items()

dict_items([('root', Usage(model_name='gpt-4o-mini-2024-07-18', prompt_tokens=14, output_tokens=29))])

The other clients have the same interface.

Azure OpenAI

In [43]:
# Same streaming loop, this time against the Azure OpenAI agent.
streamed_response = azure_openai_agent.stream_text(prompt=prompt1)

# Prints the accumulated text of every chunk; `chunk` is reused by the next cell.
for chunk in streamed_response:
    print(chunk.contents[0].text, flush=True)

[32m[2025-01-14 10:00:06][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:06][0m [1mINFO | root: Generating text[0m


Hello
Hello!
Hello! I'm
Hello! I'm just
Hello! I'm just a
Hello! I'm just a computer
Hello! I'm just a computer program
Hello! I'm just a computer program,
Hello! I'm just a computer program, so
Hello! I'm just a computer program, so I
Hello! I'm just a computer program, so I don't
Hello! I'm just a computer program, so I don't have
Hello! I'm just a computer program, so I don't have feelings
Hello! I'm just a computer program, so I don't have feelings,
Hello! I'm just a computer program, so I don't have feelings, but
Hello! I'm just a computer program, so I don't have feelings, but I'm
Hello! I'm just a computer program, so I don't have feelings, but I'm here
Hello! I'm just a computer program, so I don't have feelings, but I'm here and
Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready
Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to
Hello! I'm just a computer program, so I don't have feelings, but I'm here and 

[32m[2025-01-14 10:00:08][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")][0m


Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?


In [44]:
# Final chunk of the Azure OpenAI stream (full text, `is_last_chunk=True`).
chunk

Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfebb2650>, raw=<openai.Stream object at 0x7f7bff0ff190>, name=None, is_last_chunk=True, prompt=None)

Gemini on Google AI Studio

In [45]:
# Same streaming loop, this time against the Gemini (Google AI Studio) agent.
streamed_response = google_agent.stream_text(prompt=prompt1)

# Prints the accumulated text of every chunk; `chunk` is reused by the next cell.
for chunk in streamed_response:
    print(chunk.contents[0].text, flush=True)

[32m[2025-01-14 10:00:08][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:08][0m [1mINFO | root: Generating text[0m


I


I'm doing well, thank you for asking! As a large language model,
I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'
I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?



[32m[2025-01-14 10:00:09][0m [34m[1mDEBUG | Response: [TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")][0m


I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?



In [46]:
# Final chunk of the Gemini stream: `contents` holds the full accumulated text,
# while `raw` (per the recorded output) holds only the last streamed piece.
chunk

Response(type='Response', role='assistant', contents=[TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bff0fc5e0>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='m functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n')], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=None, finish_reason='STOP', grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=[SafetyRating(blocked=None, category='HARM_CATEGORY_HATE_S

Gemini on VertexAI

In [47]:
# Same streaming loop, this time against the Gemini-on-VertexAI agent.
streamed_response = vertexai_agent.stream_text(prompt=prompt1)

# Prints the accumulated text of every chunk; `chunk` is reused by the next cell.
for chunk in streamed_response:
    print(chunk.contents[0].text, flush=True)

[32m[2025-01-14 10:00:09][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:09][0m [1mINFO | root: Generating text[0m


I
I'm doing well, thank you for asking! As a large language model,
I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'
I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?



[32m[2025-01-14 10:00:11][0m [34m[1mDEBUG | Response: [TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")][0m


I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?



In [48]:
# Final chunk of the VertexAI stream: `contents` holds the full accumulated text,
# while `raw` (per the recorded output) holds only the last streamed piece.
chunk

Response(type='Response', role='assistant', contents=[TextResponse(text="I'm doing well, thank you for asking! As a large language model, I don't experience feelings in the same way humans do, but I'm functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe24fd60>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='m functioning optimally and ready to assist you. How are you doing today? Is there anything I can help you with?\n')], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=None, finish_reason='STOP', grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=None)], model_version='gemini-2.0-flash-exp', prompt_feedb

Claude on Anthropic

In [49]:
# Same streaming loop, this time against the Claude agent.
streamed_response = claude_agent.stream_text(prompt=prompt1)

# Prints the accumulated text of every chunk; `chunk` is reused by the next cell.
for chunk in streamed_response:
    print(chunk.contents[0].text, flush=True)

[32m[2025-01-14 10:00:11][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:11][0m [1mINFO | root: Generating text[0m


Hello! As
Hello! As an AI language model, I don't have feelings
Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist
Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have.
Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?


[32m[2025-01-14 10:00:12][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?")][0m


Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?


In [50]:
# Final chunk of the Claude stream; in the recorded output `raw` is None and `is_last_chunk` is True.
chunk

Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe26bfa0>, raw=None, name=None, is_last_chunk=True, prompt=None)

Claude on Bedrock

In [51]:
# Same streaming loop, this time against the Claude-on-Bedrock agent.
streamed_response = claude_bedrock_agent.stream_text(prompt=prompt1)

# Prints the accumulated text of every chunk; `chunk` is reused by the next cell.
for chunk in streamed_response:
    print(chunk.contents[0].text, flush=True)

[32m[2025-01-14 10:00:12][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:12][0m [1mINFO | root: Generating text[0m


Hello
Hello!
Hello! As
Hello! As an
Hello! As an AI
Hello! As an AI language
Hello! As an AI language model
Hello! As an AI language model,
Hello! As an AI language model, I
Hello! As an AI language model, I don
Hello! As an AI language model, I don't
Hello! As an AI language model, I don't have
Hello! As an AI language model, I don't have feelings
Hello! As an AI language model, I don't have feelings or
Hello! As an AI language model, I don't have feelings or emotions
Hello! As an AI language model, I don't have feelings or emotions,
Hello! As an AI language model, I don't have feelings or emotions, but
Hello! As an AI language model, I don't have feelings or emotions, but I
Hello! As an AI language model, I don't have feelings or emotions, but I'm
Hello! As an AI language model, I don't have feelings or emotions, but I'm operating
Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly
Hello! As an AI language model, I don't have feelings or emot

[32m[2025-01-14 10:00:14][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?")][0m


Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?


In [52]:
# Final chunk of the Claude-on-Bedrock stream; in the recorded output `raw` is None and `is_last_chunk` is True.
chunk

Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! As an AI language model, I don't have feelings or emotions, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfebee950>, raw=None, name=None, is_last_chunk=True, prompt=None)

A Converse API model on Amazon Bedrock; here, Amazon Nova.

In [53]:
# Same streaming loop, this time against the Bedrock Converse API agent.
streamed_response = bedrock_agent.stream_text(prompt=prompt1)

# Prints the accumulated text of every chunk; `chunk` is reused by the next cell.
for chunk in streamed_response:
    print(chunk.contents[0].text, flush=True)

[32m[2025-01-14 10:00:14][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:14][0m [1mINFO | root: Generating text[0m


Hello
Hello!
Hello! I
Hello! I'
Hello! I'm
Hello! I'm doing
Hello! I'm doing well
Hello! I'm doing well,
Hello! I'm doing well, thank
Hello! I'm doing well, thank you
Hello! I'm doing well, thank you for
Hello! I'm doing well, thank you for asking
Hello! I'm doing well, thank you for asking.
Hello! I'm doing well, thank you for asking. How
Hello! I'm doing well, thank you for asking. How can
Hello! I'm doing well, thank you for asking. How can I
Hello! I'm doing well, thank you for asking. How can I assist
Hello! I'm doing well, thank you for asking. How can I assist you
Hello! I'm doing well, thank you for asking. How can I assist you today
Hello! I'm doing well, thank you for asking. How can I assist you today?
Hello! I'm doing well, thank you for asking. How can I assist you today? If
Hello! I'm doing well, thank you for asking. How can I assist you today? If you
Hello! I'm doing well, thank you for asking. How can I assist you today? If you have
Hello! I'm doing well, thank you for

[32m[2025-01-14 10:00:15][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm doing well, thank you for asking. How can I assist you today? If you have any questions or need information on a particular topic, feel free to let me know.")][0m


Hello! I'm doing well, thank you for asking. How can I assist you today? If you have any questions or need information on a particular topic, feel free to let me know.


In [54]:
# Final chunk of the Converse API stream; in the recorded output `raw` is the
# closing metadata dict (token usage and latency metrics).
chunk

Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm doing well, thank you for asking. How can I assist you today? If you have any questions or need information on a particular topic, feel free to let me know.")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe24f1c0>, raw={'metadata': {'usage': {'inputTokens': 7, 'outputTokens': 40, 'totalTokens': 47}, 'metrics': {'latencyMs': 462}}}, name=None, is_last_chunk=True, prompt=None)

## Asynchronous streaming text generation

The `stream_text_async` method handles asynchronous streaming text generation.

OpenAI

In [55]:
# `stream_text_async` is called without `await`; its result is consumed with `async for`.
streamed_response = openai_agent.stream_text_async(prompt=prompt1)

# As in the synchronous case, each chunk carries the text accumulated so far.
# `chunk` stays bound to the last chunk and is inspected in the next cell.
async for chunk in streamed_response:
    print(chunk.contents[0].text, flush=True)

[32m[2025-01-14 10:00:15][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:15][0m [1mINFO | root: Generating text[0m


Hello
Hello!
Hello! I'm
Hello! I'm just
Hello! I'm just a
Hello! I'm just a program
Hello! I'm just a program,
Hello! I'm just a program, so
Hello! I'm just a program, so I
Hello! I'm just a program, so I don't
Hello! I'm just a program, so I don't have
Hello! I'm just a program, so I don't have feelings
Hello! I'm just a program, so I don't have feelings,
Hello! I'm just a program, so I don't have feelings, but
Hello! I'm just a program, so I don't have feelings, but I'm
Hello! I'm just a program, so I don't have feelings, but I'm here
Hello! I'm just a program, so I don't have feelings, but I'm here and
Hello! I'm just a program, so I don't have feelings, but I'm here and ready
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you
Hello! I'm just a program, so I don't have feelings, but I'm

[32m[2025-01-14 10:00:16][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")][0m


Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?


In [56]:
# Final chunk of the async stream; in the recorded output `raw` is an `openai.AsyncStream` object.
chunk

Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe27fdf0>, raw=<openai.AsyncStream object at 0x7f7bfebeeb30>, name=None, is_last_chunk=True, prompt=None)

The other clients work the same way, so their demonstration is skipped.

# System prompt

In langrila, every client accepts a `system_instruction` argument. It can be specified in both the constructor and the `generate_text(_async)`/`stream_text(_async)` methods; the latter overrides the constructor's `system_instruction`. The system instruction must be a `SystemPrompt` instance because the OpenAI API requires a role property.

In [57]:
from langrila import SystemPrompt

# System instruction shared by the per-provider examples that follow.
# NOTE(review): the prompt text reads "You must to answer" — a grammar slip; left
# untouched here because the recorded outputs echo this exact string.
system_prompt = SystemPrompt(
    role="system",  # or "developer", the role used for models later than o1 on the OpenAI API.
    contents="You must to answer the question in Japanese.",
)

OpenAI

In [58]:
# Supply the system instruction for this call only, via the generation method.
response = openai_agent.generate_text(prompt=prompt1, system_instruction=system_prompt)
# Display the Response; in the recorded run the reply is in Japanese.
response

[32m[2025-01-14 10:00:17][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:17][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 10:00:18][0m [34m[1mDEBUG | Response: [TextResponse(text='こんにちは。今日は元気です。あなたはいかがですか？')][0m


Response(type='Response', role='assistant', contents=[TextResponse(text='こんにちは。今日は元気です。あなたはいかがですか？')], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe24f940>, raw=ChatCompletion(id='chatcmpl-ApPZxkwlXDszbGtZ7CDGyZilJrT4U', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='こんにちは。今日は元気です。あなたはいかがですか？', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1736816417, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_bd83329f63', usage=CompletionUsage(completion_tokens=15, prompt_tokens=27, total_tokens=42, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0))), name=None, is_last_chunk=None, prompt=[{'role': 'system', 'content': 'You must to answer the question in Japanese

Azure OpenAI

In [59]:
# Same per-call system instruction, this time through the Azure OpenAI agent.
response = azure_openai_agent.generate_text(prompt=prompt1, system_instruction=system_prompt)
# Display the returned Response as the cell output.
response

[32m[2025-01-14 10:00:18][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:18][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:19][0m [34m[1mDEBUG | Response: [TextResponse(text='こんにちは。おかげさまで元気です。あなたはいかがですか？')][0m


Response(type='Response', role='assistant', contents=[TextResponse(text='こんにちは。おかげさまで元気です。あなたはいかがですか？')], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe24dae0>, raw=ChatCompletion(id='chatcmpl-ApPZyltFuu8xxwzKlRaAleE52YOWU', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='こんにちは。おかげさまで元気です。あなたはいかがですか？', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1736816418, model='gpt-4o-2024-05-13', object='chat.completion', service_tier=None, system_fingerprint='fp_65792305e4', usage=CompletionUsage(completion_tokens=18, prompt_tokens=27, total_tokens=45, completion_tokens_details=None, prompt_tokens_details=None), prompt_filter_results=[{'prompt_index': 0, 'conte

Gemini on Google AI Studio

In [60]:
# Same per-call system instruction, this time through the Gemini (Google AI Studio) agent.
response = google_agent.generate_text(prompt=prompt1, system_instruction=system_prompt)
# Display the returned Response as the cell output.
response

[32m[2025-01-14 10:00:19][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:19][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:20][0m [34m[1mDEBUG | Response: [TextResponse(text='こんにちは。今日は元気ですか？')][0m


Response(type='Response', role='assistant', contents=[TextResponse(text='こんにちは。今日は元気ですか？')], usage=<langrila.core.usage.NamedUsage object at 0x7f7bff0c4f10>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='こんにちは。今日は元気ですか？\n')], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=[SafetyRating(blocked=None, category='HARM_CATEGORY_HATE_SPEECH', probability='NEGLIGIBLE', probability_score=None, severity=None, severity_score=None), SafetyRating(blocked=None, category='HARM_CATEGORY_DANGEROUS_CONTENT', probability='NEGLIGIBLE', probability_score=None, severity=None, severity_score=None), SafetyRating(blocked=None, category='HARM_CATEGORY_HARASSMENT', probab

Gemini on VertexAI

In [61]:
# Same per-call system instruction, this time through the Gemini-on-VertexAI agent.
response = vertexai_agent.generate_text(prompt=prompt1, system_instruction=system_prompt)
# Display the returned Response as the cell output.
response

[32m[2025-01-14 10:00:20][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:20][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:22][0m [34m[1mDEBUG | Response: [TextResponse(text='こんにちは。今日は元気ですか？')][0m


Response(type='Response', role='assistant', contents=[TextResponse(text='こんにちは。今日は元気ですか？')], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe24ce20>, raw=GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='こんにちは。今日は元気ですか？\n')], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=inf, finish_reason='STOP', grounding_metadata=None, index=None, logprobs_result=None, safety_ratings=None)], model_version='gemini-2.0-flash-exp', prompt_feedback=None, usage_metadata=GenerateContentResponseUsageMetadata(cached_content_token_count=None, candidates_token_count=7, prompt_token_count=16, total_token_count=23), automatic_function_calling_history=[], parsed=None), name=None, is_last_chunk=None, prompt=None)

Claude on Anthropic

In [62]:
# Same per-call system instruction, this time through the Claude agent.
response = claude_agent.generate_text(prompt=prompt1, system_instruction=system_prompt)
# Display the returned Response as the cell output.
response

[32m[2025-01-14 10:00:22][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:22][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:23][0m [34m[1mDEBUG | Response: [TextResponse(text='こんにちは。お元気ですか？今日の気分はいかがですか？')][0m


Response(type='Response', role='assistant', contents=[TextResponse(text='こんにちは。お元気ですか？今日の気分はいかがですか？')], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe13bc10>, raw=Message(id='msg_01BfLFMxpceAtgdp5s868Vmk', content=[TextBlock(text='こんにちは。お元気ですか？今日の気分はいかがですか？', type='text')], model='claude-3-5-sonnet-20240620', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=23, output_tokens=28, cache_creation_input_tokens=0, cache_read_input_tokens=0)), name=None, is_last_chunk=None, prompt=[{'role': 'user', 'content': [{'text': 'Hello. How are you today?', 'type': 'text'}]}])

Claude on Bedrock

In [63]:
# Same per-call system instruction, this time through the Claude-on-Bedrock agent.
response = claude_bedrock_agent.generate_text(prompt=prompt1, system_instruction=system_prompt)
# Display the returned Response as the cell output.
response

[32m[2025-01-14 10:00:23][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:23][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:25][0m [34m[1mDEBUG | Response: [TextResponse(text='こんにちは。私は人工知能のAssistantですので、気分というものはありませんが、きちんと動作しています。何かお手伝いできることがあれば、喜んでお答えします。')][0m


Response(type='Response', role='assistant', contents=[TextResponse(text='こんにちは。私は人工知能のAssistantですので、気分というものはありませんが、きちんと動作しています。何かお手伝いできることがあれば、喜んでお答えします。')], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe24ee90>, raw=Message(id='msg_bdrk_01RmRBktfVKD2NN7NYSvNbtH', content=[TextBlock(text='こんにちは。私は人工知能のAssistantですので、気分というものはありませんが、きちんと動作しています。何かお手伝いできることがあれば、喜んでお答えします。', type='text')], model='claude-3-sonnet-20240229', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=23, output_tokens=64)), name=None, is_last_chunk=None, prompt=[{'role': 'user', 'content': [{'text': 'Hello. How are you today?', 'type': 'text'}]}])

Converse API for Bedrock

In [64]:
# Same per-call system instruction, this time through the Bedrock Converse API agent.
response = bedrock_agent.generate_text(prompt=prompt1, system_instruction=system_prompt)
# Display the returned Response as the cell output.
response

[32m[2025-01-14 10:00:25][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:25][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:26][0m [34m[1mDEBUG | Response: [TextResponse(text='こんにちは。今日はいかがお過ごしですか？元気ですか？')][0m


Response(type='Response', role='assistant', contents=[TextResponse(text='こんにちは。今日はいかがお過ごしですか？元気ですか？')], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe24ffa0>, raw={'ResponseMetadata': {'RequestId': 'df8c2eb3-446d-4f67-b854-d4c982a71b0f', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Tue, 14 Jan 2025 01:00:26 GMT', 'content-type': 'application/json', 'content-length': '259', 'connection': 'keep-alive', 'x-amzn-requestid': 'df8c2eb3-446d-4f67-b854-d4c982a71b0f'}, 'RetryAttempts': 0}, 'output': {'message': {'role': 'assistant', 'content': [{'text': 'こんにちは。今日はいかがお過ごしですか？元気ですか？'}]}}, 'stopReason': 'end_turn', 'usage': {'inputTokens': 16, 'outputTokens': 15, 'totalTokens': 31}, 'metrics': {'latencyMs': 287}}, name=None, is_last_chunk=None, prompt=None)

# Response schema using tool calling

A response schema in langrila is implemented simply as tool calling. The generated args are validated with pydantic and then finally output as a text response. This feature is enabled by the `response_schema_as_tool` argument.

In [65]:
from enum import Enum

from pydantic import BaseModel, Field


# Languages the greeting may be given in. The `str` mixin makes each member a
# plain string as well, so the value (e.g. "Japanese") is what appears in JSON.
class Language(str, Enum):
    japanese = "Japanese"
    english = "English"
    french = "French"
    german = "German"


# Part of the day a greeting applies to; string-valued like `Language`.
class GreetingTime(str, Enum):
    morning = "morning"
    afternoon = "afternoon"
    evening = "evening"


# Structured answer requested from the model in the cells below.
# All three fields are required (`...` as the default) and each carries a
# title and description via `Field`.
class Greeting(BaseModel):
    language: Language = Field(..., title="Language", description="Language to greet in.")
    greeting: str = Field(..., title="Greeting", description="Greeting message.")
    time: GreetingTime = Field(..., title="Time", description="Time of the day.")

In [66]:
# User prompt reused by the structured-output examples below. It asks for the
# three pieces of information that `Greeting` models: language, message, time of day.
prompt = """Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then
tell me the greeting message and the time of the day to use it.
"""

In [67]:
# Register `Greeting` as the response schema when the agent is constructed.
agent = Agent(
    client=openai_client,
    model="gpt-4o-mini-2024-07-18",
    response_schema_as_tool=Greeting,  # <- This is the arg to use the schema as a tool.
)

# In the recorded log the model calls the `final_answer` tool, and the tool
# arguments are returned as the JSON text of the final response.
response = agent.generate_text(prompt=prompt)

[32m[2025-01-14 10:00:26][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then\ntell me the greeting message and the time of the day to use it.\n')][0m
[32m[2025-01-14 10:00:26][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 10:00:28][0m [34m[1mDEBUG | Response: [ToolCallResponse(name='final_answer', args='{"language":"Japanese","greeting":"おはようございます","time":"morning"}', call_id='call_LBMJEtW1lVL6XnwSHrPFkIBR')][0m
[32m[2025-01-14 10:00:28][0m [1mINFO | Running tool: final_answer[0m
[32m[2025-01-14 10:00:28][0m [1mINFO | Tool: final_answer successfully ran.[0m
[32m[2025-01-14 10:00:28][0m [34m[1mDEBUG | Final result: [TextResponse(text='{"language": "Japanese", "greeting": "おはようございます", "time": "morning"}')][0m


In [68]:
# Parse the JSON text of the first response content back into a `Greeting` instance.
Greeting.model_validate_json(response.contents[0].text)

Greeting(language=<Language.japanese: 'Japanese'>, greeting='おはようございます', time=<GreetingTime.morning: 'morning'>)

The `response_schema_as_tool` argument can be specified in the generation method as well. If you pass this argument to both the constructor and the generation method, the latter is used.

In [69]:
# Same prompt text as in the previous example, repeated so the cell reads on its own.
prompt = """Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then
tell me the greeting message and the time of the day to use it.
"""

# No response schema is configured on the agent itself this time.
agent = Agent(
    client=openai_client,
    model="gpt-4o-mini-2024-07-18",
)

# The schema is supplied for this single call instead.
response = agent.generate_text(prompt=prompt, response_schema_as_tool=Greeting)

[32m[2025-01-14 10:00:28][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then\ntell me the greeting message and the time of the day to use it.\n')][0m
[32m[2025-01-14 10:00:28][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:29][0m [34m[1mDEBUG | Response: [TextResponse(text='Let\'s choose Japanese. \n\nIn Japanese, you can greet someone with "こんにちは" (Konnichiwa) which means "Hello." This greeting is typically used in the afternoon.')][0m
[32m[2025-01-14 10:00:29][0m [34m[1mDEBUG | Prompt: [TextPrompt(text="Decide the next action based on the conversation. If you have all information for answering the user, run 'final_result' tool. If you need more information, invoke other tool to get necessary information. ")][0m
[32m[2025-01-14 10:00:29][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:30][0m [34m[1mDEBUG | Response: [ToolCallRespons

In [70]:
# The per-call schema also yields JSON text that parses into a `Greeting`.
Greeting.model_validate_json(response.contents[0].text)

Greeting(language=<Language.japanese: 'Japanese'>, greeting='こんにちは', time=<GreetingTime.afternoon: 'afternoon'>)

A response schema passed only to the generation method applies to that call alone: unless you also specify it when instantiating the agent, the response schema is reset after every execution.

In [71]:
# Call the same agent again, this time without `response_schema_as_tool`.
response = agent.generate_text(prompt=prompt)

# The reply is free-form text here, so it is printed rather than parsed as `Greeting`.
print(response.contents[0].text)

[32m[2025-01-14 10:00:30][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then\ntell me the greeting message and the time of the day to use it.\n')][0m
[32m[2025-01-14 10:00:30][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 10:00:32][0m [34m[1mDEBUG | Response: [TextResponse(text='Let\'s choose **French**.\n\nA common greeting in French is **"Bonjour."**\n\n- **Time of the Day to Use:** "Bonjour" is typically used in the morning until late afternoon, roughly from morning until around 6 PM. After this time, you would usually switch to "Bonsoir," which means "Good evening." \n\nSo, when you meet someone during the day, you can say "Bonjour!"')][0m


Let's choose **French**.

A common greeting in French is **"Bonjour."**

- **Time of the Day to Use:** "Bonjour" is typically used in the morning until late afternoon, roughly from morning until around 6 PM. After this time, you would usually switch to "Bonsoir," which means "Good evening." 

So, when you meet someone during the day, you can say "Bonjour!"


The tool created from the response schema is named `final_answer`, and its default description is:

In [72]:
from langrila import AgentConfig

# A default-constructed config exposes the built-in description of the `final_answer` tool.
agent_config = AgentConfig()
print(agent_config.final_answer_description)

The final answer which ends this conversation. Arguments of this tool must be selected from the conversation history.
Unkown argument in the entire conversation history must be null, however, the argument appeared in the previous conversation must be provided.



This description is configurable: if you want to use another one, pass it as `final_answer_description` to `AgentConfig`.

In [73]:
# Replace the default description attached to the `final_answer` tool.
agent_config = AgentConfig(
    final_answer_description="This is example of final answer description.",
)

# `_agent` only demonstrates how the config is passed in; the following cells
# of this section build their own `agent` and do not use it.
_agent = Agent(
    client=openai_client,
    model="gpt-4o-mini-2024-07-18",
    response_schema_as_tool=Greeting,
    agent_config=agent_config,  # Set the agent configuration
)

For Gemini

In [74]:
# Same schema-as-tool setup, using the Gemini client for Google AI Studio.
agent = Agent(
    client=google_dev_client,
    model="gemini-2.0-flash-exp",
    response_schema_as_tool=Greeting,
)

# In the recorded run Gemini first replies with plain text and is re-prompted
# with the internal "Decide the next action ..." prompt before a final answer is produced.
response = agent.generate_text(prompt=prompt)

[32m[2025-01-14 10:00:32][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then\ntell me the greeting message and the time of the day to use it.\n')][0m
[32m[2025-01-14 10:00:32][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 10:00:34][0m [34m[1mDEBUG | Response: [TextResponse(text="I can help with that! But, I need you to specify the language and time of day you'd like to use. Please choose one language from Japanese, English, French, and German, and one time of day from morning, afternoon, or evening.")][0m
[32m[2025-01-14 10:00:34][0m [34m[1mDEBUG | Prompt: [TextPrompt(text="Decide the next action based on the conversation. If you have all information for answering the user, run 'final_result' tool. If you need more information, invoke other tool to get necessary information. ")][0m
[32m[2025-01-14 10:00:34][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:35][0m [34m[1mDEBUG | Response: [TextResponse(text="I need more information to provide a greeting. I'll ask the user to specify the language and time of day they'd like to use.")][0m
[32m[2025-01-14 10:00:35][0m [34m[1mDEBUG | Prompt: [TextPrompt(text="Decide the next action based on the conversation. I

In [75]:
# Parse the final JSON text returned through the Gemini agent into a `Greeting`.
Greeting.model_validate_json(response.contents[0].text)

Greeting(language=<Language.english: 'English'>, greeting='Good morning', time=<GreetingTime.morning: 'morning'>)

For Claude

In [76]:
# Same schema-as-tool setup with the Anthropic client.
# `max_tokens` is a provider-native generation parameter passed alongside the
# langrila-specific `response_schema_as_tool`.
agent = Agent(
    client=anthropic_client,
    model="claude-3-5-sonnet-20240620",
    max_tokens=500,
    response_schema_as_tool=Greeting,
)

In [77]:
# In the recorded log Claude returns a text part together with the `final_answer` tool call.
response = agent.generate_text(prompt=prompt)

[32m[2025-01-14 10:00:37][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then\ntell me the greeting message and the time of the day to use it.\n')][0m
[32m[2025-01-14 10:00:37][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:39][0m [34m[1mDEBUG | Response: [TextResponse(text="Certainly! I'd be happy to provide you with a greeting in one of the languages you mentioned. I'll choose a language, provide a greeting message, and specify a time of day to use it. Let me prepare that information for you."), ToolCallResponse(name='final_answer', args='{"language": "French", "greeting": "Bonjour!", "time": "morning"}', call_id='toolu_01KqmyJLVodMNwSwZ35VcKrw')][0m
[32m[2025-01-14 10:00:39][0m [1mINFO | Running tool: final_answer[0m
[32m[2025-01-14 10:00:39][0m [1mINFO | Tool: final_answer successfully ran.[0m
[32m[2025-01-14 10:00:39][0m [34m[1mDEBUG | Final 

In [78]:
# Parse the final JSON text returned through the Claude agent into a `Greeting`.
Greeting.model_validate_json(response.contents[0].text)

Greeting(language=<Language.french: 'French'>, greeting='Bonjour!', time=<GreetingTime.morning: 'morning'>)

Converse API for Bedrock

In [79]:
# Same schema-as-tool setup through the Bedrock Converse API client.
# Note the model is selected with `modelId` here, not `model` as in the other examples.
agent = Agent(
    client=bedrock_client,
    modelId="us.amazon.nova-lite-v1:0",
    response_schema_as_tool=Greeting,
)

response = agent.generate_text(prompt=prompt)

[32m[2025-01-14 10:00:39][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then\ntell me the greeting message and the time of the day to use it.\n')][0m
[32m[2025-01-14 10:00:39][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 10:00:40][0m [34m[1mDEBUG | Response: [TextResponse(text='<thinking>The User has asked for a greeting in one of the four specified languages. I will randomly select one language from the list and provide the corresponding greeting message and the appropriate time of the day to use it.</thinking>\n'), ToolCallResponse(name='final_answer', args='{"greeting": "こんにちは", "language": "Japanese", "time": "morning"}', call_id='tooluse_7tHknRD7SkCNLpOJDNRulQ')][0m
[32m[2025-01-14 10:00:40][0m [1mINFO | Running tool: final_answer[0m
[32m[2025-01-14 10:00:40][0m [1mINFO | Tool: final_answer successfully ran.[0m
[32m[2025-01-14 10:00:40][0m [34m[1mDEBUG | Final result: [TextResponse(text='{"greeting": "こんにちは", "language": "Japanese", "time": "morning"}')][0m


In [80]:
# Parse the final JSON text returned through the Bedrock agent into a `Greeting`.
Greeting.model_validate_json(response.contents[0].text)

Greeting(language=<Language.japanese: 'Japanese'>, greeting='こんにちは', time=<GreetingTime.morning: 'morning'>)

# Response schema natively supported by each provider

Except for a few arguments, the `Agent` module accepts almost all parameters natively supported by each provider as they are. For example, if you want to use the native response schema of the OpenAI API or Gemini, you can specify the corresponding parameter supported by each API.

For OpenAI

In [81]:
# Same prompt text as in the tool-calling examples above.
prompt = """Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then
tell me the greeting message and the time of the day to use it.
"""

# Use OpenAI's own structured-output parameter instead of `response_schema_as_tool`.
agent = Agent(
    client=openai_client,
    model="gpt-4o-mini-2024-07-18",
    response_format=Greeting,  # native parameter for structured output
)

# In the recorded log the JSON arrives directly as a text response, with no tool call.
response = agent.generate_text(prompt=prompt)

[32m[2025-01-14 10:00:40][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then\ntell me the greeting message and the time of the day to use it.\n')][0m
[32m[2025-01-14 10:00:40][0m [1mINFO | root: Generating text[0m
[32m[2025-01-14 10:00:41][0m [34m[1mDEBUG | Response: [TextResponse(text='{"language":"Japanese","greeting":"おはようございます","time":"morning"}')][0m


In [82]:
# The natively structured output is JSON text too, so it parses into `Greeting` the same way.
Greeting.model_validate_json(response.contents[0].text)

Greeting(language=<Language.japanese: 'Japanese'>, greeting='おはようございます', time=<GreetingTime.morning: 'morning'>)

For Gemini

In [83]:
# Gemini does not support a response schema that includes $defs and $ref, so the schema must be flattened.
# Parameter types must also be upper case.
# `to_gemini_schema` is used below to convert the pydantic model accordingly.

from langrila.google.gemini_utils import to_gemini_schema

agent = Agent(
    client=google_dev_client,
    model="gemini-2.0-flash-exp",
    response_mime_type="application/json",
    response_schema=to_gemini_schema(Greeting),  # native parameter for structured output
)

response = agent.generate_text(prompt=prompt)

[32m[2025-01-14 10:00:42][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Tell me how to greet in a language. Please pick one language from Japanese, English, French, and German, then\ntell me the greeting message and the time of the day to use it.\n')][0m
[32m[2025-01-14 10:00:42][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 10:00:44][0m [34m[1mDEBUG | Response: [TextResponse(text='{\n  "greeting": "Konnichiwa",\n  "language": "Japanese",\n  "time": "afternoon"\n}')][0m


In [84]:
# Gemini's native JSON output parses into `Greeting` as well.
Greeting.model_validate_json(response.contents[0].text)

Greeting(language=<Language.japanese: 'Japanese'>, greeting='Konnichiwa', time=<GreetingTime.afternoon: 'afternoon'>)

Please note that each provider has its own specifications and limitations for the native response schema.

# Customize internal prompt

Some agent frameworks use internal prompts that are not customizable. This can affect the agent's behaviour when it is used in another language such as Japanese. The agent in langrila has internal prompts as well (for example, the one used to retry after a validation error), but they are customizable.

In [85]:
from langrila import InternalPrompt

# A default-constructed `InternalPrompt` holds the built-in prompt texts.
internal_prompt = InternalPrompt()

All the internal prompts are:

In [86]:
# Show every internal prompt as a dict of prompt name -> prompt text.
internal_prompt.model_dump()

{'error_retry': 'Please fix the error on tool use. If the validation error is raised, reflect the conversation history and try to find the correct answer. If there is no fact to answer the user, try to run other tools to get necessary information. ',
 'no_tool_use_retry': "Decide the next action based on the conversation. If you have all information for answering the user, run 'final_result' tool. If you need more information, invoke other tool to get necessary information. ",
 'planning': 'Please make a concise plan to answer the following question/requirement, considering the conversation history.\nYou can invoke the sub-agents or tools to answer the questions/requirements shown in the capabilities section.\nAgent has no description while the tools have a description.\n\nQuestion/Requirement:\n{user_input}\n\nCapabilities:\n{capabilities}',
 'do_plan': 'Put the plan into action.'}

If you want to customize an internal prompt, you can configure it via `AgentConfig`.

In [87]:
from langrila import AgentConfig

# Only `error_retry` is overridden here; the other prompts are not passed explicitly.
agent_config = AgentConfig(
    internal_prompt=InternalPrompt(
        error_retry="エラーを修正してください。",  # replace retry prompt with Japanese
    ),
)

# Hand the customized configuration to the agent.
agent = Agent(
    client=openai_client,
    model="gpt-4o-mini-2024-07-18",
    agent_config=agent_config,
)

# Appendix

Langrila supports the o1 family of the OpenAI API. (More precisely, langrila does not depend on the model name.)

In [88]:
# Selecting an o1-family model only requires passing its model name.
agent = Agent(
    client=openai_client,
    model="o1-mini-2024-09-12",
)

In [89]:
# `prompt1` comes from an earlier cell of this notebook.
response = agent.generate_text(prompt=prompt1)
# Bare last expression: display the full `Response`, including the raw completion and its token usage.
response

[32m[2025-01-14 10:00:45][0m [34m[1mDEBUG | Prompt: [TextPrompt(text='Hello. How are you today?')][0m
[32m[2025-01-14 10:00:45][0m [1mINFO | root: Generating text[0m


[32m[2025-01-14 10:00:48][0m [34m[1mDEBUG | Response: [TextResponse(text="Hello! I'm doing well, thank you for asking. How can I assist you today?")][0m


Response(type='Response', role='assistant', contents=[TextResponse(text="Hello! I'm doing well, thank you for asking. How can I assist you today?")], usage=<langrila.core.usage.NamedUsage object at 0x7f7bfe24fc70>, raw=ChatCompletion(id='chatcmpl-ApPaPeNI8z8EtCSh5zWpY8IGdmOFW', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm doing well, thank you for asking. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1736816445, model='o1-mini-2024-09-12', object='chat.completion', service_tier='default', system_fingerprint='fp_f56e40de61', usage=CompletionUsage(completion_tokens=350, prompt_tokens=14, total_tokens=364, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=320, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0))), name=None, is_last_

In [90]:
# List the token usage entries as (name, usage) pairs; in this run the only entry is "root".
response.usage.items()

dict_items([('root', Usage(model_name='o1-mini-2024-09-12', prompt_tokens=14, output_tokens=350))])