In [1]:
import os
import warnings

# Ignore every Python warning for the rest of the session.
warnings.simplefilter(action="ignore")
# Set the GRPC_VERBOSITY env var to "NONE" — presumably to mute gRPC log output
# from the Google clients used later; TODO confirm.
os.environ["GRPC_VERBOSITY"] = "NONE"

# Prerequisites

Please make sure your environment variables and dependencies are set up so that you can use the LLM services.

In [2]:
from dotenv import load_dotenv

# Load environment variables from the dotenv file at the relative path `../../.env_api`.
load_dotenv("../../.env_api")

True

# Import modules

In langrila, the module that chats with the LLM and the module that calls tools are completely separate. The FunctionalChat class is the combination of the two.

- `{Client}ChatModule`: Only focuses on doing conversation with LLM
- `{Client}FunctionCallingModule`: Only focuses on calling tools
- `{Client}FunctionalChat`: The combination of the two. FunctionCallingModule runs first and then ChatModule runs. If no tools are provided, this module behaves just like ChatModule.

In [3]:
from langrila import (
    InMemoryConversationMemory,
    Message,
    PromptTemplate,
    Usage,
)
from langrila.claude import ClaudeFunctionalChat
from langrila.gemini import GeminiFunctionalChat
from langrila.openai import OpenAIFunctionalChat

# Way to initialize chat module

You can pass both client parameters and generation parameters to `{Client}FunctionalChat` when initializing it. This approach is useful when you want to reuse the same settings everywhere.

In [4]:
# Client settings (API key env var name) and generation settings (model, token
# limit, temperature, seed, system instruction) are all given at construction.
chat = OpenAIFunctionalChat(
    api_key_env_name="OPENAI_API_KEY",
    model_name="gpt-4o-2024-08-06",
    max_tokens=300,
    # max_completion_tokens=300, # for o1 family
    temperature=0.8,
    seed=42,
    system_instruction="Generate 3 paraphrases of the input English term.",
)

# Only the prompt is passed here; no generation parameters are given at call time.
response = chat.run(prompt="How are you?")
# Print the text of the first content item of the assistant message.
print(response.message.content[0].text)

1. How's it going?
2. How have you been?
3. What's up with you?


Another way is to pass only the client parameters at initialization and to specify the generation parameters when generating a message. This lets you obtain outputs with different parameters or system instructions while treating the module like a client instance.

In [5]:
# Only the client setting (API key env var name) is given at construction.
chat = OpenAIFunctionalChat(api_key_env_name="OPENAI_API_KEY")

# All generation settings are supplied per call instead.
response = chat.run(
    prompt="How are you?",
    model_name="gpt-4o-2024-08-06",
    max_tokens=300,
    # max_completion_tokens=300, # for o1 family
    temperature=0.8,
    seed=42,
    system_instruction="Generate 3 paraphrases of the input English term.",
)

# Print the text of the first content item of the assistant message.
print(response.message.content[0].text)

1. How's it going?
2. How do you do?
3. What's up with you?


NOTE: If you specify different values for the same parameter at initialization and at generation, the value passed to the generation method takes priority.

In [6]:
# Construct the module with a default system instruction asking for 3 paraphrases.
chat = OpenAIFunctionalChat(
    api_key_env_name="OPENAI_API_KEY",
    model_name="gpt-4o-2024-08-06",
    max_tokens=300,
    # max_completion_tokens=300, # for o1 family
    system_instruction="Generate 3 paraphrases of the input English term.",
)

# Run without overriding anything at call time.
response = chat.run(prompt="How are you?")

# Show the assistant message as a plain dict.
response.message.model_dump()

{'role': 'assistant',
 'content': [{'text': "1. What's up with you?\n2. How's it going?\n3. How do you do?"}],
 'name': None}

In [7]:
# Reuse the existing `chat` instance but pass a different system instruction to
# `run`; the recorded output shows the call-time value is the one applied.
response = chat.run(
    prompt="How are you?",
    # override for the demonstration from 3 paraphrases to 5 paraphrases
    system_instruction="Generate 5 paraphrases of the input English term.",
)

# Show the assistant message as a plain dict.
response.message.model_dump()

{'role': 'assistant',
 'content': [{'text': "1. How's it going with you?\n2. What's up with you?\n3. How have you been?\n4. How are things with you?\n5. How do you do?"}],
 'name': None}

# Simple text prompt

### For OpenAI Chat Completion

In [8]:
# OpenAI client: the API key is referenced by environment variable name and the
# API type is set explicitly to "openai".
chat_openai = OpenAIFunctionalChat(
    api_key_env_name="OPENAI_API_KEY",
    api_type="openai",
    # organization_id_env_name="ORGANIZATION_ID",  # as needed
)

In [9]:
# Plain-text user prompt reused by the provider examples that follow.
prompt = "Hello. How are you today?"

Generate synchronously

In [10]:
# Synchronous generation; the model is selected per call via `model_name`.
response = chat_openai.run(
    prompt,
    model_name="gpt-4o-2024-08-06",
)

The response is a pydantic model.

In [11]:
# Dump the response (message, usage and prompt) to a plain dict.
response.model_dump()

{'message': {'role': 'assistant',
  'content': [{'text': "Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?"}],
  'name': None},
 'usage': {'model_name': 'gpt-4o-2024-08-06',
  'prompt_tokens': 16,
  'completion_tokens': 30},
 'prompt': [{'role': 'user',
   'content': [{'type': 'text', 'text': 'Hello. How are you today?'}],
   'name': 'User'}]}

The `raw` attribute contains the raw response from the client API. This attribute is excluded from `response.model_dump()`. The raw response is useful for debugging.

In [12]:
# Raw response object returned by the client API (not part of `model_dump()`).
response.raw

ChatCompletion(id='chatcmpl-AKaQ7UAF4O2HAmOjg4ZzRLpbP1gso', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1729469923, model='gpt-4o-2024-08-06', object='chat.completion', service_tier=None, system_fingerprint='fp_a7d06e42a7', usage=CompletionUsage(completion_tokens=30, prompt_tokens=16, total_tokens=46, completion_tokens_details=CompletionTokensDetails(audio_tokens=None, reasoning_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0)))

In [13]:
# Top-level fields of the raw response as a dict; nested objects such as
# `choices` and `usage` keep their original types (see recorded output).
dict(response.raw)

{'id': 'chatcmpl-AKaQ7UAF4O2HAmOjg4ZzRLpbP1gso',
 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))],
 'created': 1729469923,
 'model': 'gpt-4o-2024-08-06',
 'object': 'chat.completion',
 'service_tier': None,
 'system_fingerprint': 'fp_a7d06e42a7',
 'usage': CompletionUsage(completion_tokens=30, prompt_tokens=16, total_tokens=46, completion_tokens_details=CompletionTokensDetails(audio_tokens=None, reasoning_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0))}

You can pick response text like this

In [14]:
# Text of the first content item of the assistant message.
print(response.message.content[0].text)

Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?


You can see the token usage of the generation

In [15]:
# Token usage for this generation (prompt, completion and total tokens).
response.usage

Usage(prompt_tokens=16, completion_tokens=30, total_tokens=46)

You can access prompt

In [16]:
# The prompt messages recorded on the response (a list of role/content/name dicts
# in the recorded output).
response.prompt

[{'role': 'user',
  'content': [{'type': 'text', 'text': 'Hello. How are you today?'}],
  'name': 'User'}]

Asynchronous call is available.

In [17]:
# Asynchronous counterpart of `run`, awaited directly in the notebook cell.
response = await chat_openai.arun(prompt, model_name="gpt-4o-2024-08-06")
print(response.message.content[0].text)

Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?


Streaming completion

In [18]:
# Open a synchronous stream and drain it into a list so every chunk can be inspected.
stream = chat_openai.stream(prompt, model_name="gpt-4o-2024-08-06")
responses = list(stream)
responses

[CompletionResults(message=Message(role='assistant', content=[TextContent(text='')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello!')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! I'm")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! I'm just")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(messa

Last response includes usage

In [19]:
# Earlier chunks report zero usage in the recorded output; the final chunk
# carries the actual token counts.
responses[-1].usage

Usage(prompt_tokens=16, completion_tokens=30, total_tokens=46)

For streaming generation, raw response is included in the last chunk.

In [20]:
# Earlier chunks have `raw=None`; the final chunk holds the list of raw
# streaming chunks received from the client.
responses[-1].raw

[ChatCompletionChunk(id='chatcmpl-AKaQ9RrlSxE5l0h3kke9MdroLwyfS', choices=[Choice(delta=ChoiceDelta(content='', function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1729469925, model='gpt-4o-2024-08-06', object='chat.completion.chunk', service_tier=None, system_fingerprint='fp_143bb8492c', usage=None),
 ChatCompletionChunk(id='chatcmpl-AKaQ9RrlSxE5l0h3kke9MdroLwyfS', choices=[Choice(delta=ChoiceDelta(content='Hello', function_call=None, refusal=None, role=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1729469925, model='gpt-4o-2024-08-06', object='chat.completion.chunk', service_tier=None, system_fingerprint='fp_143bb8492c', usage=None),
 ChatCompletionChunk(id='chatcmpl-AKaQ9RrlSxE5l0h3kke9MdroLwyfS', choices=[Choice(delta=ChoiceDelta(content='!', function_call=None, refusal=None, role=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1729469925, model='gpt

Asynchronous streaming

In [21]:
stream = chat_openai.astream(prompt, model_name="gpt-4o-2024-08-06")
responses = [r async for r in stream]
responses

[CompletionResults(message=Message(role='assistant', content=[TextContent(text='')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello!')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! I'm")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! I'm here")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(messa

Multiple response generation

In [22]:
# Ask for two candidate completions in a single call via `n_results`.
response = chat_openai.run(prompt, n_results=2, model_name="gpt-4o-2024-08-06")

In [23]:
# Each candidate appears as a separate item in the message's `content` list.
response.message.model_dump()

{'role': 'assistant',
 'content': [{'text': "Hello! I'm here and ready to help. How can I assist you today?"},
  {'text': "Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?"}],
 'name': None}

In [24]:
# async
# Same two-candidate request as above, issued through the asynchronous `arun`.

response = await chat_openai.arun(prompt, n_results=2, model_name="gpt-4o-2024-08-06")
response.message.model_dump()

{'role': 'assistant',
 'content': [{'text': "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?"},
  {'text': "Hello! I'm just a computer program, so I don't have feelings, but I'm here to help you. What can I assist you with today?"}],
 'name': None}

### For Azure OpenAI

In [25]:
# Azure OpenAI client: the API key, endpoint and deployment id are referenced by
# environment variable name, and the API version is pinned explicitly.
chat_openai_azure = OpenAIFunctionalChat(
    api_key_env_name="AZURE_API_KEY",
    api_type="azure",
    api_version="2024-08-01-preview",
    endpoint_env_name="AZURE_ENDPOINT",
    deployment_id_env_name="AZURE_DEPLOYMENT_ID",
)

In [26]:
# Synchronous generation through the Azure client, then print the reply text.
response = chat_openai_azure.run(prompt, model_name="gpt-4o-2024-05-13")
print(response.message.content[0].text)

Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you with whatever you need. How can I assist you today?


In [27]:
# Top-level fields of the raw Azure response; the recorded output additionally
# includes content-filter results.
dict(response.raw)

{'id': 'chatcmpl-AKaQFvceM13iNIHYokljmpUqFllqe',
 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you with whatever you need. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})],
 'created': 1729469931,
 'model': 'gpt-4o-2024-05-13',
 'object': 'chat.completion',
 'service_tier': None,
 'system_fingerprint': 'fp_67802d9a6d',
 'usage': CompletionUsage(completion_tokens=34, prompt_tokens=16, total_tokens=50, completion_tokens_details=None, prompt_tokens_details=None),
 'prompt_filter_results': [{'prompt_index': 0,
   'content_filter_results': {'hate': {'

In [28]:
# async
# Asynchronous generation through the Azure client, then print the reply text.
response = await chat_openai_azure.arun(prompt, model_name="gpt-4o-2024-05-13")
print(response.message.content[0].text)

Hello! I'm just a computer program, so I don't have feelings, but thanks for asking! How can I assist you today?


In [29]:
# Synchronous streaming through the Azure client; drain the stream into a list for display.
response = chat_openai_azure.stream(prompt, model_name="gpt-4o-2024-05-13")
list(response)

[CompletionResults(message=Message(role='assistant', content=[TextContent(text='')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello!')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! I'm")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! I'm just")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(messa

In [30]:
# async stream
response = chat_openai_azure.astream(prompt, model_name="gpt-4o-2024-05-13")
[r async for r in response]

[CompletionResults(message=Message(role='assistant', content=[TextContent(text='')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello!')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! I'm")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! I'm just")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(messa

multiple generation

In [31]:
# Request two candidate completions from Azure; each one is a separate `content` item.
response = chat_openai_azure.run(prompt, n_results=2, model_name="gpt-4o-2024-05-13")
response.message.model_dump()

{'role': 'assistant',
 'content': [{'text': "Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you with whatever you need. How can I assist you today?"},
  {'text': "Hello! I'm here and ready to assist you. How can I help you today?"}],
 'name': None}

In [32]:
# async
# Asynchronous variant of the two-candidate Azure request.
response = await chat_openai_azure.arun(prompt, n_results=2, model_name="gpt-4o-2024-05-13")
response.message.model_dump()

{'role': 'assistant',
 'content': [{'text': "Hello! I'm just a computer program, so I don't have feelings, but thanks for asking. How can I assist you today?"},
  {'text': "Hello! I'm here and ready to assist you. How can I help you today?"}],
 'name': None}

### Gemini on Google Generative AI

In [33]:
# Gemini client: the API key is referenced by environment variable name.
gemini = GeminiFunctionalChat(api_key_env_name="GEMINI_API_KEY")

In [34]:
# Same `run` interface as the OpenAI module; only the model name differs.
response = gemini.run(prompt, model_name="gemini-1.5-flash")
print(response.message.content[0].text)

I am an AI language model, so I don't have feelings or experiences like humans do. However, I am here and ready to assist you with any questions or tasks you may have! How can I help you today? 😊 



In [35]:
# The assistant message has the same role/content/name layout as with OpenAI.
response.message.model_dump()

{'role': 'assistant',
 'content': [{'text': "I am an AI language model, so I don't have feelings or experiences like humans do. However, I am here and ready to assist you with any questions or tasks you may have! How can I help you today? 😊 \n"}],
 'name': None}

In [36]:
# Raw client response (a GenerateContentResponse in the recorded output).
response.raw

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "I am an AI language model, so I don't have feelings or experiences like humans do. However, I am here and ready to assist you with any questions or tasks you may have! How can I help you today? \ud83d\ude0a \n"
              }
            ],
            "role": "model"
          },
          "finish_reason": "STOP",
          "index": 0,
          "safety_ratings": [
            {
              "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
              "probability": "NEGLIGIBLE"
            },
            {
              "category": "HARM_CATEGORY_HATE_SPEECH",
              "probability": "NEGLIGIBLE"
            },
            {
              "category": "HARM_CATEGORY_HARASSMENT",
              "probability": "NEGLIGIBLE"
            },
            {
 

In [37]:
# async
# Asynchronous generation with Gemini, then print the reply text.
response = await gemini.arun(prompt, model_name="gemini-1.5-flash")
print(response.message.content[0].text)

I am an AI language model, so I don't have feelings or experiences like humans do. However, I am here to assist you with any questions or tasks you may have! How can I help you today? 😊 



In [38]:
# Synchronous streaming with Gemini; drain the stream into a list for display.
response = gemini.stream(prompt, model_name="gemini-1.5-flash")
list(response)

[CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI,')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="As an AI, I don't have feelings or experiences like humans do, so I don't have")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI, I don\'t have feelings or experiences like humans do, so I don\'t have a "good" or "bad" day. However, I\'m here and ready')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI, I don\'t have feelings or experiences like humans do, so I don\'t have a "good" or "bad" day. However, I\'m here and ready to help yo

In [39]:
# async stream
response = gemini.astream(prompt, model_name="gemini-1.5-flash")
[r async for r in response]

[CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI,')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="As an AI, I don't have feelings or experiences like humans do. So I don'")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI, I don\'t have feelings or experiences like humans do. So I don\'t have a "good" or "bad" day. However, I\'m')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI, I don\'t have feelings or experiences like humans do. So I don\'t have a "good" or "bad" day. However, I\'m always here and ready to assist you with any q

### Gemini on VertexAI

In [40]:
# Vertex AI variant of the Gemini client: no API key env var is passed here;
# the project id and location are referenced by environment variable name.
gemini_vertexai = GeminiFunctionalChat(
    api_type="vertexai",
    project_id_env_name="PROJECT_ID",
    location_env_name="LOCATION",
)

In [41]:
# Synchronous generation through Vertex AI, then print the reply text.
response = gemini_vertexai.run(prompt, model_name="gemini-1.5-flash")
print(response.message.content[0].text)

As an AI, I don't have feelings or experiences like humans do, so I don't have a "good" or "bad" day.  However, I'm here and ready to assist you with any questions or tasks you may have! 😊 



In [42]:
# async
# Asynchronous generation through Vertex AI, then print the reply text.
response = await gemini_vertexai.arun(prompt, model_name="gemini-1.5-flash")
print(response.message.content[0].text)

As an AI, I don't have feelings or experiences like humans do, so I don't have a "good" or "bad" day.  But I'm ready to assist you with any questions or tasks you might have!  How can I help you today? 😊 



In [43]:
# Synchronous streaming through Vertex AI; drain the stream into a list for display.
response = gemini_vertexai.stream(prompt, model_name="gemini-1.5-flash")
list(response)

[CompletionResults(message=Message(role='assistant', content=[TextContent(text='As')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="As an AI, I don't have feelings or experiences like humans do, so I don")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI, I don\'t have feelings or experiences like humans do, so I don\'t have a "good" or "bad" day. But I\'m')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI, I don\'t have feelings or experiences like humans do, so I don\'t have a "good" or "bad" day. But I\'m here and ready to assist you with any questions or tasks you mig

In [44]:
# async stream
response = gemini_vertexai.astream(prompt, model_name="gemini-1.5-flash")
[r async for r in response]

[CompletionResults(message=Message(role='assistant', content=[TextContent(text='As')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="As an AI, I don't have feelings or experiences like humans do. So, I'")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI, I don\'t have feelings or experiences like humans do. So, I\'m not "feeling" anything in the way you might be. But I\'m')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt='', raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='As an AI, I don\'t have feelings or experiences like humans do. So, I\'m not "feeling" anything in the way you might be. But I\'m here and ready to assist you wit

Multiple response generation

In [45]:
# Ask Vertex AI for two candidate completions in a single call via `n_results`.
response = gemini_vertexai.run(prompt, n_results=2, model_name="gemini-1.5-flash")

In [46]:
# Each candidate appears as a separate item in the message's `content` list.
response.message.model_dump()

{'role': 'assistant',
 'content': [{'text': "As a large language model, I don't have feelings or experiences like humans do. But I'm here and ready to assist you with any questions or tasks you may have! How can I help you today? 😊 \n"},
  {'text': "I am an AI language model, so I don't have feelings like humans do. However, I am here to assist you with any questions or tasks you may have! How can I help you today? 😊 \n"}],
 'name': None}

In [47]:
# async multiple generation
# Same two-candidate request, issued through the asynchronous `arun`.
response = await gemini_vertexai.arun(prompt, n_results=2, model_name="gemini-1.5-flash")
response.message.model_dump()

{'role': 'assistant',
 'content': [{'text': "As an AI, I don't have feelings or experiences like humans do. However, I'm here and ready to assist you with any questions or tasks you may have! How can I help you today? 😊 \n"},
  {'text': "I am an AI language model, so I don't have feelings or experiences like humans do. However, I am here to assist you with any questions or tasks you may have! How can I help you today? 😊 \n"}],
 'name': None}

### For Claude on Anthropic

In [48]:
# Claude client: the API key is referenced by environment variable name.
claude = ClaudeFunctionalChat(api_key_env_name="ANTHROPIC_API_KEY")

In [49]:
# Synchronous generation with Claude, then print the reply text.
response = claude.run(prompt, model_name="claude-3-5-sonnet-20240620")
print(response.message.content[0].text)

Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?


In [50]:
# async
# Asynchronous generation with Claude, then print the reply text.
response = await claude.arun(prompt, model_name="claude-3-5-sonnet-20240620")
print(response.message.content[0].text)

Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you with any questions or tasks you may have. How can I help you today?


Streaming generation is not yet supported by `ClaudeFunctionalChat` itself; the example below streams via `claude.chat` instead.

In [51]:
# Stream through the module's `chat` attribute and collect every chunk.
responses = [
    chunk for chunk in claude.chat.stream(prompt, model_name="claude-3-5-sonnet-20240620")
]
responses

[CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello! As')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text='Hello! As an AI language')], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! As an AI language model, I don't")], name=None), usage=Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0), prompt=[{}], raw=None),
 CompletionResults(message=Message(role='assistant', content=[TextContent(text="Hello! As an AI language model, I don't have")], name=None), usage=Usage(prompt_tokens=0, c

In [52]:
# The final chunk's `raw` holds the list of raw streaming events from the client.
responses[-1].raw

[RawMessageStartEvent(message=Message(id='msg_01HkgFWfwEByCcbhpRWNGA4q', content=[], model='claude-3-5-sonnet-20240620', role='assistant', stop_reason=None, stop_sequence=None, type='message', usage=Usage(input_tokens=14, output_tokens=1)), type='message_start'),
 RawContentBlockStartEvent(content_block=TextBlock(text='', type='text'), index=0, type='content_block_start'),
 RawContentBlockDeltaEvent(delta=TextDelta(text='Hello', type='text_delta'), index=0, type='content_block_delta'),
 RawContentBlockDeltaEvent(delta=TextDelta(text='! As', type='text_delta'), index=0, type='content_block_delta'),
 RawContentBlockDeltaEvent(delta=TextDelta(text=' an AI language', type='text_delta'), index=0, type='content_block_delta'),
 RawContentBlockDeltaEvent(delta=TextDelta(text=" model, I don't", type='text_delta'), index=0, type='content_block_delta'),
 RawContentBlockDeltaEvent(delta=TextDelta(text=' have', type='text_delta'), index=0, type='content_block_delta'),
 RawContentBlockDeltaEvent(del

### For Claude on Amazon Bedrock

In [53]:
# Claude via Amazon Bedrock: the AWS access key, secret key and region are
# referenced by environment variable name.
claude_bedrock = ClaudeFunctionalChat(
    api_type="bedrock",
    aws_access_key_env_name="AWS_ACCESS_KEY",
    aws_secret_key_env_name="AWS_SECRET_KEY",
    aws_region_env_name="AWS_REGION",
)

In [54]:
# Synchronous generation through Bedrock; note the Bedrock-style model identifier.
response = claude_bedrock.run(prompt, model_name="anthropic.claude-3-sonnet-20240229-v1:0")
print(response.message.content[0].text)

Hello! As an AI language model, I don't have personal feelings or physical sensations, but I'm operating properly and ready to assist you with any questions or tasks you might have. How can I help you today?


In [55]:
# Raw client response (an Anthropic `Message` object in the recorded output).
response.raw

Message(id='msg_bdrk_01WsgkdRKm5gBnxpwn45EBJK', content=[TextBlock(text="Hello! As an AI language model, I don't have personal feelings or physical sensations, but I'm operating properly and ready to assist you with any questions or tasks you might have. How can I help you today?", type='text')], model='claude-3-sonnet-20240229', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=14, output_tokens=48))

In [56]:
# Asynchronous generation through Bedrock, then print the reply text.
response = await claude_bedrock.arun(prompt, model_name="anthropic.claude-3-sonnet-20240229-v1:0")
print(response.message.content[0].text)

Hello! As an AI language model, I don't have feelings or emotions, but I'm functioning properly and ready to assist you with any questions or tasks you may have. How can I help you today?


# Universal message system

Langrila's chat module first transforms the user prompt and conversation history into universal message objects, then converts them into each client's message format. The conversation memory stores the history in the universal message format.

In [57]:
# Plain-text prompt used by the message-conversion examples below.
prompt = "Hello, how are you today?"

Converting from prompt to universal message

In [58]:
from langrila.openai import OpenAIMessage

# Wrap the raw text prompt into a universal Message with the "user" role.
universal_message = OpenAIMessage.to_universal_message(role="user", message=prompt)
universal_message

Message(role='user', content=[TextContent(text='Hello, how are you today?')], name=None)

Converting from universal message to client message

In [59]:
# Convert the universal message into the OpenAI client message format (a dict
# with role, content and name in the recorded output).
OpenAIMessage.to_client_message(universal_message)

{'role': 'user',
 'content': [{'type': 'text', 'text': 'Hello, how are you today?'}],
 'name': 'User'}

The Gemini module has the same interface, and so does Claude.

In [60]:
from langrila.gemini.genai import GeminiMessage

# same method as that of OpenAIMessage
# The resulting universal Message is identical to the OpenAI one in the recorded output.
universal_message = GeminiMessage.to_universal_message(role="user", message=prompt)
universal_message

Message(role='user', content=[TextContent(text='Hello, how are you today?')], name=None)

In [61]:
# Convert the universal message into Gemini's client message (parts + role in
# the recorded output).
GeminiMessage.to_client_message(universal_message)

parts {
  text: "Hello, how are you today?"
}
role: "user"

If you want to set a role and name for your prompt, the Message class allows you to do so. A raw text prompt is converted to a TextContent object inside langrila's chat modules, so a TextContent object can also be specified directly.

In [62]:
from langrila import Message

# Build the prompt as a Message so that a name can be attached to it
prompt = Message(role="user", content="Hello.", name="Nsak")
# A Message instance is accepted in place of a raw string; the recorded output keeps name='Nsak'
universal_message = OpenAIMessage.to_universal_message(role="user", message=prompt)
universal_message

Message(role='user', content=[TextContent(text='Hello.')], name='Nsak')

# How to specify system instruction

Langrila's module accepts system instruction as shown below.

### For OpenAI Chat Completion

In [63]:
# System instruction shared by the OpenAI, Gemini and Claude examples in this section
system_instruction = "You must answer any question only with Yes or No."

In [64]:
# Client with a conversation memory attached so the stored history can be inspected afterwards.
# NOTE(review): earlier cells read the key from "OPENAI_API_KEY"; confirm "API_KEY" is the intended variable.
chat_openai = OpenAIFunctionalChat(
    api_key_env_name="API_KEY",
    conversation_memory=InMemoryConversationMemory(),  # just for the explanation
)

In [65]:
prompt = "Please tell me about how to sleep well."

# The system instruction is supplied per call, alongside the model name.
response = chat_openai.run(
    model_name="gpt-4o-2024-08-06",
    system_instruction=system_instruction,
    prompt=prompt,
)

# Print only the text of the first content item of the reply.
first_content = response.message.content[0]
print(first_content.text)

No.


The system prompt is not stored in the conversation memory, but it does appear in the prompt attribute of the response object. This is because it keeps the system prompt flexibly replaceable. The system prompt is inserted as the first message just before the API call.

In [66]:
# You can see system prompt in the prompt attribute in the response
response.model_dump()

{'message': {'role': 'assistant', 'content': [{'text': 'No.'}], 'name': None},
 'usage': {'model_name': 'gpt-4o-2024-08-06',
  'prompt_tokens': 34,
  'completion_tokens': 2},
 'prompt': [{'role': 'system',
   'content': [{'type': 'text',
     'text': 'You must answer any question only with Yes or No.'}],
   'name': 'System'},
  {'role': 'user',
   'content': [{'type': 'text',
     'text': 'Please tell me about how to sleep well.'}],
   'name': 'User'}]}

In [67]:
# System prompt is not shown in the conversation memory
chat_openai.conversation_memory.load()

[{'role': 'user',
  'content': [{'text': 'Please tell me about how to sleep well.'}],
  'name': None},
 {'role': 'assistant', 'content': [{'text': 'No.'}], 'name': None}]

### For Gemini

Gemini and Claude also accept system prompt.

In [68]:
# Only the client parameter is given here; model_name is passed later at run() time
gemini = GeminiFunctionalChat(api_key_env_name="GEMINI_API_KEY")

In [69]:
prompt = "Please tell me about how to sleep well."

# Same yes/no system instruction as before, this time sent through the Gemini client.
response = gemini.run(
    model_name="gemini-1.5-pro",
    system_instruction=system_instruction,
    prompt=prompt,
)

# Print only the text of the first content item of the reply.
first_content = response.message.content[0]
print(first_content.text)

No. 



In [70]:
# Dump the Gemini response; in the recorded output the 'prompt' entry lists only the user message
response.model_dump()

{'message': {'role': 'assistant',
  'content': [{'text': 'No. \n'}],
  'name': None},
 'usage': {'model_name': 'gemini-1.5-pro',
  'prompt_tokens': 21,
  'completion_tokens': 2},
 'prompt': [parts {
    text: "Please tell me about how to sleep well."
  }
  role: "user"]}

### For Claude

In [71]:
# Only the client parameter is given here; model_name is passed later at run() time
claude = ClaudeFunctionalChat(api_key_env_name="ANTHROPIC_API_KEY")

In [72]:
prompt = "Please tell me about how to sleep well."

# Same yes/no system instruction as before, this time sent through the Claude client.
response = claude.run(
    model_name="claude-3-5-sonnet-20240620",
    system_instruction=system_instruction,
    prompt=prompt,
)

# Print only the text of the first content item of the reply.
first_content = response.message.content[0]
print(first_content.text)

Yes.


In [73]:
# Dump the Claude response; in the recorded output the 'prompt' entry lists only the user message
response.model_dump()

{'message': {'role': 'assistant', 'content': [{'text': 'Yes.'}], 'name': None},
 'usage': {'model_name': 'claude-3-5-sonnet-20240620',
  'prompt_tokens': 27,
  'completion_tokens': 5},
 'prompt': [{'role': 'user',
   'content': [{'text': 'Please tell me about how to sleep well.',
     'type': 'text'}]}]}

# Token management

For OpenAI Chat Completion, the number of tokens is calculated before the API call, and the module automatically truncates the prompt so that the number of tokens stays within the token limit. In this section, we simulate this truncation.

The conventional way shown below is deprecated and will be removed in a future version.

In [74]:
# Deprecated style (see the note above this cell): context_length is passed to the constructor
chat_openai = OpenAIFunctionalChat(
    api_key_env_name="API_KEY",
    api_type="openai",
    model_name="gpt-4o-2024-08-06",
    context_length=200,
)



Please specify conversation_length_adjuster instead.

In [75]:
# Recreate the client without context_length; the length limit is supplied per call in the next cell
chat_openai = OpenAIFunctionalChat(
    api_key_env_name="API_KEY",
    conversation_memory=InMemoryConversationMemory(),  # just for the explanation
)

In [76]:
from langrila.openai import OldConversationTruncationModule

# Two consecutive questions sent through the same client, one run() call each.
prompts = [
    "Please tell me about a bird called Kiji in Japan.",
    "What does Kiji do in the tale of the Momotaro",
]

for prompt in prompts:
    # A fresh adjuster is built for every call, limited to 200 tokens of context.
    length_adjuster = OldConversationTruncationModule(
        model_name="gpt-4o-2024-08-06",
        context_length=200,
    )
    response = chat_openai.run(
        prompt,
        model_name="gpt-4o-2024-08-06",
        conversation_length_adjuster=length_adjuster,
    )

Input message is truncated because total length of messages exceeds context length.


Old conversation was truncated as shown below.

In [77]:
# NOTE : Usage after truncation might slightly be different from the specified context_length
response.usage

Usage(prompt_tokens=199, completion_tokens=229, total_tokens=428)

In [78]:
# Messages recorded on the last response; in the recorded output the first (assistant)
# message starts mid-answer because the older part of the conversation was cut off.
response.prompt

[{'role': 'assistant',
  'content': [{'type': 'text',
    'text': ': The Green Pheasant holds cultural importance in Japan and is considered a national bird of the country. It has been featured in Japanese art, literature, and folklore. The call of the male Kiji is also associated with the arrival of spring.\n\n4. **Behavior**: Green Pheasants are ground-dwelling birds that are known for their strong legs and ability to run quickly. They are also capable of short, rapid flights if startled. Their diet mainly consists of seeds, insects, and small animals.\n\n5. **Role in Ecosystem**: As a part of the ecosystem, they contribute to seed dispersion and also serve as prey for larger predators.\n\nIn summary, the Kiji or Green Pheasant is a distinctive and culturally significant bird in Japan, admired for its beautiful plumage and adaptation to various habitats across the country.'}],
  'name': 'Assistant'},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'What does Kiji do in t

But truncation does not affect conversation memory itself. You can see full conversation history in the memory.

In [79]:
# Load the stored history; the recorded output begins with the full, untruncated first exchange
chat_openai.conversation_memory.load()

[{'role': 'user',
  'content': [{'text': 'Please tell me about a bird called Kiji in Japan.'}],
  'name': None},
 {'role': 'assistant',
  'content': [{'text': 'The "Kiji" refers to the Green Pheasant, known in Japanese as "Kijī" (キジ or 雉). This bird, scientifically named *Phasianus versicolor*, is native to Japan and is notable for its vibrant plumage. It is considered a subspecies of the Common Pheasant (*Phasianus colchicus*), although sometimes it is regarded as a separate species.\n\nHere are some key points about the Green Pheasant:\n\n1. **Appearance**: Male Green Pheasants are particularly striking, with iridescent green plumage on their breasts and upper body, a bluish-purple neck, and distinctive red facial wattles. Females are more subdued in color, with mottled brown plumage that provides camouflage.\n\n2. **Habitat**: These birds are commonly found in various habitats across Japan, including forests, grasslands, and farmlands. They are adaptable and can often be seen at the

If the prompts contain images and the number of tokens exceeds the limit, an image might be truncated entirely.

# Usage gathering across multiple models

Usage object allows you to sum multiple usages.

In [80]:
# Two usages that carry the same model name
usage1 = Usage(model_name="gpt-4o-2024-05-13", prompt_tokens=100, completion_tokens=50)

usage2 = Usage(model_name="gpt-4o-2024-05-13", prompt_tokens=200, completion_tokens=100)

# `+` adds the token counts field by field (300 / 150 / 450 in the recorded output)
usage1 + usage2

Usage(prompt_tokens=300, completion_tokens=150, total_tokens=450)

Usage does not support summing usages from different models.

In [81]:
usage3 = Usage(model_name="gpt-4o-mini-2024-07-18", prompt_tokens=300, completion_tokens=150)

# Adding usages that carry different model names is expected to fail.
# Catch Exception rather than using a bare `except:` so that KeyboardInterrupt
# and SystemExit are not swallowed by this demonstration.
try:
    usage1 + usage3  # Error
except Exception:
    print("Error!")

Error!


However, a usage with an empty model_name can be added to any other usage.

In [82]:
# Usage created without a model_name
usage4 = Usage(prompt_tokens=100, completion_tokens=50)

# Named usage on the left, unnamed usage on the right: this order succeeds
usage1 + usage4

Usage(prompt_tokens=200, completion_tokens=100, total_tokens=300)

On the other hand, it's not possible to add usage to usage of empty model_name.

In [83]:
# Reversed operand order: the usage without a model_name is on the left, which is expected to fail.
# Catch Exception rather than using a bare `except:` so that KeyboardInterrupt
# and SystemExit are not swallowed by this demonstration.
try:
    usage4 + usage1
except Exception:
    print("Error!")

Error!


Building on this, we sometimes want to sum up the total number of tokens for each model. In langrila, a shared token counter is available to aggregate the usage from multiple models.

In [84]:
from langrila import TokenCounter

# One conversation memory object handed to all three clients below
shared_memory = InMemoryConversationMemory()  # just for the explanation

# One token counter object handed to all three clients below
shared_token_counter = TokenCounter()

# Each client gets its own model_name but the same memory and counter instances
gemini = GeminiFunctionalChat(
    api_key_env_name="GEMINI_API_KEY",
    model_name="gemini-1.5-flash",
    conversation_memory=shared_memory,
    token_counter=shared_token_counter,
)

# NOTE(review): earlier cells read the OpenAI key from "OPENAI_API_KEY"; confirm "API_KEY" is intended.
chat_openai = OpenAIFunctionalChat(
    api_key_env_name="API_KEY",
    model_name="gpt-4o-2024-05-13",
    api_type="openai",
    conversation_memory=shared_memory,
    token_counter=shared_token_counter,
)

claude = ClaudeFunctionalChat(
    model_name="claude-3-5-sonnet-20240620",
    api_key_env_name="ANTHROPIC_API_KEY",
    conversation_memory=shared_memory,
    token_counter=shared_token_counter,
)

In [85]:
# One prompt per client, in matching order
prompts = [
    "What is the pheasant?",
    "Please tell me about a bird called Kiji in Japan.",
    "What does Kiji do in the tale of the Momotaro",
]

clients = [gemini, chat_openai, claude]

# Pair each client with its prompt directly instead of indexing `clients` by position.
# Both lists have three entries, so every prompt is sent exactly once.
for client, prompt in zip(clients, prompts):
    response = client.run(prompt)

In [86]:
# Display the usage accumulated by the shared counter, keyed by model name
shared_token_counter

{'gemini-1.5-flash': Usage(prompt_tokens=6, completion_tokens=376, total_tokens=382), 'gpt-4o-2024-05-13': Usage(prompt_tokens=438, completion_tokens=474, total_tokens=912), 'claude-3-5-sonnet-20240620': Usage(prompt_tokens=1018, completion_tokens=457, total_tokens=1475)}

# Prompt template

We can manage a typical prompt as a template.

In [87]:
# Load the template text from a file (path is relative to this notebook's working directory)
template = PromptTemplate.from_text_file("../data/sample_prompt_template.txt")
# Show the raw template, which contains {instruction} and {text} placeholders
print(template.template)

# INSTRUCTION
{instruction}

# TEXT
{text}


In [88]:
# Provide values for the placeholders; the displayed result is the PromptTemplate with args filled in
template.set_args(instruction="Hello", text="world!")

PromptTemplate(args={'instruction': 'Hello', 'text': 'world!'}, template='# INSTRUCTION\n{instruction}\n\n# TEXT\n{text}')

In [89]:
# Render the template with the arguments set in the previous cell
print(template.format())

# INSTRUCTION
Hello

# TEXT
world!


We can pass template string to PromptTemplate directly.

In [90]:
# Build a template directly from a string; args stays empty until set_args is called
PromptTemplate(template="# INSTRUCTION\n{instruction}\n\n# TEXT\n{text}")

PromptTemplate(args={}, template='# INSTRUCTION\n{instruction}\n\n# TEXT\n{text}')