# SambaNova LangChain Wrappers Usage

In [56]:
import os

from dotenv import load_dotenv
from langchain_embeddings import SambaStudioEmbeddings
from langchain_llms import SambaStudio, SambaNovaCloud
from langchain_sambanova import ChatSambaNovaCloud, ChatSambaStudio
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage

# Locate the repository root by walking two levels up from the kernel's
# working directory (working dir -> utils dir -> repo dir).
current_dir = os.getcwd()
utils_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
repo_dir = os.path.abspath(os.path.join(utils_dir, os.pardir))

# Load the repo-level .env file; override=True is passed so values from the
# file take precedence over variables already set in the environment.
load_dotenv(os.path.join(repo_dir, '.env'), override=True)

True

# SambaStudio LLM

## Non Streaming

In [2]:
# Build the SambaStudio LLM wrapper; every generation setting is passed
# through model_kwargs.
# NOTE(review): no endpoint or credentials are passed here, so they are
# presumably read from the environment loaded from .env — confirm.
llm = SambaStudio(
    model_kwargs={
        'do_sample': False,
        'temperature': 0.01,
        'max_tokens': 256,
        'process_prompt': False,
        'model': 'Meta-Llama-3-70B-Instruct-4096',
    },
)

In [3]:
# invoke() sends a single prompt; the recorded result is a plain string.
llm.invoke('tell me a 50 word tale')

'As the clock struck midnight, a lone figure crept into the abandoned mansion. Suddenly, a chandelier crashed down, and a ghostly figure materialized. "You shouldn\'t have come here," it whispered. The intruder froze, trapped in a century-old curse, forever doomed to roam the haunted halls.'

## Streaming

In [4]:
# Re-create the SambaStudio LLM with the same generation settings used in the
# non-streaming example.
llm = SambaStudio(
    model_kwargs={
        'do_sample': False,
        'max_tokens': 256,
        'temperature': 0.01,
        'process_prompt': False,
        'model': 'Meta-Llama-3-70B-Instruct-4096',
    },
)

In [5]:
# stream() yields the completion piece by piece; end='' joins the pieces on
# one line and flush=True pushes each piece to the output immediately.
for chunk in llm.stream('tell me a 50 word tale'):
    print(chunk, end='', flush=True)

As the clock struck midnight, a lone figure crept into the abandoned mansion. Suddenly, a chandelier crashed down, and a ghostly figure materialized. "You shouldn't have come here," it whispered. The intruder froze, trapped in a century-old curse, forever doomed to roam the haunted halls.

# SambaNovaCloud LLM

## Non Streaming

In [4]:
# SambaNovaCloud LLM wrapper; only the model name is set explicitly here.
llm = SambaNovaCloud(model='llama3-70b')

In [5]:
import json

# Here the prompt is a JSON-encoded list of chat messages (role/content dicts)
# passed to invoke() as one string.
llm.invoke(json.dumps([{'role': 'user', 'content': 'hello'}]))

'Hello. How can I assist you today?'

In [6]:
# The same request written as a plain-text prompt.
llm.invoke('hello')

'Hello. How can I assist you today?'

## Streaming

In [7]:
# Each streamed chunk is printed with print()'s default newline, so the
# output shows one chunk per line.
for chunk in llm.stream('hello tell me a long story'):
    print(chunk)


Here's a long story 
for you:

Once upon 
a time, in a small village 
nestled in the rolling hills of 
rural France, there lived a 
young girl named Sophie. Sophie 
was a curious and adventurous 
child, with a mop of curly 
brown hair and a smile that 
could light up the darkest 
of rooms. She lived with 
her parents, Pierre and 
Colette, in a small stone cottage 
on the outskirts of 
the village.

Sophie's village was 
a charming 
place, filled with narrow 
cobblestone streets, quaint shops, 
and 
bustling cafes. The villagers 
were a tight-knit 
community, and everyone knew each 
other's names and stories. Sophie 
loved listening to the villagers' 
tales of 
old, which 
often featured brave knights, 
beautiful princesses, and 
magical creatures.

One day, while exploring 
the village, Sophie stumbled upon 
a small, mysterious shop tucked 
away on a quiet street. 
The sign above the door 
read "Curios 
and Wonders," and the 
windows were filled 
with a dazzling array of strange 
and 

# SambaStudio Chat Model

## Non Streaming

In [None]:
# Chat-model wrapper for SambaStudio; generation settings are passed as
# keyword arguments, with sampling switched on explicitly via do_sample.
llm = ChatSambaStudio(
    model="Meta-Llama-3.3-70B-Instruct",
    max_tokens=1024,
    temperature=0.3,
    top_p=0.01,
    do_sample=True,
)

In [53]:
# A plain string prompt; the chat model answers with an AIMessage.
llm.invoke("tell me a joke")

AIMessage(content='What do you call a fake noodle?\n\nAn impasta.', additional_kwargs={}, response_metadata={'id': 'item0', 'partial': False, 'value': {'completion': 'What do you call a fake noodle?\n\nAn impasta.', 'completion_tokens_count': 13, 'logprobs': {'text_offset': [], 'top_logprobs': []}, 'model_execution_time': 0.43772339820861816, 'prompt': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\ntell me a joke<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n', 'prompt_tokens_count': 39, 'stop_reason': 'end_of_text', 'tokens': ['What', ' do', ' you', ' call', ' a', ' fake', ' nood', 'le', '?\n\n', 'An', ' imp', 'asta', '.'], 'total_tokens_count': 52}, 'params': {}, 'status': None}, id='item0')

In [58]:
# A conversation can also be passed as a list of message objects: the
# SystemMessage carries the instructions, the HumanMessage the user turn.
messages = [
    SystemMessage(content="You are a helpful assistant with pirate accent"),
    HumanMessage(content="tell me a joke")
]
llm.invoke(messages)

AIMessage(content="Yer lookin' fer a joke, eh? Alright then, matey, here be one fer ye:\n\nWhy did the pirate quit his job?\n\n(pause fer dramatic effect, savvy?)\n\nBecause he was sick o' all the arrrr-guments!\n\nYarrr, I hope that made ye laugh, me hearty!", additional_kwargs={}, response_metadata={'id': 'item0', 'partial': False, 'value': {'completion': "Yer lookin' fer a joke, eh? Alright then, matey, here be one fer ye:\n\nWhy did the pirate quit his job?\n\n(pause fer dramatic effect, savvy?)\n\nBecause he was sick o' all the arrrr-guments!\n\nYarrr, I hope that made ye laugh, me hearty!", 'completion_tokens_count': 66, 'logprobs': {'text_offset': [], 'top_logprobs': []}, 'model_execution_time': 0.9974634647369385, 'prompt': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a helpful assistant with pirate accent<|eot_id|><|start_header_id|>user<|end_header_id|>\n\ntell me a joke<|eot_id|><|st

In [21]:
# ainvoke() is the async counterpart of invoke(): it returns an awaitable.
# Awaiting at the top level of a cell relies on the notebook kernel's
# support for top-level await.
future_response = llm.ainvoke("tell me a joke")
await future_response

AIMessage(content="Here's a joke for you:\n\nWhat do you call a fake noodle?\n\nAn impasta!\n\nHope that made you laugh! Do you want to hear another one?", additional_kwargs={}, response_metadata={'id': 'item0', 'partial': False, 'value': {'completion': "Here's a joke for you:\n\nWhat do you call a fake noodle?\n\nAn impasta!\n\nHope that made you laugh! Do you want to hear another one?", 'completion_tokens_count': 34, 'logprobs': {'text_offset': [], 'top_logprobs': []}, 'model_execution_time': 0.6353631019592285, 'prompt': '<|begin_of_text|><|begin_of_text|><|start_header_id|>user<|end_header_id|> tell me a joke <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n', 'prompt_tokens_count': 15, 'stop_reason': 'end_of_text', 'tokens': ['Here', "'s", ' a', ' joke', ' for', ' you', ':\n\n', 'What', ' do', ' you', ' call', ' a', ' fake', ' nood', 'le', '?\n\n', 'An', ' imp', 'asta', '!\n\n', 'Hope', ' that', ' made', ' you', ' laugh', '!', ' Do', ' you', ' want', ' to', ' hear', ' anot

## Batching

In [6]:
# batch() takes a list of prompts and returns a list with one AIMessage per prompt.
llm.batch(["tell me a joke", "which is the capital of UK?"])

[AIMessage(content="Here's one:\n\nWhy couldn't the bicycle stand up by itself?\n\n(Wait for it...)\n\nBecause it was two-tired!\n\nHope that made you laugh!", additional_kwargs={}, response_metadata={'finish_reason': None, 'usage': {'prompt_tokens': 14, 'completion_tokens': 32, 'total_tokens': 46, 'throughput_after_first_token': 70.06468839908355, 'time_to_first_token': 0.2191941738128662, 'model_execution_time': 0.5902798175811768}, 'model_name': 'Meta-Llama-3-70B-Instruct-4096', 'system_fingerprint': '', 'created': 1727913510}, id='f6ef319e-4ba2-4117-8d63-f5823d0bc947'),
 AIMessage(content='The capital of the United Kingdom (UK) is London.', additional_kwargs={}, response_metadata={'finish_reason': None, 'usage': {'prompt_tokens': 17, 'completion_tokens': 12, 'total_tokens': 29, 'throughput_after_first_token': 62.08816650383397, 'time_to_first_token': 0.21888446807861328, 'model_execution_time': 0.315521240234375}, 'model_name': 'Meta-Llama-3-70B-Instruct-4096', 'system_fingerprint'

In [7]:
# abatch() is the async counterpart of batch(); the returned awaitable
# resolves to the list of responses.
future_responses = llm.abatch(["tell me a joke", "which is the capital of UK?"])
await future_responses

[AIMessage(content="Here's one:\n\nWhy couldn't the bicycle stand up by itself?\n\n(Wait for it...)\n\nBecause it was two-tired!\n\nHope that made you laugh!", additional_kwargs={}, response_metadata={'finish_reason': None, 'usage': {'prompt_tokens': 14, 'completion_tokens': 32, 'total_tokens': 46, 'throughput_after_first_token': 70.03251686884015, 'time_to_first_token': 0.21899962425231934, 'model_execution_time': 0.5902557373046875}, 'model_name': 'Meta-Llama-3-70B-Instruct-4096', 'system_fingerprint': '', 'created': 1727913513}, id='33123501-e370-4a70-bda1-92c8af70865b'),
 AIMessage(content='The capital of the United Kingdom (UK) is London.', additional_kwargs={}, response_metadata={'finish_reason': None, 'usage': {'prompt_tokens': 17, 'completion_tokens': 12, 'total_tokens': 29, 'throughput_after_first_token': 62.36218692927395, 'time_to_first_token': 0.21871089935302734, 'model_execution_time': 0.3149230480194092}, 'model_name': 'Meta-Llama-3-70B-Instruct-4096', 'system_fingerprin

## Streaming

In [8]:
# Each streamed chunk is a message chunk; its text lives in .content.
# end="" prints the pieces back to back so the reply reads as one text.
for chunk in llm.stream("tell me a joke"):
    print(chunk.content, end="")

Here's one:

Why couldn't the bicycle stand up by itself?

(Wait for it...)

Because it was two-tired!

Hope that made you laugh!

In [9]:
# Streaming also accepts a list of messages. print() is used with its
# default newline here, so every chunk appears on its own line.
messages = [
    SystemMessage(content="You are a helpful assistant with pirate accent"),
    HumanMessage(content="tell me a joke"),
]
for chunk in llm.stream(messages):
    print(chunk.content)

Arrr, 
listen up, matey! Here 
be a joke fer ye:


Why did the pirate quit his job?


Because he was 
sick o' all 
the arrrr-guments! (get 
it? arguments, but with a 
pirate "arrr" 
sound? Aye, I be 
a regular comedic genius, savvy?)


So, did I make 
ye laugh, or did I 
walk the plank?



In [10]:
# astream() is the async generator counterpart of stream(); a top-level
# `async for` relies on the notebook kernel's support for top-level async code.
async for chunk in llm.astream("tell me a joke"):
    print(chunk.content)

Here's one:


Why couldn't the bicycle stand 
up by itself?

(Wait 
for it...)

Because it 
was two-tired!

Hope that 
made you laugh!



## Function calling

In [59]:
from langchain_core.tools import tool
from datetime import datetime

# NOTE: the docstring is left exactly as written because the @tool decorator
# exposes it to the model as the tool description.
@tool()
def get_time(kind: str = 'both') -> str:
    """Returns current date, current time or both.

    Args:
        kind: date, time or both
    """
    # Take one clock reading so the date and the time always describe the
    # same instant (two separate now() calls could straddle midnight).
    now = datetime.now()
    date = now.strftime('%d/%m/%Y')
    time = now.strftime('%H:%M:%S')
    if kind == 'date':
        return f'Current date: {date}'
    if kind == 'time':
        return f'Current time: {time}'
    # Any other value, including the default 'both', reports date and time.
    return f'Current date: {date}, Current time: {time}'
    
# Tool that adds two integers. The docstring is kept as-is: the @tool
# decorator uses it as the description of the tool.
@tool()  
def add(a: int, b: int) -> int:
    """Add two integers.

    Args:
        a: First integer
        b: Second integer
    """
    return a + b

# Tools that get bound to the chat model in the next cell.
tools=[get_time, add]

In [None]:
# Fresh chat model for the function-calling example.
llm = ChatSambaStudio(
    model="Meta-Llama-3.3-70B-Instruct",
    max_tokens=1024,
    temperature=0.3,
    top_p=0.01,
)
# bind_tools() returns a model that is aware of the given tools and can
# answer with tool calls.
tool_llm = llm.bind_tools(tools)

In [None]:
# The tool calls requested by the model (name, args, id) are exposed on the
# response's tool_calls attribute; the tools themselves are not executed here.
response = tool_llm.invoke("what time is it?")
response.tool_calls

[{'name': 'get_time',
  'args': {'kind': 'time'},
  'id': 'call_44a33627fb9442ebb9',
  'type': 'tool_call'}]

## Structured output

In [None]:
# Fresh chat model (no tools bound) for the structured-output examples.
llm = ChatSambaStudio(
    model="Meta-Llama-3.3-70B-Instruct",
    max_tokens=1024,
    temperature=0.3,
    top_p=0.01
)

In [44]:
from pydantic import BaseModel, Field

# Pydantic
# Schema class for the structured output. Its docstring and Field
# descriptions are left untouched.
# NOTE(review): they presumably become part of the schema shown to the model — confirm.
class Joke(BaseModel):
    """Joke to tell user."""
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")

# With a pydantic class as schema, invoke() returns an instance of that class.
structured_llm = llm.with_structured_output(Joke)

structured_llm.invoke("Tell me a joke about cats")

Joke(setup='Why did the cat join a band?', punchline='Because it wanted to be the purr-cussionist!')

In [45]:
from typing import Optional
from typing_extensions import Annotated, TypedDict

# TypedDict
# The schema expressed as a TypedDict. This definition replaces the pydantic
# Joke class of the same name defined in the previous cell.
# NOTE(review): the Annotated metadata presumably follows the
# (type, default, description) convention, with `...` marking a required
# field — confirm against the LangChain structured-output docs.
class Joke(TypedDict):
    """Joke to tell user."""
    setup: Annotated[str, ..., "The setup of the joke"]
    punchline: Annotated[str, ..., "The punchline of the joke"]
    rating: Annotated[Optional[int], None, "How funny the joke is, from 1 to 10"]


# With a TypedDict schema, invoke() returns a plain dict.
structured_llm = llm.with_structured_output(Joke)

structured_llm.invoke("Tell me a joke about cats")

{'punchline': 'Because it wanted to be the purr-cussionist!',
 'rating': 8,
 'setup': 'Why did the cat join a band?'}

In [46]:
# json schema
# The schema written directly as a JSON-schema dict: "setup" and "punchline"
# are required, "rating" is optional.
json_schema = {
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {
            "type": "string",
            "description": "The setup of the joke",
        },
        "punchline": {
            "type": "string",
            "description": "The punchline to the joke",
        },
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
            "default": None,
        },
    },
    "required": ["setup", "punchline"],
}
# With a JSON-schema dict as schema, invoke() returns a plain dict.
structured_llm = llm.with_structured_output(json_schema)

structured_llm.invoke("Tell me a joke about cats")

{'punchline': 'Because it wanted to be the purr-cussionist!',
 'setup': 'Why did the cat join a band?'}

In [47]:
# Using json_mode method
# No schema is passed in this mode, so the expected JSON keys are spelled out
# in the prompt itself. include_raw=False returns only the parsed result.
structured_llm = llm.with_structured_output(
    method="json_mode",
    include_raw=False
)

structured_llm.invoke(
    "Answer the following question. "
    "Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n"
    "What's heavier a pound of bricks or a pound of feathers?"
)

{'answer': 'They are the same weight',
 'justification': 'A pound is a unit of weight or mass, so a pound of bricks and a pound of feathers both weigh the same amount, one pound. The difference is in their density and volume. A pound of feathers would take up more space than a pound of bricks due to the difference in their densities.'}

In [48]:
from pydantic import BaseModel
# Using json_schema method
# Schema for the answer; deliberately left without a docstring so the
# schema generated from it is unchanged.
class AnswerWithJustification(BaseModel):
    answer: str
    justification: str


# include_raw=True makes invoke() return a dict holding both the raw
# AIMessage ('raw') and the parsed object ('parsed').
structured_llm = llm.with_structured_output(
    AnswerWithJustification,
    method="json_schema",
    include_raw=True,
)

structured_llm.invoke(
    "Answer the following question. "
    "Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n"
    "What's heavier a pound of bricks or a pound of feathers?"
)

{'raw': AIMessage(content='{\n  "answer": "They are the same weight",\n  "justification": "A pound is a unit of weight or mass, so a pound of bricks and a pound of feathers both weigh the same amount, one pound. The difference is in their density and volume. A pound of feathers would take up more space than a pound of bricks due to the difference in their densities."\n}', additional_kwargs={}, response_metadata={'finish_reason': None, 'usage': {'prompt_tokens': 70, 'completion_tokens': 79, 'total_tokens': 149, 'model_execution_time': 1.191061019897461}, 'model_name': 'Meta-Llama-3.1-70B-Instruct', 'system_fingerprint': '', 'created': 1732049872}, id='6ab145da-b369-498e-b928-67b5a09ec299'),
 'parsed': AnswerWithJustification(answer='They are the same weight', justification='A pound is a unit of weight or mass, so a pound of bricks and a pound of feathers both weigh the same amount, one pound. The difference is in their density and volume. A pound of feathers would take up more space tha

# SambaNova Cloud Chat Model

## Non Streaming

In [None]:
# Chat-model wrapper for SambaNova Cloud.
# NOTE(review): stream_options={'include_usage': True} presumably asks for
# usage statistics in the response metadata — confirm against the wrapper.
llm = ChatSambaNovaCloud(
    model="Meta-Llama-3.3-70B-Instruct",
    max_tokens=1024,
    temperature=0.7,
    top_p=0.01,
    stream_options={'include_usage': True},
)

In [None]:
# A plain string prompt; the chat model answers with an AIMessage.
llm.invoke("tell me a joke")

AIMessage(content='What do you call a fake noodle?\n\nAn impasta.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 19, 'completion_tokens': 13, 'completion_tokens_after_first_per_sec': 472.65089024115395, 'completion_tokens_after_first_per_sec_first_ten': 1673.3181312335958, 'completion_tokens_per_sec': 89.81238085801046, 'end_time': 1731021565.6574025, 'is_last_response': True, 'prompt_tokens': 39, 'start_time': 1731021565.4950366, 'time_to_first_token': 0.1369771957397461, 'total_latency': 0.14474619062323318, 'total_tokens': 52, 'total_tokens_per_sec': 359.2495234320418}, 'model_name': 'Meta-Llama-3.1-70B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1731021565}, id='d928193e-a158-43ee-a4b5-e01d74392ea5')

In [None]:
# The conversation as explicit message objects: the SystemMessage carries
# the instructions, the HumanMessage the user turn.
messages = [
    SystemMessage(content="You are a helpful assistant with pirate accent"),
    HumanMessage(content="tell me a joke")
    ]
llm.invoke(messages)

AIMessage(content="Yer lookin' fer a joke, eh? Alright then, matey, here be one fer ye:\n\nWhy did the pirate quit his job?\n\n(pause fer dramatic effect, savvy?)\n\nBecause he was sick o' all the arrrr-guments!\n\nYarrr, I hope that made ye laugh, me hearty!", additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 8, 'completion_tokens': 66, 'completion_tokens_after_first_per_sec': 520.5848397648267, 'completion_tokens_after_first_per_sec_first_ten': 653.7256857855361, 'completion_tokens_per_sec': 268.51617358915166, 'end_time': 1731021567.2902174, 'is_last_response': True, 'prompt_tokens': 47, 'start_time': 1731021567.0205224, 'time_to_first_token': 0.1448354721069336, 'total_latency': 0.24579524993896484, 'total_tokens': 113, 'total_tokens_per_sec': 459.73223659960814}, 'model_name': 'Meta-Llama-3.1-70B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1731021567}, id='70258699-0c16-4a94-b6fc-1beffd0d8072')

In [None]:
future_response = llm.ainvoke("tell me a joke")
await(future_response) 

AIMessage(content='What do you call a fake noodle?\n\nAn impasta.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 19, 'completion_tokens': 13, 'completion_tokens_after_first_per_sec': 489.30284647690155, 'completion_tokens_after_first_per_sec_first_ten': 1688.027451811057, 'completion_tokens_per_sec': 90.03422957386933, 'end_time': 1731021568.2752926, 'is_last_response': True, 'prompt_tokens': 39, 'start_time': 1731021568.1140797, 'time_to_first_token': 0.136688232421875, 'total_latency': 0.14438952897724353, 'total_tokens': 52, 'total_tokens_per_sec': 360.1369182954773}, 'model_name': 'Meta-Llama-3.1-70B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1731021568}, id='49a3ef85-5a04-48de-84b0-b30742ea6c55')

## Batching

In [None]:
# Chat model for the batching examples, created with streaming=False.
# NOTE(review): presumably this makes the wrapper request complete
# (non-streamed) responses — confirm against the wrapper.
llm = ChatSambaNovaCloud(
    model="Meta-Llama-3.3-70B-Instruct",
    streaming=False,
    max_tokens=1024,
    temperature=0.7,
    top_p=0.01,
    stream_options={'include_usage': True},
)

In [None]:
# batch() takes a list of prompts and returns a list with one AIMessage per prompt.
llm.batch(["tell me a joke","which is the capital of UK?"])

[AIMessage(content='What do you call a fake noodle?\n\nAn impasta.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 19, 'completion_tokens': 13, 'completion_tokens_after_first_per_sec': 491.99565986647247, 'completion_tokens_after_first_per_sec_first_ten': 1677.686280288836, 'completion_tokens_per_sec': 89.53407040415136, 'end_time': 1731021571.2208374, 'is_last_response': True, 'prompt_tokens': 39, 'start_time': 1731021571.0589995, 'time_to_first_token': 0.13744735717773438, 'total_latency': 0.14519612412703664, 'total_tokens': 52, 'total_tokens_per_sec': 358.13628161660546}, 'model_name': 'Meta-Llama-3.1-70B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1731021571}, id='2e51b075-b37b-4354-acc8-3609d73b3fa7'),
 AIMessage(content='The capital of the United Kingdom (UK) is London.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 17, 'completion_tokens': 12, 'completion_tokens_after_f

In [None]:
future_responses = llm.abatch(["tell me a joke","which is the capital of UK?"])
await(future_responses)

[AIMessage(content='What do you call a fake noodle?\n\nAn impasta.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 19, 'completion_tokens': 13, 'completion_tokens_after_first_per_sec': 451.44540317517266, 'completion_tokens_after_first_per_sec_first_ten': 1555.5078076202374, 'completion_tokens_per_sec': 84.31290126209767, 'end_time': 1731021572.4831522, 'is_last_response': True, 'prompt_tokens': 39, 'start_time': 1731021572.3107407, 'time_to_first_token': 0.1458301544189453, 'total_latency': 0.1541875538073088, 'total_tokens': 52, 'total_tokens_per_sec': 337.2516050483907}, 'model_name': 'Meta-Llama-3.1-70B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1731021572}, id='ac0c43e3-0129-4f7b-a778-817498d7d845'),
 AIMessage(content='The capital of the United Kingdom (UK) is London.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 17, 'completion_tokens': 12, 'completion_tokens_after_fir

## Streaming

In [None]:
# Chat model for the streaming examples, created with streaming=True.
# NOTE(review): presumably this makes the wrapper request streamed
# responses — confirm against the wrapper.
llm = ChatSambaNovaCloud(
    model="Meta-Llama-3.3-70B-Instruct",
    streaming=True,
    max_tokens=1024,
    temperature=0.7,
    top_p=0.01,
    stream_options={'include_usage': True},
)

In [None]:
# Each streamed chunk's text lives in .content; print() adds a newline after
# every chunk, so the output shows one chunk per line.
for chunk in llm.stream("tell me a joke"):
    print(chunk.content)


What do you call a fake 

noodle?

An impasta.





In [None]:
# Streaming with a list of message objects instead of a plain string.
messages = [
    SystemMessage(content="You are a helpful assistant with pirate accent"),
    HumanMessage(content="tell me a joke")
    ]
for chunk in llm.stream(messages):
    print(chunk.content)


Yer lookin' fer a 
joke, eh? Alright 
then, matey, here be 
one fer ye:

Why did the pirate quit his job?



(pause fer 
dramatic effect, savvy?)

Because he was sick o' 
all the arrrr-guments!



Yarrr, I hope that made 
ye 
laugh, 
me hearty!





In [None]:
# astream() is the async generator counterpart of stream(); a top-level
# `async for` relies on the notebook kernel's support for top-level async code.
async for chunk in llm.astream("tell me a joke"):
    print(chunk.content)


What do you call a fake noodle?



An impasta.





## Function calling

In [None]:
from langchain_core.tools import tool
from datetime import datetime

# NOTE: the docstring is left exactly as written because the @tool decorator
# exposes it to the model as the tool description.
@tool()
def get_time(kind: str = 'both') -> str:
    """Returns current date, current time or both.

    Args:
        kind: date, time or both
    """
    # Take one clock reading so the date and the time always describe the
    # same instant (two separate now() calls could straddle midnight).
    now = datetime.now()
    date = now.strftime('%d/%m/%Y')
    time = now.strftime('%H:%M:%S')
    if kind == 'date':
        return f'Current date: {date}'
    if kind == 'time':
        return f'Current time: {time}'
    # Any other value, including the default 'both', reports date and time.
    return f'Current date: {date}, Current time: {time}'
    
# Tool that adds two integers. The docstring is kept as-is: the @tool
# decorator uses it as the description of the tool.
@tool()  
def add(a: int, b: int) -> int:
    """Add two integers.

    Args:
        a: First integer
        b: Second integer
    """
    return a + b

# Tools that get bound to the chat model in the next cell.
tools=[get_time, add]

In [None]:
# Fresh chat model for the function-calling example.
llm = ChatSambaNovaCloud(
    model="Meta-Llama-3.3-70B-Instruct",
    max_tokens=1024,
    temperature=0.3,
    top_p=0.01,
)
# bind_tools() returns a model that is aware of the given tools and can
# answer with tool calls.
tool_llm = llm.bind_tools(tools)

In [None]:
# The tool calls requested by the model (name, args, id) are exposed on the
# response's tool_calls attribute; the tools themselves are not executed here.
response = tool_llm.invoke("what time is it?")
response.tool_calls

[{'name': 'get_time',
  'args': {'kind': 'time'},
  'id': 'call_2f4673153d314dfdb2',
  'type': 'tool_call'}]

## Structured output

In [None]:
# Fresh chat model (no tools bound) for the structured-output examples.
llm = ChatSambaNovaCloud(
    model="Meta-Llama-3.3-70B-Instruct",
    max_tokens=1024,
    temperature=0.3,
    top_p=0.01,
)

In [None]:
from pydantic import BaseModel, Field

# Pydantic
# Schema class for the structured output. Its docstring and Field
# descriptions are left untouched.
# NOTE(review): they presumably become part of the schema shown to the model — confirm.
class Joke(BaseModel):
    """Joke to tell user."""
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")

# With a pydantic class as schema, invoke() returns an instance of that class.
structured_llm = llm.with_structured_output(Joke)

structured_llm.invoke("Tell me a joke about cats")

Joke(setup='Why did the cat join a band?', punchline='Because it wanted to be the purr-cussionist!')

In [None]:
from typing import Optional
from typing_extensions import Annotated, TypedDict

# TypedDict
# The schema expressed as a TypedDict. This definition replaces the pydantic
# Joke class of the same name defined in the previous cell.
# NOTE(review): the Annotated metadata presumably follows the
# (type, default, description) convention, with `...` marking a required
# field — confirm against the LangChain structured-output docs.
class Joke(TypedDict):
    """Joke to tell user."""
    setup: Annotated[str, ..., "The setup of the joke"]
    punchline: Annotated[str, ..., "The punchline of the joke"]
    rating: Annotated[Optional[int], None, "How funny the joke is, from 1 to 10"]


# With a TypedDict schema, invoke() returns a plain dict.
structured_llm = llm.with_structured_output(Joke)

structured_llm.invoke("Tell me a joke about cats")

{'punchline': 'Because it wanted to be the purr-cussionist!',
 'rating': 8,
 'setup': 'Why did the cat join a band?'}

In [None]:
# json schema
# The schema written directly as a JSON-schema dict: "setup" and "punchline"
# are required, "rating" is optional.
json_schema = {
    "title": "joke",
    "description": "Joke to tell user.",
    "type": "object",
    "properties": {
        "setup": {
            "type": "string",
            "description": "The setup of the joke",
        },
        "punchline": {
            "type": "string",
            "description": "The punchline to the joke",
        },
        "rating": {
            "type": "integer",
            "description": "How funny the joke is, from 1 to 10",
            "default": None,
        },
    },
    "required": ["setup", "punchline"],
}
# With a JSON-schema dict as schema, invoke() returns a plain dict.
structured_llm = llm.with_structured_output(json_schema)

structured_llm.invoke("Tell me a joke about cats")

{'punchline': 'Because it wanted to be the purr-cussionist!',
 'rating': None,
 'setup': 'Why did the cat join a band?'}

In [None]:
# Using json_mode method
# No schema is passed in this mode, so the expected JSON keys are spelled out
# in the prompt itself. include_raw=False returns only the parsed result.
structured_llm = llm.with_structured_output(
    method="json_mode",
    include_raw=False
)

structured_llm.invoke(
    "Answer the following question. "
    "Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n"
    "What's heavier a pound of bricks or a pound of feathers?"
)

{'answer': 'They are the same weight',
 'justification': 'One pound is a unit of weight or mass, so a pound of bricks and a pound of feathers both weigh the same amount, one pound. The difference is in their density and volume. A pound of feathers would take up more space than a pound of bricks due to the difference in their densities.'}

In [None]:
from pydantic import BaseModel
# Using json_schema method
# Schema for the answer; deliberately left without a docstring so the
# schema generated from it is unchanged.
class AnswerWithJustification(BaseModel):
    answer: str
    justification: str


# include_raw=True makes invoke() return a dict that also carries the raw
# AIMessage under the 'raw' key.
structured_llm = llm.with_structured_output(
    AnswerWithJustification,
    method="json_schema",
    include_raw=True,
)

structured_llm.invoke(
    "Answer the following question. "
    "Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n"
    "What's heavier a pound of bricks or a pound of feathers?"
)

{'raw': AIMessage(content='{\n  "answer": "They are the same weight",\n  "justification": "A pound is a unit of weight or mass, so a pound of bricks and a pound of feathers both weigh the same amount, one pound. The difference is in their density and volume. A pound of feathers would take up more space than a pound of bricks due to the difference in their densities."\n}', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 4.722222222222222, 'completion_tokens': 79, 'completion_tokens_after_first_per_sec': 358.09983493653596, 'completion_tokens_after_first_per_sec_first_ten': 418.25878333749813, 'completion_tokens_per_sec': 242.5663740319178, 'end_time': 1731530854.610591, 'is_last_response': True, 'prompt_tokens': 70, 'start_time': 1731530854.2559688, 'time_to_first_token': 0.13680577278137207, 'total_latency': 0.32568405375761145, 'total_tokens': 149, 'total_tokens_per_sec': 457.4986041867817}, 'model_name': 'Meta-Llama-3.1-70B-Instruct', 's

# SambaStudio Embeddings

In [None]:
# Embedding wrapper for SambaStudio.
# NOTE(review): 'select_expert' presumably names the embedding model to use
# (inferred from the key name) — confirm against the wrapper.
embedding = SambaStudioEmbeddings(batch_size=1, model_kwargs={'select_expert': 'e5-mistral-7b-instruct'})
# The notebook displays only the value of the last expression, so the
# embed_documents() result is computed but not shown.
embedding.embed_documents(['tell me a 50 word tale', 'tell me a joke'])
embedding.embed_query('tell me a 50 word tale')

In [None]:
from langchain.schema import Document
from langchain.vectorstores import Chroma

# Sample texts to index. They are kept verbatim (spelling included) because
# they are the data being embedded.
texts = [
    'tell me a 50 word tale',
    'tell me a joke',
    'when was America discoverd?',
    'how to build an engine?',
    'give me 3 party activities',
    'give me three healty dishes',
]
# Wrap each text in a Document. The strings stay in `texts` so that `docs`
# only ever holds Document objects.
docs = [Document(page_content=text) for text in texts]

query = 'prompt for generating something fun'

# Embed the documents with the SambaStudio embedding wrapper and index them
# in a Chroma vector store, then expose the store as a retriever.
vectordb = Chroma.from_documents(docs, embedding)
retriever = vectordb.as_retriever()

# invoke() replaces the deprecated get_relevant_documents(), which emitted
# a deprecation warning.
retriever.invoke(query)

  warn_deprecated(


[Document(page_content='tell me a 50 word tale'),
 Document(page_content='tell me a joke'),
 Document(page_content='give me 3 party activities'),
 Document(page_content='give me three healty dishes')]