In [11]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

# Chat model for this cell. Later cells that reference `chat` use whichever
# model was assigned to it most recently.
# Pass api_key="..." here only when the key is NOT already supplied through
# an environment variable.
chat = ChatGroq(temperature=0, model="llama3-70b-8192")

# Two-message prompt: a fixed system instruction, then the human turn whose
# "{text}" placeholder is filled from the dict handed to invoke().
system = "You are a helpful assistant."
human = "{text}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

# Compose prompt -> model. The bare final expression is the cell's displayed result.
chain = prompt | chat
chain.invoke({"text": "Explain the importance of low latency for LLMs."})

AIMessage(content="Low latency is crucial for Large Language Models (LLMs) because it directly impacts the user experience, model performance, and overall efficiency of language-based applications. Here are some reasons why low latency is essential for LLMs:\n\n1. **Real-time Interaction**: LLMs are often used in applications that require real-time interaction, such as chatbots, virtual assistants, and language translation systems. Low latency ensures that the model responds quickly to user input, providing a seamless and engaging experience.\n2. **Conversational Flow**: In conversational AI, latency can disrupt the natural flow of conversation. High latency can lead to awkward pauses, making the interaction feel unnatural and frustrating. Low latency helps maintain a smooth conversation, allowing users to engage more naturally with the model.\n3. **Model Performance**: LLMs rely on complex algorithms and massive datasets to generate responses. High latency can lead to increased comput

In [12]:
from typing import Optional

from langchain_core.tools import tool


# Stub tool used to demonstrate tool calling: every call returns the same
# canned forecast, regardless of the arguments supplied.
#
#   location -- place the forecast is requested for (accepted, not consulted).
#   unit     -- optional unit hint (accepted, not consulted). It defaults to
#               None so that the argument is genuinely optional, matching its
#               Optional[...] annotation; without a default it was a required
#               argument despite the annotation.
#
# NOTE(review): the docstring is deliberately left as-is -- presumably `@tool`
# uses it as the tool description shown to the model, so editing it would
# change what the model sees. Confirm before rewording.
@tool
def get_current_weather(location: str, unit: Optional[str] = None) -> str:
    """Get the current weather in a given location"""
    return "Cloudy with a chance of rain."


# Attach the weather tool to the current `chat` model, passing
# tool_choice="auto" along with it.
tool_model = chat.bind_tools([get_current_weather], tool_choice="auto")

# One question that names two cities.
res = tool_model.invoke("What is the weather like in San Francisco and Tokyo?")

# Display the tool calls carried on the response.
res.tool_calls

[{'name': 'get_current_weather',
  'args': {'location': 'San Francisco', 'unit': 'Celsius'},
  'id': 'call_gkh6',
  'type': 'tool_call'},
 {'name': 'get_current_weather',
  'args': {'location': 'Tokyo', 'unit': 'Celsius'},
  'id': 'call_e598',
  'type': 'tool_call'}]

In [13]:
from langchain_core.pydantic_v1 import BaseModel, Field


# Output schema for the structured-output example: two required string fields
# and an optional integer rating.
# NOTE(review): the class docstring and the Field descriptions are kept
# verbatim -- presumably they are part of the schema sent to the model.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Explicit default: an Optional annotation alone only implies "defaults to
    # None" under pydantic v1 semantics; spelling it out keeps `rating`
    # optional regardless of which pydantic flavour backs BaseModel.
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# Wrap the current `chat` model so its replies come back as `Joke` objects.
structured_llm = chat.with_structured_output(Joke)

# Bare final expression: the resulting object is the cell's displayed output.
structured_llm.invoke("Tell me a joke about cats")

Joke(setup='Why did the cat join a band?', punchline='Because it wanted to be the purr-cussionist!', rating=None)

In [14]:
chat = ChatGroq(
    temperature=0,
    model="mixtral-8x7b-32768",
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "Write a Limerick about {topic}"),
    ]
)
chain = prompt | chat
await chain.ainvoke(
    {
        "topic": "The Sun",
    }
)

AIMessage(content='There\'s a bright ball of gas in the sky,\nThat rises and sets, making spirits high.\nIt gives us light and warmth,\nOn sunny days it transforms,\nThe world into a golden, cheery pi.\n\n(Note: I tried to make the last line "The world into a golden, cheery high", but that didn\'t fit the rhythm of a limerick. So I changed it to "pi", which is a mathematical constant and a playful way to end the limerick.)', response_metadata={'token_usage': {'completion_tokens': 114, 'prompt_tokens': 18, 'total_tokens': 132, 'completion_time': 0.178125, 'prompt_time': 0.00215303, 'queue_time': None, 'total_time': 0.18027803}, 'model_name': 'mixtral-8x7b-32768', 'system_fingerprint': 'fp_c5f20b5bb1', 'finish_reason': 'stop', 'logprobs': None}, id='run-d6bf4469-9571-42e9-881f-92cf4b12423a-0', usage_metadata={'input_tokens': 18, 'output_tokens': 114, 'total_tokens': 132})

In [15]:
# Streaming example. The model is rebuilt here (same settings as the previous
# cell) so this cell does not depend on that cell having been run.
chat = ChatGroq(temperature=0, model="mixtral-8x7b-32768")
prompt = ChatPromptTemplate.from_messages(
    [("human", "Write a haiku about {topic}")]
)
chain = prompt | chat

# Print each chunk's text as it arrives: end="" joins the pieces without
# inserting newlines, flush=True pushes each piece out immediately.
for chunk in chain.stream({"topic": "The Moon"}):
    print(chunk.content, end="", flush=True)

Silent, luminous,
Glowing in the velvet night,
The Moon's gentle light.

In [16]:
# JSON-mode example: the response_format setting is forwarded via model_kwargs.
# No temperature is passed for this model instance.
chat = ChatGroq(
    model="mixtral-8x7b-32768",
    model_kwargs={"response_format": {"type": "json_object"}},
)

# The system message spells out the JSON shape the reply should have.
system = """
You are a helpful assistant.
Always respond with a JSON object with two string keys: "response" and "followup_question".
"""
human = "{question}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

chain = prompt | chat

# Bare final expression: the returned message is the cell's displayed output.
chain.invoke({"question": "what bear is best?"})

AIMessage(content='{\n"response": "The best bear is subjective and depends on personal preference. Some popular bears include the Teddy Bear, Grizzly Bear, and Panda Bear.",\n"followup_question": "Do you have a specific type of bear in mind?"\n}', response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 63, 'total_tokens': 123, 'completion_time': 0.09375, 'prompt_time': 0.004336649, 'queue_time': None, 'total_time': 0.098086649}, 'model_name': 'mixtral-8x7b-32768', 'system_fingerprint': 'fp_c5f20b5bb1', 'finish_reason': 'stop', 'logprobs': None}, id='run-c9f98fbe-e52b-47cf-8c52-a8104ded81a2-0', usage_metadata={'input_tokens': 63, 'output_tokens': 60, 'total_tokens': 123})