In [17]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

# Groq-hosted Llama 3 chat model with temperature set to 0.
# Pass api_key="..." explicitly only when the key is not supplied through an
# environment variable.
chat = ChatGroq(model="llama3-8b-8192", temperature=0)

# Prompt made of a fixed system message plus a human message whose body is
# filled from the "text" input variable.
system = "You are a helpful assistant."
human = "{text}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

# Pipe the rendered prompt into the model and run it once; the resulting
# message is the cell's displayed value.
chain = prompt | chat
chain.invoke({"text": "Explain the importance of low latency for LLMs."})

AIMessage(content="Low latency is crucial for Large Language Models (LLMs) because it enables them to process and respond to user input in a timely and efficient manner. Here are some reasons why low latency is important for LLMs:\n\n1. **Improved User Experience**: Low latency ensures that users can interact with the LLM quickly and seamlessly, without having to wait for responses. This is particularly important for applications like chatbots, virtual assistants, and language translation tools, where speed and responsiveness are critical.\n2. **Increased Accuracy**: LLMs rely on complex algorithms and computations to generate responses. Low latency allows these models to process and analyze user input more quickly, which can lead to more accurate and relevant responses.\n3. **Reduced Errors**: High latency can lead to errors and misunderstandings, as the LLM may not have enough time to process the user's input correctly. Low latency reduces the likelihood of errors and ensures that th

In [18]:
from typing import Optional

from langchain_core.tools import tool


# NOTE: the docstring below is left short and unchanged on purpose -- `@tool`
# exposes it to the model as the tool description, so editing it would change
# what the model sees. Explanatory notes therefore live in `#` comments.
@tool
def get_current_weather(location: str, unit: Optional[str] = None):
    """Get the current weather in a given location"""
    # `unit` now defaults to None so it is genuinely optional. Without a
    # default, the `Optional[str]` annotation only allowed None as a value
    # while the argument itself was still required by the signature.
    #
    # Demo stub: both arguments are ignored and a fixed forecast is returned.
    return "Cloudy with a chance of rain."


# Attach the weather tool to the chat model; tool_choice="auto" is passed
# through so the model decides whether (and how often) to call it.
tool_model = chat.bind_tools([get_current_weather], tool_choice="auto")

# Ask about two cities in a single prompt.
res = tool_model.invoke("What is the weather like in San Francisco and Tokyo?")

# Display the tool calls carried by the response.
res.tool_calls

[{'name': 'get_current_weather',
  'args': {'location': 'San Francisco', 'unit': 'metric'},
  'id': 'call_tpc9',
  'type': 'tool_call'},
 {'name': 'get_current_weather',
  'args': {'location': 'Tokyo', 'unit': 'metric'},
  'id': 'call_v7nv',
  'type': 'tool_call'}]

In [19]:
from langchain_core.pydantic_v1 import BaseModel, Field


class Joke(BaseModel):
    """Joke to tell user."""

    # The docstring and the Field descriptions are runtime text (presumably
    # forwarded to the model as part of the output schema), so their wording
    # is kept exactly as it was.
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Explicit default=None: the field stays optional regardless of how the
    # underlying pydantic version treats an `Optional[...]` annotation that
    # has no default value.
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# Wrap the chat model so that its replies are returned as `Joke` instances.
structured_llm = chat.with_structured_output(schema=Joke)

# Request one joke; the resulting object is the cell's displayed value.
structured_llm.invoke("Tell me a joke about cats")

Joke(setup='Why did the cat join a band?', punchline='Because it wanted to be the purr-cussionist!', rating=8)

In [20]:
chat = ChatGroq(
    temperature=0,
    model="mixtral-8x7b-32768",
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "Write a Limerick about {topic}"),
    ]
)
chain = prompt | chat
await chain.ainvoke(
    {
        "topic": "The Sun",
    }
)

AIMessage(content='There\'s a bright ball of gas in the sky,\nThat rises and sets, making spirits high.\nIt gives us light and warmth,\nOn sunny days it transforms,\nThe world into a golden, cheery pi.\n\n(Note: I tried to make the last line "The world into a golden, cheery high", but that didn\'t fit the rhythm of a limerick. So I changed it to "pi", which is a mathematical constant and a playful way to end the limerick.)', response_metadata={'token_usage': {'completion_tokens': 114, 'prompt_tokens': 18, 'total_tokens': 132, 'completion_time': 0.178125, 'prompt_time': 0.002279339, 'queue_time': None, 'total_time': 0.180404339}, 'model_name': 'mixtral-8x7b-32768', 'system_fingerprint': 'fp_c5f20b5bb1', 'finish_reason': 'stop', 'logprobs': None}, id='run-59d15f52-3a82-4bd0-9599-b36b7ce5b445-0', usage_metadata={'input_tokens': 18, 'output_tokens': 114, 'total_tokens': 132})

In [21]:
# Streaming: same Mixtral model and temperature, with a haiku prompt
# parameterised by "topic".
chat = ChatGroq(model="mixtral-8x7b-32768", temperature=0)
prompt = ChatPromptTemplate.from_messages([("human", "Write a haiku about {topic}")])
chain = prompt | chat

# Print each chunk's content as it is yielded; end="" avoids inserting a
# newline between chunks and flush=True pushes the text out immediately.
for chunk in chain.stream({"topic": "The Moon"}):
    print(chunk.content, end="", flush=True)

Silent, luminous,
Glowing in the velvet night,
The Moon's gentle light.

In [22]:
# JSON mode: the response_format setting is handed to the model through
# model_kwargs, requesting a JSON object as the reply.
chat = ChatGroq(
    model="mixtral-8x7b-32768",
    model_kwargs={"response_format": {"type": "json_object"}},
)

# The system message spells out the two keys the JSON reply must contain.
system = """
You are a helpful assistant.
Always respond with a JSON object with two string keys: "response" and "followup_question".
"""
human = "{question}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

# Compose prompt and model, then run a single question through the chain.
chain = prompt | chat
chain.invoke({"question": "what bear is best?"})

AIMessage(content='{\n"response": "The best bear is subjective and depends on individual preferences. Some popular choices are the Teddy Bear and the Grizzly Bear.",\n"followup_question": "Is there a specific type of bear you are interested in?"\n}', response_metadata={'token_usage': {'completion_tokens': 57, 'prompt_tokens': 63, 'total_tokens': 120, 'completion_time': 0.0890625, 'prompt_time': 0.004143942, 'queue_time': None, 'total_time': 0.093206442}, 'model_name': 'mixtral-8x7b-32768', 'system_fingerprint': 'fp_c5f20b5bb1', 'finish_reason': 'stop', 'logprobs': None}, id='run-9f6f11e7-f368-43c1-8f02-2d7200b40e50-0', usage_metadata={'input_tokens': 63, 'output_tokens': 57, 'total_tokens': 120})