<a href="https://colab.research.google.com/github/lcoia/LearningLangChain/blob/main/Chapter1/Chapter1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain langchain-groq langchain-community

In [2]:
# a-llm.py
# Note: using a free Groq model instead of paid OpenAI
from langchain_groq.chat_models import ChatGroq

In [3]:
# Store your API keys in Google Colab Secrets
from google.colab import userdata

In [5]:
"""
I'm using Groq (https://groq.com/) because it provides a free API tier.
https://console.groq.com/docs/rate-limits

"""

# Chat model shared by the following cells; the API key is read from Colab
# Secrets instead of being written into the notebook.
model = ChatGroq(
    api_key=userdata.get("GROQ_API_KEY"),
    model="llama3-70b-8192",
)

In [6]:
# Call the model with a bare string and print only the content of the reply.
response = model.invoke("The sky is")
print(response.content)

...blue!


In [7]:
"""
b-chat.py

HumanMessage - A message sent from the perspective of the human, with user role.
"""
from langchain_core.messages import HumanMessage
# The prompt is a list of messages; here it holds a single HumanMessage.
prompt = [HumanMessage(content="What is the capital of France?")]

In [8]:
# Send the message list built in the previous cell and print the reply's content.
response = model.invoke(prompt)
print(response.content)

That's an easy one! The capital of France is Paris.


In [9]:
"""
c-system.py

SystemMessage - A message setting the instructions the AI should follow, with the system role.
"""
from langchain_core.messages import SystemMessage

# Instruction for the assistant (system role) and the user's question (human role).
system_msg = SystemMessage(
    content="You are a helpful assistant that responds to questions with three exclamation marks."
)
human_msg = HumanMessage(content="What is the capital of France?")

# Pass both messages, system first, in a single call.
response = model.invoke([system_msg, human_msg])
print(response.content)

Paris!!!


In [10]:
"""
d-prompt.py

PromptTemplate - Making LLM prompts reusable

https://python.langchain.com/v0.1/docs/modules/model_io/prompts/quick_start/
"""
from langchain_core.prompts import PromptTemplate

# Reusable prompt with two placeholders, {context} and {question}.
# Adjacent string literals are concatenated, so each literal below is one line
# of the template text.
template = PromptTemplate.from_template(
    "Answer the question based on the context below.\n"
    'If the question cannot be answered using the information provided, answer with "I don\'t know".\n'
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer: "
)

# Fill in both placeholders; the rendered prompt is reused by the next cell.
prompt = template.invoke(
    {
        "question": "Which model providers offer LLMs?",
        "context": "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.",
    }
)

print(prompt)

text='Answer the question based on the context below.\nIf the question cannot be answered using the information provided, answer with "I don\'t know".\n\nContext: The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face\'s `transformers` library, or by utilizing OpenAI and Cohere\'s offerings through the `openai` and `cohere` libraries, respectively.\n\nQuestion: Which model providers offer LLMs?\n\nAnswer: '


In [11]:
"""
e-prompt-model.py

Invoke the model with the prompt
"""
# `prompt` is the value rendered by the PromptTemplate cell above; printing the
# whole response (not just .content) also shows its metadata.
response = model.invoke(prompt)
print(response)

content='OpenAI and Cohere.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 135, 'total_tokens': 142, 'completion_time': 0.031282396, 'prompt_time': 0.004219737, 'queue_time': 0.219664299, 'total_time': 0.035502133}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_dd4ae1c591', 'finish_reason': 'stop', 'logprobs': None} id='run-b397f682-2b29-49a8-9e51-fa6e1b2ae111-0' usage_metadata={'input_tokens': 135, 'output_tokens': 7, 'total_tokens': 142}


In [12]:
"""
f-chat-prompt.py

ChatPromptTemplate - Prompt template for chat models.
Note: This example only renders the template into messages; it does not call the model.
"""
from langchain_core.prompts import ChatPromptTemplate
# Chat prompt built from (role, text) pairs: one system instruction followed by
# two human messages that carry the context and the question.
template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the question based on the context below. "
            "If the question cannot be answered using the information provided, "
            'answer with "I don\'t know".',
        ),
        ("human", "Context: {context}"),
        ("human", "Question: {question}"),
    ]
)

# Render the template only; the printed value is the list of filled-in messages.
response = template.invoke(
    {
        "question": "Which model providers offer LLMs?",
        "context": "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.",
    }
)

print(response)

messages=[SystemMessage(content='Answer the question based on the context below. If the question cannot be answered using the information provided, answer with "I don\'t know".', additional_kwargs={}, response_metadata={}), HumanMessage(content="Context: The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.", additional_kwargs={}, response_metadata={}), HumanMessage(content='Question: Which model providers offer LLMs?', additional_kwargs={}, response_metadata={})]


In [13]:
"""
g-chat-prompt-model.py
"""
# Same three-message chat prompt as the previous cell, this time sent to the model.
template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the question based on the context below. If the question cannot be "
            "answered using the information provided, answer with \"I don't know\".",
        ),
        ("human", "Context: {context}"),
        ("human", "Question: {question}"),
    ]
)

# Fill in the placeholders to obtain the concrete messages.
prompt = template.invoke(
    {
        "question": "Which model providers offer LLMs?",
        "context": "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.",
    }
)

# Print the full response object returned for the rendered prompt.
print(model.invoke(prompt))

content='According to the context, OpenAI and Cohere offer Large Language Models (LLMs) through their respective libraries, `openai` and `cohere`.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 147, 'total_tokens': 180, 'completion_time': 0.094285714, 'prompt_time': 0.004637223, 'queue_time': 0.219722153, 'total_time': 0.098922937}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_dd4ae1c591', 'finish_reason': 'stop', 'logprobs': None} id='run-328626d4-4157-47bc-8c50-0f6632f7d173-0' usage_metadata={'input_tokens': 147, 'output_tokens': 33, 'total_tokens': 180}


In [14]:
"""
h-structured.py

Getting specific output formats from the model.
"""
from pydantic import BaseModel, Field


class AnswerWithJustification(BaseModel):
    """An answer to the user's question along with justification for the answer."""

    # Descriptions go through Field(...) so they are part of the schema pydantic
    # generates. A bare string placed after a field is ignored by pydantic unless
    # `use_attribute_docstrings` is enabled, so it would never reach the model.
    answer: str = Field(description="The answer to the user's question")
    justification: str = Field(description="Justification for the answer")


# Wrap the model so its reply comes back as an AnswerWithJustification instance.
structured_llm = model.with_structured_output(AnswerWithJustification)
response = structured_llm.invoke(
    "What weighs more, a pound of bricks or a pound of feathers")
print(response)

answer='They weigh the same' justification='A pound is a unit of weight or mass, so one pound of bricks and one pound of feathers both weigh the same amount, one pound.'


In [15]:
"""
https://python.langchain.com/docs/how_to/structured_output/#pydantic-class

Beyond just the structure of the Pydantic class, the name of the Pydantic class, the docstring,
and the names and provided descriptions of parameters are very important.
Most of the time with_structured_output is using a model's function/tool calling API,
and you can effectively think of all of this information as being added to the model prompt.
"""
from typing import Optional
from pydantic import BaseModel, Field


# Pydantic
class Joke(BaseModel):
    """Joke to tell user."""

    # As the note at the top of this cell explains, the class name, the docstring
    # above and every Field description act as prompt text, so their wording is
    # part of the behaviour and should be edited deliberately.
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Optional field: defaults to None when no rating is supplied.
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )

# Bind the Joke schema to the model and request one joke; the printed result
# shows the populated fields.
structured_llm = model.with_structured_output(Joke)
response = structured_llm.invoke("Tell me a joke about cats")
print(response)

setup='Why did the cat join a band?' punchline='Because it wanted to be the purr-cussionist!' rating=None


In [16]:
"""
i-csv.py

https://python.langchain.com/api_reference/core/output_parsers.html
"""

from langchain_core.output_parsers import CommaSeparatedListOutputParser

# The parser is invoked directly on a string here (no model call) to show that
# it turns comma-separated text into a Python list.
parser = CommaSeparatedListOutputParser()

response = parser.invoke("apple, banana, cherry")
print(response)

['apple', 'banana', 'cherry']


In [17]:
"""
j-methods.py

invoke() takes a single input and returns a single output.
"""

# Print the whole returned message rather than just its .content.
completion = model.invoke("What is the capital of France?")
print(completion)


content='The capital of France is Paris.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 17, 'total_tokens': 25, 'completion_time': 0.022857143, 'prompt_time': 0.000247397, 'queue_time': 0.21881393899999999, 'total_time': 0.02310454}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_dd4ae1c591', 'finish_reason': 'stop', 'logprobs': None} id='run-3695c0df-c633-4f27-8ee6-7d953f8841df-0' usage_metadata={'input_tokens': 17, 'output_tokens': 8, 'total_tokens': 25}


In [18]:
# batch() sends a list of inputs in one call and returns a list of outputs.

completions = model.batch(
    [
        "What is the capital of Ohio?",
        "What is the capital of Spain?",
    ]
)
print(completions)

[AIMessage(content='The capital of Ohio is Columbus.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 17, 'total_tokens': 25, 'completion_time': 0.022857143, 'prompt_time': 0.000238987, 'queue_time': 0.221827048, 'total_time': 0.02309613}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_dd4ae1c591', 'finish_reason': 'stop', 'logprobs': None}, id='run-52eec8a0-0af0-4d09-8b4d-4fd844a35f11-0', usage_metadata={'input_tokens': 17, 'output_tokens': 8, 'total_tokens': 25}), AIMessage(content='The capital of Spain is Madrid.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 17, 'total_tokens': 25, 'completion_time': 0.022857143, 'prompt_time': 0.000240067, 'queue_time': 0.21761535099999998, 'total_time': 0.02309721}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_dd4ae1c591', 'finish_reason': 'stop', 'logprobs': None}, id='run-6b8c2825-084f-4e9f-acf3-504c41f8ab7b-0', usag

In [None]:
# stream() takes a single input and returns an iterator of parts of the output as they become available.

# Each yielded part is printed in full (print(token), not token.content), one per line.
for token in model.stream("What is the capital of Germany?"):
    print(token)

In [None]:
"""
k-imperative.py

Imperative Composition
 The @chain decorator adds the same Runnable interface to any function you write.
 It adds the invoke(), batch(), and stream() methods.
"""

from langchain_core.runnables import chain

# Two-message chat prompt: a fixed system instruction plus the user's {question}.
template = ChatPromptTemplate.from_messages(
    [("system", "You are a helpful assistant."), ("human", "{question}")]
)

# Same model configuration as earlier in the notebook, re-created here.
model = ChatGroq(
    api_key=userdata.get("GROQ_API_KEY"),
    model="llama3-70b-8192",
)

@chain
def chatbot(values):
    """Fill the chat template with `values` and return the model's reply."""
    return model.invoke(template.invoke(values))

# The decorated function is called through .invoke() with the template's input dict.
response = chatbot.invoke({"question": "Which model providers offer LLMs?"})
print(response.content)

In [None]:
"""
ka-stream.py

Enable streaming or async support.

"""

@chain
def chatbot(values):
    """Generator variant: yield each part the model streams for the filled-in prompt."""
    yield from model.stream(template.invoke(values))


# Consume the generator-based chain through .stream(), printing each part as it is yielded.
for part in chatbot.stream({"question": "Which model providers offer LLMs?"}):
    print(part)


In [None]:
"""
kb-async.py

Asynchronous execution

Note: Colab/Jupyter already run an event loop, so asyncio.run() cannot be called directly from a cell there.
"""

@chain
async def chatbot(values):
    """Fill the chat template and call the model using the async (a-prefixed) methods."""
    prompt = await template.ainvoke(values)
    return await model.ainvoke(prompt)


async def main():
    """Ask the async chatbot the sample question and return its reply."""
    return await chatbot.ainvoke({"question": "Which model providers offer LLMs?"})


def _run_coroutine(coro):
    """Run `coro` to completion from synchronous code and return its result.

    asyncio.run() raises RuntimeError when an event loop is already running in
    the current thread, which is the case inside Jupyter/Colab. In that
    situation the coroutine is run on a worker thread that gets its own loop.
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread (e.g. a plain script): run directly.
        return asyncio.run(coro)

    # A loop is already running: execute asyncio.run() on a separate thread and
    # block until it finishes.
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


if __name__ == "__main__":
    print(_run_coroutine(main()))

In [None]:
"""
l-declarative.py

Declarative Composition

LangChain Expression Language (LCEL) is a declarative language for composing LangChain components.
LangChain compiles LCEL compositions into an optimized execution plan, with
automatic parallelization, streaming, tracing, and async support.
"""

# The | operator composes the prompt template and the model into one chain.
chatbot = template | model

# Invoke the composed chain with the template's input dict and print the reply text.

response = chatbot.invoke({"question": "Which model providers offer LLMs?"})
print(response.content)