## Creating your first LangChain project

In [45]:
import os
os.environ['OPENAI_API_KEY'] = ""

In [3]:
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

In [3]:
chat = ChatOpenAI()

In [4]:
messages = [
    (
        "system",
        "You are a helpful AI that helps the user make travel plans. Respond only in a single line.",
    ),
    ("human", "I want to go skiing. Which city should I go to?"),
]
first_msg = chat.invoke(messages)
first_msg

AIMessage(content='You should consider visiting Aspen, Colorado for a great skiing experience.', response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 44, 'total_tokens': 57}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-18ae549e-4dc1-44e2-86f7-21a36dcec0b0-0', usage_metadata={'input_tokens': 44, 'output_tokens': 13, 'total_tokens': 57})

In [5]:
print(first_msg.content)

You should consider visiting Aspen, Colorado for a great skiing experience.


In [6]:
print(first_msg.usage_metadata["total_tokens"])

57


#### Google Generative AI API

In [None]:
%pip install langchain-google-genai

In [7]:
#os.environ['GOOGLE_API_KEY'] = ""

In [8]:
from langchain_google_genai import ChatGoogleGenerativeAI
chat = ChatGoogleGenerativeAI(model="gemini-1.5-flash", convert_system_message_to_human=True)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
messages = [
    (
        "system",
        "You are a helpful AI that helps the user make travel plans. Respond only in a single line.",
    ),
    ("human", "I want to go skiing. Which city should I go to?"),
]
first_msg = chat.invoke(messages)
first_msg



AIMessage(content='Aspen, Colorado', response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': [{'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE', 'blocked': False}]}, id='run-60de87ef-b0b6-4deb-aac9-53abf1b814d8-0', usage_metadata={'input_tokens': 34, 'output_tokens': 3, 'total_tokens': 37})

In [10]:
print(first_msg.content)

Aspen, Colorado


## Chat vs LLM in Langchain

In [13]:
from langchain_openai import OpenAI
llm = OpenAI()
city = llm.predict("I want to go skiing. Which city should I go to?")
print(city)



It depends on your preferences and budget. Some popular ski destinations include Aspen, Colorado; Whistler, British Columbia; St. Moritz, Switzerland; and Chamonix, France. Consider factors such as the type of terrain, level of difficulty, and overall atmosphere of the city before making a decision.


In [14]:
places = llm.predict("What else can I do in that city?")
print(places)



1. Visit famous landmarks and monuments such as the Eiffel Tower, Arc de Triomphe, and Notre-Dame Cathedral.

2. Explore the Louvre Museum, one of the world's largest and most famous art museums.

3. Take a stroll along the Seine River and admire the beautiful architecture and bridges.

4. Shop at the iconic Galeries Lafayette and Printemps department stores.

5. Indulge in French cuisine at local cafes, bistros, and Michelin-starred restaurants.

6. Attend a cabaret show at the famous Moulin Rouge or Lido.

7. Take a day trip to the Palace of Versailles to see the opulent former residence of French kings.

8. Visit the Luxembourg Gardens, a beautiful public park with fountains, statues, and a palace.

9. Explore the Latin Quarter, known for its lively atmosphere, quaint streets, and historical landmarks.

10. Admire the views from the top of the Montparnasse Tower, the tallest skyscraper in Paris.

11. See a show at the Palais Garnier, the stunning opera house that inspired The Phan

In [15]:
messages = [
    (
        "system",
        "You are a helpful AI that helps the user make travel plans. Respond only in ble line.",
    ),
    ("human", "I want to go skiing. Which city should I go to?"),
    ("ai", "You should consider visiting Aspen, Colorado for a great skiing experience."),
    ("human", "What else can I do in that city?"),
]
ai_msg = chat.invoke(messages)
print(ai_msg.content)



Aspen is also known for its luxury shopping, fine dining, and lively nightlife scene.


### Model parameters

In [None]:
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # api_key="...",  # if you prefer to pass api key in directly instaed of using env vars
    # base_url="...",
    # organization="...",
    # other params...
)

#### Image generation using Dall-E

In [16]:
from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper

In [17]:
image_url = DallEAPIWrapper().run("Generate a high resolution image of a cute dog")
image_url

'https://oaidalleapiprodscus.blob.core.windows.net/private/org-OI0ki8SoEzqcZK986r4Cmsox/user-orPKk2qofimq78RKmXLvIoYg/img-EetPznG1mW7cjWDTu9GceEjf.png?st=2024-06-27T04%3A46%3A44Z&se=2024-06-27T06%3A46%3A44Z&sp=r&sv=2023-11-03&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-06-27T01%3A43%3A55Z&ske=2024-06-28T01%3A43%3A55Z&sks=b&skv=2023-11-03&sig=uzfHISmXLacqdg5zVOkKU6bIhw1KEz0ih4YMV/JYQ6I%3D'

For other tools refer: https://python.langchain.com/v0.2/docs/integrations/tools/

## Prompt Templates

In [18]:
from langchain_core.prompts import PromptTemplate

dictionary_template = PromptTemplate(
    input_variables =['word'],
    template = "Give me a one line definition of {word}. Then, give one example of how it is used."
)
p = dictionary_template.format(word="Sesquipedalian")
print(p)


Give me a one line definition of Sesquipedalian. Then, give one example of how it is used.


In [19]:
openAILLM = ChatOpenAI()
print(openAILLM.invoke(p).content)

Sesquipedalian: a person who uses long words or speaks in a verbose manner.
Example: The professor's sesquipedalian lecture left the students feeling confused and overwhelmed.


#### Multiple input variables

In [20]:
from langchain_core.prompts import PromptTemplate

define_translate = PromptTemplate(
    input_variables =['word','language'],
    template = "Give me a one line definition of {word}. Then, give the translation of that word in {language}."
)
p = define_translate.format(word="Sesquipedalian", language="Hindi")
print(p)
print(chat.invoke(p).content)

Give me a one line definition of Sesquipedalian. Then, give the translation of that word in Hindi.




**Definition:** A long, multisyllabic word.

**Hindi Translation:** बहुवर्णीय


#### Taking input from user

In [21]:
import tkinter as tk
from tkinter import simpledialog

ROOT = tk.Tk()
ROOT.withdraw()
# the input dialog
user_input = simpledialog.askstring(title="Quick Dictionary",
                                  prompt="Enter the word you want to learn about:")

dictionary_template = PromptTemplate(
    input_variables =['word'],
    template = "Give me a one line definition of {word}. Then, give one example of how it is used."
)
p = dictionary_template.format(word=user_input)
print(chat.invoke(p).content)



**Definition:** The act of giving time, money, or other resources to help others.

**Example:** The Bill & Melinda Gates Foundation is a philanthropic organization that works to improve global health, reduce extreme poverty, and expand educational opportunities.


#### General purpose prompt template

In [None]:
template = """You are a helpful assistant.

Human: {human_input}
Assistant:"""

prompt = PromptTemplate(
    input_variables=["human_input"], template=template
)

#### ChatPromptTemplate

In [86]:
from langchain_core.prompts import ChatPromptTemplate

chat_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that gives a one-line definition of the word entered by user"),
        ("human", "{user_input}"),
    ]
)

messages = chat_template.format_messages(user_input="Sesquipedalian")
messages

[SystemMessage(content='You are a helpful assistant that gives a one-line definition of the word entered by user'),
 HumanMessage(content='Sesquipedalian')]

In [87]:
from langchain_core.prompts import ChatPromptTemplate

chat_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that gives a one-line answer to user query"),
        ("human", "Who created theory of relativity?"),
        ("ai", "Albert Einstein developed the theory of relativity."),
        ("human", "{user_input}"),
    ]
)

messages = chat_template.format_messages(user_input="When was it created?")
messages

[SystemMessage(content='You are a helpful assistant that gives a one-line answer to user query'),
 HumanMessage(content='Who created theory of relativity?'),
 AIMessage(content='Albert Einstein developed the theory of relativity.'),
 HumanMessage(content='When was it created?')]

In [None]:
from langchain_core.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate, AIMessagePromptTemplate

chat_template = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template("You are a helpful assistant that gives a one-line definition of the word entered by user"),
        HumanMessagePromptTemplate.from_template("{input}"),
    ]
)
messages = chat_template.format_messages(input="Callous")
messages

#### Few shot prompt templates

In [23]:
from langchain import FewShotPromptTemplate

# create our examples
examples = [
    {
        "rev": "I love this product",
        "answer": "positive"
    }, {
        "rev": "It was an average experience",
        "answer": "neutral"
    },{
        "rev": "I wonder why it is so highly rated.",
        "answer": "negative"
    }
]


# create a prompt example from above template
example_prompt = PromptTemplate(
    input_variables=["rev", "answer"],
    template= """
User: {rev}
AI: {answer}
"""
)

# now break our previous prompt into a prefix and suffix
# the prefix is our instructions
prefix = "Identify the sentiment of the user review. Here are some examples: "
# and the suffix our user input and output indicator
suffix = """
User: {review}
AI: """

# now create the few shot prompt template
few_shot_prompt_template = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["review"],
    example_separator="\n"
)

user_review = "Well structured. Five stars."

print(few_shot_prompt_template.format(review=user_review))

Identify the sentiment of the user review. Here are some examples: 

User: I love this product
AI: positive


User: It was an average experience
AI: neutral


User: I wonder why it is so highly rated.
AI: negative


User: Well structured. Five stars.
AI: 


## Chains

#### Generic chain - LLMChain

In [3]:
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI

dictionary_template = PromptTemplate(
    input_variables =["word"],
    template = "Give me a one line definition of {word}. Then, give one example of how it is used."
)

llm = ChatOpenAI()

#p = dictionary_template.format(word="Sesquipedalian")
#print(chat.invoke(p).content)

chain = LLMChain(llm=llm, prompt=dictionary_template)
chain.invoke("Anachronism")


{'word': 'Anachronism',
 'text': 'Anachronism is when something is placed in a time period where it does not belong.\n\nExample: In the movie Gladiator, there is a scene where a character is seen wearing a wristwatch, which is an anachronism because wristwatches were not invented during the time period of Ancient Rome.'}

In [4]:
input_list = [
    {"word": "Sesquipedalian"},
    {"word": "Anachronism"},
    {"word": "Onomatopoeia"}
]

chain.apply(input_list)

[{'text': "Sesquipedalian means characterized by long words; long-winded or verbose in speech or writing.\n\nExample: The professor's sesquipedalian lecture left the students confused and struggling to understand the material."},
 {'text': 'Anachronism is something or someone that is misplaced in a particular time period, often appearing in a setting where it does not belong.\n\nExample: In the movie "Gladiator," the character Maximus uses a gas canister that clearly did not exist during Roman times, creating an anachronism in the film.'},
 {'text': 'Onomatopoeia is a word that imitates the sound it represents, such as "buzz" or "hiss."\n\nExample: The bees buzzed around the flowers in the garden.'}]

In [5]:
chain.generate(input_list)

LLMResult(generations=[[ChatGeneration(text="Sesquipedalian means using long words or being overly verbose in language. \n\nExample: The professor's sesquipedalian speech made it difficult for the students to understand the material.", generation_info={'finish_reason': 'stop', 'logprobs': None}, message=AIMessage(content="Sesquipedalian means using long words or being overly verbose in language. \n\nExample: The professor's sesquipedalian speech made it difficult for the students to understand the material.", response_metadata={'token_usage': {'completion_tokens': 38, 'prompt_tokens': 30, 'total_tokens': 68}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-9892d216-ac56-45c9-9491-a6c00c1ddb83-0', usage_metadata={'input_tokens': 30, 'output_tokens': 38, 'total_tokens': 68}))], [ChatGeneration(text='Anachronism is something or someone that is not in its correct historical or chronological time period. \n\nExample: In the

#### Utility chains

In [6]:
from langchain.chains import LLMMathChain

In [7]:
#new code
calculator = LLMMathChain.from_llm(llm, verbose=True)
calculator.invoke("Calculate (625 raised to power (0.225))-(log10(100))")



[1m> Entering new LLMMathChain chain...[0m
Calculate (625 raised to power (0.225))-(log10(100))[32;1m[1;3m```text
625**0.225 - log10(100)
```
...numexpr.evaluate("625**0.225 - log10(100)")...
[0m
Answer: [33;1m[1;3m2.2566996126039234[0m
[1m> Finished chain.[0m


{'question': 'Calculate (625 raised to power (0.225))-(log10(100))',
 'answer': 'Answer: 2.2566996126039234'}

In [8]:
print(calculator.prompt.template)

Translate a math problem into a expression that can be executed using Python's numexpr library. Use the output of running this code to answer the question.

Question: ${{Question with math problem.}}
```text
${{single line mathematical expression that solves the problem}}
```
...numexpr.evaluate(text)...
```output
${{Output of running the code}}
```
Answer: ${{Answer}}

Begin.

Question: What is 37593 * 67?
```text
37593 * 67
```
...numexpr.evaluate("37593 * 67")...
```output
2518731
```
Answer: 2518731

Question: 37593^(1/5)
```text
37593**(1/5)
```
...numexpr.evaluate("37593**(1/5)")...
```output
8.222831614237718
```
Answer: 8.222831614237718

Question: {question}



In [9]:
calc_llm =ChatOpenAI()
messages = [
    ("human", "Calculate (625 raised to power (0.225))-(log10(100))"),
]
calculation = calc_llm.invoke(messages)
print(calculation.content)

First, calculate 625 raised to the power of 0.225:

625^0.225 ≈ 9.054

Next, calculate log base 10 of 100:

log10(100) = 2

Finally, subtract the two values:

9.054 - 2 = 7.054

Therefore, (625^0.225) - (log10(100)) ≈ 7.054.


##### List of all chains - https://python.langchain.com/v0.1/docs/modules/chains/

### Sequential Chain

In [12]:
marketing_text = PromptTemplate(
    input_variables =['name','description'],
    template = "Generate a one line facebook ad copy for a product called {name}. Below is a description of this product: {description}"
)

text_llm = ChatOpenAI()

text_chain = LLMChain(llm=text_llm, prompt=marketing_text, output_key="copy_text")


translate_text = PromptTemplate(input_variables=["copy_text"], template="""Translate this text to Hindi:

{copy_text} """)

from langchain_google_genai import ChatGoogleGenerativeAI

translate_llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

translate_chain = LLMChain(llm=translate_llm, prompt=translate_text, output_key="translated_copy")

from langchain.chains import SequentialChain

seq_chain = SequentialChain(
    chains = [text_chain, translate_chain],
    input_variables = ["name", "description"],
    output_variables = ["copy_text","translated_copy"]
)

seq_chain({"name": "AeroGlow Nightlight","description":"AeroGlow Nightlight is a smart, voice-activated nightlight that projects calming, animated constellations onto your ceiling."})

  from .autonotebook import tqdm as notebook_tqdm
  warn_deprecated(


{'name': 'AeroGlow Nightlight',
 'description': 'AeroGlow Nightlight is a smart, voice-activated nightlight that projects calming, animated constellations onto your ceiling.',
 'copy_text': 'Transform your room into a tranquil oasis with AeroGlow Nightlight - the voice-activated nightlight that projects calming constellations onto your ceiling. ✨ #goodnightssleep',
 'translated_copy': 'AeroGlow रात की रोशनी से अपने कमरे को शांत नखलिस्तान में बदल दें - वह वॉयस-एक्टिवेटेड रात की रोशनी जो आपकी छत पर शांत नक्षत्रों को प्रोजेक्ट करती है। ✨ #सुहानीरातोंकीनींद'}

### LCEL - Pipe operator and Runnables

In [13]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

dictionary_template = PromptTemplate(
    input_variables =["word"],
    template = "Give me a one line definition of {word}. Response should not contain the word - {word} itself, only the meaning."
)

llm = ChatOpenAI()

chain = dictionary_template | llm | StrOutputParser()

In [14]:
chain.invoke({"word": "Onomatopoeia"})

'A word that imitates the sound it represents.'

In [15]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

guesser_template = PromptTemplate(
    input_variables =["definition"],
    template = "Give me an English word which has the following meaning- {definition}."
)

llm = ChatOpenAI()

chain2 = guesser_template.pipe(llm).pipe(StrOutputParser())
chain2.invoke("A word that imitates the sound it represents.")

'Onomatopoeia'

In [17]:
seq_chain = {"definition": chain} | chain2

seq_chain.invoke({"word": "Sesquipedalian"})

'Pompous'

#### RunnablePassthrough, RunnableLambda and RunnableParallel

In [18]:
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableParallel

In [19]:
chain = RunnablePassthrough()

In [21]:
chain.invoke("abcd")

'abcd'

In [23]:
def output_length(input: str):
    output = len(input)
    return output

chain = RunnableLambda(output_length)
chain.invoke("input to output")

15

In [25]:
chain = RunnableParallel(text = RunnablePassthrough(), length = RunnableLambda(output_length))
chain.invoke("start-tech academy")

{'text': 'start-tech academy', 'length': 18}

#### Example

In [26]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

explainer_template = PromptTemplate(
    input_variables =["topic"],
    template = "Explain this topic in a single paragraph as if you were explaining it to a 10 year old - {topic}."
)

llm = ChatOpenAI()

openai_chain = explainer_template | llm | StrOutputParser()
openai_chain.invoke({"topic": "Gravity"})

"Gravity is a force that pulls things towards each other, like how the Earth pulls us down so we don't float away into space. It's what keeps us on the ground and makes things fall when we drop them. Gravity is why planets stay in orbit around the sun and why the moon stays in orbit around Earth."

In [27]:
from langchain_google_genai import ChatGoogleGenerativeAI

googlellm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

google_chain = explainer_template | googlellm | StrOutputParser()
google_chain.invoke({"topic": "Gravity"})

"Gravity is like an invisible superpower that makes things fall down and stay on the ground. It's like a magnet that pulls everything toward the center of the Earth, even you! The bigger an object is, the stronger its gravity pull. So, the Earth has a much stronger gravity pull than you do, which is why you can't jump off the planet!"

In [28]:
analyzer_template = PromptTemplate(
    input_variables =["topic", "explanation1", "explanation2"],
    template = """
    Which of the two explanations given below are better for explaining {topic} to 10 year old students.
    ###
    Explanation 1 - {explanation1}
    ###
    ***
    Explanation 2 - {explanation2}
    ***
    """
)

combined_chain = (
    RunnableParallel({"explanation1": openai_chain, "explanation2": google_chain, "topic": RunnablePassthrough()})
    | analyzer_template | llm | StrOutputParser()
)

combined_chain.invoke({"topic": "Gravity"})

'Both explanations are good for explaining gravity to 10 year old students, but Explanation 1 may be slightly better because it is simpler and more straightforward. It uses the analogy of a magnet to help kids understand how gravity works in a familiar way. However, Explanation 2 also does a good job of explaining gravity in a fun and engaging way, comparing it to a superpower and emphasizing how it keeps us grounded on Earth. Ultimately, either explanation would likely be effective in helping kids grasp the concept of gravity.'

In [30]:
analyzer_chain = analyzer_template | googlellm | StrOutputParser()

updated_chain = (
    RunnableParallel({"explanation1": openai_chain, "explanation2": google_chain, "topic": RunnablePassthrough()}) 
    | RunnableParallel({"input": RunnablePassthrough(), "analysis": analyzer_chain})
)

updated_chain.invoke({"topic": "Gravity"})

{'input': {'explanation1': "Gravity is a force that pulls things towards each other. It's what keeps us on the ground and makes things fall when we drop them. Imagine it like a magnet that is always pulling things down towards the Earth.",
  'explanation2': "Imagine an invisible force like a magnet that pulls everything with mass towards each other. This force is called gravity. It's why things fall down when you drop them, and why you stay on the ground instead of floating away. The more mass something has, the stronger its gravity is. So, a big planet like Earth has a lot of gravity, while a small ball has very little.",
  'topic': {'topic': 'Gravity'}},
 'analysis': "Explanation 2 is better for explaining the topic 'Gravity' to 10-year-old students. It provides a more vivid and concrete analogy using the concept of an invisible magnet, which is easier for children to understand. The explanation also highlights the relationship between mass and gravity, making it more informative and

#### Dynamic Routing

In [32]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

classifier_template = PromptTemplate(
    input_variables = ["question", "answer"],
    template = """You are given a question and the user's response to that question. Classify the response as either `Correct`, or `Incorrect`.
    Do not respond with more than one word.
    Question - {question}
    User's Answer - {answer}
    Classification:"""
)

llm = ChatOpenAI()

output_parser = StrOutputParser()

classifier_chain = classifier_template | llm | output_parser

classifier_chain.invoke({"question": "what are penguins", "answer": "Penguins are birds"})

'Correct'

In [33]:
correct_template = PromptTemplate(
    input_variables = ["question"],
    template = """The user was asked the following question and user answered it correctly. Now ask a more difficult question on the same topic to the user.
    Question: {question}
    New Question:"""
)

correct_chain = correct_template | llm | output_parser

incorrect_template = PromptTemplate(
    input_variables = ["question"],
    template = """The user was asked the following question and user answered it incorrectly. Give the correct answer and explain it to the user.
    Question: {question}
    Correct Answer:
    Explanation: """
)

incorrect_chain = incorrect_template | llm | output_parser

In [34]:
def route(info):
    if  info["result"].lower() == "correct":
        return correct_chain
    elif info["result"].lower() == "incorrect":
        return incorrect_chain
    else:
        return "Format is not correct"

In [35]:
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough


final_chain = RunnableParallel({"result": classifier_chain, "question": lambda x: x["question"], "answer": lambda x: x["answer"]}) | RunnableParallel({"response":RunnableLambda(route), "input": RunnablePassthrough()})

In [37]:
final_chain.invoke({"question": "what are penguins", "answer": "Penguins are sea animals"})

{'response': 'Penguins are flightless birds that are mostly found in the Southern Hemisphere, especially in Antarctica. They are known for their distinctive black and white coloration, waddling walk, and their ability to swim underwater at high speeds. They primarily feed on fish and other marine life.',
 'input': {'result': 'Incorrect',
  'question': 'what are penguins',
  'answer': 'Penguins are sea animals'}}

In [38]:
final_chain.invoke({"question": "what are SQL joins", "answer": "Joins are used to join two queries"})

{'response': 'SQL joins are used to combine rows from two or more tables based on a related column between them. There are different types of joins such as INNER JOIN, LEFT JOIN, RIGHT JOIN, and FULL JOIN, each serving a different purpose in combining data from multiple tables in a database. Joins are essential in SQL for retrieving data from multiple tables and creating meaningful relationships between them.',
 'input': {'result': 'Incorrect',
  'question': 'what are SQL joins',
  'answer': 'Joins are used to join two queries'}}

In [39]:
final_chain.invoke({"question": "what are SQL joins", "answer": "SQL joins are operations used to combine rows from two or more tables based on a related column"})

{'response': 'Can you explain the difference between an inner join and an outer join in SQL?',
 'input': {'result': 'Correct',
  'question': 'what are SQL joins',
  'answer': 'SQL joins are operations used to combine rows from two or more tables based on a related column'}}

### Output Parsing

#### StrOutputParser()

In [40]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

dictionary_template = PromptTemplate(
    input_variables =["word"],
    template = "Give me a one line definition of {word}. Response should not contain the word itself, only the meaning."
)

llm = ChatOpenAI()

chain = dictionary_template | llm 
chain.invoke("Onomatopoeia")

AIMessage(content='Onomatopoeia is a word that imitates the sound it represents.', response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 32, 'total_tokens': 48}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d86b9864-ab27-460e-870d-ff2b2d2b6f40-0', usage_metadata={'input_tokens': 32, 'output_tokens': 16, 'total_tokens': 48})

In [41]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

dictionary_template = PromptTemplate(
    input_variables =["word"],
    template = "Give me a one line definition of {word}. Response should not contain the word itself, only the meaning."
)

llm = ChatOpenAI()

chain = dictionary_template | llm | StrOutputParser()
chain.invoke("Onomatopoeia")

'A word that imitates the sound it represents.'

#### StructuredOutputParser - Output as a specified Schema

In [42]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

chat_model = ChatOpenAI()

response_schemas = [
    ResponseSchema(name="word", description="word entered by user"),
    ResponseSchema(name="meaning", description="One line meaning of the word given by user"),
    ResponseSchema(name="example", description="An example of how that word can be used in a line"),
    ResponseSchema(name="Etymology", description="Origin or history of the word")
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"word": string  // word entered by user
	"meaning": string  // One line meaning of the word given by user
	"example": string  // An example of how that word can be used in a line
	"Etymology": string  // Origin or history of the word
}
```


In [43]:
prompt = PromptTemplate(
    template="Provide the meaning, an example of how the word is used in a sentence and the etymology of this word: {word}.\n{format_instructions}",
    input_variables=["word"],
    partial_variables={"format_instructions": format_instructions}
)

chain = prompt | chat_model | output_parser

chain.invoke("Onomatopoeia")

{'word': 'Onomatopoeia',
 'meaning': 'The formation of a word from a sound associated with what is named.',
 'example': "The word 'buzz' is an example of onomatopoeia, as it imitates the sound of a bee.",
 'Etymology': "The word 'onomatopoeia' comes from the Greek words 'onoma' meaning 'name' and 'poiein' meaning 'to make', translating to 'to make a name (sound)'"}

#### Comma separated list

In [48]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

output_parser = CommaSeparatedListOutputParser()

format_instructions = output_parser.get_format_instructions()
prompt = PromptTemplate(
    template="List five {topic}.\n{format_instructions}",
    input_variables=["topic"],
    partial_variables={"format_instructions": format_instructions},
)

model = ChatOpenAI()

chain = prompt | model | output_parser 

In [45]:
print(format_instructions)

Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`


In [49]:
chain.invoke({"topic": "healthy foods"})

['Spinach', 'quinoa', 'salmon', 'blueberries', 'almonds']

#### Date Time Parser

In [50]:
from langchain.output_parsers import DatetimeOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

chat_model = ChatOpenAI()

output_parser = DatetimeOutputParser()
template = """Answer the users question:

{question}

{format_instructions}"""
prompt = PromptTemplate.from_template(
    template,
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)

In [51]:
prompt

PromptTemplate(input_variables=['question'], partial_variables={'format_instructions': "Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.\n\nExamples: 1865-02-18T10:42:49.109154Z, 0062-01-24T07:11:15.276680Z, 1953-03-07T13:38:17.961494Z\n\nReturn ONLY this string, no other words!"}, template='Answer the users question:\n\n{question}\n\n{format_instructions}')

In [53]:
chain = prompt | chat_model | output_parser

In [54]:
output = chain.invoke({"question": "When did India get independence?"})
print(output)

1947-08-15 00:00:00


Output Parser Documentation - https://python.langchain.com/v0.1/docs/modules/model_io/output_parsers/quick_start/

## Memory

In [55]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory

prompt_template = PromptTemplate(
    input_variables =["history", "new_input"],
    template = """You are having a chat with a human.
    previous chat: {history}
    Human: {new_input}
    Assistant: """
)

memory = ConversationBufferMemory()

#llm = ChatOpenAI()
llm = OpenAI()

chain = LLMChain(llm=llm, prompt=prompt_template, memory = memory, verbose=True)

chain.invoke({"new_input": "Which country is the biggest exporter of cotton?"})




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are having a chat with a human.
    previous chat: 
    Human: Which country is the biggest exporter of cotton?
    Assistant: [0m

[1m> Finished chain.[0m


{'new_input': 'Which country is the biggest exporter of cotton?',
 'history': '',
 'text': 'As of 2021, the biggest exporter of cotton is India.'}

In [56]:
chain.invoke({"new_input": "What is the total value of cotton exports of this country"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are having a chat with a human.
    previous chat: Human: Which country is the biggest exporter of cotton?
AI: As of 2021, the biggest exporter of cotton is India.
    Human: What is the total value of cotton exports of this country
    Assistant: [0m

[1m> Finished chain.[0m


{'new_input': 'What is the total value of cotton exports of this country',
 'history': 'Human: Which country is the biggest exporter of cotton?\nAI: As of 2021, the biggest exporter of cotton is India.',
 'text': "In 2020, India's total value of cotton exports was approximately $6.3 billion USD."}

In [57]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_openai import ChatOpenAI

chat_prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content="You are having a chat with a human."
        ), 
        MessagesPlaceholder(
            variable_name="history"
        ),
        HumanMessagePromptTemplate.from_template(
            "{new_input}"
        ),  
    ]
)

chat_memory = ConversationBufferMemory(return_messages=True)

chat_llm = ChatOpenAI()

chat_chain = LLMChain(llm=chat_llm, prompt=chat_prompt, memory = chat_memory, verbose=True)

chat_chain.invoke({"new_input": "Which country is the biggest exporter of cotton?"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are having a chat with a human.
Human: Which country is the biggest exporter of cotton?[0m

[1m> Finished chain.[0m


{'new_input': 'Which country is the biggest exporter of cotton?',
 'history': [HumanMessage(content='Which country is the biggest exporter of cotton?'),
  AIMessage(content='The largest exporter of cotton in the world is the United States. The U.S. is known for producing large quantities of high-quality cotton, which is exported to many countries around the world.')],
 'text': 'The largest exporter of cotton in the world is the United States. The U.S. is known for producing large quantities of high-quality cotton, which is exported to many countries around the world.'}

In [58]:
chat_chain.invoke({"new_input": "What is the total value of cotton exports of this country"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are having a chat with a human.
Human: Which country is the biggest exporter of cotton?
AI: The largest exporter of cotton in the world is the United States. The U.S. is known for producing large quantities of high-quality cotton, which is exported to many countries around the world.
Human: What is the total value of cotton exports of this country[0m

[1m> Finished chain.[0m


{'new_input': 'What is the total value of cotton exports of this country',
 'history': [HumanMessage(content='Which country is the biggest exporter of cotton?'),
  AIMessage(content='The largest exporter of cotton in the world is the United States. The U.S. is known for producing large quantities of high-quality cotton, which is exported to many countries around the world.'),
  HumanMessage(content='What is the total value of cotton exports of this country'),
  AIMessage(content='The total value of cotton exports from the United States varies from year to year depending on factors such as crop yield, global demand, and market prices. In recent years, the value of U.S. cotton exports has been around $5-6 billion annually.')],
 'text': 'The total value of cotton exports from the United States varies from year to year depending on factors such as crop yield, global demand, and market prices. In recent years, the value of U.S. cotton exports has been around $5-6 billion annually.'}

#### Changing the memory variable name

In [60]:
chat_prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content="You are having a chat with a human."
        ), 
        MessagesPlaceholder(
            variable_name="prev_conv"
        ),
        HumanMessagePromptTemplate.from_template(
            "{new_input}"
        ),  
    ]
)

chat_memory = ConversationBufferMemory(memory_key="prev_conv", return_messages=True)

chat_llm = ChatOpenAI()

chat_chain = LLMChain(llm=chat_llm, prompt=chat_prompt, memory = chat_memory, verbose=True)

chat_chain.invoke({"new_input": "Which country is the biggest exporter of cotton?"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are having a chat with a human.
Human: Which country is the biggest exporter of cotton?[0m

[1m> Finished chain.[0m


{'new_input': 'Which country is the biggest exporter of cotton?',
 'prev_conv': [HumanMessage(content='Which country is the biggest exporter of cotton?'),
  AIMessage(content='The biggest exporter of cotton in the world is the United States.')],
 'text': 'The biggest exporter of cotton in the world is the United States.'}

#### Adding messages to memory

In [61]:
sample_memory = ConversationBufferMemory(memory_key="history")

In [62]:
sample_memory.load_memory_variables({})

{'history': ''}

In [63]:
sample_memory.chat_memory.add_user_message("Which country is the biggest exporter of cotton?")
sample_memory.chat_memory.add_ai_message("China")

In [64]:
sample_memory.load_memory_variables({})

{'history': 'Human: Which country is the biggest exporter of cotton?\nAI: China'}

In [65]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory

prompt_template = PromptTemplate(
    input_variables =["history", "new_input"],
    template = """You are having a chat with a human.
    previous chat: {history}
    Human: {new_input}
    Assistant: """
)


llm = ChatOpenAI()

chain = LLMChain(llm=llm, prompt=prompt_template, memory = sample_memory, verbose=True)


In [66]:
chain.invoke({"new_input": "What is the total value of cotton exports of this country"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are having a chat with a human.
    previous chat: Human: Which country is the biggest exporter of cotton?
AI: China
    Human: What is the total value of cotton exports of this country
    Assistant: [0m

[1m> Finished chain.[0m


{'new_input': 'What is the total value of cotton exports of this country',
 'history': 'Human: Which country is the biggest exporter of cotton?\nAI: China',
 'text': 'I am not sure about the exact total value of cotton exports from China. Would you like me to look it up for you?'}

#### Conversation Chain instead of LLMChain

In [67]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory

from langchain.chains import ConversationChain

memory = ConversationBufferMemory(memory_key="history")

llm = ChatOpenAI()

chain = ConversationChain(llm=llm, memory = memory, verbose=True)

chain.invoke({"input": "Which country is the biggest exporter of cotton?"})



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Which country is the biggest exporter of cotton?
AI:[0m

[1m> Finished chain.[0m


{'input': 'Which country is the biggest exporter of cotton?',
 'history': '',
 'response': 'The biggest exporter of cotton is currently India. India exports a large amount of cotton to various countries around the world due to its high-quality production and competitive prices.'}

In [68]:
print(chain.prompt.template)

The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
{history}
Human: {input}
AI:


In [69]:
chain.invoke({"input": "What is the total value of cotton exports of this country"})



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Which country is the biggest exporter of cotton?
AI: The biggest exporter of cotton is currently India. India exports a large amount of cotton to various countries around the world due to its high-quality production and competitive prices.
Human: What is the total value of cotton exports of this country
AI:[0m

[1m> Finished chain.[0m


{'input': 'What is the total value of cotton exports of this country',
 'history': 'Human: Which country is the biggest exporter of cotton?\nAI: The biggest exporter of cotton is currently India. India exports a large amount of cotton to various countries around the world due to its high-quality production and competitive prices.',
 'response': 'The total value of cotton exports from India in 2020 was approximately $1.9 billion. This makes India one of the leading exporters of cotton globally.'}

In [70]:
print(chain.memory.buffer)

Human: Which country is the biggest exporter of cotton?
AI: The biggest exporter of cotton is currently India. India exports a large amount of cotton to various countries around the world due to its high-quality production and competitive prices.
Human: What is the total value of cotton exports of this country
AI: The total value of cotton exports from India in 2020 was approximately $1.9 billion. This makes India one of the leading exporters of cotton globally.


#### ConversationBufferWindowMemory

In [5]:
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationChain

memory = ConversationBufferWindowMemory(k=1)

llm = ChatOpenAI()

convo_chain = ConversationChain(llm=llm, memory=memory)

convo_chain.invoke({"input": "Which country is the biggest exporter of cotton?"})

{'input': 'Which country is the biggest exporter of cotton?',
 'history': '',
 'response': "The biggest exporter of cotton is currently India, followed by the United States and Brazil. India's favorable climate and large-scale production make it the top exporter of cotton in the world."}

In [6]:
convo_chain.invoke({"input": "What is the total value of cotton exports of this country"})

{'input': 'What is the total value of cotton exports of this country',
 'history': "Human: Which country is the biggest exporter of cotton?\nAI: The biggest exporter of cotton is currently India, followed by the United States and Brazil. India's favorable climate and large-scale production make it the top exporter of cotton in the world.",
 'response': 'In the 2019/2020 season, India exported around $1.9 billion worth of cotton. This accounted for a significant portion of the global cotton trade.'}

In [7]:
print(convo_chain.memory.buffer)

Human: What is the total value of cotton exports of this country
AI: In the 2019/2020 season, India exported around $1.9 billion worth of cotton. This accounted for a significant portion of the global cotton trade.


#### ConversationSummaryMemory

In [8]:
from langchain.memory import ConversationSummaryMemory, ChatMessageHistory
from langchain_openai import OpenAI

memory = ConversationSummaryMemory(llm=OpenAI(temperature=0))
memory.save_context({"input": "Which country is the biggest exporter of cotton?"}, {"output": "China"})

In [9]:
memory.load_memory_variables({})

{'history': '\nThe human asks which country is the biggest exporter of cotton. The AI responds with China.'}

In [10]:
from langchain_openai import OpenAI
from langchain.chains import ConversationChain
llm = OpenAI(temperature=0)
conversation_with_summary = ConversationChain(
    llm=llm,
    memory=ConversationSummaryMemory(llm=OpenAI()),
    verbose=True
)
conversation_with_summary.invoke(input="Which country is the biggest exporter of cotton?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Which country is the biggest exporter of cotton?
AI:[0m

[1m> Finished chain.[0m


{'input': 'Which country is the biggest exporter of cotton?',
 'history': '',
 'response': " According to the latest data from the World Trade Organization, China is currently the biggest exporter of cotton, accounting for approximately 26% of the global cotton exports. Other major exporters include India, the United States, and Pakistan. However, it's worth noting that the top exporters can vary from year to year depending on factors such as weather conditions and market demand. Is there anything else you would like to know about cotton exports?"}

In [11]:
conversation_with_summary.invoke(input="What is the total value of cotton exports of this country")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

The human asks which country is the biggest exporter of cotton. The AI provides information from the World Trade Organization, stating that China is currently the biggest exporter, followed by India, the United States, and Pakistan. However, the top exporters can change annually due to various factors. The AI offers to provide more information on cotton exports.
Human: What is the total value of cotton exports of this country
AI:[0m

[1m> Finished chain.[0m


{'input': 'What is the total value of cotton exports of this country',
 'history': '\nThe human asks which country is the biggest exporter of cotton. The AI provides information from the World Trade Organization, stating that China is currently the biggest exporter, followed by India, the United States, and Pakistan. However, the top exporters can change annually due to various factors. The AI offers to provide more information on cotton exports.',
 'response': " According to the World Trade Organization, the total value of cotton exports for China in 2020 was approximately $9.3 billion. This accounted for 17.6% of the global cotton export market. However, this value can fluctuate depending on factors such as weather conditions, demand, and trade policies. Would you like me to provide more specific information on China's cotton exports?"}

#### Runnable with Message History with session ID

In [1]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

model = ChatOpenAI()
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are having a chat with a human.",
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{input}"),
    ]
)
chain = prompt | model | StrOutputParser()

  from pydantic.v1.fields import FieldInfo as FieldInfoV1


In [2]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


runnable = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

In [3]:
runnable.invoke(
    {"input": "Which country is the biggest exporter of cotton?"},
    config={"configurable": {"session_id": "sess1"}},
)

'The largest exporter of cotton in the world is the United States. The US is known for producing high-quality cotton and it exports a significant amount of cotton to various countries around the world.'

In [4]:
# Remembers
runnable.invoke(
    {"input": "What is the total value of cotton exports of this country"},
    config={"configurable": {"session_id": "sess1"}},
)

"In 2020, the United States exported approximately $6.4 billion worth of cotton worldwide. Cotton is a significant agricultural product for the US economy, and the country's cotton exports play a key role in global trade."

In [5]:
# New session_id --> does not remember.
runnable.invoke(
    {"input": "What is the total value of cotton exports of this country"},
    config={"configurable": {"session_id": "sess2"}},
)

"I'm not able to provide real-time data on specific figures like the total value of cotton exports of a country. You may want to check the latest reports and statistics from official sources, such as government agencies or organizations that track trade data, to get the most up-to-date information on cotton exports."

# RAG

### Document Loading

#### Text

In [14]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("./RAGFiles/LangchainRetrieval.txt")
documents = loader.load()

documents

[Document(metadata={'source': './RAGFiles/LangchainRetrieval.txt'}, page_content="Retrieval\nMany LLM applications require user-specific data that is not part of the model's training set. The primary way of accomplishing this is through Retrieval Augmented Generation (RAG). In this process, external data is retrieved and then passed to the LLM when doing the generation step.\n\nLangChain provides all the building blocks for RAG applications - from simple to complex. This section of the documentation covers everything related to the retrieval step - e.g. the fetching of the data. Although this sounds simple, it can be subtly complex. This encompasses several key modules.\n\nIllustrative diagram showing the data connection process with steps: Source, Load, Transform, Embed, Store, and Retrieve.\n\nDocument loaders\nDocument loaders load documents from many different sources. LangChain provides over 100 different document loaders as well as integrations with other major providers in the s

In [9]:
pip install pypdf

Collecting pypdf
  Downloading pypdf-6.4.1-py3-none-any.whl.metadata (7.1 kB)
Downloading pypdf-6.4.1-py3-none-any.whl (328 kB)
Installing collected packages: pypdf
Successfully installed pypdf-6.4.1
Note: you may need to restart the kernel to use updated packages.


In [13]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("./RAGfiles/ExcelCourseDocument.pdf")
pages = loader.load_and_split()

In [22]:
pages[1]

Document(metadata={'source': './RAGfiles/Excel Course Document.pdf', 'page': 1}, page_content="What\nyou'll\nlearn\n●\nA\nBeginner's\nGuide\nto\nMicrosoft\nExcel\n-\nMicrosoft\nExcel,\nLearn\nExcel,\nSpreadsheets,\nFormulas,\nShortcuts,\nMacros\n●\nKnowledge\nof\nall\nthe\nessential\nExcel\nformulas\n●\nBecome\nproﬁcient\nin\nExcel\ndata\ntools\nlike\nSorting,\nFiltering,\nData\nvalidations\nand\nData\nimporting\n●\nMaster\nExcel's\nmost\npopular\nlookup\nfunctions\nsuch\nas\nVlookup,\nHlookup,\nIndex\nand\nMatch\n●\nHarness\nfull\npotential\nof\nExcel\nby\ncreating\nPivot\ntables\nwith\nslicers\n●\nMake\ngreat\npresentations\nusing\nthe\nConditional\nand\nTable\nformatting\noptions\n●\nVisually\nenchant\nviewers\nusing\nBar\ncharts,\nScatter\nPlots,\nHistograms\netc.\n●\nIncrease\nyour\nefﬁciency\nby\nlearning\nhow\nto\ncreate\nand\nuse\nimportant\nExcel\nshortcuts\n●\nExplore\nfun\nand\nexciting\nuse\ncases\nof\nExcel\nRequirements\nYou\nwill\nneed\na\nPC\nwith\nany\nversion\nof\nExc

In [15]:
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader('./RAGfiles/', glob="**/*.txt")

In [19]:
pip install unstructured

Collecting unstructured
  Downloading unstructured-0.18.21-py3-none-any.whl.metadata (25 kB)
Collecting python-magic (from unstructured)
  Downloading python_magic-0.4.27-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting lxml (from unstructured)
  Using cached lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl.metadata (3.6 kB)
Collecting nltk (from unstructured)
  Downloading nltk-3.9.2-py3-none-any.whl.metadata (3.2 kB)
Collecting beautifulsoup4 (from unstructured)
  Using cached beautifulsoup4-4.14.3-py3-none-any.whl.metadata (3.8 kB)
Collecting emoji (from unstructured)
  Downloading emoji-2.15.0-py3-none-any.whl.metadata (5.7 kB)
Collecting python-iso639 (from unstructured)
  Downloading python_iso639-2025.11.16-py3-none-any.whl.metadata (15 kB)
Collecting langdetect (from unstructured)
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m22.8 MB/s[0m  [33m0:00:00[0m
[?25h  Installing build depen

In [21]:
pip install libmagic

Collecting libmagic
  Downloading libmagic-1.0.tar.gz (3.7 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hBuilding wheels for collected packages: libmagic
  Building wheel for libmagic (pyproject.toml) ... [?25ldone
[?25h  Created wheel for libmagic: filename=libmagic-1.0-py3-none-any.whl size=4311 sha256=5f6e4fa6ee9d1e719605c27a62b00d40625ddbf3f53d0414dc795ec0c046dc65
  Stored in directory: /Users/rupeshpanwar/Library/Caches/pip/wheels/27/cb/60/f3e7ff506e66cfea6d265d12df06319c85a9ce620c42feff42
Successfully built libmagic
Installing collected packages: libmagic
Successfully installed libmagic-1.0
Note: you may need to restart the kernel to use updated packages.


In [22]:
docs = loader.load()

libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


In [23]:
len(docs)

4

In [24]:
docs[1]

Document(metadata={'source': 'RAGfiles/RAG.txt'}, page_content="Overview One of the most powerful applications enabled by LLMs is sophisticated question-answering (Q&A) chatbots. These are applications that can answer questions about specific source information. These applications use a technique known as Retrieval Augmented Generation, or RAG.\n\nWhat is RAG? RAG is a technique for augmenting LLM knowledge with additional data.\n\nLLMs can reason about wide-ranging topics, but their knowledge is limited to the public data up to a specific point in time that they were trained on. If you want to build AI applications that can reason about private data or data introduced after a model's cutoff date, you need to augment the knowledge of the model with the specific information it needs. The process of bringing the appropriate information and inserting it into the model prompt is known as Retrieval Augmented Generation (RAG).\n\nLangChain has a number of components designed to help build Q&

In [25]:
loader = DirectoryLoader('./RAGfiles/', glob="**/*.txt", show_progress=True)
docs = loader.load()

  0%|          | 0/4 [00:00<?, ?it/s]libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
100%|██████████| 4/4 [00:00<00:00, 46.25it/s]


In [7]:
from langchain_community.document_loaders.csv_loader import CSVLoader


loader = CSVLoader(file_path='./RAGfiles/Movie_collection_dataset.csv')
data = loader.load()

In [8]:
print(data)

[Document(metadata={'source': './RAGfiles/Movie_collection_dataset.csv', 'row': 0}, page_content='Collection: 48000\nMarketin_expense: 20.1264\nBudget: 36524.125\nLead_ Actor_Rating: 7.825\nLead_Actress_rating: 8.095\nTrailer_views: 527367\nGenre: Thriller\nNum_multiplex: 494\n3D_available: YES'), Document(metadata={'source': './RAGfiles/Movie_collection_dataset.csv', 'row': 1}, page_content='Collection: 43200\nMarketin_expense: 20.5462\nBudget: 35668.655\nLead_ Actor_Rating: 7.505\nLead_Actress_rating: 7.65\nTrailer_views: 494055\nGenre: Drama\nNum_multiplex: 462\n3D_available: NO'), Document(metadata={'source': './RAGfiles/Movie_collection_dataset.csv', 'row': 2}, page_content='Collection: 69400\nMarketin_expense: 20.5458\nBudget: 39912.675\nLead_ Actor_Rating: 7.485\nLead_Actress_rating: 7.57\nTrailer_views: 547051\nGenre: Comedy\nNum_multiplex: 458\n3D_available: NO'), Document(metadata={'source': './RAGfiles/Movie_collection_dataset.csv', 'row': 3}, page_content='Collection: 66800

In [None]:
loader = CSVLoader(file_path='./RAGfiles/Movie_collection_dataset.csv', csv_args={
    'delimiter': ',',
    'quotechar': '"',
    'fieldnames': ['Genre', 'Budget', 'Actor_rating']
})

data = loader.load()

### Splitting the document - Chunking

#### Recursively split by character

In [26]:
%pip install -qU langchain-text-splitters

Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("./RAGfiles/LangchainRetrieval.txt")
text = loader.load()

  from pydantic.v1.fields import FieldInfo as FieldInfoV1


In [2]:
text

[Document(metadata={'source': './RAGfiles/LangchainRetrieval.txt'}, page_content="Retrieval\nMany LLM applications require user-specific data that is not part of the model's training set. The primary way of accomplishing this is through Retrieval Augmented Generation (RAG). In this process, external data is retrieved and then passed to the LLM when doing the generation step.\n\nLangChain provides all the building blocks for RAG applications - from simple to complex. This section of the documentation covers everything related to the retrieval step - e.g. the fetching of the data. Although this sounds simple, it can be subtly complex. This encompasses several key modules.\n\nIllustrative diagram showing the data connection process with steps: Source, Load, Transform, Embed, Store, and Retrieve.\n\nDocument loaders\nDocument loaders load documents from many different sources. LangChain provides over 100 different document loaders as well as integrations with other major providers in the s

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
    length_function=len,
)

In [5]:
texts = text_splitter.split_documents(text)
print(texts[0])
print(texts[1])
print(texts[2])

page_content='Retrieval' metadata={'source': './RAGfiles/LangchainRetrieval.txt'}
page_content='Many LLM applications require user-specific data that is not part of the model's training set. The primary way of accomplishing this is through Retrieval Augmented Generation (RAG). In this process,' metadata={'source': './RAGfiles/LangchainRetrieval.txt'}
page_content='In this process, external data is retrieved and then passed to the LLM when doing the generation step.' metadata={'source': './RAGfiles/LangchainRetrieval.txt'}


### Embedding

#### OpenAI embedding

In [6]:
pip install langchain-openai

Note: you may need to restart the kernel to use updated packages.


In [7]:
from langchain_openai import OpenAIEmbeddings

embeddings_model = OpenAIEmbeddings()

In [10]:
embeddings = embeddings_model.embed_documents(
    [
        "Hi",
        "What's up!",
        "Learning LangChain",
        "You should learn it from Selftaught Academy",
        "trust yourself"
    ]
)
len(embeddings), len(embeddings[0])

(5, 1536)

In [11]:
embeddings[0]

[-0.03629858046770096,
 -0.007224537897855043,
 -0.03371885418891907,
 -0.02866363152861595,
 -0.02686564065515995,
 0.03460482135415077,
 -0.012318846769630909,
 -0.007752209436148405,
 0.0019380523590371013,
 -0.0027018729597330093,
 0.024781012907624245,
 -0.002477124100551009,
 -0.00573272630572319,
 -0.002905449829995632,
 0.006677323020994663,
 -0.00303248199634254,
 0.033849142491817474,
 -0.001503212028183043,
 0.02109382674098015,
 -0.008996471762657166,
 -0.02171921543776989,
 0.01038405206054449,
 0.006244111340492964,
 0.007081219926476479,
 -0.012312332168221474,
 0.0008998099947348237,
 0.005876044277101755,
 -0.009888952597975731,
 -0.0030731973238289356,
 -0.024572549387812614,
 0.010742347687482834,
 -0.01381065882742405,
 -0.024429231882095337,
 -0.01411032397300005,
 0.0024347801227122545,
 -0.018878910690546036,
 0.0005618723225779831,
 -0.011270018294453621,
 0.018110202625393867,
 -0.009967125952243805,
 0.01302892342209816,
 -0.011328648775815964,
 -0.00913327559

In [12]:
embedded_query = embeddings_model.embed_query("What was the name mentioned in the conversation?")
embedded_query[:5]

[0.005329647101461887,
 -0.0006122003542259336,
 0.0389961302280426,
 -0.002898985054343939,
 -0.008904732763767242]

#### Huggingface embeddings

In [13]:
pip install -U sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-5.1.2-py3-none-any.whl.metadata (16 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Using cached transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Using cached torch-2.9.1-cp314-cp314-macosx_11_0_arm64.whl.metadata (30 kB)
Collecting scikit-learn (from sentence-transformers)
  Downloading scikit_learn-1.8.0-cp314-cp314-macosx_12_0_arm64.whl.metadata (11 kB)
Collecting scipy (from sentence-transformers)
  Using cached scipy-1.16.3-cp314-cp314-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)
  Downloading huggingface_hub-1.2.2-py3-none-any.whl.metadata (13 kB)
Collecting Pillow (from sentence-transformers)
  Downloading pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl.metadata (8.8 kB)
Collecting filelock (from transformers<5.0.0,>=4.41.0->sentence-transformers)
  Using cached filelo

In [1]:
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

# Initialize instructor embeddings using the Hugging Face model
embeddings_model = HuggingFaceInstructEmbeddings()

embedded_query = embeddings_model.embed_query("What was the name mentioned in the conversation?")

  from pydantic.v1.fields import FieldInfo as FieldInfoV1
  embeddings_model = HuggingFaceInstructEmbeddings()
  embeddings_model = HuggingFaceInstructEmbeddings()


ImportError: Dependencies for InstructorEmbedding not found.

### Vector Storage

#### Chroma

In [17]:
pip install chromadb

Collecting chromadb
  Using cached chromadb-1.3.6-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
Collecting build>=1.0.3 (from chromadb)
  Using cached build-1.3.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Using cached pybase64-1.4.3-cp314-cp314-macosx_11_0_arm64.whl.metadata (8.7 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Using cached uvicorn-0.38.0-py3-none-any.whl.metadata (6.8 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Using cached posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
INFO: pip is looking at multiple versions of chromadb to determine which version is compatible with other requirements. This could take a while.
Collecting chromadb
  Using cached chromadb-1.3.5-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
  Using cached chromadb-1.3.4-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
  Using cached chromadb-1.3.3-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
  Using cached chrom

In [3]:
pip install --upgrade chromadb

Collecting chromadb
  Using cached chromadb-1.3.6-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
Collecting build>=1.0.3 (from chromadb)
  Using cached build-1.3.0-py3-none-any.whl.metadata (5.6 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Using cached pybase64-1.4.3-cp314-cp314-macosx_11_0_arm64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Using cached posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
INFO: pip is looking at multiple versions of chromadb to determine which version is compatible with other requirements. This could take a while.
Collecting chromadb
  Using cached chromadb-1.3.5-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
  Using cached chromadb-1.3.4-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
  Using cached chromadb-1.3.3-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
  Using cached chromadb-1.3.2-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2 kB)
  Using cached chromadb-1.3.0-cp39-abi3-macosx_11_0_arm64.whl.metadata (7.2

In [3]:
pip install pydantic-settings

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-macosx_14_0_arm64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.1-cp310-abi3-macosx_14_0_arm64.whl (3.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m24.9 MB/s[0m  [33m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS  # ✅ Use FAISS instead

# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader("./RAGfiles/LangchainRetrieval.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=20)
documents = text_splitter.split_documents(raw_documents)
db = FAISS.from_documents(documents, OpenAIEmbeddings())

  from pydantic.v1.fields import FieldInfo as FieldInfoV1
  from .autonotebook import tqdm as notebook_tqdm
Created a chunk of size 760, which is longer than the specified 500


In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import Chroma

# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader("./RAGfiles/LangchainRetrieval.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = text_splitter.split_documents(raw_documents)
db = Chroma.from_documents(documents, OpenAIEmbeddings())

  from pydantic.v1.fields import FieldInfo as FieldInfoV1
  from .autonotebook import tqdm as notebook_tqdm


ImportError: Could not import chromadb python package. Please install it with `pip install chromadb`.

In [2]:
query = "What is text embedding and how does langchain help in doing it"
docs = db.similarity_search(query)
print(docs[1].page_content)

Vector stores
With the rise of embeddings, there has emerged a need for databases to support efficient storage and searching of these embeddings. LangChain provides integrations with over 50 different vectorstores, from open-source local ones to cloud-hosted proprietary ones, allowing you to choose the one best suited for your needs. LangChain exposes a standard interface, allowing you to easily swap between vector stores.


In [3]:
embedding_vector = OpenAIEmbeddings().embed_query(query)
docs = db.similarity_search_by_vector(embedding_vector)
print(docs[0].page_content)

Text embedding models
Another key part of retrieval is creating embeddings for documents. Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of a text that are similar. LangChain provides integrations with over 25 different embedding providers and methods, from open-source to proprietary API, allowing you to choose the one best suited for your needs. LangChain provides a standard interface, allowing you to easily swap between models.


In [None]:
pip install faiss-cpu

In [4]:
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS

# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader("./RAGfiles/LangchainRetrieval.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = FAISS.from_documents(documents, OpenAIEmbeddings())

## Retrievers

In [30]:
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader("./RAGfiles/LangchainRetrieval.txt").load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=20)
documents = text_splitter.split_documents(raw_documents)
db = Chroma.from_documents(documents, OpenAIEmbeddings())

In [5]:
retriever = db.as_retriever()

In [6]:
docs = retriever.invoke("What is text embedding and how does langchain help in doing it")

In [7]:
len(docs)

4

In [8]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

template = """Answer the question based only on the following context:

{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI()


def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])


chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

chain.invoke("What is text embedding and how does langchain help in doing it")


'Text embedding is a technique used to create numerical representations of text data, capturing the semantic meaning of the text. LangChain helps in text embedding by providing integrations with over 25 different embedding providers and methods, allowing users to choose the best suited one for their needs. LangChain also provides a standard interface for easily swapping between different embedding models.'

In [9]:
retriever = db.as_retriever(search_kwargs={"k": 1})

In [10]:
docs = retriever.invoke("What is text embedding and how does langchain help in doing it")

In [11]:
docs

[Document(id='6880ae08-1fc5-42ab-a30f-4bdbbb3a3c90', metadata={'source': './RAGfiles/LangchainRetrieval.txt'}, page_content='Text embedding models\nAnother key part of retrieval is creating embeddings for documents. Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of a text that are similar. LangChain provides integrations with over 25 different embedding providers and methods, from open-source to proprietary API, allowing you to choose the one best suited for your needs. LangChain provides a standard interface, allowing you to easily swap between models.\n\nVector stores\nWith the rise of embeddings, there has emerged a need for databases to support efficient storage and searching of these embeddings. LangChain provides integrations with over 50 different vectorstores, from open-source local ones to cloud-hosted proprietary ones, allowing you to choose the one best suited for your needs. LangChain exposes a standard interfa

In [12]:
retriever = db.as_retriever(
    search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.8}
)

In [13]:
docs = retriever.invoke("What is text embedding and how does langchain help in doing it")

In [14]:
docs

[Document(id='6880ae08-1fc5-42ab-a30f-4bdbbb3a3c90', metadata={'source': './RAGfiles/LangchainRetrieval.txt'}, page_content='Text embedding models\nAnother key part of retrieval is creating embeddings for documents. Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of a text that are similar. LangChain provides integrations with over 25 different embedding providers and methods, from open-source to proprietary API, allowing you to choose the one best suited for your needs. LangChain provides a standard interface, allowing you to easily swap between models.\n\nVector stores\nWith the rise of embeddings, there has emerged a need for databases to support efficient storage and searching of these embeddings. LangChain provides integrations with over 50 different vectorstores, from open-source local ones to cloud-hosted proprietary ones, allowing you to choose the one best suited for your needs. LangChain exposes a standard interfa

### Tools

#### Creating own custom tool

In [19]:
# For tool creation
from langchain_core.tools import BaseTool, StructuredTool, tool

# For tool utilities
#from langchain.agents import Tool

# For tool calling with models
#from langchain_openai import ChatOpenAI
  #from langchain_openai import ChatOpenAI

In [20]:
@tool
def name_of_tool(input: str) -> str:
    """Tool_Description"""
    return "Result"

In [21]:
print(name_of_tool.name)
print(name_of_tool.description)
print(name_of_tool.args)

name_of_tool
Tool_Description
{'input': {'title': 'Input', 'type': 'string'}}


#### Defining, Binding and Calling the tool

In [22]:
@tool
def sta_coins(input: float) -> float:
    """Use this tool to convert USD to Self-Tech Academy coins"""
    return 1.3*(float(input))

In [23]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()
llm_bind_tools = llm.bind_tools([sta_coins])

In [24]:
result = llm_bind_tools.invoke("How many start-tech academy coins can I get for USD10")
result

AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 64, 'total_tokens': 80, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-ClVPTrCr0sDNusCOKCnSpVx5oyczA', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019b0c50-f2e9-7550-8b85-7ca408bed3ef-0', tool_calls=[{'name': 'sta_coins', 'args': {'input': 10}, 'id': 'call_Q3PoiQCzH05qbXxwaAs8B4KU', 'type': 'tool_call'}], usage_metadata={'input_tokens': 64, 'output_tokens': 16, 'total_tokens': 80, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [25]:
result.tool_calls

[{'name': 'sta_coins',
  'args': {'input': 10},
  'id': 'call_Q3PoiQCzH05qbXxwaAs8B4KU',
  'type': 'tool_call'}]

In [26]:
tool_mapping = {
    'sta_coins': sta_coins
}
tool_mapping

{'sta_coins': StructuredTool(name='sta_coins', description='Use this tool to convert USD to Self-Tech Academy coins', args_schema=<class 'langchain_core.utils.pydantic.sta_coins'>, func=<function sta_coins at 0x133d0e560>)}

In [27]:
tool = tool_mapping[result.tool_calls[0]["name"]]

In [28]:
tool_output = tool.invoke(result.tool_calls[0]["args"])

In [29]:
tool_output

13.0

In [30]:
# For multiple tool calls in LLM response, we can loop through the list of tools in tool call
tool_mapping = {
    'sta_coins': sta_coins
    'tool2_name': tool2
}
tool_mapping
for tool_call in result.tool_calls:
    tool = tool_mapping[tool_call["name"]]
    tool_output = tool.invoke(tool_call["args"])

SyntaxError: invalid syntax (269771591.py, line 4)

### Using in-built tools
https://python.langchain.com/v0.1/docs/integrations/tools/

#### DuckDuckGo Search tool

In [34]:
%pip install --upgrade --quiet  duckduckgo-search
%pip install -U ddgs

Note: you may need to restart the kernel to use updated packages.
Collecting ddgs
  Downloading ddgs-9.9.3-py3-none-any.whl.metadata (19 kB)
Collecting fake-useragent>=2.2.0 (from ddgs)
  Downloading fake_useragent-2.2.0-py3-none-any.whl.metadata (17 kB)
Collecting brotli (from httpx[brotli,http2,socks]>=0.28.1->ddgs)
  Downloading brotli-1.2.0-cp314-cp314-macosx_10_15_universal2.whl.metadata (6.1 kB)
Collecting h2<5,>=3 (from httpx[brotli,http2,socks]>=0.28.1->ddgs)
  Using cached h2-4.3.0-py3-none-any.whl.metadata (5.1 kB)
Collecting socksio==1.* (from httpx[brotli,http2,socks]>=0.28.1->ddgs)
  Using cached socksio-1.0.0-py3-none-any.whl.metadata (6.1 kB)
Collecting hyperframe<7,>=6.1 (from h2<5,>=3->httpx[brotli,http2,socks]>=0.28.1->ddgs)
  Using cached hyperframe-6.1.0-py3-none-any.whl.metadata (4.3 kB)
Collecting hpack<5,>=4.1 (from h2<5,>=3->httpx[brotli,http2,socks]>=0.28.1->ddgs)
  Using cached hpack-4.1.0-py3-none-any.whl.metadata (4.6 kB)
Downloading ddgs-9.9.3-py3-none-any.

In [35]:
from langchain_community.tools import DuckDuckGoSearchRun

# Initialize the search tool
search = DuckDuckGoSearchRun()

In [36]:
search.name, search.description

('duckduckgo_search',
 'A wrapper around DuckDuckGo Search. Useful for when you need to answer questions about current events. Input should be a search query.')

In [37]:
search.run("What are tools in Langchain?")

'Tools are components that agents call to perform actions . They extend model capabilities by letting them interact with the world through well-defined inputs and outputs. Tools encapsulate a callable function and its input schema. Sep 1, 2025 · LangChain is a framework for building applications with Large Language Models (LLMs). Its core components are Tools and Agents. Tools extend the capabilities of LLMs, while agents orchestrate tools to solve complex tasks intelligently. Tools: External functions, APIs or logic that an agent can call . Jun 7, 2025 · LangChain offers dozens of built-in tools , but here are 5 must-know tools that will instantly level up your LLM-powered applications. 1. RequestsTool — Call Any API with Ease. This tool... May 16, 2025 · The LangChain ecosystem provides a rich collection of tools and utilities that enable AI systems to interact with external services, process and transform data, and perform specialized operations. This page provides an overview of th

In [38]:
from langchain_community.tools import DuckDuckGoSearchResults
search = DuckDuckGoSearchResults()
search.invoke("What are tools in Langchain?")

'snippet: Tools are components that agents call to perform actions . They extend model capabilities by letting them interact with the world through well-defined inputs and outputs. Tools encapsulate a callable function and its input schema., title: Tools - Docs by LangChain, link: https://docs.langchain.com/oss/javascript/langchain/tools, snippet: Sep 1, 2025 · LangChain is a framework for building applications with Large Language Models (LLMs). Its core components are Tools and Agents. Tools extend the capabilities of LLMs, while agents orchestrate tools to solve complex tasks intelligently. Tools: External functions, APIs or logic that an agent can call ., title: Agents and Tools in LangChain - GeeksforGeeks, link: https://www.geeksforgeeks.org/artificial-intelligence/agents-and-tools-in-langchain/, snippet: Jun 7, 2025 · LangChain offers dozens of built-in tools , but here are 5 must-know tools that will instantly level up your LLM-powered applications. 1. RequestsTool — Call Any AP

### Wikipedia

In [39]:
%pip install --upgrade --quiet  wikipedia

Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())

wikipedia.invoke("LangChain")

  from pydantic.v1.fields import FieldInfo as FieldInfoV1
  from .autonotebook import tqdm as notebook_tqdm


ImportError: cannot import name 'WikipediaQueryRun' from 'langchain.tools' (/Users/rupeshpanwar/Documents/AI-Projects/LangChain/.venv/lib/python3.14/site-packages/langchain/tools/__init__.py)

### Agents

In [13]:
# from langchain.agents import create_tool_calling_agent
# from langchain.agents import AgentExecutor
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_openai import ChatOpenAI

# prompt = ChatPromptTemplate.from_messages(
#       [
#           (
#               "system",
#               "You are a helpful assistant. For answering the user query, look for information using DuckDuckGo Search and Wikipedia and then give the final answer",
#           ),
#           ("placeholder", "{chat_history}"),
#           ("human", "{input}"),
#           ("placeholder", "{agent_scratchpad}"),
#       ]
#   )

# tools = [search, wikipedia]

# llm = ChatOpenAI()

# agent = create_tool_calling_agent(llm, tools, prompt)
# agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

#If that still doesn't work, try:

from langchain.agents import create_tool_calling_agent, initialize_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Use initialize_agent instead
agent_executor = initialize_agent(
      tools=tools,
      llm=llm,
      agent="openai-tools",
      verbose=True
  )

ImportError: cannot import name 'create_tool_calling_agent' from 'langchain.agents' (/Users/rupeshpanwar/Documents/AI-Projects/LangChain/.venv/lib/python3.14/site-packages/langchain/agents/__init__.py)

In [66]:
agent = create_tool_calling_agent(llm, tools, prompt)

In [67]:
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [70]:
agent_executor.invoke({"input": "weather in delhi"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `duckduckgo_results_json` with `{'query': 'weather in Delhi'}`


[0m[36;1m[1;3m[snippet: Get the latest weather forecast for New Delhi, India, including temperature, precipitation, wind, UV, and radar. See the 5-day meteogram and compare different forecasts with MultiModel., title: Weather New Delhi - meteoblue, link: https://www.meteoblue.com/en/weather/week/new-delhi_india_1261481], [snippet: Get Delhi, DL, IN current weather report with temperature, feels like, wind, humidity, pressure, UV and more from TheWeatherNetwork.com., title: Delhi, DL, IN Hourly Forecast - The Weather Network, link: https://www.theweathernetwork.com/en/city/in/delhi/delhi/hourly], [snippet: The Lodhi Road observatory recorded 12 mm rainfall from 11.30 pm on Friday till 2.30 am on Saturday, the IMD said. The incessant rainfall brought Delhi's minimum temperature down to 25.8 degrees Celsius - 1.2 degrees below the season's average., t

{'input': 'weather in delhi',
 'output': 'You can check the latest weather forecast for Delhi on [Meteoblue](https://www.meteoblue.com/en/weather/week/new-delhi_india_1261481) or [The Weather Network](https://www.theweathernetwork.com/en/city/in/delhi/delhi/hourly).'}

In [71]:
agent_executor.invoke({"input": "When was Nelson Mandela born and what are some of his famous quotes?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'Nelson Mandela'}`


[0m[33;1m[1;3mPage: Nelson Mandela
Summary: Nelson Rolihlahla Mandela ( man-DEH-lə; Xhosa: [xolíɬaɬa mandɛ̂ːla]; born Rolihlahla Mandela; 18 July 1918 – 5 December 2013) was a South African anti-apartheid activist, politician, and statesman who served as the first president of South Africa from 1994 to 1999. He was the country's first black head of state and the first elected in a fully representative democratic election. His government focused on dismantling the legacy of apartheid by fostering racial reconciliation. Ideologically an African nationalist and socialist, he served as the president of the African National Congress (ANC) party from 1991 to 1997.
A Xhosa, Mandela was born into the Thembu royal family in Mvezo, South Africa. He studied law at the University of Fort Hare and the University of Witwatersrand before working as a lawyer in Johannesburg. Ther

{'input': 'When was Nelson Mandela born and what are some of his famous quotes?',
 'output': 'Nelson Mandela was born on July 18, 1918, and he passed away on December 5, 2013. Some of his famous quotes include:\n\n1. "I am fundamentally an optimist. Whether that comes from nature or nurture, I cannot say. Part of being optimistic is keeping one\'s head pointed toward the sun, one\'s feet moving forward."\n   \n2. "Education is the most powerful weapon which you can use to change the world."\n\n3. "A good head and a good heart are always a formidable combination."\n\n4. "It always seems impossible until it\'s done."\n\n5. "I learned that courage was not the absence of fear, but the triumph over it. The brave man is not he who does not feel afraid, but he who conquers that fear."\n\nNelson Mandela received numerous awards and honors throughout his life, including the Nobel Peace Prize in 1993.'}

#### Agent with memory

In [14]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Based on user query and the chat history, look for information using DuckDuckGo Search and Wikipedia and then give the final answer",
        ),
        ("placeholder", "{history}"),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

NameError: name 'ChatPromptTemplate' is not defined

In [15]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


agent_with_history = RunnableWithMessageHistory(
    agent_executor,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

NameError: name 'agent_executor' is not defined

In [74]:
agent_with_history.invoke(
    {"input": "When was Nelson Mandela born?"},
    config={"configurable": {"session_id": "sess1"}},
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'Nelson Mandela'}`


[0m[33;1m[1;3mPage: Nelson Mandela
Summary: Nelson Rolihlahla Mandela ( man-DEH-lə; Xhosa: [xolíɬaɬa mandɛ̂ːla]; born Rolihlahla Mandela; 18 July 1918 – 5 December 2013) was a South African anti-apartheid activist, politician, and statesman who served as the first president of South Africa from 1994 to 1999. He was the country's first black head of state and the first elected in a fully representative democratic election. His government focused on dismantling the legacy of apartheid by fostering racial reconciliation. Ideologically an African nationalist and socialist, he served as the president of the African National Congress (ANC) party from 1991 to 1997.
A Xhosa, Mandela was born into the Thembu royal family in Mvezo, South Africa. He studied law at the University of Fort Hare and the University of Witwatersrand before working as a lawyer in Johannesburg. Ther

{'input': 'When was Nelson Mandela born?',
 'history': [],
 'output': 'Nelson Mandela was born on July 18, 1918.'}

In [75]:
result = agent_with_history.invoke(
    {"input": "What are some of his famous quotes?"},
    config={"configurable": {"session_id": "sess1"}},
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'Nelson Mandela'}`


[0m[33;1m[1;3mPage: Nelson Mandela
Summary: Nelson Rolihlahla Mandela ( man-DEH-lə; Xhosa: [xolíɬaɬa mandɛ̂ːla]; born Rolihlahla Mandela; 18 July 1918 – 5 December 2013) was a South African anti-apartheid activist, politician, and statesman who served as the first president of South Africa from 1994 to 1999. He was the country's first black head of state and the first elected in a fully representative democratic election. His government focused on dismantling the legacy of apartheid by fostering racial reconciliation. Ideologically an African nationalist and socialist, he served as the president of the African National Congress (ANC) party from 1991 to 1997.
A Xhosa, Mandela was born into the Thembu royal family in Mvezo, South Africa. He studied law at the University of Fort Hare and the University of Witwatersrand before working as a lawyer in Johannesburg. Ther

In [77]:
result['output']

'Here are some famous quotes by Nelson Mandela:\n1. "Education is the most powerful weapon which you can use to change the world."\n2. "It always seems impossible until it\'s done."\n3. "Keep your friends close — and your rivals even closer."\n4. "Do not judge me by my successes, judge me by how many times I fell down and got back up again."\n\nThese quotes reflect Mandela\'s wisdom, resilience, and commitment to justice and equality.'

### LangSmith for monitoring the application

In [16]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = "true"
    "os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY', 'your-api-key-here')  # Set this in your .env file\n",os.environ['LANGCHAIN_PROJECT'] = "langchain_course"

In [17]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()
llm.invoke("what is langchain")

AIMessage(content='Langchain is a project that aims to create a decentralized language learning platform using blockchain technology. This platform would allow learners to connect with tutors and other language enthusiasts, as well as track their progress and incentivize learning through a token system.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 47, 'prompt_tokens': 11, 'total_tokens': 58, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-ClVb1orfEWV4fmyyPjl7kGJ46l5jg', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019b0c5b-dfaf-7201-9267-965cb3eeec0e-0', usage_metadata={'input_tokens': 11, 'output_tokens': 47, 'total_tokens': 58, 'input_t

In [18]:
from langchain_community.tools import DuckDuckGoSearchResults
from langchain.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()

search = DuckDuckGoSearchResults()
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())

tools = [search, wikipedia]

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Based on user query and the chat history, look for information using DuckDuckGo Search and Wikipedia and then give the final answer",
        ),
        ("placeholder", "{history}"),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


agent_with_history = RunnableWithMessageHistory(
    agent_executor,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

agent_with_history.invoke(
    {"input": "When was Nelson Mandela born?"},
    config={"configurable": {"session_id": "sess1"}},
)

ImportError: cannot import name 'WikipediaQueryRun' from 'langchain.tools' (/Users/rupeshpanwar/Documents/AI-Projects/LangChain/.venv/lib/python3.14/site-packages/langchain/tools/__init__.py)