# Generative AI - LangChain x OpenAI

<img src="langchain_n_openai.png">

In [None]:
# uncomment the two lines below to install the libraries;
# you only need to run this once, then comment them out again

#!pip install arxiv chromadb docarray duckduckgo-search langchain numexpr openai pydantic
#!pip install pypdfium2 python-dotenv pytube tiktoken youtube-transcript-api wikipedia

# arxiv                  = 2.0.0
# chromadb               = 0.4.18
# duckduckgo-search      = 3.9.8
# langchain              = 0.0.339
# numexpr                = 2.8.7
# openai                 = 1.3.4
# pydantic               = 2.5.2
# python-dotenv          = 1.0.0
# pytube                 = 15.0.0
# tiktoken               = 0.5.1
# youtube-transcript-api = 0.6.1
# pypdfium2              = 4.24.0
# wikipedia              = 1.4.0

## <font color=blue>Keep API Key Safe</font>

In [53]:
# import libraries

import openai
import os
from dotenv import load_dotenv, find_dotenv

In [54]:
# load the env.txt that contains the OPENAI key
# env.txt contains a single line (no quotes needed; never use curly quotes, they become part of the key):
# OPENAI_API_KEY=sk-1234567890...

load_dotenv('env.txt')
openai_api_key = os.getenv('OPENAI_API_KEY')

In [None]:
# print the key (debugging only - never share or commit the output)
#print(openai_api_key)

## Setup Test #1

In [None]:
import os 
from dotenv import load_dotenv

# load OPENAI API key
load_dotenv('env.txt')
openai_api_key = os.getenv('OPENAI_API_KEY')

In [None]:
# load langchain libraries

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

In [None]:
# initialise ChatModel with API key
chat_model = ChatOpenAI(openai_api_key=openai_api_key)

# setup message prompt
text = "What date is Singapore National Day?"
messages = [HumanMessage(content=text)]

# ask ChatGPT
print(chat_model.invoke(messages).content)

## Setup Test #2

In [None]:
# The code to read in the OpenAI API key is skipped here
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate

In [None]:
# initialise ChatModel with API key
chat_model = ChatOpenAI(openai_api_key=openai_api_key, model_name='gpt-3.5-turbo', temperature=0.3)

# the system message carries the translation instruction, the human message carries the text
template = "You are a helpful assistant that translates {input_language} to {output_language}."
human_template = "{text}"
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", human_template),
])

# translate the same English sentence to French, then to Chinese
for output_language in ("French", "Chinese"):
    translation_request = chat_prompt.format_messages(
        input_language="English",
        output_language=output_language,
        text="I love programming.",
    )
    # .invoke(...).content (as in Setup Test #1) prints only the translated text
    # instead of the repr of the whole message object
    print(chat_model.invoke(translation_request).content)
    print()

## <font color=blue>Models</font>
LangChain provides two types of models
* LLMs
* ChatModels

Let's take a look at LLMs first

### LLMs

In [None]:
from langchain.llms import OpenAI

# notice the model_name is 'gpt-3.5-turbo-instruct'
llm = OpenAI(
    model_name='gpt-3.5-turbo-instruct',
    temperature=0.7,
    max_tokens=30
)

In [None]:
llm("Explain what is generative AI")

### ChatModels
* LLMs under the hood
* Specific to chatbots
* 3 major components
  - HumanMessage
  - AIMessage
  - SystemMessage

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage

messages = [SystemMessage(
    content="You are a Grammar Teacher who responds Yes for correct Grammar input"),
    HumanMessage(content="I love, programming.")]

# notice the model name is 'gpt-3.5-turbo'
chat = ChatOpenAI(
    model_name='gpt-3.5-turbo'
)

In [None]:
chat(messages).content

## <font color=blue>Prompt Template</font>
Create a prompt from string input

In [None]:
from langchain.prompts import PromptTemplate
from langchain import LLMChain

# define the template
template="Write {lines} lines about {topic}"

In [None]:
# create the LLM

llm=OpenAI(
    model_name='gpt-3.5-turbo-instruct',
    temperature=0.7,
    openai_api_key=openai_api_key
)

In [None]:
# setup prompt template

prompt=PromptTemplate(
   template=template, 
   input_variables=["lines","topic"])

llm_chain = LLMChain(prompt=prompt, llm=llm)

In [None]:
# initialise lines and topic
lines="3"
topic="Sir Stamford Raffles"

In [None]:
# run the chain

llm_chain.run(lines=lines, topic=topic)

### Alternative code 
Using LangChain Expression Language (LCEL) - Pipe operator '|'


In [None]:
chain = prompt | llm

chain.invoke({
   'lines':'3', 
   'topic':'Sir Stamford Raffles'
})

## ChatPromptTemplate
* PromptTemplate is for LLMs
* ChatPromptTemplate is for ChatModels

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.prompts.chat import SystemMessage, HumanMessagePromptTemplate
from langchain.chat_models import ChatOpenAI

In [None]:
template = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content=("You are a python coder that helps user with writing programs")),
        HumanMessagePromptTemplate.from_template("{text}"),
    ]
)

In [None]:
llm = ChatOpenAI(openai_api_key=openai_api_key)

In [None]:
print(llm(template.format_messages(text='Check whether a number is prime or not')).content)

## <font color=blue>OutputParsers</font>
* Not only can you customise your prompt, you can also set the format of the output

In [None]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

In [None]:
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()

prompt = PromptTemplate(
    template="List down 5 countries that start with letter'{alphabet}'\n{format_instructions}",
    input_variables=["alphabet"],
    partial_variables={"format_instructions": format_instructions}
)

In [None]:
# print(format_instructions)
# print()
# print(prompt)

In [None]:
llm = OpenAI(
    temperature=0, 
    openai_api_key=openai_api_key,
)

In [None]:
# Uncomment to see the default parameters of OpenAI

# print(llm)

In [None]:
inputp = prompt.format(alphabet="S")
print(inputp)

output = llm(inputp)

In [None]:
print(output)

## Pydantic Model

* https://betterprogramming.pub/how-to-add-natural-language-input-to-an-existing-python-application-with-langchain-and-pydantic-7774048c7ab7

In [None]:
from pydantic import BaseModel, Field, conlist
from typing import List, Optional, Tuple

In [None]:
# Pydantic model to describe the structure of the data we want to parse. In this case, 
# we want to capture information about a user’s music taste, including the genres, bands, 
# and albums they like. Here’s how to define such a model:

class MusicTasteDescriptionResult(BaseModel):
    # Structured description of a user's music taste.
    # Every field is optional and defaults to None; when a list is supplied,
    # conlist bounds its length.
    # NOTE(review): documented with comments on purpose - a class docstring would
    # presumably be copied into the JSON schema that the output parser sends to the
    # LLM, changing the prompt. Confirm before converting these to a docstring.

    # genres the user likes: 1 to 5 strings
    genres: Optional[conlist(str, min_length=1, max_length=5)] = Field(
        default=None,
        description="Music genres liked by the user. Must contain between 1 and 5 genres",
    )

    # bands or artists the user likes: 1 to 5 strings
    bands: Optional[conlist(str, min_length=1, max_length=5)] = Field(
        default=None,
        description="Specific bands or artists liked by the user. If provided, must contain between 1 and 5 bands or artists",
    )

    # albums the user likes: 1 to 5 strings
    albums: Optional[conlist(str, min_length=1, max_length=5)] = Field(
        default=None,
        description="Specific albums liked by the user. If provided, must contain between 1 and 5 albums",
    )

    # exactly two integers: start year and end year
    year_range: Optional[conlist(int, min_length=2, max_length=2)] = Field(
        default=None,
        description="Year range of music liked by the user. If provided, must contain exactly 2 years indicating the start and end of the range",
    )

In [None]:
# integrate Pydantic with LangChain
from langchain.output_parsers import PydanticOutputParser

parser = PydanticOutputParser(pydantic_object=MusicTasteDescriptionResult)

In [None]:
# define some examples to guide LLM in outputting the expected object

def _escape_braces(text):
    """Double every curly bracket so LangChain does not treat it as a prompt variable."""
    return text.replace("{", "{{").replace("}", "}}")


def _example_result(fields):
    """Validate ``fields`` against MusicTasteDescriptionResult and return brace-escaped JSON.

    Uses the pydantic v2 methods ``model_validate`` / ``model_dump_json`` in place of
    the deprecated ``parse_obj`` / ``json``.
    """
    return _escape_braces(
        MusicTasteDescriptionResult.model_validate(fields).model_dump_json()
    )


examples = [
    {
        "music taste description": "I like rock such as Rolling Stones or The Ramones, or the album London Calling from the clash",
        "result": _example_result({
            "genres": ["rock"],
            "bands": ["Rolling Stones", "The Ramones", "The Clash"],
            "albums": ["London Calling"],
        }),
    },
    {
        "music taste description": "I enjoy rock music from the 70s like Led Zeppelin",
        "result": _example_result({
            "genres": ["rock"],
            "bands": ["Led Zeppelin"],
            "year_range": [1970, 1979],
        }),
    },
    # add more examples as needed...
]

In [None]:
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate

example_prompt = PromptTemplate(
    input_variables=["music taste description", "result"], 
    template="Query: {music taste description}\nResult:\n{result}")

In [None]:
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt, 
    prefix="""Given a query describing a user's music taste, transform it into a structured object. {format_instructions}""",
    suffix="Query: {input}\nResult:\n", 
    input_variables=["input"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [None]:
# call .format() to generate the string, and pass the input variables as keyword arguments
print(example_prompt.format(**examples[0])) 

print("\n#######\n")

print(prompt.format(input="My favorite band is The Beatles"))

## Running the Chain

* More details are covered in the **Chains** section

In [None]:
from langchain.chains import LLMChain

chain = LLMChain(
    llm=llm, 
    prompt=prompt
)

In [None]:
output = chain.run("I love pop music from the 80s, especially Madonna")
print(output)

In [None]:
# Parse the raw LLM text with the Pydantic output parser and print one line per field.
# Any exception raised while parsing or formatting is printed instead of propagating.
try:
    parsed_taste = parser.parse(output)

    def _joined(values):
        # comma-separated values, or the placeholder when the field is empty/None
        return ", ".join(values) if values else 'Not specified'

    genres_text = _joined(parsed_taste.genres)
    bands_text = _joined(parsed_taste.bands)
    albums_text = _joined(parsed_taste.albums)
    if parsed_taste.year_range:
        year_text = f"{parsed_taste.year_range[0]} - {parsed_taste.year_range[1]}"
    else:
        year_text = 'Not specified'

    # leading empty line and trailing 4-space line reproduce the original layout
    report_lines = [
        "",
        f"Genres: {genres_text}",
        f"Bands: {bands_text}",
        f"Albums: {albums_text}",
        f"Year Range: {year_text}",
        "    ",
    ]
    print("\n".join(report_lines))
except Exception as e:
    print(e)

## <font color=blue>Chains</font>
* Using an LLM in isolation is fine for simple applications
* More complex applications require chaining LLMs
* Either with each other or with other components

### Basic Chains

In [None]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain

In [None]:
prompt = PromptTemplate(
    input_variables=["city"],
    template="Describe a perfect day in {city}?",
) 

In [None]:
llm = OpenAI(
    temperature=0, 
    openai_api_key=openai_api_key,
)

print(llm)

In [None]:
llmchain = LLMChain(
    llm=llm, 
    prompt=prompt
)

llmchain.run("Paris")

### LLM Math Chain
* Designed to solve complex word math problems

In [20]:
from langchain.chains import LLMMathChain
from langchain.llms import OpenAI

llm = OpenAI(temperature=0)

# verbose = False
# Answer: 2.4116004626599237
#
# verbose = True
# > Entering new LLMMathChain chain...
# What is 13 raised to the .3432 power?
# ```text
# 13**.3432
# ```
# ...numexpr.evaluate("13**.3432")...
#
# Answer: 2.4116004626599237
# > Finished chain.
# 'Answer: 2.4116004626599237'
llm_math = LLMMathChain.from_llm(
    llm, 
    verbose=True
)  

llm_math.run("What is 13 raised to the .3432 power?")



[1m> Entering new LLMMathChain chain...[0m
What is 13 raised to the .3432 power?[32;1m[1;3m
```text
13**.3432
```
...numexpr.evaluate("13**.3432")...
[0m
Answer: [33;1m[1;3m2.4116004626599237[0m
[1m> Finished chain.[0m


'Answer: 2.4116004626599237'

### More Chains

### Simple Sequential Chain
* Single input/output
* If more than 1 input/output, error is "Chains used in SimplePipeline should all have one input"

In [85]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import SimpleSequentialChain

In [86]:
llm = ChatOpenAI(
    temperature=0.9,  # more creative
)

In [87]:
first_prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe a company that makes {product}?"
)

chain_one = LLMChain(
    llm = llm,
    prompt = first_prompt
)

In [88]:
second_prompt = ChatPromptTemplate.from_template(
    "Write a 20 words description for the following company: {company}."
)

chain_two = LLMChain(
    llm = llm,
    prompt = second_prompt
)

In [89]:
overall_simple_seq_chain = SimpleSequentialChain(
    chains=[chain_one, chain_two],
    verbose=True
)

In [82]:
product = "running shoes"

overall_simple_seq_chain.run(product)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mStrideTech[0m
[33;1m[1;3mStrideTech is a tech company specializing in innovative solutions and software for optimizing and enhancing athletic performance and coaching.[0m

[1m> Finished chain.[0m


'StrideTech is a tech company specializing in innovative solutions and software for optimizing and enhancing athletic performance and coaching.'

### Sequential Chain
* Multiple inputs/outputs

In [108]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import SequentialChain

In [109]:
# prompt template 1: suggest a company name for the product
first_prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe a company that makes {product}?",    
)

chain_one = LLMChain(
    llm=llm, 
    prompt=first_prompt, 
    output_key="output_first_chain"
)

In [110]:
second_prompt = ChatPromptTemplate.from_template(
    "Write a 20 words description for the {output_first_chain} that makes best in class {product}."
)

chain_two = LLMChain(
    llm = llm,
    prompt = second_prompt,
    output_key = "output_second_chain"
)

In [113]:
overall_seq_chain = SequentialChain(
    chains = [chain_one, chain_two],
    input_variables=["product"],
    output_variables=["output_second_chain"]
)

In [114]:
product = "running shoe"

overall_seq_chain.run(product)

'StrideTech: A leading running shoe company that blends cutting-edge technology with superior craftsmanship to create top-tier footwear.'

### Router Chain
* Create a chain that dynamically selects a single chain out of a series of other chains to use, depending on the user input or the prompt provided.
* Contains two main things:
  * RouterChain: responsible for selecting the next chain to call
  * destination_chains: chains that the router chain can route to


In [128]:
from langchain.chains.router import MultiPromptChain
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# LLM Router chains
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser

from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE

### Creating And Mapping Prompts

In [129]:
# Prompt templates for the two destination chains. Each has a single {input}
# placeholder for the student's question. A trailing backslash inside the
# triple-quoted string joins the next line without inserting a newline.
math_teacher = """Your name is Mary, you are a Math teacher at a \
primary school. You are very good at teaching math due to your ability \
to break down complicated tasks into much smaller ones. \
Students ask different questions about math, you are responsible to answer them. \
Use your solid mathematics skills to explain the concepts in an easy to understand manner. \
All your responses should start in the format below:
Hello,
          
Below is a question from a student in your class:
{input}\
"""

science_teacher = """Your name is Susan, you are a science teacher at a \
primary school. You are very good at teaching science due to \
your ability to help students develop exploratory questions, develop hypotheses \
to explain natural events, and encourage students to test and refine their explanations based on scientific evidence. \
Students ask different questions about science, you are responsible to answer them. \
Use your scientific thinking skills to explain the concepts in very easy to understand manner. \
All your responses should start in the format below:
Hello,
         
Below is a question from a student in your class:
{input}\
"""

In [130]:
prompt_infos = [
    {
        "name": "Math Teacher",
        "description": "Good for answering questions about Math",
        "prompt_template": math_teacher,
    },
    {
        "name": "Science Teacher",
        "description": "Good for answering questions about Science",
        "prompt_template": science_teacher,
    },
]

In [131]:
llm = ChatOpenAI(
    temperature=0
)

### Mapping Destination Chains

In [132]:
# map destination chains
destination_chains = {}

for prompt_info in prompt_infos:
    name = prompt_info["name"]
    prompt_template = prompt_info["prompt_template"]
    prompt = PromptTemplate(template=prompt_template, input_variables=["input"])
    chain = LLMChain(llm=llm, prompt=prompt)
    destination_chains[name] = chain

In [133]:
destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destinations_str = "\n".join(destinations)

In [142]:
print(destinations_str)

Math Teacher: Good for answering questions about Math
Science Teacher: Good for answering questions about Science


In [134]:
default_prompt = ChatPromptTemplate.from_template("{input}")
default_chain = LLMChain(llm=llm, prompt=default_prompt)

In [135]:
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(
    destinations=destinations_str
)

In [136]:
router_prompt = PromptTemplate(
    template=router_template,
    input_variables=["input"],
    output_parser=RouterOutputParser(),
)

### Create Router Chain

In [137]:
# creating the router chain
router_chain = LLMRouterChain.from_llm(
    llm, 
    router_prompt
)

In [138]:
# Multiple Prompt Chain
chain = MultiPromptChain(
    router_chain=router_chain,
    destination_chains=destination_chains,
    default_chain=default_chain,
    verbose=True,
)

### Testing

In [141]:
chain.run("What is the meaning of integration?")

# notice the output is answered by "Math Teacher"



[1m> Entering new MultiPromptChain chain...[0m




Math Teacher: {'input': 'What is the meaning of integration?'}
[1m> Finished chain.[0m


"Hello,\n\nIntegration is a mathematical concept that involves finding the area under a curve. It is a way to calculate the total accumulation of a quantity over a given interval. \n\nTo understand integration, let's imagine you have a graph that represents the speed of a car over time. The graph shows how the car's speed changes at different moments. Integration allows us to determine the total distance traveled by the car during a specific time interval.\n\nIn simpler terms, integration helps us find the total amount or the sum of something. It can be used to calculate the total area of a shape, the total amount of water in a tank, or even the total amount of money spent over a period of time.\n\nTo perform integration, we use a mathematical tool called the integral symbol (∫) and a function that represents the curve we want to find the area under. By applying integration techniques, we can calculate the exact value of the area or the total accumulation.\n\nI hope this explanation he

In [145]:
chain.run("What is the basic theory of quantum physics?")

# notice the output is answered by "Science Teacher"



[1m> Entering new MultiPromptChain chain...[0m




Science Teacher: {'input': 'What is the basic theory of quantum physics?'}
[1m> Finished chain.[0m


"Hello,\n\nThe basic theory of quantum physics is a branch of physics that explains how very tiny particles, like atoms and subatomic particles, behave. It is based on the idea that these particles can exist in multiple states at the same time, called superposition. \n\nIn quantum physics, particles can also be connected to each other in a special way called entanglement. This means that what happens to one particle can instantly affect another particle, no matter how far apart they are.\n\nAnother important concept in quantum physics is uncertainty. This means that we can't know both the exact position and the exact speed of a particle at the same time. It's like trying to take a picture of a moving object in the dark - you can either see where it is or how fast it's moving, but not both.\n\nQuantum physics is a very complex and fascinating field, and scientists are still learning more about it every day. I hope this helps you understand the basic theory of quantum physics!\n\nBest re

In [146]:
chain.run("Translate 'Singapore is a garden city' to japanese")

# notice the router picked no destination ("None"), so the default chain answered



[1m> Entering new MultiPromptChain chain...[0m




None: {'input': "Translate 'Singapore is a garden city' to japanese"}
[1m> Finished chain.[0m


'シンガポールはガーデンシティです。 (Shingapōru wa gādenshiti desu.)'

## <font color=blue>Memory</font>
* LLMs are stateless. Each incoming query is processed independently of the other interactions.
* Memory allows an LLM to remember previous interactions with the user

In [None]:
from langchain import OpenAI
from langchain.chains import ConversationChain

# first initialize the large language model
# the key was loaded into `openai_api_key` by the dotenv cells; the `openai`
# module's own `api_key` attribute is never assigned in this notebook
llm = OpenAI(
    temperature=0,
    openai_api_key=openai_api_key,
    #model_name="text-davinci-003"
)

In [None]:
print(llm)

In [None]:
# now initialize the conversation chain
conversation = ConversationChain(llm=llm)

# print default prompt template
# notice the prompt attempts to reduce hallucinations by stating:
# "If the AI does not know the answer to a question, it truthfully says it does not know."
print(conversation.prompt.template)

In [None]:
# ConversationBufferMemory is the most straightforward conversational memory in LangChain
from langchain.chains.conversation.memory import ConversationBufferMemory

conversation_buf = ConversationChain(
    llm=llm,
    memory=ConversationBufferMemory()
)

In [None]:
# let's start the conversation
conversation_buf("Good morning AI!")

In [None]:
from langchain.callbacks import get_openai_callback

# see how many tokens are being used by each interaction/conversation.
def count_tokens(chain, query):
    """Run ``query`` through ``chain`` and print the OpenAI callback's usage report.

    Args:
        chain: object exposing ``run(query)``.
        query: text passed to ``chain.run``.

    Returns:
        Whatever ``chain.run(query)`` returns.
    """
    with get_openai_callback() as usage:
        answer = chain.run(query)
        # the callback's string form is the report; it is followed by one blank line
        print(str(usage), end='\n\n')

    return answer

In [None]:
count_tokens(
    conversation_buf, 
    "My interest here is to understand Large Language Models and then be able to explain its working to a 10 years old."
)

In [None]:
count_tokens(
    conversation_buf, 
    "Can you suggest some practical examples of Large Language Model to help me in my explanation?"
)

In [None]:
count_tokens(
    conversation_buf, 
    "How do I explain 'text classification' to a 10 years old? Give some examples to help me explain."
)

In [None]:
count_tokens(
    conversation_buf, 
    "What is my aim again?"
)

### ConversationBufferMemory

From the above, we see that LLM can clearly remember the history of conversation. Let’s take a look at how this conversation history is stored by the ConversationBufferMemory

In [None]:
print(conversation_buf.memory.buffer.replace('\n', '\n\n'))

## ConversationSummaryMemory
Using **ConversationBufferMemory**, tokens will be used quickly during the exchanges.  It can exceed the context window limit of even the most advanced LLMs available today.

To avoid excessive token usage, we can use ConversationSummaryMemory. As the name would suggest, this form of memory summarizes the conversation history before it is passed to the {history} parameter.

In [None]:
from langchain.chains.conversation.memory import ConversationSummaryMemory

# conversation chain whose memory is a summary of the history, written by the same LLM
conversation_sum = ConversationChain(
    llm=llm,
    memory=ConversationSummaryMemory(llm=llm)
)

# print the prompt template of the summary-memory chain just created
print(conversation_sum.prompt.template)

In [None]:
conversation_sum("Good morning AI!")

In [None]:
count_tokens(
    conversation_sum, 
    "My interest here is to understand Large Language Models and then be able to explain its working to a 10 years old."
)

In [None]:
count_tokens(
    conversation_sum, 
    "Can you suggest some practical examples of Large Language Model to help me in my explanation?"
)

In [None]:
count_tokens(
    conversation_sum, 
    "How do I explain 'text classification' to a 10 years old? Give some examples to help me explain."
)

In [None]:
count_tokens(
    conversation_sum, 
    "What is my aim again?"
)

In [None]:
print(conversation_sum.memory.buffer)

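### ConversationBufferMemory vs ConversationSummaryMemory
For this short conversation, ConversationSummaryMemory uses more tokens than ConversationBufferMemory, so is there any advantage to using ConversationSummaryMemory over the buffer memory?

Yes, for long conversations: the buffer memory resends the entire history on every call, whereas the summary memory sends a condensed summary instead, which typically grows much more slowly.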

[Source](https://github.com/pinecone-io/examples/blob/master/learn/generation/langchain/handbook/03a-token-counter.ipynb)
<img src="langchain_conversation_summary.png">

## <font color=blue>Retrieval</font>
* Retrieve relevant information from an external data source and pass the information to LLM

### Retrieve and Summarise a Text File

In [None]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [None]:
#Document loader
loader = TextLoader("MicrosoftEULA.txt", encoding="utf-8")
documents = loader.load()

In [None]:
#Document Transformer
text_splitter = CharacterTextSplitter(chunk_size=1450, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [None]:
#Embedding model
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [None]:
#Vector DB to store embeddings
docsearch = Chroma.from_documents(texts, embeddings)

In [None]:
qa_text_llm = OpenAI(openai_api_key=openai_api_key)

print(qa_text_llm)

In [None]:
#Retriever being used using parameter retriever
qa_text = RetrievalQA.from_chain_type(
    llm=qa_text_llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever()
)

In [None]:
query = "What this document about? Summarize it in a paragraph"

print(qa_text.run(query).strip())

### Retrieve and Query a CSV File

Check the source [**here**](https://github.com/Ryota-Kawamura/LangChain-for-LLM-Application-Development/blob/main/L4-QnA.ipynb).

In [None]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator

from IPython.display import display, Markdown

In [None]:
file = 'OutdoorClothingCatalog_1000.csv'

loader = CSVLoader(
    file_path=file, 
    encoding='utf-8'
)

In [None]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [None]:
query ="Please list all your shirts with sun protection in a table in markdown and summarize each one."

In [None]:
# named qa_csv_llm (not `openai`) so the imported `openai` module is not rebound
qa_csv_llm = ChatOpenAI(openai_api_key=openai_api_key)

# "stuff" chain type over the in-memory vector store built from the CSV rows
qa_csv = RetrievalQA.from_chain_type(
    llm=qa_csv_llm,
    chain_type='stuff',
    retriever=index.vectorstore.as_retriever()
)

response = qa_csv.run(query)

In [None]:
display(Markdown(response))

### Retrieve and Query a YouTube Video

In [None]:
from langchain.document_loaders import YoutubeLoader
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

In [None]:
# the youtube videos are about:
# 1. The Best Explanation of Pi
# 2. Cool Drone Arc Aerosystem Amazing Invention

loader = YoutubeLoader.from_youtube_url(
    #"https://www.youtube.com/watch?v=TlY-Sh9Rzas", 
    "https://www.youtube.com/watch?v=n5qcYwxyb7E",
    add_video_info=True,
    language=["en"]    
)

youtube_data = loader.load()

In [None]:
print(youtube_data)

In [None]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
youtube_text = text_splitter.split_documents(youtube_data)

In [None]:
#print(youtube_text)

In [None]:
#Embedding model
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [None]:
qa_youtube_llm = OpenAI(openai_api_key=openai_api_key)

In [None]:
docsearch = Chroma.from_documents(youtube_text, embeddings)

In [None]:
#Retriever being used using parameter retriever
qa_youtube = RetrievalQA.from_chain_type(
    llm=qa_youtube_llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever()
)

In [None]:
#query = "Summarize the text in short bullet-points"   # for best explanation of pi
query = "What is so amazing about the invention?"      # for Cool Drone Arc Aerosystem

print(qa_youtube.run(query).strip())

### Retrieve and Query a Text PDF

In [None]:
from langchain.document_loaders import YoutubeLoader
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFium2Loader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

In [None]:
loader = PyPDFium2Loader("1706.03762.pdf")
pdf_data = loader.load()

In [None]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
pdf_text = text_splitter.split_documents(pdf_data)

In [None]:
#Embedding model
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

qa_pdf_llm = OpenAI(openai_api_key=openai_api_key)

In [None]:
docsearch = Chroma.from_documents(pdf_text, embeddings)

In [None]:
#Retriever being used using parameter retriever
qa_pdf = RetrievalQA.from_chain_type(
    llm=qa_pdf_llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever()
)

In [None]:
query = "Summarise the paper"

print(qa_pdf.run(query).strip())

In [None]:
query = "Explain the workings of multi-attention head(s) to a computer undergraduate."

print(qa_pdf.run(query).strip())

## <font color=blue>Agent</font>
* The core idea of agents is to use a language model to choose a sequence of actions to take. In chains, a sequence of actions is hardcoded (in code). In agents, a language model is used as a reasoning engine to determine which actions to take and in which order.

### Tools
* Tools are interfaces that an agent can use to interact with the world
* For a list of available tools, refer to [LangChain-Agent-Tools](https://python.langchain.com/docs/integrations/tools/)

In [9]:
# import necessary libraries

from langchain.tools import Tool, DuckDuckGoSearchRun, ArxivQueryRun, WikipediaQueryRun 
from langchain.utilities import WikipediaAPIWrapper 

from langchain.agents import initialize_agent 
from langchain.agents import AgentType 
from langchain.chat_models import ChatOpenAI 
from langchain.chains import LLMChain 
from langchain.prompts import PromptTemplate

In [10]:
# Create the three ready-made lookup tools: Wikipedia, Arxiv and DuckDuckGo search

wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
arxiv = ArxivQueryRun()
search = DuckDuckGoSearchRun()

### Set Up Essay Tool

In [11]:
# Chat model shared by the essay chain and the agent; temperature is set to 0
llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0)

In [12]:
# Template for the essay chain; {content} is filled in when the chain runs
prompt_template = "Write an essay for the topic provided by the user with the help of following content: {content}"

# Wrap the template in a PromptTemplate, then bind it to the chat model
essay_prompt = PromptTemplate.from_template(prompt_template)
essay = LLMChain(prompt=essay_prompt, llm=llm)

In [13]:
# Tools offered to the agent. Each tool's `description` is the text the model
# sees when deciding which tool to call, so every tool needs a description
# that is accurate and distinct from the others.
tools = [
    Tool(
        name="Search",
        func=search.run,
        description="Useful for when you need to answer questions about current events."
    ),

    Tool(
        name="Arxiv",
        func=arxiv.run,
        # Previously a copy of the Wikipedia description, which made the two
        # tools indistinguishable to the agent.
        description=(
            "Useful when you need to answer questions about scientific research "
            "papers (e.g. physics, mathematics, computer science) published on arXiv"
        )
    ),

    Tool(
        name="Wikipedia",
        func=wiki.run,
        description="Useful when you need an answer about encyclopedic general knowledge"
    ),

    Tool.from_function(
        func=essay.run,
        name="Essay",
        description="Useful when you need to write an essay"
    ),
]

In [14]:
# Build an OpenAI-functions agent over `tools`, driven by `llm`;
# verbose=True prints the agent's intermediate steps while it runs

agent = initialize_agent(
    tools=tools,
    llm=llm,
    verbose=True,
    agent=AgentType.OPENAI_FUNCTIONS,
)

In [21]:
# running the agent
# define a prompt for the agent and run it
# the agent decides which of the configured tools (web search, Arxiv,
# Wikipedia, Essay) to call for the topic, then returns the essay

prompt = "Write an essay in 1000 words for the topic {input}, use the tools to retrieve the necessary information"
# Stored as `topic` rather than `input` so the builtin input() is not
# shadowed for the rest of the notebook session; the template placeholder
# is still called {input}.
topic = "Essay on Global Warming – Causes and Solutions"

print(agent.run(prompt.format(input=topic)))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Essay` with `Essay on Global Warming – Causes and Solutions`


[0m[36;1m[1;3mGlobal warming is a pressing issue that has garnered significant attention in recent years. It refers to the long-term increase in Earth's average surface temperature due to human activities, primarily the emission of greenhouse gases. This essay will explore the causes of global warming and propose potential solutions to mitigate its effects.

One of the main causes of global warming is the burning of fossil fuels such as coal, oil, and natural gas. These fuels release carbon dioxide (CO2) and other greenhouse gases into the atmosphere, trapping heat and causing the planet to warm. The increased use of these fuels for transportation, electricity generation, and industrial processes has led to a significant rise in greenhouse gas emissions.

Deforestation is another major contributor to global warming. Trees absorb CO2 from the atmosph

## <font color=blue>Callbacks</font>
* LangChain provides a callbacks system that lets you hook into the various stages of your LLM application.
* This is useful for logging, monitoring, streaming, and other tasks.
* Built-in callbacks integrations with 3rd-party tools can be found [**here**](https://python.langchain.com/docs/integrations/callbacks/).
* LangChain provides a few built-in handlers that are available in the langchain/callbacks module.
* The most basic handler is the **StdOutCallbackHandler** which logs all events to **stdout**.

In [34]:
from langchain.callbacks import StdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import AgentAction, AgentFinish, BaseMessage, LLMResult


from typing import Dict, Any, List, Union

In [35]:
class BaseCallbackHandler:
    """Skeleton of the hooks a handler can implement for langchain callbacks.

    Every hook below has a docstring-only body, so it does nothing and
    returns ``None``. Override the hooks of interest in a subclass.
    """

    # ---- LLM / chat model hooks ----

    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        **kwargs: Any,
    ) -> Any:
        """Hook called when an LLM starts running."""

    def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[BaseMessage]],
        **kwargs: Any,
    ) -> Any:
        """Hook called when a chat model starts running."""

    def on_llm_new_token(
        self,
        token: str,
        **kwargs: Any,
    ) -> Any:
        """Hook called for each new LLM token.

        Only available when streaming is enabled.
        """

    def on_llm_end(
        self,
        response: LLMResult,
        **kwargs: Any,
    ) -> Any:
        """Hook called when an LLM finishes running."""

    def on_llm_error(
        self,
        error: Union[Exception, KeyboardInterrupt],
        **kwargs: Any,
    ) -> Any:
        """Hook called when an LLM raises an error."""

    # ---- chain hooks ----

    def on_chain_start(
        self,
        serialized: Dict[str, Any],
        inputs: Dict[str, Any],
        **kwargs: Any,
    ) -> Any:
        """Hook called when a chain starts running."""

    def on_chain_end(
        self,
        outputs: Dict[str, Any],
        **kwargs: Any,
    ) -> Any:
        """Hook called when a chain finishes running."""

    def on_chain_error(
        self,
        error: Union[Exception, KeyboardInterrupt],
        **kwargs: Any,
    ) -> Any:
        """Hook called when a chain raises an error."""

    # ---- tool hooks ----

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        **kwargs: Any,
    ) -> Any:
        """Hook called when a tool starts running."""

    def on_tool_end(
        self,
        output: str,
        **kwargs: Any,
    ) -> Any:
        """Hook called when a tool finishes running."""

    def on_tool_error(
        self,
        error: Union[Exception, KeyboardInterrupt],
        **kwargs: Any,
    ) -> Any:
        """Hook called when a tool raises an error."""

    # ---- text and agent hooks ----

    def on_text(
        self,
        text: str,
        **kwargs: Any,
    ) -> Any:
        """Hook called on arbitrary text."""

    def on_agent_action(
        self,
        action: AgentAction,
        **kwargs: Any,
    ) -> Any:
        """Hook called on an agent action."""

    def on_agent_finish(
        self,
        finish: AgentFinish,
        **kwargs: Any,
    ) -> Any:
        """Hook called when an agent ends."""

In [None]:
# Shared objects for the callback demos below.
# NOTE(review): OpenAI() is given no explicit API key here; presumably it picks
# up OPENAI_API_KEY from the environment loaded earlier via load_dotenv - confirm.
prompt = PromptTemplate.from_template(template="1 + {number} = ")
llm = OpenAI()
handler = StdOutCallbackHandler()

In [37]:
# Constructor callback: attach the StdOutCallbackHandler when the chain is built
chain = LLMChain(
    callbacks=[handler],
    prompt=prompt,
    llm=llm,
)

chain.run(number=2)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m1 + 2 = [0m

[1m> Finished chain.[0m


'\n3'

In [38]:
# Verbose flag: get the same output by setting `verbose=True` instead of passing a handler
chain = LLMChain(
    verbose=True,
    prompt=prompt,
    llm=llm,
)

chain.run(number=2)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m1 + 2 = [0m

[1m> Finished chain.[0m


'\n\n3'

In [39]:
# Request callbacks: build the chain without a handler and pass `callbacks`
# on the individual run() call instead
chain = LLMChain(
    prompt=prompt,
    llm=llm,
)

chain.run(callbacks=[handler], number=2)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m1 + 2 = [0m

[1m> Finished chain.[0m


'\n\n3'

### Callbacks argument

The **callbacks** argument is available on most objects throughout the API in two different places.
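* Constructor callbacks: passed when the object is created, e.g. `LLMChain(callbacks=[handler])`; they apply to every call made on that object.
* Request callbacks: passed on an individual call, e.g. `chain.run(number=2, callbacks=[handler])`; they apply to that request only.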

Check [**here**](https://python.langchain.com/docs/modules/callbacks/) for more details.