In [2]:
# Using predict function (input is string)
from langchain.llms.openai import OpenAI
from langchain.chat_models import ChatOpenAI

from dotenv import load_dotenv
import os

load_dotenv()
token = os.environ.get("OPENAI_API_KEY")

llm = OpenAI()
chat = ChatOpenAI(
    openai_api_key=token,
    temperature=0.1
)

# a = llm.predict("How many planets are there?")
# b = chat.predict("How many planets are there?")

# b

'As of now, there are eight recognized planets in our solar system: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. However, there is ongoing debate among astronomers regarding the classification of Pluto as a planet.'

In [3]:
# Using predict_messages function (input is message list)
from langchain.schema import HumanMessage, AIMessage, SystemMessage, messages

chat2 = ChatOpenAI(
    temperature=0.1
)

messages = [
    SystemMessage(
        content="You are a geography expert. And you only reply in Italian."
    ),
    AIMessage(content="Ciao, mi chiamo Paolo!"),
    HumanMessage(content="What is the distance between Mexico and Thailand. Also what is your name?")
]

# chat2.predict_messages(messages)

AIMessage(content='Ciao! Il mio nome è Paolo. La distanza tra il Messico e la Thailandia è di circa 17.000 chilometri.')

In [4]:
# Using PromptTemplate and ChatPromptTemplate
from langchain.prompts import PromptTemplate, ChatPromptTemplate

template = PromptTemplate.from_template(
    "What is the distance between {country_a} and {country_b}"
)

prompt = template.format(
    country_a="Mexico",
    country_b="Thailand"
)

# chat2.predict(prompt)

# Another way of using PromptTemplate
t = PromptTemplate(
    template="What is capital of {country}",
    input_variables=["country"]
)

t.format(country="France")

# ChatPromptTemplate
template = ChatPromptTemplate.from_messages(
    [
        ("system",
         "You are a geography expert. And you only reply in {language}."),
        ("ai", "Ciao, mi chiamo {name}!"),
        (
            "human",
            "What is the distance between {country_a} and {country_b}. Also what is your name?"
        )
    ]
)

prompt = template.format_messages(
    language="Greek",
    name="Socrates",
    country_a="Mexico",
    country_b="Thailand"
)

# chat2.predict_messages(prompt)

AIMessage(content='Γεια σου! Το όνομά μου είναι Σωκράτης. Η απόσταση μεταξύ Μεξικού και Ταϊλάνδης είναι περίπου 16.000 χιλιόμετρα.')

In [5]:
# Using output parser without chain (more complicated)
from langchain.schema import BaseOutputParser

class CommaOutputParser(BaseOutputParser):
    """Turn a comma separated model reply into a list of trimmed strings."""

    def parse(self, text: str) -> list:
        """Split ``text`` on commas and strip whitespace around every item.

        Empty fields (an empty reply, a trailing comma, or ``a,,b``) are
        dropped so the returned list never contains blank entries.
        """
        stripped = (item.strip() for item in text.split(","))
        return [item for item in stripped if item]
    
p = CommaOutputParser()
p.parse("Hello,how, are,you  ")

template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a list generating machine. Everything you are asked will be answered with a comma separated list of max {max_items} in lowercase.Do NOT reply with anything else."
        ),
        (
            "human", "{question}"
        )
    ]
)

prompt = template.format_messages(
    max_items=10,
    question="What are the planets?"
)

# result = chat.predict_messages(prompt)

# p = CommaOutputParser()
# p.parse(result.content)

['mercury',
 'venus',
 'earth',
 'mars',
 'jupiter',
 'saturn',
 'uranus',
 'neptune',
 'pluto']

In [6]:
# Using output parser with chain
from langchain.schema import BaseOutputParser

class CommaOutputParser(BaseOutputParser):
    """Parse a comma separated reply into a list of items."""

    def parse(self, text: str) -> list:
        """Return the comma separated fields of ``text`` with whitespace removed.

        Fields that are empty after stripping (empty reply, trailing comma,
        doubled commas) are skipped instead of being returned as ``''``.
        """
        return [piece.strip() for piece in text.split(",") if piece.strip()]

template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a list generating machine. Everything you are asked will be answered with a comma separated list of max {max_items} in lowercase.Do NOT reply with anything else."
        ),
        (
            "human", "{question}"
        )
    ]
)

chain = template | chat | CommaOutputParser()

# chain.invoke({
#     "max_items":5,
#     "question":"What are the colors?"
# })

['red', 'blue', 'green', 'yellow', 'orange']

In [6]:
# Chain Chains together + Streaming the output
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

chat3 = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

chef_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a world-class international chef. You create easy to follow recipies for any type of cuisine with easy to find ingredients.",
        ),
        ("human", "I want to cook {cuisine} food."),
    ]
)

chef_chain = chef_prompt | chat3

veg_chef_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a vegetarian chef specialized on making traditional recipies vegetarian. You find alternative ingredients and explain their preparation. You don't radically modify the recipe. If there is no alternative for a food just say you don't know how to replace it.",
        ),
        ("human", "{recipe}"),
    ]
)

veg_chain = veg_chef_prompt | chat3

final_chain = {"recipe": chef_chain} | veg_chain
# final_chain.invoke({"cuisine": "indian"})

### Modules

In [8]:
# Model I/O module
# model input = prompt

from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

# FewShotPromptTemplate
#   Providing few examples how the model should answer

chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

examples = [
    {
        "question": "What do you know about France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]


example_prompt = PromptTemplate.from_template("Human: {question}\nAI:{answer}")

prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    suffix="Human: What do you know about {country}?",
    input_variables=["country"],
)

chain = prompt | chat

# chain.invoke({"country": "Korea"})


AI: 
        I know this:
        Capital: Seoul
        Language: Korean
        Food: Kimchi and Bibimbap
        Currency: South Korean Won

AIMessageChunk(content='AI: \n        I know this:\n        Capital: Seoul\n        Language: Korean\n        Food: Kimchi and Bibimbap\n        Currency: South Korean Won')

In [None]:
# FewShotChatMessagePromptTemplate
#   FewShotPromptTemplate for chat messages

from langchain.prompts.few_shot import FewShotChatMessagePromptTemplate
from langchain.prompts import ChatMessagePromptTemplate, ChatPromptTemplate

chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

examples = [
    {
        "country": "France",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "country": "Italy",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "country": "Greece",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]

example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "What do you know about {country}?"),
        ("ai", "{answer}"),
    ]
)

prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)

final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a geography expert, you give short answers."),
        prompt,
        ("human", "What do you know about {country}?"),
    ]
)

chain = final_prompt | chat

chain.invoke({"country": "Thailand"})

In [14]:
# Using Example Selector

from langchain.prompts import example_selector
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts.example_selector.base import BaseExampleSelector
from langchain.prompts.example_selector import LengthBasedExampleSelector


chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)


examples = [
    {
        "question": "What do you know about France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]


class RandomExampleSelector(BaseExampleSelector):
    """Example selector that returns one randomly chosen stored example."""

    def __init__(self, examples):
        # Keep a private copy so add_example() never mutates the caller's list.
        self.examples = list(examples)

    def add_example(self, example):
        """Add ``example`` to the pool that select_examples() draws from."""
        self.examples.append(example)

    def select_examples(self, input_variables):
        """Return a one-element list holding a randomly chosen example.

        ``input_variables`` is accepted but not used by this selector.
        An empty pool yields an empty list instead of raising ``IndexError``.
        """
        from random import choice

        if not self.examples:
            return []
        return [choice(self.examples)]


example_prompt = PromptTemplate.from_template("Human: {question}\nAI:{answer}")

# Make sure the examples being passed is less than length 180.
# Used to keep the costs down
# example_selector = LengthBasedExampleSelector(
#     examples=examples,
#     example_prompt=example_prompt,
#     max_length=180
# )

# random selector
example_selector = RandomExampleSelector(
    examples=examples,
)

prompt = FewShotPromptTemplate(
    example_prompt=example_prompt,
    example_selector=example_selector,
    suffix="Human: What do you know about {country}?",
    input_variables=["country"],
)

prompt.format(country="Brazil")

'Human: What do you know about France?\nAI:\n        Here is what I know:\n        Capital: Paris\n        Language: French\n        Food: Wine and Cheese\n        Currency: Euro\n        \n\nHuman: What do you know about Brazil?'

In [16]:
# loading prompts from json and yaml

from langchain.prompts import load_prompt

prompt1 = load_prompt("./prompt.json")
prompt2 = load_prompt("./prompt.yaml")

a = prompt1.format(country="Brazil")
b = prompt2.format(country="Korea")

a, b

('What is the capital of Brazil', 'What is the capital of Korea')

In [None]:
# Serialization and Composition
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain.prompts.pipeline import PipelinePromptTemplate

chat = ChatOpenAI(
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

intro = PromptTemplate.from_template(
    """
    You are a role playing assistant.
    And you are impersonating a {character}
"""
)

example = PromptTemplate.from_template(
    """
    This is an example of how you talk:

    Human: {example_question}
    You: {example_answer}
"""
)

start = PromptTemplate.from_template(
    """
    Start now!

    Human: {question}
    You:
"""
)

final = PromptTemplate.from_template(
    """
    {intro}
                                     
    {example}
                              
    {start}
"""
)

prompts = [
    ("intro", intro),
    ("example", example),
    ("start", start),
]


full_prompt = PipelinePromptTemplate(
    final_prompt=final,
    pipeline_prompts=prompts,
)


chain = full_prompt | chat

chain.invoke(
    {
        "character": "Pirate",
        "example_question": "What is your location?",
        "example_answer": "Arrrrg! That is a secret!! Arg arg!!",
        "question": "What is your fav food?",
    }
)

In [None]:
# caching: lets us save answers from the llm

from langchain.globals import set_llm_cache, set_debug
from langchain.cache import InMemoryCache, SQLiteCache

set_llm_cache(SQLiteCache("cache.db"))
# set_debug(True)

chat2 = ChatOpenAI(
    temperature=0.1
)

# chat.predict("How do you make italian pasta")


In [None]:
# How to know how much money we are spending on each call
from langchain.callbacks import get_openai_callback

# prints the cost of all the calls in this "with" statement
with get_openai_callback() as usage:
    # a = chat.predict("What is the recipe for soju")
    # print(a)
    print(usage)

In [19]:
# Saving and Loading models
from langchain.llms.openai import OpenAI
from langchain.llms.loading import load_llm

# chat = OpenAI(
#     temperature=0.1,
#     max_tokens=450,
#     model="gpt-3.5-turbo"
# )

# chat.save("model1.json")

chat = load_llm("model1.json")
chat



OpenAIChat(client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_kwargs={'temperature': 0.1, 'max_tokens': 450, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'request_timeout': None, 'logit_bias': {}})

In [1]:
# Memory module
# Adding memory to chatbots
from operator import itemgetter
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(return_messages=True)

memory.save_context({"input": "Hi!"}, {"output": "How are you?"})

# Need to call with empty dict
memory.load_memory_variables({})

{'history': [HumanMessage(content='Hi!'), AIMessage(content='How are you?')]}

In [2]:
# ConversationBufferWindowMemory
#   Save recent k conversations
#   the memory has a window of length k
from langchain.memory import ConversationBufferWindowMemory

memory = ConversationBufferWindowMemory(
    return_messages=True,
    k=4, 
)

def add_message(input, output):
    """Save one input/output pair into the notebook-level ``memory``."""
    human_turn = {"input": input}
    ai_turn = {"output": output}
    memory.save_context(human_turn, ai_turn)

add_message(1, 1)
add_message(2, 2)
add_message(3, 3)
add_message(4, 4)
add_message(5, 5)

memory.load_memory_variables({})

{'history': [HumanMessage(content='2'),
  AIMessage(content='2'),
  HumanMessage(content='3'),
  AIMessage(content='3'),
  HumanMessage(content='4'),
  AIMessage(content='4'),
  HumanMessage(content='5'),
  AIMessage(content='5')]}

In [3]:
# ConversationSummaryMemory
#   BufferMemory saves conversation itself, but SummaryMemory saves summary of the conversations
#   Initially, it uses more tokens than BufferMemory, but as the conversation progresses, SummaryMemory is the better choice.
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)

memory = ConversationSummaryMemory(llm=llm)


def add_message(input, output):
    """Store an input/output pair in ``memory`` through ``save_context``."""
    inputs = {"input": input}
    outputs = {"output": output}
    memory.save_context(inputs, outputs)


def get_history():
    """Return the variables currently exposed by ``memory``."""
    no_inputs = {}  # load_memory_variables() is called with an empty dict
    return memory.load_memory_variables(no_inputs)


add_message("Hi I'm Nicolas, I live in South Korea", "Wow that is so cool!")

# get_history()

{'history': 'Nicolas introduces himself as living in South Korea. The AI responds by expressing admiration for his location.'}

In [None]:
# ConversationSummaryBufferMemory: Summary + Buffer memory
#   The moment the model hits max_token_limit, it summarizes the old messages and saves it into memory
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)

memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=150,
    return_messages=True,
)


def add_message(input, output):
    """Append one exchange (``input`` and ``output``) to ``memory``."""
    exchange = ({"input": input}, {"output": output})
    memory.save_context(*exchange)


def get_history():
    """Load and return the memory variables held by ``memory``."""
    history = memory.load_memory_variables({})
    return history


add_message("Hi I'm Nicolas, I live in South Korea", "Wow that is so cool!")
get_history()

In [None]:
add_message("South Korea is so pretty", "I wish I could go!!!")
get_history()

In [None]:
add_message("How far is Korea from Argentina?", "I don't know! Super far!")
get_history()

In [None]:
add_message("How far is Brazil from Argentina?", "I don't know! Super far!")
get_history()

In [None]:
# ConversationKGMemory
#   Builds a knowledge graph of entities on the conversation
from langchain.memory import ConversationKGMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)

memory = ConversationKGMemory(
    llm=llm,
    return_messages=True,
)


def add_message(input, output):
    """Feed one input/output pair to ``memory.save_context``."""
    said = {"input": input}
    replied = {"output": output}
    memory.save_context(said, replied)


add_message("Hi I'm Nicolas, I live in South Korea", "Wow that is so cool!")
memory.load_memory_variables({"input": "who is Nicolas"})
add_message("Nicolas likes kimchi", "Wow that is so cool!")
# Use the same "input" key as the first lookup so both queries are passed consistently.
memory.load_memory_variables({"input": "what does nicolas like"})

In [None]:
# Memory on LLMChain
#   LLMChain is an "off-the-shelf" chain, i.e. a general-purpose chain
#   You can initially build fast with off the shelf products, but it is hard to customize
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

llm = ChatOpenAI(temperature=0.1)

memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    # let the memory know to put its content into "chat_history" in the template
    memory_key="chat_history",
)

template = """
    You are a helpful AI talking to a human.

    {chat_history}
    Human:{question}
    You:
"""

chain = LLMChain(
    llm=llm,
    memory=memory,
    prompt=PromptTemplate.from_template(template),
    # get the log of what the chain is doing
    verbose=True,
)

chain.predict(question="My name is Nico")
chain.predict(question="I live in Seoul")
chain.predict(question="What is my name?")

In [None]:
# Use chat based memory
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

llm = ChatOpenAI(temperature=0.1)

memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    memory_key="chat_history",
    return_messages=True,
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to a human"),
        # Placeholder for unknown size of message from unknown sender (system, human, ai, etc.)
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

chain = LLMChain(
    llm=llm,
    memory=memory,
    prompt=prompt,
    verbose=True,
)

chain.predict(question="My name is Nico")

chain.predict(question="I live in Seoul")

chain.predict(question="What is my name?")

In [None]:
# Using a custom chain instead of "Off-the-shelf" LLMChain
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

llm = ChatOpenAI(temperature=0.1)

memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    return_messages=True,
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to a human"),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

# RunnablePassthrough.assign calls load_memory() with one input that is not
# needed here, hence the throwaway parameter name.
def load_memory(_):
    """Return the value stored under "history" in the memory variables."""
    variables = memory.load_memory_variables({})
    return variables["history"]

chain = RunnablePassthrough.assign(history=load_memory) | prompt | llm

def invoke_chain(question):
    """Run ``chain`` on ``question``, save the exchange in ``memory`` and print the reply."""
    reply = chain.invoke({"question": question})
    # Memory is managed by hand here, so this turn is saved explicitly.
    human_turn = {"input": question}
    ai_turn = {"output": reply.content}
    memory.save_context(human_turn, ai_turn)
    print(reply)

# "My name is nico" is input to the first item in the chain: load_memory()
invoke_chain("My name is nico")
invoke_chain("What is my name?")

#### Retrieval

In [None]:
# Retrieval module
# working with outside source: document loaders, text embedding, etc.

# 1. Load: From outside source, "Load" data
#   - Use Document Loader
from langchain.document_loaders import TextLoader, UnstructuredFileLoader

loader = UnstructuredFileLoader("./files/chapter_one.txt")
loader2 = TextLoader("./files/chapter_one.txt")

loader.load()

In [15]:
# 2. Transform: Split the data into sections
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50
)

from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(
    chunk_size=1000,
    separator="\n",
    chunk_overlap=100,
)

from langchain.text_splitter import CharacterTextSplitter

# now chunk_size is number of tokens, instead of number of characters
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=200,
    separator="\n",
    chunk_overlap=100,
)

# docs = loader.load()
# splitter.split_documents(docs)

loader.load_and_split(text_splitter=splitter)

Created a chunk of size 218, which is longer than the specified 200
Created a chunk of size 212, which is longer than the specified 200
Created a chunk of size 254, which is longer than the specified 200
Created a chunk of size 241, which is longer than the specified 200
Created a chunk of size 215, which is longer than the specified 200
Created a chunk of size 215, which is longer than the specified 200
Created a chunk of size 252, which is longer than the specified 200
Created a chunk of size 322, which is longer than the specified 200
Created a chunk of size 304, which is longer than the specified 200
Created a chunk of size 237, which is longer than the specified 200
Created a chunk of size 285, which is longer than the specified 200
Created a chunk of size 306, which is longer than the specified 200
Created a chunk of size 293, which is longer than the specified 200
Created a chunk of size 215, which is longer than the specified 200
Created a chunk of size 207, which is longer tha

[Document(page_content='Part 1, Chapter 1\nPart One\n1 It was a bright cold day in April, and the clocks were striking thirteen. Winston Smith, his chin nuzzled into his breast in an effort to escape the vile wind, slipped quickly through the glass doors of Victory Mansions, though not quickly enough to prevent a swirl of gritty dust from entering along with him.', metadata={'source': './files/chapter_one.txt'}),
 Document(page_content='The hallway smelt of boiled cabbage and old rag mats. At one end of it a coloured poster, too large for indoor display, had been tacked to the wall. It depicted simply an enormous face, more than a metre wide: the face of a man of about forty-five, with a heavy black moustache and ruggedly handsome features. Winston made for the stairs. It was no use trying the lift. Even at the best of times it was seldom working, and at present the electric current was cut off during daylight hours. It was part of the economy drive in preparation for Hate Week. The fl

In [None]:
# 3. Embed: Embed the data into word vectors
from langchain.embeddings import OpenAIEmbeddings

embedder = OpenAIEmbeddings()

vector = embedder.embed_documents([
    "hi", "how", "are", "you longer sentences because"
])

print(vector, len(vector))

In [27]:
# 4. Store: Store the embedding into database
from langchain.vectorstores import Chroma, FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore

cache_dir = LocalFileStore("./.cache/")

loader = UnstructuredFileLoader("./files/chapter_one_small.txt")
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=50,
    separator="\n",
    chunk_overlap=10,
)
docs = loader.load_and_split(text_splitter=splitter)
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small"
)

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# vectorstore = Chroma.from_documents(docs, cached_embeddings)
vectorstore = FAISS.from_documents(docs, cached_embeddings)

Created a chunk of size 64, which is longer than the specified 50


In [23]:
# 5. Retrieve: Get data from storage
results = vectorstore.similarity_search("what month is it")
results

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[Document(page_content='It was a bright cold day in April, and the clocks were striking thirteen. Winston Smith, his chin nuzzled into his breast in an effort to escape the vile wind, slipped quickly through the glass doors of Victory Mansions, though not quickly enough to prevent a swirl of gritty dust from entering along with him.', metadata={'source': './files/chapter_one_small.txt'}),
 Document(page_content='The hallway smelt of boiled cabbage and old rag mats. At one end of it a coloured poster, too large for indoor display, had been tacked to the wall. It depicted simply an enormous face, more than a metre wide: the face of a man of about forty-five, with a heavy black moustache and ruggedly handsome features.', metadata={'source': './files/chapter_one_small.txt'})]

#### Chain Module

In [25]:
# Chain module
#   Documents chain module
#   1. Stuff Document chain
#       based on user query, find all documents related to query
#       stuff all the documents inside the prompt
#       this makes the prompt pretty big
from langchain.chains import RetrievalQA

llm = ChatOpenAI(
    temperature=0.1
)
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

chain.run("what month is it")



'It is April.'

In [28]:
#   2. Refine Document chain
#       based on user query, find all documents related to query
#       for each document found, the model tries to answer intermediate answers
#       in the for loop we also feed in previous document's intermediate answer into the next document's query
#       this costs more money
from langchain.chains import RetrievalQA

llm = ChatOpenAI(
    temperature=0.1
)
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=vectorstore.as_retriever()
)

chain.run("what month is it")



'The context provided does not relate to the question "what month is it." Therefore, the original answer "It is April" remains appropriate.'

In [29]:
#   3. Map reduce Document chain
#       based on user query, find all documents related to query
#       summarize each one of the documents
#       then combine all the summaries and ask one more time
#       this costs more money
from langchain.chains import RetrievalQA

llm = ChatOpenAI(
    temperature=0.1
)
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_reduce",
    retriever=vectorstore.as_retriever()
)

chain.run("what month is it")



"I'm sorry, but I do not have real-time information."

In [None]:
#   4. Map re-rank Document chain
#       based on user query, find all documents related to query
#       summarize each one of the documents
#       score each answer, and output the best answer
#       this costs more money
from langchain.chains import RetrievalQA

llm = ChatOpenAI(
    temperature=0.1
)
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_rerank",
    retriever=vectorstore.as_retriever()
)

chain.run("what month is it")

In [30]:
# Stuff document chain using LCEL (Langchain expression language)
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

llm = ChatOpenAI(
    temperature=0.1,
)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/chapter_one_small.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small"
)

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
        ),
        ("human", "{question}"),
    ]
)

# The dict feeds the prompt: the invoked question goes to the retriever to
# fill "context" and is passed through unchanged as "question".
chain = (
    {
        "context": retriever,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
)

chain.invoke("Describe Victory Mansions")



AIMessage(content='Victory Mansions is a place with glass doors that let in gritty dust. The hallway smells of boiled cabbage and old rag mats. There is a large colored poster on one end of the hallway depicting an enormous face of a man around forty-five years old, with a heavy black mustache and ruggedly handsome features.')

In [None]:
# Map reduce document chain using LCEL (Langchain expression language)
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

llm = ChatOpenAI(
    temperature=0.1,
)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/chapter_one.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()


map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

map_doc_chain = map_doc_prompt | llm


def map_docs(inputs):
    """Run ``map_doc_chain`` once per document and join the replies.

    ``inputs`` must provide "documents" (objects with a ``page_content``
    attribute) and "question". Returns the ``content`` of every reply,
    joined by blank lines.
    """
    documents = inputs["documents"]
    question = inputs["question"]
    extracts = []
    for document in documents:
        reply = map_doc_chain.invoke(
            {"context": document.page_content, "question": question}
        )
        extracts.append(reply.content)
    return "\n\n".join(extracts)


map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

chain = {"context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm

chain.invoke("How many ministries are mentioned")

### Other Modules

In [None]:
# Callbacks module
# knowing what the module is doing in the middle

In [None]:
# Agents module
# making llm AI autonomous

Function calling

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate


function = {
    "name": "create_quiz",
    "description": "function that takes a list of questions and answers and returns a quiz",
    "parameters": {
        "type": "object",
        "properties": {
            "questions": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "question": {
                            "type": "string",
                        },
                        "answers": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "answer": {
                                        "type": "string",
                                    },
                                    "correct": {
                                        "type": "boolean",
                                    },
                                },
                                "required": ["answer", "correct"],
                            },
                        },
                    },
                    "required": ["question", "answers"],
                },
            }
        },
        "required": ["questions"],
    },
}


llm = ChatOpenAI(
    temperature=0.1,
).bind(
    function_call={
        "name": "create_quiz",
    },
    functions=[
        function,
    ],
)

prompt = PromptTemplate.from_template("Make a quiz about {city}")

chain = prompt | llm

# Run the chain, then keep only the JSON string holding the arguments of the
# `create_quiz` function call.
message = chain.invoke({"city": "rome"})
response = message.additional_kwargs["function_call"]["arguments"]

response

In [None]:
import json

for question in json.loads(response)["questions"]:
    print(question)