# Resources

- [Langchain howtos](https://python.langchain.com/en/latest/modules/chains/how_to_guides.html)

# Load env

In [None]:
from langchain.llms import OpenAI
from dotenv import load_dotenv

In [None]:
# finds .env file and loads the vars
load_dotenv()

In [None]:
llm = OpenAI(temperature=0.9)

In [None]:
text = "What are the 5 vacation destinations for someone that likes to eat fish?"
print(llm(text))

In [None]:
from langchain.prompts import PromptTemplate

# Prompt templates

In [None]:
# Reusable prompt with a single placeholder, {food}, that is filled in when
# the template is formatted or run through a chain.
# The wording mirrors the hand-written question used earlier in the notebook.
prompt = PromptTemplate(
    input_variables=["food"],
    template="What are the 5 vacation destinations for someone that likes to eat {food}?",
)

In [None]:
print(prompt.format(food="steak"))

In [None]:
print(llm(prompt.format(food="steak")))

# Chains: combine llms and prompts in multi-step workflows

In [None]:
from langchain.chains import LLMChain

In [None]:
chain = LLMChain(llm=llm, prompt=prompt)

In [None]:
print(chain.run("fruit"))

# Agents: Dynamically call chains based on user input

In [None]:
from langchain.agents import load_tools, initialize_agent

In [None]:
# Here we need to redefine the temperature so that we get a deterministic model that can do math
llm = OpenAI(temperature=0)

In [None]:
tools = load_tools(["serpapi", "llm-math"], llm=llm)

In [None]:
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)

In [None]:
agent.run("Who is the current prime minister of Greece? What is the largest prime number that is less than their age?")

# Memory: add state to chains and agents

In [None]:
from langchain import ConversationChain

In [None]:
conversation = ConversationChain(llm=llm, verbose=True)

In [None]:
conversation.predict(input="Hi there!")

In [None]:
conversation.predict(input="My name is X. Nice to meet you too.")

In [None]:
conversation.predict(input="Can you repeat to me the first sentence that I said to you?")

In [None]:
conversation.predict(input="Can you rephrase the first sentence that I said to you?")

# Chat Messages

- System: AI persona
- Human: communicator
- AI: previous messages of the AI

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

chat = ChatOpenAI(temperature=.7)

In [None]:
chat(
    [
        SystemMessage(content="You are a nice AI bot that helps a user figure out what to eat in one short sentence"),
        HumanMessage(content="I like fish, what should I eat?")
    ]
)

In [None]:
chat(
    [
        SystemMessage(content="You are a nice AI bot that helps a user figure out where to travel in one short sentence"),
        HumanMessage(content="I like the scuba diving where should I go?"),
        AIMessage(content="You should go to Florida Keys, US"),
        HumanMessage(content="What else should I do when I'm there?")
    ]
)

# Models

## Language Model

text in -> text out

In [None]:
from langchain.llms import OpenAI

llm = OpenAI(model_name="text-ada-001")

In [None]:
llm("What food can be combined with red wine?")

## Chat model

Takes a series of messages as input -> returns a response

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

chat = ChatOpenAI(temperature=1)

In [None]:
chat(
    [
        SystemMessage(content="You are an nice AI bot that makes a joke at whatever the user says"),
        HumanMessage(content="I would like to go on a cave diving trip, how should I do this?")
    ]
)

## Text embedding

- text is transformed to a vector of numbers
- good for comparing text

In [None]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [None]:
text = "Hi! It's time to cook."

In [None]:
text_embedding = embeddings.embed_query(text)
print (f"Your embedding is length {len(text_embedding)}")
print (f"Here's a sample: {text_embedding[:5]}...")

# Example selectors

- gives format
- makes AI learn a pattern

In [None]:
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.llms import OpenAI

# Completion model used for the few-shot example below.
llm = OpenAI(model_name="text-davinci-003")

# Template used to render each individual example as an input/output pair.
example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template="Example Input: {input}\nExample Output: {output}",
)

# Example pairs: a noun ("input") and the location where it is usually found ("output").
examples = [
    {"input": "pirate", "output": "ship"},
    {"input": "pilot", "output": "plane"},
    {"input": "driver", "output": "car"},
    {"input": "tree", "output": "ground"},
    {"input": "bird", "output": "nest"},
]

In [None]:
# SemanticSimilarityExampleSelector will select examples that are similar to your input by semantic meaning

example_selector = SemanticSimilarityExampleSelector.from_examples(
    # This is the list of examples available to select from.
    examples, 
    
    # This is the embedding class used to produce embeddings which are used to measure semantic similarity.
    OpenAIEmbeddings(), 
    
    # This is the VectorStore class that is used to store the embeddings and do a similarity search over.
    FAISS, 
    
    # This is the number of examples to produce.
    k=2
)

In [None]:
similar_prompt = FewShotPromptTemplate(
    # The object that will help select examples
    example_selector=example_selector,
    
    # Your prompt
    example_prompt=example_prompt,
    
    # Customizations that will be added to the top and bottom of your prompt
    prefix="Give the location an item is usually found in",
    suffix="Input: {noun}\nOutput:",
    
    # What inputs your prompt will receive
    input_variables=["noun"],
)

In [None]:
# Select a noun!
my_noun = "cave diver"

print(similar_prompt.format(noun=my_noun))

In [None]:
llm(similar_prompt.format(noun=my_noun))

# Output parsers

Formatting output data for further use. Concepts:
1. Format instructions: how to format
2. Parser: method that will extract the model's output

In [None]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI

In [None]:
llm = OpenAI(model_name="text-davinci-003")

In [None]:
# How you would like your response structured. This is basically a fancy prompt template.
# Each ResponseSchema names one field of the structured output and describes it.
response_schemas = [
    ResponseSchema(name="bad_string", description="This a poorly formatted user input string"),
    ResponseSchema(name="good_string", description="This is your response, a reformatted response")
]

# How you would like to parse your output: the parser is built from the schemas above.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [None]:
format_instructions = output_parser.get_format_instructions()
format_instructions

In [None]:
# Prompt text with two placeholders: {format_instructions} and {user_input}.
template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

{format_instructions}

% USER INPUT:
{user_input}

YOUR RESPONSE:
"""

# {format_instructions} is supplied up front through partial_variables, so
# only {user_input} has to be provided when the prompt is formatted.
prompt = PromptTemplate(
    input_variables=["user_input"],
    partial_variables={"format_instructions": format_instructions},
    template=template
)

# Sample misspelled input for the model to clean up.
promptValue = prompt.format(user_input="welcom to califonya!")

# Show the fully rendered prompt that will be sent to the model.
print(promptValue)

In [None]:
llm_output = llm(promptValue)
llm_output

In [None]:
output_parser.parse(llm_output)

# Indexes

Structuring docs so that LLMs can work with them

## Document loaders

Loads data from different sources:
- [List of doc loaders](https://python.langchain.com/en/latest/modules/indexes/document_loaders.html)
- [LLama index](https://llamahub.ai/)

In [None]:
from langchain.document_loaders import HNLoader

In [None]:
loader = HNLoader("https://news.ycombinator.com/item?id=34422627")

In [None]:
data = loader.load()

In [None]:
print(f"Found {len(data)} comments")

In [None]:
# Preview the first 150 characters of each of the first two loaded documents.
print("Here is a sample: \n\n " + ''.join(doc.page_content[:150] for doc in data[:2]))

## Text splitters

Sometimes text is too long, like a book for example. Text splitters divide it into manageable chunks.

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# This is a long document we can split up.
# Read it with an explicit encoding so the result does not depend on the
# platform's default locale encoding.
with open('data/worked.txt', encoding='utf-8') as f:
    pg_work = f.read()

# The whole file is held as one string, i.e. a single document.
print (f"You have {len([pg_work])} document")

In [None]:
# Configure the splitter that breaks the long text into small chunks.
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = 150,
    # NOTE(review): presumably the amount of text shared between neighbouring
    # chunks — confirm units against the splitter documentation.
    chunk_overlap  = 20,
)

# create_documents is given a list of raw strings (here just the one loaded text).
texts = text_splitter.create_documents([pg_work])

In [None]:
print ("Preview:")
print (texts[0].page_content, "\n")
print (texts[1].page_content)

## Retrievers

Combine docs with LLMs

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

loader = TextLoader('data/worked.txt')
documents = loader.load()

In [None]:
# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

# Split your docs into texts
texts = text_splitter.split_documents(documents)

# Get embedding engine ready
embeddings = OpenAIEmbeddings()

In [None]:
# Embed your texts
db = FAISS.from_documents(texts, embeddings)

In [None]:
# Init your retriever with its default search settings (no limit on the number of documents is set here)
retriever = db.as_retriever()

In [None]:
retriever

In [None]:
docs = retriever.get_relevant_documents("what types of things did the author want to build?")

In [None]:
# Show the first 200 characters of each of the top two retrieved documents,
# separated by a blank line.
print("\n\n".join(doc.page_content[:200] for doc in docs[:2]))

# Vectorstores

Vector databases. Chroma & FAISS are used for local storage; Pinecone and Weaviate are the most popular solutions. They have columns for embedding vectors and columns for metadata. Ex:

| Embedding      | Metadata |
| ----------- | ----------- |
| [-0.00015641732898075134, -0.003165106289088726, ...]      | {'date' : '1/2/23'}       |
| [-0.00035465431654651654, 1.4654131651654516546, ...]   | {'date' : '1/3/23'}        |

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

loader = TextLoader('data/worked.txt')
documents = loader.load()

# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

# Split your docs into texts
texts = text_splitter.split_documents(documents)

# Get embedding engine ready
embeddings = OpenAIEmbeddings()

In [None]:
print(f"You have {len(texts)} documents")

In [None]:
embeddings_list = embeddings.embed_documents([text.page_content for text in texts])

In [None]:
print(f"You have {len(embeddings_list)} embeddings")
print(f"Here is a sample of one {embeddings_list[0][:3]}")

# Memory

Memory: remember what was said previously. Multiple types of memory:
- LLM
- Chat
- Multi input

In [None]:
from langchain.memory import ChatMessageHistory
from langchain.chat_models import ChatOpenAI

chat = ChatOpenAI(temperature=0)

history = ChatMessageHistory()

history.add_ai_message("hi!")

history.add_user_message("what is the capital of france?")

In [None]:
history.messages

In [None]:
ai_response = chat(history.messages)
ai_response

In [None]:
history.add_ai_message(ai_response.content)
history.messages

# Chains
Combining LLM calls and actions

Ex. Summary 1, Summary 2, Summary 3 > Final Summary

## Simple Sequential Chains

Use the output of one LLM call as the input to another. Good for breaking large tasks into smaller ones and keeping your AI focused.

In [None]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import SimpleSequentialChain

llm = OpenAI(temperature=1)

In [None]:
template = """Your job is to come up with a classic dish from the area that the users suggests.
% USER LOCATION
{user_location}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_location"], template=template)

# Holds my location chain
location_chain = LLMChain(llm=llm, prompt=prompt_template)

In [None]:
template = """Given a meal, give a short and simple recipe on how to make that dish at home.
% MEAL
{user_meal}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_meal"], template=template)

# Holds my 'meal' chain
meal_chain = LLMChain(llm=llm, prompt=prompt_template)

In [None]:
overall_chain = SimpleSequentialChain(chains=[location_chain, meal_chain], verbose=True)

In [None]:
review = overall_chain.run("Athens")

## Summarization Chains

Easily run through multiple docs to get a summary.

In [None]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

loader = TextLoader('data/disc.txt')
documents = loader.load()

In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)

In [None]:
texts = text_splitter.split_documents(documents)

In [None]:
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=True)
chain.run(texts)

# Agents

- not knowing the unknowns: make an agent that can access custom tools
- it makes decisions on its own
- Agent is the language model that drives the decision making
- What is a tool?
  - Agent skills
  - OpenAI plugins
- Toolkit: set of tools for agent to select from

In [None]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.llms import OpenAI
import json

llm = OpenAI(temperature=0)

In [None]:
toolkit = load_tools(["serpapi"], llm=llm)


In [None]:
agent = initialize_agent(toolkit, llm, agent="zero-shot-react-description", return_intermediate_steps=True)

In [None]:
response = agent({"input": "what is the first album of the band that Natalie Bergman is a part of?"})

In [None]:
print(json.dumps(response["intermediate_steps"], indent=2))