In [7]:
import os
from getpass import getpass

# Prompt for the key without echoing it, then publish it through the process
# environment so later cells can read os.environ["OPENAI_API_KEY"].
os.environ["OPENAI_API_KEY"] = getpass(prompt="OpenAI API Key: ")

### Chat Messages

In [4]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, AIMessage, HumanMessage

# Chat client used by the message examples in this section; the key is read
# from the environment variable populated in the first cell.
chat = ChatOpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="gpt-3.5-turbo",
    temperature=0.7,
)

In [10]:
# Single-turn exchange: a system instruction followed by one user message.
response = chat(
    [
        SystemMessage(
            content="You are a nice AI bot that helps a user to figure out what to eat in one short sentence."
        ),
        HumanMessage(content="I like tomatoes, what should I eat?"),
    ]
)

# Bare trailing expression so the notebook echoes the trimmed reply text.
response.content.strip()

'You could try a caprese salad with fresh mozzarella and basil!'

In [12]:
# Multi-turn conversation: the earlier assistant reply is passed back in as an
# AIMessage so the final user question is sent together with that context.
history = [
    SystemMessage(
        content="You are a nice AI bot that helps a user figure out where to travel in one short sentence."
    ),
    HumanMessage(content="I like the beaches where should I go?"),
    AIMessage(content="You should go to Nice, France."),
    HumanMessage(content="What else should I do when I'm there?"),
]

response = chat(history)
print(response.content.strip())

While in Nice, you can also explore the old town, visit the museums, and enjoy the delicious Provencal cuisine.


### Documents

In [17]:
from langchain.schema import Document

# A Document pairs a text payload (page_content) with a free-form metadata dict.
my_document = Document(
    metadata={"id": 1234, "source": "www.wikipedia.com", "create_time": 20230402},
    page_content="this the content of my document.",
)

# Trailing expression: display the object's repr.
my_document

Document(page_content='this the content of my document.', metadata={'id': 1234, 'source': 'www.wikipedia.com', 'create_time': 20230402})

### Language Models

In [21]:
from langchain.llms import OpenAI

# Text-completion client (string in, string out) for this section.
llm = OpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-davinci-003",
    temperature=0.7,
)

In [24]:
# Send a plain string to the completion model; the call returns a string.
query = "What day comes after Friday?"
response = llm(query)

# Trim surrounding whitespace before printing the completion.
print(response.strip())

Saturday


### Chat Models

In [30]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, AIMessage, HumanMessage

# Separate chat client for this section, configured with temperature 1.0.
chat_llm = ChatOpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="gpt-3.5-turbo",
    temperature=1.,
)

In [33]:
# Rebinds `history` with a new two-message conversation for the joke bot.
history = [
    SystemMessage(
        content="You are an unhelpful AI bot that makes a joke at whatever the user says."
    ),
    HumanMessage(
        content="I would like to go to New York, how should I do this?"
    ),
]

response = chat_llm(history)
print(response.content.strip())

Why don't you try walking there? It's great exercise and you'll get to see a lot of interesting sights along the way.


In [32]:
# Display the whole message object returned by chat_llm, not just its .content.
response

AIMessage(content="Walk, duh! Just put on some comfortable shoes and start walking to New York. You'll get there eventually... or not.", additional_kwargs={})

### Embedding Models

In [36]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding client for the examples below. The API key is passed explicitly,
# matching how every other OpenAI client in this notebook is constructed.
embed_model = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

In [37]:
# Sample sentence to embed.
text = "Hi! It's time for the beach."

# Embed the single string; the next cell inspects the length of the result.
text_embed = embed_model.embed_query(text)

In [39]:
# Number of entries in the embedding returned for `text`.
len(text_embed)

1536

### Prompts

In [45]:
from langchain.llms import OpenAI

# Completion client for the raw-prompt example.
llm = OpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-davinci-003",
    temperature=0.7,
)

# Hand-written prompt; .strip() removes the newlines that the triple-quoted
# layout adds at both ends.
prompt = """
Today is Monday, tomorrow is Wednesday.
What is wrong with that statement?
""".strip()

response = llm(prompt)
print(response.strip())

That statement is incorrect; tomorrow is Tuesday, not Wednesday.


In [44]:
# Display the raw completion string as returned, before any .strip().
response

'\n\nTuesday is missing. The correct statement would be: Today is Monday, tomorrow is Tuesday, and Wednesday.'

### Prompt Template

In [49]:
from langchain.llms import OpenAI
from langchain import PromptTemplate

# Completion client for the templated-prompt example.
llm = OpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-davinci-003",
    temperature=.7,
)

# Template text with a single {location} placeholder.
template = """
I really want to travel to {location}. What should I do there?
Response in one short sentence.
""".strip()

prompt = PromptTemplate(
    input_variables=["location"],
    template=template,
)

# Fill the placeholder and show the exact text that is sent to the model.
final_prompt = prompt.format(location="Rome")
print(final_prompt)

response = llm(final_prompt)
print(response.strip())

I really want to travel to Rome. What should I do there?
Response in one short sentence.
Visit the Colosseum, the Pantheon, and the Trevi Fountain.


### Example Selectors

In [3]:
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector

In [22]:
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI

# Completion client used to answer the few-shot prompt built below.
llm = OpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-davinci-003",
)

# Embedding client handed to the example selector.
embed_model = OpenAIEmbeddings(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model="text-embedding-ada-002",
)

# Layout used to render each individual example inside the few-shot prompt.
example_template = """
Input: {input}
Output: {output}
""".strip()

example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template=example_template,
)

# Candidate examples, built from (input, output) pairs; each entry is a dict
# with the "input" and "output" keys that example_prompt expects.
examples = [
    {"input": noun, "output": place}
    for noun, place in (
        ("pirate", "ship"),
        ("pilot", "plane"),
        ("driver", "car"),
        ("tree", "ground"),
        ("bird", "nest"),
    )
]

In [23]:
# Build a selector over `examples`, backed by a FAISS store and the embedding
# client; k=2 is the number of examples requested per query.
example_selector = SemanticSimilarityExampleSelector.from_examples(
    k=2,
    vectorstore_cls=FAISS,
    embeddings=embed_model,
    examples=examples,
)

In [24]:
# Few-shot prompt: fixed prefix, the selector's examples rendered with
# example_prompt, then a suffix carrying the {noun} placeholder.
similar_prompt = FewShotPromptTemplate(
    prefix="Give the location an item is usually found in.",
    example_prompt=example_prompt,
    example_selector=example_selector,
    suffix="Input: {noun}\nOutput: ",
    input_variables=["noun"],
)

In [25]:
# Word to complete; it fills the {noun} placeholder of the few-shot prompt.
my_noun = "student"

# Show the fully rendered prompt, including the examples chosen for this noun.
print(similar_prompt.format(noun=my_noun))

Give the location an item is usually found in.

Input: driver
Output: car

Input: pilot
Output: plane

Input: student
Output: 


In [26]:
# Render the few-shot prompt again and send it to the completion model.
response = llm(similar_prompt.format(noun=my_noun))
print(response.strip())

classroom


### Output Parsers

In [29]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI

# Completion client for the structured-output example.
llm = OpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-davinci-003",
)

# The two fields the model is asked to return.
response_schemas = [
    ResponseSchema(
        name="bad_string",
        description="This is a poorly formatted user input string.",
    ),
    ResponseSchema(
        name="good_string",
        description="This is your response, a reformatted response.",
    ),
]

# The parser supplies both the instructions embedded in the prompt and the
# parsing of the model's reply later on.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema:

```json
{
	"bad_string": string  // This is a poorly formatted user input string.
	"good_string": string  // This is your response, a reformatted response.
}
```


In [30]:
# Prompt text with two placeholders: {format_instructions} is bound once via
# partial_variables, {user_input} is supplied on each format() call.
template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure that all the words are spelled properly.

{format_instructions}

USER INPUT:
{user_input}

YOUR RESPONSE:
""".strip()

prompt = PromptTemplate(
    partial_variables={"format_instructions": format_instructions},
    input_variables=["user_input"],
    template=template,
)

user_input = "welcom to califonya!"
final_prompt = prompt.format(user_input=user_input)

# Show the exact text that will be sent to the model.
print(final_prompt)

You will be given a poorly formatted string from a user.
Reformat it and make sure that all the words are spelled properly.

The output should be a markdown code snippet formatted in the following schema:

```json
{
	"bad_string": string  // This is a poorly formatted user input string.
	"good_string": string  // This is your response, a reformatted response.
}
```

USER INPUT:
welcom to califonya!

YOUR RESPONSE:


In [31]:
# Send the rendered prompt to the model and print its reply.
response = llm(final_prompt)
print(response.strip())

```json
{
	"bad_string": "welcom to califonya!",
	"good_string": "Welcome to California!"
}
```


In [32]:
# Parse the model's reply with the structured output parser and print the result.
print(output_parser.parse(response.strip()))

{'bad_string': 'welcom to califonya!', 'good_string': 'Welcome to California!'}


### Document Loaders

In [33]:
from langchain.document_loaders import HNLoader

# Load a Hacker News thread by URL (requires network access).
# NOTE(review): presumably one Document per post/comment — verify against HNLoader.
loader = HNLoader("https://news.ycombinator.com/item?id=34422627")
data = loader.load()

In [39]:
# Print the text of the second loaded item.
print(data[1].page_content)

Ozzie_osman 75 days ago  
             | prev | next [–] 

LangChain is awesome. For people not sure what it's doing, large language models (LLMs) are very powerful but they're very general. As a common example for this limitation, imagine you want your LLM to answer questions over a large corpus.You can't pass the entire corpus into the prompt. So you might:
- preprocess the corpus by iterating over documents, splitting them into chunks, and summarizing them
- embed those chunks/summaries in some vector space
- when you get a question, search your vector space for similar chunks
- pass those chunks to the LLM in the prompt, along with your questionThis ends up being a very common pattern, where you need to do some preprocessing of some information, some real-time collecting of pieces, and then an interaction with the LLM (in some cases, you might go back and forth with the LLM). For instance, code and semantic search follows a similar pattern (preprocess -> embed -> nearest-neighbors 

In [40]:
# Number of items the loader returned.
print(len(data))

76


### Text Splitters

In [45]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the essay into a single string.
# The path uses forward slashes: they are accepted on Windows and POSIX alike,
# match the TextLoader path used later in this notebook, and avoid backslash
# pairs such as "\P" and "\w" being parsed as (invalid) string escapes.
with open("data/PaulGrahamEssays/worked.txt", "r") as f:
    essay = f.read()

In [46]:
# Display the raw essay string (the repr shows embedded newlines as \n).
essay

'February 2021Before college the two main things I worked on, outside of school,\nwere writing and programming. I didn\'t write essays. I wrote what\nbeginning writers were supposed to write then, and probably still\nare: short stories. My stories were awful. They had hardly any plot,\njust characters with strong feelings, which I imagined made them\ndeep.The first programs I tried writing were on the IBM 1401 that our\nschool district used for what was then called "data processing."\nThis was in 9th grade, so I was 13 or 14. The school district\'s\n1401 happened to be in the basement of our junior high school, and\nmy friend Rich Draves and I got permission to use it. It was like\na mini Bond villain\'s lair down there, with all these alien-looking\nmachines \x97 CPU, disk drives, printer, card reader \x97 sitting up\non a raised floor under bright fluorescent lights.The language we used was an early version of Fortran. You had to\ntype programs on punch cards, then stack them in the 

In [48]:
# Splitter configured with a chunk size of 150 and an overlap of 20.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=150,
)

# create_documents takes a list of raw strings; here the list holds only the essay.
texts = text_splitter.create_documents([essay])

In [54]:
# How many chunks the splitter produced.
print(len(texts))
# Character length of every chunk; only the first ten are printed.
len_texts = [len(chunk.page_content) for chunk in texts]
print(len_texts[:10])

606
[145, 134, 134, 128, 132, 130, 114, 131, 144, 134]


### Retrievers

In [59]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Load the essay through a document loader this time. This rebinds `loader`,
# which previously held the HNLoader from the Document Loaders section.
loader = TextLoader("data/PaulGrahamEssays/worked.txt")
documents = loader.load()

In [60]:
# Display the loaded list of Document objects.
documents

[Document(page_content='February 2021Before college the two main things I worked on, outside of school,\nwere writing and programming. I didn\'t write essays. I wrote what\nbeginning writers were supposed to write then, and probably still\nare: short stories. My stories were awful. They had hardly any plot,\njust characters with strong feelings, which I imagined made them\ndeep.The first programs I tried writing were on the IBM 1401 that our\nschool district used for what was then called "data processing."\nThis was in 9th grade, so I was 13 or 14. The school district\'s\n1401 happened to be in the basement of our junior high school, and\nmy friend Rich Draves and I got permission to use it. It was like\na mini Bond villain\'s lair down there, with all these alien-looking\nmachines \x97 CPU, disk drives, printer, card reader \x97 sitting up\non a raised floor under bright fluorescent lights.The language we used was an early version of Fortran. You had to\ntype programs on punch cards, 

In [62]:
# Chunk size 1000 with overlap 50 for the retrieval example (the Text
# Splitters section above used 150 and 20).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=1000,
)

texts = text_splitter.split_documents(documents)

embed_model = OpenAIEmbeddings(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model="text-embedding-ada-002",
)

# Build a FAISS vector store from the chunks using the embedding client.
db = FAISS.from_documents(embedding=embed_model, documents=texts)

In [63]:
# Wrap the FAISS store in a retriever, using the default settings.
retriever = db.as_retriever()

In [64]:
# Display the retriever's repr to see its search configuration.
retriever

VectorStoreRetriever(vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x000001A67AFB6F80>, search_type='similarity', search_kwargs={})

In [65]:
# Ask the retriever for the chunks relevant to a natural-language question.
docs = retriever.get_relevant_documents("what types of things did the author want to build?")

In [71]:
# Print the first 200 characters of every retrieved chunk, with a dashed rule
# between consecutive chunks.
print(*(hit.page_content[:200] for hit in docs), sep="\n\n-----------\n\n")

standards; what was the point? No one else wanted one either, so
off they went. That was what happened to systems work.I wanted not just to build things, but to build things that would
last.In this di

-----------

much of it in grad school.Computer Science is an uneasy alliance between two halves, theory
and systems. The theory people prove things, and the systems people
build things. I wanted to build things. 

-----------

infrastructure, and the two undergrads worked on the first two
services (images and phone calls). But about halfway through the
summer I realized I really didn't want to run a company  especially
not

-----------

been generating for galleries. This impressive-sounding thing called
an "internet storefront" was something we already knew how to build.So in the summer of 1995, after I submitted the camera-ready co
