In [3]:
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [4]:
from langchain.schema import Document

In [5]:
Document(
    title="About me",
    page_content="I'm a software developer and a data scientist. I'm interested in machine learning, data science, and software development. I'm currently working as a software developer at a startup in Berlin.",
    metadata={
        "id": "2",
        "author": "Johb Doe",
        "date": "2020-01-01",
    },
)


Document(page_content="I'm a software developer and a data scientist. I'm interested in machine learning, data science, and software development. I'm currently working as a software developer at a startup in Berlin.", metadata={'id': '2', 'author': 'Johb Doe', 'date': '2020-01-01'})

In [6]:
from langchain.llms import OpenAI

llm = OpenAI(model_name="gpt-3.5-turbo-0301")




In [7]:
llm("What day comes after Monday?")

'Tuesday.'

In [10]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

chat = ChatOpenAI(temperature=1, model_name="gpt-3.5-turbo-0301")

In [12]:
chat(
    [
        SystemMessage(
            content="You're a sarcastic bot, that make jokes about whatever user says."
        ),
        HumanMessage(content="How can I go to the moon?"),
    ]
)


AIMessage(content="Oh, it's simple. Just wait for commercial flights to be available for the masses, save up a few million dollars, and you're good to go. Piece of cake, really.", additional_kwargs={})

In [13]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [16]:
text = "Hello world!"

embeddings.embed_query(text)

[0.006555966101586819,
 0.003670461941510439,
 -0.011642491444945335,
 -0.026776473969221115,
 -0.012383491732180119,
 -0.0014341175556182861,
 -0.013375678099691868,
 0.009356695227324963,
 -0.006364436354488134,
 -0.0294390507042408,
 0.023950627073645592,
 0.0029859787318855524,
 -0.023234745487570763,
 -0.009205983020365238,
 0.006744355894625187,
 0.0011790062999352813,
 0.02607315219938755,
 -0.018437083810567856,
 0.008904559537768364,
 0.009620440192520618,
 -0.01306169480085373,
 -0.0011358336778357625,
 0.007253008428961039,
 0.00875384733080864,
 -0.012710033915936947,
 0.0037206991109997034,
 0.005419347435235977,
 -0.017243949696421623,
 0.036246202886104584,
 -0.0266759991645813,
 0.012647237628698349,
 -0.008552898652851582,
 -0.00762350857257843,
 -0.012546762824058533,
 0.007083457428961992,
 -0.014078999869525433,
 0.0048761568032205105,
 -0.013689660467207432,
 0.018211016431450844,
 -0.014367864467203617,
 0.008307991549372673,
 0.006022194866091013,
 0.005457025486

In [20]:
from langchain.llms import OpenAI

llm = OpenAI(model_name="gpt-3.5-turbo-0301")

prompt = """
Today is Monday, Yesterday was Saturday.

What is wrong with this sentence?
"""

llm(prompt)

'It implies that Sunday did not happen.'

In [22]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

template = PromptTemplate(
    template="I really like {thing}. I think they're {adjective}.",
    input_variables=["thing", "adjective"],
)

prompt = template.format(thing="dogs", adjective="cute")

print(prompt)


I really like dogs. I think it's cute.


In [30]:
%pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.7.3-cp310-cp310-macosx_11_0_arm64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [36]:
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.llms import OpenAI

llm = OpenAI(model_name="gpt-3.5-turbo-0301")

example_prompt = PromptTemplate(
    template="Example input: {input}, Example output: {output}",
    input_variables=["input", "output"],
)

examples = [
    {"input": "pirate", "output": "sea"},
    {"input": "cat", "output": "street"},
    {"input": "dog", "output": "park"},
    {"input": "car", "output": "road"},
    {"input": "plane", "output": "sky"},
    {"input": "train", "output": "railway"},
]



In [53]:
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),
    FAISS,
    k=3,
)

In [54]:
similar_prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix="Give exactly one location an item is usually found in.",
    suffix="Input: {item}, Output: ",
    input_variables=["item"],
)


In [55]:
item = "bird"
prompt = similar_prompt.format(item=item)
print(prompt)

llm(prompt)

Give exactly one location an item is usually found in.

Example input: plane, Example output: sky

Example input: dog, Example output: park

Example input: cat, Example output: street

Input: bird, Output: 


'tree'

In [67]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

In [85]:
llm = OpenAI(model_name="gpt-3.5-turbo-0301")

response_schemas = [
    ResponseSchema(name="bad_str", description="This is poorly formatted user input."),
    ResponseSchema(
        name="good_str",
        description="This is correctly formatted AI response.",
    ),
]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)




In [88]:
format_instruction = output_parser.get_format_instructions()
print(format_instruction)

The output should be a markdown code snippet formatted in the following schema:

```json
{
	"bad_str": string  // This is poorly formatted user input.
	"good_str": string  // This is correctly formatted AI response.
}
```


In [89]:
template = """
You are given a poorly formatted string. Your task is to format it and make sure words are spelled correctly.

{format_instruction}

% User input: {bad_str}

% AI response:
"""

prompt_template = PromptTemplate(
    template=template,
    input_variables=["bad_str"],
    partial_variables={"format_instruction": format_instruction},
)

In [90]:
prompt = prompt_template.format(bad_str="welcom to califonai fokls!")

print(prompt)

llm_output = llm(prompt)
llm_output


You are given a poorly formatted string. Your task is to format it and make sure words are spelled correctly.

The output should be a markdown code snippet formatted in the following schema:

```json
{
	"bad_str": string  // This is poorly formatted user input.
	"good_str": string  // This is correctly formatted AI response.
}
```

% User input: welcom to califonai fokls!

% AI response:



'{\n\t"bad_str": "welcom to califonai fokls!",\n\t"good_str": "Welcome to California, folks!"\n}'

In [98]:
output_parser.parse(llm_output)

IndexError: list index out of range

In [None]:
%pip install bs4

In [97]:
from langchain.document_loaders import HNLoader

loader = HNLoader("https://news.ycombinator.com/item?id=22338084")
data = loader.load()
data

[Document(page_content='rhizome on Feb 16, 2020  \n[–] \n\nYou still believe that take on the story?', metadata={'source': 'https://news.ycombinator.com/item?id=22338084', 'title': 'Not burn their customers so badly they were put in the hospital.'}),
 Document(page_content='amanaplanacanal on Feb 16, 2020  \n             | parent [–] \n\nAbsolutely', metadata={'source': 'https://news.ycombinator.com/item?id=22338084', 'title': 'Not burn their customers so badly they were put in the hospital.'}),
 Document(page_content='rhizome on Feb 17, 2020  \n             | root | parent [–] \n\nOK that\'s weird. I was wrong to dispute it, but I think the story has gone around and around on my radar so many times that I was confused as to which was the right story. Kind of an "eggs are bad" "eggs are good" depending on which decade you\'re talking about. Mea culpa!', metadata={'source': 'https://news.ycombinator.com/item?id=22338084', 'title': 'Not burn their customers so badly they were put in the 

In [99]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [109]:
with open("./paul-graham-worked-essay.txt") as f:
    pg_worked = f.read()

splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=150,
)

texts = splitter.create_documents([pg_worked])

print(f"Number of documents: {len(texts)}")
print(f"Preview of first document: {texts[0].page_content}")
print(f"Preview of second document: {texts[1].page_content}")


Number of documents: 7173
Preview of first document: What I Worked On

February 2021
Preview of second document: Before college the two main things I worked on, outside of school, were writing and programming. I didn't write essays. I wrote what beginning writers


In [110]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

loader = TextLoader("./paul-graham-worked-essay.txt")
documents = loader.load()

In [111]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(texts, embeddings)
retriever = db.as_retriever()
retriever

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


VectorStoreRetriever(vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x12886fb20>, search_type='similarity', search_kwargs={})

In [112]:
docs = retriever.get_relevant_documents("What type of thing did the author want to build?")
print("\n\n".join([doc.page_content for doc in docs]))

I started working on the application builder, Dan worked on network infrastructure, and the two undergrads worked on the first two services (images and phone calls). But about halfway through the summer I realized I really didn't want to run a company — especially not a big one, which it was looking like this would have to be. I'd only started Viaweb because I needed the money. Now that I didn't need money anymore, why was I doing this? If this vision had to be realized as a company, then screw the vision. I'd build a subset that could be done as an open source project.

Much to my surprise, the time I spent working on this stuff was not wasted after all. After we started Y Combinator, I would often encounter startups working on parts of this new architecture, and it was very useful to have spent so much time thinking about it and even trying to write some of it.

We were working out of Robert's apartment in Cambridge. His roommate was away for big chunks of time, during which I got to

In [113]:
from langchain.memory import ChatMessageHistory
from langchain.chat_models import ChatOpenAI

chat = ChatOpenAI(temperature=0)
history = ChatMessageHistory()
history.add_ai_message("Hi")
history.add_user_message("What is the capital of France?")
history.messages

[AIMessage(content='Hi', additional_kwargs={}),
 HumanMessage(content='What is the capital of France?', additional_kwargs={})]

In [115]:
ai_response = chat(history.messages)
ai_response

AIMessage(content='The capital of France is Paris.', additional_kwargs={})

In [117]:
history.add_ai_message(ai_response.content)
history.messages

[AIMessage(content='Hi', additional_kwargs={}),
 HumanMessage(content='What is the capital of France?', additional_kwargs={}),
 AIMessage(content='The capital of France is Paris.', additional_kwargs={})]

In [125]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain

llm = OpenAI(model_name="gpt-3.5-turbo-0301")

template = """Your job is to come up with a dish from the area that user suggests.
% User input: {location}
% AI response:"""

prompt_template = PromptTemplate(
    template=template,
    input_variables=["location"],
)

location_chain = LLMChain(llm=llm, prompt=prompt_template)

In [126]:
template = """Given the meal your job is to come up with a simple recipe on how to make it in house.
% Meal: {user_location}
% AI response:"""
prompt_template = PromptTemplate(
    template=template,
    input_variables=["user_location"],
)
meal_chain = LLMChain(llm=llm, prompt=prompt_template)

In [128]:
overall_chain = SimpleSequentialChain(chains=[location_chain, meal_chain], verbose=True)
overall_chain.run("Iran")




[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mHow about a traditional Iranian dish called "Chelo kebab"? It consists of tender grilled skewered meat (usually beef or chicken) served on a bed of fluffy saffron-infused steamed rice, accompanied by grilled tomatoes and onions. This flavorful and aromatic dish is a staple in Iranian cuisine and is often enjoyed during special occasions and gatherings with friends and family.[0m
[33;1m[1;3mHere is a simple recipe for making Chelo kebab at home:

Ingredients:
- 1 pound of beef or chicken, cut into 1-inch cubes
- 1/4 cup of olive oil
- 1 tablespoon of freshly squeezed lemon juice
- 1 tablespoon of tomato paste
- 1 teaspoon of ground turmeric
- 1/2 teaspoon of ground cinnamon
- 1/2 teaspoon of ground cumin
- Salt and pepper, to taste
- 2 cups of long-grain white rice
- 1/4 teaspoon of saffron threads
- 3 tablespoons of butter
- 3 tomatoes, cut into wedges
- 2 onions, cut into wedges

Instructions:

1. In a large bowl,

'Here is a simple recipe for making Chelo kebab at home:\n\nIngredients:\n- 1 pound of beef or chicken, cut into 1-inch cubes\n- 1/4 cup of olive oil\n- 1 tablespoon of freshly squeezed lemon juice\n- 1 tablespoon of tomato paste\n- 1 teaspoon of ground turmeric\n- 1/2 teaspoon of ground cinnamon\n- 1/2 teaspoon of ground cumin\n- Salt and pepper, to taste\n- 2 cups of long-grain white rice\n- 1/4 teaspoon of saffron threads\n- 3 tablespoons of butter\n- 3 tomatoes, cut into wedges\n- 2 onions, cut into wedges\n\nInstructions:\n\n1. In a large bowl, whisk together the olive oil, lemon juice, tomato paste, turmeric, cinnamon, cumin, salt, and pepper to make the marinade.\n2. Add the cubed beef or chicken to the marinade and let it marinate for at least 30 minutes (or up to 24 hours) in the refrigerator.\n3. Meanwhile, rinse the rice several times in cold water until the water runs clear. In a large pot, bring 4 cups of water to a boil. Add the rice and saffron threads and stir to combin