## 1. LLM Chain

In [1]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

In [2]:
load_dotenv()

True

In [3]:
llm = ChatOpenAI()
llm.invoke("How can langsmith helps with testing?")

AIMessage(content='Langsmith can help with testing in the following ways:\n\n1. Test case generation: Langsmith can automatically generate test cases based on the defined grammar and rules. This can help in quickly creating a large number of test cases for different scenarios and inputs.\n\n2. Test data generation: Langsmith can generate valid and invalid test data for specific data types and structures. This can help in testing the robustness of the system by providing a wide range of inputs.\n\n3. Test script generation: Langsmith can generate test scripts or code snippets in different programming languages. These scripts can be used to automate the testing process and execute test cases.\n\n4. Test coverage analysis: Langsmith can analyze the test coverage by tracking the execution of generated test cases. It can identify the parts of the system that have not been adequately tested and suggest additional test cases to improve coverage.\n\n5. Test result analysis: Langsmith can analy

### 1.1 Prompts

#### 1.1.1 Prompt Templates

In [4]:
from langchain.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template(
    "Tell me a {adjective} joke about {content}"
)
prompt_template.format(adjective="funny", content="chickens")

'Tell me a funny joke about chickens'

In [5]:
# The template supports a number of variables to no variables
prompt_template = PromptTemplate.from_template("Tell me a joke")
prompt_template.format()

'Tell me a joke'

In [6]:
prompt = (
    PromptTemplate.from_template("Tell me a joke about {topic}")
    + ", make it funny"
    + "\n\nand in {language}"
)
prompt

PromptTemplate(input_variables=['language', 'topic'], template='Tell me a joke about {topic}, make it funny\n\nand in {language}')

In [8]:
from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=prompt)
chain.run(topic="animals", language="english")

  warn_deprecated(


"Why don't scientists trust atoms?\n\nBecause they make up everything!"

#### 1.1.1 Chat Prompt Templates

In [29]:
from langchain_core.prompts import ChatPromptTemplate
new_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a general knowledge expert."),
    ("user", "{input}")
])

In [30]:
chain = new_prompt | llm
chain.invoke({"input": "What is the capital of Pakistan?"})

AIMessage(content='The capital of Pakistan is Islamabad.')

In [11]:
chat_template = ChatPromptTemplate.from_messages([
    ("system", "You're a helpful AI bot. Your name is {name}"),
    ("human", "Hello, how're your doing?"),
    ("ai", "I'm doing well, thanks"),
    ("human", "{user_input}")
])
messages = chat_template.format_messages(name="Bob", user_input="What is your name?")

In [12]:
from langchain.prompts import HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage

chat_template = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content=("You're a helpful assistant that re-writes the user's text to sound more upbeat")
        ),
        HumanMessagePromptTemplate.from_template("{text}")
    ]
)
messages = chat_template.format_messages(text="I don't like eating tasty things")
print(messages)

[SystemMessage(content="You're a helpful assistant that re-writes the user's text to sound more upbeat"), HumanMessage(content="I don't like eating tasty things")]


In [13]:
from langchain.schema import AIMessage, HumanMessage, SystemMessage

prompt = SystemMessage(content="You're a nice pirate")
new_prompt = (
    prompt + HumanMessage(content="hi") + AIMessage(content="what?") + "{input}"
)

In [14]:
new_prompt.format_messages(input="I said hi")

[SystemMessage(content="You're a nice pirate"),
 HumanMessage(content='hi'),
 AIMessage(content='what?'),
 HumanMessage(content='I said hi')]

In [15]:
chain = LLMChain(llm=llm, prompt=new_prompt)
chain.run("I said hi")

'Oh, hello there! How can I help you today?'

#### 1.1.3 LCEL

In [16]:
prompt_val = prompt_template.invoke({"adjective": "funny", "content": "chickens"}
                                    )
prompt_val

StringPromptValue(text='Tell me a joke')

In [17]:
prompt_val.to_string()

'Tell me a joke'

In [18]:
prompt_val.to_messages()

[HumanMessage(content='Tell me a joke')]

In [19]:
chat_val = chat_template.invoke({"text": "i dont like eating tasty things."})
chat_val.to_messages()

[SystemMessage(content="You're a helpful assistant that re-writes the user's text to sound more upbeat"),
 HumanMessage(content='i dont like eating tasty things.')]

In [20]:
chat_val.to_string()

"System: You're a helpful assistant that re-writes the user's text to sound more upbeat\nHuman: i dont like eating tasty things."

### 1.2 LLM'S

In [21]:
llm.invoke(
    "Can you tell which team won the 2011 Cricket World Cup?"
)

AIMessage(content='Yes, the Indian cricket team won the 2011 Cricket World Cup.')

In [22]:
for chunk in llm.stream(
    "Can you tell which team won the 2011 Cricket World Cup?"
):
    print(chunk, end="", flush=True)

content=''content='Yes'content=','content=' the'content=' Indian'content=' cricket'content=' team'content=' won'content=' the'content=' 'content='201'content='1'content=' Cricket'content=' World'content=' Cup'content='.'content=''

In [23]:
llm.batch(
    [
        "Can you tell which team won the 2011 Cricket World Cup?"
    ]
)

[AIMessage(content='Yes, the Indian cricket team won the 2011 Cricket World Cup.')]

In [24]:
await llm.ainvoke(
    "Can you tell which team won the 2011 Cricket World Cup?"
)

AIMessage(content='Yes, the Indian cricket team won the 2011 Cricket World Cup.')

### 1.3 Chat Models

In [25]:
from langchain_core.messages import HumanMessage, SystemMessage
messages = [
    SystemMessage(content = "You're an expert zoologist."),
    HumanMessage(content = "What is the scientific name of frog")
]

In [26]:
llm.invoke(messages)

AIMessage(content='The scientific name for frogs is Order Anura, which includes various families and species. Some common species include Rana temporaria (common frog) and Hyla versicolor (gray tree frog).')

### 1.4 Output Parser

In [27]:
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()

In [32]:
chain = new_prompt | llm | output_parser
chain.invoke({"input": "What's the capital of Pakistan?"})

'The capital of Pakistan is Islamabad.'

## 2. Retrieval Chain

### 2.1 Vector Stores

#### 2.1.1 Chromdb

In [67]:
from langchain_community.document_loaders import TextLoader 
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma

# Load the document split it into chunks, embed each chunk and load it in the vector store
raw_documents = TextLoader('./Info.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = Chroma.from_documents(documents, OpenAIEmbeddings())

[Document(page_content="The Earth is the third planet from the Sun in our solar system. The Great Wall of China is the longest wall in the world, stretching over 13,000 miles. The human brain weighs about 3 pounds and is responsible for various cognitive functions. Honey never spoils. Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still perfectly edible. The Pacific Ocean is the largest and deepest ocean on Earth. Bees are crucial for pollination, playing a vital role in the production of many crops.\nThe Eiffel Tower in Paris, France, was completed in 1889 and stands at 324 meters (1,063 feet) tall. The Amazon Rainforest produces 20% of the world's oxygen. The speed of light is approximately 299,792 kilometers per second (186,282 miles per second). The first manned moon landing occurred on July 20, 1969, during the Apollo 11 mission, with astronauts Neil Armstrong and Buzz Aldrin.", metadata={'source': './Info.txt'})]


##### 2.1.1.1 Similarity Search

In [68]:
query = "Where is the Eiffel Tower located?"
docs = db.similarity_search(query)
print(docs[0].page_content)

The Earth is the third planet from the Sun in our solar system. The Great Wall of China is the longest wall in the world, stretching over 13,000 miles. The human brain weighs about 3 pounds and is responsible for various cognitive functions. Honey never spoils. Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still perfectly edible. The Pacific Ocean is the largest and deepest ocean on Earth. Bees are crucial for pollination, playing a vital role in the production of many crops.
The Eiffel Tower in Paris, France, was completed in 1889 and stands at 324 meters (1,063 feet) tall. The Amazon Rainforest produces 20% of the world's oxygen. The speed of light is approximately 299,792 kilometers per second (186,282 miles per second). The first manned moon landing occurred on July 20, 1969, during the Apollo 11 mission, with astronauts Neil Armstrong and Buzz Aldrin.


##### 2.1.1.2 Similarity Search By Vector

In [69]:
embedding_vector = OpenAIEmbeddings().embed_query(query)
docs = db.similarity_search_by_vector(embedding_vector)
print(docs[0].page_content)

The Earth is the third planet from the Sun in our solar system. The Great Wall of China is the longest wall in the world, stretching over 13,000 miles. The human brain weighs about 3 pounds and is responsible for various cognitive functions. Honey never spoils. Archaeologists have found pots of honey in ancient Egyptian tombs that are over 3,000 years old and still perfectly edible. The Pacific Ocean is the largest and deepest ocean on Earth. Bees are crucial for pollination, playing a vital role in the production of many crops.
The Eiffel Tower in Paris, France, was completed in 1889 and stands at 324 meters (1,063 feet) tall. The Amazon Rainforest produces 20% of the world's oxygen. The speed of light is approximately 299,792 kilometers per second (186,282 miles per second). The first manned moon landing occurred on July 20, 1969, during the Apollo 11 mission, with astronauts Neil Armstrong and Buzz Aldrin.


### 2.2 Chains

In [78]:
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")

docs = loader.load()

In [79]:
embeddings = OpenAIEmbeddings()

In [81]:
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)

In [82]:
from langchain.chains.combine_documents import create_stuff_documents_chain

prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:
<context>                                          
{context}
<context/>

Question: {input}""")

document_chain = create_stuff_documents_chain(llm, prompt)

In [83]:
from langchain_core.documents import Document

document_chain.invoke({
    "input": "How can langsmith help with testing?",
    "context": [Document(page_content="langsmith can let you visualize test results")]
})

'Langsmith can help with testing by allowing users to visualize test results.'

In [84]:
from langchain.chains import create_retrieval_chain

retriever = vector.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [85]:
response = retrieval_chain.invoke({"input": "how can langsmith help with testing?"})
print(response["answer"])

LangSmith can help with testing by providing tools and features to evaluate and analyze the performance of prompts, chains, and agents. It allows users to curate datasets and run chains over the data points to visualize the outputs. Users can also assign feedback programmatically to runs, track performance over time, and pinpoint underperforming data points. LangSmith documentation provides examples and guidance on extracting insights from logged runs. Additionally, LangSmith offers annotation queues for human review and manual evaluation of runs, which can be helpful in assessing subjective qualities and validating automatic evaluation metrics.


### 2.3 Retrievers

#### 2.3.1 Parent Document Retriever

In [86]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

In [88]:
loaders = [
    TextLoader('./Info.txt')
]
docs = []
for loader in loaders:
    docs.extend(loader.load())

In [89]:
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
vectorstore = Chroma(
    collection_name = "full_documents", embedding_function=OpenAIEmbeddings()
)

store = InMemoryStore()
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter
)

In [90]:
retriever.add_documents(docs, ids=None)

In [91]:
list(store.yield_keys())

['90d5fcb3-94e6-4cec-857b-84f5bb5d5d89']

In [92]:
sub_docs = vectorstore.similarity_search("France")
print(sub_docs[0].page_content)

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


The Eiffel Tower in Paris, France, was completed in 1889 and stands at 324 meters (1,063 feet) tall. The Amazon Rainforest produces 20% of the world's oxygen. The speed of light is approximately 299,792 kilometers per second (186,282 miles per second). The first manned moon landing occurred on July 20, 1969, during the Apollo 11 mission, with astronauts Neil Armstrong and Buzz Aldrin.


In [93]:
retrieved_docs = retriever.get_relevant_documents("France")
len(retrieved_docs[0].page_content)

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


923

#### 2.3.2 Self Query Retrieval

In [1]:
from langchain.schema import Document
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

docs = [
    Document(
        page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose",
        metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"},
    ),
    Document(
        page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
        metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
    ),
    Document(
        page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
        metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
    ),
    Document(
        page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them",
        metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
    ),
    Document(
        page_content="Toys come alive and have a blast doing so",
        metadata={"year": 1995, "genre": "animated"},
    ),
    Document(
        page_content="Three men walk into the Zone, three men walk out of the Zone",
        metadata={
            "year": 1979,
            "director": "Andrei Tarkovsky",
            "genre": "thriller",
            "rating": 9.9,
        },
    ),
]
vectorstore = Chroma.from_documents(docs, OpenAIEmbeddings())

In [2]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI

metadata_field_info = [
    AttributeInfo(
        name="genre",
        description="The genre of the movie. One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']",
        type="string",
    ),
    AttributeInfo(
        name="year",
        description="The year the movie was released",
        type="integer",
    ),
    AttributeInfo(
        name="director",
        description="The name of the movie director",
        type="string",
    ),
    AttributeInfo(
        name="rating", description="A 1-10 rating for the movie", type="float"
    ),
]
document_content_description = "Brief summary of a movie"
llm = ChatOpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
)

In [3]:
retriever.invoke("I want to watch a movie rated higher than 8.5")

[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979}),
 Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006})]

In [4]:
# Filter k
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    enable_limit=True,
)

# This example only specifies a relevant query
retriever.invoke("What are two movies about dinosaurs")

[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993}),
 Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'year': 1995})]

### 2.3 Conversational Retrieval Chain

In [6]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder, ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name = "chat_history"),
    ("user", "{input}"),
    ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
])

retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

In [7]:
from langchain_core.messages import HumanMessage, AIMessage

chat_history = [HumanMessage(content="Can LangSmith help test my LLM applications?"), AIMessage(content="Yes!")]
retriever_chain.invoke({
    "chat_history": chat_history,
    "input": "Tell me how"
})

[Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'year': 1995}),
 Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),
 Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979}),
 Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'director': 'Christopher Nolan', 'rating': 8.2, 'year': 2010})]

In [10]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the user's questions based on the below context:\n\n{context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
])
document_chain = create_stuff_documents_chain(llm, prompt)

retrieval_chain = create_retrieval_chain(retriever_chain, document_chain)

In [11]:
chat_history = [HumanMessage(content="Can LangSmith help test my LLM applications?"), AIMessage(content="Yes!")]
retrieval_chain.invoke({
    "chat_history": chat_history,
    "input": "Tell me how"
})

{'chat_history': [HumanMessage(content='Can LangSmith help test my LLM applications?'),
  AIMessage(content='Yes!')],
 'input': 'Tell me how',
 'context': [Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'year': 1995}),
  Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}),
  Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979}),
  Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'director': 'Christopher Nolan', 'rating': 8.2, 'year': 2010})],
 'answer': 'LangSmith can help test your LLM applications by providing expert guidance and support throughout the application process. They can review your a