# LangChain tutorial

In [1]:
import os
from dotenv import load_dotenv

# Load the environment variables before the client below is constructed,
# because the API key is looked up from the process environment.
# NOTE(review): presumably the values come from a local .env file — confirm.
load_dotenv()

from langchain_openai import ChatOpenAI
# Chat model reused by the later cells. os.getenv returns None when
# OPENAI_API_KEY is not set, so make sure the key is defined before running.
llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

## 1 - Most basic model query

In [3]:
# 1 - Send a plain-string question straight to the chat model.
# The call is the cell's last expression, so the returned message is displayed.
question = "how can langsmith help with testing?"
llm.invoke(question)

AIMessage(content='Langsmith can help with testing by providing automated testing tools and frameworks that can be integrated into the development process. These tools can help streamline the testing process, identify bugs and issues, and ensure that the software meets the desired quality standards. Langsmith can also provide developers with guidance on best testing practices and help them create comprehensive test plans and strategies. Additionally, Langsmith can assist in setting up continuous integration and deployment pipelines to automate the testing process and ensure that code changes are thoroughly tested before being deployed to production.')

## 2 - Chains

In [4]:
# 2.1 - Build a chat prompt template from (role, text) pairs
from langchain_core.prompts import ChatPromptTemplate

# Fixed system instruction that frames every request
system_message = ("system", "You are a world class technical documentation writer.")
# The user turn is a placeholder filled from the "input" key at invoke time
user_message = ("user", "{input}")

prompt = ChatPromptTemplate.from_messages([system_message, user_message])

In [5]:
# 2.2 - (Optional) Output parser
# Appended as the last step of a chain so that invoking the chain yields a
# plain string instead of a message object.
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()

In [6]:
# 2.3 - Compose the pieces: prompt template -> chat model -> string parser
chain = prompt | llm | output_parser

# The dict key must match the "{input}" placeholder declared in the prompt
request = {"input": "how can langsmith help with testing?"}
chain.invoke(request)

'Langsmith is a powerful tool that can greatly assist with testing in various ways. Here are some ways in which Langsmith can help with testing:\n\n1. **Automated Testing**: Langsmith can be used to automate testing processes, making it easier to run tests repeatedly and consistently. This can help in identifying bugs and issues early in the development cycle.\n\n2. **Test Case Generation**: Langsmith can be used to generate test cases based on the specifications and requirements of the software being tested. This can help in ensuring comprehensive test coverage and in identifying edge cases that manual testers might overlook.\n\n3. **Data Generation**: Langsmith can help in generating test data that can be used for testing different scenarios and conditions. This can help in validating the behavior of the software under various inputs and conditions.\n\n4. **Performance Testing**: Langsmith can be used to simulate different load conditions and test the performance of the software unde

## 3 - Retrieval chains

In [40]:
# 3.1 Retrieve information from website using beautifulsoup (single URL)
from langchain_community.document_loaders import WebBaseLoader

user_guide_url = "https://docs.smith.langchain.com/user_guide"
loader = WebBaseLoader(user_guide_url)
# load() fetches the page and returns its content as a list of Document objects
docs = loader.load()

[Document(page_content="\n\n\n\n\nLangSmith User Guide | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\nSkip to main contentLangSmith API DocsSearchGo to AppQuick StartUser GuideTracingEvaluationProduction Monitoring & AutomationsPrompt HubProxyPricingSelf-HostingCookbookThis is outdated documentation for 🦜️🛠️ LangSmith, which is no longer actively maintained.For up-to-date documentation, see the latest version.User GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.Prototyping\u200bPrototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.\nThe ability to rapidly un

In [41]:
# Multiple URLs: load every page and pool the results into a single list
website_urls = [
    "https://docs.smith.langchain.com/user_guide",
    "https://docs.smith.langchain.com",
]

# Accumulator for the documents of all pages
all_documents = []

# One WebBaseLoader per URL; extend() keeps the accumulated list flat
for url in website_urls:
    all_documents.extend(WebBaseLoader(url).load())

# all_documents can now be processed further (e.g., vectorization)

print(all_documents)

[Document(page_content="\n\n\n\n\nLangSmith User Guide | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\nSkip to main contentLangSmith API DocsSearchGo to AppQuick StartUser GuideTracingEvaluationProduction Monitoring & AutomationsPrompt HubProxyPricingSelf-HostingCookbookThis is outdated documentation for 🦜️🛠️ LangSmith, which is no longer actively maintained.For up-to-date documentation, see the latest version.User GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.Prototyping\u200bPrototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.\nThe ability to rapidly un

In [42]:
# 3.2 Load the OpenAI embedding model used to embed the documents
# No arguments are passed, so the library defaults apply.
# NOTE(review): presumably the API key is picked up from the environment — confirm.
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [43]:
# 3.3 Add vector store
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded pages into chunks using the splitter's default settings
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(all_documents)

# Embed every chunk and index the resulting vectors in FAISS
vector_store = FAISS.from_documents(documents=documents, embedding=embeddings)

In [44]:
# Inspect the raw embedding vectors held by the vector store

# The underlying FAISS index object
faiss_index = vector_store.index
print(faiss_index)

# reconstruct_n(start, count): start at vector 0 and read back `ntotal` vectors,
# i.e. every vector currently stored in the index
vector_count = faiss_index.ntotal
vectors = faiss_index.reconstruct_n(0, vector_count)
for position, embedding_vector in enumerate(vectors):
    print(f"Vector {position}: {embedding_vector}")

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7fc2726fb360> >
Vector 0: [-0.0080638   0.01916084  0.01222096 ... -0.00213274  0.02612103
 -0.01153036]
Vector 1: [-0.02374188  0.01507666  0.00888921 ...  0.01401275  0.00637293
 -0.01765243]
Vector 2: [-0.01742077  0.01093862  0.01232167 ...  0.00759277 -0.00432376
 -0.01816119]
Vector 3: [-0.021696    0.01003824  0.01190209 ... -0.00271375 -0.0079929
 -0.03096637]
Vector 4: [ 0.00756025  0.01780557  0.01517474 ...  0.00688559  0.0135542
 -0.00869937]
Vector 5: [ 0.00599423  0.00548193  0.00797372 ...  0.01548741  0.00085122
 -0.02121678]
Vector 6: [-0.00539542  0.00660179  0.00760884 ... -0.00329915 -0.01941373
 -0.05169536]


In [72]:
# NOTE(review): `hub` is not referenced in the cells visible here — confirm it is still needed.
from langchain import hub

# PromptTemplate lets us wrap the user's question with extra instructions and
# with the context returned by the retriever before it is sent to the LLM.
# The template below exposes two placeholders: {context} and {question}.
from langchain_core.prompts.prompt import PromptTemplate

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
prompt_custom = PromptTemplate.from_template(template)

In [73]:
# Expose the vector store as a retriever so it can be used as a step in a chain
retriever = vector_store.as_retriever()

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined in order, separated by a blank line.
        An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# Build the RAG chain. The dict step fans the incoming question out into the
# two variables the prompt template expects:
#   - "context": the question goes through the retriever, and the retrieved
#     documents are flattened into one string by format_docs
#   - "question": the question is forwarded unchanged
# prompt_custom is used here (not the chat `prompt` from section 2, which only
# declares an {input} variable) because it is the template that defines the
# {context} and {question} placeholders.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_custom
    | llm
    | StrOutputParser()
)

# Stream the answer so each chunk is printed as soon as it arrives
for chunk in rag_chain.stream("What is Task Decomposition?"):
    print(chunk, end="", flush=True)



Task Decomposition is the process of breaking down a task into smaller, more manageable subtasks. It helps in organizing and tracking the performance of an application across multiple interactions. This approach can assist in identifying areas for improvement and enhancing overall efficiency.