# LangChain tutorial

In [74]:
import os

from dotenv import load_dotenv

# Load the environment variables before the model client is created,
# so OPENAI_API_KEY can be looked up below.
load_dotenv()

from langchain_openai import ChatOpenAI

# Chat model client used by the cells that follow.
llm = ChatOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
)

## 1 - Most basic model query

In [75]:
# 1 - Send a single plain-text question straight to the model (no prompt template)
llm.invoke(input="how can langsmith help with testing?")

AIMessage(content='Langsmith can help with testing by providing automated testing tools and frameworks that can be used to quickly and efficiently test code for bugs and errors. It can also assist in creating test cases, running tests, and analyzing the results to identify areas of improvement. Additionally, Langsmith can help with performance testing, security testing, and regression testing to ensure that the software is functioning as expected and meeting the requirements. Overall, Langsmith can streamline the testing process and help developers deliver high-quality, reliable software.')

## 2 - Chains

In [76]:
# 2.1 - Prompt template: a fixed system message plus a user message
#       whose text is supplied through the {input} variable
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a world class technical documentation writer."),
        ("user", "{input}"),
    ]
)

In [77]:
# 2.2 - (Optional) Output parser appended as the last step of the chain
from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()

In [78]:
# Compose prompt -> model -> parser into one runnable, then run it once.
chain = (
    prompt
    | llm
    | output_parser
)
chain.invoke(input={"input": "how can langsmith help with testing?"})

"Langsmith is a versatile tool that can greatly assist with testing in a number of ways. Here are some key ways in which Langsmith can be useful in the testing process:\n\n1. **Automated Testing**: Langsmith can be used to automate various testing tasks, such as running test suites, performing regression testing, and executing test scripts. By automating these repetitive tasks, Langsmith can help improve efficiency and accuracy in the testing process.\n\n2. **Test Data Generation**: Langsmith can generate synthetic test data that can be used to simulate different scenarios and edge cases during testing. This can help ensure thorough test coverage and enhance the reliability of the testing process.\n\n3. **Test Case Management**: Langsmith can assist in organizing and managing test cases effectively. It allows testers to create, track, and maintain test cases in a structured manner, making it easier to execute and monitor tests.\n\n4. **Integration Testing**: Langsmith can be integrated

## 3 - Retrieval chains

In [80]:
# 3.1 Retrieve information from a website using beautifulsoup (single URL)
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(web_path="https://docs.smith.langchain.com/user_guide")
docs = loader.load()

In [95]:
# NOTE(review): `headlines` is only assigned by the Google News scraping cell
# further down (execution count 107, this cell is 95); on a fresh kernel that
# cell has to run before this one.
headlines

[<a class="JtKRv" data-n-tid="29" href="./articles/CBMiWmh0dHBzOi8vYml0Y29pbm1hZ2F6aW5lLmNvbS9idXNpbmVzcy9nb2xkbWFuLXNhY2hzLWJpdGNvaW4tZXRmcy1hcmUtYW4tYXN0b25pc2hpbmctc3VjY2Vzc9IBAA?hl=en-US&amp;gl=US&amp;ceid=US%3Aen" jsaction="click:kkIcoc;" tabindex="0" target="_blank">Goldman Sachs: Bitcoin ETFs Are an 'Astonishing Success'</a>,
 <a class="JtKRv" data-n-tid="29" href="./articles/CBMiS2h0dHBzOi8vY29pbnRlbGVncmFwaC5jb20vbmV3cy9iaXRjb2luLWV0ZnMtdHJhZGl0aW9uYWwtZmluYW5jZS1pbnZlc3RtZW50c9IBAA?hl=en-US&amp;gl=US&amp;ceid=US%3Aen" jsaction="click:kkIcoc;" tabindex="0" target="_blank">Bitcoin ETFs boost TradFi Investments: Binance France president</a>,
 <a class="JtKRv" data-n-tid="29" href="./articles/CBMilgFodHRwczovL3d3dy5mb3JiZXMuY29tL3NpdGVzL2RpZ2l0YWwtYXNzZXRzLzIwMjQvMDUvMzAvZ29sZG1hbi1zYWNocy1pc3N1ZXMtYXN0b25pc2hpbmctYml0Y29pbi1hbmQtZXRoZXJldW0tZXRmLXByZWRpY3Rpb24tYWZ0ZXItcHJpY2UtdHVybmluZy1wb2ludC_SAQA?hl=en-US&amp;gl=US&amp;ceid=US%3Aen" jsaction="click:kkIcoc;" tabindex="0" targe

In [107]:
import requests
from bs4 import BeautifulSoup

# Define the URL of the Google News page (search results for "bitcoin")
url = "https://news.google.com/search?q=bitcoin&hl=en-US&gl=US&ceid=US%3Aen"

# Fetch the HTML content of the page.
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() turns an HTTP error into an exception instead of letting
# an error page be parsed into an empty headline list.
response = requests.get(url, timeout=10)
response.raise_for_status()
html_content = response.content

# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Headline links are the <a> tags carrying the CSS class 'JtKRv'.
# NOTE(review): 'JtKRv' looks like a generated class name; if this list comes
# back empty, check the page markup for the current class.
headlines = soup.find_all('a', class_='JtKRv')

# Print all the headlines
for headline in headlines:
    print(headline.text)

# Optionally, extract URLs of the news articles
#for headline in headlines:
#    print(f"https://news.google.com{headline['href'][1:]}")

Traders say Bitcoin price fights “last resistance” at $69K before new all-time highs
Bitcoin price today: steady at $68k as inflation, rate jitters weigh on sentiment By Investing.com
Goldman Sachs Issues 'Astonishing' Bitcoin And Ethereum ETF Prediction After Price 'Turning Point'
Elon Musk, Donald Trump Discuss Bitcoin (BTC), Crypto Policy Ahead of Election
Is The MEV Monster Under Bitcoin's Bed?
Bitcoin, Ether Prices Ease as SHIB Drives Gains in Meme Tokens
Paradigm leads $70 million raise for Bitcoin staking protocol Babylon
BlackRock's $20 Billion IBIT Fund Is World's Biggest Bitcoin (BTC) ETF
BlackRock’s IBIT continues to lead net inflows in spot bitcoin ETFs
BlackRock’s bitcoin ETF on verge of eclipsing Grayscale’s fund
Researchers 'hack time' to recover $3 million bitcoin wallet
Researchers find lost password to crypto wallet holding 43.6 BTC: Wired
Hackers finally unlock $3 million Bitcoin wallet after man forgot password for 11 years
Prediction: The Bitcoin Halving Could Be a

In [108]:
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Re-create the chat model client for this experiment
llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# One-shot prompt: shows a raw <a> tag and the headline text expected back,
# then asks the model to do the same for {string}.
template = """
You need to clean data. This is an example:
This is an example of a news article:
<a class="JtKRv" data-n-tid="29" href="./articles/CBMilgFodHRwczovL3d3dy5mb3JiZXMuY29tL3NpdGVzL2RpZ2l0YWwtYXNzZXRzLzIwMjQvMDUvMzAvZ29sZG1hbi1zYWNocy1pc3N1ZXMtYXN0b25pc2hpbmctYml0Y29pbi1hbmQtZXRoZXJldW0tZXRmLXByZWRpY3Rpb24tYWZ0ZXItcHJpY2UtdHVybmluZy1wb2ludC_SAQA?hl=en-US&amp;gl=US&amp;ceid=US%3Aen" jsaction="click:kkIcoc;" tabindex="0" target="_blank">Goldman Sachs Issues 'Astonishing' Bitcoin And Ethereum ETF Prediction After Price 'Turning Point'</a>
This is how it should look like:
Goldman Sachs Issues 'Astonishing' Bitcoin And Ethereum ETF Prediction After Price 'Turning Point'

String to clean: {string}
Cleaned string:"""
prompt_custom = PromptTemplate.from_template(template)

# The chain input is passed through unchanged into the template's {string} slot,
# then goes to the model, and the reply is parsed by StrOutputParser.
rag_chain = {"string": RunnablePassthrough()} | prompt_custom | llm | StrOutputParser()

# Stream the result for the first scraped headline, printing pieces as they arrive
for chunk in rag_chain.stream(headlines[0]):
    print(chunk, end="", flush=True)


Traders say Bitcoin price fights “last resistance” at $69K before new all-time highs

In [41]:
# Multiple URLs
website_urls = [
    "https://docs.smith.langchain.com/user_guide",
    "https://docs.smith.langchain.com",
]

# Gather the documents of every page into one flat list
all_documents = []
for url in website_urls:
    # One WebBaseLoader per URL; whatever load() returns is appended to the list
    loader = WebBaseLoader(url)
    website_documents = loader.load()
    all_documents += website_documents

# Process the all_documents list further (e.g., vectorization)

print(all_documents)

[Document(page_content="\n\n\n\n\nLangSmith User Guide | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\nSkip to main contentLangSmith API DocsSearchGo to AppQuick StartUser GuideTracingEvaluationProduction Monitoring & AutomationsPrompt HubProxyPricingSelf-HostingCookbookThis is outdated documentation for 🦜️🛠️ LangSmith, which is no longer actively maintained.For up-to-date documentation, see the latest version.User GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.Prototyping\u200bPrototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.\nThe ability to rapidly un

In [42]:
# 3.2 Load the OpenAI embedding model used to embed the documents
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [43]:
# 3.3 Add vector store
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded pages with the splitter's default settings ...
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(all_documents)

# ... then build a FAISS store from the resulting pieces and the embedding model
vector_store = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [44]:
# Retrieve vectors from the vector store

# Underlying FAISS index object held by the vector store
faiss_index = vector_store.index
print(faiss_index)

# Reconstruct the stored vectors: start at id 0 and ask for ntotal of them,
# i.e. every vector in the index
vectors = faiss_index.reconstruct_n(0, faiss_index.ntotal)
for i in range(len(vectors)):
    vector = vectors[i]
    print(f"Vector {i}: {vector}")

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7fc2726fb360> >
Vector 0: [-0.0080638   0.01916084  0.01222096 ... -0.00213274  0.02612103
 -0.01153036]
Vector 1: [-0.02374188  0.01507666  0.00888921 ...  0.01401275  0.00637293
 -0.01765243]
Vector 2: [-0.01742077  0.01093862  0.01232167 ...  0.00759277 -0.00432376
 -0.01816119]
Vector 3: [-0.021696    0.01003824  0.01190209 ... -0.00271375 -0.0079929
 -0.03096637]
Vector 4: [ 0.00756025  0.01780557  0.01517474 ...  0.00688559  0.0135542
 -0.00869937]
Vector 5: [ 0.00599423  0.00548193  0.00797372 ...  0.01548741  0.00085122
 -0.02121678]
Vector 6: [-0.00539542  0.00660179  0.00760884 ... -0.00329915 -0.01941373
 -0.05169536]


In [88]:
from langchain import hub
from langchain_core.prompts.prompt import PromptTemplate

# PromptTemplate lets us wrap the LLM input with extra material:
# instructions, the context coming from the retriever, and the user's question.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""

prompt_custom = PromptTemplate.from_template(template)

print(prompt_custom)

input_variables=['context', 'question'] template='Use the following pieces of context to answer the question at the end.\nIf you don\'t know the answer, just say that you don\'t know. Use three sentences maximum and keep the answer as concise as possible.\nAlways say "thanks for asking!" at the end of the answer.\n{context}\nQuestion: {question}\nHelpful Answer:'


In [73]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Expose the vector store through its retriever interface for use in the chain
retriever = vector_store.as_retriever()


def format_docs(docs):
    """Concatenate the ``page_content`` of every document in ``docs``.

    The individual contents are separated by a blank line (two newlines);
    an empty ``docs`` yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)


# Retrieval-augmented chain:
#   - "context":  the question goes to the retriever and the returned documents
#                 are flattened to text by format_docs
#   - "question": the question itself, passed through unchanged
# Both values fill the {context} / {question} slots of prompt_custom.
# The chain must use prompt_custom here: the earlier `prompt` chat template only
# has an {input} variable and cannot be formatted from this dict.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_custom
    | llm
    | StrOutputParser()
)

# Stream the answer, printing each piece as soon as it arrives
for chunk in rag_chain.stream("What is Task Decomposition?"):
    print(chunk, end="", flush=True)



Task Decomposition is the process of breaking down a task into smaller, more manageable subtasks. It helps in organizing and tracking the performance of an application across multiple interactions. This approach can assist in identifying areas for improvement and enhancing overall efficiency.