# Langchain tutorial

In [143]:
import os
from dotenv import load_dotenv

# Load the environment variables
load_dotenv()

from langchain_openai import ChatOpenAI
llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

## 1 - Most basic model query

In [144]:
# 1 - Invoke the model
llm.invoke("how can langsmith help with testing?")

AIMessage(content='Langsmith can help with testing in the following ways:\n\n1. Test Automation: Langsmith can be used to automate the testing process, making it faster and more efficient. It can be used to write test scripts, execute tests, and generate test reports.\n\n2. Test Data Generation: Langsmith can be used to generate test data that can be used to test the application under different scenarios and conditions.\n\n3. Performance Testing: Langsmith can be used to simulate load on the application and perform performance testing to identify bottlenecks and optimize the application.\n\n4. Integration Testing: Langsmith can be used to test the integration of different modules or components of the application to ensure they work together seamlessly.\n\n5. Regression Testing: Langsmith can be used to automate regression testing, ensuring that new code changes do not break existing functionality.\n\nOverall, Langsmith can help streamline the testing process, improve test coverage, and

## 2 - Prompt templates

In [148]:
# 2.1 - Use a prompt template
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a world class technical documentation writer."),
    ("user", "{input}")
])

In [149]:
# 2.2 - (Optional) Output parser
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()

In [150]:
chain = prompt | llm | output_parser
chain.invoke({"input": "how can langsmith help with testing?"})

'Langsmith is a powerful tool that can significantly aid in the testing process. Here are some ways in which Langsmith can help with testing:\n\n1. Automated Testing: Langsmith can be used to automate various testing tasks such as running test scripts, comparing actual results with expected results, and generating test reports. This can help in increasing the efficiency of the testing process and reducing the manual effort required.\n\n2. Language Support: Langsmith supports multiple programming languages, which makes it suitable for testing a wide range of applications developed in different languages. This ensures that testing can be carried out effectively regardless of the technology stack used in the application.\n\n3. Integration with Testing Frameworks: Langsmith can be integrated with popular testing frameworks such as JUnit, Selenium, and TestNG. This allows testers to leverage the features of these frameworks along with the capabilities of Langsmith to create comprehensive an

## 3 - Retrieval chains

In [151]:
# Scrape google news

import requests
from bs4 import BeautifulSoup

# Define the URL of the Google News page
url = "https://news.google.com/search?q=bitcoin&hl=en-US&gl=US&ceid=US%3Aen"

# Fetch the HTML content of the page
response = requests.get(url)
html_content = response.content

# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

headlines = soup.find_all('a', class_='JtKRv')

# Print all the headlines
all_headlines = []
for headline in headlines:
    all_headlines.append(headline.text)

all_headlines
# Optionally, extract URLs of the news articles
#for headline in headlines:
#    print(f"https://news.google.com{headline['href'][1:]}")

['BlackRock’s bitcoin ETF on verge of eclipsing Grayscale’s fund',
 "Bitcoin ETFs are an 'astonishing success,' Goldman Sachs executive says",
 "Goldman Sachs Issues 'Astonishing' Bitcoin And Ethereum ETF Prediction After Price 'Turning Point'",
 "Bitcoin Halving Post Mortem: Insights from Hashrate Index's Q1-2024 Report",
 'Bitcoin Could Skyrocket to ‘Unprecedented Heights’ if This Happens, According to Glassnode Co-Founders',
 'Researchers find lost password to crypto wallet holding 43.6 BTC: Wired',
 "Researchers 'hack time' to recover $3 million bitcoin wallet",
 'This guy got $3 million in Bitcoin back after he lost an 11-year-old password',
 'How 1,500 new Bitcoin millionaires per day deal with getting rich',
 "Companies in Two Capital Markets Reportedly in Negotiations to Implement Microstrategy's Bitcoin Business Model ...",
 "Traders say Bitcoin price fights 'last resistance' at $69K before new all-time highs",
 'Elon Musk, Donald Trump Discuss Bitcoin (BTC), Crypto Policy Ahe

In [152]:
from langchain_core.prompts.prompt import PromptTemplate

template = """
You are the best financial analyst in the world. You were educated at Harvard and have been working in the industry for 20 years.
These are the headlines of Google News articles about Bitcoin:
Headlines: {string}
Task: Provide me with an analysis how the bitcoin is going to move in the next three days."""

prompt_custom = PromptTemplate.from_template(template)

from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"string": RunnablePassthrough()}
    | prompt_custom
    | llm
    | StrOutputParser()
)
result_1 = rag_chain.invoke({"string": all_headlines})

## If results should be streamed
# for chunk in rag_chain.stream(all_headlines):
#     print(chunk, end="", flush=True)

In [133]:
result_1

"Based on the headlines provided, it is clear that the Bitcoin market is currently experiencing a mix of positive and negative news. Traders are optimistic about new all-time highs, while concerns about inflation, rate jitters, and Mt. Gox transfers are weighing on sentiment. Additionally, there are discussions around the potential impact of macro data, ETF predictions, and staking protocols on the price of Bitcoin.\n\nTaking all these factors into consideration, it is likely that Bitcoin will continue to face volatility in the coming days. The resistance at $69K may be a key level to watch, as traders assess the market sentiment and news developments. The outcome of the U.S. inflation data, Mt. Gox transfers, and ETF predictions could have a significant impact on Bitcoin's price movement.\n\nOverall, it is important to monitor the market closely and stay informed about any new developments that could influence the price of Bitcoin. The next three days are likely to be crucial in deter

# Bitcoin

In [153]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

def download_bitcoin_data():
    # Define the ticker symbol for Bitcoin
    ticker_symbol = 'BTC-USD'

    # Calculate the start and end dates
    end_date = datetime.now()
    start_date = end_date - timedelta(days=3)

    # Download the data
    bitcoin_data = yf.download(ticker_symbol, start=start_date, end=end_date, interval='1h')

    return bitcoin_data

bitcoin_data = download_bitcoin_data()

# Display the first few rows of the dataframe
print(bitcoin_data.head())

[*********************100%%**********************]  1 of 1 completed

                                   Open          High           Low  \
Datetime                                                              
2024-05-28 09:00:00+00:00  68072.195312  68428.476562  67988.195312   
2024-05-28 10:00:00+00:00  68291.921875  68801.835938  68231.195312   
2024-05-28 11:00:00+00:00  68518.617188  68577.976562  68245.351562   
2024-05-28 12:00:00+00:00  68326.664062  68479.226562  68191.281250   
2024-05-28 13:00:00+00:00  68144.210938  68381.484375  67685.765625   

                                  Close     Adj Close     Volume  
Datetime                                                          
2024-05-28 09:00:00+00:00  68308.812500  68308.812500          0  
2024-05-28 10:00:00+00:00  68545.570312  68545.570312  889368576  
2024-05-28 11:00:00+00:00  68293.687500  68293.687500  105232384  
2024-05-28 12:00:00+00:00  68208.953125  68208.953125  208418816  
2024-05-28 13:00:00+00:00  67685.765625  67685.765625  912472064  





In [154]:
template = """
You are the best chart analyst in the world. You were educated at Harvard and have been working in the industry for 20 years.
These is the market data for bitcoin at an hourly interval for the past three days:
Data: {string}
Task: Provide me with an chart analysis how the bitcoin is going to move in the next three days."""

prompt_custom = PromptTemplate.from_template(template)

rag_chain = (
    {"string": RunnablePassthrough()}
    | prompt_custom
    | llm
    | StrOutputParser()
)
result_2 = rag_chain.invoke({"string": bitcoin_data})

# for chunk in rag_chain.stream(all_headlines):
#     print(chunk, end="", flush=True)

In [155]:
result_2

'Based on the provided market data for bitcoin at an hourly interval for the past three days, here is my chart analysis for how bitcoin is likely to move in the next three days:\n\n1. Overall Trend: Bitcoin has been experiencing fluctuations in price over the past three days, with some periods of volatility and consolidation. The overall trend has been slightly bearish, with prices showing a downward movement.\n\n2. Support and Resistance Levels: The support level for bitcoin appears to be around $67,000, as prices have tested this level multiple times without breaking below it. The resistance level is around $69,000, with prices struggling to break above this level.\n\n3. Moving Averages: The moving averages for bitcoin show a slight bearish crossover, with the short-term moving average crossing below the long-term moving average. This indicates a potential downside momentum in the short term.\n\n4. Volume Analysis: The trading volume for bitcoin has been fluctuating, with periods of 

# Manager

In [156]:
template = """
You are the best financial analyst in the world. You were educated at Harvard and have been working in the industry for 60 years.
You have been a math prodigy as a kid. You did research with Daniel Kahneman and won the Nobel Prize in Economics.
Two of your analysts have come up with predictions for the Bitcoin price in the next three days.
Analyst 1: {string_1}
Analyst 2: {string_2}
Task: You have to decide if we are going to buy or sell Bitcoin in the next three days."""

prompt_custom = PromptTemplate.from_template(template)

rag_chain = (
    {"string_1": RunnablePassthrough(), "string_2": RunnablePassthrough()}
    | prompt_custom
    | llm
    | StrOutputParser()
)
result = rag_chain.invoke({"string_1": result_1, "string_2": result_2})

# for chunk in rag_chain.stream(all_headlines):
#     print(chunk, end="", flush=True)

In [157]:
result

'Based on the analysis provided by both Analyst 1 and Analyst 2, it seems that there is a consensus that Bitcoin will likely continue to experience positive momentum in the next three days. The positive sentiment, activity surrounding Bitcoin, and potential increase in demand and price for Bitcoin suggest a bullish outlook.\n\nHowever, it is important to note that the cryptocurrency market is highly volatile and unpredictable, and external factors can impact the price of Bitcoin. Both analysts also highlighted the importance of closely monitoring market trends and news updates.\n\nGiven the analysis and the potential for Bitcoin to trade within the range of $67,000 to $69,000 in the next three days, it may be prudent to consider buying Bitcoin. However, it is essential to stay informed and be prepared to adjust the investment strategy based on changing market conditions.'

In [158]:
from langchain_community.document_loaders import WebBaseLoader

# Multiple URLs
website_urls = [
    "https://docs.smith.langchain.com/user_guide",
    "https://docs.smith.langchain.com",
]

# Initialize an empty list to store all documents
all_documents = []

# Loop through website URLs and use WebBaseLoader for each
for url in website_urls:
  loader = WebBaseLoader(url)
  website_documents = loader.load()
  all_documents.extend(website_documents)

# Process the all_documents list further (e.g., vectorization)

print(all_documents)

[Document(page_content="\n\n\n\n\nLangSmith User Guide | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\nSkip to main contentLangSmith API DocsSearchGo to AppQuick StartUser GuideTracingEvaluationProduction Monitoring & AutomationsPrompt HubProxyPricingSelf-HostingCookbookThis is outdated documentation for 🦜️🛠️ LangSmith, which is no longer actively maintained.For up-to-date documentation, see the latest version.User GuideOn this pageLangSmith User GuideLangSmith is a platform for LLM application development, monitoring, and testing. In this guide, we’ll highlight the breadth of workflows LangSmith supports and how they fit into each stage of the application development lifecycle. We hope this will inform users how to best utilize this powerful platform or give them something to consider if they’re just starting their journey.Prototyping\u200bPrototyping LLM applications often involves quick experimentation between prompts, model types, retrieval strategy and other parameters.\nThe ability to rapidly un

In [159]:
# 3.2 Load openAI embedding model to get the embeddings of the documents
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [160]:
# 3.3 Add vector store
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(all_documents)
vector_store = FAISS.from_documents(documents, embeddings)

In [161]:
# Retrieve vectors from the vector store

# This is the underlying FAISS index
faiss_index = vector_store.index
print(faiss_index)

# 0 is the starting index and faiss_index.ntotal is the ending index
vectors = faiss_index.reconstruct_n(0, faiss_index.ntotal)
for i, vector in enumerate(vectors):
    print(f"Vector {i}: {vector}")

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x7fc278914b70> >
Vector 0: [-0.0080638   0.01916084  0.01222096 ... -0.00213274  0.02612103
 -0.01153036]
Vector 1: [-0.02374188  0.01507666  0.00888921 ...  0.01401275  0.00637293
 -0.01765243]
Vector 2: [-0.01742077  0.01093862  0.01232167 ...  0.00759277 -0.00432376
 -0.01816119]
Vector 3: [-0.021696    0.01003824  0.01190209 ... -0.00271375 -0.0079929
 -0.03096637]
Vector 4: [ 0.00756025  0.01780557  0.01517474 ...  0.00688559  0.0135542
 -0.00869937]
Vector 5: [ 0.00599423  0.00548193  0.00797372 ...  0.01548741  0.00085122
 -0.02121678]
Vector 6: [-0.00539542  0.00660179  0.00760884 ... -0.00329915 -0.01941373
 -0.05169536]


In [162]:
from langchain import hub

#We can use PromptTemplate to add more instructions to our input for the LLM, (instructions, context from retriever and the question user wants to ask)
from langchain_core.prompts.prompt import PromptTemplate

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
prompt_custom = PromptTemplate.from_template(template)

print(prompt_custom)

input_variables=['context', 'question'] template='Use the following pieces of context to answer the question at the end.\nIf you don\'t know the answer, just say that you don\'t know. Use three sentences maximum and keep the answer as concise as possible.\nAlways say "thanks for asking!" at the end of the answer.\n{context}\nQuestion: {question}\nHelpful Answer:'


In [164]:
retriever = vector_store.as_retriever()

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_custom
    | llm
    | StrOutputParser()
)

#print(rag_chain)

for chunk in rag_chain.stream("What is Task Decomposition?"):
    print(chunk, end="", flush=True)



Task decomposition is the process of breaking down a complex task into smaller, more manageable sub-tasks. This helps in understanding the task better, assigning work to different team members, and monitoring progress effectively. Thanks for asking!