In [3]:
! pip3 install -q langchain langchain-openai langchain-core langchain-community docx2txt pypdf langchain-chroma sentence_transformers python-dotenv

import langchain
print(langchain.__version__)

1.2.7


In [4]:
import os
from dotenv import load_dotenv
load_dotenv()

print(os.environ["LANGSMITH_ENDPOINT"])
print(os.environ["LANGCHAIN_TRACING_V2"])

https://api.smith.langchain.com
true


### What is Retrieval Augmented Generation (RAG)?

RAG is a technique that enhances language models by combining them with a retrieval system. It allows the model to access and utilize external knowledge when generating responses.

### Overview of Langchain

Langchain is a framework for developing applications powered by language models. It provides a set of tools and abstractions that make it easier to build complex AI applications. Key features include:

- Modular components for common LLM tasks
- Built-in support for various LLM providers
- Tools for document loading, text splitting, and vector storage
- Abstractions for building conversational agents and question-answering systems

Call LLM

In [5]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")
llm_response = llm.invoke("Tell me a joke about programming.")
print(f"1st llm_response: {llm_response}")
print(f"2nd llm_response: {llm_response.content}")

  from .autonotebook import tqdm as notebook_tqdm


1st llm_response: content='Why do programmers prefer dark mode?\n\nBecause light attracts bugs!' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 14, 'total_tokens': 26, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-D4tuCTUnmaStWItXhGfci2SJ6FUwD', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--019c1fd5-128e-7de1-861e-a4b552e37283-0' tool_calls=[] invalid_tool_calls=[] usage_metadata={'input_tokens': 14, 'output_tokens': 12, 'total_tokens': 26, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
2nd llm_response: Why do programmers prefer dark mode?

Because light attract

In [6]:
from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()
output_parser.invoke(llm_response)

'Why do programmers prefer dark mode?\n\nBecause light attracts bugs!'

In [7]:
chain = llm | output_parser
chain.invoke("Tell me a joke about programming.")

'Why do programmers prefer dark mode? Because the light attracts bugs!'

In [9]:
from typing import List
from pydantic import BaseModel, Field

class MobileReview(BaseModel):
  phone_model: str = Field(description="The name and model of the mobile phone being reviewed")
  rating: int = Field(description="The rating of the mobile review from 1 to 5")
  pros: List[str] = Field(description="A list of positive things mentioned in the mobile review")
  cons: List[str] = Field(description="A list of negative things mentioned in the mobile review")
  summary: str = Field(description="Brief summary of the mobile review")

review_text = """
Just got my hands on the new Galaxy S21 and wow, this thing is slick! The screen is gorgeous, colors pop like crazy.
Camera's insane too, especially at night - my Insta game's never been stronger. Battery life is solid, easily lasts me all day with heavy use.

Not gonna lie though, it's pretty pricey. And what's with ditching the charger? C'mon Samsung.
Also, still getting used to the new button layout, keep hitting Bixby by mistake.

Overall, I'd say it's a solid 4 out of 5. Great phone, but a few annoying quirks keep it from being perfect.
If you're due for an upgrade, definitely worth checkng out.
"""

structured_llm = llm.with_structured_output(MobileReview)
output = structured_llm.invoke(review_text)

output



MobileReview(phone_model='Galaxy S21', rating=4, pros=['Gorgeous screen with vibrant colors', 'Impressive camera performance, especially at night', 'Solid battery life'], cons=['Pricey', 'No charger included', 'New button layout takes getting used to'], summary='The Galaxy S21 is a great phone with a few quirks. It has a gorgeous screen, impressive camera, and solid battery life. However, the high price, lack of included charger, and new button layout may be drawbacks for some users. Overall, a solid 4 out of 5 rating - worth checking out for those due for an upgrade.')

## Prompt Template

In [10]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("Tell me a joke about {topic}.")

# prompt = ChatPromptTemplate.from_template([
#     ("system", "You are a helpful assistant that extracts structured data from mobile phone reviews."),
#     ("user", "Extract the following review into structured data:\n{review_text}")
# ])
prompt.invoke({"topic": "programming"})

ChatPromptValue(messages=[HumanMessage(content='Tell me a joke about programming.', additional_kwargs={}, response_metadata={})])

In [11]:
chain = prompt | llm | output_parser
chain.invoke({"topic": "car drivers"})

'Why did the car driver bring a map to the bar? \nBecause he heard the drinks were on the house!'

In [12]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# define the prompt
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")

# Initilize the LLM
llm = ChatOpenAI(model="gpt-4o-mini")

# Define the output parser
output_parser = StrOutputParser()

# Compose the chain
chain = prompt | llm | output_parser

# Use the chain
result = chain.invoke({"topic": "artificial intelligence"})
print(result)


Why did the robot go on a diet? 

Because it had too many bytes!


## LLM Messages

In [13]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, SystemMessage

system_message = SystemMessage(content="You are a helpful assistant that tells jokes.")
human_message = HumanMessage(content="Tell me a joke about robots.")
llm.invoke([system_message, human_message])

AIMessage(content='Why was the robot so bad at soccer?\n\nBecause it kept kicking up sparks instead of the ball!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 20, 'prompt_tokens': 27, 'total_tokens': 47, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1590f93f9d', 'id': 'chatcmpl-D4z7G7Mvr4bJGeWJUKvQJGzg0zDxj', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019c2106-b33b-7420-9f12-78439452bc24-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 27, 'output_tokens': 20, 'total_tokens': 47, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [15]:
template = ChatPromptTemplate([
  ("system", "You are a helpful assistant that tells jokes."),
  ("user", "Tell me a joke about {topic}.")
])
prompt_values = template.invoke({
  "topic": "quantum computing"
})
prompt_values

ChatPromptValue(messages=[SystemMessage(content='You are a helpful assistant that tells jokes.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Tell me a joke about quantum computing.', additional_kwargs={}, response_metadata={})])

In [18]:
llm.invoke(prompt_values)

AIMessage(content='Why did the quantum computer break up with its partner?\n\nBecause it couldn’t find a stable state!', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 20, 'prompt_tokens': 28, 'total_tokens': 48, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1590f93f9d', 'id': 'chatcmpl-D4zZkpLhzj5gFB2UY3RGxI7Qy0gL2', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019c2121-a5e5-7c11-a037-f01c58369cbe-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 28, 'output_tokens': 20, 'total_tokens': 48, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [33]:
# ! pip3 install docx2txt pypdf unstructured sentence-transformers

## Document Processing for RAG Systems
After setting up our LangChain components, the next crucial step in building a RAG system is processing our documents. This involves loading the documents and splitting them into manageable chunks.

### Loading Documents
We start by loading documents from various file types:

In [20]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List
from langchain_core.documents import Document
import os

def load_documents(folder_path: str) -> List[Document]:
  documents = []
  for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if filename.endswith('.pdf'):
      loader = PyPDFLoader(file_path)
    elif filename.endswith('.docx'):
      loader = Docx2txtLoader(file_path)
    else:
      print(f"Unsupported file type: {filename}")
      continue
    documents.extend(loader.load())
  return documents

folder_path = "docs"
documents = load_documents(folder_path)
print(f"Loaded {len(documents)} documents from the folder.")


Loaded 14 documents from the folder.


In [22]:
documents[0]

Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-11-06T10:08:55+00:00', 'author': '', 'keywords': '', 'moddate': '2024-11-06T10:08:55+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'docs/langchain_turing.pdf', 'total_pages': 14, 'page': 0, 'page_label': '1'}, page_content='LangChain\nVasilios Mavroudis\nAlan Turing Institute\nvmavroudis@turing.ac.uk\nAbstract. LangChain is a rapidly emerging framework that offers a ver-\nsatile and modular approach to developing applications powered by large\nlanguage models (LLMs). By leveraging LangChain, developers can sim-\nplify complex stages of the application lifecycle—such as development,\nproductionization, and deployment—making it easier to build scalable,\nstateful, and contextually aware applications. It provides tools for han-\ndling chat models, integratin

In [25]:
documents[0].metadata

{'producer': 'pdfTeX-1.40.26',
 'creator': 'LaTeX with hyperref',
 'creationdate': '2024-11-06T10:08:55+00:00',
 'author': '',
 'keywords': '',
 'moddate': '2024-11-06T10:08:55+00:00',
 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0',
 'subject': '',
 'title': '',
 'trapped': '/False',
 'source': 'docs/langchain_turing.pdf',
 'total_pages': 14,
 'page': 0,
 'page_label': '1'}

In [26]:
documents[0].page_content

'LangChain\nVasilios Mavroudis\nAlan Turing Institute\nvmavroudis@turing.ac.uk\nAbstract. LangChain is a rapidly emerging framework that offers a ver-\nsatile and modular approach to developing applications powered by large\nlanguage models (LLMs). By leveraging LangChain, developers can sim-\nplify complex stages of the application lifecycle—such as development,\nproductionization, and deployment—making it easier to build scalable,\nstateful, and contextually aware applications. It provides tools for han-\ndling chat models, integrating retrieval-augmented generation (RAG),\nand offering secure API interactions. With LangChain, rapid deployment\nof sophisticated LLM solutions across diverse domains becomes feasible.\nHowever, despite its strengths, LangChain’s emphasis on modularity and\nintegration introduces complexities and potential security concerns that\nwarrant critical examination. This paper provides an in-depth analysis\nof LangChain’s architecture and core components, inclu

### Splitting Documents
Next, we split these documents into smaller, more manageable chunks:

In [28]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)

splits = text_splitter.split_documents(documents)
print(f"Split the document(s) into {len(splits)} chunks.")

Split the document(s) into 49 chunks.


## Creating Embeddings for RAG Systems
After processing our documents, the next crucial step is to create embeddings. Embeddings are vector representations of our text chunks that allow for efficient similarity search, which is key to the retrieval part of our RAG system.

#### Using OpenAI Embeddings
First, let's look at how we can create embeddings using OpenAI's embedding model:

In [75]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
document_embeddings = embeddings.embed_documents([split.page_content for split in splits])
print(f"Created embeddings for {len(document_embeddings)} document chunks.")
document_embeddings[0][:5] ### only print first 5 values of the first embedding

Created embeddings for 49 document chunks.


[0.0014729297254234552,
 0.015365492552518845,
 -0.010614775121212006,
 -0.03759082779288292,
 0.005141423549503088]

## Setting Up the Vector Store for RAG Systems
Now that we have our document embeddings, we need a way to store and efficiently search through them. This is where a vector store comes in. I'll use Chroma, a popular vector store that integrates well with LangChain.

## Creating the Vector Store
Set up our Chroma vector store:

In [77]:
from langchain_chroma import Chroma


embeddings_function = OpenAIEmbeddings()
collection_name = "my_rag_collection"

vectorstore = Chroma.from_documents(
    collection_name=collection_name,
    documents=splits,
    embedding=embeddings_function,
    persist_directory="./chroma_db"
)
print("Vector store created and persisted to './chroma_db'")


Vector store created and persisted to './chroma_db'


## Performing Similarity Search
Now that our vector store is set up, we can perform similarity searches. This is a key component of the retrieval process in our RAG system:

In [78]:
query = "When was Langchain founded?"
search_results = vectorstore.similarity_search(query, k=2) # retrieve top 2 most relevant documents
print(f"\nTop 2 most relevant documents for the query: '{query}'\n")
for i, result in enumerate(search_results, 1):
    print(f"Result {i}:")
    print(f"Source: {result.metadata.get('source', 'Unknown')}")
    print(f"Content: {result.page_content}")
    print()


Top 2 most relevant chunks for the query: 'When was Langchain founded?'

Result 1:
Source: docs/langchain_turing.pdf
Content: LangChain
Vasilios Mavroudis
Alan Turing Institute
vmavroudis@turing.ac.uk
Abstract. LangChain is a rapidly emerging framework that offers a ver-
satile and modular approach to developing applications powered by large
language models (LLMs). By leveraging LangChain, developers can sim-
plify complex stages of the application lifecycle—such as development,
productionization, and deployment—making it easier to build scalable,
stateful, and contextually aware applications. It provides tools for han-
dling chat models, integrating retrieval-augmented generation (RAG),
and offering secure API interactions. With LangChain, rapid deployment
of sophisticated LLM solutions across diverse domains becomes feasible.
However, despite its strengths, LangChain’s emphasis on modularity and
integration introduces complexities and potential security concerns that
warrant critica

### Creating a Retriever
We can also create a retriever from our vector store, which will be useful when we build our full RAG chain:

In [79]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
retriever_results = retriever.invoke("When was GreenGrow Innovations founded?")
print(retriever_results)

[Document(id='9cbaf462-8fd6-4899-876f-1941c087620d', metadata={'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'author': '', 'title': '', 'producer': 'pdfTeX-1.40.26', 'moddate': '2024-11-06T10:08:55+00:00', 'page_label': '14', 'subject': '', 'keywords': '', 'source': 'docs/langchain_turing.pdf', 'creator': 'LaTeX with hyperref', 'trapped': '/False', 'page': 13, 'creationdate': '2024-11-06T10:08:55+00:00', 'total_pages': 14}, page_content='11. Grzegorz Malewicz, Matthew H Austern, Aart JC Bik, James C Dehnert, Ilan\nHorn, Naty Leiser, and Grzegorz Czajkowski. Pregel: A System for Large-Scale\nGraph Processing. InProceedings of the 2010 ACM SIGMOD International Con-\nference on Management of Data, pages 135–146, 2010.\n12. OpenAI. Hello GPT-4O, 05 2024.\n13. OpenAI. Introducing OpenAI O1-Preview, 09 2024.\n14. Gemini Team, Rohan Anil, Sebastian Borgeaud, Jean-Baptiste Alayrac, Jiahui\nYu, Radu Soricut, Johan Schalkwyk, Andrew 

The retriever provides a convenient interface for retrieving relevant documents, which we'll use when constructing our RAG pipeline.

By setting up this vector store, we've created a powerful tool for quickly finding relevant information from our document collection. This is a crucial component of our RAG system, enabling it to retrieve context-relevant information to support the language model's responses.

## Creating the RAG Chain
Let's construct our RAG chain using LangChain components:

In [82]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

template = """Answer the question based only on the following context:
{context}
Question: {question}
Answer: """

prompt = ChatPromptTemplate.from_template(template)

def docs2str(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | docs2str, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("When was Langchain founded?")

'The provided context does not mention when LangChain was founded.'