### Building RAG System with LangChain and ChromaDb

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
# langchain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
#vectorstore
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import DirectoryLoader, TextLoader # To load a document from any dir
#Embedding
from sentence_transformers import SentenceTransformer
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline # Huggingface model
from langchain_groq import ChatGroq
from langchain.chat_models.base import init_chat_model # Initialize llm model
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
#RAG using LCEL
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from dotenv import load_dotenv
#import utilities
import numpy as np
from typing import List

import tempfile

  from .autonotebook import tqdm as notebook_tqdm


Create Data (Document)

In [2]:
# Define a list of documents, each as a separate string
docs = [
    '''
    Python Programming Language
    
    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  
    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  
    üöÄ Widely used in web development, data science, automation, AI, and more.  
    üìö Python has a vast ecosystem of libraries and frameworks like Django, NumPy, and TensorFlow.  
    üåç Its clear syntax and strong community make it ideal for beginners and professionals alike.
    ''',

    '''
    Introduction to Machine Learning
    
    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  
    üìà It uses algorithms to identify patterns, make predictions, or improve performance over time.  
    üß† ML models are trained on datasets and adjust themselves based on feedback or new inputs.  
    üîç Common types include supervised, unsupervised, and reinforcement learning.  
    üí¨ Applications range from speech recognition and spam filtering to fraud detection and self-driving cars.  
    üõ†Ô∏è Tools like Python, TensorFlow, and Scikit-learn are popular in ML development.  
    üåç ML is transforming industries by automating decisions and uncovering insights from big data.
    ''',

    '''
    Django for Backend Development
    
    üåê **Django** is a high-level Python web framework that enables rapid development of secure and maintainable websites.  
    ‚öôÔ∏è It follows the Model-View-Template (MVT) architectural pattern to separate data, logic, and presentation.  
    üöÄ Django comes with built-in features like authentication, admin interface, and URL routing.  
    üõ°Ô∏è It emphasizes security, helping developers avoid common threats like SQL injection and cross-site scripting.  
    üì¶ With its "batteries-included" philosophy, Django provides everything needed to build full-stack web apps.  
    üîç It's widely used for content management systems, social networks, and scientific platforms.  
    üåç Django powers popular sites like Instagram, Pinterest, and Mozilla.
    ''',

    '''
    Introduction to Deep Learning
    
    üß† **Deep Learning** is a subset of machine learning that uses neural networks with many layers to model complex patterns in data.  
    üîó These layers mimic the human brain‚Äôs structure, enabling systems to learn hierarchical representations.  
    üìö It excels at tasks like image recognition, natural language processing, and speech translation.  
    üß™ Deep learning models require large datasets and powerful computing resources to train effectively.  
    üï∏Ô∏è Popular architectures include Convolutional Neural Networks (CNNs) and Recurrent Neural Networks (RNNs).  
    üõ†Ô∏è Frameworks like TensorFlow and PyTorch are commonly used to build deep learning applications.  
    üöÄ Deep learning drives innovations in autonomous vehicles, medical diagnostics, and generative AI.
    '''
]

# Verify how many documents are in the list
print(len(docs))  # Should print 4

4


In [3]:
# Import the tempfile module, which provides functions to create temporary files and directories
import tempfile

# Import the os module, which provides utilities for file and path operations
import os

# mkdtemp() creates a temporary directory and returns its path as a string
# Unlike mkstemp(), this avoids dealing with file descriptors
temp_dir = tempfile.mkdtemp()

# Loop through the list of documents 'docs', with both index (i) and content (doc)
for i, doc in enumerate(docs):
    
    # Construct a new file path inside the temporary directory
    # Each file will be named "doc_0.txt", "doc_1.txt", etc.
    file_path = os.path.join(temp_dir, f'doc_{i}.txt')
    
    # Open the file in write mode ("w") with UTF-8 encoding to support all Unicode characters
    with open(file_path, 'w', encoding='utf-8') as f:
        
        # Write the document content into the file
        f.write(doc)

# Print the location of the temporary directory where files were created
print(f'Sample documents created in: {temp_dir}')
print("Files created:", os.listdir(temp_dir))

Sample documents created in: C:\Users\USER\AppData\Local\Temp\tmp7gffdysx
Files created: ['doc_0.txt', 'doc_1.txt', 'doc_2.txt', 'doc_3.txt']


In [4]:
# Import the tempfile module, which provides functions to create temporary files and directories
import tempfile

# Import the os module, which provides utilities for file and path operations
import os

# Assume docs is defined here
# ...

# mkdtemp() creates a temporary directory and returns its path as a string
# Unlike mkstemp(), this avoids dealing with file descriptors
temp_dir = tempfile.mkdtemp()

# Loop through the list of documents 'docs', with both index (i) and content (doc)
for i, doc in enumerate(docs):
    
    # Construct a new file path inside the temporary directory (FIXED)
    # Each file will be named "doc_0.txt", "doc_1.txt", etc.
    #file_path = os.path.join(temp_dir, f'doc_{i}.txt') # NOW INCLUDES temp_dir
    
    # Open the file in write mode ("w") with UTF-8 encoding to support all Unicode characters (FIXED)
    with open(f'doc_{i}.txt', 'w', encoding='utf-8') as f: # NOW USES the full file_path
        
        # Write the document content into the file
        f.write(doc)

# Print the location of the temporary directory where files were created
print(f'Sample documents created in: {temp_dir}')

Sample documents created in: C:\Users\USER\AppData\Local\Temp\tmpfqeojx3i


Document Loading

In [5]:
# Load documents from directory

loader = DirectoryLoader( # Initialize a DirectoryLoader object to find and load text files from a specific folder.
    'data', # Specify the directory to search within.
    glob='*.txt', # Use a glob pattern to only select files ending in '.txt'.
    loader_cls=TextLoader, # Define the class to be used for loading each file (TextLoader handles simple text).
    loader_kwargs={'encoding': 'utf-8'} # Pass arguments to the TextLoader, ensuring files are read with UTF-8 encoding.
)

documents = loader.load() # Execute the loader to read all matching files and store their contents as a list of 'documents'.

print(f'Loaded {len(documents)} documents')
print(f'\nFirst document preview:') 
print(documents[0].page_content[:200] + '...') # Print the beginning content (first 200 characters) of the first loaded document for verification.

Loaded 4 documents

First document preview:

    Python Programming Language

    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  
    üí° It supports multiple programming paradigms, inclu...


In [6]:
documents

[Document(metadata={'source': 'data\\doc_0.txt'}, page_content='\n    Python Programming Language\n\n    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  \n    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  \n    üöÄ Widely used in web development, data science, automation, AI, and more.  \n    üìö Python has a vast ecosystem of libraries and frameworks like Django, NumPy, and TensorFlow.  \n    üåç Its clear syntax and strong community make it ideal for beginners and professionals alike.\n    '),
 Document(metadata={'source': 'data\\doc_1.txt'}, page_content='\n    Introduction to Machine Learning\n\n    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  \n    üìà It uses algorithms to identify patterns, make predictions, or improve performance over time.  \n    

Document Splitting

In [7]:
# Create a RecursiveCharacterTextSplitter instance to break long text into smaller, structured segments for efficient LLM input
test_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,              # Limit each chunk to 500 characters
    chunk_overlap=50,            # Retain 50 characters of overlap for context continuity
    length_function=len,         # Measure chunk size using character count
    separators=[' ']  # Use spaces as the main split point; can be expanded to paragraphs, sentences, or characters
)

#separators=['\n\n', '. ', ' ',  ''] # Ordered list of delimiters, split hierarchically: # Split by paragraphs, sentences, words, then characters

chunks = test_splitter.split_documents(documents)

In [8]:
print(f'Created {len(chunks)} chunks from {len(documents)} documents')
print(f'\n Chunk examples')
print(f'Content: {chunks[0].page_content[:150]}...')
print(f'Metadata: {chunks[0].metadata}')

Created 8 chunks from 4 documents

 Chunk examples
Content: Python Programming Language

    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  
    üí° It s...
Metadata: {'source': 'data\\doc_0.txt'}


In [9]:
print('Documents')
documents

Documents


[Document(metadata={'source': 'data\\doc_0.txt'}, page_content='\n    Python Programming Language\n\n    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  \n    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  \n    üöÄ Widely used in web development, data science, automation, AI, and more.  \n    üìö Python has a vast ecosystem of libraries and frameworks like Django, NumPy, and TensorFlow.  \n    üåç Its clear syntax and strong community make it ideal for beginners and professionals alike.\n    '),
 Document(metadata={'source': 'data\\doc_1.txt'}, page_content='\n    Introduction to Machine Learning\n\n    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  \n    üìà It uses algorithms to identify patterns, make predictions, or improve performance over time.  \n    

In [10]:
print(f'{len(documents)} is divided into {len(chunks)}')
chunks

4 is divided into 8


[Document(metadata={'source': 'data\\doc_0.txt'}, page_content='Python Programming Language\n\n    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  \n    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  \n    üöÄ Widely used in web development, data science, automation, AI, and more.  \n    üìö Python has a vast ecosystem of libraries and frameworks like Django, NumPy, and TensorFlow.  \n    üåç Its clear syntax and strong community make it ideal for'),
 Document(metadata={'source': 'data\\doc_0.txt'}, page_content='syntax and strong community make it ideal for beginners and professionals alike.'),
 Document(metadata={'source': 'data\\doc_1.txt'}, page_content='Introduction to Machine Learning\n\n    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  \n    üìà It us

Embedding Model

1. Embedding Model With HuggingFace

In [11]:
# 1. With HuggingFace

hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')

In [12]:
# Initialize Hugging Face embeddings
hf_emddings = HuggingFaceEmbeddings(model='sentence-transformers/all-MiniLM-L6-v2')

# Example query
text = 'LangChain makes it easy to use LLMs.'
vector = hf_emddings.embed_query(text)
print(f'HF embedding length: {len(vector)}')
print(f'HF embedding preview: {vector[:10]}') 

HF embedding length: 384
HF embedding preview: [-0.024293294176459312, -0.04733593016862869, 0.02925313450396061, -0.07217934727668762, -0.03708483651280403, -0.0029654146637767553, -0.017474163323640823, 0.06621578335762024, 0.025721492245793343, 0.005787475500255823]


In [13]:
vector

[-0.024293294176459312,
 -0.04733593016862869,
 0.02925313450396061,
 -0.07217934727668762,
 -0.03708483651280403,
 -0.0029654146637767553,
 -0.017474163323640823,
 0.06621578335762024,
 0.025721492245793343,
 0.005787475500255823,
 0.027516314759850502,
 0.05101742595434189,
 0.02604765072464943,
 0.023985203355550766,
 0.057821813970804214,
 0.030977502465248108,
 0.06038447842001915,
 0.09654975682497025,
 0.0828777402639389,
 -0.08375709503889084,
 -0.016788316890597343,
 -0.05233118683099747,
 0.03956417366862297,
 0.02312644198536873,
 0.06321760267019272,
 -0.03850008174777031,
 0.0018039962742477655,
 0.07416681200265884,
 0.08565447479486465,
 -0.09831225872039795,
 0.017451513558626175,
 0.10537837445735931,
 -0.01619138941168785,
 -0.017408110201358795,
 -0.14930865168571472,
 0.034462373703718185,
 0.003808281384408474,
 0.021017681807279587,
 -0.06894774734973907,
 -0.035061608999967575,
 -0.034660983830690384,
 -0.013271057978272438,
 0.05375698208808899,
 -0.029748683795

Initialize ChromaDB Vector Store, then insert the chunks in vector format

In [14]:
# Create Chromadb vector store

persist_directory = './chroma_db' # Path/directory where the vector store data will be saved

# Instantiate Chroma from documents using the specified embedding model(HuggingFace)
vectorstore = Chroma.from_documents(
    documents=chunks, # Documents/chunks to be embedded
    embedding=HuggingFaceEmbeddings(), # Model to create embeddings (HuggingFace)
    persist_directory=persist_directory, # Save the store to this directory
    collection_name='rag_collection' # Name for the collection in Chroma
)

print(f'Vector store created with {vectorstore._collection.count()} vectors')
print(f'Persisted to: {persist_directory}')

Vector store created with 32 vectors
Persisted to: ./chroma_db


2. Embedding Model with OpenAI

In [15]:
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_KEY')

In [16]:
sample_text = 'What is machine learning?'
embeddings = OpenAIEmbeddings()
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000001C82B1F7B60>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000001C82CFD8590>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [17]:
# vector = embeddings.embed_query(sample_text)
# vector

Test Similarity Search

In [18]:
query = 'List the type of machine learning'

similar_docs = vectorstore.similarity_search(query, k=2)
similar_docs

[Document(metadata={'source': 'data\\doc_1.txt'}, page_content='Introduction to Machine Learning\n\n    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  \n    üìà It uses algorithms to identify patterns, make predictions, or improve performance over time.  \n    üß† ML models are trained on datasets and adjust themselves based on feedback or new inputs.  \n    üîç Common types include supervised, unsupervised, and reinforcement learning.  \n    üí¨ Applications range from'),
 Document(metadata={'source': 'data\\doc_1.txt'}, page_content='Introduction to Machine Learning\n\n    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  \n    üìà It uses algorithms to identify patterns, make predictions, or improve performance over time.  \n    üß† ML models are trained on datasets and adjust them

In [19]:
query1 = 'what is a python?'

similar_docs_1 = vectorstore.similarity_search(query1, k=3)
similar_docs_1

[Document(metadata={'source': 'data\\doc_0.txt'}, page_content='Python Programming Language\n\n    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  \n    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  \n    üöÄ Widely used in web development, data science, automation, AI, and more.  \n    üìö Python has a vast ecosystem of libraries and frameworks like Django, NumPy, and TensorFlow.  \n    üåç Its clear syntax and strong community make it ideal for'),
 Document(metadata={'source': 'data\\doc_0.txt'}, page_content='Python Programming Language\n\n    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  \n    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  \n    üöÄ Widely used in web development, data science, automation, AI, and more.  \n    

In [20]:
print(f'Query: {query}')
print(f'\n {len(similar_docs)} similar chunks:')

for i, doc in enumerate(similar_docs):
    print(f'\n********** chunk {i+1} *************')
    print(doc.page_content[:300] + '...')
    print(f'Source: {doc.metadata.get('source', 'Unknown')}')

Query: List the type of machine learning

 2 similar chunks:

********** chunk 1 *************
Introduction to Machine Learning

    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  
    üìà It uses algorithms to identify patterns, make predictions, or improve performance over time.  
    üß† ML models...
Source: data\doc_1.txt

********** chunk 2 *************
Introduction to Machine Learning

    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  
    üìà It uses algorithms to identify patterns, make predictions, or improve performance over time.  
    üß† ML models...
Source: data\doc_1.txt


In [21]:
query1 = 'what is a python?'

similar_docs_1 = vectorstore.similarity_search(query1, k=3)
similar_docs_1

[Document(metadata={'source': 'data\\doc_0.txt'}, page_content='Python Programming Language\n\n    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  \n    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  \n    üöÄ Widely used in web development, data science, automation, AI, and more.  \n    üìö Python has a vast ecosystem of libraries and frameworks like Django, NumPy, and TensorFlow.  \n    üåç Its clear syntax and strong community make it ideal for'),
 Document(metadata={'source': 'data\\doc_0.txt'}, page_content='Python Programming Language\n\n    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  \n    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  \n    üöÄ Widely used in web development, data science, automation, AI, and more.  \n    

In [22]:
print(f'Query: {query1}')
print(f'\n {len(similar_docs_1)} similar chunks:')

for i, doc in enumerate(similar_docs_1):
    print(f'\n********** chunk {i+1} *************')
    print(doc.page_content[:300] + '...')
    print(f'Source: {doc.metadata.get('source', 'Unknown')}')

Query: what is a python?

 3 similar chunks:

********** chunk 1 *************
Python Programming Language

    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  
    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  
    üöÄ Widely used in web development, data sci...
Source: data\doc_0.txt

********** chunk 2 *************
Python Programming Language

    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  
    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  
    üöÄ Widely used in web development, data sci...
Source: data\doc_0.txt

********** chunk 3 *************
Python Programming Language

    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  
    üí° It supports multiple programm

Similarity Search with Score

In [23]:
similarity_score1 = vectorstore.similarity_search_with_score(query, k=3)
similarity_score1

[(Document(metadata={'source': 'data\\doc_1.txt'}, page_content='Introduction to Machine Learning\n\n    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  \n    üìà It uses algorithms to identify patterns, make predictions, or improve performance over time.  \n    üß† ML models are trained on datasets and adjust themselves based on feedback or new inputs.  \n    üîç Common types include supervised, unsupervised, and reinforcement learning.  \n    üí¨ Applications range from'),
  0.5205760598182678),
 (Document(metadata={'source': 'data\\doc_1.txt'}, page_content='Introduction to Machine Learning\n\n    ü§ñ **Machine Learning (ML)** is a branch of artificial intelligence that enables systems to learn from data without being explicitly programmed.  \n    üìà It uses algorithms to identify patterns, make predictions, or improve performance over time.  \n    üß† ML models are trained on

In [24]:
similarity_score2 = vectorstore.similarity_search_with_score(query1, k=3)
similarity_score2

[(Document(metadata={'source': 'data\\doc_0.txt'}, page_content='Python Programming Language\n\n    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  \n    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  \n    üöÄ Widely used in web development, data science, automation, AI, and more.  \n    üìö Python has a vast ecosystem of libraries and frameworks like Django, NumPy, and TensorFlow.  \n    üåç Its clear syntax and strong community make it ideal for'),
  0.8915737867355347),
 (Document(metadata={'source': 'data\\doc_0.txt'}, page_content='Python Programming Language\n\n    üêç **Python** is a high-level, interpreted programming language known for its simplicity and readability.  \n    üí° It supports multiple programming paradigms, including procedural, object-oriented, and functional styles.  \n    üöÄ Widely used in web development, data science, automati

Initialize LLM(huggingFace), RAG Chain, Prompt Template, Query the RAG System 

In [25]:
# from langchain_openai import ChatOpenAI

# llm = ChatOpenAI(
#     model_name='gpt-3.5-turbo',
#     temperature=0.2,
#     max_tokens=500
# )



In [26]:
model_name = 'google/flan-t5-base'

tokenizer = AutoTokenizer.from_pretrained(model_name)
llm = AutoModelForSeq2SeqLM.from_pretrained(model_name)

qa_pipeline = pipeline(
    'text2text-generation',
    model=llm,
    tokenizer=tokenizer,
    temperature=0.2
)

Device set to use cpu
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [27]:
question = 'Question: What is the capital of Nigeria? Answer:'
response = qa_pipeline(question, max_new_tokens=100)
print(response[0]['generated_text'])
#response

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


nairobi


In [28]:
#GROQ_API_KEY = os.getenv('GROQ_API_KEY')
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")

llm = init_chat_model('groq:llama-3.2-70b-versatile', api_key=groq_api_key)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001C82CFDBCB0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001C82D0F8440>, model_name='llama-3.2-70b-versatile', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [29]:
# llm.invoke('What is MAchine learning?')

Modern RAG Chain

In [30]:
# Convert vector store to retriever

retriever = vectorstore.as_retriever(
    search_kwargs={'k': 3} # Retrieve top 3 relevant chunks
)
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001C828E42CF0>, search_kwargs={'k': 3})

In [31]:
# Create prompt template

system_prompt = """
You are a helpful AI assistant.
Respond clearly, politely, and concisely, using simple examples when needed.
Limit answers to a maximum of four sentences.
Always incorporate the provided context: {context}

Context: {context}
"""

prompt = ChatPromptTemplate.from_messages([
    ('system', system_prompt),
    ('human', '{input}')
])

In [32]:
prompt

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nYou are a helpful AI assistant.\nRespond clearly, politely, and concisely, using simple examples when needed.\nLimit answers to a maximum of four sentences.\nAlways incorporate the provided context: {context}\n\nContext: {context}\n'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

In [33]:
# Create document chain

document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nYou are a helpful AI assistant.\nRespond clearly, politely, and concisely, using simple examples when needed.\nLimit answers to a maximum of four sentences.\nAlways incorporate the provided context: {context}\n\nContext: {context}\n'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001C82CFDBCB0>, async_client=<groq.resources.chat.completions.AsyncCompletions object a

##### document_chain = create_stuff_documents_chain(llm, prompt) performs the following key steps:

Creates a Chain: It constructs a specific type of LangChain sequence.

The "Stuff" Strategy: It uses the stuff method, which is the simplest way to get data into a model. It takes all the documents provided, joins them into a single string, and "stuffs" that entire string into the context window of the Large Language Model (llm) along with the user's question, according to the structure defined by the prompt.

Takes Inputs:

llm: The initialized Language Model (e.g., GPT-4, Llama 3) that will process the document content.

prompt: A custom or standard ChatPromptTemplate that defines how the model should treat the stuffed documents (e.g., "Use the following context to answer the question...").

In [34]:
# Create the final RAG Chain

rag_chain = create_retrieval_chain(
    retriever,
    document_chain
)

rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001C828E42CF0>, search_kwargs={'k': 3}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nYou are a helpful AI assistant.\nRespond clearly, politely, and concisely, using simple examples when needed.\nLimit answer

In [35]:
#response = rag_chain.invoke({'input': 'What is a Deep learning'})
#response

In [36]:
response

[{'generated_text': 'nairobi'}]

Create RAG Chain Using LCEL(LangChain Expression Language) 

In [37]:
# Create a custom prompt
custom_prompt = ChatPromptTemplate.from_template(
    '''
    Use the following context to answer the question.
    If you don't know the answer to the question, say you have no idea.
    Provide specific details from the context to support your answer.
    
    Context:
    {context}
    
    Question: {question}
    
    Answer:
    '''
)
custom_prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="\n    Use the following context to answer the question.\n    If you don't know the answer to the question, say you have no idea.\n    Provide specific details from the context to support your answer.\n\n    Context:\n    {context}\n\n    Question: {question}\n\n    Answer:\n    "), additional_kwargs={})])

In [38]:
# Format the output documents for the prompt
def format_docs(docs):
    
    format_docs = '\n\n'.join(doc.page_content for doc in docs)
    return format_docs

In [39]:
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001C828E42CF0>, search_kwargs={'k': 3})

In [40]:
# Build chain using LCEL
rag_chain_lcel = (
    {
        'context': retriever | format_docs,
        'question': RunnablePassthrough()
    }
    | custom_prompt
    | llm
    | StrOutputParser()
)

rag_chain_lcel

{
  context: VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001C828E42CF0>, search_kwargs={'k': 3})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="\n    Use the following context to answer the question.\n    If you don't know the answer to the question, say you have no idea.\n    Provide specific details from the context to support your answer.\n\n    Context:\n    {context}\n\n    Question: {question}\n\n    Answer:\n    "), additional_kwargs={})])
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001C82CFDBCB0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001C

In [41]:
#response = rag_chain_lcel.invoke('What Django?')
response

[{'generated_text': 'nairobi'}]

In [42]:
#retriever.get_relevant_documents('What is machine learning?')

Advanced RAG Technique - Conversational Memory

In [43]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage

In [44]:
contextualize_q_system_prompt = """  # Define the system prompt as a multi-line string
You are given a chat history between a user and an AI assistant, along with the latest user question.  # Explain input context
The latest question may reference prior turns or omit details already discussed, so review the history carefully.  # Note possible dependencies on chat history
Rewrite the question so it is fully self-contained, clear, and unambiguous while preserving the user‚Äôs intent and tone.  # Instruction to rewrite the question
"""  # End of system prompt definition

contextualize_q_prompt = ChatPromptTemplate.from_messages([  # Create a ChatPromptTemplate using messages
    ('system', contextualize_q_system_prompt),  # Add the system role with the defined system prompt
    MessagesPlaceholder('chat_history'),  # Insert a placeholder for the chat history
    ('human', '{input}'),  # Add the human role with the latest user input
])  # End of ChatPromptTemplate definition

In [49]:
# Create history aware retriever
history_aware_retriever = create_history_aware_retriever(
    llm,  # Use the initialized language model
    retriever, # the existing retriever created from the vector store
    contextualize_q_prompt # the custom prompt defined above
)
history_aware_retriever

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001C828E42CF0>, search_kwargs={'k': 3}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, T

In [None]:
# Create a new document chain that includes chat history
qa_system_prompt = """
You are a helpful AI assistant.
When answering questions, always refer to the provided context from previous conversations.
Limit answers to a maximum of four sentences.

Context: {context}
"""

# Create the QA prompt template using chat history
qa_prompt = ChatPromptTemplate.from_messages([ # Create a ChatPromptTemplate using messages  
    ('system', qa_system_prompt), # Add the system role with the defined system prompt to include chat history
    MessagesPlaceholder('chat_history'), # Insert a placeholder for the chat history to maintain context i.e. previous Q&A pairs
    ('human', '{input}'), # Add the human role with the latest user input
])

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)