# Context-Aware Chatbot Using LangChain and RAG

This project builds a conversational chatbot that retrieves information from a knowledge base using Retrieval-Augmented Generation (RAG) while maintaining conversation context.

## Project Initialization

In [7]:
# Install required packages
import subprocess
import sys

# Third-party distributions this notebook depends on.
packages = [
    "langchain",
    "langchain-huggingface",
    "langchain-community",
    "langchain-text-splitters",
    "sentence-transformers",
    "faiss-cpu",
    "transformers",
    "torch",
    "streamlit"
]

print("Installing required packages...")
# Hand every requirement to a single pip invocation. Installing them one at a
# time makes pip resolve each package without knowledge of the others, so a
# later install can replace a dependency an earlier one just pulled in. One
# call resolves the whole set together (and starts pip only once).
# `sys.executable -m pip` targets the interpreter that is running this kernel.
# check_call raises CalledProcessError if pip exits with a non-zero status.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", *packages])

print("✓ All packages installed successfully")

Installing required packages...
✓ All packages installed successfully


In [8]:
# Import Core Libraries
import warnings
# Suppress every Python warning from this point on. The filter is installed
# before the third-party imports below, so it is already active while they are
# being imported. Note that it hides deprecation notices as well.
warnings.filterwarnings('ignore')

# Text chunking, sentence-embedding wrapper, and FAISS vector index
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# Generic aliases used in type annotations further down the notebook
from typing import List, Dict

print("✓ Imports successful")

✓ Imports successful


## 1. Create Sample Knowledge Base and Vector Store

In [11]:
# Chunk the knowledge-base texts, embed the chunks, and index them in FAISS.
# NOTE: `sample_documents` must already exist in the kernel; in this notebook
# it is assigned by the "Create sample knowledge base" cell further down.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_overlap=50,
    chunk_size=300,
)
document_chunks = text_splitter.create_documents(sample_documents)

# Sentence-embedding model used for the chunks
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Vector index built from the embedded chunks
vector_store = FAISS.from_documents(document_chunks, embeddings)

# Status report; the vector width is measured by embedding a throwaway query.
print(f"✓ Split documents into {len(document_chunks)} chunks")
print(f"✓ Generated embeddings with dimension: {len(embeddings.embed_query('test'))}")
print("✓ Created FAISS vector store successfully")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]



special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✓ Split documents into 4 chunks
✓ Generated embeddings with dimension: 384
✓ Created FAISS vector store successfully


In [10]:
# Create sample knowledge base
# The passages are written as indented triple-quoted literals so they are easy
# to read in the notebook. That layout (newlines, leading indentation, trailing
# spaces, a stray blank line) is NOT part of the knowledge itself, so it is
# stripped out below before the text is chunked and embedded.
_raw_sample_documents = [
    """LangChain is a framework for developing applications powered by language models.
    It enables applications that are data-aware and agentic, allowing them to interact with
    their environment and use external tools for computation and information retrieval.""",

    """Retrieval-Augmented Generation (RAG) combines retrieval and generation capabilities.
    It retrieves relevant documents from a knowledge base and uses them to augment the prompt
    for better, more contextual responses from language models.""",

    """Vector databases like FAISS store embeddings of documents, enabling semantic search.
    When a user query is converted to embeddings, the database finds similar documents
    based on vector similarity, which is faster than traditional keyword matching.""",

    """Sentence Transformers are pre-trained models that encode text into dense vector representations.
    These embeddings capture semantic meaning, allowing documents with similar meaning to have
    similar vectors regardless of exact wording."""
]

# Collapse every run of whitespace to a single space. Without this, the source
# indentation ends up inside the embedded text and also eats into any
# character budget applied to a document later on.
sample_documents = [" ".join(passage.split()) for passage in _raw_sample_documents]

print(f"✓ Created {len(sample_documents)} sample documents for knowledge base")
print(f"Sample preview: {sample_documents[0][:100]}...")

✓ Created 4 sample documents for knowledge base
Sample preview: LangChain is a framework for developing applications powered by language models. 
    It enables app...


In [12]:
# Expose the vector store as a retriever: similarity search, 3 results per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 3}, search_type="similarity")


In [17]:
# Create conversation memory for context awareness
# Simple memory implementation to store chat history

class ConversationMemory:
    """In-process store for the chat history of one conversation.

    Messages are kept in insertion order as ``{"role": ..., "content": ...}``
    dictionaries.

    Args:
        memory_key: Label stored on the instance as ``memory_key``. It is not
            used by any method of this class.
    """
    def __init__(self, memory_key: str = "chat_history"):
        self.memory_key = memory_key
        # Ordered history; each entry has exactly the keys "role" and "content".
        self.messages: List[Dict[str, str]] = []

    def add_message(self, role: str, content: str) -> None:
        """Append one message to the end of the history.

        Args:
            role: Who produced the message (e.g. "user" or "assistant").
            content: The message text.
        """
        self.messages.append({"role": role, "content": content})

    def get_memory(self) -> List[Dict[str, str]]:
        """Return a snapshot of the stored messages, oldest first.

        The list and the dictionaries in it are copies, so a caller that
        modifies the result cannot alter the stored history by accident.
        """
        return [dict(message) for message in self.messages]

    def clear(self) -> None:
        """Forget every stored message."""
        self.messages = []

# Create the shared conversation memory used by the rest of the notebook
memory = ConversationMemory("chat_history")

# Report the configuration back to the reader
for status_line in (
    "✓ Conversation memory initialized successfully",
    f"Memory configured with: {memory.memory_key}",
    "Ready to store multi-turn conversations",
):
    print(status_line)

✓ Conversation memory initialized successfully
Memory configured with: chat_history
Ready to store multi-turn conversations


In [19]:
# Initialize Language Model (LLM) for response generation
from transformers import pipeline
# Use the wrapper from `langchain_huggingface` (already a dependency of this
# notebook); the `langchain_community.llms` import path for this class is
# deprecated in favour of it.
from langchain_huggingface import HuggingFacePipeline

# Single source of truth for the checkpoint, so the status line below cannot
# drift from what is actually loaded.
LLM_MODEL_ID = "google/flan-t5-small"

# Load text generation model
# NOTE(review): when this cell was last run, transformers warned that
# 'T5ForConditionalGeneration' is not supported for the "text-generation" task.
# Either pick a task that matches this seq2seq checkpoint (if the installed
# transformers release offers one) or switch to a causal-LM checkpoint.
# TODO confirm against the pinned transformers version.
hf_pipeline = pipeline(
    "text-generation",
    model=LLM_MODEL_ID,
    max_new_tokens=256
)

# Wrap with LangChain
llm = HuggingFacePipeline(pipeline=hf_pipeline)

print("✓ LLM initialized successfully")
print(f"Model: {LLM_MODEL_ID}")

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/190 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Passing `generation_config` together with generation-related arguments=({'max_new_tokens'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.
The model 'T5ForConditionalGeneration' is not supported for text-generation. Supported models are ['PeftModelForCausalLM', 'AfmoeForCausalLM', 'ApertusForCausalLM', 'ArceeForCausalLM', 'AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BitNetForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'BltForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'CwmForCausalLM', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV2ForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaF

✓ LLM initialized successfully
Model: google/flan-t5-small


In [None]:
# Create RAG chain for conversational retrieval
# Custom implementation for combining retriever + LLM + memory

class RAGChain:
    """Minimal retrieval-augmented generation chain.

    Combines three collaborators supplied by the caller:

    * ``llm`` - object whose ``invoke(prompt)`` returns the generated answer.
    * ``retriever`` - object whose ``invoke(query)`` returns an iterable of
      items exposing a ``page_content`` attribute.
    * ``memory`` - object providing ``get_memory()`` (a list of dicts with
      ``"role"`` and ``"content"`` keys) and ``add_message(role, content)``.
    """
    def __init__(self, llm, retriever, memory):
        self.llm = llm
        self.retriever = retriever
        self.memory = memory

    def retrieve_documents(self, query: str) -> List[str]:
        """Return the text of every document the retriever yields for ``query``."""
        docs = self.retriever.invoke(query)
        return [doc.page_content for doc in docs]

    def format_context(self, retrieved_docs: List[str], max_chars: int = 200) -> str:
        """Render retrieved documents as the context section of the prompt.

        Each document becomes one ``Document: ...`` entry; entries are
        separated by a blank line.

        Args:
            retrieved_docs: Document texts, in the order they should appear.
            max_chars: Maximum number of characters kept per document
                (expected to be non-negative). Longer documents are cut at
                this length and marked with a trailing ``...``; shorter ones
                are included verbatim, without the ellipsis, so the marker
                only appears when text was really dropped.

        Returns:
            The formatted context, or an empty string when there are no
            documents.
        """
        entries = []
        for doc in retrieved_docs:
            if len(doc) > max_chars:
                entries.append(f"Document: {doc[:max_chars]}...")
            else:
                entries.append(f"Document: {doc}")
        return "\n\n".join(entries)

    def generate_response(self, query: str) -> Dict:
        """Answer ``query`` using retrieved context and the chat history.

        Steps: retrieve documents, build the prompt from the context, the
        history stored so far and the query, call the LLM, then record the
        user query and the answer in memory.

        Args:
            query: The user's question.

        Returns:
            Dict with ``"response"`` (whatever ``llm.invoke`` returned) and
            ``"source_documents"`` (the full, untruncated retrieved texts).
        """
        # Retrieve relevant documents
        retrieved_docs = self.retrieve_documents(query)

        # Format context
        context = self.format_context(retrieved_docs)

        # Get chat history (read before this turn is stored, so the prompt
        # contains only previous turns)
        chat_history = self.memory.get_memory()
        history_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in chat_history])

        # Combine prompt
        rag_prompt = f"""Context from knowledge base:
{context}

Chat History:
{history_text}

User Query: {query}

Provide a helpful response:"""

        # Generate response
        response = self.llm.invoke(rag_prompt)

        # Store in memory only after generation succeeded
        self.memory.add_message("user", query)
        self.memory.add_message("assistant", response)

        return {
            "response": response,
            "source_documents": retrieved_docs
        }

# Wire the LLM, retriever and conversation memory created above into one chain
rag_chain = RAGChain(llm=llm, retriever=retriever, memory=memory)

# Confirm readiness to the reader
for status_line in (
    "✓ RAG chain initialized successfully",
    "Ready for conversational retrieval tasks",
):
    print(status_line)

ModuleNotFoundError: No module named 'langchain.chains'