# Context-Aware Chatbot Using LangChain and RAG

This project builds a conversational chatbot that retrieves information from a knowledge base using Retrieval-Augmented Generation (RAG) while maintaining conversation context.

## Project Initialization

In [1]:
# Smoke test: prints a fixed greeting and nothing else.
# NOTE(review): looks like leftover scaffolding — consider removing before sharing.
print("Hello")

Hello


In [2]:
# Install required packages
import subprocess
import sys

# Third-party distributions this notebook depends on.
# NOTE(review): versions are unpinned, so a fresh run may pull different
# releases than the ones this notebook was authored against — consider
# pinning (e.g. "package==X.Y.Z") once known-good versions are confirmed.
packages = [
    "langchain",
    "langchain-huggingface",
    "langchain-community",
    "langchain-text-splitters",
    "sentence-transformers",
    "faiss-cpu",
    "transformers",
    "torch",
    "streamlit",
]

print("Installing required packages...")
# A single pip invocation for the whole list: pip resolves all requirements
# together (instead of nine independent resolves where a later install can
# change what an earlier one pinned) and only one subprocess is spawned.
# `sys.executable -m pip` targets the interpreter running this kernel.
# check_call raises CalledProcessError if pip exits with a non-zero status.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", *packages])

print("✓ All packages installed successfully")

Installing required packages...
✓ All packages installed successfully


In [3]:
# Core imports for the notebook: standard library first, then third-party.
import warnings
from typing import Dict, List

# Ignore every warning category. The filter is installed ahead of the
# third-party imports so warnings raised while those packages load are
# suppressed as well.
warnings.filterwarnings(action="ignore")

from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

print("✓ Imports successful")

✓ Imports successful


## 1. Create Sample Knowledge Base

In [None]:
# Process documents: Split into chunks and create embeddings
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
    separators=["\n\n", "\n", " ", ""]
)

# Split documents into chunks
document_chunks = text_splitter.create_documents(sample_documents)

# Initialize embeddings
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Create FAISS vector store
vector_store = FAISS.from_documents(document_chunks, embeddings)

print(f"✓ Split documents into {len(document_chunks)} chunks")
print(f"✓ Generated embeddings with dimension: {embeddings.embed_query('test').__len__()}")
print(f"✓ Created FAISS vector store successfully")

In [None]:
# Create sample knowledge base
sample_documents = [
    """LangChain is a framework for developing applications powered by language models. 
    It enables applications that are data-aware and agentic, allowing them to interact with 
    their environment and use external tools for computation and information retrieval.""",
    
    """Retrieval-Augmented Generation (RAG) combines retrieval and generation capabilities. 
    It retrieves relevant documents from a knowledge base and uses them to augment the prompt 
    for better, more contextual responses from language models.""",
    
    """Vector databases like FAISS store embeddings of documents, enabling semantic search. 
    When a user query is converted to embeddings, the database finds similar documents 
    based on vector similarity, which is faster than traditional keyword matching.""",
    
    """Sentence Transformers are pre-trained models that encode text into dense vector representations. 
    These embeddings capture semantic meaning, allowing documents with similar meaning to have 
    similar vectors regardless of exact wording."""
]

print(f"✓ Created {len(sample_documents)} sample documents for knowledge base")
print(f"Sample preview: {sample_documents[0][:100]}...")