# LangChain RAG App

Upload a PDF, then ask questions about its content.

In [None]:
# Core dependencies. `langchain-classic` is listed explicitly because the
# notebook imports `langchain_classic.memory` and `langchain_classic.chains`
# directly and should not rely on it arriving as a transitive dependency.
!pip install langchain langchain-classic langchain-community pypdf faiss-cpu sentence-transformers google-generativeai python-dotenv

In [None]:
# Install/upgrade langchain-google-genai, the package the notebook later
# imports ChatGoogleGenerativeAI from.
!pip install -U langchain-google-genai


In [None]:
import os
from pathlib import Path
from dotenv import load_dotenv
from pypdf import PdfReader

import faiss
import numpy as np

from sentence_transformers import SentenceTransformer

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_classic.memory import ConversationBufferMemory
from langchain_classic.chains import ConversationalRetrievalChain

# For Gemini (Google Generative AI)
import google.generativeai as genai

load_dotenv()


## 1 & 2. Set PDF Path and Extract Text

In [None]:
# Location of the PDF to index; change this to point at your own document.
pdf_path = '/content/LoveStories.pdf'
pdf_file = Path(pdf_path).expanduser()
print(f"Using PDF file: {pdf_file}")

reader = PdfReader(pdf_file)
# Build the full text in a single join instead of repeated `+=` (which
# re-copies the growing string on every page). Each page's text is followed
# by a newline, exactly as before. The `or ''` keeps the join safe should a
# page yield None rather than a string.
all_text = "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)
print(f"Extracted {len(all_text)} characters.")


## 3. Chunk Text

In [None]:
def chunk_text(text: str, chunk_size: int = 500, overlap: int = 100) -> list[str]:
    """Split *text* into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so every
    chunk after the first begins with the last ``overlap`` words of the
    previous one.

    Args:
        text: Text to split; words are delimited by any run of whitespace.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared by neighbouring chunks. Must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in document order, each with its words joined by single
        spaces. Empty or whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    # overlap >= chunk_size would give a zero/negative step (the window would
    # never advance); a negative overlap would silently skip words.
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(' '.join(words[start:start + chunk_size]))
        # Once a window reaches the end of the text, any further window would
        # contain only words already present in this chunk, so stop here
        # instead of emitting a redundant tail chunk.
        if start + chunk_size >= len(words):
            break
    return chunks

# Split the extracted document text into overlapping word windows.
chunks = chunk_text(all_text)
total_chunks = len(chunks)
print(f"Total chunks: {total_chunks}")


## 4. Embed Chunks and Build Vector Store

In [None]:
# An empty chunk list (for example, a PDF with no extractable text) cannot be
# indexed. Stop here with an actionable message, before the embedding model is
# loaded, rather than failing with an opaque error inside the vector store.
if not chunks:
    raise ValueError(
        "No text chunks to index; the PDF may contain no extractable text."
    )

# Use HuggingFaceEmbeddings for LangChain compatibility
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create FAISS vector store using LangChain
vectorstore = FAISS.from_texts(chunks, embedding=embeddings)


## 5. Ask Questions

In [None]:
import getpass

from langchain_google_genai import ChatGoogleGenerativeAI

# SECURITY: never hard-code the API key in the notebook. Read it from the
# GOOGLE_API_KEY environment variable (load_dotenv() has already merged any
# local .env file into the environment) and fall back to a hidden prompt.
# A key that was ever committed in source should be treated as compromised
# and revoked.
google_api_key = os.getenv("GOOGLE_API_KEY") or getpass.getpass(
    "Enter your Google API key: "
)
google_api_key = google_api_key.strip()
if not google_api_key:
    raise ValueError(
        "A Google API key is required: set GOOGLE_API_KEY in the environment "
        "or in a .env file."
    )

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=google_api_key)



In [None]:
# Retriever over the vector store, configured with k=3 results per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Chat history buffer, stored under the "chat_history" key as message objects.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Wire the LLM, retriever and memory together into one conversational chain.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    verbose=False,
)


## 6. Interactive Chat

In [None]:
print("Chat with your PDF! Type 'exit' to quit.\n")

# Read questions until the user asks to stop; each one goes through the
# conversational retrieval chain, which also records the turn in its memory.
while True:
    try:
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted: leave the loop
        # cleanly instead of surfacing a traceback.
        print("\nGoodbye!")
        break
    if not query:
        # Don't spend a model call on a blank line.
        continue
    if query.lower() in ('exit', 'quit'):
        print("Goodbye!")
        break
    # `Chain.run` is deprecated; `invoke` takes the input as a dict and
    # returns a dict whose "answer" entry holds the model's reply.
    result = qa_chain.invoke({"question": query})
    answer = result["answer"]
    print(f"Bot: {answer}\n")
