# Basic RAG (Retrieval-Augmented Generation) App

Point the notebook at a PDF file, index its text with sentence embeddings and FAISS, then ask Gemini questions about its content.

In [None]:
# Install required packages (comment this line out if they are already installed)
!pip install pypdf faiss-cpu sentence-transformers google-generativeai python-dotenv

In [None]:
import os
import faiss
import numpy as np
import google.generativeai as genai
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv
load_dotenv()
from pathlib import Path

## 1. Set PDF Path

In [None]:
# Location of the PDF to index.
# Examples: ./report.pdf, C:\\Users\\YOUR_USERNAME\\path\\project.pdf
pdf_path = '/content/LoveStories.pdf'

# Resolve a leading "~" so home-relative paths are accepted as well.
pdf_file = Path(pdf_path)
pdf_file = pdf_file.expanduser()
print(f"Using PDF file: {pdf_file}")

## 2. Extract Text from PDF

In [None]:
# Read the PDF and collect the text of every page, each followed by a newline.
reader = PdfReader(pdf_file)

# `or ""` guards against a page whose extract_text() yields None instead of a
# string (NOTE(review): reported for some PyPDF2/pypdf versions on pages with
# no text layer - confirm for the installed version). Joining once also avoids
# re-copying an ever-growing string on every page.
all_text = "".join((page.extract_text() or "") + "\n" for page in reader.pages)
print(f"Extracted {len(all_text)} characters.")

## 3. Chunk Text

In [None]:
def chunk_text(text, chunk_size=500, overlap=100):
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so each
    chunk shares its first ``overlap`` words with the end of the previous one.
    Chunking stops as soon as a chunk reaches the last word, which avoids a
    trailing chunk made up only of words already covered by its predecessor.

    Args:
        text: The text to split. Words are delimited by any whitespace.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared between neighbouring chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings (words re-joined with single spaces). Empty
        or whitespace-only ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. An
            overlap that is not smaller than the chunk size would otherwise
            make the window never advance and loop forever.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(' '.join(words[start:start + chunk_size]))
        # This chunk already reaches the final word; a further window would
        # only repeat the overlapping tail.
        if start + chunk_size >= len(words):
            break
    return chunks

# Break the extracted text into overlapping word windows using chunk_text's defaults.
chunks = chunk_text(text=all_text)
print(f"Total chunks: {len(chunks)}")

## 4. Embed Chunks and Build Vector Store

In [None]:
# Fail early with a clear message: without any chunks there is no 2-D embedding
# matrix to take `shape[1]` from, and the resulting error would not point at
# the real cause (a PDF with no extractable text).
if not chunks:
    raise ValueError("No text chunks to embed - the PDF may contain no extractable text.")

# Embed every chunk with the sentence-transformer model.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# Convert to float32 once, since that is the dtype handed to the FAISS index.
embeddings = np.asarray(embedder.encode(chunks, show_progress_bar=True), dtype='float32')

# Build a flat L2 index whose dimensionality matches the embedding width.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
print(f"FAISS index built with {index.ntotal} vectors.")

## 5. Ask Questions

In [None]:
# Google Generative AI API setup.
# The key is read from the environment (load_dotenv() in the imports cell also
# loads a local .env file) so it never has to be pasted into the notebook.
google_api_key = os.getenv("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Define it in your environment or in a .env "
        "file before running this cell."
    )
genai.configure(api_key=google_api_key)

# Initialize Gemini model
model = genai.GenerativeModel('gemini-2.0-flash')

def retrieve(query, k=3):
    """Return up to ``k`` text chunks whose embeddings are nearest to ``query``.

    The query is embedded with the module-level ``embedder`` and looked up in
    the module-level FAISS ``index``; the matching entries of ``chunks`` are
    returned in the order the index reports them.

    Args:
        query: The question or search text.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings. It is shorter than ``k`` when the index
        reports fewer than ``k`` valid neighbours.
    """
    q_emb = np.asarray(embedder.encode([query]), dtype='float32')
    _, neighbor_ids = index.search(q_emb, k)
    # FAISS fills missing result slots with -1 when the index holds fewer than
    # k vectors; unfiltered, chunks[-1] would silently return the last chunk.
    return [chunks[i] for i in neighbor_ids[0] if i >= 0]

# Conversation history lives outside the function so it persists across calls.
# Each entry is a dict with the keys 'query' and 'response'.
conversation_history = []

def rag_answer(query):
    """Answer ``query`` with the Gemini model, grounded in retrieved PDF chunks.

    The chunks returned by ``retrieve(query)`` form the context section of the
    prompt, every earlier turn stored in ``conversation_history`` is replayed,
    and a successful answer is appended to ``conversation_history``.

    Args:
        query: The user's question.

    Returns:
        The model's answer with surrounding whitespace stripped, or a short
        apology message when the response carries no usable text.
    """
    # Retrieve relevant context for the current query
    context = '\n'.join(retrieve(query))

    # Replay earlier turns; each turn ends with a newline so the current
    # question follows directly on its own line.
    history_str = ''.join(
        f"User: {turn['query']}\nAssistant: {turn['response']}\n"
        for turn in conversation_history
    )

    # Assembled from flush-left pieces so the source indentation of this
    # function does not leak into the prompt as leading spaces.
    prompt = (
        "Use the following context and conversation history to answer the question. "
        "Keep your answer concise and relevant.\n\n"
        f"Context:\n{context}\n\n"
        f"Conversation History:\n{history_str}"
        f"User: {query}\n"
        "Assistant:"
    )

    # Generate response from the model
    response = model.generate_content(prompt)
    try:
        answer = response.text.strip()
    except ValueError:
        # The `.text` accessor raises ValueError when the response has no
        # valid text part (for example a blocked reply). Report it instead of
        # crashing the chat, and keep the failed turn out of the history.
        return "Sorry, I couldn't generate an answer for that question."

    # Update the conversation history
    conversation_history.append({'query': query, 'response': answer})

    return answer


## 6. Interactive Chat

In [None]:
print("Chat with your PDF! Type 'exit' to quit.\n")

# Simple read-answer loop; runs until the user types an exit command or the
# input stream ends.
while True:
    try:
        # Strip so that " exit " or a trailing newline/space is still recognised.
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # A closed input stream or an interrupted kernel ends the chat cleanly
        # instead of leaving a traceback.
        print("\nGoodbye!")
        break
    if not query:
        # Blank input: nothing to ask, so do not spend a model call on it.
        continue
    if query.lower() in ('exit', 'quit'):
        print("Goodbye!")
        break
    answer = rag_answer(query)
    print(f"Bot: {answer}\n")
