In [None]:
# Imports and Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from typing import TypedDict, List, Dict, Optional
import os
import json
import dotenv

# LLM and embedding related imports
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import StateGraph, START, END

# Evaluation related imports
from ragas.metrics import AnswerAccuracy, ContextRelevance, ResponseGroundedness
from ragas.dataset_schema import SingleTurnSample, EvaluationDataset
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper

# Load environment variables from a .env file (if one is found) into the
# process environment. This runs before the model clients are constructed
# in the next cell.
# NOTE(review): presumably this is what supplies the OpenAI API key used by
# ChatOpenAI / OpenAIEmbeddings — confirm against the project's .env file.
dotenv.load_dotenv()


In [75]:
# Shared clients for the notebook: chat model, embedding model, vector index.

# Chat model that writes the answers (and is later wrapped as the RAGAS judge).
llm = ChatOpenAI(
    temperature=0,
    model="gpt-4o-mini",
)

# Embedding model used by the vector store for both documents and queries.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

# In-memory index; it starts empty and is filled by the vectorization cell.
vectorstore = InMemoryVectorStore(embeddings)

In [None]:
# Document Loading

def load_pdf_documents(folder, loader_cls=None):
    """Load every PDF found directly inside ``folder``.

    Files are visited in sorted filename order so that the resulting list
    (and everything derived from it, e.g. chunk and index order) is the same
    on every run; ``os.listdir`` alone returns entries in arbitrary order.
    The ``.pdf`` extension is matched case-insensitively, so ``REPORT.PDF``
    is picked up as well.

    Args:
        folder: Path of the directory to scan (not recursive).
        loader_cls: Optional callable that takes a file path and returns an
            object exposing ``load()``. Defaults to ``PyPDFLoader``; mainly
            useful for substituting a stub.

    Returns:
        A list with the concatenated ``load()`` results of every PDF that
        loaded successfully. Empty if the folder is missing, is not a
        directory, or contains no loadable PDF.
    """
    if not os.path.isdir(folder):
        print(f"Folder {folder} does not exist or is not a directory")
        return []

    loaded = []
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(".pdf"):
            continue
        file_path = os.path.join(folder, filename)
        try:
            loader = (loader_cls or PyPDFLoader)(file_path)
            file_docs = loader.load()
            loaded.extend(file_docs)
            print(f"Loaded {len(file_docs)} documents from {filename}")
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"Error loading {filename}: {e}")
    return loaded


data_folder = "data"
docs = load_pdf_documents(data_folder)

print(f"Total documents loaded: {len(docs)}")

In [None]:
# Document Processing and Vectorization
# Cut the loaded documents into overlapping chunks, then embed and index them.

text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)

print("Splitting documents into chunks...")
splitted_docs = text_splitter.split_documents(docs)
chunk_total = len(splitted_docs)
print(f"Splitted documents into: {chunk_total} chunks")

print("Adding documents to vector store...")
# The return value of add_documents is not needed here, so it is bound to `_`.
_ = vectorstore.add_documents(splitted_docs)
print("Documents added to vector store")


In [None]:
# RAG Pipeline Implementation with LangGraph

class State(TypedDict):
    """State schema handed to ``StateGraph`` and shared by the graph nodes."""
    # The user question; the only key supplied when the graph is invoked.
    query: str
    # Documents returned by the `retrieve` node's similarity search.
    context: List[Document]
    # Content of the LLM response, written by the `generate_answer` node.
    answer: str
    
def retrieve(state: State):
    """Graph node: fetch the stored chunks most similar to the query.

    Reads ``state["query"]``, runs a similarity search with ``k=2`` against
    the module-level ``vectorstore`` and returns a partial state update of
    the form ``{"context": [...]}``.
    """
    question = state["query"]
    print(f"🔍 Retrieving documents for: {question}")
    matches = vectorstore.similarity_search(query=question, k=2)
    return dict(context=matches)

def generate_answer(state: State):
    """Graph node: ask the LLM to answer the query from the retrieved context.

    Joins the ``page_content`` of every document in ``state["context"]``
    with blank lines, embeds it together with ``state["query"]`` in the
    prompt, and returns a partial state update ``{"answer": <reply text>}``.
    """
    print("💬 Generating answer...")

    # One text blob: the retrieved chunks separated by a blank line.
    chunk_texts = [doc.page_content for doc in state["context"]]
    docs_content = "\n\n".join(chunk_texts)

    # The prompt text, including its indentation, is sent to the model verbatim.
    prompt = f"""
    You are a helpful AI assistant. Answer the question based only on the context provided. 
    If the context doesn't contain the information needed to answer the question, say "I don't have enough information to answer this question."
    
    Question: {state["query"]}
    Context: {docs_content}
    
    Provide a concise and accurate answer based solely on the information in the context:
    """

    llm_reply = llm.invoke(prompt)
    return dict(answer=llm_reply.content)

# Assemble the RAG graph: START -> retrieve -> generate_answer -> END
graph_builder = StateGraph(State)

# Register each node under the name the edges refer to
for node_name, node_fn in (
    ("retrieve", retrieve),
    ("generate_answer", generate_answer),
):
    graph_builder.add_node(node_name, node_fn)

# Wire the nodes into a single linear chain
for edge_start, edge_end in (
    (START, "retrieve"),
    ("retrieve", "generate_answer"),
    ("generate_answer", END),
):
    graph_builder.add_edge(edge_start, edge_end)

# Compile into the runnable object used by the following cells
graph = graph_builder.compile()
print("✓ RAG pipeline ready")


In [None]:
# Smoke test: run the pipeline once on a sample question; the value returned
# by invoke() is shown as the cell output.
graph.invoke(
    dict(query="what is deep mind?")
)

In [None]:
# Load and Explore Test Dataset
# Read the CSV of test questions and show a short preview.
print("Loading test dataset...")
test_data = pd.read_csv("test-set/synthetic-test-set.csv")
example_count = len(test_data)
print(f"Loaded {example_count} test examples with query - ground_truth pairs")
# First three rows, rendered as the cell output
test_data.head(n=3)

In [None]:
# Processing of Test Questions
# Run every test question through the RAG graph, collect one record per
# question, then persist all records to disk.

print(f"Processing {len(test_data)} test questions...")
results = []

for row_label, row in tqdm(test_data.iterrows(), total=len(test_data), desc="Generating answers"):
    question = row['query']
    reference_answer = row['ground_truth']

    # Full pipeline run: retrieval followed by answer generation
    final_state = graph.invoke({"query": question})
    answer_text = final_state["answer"]
    retrieved_texts = [doc.page_content for doc in final_state["context"]]

    results.append(
        dict(
            query=question,
            ground_truth=reference_answer,
            response=answer_text,
            contexts=retrieved_texts,
        )
    )

# Write the collected records out as pretty-printed JSON
output_path = "test_results.json"
with open(output_path, "w") as f:
    json.dump(results, f, indent=2)

print(f"✓ Successfully generated answers for {len(results)} questions")
print(f"Results saved to '{output_path}'")

In [None]:
# Prepare Evaluation Dataset for RAGAS
print("Preparing evaluation dataset...")

# Map each stored record onto the fields of a RAGAS single-turn sample
samples = [
    SingleTurnSample(
        user_input=record["query"],
        response=record["response"],
        reference=record["ground_truth"],
        retrieved_contexts=record["contexts"],
    )
    for record in tqdm(results, desc="Converting to RAGAS format")
]

# Bundle the samples into the dataset object passed to evaluate()
dataset = EvaluationDataset(samples=samples)
print(f"✓ Prepared evaluation dataset with {len(samples)} samples")

In [None]:
# RAGAS Evaluation

# Metrics to compute, in order:
#   AnswerAccuracy        - correctness against the reference answer
#   ContextRelevance      - relevance of the retrieved context
#   ResponseGroundedness  - whether the response is grounded in the context
metrics = [
    metric_cls()
    for metric_cls in (AnswerAccuracy, ContextRelevance, ResponseGroundedness)
]

print("Running RAGAS evaluation...")
# The chat model used for answering is wrapped so RAGAS can use it as well
ragas_llm = LangchainLLMWrapper(llm)
# Score the whole dataset with every metric
result = evaluate(llm=ragas_llm, metrics=metrics, dataset=dataset)
print("✓ Evaluation complete")
print(result)
# Per-sample scores as a DataFrame, rendered as the cell output
result.to_pandas()