In [1]:
# Install the third-party packages imported by the setup cell (FAISS, python-dotenv,
# the Gemini client and NumPy) into the kernel's environment.
# NOTE(review): versions are not pinned here — confirm whether that is intentional.
%pip install faiss-cpu python-dotenv google-generativeai numpy


Collecting google-ai-generativelanguage==0.6.15 (from google-generativeai)
  Using cached google_ai_generativelanguage-0.6.15-py3-none-any.whl.metadata (5.7 kB)
Using cached google_ai_generativelanguage-0.6.15-py3-none-any.whl (1.3 MB)
Installing collected packages: google-ai-generativelanguage
  Attempting uninstall: google-ai-generativelanguage
    Found existing installation: google-ai-generativelanguage 0.6.18
    Uninstalling google-ai-generativelanguage-0.6.18:
      Successfully uninstalled google-ai-generativelanguage-0.6.18
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-google-genai 2.1.9 requires google-ai-generativelanguage<0.7.0,>=0.6.18, but you have google-ai-generativelanguage 0.6.15 which is incompatible.[0m[31m
[0mSuccessfully installed google-ai-generativelanguage-0.6.15
Note: you may need to restart the kernel to use update

In [2]:
import os
import numpy as np
import faiss
import google.generativeai as genai
from dotenv import load_dotenv
from typing import List, Tuple

# Read a local .env file (if there is one) so its variables become visible
# through the process environment
load_dotenv()

# Hand the key found in GEMINI_API_KEY to the google.generativeai client
genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))

print("Setup complete!")



Setup complete!


  from .autonotebook import tqdm as notebook_tqdm


# First, define the RAG system class

In [3]:
from typing import List, Dict, Tuple
import faiss
import numpy as np
import pandas as pd
import google.generativeai as genai

class SimpleRAG:
    """
    Small retrieval-augmented generation pipeline.

    Documents are embedded through `genai.embed_content`, stored in a FAISS
    `IndexFlatL2` index, and looked up by nearest-neighbour search. The
    retrieved documents are then pasted into a prompt for a Gemini model.
    """

    def __init__(self, embedding_dim: int = 768):
        """
        Initialize the RAG system.

        Args:
            embedding_dim: Length of the embedding vectors stored in the index.
                It must match the length of the vectors returned by
                `embed_text_batch`.
        """
        self.embedding_dim = embedding_dim
        self.index = faiss.IndexFlatL2(embedding_dim)

        # Row i of the FAISS index corresponds to self.documents[i]; each entry
        # is one DataFrame record (a dictionary), not just a string.
        self.documents: List[Dict] = []

        print(f"RAG system initialized with embedding dimension: {embedding_dim}")

    def embed_text_batch(self, texts: List[str], task_type: str = "retrieval_document") -> np.ndarray:
        """
        Generate embeddings for a list of texts in one `genai.embed_content` call.

        Args:
            texts: Texts to embed.
            task_type: Embedding task type forwarded to the API. Defaults to
                "retrieval_document" (corpus side); `retrieve` passes
                "retrieval_query" for search queries.

        Returns:
            float32 array built from the API's 'embedding' field, one row per text.
        """
        result = genai.embed_content(
            model="models/text-embedding-004",
            content=texts,
            task_type=task_type
        )
        return np.array(result['embedding'], dtype='float32')

    def add_documents(self, documents_df: pd.DataFrame):
        """
        Add documents from a DataFrame to the vector store.

        Args:
            documents_df: A pandas DataFrame with '_id', 'title', and 'text' columns.

        Raises:
            ValueError: If the embeddings returned for the batch do not have
                shape (number of documents, embedding_dim).
        """
        print(f"Adding {len(documents_df)} documents to the index...")

        # Nothing to embed: skip the API call and leave the index untouched.
        if documents_df.empty:
            print(f"Total documents in index: {self.index.ntotal}")
            return

        # 1. Combine title and text for better embeddings. Missing values are
        #    replaced by '' first; otherwise NaN + str yields NaN (a float),
        #    which is not embeddable text.
        titles = documents_df['title'].fillna('').astype(str)
        bodies = documents_df['text'].fillna('').astype(str)
        texts_to_embed = (titles + " " + bodies).tolist()

        # 2. Generate embeddings in a single batch call
        embeddings_array = self.embed_text_batch(texts_to_embed)

        # Fail early with a readable message if the vectors would not line up
        # one-to-one with the documents or with the index dimension.
        expected_shape = (len(texts_to_embed), self.embedding_dim)
        if embeddings_array.shape != expected_shape:
            raise ValueError(
                f"Expected embeddings of shape {expected_shape}, got {embeddings_array.shape}"
            )

        # 3. Add embeddings to FAISS index
        self.index.add(embeddings_array)

        # 4. Store the original data (as dictionaries), in index order
        self.documents.extend(documents_df.to_dict('records'))

        print(f"Total documents in index: {self.index.ntotal}")

    def retrieve(self, query: str, k: int = 3) -> List[Tuple[Dict, float]]:
        """
        Retrieve the most relevant documents for a query.

        Args:
            query: Search query
            k: Number of documents to retrieve

        Returns:
            List of tuples (document_dictionary, distance), in the order the
            index returned them. Empty when k <= 0 or the index is empty.
        """
        if k <= 0 or self.index.ntotal == 0:
            return []

        # Never ask for more neighbours than the index holds.
        k = min(k, self.index.ntotal)

        # Embed the query as a query (not as a corpus document)
        query_embedding = self.embed_text_batch(
            [query], task_type="retrieval_query"
        ).reshape(1, -1)

        # Search in FAISS index
        distances, indices = self.index.search(query_embedding, k)

        results = []
        for idx, distance in zip(indices[0], distances[0]):
            # FAISS uses -1 for "no result"; without the lower bound a -1 would
            # wrap around and silently return the last stored document.
            if 0 <= idx < len(self.documents):
                results.append((self.documents[int(idx)], float(distance)))

        return results

    @staticmethod
    def _build_prompt(query: str, retrieved_docs) -> str:
        """Assemble the generation prompt from the query and the retrieved documents."""
        # Accept both the (document, distance) pairs produced by `retrieve`
        # and plain document dictionaries.
        docs = [item if isinstance(item, dict) else item[0] for item in retrieved_docs]

        # Real newlines between title/text and between documents
        context = "\n\n".join(f"Title: {doc['title']}\n{doc['text']}" for doc in docs)

        prompt = f"""
        Context information is provided below.
        ---------------------
        {context}
        ---------------------
        Given the context information and not prior knowledge, answer the query.
        Query: {query}
        Answer:
        """
        return prompt

    def generate_answer(self, query: str, retrieved_docs: List[Tuple[Dict, float]]) -> str:
        """
        Generate an answer based on the query and retrieved documents.

        Args:
            query: The user's question.
            retrieved_docs: Output of `retrieve`, i.e. (document, distance)
                pairs; plain document dictionaries are accepted as well.

        Returns:
            The `text` of the generative model's response.
        """
        prompt = self._build_prompt(query, retrieved_docs)

        # Use a generative model to get the final answer
        model = genai.GenerativeModel('gemini-2.5-flash')
        response = model.generate_content(prompt)
        return response.text

# Initialize the RAG System

In [4]:
# Start from an empty vector store with the default embedding dimension
rag_system = SimpleRAG()

# Read the corpus CSV and push every row into the store
df = pd.read_csv('./assets/corpus.csv')
rag_system.add_documents(documents_df=df)

RAG system initialized with embedding dimension: 768
Adding 3633 documents to the index...
Total documents in index: 3633


# Pack the data into the format RAGAS expects

In [5]:
import pandas as pd

# Query texts: the '_id' column of queries.csv is the query id, so give it
# the same name the ground-truth file uses
queries_df = pd.read_csv('./assets/queries.csv').rename(columns={'_id': 'query-id'})

# Relevance judgments: one (query-id, corpus-id) pair per row
ground_truth_df = pd.read_csv('./assets/train.csv')

# Collapse the judgments into one row per query holding the list of all
# corpus ids judged correct for it
ground_truth_grouped = (
    ground_truth_df
    .groupby('query-id')['corpus-id']
    .apply(list)
    .reset_index()
    .rename(columns={'corpus-id': 'ground_truth_doc_ids'})
)

# Attach the ground-truth id lists to the query texts
eval_df = queries_df.merge(ground_truth_grouped, on='query-id')

print("Prepared Evaluation DataFrame:", eval_df.head(), sep="\n")

Prepared Evaluation DataFrame:
  query-id  title                                          text  \
0  PLAIN-3    NaN       Breast Cancer Cells Feed on Cholesterol   
1  PLAIN-4    NaN         Using Diet to Treat Asthma and Eczema   
2  PLAIN-5    NaN         Treating Asthma With Plants vs. Pills   
3  PLAIN-6    NaN    How Fruits and Vegetables Can Treat Asthma   
4  PLAIN-7    NaN  How Fruits and Vegetables Can Prevent Asthma   

                                ground_truth_doc_ids  
0  [MED-2436, MED-2437, MED-2438, MED-2439, MED-2...  
1                     [MED-2441, MED-2472, MED-2444]  
2  [MED-2445, MED-2458, MED-2448, MED-2450, MED-2...  
3  [MED-2456, MED-2459, MED-2458, MED-5072, MED-2...  
4  [MED-2461, MED-2464, MED-2468, MED-2469, MED-2...  


In [6]:
def _ragas_record(question):
    """Run one query through the RAG system and shape the outcome as one RAGAS row."""
    # (document, distance) pairs; reused for both the context list and the answer prompt
    hits = rag_system.retrieve(question, k=3)
    return {
        "question": question,
        # Only the raw text of each retrieved document goes into 'contexts'
        "contexts": [hit_doc['text'] for hit_doc, _ in hits],
        "answer": rag_system.generate_answer(question, hits),
        # Placeholder so the required 'reference' column exists
        "reference": ""
    }


ragas_data = []

# Only the first 10 queries are processed in this example
for query_text in eval_df.head(10)['text']:
    ragas_data.append(_ragas_record(query_text))

# Wrap the records in a Hugging Face Dataset for RAGAS
from datasets import Dataset
ragas_dataset = Dataset.from_list(ragas_data)

In [7]:
# Install RAGAS and the Hugging Face `datasets` package into the kernel's environment.
# NOTE(review): `datasets` is already imported by the cell above this one, so on a
# fresh environment this install has to run before that cell — confirm the intended order.
%pip install ragas datasets

Note: you may need to restart the kernel to use updated packages.


In [15]:
# Remove the Gemini client packages and the LangChain Gemini integration; the next
# cell reinstalls pinned versions of them.
# NOTE(review): presumably done to clear the dependency conflict pip reported in the
# first install cell — confirm.
%pip uninstall google-generativeai google-ai-generativelanguage langchain-google-genai -y


I0000 00:00:1755513140.948270 21940712 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers


Found existing installation: google-generativeai 0.7.2
Uninstalling google-generativeai-0.7.2:
  Successfully uninstalled google-generativeai-0.7.2
Found existing installation: google-ai-generativelanguage 0.6.6
Uninstalling google-ai-generativelanguage-0.6.6:
  Successfully uninstalled google-ai-generativelanguage-0.6.6
Found existing installation: langchain-google-genai 2.1.9
Uninstalling langchain-google-genai-2.1.9:
  Successfully uninstalled langchain-google-genai-2.1.9
Note: you may need to restart the kernel to use updated packages.


In [16]:
# Reinstall the Gemini client, the LangChain Gemini integration and RAGAS at exact,
# pinned versions.
# NOTE(review): presumably this version set was chosen to be mutually compatible —
# confirm; restart the kernel afterwards, as pip's output advises.
%pip install google-generativeai==0.7.2
%pip install langchain-google-genai==2.0.0
%pip install ragas==0.1.10

I0000 00:00:1755513162.124290 21940712 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers


Collecting google-generativeai==0.7.2
  Using cached google_generativeai-0.7.2-py3-none-any.whl.metadata (4.0 kB)
Collecting google-ai-generativelanguage==0.6.6 (from google-generativeai==0.7.2)
  Using cached google_ai_generativelanguage-0.6.6-py3-none-any.whl.metadata (5.6 kB)
Using cached google_generativeai-0.7.2-py3-none-any.whl (164 kB)
Using cached google_ai_generativelanguage-0.6.6-py3-none-any.whl (718 kB)
Installing collected packages: google-ai-generativelanguage, google-generativeai
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [google-generativeai]language]
[1A[2KSuccessfully installed google-ai-generativelanguage-0.6.6 google-generativeai-0.7.2
Note: you may need to restart the kernel to use updated packages.


I0000 00:00:1755513163.256449 21940712 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers


Collecting langchain-google-genai==2.0.0
  Downloading langchain_google_genai-2.0.0-py3-none-any.whl.metadata (3.9 kB)
Downloading langchain_google_genai-2.0.0-py3-none-any.whl (39 kB)
Installing collected packages: langchain-google-genai
Successfully installed langchain-google-genai-2.0.0
Note: you may need to restart the kernel to use updated packages.


I0000 00:00:1755513164.200941 21940712 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers


Collecting ragas==0.1.10
  Downloading ragas-0.1.10-py3-none-any.whl.metadata (5.2 kB)
Collecting pysbd>=0.3.4 (from ragas==0.1.10)
  Downloading pysbd-0.3.4-py3-none-any.whl.metadata (6.1 kB)
Downloading ragas-0.1.10-py3-none-any.whl (91 kB)
Downloading pysbd-0.3.4-py3-none-any.whl (71 kB)
Installing collected packages: pysbd, ragas
[2K  Attempting uninstall: ragas
[2K    Found existing installation: ragas 0.3.1
[2K    Uninstalling ragas-0.3.1:
[2K      Successfully uninstalled ragas-0.3.1
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [ragas]
[1A[2KSuccessfully installed pysbd-0.3.4 ragas-0.1.10
Note: you may need to restart the kernel to use updated packages.


In [17]:
# Simplified evaluation approach
import os
import pandas as pd
import google.generativeai as genai
from typing import List, Dict

# Point the google.generativeai client at the key stored in GEMINI_API_KEY
genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))

def _parse_unit_score(raw_text: str) -> float:
    """
    Turn a model reply into a score in [0, 1].

    The reply is first parsed as a bare number. If that fails, the first
    number found anywhere in the text is used (a heuristic for replies such
    as "Score: 0.8"). Returns NaN when no number is found or when the number
    lies outside [0, 1].
    """
    import re  # local import: this cell's import block does not bring in `re`

    text = (raw_text or '').strip()
    try:
        score = float(text)
    except ValueError:
        found = re.search(r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?', text)
        if found is None:
            return float('nan')
        score = float(found.group())

    # The prompts ask for 0-1; anything else (including NaN/inf) is not a usable score.
    if not 0.0 <= score <= 1.0:
        return float('nan')
    return score


def _request_score(model, prompt: str, metric: str) -> float:
    """Send one scoring prompt to the model and parse the reply, warning when it is unusable."""
    reply = model.generate_content(prompt).text
    score = _parse_unit_score(reply)
    if pd.isna(score):
        print(f"Warning: could not read a 0-1 {metric} score from model reply {reply!r}")
    return score


def evaluate_rag_manually(
    ragas_data: List[Dict],
    model_name: str = 'gemini-1.5-flash',
    model=None,
) -> pd.DataFrame:
    """
    Manual evaluation of RAG outputs using Gemini directly.

    For every item the model is asked three times for a 0-1 rating:
    faithfulness of the answer to the contexts, relevancy of the answer to the
    question, and precision of the retrieved contexts.

    Args:
        ragas_data: Items with 'question', 'contexts' (list of strings) and
            'answer' keys.
        model_name: Name passed to `genai.GenerativeModel` when `model` is not
            supplied.
        model: Optional ready-made model object exposing
            `generate_content(prompt)` whose result has a `.text` attribute.
            When given, `model_name` is ignored.

    Returns:
        DataFrame with one row per item and the columns 'question' (shortened
        to 50 characters for display), 'faithfulness', 'answer_relevancy',
        'context_precision' and 'average_score'. A score that could not be
        parsed is NaN, which also makes that row's 'average_score' NaN.
    """
    if model is None:
        model = genai.GenerativeModel(model_name)

    columns = ['question', 'faithfulness', 'answer_relevancy', 'context_precision', 'average_score']
    results = []

    for item in ragas_data:
        question = item['question']
        contexts = item['contexts']
        answer = item['answer']

        # Evaluate Faithfulness
        faithfulness_prompt = f"""
        Question: {question}
        Context: {' '.join(contexts)}
        Answer: {answer}
        
        Rate how faithful the answer is to the provided context on a scale of 0-1.
        Consider if all claims in the answer can be verified from the context.
        Return only a number between 0 and 1.
        """

        faithfulness_score = _request_score(model, faithfulness_prompt, 'faithfulness')

        # Evaluate Answer Relevancy
        relevancy_prompt = f"""
        Question: {question}
        Answer: {answer}
        
        Rate how relevant the answer is to the question on a scale of 0-1.
        Consider if the answer directly addresses what was asked.
        Return only a number between 0 and 1.
        """

        relevancy_score = _request_score(model, relevancy_prompt, 'answer_relevancy')

        # Evaluate Context Precision
        precision_prompt = f"""
        Question: {question}
        Context: {' '.join(contexts)}
        
        Rate how precise and relevant the retrieved context is for answering the question on a scale of 0-1.
        Return only a number between 0 and 1.
        """

        precision_score = _request_score(model, precision_prompt, 'context_precision')

        # Only mark the question with '...' when it was actually cut
        display_question = question if len(question) <= 50 else question[:50] + '...'

        results.append({
            'question': display_question,
            'faithfulness': faithfulness_score,
            'answer_relevancy': relevancy_score,
            'context_precision': precision_score,
            'average_score': (faithfulness_score + relevancy_score + precision_score) / 3
        })

        print(f"Evaluated: {len(results)}/{len(ragas_data)}")

    # Passing `columns` keeps the expected columns even when there are no rows
    return pd.DataFrame(results, columns=columns)

# Score every record collected in ragas_data
print("Starting manual evaluation...")
evaluation_df = evaluate_rag_manually(ragas_data)

# Per-question scores
print("\n=== Evaluation Results ===", evaluation_df, sep="\n")

# Distribution of each metric across the evaluated questions
score_columns = ['faithfulness', 'answer_relevancy', 'context_precision', 'average_score']
print("\n=== Summary Statistics ===", evaluation_df[score_columns].describe(), sep="\n")

Starting manual evaluation...
Evaluated: 1/10
Evaluated: 2/10
Evaluated: 3/10
Evaluated: 4/10
Evaluated: 5/10
Evaluated: 6/10
Evaluated: 7/10
Evaluated: 8/10
Evaluated: 9/10
Evaluated: 10/10

=== Evaluation Results ===
                                            question  faithfulness  \
0         Breast Cancer Cells Feed on Cholesterol...           1.0   
1           Using Diet to Treat Asthma and Eczema...           1.0   
2           Treating Asthma With Plants vs. Pills...           1.0   
3      How Fruits and Vegetables Can Treat Asthma...           1.0   
4    How Fruits and Vegetables Can Prevent Asthma...           1.0   
5       Our Tax Dollars Subsidize Unhealthy Foods...           0.8   
6            Reducing Arsenic in Chicken and Rice...           1.0   
7              How Contaminated Are Our Children?...           1.0   
8      Blood Type Diet Perceived as "Crass Fraud"...           1.0   
9  Why Do Heart Doctors Favor Surgery and Drugs O...           1.0   

   answer_

In [None]:
import pandas as pd

# NOTE(review): this cell repeats the evaluation-frame preparation that already
# appears earlier in the notebook — confirm whether it is still needed.

# Query texts: the '_id' column of queries.csv is the query id, so give it
# the same name the ground-truth file uses
queries_df = pd.read_csv('./assets/queries.csv').rename(columns={'_id': 'query-id'})

# Relevance judgments: one (query-id, corpus-id) pair per row
ground_truth_df = pd.read_csv('./assets/train.csv')

# Collapse the judgments into one row per query holding the list of all
# corpus ids judged correct for it
ground_truth_grouped = (
    ground_truth_df
    .groupby('query-id')['corpus-id']
    .apply(list)
    .reset_index()
    .rename(columns={'corpus-id': 'ground_truth_doc_ids'})
)

# Attach the ground-truth id lists to the query texts
eval_df = queries_df.merge(ground_truth_grouped, on='query-id')

print("Prepared Evaluation DataFrame:", eval_df.head(), sep="\n")

Prepared Evaluation DataFrame:
  query-id  title                                          text  \
0  PLAIN-3    NaN       Breast Cancer Cells Feed on Cholesterol   
1  PLAIN-4    NaN         Using Diet to Treat Asthma and Eczema   
2  PLAIN-5    NaN         Treating Asthma With Plants vs. Pills   
3  PLAIN-6    NaN    How Fruits and Vegetables Can Treat Asthma   
4  PLAIN-7    NaN  How Fruits and Vegetables Can Prevent Asthma   

                                ground_truth_doc_ids  
0  [MED-2436, MED-2437, MED-2438, MED-2439, MED-2...  
1                     [MED-2441, MED-2472, MED-2444]  
2  [MED-2445, MED-2458, MED-2448, MED-2450, MED-2...  
3  [MED-2456, MED-2459, MED-2458, MED-5072, MED-2...  
4  [MED-2461, MED-2464, MED-2468, MED-2469, MED-2...  


In [None]:
import pandas as pd

# NOTE(review): this cell repeats the evaluation-frame preparation that already
# appears earlier in the notebook — confirm whether it is still needed.

# Query texts: the '_id' column of queries.csv is the query id, so give it
# the same name the ground-truth file uses
queries_df = pd.read_csv('./assets/queries.csv').rename(columns={'_id': 'query-id'})

# Relevance judgments: one (query-id, corpus-id) pair per row
ground_truth_df = pd.read_csv('./assets/train.csv')

# Collapse the judgments into one row per query holding the list of all
# corpus ids judged correct for it
ground_truth_grouped = (
    ground_truth_df
    .groupby('query-id')['corpus-id']
    .apply(list)
    .reset_index()
    .rename(columns={'corpus-id': 'ground_truth_doc_ids'})
)

# Attach the ground-truth id lists to the query texts
eval_df = queries_df.merge(ground_truth_grouped, on='query-id')

print("Prepared Evaluation DataFrame:", eval_df.head(), sep="\n")

Prepared Evaluation DataFrame:
  query-id  title                                          text  \
0  PLAIN-3    NaN       Breast Cancer Cells Feed on Cholesterol   
1  PLAIN-4    NaN         Using Diet to Treat Asthma and Eczema   
2  PLAIN-5    NaN         Treating Asthma With Plants vs. Pills   
3  PLAIN-6    NaN    How Fruits and Vegetables Can Treat Asthma   
4  PLAIN-7    NaN  How Fruits and Vegetables Can Prevent Asthma   

                                ground_truth_doc_ids  
0  [MED-2436, MED-2437, MED-2438, MED-2439, MED-2...  
1                     [MED-2441, MED-2472, MED-2444]  
2  [MED-2445, MED-2458, MED-2448, MED-2450, MED-2...  
3  [MED-2456, MED-2459, MED-2458, MED-5072, MED-2...  
4  [MED-2461, MED-2464, MED-2468, MED-2469, MED-2...  
