In [1]:

import os
import numpy as np
import faiss
import google.generativeai as genai
from dotenv import load_dotenv
from typing import List, Tuple

# Load variables from a local .env file (if one exists) into the process environment
load_dotenv()

# Configure the Gemini client with the key read from GEMINI_API_KEY.
# os.getenv returns None when the variable is unset, so a missing key is NOT reported here;
# it would only surface later, when the first API call is made.
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

print("Setup complete!")



Setup complete!


  from .autonotebook import tqdm as notebook_tqdm


# Update the RAG System Class 

In [None]:
from typing import List, Dict, Tuple
import faiss
import numpy as np
import pandas as pd
import google.generativeai as genai

class improved_RAG:
    """
    Small retrieval-augmented generation helper.

    Holds a FAISS ``IndexFlatL2`` vector index plus a parallel list of document
    dictionaries, and wraps the Gemini embedding / generation calls used to
    fill and query it.

    Attributes:
        embedding_dim: Dimensionality the FAISS index was created with.
        index: The FAISS ``IndexFlatL2`` instance holding the vectors.
        documents: Document records (dictionaries); position ``i`` is meant to
            correspond to vector ``i`` in ``index``.
    """

    def __init__(self, embedding_dim: int = 768):
        """
        Initialize the RAG system.

        Args:
            embedding_dim: Size of the embedding vectors the index will store.
        """
        self.embedding_dim = embedding_dim
        self.index = faiss.IndexFlatL2(embedding_dim)

        # Stores one dictionary per document (not just the raw text)
        self.documents: List[Dict] = []

        print(f"RAG system initialized with embedding dimension: {embedding_dim}")

    def embed_text_batch(self, texts: List[str],
                         task_type: str = "retrieval_document") -> np.ndarray:
        """
        Generate embeddings for a list of texts in one embedding request.

        Args:
            texts: Strings to embed.
            task_type: Task type forwarded to the embedding model. Defaults to
                "retrieval_document"; callers embedding a search query may pass
                "retrieval_query" instead.

        Returns:
            A float32 array built from the 'embedding' field of the response
            (presumably one row per input text - confirm against the SDK).
            For empty input, an empty array of shape (0, embedding_dim) is
            returned without calling the API.
        """
        # Avoid a pointless API call and keep the result 2-D so callers that
        # hand it to FAISS do not trip over a shape-(0,) array.
        if len(texts) == 0:
            return np.empty((0, self.embedding_dim), dtype='float32')

        result = genai.embed_content(
            model="models/text-embedding-004",
            content=texts,
            task_type=task_type
        )
        return np.array(result['embedding'], dtype='float32')

    def generate_answer(self, query: str, retrieved_docs: List[Tuple[Dict, float]]) -> str:
        """
        Generate an answer based on the query and retrieved documents.

        Args:
            query: The user question.
            retrieved_docs: (document_dictionary, distance) pairs; each document
                dictionary must contain 'title' and 'text'.

        Returns:
            The text of the model response.
        """
        # Separate documents with real blank lines. The previous "\\n" escapes
        # inserted the two literal characters backslash + n into the prompt.
        context = "\n\n".join(
            f"Title: {doc['title']}\n{doc['text']}" for doc, _dist in retrieved_docs
        )

        # Create a prompt for the generative model
        prompt = f"""
        Context information is provided below.
        ---------------------
        {context}
        ---------------------
        Given the context information and not prior knowledge, answer the query.
        Query: {query}
        Answer:
        """

        # Use a generative model to get the final answer
        model = genai.GenerativeModel('gemini-2.5-flash')
        response = model.generate_content(prompt)
        return response.text


# Update `add_documents` first, as it directly affects the retrieval process

In [None]:

def add_documents(self, documents_df: pd.DataFrame):
    """
    Add documents from a DataFrame to the vector store.

    Each row's title and text are concatenated, embedded in one batch call and
    appended to the FAISS index; the row itself is stored as a dictionary at
    the same position in ``self.documents``.

    Args:
        documents_df: A pandas DataFrame with '_id', 'title', and 'text' columns.

    Raises:
        ValueError: If the embedder returns a different number of vectors than
            there are rows, which would desynchronise index and document list.
    """
    # Nothing to embed: skip the API call and leave the index untouched.
    if documents_df.empty:
        print("No documents to add.")
        return

    print(f"Adding {len(documents_df)} documents to the index...")

    # 1. Combine title and text for better embeddings.
    #    Missing values are replaced by "" first; otherwise `NaN + " " + text`
    #    evaluates to NaN and a float would be sent to the embedding call.
    titles = documents_df['title'].fillna("").astype(str)
    bodies = documents_df['text'].fillna("").astype(str)
    texts_to_embed = (titles + " " + bodies).str.strip().tolist()

    # 2. Generate embeddings in a single batch call
    embeddings_array = self.embed_text_batch(texts_to_embed)

    # Validate before mutating any state so index and documents stay aligned.
    if embeddings_array.shape[0] != len(texts_to_embed):
        raise ValueError(
            f"Expected {len(texts_to_embed)} embeddings, got {embeddings_array.shape[0]}"
        )

    # 3. Add embeddings to FAISS index
    self.index.add(embeddings_array)

    # 4. Store the original data (as dictionaries)
    self.documents.extend(documents_df.to_dict('records'))

    print(f"Total documents in index: {self.index.ntotal}")


# Attach the function defined above to the class so instances can call it as a method.
improved_RAG.add_documents = add_documents

# Now update `retrieve`, which directly affects the context passed to the LLM

In [None]:

def retrieve(self, query: str, k: int = 3) -> List[Tuple[Dict, float]]:
    """
    Retrieve the most relevant documents for a query.

    Args:
        query: Search query
        k: Number of documents to retrieve

    Returns:
        List of tuples (document_dictionary, distance), in the order returned
        by the index search. Fewer than ``k`` items are returned when the index
        holds fewer vectors; an empty list when the index is empty or ``k <= 0``.
    """
    # Nothing to search for / in: skip the embedding call entirely.
    total = self.index.ntotal
    if k <= 0 or total == 0:
        return []

    # Never ask for more neighbours than there are vectors.
    k = min(k, total)

    # Embed the query (using a single-item list for consistency)
    query_embedding = self.embed_text_batch([query]).reshape(1, -1)

    # Search in FAISS index
    distances, indices = self.index.search(query_embedding, k)

    # Prepare results
    results = []
    for idx, distance in zip(indices[0], distances[0]):
        # FAISS marks "no result" slots with index -1. A bare `idx < len(...)`
        # check would accept -1 and silently return the LAST document.
        if 0 <= idx < len(self.documents):
            results.append((self.documents[int(idx)], float(distance)))

    return results

# Attach the function defined above to the class so instances can call it as a method.
improved_RAG.retrieve = retrieve

# Update the answer function to be more robust and to stick to the context

In [None]:

def generate_answer(self, query: str, retrieved_docs: List[Tuple[Dict, float]]) -> str:
    """
    Generate an answer based on the query and retrieved documents.

    Args:
        query: The user question.
        retrieved_docs: (document_dictionary, distance) pairs; each document
            dictionary must contain 'title' and 'text'.

    Returns:
        The text of the model response.
    """
    # Combine the retrieved documents into a single context string, separated
    # by real blank lines. The previous "\\n" escapes put the two literal
    # characters backslash + n into the prompt instead of line breaks.
    context = "\n\n".join(
        f"Title: {doc['title']}\n{doc['text']}" for doc, _dist in retrieved_docs
    )

    # Create a prompt for the generative model
    prompt = f"""
        Context information is provided below.
        ---------------------
        {context}
        ---------------------
        Given the context information and not prior knowledge, answer the query.
        Query: {query}
        Answer:
        """

    # Use a generative model to get the final answer
    model = genai.GenerativeModel('gemini-2.5-flash')
    response = model.generate_content(prompt)
    return response.text

# Attach the function defined above to the class. Note: this REPLACES the
# generate_answer method that the class body already defines.
improved_RAG.generate_answer = generate_answer

# Test the performance now

## Prepare the RAGAS schema

In [None]:

# Build a fresh RAG system with the default embedding dimension
rag_system = improved_RAG()

# load the corpus from the csv file
df = pd.read_csv('./assets/corpus.csv')

# Embed every row and add it to the index (add_documents reads the 'title' and 'text' columns)
rag_system.add_documents(df)

In [None]:

import pandas as pd

# Read the query texts and the query -> document relevance pairs
queries_df = pd.read_csv('./assets/queries.csv')
ground_truth_df = pd.read_csv('./assets/train.csv')

# queries.csv identifies each query by '_id'; expose it as 'query-id'
# so it matches the key column used in train.csv
queries_df = queries_df.rename(columns={'_id': 'query-id'})

# One row per query-id, carrying the list of all corpus-ids judged correct for it
ground_truth_grouped = (
    ground_truth_df
    .groupby('query-id')['corpus-id']
    .apply(list)
    .rename('ground_truth_doc_ids')
    .reset_index()
)

# Attach the ground-truth document IDs to the query texts
# (default inner join: queries without ground truth are dropped)
eval_df = queries_df.merge(ground_truth_grouped, on='query-id')

print("Prepared Evaluation DataFrame:")
print(eval_df.head())

In [None]:
# One record per evaluated query: question, retrieved contexts, generated answer, reference
ragas_data = []

# Only the first 10 queries are processed here; each iteration makes remote
# embedding and generation calls through rag_system.
for index, row in eval_df.head(10).iterrows():
    query_text = row['text']

    # 1. Retrieve the 3 nearest documents as (document_dict, distance) pairs
    retrieved_docs_with_dist = rag_system.retrieve(query_text, k=3)

    # 2. Keep only each document's 'text' field for the contexts (distances are dropped)
    retrieved_contexts = [doc['text'] for doc, dist in retrieved_docs_with_dist]

    # 3. Generate an answer from the same retrieved pairs
    generated_answer = rag_system.generate_answer(query_text, retrieved_docs_with_dist)

    ragas_data.append({
        "question": query_text,
        "contexts": retrieved_contexts,
        "answer": generated_answer,
        # 'reference' is filled with an empty-string placeholder; no real
        # reference answer is available at this point.
        "reference": ""
    })

# Convert the list of records to a Hugging Face Dataset for RAGAS
from datasets import Dataset
ragas_dataset = Dataset.from_list(ragas_data)


# Evaluate

In [None]:
# Simplified evaluation approach
import os
import pandas as pd
import google.generativeai as genai
from typing import List, Dict

# Configure Gemini with the key from GEMINI_API_KEY.
# This repeats the configuration already done in the first cell of the notebook.
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

def _parse_unit_score(raw_text: str) -> float:
    """Extract the first number from a model reply and clamp it to [0, 1]; NaN if none."""
    import re  # local import: keeps this helper self-contained within the cell

    match = re.search(r'[-+]?(?:\d+(?:\.\d*)?|\.\d+)', raw_text or "")
    if match is None:
        return float('nan')
    return min(1.0, max(0.0, float(match.group())))


def _ask_for_score(model, prompt: str) -> float:
    """Send one rating prompt to the model and return the parsed score (NaN if unusable)."""
    response = model.generate_content(prompt)
    try:
        reply = response.text
    except ValueError:
        # The SDK's `.text` accessor raises ValueError when the response holds
        # no usable text; record "no score" instead of aborting the whole run.
        return float('nan')
    return _parse_unit_score(reply)


def evaluate_rag_manually(ragas_data: List[Dict],
                          model_name: str = 'gemini-1.5-flash') -> pd.DataFrame:
    """
    Manual evaluation of RAG outputs using Gemini directly.

    For every record the model is asked three times to rate, on a 0-1 scale,
    the faithfulness of the answer to the context, the relevancy of the answer
    to the question, and the precision of the retrieved context.

    Args:
        ragas_data: Records with 'question' (str), 'contexts' (list of str)
            and 'answer' (str) keys.
        model_name: Gemini model used as the judge.
            NOTE(review): answer generation elsewhere in this notebook uses
            'gemini-2.5-flash'; confirm whether the judge model should match.

    Returns:
        DataFrame with columns 'question', 'faithfulness', 'answer_relevancy',
        'context_precision' and 'average_score', one row per record. Scores
        that cannot be parsed from the model reply are NaN; numeric scores are
        clamped to [0, 1]. An empty input yields an empty frame that still has
        these columns.
    """
    columns = ['question', 'faithfulness', 'answer_relevancy',
               'context_precision', 'average_score']
    model = genai.GenerativeModel(model_name)

    results = []

    for item in ragas_data:
        question = item['question']
        contexts = item['contexts']
        answer = item['answer']

        # Evaluate Faithfulness
        faithfulness_prompt = f"""
        Question: {question}
        Context: {' '.join(contexts)}
        Answer: {answer}
        
        Rate how faithful the answer is to the provided context on a scale of 0-1.
        Consider if all claims in the answer can be verified from the context.
        Return only a number between 0 and 1.
        """
        faithfulness_score = _ask_for_score(model, faithfulness_prompt)

        # Evaluate Answer Relevancy
        relevancy_prompt = f"""
        Question: {question}
        Answer: {answer}
        
        Rate how relevant the answer is to the question on a scale of 0-1.
        Consider if the answer directly addresses what was asked.
        Return only a number between 0 and 1.
        """
        relevancy_score = _ask_for_score(model, relevancy_prompt)

        # Evaluate Context Precision
        precision_prompt = f"""
        Question: {question}
        Context: {' '.join(contexts)}
        
        Rate how precise and relevant the retrieved context is for answering the question on a scale of 0-1.
        Return only a number between 0 and 1.
        """
        precision_score = _ask_for_score(model, precision_prompt)

        # Truncate long questions for display; only add the ellipsis when
        # something was actually cut off.
        shown_question = question if len(question) <= 50 else question[:50] + '...'

        results.append({
            'question': shown_question,
            'faithfulness': faithfulness_score,
            'answer_relevancy': relevancy_score,
            'context_precision': precision_score,
            'average_score': (faithfulness_score + relevancy_score + precision_score) / 3
        })

        print(f"Evaluated: {len(results)}/{len(ragas_data)}")

    # Passing explicit columns keeps the schema stable even when results is empty.
    return pd.DataFrame(results, columns=columns)

# Score every record collected in ragas_data (three model calls per record)
print("Starting manual evaluation...")
evaluation_df = evaluate_rag_manually(ragas_data)

print("\n=== Evaluation Results ===")
print(evaluation_df)

# Summary statistics over the four numeric score columns only
print("\n=== Summary Statistics ===")
print(evaluation_df[['faithfulness', 'answer_relevancy', 'context_precision', 'average_score']].describe())
