In [1]:
! pip install langchain-community sentence-transformers torch



In [2]:

import os
import numpy as np
import faiss
import google.generativeai as genai
from dotenv import load_dotenv
from typing import List, Tuple

# Load environment variables
load_dotenv()

# Configure Google API
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

print("Setup complete!")



Setup complete!


  from .autonotebook import tqdm as notebook_tqdm


# Update the RAG System Class 

In [3]:
! pip install langchain-experimental langchain-google-genai




In [4]:
! pip install cohere



In [5]:
! pip install rank-bm25



In [6]:
from typing import List, Dict, Tuple
import faiss
import numpy as np
import pandas as pd
import google.generativeai as genai
import cohere
from rank_bm25 import BM25Okapi
from langchain_community.embeddings import HuggingFaceEmbeddings


class improved_RAG:
    def __init__(self, embedding_dim: int = 384):
        """
        Initialize the RAG system.
        """
        self.embedding_dim = embedding_dim
        self.index = faiss.IndexFlatL2(embedding_dim)
        self.embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

        # This will now store a list of dictionaries, not just strings
        self.documents: List[Dict] = []
        self.cohere_client = cohere.Client(os.getenv('COHERE_API_KEY'))
        self.bm25 = None  # Will be created when documents are added
        self.tokenized_docs = []  # Store tokenized documents for BM25

        
        print(f"RAG system initialized with embedding dimension: {embedding_dim}")
    
    def embed_text_batch(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeddings for a list of texts using batching.
        """
        embeddings = self.embedding_model.embed_documents(texts)
        return np.array(embeddings, dtype='float32')

    
    
    
    def generate_answer(self, query: str, retrieved_docs: List[Dict]) -> str:
        """
        Generate an answer based on the query and retrieved documents.
        """
        # Combine the content of the retrieved documents into a single context string
        context = "\\n\\n".join([f"Title: {doc['title']}\\n{doc['text']}" for doc, dist in retrieved_docs])

        # Create a prompt for the generative model
        prompt = f"""
        Context information is provided below.
        ---------------------
        {context}
        ---------------------
        Given the context information and not prior knowledge, answer the query.
        Query: {query}
        Answer:
        """

        # Use a generative model to get the final answer
        model = genai.GenerativeModel('gemini-2.5-flash')
        response = model.generate_content(prompt)
        return response.text


# update the add documents first, as it directly affect the retrieve process

## since previous we treat one row as a chunk, this means even some information in this article is related, the whole article might contain many irrelevant information to the specific query.

### so we need a chunk splitting

In [7]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings

import time
def add_documents(self, documents_df: pd.DataFrame, *, chunk_size: int = 1000, chunk_overlap: int = 150) -> None:
    """Chunk docs using semantic similarity, embed in batch, build both FAISS and BM25 indices."""
    
    # 1. Initialize a local embedding model instead of Google's.
    # This runs entirely on your machine.
    print("Loading local embedding model for semantic chunking...")
    
    # 2. Create the semantic chunker using the local model.
    splitter = SemanticChunker(
        embeddings=self.embedding_model,
        breakpoint_threshold_type="percentile",
        breakpoint_threshold_amount=95
    )

    texts_to_embed = []
    chunks_meta = []
    tokenized_chunks = []  # For BM25

    for _id, title, text in documents_df[['_id', 'title', 'text']].itertuples(index=False, name=None):
        # Use semantic chunking instead of fixed-size chunking
        chunks = splitter.split_text(text)
        
        for i, chunk in enumerate(chunks):
            # Prepare for embedding
            full_text = f"Title: {title}\n\n{chunk}"
            texts_to_embed.append(full_text)
            
            # Store metadata
            chunks_meta.append({
                'original_doc_id': _id,
                'title': title,
                'text': chunk
            })
            
            # Tokenize for BM25
            tokenized_text = (title + " " + chunk).lower().split()
            tokenized_chunks.append(tokenized_text)


    # Build FAISS index
    embeddings = self.embed_text_batch(texts_to_embed)
    self.index.add(embeddings)
    
    # Build BM25 index
    self.tokenized_docs.extend(tokenized_chunks)
    self.bm25 = BM25Okapi(self.tokenized_docs)
    
    # Update documents
    self.documents.extend(chunks_meta)
    
    print(f"Added {len(chunks_meta)} chunks using semantic chunking. Index size: {self.index.ntotal}")
    print(f"BM25 index built with {len(self.tokenized_docs)} documents")

improved_RAG.add_documents = add_documents

# now update the retrieve, this directly affect the context of LLM

In [8]:
def retrieve(self, query: str, k: int = 3) -> List[Tuple[Dict, float]]:
    """
    Retrieve documents using hybrid search with RRF and re-ranking.
    
    Args:
        query: Search query
        k: Number of documents to return after re-ranking
        
    Returns:
        List of tuples (document_dictionary, relevance_score)
    """
    initial_k = 20  # Number of candidates from each method
    
    # Step 1: FAISS Vector Search
    query_embedding = self.embed_text_batch([query]).reshape(1, -1)
    distances, indices = self.index.search(query_embedding, initial_k)
    
    # Store vector search results with their ranks
    vector_results = {}
    for rank, (idx, distance) in enumerate(zip(indices[0], distances[0])):
        if idx < len(self.documents):
            vector_results[idx] = rank + 1  # Rank starts from 1
    
    # Step 2: BM25 Keyword Search
    tokenized_query = query.lower().split()
    bm25_scores = self.bm25.get_scores(tokenized_query)
    
    # Get top-k indices from BM25
    top_bm25_indices = np.argsort(bm25_scores)[::-1][:initial_k]
    
    # Store BM25 results with their ranks
    bm25_results = {}
    for rank, idx in enumerate(top_bm25_indices):
        if idx < len(self.documents):
            bm25_results[idx] = rank + 1  # Rank starts from 1
    
    # Step 3: Reciprocal Rank Fusion (RRF)
    # RRF formula: score = 1 / (k + rank), where k is a constant (typically 60)
    rrf_k = 60
    rrf_scores = {}
    
    # Combine all unique document indices
    all_indices = set(vector_results.keys()) | set(bm25_results.keys())
    
    for idx in all_indices:
        score = 0
        # Add vector search contribution
        if idx in vector_results:
            score += 1 / (rrf_k + vector_results[idx])
        # Add BM25 contribution
        if idx in bm25_results:
            score += 1 / (rrf_k + bm25_results[idx])
        rrf_scores[idx] = score
    
    # Sort by RRF score and get top candidates
    sorted_indices = sorted(rrf_scores.keys(), key=lambda x: rrf_scores[x], reverse=True)
    top_fused_indices = sorted_indices[:initial_k]
    
    # Prepare candidates for re-ranking
    candidates = []
    for idx in top_fused_indices:
        doc = self.documents[idx]
        doc_text = f"Title: {doc['title']}\n{doc['text']}"
        candidates.append({
            'doc': doc,
            'text': doc_text,
            'rrf_score': rrf_scores[idx]
        })
    
    # Step 4: Re-rank with Cohere
    if len(candidates) == 0:
        return []
    
    documents = [c['text'] for c in candidates]
    
    rerank_results = self.cohere_client.rerank(
        query=query,
        documents=documents,
        top_n=min(k, len(documents)),
        model='rerank-english-v3.0'
    )
    
    # Return final results
    results = []
    for result in rerank_results.results:
        idx = result.index
        relevance_score = result.relevance_score
        results.append((candidates[idx]['doc'], relevance_score))
    
    return results

improved_RAG.retrieve = retrieve

# update the answer function, more robust and stick to the context

In [9]:
def generate_answer(self, query: str, retrieved_docs: List[Dict]) -> str:
        """
        Generate an answer based on the query and retrieved documents.
        """
        # Combine the content of the retrieved documents into a single context string
        context = "\\n\\n".join([f"Title: {doc['title']}\\n{doc['text']}" for doc, dist in retrieved_docs])

        # Create a prompt for the generative model
        prompt = f"""
        Context information is provided below.
        ---------------------
        {context}
        ---------------------
        Given the context information and not prior knowledge, answer the query.
        Query: {query}
        Answer:
        """

        # Use a generative model to get the final answer
        model = genai.GenerativeModel('gemini-2.5-flash')
        response = model.generate_content(prompt)
        return response.text

improved_RAG.generate_answer = generate_answer

# test the performance now

## prepare the ragas schema

In [10]:

rag_system = improved_RAG()

# load the corpus from the csv file
df = pd.read_csv('./assets/corpus.csv')

rag_system.add_documents(df)

  self.embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


RAG system initialized with embedding dimension: 384
Loading local embedding model for semantic chunking...
Added 7264 chunks using semantic chunking. Index size: 7264
BM25 index built with 7264 documents


In [11]:

import pandas as pd

# Load your datasets
queries_df = pd.read_csv('./assets/queries.csv')
ground_truth_df = pd.read_csv('./assets/train.csv')

# The queries.csv has the text, but the _id is the query-id
# Let's rename the column for clarity
queries_df.rename(columns={'_id': 'query-id'}, inplace=True)

# Group the ground truth by query-id to get a list of all correct corpus-ids for each query
ground_truth_grouped = ground_truth_df.groupby('query-id')['corpus-id'].apply(list).reset_index()
ground_truth_grouped.rename(columns={'corpus-id': 'ground_truth_doc_ids'}, inplace=True)

# Merge the query texts with the ground truth document IDs
eval_df = pd.merge(queries_df, ground_truth_grouped, on='query-id')

print("Prepared Evaluation DataFrame:")
print(eval_df.head())

Prepared Evaluation DataFrame:
  query-id  title                                          text  \
0  PLAIN-3    NaN       Breast Cancer Cells Feed on Cholesterol   
1  PLAIN-4    NaN         Using Diet to Treat Asthma and Eczema   
2  PLAIN-5    NaN         Treating Asthma With Plants vs. Pills   
3  PLAIN-6    NaN    How Fruits and Vegetables Can Treat Asthma   
4  PLAIN-7    NaN  How Fruits and Vegetables Can Prevent Asthma   

                                ground_truth_doc_ids  
0  [MED-2436, MED-2437, MED-2438, MED-2439, MED-2...  
1                     [MED-2441, MED-2472, MED-2444]  
2  [MED-2445, MED-2458, MED-2448, MED-2450, MED-2...  
3  [MED-2456, MED-2459, MED-2458, MED-5072, MED-2...  
4  [MED-2461, MED-2464, MED-2468, MED-2469, MED-2...  


In [12]:
ragas_data = []

# Let's process the first 10 queries for this example
for index, row in eval_df.head(10).iterrows():
    query_text = row['text']

    # 1. Use your RAG system to retrieve document chunks
    retrieved_docs_with_dist = rag_system.retrieve(query_text, k=3)

    # 2. Extract just the text content for the contexts
    retrieved_contexts = [doc['text'] for doc, dist in retrieved_docs_with_dist]

    # 3. Use your RAG system to generate an answer
    generated_answer = rag_system.generate_answer(query_text, retrieved_docs_with_dist)

    ragas_data.append({
        "question": query_text,
        "contexts": retrieved_contexts,
        "answer": generated_answer,
        # THE FIX: Add the required 'reference' column with a placeholder
        "reference": ""
    })

# Convert to a Hugging Face Dataset for RAGAS
from datasets import Dataset
ragas_dataset = Dataset.from_list(ragas_data)


KeyboardInterrupt: 

# Evaluate

In [None]:
# Simplified evaluation approach
import os
import pandas as pd
import google.generativeai as genai
from typing import List, Dict

# Configure Gemini
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

def evaluate_rag_manually(ragas_data: List[Dict]) -> pd.DataFrame:
    """
    Manual evaluation of RAG outputs using Gemini directly
    """
    model = genai.GenerativeModel('gemini-2.5-flash')
    
    results = []
    
    for item in ragas_data:
        question = item['question']
        contexts = item['contexts']
        answer = item['answer']
        
        # Evaluate Faithfulness
        faithfulness_prompt = f"""
        Question: {question}
        Context: {' '.join(contexts)}
        Answer: {answer}
        
        Rate how faithful the answer is to the provided context on a scale of 0-1.
        Consider if all claims in the answer can be verified from the context.
        Return only a number between 0 and 1.
        """
        
        faithfulness_score = float(model.generate_content(faithfulness_prompt).text.strip())
        
        # Evaluate Answer Relevancy
        relevancy_prompt = f"""
        Question: {question}
        Answer: {answer}
        
        Rate how relevant the answer is to the question on a scale of 0-1.
        Consider if the answer directly addresses what was asked.
        Return only a number between 0 and 1.
        """
        
        relevancy_score = float(model.generate_content(relevancy_prompt).text.strip())
        
        # Evaluate Context Precision
        precision_prompt = f"""
        Question: {question}
        Context: {' '.join(contexts)}
        
        Rate how precise and relevant the retrieved context is for answering the question on a scale of 0-1.
        Return only a number between 0 and 1.
        """
        
        precision_score = float(model.generate_content(precision_prompt).text.strip())
        
        results.append({
            'question': question[:50] + '...',  # Truncate for display
            'faithfulness': faithfulness_score,
            'answer_relevancy': relevancy_score,
            'context_precision': precision_score,
            'average_score': (faithfulness_score + relevancy_score + precision_score) / 3
        })
        
        print(f"Evaluated: {len(results)}/{len(ragas_data)}")
    
    return pd.DataFrame(results)

# Use this function with your ragas_data
print("Starting manual evaluation...")
evaluation_df = evaluate_rag_manually(ragas_data)

print("\n=== Evaluation Results ===")
print(evaluation_df)

print("\n=== Summary Statistics ===")
print(evaluation_df[['faithfulness', 'answer_relevancy', 'context_precision', 'average_score']].describe())


Starting manual evaluation...
Evaluated: 1/10
Evaluated: 2/10
Evaluated: 3/10
Evaluated: 4/10
Evaluated: 5/10
Evaluated: 6/10
Evaluated: 7/10
Evaluated: 8/10
Evaluated: 9/10
Evaluated: 10/10

=== Evaluation Results ===
                                            question  faithfulness  \
0         Breast Cancer Cells Feed on Cholesterol...           1.0   
1           Using Diet to Treat Asthma and Eczema...           0.8   
2           Treating Asthma With Plants vs. Pills...           1.0   
3      How Fruits and Vegetables Can Treat Asthma...           1.0   
4    How Fruits and Vegetables Can Prevent Asthma...           1.0   
5       Our Tax Dollars Subsidize Unhealthy Foods...           0.5   
6            Reducing Arsenic in Chicken and Rice...           1.0   
7              How Contaminated Are Our Children?...           0.0   
8      Blood Type Diet Perceived as "Crass Fraud"...           1.0   
9  Why Do Heart Doctors Favor Surgery and Drugs O...           0.6   

   answer_

In [None]:
! pip install optuna



In [None]:
# Create a modified version of the RAG class that accepts hyperparameters
class TunableRAG(improved_RAG):
    def __init__(self, embedding_dim: int = 384):
        super().__init__(embedding_dim)
        self.initial_k = 20  # Default value
        self.final_k = 3     # Default value
        self.temperature = 0.5  # Default value
        
    def add_documents_with_threshold(self, documents_df: pd.DataFrame, breakpoint_threshold_amount: int = 95) -> None:
        """Add documents with configurable semantic chunking threshold."""
        # Clear existing documents and indices
        self.documents = []
        self.tokenized_docs = []
        self.index = faiss.IndexFlatL2(self.embedding_dim)
        
        # Initialize the Google embeddings for semantic chunking
        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/text-embedding-004",
            google_api_key=os.getenv('GEMINI_API_KEY')
        )
        
        # Create semantic chunker with specified threshold
        splitter = SemanticChunker(
            embeddings=embeddings,
            breakpoint_threshold_type="percentile",
            breakpoint_threshold_amount=breakpoint_threshold_amount,  # Use the parameter
            number_of_chunks=None
        )
        
        texts_to_embed = []
        chunks_meta = []
        tokenized_chunks = []
        
        for _id, title, text in documents_df[['_id', 'title', 'text']].itertuples(index=False, name=None):
            chunks = splitter.split_text(text)
            
            for i, chunk in enumerate(chunks):
                full_text = f"Title: {title}\n\n{chunk}"
                texts_to_embed.append(full_text)
                
                chunks_meta.append({
                    'original_doc_id': _id,
                    'title': title,
                    'text': chunk
                })
                
                tokenized_text = (title + " " + chunk).lower().split()
                tokenized_chunks.append(tokenized_text)
        
        # Build FAISS index
        embeddings_array = self.embed_text_batch(texts_to_embed)
        self.index.add(embeddings_array)
        
        # Build BM25 index
        self.tokenized_docs.extend(tokenized_chunks)
        self.bm25 = BM25Okapi(self.tokenized_docs)
        
        # Update documents
        self.documents.extend(chunks_meta)
    
    def retrieve_with_params(self, query: str, initial_k: int, final_k: int) -> List[Tuple[Dict, float]]:
        """Retrieve with configurable k parameters."""
        # Store old values
        old_initial_k = self.initial_k
        old_final_k = self.final_k
        
        # Set new values
        self.initial_k = initial_k
        self.final_k = final_k
        
        # Call the original retrieve with modified params
        # We need to modify the retrieve function to use self.initial_k and self.final_k
        
        # FAISS Vector Search
        query_embedding = self.embed_text_batch([query]).reshape(1, -1)
        distances, indices = self.index.search(query_embedding, initial_k)
        
        vector_results = {}
        for rank, (idx, distance) in enumerate(zip(indices[0], distances[0])):
            if idx < len(self.documents):
                vector_results[idx] = rank + 1
        
        # BM25 Keyword Search
        tokenized_query = query.lower().split()
        bm25_scores = self.bm25.get_scores(tokenized_query)
        top_bm25_indices = np.argsort(bm25_scores)[::-1][:initial_k]
        
        bm25_results = {}
        for rank, idx in enumerate(top_bm25_indices):
            if idx < len(self.documents):
                bm25_results[idx] = rank + 1
        
        # RRF
        rrf_k = 60
        rrf_scores = {}
        all_indices = set(vector_results.keys()) | set(bm25_results.keys())
        
        for idx in all_indices:
            score = 0
            if idx in vector_results:
                score += 1 / (rrf_k + vector_results[idx])
            if idx in bm25_results:
                score += 1 / (rrf_k + bm25_results[idx])
            rrf_scores[idx] = score
        
        sorted_indices = sorted(rrf_scores.keys(), key=lambda x: rrf_scores[x], reverse=True)
        top_fused_indices = sorted_indices[:initial_k]
        
        candidates = []
        for idx in top_fused_indices:
            doc = self.documents[idx]
            doc_text = f"Title: {doc['title']}\n{doc['text']}"
            candidates.append({
                'doc': doc,
                'text': doc_text,
                'rrf_score': rrf_scores[idx]
            })
        
        if len(candidates) == 0:
            return []
        
        documents = [c['text'] for c in candidates]
        
        rerank_results = self.cohere_client.rerank(
            query=query,
            documents=documents,
            top_n=min(final_k, len(documents)),
            model='rerank-english-v3.0'
        )
        
        results = []
        for result in rerank_results.results:
            idx = result.index
            relevance_score = result.relevance_score
            results.append((candidates[idx]['doc'], relevance_score))
        
        # Restore old values
        self.initial_k = old_initial_k
        self.final_k = old_final_k
        
        return results
    
    def generate_answer_with_temp(self, query: str, retrieved_docs: List[Dict], temperature: float) -> str:
        """Generate answer with configurable temperature."""
        context = "\\n\\n".join([f"Title: {doc['title']}\\n{doc['text']}" for doc, dist in retrieved_docs])
        
        prompt = f"""
        Context information is provided below.
        ---------------------
        {context}
        ---------------------
        Given the context information and not prior knowledge, answer the query.
        Query: {query}
        Answer:
        """
        
        # Configure generation with temperature
        generation_config = genai.types.GenerationConfig(
            temperature=temperature
        )
        
        model = genai.GenerativeModel('gemini-2.5-flash')
        response = model.generate_content(prompt, generation_config=generation_config)
        return response.text

NameError: name 'improved_RAG' is not defined

In [None]:
import optuna
from typing import Dict
import numpy as np

def objective(trial):
    """
    Objective function for Optuna to optimize RAG hyperparameters.
    """
    # Suggest hyperparameters for this trial
    breakpoint_threshold = trial.suggest_int("breakpoint_threshold", 90, 99)
    initial_k = trial.suggest_categorical("initial_k", [20,25,30,35,40])
    final_k = trial.suggest_int("final_k", 3, 10)
    temperature = trial.suggest_categorical("temperature", [0.1, 0.5, 0.9])
    
    print(f"\nTrial {trial.number}: Testing params - threshold={breakpoint_threshold}, "
          f"initial_k={initial_k}, final_k={final_k}, temp={temperature}")
    
    # Initialize RAG system with new parameters
    rag_tunable = TunableRAG()
    
    # Add documents with the specified chunking threshold
    print("  Adding documents with new chunking...")
    rag_tunable.add_documents_with_threshold(df, breakpoint_threshold_amount=breakpoint_threshold)
    
    # Prepare evaluation data
    eval_results = []
    
    # Use first 10 queries for faster tuning (you can increase this)
    eval_subset = eval_df.head(10)
    
    for index, row in eval_subset.iterrows():
        query_text = row['text']
        
        # Retrieve with specified parameters
        retrieved_docs = rag_tunable.retrieve_with_params(query_text, initial_k, final_k)
        
        # Generate answer with specified temperature
        generated_answer = rag_tunable.generate_answer_with_temp(query_text, retrieved_docs, temperature)
        
        # Extract contexts for evaluation
        contexts = [doc['text'] for doc, _ in retrieved_docs]
        
        eval_results.append({
            'question': query_text,
            'contexts': contexts,
            'answer': generated_answer
        })
    
    # Evaluate using the same manual evaluation function
    print("  Evaluating performance...")
    model = genai.GenerativeModel('gemini-2.5-flash')
    
    faithfulness_scores = []
    relevancy_scores = []
    precision_scores = []
    
    for item in eval_results:
        question = item['question']
        contexts = item['contexts']
        answer = item['answer']
        
        # Evaluate Faithfulness
        faithfulness_prompt = f"""
        Question: {question}
        Context: {' '.join(contexts)}
        Answer: {answer}
        
        Rate how faithful the answer is to the provided context on a scale of 0-1.
        Consider if all claims in the answer can be verified from the context.
        Return only a number between 0 and 1.
        """
        
        try:
            faithfulness = float(model.generate_content(faithfulness_prompt).text.strip())
        except:
            faithfulness = 0.5  # Default if parsing fails
            
        # Evaluate Answer Relevancy
        relevancy_prompt = f"""
        Question: {question}
        Answer: {answer}
        
        Rate how relevant the answer is to the question on a scale of 0-1.
        Consider if the answer directly addresses what was asked.
        Return only a number between 0 and 1.
        """
        
        try:
            relevancy = float(model.generate_content(relevancy_prompt).text.strip())
        except:
            relevancy = 0.5
            
        # Evaluate Context Precision
        precision_prompt = f"""
        Question: {question}
        Context: {' '.join(contexts)}
        
        Rate how precise and relevant the retrieved context is for answering the question on a scale of 0-1.
        Return only a number between 0 and 1.
        """
        
        try:
            precision = float(model.generate_content(precision_prompt).text.strip())
        except:
            precision = 0.5
        
        faithfulness_scores.append(faithfulness)
        relevancy_scores.append(relevancy)
        precision_scores.append(precision)
    
    # Calculate average scores
    avg_faithfulness = np.mean(faithfulness_scores)
    avg_relevancy = np.mean(relevancy_scores)
    avg_precision = np.mean(precision_scores)
    
    # Calculate the objective score using the specified formula
    score = (0.5 * avg_relevancy) + (0.4 * avg_precision) + (0.1 * avg_faithfulness)
    
    print(f"  Scores - Faithfulness: {avg_faithfulness:.3f}, "
          f"Relevancy: {avg_relevancy:.3f}, Precision: {avg_precision:.3f}")
    print(f"  Final Score: {score:.3f}")
    
    return score

In [None]:
# Create and run the Optuna study
study = optuna.create_study(
    direction="maximize",
    study_name="rag_hyperparameter_tuning"
)

# Run optimization with fewer trials for testing (increase n_trials for better results)
print("Starting hyperparameter optimization...")
print("=" * 60)

study.optimize(
    objective, 
    n_trials=50  # Start with 20 trials, increase to 50-100 for production
)

print("\n" + "=" * 60)
print("Optimization Complete!")
print("=" * 60)

[I 2025-08-25 22:03:08,813] A new study created in memory with name: rag_hyperparameter_tuning
[W 2025-08-25 22:03:08,815] Trial 0 failed with parameters: {'breakpoint_threshold': 97, 'initial_k': 25, 'final_k': 6, 'temperature': 0.1} because of the following error: NameError("name 'TunableRAG' is not defined").
Traceback (most recent call last):
  File "/Users/shenhao/Desktop/RAG_system/.venv/lib/python3.9/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/9n/v_tm1cxd4r7c_3mh9hb1ww340000gn/T/ipykernel_2185/4259532904.py", line 19, in objective
    rag_tunable = TunableRAG()
NameError: name 'TunableRAG' is not defined
[W 2025-08-25 22:03:08,815] Trial 0 failed with value None.


Starting hyperparameter optimization...

Trial 0: Testing params - threshold=97, initial_k=25, final_k=6, temp=0.1


NameError: name 'TunableRAG' is not defined

In [None]:
# Display the best parameters found
print("\n🏆 BEST HYPERPARAMETERS FOUND:")
print("-" * 40)
for param, value in study.best_params.items():
    print(f"  {param}: {value}")

print(f"\n📊 Best Score Achieved: {study.best_value:.4f}")

# Show optimization history
print("\n📈 Optimization History (Top 5 Trials):")
print("-" * 40)

# Get trial dataframe and sort by value
trials_df = study.trials_dataframe()
top_trials = trials_df.nlargest(5, 'value')[['number', 'value', 'params_breakpoint_threshold', 
                                               'params_initial_k', 'params_final_k', 'params_temperature']]
print(top_trials.to_string())

# Create a visualization of the optimization history
import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Plot optimization history
ax = axes[0, 0]
ax.plot(trials_df['number'], trials_df['value'], 'b-', alpha=0.5)
ax.scatter(trials_df['number'], trials_df['value'], c='blue', alpha=0.6)
ax.set_xlabel('Trial Number')
ax.set_ylabel('Objective Score')
ax.set_title('Optimization History')
ax.grid(True, alpha=0.3)

# Plot parameter importance (if enough trials)
if len(trials_df) >= 10:
    try:
        importances = optuna.importance.get_param_importances(study)
        ax = axes[0, 1]
        params = list(importances.keys())
        values = list(importances.values())
        ax.barh(params, values)
        ax.set_xlabel('Importance')
        ax.set_title('Hyperparameter Importance')
    except:
        axes[0, 1].text(0.5, 0.5, 'Not enough trials for importance analysis', 
                        ha='center', va='center')

# Plot score distribution
ax = axes[1, 0]
ax.hist(trials_df['value'].dropna(), bins=15, edgecolor='black', alpha=0.7)
ax.set_xlabel('Objective Score')
ax.set_ylabel('Frequency')
ax.set_title('Score Distribution')
ax.axvline(study.best_value, color='red', linestyle='--', label=f'Best: {study.best_value:.4f}')
ax.legend()

# Parameter value counts
ax = axes[1, 1]
param_counts = {}
for param in ['breakpoint_threshold', 'initial_k', 'final_k', 'temperature']:
    col = f'params_{param}'
    param_counts[param] = trials_df[col].value_counts().to_dict()

# Create a summary text
summary_text = "Parameter Value Frequencies:\n\n"
for param, counts in param_counts.items():
    summary_text += f"{param}:\n"
    for value, count in sorted(counts.items()):
        summary_text += f"  {value}: {count} trials\n"
    summary_text += "\n"

ax.text(0.1, 0.5, summary_text, fontsize=10, verticalalignment='center', fontfamily='monospace')
ax.set_title('Parameter Usage Summary')
ax.axis('off')

plt.tight_layout()
plt.show()


🏆 BEST HYPERPARAMETERS FOUND:
----------------------------------------


ValueError: No trials are completed yet.