# HippoRAG with DSPy Optimization

This notebook implements **DSPy-based prompt optimization** for the HippoRAG Question Answering System on the PQuad dataset.

We'll focus on optimizing the RAG pipeline's QA prompt using advanced DSPy techniques while keeping the existing HippoRAG retrieval mechanism intact.

## Key Components:
- **HippoRAG Retriever**: Pre-built knowledge graph and PPR-based retrieval (from KG_PPR.ipynb)
- **DSPy QA Module**: Optimized question-answering with automatic prompt engineering
- **Enhanced Evaluation**: LLM judge with Persian language considerations
- **Multiple Optimization Strategies**: BootstrapFewShot and advanced techniques

In [None]:
import json
from sentence_transformers import SentenceTransformer, util
import torch
import numpy as np
import igraph as ig
from tqdm import tqdm
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from typing import List, Dict, Any
import dotenv
import os
import pandas as pd
import dspy
import re
from collections import defaultdict
import random

dotenv.load_dotenv()


True

In [None]:
GRAPH_INDEX_PATH = "hippocampal_index.json"
SOURCE_TRIPLES_PATH = "knowledge_graph_triples_structured.json"
ORIGINAL_CONTEXT_PATH = "all_context.json"
ENCODER_MODEL_NAME = 'paraphrase-multilingual-MiniLM-L12-v2'
MODEL_NAME = "gpt-4o-mini"
PPR_DAMPING_FACTOR = 0.5

lm = dspy.LM(
    model=f"openai/{MODEL_NAME}",
    api_key=os.getenv("METIS_API_KEY"),
    api_base="https://api.metisai.ir/openai/v1",
    max_tokens=400,
    temperature=0.1
)

lm_judge = dspy.LM(
    model=f"openai/{MODEL_NAME}",
    api_key=os.getenv("METIS_API_KEY"),
    api_base="https://api.metisai.ir/openai/v1",
    max_tokens=200,
    temperature=0.0
)

dspy.configure(lm=lm)

# HippoRAG Retriever (Pre-built)

We'll use the same HippoRAG retriever from the original KG_PPR.ipynb notebook.

In [None]:
class QueryEntities(BaseModel):
    named_entities: List[str] = Field(
        ...,
        description="A list of named entities that are important for solving the user's question."
    )


class HippoRAGRetriever:
    def __init__(self):
        print("Initializing HippoRAGRetriever...")
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        
        # 1. Load all necessary data files
        self._load_data()

        # 2. Initialize models
        self.encoder_model = SentenceTransformer(ENCODER_MODEL_NAME, device=self.device)
        self.llm = ChatOpenAI(
            model=MODEL_NAME, temperature=0, api_key=os.getenv("METIS_API_KEY"), base_url="https://api.metisai.ir/openai/v1"
        )
        
        # 3. Pre-compute necessary mappings and structures
        self._build_graph()
        self._compute_node_specificity()
        self._create_passage_to_node_mapping()

        # 4. Pre-encode all graph nodes for fast query-time similarity search
        print("Pre-encoding all graph nodes for retrieval...")
        self.node_labels = [node['label'] for node in self.graph_data['nodes']]
        self.node_embeddings = self.encoder_model.encode(
            self.node_labels, show_progress_bar=True, convert_to_tensor=True
        )
        print("Initialization complete.")

    def _load_data(self):
        print("Loading graph index and source data...")
        try:
            with open(GRAPH_INDEX_PATH, 'r', encoding='utf-8') as f:
                self.graph_data = json.load(f)
            with open(SOURCE_TRIPLES_PATH, 'r', encoding='utf-8') as f:
                self.source_triples = json.load(f)
            with open(ORIGINAL_CONTEXT_PATH, 'r', encoding='utf-8') as f:
                self.passages = json.load(f)
        except FileNotFoundError as e:
            print(f"Error: Could not find a required data file: {e.filename}")
            print("Please ensure all data files from the previous steps are present.")
            exit()

    def _build_graph(self):
        print("Building weighted igraph object...")
        self.g = ig.Graph(directed=False)
        self.g.add_vertices(len(self.graph_data['nodes']))
        
        self.node_label_to_id = {node['label']: node['id'] for node in self.graph_data['nodes']}
        
        edges = []
        weights = []
        for edge_data in self.graph_data['edges']:
            source_id = self.node_label_to_id.get(edge_data['source'])
            target_id = self.node_label_to_id.get(edge_data['target'])
            
            if source_id is not None and target_id is not None:
                edges.append((source_id, target_id))
                if edge_data['type'] == 'synonymy':
                    weights.append(edge_data.get('score', 0.8))
                else:
                    weights.append(1.0)
        
        self.g.add_edges(edges)
        self.g.es['weight'] = weights
        print(f"Graph built with {self.g.vcount()} vertices and {self.g.ecount()} edges.")

    def _compute_node_specificity(self):
        print("Computing node specificity...")
        node_passage_counts = {node_id: 0 for node_id in range(len(self.node_label_to_id))}
        passage_nodes = {}
        
        for item in self.source_triples:
            passage_id = item['id']
            nodes_in_passage = set(item.get('entities', []))
            for triple in item.get('triples', []):
                nodes_in_passage.add(triple['subject'])
                nodes_in_passage.add(triple['object'])
            
            for node_label in nodes_in_passage:
                node_id = self.node_label_to_id.get(node_label)
                if node_id is not None:
                    if passage_id not in passage_nodes.get(node_id, set()):
                        node_passage_counts[node_id] += 1
                        if node_id not in passage_nodes:
                            passage_nodes[node_id] = set()
                        passage_nodes[node_id].add(passage_id)

        self.node_specificity = {}
        for node_id, count in node_passage_counts.items():
            if count > 0:
                self.node_specificity[node_id] = 1.0 / count
            else:
                self.node_specificity[node_id] = 0

    def _create_passage_to_node_mapping(self):
        self.passage_to_nodes = {i: [] for i in range(len(self.passages))}
        for item in self.source_triples:
            passage_idx = item['id']
            nodes_in_passage = set(item.get('entities', []))
            for triple in item.get('triples', []):
                nodes_in_passage.add(triple['subject'])
                nodes_in_passage.add(triple['object'])
            
            node_ids_in_passage = {self.node_label_to_id.get(label) for label in nodes_in_passage}
            self.passage_to_nodes[passage_idx] = [nid for nid in node_ids_in_passage if nid is not None]

    def _extract_query_entities(self, query: str) -> List[str]:
        """Uses an LLM to extract named entities from the user query."""
        system_prompt = """
        Instruction:
        Your task is to extract named entities from the given paragraph in the user's message.
        Respond with a JSON list of entities.

        One-Shot Demonstration:
        If the user provides the paragraph:
        "Radio City is India's first private FM radio station and was started on 3 July 2001. It plays Hindi, English and regional songs. Radio City recently forayed into New Media in May 2008 with the launch of a music portal - PlanetRadiocity.com that offers music related news, videos, songs, and other music-related features."

        Your output should be a JSON object with a list of the extracted entities:
        {{"named_entities": ["Radio City", "India", "3 July 2001", "Hindi", "English", "May 2008", "PlanetRadiocity.com"]}}
        """
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            ("human", "Question: {query}")
        ])
        chain = prompt | self.llm.with_structured_output(QueryEntities)
        try:
            response = chain.invoke({"query": query})
            return response.named_entities
        except Exception as e:
            print(f"Error extracting query entities: {e}")
            return []
            
    def _find_query_nodes(self, query_entities: List[str]) -> List[int]:
        if not query_entities:
            return []
        
        query_embeddings = self.encoder_model.encode(query_entities, convert_to_tensor=True)
        cos_scores = util.cos_sim(query_embeddings, self.node_embeddings)
        
        top_matches_indices = torch.argmax(cos_scores, dim=1)
        
        matched_node_ids = []
        for i, entity in enumerate(query_entities):
            node_id = top_matches_indices[i].item()
            matched_node_ids.append(node_id)
        
        return list(set(matched_node_ids))

    def retrieve(self, query: str, top_k: int = 5) -> List[dict]:
        
        # 1. Extract entities from query
        query_entities = self._extract_query_entities(query)
        if not query_entities:
            return []

        # 2. Find corresponding nodes in the graph
        query_node_ids = self._find_query_nodes(query_entities)
        if not query_node_ids:
            return []

        # 3. Perform pattern completion with PPR
        personalization_vector = np.zeros(self.g.vcount())
        total_specificity = 0
        for node_id in query_node_ids:
            specificity = self.node_specificity.get(node_id, 0)
            personalization_vector[node_id] = specificity
            total_specificity += specificity
        
        if total_specificity > 0:
            personalization_vector /= total_specificity
            
        ppr_scores = self.g.personalized_pagerank(
            vertices=None,
            directed=False,
            damping=PPR_DAMPING_FACTOR,
            reset=personalization_vector.tolist(),
            weights=self.g.es['weight']
        )
        
        # 4. Aggregate node scores to get passage scores
        passage_scores = {i: 0.0 for i in range(len(self.passages))}
        for passage_idx, node_ids in self.passage_to_nodes.items():
            score = sum(ppr_scores[node_id] for node_id in node_ids)
            passage_scores[passage_idx] = score
            
        # Sort and return top_k results
        sorted_passages = sorted(passage_scores.items(), key=lambda item: item[1], reverse=True)
        
        results = []
        for passage_idx, score in sorted_passages[:top_k]:
            results.append({
                "passage_index": passage_idx,
                "passage_text": self.source_triples[passage_idx]["passage"],
                "score": score
            })
        return results

In [None]:
retriever = HippoRAGRetriever()

Initializing HippoRAGRetriever...
Loading graph index and source data...
Building weighted igraph object...
Graph built with 4691 vertices and 4649 edges.
Computing node specificity...
Pre-encoding all graph nodes for retrieval...
Building weighted igraph object...
Graph built with 4691 vertices and 4649 edges.
Computing node specificity...
Pre-encoding all graph nodes for retrieval...


Batches:   0%|          | 0/147 [00:00<?, ?it/s]

Initialization complete.


# DSPy QA Module

Now we'll implement the Question Answering system using DSPy for automatic prompt optimization.

In [None]:
class PersianRAGSignature(dspy.Signature):
    """Answer Persian/Farsi questions based on provided context passages. Provide precise, concise answers."""
    context = dspy.InputField(desc="Retrieved context passages relevant to the question")
    question = dspy.InputField(desc="Persian question to answer")
    answer = dspy.OutputField(desc="Precise, concise Persian answer based on the context")

class PersianRAGWithReasoningSignature(dspy.Signature):
    """Answer Persian/Farsi questions using step-by-step reasoning based on context passages."""
    context = dspy.InputField(desc="Retrieved context passages relevant to the question")
    question = dspy.InputField(desc="Persian question requiring reasoning")
    reasoning = dspy.OutputField(desc="Step-by-step reasoning process in Persian")
    answer = dspy.OutputField(desc="Final precise Persian answer based on reasoning")

class EnhancedPersianRAGSignature(dspy.Signature):
    """Advanced Persian QA with context analysis and evidence-based answers."""
    context = dspy.InputField(desc="Retrieved context passages with relevance scores")
    question = dspy.InputField(desc="Persian question to answer comprehensively")
    relevant_evidence = dspy.OutputField(desc="Key evidence from context that supports the answer")
    answer = dspy.OutputField(desc="Accurate Persian answer with supporting evidence")

In [None]:
class DSPyHippoRAGModule(dspy.Module):
    def __init__(self, retriever: HippoRAGRetriever):
        super().__init__()
        self.retriever = retriever
        self.generate_answer = dspy.Predict(PersianRAGSignature)
    
    def forward(self, question, top_k_retrieval=5):
        try:
            retrieved_passages = self.retriever.retrieve(question, top_k=top_k_retrieval)
        except Exception as e:
            print(f"Error in retrieval: {e}")
            return dspy.Prediction(answer="Error")
        
        if not retrieved_passages:
            return dspy.Prediction(answer="No context found")
        
        # Format context
        context_string = ""
        for i, doc in enumerate(retrieved_passages):
            context_string += f"متن {i+1} (امتیاز: {doc['score']:.4f}):\n{doc['passage_text']}\n\n"
        
        try:
            with dspy.context(lm=lm):
                result = self.generate_answer(context=context_string.strip(), question=question)
            return dspy.Prediction(answer=result.answer, context=context_string)
        except Exception as e:
            print(f"Error in answer generation: {e}")
            return dspy.Prediction(answer="Error")

class DSPyHippoRAGWithReasoningModule(dspy.Module):
    def __init__(self, retriever: HippoRAGRetriever):
        super().__init__()
        self.retriever = retriever
        self.generate_answer = dspy.ChainOfThought(PersianRAGWithReasoningSignature)
    
    def forward(self, question, top_k_retrieval=5):
        try:
            retrieved_passages = self.retriever.retrieve(question, top_k=top_k_retrieval)
        except Exception as e:
            print(f"Error in retrieval: {e}")
            return dspy.Prediction(answer="Error")
        
        if not retrieved_passages:
            return dspy.Prediction(answer="No context found")
        
        # Format context
        context_string = ""
        for i, doc in enumerate(retrieved_passages):
            context_string += f"متن {i+1} (امتیاز: {doc['score']:.4f}):\n{doc['passage_text']}\n\n"
        
        try:
            with dspy.context(lm=lm):
                result = self.generate_answer(context=context_string.strip(), question=question)
            return dspy.Prediction(
                answer=result.answer, 
                reasoning=getattr(result, 'reasoning', ''),
                context=context_string
            )
        except Exception as e:
            print(f"Error in answer generation: {e}")
            return dspy.Prediction(answer="Error")

class EnhancedDSPyHippoRAGModule(dspy.Module):
    def __init__(self, retriever: HippoRAGRetriever):
        super().__init__()
        self.retriever = retriever
        self.generate_answer = dspy.Predict(EnhancedPersianRAGSignature)
    
    def forward(self, question, top_k_retrieval=5):
        try:
            retrieved_passages = self.retriever.retrieve(question, top_k=top_k_retrieval)
        except Exception as e:
            print(f"Error in retrieval: {e}")
            return dspy.Prediction(answer="Error")
        
        if not retrieved_passages:
            return dspy.Prediction(answer="No context found")
        
        
        context_string = ""
        for i, doc in enumerate(retrieved_passages):
            context_string += f"متن {i+1} (امتیاز مرتبط بودن: {doc['score']:.4f}):\n{doc['passage_text']}\n\n"
        
        try:
            with dspy.context(lm=lm):
                result = self.generate_answer(context=context_string.strip(), question=question)
            return dspy.Prediction(
                answer=result.answer,
                relevant_evidence=getattr(result, 'relevant_evidence', ''),
                context=context_string
            )
        except Exception as e:
            print(f"Error in answer generation: {e}")
            return dspy.Prediction(answer="Error")

class EnsemblePersianRAGModule(dspy.Module):

    def __init__(self, retriever: HippoRAGRetriever):
        super().__init__()
        self.retriever = retriever
        self.qa_direct = dspy.Predict(PersianRAGSignature)
        self.qa_reasoning = dspy.ChainOfThought(PersianRAGWithReasoningSignature)
        self.qa_enhanced = dspy.Predict(EnhancedPersianRAGSignature)
    
    def forward(self, question, top_k_retrieval=5):
        try:
            retrieved_passages = self.retriever.retrieve(question, top_k=top_k_retrieval)
        except Exception as e:
            print(f"Error in retrieval: {e}")
            return dspy.Prediction(answer="Error")
        
        if not retrieved_passages:
            return dspy.Prediction(answer="No context found")
        
        # Format context
        context_string = ""
        for i, doc in enumerate(retrieved_passages):
            context_string += f"متن {i+1} (امتیاز: {doc['score']:.4f}):\n{doc['passage_text']}\n\n"
        
        try:
            with dspy.context(lm=lm):
                direct = self.qa_direct(context=context_string.strip(), question=question)
                reasoning = self.qa_reasoning(context=context_string.strip(), question=question)
                enhanced = self.qa_enhanced(context=context_string.strip(), question=question)
        except Exception as e:
            print(f"Error in ensemble generation: {e}")
            return dspy.Prediction(answer="Error")
        
        answers = [direct.answer, reasoning.answer, enhanced.answer]
        final_answer = reasoning.answer if hasattr(reasoning, 'answer') else direct.answer
        
        return dspy.Prediction(
            answer=final_answer,
            direct_answer=direct.answer,
            reasoning_answer=reasoning.answer,
            enhanced_answer=enhanced.answer,
            reasoning=getattr(reasoning, 'reasoning', ''),
            context=context_string
        )



# Load and Prepare Data

In [None]:
df_pquad = pd.read_csv('../pquad_questions.csv', encoding='utf-8')
pquad_data = df_pquad.to_dict(orient='records')

print(f"Loaded {len(pquad_data)} PQuad examples")
print(f"Sample question: {pquad_data[0]['question']}")
print(f"Sample answer: {pquad_data[0]['answer']}")

Loaded 500 PQuad examples
Sample question: ساختار آب چیست؟
Sample answer: اکسید هیدروژن


In [None]:
def prepare_dspy_examples(data_list, sample_size=None):
    examples = []
    
    for item in data_list:
        example = dspy.Example(
            question=item['question'],
            answer=item['answer']
        ).with_inputs('question')
        examples.append(example)
    
    if sample_size and sample_size < len(examples):
        return random.sample(examples, sample_size)
    
    return examples

random.seed(42)

pquad_train_examples = prepare_dspy_examples(pquad_data[:60])  
pquad_test_examples = prepare_dspy_examples(pquad_data[60:160])  

print(f"Train examples: {len(pquad_train_examples)}")
print(f"Test examples: {len(pquad_test_examples)}")

Train examples: 60
Test examples: 100


# Enhanced Evaluation Function

In [None]:
# Enhanced evaluation functions for DSPy
def clean_model_answer(model_answer: str) -> str:
    if not model_answer:
        return ""
    
    # Remove various tags and formatting
    cleaned = re.sub(r'<ANSWER>(.*?)</ANSWER>', r'\1', model_answer, flags=re.DOTALL|re.IGNORECASE)
    cleaned = re.sub(r'<[^>]+>', '', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned)
    cleaned = cleaned.strip()
    
    # Remove common prefixes
    prefixes = ['پاسخ:', 'جواب:', 'Answer:', 'Response:']
    for prefix in prefixes:
        if cleaned.startswith(prefix):
            cleaned = cleaned[len(prefix):].strip()
    
    return cleaned

def evaluate_answer_with_judge(question: str, correct_answer: str, model_answer: str) -> bool:
    clean_answer = clean_model_answer(model_answer)
    
    prompt = f"""شما یک قاضی خبره هستید که پاسخ‌های فارسی را ارزیابی می‌کنید. 
تعیین کنید که آیا پاسخ مدل از نظر معنایی معادل پاسخ صحیح است یا خیر.
در نظر بگیرید که تغییرات جزئی در املا و عبارات معادل قابل قبول هستند.

سوال: {question}

پاسخ صحیح: {correct_answer}
پاسخ مدل: {clean_answer}

اگر پاسخ مدل از نظر معنایی معادل پاسخ صحیح است، فقط "TRUE" بنویسید.
در غیر این صورت فقط "FALSE" بنویسید.

پاسخ:"""
    
    try:
        response = lm_judge(prompt)
        if isinstance(response, list) and len(response) > 0:
            response_text = str(response[0])
        else:
            response_text = str(response)
        
        return "TRUE" in response_text.upper()
    except Exception as e:
        print(f"Error in judge evaluation: {e}")
        return False

evaluation_cache = {}

def persian_rag_accuracy_metric(gold, pred, trace=None):
    cache_key = (gold.question, gold.answer, pred.answer)
    
    if cache_key in evaluation_cache:
        return evaluation_cache[cache_key]
    
    result = evaluate_answer_with_judge(gold.question, gold.answer, pred.answer)
    evaluation_cache[cache_key] = result
    return result



# DSPy Optimization Experiments

We'll test multiple optimization strategies and compare their effectiveness.

## Strategy 1: Basic DSPy HippoRAG

In [None]:
print("Strategy 1: Basic DSPy HippoRAG Optimization...")

basic_model = DSPyHippoRAGModule(retriever)

basic_teleprompter = dspy.BootstrapFewShot(
    metric=persian_rag_accuracy_metric,
    max_bootstrapped_demos=5,
    max_labeled_demos=3,
    max_rounds=2
)

# Optimize
print("Optimizing basic model...")
basic_optimized = basic_teleprompter.compile(
    basic_model,
    trainset=pquad_train_examples
)

print("Strategy 1 optimization completed!")

print("\n" + "="*60)
print("STRATEGY 1 - BASIC DSPY HIPPORAG RESULTS:")
print("="*60)
for i, predictor in enumerate(basic_optimized.predictors()):
    print(f"\nPredictor {i+1}:")
    print(f"Signature: {predictor.signature}")
    if hasattr(predictor, 'demos') and predictor.demos:
        print(f"Demonstrations: {len(predictor.demos)}")
        for j, demo in enumerate(predictor.demos[:2]):
            print(f"  Demo {j+1}: {demo.question[:80]}... -> {demo.answer}")
print("="*60)

Strategy 1: Basic DSPy HippoRAG Optimization...
Optimizing basic model...


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 5 full traces after 8 examples for up to 2 rounds, amounting to 11 attempts.
Strategy 1 optimization completed!

STRATEGY 1 - BASIC DSPY HIPPORAG RESULTS:

Predictor 1:
Signature: PersianRAGSignature(context, question -> answer
    instructions='Answer Persian/Farsi questions based on provided context passages. Provide precise, concise answers.'
    context = Field(annotation=str required=True json_schema_extra={'desc': 'Retrieved context passages relevant to the question', '__dspy_field_type': 'input', 'prefix': 'Context:'})
    question = Field(annotation=str required=True json_schema_extra={'desc': 'Persian question to answer', '__dspy_field_type': 'input', 'prefix': 'Question:'})
    answer = Field(annotation=str required=True json_schema_extra={'desc': 'Precise, concise Persian answer based on the context', '__dspy_field_type': 'output', 'prefix': 'Answer:'})
)
Demonstrations: 5
  Demo 1: رضاخان دوران جنگ جهانی را بیشتر در کدام سمت ایران سپری کرد؟... -> غرب ایران
  De




## Strategy 2: Enhanced DSPy with Random Search

In [None]:
print("Strategy 2: Enhanced DSPy with Random Search...")

enhanced_model = EnhancedDSPyHippoRAGModule(retriever)

enhanced_teleprompter = dspy.BootstrapFewShotWithRandomSearch(
    metric=persian_rag_accuracy_metric,
    max_bootstrapped_demos=8,
    max_labeled_demos=6,
    max_rounds=3,
    num_candidate_programs=12,
    num_threads=4
)

# Optimize
print("Optimizing enhanced model...")
enhanced_optimized = enhanced_teleprompter.compile(
    enhanced_model,
    trainset=pquad_train_examples,
    valset=pquad_train_examples[:15]
)

print("Strategy 2 optimization completed!")

print("\n" + "="*60)
print("STRATEGY 2 - ENHANCED DSPY WITH RANDOM SEARCH:")
print("="*60)
for i, predictor in enumerate(enhanced_optimized.predictors()):
    print(f"\nPredictor {i+1}:")
    print(f"Signature: {predictor.signature}")
    if hasattr(predictor, 'demos') and predictor.demos:
        print(f"Demonstrations: {len(predictor.demos)}")
print("="*60)

Strategy 2: Enhanced DSPy with Random Search...
Going to sample between 1 and 8 traces per predictor.
Will attempt to bootstrap 12 candidate sets.
Optimizing enhanced model...


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:18<00:00,  1.25s/it]

2025/09/06 15:21:44 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



New best score: 46.67 for seed -3
Scores so far: [46.67]
Best score so far: 46.67


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
<function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '

Average Metric: 6.00 / 15 (40.0%): 100%|██████████| 15/15 [00:17<00:00,  1.17s/it]

2025/09/06 15:22:02 INFO dspy.evaluate.evaluate: Average Metric: 6 / 15 (40.0%)



Scores so far: [46.67, 40.0]
Best score so far: 46.67


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
<function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '

Bootstrapped 8 full traces after 17 examples for up to 3 rounds, amounting to 35 attempts.
Average Metric: 6.00 / 15 (40.0%): 100%|██████████| 15/15 [00:25<00:00,  1.72s/it]

2025/09/06 15:23:41 INFO dspy.evaluate.evaluate: Average Metric: 6 / 15 (40.0%)



Scores so far: [46.67, 40.0, 40.0]
Best score so far: 46.67


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 7 full traces after 22 examples for up to 3 rounds, amounting to 52 attempts.
Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:25<00:00,  1.68s/it]

2025/09/06 15:27:18 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



Scores so far: [46.67, 40.0, 40.0, 46.67]
Best score so far: 46.67


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 3 full traces after 6 examples for up to 3 rounds, amounting to 12 attempts.
Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:20<00:00,  1.40s/it]

2025/09/06 15:28:29 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67]
Best score so far: 46.67


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 1 full traces after 1 examples for up to 3 rounds, amounting to 1 attempts.
Average Metric: 9.00 / 15 (60.0%): 100%|██████████| 15/15 [00:18<00:00,  1.26s/it]

2025/09/06 15:29:11 INFO dspy.evaluate.evaluate: Average Metric: 9 / 15 (60.0%)



New best score: 60.0 for seed 2
Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0]
Best score so far: 60.0


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 4 full traces after 9 examples for up to 3 rounds, amounting to 19 attempts.
Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:22<00:00,  1.49s/it]

2025/09/06 15:30:51 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0, 46.67]
Best score so far: 60.0


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 4 full traces after 8 examples for up to 3 rounds, amounting to 16 attempts.
Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:22<00:00,  1.52s/it]

2025/09/06 15:32:19 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0, 46.67, 46.67]
Best score so far: 60.0


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 5 full traces after 6 examples for up to 3 rounds, amounting to 9 attempts.
Average Metric: 8.00 / 15 (53.3%): 100%|██████████| 15/15 [00:31<00:00,  2.12s/it]

2025/09/06 15:33:43 INFO dspy.evaluate.evaluate: Average Metric: 8 / 15 (53.3%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0, 46.67, 46.67, 53.33]
Best score so far: 60.0


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
<function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '

Bootstrapped 2 full traces after 5 examples for up to 3 rounds, amounting to 12 attempts.
Average Metric: 8.00 / 15 (53.3%): 100%|██████████| 15/15 [00:26<00:00,  1.76s/it]

2025/09/06 15:35:05 INFO dspy.evaluate.evaluate: Average Metric: 8 / 15 (53.3%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0, 46.67, 46.67, 53.33, 53.33]
Best score so far: 60.0


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 6 full traces after 13 examples for up to 3 rounds, amounting to 27 attempts.
Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:24<00:00,  1.65s/it]

2025/09/06 15:37:13 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0, 46.67, 46.67, 53.33, 53.33, 46.67]
Best score so far: 60.0


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 4 full traces after 14 examples for up to 3 rounds, amounting to 34 attempts.
Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:26<00:00,  1.76s/it]

2025/09/06 15:39:47 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0, 46.67, 46.67, 53.33, 53.33, 46.67, 46.67]
Best score so far: 60.0


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 8 full traces after 28 examples for up to 3 rounds, amounting to 68 attempts.
Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:26<00:00,  1.74s/it]

2025/09/06 15:44:35 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0, 46.67, 46.67, 53.33, 53.33, 46.67, 46.67, 46.67]
Best score so far: 60.0


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 1 full traces after 1 examples for up to 3 rounds, amounting to 1 attempts.
Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:19<00:00,  1.31s/it]

2025/09/06 15:45:09 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0, 46.67, 46.67, 53.33, 53.33, 46.67, 46.67, 46.67, 46.67]
Best score so far: 60.0


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 8 full traces after 20 examples for up to 3 rounds, amounting to 44 attempts.
Average Metric: 7.00 / 15 (46.7%): 100%|██████████| 15/15 [00:19<00:00,  1.32s/it]

2025/09/06 15:48:46 INFO dspy.evaluate.evaluate: Average Metric: 7 / 15 (46.7%)



Scores so far: [46.67, 40.0, 40.0, 46.67, 46.67, 60.0, 46.67, 46.67, 53.33, 53.33, 46.67, 46.67, 46.67, 46.67, 46.67]
Best score so far: 60.0
15 candidate programs found.
Strategy 2 optimization completed!

STRATEGY 2 - ENHANCED DSPY WITH RANDOM SEARCH:

Predictor 1:
Signature: EnhancedPersianRAGSignature(context, question -> relevant_evidence, answer
    instructions='Advanced Persian QA with context analysis and evidence-based answers.'
    context = Field(annotation=str required=True json_schema_extra={'desc': 'Retrieved context passages with relevance scores', '__dspy_field_type': 'input', 'prefix': 'Context:'})
    question = Field(annotation=str required=True json_schema_extra={'desc': 'Persian question to answer comprehensively', '__dspy_field_type': 'input', 'prefix': 'Question:'})
    relevant_evidence = Field(annotation=str required=True json_schema_extra={'desc': 'Key evidence from context that supports the answer', '__dspy_field_type': 'output', 'prefix': 'Relevant Evidenc

## Strategy 3: Ensemble Approach

In [None]:
print("Strategy 3: Ensemble Approach Optimization...")

ensemble_model = EnsemblePersianRAGModule(retriever)
print(f" {len(ensemble_model.predictors())} predictors")

ensemble_teleprompter = dspy.BootstrapFewShot(
    metric=persian_rag_accuracy_metric,
    max_bootstrapped_demos=5,
    max_labeled_demos=3,
    max_rounds=2
)

try:
    # optimization step
    ensemble_optimized = ensemble_teleprompter.compile(
        ensemble_model,
        trainset=pquad_train_examples
    )
    print("✓ Strategy 3 ensemble optimization completed successfully!")
    print(f"✓ Optimized ensemble has {len(ensemble_optimized.predictors())} optimized predictors")
    
except Exception as e:
    print(f"✗ Error in ensemble optimization: {e}")
    ensemble_optimized = ensemble_model
    print("Using unoptimized ensemble as fallback")

print("\n" + "="*60)
print("STRATEGY 3 - ENSEMBLE APPROACH OPTIMIZATION RESULTS:")
print("="*60)

print(f"\nOptimized Predictors:")
for i, predictor in enumerate(ensemble_optimized.predictors()):
    print(f"  Predictor {i+1}: {predictor.signature}")
    if hasattr(predictor, 'demos') and predictor.demos:
        print(f"    Demonstrations: {len(predictor.demos)}")
        # Show 1 demo
        for j, demo in enumerate(predictor.demos[:1]):  
            print(f"      Demo {j+1}: {demo.question[:60]}... -> {demo.answer[:40]}...")

print("="*60)

Strategy 3: Ensemble Approach Optimization...
 3 predictors


Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' object has no attribute '_state'
Exception ignored in: <function SyncHttpxClientWrapper.__del__ at 0x30a27b100>
Traceback (most recent call last):
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/openai/_base_client.py", line 811, in __del__
    if self.is_closed:
       ^^^^^^^^^^^^^^
  File "/opt/miniconda3/envs/llms/lib/python3.11/site-packages/httpx/_client.py", line 228, in is_closed
    return self._state == ClientState.CLOSED
           ^^^^^^^^^^^
AttributeError: 'SyncHttpxClientWrapper' obj

Bootstrapped 5 full traces after 13 examples for up to 2 rounds, amounting to 21 attempts.
✓ Strategy 3 ensemble optimization completed successfully!
✓ Optimized ensemble has 3 optimized predictors

STRATEGY 3 - ENSEMBLE APPROACH OPTIMIZATION RESULTS:

Optimized Predictors:
  Predictor 1: PersianRAGSignature(context, question -> answer
    instructions='Answer Persian/Farsi questions based on provided context passages. Provide precise, concise answers.'
    context = Field(annotation=str required=True json_schema_extra={'desc': 'Retrieved context passages relevant to the question', '__dspy_field_type': 'input', 'prefix': 'Context:'})
    question = Field(annotation=str required=True json_schema_extra={'desc': 'Persian question to answer', '__dspy_field_type': 'input', 'prefix': 'Question:'})
    answer = Field(annotation=str required=True json_schema_extra={'desc': 'Precise, concise Persian answer based on the context', '__dspy_field_type': 'output', 'prefix': 'Answer:'})
)
    Demonst




# Comprehensive Evaluation

In [None]:
def evaluate_hipporag_model(model, model_name, test_examples, max_examples=None):
    print(f"\nEvaluating {model_name}")
    
    if max_examples:
        test_examples = test_examples[:max_examples]
    
    results = []
    
    for example in tqdm(test_examples, desc=f"Testing {model_name}"):
        try:
            prediction = model(question=example.question, top_k_retrieval=5)
            model_answer = prediction.answer
        except Exception as e:
            model_answer = f"Error: {e}"
        
        is_correct = evaluate_answer_with_judge(
            example.question,
            example.answer, 
            model_answer
        )
        
        results.append({
            'question': example.question,
            'expected_answer': example.answer,
            'model_answer': model_answer,
            'clean_model_answer': clean_model_answer(model_answer),
            'is_correct': is_correct,
            'context': getattr(prediction, 'context', ''),
            'reasoning': getattr(prediction, 'reasoning', ''),
            'evidence': getattr(prediction, 'relevant_evidence', '')
        })
    
    total_correct = sum(1 for r in results if r['is_correct'])
    total_questions = len(results)
    accuracy = total_correct / total_questions if total_questions > 0 else 0
    
    print(f"\n{model_name} Results:")
    print(f"  Overall Accuracy: {accuracy:.3f} ({total_correct}/{total_questions})")
    
    return results, accuracy

In [None]:
strategies = [
    (basic_optimized, "Strategy 1: Basic DSPy HippoRAG"),
    (enhanced_optimized, "Strategy 2: Enhanced with Random Search"),
    (ensemble_optimized, "Strategy 3: Ensemble Approach")
]

all_results = {}
all_accuracies = {}

for model, name in strategies:
    results, accuracy = evaluate_hipporag_model(
        model, name, pquad_test_examples, max_examples=50
    )
    
    all_results[name] = results
    all_accuracies[name] = accuracy
    
    filename = f"hipporag_dspy_{name.lower().replace(' ', '_').replace(':', '')}_results.csv"
    pd.DataFrame(results).to_csv(filename, index=False)
    print(f"  Saved to: {filename}")

print("\n" + "="*80)
print("FINAL HIPPORAG DSPY COMPARISON:")
print("="*80)
for name, accuracy in sorted(all_accuracies.items(), key=lambda x: x[1], reverse=True):
    print(f"{accuracy:.3f} - {name}")
print("="*80)


Evaluating Strategy 1: Basic DSPy HippoRAG


Testing Strategy 1: Basic DSPy HippoRAG: 100%|██████████| 50/50 [04:07<00:00,  4.96s/it]




Strategy 1: Basic DSPy HippoRAG Results:
  Overall Accuracy: 0.420 (21/50)
  Saved to: hipporag_dspy_strategy_1_basic_dspy_hipporag_results.csv

Evaluating Strategy 2: Enhanced with Random Search


Testing Strategy 2: Enhanced with Random Search: 100%|██████████| 50/50 [04:29<00:00,  5.38s/it]




Strategy 2: Enhanced with Random Search Results:
  Overall Accuracy: 0.340 (17/50)
  Saved to: hipporag_dspy_strategy_2_enhanced_with_random_search_results.csv

Evaluating Strategy 3: Ensemble Approach


Testing Strategy 3: Ensemble Approach: 100%|██████████| 50/50 [09:27<00:00, 11.34s/it]


Strategy 3: Ensemble Approach Results:
  Overall Accuracy: 0.380 (19/50)
  Saved to: hipporag_dspy_strategy_3_ensemble_approach_results.csv

FINAL HIPPORAG DSPY COMPARISON:
0.420 - Strategy 1: Basic DSPy HippoRAG
0.380 - Strategy 3: Ensemble Approach
0.340 - Strategy 2: Enhanced with Random Search





# Compare with Baseline Results

In [None]:
try:
    baseline_no_reasoning = pd.read_csv('../evaluated_results_no_reasoning_RAG.csv')
    baseline_accuracy = baseline_no_reasoning['is_correct'].mean()
    
    print(f"\n" + "="*80)
    print("COMPARISON WITH BASELINE HIPPORAG:")
    print("="*80)
    print(f"Baseline HippoRAG (No DSPy): {baseline_accuracy:.3f}")
    print("\nDSPy Optimized Results:")
    for name, accuracy in sorted(all_accuracies.items(), key=lambda x: x[1], reverse=True):
        improvement = accuracy - baseline_accuracy
        improvement_str = f" (+{improvement:.3f})" if improvement > 0 else f" ({improvement:.3f})"
        print(f"{accuracy:.3f}{improvement_str} - {name}")
    
    best_dspy_accuracy = max(all_accuracies.values())
    total_improvement = best_dspy_accuracy - baseline_accuracy
    print(f"\nBest DSPy Improvement: +{total_improvement:.3f} ({total_improvement/baseline_accuracy*100:.1f}% relative)")
    print("="*80)
    
except FileNotFoundError:
    print("Baseline results not found. Skipping comparison.")


COMPARISON WITH BASELINE HIPPORAG:
Baseline HippoRAG (No DSPy): 0.340

DSPy Optimized Results:
0.420 (+0.080) - Strategy 1: Basic DSPy HippoRAG
0.380 (+0.040) - Strategy 3: Ensemble Approach
0.340 (0.000) - Strategy 2: Enhanced with Random Search

Best DSPy Improvement: +0.080 (23.5% relative)


# Final Summary and Analysis

In [21]:
print(f"\nHippoRAG + DSPy Results:")
for i, (name, accuracy) in enumerate(sorted(all_accuracies.items(), key=lambda x: x[1], reverse=True), 1):
    print(f"  {i}. {accuracy:.3f} - {name}")



HippoRAG + DSPy Results:
  1. 0.420 - Strategy 1: Basic DSPy HippoRAG
  2. 0.380 - Strategy 3: Ensemble Approach
  3. 0.340 - Strategy 2: Enhanced with Random Search
