In [1]:
import os
import json
import logging
import sys
import warnings
from typing import Dict, Any

# --- 1. GLOBAL SILENCING CONFIGURATION ---
os.environ["TQDM_DISABLE"] = "1"
os.environ["TRANSFORMERS_VERBOSITY"] = "error"
warnings.filterwarnings("ignore")

# --- 2. LOGGING SETUP ---
logging.basicConfig(
    level=logging.INFO, 
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)]
)

noisy_loggers = ["sentence_transformers", "transformers", "urllib3", "requests", "huggingface_hub", "filelock", "tqdm"]
for logger_name in noisy_loggers:
    logging.getLogger(logger_name).setLevel(logging.ERROR)

from builder import CausalGraphBuilder
from bunny_retriever import BunnyPathRetriever
from explainer import CausalGraphExplainer

In [2]:
import logging
from typing import List, Tuple, Dict, Any
from builder import CausalGraphBuilder
from bunny_retriever import BunnyPathRetriever

# Setup logging to see the retrieval process
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class BunnyRAGChain:
    def __init__(self, model_name: str = "all-mpnet-base-v2", graph_path: str = "causal_math_graph_llm.json"):
        """
        Initializes the Causal Graph components.
        """
        self.builder = CausalGraphBuilder(model_name=model_name)
        
        # Load the existing knowledge graph
        logger.info(f"Loading graph from {graph_path}...")
        self.builder.load(graph_path)
        
        # Initialize the Bunny Retriever
        self.retriever = BunnyPathRetriever(self.builder)
        self.graph_path = graph_path

    def explore_and_query(self, query: str):
        """
        Executes the RAG chain and explicitly shows the top 15 nodes explored.
        """
        print(f"\n{'='*60}")
        print(f"USER QUERY: {query}")
        print(f"{'='*60}\n")

        # 1. RETRIEVAL STEP: Get top 15 nodes using Effective Resistance
        # We use labda=0.1 to balance structural distance and semantic similarity
        top_15_results = self.retriever.retrieve_nodes_part2(
            query=query, 
            top_k=5, 
            labda=0.02, 
            json_path=self.graph_path
        )

        # 2. DISPLAY STEP: Show the nodes being explored
        print("--- TOP 15 NODES EXPLORED IN THE CAUSAL GRAPH ---")
        print(f"{'Node Description':<60} | {'Score'}")
        print("-" * 75)
        
        explored_node_ids = []
        for node_id, score in top_15_results:
            # Get display text from builder's node_text map
            display_text = self.builder.node_text.get(node_id, node_id)
            # Truncate for clean table display
            print(f"{display_text[:58]:<60} | {score:.4f}")
            explored_node_ids.append(node_id)

        # 3. CONTEXT BUILDING
        # Here you would typically find paths between these nodes or 
        # pull raw text snippets (similar to your _get_context_for_path logic)
        
        print(f"\n--- EXPLORATION SUMMARY ---")
        print(f"Total Nodes explored: {len(explored_node_ids)}")
        print(f"Primary Anchor: {explored_node_ids[0] if explored_node_ids else 'None'}")
        
        return explored_node_ids

# --- Execution ---
if __name__ == "__main__":
    # Ensure causal_math_graph_llm.json is in your directory
    chain = BunnyRAGChain()
    
    # Example Query
    nodes = chain.explore_and_query("What happens when the circumcenter is on the side of the triangle")

2026-01-29 18:05:03,473 - INFO - Loading graph from causal_math_graph_llm.json...

USER QUERY: What happens when the circumcenter is on the side of the triangle

--- TOP 15 NODES EXPLORED IN THE CAUSAL GRAPH ---
Node Description                                             | Score
---------------------------------------------------------------------------
a triangle changes shape                                     | -0.0200
squared integer                                              | 0.2448
minor-closed property                                        | 0.2462
Marangoni forces                                             | 0.3835
atoms and molecules exist                                    | 0.3844

--- EXPLORATION SUMMARY ---
Total Nodes explored: 5
Primary Anchor: a triangle changes shape


In [3]:
from rouge_score import rouge_scorer # Add this

class CausalRAGEvaluator:
    def __init__(self):
        self.eval_model = SentenceTransformer('all-mpnet-base-v2')
        # Initialize ROUGE scorer for L (Longest Common Subsequence) and ROUGE-1/2
        self.scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

    def calculate_metrics(self, retrieved_context, truth):
        if "No direct causal paths found" in retrieved_context:
            return 0.0, 0, 0, 0.0 # Added 0.0 for ROUGE
            
        # --- Existing Semantic Similarity ---
        embeddings = self.eval_model.encode([retrieved_context, truth], convert_to_tensor=True)
        cosine_score = util.cos_sim(embeddings[0], embeddings[1]).item()
        
        # --- New ROUGE Calculation ---
        # rougeL is particularly good for causal paths as it respects word order
        scores = self.scorer.score(truth, retrieved_context)
        rouge_l_f1 = scores['rougeL'].fmeasure
        
        # --- Updated Logic ---
        recall = 1 if cosine_score > 0.7 else 0
        precision = 1 if "PATH 1:" in retrieved_context else 0
        
        return cosine_score, recall, precision, rouge_l_f1

    def run_evaluation(self, results_data):
        evaluation_results = []
        
        for item in results_data:
            sim, recall, prec, rouge_l = self.calculate_metrics(item['context'], item['truth'])
            
            evaluation_results.append({
                "Query": item['query'],
                "Similarity": round(sim, 4),
                "ROUGE_L": round(rouge_l, 4), # New metric
                "Recall": recall,
                "Precision": prec,
                "Status": "Success" if (recall == 1 or rouge_l > 0.5) else "Fail"
            })
            
        return pd.DataFrame(evaluation_results)



In [4]:
# --- Execution ---
data_to_evaluate = [
    {
        "query": "What happens when the circumcenter is on the side of the triangle?",
        "context": "PATH 1: the circumcenter is located on the side of the triangle -> the triangle is acute\nPATH 2: the circumcenter is located on the side of the triangle -> the angle opposite that side is a right angle",
        "truth": "If the circumcenter is on a side, the angle opposite is a right angle and the triangle is a right triangle."
    },
    {
        "query": "What influences the velocity of a Brownian particle?",
        "context": "No direct causal paths found in the knowledge graph.",
        "truth": "The velocity is influenced by temperature, thermal fluctuations, and fluid viscosity."
    },
    {
        "query": "Tell me about surface tension and minimal surfaces.",
        "context": "No direct causal paths found in the knowledge graph.",
        "truth": "Surface tension causes soap films to form minimal surfaces that minimize surface area."
    }
]

evaluator = CausalRAGEvaluator()
df = evaluator.run_evaluation(data_to_evaluate)

print("### RAG Evaluation Results ###")
df

NameError: name 'SentenceTransformer' is not defined

In [None]:
import pandas as pd
import json
from sentence_transformers import SentenceTransformer, util
from rouge_score import rouge_scorer

class CausalRAGEvaluator:
    def __init__(self, json_file_path):
        # Load the provided causal knowledge graph
        with open(json_file_path, 'r') as f:
            self.knowledge_graph = json.load(f)
        
        self.eval_model = SentenceTransformer('all-mpnet-base-v2')
        self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    def _get_relevant_references(self, truth_text):
        """
        In a real RAG, this represents the set R (ground truth references).
        We will tokenize the truth or match it against KG nodes.
        """
        # For this implementation, we treat the 'truth' string as the reference set R
        return [truth_text.lower()]

    def calculate_metrics(self, retrieved_context, truth):
        if "No direct causal paths found" in retrieved_context:
            return 0.0, 0.0, 0.0, 0.0

        # 1. Formal Context Recall (Reference Equation 1)
        # Recall = (Count of retrieved items in Reference Set) / |Reference Set|
        references = self._get_relevant_references(truth)
        retrieved_items = [p.strip().lower() for p in retrieved_context.split('->')]
        
        # Indicator function: 1 if retrieved item is in the truth references
        hits = sum(1 for item in retrieved_items if any(ref in item for ref in references))
        context_recall = hits / len(references) if references else 0.0

        # 2. Formal Context Precision (Reference Equation 2)
        # Precision = (Sum of indicator values) / (Total retrieved items)
        context_precision = hits / len(retrieved_items) if retrieved_items else 0.0

        # 3. Semantic Similarity
        embeddings = self.eval_model.encode([retrieved_context, truth], convert_to_tensor=True)
        similarity = util.cos_sim(embeddings[0], embeddings[1]).item()

        # 4. ROUGE-L
        rouge_scores = self.scorer.score(truth, retrieved_context)
        rouge_l = rouge_scores['rougeL'].fmeasure

        return similarity, context_recall, context_precision, rouge_l

    def run_evaluation(self, results_data):
        evaluation_results = []
        for item in results_data:
            sim, recall, prec, rouge_l = self.calculate_metrics(item['context'], item['truth'])
            
            evaluation_results.append({
                "Query": item['query'],
                "Similarity": round(sim, 4),
                "Context Recall": round(recall, 4),
                "Context Precision": round(prec, 4),
                "ROUGE_L": round(rouge_l, 4),
                "Status": "Success" if recall > 0.5 and rouge_l > 0.3 else "Fail"
            })
            
        return pd.DataFrame(evaluation_results)

# --- Execute with your data ---
# Assuming 'causal_math_graph_llm.json' is in your directory
evaluator = CausalRAGEvaluator('causal_math_graph_llm.json')
df = evaluator.run_evaluation(data_to_evaluate)

print("### Formalized RAG Evaluation Results ###")
print(df.to_string(index=False))

In [None]:
import pandas as pd
import json
import re
from sentence_transformers import SentenceTransformer, util
from rouge_score import rouge_scorer
import itertools # For pairwise combinations

class CausalRAGEvaluator:
    def __init__(self, json_file_path):
        with open(json_file_path, 'r') as f:
            self.kg_data = json.load(f)
        
        # Extract node names and variants for formal Recall/Precision
        self.all_valid_nodes = set(self.kg_data['nodes'].keys())
        for variant_list in self.kg_data['variants'].values():
            self.all_valid_nodes.update(variant_list)
        
        self.eval_model = SentenceTransformer('all-mpnet-base-v2')
        self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    def calculate_diversity(self, retrieved_paths):
        """
        Implements Diversity: distance(di, dj) = 1 - cos_sim(emb(di), emb(dj))
        Returns the mean of all pairwise distances.
        """
        if len(retrieved_paths) < 2:
            return 0.0
        
        # Embed each path (d_i)
        embeddings = self.eval_model.encode(retrieved_paths, convert_to_tensor=True)
        distances = []

        # Compare all i < j pairs
        for i, j in itertools.combinations(range(len(retrieved_paths)), 2):
            sim = util.cos_sim(embeddings[i], embeddings[j]).item()
            # distance = 1 - similarity
            distances.append(1 - sim)
            
        return sum(distances) / len(distances) if distances else 0.0

    def _extract_nodes(self, text):
        text_lower = text.lower()
        return {node.lower() for node in self.all_valid_nodes if node.lower() in text_lower}

    def calculate_metrics(self, retrieved_context, truth):
        # Split context into individual paths for diversity and formal metrics
        # We assume paths are separated by "PATH" or newlines
        retrieved_paths = [p.strip() for p in retrieved_context.split('\n') if "PATH" in p]
        
        if not retrieved_paths:
            return 0.0, 0.0, 0.0, 0.0, 0.0

        # --- Formal Recall/Precision (per your Image equations) ---
        R = self._extract_nodes(truth) # Reference set R
        C = self._extract_nodes(retrieved_context) # Retrieved set Ci
        
        hits = sum(1 for node in C if node in R)
        context_recall = hits / len(R) if R else 1.0 # Indicator function sum / |R|
        context_precision = hits / len(C) if C else 0.0 # Indicator sum / retrieved count

        # --- Diversity ---
        diversity = self.calculate_diversity(retrieved_paths)

        # --- Similarity & ROUGE ---
        embeddings = self.eval_model.encode([retrieved_context, truth], convert_to_tensor=True)
        similarity = util.cos_sim(embeddings[0], embeddings[1]).item()
        rouge_l = self.scorer.score(truth, retrieved_context)['rougeL'].fmeasure

        return similarity, context_recall, context_precision, rouge_l, diversity

    def run_evaluation(self, results_data):
        evaluation_results = []
        for item in results_data:
            sim, recall, prec, rouge_l, div = self.calculate_metrics(item['context'], item['truth'])
            evaluation_results.append({
                "Query": item['query'],
                "Similarity": round(sim, 4),
                "Recall": round(recall, 4),
                "Precision": round(prec, 4),
                "Diversity": round(div, 4), # Higher = more varied info
                "ROUGE_L": round(rouge_l, 4)
            })
        return pd.DataFrame(evaluation_results)


In [None]:
# Initialize with your JSON
evaluator = CausalRAGEvaluator('causal_math_graph_llm.json')
df = evaluator.run_evaluation(data_to_evaluate)
print(df.to_string(index=False))