In [1]:
import os
import json
import logging
import sys
import warnings
from typing import Dict, Any

# --- 1. GLOBAL SILENCING CONFIGURATION ---
# Environment switches and a blanket warnings filter, applied before the
# project modules are imported further down in this cell.
os.environ.update({
    "TQDM_DISABLE": "1",
    "TRANSFORMERS_VERBOSITY": "error",
})
warnings.filterwarnings("ignore")

# --- 2. LOGGING SETUP ---
# Root logger: INFO and above, timestamped, written to stdout.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Third-party loggers that are raised to ERROR so only failures get through.
noisy_loggers = [
    "sentence_transformers",
    "transformers",
    "urllib3",
    "requests",
    "huggingface_hub",
    "filelock",
    "tqdm",
]
for logger_name in noisy_loggers:
    third_party_logger = logging.getLogger(logger_name)
    third_party_logger.setLevel(logging.ERROR)

# --- 3. IMPORTS ---
try:
    from causal_graph.builder import CausalGraphBuilder
    from causal_graph.retriever import CausalPathRetriever
    from causal_graph.explainer import CausalGraphExplainer
except ImportError:
    from builder import CausalGraphBuilder
    from retriever import CausalPathRetriever
    from explainer import CausalGraphExplainer

In [2]:
import re

class CausalRAGChain:
    """Retrieval chain that pairs causal-graph paths with snippets of source text.

    The chain owns a ``CausalGraphBuilder``, lazily creates a
    ``CausalPathRetriever`` on top of it, and keeps the raw documents it
    ingested so each retrieved path can be accompanied by a text excerpt.
    """

    def __init__(self, model_name: str = "all-mpnet-base-v2"):
        """
        Args:
            model_name: The SentenceTransformer model passed to the graph builder.
        """
        self.logger = logging.getLogger(__name__)

        self.logger.info(f"Initializing Causal Graph Builder with model: {model_name}...")
        self.builder = CausalGraphBuilder(
            model_name=model_name,
            normalize_nodes=True
        )
        # Created by load_graph_state / ingest_wiki_knowledge, or on first run().
        self.retriever = None
        # Raw document texts, kept for the context lookup in _get_context_for_path.
        self.documents = []

    def load_graph_state(self, filepath: str):
        """Loads an existing graph state (nodes/edges) from JSON.

        A missing or unparsable file is logged and the builder is left as it
        was; in every case a fresh retriever is bound to the builder.
        """
        self.logger.info(f"Loading graph state from {filepath}...")
        if os.path.exists(filepath):
            success = self.builder.load(filepath)
            if success:
                self.logger.info(f"Graph loaded successfully: {self.builder.get_graph().number_of_nodes()} nodes.")
            else:
                self.logger.error("Failed to parse graph file. Starting with empty graph.")
        else:
            self.logger.warning(f"Graph file not found: {filepath}. Starting with empty graph.")

        self.retriever = CausalPathRetriever(self.builder)

    def save_graph_state(self, filepath: str):
        """Saves the current graph state to JSON."""
        self.builder.save(filepath)

    def ingest_wiki_knowledge(self, json_path: str, limit: int = None, auto_save_path: str = None):
        """
        Loads wiki json, stores raw text for retrieval, and builds the graph.

        Args:
            json_path: Path to a JSON file holding a list of objects; the
                'raw_text' string of each object is used as a document.
            limit: If truthy, only the first ``limit`` documents are kept.
            auto_save_path: If given, the graph state is saved there after indexing.

        Errors are logged (with traceback) rather than raised, so a failed
        ingestion leaves the chain usable with whatever state it already had.
        """
        self.logger.info(f"Ingesting knowledge from {json_path}...")

        if not os.path.exists(json_path):
            self.logger.error(f"Knowledge base file not found: {json_path}")
            return

        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Extract text. Entries that are not dicts, or whose 'raw_text' is
            # not a string, are skipped instead of aborting the whole ingestion.
            self.documents = []  # Reset documents
            if isinstance(data, list):
                self.documents = [
                    item['raw_text']
                    for item in data
                    if isinstance(item, dict) and isinstance(item.get('raw_text'), str)
                ]

            if not self.documents:
                self.logger.warning("No 'raw_text' fields found in JSON.")
                return

            # Apply limit if specified
            if limit:
                self.documents = self.documents[:limit]
                self.logger.info(f"Limiting ingestion to first {limit} documents.")

            self.logger.info(f"Indexing {len(self.documents)} documents into the graph...")

            # Index documents into graph
            self.builder.index_documents(self.documents, show_progress=False)

            self.logger.info(f"Ingestion complete. Graph size: {self.builder.get_graph().number_of_nodes()} nodes.")
            self.retriever = CausalPathRetriever(self.builder)

            if auto_save_path:
                self.save_graph_state(auto_save_path)

        except Exception as e:
            # Deliberately best-effort, but keep the traceback so failures are diagnosable.
            self.logger.error(f"Error during ingestion: {e}", exc_info=True)

    def _get_context_for_path(self, path: list[str], window_size: int = 300) -> str:
        """
        Finds a snippet in the source documents that mentions nodes of the path.

        The document containing the largest number of path nodes (at least two)
        wins; ties keep the earlier document. The snippet spans ``window_size``
        characters before and ``2 * window_size`` characters after the anchor.

        The anchor is the first node (in path order) that occurs in the
        document. Anchoring on ``path[0]`` only would discard a document that
        matches later nodes but not the first one, while still raising the
        match threshold for every following document.

        Returns:
            The snippet wrapped in '...' markers, or a fixed "not found" message.
        """
        best_snippet = ""
        max_matches = 0

        # Lowercase for case-insensitive matching; empty nodes would match everywhere.
        path_keywords = [node.lower() for node in path if node]

        for doc in self.documents:
            doc_lower = doc.lower()

            # Positions, in path order, of every path node present in this document.
            positions = [
                pos
                for pos in (doc_lower.find(keyword) for keyword in path_keywords)
                if pos != -1
            ]
            matches = len(positions)

            if matches >= 2 and matches > max_matches:
                max_matches = matches
                anchor = positions[0]
                start = max(0, anchor - window_size)
                end = min(len(doc), anchor + window_size * 2)
                best_snippet = f"...{doc[start:end]}..."

        return best_snippet if best_snippet else "Context not found in source text."

    def run(self, query: str):
        """Runs the retrieval chain with Context Enrichment.

        Returns:
            A dict with the original 'query', the retrieved 'paths', the
            formatted evidence as 'context_text', and the 'final_prompt'.
        """
        if not self.retriever:
            self.retriever = CausalPathRetriever(self.builder)

        print(f"\nProcessing query: {query}")

        # 1. Retrieve Causal Paths (The "Skeleton" of the answer)
        paths = self.retriever.retrieve_paths(
            query,
            max_paths=5,
            min_path_length=2,
            max_path_length=4
        )

        # 2. Retrieve Source Context (The "Flesh" of the answer)
        # We look up the original text for each path found
        context_blocks = []
        for i, path in enumerate(paths):
            arrow_chain = " -> ".join(path)
            source_snippet = self._get_context_for_path(path)

            block = (
                f"PATH {i+1}: {arrow_chain}\n"
                f"SOURCE CONTEXT: {source_snippet}\n"
            )
            context_blocks.append(block)

        paths_context_text = "\n".join(context_blocks)

        if not paths_context_text:
            paths_context_text = "No direct causal paths found in the knowledge graph."

        # 3. Enhanced Prompt
        prompt = f"""You are a Causal AI Expert. 
Using the provided Causal Paths and their Source Context, write a coherent, detailed answer.
Do not just list the paths; weave them into a narrative explanation.

USER QUERY: {query}

=== RETRIEVED CAUSAL EVIDENCE ===
{paths_context_text}
=================================

ANSWER:"""

        return {
            "query": query,
            "paths": paths,
            "context_text": paths_context_text,  # Return context for debugging
            "final_prompt": prompt
        }

In [3]:
if __name__ == "__main__":
    import os

    # --- Configuration ---
    GRAPH_STATE_FILE = "causal_math_graph_state_llm.json"
    WIKI_KB_FILE = "wiki_math_knowledge_base_api.json"
    OUTPUT_FILE = "rag_output_with_context.txt"

    # 1. Build the chain, then restore the previously saved nodes and edges.
    chain = CausalRAGChain(model_name="all-mpnet-base-v2")
    chain.load_graph_state(GRAPH_STATE_FILE)

    # 2. Ingest the knowledge base. This is what fills chain.documents, which
    #    run() needs for the source-text lookup, so it runs even though a graph
    #    was just loaded. Only the first 20 documents are used here; drop
    #    'limit' for a full run.
    chain.ingest_wiki_knowledge(WIKI_KB_FILE, limit=20, auto_save_path=GRAPH_STATE_FILE)

    # 3. Queries to answer
    queries = [
        'What happens when the circumcenter is on the side of the triangle?',
        "What influences the velocity of a Brownian particle?",
        "Tell me about surface tension and minimal surfaces."
    ]

    print(f"\nProcessing {len(queries)} queries... (Saving results to {OUTPUT_FILE})")

    # 4. Run every query and append its report to the output file
    thin_rule = "-" * 40
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write("=== CAUSAL RAG RESULTS WITH SOURCE CONTEXT ===\n\n")

        for i, q in enumerate(queries, 1):
            result = chain.run(q)

            # Evidence section: paths plus source snippets, or a placeholder.
            if result.get('context_text'):
                evidence_lines = ["RETRIEVED EVIDENCE & CONTEXT:", result['context_text']]
            else:
                evidence_lines = ["  [INFO]: No evidence found."]

            output_block = [
                f"QUERY {i}: {result['query']}",
                thin_rule,
                *evidence_lines,
                thin_rule,
                # The prompt one would hand to an LLM
                "FINAL GENERATED PROMPT:",
                result['final_prompt'],
                "=" * 60 + "\n",
            ]

            # Flush after each query so partial results survive an interruption.
            f.write("\n".join(output_block))
            f.flush()

            print(f"Finished Query {i}")

    print(f"\nDone! Check '{OUTPUT_FILE}' to see the paths linked with their original text.")

2026-01-29 15:17:39,328 - INFO - Initializing Causal Graph Builder with model: all-mpnet-base-v2...
2026-01-29 15:17:42,743 - INFO - Loading graph state from causal_math_graph_state_llm.json...
2026-01-29 15:17:43,282 - INFO - Graph loaded successfully: 4 nodes.
2026-01-29 15:17:43,285 - INFO - Ingesting knowledge from wiki_math_knowledge_base_api.json...
2026-01-29 15:17:43,302 - INFO - Limiting ingestion to first 20 documents.
2026-01-29 15:17:43,304 - INFO - Indexing 20 documents into the graph...
2026-01-29 15:17:44,150 - INFO - Processed batch 4/4: found 0 causal relationships
2026-01-29 15:17:44,153 - INFO - Indexing complete: 20 documents processed
2026-01-29 15:17:44,154 - INFO - Added 0 new nodes and 0 new relationships to graph
2026-01-29 15:17:44,155 - INFO - Graph now has 4 nodes and 3 edges
2026-01-29 15:17:44,156 - INFO - Ingestion complete. Graph size: 4 nodes.

Processing 3 queries... (Saving results to rag_output_with_context.txt)

Processing query: What happens when t

In [29]:
# Everything the evaluator in this cell needs, so it runs on a fresh kernel.
import pandas as pd
from rouge_score import rouge_scorer
from sentence_transformers import SentenceTransformer, util

class CausalRAGEvaluator:
    """Scores retrieved causal-path context against a ground-truth answer.

    Each item is scored with embedding cosine similarity and ROUGE-L F1;
    binary recall/precision flags and a Success/Fail status are derived
    from those scores.
    """

    def __init__(self):
        """Create the embedding model and the ROUGE scorer."""
        self.eval_model = SentenceTransformer('all-mpnet-base-v2')
        # The scorer is configured for rouge1 and rougeL; calculate_metrics
        # reads only the rougeL result.
        self.scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

    def calculate_metrics(self, retrieved_context, truth):
        """Return (cosine_similarity, recall, precision, rouge_l_f1) for one item.

        A context carrying the "No direct causal paths found" marker scores
        zero on everything without touching the models.
        """
        if "No direct causal paths found" in retrieved_context:
            return 0.0, 0, 0, 0.0

        # Semantic similarity between the whole context and the truth.
        encoded_pair = self.eval_model.encode([retrieved_context, truth], convert_to_tensor=True)
        cosine_score = util.cos_sim(encoded_pair[0], encoded_pair[1]).item()

        # ROUGE-L F1, with the truth as the reference text.
        rouge_l_f1 = self.scorer.score(truth, retrieved_context)['rougeL'].fmeasure

        # Binary flags: recall from a 0.7 similarity cut-off, precision from
        # the presence of at least one labelled path.
        recall = int(cosine_score > 0.7)
        precision = int("PATH 1:" in retrieved_context)

        return cosine_score, recall, precision, rouge_l_f1

    def _build_row(self, item):
        """Turn one {'query', 'context', 'truth'} item into a result row."""
        sim, recall, prec, rouge_l = self.calculate_metrics(item['context'], item['truth'])
        passed = recall == 1 or rouge_l > 0.5
        return {
            "Query": item['query'],
            "Similarity": round(sim, 4),
            "ROUGE_L": round(rouge_l, 4),
            "Recall": recall,
            "Precision": prec,
            "Status": "Success" if passed else "Fail",
        }

    def run_evaluation(self, results_data):
        """Score every item in ``results_data`` and return one DataFrame row per item."""
        return pd.DataFrame([self._build_row(item) for item in results_data])



In [30]:
# --- Execution ---
# Hand-written evaluation set: each entry pairs a query with the context the
# chain retrieved for it and a reference answer to score against.
data_to_evaluate = [
    dict(
        query="What happens when the circumcenter is on the side of the triangle?",
        context="PATH 1: the circumcenter is located on the side of the triangle -> the triangle is acute\nPATH 2: the circumcenter is located on the side of the triangle -> the angle opposite that side is a right angle",
        truth="If the circumcenter is on a side, the angle opposite is a right angle and the triangle is a right triangle.",
    ),
    dict(
        query="What influences the velocity of a Brownian particle?",
        context="No direct causal paths found in the knowledge graph.",
        truth="The velocity is influenced by temperature, thermal fluctuations, and fluid viscosity.",
    ),
    dict(
        query="Tell me about surface tension and minimal surfaces.",
        context="No direct causal paths found in the knowledge graph.",
        truth="Surface tension causes soap films to form minimal surfaces that minimize surface area.",
    ),
]

evaluator = CausalRAGEvaluator()
df = evaluator.run_evaluation(data_to_evaluate)

print("### RAG Evaluation Results ###")
# Bare last expression so the notebook renders the frame as a table.
df

2026-01-29 15:36:11,992 - INFO - Using default tokenizer.
### RAG Evaluation Results ###


Unnamed: 0,Query,Similarity,ROUGE_L,Recall,Precision,Status
0,What happens when the circumcenter is on the s...,0.7294,0.4138,1,1,Success
1,What influences the velocity of a Brownian par...,0.0,0.0,0,0,Fail
2,Tell me about surface tension and minimal surf...,0.0,0.0,0,0,Fail


In [31]:
import pandas as pd
import json
from sentence_transformers import SentenceTransformer, util
from rouge_score import rouge_scorer

class CausalRAGEvaluator:
    """Evaluates retrieved causal paths with context recall/precision, similarity and ROUGE-L.

    Recall and precision are substring based: a retrieved node counts as
    relevant when it occurs verbatim inside a reference string. This is a
    strict criterion (paraphrases do not match) and a loose one for very
    short nodes (they can match inside longer words).
    """

    def __init__(self, json_file_path):
        """Load the knowledge-graph JSON and create the scoring models.

        Args:
            json_file_path: Path to the causal knowledge graph JSON file. It is
                stored on ``self.knowledge_graph``; the metrics in this class
                do not read it.
        """
        # Explicit encoding so the load does not depend on the platform default.
        with open(json_file_path, 'r', encoding='utf-8') as f:
            self.knowledge_graph = json.load(f)

        self.eval_model = SentenceTransformer('all-mpnet-base-v2')
        self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    def _get_relevant_references(self, truth_text):
        """
        Return the reference set R used for recall/precision.

        For this implementation the lower-cased 'truth' string is the single
        element of R.
        """
        return [truth_text.lower()]

    @staticmethod
    def _split_retrieved_items(retrieved_context):
        """Split a retrieved-context block into unique, lower-cased path nodes.

        Each line is handled on its own so nodes of adjacent paths are not
        fused across the newline, a leading 'PATH <n>:' label is dropped, and
        'SOURCE CONTEXT:' lines (free text, not nodes) are ignored. Order of
        first appearance is preserved.
        """
        items = []
        for raw_line in retrieved_context.splitlines():
            line = raw_line.strip()
            if not line or line.upper().startswith("SOURCE CONTEXT:"):
                continue

            label, sep, rest = line.partition(':')
            label_parts = label.split()
            is_path_label = (
                bool(sep)
                and len(label_parts) == 2
                and label_parts[0].upper() == "PATH"
                and label_parts[1].isdigit()
            )
            if is_path_label:
                line = rest

            for node in line.split('->'):
                node = node.strip().lower()
                if node and node not in items:
                    items.append(node)
        return items

    def _context_recall_precision(self, retrieved_context, truth):
        """Return (context_recall, context_precision), both within [0, 1].

        Recall    = references containing at least one retrieved item / |R|
        Precision = retrieved items found inside some reference / |retrieved|
        """
        references = self._get_relevant_references(truth)
        retrieved_items = self._split_retrieved_items(retrieved_context)

        # Indicator: the retrieved item occurs inside a reference (not the reverse).
        relevant_items = [
            item for item in retrieved_items
            if any(item in ref for ref in references)
        ]
        covered_references = sum(
            1 for ref in references
            if any(item in ref for item in retrieved_items)
        )

        context_recall = covered_references / len(references) if references else 0.0
        context_precision = len(relevant_items) / len(retrieved_items) if retrieved_items else 0.0
        return context_recall, context_precision

    def calculate_metrics(self, retrieved_context, truth):
        """Return (similarity, context_recall, context_precision, rouge_l) for one item.

        A context carrying the "No direct causal paths found" marker scores
        zero on every metric without invoking the models.
        """
        if "No direct causal paths found" in retrieved_context:
            return 0.0, 0.0, 0.0, 0.0

        # 1 & 2. Formal context recall and precision
        context_recall, context_precision = self._context_recall_precision(retrieved_context, truth)

        # 3. Semantic Similarity
        embeddings = self.eval_model.encode([retrieved_context, truth], convert_to_tensor=True)
        similarity = util.cos_sim(embeddings[0], embeddings[1]).item()

        # 4. ROUGE-L (truth is the reference text)
        rouge_scores = self.scorer.score(truth, retrieved_context)
        rouge_l = rouge_scores['rougeL'].fmeasure

        return similarity, context_recall, context_precision, rouge_l

    def run_evaluation(self, results_data):
        """Score each {'query', 'context', 'truth'} item and return a DataFrame.

        An item is marked "Success" when recall exceeds 0.5 and ROUGE-L exceeds 0.3.
        """
        evaluation_results = []
        for item in results_data:
            sim, recall, prec, rouge_l = self.calculate_metrics(item['context'], item['truth'])

            evaluation_results.append({
                "Query": item['query'],
                "Similarity": round(sim, 4),
                "Context Recall": round(recall, 4),
                "Context Precision": round(prec, 4),
                "ROUGE_L": round(rouge_l, 4),
                "Status": "Success" if recall > 0.5 and rouge_l > 0.3 else "Fail"
            })

        return pd.DataFrame(evaluation_results)

# --- Execute with your data ---
# Assuming 'causal_math_graph_llm.json' is in your directory
# NOTE(review): the retrieval chain earlier in this notebook saves its graph to
# "causal_math_graph_state_llm.json"; confirm that this differently named file
# is the one intended here.
evaluator = CausalRAGEvaluator('causal_math_graph_llm.json')
# data_to_evaluate is the hand-written query/context/truth list defined in an
# earlier cell of this notebook.
df = evaluator.run_evaluation(data_to_evaluate)

print("### Formalized RAG Evaluation Results ###")
# Plain-text rendering without the index column.
print(df.to_string(index=False))

2026-01-29 15:54:05,818 - INFO - Using default tokenizer.
### Formalized RAG Evaluation Results ###
                                                             Query  Similarity  Context Recall  Context Precision  ROUGE_L Status
What happens when the circumcenter is on the side of the triangle?      0.7294             0.0                0.0   0.4138   Fail
              What influences the velocity of a Brownian particle?      0.0000             0.0                0.0   0.0000   Fail
               Tell me about surface tension and minimal surfaces.      0.0000             0.0                0.0   0.0000   Fail


In [35]:
import pandas as pd
import json
import re
from sentence_transformers import SentenceTransformer, util
from rouge_score import rouge_scorer
import itertools # For pairwise combinations

class CausalRAGEvaluator:
    """Evaluates retrieved causal paths against a truth string using knowledge-graph nodes.

    Recall and precision compare the set of graph nodes mentioned in the
    truth with the set mentioned in the retrieved context. Diversity,
    embedding similarity and ROUGE-L are reported alongside.
    """

    def __init__(self, json_file_path):
        """Load the graph JSON and collect every node name and variant.

        Args:
            json_file_path: Path to a JSON file with a 'nodes' mapping (its keys
                are node names) and a 'variants' mapping (its values are lists
                of alternative names).
        """
        # Explicit encoding so the load does not depend on the platform default.
        with open(json_file_path, 'r', encoding='utf-8') as f:
            self.kg_data = json.load(f)

        # Extract node names and variants for formal Recall/Precision
        self.all_valid_nodes = set(self.kg_data['nodes'].keys())
        for variant_list in self.kg_data['variants'].values():
            self.all_valid_nodes.update(variant_list)

        self.eval_model = SentenceTransformer('all-mpnet-base-v2')
        self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    def calculate_diversity(self, retrieved_paths):
        """
        Implements Diversity: distance(di, dj) = 1 - cos_sim(emb(di), emb(dj))
        Returns the mean of all pairwise distances (0.0 for fewer than two paths).
        """
        if len(retrieved_paths) < 2:
            return 0.0

        # Embed each path (d_i)
        embeddings = self.eval_model.encode(retrieved_paths, convert_to_tensor=True)

        # Compare all i < j pairs; distance = 1 - similarity
        distances = [
            1 - util.cos_sim(embeddings[i], embeddings[j]).item()
            for i, j in itertools.combinations(range(len(retrieved_paths)), 2)
        ]

        return sum(distances) / len(distances) if distances else 0.0

    def _extract_nodes(self, text):
        """Return the lower-cased graph nodes that occur in ``text`` as whole terms.

        Matching is case-insensitive and requires that the node is not embedded
        in a longer word, so the node "angle" is not reported for the text
        "triangle". Blank node names are ignored.
        """
        text_lower = text.lower()
        found = set()
        for node in self.all_valid_nodes:
            node_lower = node.lower()
            if not node_lower.strip():
                continue
            # Lookarounds instead of \b so nodes that start or end with
            # punctuation are still matched correctly.
            pattern = r'(?<!\w)' + re.escape(node_lower) + r'(?!\w)'
            if re.search(pattern, text_lower):
                found.add(node_lower)
        return found

    def calculate_metrics(self, retrieved_context, truth):
        """Return (similarity, recall, precision, rouge_l, diversity) for one item.

        Lines of the context that contain "PATH" are treated as the retrieved
        paths; when there are none, every metric is 0.0.
        """
        # Split context into individual paths for diversity and formal metrics
        retrieved_paths = [p.strip() for p in retrieved_context.split('\n') if "PATH" in p]

        if not retrieved_paths:
            return 0.0, 0.0, 0.0, 0.0, 0.0

        # --- Formal Recall/Precision ---
        R = self._extract_nodes(truth)              # Reference set R
        C = self._extract_nodes(retrieved_context)  # Retrieved set C

        hits = len(C & R)
        # NOTE(review): an empty reference set yields recall 1.0, i.e. a truth
        # that mentions no graph node scores perfect recall; confirm this is
        # the intended convention.
        context_recall = hits / len(R) if R else 1.0
        context_precision = hits / len(C) if C else 0.0

        # --- Diversity ---
        diversity = self.calculate_diversity(retrieved_paths)

        # --- Similarity & ROUGE ---
        embeddings = self.eval_model.encode([retrieved_context, truth], convert_to_tensor=True)
        similarity = util.cos_sim(embeddings[0], embeddings[1]).item()
        rouge_l = self.scorer.score(truth, retrieved_context)['rougeL'].fmeasure

        return similarity, context_recall, context_precision, rouge_l, diversity

    def run_evaluation(self, results_data):
        """Score each {'query', 'context', 'truth'} item and return a DataFrame."""
        evaluation_results = []
        for item in results_data:
            sim, recall, prec, rouge_l, div = self.calculate_metrics(item['context'], item['truth'])
            evaluation_results.append({
                "Query": item['query'],
                "Similarity": round(sim, 4),
                "Recall": round(recall, 4),
                "Precision": round(prec, 4),
                "Diversity": round(div, 4),  # Higher = more varied info
                "ROUGE_L": round(rouge_l, 4)
            })
        return pd.DataFrame(evaluation_results)


In [36]:
# Initialize with your JSON
# The constructor reads the 'nodes' and 'variants' entries of this file to
# build the set of node names used for recall/precision.
evaluator = CausalRAGEvaluator('causal_math_graph_llm.json')
# data_to_evaluate is the query/context/truth list defined in an earlier cell.
df = evaluator.run_evaluation(data_to_evaluate)
# Plain-text rendering without the index column.
print(df.to_string(index=False))

2026-01-29 16:09:24,628 - INFO - Using default tokenizer.
                                                             Query  Similarity  Recall  Precision  Diversity  ROUGE_L
What happens when the circumcenter is on the side of the triangle?      0.7294     1.0        0.0     0.1503   0.4138
              What influences the velocity of a Brownian particle?      0.0000     0.0        0.0     0.0000   0.0000
               Tell me about surface tension and minimal surfaces.      0.0000     0.0        0.0     0.0000   0.0000
