In [1]:
import os
import json
import logging
import sys
import warnings
from typing import Dict, Any

# --- 1. GLOBAL SILENCING CONFIGURATION ---
# These are set before the project imports further down, so any library that
# reads them at import time sees the values below.
os.environ["TQDM_DISABLE"] = "1"
os.environ["TRANSFORMERS_VERBOSITY"] = "error"
# NOTE: this suppresses *every* Python warning in the kernel, not only
# third-party noise; remove it temporarily when debugging.
warnings.filterwarnings("ignore")

# --- 2. LOGGING SETUP ---
# force=True makes this cell authoritative: without it, basicConfig() does
# nothing when the root logger already has handlers (for example when the
# notebook host or another library configured logging first), and the INFO
# level / format / stdout handler below would silently never be applied.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
    force=True,
)

# Loggers that are restricted to ERROR and above so their INFO/WARNING
# chatter does not drown out this notebook's own output.
noisy_loggers = ["sentence_transformers", "transformers", "urllib3", "requests", "huggingface_hub", "filelock", "tqdm"]
for logger_name in noisy_loggers:
    logging.getLogger(logger_name).setLevel(logging.ERROR)

# --- 3. IMPORTS ---
try:
    from causal_graph.builder import CausalGraphBuilder
    from causal_graph.retriever import CausalPathRetriever
    from causal_graph.explainer import CausalGraphExplainer
except ImportError:
    from builder import CausalGraphBuilder
    from retriever import CausalPathRetriever
    from explainer import CausalGraphExplainer

In [None]:
class CausalRAGChain:
    """
    A RAG Chain that uses a Causal Graph to ground answers in cause-effect relationships.

    The chain connects a ``CausalGraphBuilder`` (populated from a JSON file) to a
    ``CausalPathRetriever``. For each query it assembles a textual "causal
    context" and an LLM-ready prompt. No LLM is called here: ``run`` returns
    the prompt so the caller can send it to a model of their choice.
    """

    # Message returned under "answer" when the retriever finds no node at all.
    _NO_MATCH_ANSWER = "I couldn't find any relevant concepts in the causal graph."

    def __init__(self, json_path: str, graphml_path: str = None):
        """
        Load the graph from ``json_path`` and attach a path retriever.

        Args:
            json_path: Path passed to ``CausalGraphBuilder.load``.
            graphml_path: Currently unused; kept so existing callers that pass
                it keep working.

        Raises:
            ValueError: If ``CausalGraphBuilder.load`` returns a falsy value.
        """
        print("Initializing Causal Graph Chain...")

        # 1. Initialize the Builder.
        # The original author notes this model name matches the builder.py
        # default (not verifiable from this notebook alone).
        self.builder = CausalGraphBuilder(model_name="all-MiniLM-L6-v2")

        # 2. Load the graph data; a falsy return value is treated as failure.
        print(f"Loading semantic data from {json_path}...")
        if not self.builder.load(json_path):
            raise ValueError(f"Failed to load graph data from {json_path}")

        # 3. Initialize the Retriever on top of the loaded builder.
        self.retriever = CausalPathRetriever(self.builder)
        print("Chain initialized successfully.")

    def run(self, query: str, context_window: int = 5) -> Dict[str, Any]:
        """
        Execute the chain: Query -> Retrieve Causal Paths -> Build Prompt.

        Args:
            query: Natural-language question.
            context_window: Forwarded to the retriever as ``max_paths``.

        Returns:
            A dict with the keys ``query``, ``anchor_node``, ``retrieved_paths``,
            ``llm_prompt`` and ``raw_context``. When the retriever returns no
            nodes, the same keys are present with empty values (``anchor_node``
            is ``None``) and an additional ``answer`` key carries a
            human-readable explanation, so callers can always read the
            standard keys without a ``KeyError``.
        """
        print(f"\nProcessing Query: '{query}'")

        # --- Step A: Semantic & Structural Retrieval ---
        # Only the best-scoring node is used as the anchor concept.
        relevant_nodes = self.retriever.retrieve_nodes(query, top_k=3)
        if not relevant_nodes:
            return {
                "answer": self._NO_MATCH_ANSWER,
                "query": query,
                "anchor_node": None,
                "retrieved_paths": [],
                "llm_prompt": "",
                "raw_context": "",
            }

        top_node_id, score = relevant_nodes[0]
        print(f"Found anchor concept: '{top_node_id}' (Score: {score:.2f})")

        paths = self.retriever.retrieve_paths(
            query,
            max_paths=context_window,
            min_path_length=2
        )

        # --- Step B: Context Formatting ---
        context_str = self._format_context(top_node_id, paths)
        # Copy into a plain list so the result does not alias retriever state.
        path_details = list(paths) if paths else []

        # --- Step C: Generation (Simulated LLM) ---
        # In a real app the prompt would be passed to an LLM; here it is
        # returned alongside the retrieved paths for inspection.
        return {
            "query": query,
            "anchor_node": top_node_id,
            "retrieved_paths": path_details,
            "llm_prompt": self._build_prompt(query, context_str),
            "raw_context": context_str
        }

    def _format_context(self, anchor_id, paths) -> str:
        """Render retrieved paths (or the anchor's direct neighbours) as text."""
        lines = ["Known Causal Relationships:"]
        if paths:
            # Each path is joined as "a -> b -> c" and numbered from 1.
            lines.extend(
                f"{index}. {' -> '.join(path)}"
                for index, path in enumerate(paths, start=1)
            )
        else:
            # Fallback when no multi-hop path exists: one-hop neighbours.
            lines.append("Direct relationships found:")
            descendants = self.retriever._get_descendants(anchor_id, max_hops=1)
            ancestors = self.retriever._get_ancestors(anchor_id, max_hops=1)
            lines.extend(f"- {anchor_id} causes {item}" for item in descendants)
            lines.extend(f"- {item} causes {anchor_id}" for item in ancestors)
        # Every line, including the last, is newline-terminated.
        return "\n".join(lines) + "\n"

    @staticmethod
    def _build_prompt(query: str, context_str: str) -> str:
        """Assemble the LLM prompt without leaking source-code indentation."""
        # Built from flush-left pieces: an indented triple-quoted f-string
        # would indent the instructions but not the interpolated multi-line
        # context, producing a ragged prompt.
        return (
            "You are a Causal Reasoning Assistant. Use the provided Knowledge Graph context to answer the question.\n"
            "Do not invent information outside the graph.\n"
            "\n"
            "Context:\n"
            f"{context_str}"
            "\n"
            f"Question: {query}\n"
            "\n"
            "Answer:"
        )

In [None]:
# Relative path of the causal-graph JSON that the chain loads on construction
json_file = "causal_math_graph_llm.json"

# Build the chain once; the constructor loads the graph and sets up the retriever
rag_chain = CausalRAGChain(json_path=json_file)