In [None]:
# main.py

from src.reasoners.networkx_symbolic_reasoner import GraphSymbolicReasoner
from src.reasoners.neural_retriever import NeuralRetriever
from src.integrators.hybrid_integrator import HybridIntegrator
from src.utils.rule_extractor import RuleExtractor
from src.queries.query_logger import QueryLogger
from src.resources.resource_manager import ResourceManager
from src.feedback.feedback_manager import FeedbackManager
from src.feedback.feedback_handler import FeedbackHandler
from src.config.config_loader import ConfigLoader
from src.queries.query_expander import QueryExpander
from src.utils.evaluation import Evaluation
from src.app import App
from src.system.system_control_manager import SystemControlManager, UnifiedResponseAggregator
from src.utils.metrics_collector import MetricsCollector
from src.utils.device_manager import DeviceManager

from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import os
import json
import torch
import warnings
import logging
import time
from collections import defaultdict

# Added for DROP dataset loading
from datasets import load_dataset

# Suppress specific spaCy warnings
warnings.filterwarnings("ignore", category=UserWarning, module="spacy.util")

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def load_hotpotqa(hotpotqa_path, max_samples=None):
    """
    Loads a portion of the HotpotQA dataset.
    Each sample includes a query, ground-truth answer,
    combined context, and a 'type' = 'ground_truth_available'.
    """
    dataset = []
    with open(hotpotqa_path, 'r', encoding='utf-8') as f:
        data = json.load(f)  # a single large JSON array

    count = 0
    for example in data:
        question = example['question']
        answer = example['answer']
        supporting_facts = example.get('supporting_facts', [])

        # Flatten the context
        context_str = []
        for ctx_item in example.get('context', []):
            title, sents = ctx_item[0], ctx_item[1]
            combined_sents = " ".join(sents)
            context_str.append(f"{title}: {combined_sents}")
        context_str = "\n".join(context_str)

        dataset.append({
            "query": question,
            "answer": answer,
            "context": context_str,
            "type": "ground_truth_available",
            "supporting_facts": supporting_facts
        })

        count += 1
        if max_samples and count >= max_samples:
            break
    return dataset


def load_drop(drop_dataset, max_samples=None):
    """
    Loads a portion of the DROP dataset in a format similar to HotpotQA.
    Each sample includes a query, ground-truth answer, combined context,
    and a 'type' = 'ground_truth_available'.
    """
    dataset = []
    count = 0

    # For example, use the 'validation' split. You can also use 'train' or 'test' if available.
    for example in drop_dataset['validation']:
        question = example['question']
        passage = example['passage']

        # The 'answers_spans' field typically looks like:
        # {'spans': ['<answer string>'], 'types': ['number'] or 'span'}
        # We'll take the first answer if it exists
        spans = example['answers_spans']['spans']
        answer = spans[0] if spans else "Unknown"

        dataset.append({
            "query": question,
            "answer": answer,
            "context": passage,
            "type": "ground_truth_available",
            # DROP doesn't provide "supporting_facts" by default,
            # so we'll set it to None for consistency
            "supporting_facts": None
        })

        count += 1
        if max_samples and count >= max_samples:
            break

    return dataset


if __name__ == "__main__":
    print("\n=== Initializing HySym-RAG System ===")

    # 1. Load configuration
    print("Loading configuration...")
    config = ConfigLoader.load_config("src/config/config.yaml")
    model_name = config["model_name"]

    # 2. Acquire a unified device from DeviceManager (optional, but recommended for consistency)
    device = DeviceManager.get_device()

    # 3. Initialize Resource Manager
    print("Initializing Resource Manager...")
    resource_manager = ResourceManager(
        config_path="src/config/resource_config.yaml",
        enable_performance_tracking=True,
        history_window_size=100
    )

    # ----------------------------------------------------------------
    # 4. (Optional) We skip extracting rules from deforestation.txt.
    #    Instead, ensure 'data/rules.json' exists but is empty or minimal.
    # ----------------------------------------------------------------
    rules_path = "data/rules.json"
    if not os.path.exists(rules_path):
        with open(rules_path, "w", encoding="utf-8") as f:
            json.dump([], f)
    print(f"Loading existing rules from {rules_path} (initially empty or minimal).")

    # 5. Initialize the Graph-Based Symbolic Reasoner
    print("Initializing Graph-Based Symbolic Reasoner...")
    symbolic = GraphSymbolicReasoner(
        rules_file=rules_path,
        match_threshold=0.25,
        max_hops=5,
        embedding_model='all-MiniLM-L6-v2',
        device=device  # <-- Only if your GraphSymbolicReasoner supports a 'device' param
    )

    # 6. Initialize the Neural Retriever
    print("Initializing Neural Retriever...")
    neural = NeuralRetriever(
        model_name,
        use_quantization=False,
        device=device  # <-- Only if your NeuralRetriever supports a 'device' param
    )

    # 7. Additional components
    print("Initializing support components...")
    logger = logging.getLogger(__name__)  # Standard logger for warnings
    query_logger = QueryLogger()
    feedback_manager = FeedbackManager()
    print("Initializing QueryExpander...")
    expander = QueryExpander(
        complexity_config="src/config/complexity_rules.yaml"
    )
    print("Initializing RuleExtractor...")
    rule_extractor = RuleExtractor()  # Instantiate RuleExtractor
    print("Loading evaluation dataset...")

    # Decide which dataset to use
    use_hotpotqa = False  # Set to True if you want to load HotpotQA
    use_drop = True       # Set to True if you want to load DROP

    test_queries = []
    ground_truths = {}

    # HotpotQA config
    hotpotqa_path = "data/hotpot_dev_distractor_v1.json"
    max_hotpot_samples = 4

    # DROP config
    max_drop_samples = 4

    if use_hotpotqa and os.path.exists(hotpotqa_path):
        print("Using HotpotQA dataset...")
        test_queries = load_hotpotqa(hotpotqa_path, max_samples=max_hotpot_samples)
        # Build rules from HotpotQA contexts and store ground truths
        for i, sample in enumerate(test_queries):
            new_rules = rule_extractor.extract_hotpot_facts(sample["context"], min_confidence=0.7)
            if new_rules:
                try:
                    symbolic.add_dynamic_rules(new_rules)
                except AttributeError as e:
                    logger.warning(f"Could not track new rules automatically (missing method?): {str(e)}")
            ground_truths[sample["query"]] = sample["answer"]

    elif use_drop:
        print("Using DROP dataset...")
        drop_data = load_dataset("ucinlp/drop")
        test_queries = load_drop(drop_data, max_samples=max_drop_samples)
        for sample in test_queries:
            ground_truths[sample["query"]] = sample["answer"]

    else:
        print("Warning: No valid dataset found or specified.")
        test_queries = []
        ground_truths = {}

    evaluator = Evaluation()

    # 8. Create Hybrid Integrator
    print("Creating Hybrid Integrator...")
    integrator = HybridIntegrator(
        symbolic,
        neural,
        resource_manager,
        expander,
        # device=device  # <-- Only if your HybridIntegrator supports a 'device' param
    )

    # 9. System Control - Initialize MetricsCollector and pass it to SystemControlManager
    print("Initializing System Control Components...")
    aggregator = UnifiedResponseAggregator(include_explanations=True)
    metrics_collector = MetricsCollector()
    system_manager = SystemControlManager(
        hybrid_integrator=integrator,
        resource_manager=resource_manager,
        aggregator=aggregator,
        metrics_collector=metrics_collector,
        error_retry_limit=2,
        max_query_time=10
    )

    # 10. Initialize Application
    print("Initializing Application...")
    feedback_handler = FeedbackHandler(feedback_manager)
    app = App(
        symbolic=symbolic,
        neural=neural,
        logger=logger,
        feedback=resource_manager,
        evaluator=evaluator,
        expander=expander,
        ground_truths=ground_truths,
        system_manager=system_manager
    )

    # 11. Possibly load a knowledge base for neural context
    kb_path = "data/small_knowledge_base.txt"
    if os.path.exists(kb_path):
        with open(kb_path, "r") as kb_file:
            context = kb_file.read()
    else:
        context = ""

    print("\n=== Testing System with Queries ===")
    for q_info in test_queries:
        query = q_info["query"]
        the_answer = q_info.get("answer", None)
        forced_path = q_info.get("forced_path", None)
        data_type = q_info.get("type", "ground_truth_available")
        supporting_facts = q_info.get("supporting_facts", None)  # For DROP, likely None

        print(f"\nProcessing Query: {query}")
        print(f"Query Type: {data_type}")
        if forced_path:
            print(f"Forced Path: {forced_path}")
        print("-" * 50)
        try:
            initial_time = time.time()
            complexity = expander.get_query_complexity(query)
            print(f"Query Complexity Score: {complexity:.4f}")

            initial_metrics = resource_manager.check_resources()
            local_context = q_info.get("context", context)

            final_answer = system_manager.process_query_with_fallback(
                query, local_context, forced_path=forced_path, query_complexity=complexity
            )
            final_metrics = resource_manager.check_resources()
            resource_delta = {
                key: final_metrics[key] - initial_metrics[key]
                for key in final_metrics
            }

            # Collect enhanced query metrics
            metrics_collector.collect_query_metrics(
                query=query,
                prediction=final_answer[0] if isinstance(final_answer, tuple) else final_answer,
                ground_truth=the_answer,
                reasoning_path=(
                    symbolic.extract_reasoning_pattern(query, final_answer.get('reasoning_path', []))
                        .get('pattern_type', 'unknown')
                    if hasattr(symbolic, "extract_reasoning_pattern") else 'unknown'
                ),
                processing_time=time.time() - initial_time,
                resource_usage=resource_delta,
                complexity_score=complexity
            )

            # Track component performance (if available)
            if isinstance(final_answer, dict):
                metrics_collector.component_metrics['symbolic']['execution_time'].append(
                    final_answer.get('symbolic_time', 0.0)
                )
                metrics_collector.component_metrics['neural']['execution_time'].append(
                    final_answer.get('neural_time', 0.0)
                )

            query_logger.log_query(
                query=query,
                result=final_answer,
                source="hybrid",
                complexity=complexity,
                resource_usage=resource_delta
            )

            print("\nProcessing Results:")
            print("-" * 20)
            print(final_answer)
            print("\nResource Usage:")
            print(f"CPU Delta: {resource_delta['cpu'] * 100:.1f}%")
            print(f"Memory Delta: {resource_delta['memory'] * 100:.1f}%")
            print(f"GPU Delta: {resource_delta['gpu'] * 100:.1f}%")
            print("-" * 20)

            if data_type == "ground_truth_available" and the_answer is not None:
                reasoning_chain = {}
                if hasattr(symbolic, "extract_reasoning_pattern"):
                    reasoning_chain = symbolic.extract_reasoning_pattern(
                        query,
                        final_answer.get('reasoning_path', [])
                    )
                eval_metrics = evaluator.evaluate(
                    predictions={query: final_answer[0] if isinstance(final_answer, tuple) else final_answer},
                    ground_truths={query: the_answer},
                    supporting_facts={query: supporting_facts},
                    reasoning_chain=reasoning_chain
                )
                print("\nEvaluation Metrics:")
                print(f"Similarity Score: {eval_metrics['average_semantic_similarity']:.2f}")
                print(f"ROUGE-L Score: {eval_metrics['average_rougeL']:.2f}")
                print(f"BLEU Score: {eval_metrics['average_bleu']:.2f}")
                print(f"F1 Score: {eval_metrics['average_f1']:.2f}")
                if 'reasoning_analysis' in eval_metrics:
                    print("\nReasoning Analysis:")
                    print(f"Pattern Type: {eval_metrics['reasoning_analysis'].get('pattern_type', 'unknown')}")
                    print(f"Chain Length: {eval_metrics['reasoning_analysis'].get('chain_length', 0)}")
                    print(f"Pattern Confidence: {eval_metrics['reasoning_analysis'].get('pattern_confidence', 0.0):.2f}")

        except KeyError as e:
            print(f"Error: Missing ground truth for query evaluation - {str(e)}")
        except Exception as e:
            print(f"Error processing query: {str(e)}")

    # 12. Optional Comparison Experiment
    print("\n=== Comparison Experiment (Sample) ===")
    comparison_queries = ["Compare and contrast the film adaptations of 'Pride and Prejudice'."]
    header = f"{'Query':<50} | {'Mode':<15} | {'CPU Δ (%)':<10} | {'Memory Δ (%)':<15} | {'GPU Δ (%)':<10} | {'Response'}"
    print(header)
    print("-" * len(header))

    for query in comparison_queries:
        initial_metrics_hybrid = resource_manager.check_resources()
        hybrid_answer = system_manager.process_query_with_fallback(query, context)
        final_metrics_hybrid = resource_manager.check_resources()
        hybrid_delta = {k: final_metrics_hybrid[k] - initial_metrics_hybrid[k] for k in final_metrics_hybrid}

        initial_metrics_neural = resource_manager.check_resources()
        neural_answer_raw = system_manager.process_query_with_fallback(query, context, forced_path="neural")
        neural_answer = aggregator.format_response({'result': neural_answer_raw})
        final_metrics_neural = resource_manager.check_resources()
        neural_delta = {k: final_metrics_neural[k] - initial_metrics_neural[k] for k in final_metrics_neural}

        row_hybrid = (
            f"{query:<50} | {'Hyb.':<15} | {hybrid_delta['cpu'] * 100:>10.1f} "
            f"| {hybrid_delta['memory'] * 100:>15.1f} | {hybrid_delta['gpu'] * 100:>10.1f} "
            f"| {hybrid_answer}"
        )
        row_neural = (
            f"{query:<50} | {'Neural':<15} | {neural_delta['cpu'] * 100:>10.1f} "
            f"| {neural_delta['memory'] * 100:>15.1f} | {neural_delta['gpu'] * 100:>10.1f} "
            f"| {str(neural_answer)[:150]}..."
        )
        print(row_hybrid)
        print(row_neural)
        print("-" * len(header))

    print("\n=== System Performance Summary ===")
    performance_stats = system_manager.get_performance_metrics()
    print("\nOverall Performance:")
    print(f"- Total Queries: {performance_stats['total_queries']}")
    print(f"- Average Response Time: {performance_stats['avg_response_time']:.2f}s")
    print(f"- Success Rate: {performance_stats['success_rate']:.1f}%")

    print("\nResource Utilization:")
    final_resources = resource_manager.check_resources()
    print(f"- CPU Usage: {final_resources['cpu'] * 100:.1f}%")
    print(f"- Memory Usage: {final_resources['memory'] * 100:.1f}%")
    print(f"- GPU Usage: {final_resources['gpu'] * 100:.1f}%")

    print("\nReasoning Path Distribution:")
    path_stats = system_manager.get_reasoning_path_stats()
    total_queries = performance_stats['total_queries']
    for path, stats in path_stats.items():
        count = stats.get('count', 0)
        percentage = (count / total_queries) * 100 if total_queries > 0 else 0
        print(f"- {path}: {percentage:.1f}%")

    # Enhanced Academic Analysis Display
    print("\n=== Comprehensive Academic Analysis ===")
    academic_report = metrics_collector.generate_academic_report()

    print("\nPerformance Analysis:")
    print(f"- Average Processing Time: {academic_report['performance_metrics']['processing_time']['mean']:.2f}s")
    print(f"- 95th Percentile Time: {academic_report['performance_metrics']['processing_time']['percentile_95']:.2f}s")

    print("\nReasoning Analysis:")
    if 'reasoning_analysis' in academic_report:
        ra = academic_report['reasoning_analysis']
        print(f"- Average Chain Length: {ra.get('chain_characteristics', {}).get('avg_length', 0.0):.2f}")
        print(f"- Average Confidence: {ra.get('chain_characteristics', {}).get('avg_confidence', 0.0):.2f}")
        print(f"- Average Inference Depth: {ra.get('chain_characteristics', {}).get('avg_inference_depth', 0.0):.2f}")

    print("\nResource Efficiency:")
    if 'efficiency_metrics' in academic_report:
        em = academic_report['efficiency_metrics']
        for resource, metrics in em.items():
            if resource != 'trends':
                print(f"- {resource.capitalize()}:")
                print(f"  * Mean Usage: {metrics.get('mean_usage', 0.0)*100:.1f}%")
                print(f"  * Peak Usage: {metrics.get('peak_usage', 0.0)*100:.1f}%")
                print(f"  * Efficiency Score: {metrics.get('efficiency_score', 0.0):.2f}")

    print("\nStatistical Analysis:")
    if 'statistical_analysis' in academic_report:
        sa = academic_report['statistical_analysis']
        print("Significance Tests:")
        for metric, stats in sa.items():
            if isinstance(stats, dict) and 'p_value' in stats:
                print(f"- {metric}:")
                print(f"  * p-value: {stats['p_value']:.3f}")
                print(f"  * effect size: {stats.get('effect_size', 0.0):.2f}")

    # Optional Ablation Study Section with Enhanced Reporting
    print("\n=== Ablation Study ===")
    ablation_results = defaultdict(dict)
    ablation_configs = [
        {'name': 'No Pattern Analysis', 'disable_patterns': True},
        {'name': 'Limited Hops', 'max_hops': 2},
        {'name': 'High Threshold', 'match_threshold': 0.5}
    ]
    for config in ablation_configs:
        config_name = config['name']
        print(f"\nTesting Configuration: {config_name}")
        try:
            # Run baseline
            baseline_result = system_manager.process_query_with_fallback(
                "What are the environmental effects of deforestation?",
                context
            )
            baseline_metrics = metrics_collector.get_real_time_metrics()

            # Run modified version using a modified symbolic reasoner
            modified_symbolic = GraphSymbolicReasoner(
                rules_file=rules_path,
                match_threshold=config.get('match_threshold', 0.25),
                max_hops=config.get('max_hops', 5),
                embedding_model='all-MiniLM-L6-v2'
            )
            modified_result = modified_symbolic.process_query(
                "What are the environmental effects of deforestation?"
            )
            modified_metrics = metrics_collector.get_real_time_metrics()

            ablation_results[config_name] = {
                'baseline': baseline_metrics,
                'modified': modified_metrics
            }

            print("\nPerformance Comparison:")
            print(f"Baseline processing time: {baseline_metrics['average_processing_time']:.2f}s")
            print(f"Modified processing time: {modified_metrics['average_processing_time']:.2f}s")
        except Exception as e:
            print(f"Error in ablation study for {config_name}: {str(e)}")

    print("\n=== System Performance Summary (Extended) ===")
    pattern_metrics = symbolic.get_reasoning_metrics()
    print(f"- Average Chain Length: {pattern_metrics['path_analysis']['average_length']:.2f}")
    print(f"- Average Confidence: {pattern_metrics['confidence_analysis']['mean_confidence']:.2f}")
    print("\nPattern Distribution:")
    for length, frequency in pattern_metrics['path_analysis']['path_distribution'].items():
        print(f"- {length}-hop paths: {frequency * 100:.1f}%")

    print("\n=== End of Run ===")
    print("\n=== Academic Evaluation Results ===")
    print(json.dumps(academic_report, indent=2))


  from .autonotebook import tqdm as notebook_tqdm
INFO:src.utils.rule_extractor:Successfully loaded spaCy model
Device set to use cuda:0
INFO:src.utils.rule_extractor:Successfully initialized rule scorer
INFO:ResourceManager:ResourceManager initialized with config: src/config/resource_config.yaml
INFO:GraphSymbolicReasoner:Successfully loaded 5 rules from data/rules.json



=== Initializing HySym-RAG System ===
Loading configuration...
Initializing Resource Manager...
Loading existing rules from data/rules.json (initially empty or minimal).
Initializing Graph-Based Symbolic Reasoner...


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:GraphSymbolicReasoner:Rule index built successfully with 5 rules.
INFO:GraphSymbolicReasoner:Knowledge graph built with 5 nodes
INFO:GraphSymbolicReasoner:GraphSymbolicReasoner initialized successfully


Initializing Neural Retriever...
Initializing Neural Retriever with model: meta-llama/Llama-3.2-3B...


INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.17it/s]
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:QueryLogger:QueryLogger initialized successfully
INFO:FeedbackManager:FeedbackManager initialized successfully


Model meta-llama/Llama-3.2-3B loaded successfully!
Initializing support components...
Initializing QueryExpander...


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Initializing RuleExtractor...
Loading evaluation dataset...
Using DROP dataset...


Generating train split: 100%|██████████| 77400/77400 [00:00<00:00, 1245770.72 examples/s]
Generating validation split: 100%|██████████| 9535/9535 [00:00<00:00, 1372338.50 examples/s]
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:absl:Using default tokenizer.
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Creating Hybrid Integrator...


INFO:FeedbackHandler:FeedbackHandler initialized successfully
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


Initializing System Control Components...
Initializing Application...


INFO:SystemControlManager:Path Selection Factors: {'complexity': 0.7636363863945007, 'resource_pressure': 0.9748793966780654}
INFO:SystemControlManager:Selected Path: symbolic - Reason: Low query complexity favors symbolic reasoning
INFO:SystemControlManager:Selected reasoning path: symbolic



=== Testing System with Queries ===

Processing Query: Who scored the first touchdown of the game?
Query Type: ground_truth_available
--------------------------------------------------
Query Complexity Score: 0.7636


Batches: 100%|██████████| 1/1 [00:00<00:00,  7.88it/s]
INFO:GraphSymbolicReasoner:No symbolic match found.
INFO:SystemControlManager:Path Selection Factors: {'complexity': 0.7636363863945007, 'resource_pressure': 0.9785432950659502}
INFO:SystemControlManager:Selected Path: symbolic - Reason: Low query complexity favors symbolic reasoning
INFO:SystemControlManager:Selected reasoning path: symbolic



Processing Results:
--------------------
{'result': ['No symbolic match found.'], 'processing_time': 0.13135099411010742, 'resource_usage': {'cpu': 0.055, 'memory': 0.010000000000000009, 'gpu': 0.003663898387884723}, 'reasoning_path': 'symbolic', 'retries': 0, 'explanation': 'Reasoning Approach: symbolic | Processing Time: 0.131s | Resource Utilization: | - cpu: 5.5% | - memory: 1.0% | - gpu: 0.4%'}

Resource Usage:
CPU Delta: -4.0%
Memory Delta: 1.0%
GPU Delta: 0.4%
--------------------
Error processing query: Evaluation.evaluate() got an unexpected keyword argument 'reasoning_chain'

Processing Query: How many field goals did Kris Brown kick?
Query Type: ground_truth_available
--------------------------------------------------
Query Complexity Score: 0.7636


Batches: 100%|██████████| 1/1 [00:00<00:00, 491.14it/s]
INFO:GraphSymbolicReasoner:No symbolic match found.
INFO:SystemControlManager:Path Selection Factors: {'complexity': 0.625, 'resource_pressure': 0.9785432950659502}
INFO:SystemControlManager:Selected Path: symbolic - Reason: Low query complexity favors symbolic reasoning
INFO:SystemControlManager:Selected reasoning path: symbolic



Processing Results:
--------------------
{'result': ['No symbolic match found.'], 'processing_time': 0.005845785140991211, 'resource_usage': {'cpu': 0.11800000000000001, 'memory': 0.0, 'gpu': 0.0}, 'reasoning_path': 'symbolic', 'retries': 0, 'explanation': 'Reasoning Approach: symbolic | Processing Time: 0.006s | Resource Utilization: | - cpu: 11.8% | - memory: 0.0% | - gpu: 0.0%'}

Resource Usage:
CPU Delta: -29.0%
Memory Delta: 0.0%
GPU Delta: 0.0%
--------------------
Error processing query: Evaluation.evaluate() got an unexpected keyword argument 'reasoning_chain'

Processing Query: Which team won the game?
Query Type: ground_truth_available
--------------------------------------------------
Query Complexity Score: 0.6250


Batches: 100%|██████████| 1/1 [00:00<00:00, 252.27it/s]
INFO:GraphSymbolicReasoner:No symbolic match found.
INFO:SystemControlManager:Path Selection Factors: {'complexity': 0.8399999499320985, 'resource_pressure': 0.9785432950659502}
INFO:SystemControlManager:Selected Path: symbolic - Reason: High resource pressure favors symbolic path
INFO:SystemControlManager:Selected reasoning path: symbolic



Processing Results:
--------------------
{'result': ['No symbolic match found.'], 'processing_time': 0.007930517196655273, 'resource_usage': {'cpu': -0.15899999999999997, 'memory': 0.0, 'gpu': 0.0}, 'reasoning_path': 'symbolic', 'retries': 0, 'explanation': 'Reasoning Approach: symbolic | Processing Time: 0.008s | Resource Utilization: | - cpu: -15.9% | - memory: 0.0% | - gpu: 0.0%'}

Resource Usage:
CPU Delta: -26.7%
Memory Delta: 0.0%
GPU Delta: 0.0%
--------------------
Error processing query: Evaluation.evaluate() got an unexpected keyword argument 'reasoning_chain'

Processing Query: How many field goals did both teams kick in the first half?
Query Type: ground_truth_available
--------------------------------------------------
Query Complexity Score: 0.8400


Batches: 100%|██████████| 1/1 [00:00<00:00, 518.14it/s]
INFO:GraphSymbolicReasoner:No symbolic match found.



Processing Results:
--------------------
{'result': ['No symbolic match found.'], 'processing_time': 0.0041310787200927734, 'resource_usage': {'cpu': 0.16699999999999998, 'memory': 0.0, 'gpu': 0.0}, 'reasoning_path': 'symbolic', 'retries': 0, 'explanation': 'Reasoning Approach: symbolic | Processing Time: 0.004s | Resource Utilization: | - cpu: 16.7% | - memory: 0.0% | - gpu: 0.0%'}

Resource Usage:
CPU Delta: -40.7%
Memory Delta: 0.0%
GPU Delta: 0.0%
--------------------
Error processing query: Evaluation.evaluate() got an unexpected keyword argument 'reasoning_chain'

=== Comparison Experiment (Sample) ===
Query                                              | Mode            | CPU Δ (%)  | Memory Δ (%)    | GPU Δ (%)  | Response
---------------------------------------------------------------------------------------------------------------------------


INFO:SystemControlManager:Computed query complexity: 0.8333
INFO:SystemControlManager:Path Selection Factors: {'complexity': 0.8333333767950535, 'resource_pressure': 0.9785432950659502}
INFO:SystemControlManager:Selected Path: symbolic - Reason: High resource pressure favors symbolic path
INFO:SystemControlManager:Selected reasoning path: symbolic
Batches: 100%|██████████| 1/1 [00:00<00:00, 466.92it/s]
INFO:GraphSymbolicReasoner:No symbolic match found.
INFO:SystemControlManager:Forced reasoning path: neural
Batches: 100%|██████████| 1/1 [00:00<00:00, 228.91it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 527.12it/s]
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
INFO:SystemControlManager:Computed query complexity: 0.6364
INFO:SystemControlManager:Path Selection Factors: {'complexity': 0.6363637074828148, 'resource_pressure': 0.9866038715192965}
INFO:SystemControlManager:Selected Path: symbolic - Reason: Low query complexity favors symbolic reasoning
INFO:Sys

Compare and contrast the film adaptations of 'Pride and Prejudice'. | Hyb.            |        0.0 |             0.0 |        0.0 | {'result': ['No symbolic match found.'], 'processing_time': 0.014783143997192383, 'resource_usage': {'cpu': 0.17600000000000002, 'memory': 0.0, 'gpu': 0.0}, 'reasoning_path': 'symbolic', 'retries': 0, 'explanation': 'Reasoning Approach: symbolic | Processing Time: 0.015s | Resource Utilization: | - cpu: 17.6% | - memory: 0.0% | - gpu: 0.0%'}
Compare and contrast the film adaptations of 'Pride and Prejudice'. | Neural          |        0.0 |             1.2 |        0.8 | {'result': {'result': "The 2005 film adaptation of Pride and Prejudeice, directed by Joe Wright, stands out as a visually stunning and emotionally res...
---------------------------------------------------------------------------------------------------------------------------

=== System Performance Summary ===

Overall Performance:
- Total Queries: 6
- Average Response Time: 3.73s
- Succ

Batches: 100%|██████████| 1/1 [00:00<00:00, 620.00it/s]
INFO:GraphSymbolicReasoner:No symbolic match found.
INFO:GraphSymbolicReasoner:Successfully loaded 5 rules from data/rules.json
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:GraphSymbolicReasoner:Rule index built successfully with 5 rules.
INFO:GraphSymbolicReasoner:Knowledge graph built with 5 nodes
INFO:GraphSymbolicReasoner:GraphSymbolicReasoner initialized successfully
Batches: 100%|██████████| 1/1 [00:00<00:00, 527.65it/s]
INFO:GraphSymbolicReasoner:No symbolic match found.



Performance Comparison:
Baseline processing time: 2.05s
Modified processing time: 2.05s

Testing Configuration: Limited Hops


In [5]:
pip install datasets

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-19.0.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py312-none-any.whl.metadata (7.2 kB)
Collecting aiohttp (from datasets)
  Downloading aiohttp-3.11.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->datasets)
  Downloading aiohappyeyeballs-2.4.6-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->datasets)
  Downloading aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting frozenlist>=1.1.1 (f