In [11]:
import pickle
import json
from pathlib import Path
import numpy as np
import pandas as pd

from neurosym.examples.near.metrics import compute_metrics


In [12]:
from pathlib import Path

def find_repo_root(start: Path) -> Path:
    """Walk upwards from ``start`` to the nearest directory that looks like the repo root.

    A directory qualifies when it contains an entry named ``pyproject.toml``,
    ``.git`` or ``neurosym``. The search begins at the resolved ``start``
    itself and then visits each ancestor in turn.

    Args:
        start: Path to begin the search from; it is resolved to an absolute
            path first.

    Returns:
        The first qualifying directory, or the resolved ``start`` when neither
        it nor any ancestor contains a marker.
    """
    origin = start.resolve()
    marker_names = ("pyproject.toml", ".git", "neurosym")

    def has_marker(candidate: Path) -> bool:
        # True as soon as any one of the marker entries exists in `candidate`.
        for marker in marker_names:
            if (candidate / marker).exists():
                return True
        return False

    # Closest match wins: `origin` is checked before its parents.
    candidates = [origin, *origin.parents]
    return next((c for c in candidates if has_marker(c)), origin)

# Anchor all paths at the detected repository root, starting the search from
# the kernel's current working directory.
repo_root = find_repo_root(Path.cwd())

file_path = repo_root / 'outputs' / 'ecg_results' / 'reproduction.pkl'
neurosym_results = {}
if not file_path.exists():
    print(f"Warning: {file_path} not found")
else:
    file_name = file_path.stem
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only point this at pickles produced by a trusted run.
    with file_path.open('rb') as f:
        results = pickle.load(f)
    # One entry per loaded program, keyed "neurosym_<file stem>_<3-digit index>".
    neurosym_results.update(
        (f"neurosym_{file_name}_{i:03d}", result)
        for i, result in enumerate(results)
    )
    print(f"  Loaded {len(results)} programs from {file_name}")

# Load baseline JSON results if present.
# Files that are absent are skipped, but their names are reported so it is
# clear which baselines are missing from the comparison.
baseline_results = {}
baseline_dir = repo_root / 'outputs' / 'ecg_results'
baseline_files = [
    baseline_dir / 'baseline_nn_single.json',
    baseline_dir / 'baseline_nn_multi.json',
    baseline_dir / 'baseline_tree_decision_tree_single.json',
    baseline_dir / 'baseline_tree_decision_tree_multi.json',
    baseline_dir / 'baseline_tree_random_forest_single.json',
    baseline_dir / 'baseline_tree_random_forest_multi.json',
]
missing_baselines = []
for bf in baseline_files:
    if not bf.exists():
        missing_baselines.append(bf.name)
        continue
    with open(bf, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # NOTE(review): every stem above already starts with "baseline_", so the
    # keys come out as "baseline_baseline_...". Left unchanged so experiment
    # names in previously saved comparison files stay stable -- confirm
    # whether the doubled prefix is intended.
    key = f"baseline_{bf.stem}"
    baseline_results[key] = data
if baseline_results:
    print(f"  Loaded {len(baseline_results)} baseline results")
if missing_baselines:
    print(f"Warning: baseline files not found: {', '.join(missing_baselines)}")


  Loaded 20 programs from reproduction
  Loaded 4 baseline results


In [13]:
# Column order of the comparison table. Passing it to the DataFrame
# constructor keeps the frame well-formed (and sortable) even when no
# experiment produced a row.
COMPARISON_COLUMNS = [
    "experiment",
    "precision",
    "recall",
    "f1_score",
    "support",
    "hamming_accuracy",
    "time",
]


def _extract_metrics(result):
    """Pick the macro-average metrics and Hamming accuracy out of one result.

    Three layouts are recognised, checked in this order:

    1. ``result["report"]["report"]["macro avg"]`` with ``hamming_accuracy``
       next to the inner ``report``.
    2. ``result["report"]["macro avg"]`` with ``hamming_accuracy`` next to it.
    3. Neither of the above, but raw ``pred_vals`` / ``true_vals`` are stored
       on the result: the metrics are recomputed with ``compute_metrics``.

    Args:
        result: Mapping describing one experiment.

    Returns:
        A ``(macro_avg, hamming_accuracy)`` tuple, or ``None`` when the
        result's ``report`` is not a dict or none of the layouts applies.
    """
    report = result.get("report", {})
    if not isinstance(report, dict):
        return None
    if "report" in report:
        return report["report"].get("macro avg", {}), report.get("hamming_accuracy", 0.0)
    if "macro avg" in report:
        return report["macro avg"], report.get("hamming_accuracy", 0.0)
    if "pred_vals" in result and "true_vals" in result:
        pred_vals = np.array(result["pred_vals"])
        true_vals = np.array(result["true_vals"])
        metrics = compute_metrics(pred_vals, true_vals.astype("int32"))
        return metrics["report"]["macro avg"], metrics["hamming_accuracy"]
    return None


# Baseline entries are merged after the neurosym ones, so on a key clash the
# baseline value wins (same as two successive dict.update calls).
all_results = {**neurosym_results, **baseline_results}

# Create comparison dataframe
table_data = []

for name, result in all_results.items():
    extracted = _extract_metrics(result)
    if extracted is None:
        # No usable metrics for this experiment; leave it out of the table.
        continue
    macro_avg, hamming_acc = extracted

    # NOTE(review): metrics or timings absent from a result default to 0.0 and
    # therefore take part in the sort below and in any averages computed from
    # this frame -- confirm that is intended (NaN would exclude them).
    table_data.append(
        {
            "experiment": name,
            "precision": macro_avg.get("precision", 0.0),
            "recall": macro_avg.get("recall", 0.0),
            "f1_score": macro_avg.get("f1-score", 0.0),
            "support": macro_avg.get("support", 0),
            "hamming_accuracy": hamming_acc,
            "time": result.get("time", 0.0),
        }
    )

# Explicit columns: with zero rows, a column-less frame would make
# sort_values("hamming_accuracy") raise KeyError.
df = pd.DataFrame(table_data, columns=COMPARISON_COLUMNS)
df = df.sort_values("hamming_accuracy", ascending=False)

print("\n" + "=" * 80)
print("RESULTS COMPARISON")
print("=" * 80)
print(df.to_string(index=False))
print("=" * 80)



RESULTS COMPARISON
                                 experiment  precision   recall  f1_score  support  hamming_accuracy      time
 baseline_baseline_tree_random_forest_multi   0.425891 0.170281  0.193452   2198.0          0.874839  0.000000
                 baseline_baseline_nn_multi   0.246134 0.170591  0.157155   2198.0          0.863264  0.000000
                  neurosym_reproduction_012   0.081122 0.400228  0.102534   2198.0          0.593885 80.511543
                  neurosym_reproduction_013   0.081122 0.400228  0.102534   2198.0          0.593885 80.511661
                  neurosym_reproduction_014   0.081122 0.400228  0.102534   2198.0          0.593885 80.511699
                  neurosym_reproduction_015   0.081122 0.400228  0.102534   2198.0          0.593885 80.511736
                  neurosym_reproduction_016   0.081122 0.400228  0.102534   2198.0          0.593885 80.511770
                  neurosym_reproduction_017   0.081122 0.400228  0.102534   2198.0          

In [9]:
# Headline numbers for the comparison table: best and average scores.
rule = "=" * 80
print("\nSUMMARY STATISTICS")
print(rule)
print(f"Total experiments: {len(df)}")
if len(df) > 0:
    f1_scores = df['f1_score']
    print(f"\nBest F1-score: {f1_scores.max():.6f}")
    # idxmax returns the index label of the top row, which .loc resolves even
    # though the frame has been re-ordered by sorting.
    print(f"  Experiment: {df.loc[f1_scores.idxmax(), 'experiment']}")

    hamming_scores = df['hamming_accuracy']
    print(f"\nBest Hamming accuracy: {hamming_scores.max():.6f}")
    print(f"  Experiment: {df.loc[hamming_scores.idxmax(), 'experiment']}")

    print(f"\nAverage F1-score: {f1_scores.mean():.6f}")
    print(f"Average Hamming accuracy: {hamming_scores.mean():.6f}")
    print(f"Average training time: {df['time'].mean():.2f} seconds")
print(rule)



SUMMARY STATISTICS
Total experiments: 20

Best F1-score: 0.114910
  Experiment: neurosym_reproduction_003

Best Hamming accuracy: 0.593885
  Experiment: neurosym_reproduction_016

Average F1-score: 0.109960
Average Hamming accuracy: 0.515299
Average training time: 74.82 seconds


In [10]:
# Save CSV
# Both artefacts go next to the input results under outputs/ecg_results.
output_dir = repo_root / 'outputs' / 'ecg_results'
output_dir.mkdir(parents=True, exist_ok=True)
csv_path = output_dir / 'comparison.csv'
df.to_csv(csv_path, index=False)
print(f"Saved CSV to: {csv_path}")

# Save Markdown
# The encoding is pinned to UTF-8 so the file does not depend on the
# platform's default locale encoding (matches how the baseline JSON is read).
# DataFrame.to_markdown relies on the optional `tabulate` package.
md_path = output_dir / 'comparison.md'
with open(md_path, 'w', encoding='utf-8') as f:
    f.write('# ECG NEAR Results Comparison\n\n')
    f.write(df.to_markdown(index=False))
    f.write('\n')
print(f"Saved Markdown to: {md_path}")


Saved CSV to: /home/asehgal/neurosym-lib/outputs/ecg_results/comparison.csv
Saved Markdown to: /home/asehgal/neurosym-lib/outputs/ecg_results/comparison.md
