# Analyze Baselines

In [20]:
import json
from pathlib import Path

import pandas as pd

In [21]:
# Root experiment directory: searched recursively for "*_metrics.json" files.
# The path is relative, so it is resolved against the kernel's current working directory.
root = Path("../experiments/darpa2000/s1_inside/baselines")

In [22]:
# Build one summary row per "*_metrics.json" file found anywhere below `root`.
rows = []

# rglob yields paths in arbitrary filesystem order; sorting them keeps the
# resulting table reproducible across machines and runs.
for json_file in sorted(root.rglob("*_metrics.json")):

    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
    with open(json_file, encoding="utf-8") as f:
        metrics = json.load(f)

    # Strip only the trailing "_metrics" (its presence is guaranteed by the glob
    # pattern). str.replace would also delete the token from the middle of a name.
    experiment_name = json_file.stem[: -len("_metrics")]

    row = {
        "experiment": experiment_name,
        "accuracy": metrics["accuracy"],
        "precision": metrics["precision"],
        "recall": metrics["recall"],
        "f1": metrics["f1"],
        # Number of misclassified samples recorded for this experiment.
        "num_errors": len(metrics["misclassified_indices"]),
    }

    rows.append(row)

# An empty search result would otherwise surface as an opaque KeyError: 'f1'
# from sort_values; fail early with a message that points at the real cause.
if not rows:
    raise FileNotFoundError(
        f"No '*_metrics.json' files found under {root.resolve()}"
    )

# Rank experiments from best to worst F1. mergesort is stable, so experiments
# with identical F1 keep their path-sorted order instead of being shuffled.
results_df = (
    pd.DataFrame(rows)
    .sort_values("f1", ascending=False, kind="mergesort")
    .reset_index(drop=True)
)

results_df

Unnamed: 0,experiment,accuracy,precision,recall,f1,num_errors
0,ensemble_lstm_original_class_weights_w10,0.999622,0.890752,0.979053,0.929069,19
1,ensemble_lstm_resampled_class_weights_w10,0.999523,0.859986,0.981328,0.912855,24
2,ensemble_lstm_resampled_no_class_weights_w10,0.999583,0.86152,0.981352,0.912269,21
3,multi_class_lstm_resampled_class_weights_w50,0.999563,0.940343,0.898702,0.905887,22
4,multi_class_lstm_resampled_class_weights_w100,0.999582,0.903576,0.90995,0.902462,21
5,ensemble_lstm_resampled_no_class_weights_w100,0.999582,0.955412,0.886148,0.899144,21
6,multi_class_lstm_resampled_no_class_weights_w100,0.999582,0.924872,0.891439,0.895392,21
7,multi_class_lstm_resampled_class_weights_w10,0.999523,0.82844,0.98133,0.890621,24
8,multi_class_lstm_resampled_no_class_weights_w10,0.999464,0.818706,0.981306,0.885239,27
9,multi_class_lstm_resampled_no_class_weights_w50,0.999443,0.880499,0.873541,0.872674,28


In [23]:
# Sanity check: how many experiment rows ended up in the table
print(results_df.shape[0])

23


In [25]:
# Preview the first five rows of the ranked table
print("Top 5 experiments by F1 score:")
results_df.head(n=5)

Top 5 experiments by F1 score:


Unnamed: 0,experiment,accuracy,precision,recall,f1,num_errors
0,ensemble_lstm_original_class_weights_w10,0.999622,0.890752,0.979053,0.929069,19
1,ensemble_lstm_resampled_class_weights_w10,0.999523,0.859986,0.981328,0.912855,24
2,ensemble_lstm_resampled_no_class_weights_w10,0.999583,0.86152,0.981352,0.912269,21
3,multi_class_lstm_resampled_class_weights_w50,0.999563,0.940343,0.898702,0.905887,22
4,multi_class_lstm_resampled_class_weights_w100,0.999582,0.903576,0.90995,0.902462,21


In [29]:
# Persist the results table as CSV (without the positional index column)
out_dir = Path("../reports/baselines")
out_path = out_dir / "baselines_analysis.csv"

# Create the report directory, including missing parents, before writing
out_dir.mkdir(parents=True, exist_ok=True)

print(f"Saving results to {out_path}...")
results_df.to_csv(out_path, index=False)

Saving results to ../reports/baselines/baselines_analysis.csv...
