In [1]:
import os
import json
import numpy as np
import pandas as pd
from math import sqrt
from scipy.stats import t

# ================== CONFIGURATION ==================

# Channels you want to combine (edit as needed)
CHANNELS = [0, 1, 4, 7]

# Number of folds used in training
K_FOLDS = 5   # or 5 etc.

# TM/training configuration used in filenames
BATCH_SIZE = 200
N_CLAUSES = 30
BITS = 3

# ================== HELPER FUNCTIONS ==================

def load_sensor_log(channel, k_folds):
    """
    Loads the JSON file for that channel and K-fold setting.
    Expected path format:
    channel_{ch}/bs_{BATCH_SIZE}_clauses_{N_CLAUSES}_bits_{BITS}_k{k_folds}.json
    """
    folder = f"channel_{channel}"
    filename = f"bs_{BATCH_SIZE}_clauses_{N_CLAUSES}_bits_{BITS}_k{k_folds}.json"
    path = os.path.join(folder, filename)

    if not os.path.exists(path):
        raise FileNotFoundError(f"[ERROR] Cannot find: {path}")

    with open(path, "r") as f:
        return json.load(f)


# ================== LOAD ALL SENSOR LOGS ==================

sensor_logs = {ch: load_sensor_log(ch, K_FOLDS) for ch in CHANNELS}

first_channel = CHANNELS[0]
num_folds = sensor_logs[first_channel].get("k_folds", K_FOLDS)

print(f"\nLoaded logs for channels {CHANNELS} with {num_folds} folds.\n")


# ================== OUTPUT DIRECTORIES ==================

output_root = f"composite_k{K_FOLDS}"
os.makedirs(output_root, exist_ok=True)

summary_file = os.path.join(output_root, "composite_summary.txt")


# ================== MAIN LOOP OVER FOLDS ==================

final_ensemble_accs = []   # store final ensemble accuracy per fold
channel_fold_accs = {ch: [] for ch in CHANNELS}  # <<< NEW: per-channel fold accs

for fold_idx in range(1, num_folds + 1):
    fold_key = f"fold_{fold_idx}"
    print(f"\n======= Processing {fold_key} =======")

    # Extract fold-specific dictionaries
    fold_dicts = {ch: sensor_logs[ch][fold_key] for ch in CHANNELS}

    # Reference channel for GT labels and batch list
    ref_fold = fold_dicts[first_channel]
    y_true = np.array(ref_fold["y_test_learning"], dtype=np.int64)

    # Collect batch keys
    batch_keys = [k for k in ref_fold.keys() if k.startswith("batch_")]
    batch_keys = sorted(batch_keys, key=lambda x: int(x.split("_")[1]))

    rows = []

    for batch_key in batch_keys:
        sensor_scores = {}
        sensor_accs = {}

        for ch in CHANNELS:
            entry = fold_dicts[ch][batch_key]
            scores = np.array(entry["scores"], dtype=np.float32)
            preds = scores.argmax(axis=1)
            acc = 100.0 * (preds == y_true).mean()

            sensor_scores[ch] = scores
            sensor_accs[ch] = acc

        # Ensemble fusion (normalized voting)
        n_samples, n_classes = next(iter(sensor_scores.values())).shape
        votes = np.zeros((n_samples, n_classes), dtype=np.float32)

        for ch, scores in sensor_scores.items():
            smax = scores.max()
            smin = scores.min()
            denom = smax - smin if smax != smin else 1.0
            votes += scores / denom

        ensemble_pred = votes.argmax(axis=1)
        ensemble_acc = 100.0 * (ensemble_pred == y_true).mean()

        # Row for this batch
        row = {
            "fold": fold_idx,
            "batch_key": batch_key,
            "ensemble_acc": ensemble_acc
        }
        for ch in CHANNELS:
            row[f"ch{ch}_acc"] = sensor_accs[ch]

        rows.append(row)

    # Save fold results to CSV
    df = pd.DataFrame(rows)
    csv_path = os.path.join(output_root, f"fold_{fold_idx}_ensemble.csv")
    df.to_csv(csv_path, index=False)
    print(f"Saved: {csv_path}")

    # Store final (last batch) ensemble accuracy
    last_row = rows[-1]
    final_ensemble_accs.append(last_row["ensemble_acc"])

    # <<< NEW: store final (last batch) accuracy per channel for this fold
    for ch in CHANNELS:
        channel_fold_accs[ch].append(last_row[f"ch{ch}_acc"])


# ================== COMPUTE MEAN / STD / 95% CI (ENSEMBLE) ==================

mean_acc = np.mean(final_ensemble_accs)
std_acc = np.std(final_ensemble_accs, ddof=1)

# t-distribution for 95% CI
t_value = t.ppf(0.975, df=num_folds - 1)
ci_margin = t_value * (std_acc / sqrt(num_folds))

lower_ci = mean_acc - ci_margin
upper_ci = mean_acc + ci_margin

print("\n====== COMPOSITE PERFORMANCE SUMMARY ======")
print(f"Per-fold final ensemble accuracies: {final_ensemble_accs}")
print(f"Mean Accuracy (ensemble): {mean_acc:.2f}%")
print(f"Std Dev (ensemble): {std_acc:.2f}%")
print(f"95% CI (ensemble): ±{ci_margin:.2f}%  →  [{lower_ci:.2f}%, {upper_ci:.2f}%]")


# ================== PER-CHANNEL STATS (MEAN / STD / 95% CI) ==================  # <<< NEW

channel_stats = {}

for ch in CHANNELS:
    accs = np.array(channel_fold_accs[ch], dtype=np.float32)
    ch_mean = float(np.mean(accs))
    ch_std = float(np.std(accs, ddof=1)) if len(accs) > 1 else 0.0
    ch_ci_margin = float(t_value * (ch_std / sqrt(num_folds))) if len(accs) > 1 else 0.0
    ch_lower = ch_mean - ch_ci_margin
    ch_upper = ch_mean + ch_ci_margin

    channel_stats[ch] = {
        "per_fold_acc": accs.tolist(),
        "mean": ch_mean,
        "std": ch_std,
        "ci_margin": ch_ci_margin,
        "ci_lower": ch_lower,
        "ci_upper": ch_upper,
    }

    print(f"\nChannel {ch}:")
    print(f"  Per-fold accs: {accs.tolist()}")
    print(f"  Mean: {ch_mean:.2f}%")
    print(f"  Std: {ch_std:.2f}%")
    print(f"  95% CI: ±{ch_ci_margin:.2f}%  →  [{ch_lower:.2f}%, {ch_upper:.2f}%]")


# ================== SAVE SUMMARY TO TEXT FILE ==================

with open(summary_file, "w") as f:
    f.write("===== Composite Ensemble Summary =====\n")
    f.write(f"Channels used: {CHANNELS}\n")
    f.write(f"K-folds: {K_FOLDS}\n\n")

    # Ensemble stats
    f.write("---- Ensemble (Composite) ----\n")
    f.write(f"Per-fold accuracies: {final_ensemble_accs}\n")
    f.write(f"Mean accuracy: {mean_acc:.2f}%\n")
    f.write(f"Std deviation: {std_acc:.2f}%\n")
    f.write(f"95% Confidence Interval: ±{ci_margin:.2f}%\n")
    f.write(f"Range: [{lower_ci:.2f}%, {upper_ci:.2f}%]\n\n")

    # Per-channel stats
    f.write("---- Per-Channel Statistics ----\n")
    for ch in CHANNELS:
        stats = channel_stats[ch]
        f.write(f"\nChannel {ch}:\n")
        f.write(f"  Per-fold accuracies: {stats['per_fold_acc']}\n")
        f.write(f"  Mean accuracy: {stats['mean']:.2f}%\n")
        f.write(f"  Std deviation: {stats['std']:.2f}%\n")
        f.write(f"  95% Confidence Interval: ±{stats['ci_margin']:.2f}%\n")
        f.write(f"  Range: [{stats['ci_lower']:.2f}%, {stats['ci_upper']:.2f}%]\n")

print(f"\nSaved summary to: {summary_file}")



Loaded logs for channels [0, 1, 4, 7] with 5 folds.


Saved: composite_k5/fold_1_ensemble.csv

Saved: composite_k5/fold_2_ensemble.csv

Saved: composite_k5/fold_3_ensemble.csv

Saved: composite_k5/fold_4_ensemble.csv

Saved: composite_k5/fold_5_ensemble.csv

Per-fold final ensemble accuracies: [96.73469387755102, 95.71428571428572, 94.38775510204081, 97.34693877551021, 96.0204081632653]
Mean Accuracy (ensemble): 96.04%
Std Dev (ensemble): 1.12%
95% CI (ensemble): ±1.39%  →  [94.65%, 97.43%]

Channel 0:
  Per-fold accs: [27.5510196685791, 23.87755012512207, 26.53061294555664, 25.0, 27.85714340209961]
  Mean: 26.16%
  Std: 1.70%
  95% CI: ±2.11%  →  [24.06%, 28.27%]

Channel 1:
  Per-fold accs: [79.7959213256836, 82.55101776123047, 84.69387817382812, 82.7551040649414, 81.0204086303711]
  Mean: 82.16%
  Std: 1.86%
  95% CI: ±2.31%  →  [79.86%, 84.47%]

Channel 4:
  Per-fold accs: [89.38775634765625, 89.59183502197266, 90.40816497802734, 91.12245178222656, 87.14286041259766]
  Mean: 89.53