In [1]:
import json
import numpy as np
import os

# Resolve the "results" directory that sits next to the current working
# directory (i.e. <cwd>/../results).
# NOTE(review): despite its name and the printed label, `pwd` is the results
# directory; the cells below use it as the prefix of every JSON path.
cw = os.getcwd()
pwd = os.path.abspath(os.path.join(cw, "..", "results"))
print(f"Parent working directory: {pwd}")

Parent working directory: /proj/sourasb-220503/IoT_attack_CL_IDS/results


In [3]:
import json
import numpy as np

def compute_performance_average(files, ddof=0):
    """
    Summarise the final 'performance_stability' value of every series in each file.

    For each JSON file in `files`, the last element of every non-empty list
    stored under the top-level 'performance_stability' mapping is collected.
    Entries that are not lists, or that are empty lists, are ignored. A file
    whose 'performance_stability' entry is missing, null or empty contributes
    no values.

    Args:
        files: iterable of paths to JSON result files.
        ddof: delta degrees of freedom forwarded to ``np.std``.

    Returns:
        A 4-tuple ``(file_means, file_stds, overall_mean, overall_std)``:
          - file_means:   per-file mean of the collected last values
                          (None for a file that yielded no values)
          - file_stds:    per-file std of the collected last values
                          (None for a file that yielded no values)
          - overall_mean: mean over all last values pooled across files
          - overall_std:  std over all last values pooled across files
        The two overall values are None when no file yielded any value.
    """
    def extract_last_values(fp):
        with open(fp, "r") as f:
            data = json.load(f)
        # `or {}` also covers an explicit JSON null stored under the key.
        perf_stab = data.get("performance_stability", {}) or {}
        last_vals = [v[-1] for v in perf_stab.values() if isinstance(v, list) and len(v) > 0]
        if not last_vals:
            return None, None, np.array([], dtype=float)
        arr = np.asarray(last_vals, dtype=float)
        return float(np.mean(arr)), float(np.std(arr, ddof=ddof)), arr

    file_means, file_stds = [], []
    pooled = []

    for fp in files:
        m, s, arr = extract_last_values(fp)
        # Keep one entry per input file (None when empty) so positions line up with `files`.
        file_means.append(m)
        file_stds.append(s)
        if arr.size:
            pooled.append(arr)

    # Pool every last value across files; each value weighs equally, not each file.
    pooled_arr = np.concatenate(pooled) if pooled else np.array([], dtype=float)
    overall_mean = float(np.mean(pooled_arr)) if pooled_arr.size else None
    overall_std  = float(np.std(pooled_arr, ddof=ddof)) if pooled_arr.size else None

    return file_means, file_stds, overall_mean, overall_std



In [16]:

# WCL, "random" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_WCL_random.json",
    pwd + "/2_experiment_results_LSTM_WCL_random.json",
    pwd + "/3_experiment_results_LSTM_WCL_random.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.47463562962288924, 0.50152549732696, 0.4961553904708209]
Per-file standard deviations: [0.2091139055011413, 0.19585770255590462, 0.19531869592913903]
Overall avg±std : 0.4907721724735566±0.20053533283892663
Overall avg±std : 0.49±0.20


In [15]:

# WCL, "b2w" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_WCL_b2w.json",
    pwd + "/2_experiment_results_LSTM_WCL_b2w.json",
    pwd + "/3_experiment_results_LSTM_WCL_b2w.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.5046047630307199, 0.521512437324695, 0.555772382476299]
Per-file standard deviations: [0.19059189059559353, 0.1983586971434255, 0.21810898197239514]
Overall avg±std : 0.5272965276105713±0.20379909690738002
Overall avg±std : 0.53±0.20


In [17]:

# WCL, "w2b" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_WCL_w2b.json",
    pwd + "/2_experiment_results_LSTM_WCL_w2b.json",
    pwd + "/3_experiment_results_LSTM_WCL_w2b.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.5858279850326342, 0.5616453728354474, 0.6005275935427928]
Per-file standard deviations: [0.2934041261003132, 0.2631176899563266, 0.29372278659931766]
Overall avg±std : 0.5826669838036247±0.2842304679803771
Overall avg±std : 0.58±0.28


In [18]:
# WCL, "toggle" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_WCL_toggle.json",
    pwd + "/2_experiment_results_LSTM_WCL_toggle.json",
    pwd + "/3_experiment_results_LSTM_WCL_toggle.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.582245449243203, 0.5845538452388884, 0.5885582354442725]
Per-file standard deviations: [0.30646025794777815, 0.3062737939343609, 0.3020079311766125]
Overall avg±std : 0.5851191766421213±0.3049320810061081
Overall avg±std : 0.59±0.30


## EWC

In [19]:

# EWC, "random" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_EWC_random.json",
    pwd + "/2_experiment_results_LSTM_EWC_random.json",
    pwd + "/3_experiment_results_LSTM_EWC_random.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.5717079492066316, 0.5944279227035163, 0.5758839485658321]
Per-file standard deviations: [0.21970890784945396, 0.2566740216133694, 0.2180710359327708]
Overall avg±std : 0.5806732734919933±0.23237974619151167
Overall avg±std : 0.58±0.23


In [21]:

# EWC, "b2w" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
# NOTE(review): in the recorded output, the mean/std of file 1 for this cell
# (0.5858279850326342 / 0.2934041261003132) are digit-for-digit identical to
# file 1 of the WCL w2b cell — verify that 1_experiment_results_LSTM_EWC_b2w.json
# is not a copy of, or overwritten by, another experiment's results.
files = [
    pwd + "/1_experiment_results_LSTM_EWC_b2w.json",
    pwd + "/2_experiment_results_LSTM_EWC_b2w.json",
    pwd + "/3_experiment_results_LSTM_EWC_b2w.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.5858279850326342, 0.6342014608050582, 0.564558949007367]
Per-file standard deviations: [0.2934041261003132, 0.22853503956316343, 0.2304414528447301]
Overall avg±std : 0.5948627982816865±0.2542734684091315
Overall avg±std : 0.59±0.25


In [22]:

# EWC, "w2b" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_EWC_w2b.json",
    pwd + "/2_experiment_results_LSTM_EWC_w2b.json",
    pwd + "/3_experiment_results_LSTM_EWC_w2b.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.6204501526685241, 0.6320696048988216, 0.6476483780606966]
Per-file standard deviations: [0.1868478157791697, 0.21994609398488987, 0.22860009664559358]
Overall avg±std : 0.6333893785426807±0.21285274968310802
Overall avg±std : 0.63±0.21


In [23]:

# EWC, "toggle" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_EWC_toggle.json",
    pwd + "/2_experiment_results_LSTM_EWC_toggle.json",
    pwd + "/3_experiment_results_LSTM_EWC_toggle.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.6290402901586375, 0.631108759003066, 0.6407277317319854]
Per-file standard deviations: [0.20978898355707085, 0.22653414937825872, 0.22551983350507523]
Overall avg±std : 0.6336255936312296±0.22080619978515137
Overall avg±std : 0.63±0.22


## SI


In [24]:

# SI, "random" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_SI_random.json",
    pwd + "/2_experiment_results_LSTM_SI_random.json",
    pwd + "/3_experiment_results_LSTM_SI_random.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.5507650209346928, 0.5339300220183713, 0.5505636965665359]
Per-file standard deviations: [0.18151068670546305, 0.20449813025470986, 0.1820375589439451]
Overall avg±std : 0.5450862465065333±0.18981569992603117
Overall avg±std : 0.55±0.19


In [25]:

# SI, "b2w" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_SI_b2w.json",
    pwd + "/2_experiment_results_LSTM_SI_b2w.json",
    pwd + "/3_experiment_results_LSTM_SI_b2w.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.5721709186059861, 0.5295788421071385, 0.5811399018570027]
Per-file standard deviations: [0.2571030473489144, 0.17898907729727898, 0.2794430776565301]
Overall avg±std : 0.5609632208567091±0.24340993582329803
Overall avg±std : 0.56±0.24


In [26]:

# SI, "w2b" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_SI_w2b.json",
    pwd + "/2_experiment_results_LSTM_SI_w2b.json",
    pwd + "/3_experiment_results_LSTM_SI_w2b.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.6383447094037775, 0.5854918006890837, 0.6282956765746267]
Per-file standard deviations: [0.23464470163487128, 0.24106223476999536, 0.24060047785880556]
Overall avg±std : 0.6173773955558293±0.23988417708963752
Overall avg±std : 0.62±0.24


In [27]:

# SI, "toggle" ordering: summarise the last recorded 'performance_stability'
# values of the three result files (filename prefixes 1_, 2_, 3_).
files = [
    pwd + "/1_experiment_results_LSTM_SI_toggle.json",
    pwd + "/2_experiment_results_LSTM_SI_toggle.json",
    pwd + "/3_experiment_results_LSTM_SI_toggle.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

print("Per-file averages:", file_means)
print("Per-file standard deviations:", file_stds)
# The pooled statistic is printed twice: full precision, then two decimals.
print(f"Overall avg±std : {overall_mean}±{overall_std}")
print(f"Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


Per-file averages: [0.6050539160321659, 0.615708005209903, 0.6311555293360179]
Per-file standard deviations: [0.22120464846707105, 0.19769911569706217, 0.23899667684444736]
Overall avg±std : 0.6173058168593624±0.22021226502714958
Overall avg±std : 0.62±0.22


## LwF


In [29]:

# LwF (files tagged alpha_1.0_T_4.0): the same summary is repeated for the four
# orderings. Each section rebinds file_means / file_stds / overall_mean /
# overall_std, so after the cell runs they hold the "toggle" results.

# --- random ordering ---
files_1 = [
    pwd + "/1_experiment_results_LSTM_LwF_random_alpha_1.0_T_4.0.json",
    pwd + "/2_experiment_results_LSTM_LwF_random_alpha_1.0_T_4.0.json",
    pwd + "/3_experiment_results_LSTM_LwF_random_alpha_1.0_T_4.0.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_1)

print("[random] Per-file averages:", file_means)
print("[random] Per-file standard deviations:", file_stds)
print(f"[random] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[random] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


# --- b2w ordering ---
files_2 = [
    pwd + "/1_experiment_results_LSTM_LwF_b2w_alpha_1.0_T_4.0.json",
    pwd + "/2_experiment_results_LSTM_LwF_b2w_alpha_1.0_T_4.0.json",
    pwd + "/3_experiment_results_LSTM_LwF_b2w_alpha_1.0_T_4.0.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_2)

print("[b2w] Per-file averages:", file_means)
print("[b2w] Per-file standard deviations:", file_stds)
print(f"[b2w] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[b2w] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


# --- w2b ordering ---
files_3 = [
    pwd + "/1_experiment_results_LSTM_LwF_w2b_alpha_1.0_T_4.0.json",
    pwd + "/2_experiment_results_LSTM_LwF_w2b_alpha_1.0_T_4.0.json",
    pwd + "/3_experiment_results_LSTM_LwF_w2b_alpha_1.0_T_4.0.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_3)

print("[w2b] Per-file averages:", file_means)
print("[w2b] Per-file standard deviations:", file_stds)
print(f"[w2b] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[w2b] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


# --- toggle ordering ---
files_4 = [
    pwd + "/1_experiment_results_LSTM_LwF_toggle_alpha_1.0_T_4.0.json",
    pwd + "/2_experiment_results_LSTM_LwF_toggle_alpha_1.0_T_4.0.json",
    pwd + "/3_experiment_results_LSTM_LwF_toggle_alpha_1.0_T_4.0.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_4)

print("[toggle] Per-file averages:", file_means)
print("[toggle] Per-file standard deviations:", file_stds)
print(f"[toggle] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[toggle] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


[random] Per-file averages: [0.350999732130844, 0.341222581509715, 0.30815874716445385]
[random] Per-file standard deviations: [0.09707228019504162, 0.08234430486578263, 0.09797663506152952]
[random] Overall avg±std : 0.3334603536016709±0.09453585988689446
[random] Overall avg±std : 0.33±0.09
[b2w] Per-file averages: [0.42317349664947934, 0.5091413717489243, 0.5069067253936671]
[b2w] Per-file standard deviations: [0.1429354083828966, 0.1835696258833955, 0.17832940855825144]
[b2w] Overall avg±std : 0.4797405312640236±0.1739079808433971
[b2w] Overall avg±std : 0.48±0.17
[w2b] Per-file averages: [0.5423424610839075, 0.585085121343076, 0.6102970306974458]
[w2b] Per-file standard deviations: [0.27129130795392004, 0.29257758101604125, 0.26959185912430217]
[w2b] Overall avg±std : 0.5792415377081431±0.2794282962054683
[w2b] Overall avg±std : 0.58±0.28
[toggle] Per-file averages: [0.5462790693605616, 0.566926976493204, 0.5750539768662121]
[toggle] Per-file standard deviations: [0.26460546000148

In [32]:

# Replay (files tagged Replay_REPLAY): the same summary is repeated for the four
# orderings. Each section rebinds file_means / file_stds / overall_mean /
# overall_std, so after the cell runs they hold the "toggle" results.

# --- random ordering ---
files_1 = [
    pwd + "/1_experiment_results_LSTM_Replay_REPLAY_random.json",
    pwd + "/2_experiment_results_LSTM_Replay_REPLAY_random.json",
    pwd + "/3_experiment_results_LSTM_Replay_REPLAY_random.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_1)

print("[random] Per-file averages:", file_means)
print("[random] Per-file standard deviations:", file_stds)
print(f"[random] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[random] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


# --- b2w ordering ---
files_2 = [
    pwd + "/1_experiment_results_LSTM_Replay_REPLAY_b2w.json",
    pwd + "/2_experiment_results_LSTM_Replay_REPLAY_b2w.json",
    pwd + "/3_experiment_results_LSTM_Replay_REPLAY_b2w.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_2)

print("[b2w] Per-file averages:", file_means)
print("[b2w] Per-file standard deviations:", file_stds)
print(f"[b2w] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[b2w] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


# --- w2b ordering ---
files_3 = [
    pwd + "/1_experiment_results_LSTM_Replay_REPLAY_w2b.json",
    pwd + "/2_experiment_results_LSTM_Replay_REPLAY_w2b.json",
    pwd + "/3_experiment_results_LSTM_Replay_REPLAY_w2b.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_3)

print("[w2b] Per-file averages:", file_means)
print("[w2b] Per-file standard deviations:", file_stds)
print(f"[w2b] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[w2b] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


# --- toggle ordering ---
files_4 = [
    pwd + "/1_experiment_results_LSTM_Replay_REPLAY_toggle.json",
    pwd + "/2_experiment_results_LSTM_Replay_REPLAY_toggle.json",
    pwd + "/3_experiment_results_LSTM_Replay_REPLAY_toggle.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_4)

print("[toggle] Per-file averages:", file_means)
print("[toggle] Per-file standard deviations:", file_stds)
print(f"[toggle] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[toggle] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


[random] Per-file averages: [0.7838978897570685, 0.764291818744525, 0.7708863801564384]
[random] Per-file standard deviations: [0.1776528746954364, 0.18614846216463737, 0.1654820788899544]
[random] Overall avg±std : 0.7730253628860108±0.17681928078172876
[random] Overall avg±std : 0.77±0.18
[b2w] Per-file averages: [0.7774659051202804, 0.7714156839956354, 0.7766201022221385]
[b2w] Per-file standard deviations: [0.1787673096573466, 0.16995474097608848, 0.16002584904420025]
[b2w] Overall avg±std : 0.7751672304460181±0.16977642767977486
[b2w] Overall avg±std : 0.78±0.17
[w2b] Per-file averages: [0.7756105810749983, 0.7789822329904726, 0.7905150581604339]
[w2b] Per-file standard deviations: [0.17095000501966134, 0.17374980897860723, 0.1797127129805254]
[w2b] Overall avg±std : 0.7817026240753017±0.1749587875788869
[w2b] Overall avg±std : 0.78±0.17
[toggle] Per-file averages: [0.7768473746199911, 0.7691638490903276, 0.778212820287357]
[toggle] Per-file standard deviations: [0.186508673422027

In [33]:

# GR (labelled "Generative Replay" in the table cells further down): the same
# summary is repeated for the four orderings. Each section rebinds file_means /
# file_stds / overall_mean / overall_std, so after the cell runs they hold the
# "toggle" results.

# --- random ordering ---
files_1 = [
    pwd + "/1_experiment_results_LSTM_GR_random.json",
    pwd + "/2_experiment_results_LSTM_GR_random.json",
    pwd + "/3_experiment_results_LSTM_GR_random.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_1)

print("[random] Per-file averages:", file_means)
print("[random] Per-file standard deviations:", file_stds)
print(f"[random] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[random] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


# --- b2w ordering ---
files_2 = [
    pwd + "/1_experiment_results_LSTM_GR_b2w.json",
    pwd + "/2_experiment_results_LSTM_GR_b2w.json",
    pwd + "/3_experiment_results_LSTM_GR_b2w.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_2)

print("[b2w] Per-file averages:", file_means)
print("[b2w] Per-file standard deviations:", file_stds)
print(f"[b2w] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[b2w] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


# --- w2b ordering ---
files_3 = [
    pwd + "/1_experiment_results_LSTM_GR_w2b.json",
    pwd + "/2_experiment_results_LSTM_GR_w2b.json",
    pwd + "/3_experiment_results_LSTM_GR_w2b.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_3)

print("[w2b] Per-file averages:", file_means)
print("[w2b] Per-file standard deviations:", file_stds)
print(f"[w2b] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[w2b] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


# --- toggle ordering ---
files_4 = [
    pwd + "/1_experiment_results_LSTM_GR_toggle.json",
    pwd + "/2_experiment_results_LSTM_GR_toggle.json",
    pwd + "/3_experiment_results_LSTM_GR_toggle.json"
]

file_means, file_stds, overall_mean, overall_std = compute_performance_average(files_4)

print("[toggle] Per-file averages:", file_means)
print("[toggle] Per-file standard deviations:", file_stds)
print(f"[toggle] Overall avg±std : {overall_mean}±{overall_std}")
print(f"[toggle] Overall avg±std : {overall_mean:.2f}±{overall_std:.2f}")


[random] Per-file averages: [0.5018843808538107, 0.45013126032924405, 0.5152544410634374]
[random] Per-file standard deviations: [0.20348759573135322, 0.1883496791836308, 0.22112934547142654]
[random] Overall avg±std : 0.48909002741549745±0.20667772678581078
[random] Overall avg±std : 0.49±0.21
[b2w] Per-file averages: [0.47328082737079563, 0.46322668651407, 0.5169291298113883]
[b2w] Per-file standard deviations: [0.2104618200957648, 0.1631433107334366, 0.18204581531466937]
[b2w] Overall avg±std : 0.4844788812320847±0.18768827636282345
[b2w] Overall avg±std : 0.48±0.19
[w2b] Per-file averages: [0.5792466914937505, 0.5970834433920374, 0.591059810920628]
[w2b] Per-file standard deviations: [0.28294888127227164, 0.2804555847112314, 0.2715129826256461]
[w2b] Overall avg±std : 0.589129981935472±0.2784477002731582
[w2b] Overall avg±std : 0.59±0.28
[toggle] Per-file averages: [0.592103009758471, 0.5674100219294941, 0.580043834661037]
[toggle] Per-file standard deviations: [0.29332335138157734

## Grouped Performance F1-Score / ROC-AUC

In [20]:
import json
import numpy as np
from collections import defaultdict

def compute_grouped_performance_average(
    files,
    ddof=0,
    groups=("blackhole", "disflooding", "localrepair", "worstparent"),
    case_insensitive=True,
    metric_key="performance_stability",
):
    """
    Aggregate the last value of every series under `metric_key`, bucketed by group prefix.

    Each JSON file is read for a mapping stored under `metric_key` (series name
    -> list of values). A series is assigned to the first entry of `groups` that
    its name starts with. Series that match no group, values that are not
    non-empty lists, and last values that cannot be converted to float are skipped.

    Args:
        files: iterable of JSON file paths.
        ddof: delta degrees of freedom passed to ``np.std``.
        groups: group-name prefixes; they are also the keys of the returned dicts.
        case_insensitive: if True, series names and group names are lower-cased
            before prefix matching; if False they are compared exactly as given.
        metric_key: top-level JSON key to read (defaults to the key this
            function originally hard-coded).

    Returns:
        {
          "per_file": [ { "file": <path>, "groups": { g: {count,mean,std,values} } }, ... ],
          "pooled":   { g: {count,mean,std,values} }
        }
        `mean` and `std` are None when `count` is 0; `values` is a float ndarray.
    """
    group_list = list(groups)
    # Fold the group names only for case-insensitive matching; a case-sensitive
    # call must compare keys against the names exactly as the caller wrote them.
    group_norm = [g.lower() if case_insensitive else g for g in group_list]

    def _summarize(arr):
        # Shared stats record for both the per-file and the pooled arrays.
        has_data = arr.size > 0
        return {
            "count": int(arr.size),
            "mean": float(np.mean(arr)) if has_data else None,
            "std": float(np.std(arr, ddof=ddof)) if has_data else None,
            "values": arr,
        }

    per_file_results = []
    pooled_buckets = {g: [] for g in group_list}

    for fp in files:
        with open(fp, "r") as f:
            data = json.load(f)

        # `or {}` also covers an explicit JSON null stored under the key.
        perf_stab = data.get(metric_key, {}) or {}
        # Final values per group for this file only.
        file_buckets = {g: [] for g in group_list}

        for k, v in perf_stab.items():
            if not (isinstance(v, list) and len(v) > 0):
                continue
            k_cmp = k.lower() if case_insensitive else k

            # First matching prefix wins; stop looking even if the value is unusable.
            for g_name, g_norm in zip(group_list, group_norm):
                if k_cmp.startswith(g_norm):
                    try:
                        file_buckets[g_name].append(float(v[-1]))
                    except (TypeError, ValueError, OverflowError):
                        # Best effort: a non-numeric final value is skipped.
                        pass
                    break

        file_group_stats = {}
        for g in group_list:
            arr = np.asarray(file_buckets[g], dtype=float)
            file_group_stats[g] = _summarize(arr)
            if arr.size:
                pooled_buckets[g].append(arr)

        per_file_results.append({
            "file": fp,
            "groups": file_group_stats
        })

    # Pool per group across files; every value weighs equally, not every file.
    pooled_stats = {}
    for g in group_list:
        chunks = pooled_buckets[g]
        pooled_arr = np.concatenate(chunks, axis=0) if chunks else np.asarray([], dtype=float)
        pooled_stats[g] = _summarize(pooled_arr)

    return {
        "per_file": per_file_results,
        "pooled": pooled_stats
    }


In [21]:
import pandas as pd
import numpy as np

# (group key, display name) pairs. The group key is looked up in the "pooled"
# result of compute_grouped_performance_average; the display name becomes the
# DataFrame column, and list order is the column order.
ATTACK_KEYS = [
    ("blackhole", "Blackhole"),
    ("disflooding", "Disflooding"),
    ("localrepair", "LocalRepair"),
    ("worstparent", "WorstParent"),
]

def methods_for(ordering_suffix: str, base_dir=None):
    """
    Return the methods→filelist dict for a given ordering suffix.

    Args:
        ordering_suffix: ordering tag embedded in the filenames (e.g. "random").
        base_dir: directory holding the result files; when None the notebook-level
            `pwd` is used, which is the original behaviour.

    Returns:
        dict mapping each method label to its three result-file paths
        (filename prefixes 1_, 2_, 3_), in the order WCL, EWC, SI, LwF,
        Replay, Generative Replay.
    """
    root = pwd if base_dir is None else base_dir

    # Method label -> filename part that follows "<run>_experiment_results_LSTM_".
    stems = {
        "WCL": f"WCL_{ordering_suffix}",
        "EWC": f"EWC_{ordering_suffix}",
        "SI": f"SI_{ordering_suffix}",
        "LwF": f"LwF_{ordering_suffix}_alpha_1.0_T_4.0",
        "Replay": f"Replay_REPLAY_{ordering_suffix}",
        "Generative Replay": f"GR_{ordering_suffix}",
    }

    # One template for every path, so the three per-method entries cannot drift apart.
    return {
        method: [f"{root}/{run}_experiment_results_LSTM_{stem}.json" for run in (1, 2, 3)]
        for method, stem in stems.items()
    }

def build_numeric_df(methods_dict):
    """
    Build a float table of pooled means: one row per method, one column per attack.

    Columns follow ATTACK_KEYS order (Blackhole, Disflooding, LocalRepair,
    WorstParent). A group with no pooled mean is stored as NaN so that later
    averaging can skip it.
    """
    column_names = [label for _, label in ATTACK_KEYS]

    table = {}
    for method_name, paths in methods_dict.items():
        pooled_stats = compute_grouped_performance_average(paths, ddof=0)["pooled"]
        cells = {}
        for group_key, label in ATTACK_KEYS:
            mean_val = pooled_stats.get(group_key, {}).get("mean", None)
            cells[label] = np.nan if mean_val is None else float(mean_val)
        table[method_name] = cells

    # Indexing with the label list pins the column order.
    return pd.DataFrame.from_dict(table, orient="index")[column_names]

def format_df_for_print(df):
    """Return a printable copy of `df`: values rounded to 2 decimals, NaN shown as '--'."""
    # round() already returns a new frame, so the caller's data is left untouched.
    return df.round(2).fillna("--")

def to_latex(df):
    """Render `df` as a LaTeX table: index and header shown, row labels bold, no escaping."""
    latex_options = dict(index=True, header=True, escape=False, bold_rows=True)
    # Render from a copy, as before, rather than from the caller's object.
    return df.copy().to_latex(**latex_options)

# ---- One numeric table per ordering (display label -> DataFrame) ----
orderings = {
    label: build_numeric_df(methods_for(suffix))
    for label, suffix in (
        ("Random", "random"),
        ("Best-to-Worst", "b2w"),
        ("Worst-to-Best", "w2b"),
        ("Toggle", "toggle"),
    )
}

# ---- Show the four per-ordering tables ----
for name, numeric_df in orderings.items():
    print(f"\n=== {name} Ordering ===")
    pretty = format_df_for_print(numeric_df)
    print(pretty)
    # LaTeX export is available via to_latex(pretty) when needed.

# ---- Mean of the four tables, per method ----
# Stack the tables under an (ordering, method) MultiIndex, then average the
# rows that share a method name (index level 1).
all_concat = pd.concat(list(orderings.values()), axis=0, keys=list(orderings.keys()))
avg_numeric = all_concat.groupby(level=1).mean(numeric_only=True)

print("\n=== Averaged Across Orderings ===")
avg_pretty = format_df_for_print(avg_numeric)
print(avg_pretty)
# LaTeX export is available via to_latex(avg_pretty) when needed.



=== Random Ordering ===
                   Blackhole  Disflooding  LocalRepair  WorstParent
WCL                     0.31         0.58         0.62         0.47
EWC                     0.56         0.77         0.56         0.46
SI                      0.34         0.60         0.55         0.52
LwF                     0.30         0.33         0.36         0.43
Replay                  0.65         0.97         0.87         0.64
Generative Replay       0.27         0.59         0.54         0.43

=== Best-to-Worst Ordering ===
                   Blackhole  Disflooding  LocalRepair  WorstParent
WCL                     0.32         0.58         0.61         0.48
EWC                     0.45         0.84         0.64         0.49
SI                      0.38         0.96         0.62         0.47
LwF                     0.34         0.58         0.62         0.47
Replay                  0.64         0.96         0.88         0.62
Generative Replay       0.31         0.53         0.51     

In [23]:
import json
import numpy as np
import pandas as pd

# =========================================
# Config
# =========================================
# Column order (display names)
# (group key, display name) pairs; the group keys match the default `groups`
# prefixes of compute_grouped_performance_average below.
ATTACK_KEYS = [
    ("blackhole", "Blackhole"),
    ("disflooding", "Disflooding"),
    ("localrepair", "LocalRepair"),
    ("worstparent", "WorstParent"),
]

# Two metrics ⇒ 5 tables each (4 orderings + averaged) = 10 total
# Each entry is (top-level JSON key passed as `metric_key`, label for display).
METRIC_KEYS = [
    ("roc_auc_stability", "ROC-AUC Stability"),
    ("performance_stability", "Performance Stability"),
]

# Orderings: suffix used in filenames → label used in prints
ORDERING_LABELS = [
    ("random", "Random"),
    ("b2w", "Best-to-Worst"),
    ("w2b", "Worst-to-Best"),
    ("toggle", "Toggle"),
]

# Fixed row order (same labels as the keys returned by methods_for)
ROW_ORDER = ["WCL", "Replay", "EWC", "SI", "LwF", "Generative Replay"]


# =========================================
# Helpers
# =========================================
def compute_grouped_performance_average(
    files,
    ddof=0,
    groups=("blackhole", "disflooding", "localrepair", "worstparent"),
    case_insensitive=True,
    metric_key="performance_stability",
):
    """
    Aggregate last-step values for a metric across JSON files by attack-group prefix.

    Each JSON file is read for a mapping stored under `metric_key` (series name
    -> list of values). A series is assigned to the first entry of `groups` that
    its name starts with. Series that match no group, values that are not
    non-empty lists, and last values that cannot be converted to float are skipped.

    Args:
        files: iterable of JSON file paths.
        ddof: delta degrees of freedom passed to ``np.std``.
        groups: group-name prefixes; they are also the keys of the returned dicts.
        case_insensitive: if True, series names and group names are lower-cased
            before prefix matching; if False they are compared exactly as given.
        metric_key: top-level JSON key to read.

    Returns:
        {
          "per_file": [ { "file": <path>, "groups": { g: {count,mean,std,values} } }, ... ],
          "pooled":   { g: {count,mean,std,values} }
        }
        `mean` and `std` are None when `count` is 0; `values` is a float ndarray.
    """
    group_list = list(groups)
    # Fold the group names only for case-insensitive matching; a case-sensitive
    # call must compare keys against the names exactly as the caller wrote them.
    group_norm = [g.lower() if case_insensitive else g for g in group_list]

    def _summarize(arr):
        # Shared stats record for both the per-file and the pooled arrays.
        has_data = arr.size > 0
        return {
            "count": int(arr.size),
            "mean": float(np.mean(arr)) if has_data else None,
            "std": float(np.std(arr, ddof=ddof)) if has_data else None,
            "values": arr,
        }

    per_file_results = []
    pooled_buckets = {g: [] for g in group_list}

    for fp in files:
        with open(fp, "r") as f:
            data = json.load(f)

        # `or {}` also covers an explicit JSON null stored under the key.
        perf_stab = data.get(metric_key, {}) or {}
        file_buckets = {g: [] for g in group_list}

        for k, v in perf_stab.items():
            if not (isinstance(v, list) and len(v) > 0):
                continue
            k_cmp = k.lower() if case_insensitive else k

            # First matching prefix wins; stop looking even if the value is unusable.
            for g_name, g_norm in zip(group_list, group_norm):
                if k_cmp.startswith(g_norm):
                    try:
                        file_buckets[g_name].append(float(v[-1]))
                    except (TypeError, ValueError, OverflowError):
                        # Best effort: a non-numeric final value is skipped.
                        pass
                    break

        file_group_stats = {}
        for g in group_list:
            arr = np.asarray(file_buckets[g], dtype=float)
            file_group_stats[g] = _summarize(arr)
            if arr.size:
                pooled_buckets[g].append(arr)

        per_file_results.append({
            "file": fp,
            "groups": file_group_stats
        })

    # Pool per group across files; every value weighs equally, not every file.
    pooled_stats = {}
    for g in group_list:
        chunks = pooled_buckets[g]
        pooled_arr = np.concatenate(chunks, axis=0) if chunks else np.asarray([], dtype=float)
        pooled_stats[g] = _summarize(pooled_arr)

    return {
        "per_file": per_file_results,
        "pooled": pooled_stats
    }


def methods_for(ordering_suffix: str):
    """Map each method's display name to its result-file paths for one ordering.

    Every method contributes three JSON paths (run prefixes 1, 2 and 3) located
    directly under the module-level ``pwd`` directory. Only the method tag in
    the file name, and for LwF a hyper-parameter tail after the ordering
    suffix, differ between methods.
    """
    # (display name, tag placed before the ordering suffix, text placed after it)
    name_parts = (
        ("WCL", "WCL", ""),
        ("Replay", "Replay_REPLAY", ""),
        ("EWC", "EWC", ""),
        ("SI", "SI", ""),
        ("LwF", "LwF", "_alpha_1.0_T_4.0"),
        ("Generative Replay", "GR", ""),
    )

    catalog = {}
    for display_name, tag, tail in name_parts:
        catalog[display_name] = [
            f"{pwd}/{run}_experiment_results_LSTM_{tag}_{ordering_suffix}{tail}.json"
            for run in (1, 2, 3)
        ]
    return catalog


def build_numeric_df(methods_dict, metric_key: str):
    """
    Build a numeric (float) table of pooled means: one row per method, one column per attack.

    Parameters
    ----------
    methods_dict : dict
        Maps a method name to the list of result files for that method.
    metric_key : str
        Forwarded to ``compute_grouped_performance_average`` as ``metric_key``.

    Returns
    -------
    pandas.DataFrame
        Indexed by method name. Columns are the display names taken from
        ``ATTACK_KEYS``, in that order. A group whose pooled mean is missing or
        ``None`` is stored as NaN. An empty ``methods_dict`` yields an empty
        frame that still carries the expected columns.
    """
    columns = [disp for _, disp in ATTACK_KEYS]

    rows = {}
    for method, files in methods_dict.items():
        results = compute_grouped_performance_average(files, ddof=0, metric_key=metric_key)
        pooled = results["pooled"]
        row = {}
        for key, disp in ATTACK_KEYS:
            mean = pooled.get(key, {}).get("mean")
            row[disp] = np.nan if mean is None else float(mean)
        rows[method] = row

    # reindex (instead of df[columns]) enforces the column order and also works
    # when `rows` is empty, where plain column selection would raise KeyError.
    return pd.DataFrame.from_dict(rows, orient="index").reindex(columns=columns)


def format_df_for_print(df):
    """Return a display version of `df`: rows in ROW_ORDER, values rounded to 2 decimals, NaN shown as '--'."""
    # reindex and round each return a new frame, so the caller's `df` is left untouched.
    rounded = df.reindex(ROW_ORDER).round(2)
    return rounded.fillna("--")


def to_latex(df):
    """Render `df` as a LaTeX table with rows arranged in ROW_ORDER and bold row labels."""
    latex_options = {
        "index": True,
        "header": True,
        "escape": False,
        "bold_rows": True,
    }
    return df.reindex(ROW_ORDER).to_latex(**latex_options)


# =========================================
# Main: build & print 10 tables
# =========================================
for metric_key, metric_title in METRIC_KEYS:
    # Banner announcing which metric the tables below report.
    print("\n\n############################")
    print(f"### Metric: {metric_title} ({metric_key})")
    print("############################")

    # One unformatted (float) table per ordering, keyed by the ordering's display label.
    orderings = {}
    for suf, label in ORDERING_LABELS:
        files_by_method = methods_for(suf)
        orderings[label] = build_numeric_df(files_by_method, metric_key=metric_key)

    # Per-ordering tables, rows arranged according to ROW_ORDER.
    for _, label in ORDERING_LABELS:
        numeric_df = orderings[label].reindex(ROW_ORDER)
        pretty = format_df_for_print(numeric_df)
        print(f"\n=== {metric_title} — {label} Ordering ===")
        print(pretty)
        # Uncomment if you want LaTeX too:
        # print(to_latex(pretty))

    # Final table: stack every ordering under an (ordering, method) MultiIndex,
    # then average per method (index level 1) across the orderings.
    ordering_names = list(orderings.keys())
    ordering_tables = list(orderings.values())
    all_concat = pd.concat(ordering_tables, axis=0, keys=ordering_names)
    per_method_mean = all_concat.groupby(level=1).mean(numeric_only=True)
    avg_numeric = per_method_mean.reindex(ROW_ORDER)

    avg_pretty = format_df_for_print(avg_numeric)
    print(f"\n=== {metric_title} — Averaged Across Orderings ===")
    print(avg_pretty)
    # Uncomment if you want LaTeX too:
    # print(to_latex(avg_pretty))




############################
### Metric: ROC-AUC Stability (roc_auc_stability)
############################

=== ROC-AUC Stability — Random Ordering ===
                   Blackhole  Disflooding  LocalRepair  WorstParent
WCL                     0.47         0.90         0.78         0.59
Replay                  0.70         1.00         0.95         0.70
EWC                     0.71         1.00         0.64         0.57
SI                      0.56         1.00         0.68         0.59
LwF                     0.49         0.44         0.62         0.61
Generative Replay       0.44         0.79         0.68         0.53

=== ROC-AUC Stability — Best-to-Worst Ordering ===
                   Blackhole  Disflooding  LocalRepair  WorstParent
WCL                     0.45         0.95         0.78         0.63
Replay                  0.69         1.00         0.95         0.69
EWC                     0.56         1.00         0.80         0.61
SI                      0.56         1.00    

In [None]:

# Generative Replay (GR): per-file and pooled summary of the results for each
# task ordering. One loop replaces four copy-pasted stanzas that differed only
# in the ordering tag.
gr_files_by_ordering = {
    ordering: [
        f"{pwd}/{run}_experiment_results_LSTM_GR_{ordering}.json"
        for run in (1, 2, 3)
    ]
    for ordering in ("random", "b2w", "w2b", "toggle")
}

# Keep the original per-ordering names bound in case other cells refer to them.
files_1, files_2, files_3, files_4 = gr_files_by_ordering.values()


def _fmt_2dp(value):
    """Return `value` formatted to two decimals, or '--' when it is None."""
    return "--" if value is None else f"{value:.2f}"


for ordering, files in gr_files_by_ordering.items():
    file_means, file_stds, overall_mean, overall_std = compute_performance_average(files)

    print(f"[{ordering}] Per-file averages:", file_means)
    print(f"[{ordering}] Per-file standard deviations:", file_stds)
    print(f"[{ordering}] Overall avg±std : {overall_mean}±{overall_std}")
    # The overall stats can be None when no values were found; applying
    # ':.2f' to None raises TypeError, so format through the None-safe helper.
    print(f"[{ordering}] Overall avg±std : {_fmt_2dp(overall_mean)}±{_fmt_2dp(overall_std)}")