In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import plot_utils as pu
import shutil

# --- 0) Paths & CSVs
# One statistics CSV per mouse for the lookback sweep; the dict key is written
# into the 'mouse' column when the file is loaded.
files = {
    "25341": "statistics/lookback25341.csv",
    "24617": "statistics/lookback24617.csv",
    "24705": "statistics/lookback24705.csv",
    "25387": "statistics/lookback25387.csv",
    "25133": "statistics/lookback25133.csv",
}

# Only these metrics
desired_metrics = ["Accuracy", "Recall", "Specificity", "F1", "Pearson Correlation"]
real_splits = ["Train", "Val", "Test"]
DISPLAY_NAME = {"Recall": "Sensitivity"}  # for labels only

# --- 1) Load all CSVs and tag 'mouse'
mice = []
for mid, path in files.items():
    df = pd.read_csv(path).assign(mouse=mid)
    mice.append(df)
all_df = pd.concat(mice, ignore_index=True)

# Normalize column names: a 'Sensitivity' column is treated as 'Recall'.
# DataFrame.rename ignores keys that are absent, so no pre-filtering is needed.
rename_map = {"Sensitivity": "Recall"}
all_df = all_df.rename(columns=rename_map)

# --- 1a) Extract lookback & split from 'run' if needed
if 'lookback' not in all_df.columns:
    if 'run' not in all_df.columns:
        raise KeyError("expected a 'lookback' column, or a 'run' column to parse it from")
    # Keep the raw capture (NaN where the pattern is absent). The numeric
    # coercion + dropna below discards those rows; casting to int here would
    # raise as soon as a single run name does not contain 'lb<digits>_'.
    all_df['lookback'] = all_df['run'].str.extract(r'lb(\d+)_')[0]

if 'split' not in all_df.columns and 'run' in all_df.columns:
    # Coerce instead of astype(int) so run names without 'split<digits>' yield
    # NaN rather than an exception; unmapped numbers also become NaN via map().
    split_num = pd.to_numeric(all_df['run'].str.extract(r'split(\d+)')[0], errors='coerce')
    all_df['split'] = split_num.map({1: 'Train', 2: 'Val', 3: 'Test'})

# Rows whose lookback cannot be parsed as a number are dropped before the int cast.
all_df['lookback'] = pd.to_numeric(all_df['lookback'], errors='coerce')
all_df = all_df.dropna(subset=['lookback'])
all_df['lookback'] = all_df['lookback'].astype(int)

# Keep only the selected metrics that actually exist
metrics = [m for m in desired_metrics if m in all_df.columns]

# -------- Average per neuron first (if neuron id exists) ----------
# The first candidate column name found in the data is used as the neuron key.
NEURON_COL = next((c for c in ['neuron_id','Neuron_ID','Neuron_IDs','unit','neuron']
                   if c in all_df.columns), None)
base_df = all_df
if NEURON_COL is not None:
    # Collapse repeated rows of the same neuron to one mean per
    # (mouse, lookback, split, neuron) before computing per-mouse statistics.
    base_df = (
        all_df
        .groupby(['mouse','lookback','split', NEURON_COL])[metrics]
        .mean()
        .reset_index()
    )

# --- 2) Within-mouse aggregation
# Result has two-level columns: (metric, 'mean') and (metric, 'sem').
within_mouse = (
    base_df
    .groupby(['mouse', 'lookback', 'split'])[metrics]
    .agg(['mean', 'sem'])
    .reset_index()
)

# --- 3) Across-mice aggregation
def collapse_across_mice(group: pd.DataFrame, metric_names=None) -> pd.Series:
    """Collapse per-mouse means into one across-mice mean and SEM per metric.

    Used as the callable for ``groupby(...).apply``: ``group`` holds one row
    per mouse and must expose a ``(metric, 'mean')`` column for every metric.

    Parameters
    ----------
    group : pd.DataFrame
        Rows to collapse; read through the two-level keys ``(metric, 'mean')``.
    metric_names : iterable of str, optional
        Metrics to process. Defaults to the notebook-level ``metrics`` list, so
        existing ``.apply(collapse_across_mice)`` calls behave as before.

    Returns
    -------
    pd.Series
        Indexed by ``(metric, 'mean')`` and ``(metric, 'sem')``. The SEM is the
        sample standard deviation (ddof=1) of the per-mouse means divided by
        sqrt(n); it is 0.0 when a single mouse contributes and NaN when none do.
    """
    names = metrics if metric_names is None else metric_names
    out = {}
    for m in names:
        vals = group[(m, 'mean')].to_numpy(dtype=float)
        # Skip mice whose mean is undefined so one NaN does not wipe out the
        # whole point (pandas' own mean/sem skip NaN in the within-mouse step).
        vals = vals[~np.isnan(vals)]
        n = vals.size
        out[(m, 'mean')] = vals.mean() if n > 0 else np.nan
        if n > 1:
            out[(m, 'sem')] = vals.std(ddof=1) / np.sqrt(n)
        else:
            out[(m, 'sem')] = 0.0 if n == 1 else np.nan
    return pd.Series(out)

# One row per (lookback, split) holding the across-mice mean and SEM per metric.
# NOTE(review): the saved cell output shows a warning pointing at the .apply
# line below — possibly the pandas groupby.apply grouping-columns deprecation; confirm.
across_mice = (
    within_mouse
    .groupby(['lookback', 'split'])
    .apply(collapse_across_mice)
    .reset_index()
).sort_values(['split','lookback'])

# ================== PLOTTING (fixed margins & size) ==================
out_dir = "results/sensitivity/plots"
plot_suffix = "_vs_lookback.png"
os.makedirs(out_dir, exist_ok=True)
# Clear only this sweep's earlier figures. The other sweep cells in this
# notebook save into the same directory, so deleting the whole tree here
# would throw away their plots.
for old_name in os.listdir(out_dir):
    if old_name.endswith(plot_suffix):
        os.remove(os.path.join(out_dir, old_name))
print("→ writing plots into:", os.path.abspath(out_dir))

COLORS = {"Train": "blue", "Val": "green", "Test": "red"}

for m in metrics:
    disp = DISPLAY_NAME.get(m, m)

    # rectangular canvas to avoid crowding
    fig, ax = pu.configure_plot(
        title=f"{disp} vs Lookback",
        xlabel="Lookback",
        ylabel=disp,
        fontsize=24,
        figsize=(8, 6),   # <= key difference: wide canvas
        spine="left"
    )
    ax.title.set_y(1.02)

    # Splits that actually received a curve; drives both the skip check and
    # the legend colours below.
    plotted_splits = []
    for split in real_splits:
        sub = across_mice[across_mice["split"] == split]
        if sub.empty or (m, "mean") not in sub.columns:
            continue

        # sorted x for clean lines
        sub = sub.sort_values("lookback")
        x    = sub["lookback"].to_numpy()
        y    = sub[(m, "mean")].to_numpy()
        yerr = sub[(m, "sem")].to_numpy()

        ax.errorbar(
            x, y, yerr=yerr,
            label=split,
            color=COLORS[split],
            marker="o", markersize=6,
            linewidth=2,
            **pu.ERROR_KW(elinewidth=2, capsize=5)
        )
        plotted_splits.append(split)

    if not plotted_splits:
        # Nothing to show for this metric: release the figure and move on.
        plt.close(fig)
        continue

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

    pu.configure_legend(
        ax,
        colors=[COLORS[s] for s in plotted_splits],
        loc="upper left",
        bbox_to_anchor=(1.02, 1),   # legend fully outside
        legend_line=("vertical", 1),
        font_size=14
    )

    # crucial: give space for big fonts & thick spines
    fig.tight_layout()
    fig.subplots_adjust(left=0.18, right=0.78, bottom=0.18, top=0.88)

    out_path = os.path.join(out_dir, f"{disp.lower().replace(' ','_')}{plot_suffix}")
    print("→ saving", out_path)
    fig.savefig(out_path, dpi=300)
    plt.close(fig)

  .apply(collapse_across_mice)
  .apply(collapse_across_mice)


→ writing plots into: c:\Users\marin\Documents\GitHub\marina_ITE\results\sensitivity\plots
→ saving results/sensitivity/plots\accuracy_vs_lookback.png
→ saving results/sensitivity/plots\sensitivity_vs_lookback.png
→ saving results/sensitivity/plots\specificity_vs_lookback.png
→ saving results/sensitivity/plots\f1_vs_lookback.png
→ saving results/sensitivity/plots\pearson_correlation_vs_lookback.png


In [2]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import plot_utils as pu
import shutil

# --- 0) Paths & CSVs  (hidden_size files)
# One statistics CSV per mouse for the hidden-size sweep; the dict key is
# written into the 'mouse' column when the file is loaded.
files = {
    "25341": "statistics/hidden_sizes25341.csv",
    "24617": "statistics/hidden_sizes24617.csv",
    "24705": "statistics/hidden_sizes24705.csv",
    "25387": "statistics/hidden_sizes25387.csv",
    "25133": "statistics/hidden_sizes25133.csv",
}

# Only these metrics
desired_metrics = ["Accuracy", "Recall", "Specificity", "F1", "Pearson Correlation"]
real_splits = ["Train", "Val", "Test"]
DISPLAY_NAME = {"Recall": "Sensitivity"}  # for labels only

# --- 1) Load all CSVs and tag 'mouse'
mice = []
for mid, path in files.items():
    df = pd.read_csv(path).assign(mouse=mid)
    mice.append(df)
all_df = pd.concat(mice, ignore_index=True)

# Normalize column names: a 'Sensitivity' column is treated as 'Recall'.
# DataFrame.rename ignores keys that are absent, so no pre-filtering is needed.
rename_map = {"Sensitivity": "Recall"}
all_df = all_df.rename(columns=rename_map)

# --- 1a) Extract hidden_size & split from 'run' if needed
if 'hidden_size' not in all_df.columns:
    if 'run' not in all_df.columns:
        raise KeyError("expected a 'hidden_size' column, or a 'run' column to parse it from")
    # Keep the raw capture (NaN where the pattern is absent). The numeric
    # coercion + dropna below discards those rows; casting to int here would
    # raise as soon as a single run name does not contain 'hs<digits>_'.
    all_df['hidden_size'] = all_df['run'].str.extract(r'hs(\d+)_')[0]

if 'split' not in all_df.columns and 'run' in all_df.columns:
    # Coerce instead of astype(int) so run names without 'split<digits>' yield
    # NaN rather than an exception; unmapped numbers also become NaN via map().
    split_num = pd.to_numeric(all_df['run'].str.extract(r'split(\d+)')[0], errors='coerce')
    all_df['split'] = split_num.map({1: 'Train', 2: 'Val', 3: 'Test'})

# Rows whose hidden_size cannot be parsed as a number are dropped before the int cast.
all_df['hidden_size'] = pd.to_numeric(all_df['hidden_size'], errors='coerce')
all_df = all_df.dropna(subset=['hidden_size'])
all_df['hidden_size'] = all_df['hidden_size'].astype(int)

# Keep only the selected metrics that actually exist
metrics = [m for m in desired_metrics if m in all_df.columns]

# -------- Average per neuron first (if neuron id exists) ----------
# The first candidate column name found in the data is used as the neuron key.
NEURON_COL = next((c for c in ['neuron_id','Neuron_ID','Neuron_IDs','unit','neuron']
                   if c in all_df.columns), None)
base_df = all_df
if NEURON_COL is not None:
    # Collapse repeated rows of the same neuron to one mean per
    # (mouse, hidden_size, split, neuron) before computing per-mouse statistics.
    base_df = (
        all_df
        .groupby(['mouse','hidden_size','split', NEURON_COL])[metrics]
        .mean()
        .reset_index()
    )

# --- 2) Within-mouse aggregation
# Result has two-level columns: (metric, 'mean') and (metric, 'sem').
within_mouse = (
    base_df
    .groupby(['mouse', 'hidden_size', 'split'])[metrics]
    .agg(['mean', 'sem'])
    .reset_index()
)

# --- 3) Across-mice aggregation
def collapse_across_mice(group: pd.DataFrame, metric_names=None) -> pd.Series:
    """Collapse per-mouse means into one across-mice mean and SEM per metric.

    Used as the callable for ``groupby(...).apply``: ``group`` holds one row
    per mouse and must expose a ``(metric, 'mean')`` column for every metric.

    Parameters
    ----------
    group : pd.DataFrame
        Rows to collapse; read through the two-level keys ``(metric, 'mean')``.
    metric_names : iterable of str, optional
        Metrics to process. Defaults to the notebook-level ``metrics`` list, so
        existing ``.apply(collapse_across_mice)`` calls behave as before.

    Returns
    -------
    pd.Series
        Indexed by ``(metric, 'mean')`` and ``(metric, 'sem')``. The SEM is the
        sample standard deviation (ddof=1) of the per-mouse means divided by
        sqrt(n); it is 0.0 when a single mouse contributes and NaN when none do.
    """
    names = metrics if metric_names is None else metric_names
    out = {}
    for m in names:
        vals = group[(m, 'mean')].to_numpy(dtype=float)
        # Skip mice whose mean is undefined so one NaN does not wipe out the
        # whole point (pandas' own mean/sem skip NaN in the within-mouse step).
        vals = vals[~np.isnan(vals)]
        n = vals.size
        out[(m, 'mean')] = vals.mean() if n > 0 else np.nan
        if n > 1:
            out[(m, 'sem')] = vals.std(ddof=1) / np.sqrt(n)
        else:
            out[(m, 'sem')] = 0.0 if n == 1 else np.nan
    return pd.Series(out)

# One row per (hidden_size, split) holding the across-mice mean and SEM per metric.
# NOTE(review): the saved cell output shows a warning pointing at the .apply
# line below — possibly the pandas groupby.apply grouping-columns deprecation; confirm.
across_mice = (
    within_mouse
    .groupby(['hidden_size', 'split'])
    .apply(collapse_across_mice)
    .reset_index()
).sort_values(['split','hidden_size'])

# ================== PLOTTING (fixed margins & size) ==================
out_dir = "results/sensitivity/plots"
plot_suffix = "_vs_hidden_size.png"
os.makedirs(out_dir, exist_ok=True)
# Clear only this sweep's earlier figures. The other sweep cells in this
# notebook save into the same directory, so deleting the whole tree here
# would throw away their plots.
for old_name in os.listdir(out_dir):
    if old_name.endswith(plot_suffix):
        os.remove(os.path.join(out_dir, old_name))
print("→ writing plots into:", os.path.abspath(out_dir))

COLORS = {"Train": "blue", "Val": "green", "Test": "red"}

for m in metrics:
    disp = DISPLAY_NAME.get(m, m)

    fig, ax = pu.configure_plot(
        title=f"{disp} vs Hidden Size",
        xlabel="Hidden Size",
        ylabel=disp,
        fontsize=24,
        figsize=(8, 6),
        spine="left"
    )
    ax.title.set_y(1.02)

    # Splits that actually received a curve; drives both the skip check and
    # the legend colours below.
    plotted_splits = []
    for split in real_splits:
        sub = across_mice[across_mice["split"] == split]
        if sub.empty or (m, "mean") not in sub.columns:
            continue

        # sorted x so the connecting line runs left to right
        sub = sub.sort_values("hidden_size")
        x    = sub["hidden_size"].to_numpy()
        y    = sub[(m, "mean")].to_numpy()
        yerr = sub[(m, "sem")].to_numpy()

        ax.errorbar(
            x, y, yerr=yerr,
            label=split,
            color=COLORS[split],
            marker="o", markersize=6,
            linewidth=2,
            **pu.ERROR_KW(elinewidth=2, capsize=5)
        )
        plotted_splits.append(split)

    if not plotted_splits:
        # Nothing to show for this metric: release the figure and move on.
        plt.close(fig)
        continue

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

    pu.configure_legend(
        ax,
        colors=[COLORS[s] for s in plotted_splits],
        loc="upper left",
        bbox_to_anchor=(1.02, 1),
        legend_line=("vertical", 1),
        font_size=14
    )

    # Explicit margins leave room on the right for the outside legend.
    fig.tight_layout()
    fig.subplots_adjust(left=0.18, right=0.78, bottom=0.18, top=0.88)

    out_path = os.path.join(out_dir, f"{disp.lower().replace(' ','_')}{plot_suffix}")
    print("→ saving", out_path)
    fig.savefig(out_path, dpi=300)
    plt.close(fig)


  .apply(collapse_across_mice)
  .apply(collapse_across_mice)


→ writing plots into: c:\Users\marin\Documents\GitHub\marina_ITE\results\sensitivity\plots
→ saving results/sensitivity/plots\accuracy_vs_hidden_size.png
→ saving results/sensitivity/plots\sensitivity_vs_hidden_size.png
→ saving results/sensitivity/plots\specificity_vs_hidden_size.png
→ saving results/sensitivity/plots\f1_vs_hidden_size.png
→ saving results/sensitivity/plots\pearson_correlation_vs_hidden_size.png


In [5]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import plot_utils as pu
import shutil

# --- 0) Paths & CSVs  (output_size files)
# One statistics CSV per mouse for the output-size sweep; the dict key is
# written into the 'mouse' column when the file is loaded.
files = {
    "25341": "statistics/newoutput_size25341.csv",
    "24617": "statistics/newoutput_size24617.csv",
    #"24705": "statistics/newoutput_size24705.csv",
    "25387": "statistics/newoutput_size25387.csv",
    "25133": "statistics/newoutput_size25133.csv",
}

# Only these metrics
desired_metrics = ["Accuracy", "Recall", "Specificity", "F1", "Pearson Correlation"]
real_splits = ["Train", "Val", "Test"]
DISPLAY_NAME = {"Recall": "Sensitivity"}  # for labels only

# --- 1) Load all CSVs and tag 'mouse'
mice = []
for mid, path in files.items():
    df = pd.read_csv(path).assign(mouse=mid)
    mice.append(df)
all_df = pd.concat(mice, ignore_index=True)

# Normalize column names: a 'Sensitivity' column is treated as 'Recall'.
# DataFrame.rename ignores keys that are absent, so no pre-filtering is needed.
rename_map = {"Sensitivity": "Recall"}
all_df = all_df.rename(columns=rename_map)

# --- 1a) Extract output_size & split from 'run' if needed
if 'output_size' not in all_df.columns:
    if 'run' not in all_df.columns:
        raise KeyError("expected an 'output_size' column, or a 'run' column to parse it from")
    # Keep the raw capture (NaN where the pattern is absent). The numeric
    # coercion + dropna below discards those rows; casting to int here would
    # raise as soon as a single run name does not contain 'os<digits>_'.
    all_df['output_size'] = all_df['run'].str.extract(r'os(\d+)_')[0]

if 'split' not in all_df.columns and 'run' in all_df.columns:
    # Coerce instead of astype(int) so run names without 'split<digits>' yield
    # NaN rather than an exception; unmapped numbers also become NaN via map().
    split_num = pd.to_numeric(all_df['run'].str.extract(r'split(\d+)')[0], errors='coerce')
    all_df['split'] = split_num.map({1: 'Train', 2: 'Val', 3: 'Test'})

# Rows whose output_size cannot be parsed as a number are dropped before the int cast.
all_df['output_size'] = pd.to_numeric(all_df['output_size'], errors='coerce')
all_df = all_df.dropna(subset=['output_size'])
all_df['output_size'] = all_df['output_size'].astype(int)

# Keep only the selected metrics that actually exist
metrics = [m for m in desired_metrics if m in all_df.columns]

# -------- Average per neuron first (if neuron id exists) ----------
# The first candidate column name found in the data is used as the neuron key.
NEURON_COL = next((c for c in ['neuron_id','Neuron_ID','Neuron_IDs','unit','neuron']
                   if c in all_df.columns), None)
base_df = all_df
if NEURON_COL is not None:
    # Collapse repeated rows of the same neuron to one mean per
    # (mouse, output_size, split, neuron) before computing per-mouse statistics.
    base_df = (
        all_df
        .groupby(['mouse','output_size','split', NEURON_COL])[metrics]
        .mean()
        .reset_index()
    )

# --- 2) Within-mouse aggregation
# Result has two-level columns: (metric, 'mean') and (metric, 'sem').
within_mouse = (
    base_df
    .groupby(['mouse', 'output_size', 'split'])[metrics]
    .agg(['mean', 'sem'])
    .reset_index()
)

# --- 3) Across-mice aggregation
def collapse_across_mice(group: pd.DataFrame, metric_names=None) -> pd.Series:
    """Collapse per-mouse means into one across-mice mean and SEM per metric.

    Used as the callable for ``groupby(...).apply``: ``group`` holds one row
    per mouse and must expose a ``(metric, 'mean')`` column for every metric.

    Parameters
    ----------
    group : pd.DataFrame
        Rows to collapse; read through the two-level keys ``(metric, 'mean')``.
    metric_names : iterable of str, optional
        Metrics to process. Defaults to the notebook-level ``metrics`` list, so
        existing ``.apply(collapse_across_mice)`` calls behave as before.

    Returns
    -------
    pd.Series
        Indexed by ``(metric, 'mean')`` and ``(metric, 'sem')``. The SEM is the
        sample standard deviation (ddof=1) of the per-mouse means divided by
        sqrt(n); it is 0.0 when a single mouse contributes and NaN when none do.
    """
    names = metrics if metric_names is None else metric_names
    out = {}
    for m in names:
        vals = group[(m, 'mean')].to_numpy(dtype=float)
        # Skip mice whose mean is undefined so one NaN does not wipe out the
        # whole point (pandas' own mean/sem skip NaN in the within-mouse step).
        vals = vals[~np.isnan(vals)]
        n = vals.size
        out[(m, 'mean')] = vals.mean() if n > 0 else np.nan
        if n > 1:
            out[(m, 'sem')] = vals.std(ddof=1) / np.sqrt(n)
        else:
            out[(m, 'sem')] = 0.0 if n == 1 else np.nan
    return pd.Series(out)

# One row per (output_size, split) holding the across-mice mean and SEM per metric.
# NOTE(review): the saved cell output shows a warning pointing at the .apply
# line below — possibly the pandas groupby.apply grouping-columns deprecation; confirm.
across_mice = (
    within_mouse
    .groupby(['output_size', 'split'])
    .apply(collapse_across_mice)
    .reset_index()
).sort_values(['split','output_size'])

# ================== PLOTTING (fixed margins & size) ==================
out_dir = "results/sensitivity/plots"
plot_suffix = "_vs_output_size.png"
os.makedirs(out_dir, exist_ok=True)
# Clear only this sweep's earlier figures. The other sweep cells in this
# notebook save into the same directory, so deleting the whole tree here
# would throw away their plots.
for old_name in os.listdir(out_dir):
    if old_name.endswith(plot_suffix):
        os.remove(os.path.join(out_dir, old_name))
print("→ writing plots into:", os.path.abspath(out_dir))

COLORS = {"Train": "blue", "Val": "green", "Test": "red"}

for m in metrics:
    disp = DISPLAY_NAME.get(m, m)

    fig, ax = pu.configure_plot(
        title=f"{disp} vs Output Size",
        xlabel="Output Size",
        ylabel=disp,
        fontsize=24,
        figsize=(8, 6),
        spine="left"
    )
    ax.title.set_y(1.02)

    # Splits that actually received a curve; drives both the skip check and
    # the legend colours below.
    plotted_splits = []
    for split in real_splits:
        sub = across_mice[across_mice["split"] == split]
        if sub.empty or (m, "mean") not in sub.columns:
            continue

        # sorted x so the connecting line runs left to right
        sub = sub.sort_values("output_size")
        x    = sub["output_size"].to_numpy()
        y    = sub[(m, "mean")].to_numpy()
        yerr = sub[(m, "sem")].to_numpy()

        ax.errorbar(
            x, y, yerr=yerr,
            label=split,
            color=COLORS[split],
            marker="o", markersize=6,
            linewidth=2,
            **pu.ERROR_KW(elinewidth=2, capsize=5)
        )
        plotted_splits.append(split)

    if not plotted_splits:
        # Nothing to show for this metric: release the figure and move on.
        plt.close(fig)
        continue

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

    pu.configure_legend(
        ax,
        colors=[COLORS[s] for s in plotted_splits],
        loc="upper left",
        bbox_to_anchor=(1.02, 1),
        legend_line=("vertical", 1),
        font_size=14
    )

    # Explicit margins leave room on the right for the outside legend.
    fig.tight_layout()
    fig.subplots_adjust(left=0.18, right=0.78, bottom=0.18, top=0.88)

    out_path = os.path.join(out_dir, f"{disp.lower().replace(' ','_')}{plot_suffix}")
    print("→ saving", out_path)
    fig.savefig(out_path, dpi=300)
    plt.close(fig)


  .apply(collapse_across_mice)
  .apply(collapse_across_mice)


→ writing plots into: c:\Users\marin\Documents\GitHub\marina_ITE\results\sensitivity\plots
→ saving results/sensitivity/plots\accuracy_vs_output_size.png
→ saving results/sensitivity/plots\sensitivity_vs_output_size.png
→ saving results/sensitivity/plots\specificity_vs_output_size.png
→ saving results/sensitivity/plots\f1_vs_output_size.png
→ saving results/sensitivity/plots\pearson_correlation_vs_output_size.png


In [2]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import plot_utils as pu
import shutil

# --- 0) Paths & CSVs  (threshold files)
# One statistics CSV per mouse for the threshold sweep; the dict key is
# written into the 'mouse' column when the file is loaded.
files = {
    "25341": "statistics/threshold25341.csv",
    "24617": "statistics/threshold24617.csv",
    "24705": "statistics/threshold24705.csv",
    "25387": "statistics/threshold25387.csv",
    "25133": "statistics/threshold25133.csv",
}

# Only these metrics
desired_metrics = ["Accuracy", "Recall", "Specificity", "F1", "Pearson Correlation"]
real_splits = ["Train", "Val", "Test"]
DISPLAY_NAME = {"Recall": "Sensitivity"}  # for labels only

# --- 1) Load all CSVs and tag 'mouse'
mice = []
for mid, path in files.items():
    df = pd.read_csv(path).assign(mouse=mid)
    mice.append(df)
all_df = pd.concat(mice, ignore_index=True)

# Normalize column names: a 'Sensitivity' column is treated as 'Recall'.
# DataFrame.rename ignores keys that are absent, so no pre-filtering is needed.
rename_map = {"Sensitivity": "Recall"}
all_df = all_df.rename(columns=rename_map)

# --- 1a) Extract threshold & split from 'run' if needed
if 'threshold' not in all_df.columns:
    if 'run' not in all_df.columns:
        raise KeyError("expected a 'threshold' column, or a 'run' column to parse it from")
    # try common patterns: th..., thr..., threshold..., t...
    # The first pattern that matches at least one run name is used for all rows.
    # NOTE(review): the bare 't<number>_' fallback would also match the tail of
    # a token such as 'split1_' — confirm run names cannot trigger that.
    pattns = [
        r'th(?:r(?:esh(?:old)?)?)?([0-9]*\.?[0-9]+)_',  # th0.5_ / thr0.50_ / threshold0.7_
        r't([0-9]*\.?[0-9]+)_',                         # t0.60_
    ]
    th = None
    for p in pattns:
        th = all_df['run'].str.extract(p)[0]
        if th.notna().any():
            break
    all_df['threshold'] = pd.to_numeric(th, errors='coerce')

if 'split' not in all_df.columns and 'run' in all_df.columns:
    # Coerce instead of astype(int) so run names without 'split<digits>' yield
    # NaN rather than an exception; unmapped numbers also become NaN via map().
    split_num = pd.to_numeric(all_df['run'].str.extract(r'split(\d+)')[0], errors='coerce')
    all_df['split'] = split_num.map({1: 'Train', 2: 'Val', 3: 'Test'})

# Thresholds stay floating point; rows without a parseable value are dropped.
all_df['threshold'] = pd.to_numeric(all_df['threshold'], errors='coerce')
all_df = all_df.dropna(subset=['threshold'])

# Keep only the selected metrics that actually exist
metrics = [m for m in desired_metrics if m in all_df.columns]

# -------- Average per neuron first (if neuron id exists) ----------
# The first candidate column name found in the data is used as the neuron key.
NEURON_COL = next((c for c in ['neuron_id','Neuron_ID','Neuron_IDs','unit','neuron']
                   if c in all_df.columns), None)
base_df = all_df
if NEURON_COL is not None:
    # Collapse repeated rows of the same neuron to one mean per
    # (mouse, threshold, split, neuron) before computing per-mouse statistics.
    base_df = (
        all_df
        .groupby(['mouse','threshold','split', NEURON_COL])[metrics]
        .mean()
        .reset_index()
    )

# --- 2) Within-mouse aggregation
# Result has two-level columns: (metric, 'mean') and (metric, 'sem').
within_mouse = (
    base_df
    .groupby(['mouse', 'threshold', 'split'])[metrics]
    .agg(['mean', 'sem'])
    .reset_index()
)

# --- 3) Across-mice aggregation
def collapse_across_mice(group: pd.DataFrame, metric_names=None) -> pd.Series:
    """Collapse per-mouse means into one across-mice mean and SEM per metric.

    Used as the callable for ``groupby(...).apply``: ``group`` holds one row
    per mouse and must expose a ``(metric, 'mean')`` column for every metric.

    Parameters
    ----------
    group : pd.DataFrame
        Rows to collapse; read through the two-level keys ``(metric, 'mean')``.
    metric_names : iterable of str, optional
        Metrics to process. Defaults to the notebook-level ``metrics`` list, so
        existing ``.apply(collapse_across_mice)`` calls behave as before.

    Returns
    -------
    pd.Series
        Indexed by ``(metric, 'mean')`` and ``(metric, 'sem')``. The SEM is the
        sample standard deviation (ddof=1) of the per-mouse means divided by
        sqrt(n); it is 0.0 when a single mouse contributes and NaN when none do.
    """
    names = metrics if metric_names is None else metric_names
    out = {}
    for m in names:
        vals = group[(m, 'mean')].to_numpy(dtype=float)
        # Skip mice whose mean is undefined so one NaN does not wipe out the
        # whole point (pandas' own mean/sem skip NaN in the within-mouse step).
        vals = vals[~np.isnan(vals)]
        n = vals.size
        out[(m, 'mean')] = vals.mean() if n > 0 else np.nan
        if n > 1:
            out[(m, 'sem')] = vals.std(ddof=1) / np.sqrt(n)
        else:
            out[(m, 'sem')] = 0.0 if n == 1 else np.nan
    return pd.Series(out)

# One row per (threshold, split) holding the across-mice mean and SEM per metric.
# NOTE(review): the saved cell output shows a warning pointing at the .apply
# line below — possibly the pandas groupby.apply grouping-columns deprecation; confirm.
across_mice = (
    within_mouse
    .groupby(['threshold', 'split'])
    .apply(collapse_across_mice)
    .reset_index()
).sort_values(['split','threshold'])

# ================== PLOTTING (fixed margins & size) ==================
out_dir = "results/sensitivity/plots"
plot_suffix = "_vs_threshold.png"
os.makedirs(out_dir, exist_ok=True)
# Clear only this sweep's earlier figures. The other sweep cells in this
# notebook save into the same directory, so deleting the whole tree here
# would throw away their plots.
for old_name in os.listdir(out_dir):
    if old_name.endswith(plot_suffix):
        os.remove(os.path.join(out_dir, old_name))
print("→ writing plots into:", os.path.abspath(out_dir))

COLORS = {"Train": "blue", "Val": "green", "Test": "red"}

for m in metrics:
    disp = DISPLAY_NAME.get(m, m)

    fig, ax = pu.configure_plot(
        title=f"{disp} vs Threshold",
        xlabel="Threshold",
        ylabel=disp,
        fontsize=24,
        figsize=(8, 6),
        spine="left"
    )
    ax.title.set_y(1.02)

    # Splits that actually received a curve; drives both the skip check and
    # the legend colours below.
    plotted_splits = []
    for split in real_splits:
        sub = across_mice[across_mice["split"] == split]
        if sub.empty or (m, "mean") not in sub.columns:
            continue

        # sorted x so the connecting line runs left to right
        sub = sub.sort_values("threshold")
        x    = sub["threshold"].to_numpy()
        y    = sub[(m, "mean")].to_numpy()
        yerr = sub[(m, "sem")].to_numpy()

        ax.errorbar(
            x, y, yerr=yerr,
            label=split,
            color=COLORS[split],
            marker="o", markersize=6,
            linewidth=2,
            **pu.ERROR_KW(elinewidth=2, capsize=5)
        )
        plotted_splits.append(split)

    if not plotted_splits:
        # Nothing to show for this metric: release the figure and move on.
        plt.close(fig)
        continue

    ax.xaxis.set_major_locator(MaxNLocator(integer=False))  # thresholds can be floats
    ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

    # --- NEW: only show every 2nd x-axis label ---
    for i, label in enumerate(ax.get_xticklabels()):
        if i % 2 != 0:  # hide odd-indexed labels
            label.set_visible(False)

    pu.configure_legend(
        ax,
        colors=[COLORS[s] for s in plotted_splits],
        loc="upper left",
        bbox_to_anchor=(1.02, 1),
        legend_line=("vertical", 1),
        font_size=14
    )

    # Explicit margins leave room on the right for the outside legend.
    fig.tight_layout()
    fig.subplots_adjust(left=0.18, right=0.78, bottom=0.18, top=0.88)

    out_path = os.path.join(out_dir, f"{disp.lower().replace(' ','_')}{plot_suffix}")
    print("→ saving", out_path)
    fig.savefig(out_path, dpi=300)
    plt.close(fig)


  .apply(collapse_across_mice)
  .apply(collapse_across_mice)


→ writing plots into: c:\Users\marin\Documents\GitHub\marina_ITE\results\sensitivity\plots
→ saving results/sensitivity/plots\accuracy_vs_threshold.png
→ saving results/sensitivity/plots\sensitivity_vs_threshold.png
→ saving results/sensitivity/plots\specificity_vs_threshold.png
→ saving results/sensitivity/plots\f1_vs_threshold.png
→ saving results/sensitivity/plots\pearson_correlation_vs_threshold.png
