In [1]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from collections import defaultdict

In [None]:
# Root directory containing one sub-folder of ridge-regression results per model.
BASE_DIR = Path("/jet/home/smazioud/stat215a_final/ridge_new")

# Subject ids whose results are summarized below.
SUBJECTS = [2, 3]

# Per-model lookup table:
#   "dir"     -> folder holding that model's result pickles
#   "pattern" -> file-name template, filled in with the subject id via str.format
MODEL_CONFIG = {
    "bert_finetuned": dict(
        dir=BASE_DIR / "finetuned_bert",
        # NOTE(review): no underscore after the subject id, unlike the other
        # patterns -- presumably mirrors the on-disk file name; confirm.
        pattern="subject{subject_id}ridge_streaming_80_20_k_3.pkl",
    ),
    "bert_pretrained": dict(
        dir=BASE_DIR / "pretrained_bert",
        pattern="subject{subject_id}_ridge_streaming_80_20_k_3.pkl",
    ),
    "word2vec": dict(
        dir=BASE_DIR / "word2vec",
        pattern="subject{subject_id}_ridge_streaming_80_20_k_3_1e-3.pkl",
    ),
    # GloVe: if a result file is absent, the loader warns and the summary
    # table marks that (subject, model) pair as unavailable.
    "glove": dict(
        dir=BASE_DIR / "glove",
        pattern="subject{subject_id}_ridge_streaming_80_20_k_3.pkl",
    ),
}

## Helper functions

In [None]:
# Function to load model results for a given subject

def load_model_result(model_name, subject_id):
    """
    Load the pickled ridge result for one (model, subject) pair.

    Parameters
    ----------
    model_name : key into MODEL_CONFIG selecting the result folder and
        file-name pattern.
    subject_id : value substituted into the ``{subject_id}`` placeholder of
        the pattern.

    Returns
    -------
    None if the expected file does not exist (a warning is printed).
    Otherwise a dict with keys "path", "W_shape", "test_corr" (numpy array),
    "test_mse" and "mean_test_corr" (floats, or None when not stored).

    Raises
    ------
    ValueError if the pickle holds no "test_corr" entry under "metrics".
    """
    entry = MODEL_CONFIG[model_name]
    pkl_path = entry["dir"] / entry["pattern"].format(subject_id=subject_id)

    # A missing file is reported and signalled with None rather than an exception.
    if not pkl_path.exists():
        print(f"[WARN] Missing file for model={model_name}, subject={subject_id}: {pkl_path}")
        return None

    # NOTE: pickle.load can execute arbitrary code; only use on trusted result files.
    with open(pkl_path, "rb") as handle:
        payload = pickle.load(handle)

    metrics = payload.get("metrics", {})
    test_corr = metrics.get("test_corr")
    if test_corr is None:
        raise ValueError(f"test_corr not found in {pkl_path}")

    test_mse = metrics.get("test_mse")
    mean_test_corr = metrics.get("mean_test_corr")
    # Fall back to an empty 2-D array so a missing "W" yields shape (0, 0).
    weights = payload.get("W", np.empty((0, 0)))

    return {
        "path": pkl_path,
        "W_shape": weights.shape,
        "test_corr": np.asarray(test_corr),
        "test_mse": None if test_mse is None else float(test_mse),
        "mean_test_corr": None if mean_test_corr is None else float(mean_test_corr),
    }


In [4]:
def summarize_corr_vector(cc):
    """
    Summarize a vector of voxel-wise test correlations.

    Non-finite entries (NaN, +/-inf) are dropped before any statistic is
    computed.

    Parameters
    ----------
    cc : array-like of test correlations.

    Returns
    -------
    dict of floats with keys "mean_cc", "median_cc", "p95_cc", "p99_cc",
    "max_cc", "frac_cc_gt_0", "frac_cc_gt_0_05" and "frac_cc_gt_0_1".
    If no finite value remains (empty or all-non-finite input), every
    entry is NaN instead of raising.
    """
    cc = np.asarray(cc)
    cc = cc[np.isfinite(cc)]  # boolean indexing also flattens the input

    # np.percentile / np.max raise on an empty array, so handle that case
    # explicitly and report "no data" as NaN for every statistic.
    if cc.size == 0:
        nan = float("nan")
        return {
            key: nan
            for key in (
                "mean_cc",
                "median_cc",
                "p95_cc",
                "p99_cc",
                "max_cc",
                "frac_cc_gt_0",
                "frac_cc_gt_0_05",
                "frac_cc_gt_0_1",
            )
        }

    p95, p99 = np.percentile(cc, [95, 99])

    return {
        "mean_cc": float(np.mean(cc)),
        "median_cc": float(np.median(cc)),
        "p95_cc": float(p95),  # top 5%
        "p99_cc": float(p99),  # top 1%
        "max_cc": float(np.max(cc)),
        # Fraction of entries above fixed correlation thresholds.
        "frac_cc_gt_0": float(np.mean(cc > 0.0)),
        "frac_cc_gt_0_05": float(np.mean(cc > 0.05)),
        "frac_cc_gt_0_1": float(np.mean(cc > 0.1)),
    }

## Compute summaries

In [5]:
# Aggregate summaries across all subjects and models

# One record per (subject, model) pair; pairs without a result file keep only
# the identifying columns plus available=False.
rows = []

for subject_id in SUBJECTS:
    for model_name in MODEL_CONFIG:
        result = load_model_result(model_name, subject_id)

        row = {
            "subject": subject_id,
            "model": model_name,
            "available": result is not None,
        }

        if result is not None:
            # Stored scalars first, then the summary statistics recomputed
            # from the test-correlation vector.
            row["test_mse"] = result["test_mse"]
            row["mean_test_corr_stored"] = result["mean_test_corr"]
            row["W_shape"] = result["W_shape"]
            row.update(summarize_corr_vector(result["test_corr"]))

        rows.append(row)

# Build the frame once from the collected records.
summary_df = pd.DataFrame(rows)
summary_df


Unnamed: 0,subject,model,available,test_mse,mean_test_corr_stored,W_shape,mean_cc,median_cc,p95_cc,p99_cc,max_cc,frac_cc_gt_0,frac_cc_gt_0_05,frac_cc_gt_0_1
0,2,bert_finetuned,True,96060.636756,0.070016,"(3072, 94251)",0.070016,0.042344,0.221875,0.297717,0.487609,0.936128,0.452971,0.264188
1,2,bert_pretrained,True,104111.403796,0.043053,"(3072, 94251)",0.043053,0.022255,0.166747,0.249532,0.463118,0.854272,0.287689,0.142025
2,2,word2vec,True,96669.859375,0.039643,"(1200, 94251)",0.039643,0.023593,0.142334,0.205051,0.368426,0.854622,0.289185,0.119256
3,2,glove,True,96872.070312,0.030939,"(1200, 94251)",0.030939,0.020062,0.139705,0.21835,0.419977,0.710051,0.261589,0.100731
4,3,bert_finetuned,True,92901.923554,0.069806,"(3072, 95556)",0.069806,0.044209,0.223283,0.313492,0.531088,0.948596,0.459385,0.244192
5,3,bert_pretrained,True,100919.336629,0.045722,"(3072, 95556)",0.045722,0.025475,0.171015,0.268123,0.50546,0.881902,0.299981,0.140483
6,3,word2vec,True,98106.46875,0.035765,"(1200, 95556)",0.035765,0.021722,0.132313,0.199762,0.392982,0.84676,0.251413,0.097126
7,3,glove,True,98005.46875,0.028434,"(1200, 95556)",0.028434,0.0211,0.117652,0.178672,0.362167,0.726171,0.252491,0.07673


## Pick best model

### Top 5 percentile CC

In [6]:
# Pick best model per subject according to p95_cc

# Keep only the (subject, model) rows that actually have results. The explicit
# copy makes valid_df an independent frame, so adding columns to it later does
# not trigger SettingWithCopyWarning.
valid_df = summary_df[summary_df["available"]].copy()

# Within each subject, order models by descending p95_cc and keep the top row.
best_by_subject = (
    valid_df
    .sort_values(["subject", "p95_cc"], ascending=[True, False])
    .groupby("subject")
    .head(1)
    .reset_index(drop=True)
)

# A bare expression in the middle of a cell is not rendered (only the last
# expression is), so show this table explicitly.
display(best_by_subject[["subject", "model", "p95_cc", "mean_cc", "median_cc"]])

# Number of subjects for which each model ranks first.
best_count = best_by_subject["model"].value_counts()
best_count

model
bert_finetuned    2
Name: count, dtype: int64

### Mean of top 5 and top 1 percentiles

In [7]:
# Score = mean of p95 and p99
# valid_df is a filtered selection of summary_df; writing a column into it with
# valid_df[...] = ... triggers SettingWithCopyWarning. assign() returns a new
# frame instead, and rebinding valid_df keeps the score column available under
# the same name.
valid_df = valid_df.assign(
    score_p95_p99_mean=0.5 * (valid_df["p95_cc"] + valid_df["p99_cc"])
)

# Within each subject, order models by descending score and keep the top row.
best_by_subject_alt = (
    valid_df
    .sort_values(["subject", "score_p95_p99_mean"], ascending=[True, False])
    .groupby("subject")
    .head(1)
    .reset_index(drop=True)
)
best_by_subject_alt[["subject", "model", "score_p95_p99_mean"]]


Unnamed: 0,subject,model,score_p95_p99_mean
0,2,bert_finetuned,0.259796
1,3,bert_finetuned,0.268387
