In [4]:
import numpy as np
from sklearn.decomposition import PCA
from numpy.random import default_rng

# ------------ helpers ------------
def censored_lstsq(A, B, M, lam=1e-6):
    # Solve min_X || M âŠ™ (A X - B) ||_F^2 + Î»||X||_F^2
    n, r = A.shape
    _, m = B.shape
    X = np.empty((r, m), dtype=B.dtype)
    I = np.eye(r, dtype=B.dtype)
    for j in range(m):
        mask = M[:, j]
        Aj = A[mask, :]
        bj = B[mask, j]
        T  = Aj.T @ Aj + lam * I
        rhs = Aj.T @ bj
        X[:, j] = np.linalg.solve(T, rhs)
    return X

def cv_pca_blocked(X, rank, holdout_cols, n_iter=20, lam=1e-6, seed=0):
    """
    X: (n_neurons Ã— n_images) trial-avg or denoised neural matrix
    holdout_cols: list/array of image indices held out for test
    returns U (nÃ—r), V (rÃ—m), train_err, test_err, mask M
    """
    rng = default_rng(seed)
    n, m = X.shape
    M = np.ones((n, m), dtype=bool)
    M[:, holdout_cols] = False

    U = rng.standard_normal((n, rank)) / np.sqrt(n)
    for _ in range(n_iter):
        V = censored_lstsq(U, X, M, lam=lam)            # rÃ—m
        UT = censored_lstsq(V.T, X.T, M.T, lam=lam)     # mÃ—r
        U  = UT.T                                       # nÃ—r

    # orthonormalize U for interpretability; recompute V as scores
    U, _, _ = np.linalg.svd(U, full_matrices=False)
    V = U.T @ X
    R = U @ V - X
    train_err = np.mean((R[M])**2)
    test_err  = np.mean((R[~M])**2)
    return U, V, train_err, test_err, M

def fisher_z(r):
    r = np.clip(r, -0.999999, 0.999999)
    return 0.5*np.log((1+r)/(1-r))

def inv_fisher_z(z):
    return (np.exp(2*z)-1)/(np.exp(2*z)+1)

# ------------ main CV ------------
def vit_alignment_cv(X_neural, Z_vit, k_images=5, rank=1, n_splits=5, seed=0, lam=1e-6,
                     vit_pc_idx=0, n_perm=2000):
    """
    X_neural: (n_neurons Ã— n_images) (use train-mean centering per fold)
    Z_vit:    (n_images Ã— d) ViT features per image (e.g., CLR-softmax or logits)
    vit_pc_idx: which ViT PC to align with (0=PC1)
    returns dict with per-fold test r, CI, permutation p, stability, etc.
    """
    rng = default_rng(seed)
    m = X_neural.shape[1]
    # Freeze ViT PC basis on ALL images (no test peeking in selection later; we only use scores from held-out images)
    vit_pca = PCA()
    vit_pca.fit(Z_vit)
    vit_w   = vit_pca.components_[vit_pc_idx]          # (d,)
    vit_scores_all = Z_vit @ vit_w                     # (m,)

    # K-fold indices over images
    idx = np.arange(m)
    rng.shuffle(idx)
    folds = np.array_split(idx, n_splits)

    test_rs, train_rs, pvals = [], [], []
    directions = []   # store U[:,0] per fold for stability

    for f, test_cols in enumerate(folds):
        train_cols = np.setdiff1d(idx, test_cols, assume_unique=False)

        # foldwise centering: subtract train-image mean from neural
        mu_train = X_neural[:, train_cols].mean(axis=1, keepdims=True)
        Xc = X_neural - mu_train

        # also center ViT scores by their train mean (to avoid leakage)
        vit_mu_train = vit_scores_all[train_cols].mean()
        vit_scores_centered = vit_scores_all - vit_mu_train

        # fit blocked cvPCA on train images
        U, V, tr_err, te_err, M = cv_pca_blocked(
            Xc, rank=rank, holdout_cols=test_cols, n_iter=30, lam=lam, seed=seed+f
        )

        # neural scores for train/test images
        neural_scores_train = V[0, train_cols] if rank == 1 else V[:, train_cols][0]
        neural_scores_test  = V[0, test_cols]  if rank == 1 else V[:, test_cols][0]

        # sign alignment using TRAIN correlation only
        r_train = np.corrcoef(neural_scores_train, vit_scores_centered[train_cols])[0,1]
        sign = 1.0 if r_train >= 0 else -1.0
        neural_scores_train *= sign
        neural_scores_test  *= sign

        # test correlation on held-out images
        r_test = np.corrcoef(neural_scores_test, vit_scores_centered[test_cols])[0,1]
        test_rs.append(r_test)
        train_rs.append(r_train)
        directions.append(U[:, 0])  # store first component

        # permutation test on TEST set only
        vt = vit_scores_centered[test_cols].copy()
        obs = r_test
        null = []
        for _ in range(n_perm):
            rng.shuffle(vt)
            null.append(np.corrcoef(neural_scores_test, vt)[0,1])
        null = np.array(null)
        # two-sided p
        p = (np.sum(np.abs(null) >= np.abs(obs)) + 1) / (len(null) + 1)
        pvals.append(p)

    # aggregate
    z = np.array([fisher_z(r) for r in test_rs])
    z_mean = z.mean()
    z_se   = z.std(ddof=1)/np.sqrt(len(z))
    # 95% CI on r via Fisher-z
    ci_lo = inv_fisher_z(z_mean - 1.96*z_se)
    ci_hi = inv_fisher_z(z_mean + 1.96*z_se)
    r_bar = inv_fisher_z(z_mean)

    # direction stability: mean cosine similarity to first-fold direction
    D = np.stack(directions, axis=1)     # (n_neurons Ã— n_folds)
    D = D / np.linalg.norm(D, axis=0, keepdims=True)
    ref = D[:, 0:1]
    cos = (ref * D).sum(axis=0)
    stability_mean = float(np.mean(np.abs(cos[1:])))  # exclude self
    stability_sd   = float(np.std(np.abs(cos[1:]), ddof=1))

    return {
        "fold_test_r": test_rs,
        "fold_train_r": train_rs,
        "mean_test_r": float(r_bar),
        "ci95": (float(ci_lo), float(ci_hi)),
        "perm_pvals": pvals,
        "mean_perm_p": float(np.mean(pvals)),
        "direction_stability_cos_mean": stability_mean,
        "direction_stability_cos_sd": stability_sd,
    }

#!/usr/bin/env python3
"""
Cross-validated ViT alignment via cvPCA (split by images).

For each brain area:
 - Load neural responses
 - Run blocked cvPCA with image-level holdouts
 - Compute correlation with ViT PC1 on held-out images
 - Report mean r, CI, p-value, and stability
"""

#!/usr/bin/env python3
"""
Cross-validated ViT alignment via cvPCA (split by images).

For each brain area:
 - Load neural responses
 - Run blocked cvPCA with image-level holdouts
 - Compute correlation with ViT PC1 on held-out images
 - Report mean r, CI, p-value, and stability
"""

import pickle
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.special import softmax
from skbio.stats.composition import clr

# ---------------------------------------------------------------
# CONFIG
# ---------------------------------------------------------------
VIT_PATH    = '/home/maria/Documents/HuggingMouseData/MouseViTEmbeddings/google_vit-base-patch16-224_embeddings_logits.pkl'
NEURAL_PATH = '/home/maria/LuckyMouse/pixel_transformer_neuro/data/processed/hybrid_neural_responses.npy'
AREAS_PATH  = '/home/maria/MITNeuralComputation/visualization/brain_area.npy'

N_SPLITS    = 5
LAMBDA      = 1e-4
VIT_PC_IDX  = 0
N_PERM      = 2000
RANK        = 1
RANDOM_SEED = 42
VAR_CUTOFF  = 0.90  # keep enough ViT PCs to explain 90% variance

# ---------------------------------------------------------------
# LOAD ViT EMBEDDINGS AND PCA-REDUCE
# ---------------------------------------------------------------
print("ðŸ”¹ Loading ViT embeddings...")
with open(VIT_PATH, 'rb') as f:
    vit_logits = pickle.load(f)['natural_scenes']  # (images Ã— D_vit)

# softmax â†’ CLR â†’ PCA(90%)
Xv = softmax(np.asarray(vit_logits), axis=1)
Xv = clr(Xv + 1e-12)

vit_pca_full = PCA(random_state=RANDOM_SEED).fit(Xv)
vit_cumvar = np.cumsum(vit_pca_full.explained_variance_ratio_)
vit_ncomp = np.searchsorted(vit_cumvar, VAR_CUTOFF) + 1
vit_pca = PCA(n_components=vit_ncomp, random_state=RANDOM_SEED)
Zv = vit_pca.fit_transform(Xv)  # (images Ã— vit_ncomp)
print(f"âœ… ViT PCs covering 90% variance: {vit_ncomp}")

# ---------------------------------------------------------------
# LOAD NEURAL DATA
# ---------------------------------------------------------------
print("ðŸ”¹ Loading neural responses...")
neural_matrix = np.load(NEURAL_PATH)  # (n_neurons Ã— images)
areas = np.load(AREAS_PATH, allow_pickle=True)  # (n_neurons,)

unique_areas = np.unique(areas)
print(f"Found {len(unique_areas)} areas: {unique_areas}")

# ---------------------------------------------------------------
# MAIN LOOP OVER AREAS
# ---------------------------------------------------------------
results = {}

for area in unique_areas:
    print(f"\n=== Processing area: {area} ===")
    idx = np.where(areas == area)[0]
    X_area = neural_matrix[idx, :]

    # z-score across neurons for numerical stability
    X_area = StandardScaler(with_mean=True, with_std=True).fit_transform(X_area)

    # run cvPCA alignment
    res = vit_alignment_cv(
        X_neural=X_area,
        Z_vit=Zv,
        n_splits=N_SPLITS,
        rank=RANK,
        vit_pc_idx=VIT_PC_IDX,
        lam=LAMBDA,
        n_perm=N_PERM,
        seed=RANDOM_SEED
    )

    results[area] = res

    print(f"mean_test_r: {res['mean_test_r']:.3f}")
    print(f"95% CI: [{res['ci95'][0]:.3f}, {res['ci95'][1]:.3f}]")
    print(f"mean_perm_p: {res['mean_perm_p']:.4f}")
    print(f"direction stability (cos): {res['direction_stability_cos_mean']:.3f} Â± {res['direction_stability_cos_sd']:.3f}")

# ---------------------------------------------------------------
# SAVE RESULTS
# ---------------------------------------------------------------
out_path = "/home/maria/MITNeuralComputation/vit_alignment_results.pkl"
with open(out_path, "wb") as f:
    pickle.dump(results, f)

print(f"\nAll done! Results saved to {out_path}")



ðŸ”¹ Loading ViT embeddings...
âœ… ViT PCs covering 90% variance: 44
ðŸ”¹ Loading neural responses...
Found 6 areas: ['VISal' 'VISam' 'VISl' 'VISp' 'VISpm' 'VISrl']

=== Processing area: VISal ===


IndexError: index 118 is out of bounds for axis 0 with size 118

In [5]:
import numpy as np
from sklearn.decomposition import PCA
from numpy.random import default_rng

# ------------ helpers ------------
def censored_lstsq(A, B, M, lam=1e-6):
    # Solve min_X || M âŠ™ (A X - B) ||_F^2 + Î»||X||_F^2
    n, r = A.shape
    _, m = B.shape
    X = np.empty((r, m), dtype=B.dtype)
    I = np.eye(r, dtype=B.dtype)
    for j in range(m):
        mask = M[:, j]
        Aj = A[mask, :]
        bj = B[mask, j]
        T  = Aj.T @ Aj + lam * I
        rhs = Aj.T @ bj
        X[:, j] = np.linalg.solve(T, rhs)
    return X

def cv_pca_blocked(X, rank, holdout_cols, n_iter=20, lam=1e-6, seed=0):
    """
    X: (n_neurons Ã— n_images) trial-avg or denoised neural matrix
    holdout_cols: list/array of image indices held out for test
    returns U (nÃ—r), V (rÃ—m), train_err, test_err, mask M
    """
    rng = default_rng(seed)
    n, m = X.shape
    M = np.ones((n, m), dtype=bool)
    M[:, holdout_cols] = False

    U = rng.standard_normal((n, rank)) / np.sqrt(n)
    for _ in range(n_iter):
        V = censored_lstsq(U, X, M, lam=lam)            # rÃ—m
        UT = censored_lstsq(V.T, X.T, M.T, lam=lam)     # mÃ—r
        U  = UT.T                                       # nÃ—r

    # orthonormalize U for interpretability; recompute V as scores
    U, _, _ = np.linalg.svd(U, full_matrices=False)
    V = U.T @ X
    R = U @ V - X
    train_err = np.mean((R[M])**2)
    test_err  = np.mean((R[~M])**2)
    return U, V, train_err, test_err, M

def fisher_z(r):
    r = np.clip(r, -0.999999, 0.999999)
    return 0.5*np.log((1+r)/(1-r))

def inv_fisher_z(z):
    return (np.exp(2*z)-1)/(np.exp(2*z)+1)

# ------------ main CV ------------
def vit_alignment_cv(X_neural, Z_vit, k_images=5, rank=1, n_splits=5, seed=0, lam=1e-6,
                     vit_pc_idx=0, n_perm=2000):
    """
    X_neural: (n_neurons Ã— n_images) (use train-mean centering per fold)
    Z_vit:    (n_images Ã— d) ViT features per image (e.g., CLR-softmax or logits)
    vit_pc_idx: which ViT PC to align with (0=PC1)
    returns dict with per-fold test r, CI, permutation p, stability, etc.
    """
    rng = default_rng(seed)
    m = X_neural.shape[1]
    # Freeze ViT PC basis on ALL images (no test peeking in selection later; we only use scores from held-out images)
    vit_pca = PCA()
    vit_pca.fit(Z_vit)
    vit_w   = vit_pca.components_[vit_pc_idx]          # (d,)
    vit_scores_all = Z_vit @ vit_w                     # (m,)

    # K-fold indices over images
    idx = np.arange(m)
    rng.shuffle(idx)
    folds = np.array_split(idx, n_splits)

    test_rs, train_rs, pvals = [], [], []
    directions = []   # store U[:,0] per fold for stability

    for f, test_cols in enumerate(folds):
        train_cols = np.setdiff1d(idx, test_cols, assume_unique=False)

        # foldwise centering: subtract train-image mean from neural
        mu_train = X_neural[:, train_cols].mean(axis=1, keepdims=True)
        Xc = X_neural - mu_train

        # also center ViT scores by their train mean (to avoid leakage)
        vit_mu_train = vit_scores_all[train_cols].mean()
        vit_scores_centered = vit_scores_all - vit_mu_train

        # fit blocked cvPCA on train images
        U, V, tr_err, te_err, M = cv_pca_blocked(
            Xc, rank=rank, holdout_cols=test_cols, n_iter=30, lam=lam, seed=seed+f
        )

        # neural scores for train/test images
        neural_scores_train = V[0, train_cols] if rank == 1 else V[:, train_cols][0]
        neural_scores_test  = V[0, test_cols]  if rank == 1 else V[:, test_cols][0]

        # sign alignment using TRAIN correlation only
        r_train = np.corrcoef(neural_scores_train, vit_scores_centered[train_cols])[0,1]
        sign = 1.0 if r_train >= 0 else -1.0
        neural_scores_train *= sign
        neural_scores_test  *= sign

        # test correlation on held-out images
        r_test = np.corrcoef(neural_scores_test, vit_scores_centered[test_cols])[0,1]
        test_rs.append(r_test)
        train_rs.append(r_train)
        directions.append(U[:, 0])  # store first component

        # permutation test on TEST set only
        vt = vit_scores_centered[test_cols].copy()
        obs = r_test
        null = []
        for _ in range(n_perm):
            rng.shuffle(vt)
            null.append(np.corrcoef(neural_scores_test, vt)[0,1])
        null = np.array(null)
        # two-sided p
        p = (np.sum(np.abs(null) >= np.abs(obs)) + 1) / (len(null) + 1)
        pvals.append(p)

    # aggregate
    z = np.array([fisher_z(r) for r in test_rs])
    z_mean = z.mean()
    z_se   = z.std(ddof=1)/np.sqrt(len(z))
    # 95% CI on r via Fisher-z
    ci_lo = inv_fisher_z(z_mean - 1.96*z_se)
    ci_hi = inv_fisher_z(z_mean + 1.96*z_se)
    r_bar = inv_fisher_z(z_mean)

    # direction stability: mean cosine similarity to first-fold direction
    D = np.stack(directions, axis=1)     # (n_neurons Ã— n_folds)
    D = D / np.linalg.norm(D, axis=0, keepdims=True)
    ref = D[:, 0:1]
    cos = (ref * D).sum(axis=0)
    stability_mean = float(np.mean(np.abs(cos[1:])))  # exclude self
    stability_sd   = float(np.std(np.abs(cos[1:]), ddof=1))

    return {
        "fold_test_r": test_rs,
        "fold_train_r": train_rs,
        "mean_test_r": float(r_bar),
        "ci95": (float(ci_lo), float(ci_hi)),
        "perm_pvals": pvals,
        "mean_perm_p": float(np.mean(pvals)),
        "direction_stability_cos_mean": stability_mean,
        "direction_stability_cos_sd": stability_sd,
    }

#!/usr/bin/env python3
"""
Cross-validated ViT alignment via cvPCA (split by images).

For each brain area:
 - Load neural responses
 - Run blocked cvPCA with image-level holdouts
 - Compute correlation with ViT PC1 on held-out images
 - Report mean r, CI, p-value, and stability
"""

#!/usr/bin/env python3
"""
Cross-validated ViT alignment via cvPCA (split by images).

For each brain area:
 - Load neural responses
 - Run blocked cvPCA with image-level holdouts
 - Compute correlation with ViT PC1 on held-out images
 - Report mean r, CI, p-value, and stability
"""


'\nCross-validated ViT alignment via cvPCA (split by images).\n\nFor each brain area:\n - Load neural responses\n - Run blocked cvPCA with image-level holdouts\n - Compute correlation with ViT PC1 on held-out images\n - Report mean r, CI, p-value, and stability\n'

In [6]:
#!/usr/bin/env python3
"""
Cross-validated ViT alignment via cvPCA (split by images).

For each brain area:
 â€¢ Load neural responses
 â€¢ Run blocked cvPCA with image-level holdouts
 â€¢ Compute correlation with ViT PC1 on held-out images
 â€¢ Report mean r, CI, permutation p, and stability
"""

import pickle
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.special import softmax
from skbio.stats.composition import clr
from numpy.random import default_rng

# ==============================================================
# CONFIG
# ==============================================================
VIT_PATH    = '/home/maria/Documents/HuggingMouseData/MouseViTEmbeddings/google_vit-base-patch16-224_embeddings_logits.pkl'
NEURAL_PATH = '/home/maria/LuckyMouse/pixel_transformer_neuro/data/processed/hybrid_neural_responses.npy'
AREAS_PATH  = '/home/maria/MITNeuralComputation/visualization/brain_area.npy'

N_SPLITS    = 5
LAMBDA      = 1e-4
VIT_PC_IDX  = 0
N_PERM      = 2000
RANK        = 1
RANDOM_SEED = 42
VAR_CUTOFF  = 0.90

# ==============================================================
# HELPER FUNCTIONS
# ==============================================================
def censored_lstsq(A, B, M, lam=1e-6):
    """Solve min_X ||MâŠ™(A X âˆ’ B)||Â² + Î»||X||Â²."""
    n, r = A.shape
    _, m = B.shape
    X = np.empty((r, m), dtype=B.dtype)
    I = np.eye(r, dtype=B.dtype)
    for j in range(m):
        mask = M[:, j]
        Aj = A[mask, :]
        bj = B[mask, j]
        T = Aj.T @ Aj + lam * I
        rhs = Aj.T @ bj
        X[:, j] = np.linalg.solve(T, rhs)
    return X


def cv_pca_blocked(X, rank, holdout_cols, n_iter=20, lam=1e-6, seed=0):
    """cvPCA with a block holdout (columns = images)."""
    rng = default_rng(seed)
    n, m = X.shape
    M = np.ones((n, m), dtype=bool)
    M[:, holdout_cols] = False

    U = rng.standard_normal((n, rank)) / np.sqrt(n)
    for _ in range(n_iter):
        V = censored_lstsq(U, X, M, lam=lam)
        UT = censored_lstsq(V.T, X.T, M.T, lam=lam)
        U = UT.T

    # orthonormalize U, recompute V
    U, _, _ = np.linalg.svd(U, full_matrices=False)
    V = U.T @ X
    R = U @ V - X
    train_err = np.mean((R[M])**2)
    test_err  = np.mean((R[~M])**2)
    return U, V, train_err, test_err, M


def fisher_z(r):
    r = np.clip(r, -0.999999, 0.999999)
    return 0.5 * np.log((1 + r) / (1 - r))


def inv_fisher_z(z):
    return (np.exp(2 * z) - 1) / (np.exp(2 * z) + 1)


def vit_alignment_cv(X_neural, Z_vit, rank=1, n_splits=5, seed=0,
                     lam=1e-6, vit_pc_idx=0, n_perm=2000):
    """Run cross-validated alignment between neural cvPCA and ViT PCs."""
    rng = default_rng(seed)
    m = X_neural.shape[1]

    # 1. ViT PCA basis (frozen)
    vit_pca = PCA()
    vit_pca.fit(Z_vit)
    vit_w = vit_pca.components_[vit_pc_idx]
    vit_scores_all = Z_vit @ vit_w  # (m,)

    # 2. Split images into folds
    indices = np.arange(m)
    rng.shuffle(indices)
    folds = np.array_split(indices, n_splits)

    test_rs, train_rs, pvals = [], [], []
    directions = []

    for f, test_cols in enumerate(folds):
        train_cols = np.setdiff1d(indices, test_cols, assume_unique=True)

        mu_train = X_neural[:, train_cols].mean(axis=1, keepdims=True)
        Xc = X_neural - mu_train

        vit_mu_train = vit_scores_all[train_cols].mean()
        vit_scores_centered = vit_scores_all - vit_mu_train

        U, V, _, _, _ = cv_pca_blocked(
            Xc, rank=rank, holdout_cols=test_cols,
            n_iter=30, lam=lam, seed=seed + f
        )

        neural_scores_train = V[0, train_cols]
        neural_scores_test  = V[0, test_cols]

        # sign alignment on train
        r_train = np.corrcoef(neural_scores_train, vit_scores_centered[train_cols])[0, 1]
        sign = 1.0 if r_train >= 0 else -1.0
        neural_scores_train *= sign
        neural_scores_test  *= sign

        r_test = np.corrcoef(neural_scores_test, vit_scores_centered[test_cols])[0, 1]
        test_rs.append(r_test)
        train_rs.append(r_train)
        directions.append(U[:, 0])

        # permutation test on test images
        vt = vit_scores_centered[test_cols].copy()
        obs = r_test
        null = np.empty(n_perm)
        for i in range(n_perm):
            rng.shuffle(vt)
            null[i] = np.corrcoef(neural_scores_test, vt)[0, 1]
        p = (np.sum(np.abs(null) >= np.abs(obs)) + 1) / (n_perm + 1)
        pvals.append(p)

    z = np.array([fisher_z(r) for r in test_rs])
    z_mean = z.mean()
    z_se = z.std(ddof=1) / np.sqrt(len(z))
    ci_lo = inv_fisher_z(z_mean - 1.96 * z_se)
    ci_hi = inv_fisher_z(z_mean + 1.96 * z_se)
    r_bar = inv_fisher_z(z_mean)

    D = np.stack(directions, axis=1)
    D /= np.linalg.norm(D, axis=0, keepdims=True)
    ref = D[:, 0:1]
    cos = (ref * D).sum(axis=0)
    stability_mean = float(np.mean(np.abs(cos[1:])))
    stability_sd = float(np.std(np.abs(cos[1:]), ddof=1))

    return {
        "fold_test_r": test_rs,
        "fold_train_r": train_rs,
        "mean_test_r": float(r_bar),
        "ci95": (float(ci_lo), float(ci_hi)),
        "perm_pvals": pvals,
        "mean_perm_p": float(np.mean(pvals)),
        "direction_stability_cos_mean": stability_mean,
        "direction_stability_cos_sd": stability_sd,
    }

# ==============================================================
# LOAD AND PREPROCESS VIT DATA
# ==============================================================
print("ðŸ”¹ Loading ViT embeddingsâ€¦")
with open(VIT_PATH, "rb") as f:
    vit_logits = pickle.load(f)["natural_scenes"]  # (images Ã— D_vit)

Xv = softmax(np.asarray(vit_logits), axis=1)
Xv = clr(Xv + 1e-12)

vit_pca_full = PCA(random_state=RANDOM_SEED).fit(Xv)
vit_cumvar = np.cumsum(vit_pca_full.explained_variance_ratio_)
vit_ncomp = np.searchsorted(vit_cumvar, VAR_CUTOFF) + 1
vit_pca = PCA(n_components=vit_ncomp, random_state=RANDOM_SEED)
Zv = vit_pca.fit_transform(Xv)
print(f"âœ… ViT PCs covering 90 % variance: {vit_ncomp}")

# ==============================================================
# LOAD NEURAL DATA
# ==============================================================
print("ðŸ”¹ Loading neural responsesâ€¦")
neural_matrix = np.load(NEURAL_PATH)  # (n_neurons Ã— images)
areas = np.load(AREAS_PATH, allow_pickle=True)
unique_areas = np.unique(areas)
print(f"Found {len(unique_areas)} areas: {unique_areas}")

# ==============================================================
# MAIN LOOP OVER AREAS
# ==============================================================
results = {}

for area in unique_areas:
    print(f"\n=== Processing area: {area} ===")
    idx = np.where(areas == area)[0]
    X_area = neural_matrix[idx, :]

    # normalize neuron variance
    X_area = StandardScaler(with_mean=True, with_std=True).fit_transform(X_area)

    res = vit_alignment_cv(
        X_neural=X_area,
        Z_vit=Zv,
        n_splits=N_SPLITS,
        rank=RANK,
        vit_pc_idx=VIT_PC_IDX,
        lam=LAMBDA,
        n_perm=N_PERM,
        seed=RANDOM_SEED,
    )

    results[area] = res
    print(f"mean_test_r: {res['mean_test_r']:.3f}")
    print(f"95 % CI: [{res['ci95'][0]:.3f}, {res['ci95'][1]:.3f}]")
    print(f"mean_perm_p: {res['mean_perm_p']:.4f}")
    print(f"direction stability (cos): "
          f"{res['direction_stability_cos_mean']:.3f} Â± {res['direction_stability_cos_sd']:.3f}")

# ==============================================================
# SAVE RESULTS
# ==============================================================
out_path = "/home/maria/MITNeuralComputation/vit_alignment_results.pkl"
with open(out_path, "wb") as f:
    pickle.dump(results, f)
print(f"\nAll done âœ… Results saved to {out_path}")


ðŸ”¹ Loading ViT embeddingsâ€¦
âœ… ViT PCs covering 90 % variance: 44
ðŸ”¹ Loading neural responsesâ€¦
Found 6 areas: ['VISal' 'VISam' 'VISl' 'VISp' 'VISpm' 'VISrl']

=== Processing area: VISal ===


IndexError: index 1325 is out of bounds for axis 0 with size 118