In [1]:
import os

import himalaya.scoring
import numpy as np
import pandas as pd
import seaborn as sns
from himalaya.backend import set_backend
from himalaya.ridge import BandedRidgeCV, ColumnTransformerNoStack
from matplotlib import pyplot as plt
import simplstyles
from sklearn.pipeline import make_pipeline
from voxelwise_tutorials.delayer import Delayer

from fmri.features import load_brain_data, load_feature

In [2]:
backend = set_backend("torch_cuda", on_error='throw')
plt.style.use('nord-light-talk')
data_dir = "../../data"
simplstyles

<module 'simplstyles' from '/home/leo/PycharmProjects/compare_variance_residual/.venv/lib/python3.12/site-packages/simplstyles/__init__.py'>

In [3]:
subject = 1
modality = "reading"

In [4]:
n_alphas_batch = 3
n_targets_batch = 100
n_targets_batch_refit = 50

In [5]:
number_of_delays = 4
alphas = np.logspace(-5, 20, 10)
cv = 5

In [6]:
def result_path(subject, modality, ridge_type, param_name, param_range):
    dir = f"results/brain_maps_validation_curves/{modality}/{subject}/{ridge_type}"
    os.makedirs(dir, exist_ok=True)
    path = os.path.join(dir, f"{param_name}_{param_range}.csv")
    return path

# Load features

In [7]:
X_semantic, n_samples_train = load_feature(data_dir, "english1000")
X_low_level, n_samples_train = load_feature(data_dir, "letters")
X = np.concatenate([X_semantic, X_low_level], axis=1)

# Load brain data

In [8]:
Y, n_samples_train = load_brain_data(data_dir, subject, modality)

  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_train = np.vstack([zscore(Y_train[story][:-trim]) for story in Y_train.keys()])
  Y_test = [np.vstack([zscore(Y_test[story][i][:-trim]) for story in Y_test.keys()]) for i in range(2)]
  Y_test = [np.vstack([zscore(Y_test[story][i][:-trim]) 

# Iterations

In [9]:
n_iter_range = np.linspace(1, 100, 10).astype(int)
# turn to python ints
n_iter_range = [int(n_iter) for n_iter in n_iter_range]
n_iter_range

[1, 12, 23, 34, 45, 56, 67, 78, 89, 100]

In [10]:
from sklearn.metrics import r2_score

cv_scores = pd.DataFrame()
for _n_iter in n_iter_range:
    print(_n_iter)
    path = result_path(subject, modality, "banded_ridge", "n_iter", _n_iter)
    if not os.path.exists(path):
        print("File does not exist")
        delayer = Delayer(delays=range(1, number_of_delays + 1))
        start_and_end = np.concatenate([[0], np.cumsum([X_semantic.shape[1], X_low_level.shape[1]])])
        slices = [slice(start, end) for start, end in zip(start_and_end[:-1], start_and_end[1:])]
        ct = ColumnTransformerNoStack(transformers=[(f'feature_{i}', delayer, s) for i, s in enumerate(slices)])

        solver_params = dict(
            alphas=alphas, n_iter=_n_iter, n_targets_batch=n_targets_batch,
            n_alphas_batch=n_alphas_batch, n_targets_batch_refit=n_targets_batch_refit,
            score_func=himalaya.scoring.r2_score)
        banded_ridge_cv = BandedRidgeCV(cv=cv, groups="input", solver_params=solver_params)

        pipeline = make_pipeline(
            ct,
            banded_ridge_cv
        )

        pipeline.fit(X[:n_samples_train], Y[:n_samples_train])



        prediction = pipeline.predict(X[n_samples_train:])
        prediction = backend.to_numpy(prediction)

        correlation = np.array([np.corrcoef(Y[n_samples_train:, i], prediction[:, i])[0, 1] for i in range(Y.shape[1])])

        # (n_iter, n_targets)
        cv_score = pipeline[-1].cv_scores_
        cv_score = max(cv_score, key=lambda x: x.mean())

        r2 = himalaya.scoring.r2_score(Y[n_samples_train:], prediction)
        result = pd.DataFrame(
            {
                'correlation_score': correlation,
                'cv_score': cv_score,
                'r2_score': r2
            }
        )

        result.to_csv(path)
    else:
        print("File exists")
        result = pd.read_csv(path)

1
File does not exist




[                                        ] 0% | 0.00 sec | 1 random sampling with cv | 



[........................................] 100% | 74.60 sec | 1 random sampling with cv | 


  c /= stddev[:, None]
  c /= stddev[None, :]


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [None]:
sns.catplot(data=cv_scores, kind='box')