In [1]:
import sys 
sys.path.append("../")

import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import StratifiedShuffleSplit, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from pathlib import Path

from src.dataset import load_dataset

import matplotlib.pyplot as plt

In [2]:
def classify(X, y, C=1, kernel="rbf", num_runs=5, seed=12345, n_jobs=-1, **kwargs):
    """Score a standardised SVM on repeated stratified shuffle splits.

    Args:
        X: Feature matrix handed to ``cross_val_score``.
        y: Class labels, used both for fitting and for stratifying the splits.
        C: Regularisation parameter forwarded to ``SVC``.
        kernel: Kernel name forwarded to ``SVC``.
        num_runs: Number of shuffle splits (i.e. number of scores returned).
        seed: Random state shared by the classifier and the splitter.
        n_jobs: Parallelism setting forwarded to ``cross_val_score``.
        **kwargs: Any further keyword arguments for ``SVC``.

    Returns:
        The value returned by ``cross_val_score``: one score per split.
    """
    # Each split holds out 10% of the samples for testing.
    splitter = StratifiedShuffleSplit(n_splits=num_runs, test_size=0.10, random_state=seed)
    # The scaler lives inside the pipeline so that it is re-fitted on the
    # training portion of every split rather than on the full data.
    estimator = Pipeline(
        steps=[
            ("scale", StandardScaler()),
            ("clf", SVC(C=C, kernel=kernel, random_state=seed, **kwargs)),
        ]
    )
    return cross_val_score(estimator, X, y, cv=splitter, n_jobs=n_jobs)

In [3]:
# Experiment configuration.
dataset = "ukb"  # use "hcp" or "ukb" here
trial, seed = 0, 12345  # trial selects the models_<dataset>_<trial> directory; seed is passed to classify()
num_runs = 10  # number of shuffle splits requested from classify()

In [4]:
data_dir = "../data"

# Preprocessing settings for the selected dataset.
# NOTE(review): dataset_args is built here but is not passed to load_dataset
# below, so the loader runs with its own defaults for these options --
# confirm that this is intended.
dataset_args = {
    "dataset": dataset,
    "window_size": 30,
    "window_stride": 30,
    "measure": "correlation",
    "top_percent": 5,
}

# The dataset is loaded because the gender labels are read from it.
data = load_dataset(dataset=dataset, data_dir=data_dir)

In [6]:
# Default output directory of the training script: a sibling of the current
# working directory. Change this if the models were saved elsewhere.
save_path = Path.cwd().parent.joinpath(f"models_{dataset}_{trial}")

# Embeddings of the best model according to validation negative log-likelihood.
# results.npy holds a pickled Python object wrapped in a 0-d array, hence
# allow_pickle=True followed by .item(). Unpickling can execute arbitrary
# code, so only load files you produced yourself.
results_file = save_path / "results.npy"
results = np.load(results_file, allow_pickle=True).item()
embeddings = results["embeddings"]

In [10]:
# Build one feature vector per subject: the subject's training beta
# embeddings averaged over their first two axes.
x, y = [], []
for subject_idx, subject_graphs, gender_label in data:
    train_betas = np.array(embeddings[subject_idx]["beta_embeddings"]["train"])
    betas = train_betas.mean(axis=(0, 1))
    x.append(betas)
    y.append(gender_label)

X, Y = np.array(x), np.array(y)

# NOTE(review): this rebinds `results`, which previously held the object
# loaded from results.npy, to the array of cross-validation scores.
results = classify(X, Y, num_runs=num_runs, seed=seed)

mean_accuracy = np.mean(results)
std_accuracy = np.std(results)
print(f'Gender classification accuracy: {mean_accuracy} ±  {std_accuracy}')

Gender classification accuracy: 0.74 ±  0.08666666666666668
