In [230]:
import numpy as np
import os
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score

In [231]:
# PCS

## args
side='R'
subject_column_name='long_name'
label='Right_PCS'
label_type='binary'
labels = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/subjects_labels.csv', usecols=[subject_column_name, label])
train_subjects = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/train_val_test/ACCpatterns_subjects_filtered_train.csv', header=None)
val_subjects = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/train_val_test/ACCpatterns_subjects_filtered_val.csv', header=None)
test_subjects = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/train_val_test/ACCpatterns_subjects_filtered_test.csv', header=None)
crops_dir = '/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/crops/2mm/CINGULATE/mask/'

In [241]:
# OFC
side='L'
subject_column_name='Subject'
label='Left_OFC'
label_type='multiclass'
labels = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/hcp/hcp_OFC_labels.csv', usecols=[subject_column_name, label])
train_subjects = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/orbital_patterns/Troiani/split_0-6.csv', header=None)
val_subjects = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/orbital_patterns/Troiani/split_7.csv', header=None)
test_subjects = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/orbital_patterns/Troiani/split_8-9.csv', header=None)
crops_dir = '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/ORBITAL/mask/'

In [242]:
skeletons = np.load(os.path.join(crops_dir, f'{side}skeleton.npy'))
skeletons = skeletons != 0
subjects = pd.read_csv(os.path.join(crops_dir, f'{side}skeleton_subject.csv'))

In [243]:
train_val_idxs = subjects.loc[(subjects['Subject'].isin(train_subjects[0]))|(subjects['Subject'].isin(val_subjects[0]))].index.tolist()
test_idxs = subjects.loc[subjects['Subject'].isin(test_subjects[0])].index.tolist()

In [244]:
pca = PCA(n_components=256)

In [245]:
X_train = skeletons[train_val_idxs]
X_train = X_train.reshape(len(X_train), (np.prod(X_train.shape) // len(X_train)))

In [246]:
pca.fit(X_train)

PCA(n_components=256)

In [247]:
X_test = skeletons[test_idxs]
X_test = X_test.reshape(len(X_test), (np.prod(X_test.shape) // len(X_test)))

In [248]:
embeddings_pca_test = pca.transform(X_test)
embeddings_pca_train = pca.transform(X_train)

In [263]:
test_subjects.columns=[subject_column_name]
labels_test = labels.loc[labels[subject_column_name].isin(test_subjects[subject_column_name])]
Y_true_test = test_subjects.merge(labels_test, on=subject_column_name)[label]
if label=='Right_PCS':
    Y_true_test = Y_true_test=='present'


train_val_subjects = pd.concat((train_subjects, val_subjects))
train_val_subjects.columns=[subject_column_name]
labels_train_val = labels.loc[labels[subject_column_name].isin(train_val_subjects[subject_column_name])]
Y_true_train_val = train_val_subjects.merge(labels_train_val, on=subject_column_name)[label]
if label=='Right_PCS':
    Y_true_train_val = Y_true_train_val=='present'
Y_true_train_val = (Y_true_train_val-np.min(Y_true_train_val)).astype(int) # in case labels are 1,2,3,4 instead of 0,1,2,3

In [264]:
for i in range(5):
    model = SVC(kernel='linear', probability=True,
                max_iter=-1, random_state=i,
                C=0.01, class_weight='balanced')
    model.fit(embeddings_pca_train, Y_true_train_val)
    Y_pred_test = model.predict_proba(embeddings_pca_test)
    if label_type=='binary':
        Y_pred_test = Y_pred_test[:, 0]
    Y_true_test = (Y_true_test-np.min(Y_true_test)).astype(int) # in case labels are 1,2,3,4 instead of 0,1,2,3
    print(roc_auc_score(Y_true_test, Y_pred_test, multi_class='ovr', average='weighted'))

0.48512684026845554
0.49202274803099283
0.4925874689641204
0.4932328834975642
0.49202431526922574
