In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc, roc_curve, roc_auc_score
from sklearn.model_selection import cross_val_predict

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# ORBITAL

In [2]:
crops_dir = '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/ORBITAL/mask'
skels = np.load(os.path.join(crops_dir, 'Lskeleton.npy'))
skel_subs = pd.read_csv(os.path.join(crops_dir, 'Lskeleton_subject.csv'))
skels = skels.reshape(skels.shape[0], np.prod(skels.shape[1:]))
skels = skels.astype(bool)

In [3]:
splits_dir = '/neurospin/dico/data/deep_folding/current/datasets/orbital_patterns/Troiani'
train_subs = pd.read_csv(os.path.join(splits_dir, 'split_0-6.csv'), names=['Subject'])
val_subs = pd.read_csv(os.path.join(splits_dir, 'split_7.csv'), names=['Subject'])
test_subs = pd.read_csv(os.path.join(splits_dir, 'split_8-9.csv'), names=['Subject'])

In [4]:
label='Left_OFC'
labels = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/hcp/hcp_OFC_labels_from_0.csv', usecols=['Subject', label])
proportions = np.unique(labels[label], return_counts=True)
proportions = proportions[1] / np.sum(proportions[1])

In [5]:
train = skel_subs.loc[skel_subs['Subject'].isin(train_subs['Subject'])]
idxs_train = train.index.tolist()
Y_train = pd.merge(train, labels)[label]
train_skels= skels[idxs_train]

val = skel_subs.loc[skel_subs['Subject'].isin(val_subs['Subject'])]
idxs_val = val.index.tolist()
Y_val = pd.merge(val, labels)[label]
val_skels= skels[idxs_val]

test = skel_subs.loc[skel_subs['Subject'].isin(test_subs['Subject'])]
idxs_test = test.index.tolist()
Y_test = pd.merge(test, labels)[label]
test_skels= skels[idxs_test]

In [9]:
np.var(train_skels) * train_skels.shape[1]

650.9700082294804

In [13]:
#for C in [1e-2, 1e-1, 1, 10]: 
for C in [1]:   
    for class_weight in [None]:
        for degree in [3]:
            for gamma in [k/(np.var(train_skels) * train_skels.shape[1]) for k in [0.1, 0.33, 3.3, 10]]:
                model = SVC(kernel='rbf', probability=True, degree=degree, gamma=gamma,
                            max_iter=10000, random_state=None,
                            C=C, class_weight=class_weight, decision_function_shape='ovr')
                
                model.fit(train_skels, Y_train)
                labels_proba = model.predict_proba(val_skels)
                roc_aucs = roc_auc_score(Y_val, labels_proba, multi_class='ovr', average=None)

                print(gamma, C, class_weight)
                print(roc_aucs)
                print(np.dot(roc_aucs, proportions.T))

0.00015361690820746374 1 None
[0.60125    0.53496503 0.72222222 0.39150943]
0.5910736128709299
0.0005069357970846303 1 None
[0.59875    0.47377622 0.72698413 0.39622642]
0.574122560397249
0.005069357970846303 1 None
[0.54125    0.6048951  0.26984127 0.58490566]
0.5140037152614815
0.015361690820746373 1 None
[0.39       0.44055944 0.36349206 0.46698113]
0.40409526900583437


In [127]:
# test
model = SVC(kernel='rbf', probability=True, degree=3,
                        max_iter=-1, random_state=None,
                        C=1, class_weight=None, decision_function_shape='ovr')
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba, multi_class='ovr', average=None)
print(roc_auc)
print(np.dot(roc_auc, proportions.T))

[0.66545675 0.61278195 0.65291262 0.70560748]
0.6512508148626943


In [128]:
# LogisticRegression
model = LogisticRegression(C=1,  penalty='l1', solver='liblinear', class_weight='balanced', max_iter=100, multi_class='ovr')
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(val_skels)
roc_auc = roc_auc_score(Y_val, labels_proba, multi_class='ovr', average=None)
print(roc_auc)
print(np.dot(roc_auc, proportions.T))

[0.62       0.51573427 0.63968254 0.36792453]
0.5789860793389128


In [124]:
#test 
model = LogisticRegression(C=1,  penalty='l1', solver='liblinear', class_weight='balanced', max_iter=100, multi_class='ovr')
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba, multi_class='ovr', average=None)
print(roc_auc)
print(np.dot(roc_auc, proportions.T))

[0.65759226 0.60594668 0.59061489 0.42523364]
0.6171394484409242


In [129]:
# ElasticNet
model = LogisticRegression(C=1, penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight='balanced', max_iter=100, multi_class='ovr')
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(val_skels)
roc_auc = roc_auc_score(Y_val, labels_proba, multi_class='ovr', average=None)
print(roc_auc)
print(np.dot(roc_auc, proportions.T))



[0.63       0.56643357 0.73015873 0.36320755]
0.6133699142237123


In [126]:
# ElasticNet
model = LogisticRegression(C=0.1, penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight='balanced', max_iter=100, multi_class='ovr')
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba, multi_class='ovr', average=None)
print(roc_auc)
print(np.dot(roc_auc, proportions.T))



[0.6291591  0.58202324 0.64967638 0.47079439]
0.6098809629161359




# PCS

In [11]:
crops_dir = '/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/crops/2mm/CINGULATE/mask'
skels = np.load(os.path.join(crops_dir, 'Rskeleton.npy'))
skel_subs = pd.read_csv(os.path.join(crops_dir, 'Rskeleton_subject.csv'))
skels = skels.reshape(skels.shape[0], np.prod(skels.shape[1:]))
skels = skels.astype(bool)

In [12]:
splits_dir = '/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/train_val_test'
train_subs = pd.read_csv(os.path.join(splits_dir, 'ACCpatterns_subjects_filtered_train.csv'), names=['Subject'])
val_subs = pd.read_csv(os.path.join(splits_dir, 'ACCpatterns_subjects_filtered_val.csv'), names=['Subject'])
test_subs = pd.read_csv(os.path.join(splits_dir, 'ACCpatterns_subjects_filtered_test.csv'), names=['Subject'])

In [13]:
label='Right_PCS'
labels = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/ACCpatterns/subjects_labels.csv', usecols=['long_name', label])
labels.columns = ['Subject', label]
proportions = np.unique(labels[label], return_counts=True)
proportions = proportions[1] / np.sum(proportions[1])

In [14]:
train = skel_subs.loc[skel_subs['Subject'].isin(train_subs['Subject'])]
idxs_train = train.index.tolist()
Y_train = pd.merge(train, labels)[label]
train_skels= skels[idxs_train]

val = skel_subs.loc[skel_subs['Subject'].isin(val_subs['Subject'])]
idxs_val = val.index.tolist()
Y_val = pd.merge(val, labels)[label]
val_skels= skels[idxs_val]

test = skel_subs.loc[skel_subs['Subject'].isin(test_subs['Subject'])]
idxs_test = test.index.tolist()
Y_test = pd.merge(test, labels)[label]
test_skels= skels[idxs_test]

In [41]:
for C in [1e-2, 1e-1, 1, 10]:    
    for class_weight in ['balanced']:
        for degree in [2, 3, 4, 5]:

            model = SVC(kernel='rbf', probability=True, degree=degree,
                        max_iter=-1, random_state=None,
                        C=C, class_weight=class_weight, decision_function_shape='ovr')
            
            model.fit(train_skels, Y_train)
            labels_proba = model.predict_proba(val_skels)
            roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])

            print(C, class_weight, degree)
            print(roc_auc)

0.01 balanced 2
0.1904761904761905
0.01 balanced 3
0.1904761904761905
0.01 balanced 4
0.1904761904761905
0.01 balanced 5
0.1904761904761905
0.1 balanced 2
0.1904761904761905
0.1 balanced 3
0.1904761904761905
0.1 balanced 4
0.1904761904761905
0.1 balanced 5
0.1904761904761905
1 balanced 2
0.8205128205128205
1 balanced 3
0.8205128205128205
1 balanced 4
0.8205128205128205
1 balanced 5
0.8205128205128205
10 balanced 2
0.8241758241758242
10 balanced 3
0.8241758241758242
10 balanced 4
0.8241758241758242
10 balanced 5
0.8241758241758242


In [None]:
model = SVC(kernel='rbf', probability=True, degree=3,
                        max_iter=100, random_state=None,
                        C=1, class_weight='balanced', decision_function_shape='ovr')
model.fit(val_skels, Y_val)
labels_proba = model.predict_proba(val_skels)
roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])
print(roc_auc)

In [39]:
model = SVC(kernel='rbf', probability=True, degree=3,
                        max_iter=-1, random_state=None,
                        C=1, class_weight='balanced', decision_function_shape='ovr')
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba[:, 1])
print(roc_auc)



0.7428842504743833


In [71]:
# LogisticRegression
model = LogisticRegression(C=0.3, penalty='l1', solver='liblinear', class_weight='balanced', max_iter=100000)
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(val_skels)
roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])
print(roc_auc)

0.7912087912087913


In [76]:
#test
model = LogisticRegression(C=0.3, penalty='l1', solver='liblinear', class_weight='balanced', max_iter=100000)
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba[:, 1])
print(roc_auc)

0.6195445920303605


In [58]:
# ElasticNet
model = LogisticRegression(C=0.1, penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight='balanced', max_iter=100)
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(val_skels)
roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])
print(roc_auc)

0.8058608058608059




In [77]:
# test
model = LogisticRegression(C=0.1, penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight='balanced', max_iter=100)
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba[:, 1])
print(roc_auc)

0.7201138519924098




# FIP R

In [27]:
crops_dir = '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/F.I.P./mask'
skels = np.load(os.path.join(crops_dir, 'Rskeleton.npy'))
skel_subs = pd.read_csv(os.path.join(crops_dir, 'Rskeleton_subject.csv'))
skels = skels.reshape(skels.shape[0], np.prod(skels.shape[1:]))
skels = skels.astype(bool)

In [28]:
splits_dir = '/neurospin/dico/data/deep_folding/current/datasets/hcp/FIP'
train_subs = pd.read_csv(os.path.join(splits_dir, 'train_split.csv'), names=['Subject'])
val_subs = pd.read_csv(os.path.join(splits_dir, 'val_split.csv'), names=['Subject'])
test_subs = pd.read_csv(os.path.join(splits_dir, 'test_split.csv'), names=['Subject'])

In [29]:
label='Right_FIP'
labels = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/hcp/FIP/FIP_labels.csv', usecols=['Subject', label])
labels.columns = ['Subject', label]
proportions = np.unique(labels[label], return_counts=True)
proportions = proportions[1] / np.sum(proportions[1])

In [30]:
train = skel_subs.loc[skel_subs['Subject'].isin(train_subs['Subject'])]
idxs_train = train.index.tolist()
Y_train = pd.merge(train, labels)[label]
train_skels= skels[idxs_train]

val = skel_subs.loc[skel_subs['Subject'].isin(val_subs['Subject'])]
idxs_val = val.index.tolist()
Y_val = pd.merge(val, labels)[label]
val_skels= skels[idxs_val]

test = skel_subs.loc[skel_subs['Subject'].isin(test_subs['Subject'])]
idxs_test = test.index.tolist()
Y_test = pd.merge(test, labels)[label]
test_skels= skels[idxs_test]

svm rbf

In [31]:
## hyperparameter search
for C in [1e-2, 1e-1, 1, 10]:    
    for class_weight in ['balanced']:
        for degree in [2, 3, 4, 5]:

            model = SVC(kernel='rbf', probability=True, degree=degree,
                        max_iter=-1, random_state=None,
                        C=C, class_weight=class_weight, decision_function_shape='ovr')
            
            model.fit(train_skels, Y_train)
            labels_proba = model.predict_proba(val_skels)
            roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])

            print(C, class_weight, degree)
            print(roc_auc)

0.01 balanced 2
0.4907407407407407
0.01 balanced 3
0.4907407407407407
0.01 balanced 4
0.4907407407407407
0.01 balanced 5
0.4907407407407407
0.1 balanced 2
0.4907407407407407
0.1 balanced 3
0.4907407407407407
0.1 balanced 4
0.4907407407407407
0.1 balanced 5
0.4907407407407407
1 balanced 2
0.5092592592592592
1 balanced 3
0.5092592592592592
1 balanced 4
0.5092592592592592
1 balanced 5
0.5092592592592592
10 balanced 2
0.48765432098765427
10 balanced 3
0.48765432098765427
10 balanced 4
0.48765432098765427
10 balanced 5
0.48765432098765427


In [None]:
## test set after hyperparameter search
model = SVC(kernel='rbf', probability=True, degree=3,
                        max_iter=-1, random_state=None,
                        C=1, class_weight='balanced', decision_function_shape='ovr')
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba[:, 1])
print(roc_auc)

In [12]:
# LogisticRegression
for C in [1e-2, 1e-1, 0.3, 1, 10]:    
    for class_weight in ['balanced', None]:
        model = LogisticRegression(C=C, penalty='l1', solver='liblinear', class_weight=class_weight, max_iter=100000)
        model.fit(train_skels, Y_train)
        labels_proba = model.predict_proba(val_skels)
        roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])
        print(C, class_weight)
        print(roc_auc)

0.01 balanced
0.5
0.01 None
0.5
0.1 balanced
0.40740740740740744
0.1 None
0.3518518518518518
0.3 balanced
0.5123456790123457
0.3 None
0.49382716049382713
1 balanced
0.49074074074074076
1 None
0.49691358024691357
10 balanced
0.45679012345679015
10 None
0.42592592592592593


In [13]:
#test
model = LogisticRegression(C=0.1, penalty='l1', solver='liblinear', class_weight=None, max_iter=100000)
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba[:, 1])
print(roc_auc)

The history saving thread hit an unexpected error (OperationalError('unable to open database file')).History will not be written to the database.
0.4313271604938272


In [15]:
# ElasticNet
for C in [1e-2, 1e-1, 0.3, 1, 10]:    
    for class_weight in ['balanced', None]:
        model = LogisticRegression(C=C, penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight=class_weight, max_iter=100)
        model.fit(train_skels, Y_train)
        labels_proba = model.predict_proba(val_skels)
        roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])
        print(C, class_weight)
        print(roc_auc)

0.5
0.5




0.5648148148148148




0.5740740740740741




0.5246913580246914




0.5154320987654321




0.5092592592592593




0.5154320987654321




0.49691358024691357
0.4722222222222222




In [16]:
# test
model = LogisticRegression(C=0.1, penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight=None, max_iter=100)
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba[:, 1])
print(roc_auc)

0.5123456790123457




# FIP L

In [17]:
crops_dir = '/neurospin/dico/data/deep_folding/current/datasets/hcp/crops/2mm/F.I.P./mask'
skels = np.load(os.path.join(crops_dir, 'Lskeleton.npy'))
skel_subs = pd.read_csv(os.path.join(crops_dir, 'Lskeleton_subject.csv'))
skels = skels.reshape(skels.shape[0], np.prod(skels.shape[1:]))
skels = skels.astype(bool)

In [18]:
splits_dir = '/neurospin/dico/data/deep_folding/current/datasets/hcp/FIP'
train_subs = pd.read_csv(os.path.join(splits_dir, 'train_split.csv'), names=['Subject'])
val_subs = pd.read_csv(os.path.join(splits_dir, 'val_split.csv'), names=['Subject'])
test_subs = pd.read_csv(os.path.join(splits_dir, 'test_split.csv'), names=['Subject'])

In [19]:
label='Left_FIP'
labels = pd.read_csv('/neurospin/dico/data/deep_folding/current/datasets/hcp/FIP/FIP_labels.csv', usecols=['Subject', label])
labels.columns = ['Subject', label]
proportions = np.unique(labels[label], return_counts=True)
proportions = proportions[1] / np.sum(proportions[1])

In [20]:
train = skel_subs.loc[skel_subs['Subject'].isin(train_subs['Subject'])]
idxs_train = train.index.tolist()
Y_train = pd.merge(train, labels)[label]
train_skels= skels[idxs_train]

val = skel_subs.loc[skel_subs['Subject'].isin(val_subs['Subject'])]
idxs_val = val.index.tolist()
Y_val = pd.merge(val, labels)[label]
val_skels= skels[idxs_val]

test = skel_subs.loc[skel_subs['Subject'].isin(test_subs['Subject'])]
idxs_test = test.index.tolist()
Y_test = pd.merge(test, labels)[label]
test_skels= skels[idxs_test]

In [25]:
## hyperparameter search
for C in [1e-2, 1e-1, 1, 10]:    
    for class_weight in ['balanced']:
        for degree in [2, 3, 4, 5]:

            model = SVC(kernel='rbf', probability=True, degree=degree,
                        max_iter=-1, random_state=None,
                        C=C, class_weight=class_weight, decision_function_shape='ovr')
            
            model.fit(train_skels, Y_train)
            labels_proba = model.predict_proba(val_skels)
            roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])

            print(C, class_weight, degree)
            print(roc_auc)

0.01 balanced 2
0.3439153439153439
0.01 balanced 3
0.3439153439153439
0.01 balanced 4
0.3439153439153439
0.01 balanced 5
0.3439153439153439
0.1 balanced 2
0.3439153439153439
0.1 balanced 3
0.3439153439153439
0.1 balanced 4
0.3439153439153439
0.1 balanced 5
0.3439153439153439
1 balanced 2
0.6547619047619048
1 balanced 3
0.3386243386243386
1 balanced 4
0.6613756613756614
1 balanced 5
0.6613756613756614
10 balanced 2
0.6455026455026455
10 balanced 3
0.6455026455026455
10 balanced 4
0.6455026455026455
10 balanced 5
0.6455026455026455


In [26]:
## test set after hyperparameter search
model = SVC(kernel='rbf', probability=True, degree=3,
                        max_iter=-1, random_state=None,
                        C=1, class_weight='balanced', decision_function_shape='ovr')
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba[:, 1])
print(roc_auc)

0.6455026455026456


In [21]:
# LogisticRegression
for C in [1e-2, 1e-1, 0.3, 1, 10]:    
    for class_weight in ['balanced', None]:
        model = LogisticRegression(C=C, penalty='l1', solver='liblinear', class_weight=class_weight, max_iter=100000)
        model.fit(train_skels, Y_train)
        labels_proba = model.predict_proba(val_skels)
        roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])
        print(C, class_weight)
        print(roc_auc)

0.01 balanced
0.5
0.01 None
0.5
0.1 balanced
0.49603174603174605
0.1 None
0.5026455026455027
0.3 balanced
0.537037037037037
0.3 None
0.537037037037037
1 balanced
0.5211640211640212
1 None
0.5291005291005291
10 balanced
0.5740740740740741
10 None
0.5211640211640212


In [22]:
#test
model = LogisticRegression(C=10, penalty='l1', solver='liblinear', class_weight='balanced', max_iter=100000)
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba[:, 1])
print(roc_auc)

0.4722222222222222


In [23]:
# ElasticNet
for C in [1e-2, 1e-1, 0.3, 1, 10]:    
    for class_weight in ['balanced', None]:
        model = LogisticRegression(C=C, penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight=class_weight, max_iter=100)
        model.fit(train_skels, Y_train)
        labels_proba = model.predict_proba(val_skels)
        roc_auc = roc_auc_score(Y_val, labels_proba[:, 1])
        print(C, class_weight)
        print(roc_auc)

0.5
0.5




0.5608465608465609




0.5740740740740741




0.5793650793650794




0.5740740740740742




0.6507936507936508




0.6613756613756614




0.6375661375661376
0.6481481481481481




In [24]:
# test
model = LogisticRegression(C=1, penalty='elasticnet', solver='saga', l1_ratio=0.5, class_weight='balanced', max_iter=100)
model.fit(train_skels, Y_train)
labels_proba = model.predict_proba(test_skels)
roc_auc = roc_auc_score(Y_test, labels_proba[:, 1])
print(roc_auc)

0.5707671957671958


