In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

from kernels import compute_l2, make_l1l2_kernel
from load_ADNI import load_ADNI, convert
from model_evaluation import repeatSVM_labeled

In [2]:
# Root folder of the ADNI data. Set the ADNI_DATA_DIR environment variable to
# point at a local copy; when it is unset the original author's absolute path
# is used, so existing behaviour is unchanged.
path = os.environ.get(
    'ADNI_DATA_DIR',
    '/nmnt/media/home/anvar/conferences_code/MICCAI2017/reproducing_overlappingMICCAI/data',
)

# load_ADNI returns three values; only the third one (the per-scan table that
# the following cells filter and display) is used in this notebook.
_, _, info = load_ADNI(path)

ADNI data shape                   : (807, 68, 68) 
ADNI target variable shape        : (807,) 
ADNI number of unique patients    : (255,)


In [3]:
# Drop the 'SMC' rows and order the remaining scans by diagnosis, then subject,
# then scan number. Later cells read the rows of `data` in this order.
keep_rows = info.target != 'SMC'
data = info.loc[keep_rows].sort_values(by=['target', 'subject_id', 'scan_id'])

In [4]:
# Preview the first five rows of the filtered, sorted table.
data.head(n=5)

Unnamed: 0_level_0,subject_id_file,subject_id,scan_id,matrix,target
subject_id_file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
003_S_4136_1,003_S_4136_1,003_S_4136,1,"[2618.0, 7.0, 68.0, 14.0, 3.0, 73.0, 1303.0, 4...",AD
003_S_4136_2,003_S_4136_2,003_S_4136,2,"[3620.0, 36.0, 50.0, 30.0, 9.0, 93.0, 1683.0, ...",AD
003_S_4136_3,003_S_4136_3,003_S_4136,3,"[3004.0, 3.0, 228.0, 3.0, 20.0, 120.0, 1741.0,...",AD
003_S_4136_4,003_S_4136_4,003_S_4136,4,"[3203.0, 0.0, 45.0, 19.0, 0.0, 115.0, 1683.0, ...",AD
003_S_4142_1,003_S_4142_1,003_S_4142,1,"[2450.0, 0.0, 401.0, 0.0, 0.0, 9.0, 1536.0, 93...",AD


In [5]:
# Number of scans per diagnosis group.
# Expected: 136 AD, 283 EMCI, 147 LMCI, 190 Normal -> 756 scans/connectomes in total,
# coming from 228 unique subjects.
data.groupby(by='target').count()

Unnamed: 0_level_0,subject_id_file,subject_id,scan_id,matrix
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AD,136,136,136,136
EMCI,283,283,283,283
LMCI,147,147,147,147
Normal,190,190,190,190


In [6]:
# Keep one row per subject; the first element of the shape is the number of unique subjects.
data.drop_duplicates(subset='subject_id').shape

(228, 5)

In [7]:
# Build one 68x68 matrix per scan, in the row order of `data`.
#
# The number of scans is taken from `data` instead of a hard-coded 756, so the
# cell keeps working if the filtering above changes.
#
# The `matrix` column is iterated directly (i.e. positionally). `data` is indexed
# by string scan labels (see the `data.head()` output), so the previous
# `data.matrix[i]` with an integer `i` only worked through pandas' deprecated
# integer-to-position fallback for `Series.__getitem__`.
n_scans = len(data)
matrices = np.zeros((n_scans, 68, 68))
for i, stored_matrix in enumerate(data.matrix):
    # `convert` comes from load_ADNI; presumably it expands the stored vector
    # into a square matrix and sets the diagonal to `diag` -- TODO confirm.
    matrices[i] = convert(stored_matrix, diag=0)

In [8]:
# Encode the diagnosis label of every scan as an integer class id.
target = data.target.map({'AD':0, 'EMCI':1, 'LMCI':2, 'Normal':3}).values

# Encode subjects: map every distinct subject_id to a consecutive integer.
# The number of ids is derived from the data; with a hard-coded 228, `zip` would
# silently drop subjects (leaving NaN after `.map`) if the cohort ever grew.
unique_subjects = np.unique(data.subject_id.values)
sub_dict = dict(zip(unique_subjects, np.arange(len(unique_subjects))))
subjects = data.subject_id.map(sub_dict).values

# Binarize the matrices in place: every strictly positive entry becomes 1.
# (A stray trailing backtick that made this line a syntax error was removed.)
matrices[matrices > 0] = 1

In [37]:
# Load the precomputed L2 kernel from disk; if the cache file does not exist yet,
# compute it from `matrices` with `compute_l2` and store it for the next run.
# NOTE(review): an existing cache file is trusted as-is -- delete it after
# changing how `matrices` is built, otherwise a stale kernel is loaded.
L2_KERNEL_PATH = 'precomputed_kernels/l2_kernel.npy'

try:
    l2_kernel = np.load(L2_KERNEL_PATH)
except FileNotFoundError:
    l2_kernel = compute_l2(matrices)
    # np.save does not create missing directories.
    os.makedirs(os.path.dirname(L2_KERNEL_PATH), exist_ok=True)
    np.save(L2_KERNEL_PATH, l2_kernel)

In [21]:
# AD vs NC
# Boolean mask over the rows of `data` selecting the scans of the two classes.
idx_ad_nc = data.target.isin(['AD', 'Normal']).values

# Restrict the precomputed kernel to the selected scans (rows and columns).
# NOTE(review): this cell used to read an undefined name `kernel`; `l2_kernel`
# is the only kernel loaded in this notebook -- confirm it is the intended one.
# The second argument (0.01) is presumably a kernel scale parameter -- TODO confirm.
kernel_l2 = make_l1l2_kernel(l2_kernel[idx_ad_nc, :][:, idx_ad_nc], 0.01)

# `target` and `subjects` are passed in full together with the mask.
# `penalty` is presumably the SVM regularisation constant -- TODO confirm.
auc = repeatSVM_labeled(kernel_l2, target, subjects,
                        idx_ad_nc, penalty = 10)

print('AD vs NC : {} +- {}'.format(auc.mean(), auc.std()))

AD vs NC : 0.7929473317056157 +- 0.009491086964353036


In [25]:
# AD vs LMCI
# Boolean mask over the rows of `data` selecting the scans of the two classes.
idx_ad_lmci = data.target.isin(['AD', 'LMCI']).values

# Restrict the precomputed kernel to the selected scans (rows and columns).
# NOTE(review): this cell used to read an undefined name `kernel`; `l2_kernel`
# is the only kernel loaded in this notebook -- confirm it is the intended one.
# The second argument (0.01) is presumably a kernel scale parameter -- TODO confirm.
kernel_l2 = make_l1l2_kernel(l2_kernel[idx_ad_lmci, :][:, idx_ad_lmci], 0.01)

# `target` and `subjects` are passed in full together with the mask.
# `penalty` is presumably the SVM regularisation constant -- TODO confirm.
auc = repeatSVM_labeled(kernel_l2, target, subjects,
                        idx_ad_lmci, penalty = 1)

print('AD vs LMCI : {} +- {}'.format(auc.mean(), auc.std()))

AD vs LMCI : 0.6756063829787234 +- 0.008460631007699016


In [24]:
# AD vs EMCI
# Boolean mask over the rows of `data` selecting the scans of the two classes.
idx_ad_emci = data.target.isin(['AD', 'EMCI']).values

# Restrict the precomputed kernel to the selected scans (rows and columns).
# NOTE(review): this cell used to read an undefined name `kernel`; `l2_kernel`
# is the only kernel loaded in this notebook -- confirm it is the intended one.
# The second argument (0.01) is presumably a kernel scale parameter -- TODO confirm.
kernel_l2 = make_l1l2_kernel(l2_kernel[idx_ad_emci, :][:, idx_ad_emci], 0.01)

# `target` and `subjects` are passed in full together with the mask.
# `penalty` is presumably the SVM regularisation constant -- TODO confirm.
auc = repeatSVM_labeled(kernel_l2, target, subjects,
                        idx_ad_emci, penalty = 0.01)

print('AD vs EMCI : {} +- {}'.format(auc.mean(), auc.std()))

AD vs EMCI : 0.695659574468085 +- 0.010783246566724998


In [23]:
# LMCI vs EMCI
# Boolean mask over the rows of `data` selecting the scans of the two classes.
idx_lmci_emci = data.target.isin(['LMCI', 'EMCI']).values

# Restrict the precomputed kernel to the selected scans (rows and columns).
# NOTE(review): this cell used to read an undefined name `kernel`; `l2_kernel`
# is the only kernel loaded in this notebook -- confirm it is the intended one.
# The second argument (0.01) is presumably a kernel scale parameter -- TODO confirm.
kernel_l2 = make_l1l2_kernel(l2_kernel[idx_lmci_emci, :][:, idx_lmci_emci], 0.01)

# `target` and `subjects` are passed in full together with the mask.
# `penalty` is presumably the SVM regularisation constant -- TODO confirm.
auc = repeatSVM_labeled(kernel_l2, target, subjects,
                        idx_lmci_emci, penalty = 0.1)

print('LMCI vs EMCI : {} +- {}'.format(auc.mean(), auc.std()))

LMCI vs EMCI : 0.5294625000000001 +- 0.024227118715398243


In [34]:
# LMCI vs NC
# Boolean mask over the rows of `data` selecting the scans of the two classes.
idx_lmci_nc = data.target.isin(['LMCI', 'Normal']).values

# Restrict the precomputed kernel to the selected scans (rows and columns).
# NOTE(review): this cell used to read an undefined name `kernel`; `l2_kernel`
# is the only kernel loaded in this notebook -- confirm it is the intended one.
# The second argument (0.01) is presumably a kernel scale parameter -- TODO confirm.
kernel_l2 = make_l1l2_kernel(l2_kernel[idx_lmci_nc, :][:, idx_lmci_nc], 0.01)

# `target` and `subjects` are passed in full together with the mask.
# `penalty` is presumably the SVM regularisation constant -- TODO confirm.
auc = repeatSVM_labeled(kernel_l2, target, subjects,
                        idx_lmci_nc, penalty = 0.01)

print('LMCI vs NC : {} +- {}'.format(auc.mean(), auc.std()))

LMCI vs NC : 0.6097213114754099 +- 0.018861195409809524


In [36]:
# EMCI vs NC
# Boolean mask over the rows of `data` selecting the scans of the two classes.
idx_emci_nc = data.target.isin(['EMCI', 'Normal']).values

# Restrict the precomputed kernel to the selected scans (rows and columns).
# NOTE(review): this cell used to read an undefined name `kernel`; `l2_kernel`
# is the only kernel loaded in this notebook -- confirm it is the intended one.
# The second argument is 0.1 here (the other comparisons use 0.01); it is
# presumably a kernel scale parameter -- TODO confirm.
kernel_l2 = make_l1l2_kernel(l2_kernel[idx_emci_nc, :][:, idx_emci_nc], 0.1)

# `target` and `subjects` are passed in full together with the mask.
# `penalty` is presumably the SVM regularisation constant -- TODO confirm.
auc = repeatSVM_labeled(kernel_l2, target, subjects,
                        idx_emci_nc, penalty = 10)

print('EMCI vs NC : {} +- {}'.format(auc.mean(), auc.std()))

EMCI vs NC : 0.5864549180327869 +- 0.014022458782576507
