In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import community

from sklearn.preprocessing import LabelEncoder

from load_ADNI import load_ADNI, convert
from model_evaluation import repeatSVM_labeled, grid_search
from kernels import compute_AMI, make_exp_kernel

In [2]:
path = '/nmnt/media/home/anvar/conferences_code/MICCAI2017/reproducing_overlappingMICCAI/data'
_, _, info = load_ADNI(path)

ADNI data shape                   : (807, 68, 68) 
ADNI target variable shape        : (807,) 
ADNI number of unique patients    : (255,)


In [3]:
data = info[info.target != 'SMC']
data = data.sort_values(['target', 'subject_id', 'scan_id'],)

In [4]:
data.head()

Unnamed: 0_level_0,subject_id_file,subject_id,scan_id,matrix,target
subject_id_file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
003_S_4136_1,003_S_4136_1,003_S_4136,1,"[2618.0, 7.0, 68.0, 14.0, 3.0, 73.0, 1303.0, 4...",AD
003_S_4136_2,003_S_4136_2,003_S_4136,2,"[3620.0, 36.0, 50.0, 30.0, 9.0, 93.0, 1683.0, ...",AD
003_S_4136_3,003_S_4136_3,003_S_4136,3,"[3004.0, 3.0, 228.0, 3.0, 20.0, 120.0, 1741.0,...",AD
003_S_4136_4,003_S_4136_4,003_S_4136,4,"[3203.0, 0.0, 45.0, 19.0, 0.0, 115.0, 1683.0, ...",AD
003_S_4142_1,003_S_4142_1,003_S_4142,1,"[2450.0, 0.0, 401.0, 0.0, 0.0, 9.0, 1536.0, 93...",AD


In [5]:
data.groupby('target').count()

# 136 AD, 283 EMCI, 147 LMCI, 190 Normal, total of 756 scans/connectomes
# 228 unique subjects

Unnamed: 0_level_0,subject_id_file,subject_id,scan_id,matrix
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AD,136,136,136,136
EMCI,283,283,283,283
LMCI,147,147,147,147
Normal,190,190,190,190


In [6]:
data.drop_duplicates('subject_id').shape

(228, 5)

In [7]:
matrices = np.zeros((756, 68, 68))
for i in range(756):
    matrices[i] = convert(data.matrix[i], diag=0)

In [8]:
# encode target 
target = data.target.map({'AD':0, 'EMCI':1, 'LMCI':2, 'Normal':3}).values

# encode subjects
sub_dict = dict(zip(np.unique(data.subject_id.values), np.arange(228)))
subjects = data.subject_id.map(sub_dict).values

# binarizing matrices
matrices[matrices>0] = 1

In [9]:
# def CommunityLouvian(data):

#     part_all = []

#     for idx, mat in enumerate(data):
#         g = nx.Graph(mat)
#         partition = community.best_partition(g)
#         part_all.append(list(partition.values()))

#     part_all = np.array(part_all)

#     return part_all
# partitions = CommunityLouvian(matrices)
# non_overlapping_kernel = compute_AMI(partitions)
# np.save('precomputed_kernels/non_overlapping_kernel.npy', non_overlapping_kernel)

kernel = np.load('precomputed_kernels/non_overlapping_kernel.npy')

In [11]:
# Choosing model parameters

idx_ad_nc = (data.target == 'AD').values + (data.target == 'Normal').values

train_auc_mean, train_auc_std, best_params, i, j = grid_search(kernel, target, subjects,
                                                               idx_ad_nc, kernel = 'exp')
print('AD vs NC : {}'.format(train_auc_mean[i,j]))
print(best_params)



Done 92.155 sec
AD vs NC : 0.8156539937216603
{'Kernel Parameter': 0.3, 'SVC Parameter': 10.0}


In [17]:
# idx_ad_nc = (data.target == 'AD').values + (data.target == 'Normal').values

# kernel_ami = make_exp_kernel(kernel[idx_ad_nc, :][:, idx_ad_nc], 0.3)


# auc = repeatSVM_labeled(kernel_ami, target, subjects, 
#                         idx_ad_nc, penalty = 10)

# print('AD vs NC : {} +- {}'.format(auc.mean(), auc.std()))

AD vs NC : 0.8156539937216603 +- 0.012743110835321265


In [12]:
idx_ad_lmci = (data.target == 'AD').values + (data.target == 'LMCI').values

train_auc_mean, train_auc_std, best_params, i, j = grid_search(kernel, target, subjects,
                                                               idx_ad_lmci, kernel = 'exp')
print('AD vs LMCI : {}'.format(train_auc_mean[i,j]))
print(best_params)

Done 80.115 sec
AD vs NC : 0.6518404255319149
{'Kernel Parameter': 8, 'SVC Parameter': 0.10000000000000001}


In [18]:
idx_ad_lmci = (data.target == 'AD').values + (data.target == 'LMCI').values

kernel_ami = make_exp_kernel(kernel[idx_ad_lmci, :][:, idx_ad_lmci], 8)


auc = repeatSVM_labeled(kernel_ami, target, subjects, 
                        idx_ad_lmci, penalty = 0.1)

print('AD vs LMCI : {} +- {}'.format(auc.mean(), auc.std()))

AD vs LMCI : 0.6518404255319149 +- 0.008703408582831803


In [13]:


idx_ad_emci = (data.target == 'AD').values + (data.target == 'EMCI').values

train_auc_mean, train_auc_std, best_params, i, j = grid_search(kernel, target, subjects,
                                                               idx_ad_emci, kernel = 'exp')
print('AD vs EMCI : {}'.format(train_auc_mean[i,j]))
print(best_params)



Done 127.349 sec
AD vs EMCI : 0.6468563829787235
{'Kernel Parameter': 0.7, 'SVC Parameter': 10.0}


In [None]:
idx_ad_emci = (data.target == 'AD').values + (data.target == 'EMCI').values

kernel_ami = make_exp_kernel(kernel[idx_ad_emci, :][:, idx_ad_emci], 0.01)


auc = repeatSVM_labeled(kernel_ami, target, subjects, 
                        idx_ad_emci, penalty = 0.01)

print('AD vs EMCI : {} +- {}'.format(auc.mean(), auc.std()))

In [14]:


idx_lmci_emci = (data.target == 'LMCI').values + (data.target == 'EMCI').values

train_auc_mean, train_auc_std, best_params, i, j = grid_search(kernel, target, subjects,
                                                               idx_lmci_emci, kernel = 'exp')
print('AD vs EMCI : {}'.format(train_auc_mean[i,j]))
print(best_params)



Done 148.314 sec
AD vs EMCI : 0.5265375
{'Kernel Parameter': 0.9, 'SVC Parameter': 50.0}


In [17]:
idx_lmci_emci = (data.target == 'LMCI').values + (data.target == 'EMCI').values

kernel_ami = make_exp_kernel(kernel[idx_lmci_emci, :][:, idx_lmci_emci], 0.01)


auc = repeatSVM_labeled(kernel_ami, target, subjects, 
                        idx_lmci_emci, penalty = 0.1)

print('LMCI vs EMCI : {} +- {}'.format(auc.mean(), auc.std()))

LMCI vs EMCI : 0.45048125000000006 +- 0.027831890010606528


In [15]:


idx_lmci_nc = (data.target == 'LMCI').values + (data.target == 'Normal').values

train_auc_mean, train_auc_std, best_params, i, j = grid_search(kernel, target, subjects,
                                                               idx_lmci_nc, kernel = 'exp')
print('AD vs EMCI : {}'.format(train_auc_mean[i,j]))
print(best_params)



Done 96.815 sec
AD vs EMCI : 0.7037704918032788
{'Kernel Parameter': 0.3, 'SVC Parameter': 10.0}


In [18]:
idx_lmci_nc = (data.target == 'LMCI').values + (data.target == 'Normal').values

kernel_ami = make_exp_kernel(kernel[idx_lmci_nc, :][:, idx_lmci_nc], 0.01)


auc = repeatSVM_labeled(kernel_ami, target, subjects, 
                        idx_lmci_nc, penalty = 0.01)

print('LMCI vs NC : {} +- {}'.format(auc.mean(), auc.std()))

LMCI vs NC : 0.6156393442622952 +- 0.02295789703523757


In [16]:


idx_emci_nc = (data.target == 'EMCI').values + (data.target == 'Normal').values

train_auc_mean, train_auc_std, best_params, i, j = grid_search(kernel, target, subjects,
                                                               idx_emci_nc, kernel = 'exp')
print('AD vs EMCI : {}'.format(train_auc_mean[i,j]))
print(best_params)



Done 148.583 sec
AD vs EMCI : 0.6768401639344261
{'Kernel Parameter': 5, 'SVC Parameter': 10.0}


In [19]:
idx_emci_nc = (data.target == 'EMCI').values + (data.target == 'Normal').values

kernel_ami = make_exp_kernel(kernel[idx_emci_nc, :][:, idx_emci_nc], 0.1)


auc = repeatSVM_labeled(kernel_ami, target, subjects, 
                        idx_emci_nc, penalty = 10)

print('EMCI vs NC : {} +- {}'.format(auc.mean(), auc.std()))

EMCI vs NC : 0.6545696721311477 +- 0.019676197069572837


In [20]:
data_meta = data.drop('matrix', axis=1)

In [22]:
data_meta.to_csv('meta.csv')