In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import StratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error

In [2]:
data_main_path = '/Users/tabaneslami/Desktop/hit/python-excel/Deep-Learning/acerta-abide-master/data/functionals/cpac/filt_global/just_NYU'
flist = os.listdir(data_main_path)
print(len(flist))
flist.remove('.DS_Store')
print(len(flist))

174
173


In [3]:
df_labels = pd.read_csv('/Users/tabaneslami/Desktop/hit/python-excel/Deep-Learning/acerta-abide-master/data/phenotypes/Phenotypic_V1_0b_preprocessed1.csv')

df_labels.DX_GROUP = df_labels.DX_GROUP.map({1: 1, 2:0})
print(len(df_labels))

labels = {}
for row in df_labels.iterrows():
    file_id = row[1]['FILE_ID']
    y_label = row[1]['DX_GROUP']
    if file_id == 'no_filename':
        continue
    assert(file_id not in labels)
    labels[file_id] = y_label

print("*",len(labels))

1112
* 1035


In [4]:
import numpy.ma as ma # for masked arrays

def get_corr_data(filename):
    df = pd.read_csv(os.path.join(data_main_path, filename), sep='\t')
    with np.errstate(invalid="ignore"):
        corr = np.nan_to_num(np.corrcoef(df.T))
        mask = np.invert(np.tri(corr.shape[0], k=-1, dtype=bool))
        m = ma.masked_where(mask == 1, mask)
        return ma.masked_where(m, corr).compressed()

def get_corr_data_recon(filename):
    df = pd.read_csv(os.path.join(data_main_path, filename), sep='\t')
    with np.errstate(invalid="ignore"):
        corr = np.nan_to_num(np.corrcoef(df.T))
    sig=np.random.normal(0, 1, (200,57))#random.randn(200,58)
    y=np.matmul(corr,sig)
    q, r = np.linalg.qr(y)
    b = np.matmul(q.T,corr)
    mask = np.invert(np.tri(np.matmul(q,b).shape[0], k=-1, dtype=bool))
    m = ma.masked_where(mask == 1, mask)
    return ma.masked_where(m, np.matmul(q,b)).compressed()

In [5]:
def get_label(filename):
    f_split = filename.split('_')
    if f_split[3] == 'rois':
        key = '_'.join(f_split[0:3]) #,f_split[2]])
    else:
        key = '_'.join(f_split[0:2])
    assert (key in labels)
    return labels[key]

In [6]:
def confusion(g_turth,predictions):
    tn, fp, fn, tp = confusion_matrix(g_turth,predictions).ravel()
    accuracy = (tp+tn)/(tp+fp+tn+fn)
    sensitivity = (tp)/(tp+fn)
    specificty = (tn)/(tn+fp)
    return accuracy,sensitivity,specificty



In [7]:
data_features = {}
data_labels = []
features_recon = {}

for i in flist:
    data_features[i]=(get_corr_data(i),get_label(i))
    data_labels.append(get_label(i))
    features_recon[i]=(get_corr_data_recon(i),get_label(i))

data_labels = np.array(data_labels)


In [8]:
def train_clf(all_corr,train_samples,all_label):
    clf = MLPClassifier()#svm.SVC(gamma='auto')
    X_train, y_train = [], []
    for i in train_samples:
        X_train.append(all_corr[i][0])
        y_train.append(all_corr[i][1])
#    print("y_trin ",y_train)    
    clf.fit(X_train, y_train)
    return clf

def test_clf(model,test_samples,all_corr,all_label):
    X_test, y_test = [], []
    for i in test_samples:
        X_test.append(all_corr[i][0])
        y_test.append(all_corr[i][1])
    predict = model.predict(X_test)
    mlp_acc,mlp_sens,mlp_spef = confusion(y_test,predict)
    #print(mlp_acc)
    return mlp_acc,mlp_sens,mlp_spef

In [9]:
avg_acc = 0
avg_sens = 0
avg_spe = 0

avg_acc_re = 0
avg_sens_re = 0
avg_spe_re = 0
kf = StratifiedKFold(n_splits=5, shuffle=True)
avg_org =[]
avg_recon = []
for jj in range(20):
    print(jj," -- ")
    acc = 0
    sens = 0
    spef = 0
    acc_re = 0
    sens_re = 0
    spef_re = 0
    acc_org = []
    acc_recon = []
    for kk,(train_index, test_index) in enumerate(kf.split(flist, data_labels)):
        train_samples = np.array(flist)[train_index]
        test_samples = np.array(flist)[test_index]
        svmmodel = train_clf(data_features,train_samples,data_labels)
        svmmodel2 = train_clf(features_recon,train_samples,data_labels)
        
        acc_org.append(test_clf(svmmodel,test_samples,data_features,data_labels))
        acc_recon.append(test_clf(svmmodel,test_samples,features_recon,data_labels))


    print("org: ",np.mean(np.array(acc_org),axis=0))
    print("recon: ",np.mean(np.array(acc_recon),axis=0))
    avg_org.append(np.mean(np.array(acc_org),axis=0))
    avg_recon.append(np.mean(np.array(acc_recon),axis=0))
print(" avg_acc_org is: ",np.mean(np.array(avg_org),axis=0))
print(" avg_acc_recon is: ",np.mean(np.array(avg_recon),axis=0))
                

0  -- 
org:  [0.57764706 0.46666667 0.65736842]
recon:  [0.57193277 0.46666667 0.64736842]
1  -- 
org:  [0.58857143 0.77333333 0.44315789]
recon:  [0.58857143 0.77333333 0.44315789]
2  -- 
org:  [0.58453782 0.66666667 0.52842105]
recon:  [0.58453782 0.66666667 0.52842105]
3  -- 
org:  [0.68773109 0.48       0.84736842]
recon:  [0.69344538 0.49333333 0.84736842]
4  -- 
org:  [0.63008403 0.52       0.71736842]
recon:  [0.62436975 0.52       0.70736842]
5  -- 
org:  [0.71630252 0.54666667 0.84631579]
recon:  [0.71042017 0.53333333 0.84631579]
6  -- 
org:  [0.5789916  0.64       0.53421053]
recon:  [0.57327731 0.62666667 0.53421053]
7  -- 
org:  [0.61411765 0.8        0.47526316]
recon:  [0.61411765 0.8        0.47526316]
8  -- 
org:  [0.6302521  0.36       0.83631579]
recon:  [0.6302521  0.36       0.83631579]
9  -- 
org:  [0.58521008 0.65333333 0.53789474]
recon:  [0.59092437 0.66666667 0.53789474]
10  -- 
org:  [0.61378151 0.53333333 0.67368421]
recon:  [0.61378151 0.53333333 0.67368421

In [10]:
for i in flist:
    print(mean_absolute_error(data_features[i][0],features_recon[i][0]))

0.002968129280862714
0.003721667696933749
0.0042795205622351805
0.0043474737689909735
0.002959001155777011
0.0025100127037313348
0.0033316558480911834
0.005353410485477166
0.004067695219901213
0.004332577246750697
0.003977961192929516
0.003144401081663074
0.004183108882975391
0.003013664874427715
0.005007858808632687
0.0034137359597065315
0.0034983795493472468
0.002918133313847283
0.0028732889824888835
0.003989930658394246
0.0031202615622057026
0.004269293434841254
0.0019369093277407648
0.0037092213390573967
0.003536942020464227
0.002278223443119267
0.0029927938394417072
0.0030046503899032635
0.00215709559111247
0.0023794267040609216
0.003054768658479359
0.004253423259022625
0.0035209106816148127
0.003845676139503564
0.0056824098283873815
0.0025854206495303594
0.003786412292351551
0.002197551419681611
0.003434453289942749
0.001615120472209258
0.003573738101122493
0.0030582137831554387
0.0030132358321964354
0.003778073708710492
0.002909045383687351
0.0031233426670793465
0.00385829066876

In [11]:
from scipy.stats import pearsonr
count = 0
for jj in flist:
    print(jj,data_labels[count])
    count+=1

NYU_0050952_rois_cc200.1D 1
NYU_0050954_rois_cc200.1D 1
NYU_0050955_rois_cc200.1D 1
NYU_0050956_rois_cc200.1D 1
NYU_0050957_rois_cc200.1D 1
NYU_0050958_rois_cc200.1D 1
NYU_0050959_rois_cc200.1D 1
NYU_0050960_rois_cc200.1D 1
NYU_0050961_rois_cc200.1D 1
NYU_0050962_rois_cc200.1D 1
NYU_0050964_rois_cc200.1D 1
NYU_0050965_rois_cc200.1D 1
NYU_0050966_rois_cc200.1D 1
NYU_0050967_rois_cc200.1D 1
NYU_0050968_rois_cc200.1D 1
NYU_0050969_rois_cc200.1D 1
NYU_0050970_rois_cc200.1D 1
NYU_0050972_rois_cc200.1D 1
NYU_0050973_rois_cc200.1D 1
NYU_0050974_rois_cc200.1D 1
NYU_0050976_rois_cc200.1D 1
NYU_0050977_rois_cc200.1D 1
NYU_0050978_rois_cc200.1D 1
NYU_0050979_rois_cc200.1D 1
NYU_0050981_rois_cc200.1D 1
NYU_0050982_rois_cc200.1D 1
NYU_0050983_rois_cc200.1D 1
NYU_0050984_rois_cc200.1D 1
NYU_0050985_rois_cc200.1D 1
NYU_0050986_rois_cc200.1D 1
NYU_0050987_rois_cc200.1D 1
NYU_0050988_rois_cc200.1D 1
NYU_0050989_rois_cc200.1D 1
NYU_0050990_rois_cc200.1D 1
NYU_0050991_rois_cc200.1D 1
NYU_0050992_rois_cc2