In [43]:
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.calibration import CalibratedClassifierCV


In [3]:
# Load the ADNI experiment sheet and keep only the columns used downstream.
# `.copy()` makes the selection an independent frame, so adding a column
# below cannot raise pandas' chained-assignment (SettingWithCopy) warning.
adni = pd.read_csv('../database_split/database_info/ADNI_experiment_information.csv')
adni = adni[['Subject ID', 'Research Group', 'MMSE Total Score', 'Visit']].copy()

# Build the merge key "<Subject ID>_<Visit>", with spaces in the visit name
# turned into "_" and "/" removed.
# Vectorised string methods replace the former row-wise lambda, which indexed
# each row positionally (x[0], x[1]); integer keys on a label-indexed Series
# are deprecated in pandas 2.x. Unlike the lambda, a missing visit/subject
# yields a missing key instead of raising.
adni['Image Filename'] = (
    adni['Subject ID']
    + '_'
    + adni['Visit'].str.replace(' ', '_', regex=False).str.replace('/', '', regex=False)
)
print(adni.shape)
adni.head()

(762, 5)


Unnamed: 0,Subject ID,Research Group,MMSE Total Score,Visit,Image Filename
0,002_S_0619,AD,22,ADNI Screening,002_S_0619_ADNI_Screening
1,002_S_0938,AD,23,ADNI Screening,002_S_0938_ADNI_Screening
2,002_S_0955,AD,21,ADNI Screening,002_S_0955_ADNI_Screening
3,002_S_5018,AD,24,ADNI2 Month 6-New Pt,002_S_5018_ADNI2_Month_6-New_Pt
4,003_S_1059,AD,25,ADNI Screening,003_S_1059_ADNI_Screening


In [5]:
# Read the per-group prediction files and tag each one with the diagnostic
# group it belongs to (the group is implied by the file, not stored in it).
cn = pd.read_csv('../predict_results/test_70-100_LR_holds.csv', sep=',').assign(**{'Research Group': 'CN'})
ad = pd.read_csv('../predict_results/ad_70-100_LR_holds.csv', sep=',').assign(**{'Research Group': 'AD'})
mci = pd.read_csv('../predict_results/mci_70-100_LR_holds.csv', sep=',').assign(**{'Research Group': 'MCI'})

# Stack the three groups, keep the columns of interest and give them the
# names used in the rest of the notebook.
stacked = pd.concat([cn, ad, mci], ignore_index=True)
results = stacked[['Name', 'True', 'Pred', 'Research Group']].rename(
    columns={'Name': 'Image Filename',
             'True': 'Chronological age',
             'Pred': 'Estimated age'})
print(results.shape)
results.head()

(762, 4)


Unnamed: 0,Image Filename,Chronological age,Estimated age,Research Group
0,023_S_4164_ADNI2_Month_6-New_Pt,73.6,78.10059,CN
1,014_S_4093_ADNI2_Month_6-New_Pt,70.6,76.46625,CN
2,021_S_0647_ADNI_Screening,72.9,74.11468,CN
3,019_S_4835_ADNI2_Year_1_Visit,80.4,75.6461,CN
4,022_S_4320_ADNI2_Month_6-New_Pt,71.5,76.68798,CN


In [6]:
# Join clinical information with the age predictions. The default inner merge
# keeps only images present in both tables with a matching research group.
data = adni.merge(results, on=['Image Filename', 'Research Group']).reset_index(drop=True)

# Rename by column name instead of overwriting `data.columns` positionally,
# so a change in column order upstream cannot silently mislabel columns.
data = data.rename(columns={'Research Group': 'group', 'MMSE Total Score': 'MMSE'})

# Delta = estimated age - chronological age, computed column-wise rather
# than with a row-by-row apply.
data['Delta'] = data['Estimated age'] - data['Chronological age']
data.shape


(762, 8)

In [83]:
# Kernel shared by the grid-searched SVC below and by the SVC rebuilt in
# calibrate_probabilities().
kernel = 'linear'

# Base estimator placed in the grid-search pipeline; 'balanced' re-weights
# classes inversely to their frequency.
# NOTE(review): per the scikit-learn SVC docs, `degree` and `gamma` only
# affect non-linear kernels, so they should have no effect while
# kernel == 'linear' -- confirm before relying on them.
classifier = SVC(kernel=kernel,
                 degree=1,
                 gamma='scale',
                 class_weight='balanced')

def grid():
    """Build an unfitted grid search over the SVM regularisation strength C.

    The searched estimator is a pipeline of ``StandardScaler`` followed by the
    notebook-level ``classifier``. Candidates for ``C`` are ``2**e`` for
    ``e`` from -5 up to (but excluding) 10 in steps of 0.25; they are scored
    by ROC AUC on an unshuffled 5-fold stratified split, using all cores.

    Returns
    -------
    GridSearchCV
        The configured, not yet fitted, search object.
    """
    c_candidates = [2**exponent for exponent in np.arange(-5, 10, 0.25)]
    search_pipeline = Pipeline([("scale", StandardScaler()),
                                ('clf', classifier)])
    return GridSearchCV(search_pipeline,
                        param_grid={"clf__C": c_candidates},
                        cv=StratifiedKFold(n_splits=5),
                        n_jobs=-1,
                        scoring='roc_auc')



def calibrate_probabilities(svc):
    """Return an unfitted scaler + sigmoid-calibrated SVC pipeline.

    Parameters
    ----------
    svc : fitted search object
        Only ``svc.best_params_['clf__C']`` is read; ``fit_classifier``
        passes the fitted result of ``grid()``.

    Returns
    -------
    Pipeline
        ``StandardScaler`` followed by a ``CalibratedClassifierCV`` wrapping
        a fresh SVC (same kernel/class weighting as the notebook-level
        classifier, with the selected ``C``). Calibration uses the sigmoid
        method on a shuffled 5-fold stratified split with a fixed seed.
    """
    # https://github.com/scikit-learn/scikit-learn/issues/13211
    selected_c = svc.best_params_['clf__C']
    base_svc = SVC(C=selected_c,
                   kernel=kernel,
                   degree=1,
                   gamma='scale',
                   class_weight='balanced')
    calibration_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=3)
    calibrated_svc = CalibratedClassifierCV(base_svc,
                                            method='sigmoid',
                                            cv=calibration_cv)
    return Pipeline([("scale", StandardScaler()),
                     ('cal', calibrated_svc)])


def metrics(y_pred, y_test, y_score):
    """Compute binary classification metrics for one fold.

    Note the argument order: predictions first, ground truth second.

    Parameters
    ----------
    y_pred : array-like
        Hard class predictions (0/1).
    y_test : array-like
        True labels (0/1, as produced by ``get_data``).
    y_score : array-like
        Continuous score for class 1, used for the ROC AUC.

    Returns
    -------
    dict
        Keys ``'auc'``, ``'acc'``, ``'sens'``, ``'spe'``, ``'f1'``.
    """
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Pin the label set so the matrix is always 2x2. Without it, a fold whose
    # truth and predictions contain a single class yields a 1x1 matrix and
    # the four-way unpacking below raises ValueError.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    auc_ = roc_auc_score(y_test, y_score)
    # Sensitivity = recall of class 1; specificity = recall of class 0.
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    return {'auc': auc_,
            'acc': accuracy,
            'sens': sensitivity,
            'spe': specificity,
            'f1': f1}


def get_data(data, class_not, class_zero):
    """Extract features, binary labels and image names for a two-group task.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``group``, ``Delta`` and ``Image Filename``.
    class_not : str
        Group to exclude; rows whose ``group`` equals it are dropped.
    class_zero : str
        Group encoded as label 0; every other remaining group becomes 1.

    Returns
    -------
    X : numpy.ndarray, shape (n_rows, 1)
        The ``Delta`` column as a single-feature matrix.
    y : numpy.ndarray of 0/1
    subj : array of the ``Image Filename`` values, in row order.
    """
    # Filter once and reuse, instead of recomputing the same mask per column.
    selected = data[data['group'] != class_not]
    subj = selected['Image Filename'].values
    # reshape(-1, 1) also handles an empty selection, where the previous
    # reshape(len, -1) raised because the second dimension was ambiguous.
    X = selected['Delta'].values.reshape(-1, 1)
    y = np.where(selected['group'].values == class_zero, 0, 1)
    return X, y, subj


def fit_classifier(X, y, subjects, cv=None):
    """Cross-validate the grid-searched SVM and collect per-fold outputs.

    For every outer fold: tune ``C`` with ``grid()`` on the training part,
    take hard predictions from the tuned model, then fit the calibrated
    pipeline from ``calibrate_probabilities`` on the same training part and
    use its class-1 probability as the score.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
    y : numpy.ndarray of 0/1 labels
    subjects : array of per-sample identifiers, indexable by integer arrays
    cv : splitter with ``split(X, y)``, optional
        Outer cross-validation. Defaults to the notebook-level ``skf`` so
        existing three-argument calls behave as before.

    Returns
    -------
    predictions : pandas.DataFrame
        Columns ``Subject``, ``True``, ``Pred``, ``Score``; the last fold
        comes first (same row order as before).
    results : pandas.DataFrame
        One row of ``metrics`` output per fold, in fold order.
    """
    if cv is None:
        cv = skf  # notebook-level outer splitter

    fold_predictions = []
    fold_scores = []
    for train_idx, test_idx in cv.split(X, y):
        X_train, y_train = X[train_idx], y[train_idx]
        X_test, y_test = X[test_idx], y[test_idx]

        # Hard labels come from the grid-searched (uncalibrated) model.
        search = grid()
        search.fit(X_train, y_train)
        y_pred = search.predict(X_test)

        # Scores come from the calibrated model. The former
        # decision_function() call was dropped: its result was overwritten
        # by these probabilities before ever being used.
        calibrated = calibrate_probabilities(search)
        calibrated.fit(X_train, y_train)
        y_score = calibrated.predict_proba(X_test)[:, 1]

        fold_predictions.append(pd.DataFrame({'Subject': subjects[test_idx],
                                              'True': y_test,
                                              'Pred': y_pred,
                                              'Score': y_score}))
        fold_scores.append(metrics(y_pred, y_test, y_score))

    # Concatenate once instead of growing a frame inside the loop. The
    # reversal keeps the historical "latest fold first" row order.
    if fold_predictions:
        predictions = pd.concat(fold_predictions[::-1])
    else:
        predictions = pd.DataFrame()
    return predictions, pd.DataFrame(fold_scores)


def print_results(results, decimals=None):
    """Print mean +/- population standard deviation for every metric column.

    Columns named ``'auc'`` and ``'f1'`` are printed on their native scale;
    all other columns are multiplied by 100 (percentages).

    Parameters
    ----------
    results : pandas.DataFrame
        One row per fold, one numeric column per metric.
    decimals : int, optional
        Number of decimals to round to. When omitted, a notebook-level
        integer named ``round`` is honoured (the way precision was configured
        before this parameter existed); otherwise 2 is used.
    """
    if decimals is None:
        # `round` resolves to the notebook global if one was assigned, and to
        # the builtin function otherwise -- only an int is a valid precision.
        decimals = round if isinstance(round, int) else 2

    means = results.mean()
    stds = results.std(ddof=0)
    # Use .iloc for positional access: integer keys on a string-labelled
    # Series (means[i]) are deprecated in pandas 2.x.
    for position, name in enumerate(means.index):
        mean_val = means.iloc[position]
        std_val = stds.iloc[position]
        if name not in ('auc', 'f1'):
            mean_val = mean_val * 100
            std_val = std_val * 100
        print(name.upper(),
              mean_val.round(decimals), "+/-",
              std_val.round(decimals))


def get_data_screening(data, class_zero):
    """Extract features and labels for the CN vs. (MCI + AD) screening task.

    NOTE(review): this writes a ``group2`` column into the caller's frame
    ('CN' for CN rows, 'MCI+AD' for everything else); the side effect is
    kept as-is.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``group``, ``Delta`` and ``Image Filename``.
    class_zero : str
        Value of ``group2`` encoded as label 0; the other value becomes 1.

    Returns
    -------
    X : numpy.ndarray, shape (n_rows, 1) -- the ``Delta`` column.
    y : numpy.ndarray of 0/1
    subj : array of the ``Image Filename`` values, in row order.
    """
    data['group2'] = np.where(data.group == 'CN', 'CN', 'MCI+AD')
    delta = data.Delta.values
    features = delta.reshape(len(delta), -1)
    labels = np.where(data.group2.values == class_zero, 0, 1)
    image_names = data['Image Filename'].values
    return features, labels, image_names


In [85]:

# Outer 10-fold stratified split; fit_classifier reads `skf` from notebook scope.
skf = StratifiedKFold(n_splits=10)
# Number of decimals for the printed metrics. print_results reads this
# notebook-level name, so it is kept even though it shadows the builtin
# round() for the remainder of the session.
round = 2
# Group excluded from each experiment, aligned by position with the
# experiment list below ('None' matches no group, i.e. nothing is excluded).
class_not = ['MCI', 'AD', 'CN', 'None']
for i, exp in enumerate(['cn_ad', 'cn_mci', 'mci_ad', 'cn_mciad']):
    print(exp.upper())
    first, second = exp.split("_")
    # The screening case is tested first: previously `if exp != 'mci_ad'`
    # also matched 'cn_mciad', so the get_data_screening branch could never
    # run. Both routes give the same X, y and subjects for this experiment;
    # get_data_screening additionally adds a 'group2' column to `data`.
    if exp == 'cn_mciad':
        X, y, subj = get_data_screening(data, first.upper())
    elif exp == 'mci_ad':
        # AD is encoded as class 0 here, MCI as class 1.
        X, y, subj = get_data(data, class_not[i], second.upper())
    else:
        # CN is encoded as class 0.
        X, y, subj = get_data(data, class_not[i], first.upper())
    predictions, results = fit_classifier(X, y, subj)
    predictions.to_csv("../predict_results/classification_"+exp+".csv", index=False)
    print_results(results)


CN_AD
AUC 0.73 +/- 0.09
ACC 65.96 +/- 4.74
SENS 65.95 +/- 11.27
SPE 65.91 +/- 7.49
F1 0.61 +/- 0.07
CN_MCI
AUC 0.64 +/- 0.07
ACC 58.96 +/- 5.26
SENS 56.52 +/- 9.01
SPE 60.92 +/- 8.27
F1 0.55 +/- 0.06
MCI_AD
AUC 0.58 +/- 0.08
ACC 56.3 +/- 6.97
SENS 51.83 +/- 12.25
SPE 61.62 +/- 13.56
F1 0.56 +/- 0.08
CN_MCIAD
AUC 0.68 +/- 0.07
ACC 62.05 +/- 5.21
SENS 62.17 +/- 8.71
SPE 61.91 +/- 7.69
F1 0.66 +/- 0.06
