In [1]:
import pandas as pd
import pickle as pkl
import numpy as np
from sklearn import preprocessing

from sklearn import metrics

In [2]:
from machine_learning_models import create_class_ADNI, create_class_LOAD, baseline_model

### Baseline model in ADNI dataset

#### PET target

In [3]:
# Baseline for the PET target: APOE is the only predictor passed to the model.
file = 'data/AD_PPI_missense_with_biomarkers.csv'
data = pd.read_csv(file, index_col=0)
data_wclass = create_class_ADNI(data, 'PET')  # the other target used in this notebook is 'PETandDX'

# Feature = the APOE column; target = the 'y' column of the frame returned by
# create_class_ADNI. Copy the column so re-indexing it below cannot touch data_wclass.
x = data_wclass['APOE'].copy()
y = data_wclass['y']

# NOTE(review): only x's index is upper-cased; y keeps the index as loaded.
# Presumably this matches the sample IDs stored in the split file -- confirm.
x.index = x.index.str.upper()

# Load the stored split. pickle can execute arbitrary code on load, so this must
# only ever point at trusted, project-generated files.
with open('data/splits/split_PET.pkl', 'rb') as f:
    split_pet = pkl.load(f)

auc_pet = baseline_model(split_pet, x, y)
print('Baseline model PET, AUC ROC:', auc_pet)

Class distribution:
1.0    410
0.0    316
Name: y, dtype: int64

Confusion matrix:
 [[ 6 26]
 [ 2 39]]

              precision    recall  f1-score   support

         0.0       0.75      0.19      0.30        32
         1.0       0.60      0.95      0.74        41

    accuracy                           0.62        73
   macro avg       0.68      0.57      0.52        73
weighted avg       0.67      0.62      0.54        73

Baseline model PET, AUC ROC: 0.6894054878048781


#### PET&DX target

In [4]:
# Baseline for the PET&DX target: APOE is the only predictor passed to the model.
file = 'data/AD_PPI_missense_with_biomarkers.csv'
data = pd.read_csv(file, index_col=0)
data_wclass = create_class_ADNI(data, 'PETandDX')  # the other target used in this notebook is 'PET'

# Feature = the APOE column; target = the 'y' column of the frame returned by
# create_class_ADNI. Copy the column so re-indexing it below cannot touch data_wclass.
x = data_wclass['APOE'].copy()
y = data_wclass['y']

# NOTE(review): only x's index is upper-cased; y keeps the index as loaded.
# Presumably this matches the sample IDs stored in the split file -- confirm.
x.index = x.index.str.upper()

# Load the stored split. pickle can execute arbitrary code on load, so this must
# only ever point at trusted, project-generated files.
with open('data/splits/split_PETandDX.pkl', 'rb') as f:
    split_petdx = pkl.load(f)

auc_petdx = baseline_model(split_petdx, x, y)
# Label fixed: this run is the PET&DX target, not PET.
print('Baseline model PET&DX, AUC ROC:', auc_petdx)

Class distribution:
1.0    182
0.0    135
Name: y, dtype: int64

Confusion matrix:
 [[ 8  6]
 [ 2 16]]

              precision    recall  f1-score   support

         0.0       0.80      0.57      0.67        14
         1.0       0.73      0.89      0.80        18

    accuracy                           0.75        32
   macro avg       0.76      0.73      0.73        32
weighted avg       0.76      0.75      0.74        32

Baseline model PET, AUC ROC: 0.6825396825396824


### Baseline model in TGen II dataset
#### LOAD target

In [5]:
# Baseline for the LOAD target: APOE is the only predictor passed to the model.
file = 'data/AD_PPI_missense_with_biomarkers_LOAD.csv'
data = pd.read_csv(file, index_col=0)
data_wclass = create_class_LOAD(data)

# Feature = the APOE column; target = the 'y' column of the frame returned by
# create_class_LOAD. Copy the column so re-indexing it below cannot touch data_wclass.
x = data_wclass['APOE'].copy()
y = data_wclass['y']

# NOTE(review): only x's index is upper-cased; y keeps the index as loaded.
# Presumably this matches the sample IDs stored in the split file -- confirm.
x.index = x.index.str.upper()

# Load the stored split. pickle can execute arbitrary code on load, so this must
# only ever point at trusted, project-generated files.
with open('data/splits/split_LOAD.pkl', 'rb') as f:
    split_load = pkl.load(f)

# Stored under its own name so the PET&DX score (auc_petdx) is not overwritten.
auc_load = baseline_model(split_load, x, y)
print('Baseline model LOAD, AUC ROC:', auc_load)

Class distribution:
1.0    1014
0.0     585
Name: y, dtype: int64

Confusion matrix:
 [[36 23]
 [30 71]]

              precision    recall  f1-score   support

         0.0       0.55      0.61      0.58        59
         1.0       0.76      0.70      0.73       101

    accuracy                           0.67       160
   macro avg       0.65      0.66      0.65       160
weighted avg       0.68      0.67      0.67       160

Baseline model PET, AUC ROC: 0.6266152038932706
