In [1]:
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 16 17:10:53 2020

@author: wanxiang.shen@u.nus.edu
"""

import warnings
warnings.filterwarnings("ignore")


import pandas as pd
import numpy as np

from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_auc_score
from sklearn.metrics import auc as calculate_auc

import matplotlib.pyplot as plt
import seaborn as sns

from aggmap import AggMap, AggModel, loadmap

# Seeds NumPy's global RNG only.
# NOTE(review): confirm whether the model's own randomness is covered as well
# (the estimator's parameter dump reports a separate 'random_state').
np.random.seed(666) # fixed seed, just for repeatable results

def prc_auc_score(y_true, y_score):
    """Return the area under the precision-recall curve.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_score : array-like
        Scores for the positive class.

    Returns
    -------
    float
        Area obtained by integrating precision over recall.
    """
    # The thresholds returned by precision_recall_curve are not needed for the area.
    prec, rec, _ = precision_recall_curve(y_true, y_score)
    return calculate_auc(rec, prec)

In [2]:
# Load the two processed tables; the first spreadsheet column becomes the row index.
df1 = pd.read_excel('./proteins_processed.xlsx', index_col=0)
# Reorder the metabolite rows so they follow the protein table's index.
df2 = pd.read_excel('./Metabolites_processed.xlsx', index_col=0).loc[df1.index]

# Column-wise join on the shared index.
df = df1.join(df2)

# Feature matrix: every column after the first two.
# NOTE(review): presumably the two skipped columns are 'Severe' and 'Train_Test' -- confirm,
# since the selection is positional rather than by name.
dfx = df.loc[:, df.columns[2:]]


In [3]:
# Sanity check: (rows, columns) of the feature matrix.
dfx.shape

(41, 1486)

In [4]:
# Optional export of the merged table (disabled):
# df.to_excel('./proteins_metabolites.xlsx')

In [5]:
# Compare shapes: dfx has two columns fewer than df1 and df2 combined,
# because it is the joined table minus its first two columns.
dfx.shape, df2.shape, df1.shape

((41, 1486), (41, 847), (41, 641))

In [6]:
# Label column kept as a one-column frame; multiplying by 1.0 serves as the numeric cast.
dfy = df[['Severe']].mul(1.0)

In [7]:
# Row labels of each split, as flagged in the 'Train_Test' column.
tr = df.index[(df['Train_Test'] == 'Train').values]
ts = df.index[(df['Train_Test'] == 'Test').values]

In [8]:
# One-hot encode the label. Selecting columns [1, 0] puts the column for label 1 first,
# so column 0 of Y / Y_test is the indicator for Severe == 1.
y = pd.get_dummies(dfy['Severe'])[[1, 0]]

# Training split (features, labels).
X, Y = dfx.loc[tr].values, y.loc[tr].values

# Held-out split (features, labels).
X_test, Y_test = dfx.loc[ts].values, y.loc[ts].values

In [9]:
# Load the AggMap object stored at ./results/COV-S.mp
# (presumably fitted and saved by an earlier run -- confirm).
mp = loadmap('./results/COV-S.mp')

In [10]:
# Convert both feature matrices into AggMap feature maps, using the same `mp` object
# and the same 'standard' scaling option for the two splits.
trainX = mp.batch_transform(X, scale_method='standard')
testX = mp.batch_transform(X_test, scale_method='standard')

print("\n input train and test X shape is %s, %s " % (trainX.shape,  testX.shape))

# Fit the classifier on the training maps.
# NOTE(review): gpuid=5 is machine-specific -- confirm the device exists where this runs.
clf = AggModel.MultiClassEstimator(
    epochs=50,
    gpuid=5,
    batch_size=1,
    conv1_kernel_size=5,
    verbose=0,
)
clf.fit(trainX, Y)

# Save the estimator's underlying model for later explanation work.
# NOTE(review): the '.h55' extension looks like a typo for '.h5' -- confirm before
# relying on the on-disk format; the path is left unchanged here.
clf._model.save('./results/model.h55')

# Predict on the held-out maps. Column 0 is taken from each output, matching Y_test[:, 0].
y_true = Y_test[:, 0]
y_pred = clf.predict(testX)[:, 0]
y_score = clf.predict_proba(testX)[:, 0]

# Collect the per-sample results, keeping the test-set row labels in an 'index' column.
res = dict(index=ts, y_true=y_true, y_score=y_score)
dfres = pd.DataFrame(res)


100%|##########| 31/31 [00:02<00:00, 12.02it/s]
100%|##########| 10/10 [00:00<00:00, 160.13it/s]



 input train and test X shape is (31, 39, 39, 5), (10, 39, 39, 5) 
{'epochs': 50, 'lr': 0.0001, 'conv1_kernel_size': 5, 'dense_layers': [128], 'dense_avf': 'relu', 'batch_size': 1, 'dropout': 0.0, 'batch_norm': False, 'n_inception': 2, 'monitor': 'val_loss', 'patience': 10000, 'random_state': 32, 'verbose': 0, 'name': 'AggMap MultiClass Estimator', 'gpuid': '5'}


In [11]:
# Write the per-sample test predictions to disk. to_csv writes the frame's index by
# default, so the file holds that index in addition to the explicit 'index' column.
dfres.to_csv('./results/test_predict_res.csv')

In [12]:
# Score the held-out predictions stored in `dfres` by the prediction cell.
#
# `dfres` is intentionally not rebuilt from `res` here: this cell rebinds `res` to the
# metrics dict at the bottom, so rebuilding the frame from `res` made a second run of
# this cell raise (pandas cannot build a DataFrame from a dict of bare scalars).
y_true = dfres.y_true.values.astype(int)
y_score = dfres.y_score.values
y_pred = y_score.round().astype(int)  # hard labels obtained by rounding the score

# labels=[0, 1] pins the 2x2 layout, so ravel() always yields (tn, fp, fn, tp).
tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

acc = (tp + tn) / (tn + fp + fn + tp)

sensitivity = tp / (tp + fn)  # true-positive rate
specificity = tn / (tn + fp)  # true-negative rate

# The ranking metrics use the raw scores, not the rounded labels.
prc_auc = prc_auc_score(y_true, y_score)
roc_auc = roc_auc_score(y_true, y_score)

precision = tp / (tp + fp)
recall = sensitivity  # recall and sensitivity are the same quantity

# NOTE: `res` now refers to the metrics summary, no longer to the per-sample predictions.
res = {
    'accuracy': acc,

    'prc_auc': prc_auc,
    'roc_auc': roc_auc,

    'sensitivity': sensitivity,
    'specificity': specificity,

    'precision': precision,
    'recall': recall,

    'F1': 2 * precision * recall / (precision + recall),
}
res

{'accuracy': 0.8,
 'prc_auc': 1.0,
 'roc_auc': 1.0,
 'sensitivity': 1.0,
 'specificity': 0.6666666666666666,
 'precision': 0.6666666666666666,
 'recall': 1.0,
 'F1': 0.8}