In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from tensorflow.keras import layers
import os
from time import time
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from sklearn.metrics import auc, roc_curve, roc_auc_score, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import label_binarize

In [2]:
# Pin this kernel to a single GPU: order devices by PCI bus id and expose
# only the device with index 6.
# NOTE(review): these variables are set after `import tensorflow` has already
# run in the imports cell; presumably this still takes effect because the GPU
# is not initialised at import time -- confirm, or move this cell first.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",  # see issue #152
    "CUDA_VISIBLE_DEVICES": "6",
})

In [3]:
# Validation sheet: one row per image file (see the `filename` column in the
# preview that follows), alongside the clinical columns.
val_sheet_path = "/raid/data/yanglab/ILD/joint_val_xm.xlsx"
df_val = pd.read_excel(val_sheet_path)

In [4]:
# Preview the first rows of the validation sheet to check that it loaded.
df_val.head()

Unnamed: 0,pid_x,thickness,filename,pid_y,Rad Dx,rad_label,study_date,MRN,Home_O2,FEV1/FVC,...,age,FVC,Consensus,LivingStatus,MedicationName,StartInstant,TherapeuticClass,Med_label,Therapeutic_label,Consensus_label
0,1125388,1.0,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,1125388,sar,0,20170202,1125388,0,43,...,39,3.093023,3,L,PREDNISONE 20 MG TABLET,20170602.0,HORMONES,6.0,2.0,sar
1,1125388,1.0,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,1125388,sar,0,20170202,1125388,0,43,...,39,3.093023,3,L,PREDNISONE 20 MG TABLET,20170602.0,HORMONES,6.0,2.0,sar
2,1125388,1.0,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,1125388,sar,0,20170202,1125388,0,43,...,39,3.093023,3,L,PREDNISONE 20 MG TABLET,20170602.0,HORMONES,6.0,2.0,sar
3,1125388,1.0,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,1125388,sar,0,20170202,1125388,0,43,...,39,3.093023,3,L,PREDNISONE 20 MG TABLET,20170602.0,HORMONES,6.0,2.0,sar
4,1125388,1.0,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,1125388,sar,0,20170202,1125388,0,43,...,39,3.093023,3,L,PREDNISONE 20 MG TABLET,20170602.0,HORMONES,6.0,2.0,sar


In [4]:
# Image pipeline for validation: no augmentation, only input normalisation.
# NOTE(review): both `preprocess_input` (from imagenet_utils) and a 1/255
# rescale are configured, so every image goes through two normalisation
# steps. Presumably this mirrors the training-time pipeline of the loaded
# checkpoint -- confirm before changing either argument.
data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rescale=1.0 / 255,
)

In [5]:
# Side length in pixels; used for both dimensions of `target_size` when the
# validation generator is built.
image_size = 256

In [6]:
# Stream the validation images listed in `df_val.filename`, one image per
# batch and in dataframe order (shuffle disabled), so that predictions can be
# matched back to rows by position. With class_mode='raw' the values of the
# `Consensus` column are passed through unchanged as labels.
# NOTE(review): `seed` presumably has no effect while shuffle=False and no
# augmentation is configured -- kept for parity with the original call.
val_generator = data_generator.flow_from_dataframe(
    dataframe=df_val,
    x_col='filename',
    y_col='Consensus',
    class_mode='raw',
    target_size=(image_size, image_size),
    batch_size=1,
    shuffle=False,
    seed=726,
)

Found 13953 validated image filenames.


In [3]:
# Load the trained checkpoint and print its architecture.
best_model = load_model("/raid/data/yanglab/ILD/ILD_results/models/ct-unfreezetop10-CT-IRV2-256-400-0.001.h5")
best_model.summary()

# Score every validation image. `pred` is required by the cell that builds
# `df_prob`; it was previously only assigned in a commented-out line, so a
# fresh kernel run failed with NameError.
# `Model.predict` replaces the deprecated `predict_generator`, and
# `len(val_generator)` is the number of batches, so the step count stays
# correct for any batch size (it equals the sample count here because
# batch_size=1).
pred = best_model.predict(val_generator, steps=len(val_generator), verbose=1)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 127, 127, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 127, 127, 32) 96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 127, 127, 32) 0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [8]:
# Per-image class probabilities, one column per class.
# NOTE(review): assumes the model's five outputs are ordered
# chp, nsip, o, sar, uip -- confirm against the training label encoding.
d_pred = pd.DataFrame(pred)
d_pred.columns = ["chp","nsip","o","sar",'uip']

# Identifying information for each image: patient MRN, file path and the raw
# label delivered by the generator.
slice_info = pd.DataFrame({
    'MRN': df_val.MRN,
    'filename': val_generator.filenames,
    'label': list(val_generator.labels),
})

# Side-by-side join (aligned on the row index) of identifiers and probabilities.
df_prob = pd.concat([slice_info, d_pred], axis=1)


def get_series_name(f):
    """Return ``f`` with its last 8 characters removed.

    Used to turn an image file path into a key shared by all images of the
    same series.
    NOTE(review): presumably the 8 dropped characters are a per-slice suffix
    plus the file extension -- confirm against the actual file naming scheme.

    Parameters
    ----------
    f : str
        File path (any sliceable sequence works).

    Returns
    -------
    str
        ``f`` without its trailing 8 characters; empty when ``f`` has 8 or
        fewer characters.
    """
    trailing_chars = 8
    return f[:-trailing_chars]

In [9]:
# Preview the per-image table: identifiers, raw label and the five class
# probabilities.
df_prob.head()

Unnamed: 0,MRN,filename,label,chp,nsip,o,sar,uip
0,1125388,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,3,0.108097,0.061029,0.072366,0.019605,0.738902
1,1125388,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,3,0.11414,0.049401,0.054103,0.022597,0.759758
2,1125388,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,3,0.120565,0.063291,0.094575,0.032794,0.688775
3,1125388,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,3,0.131429,0.047168,0.080826,0.024074,0.716503
4,1125388,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,3,0.133211,0.043551,0.070423,0.018771,0.734044


In [10]:
# Derive a series key for every image by trimming the end of its file path,
# so images belonging to the same series can be grouped together.
df_prob['series'] = df_prob['filename'].map(get_series_name)

In [11]:
# Average the per-image class probabilities within each (MRN, label, series)
# group to get one probability vector per series.
# numeric_only=True is needed because `df_prob` still carries the string
# `filename` column: older pandas dropped it silently, but pandas >= 2.0
# raises TypeError when asked to average it.
df_mean = (
    df_prob
    .groupby(['MRN', 'label', 'series'])
    .mean(numeric_only=True)
    .reset_index()
)
df_mean

Unnamed: 0,MRN,label,series,chp,nsip,o,sar,uip
0,1125388,3,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,0.291036,0.021051,0.081050,0.052245,0.554618
1,1661101,1,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,0.133326,0.175449,0.288847,0.052897,0.349481
2,1756096,4,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,0.173947,0.181245,0.287240,0.026377,0.331189
3,2348239,2,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,0.156633,0.174602,0.320816,0.094448,0.253501
4,2503506,1,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,0.038501,0.176430,0.083485,0.013925,0.687659
...,...,...,...,...,...,...,...,...
58,E196853,4,/raid/data/yanglab/ILD/MSH_preprocessed/1.2.84...,0.155389,0.098677,0.125303,0.018724,0.601907
59,E212063,2,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.2...,0.068090,0.348554,0.163770,0.075656,0.343930
60,E274928,4,/raid/data/yanglab/ILD/MSH_preprocessed/ST_1.3...,0.188888,0.079193,0.294676,0.023617,0.413626
61,E337398,2,/raid/data/yanglab/ILD/MSH_preprocessed/171475...,0.087748,0.247361,0.263823,0.047110,0.353958


In [12]:
# One-vs-rest ROC curve and AUC for each of the five classes, computed on the
# averaged probabilities in `df_mean` (one row per MRN/label/series group).
n_classes = 5
fpr, tpr, roc_auc = {}, {}, {}

# Columns from position 3 onwards hold the averaged class probabilities
# (the first three are the group keys MRN, label and series).
pred_per_patient = np.array(df_mean.iloc[:, 3:])

# One indicator column per class, in the order 0..4.
y_true = label_binarize(df_mean.label, classes=list(range(n_classes)))

for class_idx in range(n_classes):
    false_pos, true_pos, _ = roc_curve(y_true[:, class_idx], pred_per_patient[:, class_idx])
    fpr[class_idx] = false_pos
    tpr[class_idx] = true_pos
    roc_auc[class_idx] = auc(false_pos, true_pos)
    print(class_idx, roc_auc[class_idx])

0 0.6608187134502924
1 0.6399521531100478
2 0.590566037735849
3 0.7704545454545455
4 0.6523255813953488


In [14]:
#df_mean.to_excel("/raid/data/yanglab/ILD/ILD_results/prob_sheet_validation/ct-cnn_val.xlsx", index=False)