## 1D-CNNs for 1Ch-NoMC
- Image level predictions from 1Ch-NoMC
- No MC dropout applied in the 1Ch-NoMC: no information about uncertainty in predictions

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix
# Fix the TensorFlow random seed (TF 1.x API); presumably so that training
# runs in this notebook are repeatable.
tf.set_random_seed(3004)

In [None]:
# Root folder of the input CSVs: one sub-folder per CV run (run0/ ... run4/)
# holding the prediction files that are loaded below.
data_folder = 'C:/Users/hezo/Dropbox/PhD/Stroke/Stroke_classification/Analyses_Oct_2018/outputs/5fold_CV_bl_without_mc_dropout/'
# Root folder for everything this notebook writes: model checkpoints, training
# histories and the patient-level prediction CSVs.
folder = 'C:/Users/hezo/Documents/Stroke/patient_aggregation_1D_CNN_CV_bl_without_mc_dropout/'

## Load data: 

In [None]:
# Run 0: load the prediction CSVs of the train2, valid2 and test splits
train2_0 = pd.read_csv(data_folder + 'run0/predictions_dropout_train2.csv')
valid2_0 = pd.read_csv(data_folder + 'run0/predictions_dropout_valid2.csv')
test_0 = pd.read_csv(data_folder + 'run0/predictions_dropout_test.csv')

In [None]:
# Run 1: load the prediction CSVs of the train2, valid2 and test splits
train2_1 = pd.read_csv(data_folder + 'run1/predictions_dropout_train2.csv')
valid2_1 = pd.read_csv(data_folder + 'run1/predictions_dropout_valid2.csv')
test_1 = pd.read_csv(data_folder + 'run1/predictions_dropout_test.csv')

In [None]:
# Run 2: load the prediction CSVs of the train2, valid2 and test splits
train2_2 = pd.read_csv(data_folder + 'run2/predictions_dropout_train2.csv')
valid2_2 = pd.read_csv(data_folder + 'run2/predictions_dropout_valid2.csv')
test_2 = pd.read_csv(data_folder + 'run2/predictions_dropout_test.csv')

In [None]:
# Run 3: load the prediction CSVs of the train2, valid2 and test splits
train2_3 = pd.read_csv(data_folder + 'run3/predictions_dropout_train2.csv')
valid2_3 = pd.read_csv(data_folder + 'run3/predictions_dropout_valid2.csv')
test_3 = pd.read_csv(data_folder + 'run3/predictions_dropout_test.csv')

In [None]:
# Run 4: load the prediction CSVs of the train2, valid2 and test splits
train2_4 = pd.read_csv(data_folder + 'run4/predictions_dropout_train2.csv')
valid2_4 = pd.read_csv(data_folder + 'run4/predictions_dropout_valid2.csv')
test_4 = pd.read_csv(data_folder + 'run4/predictions_dropout_test.csv')

In [None]:
# define accuracy, specificity and sensitivity
def acc(true, pred):
    """Return the accuracy of binary (0/1) predictions.

    true: sequence of true class labels (0 or 1).
    pred: sequence of predicted class labels (0 or 1), same length as `true`.

    Returns (TN + TP) / number of samples.
    """
    # Pin the label set so the matrix is always 2x2; without it a fold that
    # contains a single class yields a 1x1 matrix and conf_mat[1][1] fails.
    conf_mat = confusion_matrix(true, pred, labels=[0, 1])
    return (conf_mat[0][0]+conf_mat[1][1])/np.sum(conf_mat)
def spec(true, pred):
    """Return the specificity (true-negative rate) of binary (0/1) predictions.

    true: sequence of true class labels (0 or 1).
    pred: sequence of predicted class labels (0 or 1), same length as `true`.

    Returns TN / (TN + FP); the result is nan when `true` has no class-0 sample.
    """
    # Pin the label set so the matrix is always 2x2, even if only one class
    # occurs in `true` and `pred`.
    conf_mat = confusion_matrix(true, pred, labels=[0, 1])
    return conf_mat[0][0]/np.sum(conf_mat[0])
def sens(true, pred):
    """Return the sensitivity (true-positive rate) of binary (0/1) predictions.

    true: sequence of true class labels (0 or 1).
    pred: sequence of predicted class labels (0 or 1), same length as `true`.

    Returns TP / (TP + FN); the result is nan when `true` has no class-1 sample.
    """
    # Pin the label set so the matrix is always 2x2; without it a fold that
    # contains a single class yields a 1x1 matrix and conf_mat[1] fails.
    conf_mat = confusion_matrix(true, pred, labels=[0, 1])
    return conf_mat[1][1]/np.sum(conf_mat[1])

In [None]:
# Collect the train2 data frames of the five CV runs; train[i] belongs to run i
train = [train2_0, train2_1, train2_2, train2_3, train2_4]

In [None]:
# Collect the valid2 data frames of the five CV runs; valid[i] belongs to run i
valid = [valid2_0, valid2_1, valid2_2, valid2_3, valid2_4]

In [None]:
# Collect the test data frames of the five CV runs; test[i] belongs to run i
test = [test_0, test_1, test_2, test_3, test_4]

### Convert data to one hot

In [None]:
def convertToOneHot(vector, num_classes=None):
    """One-hot encode a 1-D sequence of integer class indices.

    vector: sequence (list, array, Series) of class indices in [0, num_classes).
    num_classes: number of columns of the result. When omitted it is inferred
        as max(vector) + 1 (0 for an empty vector).

    Returns an int32 array of shape (len(vector), num_classes) with a single 1
    per row at the column given by the corresponding class index.
    """
    labels = np.asarray(vector)
    if labels.size == 0:
        # An empty sequence becomes a float array, which cannot be used as index.
        labels = labels.astype(int)
    if num_classes is None:
        # The former default (None) crashed in np.zeros; derive it from the data.
        num_classes = int(labels.max()) + 1 if labels.size else 0
    result = np.zeros((len(labels), num_classes), dtype='int32')
    result[np.arange(len(labels)), labels] = 1
    return result

## 1D-CNN: Some tries

In [None]:
import keras 
from keras.layers import Dense, Convolution1D, Input, Activation, Flatten, Dropout, GlobalMaxPool1D, MaxPooling1D
from keras.models import Model, Sequential
from keras import regularizers
from keras.callbacks import ModelCheckpoint

In [None]:
def show_results(acc_train, acc_valid, loss_train, loss_valid):
    """Plot the training curves and report the best validation epochs.

    acc_train, acc_valid: per-epoch training / validation accuracy.
    loss_train, loss_valid: per-epoch training / validation loss.

    Shows one figure for accuracy and one for loss, prints the maximal
    validation accuracy and the minimal validation loss together with the
    (0-based) positions where they occur, and returns the 1-based epoch
    number(s) with the lowest validation loss as an array.
    """
    # (title and y-label, train curve, valid curve, y-limits, legend position)
    panels = (
        ('Accuracy', acc_train, acc_valid, (0, 1.1), 'lower right'),
        ('Loss', loss_train, loss_valid, (0, 2.5), 'upper right'),
    )
    for label, train_curve, valid_curve, y_limits, legend_pos in panels:
        plt.plot(train_curve, 'blue')
        plt.plot(valid_curve, 'cyan')
        plt.ylim(*y_limits)
        plt.title(label)
        plt.ylabel(label)
        plt.xlabel('Epochs')
        plt.legend(['Train', 'Valid'], loc=legend_pos)
        plt.show()

    best_acc = np.max(acc_valid)
    print("Max val accuracy: ", best_acc)
    print("In epochs: ", np.where(acc_valid==best_acc))

    lowest_loss = np.min(loss_valid)
    print("Min val loss: ", lowest_loss)
    lowest_loss_at = np.where(loss_valid==lowest_loss)
    print('In epoch: ', lowest_loss_at)
    # positions are 0-based, epochs are counted from 1
    return lowest_loss_at[0]+1

In [None]:
def generate_batches_from_file(dat):
    """Endlessly yield one (X, Y) batch per patient of `dat`.

    dat: data frame with the columns `p_id` (patient id), `mean1` (one value
        per row of the patient) and `pat_true` (patient label).

    Each batch holds a single patient: X has shape (1, n_rows, 1) and contains
    the patient's `mean1` values, Y is the one-hot encoded (2 classes) label
    taken from the patient's first row. Patients are visited in the iteration
    order of set(dat.p_id); after the last patient the loop starts over.
    """
    while True:
        for patient_id in set(dat.p_id):
            # all rows belonging to the current patient
            patient_rows = dat.loc[dat.p_id == patient_id, :]
            scores = patient_rows.mean1.values
            # 1 = batch, len(scores) = n images, 1 = n features
            X = scores.reshape((1, len(scores), 1))
            first_label = patient_rows.pat_true.iloc[:1]
            Y = convertToOneHot(first_label.astype(int), 2)
            yield X, Y

### CV training

In [None]:
# Train one 1D-CNN per CV run on the per-patient sequences of image-level
# predictions and store checkpoints plus the training history under
# <folder>/run<i>/nn0/.
for i in range(5):
    print("Run", i)

    # Extract information for run i
    train_run = train[i]
    valid_run = valid[i]

    # Make sure the output directory exists before training starts; otherwise
    # the first checkpoint (or the history export) fails on a missing folder.
    run_dir = folder + 'run'+str(i)+'/nn0/'
    os.makedirs(run_dir, exist_ok=True)

    # Define model and compile:
    # Conv1D over a variable-length sequence with one feature per step,
    # global max pooling, dropout and a softmax output over the two classes.
    num_classes = 2
    model = Sequential()
    model.add(Convolution1D(16, kernel_size=3, activation="relu", batch_input_shape=(None, None, 1)))
    # model.add(Dropout(0.5))
    # model.add(Convolution1D(8, kernel_size=3, activation="relu"))
    # model.add(Dropout(0.5))
    # model.add(Convolution1D(8, kernel_size=3, activation="relu"))
    model.add(GlobalMaxPool1D())
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

    # Train the model and save checkpoints: a checkpoint is written only for
    # epochs that improve the validation loss (save_best_only=True).
    cp_callback = ModelCheckpoint(run_dir + 'model-{epoch:02d}.hdf5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
    # The generators yield one patient per step, so one epoch (and one
    # validation pass) covers every patient exactly once.
    results = model.fit_generator(generate_batches_from_file(train_run),
                                  epochs=50,
                                  steps_per_epoch=len(np.unique(train_run.p_id)),
                                  verbose=1,
                                  validation_data=generate_batches_from_file(valid_run),
                                  validation_steps=len(np.unique(valid_run.p_id)),
                                  callbacks=[cp_callback])

    # save history
    pd.DataFrame(results.history).to_csv(run_dir + 'history.csv', index=False)

    #### Find epoch with lowest validation loss
    epoch = show_results(results.history['acc'], results.history['val_acc'], results.history['loss'], results.history['val_loss'])

#### Prediction

In [None]:
from keras.models import load_model

# Per-run metrics and patient-level outputs, collected over the five CV runs
accuracy = []
sensitivity = []
specificity = []
pred_total = []
p_id_total = []
pred_true_total = []

for i in range(5):

    print("Run", i)
    #### Extract information of run i
    test_run = test[i]

    # Load the training history and take the first epoch (1-based) with the
    # lowest validation loss. pd.read_csv is used instead of the deprecated
    # pd.DataFrame.from_csv, which also turned the first column into the index.
    dat = pd.read_csv(folder + 'run'+str(i)+'/nn0/history.csv')
    epoch = np.where(dat.val_loss==np.min(dat.val_loss))[0]+1
    # Checkpoint files carry a zero-padded, at least two-digit epoch number
    model = load_model(folder + 'run'+str(i)+ '/nn0/model-' + '{:02d}'.format(int(epoch[0])) + '.hdf5')
    pred_tmp = model.predict_generator(generate_batches_from_file(test_run), steps=len(np.unique(test_run.p_id)))
    pred = np.argmax(pred_tmp, axis=1)

    # Get the true labels and patient ids. The patients must be visited in the
    # same order as in generate_batches_from_file (iteration over the set of
    # patient ids) so that they line up with the rows of pred_tmp.
    true = []
    p_id =  []
    for p_id_tmp in set(test_run.p_id):
        # select one patient
        pat_tmp = test_run.loc[test_run.p_id==p_id_tmp,:]
        # label and id are taken from the patient's first row
        Y = pat_tmp.pat_true.head(n=1)
        pid = pat_tmp.p_id.head(n=1)
        true.append(Y.values[0])
        p_id.append(pid.values[0])

    # column 1 = predicted probability of class 1
    pred_total.append(pred_tmp[:,1])
    pred_true_total.append(true)
    p_id_total.append(p_id)

    # compute each metric once and reuse it for storing and printing
    run_accuracy = acc(true, pred)
    run_specificity = spec(true, pred)
    run_sensitivity = sens(true, pred)
    accuracy.append(run_accuracy)
    specificity.append(run_specificity)
    sensitivity.append(run_sensitivity)
    print('Accuracy: ', run_accuracy)
    print('Specificity: ', run_specificity)
    print('Sensitivity: ', run_sensitivity)

# Stack the results of all runs into flat arrays (one entry per test patient)
pred_total = np.concatenate(pred_total)
pred_true_total = np.concatenate(pred_true_total)
p_id_total = np.concatenate(p_id_total)

dat = pd.DataFrame({'p_id':p_id_total, 'pred':pred_total, 'pat_true':pred_true_total})
dat.to_csv(folder + '/CV_predictions_pat_test_nn0.csv', index=False)

## 1D-CNN with MC Dropout: predictions

In [None]:
from keras.layers import Dense, Input, Activation, Flatten, Dropout, Lambda
from keras.models import Model
from keras import backend as K

In [None]:
# Train one 1D-CNN with MC dropout per CV run and store checkpoints plus the
# training history under <folder>/run<i>/nn0_mc/.
for i in range(5):
    print("Run", i)

    # Extract information for run i
    train_run = train[i]
    valid_run = valid[i]

    # Make sure the output directory exists before training starts; otherwise
    # the first checkpoint (or the history export) fails on a missing folder.
    run_dir = folder + 'run'+str(i)+'/nn0_mc/'
    os.makedirs(run_dir, exist_ok=True)

    # Define model and compile
    # Defined here so that this cell does not depend on an earlier cell
    # having set num_classes.
    num_classes = 2
    data_input = Input(shape=(None,1))
    # Hidden layer
    x = Convolution1D(16, kernel_size=3)(data_input)
    x = Activation("relu")(x)
    # x = Lambda(lambda x: K.dropout(x, level=0.5))(x)
    # x = Convolution1D(8, kernel_size=3)(x)
    # x = Activation("relu")(x)
    # x = Lambda(lambda x: K.dropout(x, level=0.5))(x)
    # x = Convolution1D(8, kernel_size=3)(x)
    # x = Activation("relu")(x)
    x = GlobalMaxPool1D()(x)
    # Dropout through the backend op inside a Lambda layer instead of a Dropout
    # layer; presumably so that it stays active at prediction time (MC dropout).
    x = Lambda(lambda x: K.dropout(x, level=0.5))(x)
    x = Dense(num_classes)(x)
    out = Activation("softmax")(x)

    model = Model(inputs=data_input, outputs=out)

    model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

    # Train the model and save checkpoints: a checkpoint is written only for
    # epochs that improve the validation loss (save_best_only=True).
    cp_callback = ModelCheckpoint(run_dir + 'model-{epoch:02d}.hdf5', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
    # The generators yield one patient per step, so one epoch (and one
    # validation pass) covers every patient exactly once.
    results = model.fit_generator(generate_batches_from_file(train_run),
                                  epochs=100,
                                  steps_per_epoch=len(np.unique(train_run.p_id)),
                                  verbose=1,
                                  validation_data=generate_batches_from_file(valid_run),
                                  validation_steps=len(np.unique(valid_run.p_id)),
                                  callbacks=[cp_callback])

    # save history
    pd.DataFrame(results.history).to_csv(run_dir + 'history.csv', index=False)

    #### Find epoch with lowest validation loss
    epoch = show_results(results.history['acc'], results.history['val_acc'], results.history['loss'], results.history['val_loss'])

#### Prediction

In [None]:
from keras.models import load_model

n_classes = 2
# number of stochastic forward passes (MC dropout samples) per patient
n_mc_samples = 500

# Per-run metrics and patient-level outputs, collected over the five CV runs
accuracy = []
sensitivity = []
specificity = []
pred_total = []
p_id_total = []
pred_true_total = []
sd1_total = []
total_var_total = []
vr_total = []
pe_total = []
mi_total = []

for i in range(5):

    print("Run", i)
    #### Extract information of run i
    test_run = test[i]

    # Load the training history and take the first epoch (1-based) with the
    # lowest validation loss. pd.read_csv is used instead of the deprecated
    # pd.DataFrame.from_csv, which also turned the first column into the index.
    dat = pd.read_csv(folder + 'run'+str(i)+'/nn0_mc/history.csv')
    epoch = np.where(dat.val_loss==np.min(dat.val_loss))[0]+1
    # Checkpoint files carry a zero-padded, at least two-digit epoch number
    # (epoch 100 gives 'model-100.hdf5').
    model = load_model(folder + 'run'+str(i)+ '/nn0_mc/model-' + '{:02d}'.format(int(epoch[0])) + '.hdf5')

    # Get n_mc_samples predictions for each patient. The loop variable is not
    # called i so that it does not overwrite the run index.
    n_patients = len(np.unique(test_run.p_id))
    raw_pred = np.empty([n_mc_samples, n_patients, n_classes])
    for mc_run in range(n_mc_samples):
        raw_pred[mc_run] = model.predict_generator(generate_batches_from_file(test_run), steps=n_patients)
    # get the mean over all predictions
    pred_tmp = np.mean(raw_pred,axis=0)
    pred = np.argmax(pred_tmp, axis=1)

    # Save the variances: sample standard deviation per patient and class,
    # computed once over the MC samples.
    sd_tmp = np.array(np.std(raw_pred, ddof=1, axis=0))
    sd0_tmp = sd_tmp[:,0]
    sd1_tmp = sd_tmp[:,1]
    total_var_total.append(sd0_tmp**2 + sd1_tmp**2)
    sd1_total.append(sd1_tmp)

    # avoid log(0) in the entropy terms below
    raw_pred[raw_pred==0]=1e-40

    # Get the true labels and patient ids together with the uncertainty
    # measures. The patients must be visited in the same order as in
    # generate_batches_from_file (iteration over the set of patient ids) so
    # that index j lines up with the second axis of raw_pred.
    true = []
    p_id =  []
    vr = []
    pe = []
    mi = []
    for j, p_id_tmp in enumerate(set(test_run.p_id)):
        # select one patient
        pat_tmp = test_run.loc[test_run.p_id==p_id_tmp,:]
        # label and id are taken from the patient's first row
        Y = pat_tmp.pat_true.head(n=1)
        pid = pat_tmp.p_id.head(n=1)
        true.append(Y.values[0])
        p_id.append(pid.values[0])

        # all MC samples of patient j, shape (n_mc_samples, n_classes)
        pat_pred = raw_pred[:,j,:]
        # variation ratio: 1 - share of MC samples voting for the modal class
        votes = np.histogram(np.argmax(pat_pred, axis=1), bins=n_classes, range=[0,n_classes])[0]
        vr.append(1-(np.max(votes)/len(pat_pred)))
        # predictive entropy: entropy of the mean predicted distribution
        mean_pred = np.mean(pat_pred, axis=0)
        pe_tmp = (-1)*np.sum(mean_pred*np.log(mean_pred))
        pe.append(pe_tmp)
        # mutual information: predictive entropy minus the mean entropy of the
        # individual MC samples (the added sum of p*log(p) is negative)
        mi.append(pe_tmp + np.sum(np.array([np.sum(pat_pred[:,c]*np.log(pat_pred[:,c])) for c in range(0,n_classes)]))/len(pat_pred))

    # column 1 = mean predicted probability of class 1
    pred_total.append(pred_tmp[:,1])
    pred_true_total.append(true)
    p_id_total.append(p_id)
    vr_total.append(vr)
    pe_total.append(pe)
    mi_total.append(mi)

    # compute each metric once and reuse it for storing and printing
    run_accuracy = acc(true, pred)
    run_specificity = spec(true, pred)
    run_sensitivity = sens(true, pred)
    accuracy.append(run_accuracy)
    specificity.append(run_specificity)
    sensitivity.append(run_sensitivity)
    print('Accuracy: ', run_accuracy)
    print('Specificity: ', run_specificity)
    print('Sensitivity: ', run_sensitivity)

# Stack the results of all runs into flat arrays (one entry per test patient)
pred_total = np.concatenate(pred_total)
pred_true_total = np.concatenate(pred_true_total)
p_id_total = np.concatenate(p_id_total)
sd1_total = np.concatenate(sd1_total)
total_var_total = np.concatenate(total_var_total)
vr_total = np.concatenate(vr_total)
pe_total = np.concatenate(pe_total)
mi_total = np.concatenate(mi_total)

dat = pd.DataFrame({'p_id':p_id_total, 'pred':pred_total, 'pat_true':pred_true_total,
                   'total_var':total_var_total, 'sd1':sd1_total, 'vr':vr_total, 'pe':pe_total,
                   'mi':mi_total})
dat.to_csv(folder + '/CV_predictions_pat_test_nn0_mc.csv', index=False)