# Covid Chest Project
## Simple Models

### Imports

In [None]:
import numpy as np
from utils import load_covid_data, create_dataset
from models import simple_cnn_model
from plots import plot_cm_handy, plot_roc_handy
from evaluation import mode_robustness, evaluation
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import load_model

import warnings
warnings.filterwarnings('ignore')

## Dataset1 (X-Ray)

In [None]:
# Load the X-ray train and test splits from disk.
x_train, y_train = load_covid_data(path='Data/train', shuffle=True, class_frequency=True)
x_test, y_test = load_covid_data(path='Data/test')

# Balanced per-class weights computed from the integer training labels
# (argmax over each label row). `classes` and `y` are passed by keyword:
# recent scikit-learn releases make them keyword-only, and the keyword form
# is also accepted by older releases.
train_labels = np.argmax(y_train, axis=1)
present_classes = np.unique(train_labels)
balanced_weights = compute_class_weight(class_weight='balanced',
                                        classes=present_classes,
                                        y=train_labels)
# Key every weight by the label it belongs to instead of hard-coding 0/1/2.
class_weights = {int(label): weight
                 for label, weight in zip(present_classes, balanced_weights)}

BATCH_SIZE = 64
# NOTE(review): the import cell only pulls `create_dataset` from utils;
# `create_dataset_xray` is not imported anywhere in this notebook — confirm
# where it is defined.
train_dataset, validation_dataset = create_dataset_xray(x_train, y_train, x_test, y_test, BATCH_SIZE)

## Dataset2 (CT)

In [None]:
# Load the two saved CT arrays from the working directory
# (presumably inputs and labels — confirm against how they were written).
ct_array_files = ('CT_X.npy', 'CT_Y.npy')
X, Y = (np.load(array_file) for array_file in ct_array_files)

In [None]:
BATCH_SIZE = 256

# create_dataset_ct hands back six values: the two dataset objects followed
# by the four split arrays.
# NOTE(review): this binds the test inputs to `X_test`, whereas the
# evaluation cells read `x_test`; it also rebinds BATCH_SIZE, y_train,
# y_test and the dataset objects — confirm which dataset is meant to be
# active before running the cells below.
ct_outputs = create_dataset_ct(X, Y, BATCH_SIZE)
(train_dataset, validation_dataset,
 X_train, X_test, y_train, y_test) = ct_outputs

## Without Monte-Carlo Dropout

### Simple CNN Model

In [None]:
# Build the simple CNN for the "without Monte-Carlo dropout" runs (mc=False).
# The helper returns the model together with the callbacks used by fit().
# `lr` is presumably the optimizer learning rate — confirm in models.py.
model, callbacks=simple_cnn_model(mc=False, lr=0.00001)

In [None]:
# Train with epochs=200, validating on the validation dataset and applying
# the per-class weights and the callbacks returned with the model.
fit_options = dict(
    epochs=200,
    validation_data=validation_dataset,
    class_weight=class_weights,
    callbacks=callbacks,
)
hist = model.fit(train_dataset, **fit_options)

#### Results

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix,precision_score,recall_score,f1_score
from tensorflow.keras.models import load_model

# Score the saved simple CNN checkpoint on the test set.
model = load_model('drive/My Drive/Chest_Covid/Simple Models/X Ray Results/simple_cnn_model_covid_simple.h5')

preds = model.predict(x_test)

# Convert label rows and predicted scores to class indices once.
true_labels = np.argmax(y_test, axis=1)
pred_labels = np.argmax(preds, axis=1)

acc = accuracy_score(true_labels, pred_labels)*100

# Confusion matrix with every row divided by its row total.
cm = confusion_matrix(true_labels, pred_labels)
cm = cm.astype('float') / cm.sum(axis=1, keepdims=True)

print('CONFUSION MATRIX ------------------')
print(cm)

print('\nTEST METRICS ----------------------')

# Weighted-average precision and recall, scaled to percent.
precision = precision_score(true_labels, pred_labels, average='weighted')*100
recall = recall_score(true_labels, pred_labels, average='weighted')*100

for template, value in (('Accuracy: {}%', acc),
                        ('Precision: {}%', precision),
                        ('Recall: {}%', recall)):
    print(template.format(value))
# Harmonic mean of the two weighted averages above (same percent scale).
print('F1-score: {}'.format( 2*precision*recall/(precision+recall) ))

In [None]:
# Reload the checkpoint and predict on the test set with the current batch size.
checkpoint_path = 'drive/My Drive/Chest_Covid/Simple Models/X Ray Results/simple_cnn_model_covid_simple.h5'
model = load_model(checkpoint_path)

y_p = model.predict(x_test, batch_size=BATCH_SIZE)

# ROC curves and confusion matrix drawn from the same predictions.
roc_title = 'Roc of Simple CNN model without uncertainty (X-Ray)'
cm_title = 'Confusion Matrix of Simple CNN model without uncertainty (X-Ray)'

plot_roc_handy(y_test, y_p, zoom=True, lw=2, name=roc_title,
               class_name=['COVID19','Normal','Pneumonia'])

plot_cm_handy(y_test, y_p, lw=2, name=cm_title,
              class_name=['COVID19','Normal','Pneumonia'])

#### T-SNE

In [None]:
from models import simple_cnn_trunc_model

# Reload the checkpoint, wrap it in the truncated model and collect that
# model's outputs on the test set as the features for the t-SNE cells.
checkpoint_path = 'drive/My Drive/Chest_Covid/Simple Models/X Ray Results/simple_cnn_model_covid_simple.h5'
model = load_model(checkpoint_path)
trunc_model = simple_cnn_trunc_model(model, mc=False)
hidden_features = trunc_model.predict(x_test)

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Step 1: project the features onto 120 principal components and report
# the total explained-variance ratio they retain.
pca = PCA(n_components=120)
pca_result = pca.fit_transform(hidden_features)
retained_variance = pca.explained_variance_ratio_.sum()
print('Variance PCA: {}'.format(retained_variance))

# Step 2: embed the PCA output in two dimensions with t-SNE.
tsne = TSNE(n_components=2, verbose=1)
tsne_results = tsne.fit_transform(pca_result)

In [None]:
from keras.utils import np_utils
import matplotlib.pyplot as plt
import matplotlib 
matplotlib.rc('xtick', labelsize=20) 
matplotlib.rc('ytick', labelsize=20) 

plt.rcParams.update({'font.size': 25})
%matplotlib inline
Name='T-SNE Visualization of Simple CNN model without uncertainty (X-Ray)'
fig = plt.figure(figsize=[15, 15])
color_map = np.argmax(y_test, axis=1)
classes=['COVID19','Normal','Pneumonia']
for cl in range(3):
    indices = np.where(color_map==cl)
    indices = indices[0]
    plt.title(Name, fontsize=20)
    plt.ylabel('Dim_2', fontsize=20)
    plt.xlabel('Dim_1', fontsize=20)
    matplotlib.rc('xtick', labelsize=20) 
    matplotlib.rc('ytick', labelsize=20) 
    plt.scatter(tsne_results[indices,0], tsne_results[indices, 1], label=classes[cl])

plt.rcParams.update({'font.size': 20})

plt.legend()
plt.show()
fig.savefig('{}.pdf'.format(Name),dpi=300)



### Multi-headed Model

In [None]:
# `multi_headed_model` is not among the names imported in the notebook's
# import cell, so bring it into scope here.
# NOTE(review): assumes it lives in models.py alongside
# multi_headed_trunc_model — confirm.
from models import multi_headed_model

# Build the multi-headed network and its callbacks. The positional False is
# presumably the Monte-Carlo-dropout switch (this is the "without" section)
# — confirm against the helper's signature.
model_mh, callbacks_mh = multi_headed_model(False)

In [None]:
# Train with epochs=200, validating on the validation dataset and applying
# the per-class weights and the callbacks returned with the model.
fit_options = dict(
    epochs=200,
    validation_data=validation_dataset,
    class_weight=class_weights,
    callbacks=callbacks_mh,
)
hist_mh = model_mh.fit(train_dataset, **fit_options)

#### Results

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix,precision_score,recall_score,f1_score
from tensorflow.keras.models import load_model

# Score the saved multi-headed checkpoint on the test set.
model_mh = load_model('drive/My Drive/Chest_Covid/Simple Models/X Ray Results/multi_headed_model_covid_simple.h5')

preds = model_mh.predict(x_test)

# Convert label rows and predicted scores to class indices once.
true_labels = np.argmax(y_test, axis=1)
pred_labels = np.argmax(preds, axis=1)

acc = accuracy_score(true_labels, pred_labels)*100

# Confusion matrix with every row divided by its row total.
cm = confusion_matrix(true_labels, pred_labels)
cm = cm.astype('float') / cm.sum(axis=1, keepdims=True)

print('CONFUSION MATRIX ------------------')
print(cm)

print('\nTEST METRICS ----------------------')

# Weighted-average precision and recall, scaled to percent.
precision = precision_score(true_labels, pred_labels, average='weighted')*100
recall = recall_score(true_labels, pred_labels, average='weighted')*100

for template, value in (('Accuracy: {}%', acc),
                        ('Precision: {}%', precision),
                        ('Recall: {}%', recall)):
    print(template.format(value))
# Harmonic mean of the two weighted averages above (same percent scale).
print('F1-score: {}'.format( 2*precision*recall/(precision+recall) ))

In [None]:
# Reload the checkpoint and predict on the test set with the current batch size.
checkpoint_path = 'drive/My Drive/Chest_Covid/Simple Models/X Ray Results/multi_headed_model_covid_simple.h5'
model_mh = load_model(checkpoint_path)

y_p = model_mh.predict(x_test, batch_size=BATCH_SIZE)

# ROC curves (with an explicit `axis` window) and confusion matrix drawn
# from the same predictions.
roc_title = 'Roc of Multi-headed model without uncertainty (X-Ray)'
cm_title = 'Confusion Matrix of Multi-headed model without uncertainty (X-Ray)'

plot_roc_handy(y_test, y_p, zoom=True, lw=2, name=roc_title,
               class_name=['COVID19','Normal','Pneumonia'],
               axis=[0.0, 0.18, 0.85, 1.0])

plot_cm_handy(y_test, y_p, lw=2, name=cm_title,
              class_name=['COVID19','Normal','Pneumonia'])

#### T-SNE

In [None]:
from models import multi_headed_trunc_model

# Reload the checkpoint, wrap it in the truncated model and collect that
# model's outputs on the test set as the features for the t-SNE cells.
checkpoint_path = 'drive/My Drive/Chest_Covid/Simple Models/X Ray Results/multi_headed_model_covid_simple.h5'
model_mh = load_model(checkpoint_path)
trunc_model = multi_headed_trunc_model(model_mh, mc=False)
hidden_features = trunc_model.predict(x_test)

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Step 1: project the features onto 120 principal components and report
# the total explained-variance ratio they retain.
pca = PCA(n_components=120)
pca_result = pca.fit_transform(hidden_features)
retained_variance = pca.explained_variance_ratio_.sum()
print('Variance PCA: {}'.format(retained_variance))

# Step 2: embed the PCA output in two dimensions with t-SNE.
tsne = TSNE(n_components=2, verbose=1)
tsne_results = tsne.fit_transform(pca_result)

In [None]:
from keras.utils import np_utils
import matplotlib.pyplot as plt
import matplotlib 
matplotlib.rc('xtick', labelsize=20) 
matplotlib.rc('ytick', labelsize=20) 

plt.rcParams.update({'font.size': 25})
%matplotlib inline
Name='T-SNE Visualization of Multi-headed model without uncertainty (X-Ray)'
fig = plt.figure(figsize=[15, 15])
color_map = np.argmax(y_test,  axis=1)
classes=['COVID19','Normal','Pneumonia']
for cl in range(3):
    indices = np.where(color_map==cl)
    indices = indices[0]
    plt.title(Name, fontsize=20)
    plt.ylabel('Dim_2', fontsize=20)
    plt.xlabel('Dim_1', fontsize=20)
    matplotlib.rc('xtick', labelsize=20) 
    matplotlib.rc('ytick', labelsize=20) 
    plt.scatter(tsne_results[indices,0], tsne_results[indices, 1], label=classes[cl])

plt.rcParams.update({'font.size': 20})

plt.legend()
plt.show()
fig.savefig('{}.pdf'.format(Name),dpi=300)



## With Monte-Carlo Dropout

### Simple CNN Model

In [None]:
# Build the simple CNN for the "with Monte-Carlo dropout" runs (mc=True).
# The helper returns the model together with the callbacks used by fit().
# `lr` is presumably the optimizer learning rate — confirm in models.py.
mc_model, mc_callbacks=simple_cnn_model(mc=True, lr=0.00001)

In [None]:
# Train with epochs=200, validating on the validation dataset and applying
# the per-class weights and the callbacks returned with the model.
fit_options = dict(
    epochs=200,
    validation_data=validation_dataset,
    class_weight=class_weights,
    callbacks=mc_callbacks,
)
mc_hist = mc_model.fit(train_dataset, **fit_options)

#### Results

In [None]:
from sklearn.metrics import accuracy_score,recall_score,precision_score
import tqdm

mc_model = load_model('drive/My Drive/Chest_Covid/Simple Models/X Ray Results/simple_cnn_model_covid_mc.h5')

# Collect `number_prediction` separate predictions on the same test inputs.
number_prediction=200
mc_predictions = []
for i in tqdm.tqdm(range(number_prediction)):
    y_p = mc_model.predict(x_test)
    mc_predictions.append(y_p)

# True class indices, computed once and reused for every metric below.
y_true = y_test.argmax(axis=1)

# Four independent lists. A chained `a = b = c = d = []` binds every name to
# the SAME list object, which would interleave all four metrics in one list
# and make each reported mean (and the accuracy histogram) wrong.
accs, recalls, precisions, F1s = [], [], [], []
for y_p in mc_predictions:
    y_hat = y_p.argmax(axis=1)
    acc = accuracy_score(y_true, y_hat)
    recall = recall_score(y_true, y_hat, average='weighted')
    precision = precision_score(y_true, y_hat, average='weighted')
    # Harmonic mean of the weighted precision and recall of this pass.
    F1 = (2*precision*recall)/(precision+recall)
    accs.append(acc)
    recalls.append(recall)
    precisions.append(precision)
    F1s.append(F1)


# Mean of each metric over the individual passes.
print("MC accuracy: {:.5%}".format(sum(accs)/len(accs)))
print("MC precision: {:.5%}".format(sum(precisions)/len(precisions)))
print("MC recall: {:.5%}".format(sum(recalls)/len(recalls)))
print("MC F1: {:.5%}".format(sum(F1s)/len(F1s)))

# Ensemble prediction: average the outputs of all passes, then take the argmax.
mc_ensemble_pred = np.array(mc_predictions).mean(axis=0).argmax(axis=1)
ensemble_acc = accuracy_score(y_true, mc_ensemble_pred)
ensemble_precision = precision_score(y_true, mc_ensemble_pred, average='weighted')
ensemble_recall = recall_score(y_true, mc_ensemble_pred, average='weighted')
ensemble_F1 = (2*ensemble_precision*ensemble_recall)/(ensemble_precision+ensemble_recall)

print("MC-ensemble accuracy: {:.5%}".format(ensemble_acc))
print("MC-ensemble precision: {:.5%}".format(ensemble_precision))
print("MC-ensemble recall: {:.5%}".format(ensemble_recall))
print("MC-ensemble F1: {:.5%}".format(ensemble_F1))



In [None]:
# Histogram of the per-pass accuracies, with the ensemble accuracy marked by
# a red vertical line.
Name = 'Histogram of Simple CNN model with uncertainty (X-Ray)'
f, ax = plt.subplots(figsize=[10, 7])
ax.hist(accs)
ax.axvline(x=ensemble_acc, color="r")
ax.set_title(Name, fontsize=19)
# Write the PDF once, with the tight bounding box. Saving the same figure
# twice to the same path only overwrote the first file. The save happens
# before show() so it does not depend on how the backend treats the figure
# once it has been displayed.
f.savefig('{}.pdf'.format(Name), bbox_inches='tight')
plt.show()

In [None]:
# Plot from the mean over all Monte-Carlo passes. `y_p` at this point only
# holds the last single pass left over from the metrics loop, so plotting it
# would show one stochastic draw rather than the ensemble.
mc_mean_pred = np.array(mc_predictions).mean(axis=0)

plot_roc_handy(y_test, mc_mean_pred, zoom=True, lw=2, name='Roc of Simple CNN model with uncertainty (X-Ray)',
               class_name=['COVID19','Normal','Pneumonia'], axis=[0.0, 0.21, 0.85, 1.0])

plot_cm_handy(y_test, mc_mean_pred,
              lw=2, name='Confusion Matrix of Simple CNN model with uncertainty (X-Ray)',
              class_name=['COVID19','Normal','Pneumonia'])


#### Noise Robustness

In [None]:
# Reload the MC checkpoint for the robustness run.
mc_checkpoint_path = 'drive/My Drive/Chest_Covid/Simple Models/X Ray Results/simple_cnn_model_covid_mc.h5'
mc_model = load_model(mc_checkpoint_path)

# Coefficients handed to mode_robustness (presumably noise standard
# deviations, going by the name — confirm in evaluation.py).
std_coef = [1e-4, 1e-3, 1e-2, 1e-1] + [0.2, 0.3, 0.4, 0.5, 0.6]

In [None]:
# Run the robustness evaluation for the MC simple CNN on the test set, once
# per entry of std_coef (presumably — the helper's behaviour is defined in
# evaluation.py; confirm there).
mode_robustness(x_test, y_test, mc_model, std_coef)

#### T-SNE

In [None]:
# Reload the MC checkpoint and wrap it in the truncated model (mc=True).
mc_checkpoint_path = 'drive/My Drive/Chest_Covid/Simple Models/X Ray Results/simple_cnn_model_covid_mc.h5'
mc_model = load_model(mc_checkpoint_path)
trunc_model = simple_cnn_trunc_model(mc_model, mc=True)

In [None]:
# Run the truncated model on the test set 200 times and average the outputs
# element-wise across the runs.
feature_passes = [trunc_model.predict(x_test) for _ in range(200)]
hidden_features = np.mean(np.array(feature_passes), axis=0)

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Step 1: project the features onto 120 principal components and report
# the total explained-variance ratio they retain.
pca = PCA(n_components=120)
pca_result = pca.fit_transform(hidden_features)
retained_variance = pca.explained_variance_ratio_.sum()
print('Variance PCA: {}'.format(retained_variance))

# Step 2: embed the PCA output in two dimensions with t-SNE.
tsne = TSNE(n_components=2, verbose=1)
tsne_results = tsne.fit_transform(pca_result)

In [None]:
from keras.utils import np_utils
import matplotlib.pyplot as plt
import matplotlib 
matplotlib.rc('xtick', labelsize=20) 
matplotlib.rc('ytick', labelsize=20) 

plt.rcParams.update({'font.size': 25})
%matplotlib inline
Name='T-SNE Visualization of Simple CNN model with uncertainty (X-Ray)'
fig = plt.figure(figsize=[15, 15])
color_map = np.argmax(y_test, axis=1)
classes=['COVID19','Normal','Pneumonia']
for cl in range(3):
    indices = np.where(color_map==cl)
    indices = indices[0]
    plt.title(Name, fontsize=20)
    plt.ylabel('Dim_2', fontsize=20)
    plt.xlabel('Dim_1', fontsize=20)
    matplotlib.rc('xtick', labelsize=20) 
    matplotlib.rc('ytick', labelsize=20) 
    plt.scatter(tsne_results[indices,0], tsne_results[indices, 1], label=classes[cl])

plt.rcParams.update({'font.size': 20})

plt.legend()
plt.show()
fig.savefig('{}.pdf'.format(Name),dpi=300)



### Multi-headed Model

In [None]:
# `multi_headed_model` is not among the names imported in the notebook's
# import cell, so bring it into scope here.
# NOTE(review): assumes it lives in models.py alongside
# multi_headed_trunc_model — confirm.
from models import multi_headed_model

# Build the multi-headed network and its callbacks. The positional True is
# presumably the Monte-Carlo-dropout switch (this is the "with" section)
# — confirm against the helper's signature.
mc_model_mh, mc_callbacks_mh = multi_headed_model(True)

In [None]:
# Train with epochs=200, validating on the validation dataset and applying
# the per-class weights and the callbacks returned with the model.
fit_options = dict(
    epochs=200,
    validation_data=validation_dataset,
    class_weight=class_weights,
    callbacks=mc_callbacks_mh,
)
mc_hist_mh = mc_model_mh.fit(train_dataset, **fit_options)

#### Results

In [None]:
from sklearn.metrics import accuracy_score,recall_score,precision_score
import tqdm

mc_model_mh = load_model('drive/My Drive/Chest_Covid/Simple Models/X Ray Results/multi_headed_model_covid_mc.h5')

# Collect `number_prediction` separate predictions on the same test inputs.
number_prediction=200
mc_predictions = []
for i in tqdm.tqdm(range(number_prediction)):
    y_p = mc_model_mh.predict(x_test)
    mc_predictions.append(y_p)

# True class indices, computed once and reused for every metric below.
y_true = y_test.argmax(axis=1)

# Four independent lists. A chained `a = b = c = d = []` binds every name to
# the SAME list object, which would interleave all four metrics in one list
# and make each reported mean (and the accuracy histogram) wrong.
accs, recalls, precisions, F1s = [], [], [], []
for y_p in mc_predictions:
    y_hat = y_p.argmax(axis=1)
    acc = accuracy_score(y_true, y_hat)
    recall = recall_score(y_true, y_hat, average='weighted')
    precision = precision_score(y_true, y_hat, average='weighted')
    # Harmonic mean of the weighted precision and recall of this pass.
    F1 = (2*precision*recall)/(precision+recall)
    accs.append(acc)
    recalls.append(recall)
    precisions.append(precision)
    F1s.append(F1)


# Mean of each metric over the individual passes.
print("MC accuracy: {:.5%}".format(sum(accs)/len(accs)))
print("MC precision: {:.5%}".format(sum(precisions)/len(precisions)))
print("MC recall: {:.5%}".format(sum(recalls)/len(recalls)))
print("MC F1: {:.5%}".format(sum(F1s)/len(F1s)))

# Ensemble prediction: average the outputs of all passes, then take the argmax.
mc_ensemble_pred = np.array(mc_predictions).mean(axis=0).argmax(axis=1)
ensemble_acc = accuracy_score(y_true, mc_ensemble_pred)
ensemble_precision = precision_score(y_true, mc_ensemble_pred, average='weighted')
ensemble_recall = recall_score(y_true, mc_ensemble_pred, average='weighted')
ensemble_F1 = (2*ensemble_precision*ensemble_recall)/(ensemble_precision+ensemble_recall)

print("MC-ensemble accuracy: {:.5%}".format(ensemble_acc))
print("MC-ensemble precision: {:.5%}".format(ensemble_precision))
print("MC-ensemble recall: {:.5%}".format(ensemble_recall))
print("MC-ensemble F1: {:.5%}".format(ensemble_F1))



In [None]:
# Histogram of the per-pass accuracies, with the ensemble accuracy marked by
# a red vertical line.
Name = 'Histogram of Multi-headed model with uncertainty (X-Ray)'
f, ax = plt.subplots(figsize=[10, 7])
ax.hist(accs)
ax.axvline(x=ensemble_acc, color="r")
ax.set_title(Name, fontsize=19)
# Write the PDF once, with the tight bounding box. Saving the same figure
# twice to the same path only overwrote the first file. The save happens
# before show() so it does not depend on how the backend treats the figure
# once it has been displayed.
f.savefig('{}.pdf'.format(Name), bbox_inches='tight')
plt.show()

In [None]:
# Average the stored Monte-Carlo predictions once and draw both plots from
# that mean.
roc_scores = np.array(mc_predictions).mean(axis=0)
cm_scores = np.array(mc_predictions).mean(axis=0)

plot_roc_handy(y_test, roc_scores, zoom=True, lw=2,
               name='Roc of Multi-headed model with uncertainty (X-Ray)',
               class_name=['COVID19','Normal','Pneumonia'],
               axis=[0.0, 0.21, 0.85, 1.0])

plot_cm_handy(y_test, cm_scores, lw=2,
              name='Confusion Matrix of Multi-headed model with uncertainty (X-Ray)',
              class_name=['COVID19','Normal','Pneumonia'])

#### Noise Robustness

In [None]:
# Reload the MC multi-headed checkpoint for the robustness run.
mc_checkpoint_path = 'drive/My Drive/Chest_Covid/Simple Models/X Ray Results/multi_headed_model_covid_mc.h5'
mc_model_mh = load_model(mc_checkpoint_path)

# Coefficients handed to mode_robustness (presumably noise standard
# deviations, going by the name — confirm in evaluation.py).
std_coef = [1e-4, 1e-3, 1e-2, 1e-1] + [0.2, 0.3, 0.4, 0.5, 0.6]

In [None]:
# Run the robustness evaluation for the MC multi-headed model on the test
# set, once per entry of std_coef (presumably — the helper's behaviour is
# defined in evaluation.py; confirm there).
mode_robustness(x_test, y_test, mc_model_mh, std_coef)

#### T-SNE

In [None]:
# Reload the MC checkpoint and wrap it in the truncated model (mc=True).
mc_checkpoint_path = 'drive/My Drive/Chest_Covid/Simple Models/X Ray Results/multi_headed_model_covid_mc.h5'
mc_model_mh = load_model(mc_checkpoint_path)
trunc_model = multi_headed_trunc_model(mc_model_mh, mc=True)

In [None]:
# Run the truncated model on the test set 200 times and average the outputs
# element-wise across the runs.
feature_passes = [trunc_model.predict(x_test) for _ in range(200)]
hidden_features = np.mean(np.array(feature_passes), axis=0)

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Step 1: project the features onto 120 principal components and report
# the total explained-variance ratio they retain.
pca = PCA(n_components=120)
pca_result = pca.fit_transform(hidden_features)
retained_variance = pca.explained_variance_ratio_.sum()
print('Variance PCA: {}'.format(retained_variance))

# Step 2: embed the PCA output in two dimensions with t-SNE.
tsne = TSNE(n_components=2, verbose=1)
tsne_results = tsne.fit_transform(pca_result)

In [None]:
from keras.utils import np_utils
import matplotlib.pyplot as plt
import matplotlib 
matplotlib.rc('xtick', labelsize=20) 
matplotlib.rc('ytick', labelsize=20) 

plt.rcParams.update({'font.size': 25})
%matplotlib inline
Name='T-SNE Visualization of Multi-headed model with uncertainty (X-Ray)'
fig = plt.figure(figsize=[15, 15])
color_map = np.argmax(y_test, axis=1)
classes=['COVID19','Normal','Pneumonia']
for cl in range(3):
    indices = np.where(color_map==cl)
    indices = indices[0]
    plt.title(Name,fontsize=20)
    plt.ylabel('Dim_2',fontsize=20)
    plt.xlabel('Dim_1',fontsize=20)
    matplotlib.rc('xtick', labelsize=20) 
    matplotlib.rc('ytick', labelsize=20) 
    plt.scatter(tsne_results[indices,0], tsne_results[indices, 1], label=classes[cl])

plt.rcParams.update({'font.size': 20})

plt.legend()
plt.show()
fig.savefig('{}.pdf'.format(Name),dpi=300)

