In [1]:
# imports 
import os
import shutil

In [2]:
# dataset location
covid_path = 'dataset/covid/'
non_covid_path = 'dataset/no_covid/'

base_directory = 'dataset/cross_validation/'
test_directory = 'dataset/cross_validation_test/'

In [3]:
# this function has been adapted from the 
# https://github.com/sagihaider/TransferLearning_COVID19 github repo created by Dr. Haider
def rename_dataset_files(path, class_handler):
    for count, filename in enumerate(os.listdir(path)): 
        dst = class_handler + "-" + str(count) + ".png"
        src = path + filename 
        dst = path + dst 
        
        os.rename(src, dst)
    
    print(class_handler, count)

In [5]:
rename_dataset_files(covid_path, "covid")
rename_dataset_files(non_covid_path, "noncovid")

noncovid 396


In [6]:
# create folders for the dataset split
if not os.path.exists(base_directory):
    os.mkdir(base_directory)
    
if not os.path.exists(test_directory):
    os.mkdir(test_directory)

In [7]:
# copy the files from the full dataset into custom destinations and ranges
def copy_files_from_dataset(source, destination, fname_format, lower_limit, upper_limit):
    fnames = [fname_format.format(i) for i in range(lower_limit, upper_limit)]
    
    for fname in fnames: 
        src = os.path.join(source, fname)
        dst = os.path.join(destination, fname)
        
        shutil.copyfile(src, dst)

In [8]:
# copy covid
copy_files_from_dataset(covid_path, base_directory, 'covid-{}.png', 0, 300)
copy_files_from_dataset(covid_path, test_directory, 'covid-{}.png', 300, 348)

# copy non-covid
copy_files_from_dataset(non_covid_path, base_directory, 'noncovid-{}.png', 0, 300)
copy_files_from_dataset(non_covid_path, test_directory, 'noncovid-{}.png', 300, 348)

In [9]:
import cv2
import numpy as np
from random import shuffle

import os

IMG_SIZE = 227 # alexnet image size

num_folds = 5

no_epochs = 50

In [10]:
# these functions have been taken from the 
# https://github.com/sagihaider/TransferLearning_COVID19 github repo created by Dr. Haider
def label_img(img):
    word_label = img.split('-')[0]
    if word_label == 'covid': return 1
    elif word_label == 'noncovid': return 0
    
    
def createDataSplitSet(datapath):
    X=[]
    y=[]

    for img in os.listdir(datapath):
        label = label_img(img)
        path = os.path.join(datapath, img)
        image = cv2.resize(cv2.imread(path), (IMG_SIZE, IMG_SIZE))
        image = cv2.normalize(image, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

        X.append(np.array(image))
        y.append(label)

    return np.array(X), np.array(y)

In [11]:
train_X, train_y = createDataSplitSet(base_directory) # train_dataset # capital 'X'
test_X, test_y = createDataSplitSet(test_directory) # test_dataset # capital 'X'
print(train_X.shape)
print(test_X.shape)

(600, 227, 227, 3)
(96, 227, 227, 3)


In [15]:
# alexnet crossvalidation training

from sklearn.model_selection import KFold
import numpy as np

from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Input, Flatten
from tensorflow.keras import Model

from tensorflow import keras

from sklearn.metrics import accuracy_score

from sklearn.metrics import precision_recall_fscore_support, roc_auc_score

# Define per-fold score containers
acc_per_fold = []
loss_per_fold = []
test_acc_per_fold = []
precision_per_fold = []
recall_per_fold = []
f1score_per_fold = []
auc_per_fold = []

# k-means cross-validation for alexnet with a k-value of 5

kfold = KFold(n_splits=num_folds, shuffle=True)

fold_no = 1
for train, test in kfold.split(train_X, train_y):
    model = keras.models.Sequential([
        keras.layers.Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu',
                            input_shape=(227, 227, 3)),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
        keras.layers.Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), activation='relu', padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
        keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.Conv2D(filters=384, kernel_size=(1, 1), strides=(1, 1), activation='relu', padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.Conv2D(filters=256, kernel_size=(1, 1), strides=(1, 1), activation='relu', padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu', name='fc1'),
        keras.layers.Dropout(0.1),
        keras.layers.Dense(64, activation='relu', name='fc2'),
        keras.layers.Dropout(0.1),
        keras.layers.Dense(1, activation='sigmoid', name='output')
    ])

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
    
    history = model.fit(train_X[train], train_y[train],
                                  batch_size=20,
                                  epochs=no_epochs)
    
    # Generate generalization metrics
    scores = model.evaluate(train_X[test], train_y[test], verbose=0)
    
    print(f'Score for fold {fold_no}:')    
    
    # These metrics have been taken from the 
    # https://github.com/sagihaider/TransferLearning_COVID19 github repo created by Dr. Haider
    predictions = model.predict(test_X)
    ypred = predictions > 0.5
    test_acc = accuracy_score(test_y, ypred)

    precision, recall, f1score, _ = precision_recall_fscore_support(test_y, ypred, average='binary')

    auc = roc_auc_score(test_y, ypred)
    
    print("Train Accuray:\t", scores[1])
    print("Loss:\t\t", scores[0])
    print("Test Accuracy:\t", test_acc)
    print("Precision:\t", precision)
    print("Recall:\t\t", recall)
    print("F1 Score:\t", f1score)
    print("AUC:\t\t", auc)
    
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])
    test_acc_per_fold.append(test_acc)
    precision_per_fold.append(precision)
    recall_per_fold.append(recall)
    f1score_per_fold.append(f1score)
    auc_per_fold.append(auc)

    # Increase fold number
    fold_no = fold_no + 1


------------------------------------------------------------------------
Training for fold 1 ...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.8456728458404541, 0.625]


In [14]:

print('------------------------------------------------------------------------')
print('Score per fold')
for i in range(0, len(acc_per_fold)):
    print('------------------------------------------------------------------------')
    print(f'> Fold {i+1} - Loss:          {loss_per_fold[i]}')
    print(f'>        - Accuracy:      {acc_per_fold[i]}')
    print(f'>        - Test Accuracy: {test_acc_per_fold[i]}')
    print(f'>        - Precision:     {precision_per_fold[i]}')
    print(f'>        - Recall:        {recall_per_fold[i]}')
    print(f'>        - F1 score:      {f1score_per_fold[i]}')
    print(f'>        - AUC:           {auc_per_fold[i]}')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy:      {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss:          {np.mean(loss_per_fold)}')
print(f'> Test Accuracy: {np.mean(test_acc_per_fold)}')
print(f'> Precision:     {np.mean(precision_per_fold)}')
print(f'> Recall:        {np.mean(recall_per_fold)}')
print(f'> F1 score:      {np.mean(f1score_per_fold)}')
print(f'> AUC:           {np.mean(auc_per_fold)}')
print('------------------------------------------------------------------------')

------------------------------------------------------------------------
Score per fold
------------------------------------------------------------------------
> Fold 1 - Loss:          2.7427515983581543
>        - Accuracy:      0.5083333253860474
>        - Test Accuracy: 0.5208333333333334
>        - Precision:     0.5106382978723404
>        - Recall:        1.0
>        - F1 score:      0.676056338028169
>        - AUC:           0.5208333333333333
------------------------------------------------------------------------
> Fold 2 - Loss:          2.196779489517212
>        - Accuracy:      0.49166667461395264
>        - Test Accuracy: 0.5104166666666666
>        - Precision:     0.5052631578947369
>        - Recall:        1.0
>        - F1 score:      0.6713286713286714
>        - AUC:           0.5104166666666667
------------------------------------------------------------------------
> Fold 3 - Loss:          3.4329965114593506
>        - Accuracy:      0.5083333253860474
>   