In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from typing import Tuple

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L1, L2, L1L2

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve

import seaborn as sns
# Apply one seaborn context and style globally so the figures below share it.
sns.set_context("paper")
sns.set_style("darkgrid")

### Load data

In [None]:
def load_data(data_path: str, n_mfcc: int = 15) -> Tuple[np.ndarray, np.ndarray]:
    '''
        Load MFCC features and labels from a JSON file and prepare them for the CNN.

        Parameters:
            data_path: Path to the saved JSON file. It must contain the keys
                'mfcc' (the features of every sample) and 'label'.
            n_mfcc: Number of MFCC coefficients per frame, i.e. the size of the
                third axis of the returned features. Defaults to 15.

        Returns:
            A tuple (X, y). X has shape (n_samples, -1, n_mfcc, 1), where the
            second axis is inferred from the data. y is an int32 vector holding
            1 for 'p' and 'augmented_p_data' and 0 for 'n'.

        Raises:
            ValueError: If the features cannot be reshaped to n_mfcc columns, or
                if a label is neither a known class name nor an integer value.
    '''

    with open(data_path, 'r') as f:
        data = json.load(f)

    # Extract labels and MFCCs
    X = np.array(data['mfcc'])
    labels = np.array(data['label'])

    # Add the trailing channel axis expected by Conv2D.
    X = X.reshape(X.shape[0], -1, n_mfcc, 1)

    # Encode labels with an explicit mapping instead of writing integers into a
    # string array and casting back, which hides unknown labels behind a
    # cryptic conversion error.
    label_codes = {'p': 1, 'augmented_p_data': 1, 'n': 0}

    def encode(label):
        # Labels that are already numeric are passed through unchanged.
        return label_codes[label] if label in label_codes else int(label)

    try:
        y = np.array([encode(label) for label in labels.tolist()], dtype=np.int32)
    except (TypeError, ValueError) as err:
        raise ValueError(
            f"Unsupported label in {data_path!r}: expected one of "
            f"{sorted(label_codes)} or an integer value"
        ) from err

    return X, y

In [None]:
# The path is relative to the directory the notebook is run from.
data_path = '../../Coswara-Data/data/shallow/shallow_mfcc15_augdata.json'
X, y = load_data(data_path)
# Display the shapes of the feature and label arrays as a sanity check.
X.shape, y.shape

### Split data

In [None]:
# Hold out 20% of the samples for validation, stratified on the label, with a
# fixed seed so the split is repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=7,
)

In [None]:
# Display the shapes of the training and validation splits.
X_train.shape, y_train.shape, X_valid.shape, y_valid.shape

### Construct model

In [None]:
# CNN for binary classification: two convolutional stages followed by two
# regularised dense layers and a single sigmoid output unit.
model = Sequential()

# Take the input shape from the training data instead of hard-coding it, so the
# model always matches whatever load_data produced.
model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='valid',
                 input_shape=X_train.shape[1:]))
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(Conv2D(32, (2, 2), strides=(1, 1), activation='relu', padding='valid'))
model.add(BatchNormalization())

model.add(Flatten())

# Dense head: kernel, bias and activity penalties plus dropout on each layer.
model.add(Dense(256, activation='relu',
                kernel_regularizer=L1L2(l1=3e-4, l2=4e-3),
                bias_regularizer=L2(3e-3),
                activity_regularizer=L2(3e-4)))
model.add(Dropout(0.5))

model.add(Dense(128, activation='relu',
                kernel_regularizer=L1L2(l1=1e-3, l2=1e-2),
                bias_regularizer=L2(1e-2),
                activity_regularizer=L2(1e-3)))
model.add(Dropout(0.3))

# Single sigmoid unit; its output is thresholded at 0.5 further down.
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# so it can be plotted after training.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Stop once val_loss has not improved for 10 epochs and roll the model back to
# the best epoch. Without restore_best_weights the model keeps the weights of
# the last epoch, which is up to `patience` epochs past the best one.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
    )
]
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_valid, y_valid),
    callbacks=callbacks,
    verbose=0,
)

In [None]:
def plot_learningCurve(history, save_prefix=None):
    '''
        Plot the training (and, when recorded, validation) accuracy and loss per epoch.

        Parameters:
            history: Object returned by model.fit. Its `history` dict must hold
                'accuracy' and 'loss'; 'val_accuracy' and 'val_loss' are drawn
                as a second curve when they are present.
            save_prefix: Optional path prefix. When given, the accuracy figure is
                written to '<save_prefix>_acc' and the loss figure to
                '<save_prefix>_loss', each as .pdf and .png, e.g.
                save_prefix='../figures/brogrammer_train'.

        Returns:
            None. Two figures are shown one after the other.
    '''
    metrics = history.history
    epoch_range = range(1, len(metrics['accuracy']) + 1)

    def plot_metric(key, title, ylabel, suffix):
        # Draw one metric as a standalone figure: train curve plus optional validation curve.
        plt.plot(epoch_range, metrics[key])
        legend = ['Train']
        val_key = f'val_{key}'
        if val_key in metrics:
            plt.plot(epoch_range, metrics[val_key])
            legend.append('Val')
        plt.title(title, fontsize=20)
        plt.ylabel(ylabel, fontsize=14)
        plt.xlabel('Epoch', fontsize=14)
        plt.legend(legend, loc='upper right')
        plt.tight_layout()
        # Save before show(): the current figure is no longer available afterwards.
        if save_prefix is not None:
            for ext in ('pdf', 'png'):
                plt.savefig(f'{save_prefix}_{suffix}.{ext}')
        plt.show()

    plot_metric('accuracy', 'Model accuracy', 'Accuracy', 'acc')
    plot_metric('loss', 'Model loss', 'Loss', 'loss')

In [None]:
# Draw the accuracy and loss curves for the training run above.
plot_learningCurve(history)

In [None]:
# Save the full model (not only its weights) as HDF5. The '.h5' suffix already
# selects the format; the explicit `save_format` argument is redundant and
# deprecated in Keras 3.
model.save('brogrammers.h5')

In [None]:
# Score the trained model on the validation split and report both metrics.
loss, acc = model.evaluate(X_valid, y_valid, verbose=0)
print(
    f'Validation loss: {loss:.4f}',
    f'Validation accuracy: {acc:.4f}',
    sep='\n',
)

In [None]:
# Threshold the model outputs at 0.5 to obtain hard 0/1 predictions.
y_pred = (model.predict(X_valid, verbose=0) > 0.5).astype(int)

In [None]:
# Confusion matrix of the validation labels against the thresholded
# predictions, drawn as an annotated heatmap.
cf_matrix = confusion_matrix(y_valid, y_pred)
ax = sns.heatmap(cf_matrix, annot=True, fmt='')

In [None]:
# sklearn's matrix has rows = true class and columns = predicted class, ordered
# (0, 1). Reorder it so the positive class comes first: [[TP, FN], [FP, TN]].
final_cm = np.array([[cf_matrix[1, 1], cf_matrix[1, 0]],
                     [cf_matrix[0, 1], cf_matrix[0, 0]]])
df_cm = pd.DataFrame(final_cm,
                     index=['COVID', 'NON-COVID'],     # true class    -> heatmap rows (y axis)
                     columns=['COVID', 'NON-COVID'])   # predicted class -> heatmap columns (x axis)
plt.figure(figsize = (10,7))
sns.set(font_scale=1.4) # for label size
# Plot df_cm as is: transposing it would put the predicted class on the y axis
# and swap the FP and FN cells relative to the axis labels set below.
sns.heatmap(df_cm, annot=True, annot_kws={"size": 16},fmt='.3g') # font size

plt.title('Confusion Matrix', fontsize=20)
plt.xlabel('PREDICTED')
plt.ylabel('TRUE')
plt.tight_layout()
# plt.savefig('../figures/confusion_matrix.pdf', dpi=577)
# plt.savefig('../figures/confusion_matrix.png', dpi=577)

plt.show()

In [None]:
# Keep the continuous scores in their own variable. Overwriting `y_pred` (the
# thresholded labels used for the confusion matrix) would break re-running the
# confusion-matrix cells after this one.
y_prob = model.predict(X_valid, verbose=0).ravel()
fpr, tpr, thresholds = roc_curve(y_valid, y_prob)
auc = roc_auc_score(y_valid, y_prob)
plt.plot(fpr, tpr, label=f'AUC={auc:.4f}')
plt.title('ROC Curve', fontsize=20)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.legend(loc=4)
plt.tight_layout()
# plt.savefig('../figures/roc_curve.pdf')
# plt.savefig('../figures/roc_curve.png')
plt.show()