In [None]:
import json
import time
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score

In [None]:
# Fix the global NumPy and TensorFlow random seeds to the same value so that
# random draws made through these two libraries are repeatable between runs.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# HDF5 file used by the training cell: ModelCheckpoint writes the best model here
# and the evaluation step reloads it from the same location.
MODEL_PATH = (
    "E:/Jupyter/masterthesis/models/two_classes/"
    "mlp_2class_normal.h5"
)

In [None]:
# JSON files holding the "mfcc" features and "labels" for the two-class "normal" setup:
# the complete dataset plus its train / test / validation splits.
_MFCC_JSON_DIR = "E:/Jupyter/masterthesis/data/json/two_class/normal/"
DATA_PATH = _MFCC_JSON_DIR + "all_mfcc_normal.json"
TRAIN_PATH = _MFCC_JSON_DIR + "train_mfcc_normal.json"
TEST_PATH = _MFCC_JSON_DIR + "test_mfcc_normal.json"
VAL_PATH = _MFCC_JSON_DIR + "val_mfcc_normal.json"

In [None]:
def load_all_data(data_path):
    """Read the complete dataset from a JSON file.

    Args:
        data_path: Path to a JSON file containing the keys "mfcc" and "labels".

    Returns:
        Tuple (X, y) of numpy arrays built from the "mfcc" and "labels" entries.
    """
    with open(data_path, "r") as handle:
        payload = json.load(handle)

    # turn the JSON lists into numpy arrays, features first, then labels
    features, targets = (np.array(payload[key]) for key in ("mfcc", "labels"))

    print("Data succesfully loaded!")
    return features, targets

In [None]:
def load_train_data(train_path):
    """Read the training split from a JSON file.

    Args:
        train_path: Path to a JSON file containing the keys "mfcc" and "labels".

    Returns:
        Tuple (X_tr, y_tr) of numpy arrays built from the "mfcc" and "labels" entries.
    """
    with open(train_path, "r") as source:
        contents = json.load(source)

    # the JSON lists become numpy arrays
    train_features = np.array(contents["mfcc"])
    train_targets = np.array(contents["labels"])

    print("Train Data succesfully loaded!")
    return train_features, train_targets

In [None]:
def load_test_data(test_path):
    """Read the test split from a JSON file.

    Args:
        test_path: Path to a JSON file containing the keys "mfcc" and "labels".

    Returns:
        Tuple (X_ts, y_ts) of numpy arrays built from the "mfcc" and "labels" entries.
    """
    with open(test_path, "r") as json_file:
        parsed = json.load(json_file)

    # convert each JSON list to a numpy array ("mfcc" first, then "labels")
    arrays = {field: np.array(parsed[field]) for field in ("mfcc", "labels")}

    print("Test Data succesfully loaded!")
    return arrays["mfcc"], arrays["labels"]

In [None]:
def load_val_data(val_path):
    """Read the validation split from a JSON file.

    Args:
        val_path: Path to a JSON file containing the keys "mfcc" and "labels".

    Returns:
        Tuple (X_v, y_v) of numpy arrays built from the "mfcc" and "labels" entries.
    """
    with open(val_path, "r") as stream:
        record = json.load(stream)

    # numpy versions of the two JSON lists, in (features, labels) order
    val_arrays = (np.array(record["mfcc"]), np.array(record["labels"]))

    print("Validation Data succesfully loaded!")
    return val_arrays

In [None]:
def build_model(input_shape):
    """Build the (uncompiled) multi-layer perceptron used for two-class classification.

    Architecture: Flatten -> Dense(512) -> Dense(256) -> Dense(64) -> Dense(2, softmax).
    Every hidden Dense layer uses ReLU, L2(0.001) kernel regularisation, a
    RandomNormal(0, 0.01) kernel initializer, zero biases, and is followed by Dropout(0.3).

    Args:
        input_shape: Shape of a single sample without the batch axis; it is passed
            to the Flatten input layer.

    Returns:
        The assembled, not yet compiled, Sequential model.
    """
    model = Sequential()

    # input layer: use the shape supplied by the caller. The previous version ignored
    # the parameter and read the notebook-level global `X` instead, so the function
    # only worked when such a global happened to exist.
    model.add(Flatten(input_shape=tuple(input_shape)))

    # three hidden blocks that differ only in their width
    for units in (512, 256, 64):
        model.add(Dense(units,
                        activation='relu',
                        # a fresh initializer per layer: Keras warns when one unseeded
                        # random initializer instance is shared by several layers
                        kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None),
                        bias_initializer=tf.keras.initializers.Zeros(),
                        kernel_regularizer=l2(0.001)))
        model.add(Dropout(0.3))

    # output layer: one softmax unit per class
    model.add(Dense(2, activation='softmax'))

    return model
    

In [None]:
def predict_single_sample(model, X, y):
    """Predict the class of one sample and print it next to the target.

    Args:
        model: Object exposing a ``predict`` method.
        X: A single sample; a leading axis is added before it is handed to ``model.predict``.
        y: Target value of the sample; only used in the printed message.

    Returns:
        None. The target and the arg-max (over axis 1) of the prediction are printed.
    """
    report_template = "\nTarget: {}, Predicted label: {}"

    # wrap the sample in a leading axis of length one before predicting
    batch = X[np.newaxis]
    scores = model.predict(batch)

    # position of the highest score in each row of the prediction
    predicted_index = np.argmax(scores, axis=1)

    print(report_template.format(y, predicted_index))
    print('\n')

In [None]:
def plot(history):
    """Draw training/validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute maps the metric names
            'categorical_accuracy', 'val_categorical_accuracy', 'loss' and
            'val_loss' to sequences of values.
    """
    recorded = history.history

    # one entry per subplot: (title, legend location, ((curve label, values), ...))
    panels = (
        ('Training and Validation Accuracy', 'lower right',
         (('Training Accuracy', recorded['categorical_accuracy']),
          ('Validation Accuracy', recorded['val_categorical_accuracy']))),
        ('Training and Validation Loss', 'upper right',
         (('Training Loss', recorded['loss']),
          ('Validation Loss', recorded['val_loss']))),
    )

    plt.figure(figsize=(8, 8))
    for position, (title, legend_loc, curves) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for curve_label, values in curves:
            plt.plot(values, label=curve_label)
        plt.legend(loc=legend_loc)
        plt.title(title)
    plt.show()

In [None]:
def one_hot_labels(labels, num_classes):
    """Return `labels` as a (samples, num_classes) one-hot array.

    Labels that already have shape (samples, num_classes) are returned unchanged;
    anything else is flattened and treated as integer class indices.
    """
    labels = np.asarray(labels)
    if labels.ndim == 2 and labels.shape[1] == num_classes:
        return labels
    return np.eye(num_classes, dtype="float32")[labels.reshape(-1).astype(int)]


def class_index_labels(labels, num_classes):
    """Return `labels` as a 1-D array of class indices.

    One-hot labels of shape (samples, num_classes) are reduced with arg-max;
    anything else is flattened and returned as is.
    """
    labels = np.asarray(labels)
    if labels.ndim == 2 and labels.shape[1] == num_classes:
        return labels.argmax(axis=1)
    return labels.reshape(-1)


if __name__ == "__main__":

    # number of classes; matches the two-unit softmax output layer of build_model
    NUM_CLASSES = 2
    # preferred test sample for the single-sample demo (clamped to the test set size below)
    SAMPLE_INDEX = 1550

    # load the complete dataset (only its shape is used here, to size the model input)
    X, y = load_all_data(DATA_PATH)

    # load the data from train, test and validation dataset
    X_train, y_train = load_train_data(TRAIN_PATH)
    X_test, y_test = load_test_data(TEST_PATH)
    X_val, y_val = load_val_data(VAL_PATH)

    # CategoricalCrossentropy / CategoricalAccuracy need one-hot targets, whereas the
    # results table below needs one class index per row. Normalise both ways so the
    # script works whichever encoding the JSON files use.
    y_train_onehot = one_hot_labels(y_train, NUM_CLASSES)
    y_val_onehot = one_hot_labels(y_val, NUM_CLASSES)
    y_test_onehot = one_hot_labels(y_test, NUM_CLASSES)
    y_test_index = class_index_labels(y_test, NUM_CLASSES)

    # define input shape to the model
    input_shape = (X.shape[1], X.shape[2])

    # build model
    model = build_model(input_shape)

    # compile model
    # (the former `sample_weight_mode=[None]` argument was dropped: it is deprecated
    # and None is the default anyway)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=[tf.keras.metrics.CategoricalAccuracy()])

    # model summary
    model.summary()

    print('\n')

    # training stops when the validation accuracy has not improved for 25 epochs
    es = EarlyStopping(monitor='val_categorical_accuracy', patience=25)

    # keep only the best model on disk; 'val_loss' is ModelCheckpoint's default monitor
    # and is spelled out here to make the selection criterion visible.
    # NOTE(review): early stopping watches validation accuracy while the checkpoint
    # watches validation loss - confirm this difference is intended.
    chkp = ModelCheckpoint(filepath=MODEL_PATH, monitor='val_loss', verbose=1, save_best_only=True)

    # mlp_2class_normal_log.csv receives the per-epoch accuracy and loss history
    # (append=True: rows from repeated runs accumulate in the same file)
    log = CSVLogger('E:/Jupyter/masterthesis/logs/two_classes/normal/mlp_2class_normal_log.csv', append=True, separator=' ')

    # train model
    history = model.fit(X_train,
                        y_train_onehot,
                        validation_data=(X_val, y_val_onehot),
                        batch_size=32,
                        epochs=100,
                        callbacks=[chkp, es, log])
    print('\n')

    # initializing time stamp; started after training, so the reported time covers
    # only model reloading, evaluation and prediction
    startTime = time.time()

    print('\n')

    # Loading the saved Model
    model = tf.keras.models.load_model(MODEL_PATH)
    model.summary()
    print('\n')

    # Evaluation on validation dataset
    score_val = model.evaluate(X_val, y_val_onehot, verbose=2)
    print('\nValidation loss and Accuracy')
    print('------------------------------------')
    print(model.metrics_names)
    print(model.metrics_names[0], score_val[0])
    print(model.metrics_names[1], score_val[1])
    print('\n')

    # Evaluation on test dataset
    score_test = model.evaluate(X_test, y_test_onehot, verbose=2)
    print('\nTest loss and Accuracy')
    print('-------------------------------------')
    print(model.metrics_names)
    print(model.metrics_names[0], score_test[0])
    print(model.metrics_names[1], score_test[1])
    print('\n')

    # pick a sample to predict from the test set; clamp the index so a test set with
    # fewer than SAMPLE_INDEX + 1 samples does not raise an IndexError
    sample_index = min(SAMPLE_INDEX, len(X_test) - 1)
    X_to_predict = X_test[sample_index]
    y_to_predict = y_test_index[sample_index]

    # prediction for a single sample
    predict_single_sample(model, X_to_predict, y_to_predict)

    # take the entire test set for prediction
    pred = model.predict(X_test, batch_size=32, verbose=2)
    p_i = np.argmax(pred, axis=1)
    print("\n Target: {}, Predicted label: {}".format(y_test_index, p_i))

    # store targets and predictions as class indices for the metrics cell
    results = pd.DataFrame({"Target": y_test_index,
                            "Predicted Labels": p_i})
    results.to_csv("E:/Jupyter/masterthesis/predictions/two_class/normal/mlp_2class_normal_result.csv", index=False)

    print('\n')

    executionTime = (time.time() - startTime)
    print('\nExecution time in seconds: ' + str(executionTime))


In [None]:
# Plot training vs. validation accuracy and loss as a function of the epochs,
# using the `history` object returned by model.fit in the training cell above.
plot(history)

In [None]:
# Classification metrics (accuracy, precision, recall, F1) and confusion matrix,
# computed from the prediction CSV written by the training/evaluation cell.

pred_file = "E:/Jupyter/masterthesis/predictions/two_class/normal/mlp_2class_normal_result.csv"
df = pd.read_csv(pred_file)

y_test = df["Target"]
y_pred = df["Predicted Labels"]

# class that precision / recall / F1 treat as the positive one
pos_label = 0

# class order used for the rows and columns of the confusion matrix
labels = [0, 1]

print('Accuracy: {:.4f}'.format(accuracy_score(y_test, y_pred)))
print('Precision: {:.4f}'.format(precision_score(y_test, y_pred, pos_label=pos_label)))
print('Recall: {:.4f}'.format(recall_score(y_test, y_pred, pos_label=pos_label)))
print('F1 Score: {:.4f}\n'.format(f1_score(y_test, y_pred, pos_label=pos_label)))

conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred, labels=labels)
fig, ax = plt.subplots(figsize=(5, 5))

ax.matshow(conf_matrix, cmap=plt.cm.Oranges, alpha=0.3)

# write each count into the centre of its cell (rows = actual, columns = predicted)
for (i, j), count in np.ndenumerate(conf_matrix):
    ax.text(x=j, y=i, s=str(count), va='center', ha='center', size='xx-large')

# Pin one tick per class before labelling it. The previous `[''] + labels` trick
# relied on where the automatic locator happened to place its ticks and made
# matplotlib warn about tick labels being set without fixed tick positions.
tick_positions = list(range(len(labels)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(labels)
ax.set_yticks(tick_positions)
ax.set_yticklabels(labels)

ax.set_xlabel('Predictions', fontsize=15)
ax.set_ylabel('Actuals', fontsize=15)
ax.set_title('Confusion Matrix\n', fontsize=18)
plt.show()
