In [None]:
import os
import sys
import time
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Conv2D , Dropout, MaxPooling2D, Flatten, Dense
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score

In [None]:
# Fix the global random seeds of TensorFlow and NumPy so that repeated runs
# start from the same random state.
# NOTE: the data-generator helpers below pass their own literal `seed = 42`
# to flow_from_directory instead of reading this variable.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

In [None]:
# Side length in pixels: the data generators resize every image to
# IMG_SIZE x IMG_SIZE, and the model input shape is (IMG_SIZE, IMG_SIZE, 3).
IMG_SIZE = 574
# Batch size of the training and validation generators
# (the test generator always uses a batch size of 1).
BATCH_SIZE = 32

In [None]:
# File the ModelCheckpoint callback writes the model to during training;
# the same file is loaded again for evaluation and prediction.
MODEL_PATH = "E:/Jupyter/masterthesis/models/two_classes/cnn_2class_augmented.h5"

In [None]:
# dataset paths
TRAINING_DIR = 'E:/Jupyter/masterthesis/data/model_data_logScaleMelSpectrogram/two_class/augmented/train'
VALIDATION_DIR = 'E:/Jupyter/masterthesis/data/model_data_logScaleMelSpectrogram/two_class/augmented/val'
TEST_DIR = 'E:/Jupyter/masterthesis/data/model_data_logScaleMelSpectrogram/two_class/normal/test'

In [None]:
def prepare_train_data(train_path, img_size, batch_size):
    """Create the shuffled training iterator over an image directory.

    Args:
        train_path: directory handed to `flow_from_directory`.
        img_size: images are resized to (img_size, img_size).
        batch_size: number of images per batch.

    Returns:
        The iterator returned by `ImageDataGenerator.flow_from_directory`,
        configured with class_mode 'categorical', shuffle on and seed 42.
    """
    # The generator itself only rescales pixel values by 1/255.
    rescaler = ImageDataGenerator(rescale = 1./255)

    flow_options = dict(
        target_size = (img_size, img_size),
        batch_size = batch_size,
        seed = 42,
        class_mode = 'categorical',
        shuffle = True,
    )
    return rescaler.flow_from_directory(train_path, **flow_options)

In [None]:
def prepare_val_data(val_path, img_size, batch_size):
    """Create the validation iterator over an image directory.

    Args:
        val_path: directory handed to `flow_from_directory`.
        img_size: images are resized to (img_size, img_size).
        batch_size: number of images per batch.

    Returns:
        The iterator returned by `ImageDataGenerator.flow_from_directory`,
        configured with class_mode 'categorical' and seed 42. Note that
        shuffling is enabled here, exactly as for the training iterator.
    """
    target_size = (img_size, img_size)

    # Pixel values are rescaled by 1/255; nothing else is done to the images.
    return ImageDataGenerator(rescale = 1./255).flow_from_directory(
        val_path,
        target_size = target_size,
        batch_size = batch_size,
        seed = 42,
        class_mode = 'categorical',
        shuffle = True,
    )

In [None]:
def prepare_test_data(test_path, img_size):
    """Create the test iterator over an image directory.

    Args:
        test_path: directory handed to `flow_from_directory`.
        img_size: images are resized to (img_size, img_size).

    Returns:
        The iterator returned by `ImageDataGenerator.flow_from_directory`,
        configured with class_mode 'categorical', seed 42, a batch size of 1
        and shuffling disabled.
    """
    # shuffle = False keeps the iteration order fixed, so predictions can be
    # paired with the iterator's `filenames` afterwards.
    flow_options = {
        'target_size': (img_size, img_size),
        'batch_size': 1,
        'seed': 42,
        'class_mode': 'categorical',
        'shuffle': False,
    }

    pixel_rescaler = ImageDataGenerator(rescale = 1./255)
    return pixel_rescaler.flow_from_directory(test_path, **flow_options)

In [None]:
def build_model(input_shape, num_classes = 2):
    """Build the (uncompiled) CNN classifier.

    Architecture: four blocks of
    Conv2D(3x3, 'same', ReLU) -> MaxPooling2D(3x3, stride 2, 'same') -> Dropout(0.2)
    with 32, 64, 128 and 128 filters, followed by
    Flatten -> Dense(64, ReLU) -> Dropout(0.2) -> Dense(num_classes, softmax).

    Args:
        input_shape: shape tuple passed as `input_shape` to the first Conv2D layer.
        num_classes: number of units of the softmax output layer (default 2).

    Returns:
        A `Sequential` model; compiling it is left to the caller.
    """

    # Every layer gets its OWN initializer instance. Sharing one unseeded
    # RandomNormal object between layers makes recent Keras versions warn, and
    # can make each call return the same random values.
    def weight_initializer():
        return tf.keras.initializers.RandomNormal(mean = 0.0, stddev = 0.01)

    def bias_initializer():
        return tf.keras.initializers.Zeros()

    model = Sequential()

    for block_index, filters in enumerate((32, 64, 128, 128)):
        # only the first layer of the network declares the input shape
        first_layer_args = {'input_shape': input_shape} if block_index == 0 else {}
        model.add(Conv2D(filters, 3, padding = 'same', activation = 'relu',
                         kernel_initializer = weight_initializer(),
                         bias_initializer = bias_initializer(),
                         **first_layer_args))
        model.add(MaxPooling2D(pool_size = (3, 3), strides = (2, 2), padding = 'same'))
        model.add(Dropout(0.2))

    # classification head
    model.add(Flatten())
    model.add(Dense(64, activation = 'relu',
                    kernel_initializer = weight_initializer(),
                    bias_initializer = bias_initializer()))
    model.add(Dropout(0.2))
    # the output layer keeps the Keras default initializers, as before
    model.add(Dense(num_classes, activation = 'softmax'))

    return model

In [None]:
def plot(history):
    """Show training vs. validation accuracy and loss side by side.

    Args:
        history: object whose `.history` mapping holds the per-epoch series
            'categorical_accuracy', 'val_categorical_accuracy', 'loss' and
            'val_loss' (e.g. the return value of `model.fit`).
    """
    curves = history.history

    # One row per panel: subplot position, the two series with their legend
    # labels, the legend corner and the panel title.
    panels = (
        (1,
         curves['categorical_accuracy'], 'Training Accuracy',
         curves['val_categorical_accuracy'], 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (2,
         curves['loss'], 'Training Loss',
         curves['val_loss'], 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    )

    plt.figure(figsize = (8, 8))
    for position, train_curve, train_label, valid_curve, valid_label, corner, heading in panels:
        plt.subplot(1, 2, position)
        plt.plot(train_curve, label = train_label)
        plt.plot(valid_curve, label = valid_label)
        plt.legend(loc = corner)
        plt.title(heading)
    plt.show()

In [None]:
if __name__ == "__main__":

    # Output files of this run. Their directories are created up front so a
    # long training run cannot fail at save time because of a missing folder.
    log_path = 'E:/Jupyter/masterthesis/logs/two_classes/augmented/cnn_2class_augmented_log.csv'
    results_path = "E:/Jupyter/masterthesis/predictions/two_class/augmented/cnn_2class_augmented_result.csv"
    for out_path in (MODEL_PATH, log_path, results_path):
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # prepare data for training, validation and testing
    train_set = prepare_train_data(TRAINING_DIR, IMG_SIZE, BATCH_SIZE)
    val_set = prepare_val_data(VALIDATION_DIR, IMG_SIZE, BATCH_SIZE)
    test_set = prepare_test_data(TEST_DIR, IMG_SIZE)

    # define input shape to the model (square, 3-channel images)
    input_shape = (IMG_SIZE, IMG_SIZE, 3)

    # build model
    model = build_model(input_shape)

    # compile model
    # (the deprecated `sample_weight_mode` argument was dropped: its value was
    # the default, and newer Keras versions reject the keyword)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=[tf.keras.metrics.CategoricalAccuracy()])

    # model summary
    model.summary()

    # Training stops if the validation accuracy has not improved for 10 epochs
    es = EarlyStopping(monitor='val_categorical_accuracy', patience = 10)

    # Save the model whenever the validation accuracy beats the best value so
    # far. The monitor is set explicitly: the ModelCheckpoint default is
    # 'val_loss', which would disagree with the early-stopping criterion.
    chkp = ModelCheckpoint(filepath=MODEL_PATH,
                           monitor='val_categorical_accuracy',
                           mode='max',
                           verbose=1,
                           save_best_only=True)

    # log_path receives the per-epoch accuracy and loss history
    # (appended to on every run, space separated)
    log = CSVLogger(log_path, append=True, separator=' ')

    # train model
    history = model.fit(train_set,
        validation_data = val_set,
        epochs = 50,
        verbose = 1,
        shuffle = True,
        callbacks = [chkp,es,log])

    # initializing time stamp (times evaluation + prediction, not training)
    startTime = time.time()

    # Loading the saved (best) model
    model = tf.keras.models.load_model(MODEL_PATH)
    model.summary()

    # Evaluation on validation dataset
    score_val = model.evaluate(val_set)
    print(model.metrics_names)
    print(model.metrics_names[0], score_val[0])
    print(model.metrics_names[1], score_val[1])

    # Evaluation on test dataset
    # (report the test scores; the validation scores were printed here before)
    score_test = model.evaluate(test_set)
    print(model.metrics_names)
    print(model.metrics_names[0], score_test[0])
    print(model.metrics_names[1], score_test[1])

    # Prediction on test dataset
    test_set.reset()

    # No batch_size here: the generator already yields batches itself.
    pred = model.predict(test_set, verbose=2)
    p_i = np.argmax(pred, axis=1)

    # invert class name -> index into index -> class name
    class_labels = (train_set.class_indices)
    class_labels = dict((v,k) for k,v in class_labels.items())

    predictions = [class_labels[k] for k in p_i]

    # the test generator does not shuffle, so filenames and predictions align
    files = test_set.filenames
    print(len(files), len(predictions))

    # Saving the predictions in a csv file
    results = pd.DataFrame({"Filename":files,
                      "Predictions":predictions})
    results.to_csv(results_path,index=False)

    executionTime = (time.time() - startTime)
    print('\n\n\n\nExecution time in seconds: ' + str(executionTime))

In [None]:
# Score the saved test-set predictions: accuracy, precision, recall, F1 and a
# confusion-matrix plot (confusion_matrix, precision_score, recall_score,
# f1_score, accuracy_score come from sklearn.metrics).

# Same file name, including letter case, as the one written by the prediction
# step, so the cell also works on case-sensitive filesystems.
pred_file = "E:/Jupyter/masterthesis/predictions/two_class/augmented/cnn_2class_augmented_result.csv"
df = pd.read_csv(pred_file)

# The true class is recovered from the file path, the predicted class from
# the 'Predictions' column.
df['Target'] = df['Filename'].str.extract('(down_clicks|up_clicks)', expand = False)
df["Predicted Labels"] = df['Predictions'].str.extract('(down_clicks|up_clicks)', expand = False)


df_new = df[['Target', 'Predicted Labels']]

# A row where no class name could be extracted would otherwise only surface
# as an obscure error inside the metric functions.
rows_without_label = df_new.isna().any(axis = 1)
if rows_without_label.any():
    raise ValueError('{} row(s) of {} contain no recognisable class label'.format(
        int(rows_without_label.sum()), pred_file))

# Integer-encoded copy of the labels. It is not used by the metrics below,
# which work on the string labels; kept in case later cells rely on it.
mapping = {'down_clicks': 0, 'up_clicks': 1}
df1 = df_new.replace({'Target': mapping, 'Predicted Labels': mapping})

y_test = df_new["Target"]
y_pred = df_new["Predicted Labels"]

# 'down_clicks' is treated as the positive class for precision/recall/F1
pos_label = 'down_clicks'
labels=['down_clicks','up_clicks']

print('Accuracy: {:.4f}'.format(accuracy_score(y_test, y_pred)))
print('Precision: {:.4f}'.format(precision_score(y_test, y_pred, pos_label = pos_label)))
print('Recall: {:.4f}'.format(recall_score(y_test, y_pred, pos_label = pos_label)))
print('F1 Score: {:.4f}\n'.format(f1_score(y_test, y_pred, pos_label = pos_label)))

conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred, labels=labels)
fig, ax = plt.subplots(figsize=(5, 5))

ax.matshow(conf_matrix, cmap=plt.cm.Oranges, alpha=0.3)
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(x=j, y=i,s=conf_matrix[i, j], va='center', ha='center', size='xx-large')

# Pin the tick positions before labelling them. Labelling without fixed
# positions depends on which ticks the automatic locator happens to create.
tick_positions = list(range(len(labels)))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)

ax.set_xlabel('\nPredictions', fontsize=18)
ax.set_ylabel('Actuals', fontsize=18)
ax.set_title('Confusion Matrix\n', fontsize=18)
plt.show()