In [None]:
# Setup library
## Install dependencies first: pip install -r requirements.txt
from __future__ import absolute_import, division, print_function, unicode_literals
import os
from operator import itemgetter
import collections

import matplotlib.pylab as plt
# %matplotlib widget
%matplotlib inline

import pandas as pd
import numpy as np
import tensorflow as tf
tf.random.set_seed(99)
from tensorflow.keras import layers, models

In [None]:
### Global variables
# Setup scripts (or notebook)
# Root directory of the image data; each training stage reads the
# sub-directory named after its stage index (IMG_DATA/0, IMG_DATA/1, ...).
IMG_DATA = './dataset/dynamic/'
# Passed as `target_size` to the directory image loaders and, with a trailing
# channel axis appended, used as the input shape of the model.
IMG_SHAPE = (375, 4)

In [None]:
## Build the training / validation generators for every incremental stage
dataset = {}
classes = ['icqchat', 'vimeo', 'scp', 'voipbuster', 'ftps', 'sftp', 'aim', 'youtube',
           'hangout', 'netflix', 'spotify', 'torrent', 'email', 'skype', 'facebook', 'gmail']
for i in range(8):
    # Stage i reads its images from the sub-directory named after its index.
    stage_dir = os.path.join(IMG_DATA, str(i))
    dataset_root = os.path.abspath(os.path.expanduser(stage_dir))
    print(f'Dataset root: {dataset_root}')

    image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255,
                                                                      validation_split=0.10)

    # Options shared by both subsets; stage i exposes the first 2*(i+1) classes.
    # NOTE(review): shuffle=False is also applied to the training subset, so
    # batches are served in directory order - confirm this is intended.
    flow_options = dict(target_size=IMG_SHAPE,
                        classes=classes[:(i+1)*2],
                        shuffle=False,
                        color_mode='grayscale')
    train_data = image_generator.flow_from_directory(dataset_root,
                                                     subset='training',
                                                     **flow_options)
    validation_data = image_generator.flow_from_directory(dataset_root,
                                                          subset='validation',
                                                          **flow_options)

    # Class names ordered by the integer index the generator assigned to them.
    indexed_names = sorted(train_data.class_indices.items(), key=itemgetter(1))
    class_names = np.array([name.title() for name, _ in indexed_names])
    print(f'Dataset {i} Classes: {class_names}')
    dataset[i] = (train_data, validation_data)

In [None]:
# Create Deep and Wide CNN
def create_model(nclass):
    """Build the deep-and-wide CNN classifier.

    Three single-convolution "wide" branches (kernels 1x1, 1x2 and 1x4) and a
    two-convolution "deep" branch all read the same input; their flattened
    feature maps are concatenated and fed to one dense output layer.

    Args:
        nclass: number of units in the output layer (one per class).

    Returns:
        An uncompiled ``tf.keras.Model`` taking inputs of shape
        ``IMG_SHAPE + (1,)``. The output layer has no activation, so the
        model emits raw logits.
    """
    keras_layers = tf.keras.layers
    inputs = tf.keras.Input(shape=IMG_SHAPE + (1,))

    # Wide branches: one convolution each, flattened straight away.
    flat_features = []
    for kernel in ((1, 1), (1, 2), (1, 4)):
        conv_out = keras_layers.Conv2D(32, kernel, activation='relu')(inputs)
        flat_features.append(keras_layers.Flatten()(conv_out))

    # Deep branch: conv -> max-pool -> conv. Both the pooled map and the
    # second convolution's output contribute features.
    pooled = keras_layers.Conv2D(32, (2, 2), activation='relu')(inputs)
    pooled = keras_layers.MaxPooling2D((2, 2), strides=(1, 1))(pooled)
    deep_out = keras_layers.Conv2D(32, (2, 2), activation='relu')(pooled)

    flat_features.append(keras_layers.Flatten()(pooled))
    flat_features.append(keras_layers.Flatten()(deep_out))

    merged = keras_layers.concatenate(flat_features)
    logits = keras_layers.Dense(nclass)(merged)

    return tf.keras.Model(inputs=[inputs], outputs=[logits])

In [None]:
# One model per stage, with an output layer sized to that stage's class count.
# NOTE(review): this name shadows `models` imported from tensorflow.keras.
models = {}
for k, (stage_train, _stage_val) in dataset.items():
    nclass = len(stage_train.class_indices)
    models[k] = create_model(nclass)

In [None]:
## Log class
### https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback
class CollectBatchStats(tf.keras.callbacks.Callback):
    """Keras callback that records loss and accuracy at the end of each epoch.

    Despite the ``batch_`` prefix, the lists hold one value per *epoch*.
    The same instance can be passed to several ``fit`` calls; values keep
    accumulating across them.

    Attributes:
        batch_losses: training loss per epoch.
        batch_val_losses: validation loss per epoch.
        batch_acc: training accuracy per epoch.
        batch_val_acc: validation accuracy per epoch.
    """

    def __init__(self):
        # Let the base class initialise its own bookkeeping attributes.
        super().__init__()
        self.batch_losses = []
        self.batch_val_losses = []
        self.batch_acc = []
        self.batch_val_acc = []

    def on_epoch_end(self, epoch, logs=None):
        """Append this epoch's metrics, then reset the model's metric state.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metric dict supplied by Keras; may be ``None``.
        """
        logs = logs or {}
        # A missing key (e.g. no validation data was given to fit) is stored
        # as NaN so all four lists stay aligned epoch by epoch.
        missing = float('nan')
        self.batch_losses.append(logs.get('loss', missing))
        self.batch_acc.append(logs.get('accuracy', missing))
        self.batch_val_losses.append(logs.get('val_loss', missing))
        self.batch_val_acc.append(logs.get('val_accuracy', missing))
        self.model.reset_metrics()

In [None]:
# Train the stage models one after another and score each on its validation
# split. Every stage after the first starts from the previous stage's weights.
def _safe_ratio(numerator, denominator):
    """Element-wise numerator / denominator, yielding 0 where the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    return np.divide(numerator, denominator,
                     out=np.zeros_like(numerator), where=denominator != 0)


batch_stats_callback = CollectBatchStats()
metrics = list()  # one (accuracy, recall, precision, f1_score) tuple per stage
for i in range(len(dataset)):
    train_data = dataset[i][0]
    validation_data = dataset[i][1]
    model = models[i]
    # Keras expects an integer number of steps.
    steps_per_epoch = int(np.ceil(train_data.samples / train_data.batch_size))
    epochs = 10

    if i != 0:
        # Warm start: copy the weights of every layer except the last one
        # from the previous stage's model.
        for j in range(0, len(model.layers)-1):
            model.layers[j].set_weights(models[i-1].layers[j].get_weights())

    model.compile(optimizer=tf.keras.optimizers.SGD(),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    model.fit(train_data,
              epochs=epochs,
              steps_per_epoch=steps_per_epoch,
              validation_data=validation_data,
              callbacks=[batch_stats_callback])

    # Rewind the generator so predictions line up with `validation_data.classes`.
    validation_data.reset()

    predicted_batch = model.predict(validation_data)
    predicted_id = np.argmax(predicted_batch, axis=-1)

    label_id = validation_data.classes
    class_names = sorted(validation_data.class_indices.items(), key=itemgetter(1))
    class_names = np.array([key.title() for key, value in class_names])

    # Fix the matrix size explicitly; otherwise it is inferred from the largest
    # id seen and may be smaller than the number of classes.
    con_mat = tf.math.confusion_matrix(label_id, predicted_id,
                                       num_classes=len(class_names))

    result_df = pd.DataFrame(con_mat.numpy(), index=class_names, columns=class_names, dtype=int)

    # Rows are the true labels and columns are the predicted labels.
    counts = result_df.to_numpy()
    total = counts.sum()
    true_positive = np.diag(counts)
    # Row sum = all samples that truly belong to the class.
    false_negative = counts.sum(axis=1) - true_positive
    # Column sum = all samples predicted as the class.
    false_positive = counts.sum(axis=0) - true_positive
    true_negative = total - true_positive - false_positive - false_negative

    accuracy = true_positive.sum() / total if total else 0.0

    # Macro averages: per-class ratio first, then the mean over classes.
    # Classes with an empty denominator contribute 0.
    recall = np.average(_safe_ratio(true_positive, true_positive + false_negative))
    precision = np.average(_safe_ratio(true_positive, true_positive + false_positive))

    score_sum = precision + recall
    f1_score = 2 * precision * recall / score_sum if score_sum else 0.0

    metrics.append((accuracy, recall, precision, f1_score))
    

In [None]:
# Draw learning curves chart: accuracy on top, loss below, one point per
# recorded epoch.
acc, val_acc = batch_stats_callback.batch_acc, batch_stats_callback.batch_val_acc
loss, val_loss = batch_stats_callback.batch_losses, batch_stats_callback.batch_val_losses

fig2 = plt.figure(figsize=(8, 8))

# Top panel: accuracy
ax1 = fig2.add_subplot(2, 1, 1)
for series, label in ((acc, 'Training Accuracy'), (val_acc, 'Validation Accuracy')):
    ax1.plot(series, label=label)
ax1.legend(loc='lower right')
ax1.set_title('Training and Validation Accuracy')
ax1.set_ylabel('Accuracy')
ax1.set_ylim([0, 1])

# Bottom panel: loss
ax2 = fig2.add_subplot(2, 1, 2)
for series, label in ((loss, 'Training Loss'), (val_loss, 'Validation Loss')):
    ax2.plot(series, label=label)
ax2.legend(loc='upper right')
ax2.set_title('Training and Validation Loss')
ax2.set_ylabel('Cross Entropy')
# Anchor the y axis at zero and keep the automatically chosen upper bound.
upper_bound = max(ax2.get_ylim())
ax2.set_ylim([0, upper_bound])
ax2.set_xlabel('epoch')

In [None]:
# Plot the validation scores of every training stage.
fig3 = plt.figure(figsize=(8, 8))
ax1 = fig3.add_subplot()
# 1-based stage numbers, derived from the collected metrics so the plot still
# works when the number of stages is not exactly 8.
stages = range(1, len(metrics) + 1)
# Each entry of `metrics` is an (accuracy, recall, precision, f1_score) tuple.
for position, label in enumerate(('Accuracy', 'Recall', 'Precision', 'F1 Score')):
    ax1.plot(stages, [scores[position] for scores in metrics], label=label)
ax1.legend(loc='lower right')
ax1.set_ylim([0, 1])
ax1.set_xlabel('Training stage')
ax1.set_ylabel('Score')
ax1.set_title('Scores')