In [2]:
import os

import numpy as np
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv1D, LeakyReLU, MultiHeadAttention, LayerNormalization, MaxPooling1D, Dropout
from sklearn.preprocessing import LabelEncoder

import evaluation_utils

In [None]:
def create_model_attention(optimizer="adam", num_heads=5, filter_size=225, kernel_size=5, epsilon=1e-6,
                 activation_function='relu', activation_output='softmax', loss='categorical_crossentropy',
                 num_classes=19, input_shape=(144, 3)):
    """Build and compile a Conv1D -> multi-head attention -> Conv1D classifier.

    Layer order: Input -> Conv1D -> three parallel Dense projections
    (query / value / key) -> MultiHeadAttention -> LayerNormalization ->
    Conv1D -> Flatten -> Dense output.

    Args:
        optimizer: Optimizer passed to ``compile``.
        num_heads: Number of attention heads (``key_dim`` is fixed at 1).
        filter_size: Filter count of both Conv1D layers and unit count of the
            three Dense projections.
        kernel_size: Kernel size of both Conv1D layers.
        epsilon: Epsilon passed to LayerNormalization.
        activation_function: Activation of both Conv1D layers.
        activation_output: Activation of the output Dense layer.
        loss: Loss passed to ``compile``.
        num_classes: Width of the output Dense layer. Defaults to 19.
        input_shape: Per-sample input shape. Defaults to ``(144, 3)``.

    Returns:
        The compiled ``keras.models.Model`` (metric: accuracy).
    """
    input_layer = keras.layers.Input(
        shape=input_shape,
        name='Input',
    )
    conv_features = Conv1D(filter_size, kernel_size=kernel_size, activation=activation_function)(input_layer)

    # Three independent linear projections of the same convolutional features.
    # Keep this creation order (query, value, key): Keras derives default layer
    # names from it, so reordering would change how saved weights map to layers.
    query = Dense(filter_size)(conv_features)
    value = Dense(filter_size)(conv_features)
    key = Dense(filter_size)(conv_features)

    # MultiHeadAttention's call signature is (query, value, key) -- not
    # (query, key, value) -- so value and key are bound by keyword to make the
    # role of each projection unambiguous.
    attention_layer = MultiHeadAttention(num_heads=num_heads, key_dim=1)(query, value=value, key=key)
    normalisation_layer = LayerNormalization(epsilon=epsilon)(attention_layer)

    refined_features = Conv1D(filter_size, kernel_size=kernel_size, activation=activation_function)(normalisation_layer)
    flatten_layer = Flatten()(refined_features)
    output_layer = Dense(num_classes, activation=activation_output)(flatten_layer)

    attention_based_1d_model = keras.models.Model(inputs=input_layer, outputs=output_layer)
    attention_based_1d_model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return attention_based_1d_model

In [None]:
def create_model_complex(optimizer="adam", filter_size_1=144, kernel_size_1=3, filter_size_2=12, kernel_size_2=1,
                 activation_function='relu', activation_output='softmax', loss='categorical_crossentropy', pool_size=3,
                 dropout_rate=0.1):
    """Build and compile a two-stage 1D CNN for inputs of shape (144, 3).

    Each stage is Conv1D -> MaxPooling1D -> Dropout; the stages are followed by
    Flatten and a 4-unit Dense output layer.

    Args:
        optimizer: Optimizer passed to ``compile``.
        filter_size_1: Filter count of the first Conv1D layer.
        kernel_size_1: Kernel size of the first Conv1D layer.
        filter_size_2: Filter count of the second Conv1D layer. Replaced by 12
            when it is not strictly smaller than ``filter_size_1``.
        kernel_size_2: Kernel size of the second Conv1D layer. Replaced by 1
            when it is not strictly smaller than ``kernel_size_1``.
        activation_function: Activation of both Conv1D layers.
        activation_output: Activation of the output Dense layer.
        loss: Loss passed to ``compile``.
        pool_size: Pool size of both MaxPooling1D layers.
        dropout_rate: Rate of both Dropout layers.

    Returns:
        The compiled ``Sequential`` model (metric: accuracy).
    """
    network = Sequential()

    # Second-stage sizes that are not strictly below the first-stage ones are
    # silently swapped for fixed fallbacks.
    # NOTE(review): the fallback of 12 filters can still be >= filter_size_1
    # when filter_size_1 <= 12 -- confirm whether that is intended.
    if filter_size_2 >= filter_size_1:
        filter_size_2 = 12
    if kernel_size_2 >= kernel_size_1:
        kernel_size_2 = 1

    stack = [
        # stage 1
        Conv1D(filter_size_1, kernel_size=kernel_size_1, activation=activation_function, input_shape=(144, 3)),
        MaxPooling1D(pool_size=pool_size),
        Dropout(dropout_rate),
        # stage 2
        Conv1D(filter_size_2, kernel_size=kernel_size_2, activation=activation_function),
        MaxPooling1D(pool_size=pool_size),
        Dropout(dropout_rate),
        # classifier head
        Flatten(),
        Dense(4, activation=activation_output),
    ]
    for layer in stack:
        network.add(layer)

    network.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return network


In [3]:
def create_model(optimizer="adam", filter_size=144, kernel_size=3, activation_function='relu', activation_output='softmax', loss='categorical_crossentropy'):
    """Build and compile a single-convolution 1D CNN for inputs of shape (144, 3).

    Layer order: Conv1D -> Flatten -> Dense(4).

    Args:
        optimizer: Optimizer passed to ``compile``.
        filter_size: Filter count of the Conv1D layer.
        kernel_size: Kernel size of the Conv1D layer.
        activation_function: Activation of the Conv1D layer.
        activation_output: Activation of the 4-unit output Dense layer.
        loss: Loss passed to ``compile``.

    Returns:
        The compiled ``Sequential`` model (metric: accuracy).
    """
    network = Sequential()
    stack = (
        Conv1D(filter_size, kernel_size=kernel_size, activation=activation_function, input_shape=(144, 3)),
        Flatten(),
        Dense(4, activation=activation_output),
    )
    for layer in stack:
        network.add(layer)
    network.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return network

In [4]:
# Root directory of the pre-processed data sets.
# Set the PACKET_DATA_DIR environment variable to point the notebook at another
# location; when it is unset or empty the original author's path is used.
_DEFAULT_DATA_ROOT = "/home/keegan/Desktop/UCT/Masters/Code/final-code/data-sets/"
# Normalise to exactly one trailing "/" because the paths below (and the loading
# cell) are built by plain string concatenation.
base_data_path = (os.environ.get("PACKET_DATA_DIR") or _DEFAULT_DATA_ROOT).rstrip("/") + "/"
tcp_udp_path = base_data_path + "tcp_udp/"
tcp_udp_144_3 = tcp_udp_path + "144-3/"

In [5]:
def _load_labelled_packets(file_name, label, data_dir=None):
    """Load one pre-processed ``.npy`` file and wrap it in a labelled DataFrame.

    Args:
        file_name: File name inside ``data_dir`` (e.g. ``"youtube-3.npy"``).
        label: Value written to every row of the frame's ``'label'`` column.
        data_dir: Directory prefix, concatenated directly with ``file_name``.
            Defaults to the notebook-level ``tcp_udp_144_3``.

    Returns:
        ``(packets, frame)``: the array reshaped to 432 columns, and a
        DataFrame of that array with the extra ``'label'`` column.
    """
    if data_dir is None:
        data_dir = tcp_udp_144_3
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
    # objects -- only use it on files from a trusted source.
    packets = np.load(data_dir + file_name, allow_pickle=True)  # load pre processed data
    # 432 = 144 * 3: one flat row per sample, reshaped back to (144, 3) before training.
    packets = np.reshape(packets, (-1, 432))
    frame = pd.DataFrame(packets)
    frame['label'] = label
    return packets, frame


packet_data_bittorrent, df_bittorrent = _load_labelled_packets("bittorrent-3.npy", 'BitTorrent')
packet_data_facebook, df_facebook = _load_labelled_packets("facebook-3.npy", 'Facebook')
packet_data_instagram, df_instagram = _load_labelled_packets("instagram-3.npy", 'Instagram')
packet_data_messenger, df_messenger = _load_labelled_packets("messenger-3.npy", 'Messenger')
packet_data_tiktok, df_tiktok = _load_labelled_packets("tiktok-3.npy", 'TikTok')
packet_data_whatsapp, df_whatsapp = _load_labelled_packets("whatsapp-3.npy", 'WhatsApp')
packet_data_youtube, df_youtube = _load_labelled_packets("youtube-3.npy", 'YouTube')

In [6]:
# Row boundaries of the 60 % / 80 % / 100 % marks of the per-category budget.
index_60 = 14160
index_80 = 18880
index_100 = 23600  # formerly `max`; renamed so the builtin max() is no longer shadowed

# Streaming, messaging and social media are each built from two apps, so each
# app contributes half of the category budget. BitTorrent uses the full budget.
index_streaming_60 = index_60 // 2
index_streaming_80 = index_80 // 2
index_streaming_end = index_100 // 2
index_messaging_60 = index_60 // 2
index_messaging_80 = index_80 // 2
index_messaging_end = index_100 // 2
index_social_media_60 = index_60 // 2
index_social_media_80 = index_80 // 2
index_social_media_end = index_100 // 2


def _stack_rows(streaming, messaging, social_media, bittorrent):
    """Concatenate one positional row window (a ``slice``) from every per-app frame.

    Slicing past the end of a frame silently yields fewer rows, so the splits
    are only as large as intended when every frame has enough rows.
    """
    return pd.concat([
        df_youtube.iloc[streaming], df_tiktok.iloc[streaming],
        df_messenger.iloc[messaging], df_whatsapp.iloc[messaging],
        df_instagram.iloc[social_media], df_facebook.iloc[social_media],
        df_bittorrent.iloc[bittorrent],
    ])


# First 60 % -> train, next 20 % -> test, last 20 % -> validation.
df_train = _stack_rows(slice(None, index_streaming_60), slice(None, index_messaging_60),
                       slice(None, index_social_media_60), slice(None, index_60))
df_test = _stack_rows(slice(index_streaming_60, index_streaming_80), slice(index_messaging_60, index_messaging_80),
                      slice(index_social_media_60, index_social_media_80), slice(index_60, index_80))
df_validation = _stack_rows(slice(index_streaming_80, index_streaming_end),
                            slice(index_messaging_80, index_messaging_end),
                            slice(index_social_media_80, index_social_media_end), slice(index_80, index_100))
# Train + test rows together (first 80 %), for k-fold cross-validation.
df_k_fold = _stack_rows(slice(None, index_streaming_80), slice(None, index_messaging_80),
                        slice(None, index_social_media_80), slice(None, index_80))

# Add category labels: collapse app names into traffic categories
# ('BitTorrent' has no entry and is kept as its own class).
CATEGORY_BY_APP = {"YouTube": "Streaming", "TikTok": "Streaming", "WhatsApp": "Messaging",
                   "WhatsAppFiles": "Messaging", "Instagram": "SocialMedia", "Facebook": "SocialMedia",
                   "Messenger": "Messaging"}
# Write the mapped labels back into each frame explicitly, so the frames and the
# y_* series agree regardless of how pandas handles in-place edits of a column.
for _split_frame in (df_train, df_test, df_validation, df_k_fold):
    _split_frame['label'] = _split_frame['label'].replace(CATEGORY_BY_APP)

y_train = df_train['label']
y_test = df_test['label']
y_validation = df_validation['label']
y_k_fold = df_k_fold['label']

# Encode Labels: fit the encoder once on the training labels and reuse it for
# every split, so a class always maps to the same one-hot column. A label that
# never occurs in training now raises instead of being silently re-numbered.
label_encoder = LabelEncoder()
label_encoder.fit(y_train.to_numpy())
_num_classes = len(label_encoder.classes_)


def _one_hot(labels):
    """Integer-encode ``labels`` with the fitted encoder and one-hot them to a fixed width."""
    return keras.utils.to_categorical(label_encoder.transform(labels.to_numpy()), num_classes=_num_classes)


def _to_model_input(frame):
    """Drop the label column, reshape rows to (144, 3) and scale values by 1/255."""
    samples = np.array(frame.drop("label", axis=1)).reshape(-1, 144, 3)
    return samples.astype(int) / 255


y_train_encoded = _one_hot(y_train)
y_test_encoded = _one_hot(y_test)
y_validation_encoded = _one_hot(y_validation)
y_k_fold_encoded = _one_hot(y_k_fold)

x_train = _to_model_input(df_train)
x_test = _to_model_input(df_test)
x_validation = _to_model_input(df_validation)
x_k_fold = _to_model_input(df_k_fold)

In [None]:
# Build the single-convolution CNN with the settings chosen for this run.
# NOTE(review): a sigmoid output is paired with categorical cross-entropy on
# one-hot labels -- confirm this is deliberate and not meant to be softmax.
conv_1D = create_model(
    optimizer='RMSprop',
    loss='categorical_crossentropy',
    kernel_size=1,
    filter_size=144,
    activation_output='sigmoid',
    activation_function='relu',
)
# Train on the training split and report metrics on the validation split each epoch.
conv_1D.fit(
    x_train,
    y_train_encoded,
    validation_data=(x_validation, y_validation_encoded),
    epochs=150,
    shuffle=True,
    batch_size=500,
)

In [None]:
# Plot the confusion matrix of the trained model on the test split.
# NOTE(review): the inputs and targets come from the test split, but the frame
# passed is df_validation -- confirm what plot_conf_mtx uses that frame for.
evaluation_utils.plot_conf_mtx(
    conv_1D,
    x_test,
    y_test_encoded,
    df_validation,
    4,
    title="1D CNN - 144 Bytes and \n3 Packets",
)

In [None]:
# Persist the trained model next to the notebook.
conv_1D.save(
    './144_3_1D'
)