# Deep Learning

In [1]:
from keras.layers import Input, Dense, Lambda, Layer
from keras.models import Model
from keras import regularizers
import keras
import pandas as pd
import numpy as np
from keras import backend as K
from keras import metrics
pd.set_option("display.max_rows",15)
%matplotlib inline

Using TensorFlow backend.


In [2]:
from sklearn import model_selection as ms
from sklearn import preprocessing as pp

# Fixed seed so the train/validation split and the synthetic label noise added
# to the test matrix are identical on every Restart & Run All.
RANDOM_SEED = 42

# NOTE: unpickling can execute arbitrary code; only load dataset files that
# come from a trusted source.
kdd_train_2labels = pd.read_pickle("dataset/kdd_train_2labels.pkl")
kdd_test_2labels = pd.read_pickle("dataset/kdd_test_2labels.pkl")
y_train_labels = pd.read_pickle("dataset/kdd_train_2labels_y.pkl")
y_test_labels = pd.read_pickle("dataset/kdd_test_2labels_y.pkl")

output_columns_2labels = ['is_Attack','is_Normal']

# Features are every column except the label columns listed above.
x_input = kdd_train_2labels.drop(output_columns_2labels, axis = 1)

# NOTE(review): the scaler is fitted on all training rows *before* the
# train/validation split below, so validation rows influence the scaling
# statistics. Left unchanged here to keep the produced matrices comparable.
ss = pp.StandardScaler()
x_input = ss.fit_transform(x_input)

# Encode the label values as integers and shape them as a single column so
# they can be appended to the feature matrices with hstack.
le = pp.LabelEncoder()
y_train = le.fit_transform(y_train_labels).reshape(-1, 1)
y_test = le.transform(y_test_labels).reshape(-1, 1)

x_train, x_valid, y_train, y_valid = ms.train_test_split(x_input,
                              y_train,
                              test_size=0.2,
                              random_state=RANDOM_SEED)

# The test features are scaled with the statistics learned from the training data.
x_test = kdd_test_2labels.drop(output_columns_2labels, axis = 1)
x_test = ss.transform(x_test)

# The encoded label becomes the last column of the train/validation matrices.
x_train = np.hstack((x_train, y_train))
x_valid = np.hstack((x_valid, y_valid))

# For the test matrix the label column is filled with small Gaussian noise
# instead of the true label; a dedicated seeded generator keeps it reproducible.
noise_rng = np.random.RandomState(RANDOM_SEED)
x_test = np.hstack((x_test, noise_rng.normal(loc = 0, scale = 0.05, size = y_test.shape)))

In [None]:
# Width of the auto-encoder's input and output layers. Derived from the
# training matrix (scaled features plus the appended label column) rather than
# hard-coded, so it stays correct if the feature set changes.
input_dim = x_train.shape[1]
intermediate_dim = 80  # units in every hidden Dense layer of encoder and decoder
latent_dim = 32        # width of the latent mean / log-variance layers (overwritten by the grid search)
batch_size = 64        # mini-batch size used for fitting and prediction
epochs = 5             # default number of epochs (overwritten by the grid search)
hidden_layers = 8      # default hidden-layer count per encoder/decoder stack (overwritten by the grid search)

class Train:
    """Builds the variational auto-encoder graph and exposes its tensors.

    ``Train.train()`` reads the module-level hyper-parameters ``input_dim``,
    ``intermediate_dim``, ``latent_dim`` and ``hidden_layers`` at call time and
    stores the tensors it creates as class attributes:

    * ``Train.x``              - input placeholder of shape ``(None, input_dim)``
    * ``Train.x_``             - linear reconstruction with ``input_dim`` units
    * ``Train.mean_encoder``   - latent mean, ``latent_dim`` units
    * ``Train.logvar_encoder`` - latent log-variance, ``latent_dim`` units
    """

    # Populated by train(); None until the graph has been built.
    x = None
    x_ = None
    mean_encoder = None
    logvar_encoder = None

    @staticmethod
    def train():
        """Build a fresh encoder/sampler/decoder graph from the current globals."""
        Train.x = Input(shape=(input_dim,))

        # Encoder: a stack of identical ReLU layers.
        hidden_encoder = Train.x
        for _ in range(hidden_layers):
            hidden_encoder = Dense(intermediate_dim, activation='relu')(hidden_encoder)

        # Two linear heads parameterise the latent distribution. They are kept
        # on the class so that get_loss can build the KL term from them.
        Train.mean_encoder = Dense(latent_dim, activation=None)(hidden_encoder)
        Train.logvar_encoder = Dense(latent_dim, activation=None)(hidden_encoder)

        def get_distrib(args):
            """Reparameterisation step: z = mean + exp(logvar / 2) * epsilon."""
            mean, logvar = args

            # Sample epsilon with a backend op so that a new sample is drawn on
            # every forward pass and the batch dimension is taken from the
            # incoming tensor. Sampling with NumPy here would bake a single
            # fixed-size constant into the graph.
            # NOTE(review): stddev=0.05 is kept from the original code; the
            # textbook VAE uses unit-variance noise - confirm this is intended.
            batch = K.shape(mean)[0]
            dim = K.int_shape(mean)[1]
            epsilon = K.random_normal(shape=(batch, dim), mean=0.0, stddev=0.05)

            # Sample latent variable
            return mean + K.exp(logvar / 2) * epsilon

        z = Lambda(get_distrib)([Train.mean_encoder, Train.logvar_encoder])

        # Decoder: mirrors the encoder stack, followed by a linear output layer.
        hidden_decoder = z
        for _ in range(hidden_layers):
            hidden_decoder = Dense(intermediate_dim, activation="relu")(hidden_decoder)

        Train.x_ = Dense(input_dim, activation=None)(hidden_decoder)

def get_loss(x, x_):
    """VAE loss: scaled binary cross-entropy plus the KL divergence term.

    The KL term is built from the latent tensors created by the most recent
    ``Train.train()`` call, so that call must happen first.

    Args:
        x: target tensor (the original input).
        x_: reconstruction tensor produced by the model.

    Returns:
        Scalar tensor: absolute value of the batch mean of the summed losses.

    Raises:
        RuntimeError: if ``Train.train()`` has not been called yet.

    NOTE(review): binary cross-entropy expects values in [0, 1], while the
    output layer is linear and the inputs are standardised - confirm this loss
    is appropriate before passing it to ``compile``.
    """
    mean_encoder = Train.mean_encoder
    logvar_encoder = Train.logvar_encoder
    if mean_encoder is None or logvar_encoder is None:
        raise RuntimeError("Train.train() must be called before get_loss is used")

    xent_loss = input_dim * metrics.binary_crossentropy(x, x_)
    kl_loss = - 0.5 * K.sum(1 + logvar_encoder - K.square(mean_encoder) - K.exp(logvar_encoder), axis=-1)
    return K.abs(K.mean(xent_loss + kl_loss))



In [None]:
import itertools

# Hyper-parameter grid: latent sizes x hidden-layer counts x epoch counts.
features_arr = [2, 4, 8, 16, 32, 64, 128, 256]
hidden_layers_arr = [2, 4, 6, 10]
epoch_arr = [1]

# Trim both sets to a whole number of batches so that every batch fed to the
# model has exactly `batch_size` rows. These sizes do not depend on the grid
# point, so they are computed once; `train_size` is reused by later cells.
train_size = x_train.shape[0] - x_train.shape[0]%batch_size
valid_size = x_valid.shape[0] - x_valid.shape[0]%batch_size

# One record per configuration, so the runs can be compared afterwards.
vae_results = []

for e, h, f in itertools.product(epoch_arr, hidden_layers_arr, features_arr):

    print("Current Layer Attributes - epochs:{} hidden layers:{} features count:{}".format(e,h,f))
    # Train.train() reads these module-level names when it builds the graph.
    latent_dim = f
    epochs = e
    hidden_layers = h

    # Drop the graph of the previous configuration; otherwise every iteration
    # adds another full model to the same backend session.
    K.clear_session()

    Train.train()

    vae_model = Model(inputs = Train.x, outputs = Train.x_ )
    vae_model.compile(optimizer = "adam", loss = "mean_squared_error" )

    # The auto-encoder is trained to reproduce its own input (features + label column).
    history = vae_model.fit(x = x_train[:train_size,:], y = x_train[:train_size,:],
                            shuffle=True, epochs=epochs,
                            batch_size = batch_size,
                            validation_data = (x_valid[:valid_size,:], x_valid[:valid_size,:]),
                            verbose = 1)

    vae_results.append({"epochs": e,
                        "hidden_layers": h,
                        "latent_dim": f,
                        "loss": history.history["loss"][-1],
                        "val_loss": history.history["val_loss"][-1]})

# Summary table of the final training / validation loss per configuration.
pd.DataFrame(vae_results)

Current Layer Attributes - epochs:1 hidden layers:2 features count:2
Train on 100736 samples, validate on 25152 samples
Epoch 1/1
Current Layer Attributes - epochs:1 hidden layers:2 features count:4
Train on 100736 samples, validate on 25152 samples
Epoch 1/1
Current Layer Attributes - epochs:1 hidden layers:2 features count:8
Train on 100736 samples, validate on 25152 samples
Epoch 1/1
Current Layer Attributes - epochs:1 hidden layers:2 features count:16
Train on 100736 samples, validate on 25152 samples
Epoch 1/1
Current Layer Attributes - epochs:1 hidden layers:2 features count:32
Train on 100736 samples, validate on 25152 samples
Epoch 1/1
Current Layer Attributes - epochs:1 hidden layers:2 features count:64
Train on 100736 samples, validate on 25152 samples
Epoch 1/1
Current Layer Attributes - epochs:1 hidden layers:2 features count:128
Train on 100736 samples, validate on 25152 samples
Epoch 1/1
Current Layer Attributes - epochs:1 hidden layers:2 features count:256
Train on 10073

In [None]:
# Score the held-out test matrix (whose label column holds noise, not the true
# label) so that the predictions line up row-for-row with y_test.
# The matrix is trimmed to a whole number of batches, as is done for training.
test_size = x_test.shape[0] - x_test.shape[0] % batch_size
x_pred = vae_model.predict(x_test[:test_size, :], batch_size = batch_size)

# The last reconstructed column is the model's estimate of the encoded label.
# Round it to the nearest class index known to the label encoder so it can be
# compared with the integer labels in a confusion matrix.
pred_value = np.clip(np.rint(x_pred[:, -1]), 0, len(le.classes_) - 1).astype(int)
actual_value = y_test[:test_size].ravel()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix on the current matplotlib figure.

    Args:
        cm: square array of counts; rows are true labels, columns are
            predicted labels.
        classes: tick labels, one per row/column of ``cm``.
        normalize: when True, each row is divided by its sum so that the
            printed values, cell annotations AND the colour map all show
            per-class fractions. Rows that sum to zero are shown as zeros.
        title: title of the plot.
        cmap: matplotlib colour map used for the cells.

    Side effects:
        Sets NumPy's global print precision to 4 and prints the matrix.
    """
    np.set_printoptions(precision=4)

    cm = np.asarray(cm)

    # Normalise before drawing, so the image uses the same values as the
    # annotations and the text-colour threshold below.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j].round(4),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
from sklearn.metrics import confusion_matrix

# Rows of the matrix are the true labels, columns the predicted labels.
cm_2labels = confusion_matrix(actual_value, pred_value)

# Draw the row-normalised matrix on a square 6x6 figure.
plt.figure(figsize=(6, 6))
plot_confusion_matrix(cm_2labels,
                      classes=output_columns_2labels,
                      normalize=True)

In [None]:
# Load the pickled "diff level" train and test objects from the dataset folder.
# NOTE(review): presumably these hold the per-record difficulty level of the
# KDD data - confirm against the code that produced the files.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
kdd_diff_level_train = pd.read_pickle("dataset/kdd_diff_level_train.pkl")
kdd_diff_level_test = pd.read_pickle("dataset/kdd_diff_level_test.pkl")

