# Read Data Sample

In [1]:
import pandas as pd
import numpy as np
import os
from collections import namedtuple
pd.set_option("display.max_rows",35)
%matplotlib inline

In [2]:
class dataset:
    kdd_train_2labels = pd.read_pickle("dataset/kdd_train_2labels.pkl")
    kdd_test_2labels = pd.read_pickle("dataset/kdd_test_2labels.pkl")
    
    kdd_train_5labels = pd.read_pickle("dataset/kdd_train_5labels.pkl")
    kdd_test_5labels = pd.read_pickle("dataset/kdd_test_5labels.pkl")
    

In [3]:
dataset.kdd_train_2labels.shape

(125973, 124)

In [4]:
dataset.kdd_test_2labels.shape

(22544, 124)

In [5]:
from sklearn import model_selection as ms
from sklearn import preprocessing as pp

class preprocess:
    
    output_columns_2labels = ['is_Attack','is_Normal']
    
    x_input = dataset.kdd_train_2labels.drop(output_columns_2labels, axis = 1)
    y_output = dataset.kdd_train_2labels.loc[:,output_columns_2labels]

    x_test_input = dataset.kdd_test_2labels.drop(output_columns_2labels, axis = 1)
    y_test = dataset.kdd_test_2labels.loc[:,output_columns_2labels]

    ss = pp.StandardScaler()

    x_train = ss.fit_transform(x_input)
    x_test = ss.transform(x_test_input)

    y_train = y_output.values
    y_test = y_test.values

    x_train_normal = x_train[y_output.is_Normal == 1]
    y_train_normal = y_output.is_Normal[y_output.is_Normal == 1].values
    
preprocess.x_train_normal.shape

(67343, 122)

Data Reduction by:

In [6]:
(preprocess.x_train.shape[0] - preprocess.x_train_normal.shape[0]) / preprocess.x_train.shape[0]

0.4654171925730117

In [7]:
import tensorflow as tf


In [8]:
class network(object):
    
    input_dim = 122
    classes = 1
    hidden_dim = 122
    

In [9]:
class discriminator(network):
    
    def __init__(self, hidden_layers):
        


        input_dim = self.input_dim
        classes = self.classes
        hidden_dim = self.hidden_dim

        self.x_real = tf.placeholder("float", shape=[None, input_dim])
        self.y_real_ = tf.placeholder("float", shape=[None, classes])

        self.x_random = tf.placeholder("float", shape=[None, input_dim])
        self.y_fake_ = tf.placeholder("float", shape=[None, classes])

        self.keep_prob = tf.placeholder("float")
        self.learning_rate = tf.placeholder("float")

        def discriminator_network(x, reuse=False):
            with tf.variable_scope("discriminator", reuse=reuse):
                hidden = tf.layers.dense(x, hidden_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
                hidden = tf.nn.dropout(hidden, self.keep_prob)

                for h in range(hidden_layers - 1):
                    hidden = tf.layers.dense(hidden, hidden_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
                    hidden = tf.nn.dropout(hidden, self.keep_prob)

            y = tf.layers.dense(hidden, classes, activation=tf.nn.sigmoid)
            return y
        
        def generator_network(x, reuse=False):
            with tf.variable_scope('generator', reuse=reuse):
                hidden = tf.layers.dense(x, hidden_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
                hidden = tf.nn.dropout(hidden, self.keep_prob)

                for h in range(hidden_layers - 2):
                    hidden = tf.layers.dense(hidden, hidden_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
                    hidden = tf.nn.dropout(hidden, self.keep_prob)

            y = tf.layers.dense(hidden, input_dim, activation=tf.nn.tanh)
            return y

        x_fake = generator_network(self.x_random)

        self.y = discriminator_network(self.x_real)
        y_fake = discriminator_network(x_fake, reuse=True)

        
        loss_real = tf.losses.sigmoid_cross_entropy(self.y_real_, self.y)
        loss_fake = tf.losses.sigmoid_cross_entropy(self.y_fake_, y_fake)

        self.loss = loss_real + loss_fake

        correct_prediction = tf.equal(self.y_real_, self.y)
        self.tf_accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name = "Accuracy")

        d_optimizer = tf.train.AdamOptimizer(self.learning_rate)
        g_optimizer = tf.train.AdamOptimizer(self.learning_rate)

        #gradients, variables = zip(*optimizer.compute_gradients(self.loss))
        #gradients = [
        #    None if gradient is None else tf.clip_by_value(gradient, -1, 1)
        #    for gradient in gradients]
        #self.train_op = optimizer.apply_gradients(zip(gradients, variables))
        self.d_train_op = d_optimizer.minimize(self.loss)
        self.g_train_op = g_optimizer.minimize(loss_fake)

        # add op for merging summary
        #self.summary_op = tf.summary.merge_all()
        self.pred = tf.argmax(self.y, axis = 1)
        self.actual = tf.argmax(self.y_real_, axis = 1)

        # add Saver ops
        self.saver = tf.train.Saver()


In [10]:
class generator1(network):
    
    def __init__(self, hidden_layers):
        
        input_dim = self.input_dim
        hidden_dim = self.hidden_dim
            
        self.graph = tf.Graph()
        with self.graph.as_default():
            
            self.x = tf.placeholder("float", shape=[None, input_dim])
            self.keep_prob = tf.placeholder("float")

            self.learning_rate = tf.placeholder("float")
            self.loss = tf.placeholder("float")

            hidden = tf.layers.dense(self.x, hidden_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
            hidden = tf.nn.dropout(hidden, self.keep_prob)

            for h in range(hidden_layers - 1):
                hidden = tf.layers.dense(hidden, hidden_dim, activation = tf.nn.relu, kernel_regularizer=tf.nn.l2_loss)
                hidden = tf.nn.dropout(hidden, self.keep_prob)

            self.y = tf.layers.dense(hidden, input_dim, activation=tf.nn.tanh)

            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            #gradients, variables = zip(*optimizer.compute_gradients(self.loss))
            #gradients = [
            #    None if gradient is None else tf.clip_by_value(gradient, -1, 1)
            #    for gradient in gradients]
            #self.train_op = optimizer.apply_gradients(zip(gradients, variables))
            self.train_op = optimizer.minimize(hidden)

            # add Saver ops
            self.saver = tf.train.Saver()

In [11]:
import collections

class Train:    
    
    result = namedtuple("score", ['epoch', 'no_of_features','hidden_layers','train_score', 'test_score'])

    predictions = {}

    results = []
    best_acc = 0
    
    def train(epochs, h, f):
        batch_iterations = 200
        train_loss = None
        Train.best_acc = 0
        
        d = discriminator(h)
        #g = generator(h)
        
        os.makedirs("dataset/tf_GAN_with_Dense_nsl_kdd/hidden_layers_{}_features_count_{}".format(epochs,h,f),
                    exist_ok = True)
        
        d.sess = tf.Session()
        
        #summary_writer_train = tf.summary.FileWriter('./logs/kdd/VAE/training', graph=sess.graph)
        #summary_writer_valid = tf.summary.FileWriter('./logs/kdd/VAE/validation')

        #g.sess.run(tf.global_variables_initializer())

        for epoch in range(1, (epochs+1)):
            x_train, x_valid, y_train, y_valid, = ms.train_test_split(preprocess.x_train_normal, 
                                                                      preprocess.y_train_normal, 
                                                                      test_size=0.1)
            y_train = np.reshape(y_train, (-1,1))
            y_train_normal = np.zeros_like(y_train)
            y_train_attack = np.ones_like(y_train)
            x_train_attack_rnd = np.random.normal(size=x_train.shape)

            batch_indices = np.array_split(np.arange(x_train.shape[0]), batch_iterations)

            for i in batch_indices:

                def train_batch():
                    nonlocal train_loss

                    # Passing Normal (real) and Attack (fake) Traffic and training together
                    d_loss_normal, _ = d.sess.run([d.loss, d.d_train_op], 
                                                  feed_dict={d.x_real: x_train[i,:], 
                                                             d.y_real_: y_train_normal[i,:],
                                                             d.x_random: x_train_attack_rnd[i,:],
                                                             d.y_fake_: y_train_attack[i,:],
                                                             d.keep_prob:1})
                
                    #Train Generator
                    #g.sess.run([g.train_op], feed_dict={g.loss:d_loss_attack_fake})

                    train_loss = d_loss

                train_batch()
                #summary_writer_train.add_summary(summary_str, epoch)
                while((train_loss > 1e4 or np.isnan(train_loss)) and epoch > 1):
                    print("Step {} | Training Loss: {:.6f}".format(epoch, train_loss))
                    d.saver.restore(sess, 
                                      tf.train.latest_checkpoint('dataset/tf_GAN_with_Dense_nsl_kdd/hidden_layers_{}_features_count_{}'
                                                                 .format(epochs,h,f)))
                    train_batch()


            valid_accuracy = d.sess.run(d.tf_accuracy, #net.summary_op 
                                                  feed_dict={d.x: x_valid, 
                                                             d.y_: y_valid, 
                                                             d.keep_prob:1})
            #summary_writer_valid.add_summary(summary_str, epoch)


            accuracy, pred_value, actual_value, y_pred = d.sess.run([d.tf_accuracy, 
                                                                       d.pred, 
                                                                       d.actual, d.y], 
                                                                      feed_dict={d.x: preprocess.x_test, 
                                                                                 d.y_: preprocess.y_test, 
                                                                                 d.keep_prob:1})

            print("Step {} | Training Loss: {:.6f} | Validation Accuracy: {:.6f}".format(epoch, train_loss, valid_accuracy))
            print("Accuracy on Test data: {}".format(accuracy))

            if accuracy > Train.best_acc:
                Train.best_acc = accuracy
                Train.pred_value = pred_value
                Train.actual_value = actual_value
                Train.best_parameters = "Hidden Layers:{}, Features Count:{}".format(h, f)
                if not (np.isnan(train_loss)):
                    d.saver.save(d.sess, 
                               "dataset/tf_GAN_with_Dense_nsl_kdd/hidden_layers_{}_features_count_{}".format(h,f),
                                global_step = epochs)
                curr_pred = pd.DataFrame({"Attack_prob":y_pred[:,-2], "Normal_prob":y_pred[:, -1], "Prediction":pred_value})
                Train.predictions.update({"{}_{}_{}".format(epochs,f,h):(curr_pred, 
                                           Train.result(epochs, f, h,valid_accuracy, accuracy))})

                #Train.results.append(Train.result(epochs, f, h,valid_accuracy, accuracy))


In [12]:
import itertools
class Hyperparameters:
#    features_arr = [2, 4, 8, 16, 32, 64, 128, 256]
#    hidden_layers_arr = [2, 4, 6, 10]
    features_arr = [4, 8, 16, 32, 122]
    hidden_layers_arr = [2, 4, 6]

    epochs = [1]
    
    for e, h, f in itertools.product(epochs, hidden_layers_arr, features_arr):
        print("Current Layer Attributes - epochs:{} hidden layers:{} features count:{}".format(e,h,f))
        
        Train.train(e, h, f)
        

Current Layer Attributes - epochs:1 hidden layers:2 features count:4


FailedPreconditionError: Attempting to use uninitialized value beta2_power
	 [[Node: beta2_power/read = Identity[T=DT_FLOAT, _class=["loc:@generator/dense/kernel"], _device="/job:localhost/replica:0/task:0/cpu:0"](beta2_power)]]

Caused by op 'beta2_power/read', defined at:
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2683, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2787, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2847, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-ab7cca2dd14b>", line 2, in <module>
    class Hyperparameters:
  File "<ipython-input-12-ab7cca2dd14b>", line 13, in Hyperparameters
    Train.train(e, h, f)
  File "<ipython-input-11-ae151964178f>", line 17, in train
    d = discriminator(h)
  File "<ipython-input-9-dff41f2dfb7a>", line 66, in __init__
    self.d_train_op = d_optimizer.minimize(self.loss)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 325, in minimize
    name=name)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 446, in apply_gradients
    self._create_slots([_get_variable_for(v) for v in var_list])
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tensorflow/python/training/adam.py", line 119, in _create_slots
    trainable=False)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 197, in __init__
    expected_shape=expected_shape)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 316, in _init_from_args
    self._snapshot = array_ops.identity(self._variable, name="read")
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1338, in identity
    result = _op_def_lib.apply_op("Identity", input=input, name=name)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/ritesh_malaiya/anaconda3/envs/p3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

FailedPreconditionError (see above for traceback): Attempting to use uninitialized value beta2_power
	 [[Node: beta2_power/read = Identity[T=DT_FLOAT, _class=["loc:@generator/dense/kernel"], _device="/job:localhost/replica:0/task:0/cpu:0"](beta2_power)]]


In [None]:
dict1 = {}
dict2 = []
for k, (v1, v2) in Train.predictions.items():
    dict1.update({k: v1})
    dict2.append(v2)

In [None]:
Train.predictions = dict1
Train.results = dict2

In [None]:
df_results = pd.DataFrame(Train.results)

In [None]:
df_results.sort_values(by = 'test_score', ascending = False)

In [None]:
pd.Panel(Train.predictions).to_pickle("dataset/tf_dense_only_nsl_kdd_predictions.pkl")
df_results.to_pickle("dataset/tf_dense_only_nsl_kdd_scores.pkl")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    np.set_printoptions(precision=4)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j].round(4),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

def plot(actual_value, pred_value):
    from sklearn.metrics import confusion_matrix
    cm_2labels = confusion_matrix(y_pred = pred_value, y_true = actual_value)
    plt.figure(figsize=[6,6])
    plot_confusion_matrix(cm_2labels, preprocess.output_columns_2labels, normalize = True,
                         title = Train.best_parameters)

In [None]:
plot(actual_value = Train.actual_value, pred_value = Train.pred_value)