In [1]:
import pandas as pd
import numpy as np
pd.set_option("display.max_rows",40)
%matplotlib inline

In [2]:
class dataset:
    # Namespace holding the pre-pickled 2-label frames; everything is loaded once,
    # at class-definition time, from paths relative to the working directory.
    # NOTE(review): pd.read_pickle executes arbitrary code embedded in the file --
    # only point these paths at pickles produced by a trusted pipeline.

    # Training features and their labels.
    kdd_train_2labels, kdd_train_2labels_y = (
        pd.read_pickle("dataset/kdd_train_2labels.pkl"),
        pd.read_pickle("dataset/kdd_train_2labels_y.pkl"),
    )

    # Test features and their labels.
    kdd_test_2labels, kdd_test_2labels_y = (
        pd.read_pickle("dataset/kdd_test_2labels.pkl"),
        pd.read_pickle("dataset/kdd_test_2labels_y.pkl"),
    )
    

In [3]:
from sklearn.preprocessing import LabelEncoder

# Encode the labels as integers. The encoder is fitted on the training labels
# only and then re-used for the test labels, so both share one class -> integer
# mapping. The encoded arrays are attached to `dataset` as new attributes.
le_2labels = LabelEncoder()
dataset.y_train_2labels, dataset.y_test_2labels = (
    le_2labels.fit_transform(dataset.kdd_train_2labels_y),
    le_2labels.transform(dataset.kdd_test_2labels_y),
)


In [4]:
from itertools import product
from sklearn.model_selection import train_test_split

class preprocessing:
    # Namespace exposing plain NumPy arrays for model training and evaluation.
    # Features are every column except the last two of each frame
    # (presumably those two hold the label encoding -- TODO confirm against the
    # code that produced the pickles).
    x_train, y_train = (dataset.kdd_train_2labels.iloc[:, :-2].values,
                        np.array(dataset.y_train_2labels))

    x_test = dataset.kdd_test_2labels.iloc[:, :-2].values
    y_test = np.array(dataset.y_test_2labels)

    

In [5]:
from collections import namedtuple
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras import regularizers

class Train:
    """Grid-search harness for small dense Keras networks.

    Used purely as a namespace: ``execute`` is called on the class itself and
    accumulates its results in the class-level ``scores`` list and
    ``predictions`` frame.
    """
    # One record per trained configuration. Despite its name, 'train_score' holds
    # the accuracy on the held-out validation split produced inside execute().
    score = namedtuple("score", ['epoch', 'no_of_features','hidden_layers','train_score', 'test_score'])
    #model_detail = namedtuple("model_detail", ['epoch', 'no_of_features','hidden_layers', 'model'])
    scores = []
    # One column of binarized test-set predictions per trained configuration.
    predictions = pd.DataFrame()
    #models = []

    @staticmethod
    def _binarize_at_mean(raw_scores):
        """Return 1 where a score is >= the mean of all scores, else 0.

        The threshold is computed once from the untouched input and a new array
        (same dtype as the input) is returned; the input is never modified.
        """
        raw_scores = np.asarray(raw_scores)
        threshold = raw_scores.mean()
        return (raw_scores >= threshold).astype(raw_scores.dtype)

    @staticmethod
    def execute(x_train, x_test, 
                y_train, y_test, 
                input_dim, no_of_features, hidden_layers,
                epochs = 5, keep_prob = 1):
        """Train one dense network and record its scores and test predictions.

        Parameters
        ----------
        x_train, y_train : training features / labels; 60% of the rows are split
            off as a validation set and only the remaining 40% are fitted.
        x_test, y_test : test features / labels.
        input_dim : number of input features (columns of ``x_train``).
        no_of_features : number of units in every hidden Dense layer.
        hidden_layers : number of hidden Dense layers (at least one is built).
        epochs : number of training epochs.
        keep_prob : probability of keeping a unit in the Dropout layers,
            in (0, 1]; 1 disables dropout.

        Raises
        ------
        ValueError
            If ``keep_prob`` is outside (0, 1].

        Side effects: appends a ``Train.score`` to ``Train.scores`` and adds a
        column named ``"<epochs>_<no_of_features>_<hidden_layers>"`` to
        ``Train.predictions``.
        """
        if not 0 < keep_prob <= 1:
            raise ValueError("keep_prob must be in (0, 1], got {}".format(keep_prob))
        # Keras' Dropout takes the fraction of units to DROP, not to keep.
        drop_rate = 1 - keep_prob

        print("Training for no_of_features: {}, hidden_layer: {}".format(no_of_features, hidden_layers
                                                                        ))
        model = Sequential()
        model.add(Dense(no_of_features, input_dim=input_dim, activation='relu'))
        model.add(Dropout(drop_rate))
        #model.add(BatchNormalization())

        # The first hidden layer was added above, hence hidden_layers - 1 more.
        for _ in range(hidden_layers - 1):
            model.add(Dense(no_of_features, activation='relu'))
            model.add(Dropout(drop_rate))
            #model.add(BatchNormalization())

        # NOTE(review): a linear output trained with MSE is a regression head; the
        # 'accuracy' metric reported for it is unreliable for unbounded outputs.
        # Consider a sigmoid output with binary cross-entropy -- left unchanged
        # here so that results stay comparable with earlier runs.
        model.add(Dense(1, activation=None))

        model.compile(loss='mean_squared_error',
                      optimizer="Adam",
                      metrics=['accuracy'])

        x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=.6)

        # NOTE(review): the test set is used for per-epoch monitoring while the
        # validation split is only evaluated after training -- confirm intent.
        model.fit(x_train, y_train,
                  validation_data=(x_test, y_test),
                  epochs=epochs,
                  batch_size=128,
                  verbose = 1)

        curr_score_valid = model.evaluate(x_valid, y_valid) #, batch_size=128)
        curr_score_test = model.evaluate(x_test, y_test) #, batch_size=128)
        pred_value = model.predict(x_test)

        # "Train Accuracy" below is the accuracy on the validation split.
        print("\n Train Accuracy: {}, Test Accuracy: {}".format(curr_score_valid[1], curr_score_test[1])  )
        Train.scores.append(Train.score(epochs,no_of_features,hidden_layers,curr_score_valid[1], curr_score_test[1]))
        #Train.models.append(Train.model_detail(epochs,no_of_features,hidden_layers,model))

        # Binarize against a single, fixed threshold. Thresholding a view in place
        # would shift the mean between the ">=" and "<" steps and leave some
        # values un-binarized or flipped.
        y_pred = Train._binarize_at_mean(pred_value[:,-1])

        # Name the column after this call's own arguments rather than globals.
        column = "{}_{}_{}".format(epochs, no_of_features, hidden_layers)
        curr_pred = pd.DataFrame({column: y_pred})
        Train.predictions = pd.concat([Train.predictions, curr_pred], axis = 1)
                

Using TensorFlow backend.


In [6]:
#features_arr = [4, 8, 16, 32, 64, 128, 256, 1024]
#hidden_layers_arr = [2, 4, 6, 50, 100]

# Hyper-parameter grid: units per hidden layer x number of hidden layers.
features_arr = [2, 4, 8, 16, 32]
hidden_layers_arr = [2, 6, 10]

# Train one model per grid point. The input width is read from the training
# matrix instead of being hard-coded, so the cell keeps working if the feature
# set changes. The loop variables keep the names f and h because other code in
# this notebook may read them as globals.
for f, h in product(features_arr, hidden_layers_arr):
    Train.execute(preprocessing.x_train, preprocessing.x_test,
                  preprocessing.y_train, preprocessing.y_test,
                  preprocessing.x_train.shape[1], f, h)

Training for no_of_features: 2, hidden_layer: 2
Train on 50389 samples, validate on 22544 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 Train Accuracy: 0.5372565622353938, Test Accuracy: 0.43075762952448543
Training for no_of_features: 2, hidden_layer: 6
Train on 50389 samples, validate on 22544 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 Train Accuracy: 0.8461711473327689, Test Accuracy: 0.7745741660752307
Training for no_of_features: 2, hidden_layer: 10
Train on 50389 samples, validate on 22544 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 Train Accuracy: 0.5352587849280271, Test Accuracy: 0.43075762952448543
Training for no_of_features: 4, hidden_layer: 2
Train on 50389 samples, validate on 22544 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 Train Accuracy: 0.030522332768839967, Test Accuracy: 0.023243435060326473
Training for no_of_features: 4, hidden_layer: 6
Train on 50389 samples, validate on 22544 samples
Epoch 1/5
Epoch 2/5

In [7]:
# Tabulate the records collected in Train.scores (one row per Train.execute call).
pd.DataFrame(Train.scores)

Unnamed: 0,epoch,no_of_features,hidden_layers,train_score,test_score
0,5,2,2,0.537257,0.430758
1,5,2,6,0.846171,0.774574
2,5,2,10,0.535259,0.430758
3,5,4,2,0.030522,0.023243
4,5,4,6,0.948693,0.712207
5,5,4,10,0.534042,0.430758
6,5,8,2,0.017636,0.009359
7,5,8,6,0.557009,0.551721
8,5,8,10,0.969584,0.770271
9,5,16,2,0.000913,0.000621


In [8]:
#for m in Train.models:
#    m.model.save("dataset/keras_model_epoch_{}_no_of_features_{}_hidden_layers_{}".format(m.epoch,
#                                                                                         m.no_of_features,
#                                                                                         m.hidden_layers))

In [9]:
# Persist both results. Writing both objects to one path let the scores table
# overwrite the predictions, so the predictions now get a file of their own.
# The scores keep the original path, so what ends up on disk there is unchanged.
Train.predictions.to_pickle("dataset/keras_dense_nsl_kdd_predictions.pkl")
pd.DataFrame(Train.scores).to_pickle("dataset/keras_dense_nsl_kdd.pkl")