In [10]:
import pandas as pd
import numpy as np
# Cap DataFrame rendering at 40 rows so displayed tables stay readable.
pd.set_option("display.max_rows",40)
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
class dataset:
    """Namespace holding the pickled 2-label KDD frames.

    The four pickles are read once, when this class body executes; the
    results are exposed as class attributes (no instances are created).
    """
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # pickle files that come from a trusted source.

    # Training frame and its label frame.
    kdd_train_2labels, kdd_train_2labels_y = (
        pd.read_pickle("dataset/kdd_train_2labels.pkl"),
        pd.read_pickle("dataset/kdd_train_2labels_y.pkl"),
    )

    # Test frame and its label frame.
    kdd_test_2labels, kdd_test_2labels_y = (
        pd.read_pickle("dataset/kdd_test_2labels.pkl"),
        pd.read_pickle("dataset/kdd_test_2labels_y.pkl"),
    )
    

In [3]:
from sklearn.preprocessing import LabelEncoder

# Encode the label values as integer class codes.
le_2labels = LabelEncoder()
# Fit the label -> code mapping on the training labels and attach the encoded
# result to the `dataset` namespace as a new attribute.
dataset.y_train_2labels = le_2labels.fit_transform(dataset.kdd_train_2labels_y)
# Reuse the mapping learned from the training labels for the test labels, so
# both splits share the same codes.
dataset.y_test_2labels = le_2labels.transform(dataset.kdd_test_2labels_y)


In [4]:
from itertools import product
from sklearn.model_selection import train_test_split

class preprocessing:
    """Namespace exposing the model inputs/targets as NumPy arrays.

    Features are every column of the corresponding frame except the last
    two; targets are the encoded labels stored on ``dataset``.
    """
    # NOTE(review): presumably the two trailing columns are the label
    # columns -- confirm against the pickled frames.
    x_train, y_train = (dataset.kdd_train_2labels.iloc[:, :-2].values,
                        np.array(dataset.y_train_2labels))

    x_test = dataset.kdd_test_2labels.iloc[:, :-2].values
    y_test = np.array(dataset.y_test_2labels)

    

In [5]:
from collections import namedtuple
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras import regularizers

class Train:
    """Builds, fits and scores one dense Keras network per ``execute`` call.

    Results accumulate on the class itself (no instances are created):

    * ``scores``      -- list of ``score`` records, one per call.
    * ``predictions`` -- DataFrame with one column of thresholded 0/1 test
      predictions per call, named ``"<epochs>_<no_of_features>_<hidden_layers>"``.
    """
    score = namedtuple("score", ['epoch', 'no_of_features','hidden_layers','train_score', 'test_score'])
    #model_detail = namedtuple("model_detail", ['epoch', 'no_of_features','hidden_layers', 'model'])
    scores = []
    predictions = pd.DataFrame()
    #models = []

    @staticmethod
    def execute(x_train, x_test,
                y_train, y_test,
                input_dim, no_of_features, hidden_layers,
                epochs = 10, keep_prob = 0.4, random_state = None):
        """Train one network and record its scores and test predictions.

        Parameters
        ----------
        x_train, y_train : training features / targets; 60% of them are
            held out as a validation split before fitting.
        x_test, y_test : features / targets used for the test score and
            for the stored predictions.
        input_dim : number of input features given to the first Dense layer.
        no_of_features : width of every hidden Dense layer.
        hidden_layers : number of Dense/Dropout/BatchNormalization stacks.
        epochs : number of training epochs.
        keep_prob : value passed unchanged to ``Dropout``. Despite the
            name, Keras interprets this argument as the fraction of units
            to DROP, not to keep.
        random_state : forwarded to ``train_test_split``; pass an int for a
            reproducible split. ``None`` keeps the previous unseeded behaviour.

        Returns
        -------
        None. Appends to ``Train.scores`` and adds a column to
        ``Train.predictions``.
        """
        print("Training for no_of_features: {}, hidden_layer: {}".format(no_of_features, hidden_layers
                                                                        ))
        model = Sequential()
        model.add(Dense(no_of_features, input_dim=input_dim, activation='relu'))
        model.add(Dropout(keep_prob))
        model.add(BatchNormalization())

        # The first stack is added above, so only hidden_layers - 1 remain.
        for _ in range(hidden_layers - 1):
            model.add(Dense(no_of_features, activation='relu'))
            model.add(Dropout(keep_prob))
            model.add(BatchNormalization())

        # Single linear output unit; it is binarised below.
        model.add(Dense(1, activation=None))

        model.compile(loss='mean_squared_error',
                      optimizer="Adam",
                      metrics=['accuracy'])

        x_train, x_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=.6, random_state=random_state)

        model.fit(x_train, y_train,
                  validation_data=(x_valid, y_valid),
                  epochs=epochs,
                  batch_size=128,
                  verbose = 0)

        curr_score_valid = model.evaluate(x_valid, y_valid) #, batch_size=128)
        curr_score_test = model.evaluate(x_test, y_test) #, batch_size=128)
        pred_value = model.predict(x_test)

        # NOTE: the value labelled "Train Accuracy" / stored as `train_score`
        # is measured on the held-out validation split, not on the fitted rows.
        print("\n Train Accuracy: {}, Test Accuracy: {}".format(curr_score_valid[1], curr_score_test[1])  )
        Train.scores.append(Train.score(epochs,no_of_features,hidden_layers,curr_score_valid[1], curr_score_test[1]))
        #Train.models.append(Train.model_detail(epochs,no_of_features,hidden_layers,model))

        # Binarise around the mean raw output. The threshold is computed once
        # and applied in a single vectorised comparison: writing 1s into a
        # view of `pred_value` and then re-reading its mean (as was done
        # before) shifts the threshold between the two passes.
        raw_output = pred_value[:,-1]
        threshold = raw_output.mean()
        y_pred = (raw_output >= threshold).astype(raw_output.dtype)

        # Name the column from this call's own arguments rather than from
        # whatever loop variables happen to exist in the notebook namespace.
        column_name = "{}_{}_{}".format(epochs, no_of_features, hidden_layers)
        curr_pred = pd.DataFrame({column_name: y_pred},)
        Train.predictions = pd.concat([Train.predictions, curr_pred], axis = 1)
                

Using TensorFlow backend.


In [6]:
# Grid of layer widths and depths to try; every (width, depth) pair is trained.
features_arr = [4, 8, 16, 32, 64, 128, 256, 1024]
hidden_layers_arr = [2, 4, 6, 50, 100]


# The loop variables stay named `f` and `h`: they are module-level names in
# the notebook namespace, so renaming them would affect any code reading them.
for f, h in product(features_arr, hidden_layers_arr):
    # Take the input width from the data instead of hard-coding it, so the
    # first Dense layer always matches the feature matrix.
    Train.execute(preprocessing.x_train, preprocessing.x_test,
                  preprocessing.y_train, preprocessing.y_test,
                  preprocessing.x_train.shape[1], f, h)

Training for no_of_features: 4, hidden_layer: 2
 Train Accuracy: 0.8895665749364945, Test Accuracy: 0.7222764371894961
Training for no_of_features: 4, hidden_layer: 4
 Train Accuracy: 0.8694035774767146, Test Accuracy: 0.6386178140525195
Training for no_of_features: 4, hidden_layer: 6
 Train Accuracy: 0.5343855842506351, Test Accuracy: 0.43075762952448543
Training for no_of_features: 4, hidden_layer: 50
 Train Accuracy: 0.5331816257408976, Test Accuracy: 0.43075762952448543
Training for no_of_features: 4, hidden_layer: 100

 Train Accuracy: 0.5348618755292125, Test Accuracy: 0.43075762952448543
Training for no_of_features: 8, hidden_layer: 2
 Train Accuracy: 0.9181043607112617, Test Accuracy: 0.738555713271824
Training for no_of_features: 8, hidden_layer: 4
 Train Accuracy: 0.9460335520745131, Test Accuracy: 0.7541696238466998
Training for no_of_features: 8, hidden_layer: 6
 Train Accuracy: 0.9169268628281118, Test Accuracy: 0.6952625975869411
Training for no_of_features: 8, hidden_lay

In [11]:
# Tabulate the accumulated `score` records: one row per Train.execute call,
# with the namedtuple fields as columns.
pd.DataFrame(Train.scores)

Unnamed: 0,epoch,no_of_features,hidden_layers,train_score,test_score
0,10,4,2,0.889567,0.722276
1,10,4,4,0.869404,0.638618
2,10,4,6,0.534386,0.430758
3,10,4,50,0.533182,0.430758
4,10,4,100,0.534862,0.430758
5,10,8,2,0.918104,0.738556
6,10,8,4,0.946034,0.75417
7,10,8,6,0.916927,0.695263
8,10,8,50,0.53555,0.430758
9,10,8,100,0.534822,0.430758


In [8]:
#for m in Train.models:
#    m.model.save("dataset/keras_model_epoch_{}_no_of_features_{}_hidden_layers_{}".format(m.epoch,
#                                                                                         m.no_of_features,
#                                                                                         m.hidden_layers))

In [9]:
# Persist the thresholded test predictions (one column per trained model).
Train.predictions.to_pickle("dataset/dense_only_predictions.pkl")
# Persist the per-model score records as a DataFrame.
pd.DataFrame(Train.scores).to_pickle("dataset/dense_only_scores.pkl")