# NSL-KDD: Sequenziale 5 Classi Shuffle

### Importo librerie e dati

In [None]:
# Mount Google Drive at '/drive' (Colab-only helper); the file paths used
# further down in this notebook all live under '/drive/My Drive'.
from google.colab import drive
drive.mount('/drive')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

Mounted at /drive


In [None]:
# Names of the 41 input features, in the order they appear in the data files.
_feature_names = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes',
    'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins',
    'logged_in', 'num_compromised', 'root_shell', 'su_attempted', 'num_root',
    'num_file_creations', 'num_shells', 'num_access_files',
    'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate',
    'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
    'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate',
    'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
    'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
    'dst_host_srv_rerror_rate',
]

# Full header of the raw files: the features plus the label and difficulty columns.
columns = np.array(_feature_names + ['Attack', 'difficulty'])
# Columns carrying the data the algorithm learns from.
features = np.array(_feature_names)
# Column holding the expected output (the label).
target = np.array(['Attack'])

In [None]:
# Load both data files; they have no header row, so the column names are
# supplied explicitly through `columns`.
train, test = (
    pd.read_csv(csv_path, names=columns)
    for csv_path in (
        '/drive/My Drive/NSLKDD/Dataset/KDDTrain+.txt',
        '/drive/My Drive/NSLKDD/Dataset/KDDTest+.txt',
    )
)

### Modello il dataset

In [None]:
# The 'difficulty' column is not needed, so remove it in place from both frames.
for frame in (train, test):
    del frame['difficulty']

In [None]:
# Report how many feature names and target names were declared.
for caption, names in (('Features Totali:', features), ('Target Totali:', target)):
    print(caption, names.size)

Features Totali: 41
Target Totali: 1


In [None]:
# Sanity-check the (rows, columns) shape of each loaded frame.
for caption, frame in (('Train:', train), ('Test:', test)):
    print(caption, frame.shape)

Train: (125973, 42)
Test: (22544, 42)


In [None]:
# Load attackMap.txt: each non-blank line holds two whitespace-separated
# tokens, "<attack name> <attack category>". The file is opened with a context
# manager so the handle is closed deterministically, and blank lines are
# skipped instead of breaking the key/value unpacking.
with open('/drive/My Drive/NSLKDD/Dataset/attackMap.txt', 'r') as map_file:
    map_attacks = dict(line.split() for line in map_file if line.strip())

# Collapse the fine-grained attack names in the label column into their categories.
train['Attack'] = train['Attack'].replace(map_attacks)
test['Attack'] = test['Attack'].replace(map_attacks)

In [None]:
# Display the dtype of every column; the object-typed ones are the columns
# that get encoded further down.
train.dtypes

duration                         int64
protocol_type                   object
service                         object
flag                            object
src_bytes                        int64
dst_bytes                        int64
land                             int64
wrong_fragment                   int64
urgent                           int64
hot                              int64
num_failed_logins                int64
logged_in                        int64
num_compromised                  int64
root_shell                       int64
su_attempted                     int64
num_root                         int64
num_file_creations               int64
num_shells                       int64
num_access_files                 int64
num_outbound_cmds                int64
is_host_login                    int64
is_guest_login                   int64
count                            int64
srv_count                        int64
serror_rate                    float64
srv_serror_rate          

In [None]:
# Stack train and test into a single frame so the categorical encoding below
# sees every category present in either file. The original row labels are
# kept (no index reset), so index values repeat across the two parts.
df = pd.concat(objs=[train, test], axis=0)

In [None]:
from sklearn.utils import shuffle

# Shuffle the rows of the combined frame. A fixed seed makes the row order,
# and therefore the positional train/held-out split taken later, reproducible
# across runs (same seed value as the train/validation split).
df = shuffle(df, random_state=42)

In [None]:
# Display the (rows, columns) shape of the combined, shuffled frame.
df.shape

(148517, 42)

In [None]:
# One-hot encode every object-typed column that comes before the label.
# The walk follows the declared column order and stops at 'Attack', which is
# only cast to a categorical dtype here (its dummies are built separately).
for col in columns:
    if df[col].dtypes != object:
        continue
    print(col + ": " + str(df[col].dtype))
    df[col] = df[col].astype('category')
    if col == "Attack":
        break
    # Replaces the column with its indicator columns, appended at the end.
    df = pd.get_dummies(df, columns=[col])

protocol_type: object
service: object
flag: object
Attack: object


In [None]:
# Feature matrix: every column except the label.
xCol = df.columns.drop('Attack')
x = df[xCol].values

# Label matrix for classification: one indicator column per attack category.
dummies = pd.get_dummies(df['Attack'])
y = dummies.values
outcomes = dummies.columns
num_classes = len(outcomes)

### Creo i dati del training

In [None]:
# Normalisation in two steps, using each function's defaults: `scale`
# standardises the columns, then `normalize` rescales every row to unit norm.
# NOTE(review): both steps run on the full matrix before the rows are split
# into train / held-out parts further down, so the column statistics include
# the held-out rows -- confirm this is acceptable for the experiment.
x = preprocessing.normalize(preprocessing.scale(x))

In [None]:
import io
import requests
import os
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Positional split of the shuffled matrix: the first len(train) rows (125973
# for the original files) become the train+validation pool, the remaining
# rows are held out and used only for the final evaluation.
# Because the combined frame was shuffled beforehand, the held-out rows are a
# random sample of train+test rather than the original test file.
n_train_val = len(train)
X = x[:n_train_val]
xTest = x[n_train_val:]
Y = y[:n_train_val]
yTest = y[n_train_val:]

In [None]:
# Keep 20% of the train+validation pool aside for validation (fixed seed).
xTrain, xVal, yTrain, yVal = train_test_split(X, Y, test_size=0.2, random_state=42)

# Report the size and shape of each resulting array.
for part, caption in (
    (xTrain, "Training sequences"),
    (yTrain, "Training sequences"),
    (xVal, "Validation sequences"),
    (yVal, "Validation sequences"),
):
    print(len(part), caption, part.shape)

100778 Training sequences (100778, 122)
100778 Training sequences (100778, 5)
25195 Validation sequences (25195, 122)
25195 Validation sequences (25195, 5)


### Creazione del modello Sequenziale

In [None]:
# Feed-forward classifier: ReLU hidden layers followed by a softmax with one
# unit per label column.
model = Sequential()
# Only the first layer declares the input width; the following layers take
# their input size from the previous layer's output, so repeating `input_dim`
# on them had no effect and was misleading.
model.add(Dense(10, input_dim=xTrain.shape[1], activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(10, activation='relu'))
# NOTE(review): this single linear unit squeezes the whole representation into
# one scalar right before the softmax. It looks like a leftover from a
# regression template -- confirm it is intentional before relying on the
# per-class results.
model.add(Dense(1, kernel_initializer='normal'))
model.add(Dense(yTrain.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Early stopping on validation loss: an improvement must be at least 1e-3,
# training stops after 5 epochs without one, and the best weights are restored.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3,
                        patience=5, verbose=1, mode='auto',
                        restore_best_weights=True)

In [None]:
# Print the layer-by-layer table (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                1230      
                                                                 
 dense_1 (Dense)             (None, 50)                550       
                                                                 
 dense_2 (Dense)             (None, 10)                510       
                                                                 
 dense_3 (Dense)             (None, 1)                 11        
                                                                 
 dense_4 (Dense)             (None, 5)                 10        
                                                                 
Total params: 2,311
Trainable params: 2,311
Non-trainable params: 0
_________________________________________________________________


#### Training

In [None]:
# Train with the validation split monitored by the early-stopping callback;
# 1000 is only an upper bound on the number of epochs.
model.fit(
    xTrain,
    yTrain,
    validation_data=(xVal, yVal),
    epochs=1000,
    callbacks=[monitor],
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 32: early stopping


<keras.callbacks.History at 0x7fa88fb9c5d0>

#### Evaluation

In [None]:
# Overall accuracy on the held-out rows (xTest / yTest): compare the most
# probable predicted class with the class marked in the one-hot label row.
# The printed label says "Validation", but the data used here is the held-out split.
prediction = model.predict(xTest)
pred = prediction.argmax(axis=1)
y_eval = yTest.argmax(axis=1)
score = metrics.accuracy_score(y_eval, pred)
print("Validation score: {}".format(score))

Validation score: 0.9803495386799148


### Validation

In [None]:
# Label column order used below: dos, normal, probe, r2l, u2r
def count(index, array):
    """Return how many rows of `array` have the value 1 at position `index`."""
    return sum(1 for row in array if row[index] == 1)

In [None]:
#train
print('dos',count(0, yTrain))
print('normal',count(1, yTrain))
print('probe',count(2, yTrain))
print('r2l',count(3, yTrain))
print('u2r',count(4, yTrain))

dos 36257
normal 52257
probe 9527
r2l 2653
u2r 84


In [None]:
#validation
print('dos',count(0, yVal))
print('normal',count(1, yVal))
print('probe',count(2, yVal))
print('r2l',count(3, yVal))
print('u2r',count(4, yVal))

dos 8959
normal 13195
probe 2396
r2l 633
u2r 12


In [None]:
#test
print('dos',count(0, yTest))
print('normal',count(1, yTest))
print('probe',count(2, yTest))
print('r2l',count(3, yTest))
print('u2r',count(4, yTest))

dos 8171
normal 11602
probe 2154
r2l 594
u2r 23


In [None]:
# Per-class accuracy on the held-out rows: for each of the 5 label columns,
# keep only the rows whose true class is that column and measure how many of
# them the model classifies correctly.
valScores = []
for i in range(5):
    # A boolean mask selects the rows directly from the arrays, instead of
    # rebuilding them element by element as nested Python lists.
    class_mask = yTest[:, i] == 1
    if not class_mask.any():
        # No sample of this class in the held-out split: record NaN so the
        # list still has one entry per class.
        valScores.append(float('nan'))
        continue
    xRare = xTest[class_mask]
    yRare = yTest[class_mask]
    prediction = model.predict(xRare)
    pred = np.argmax(prediction, axis=1)
    y_eval = np.argmax(yRare, axis=1)
    score = metrics.accuracy_score(y_eval, pred)
    valScores.append(score)

In [None]:
# Print the per-class scores, three decimals each, in label-column order.
score_templates = (
    'Validation score dos {:.3f}',
    'Validation score normal {:.3f}',
    'Validation score probe {:.3f}',
    'Validation score r2l {:.3f}',
    'Validation score u2r {:.3f}',
)
for class_idx, template in enumerate(score_templates):
    print(template.format(valScores[class_idx]))

Validation score dos 0.996
Validation score normal 0.989
Validation score probe 0.979
Validation score r2l 0.635
Validation score u2r 0.000


### Conversione

In [None]:
# Build a TensorFlow Lite converter from the trained Keras model and run the
# conversion; `tflite_model` is what gets written to disk afterwards.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

In [None]:
# Save the converted model. The context manager closes (and flushes) the file
# handle, which the bare open(...).write(...) call never did explicitly.
with open("/drive/My Drive/NSLKDD/seq_Shuffle_5class.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

11716