In [1]:
# !pip install pydot

In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

#### LOAD DATASET

In [5]:
# helper functions
def DataLoading (mypath):
    """Read a header-less, comma-separated file into a DataFrame.

    Parameters
    ----------
    mypath : str or path-like
        Location of the file to read.

    Returns
    -------
    pandas.DataFrame
        One row per line of the file; because no header row is read, the
        columns are labelled with integer positions.
    """
    print("Loading the data")
    # The pure-Python parser is requested explicitly, as in the original call.
    return pd.read_csv(mypath, sep=",", header=None, engine="python")


# column names
_KDD_COLUMN_NAMES = [
    "duration", "protocol_type", "service", "flag", "src_bytes",
    "dst_bytes", "land", "wrong_fragment", "urgent", "hot", "num_failed_logins",
    "logged_in", "num_compromised", "root_shell", "su_attempted", "num_root",
    "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds",
    "is_host_login", "is_guest_login", "count", "srv_count", "serror_rate",
    # NOTE(review): "srv_error_rate" looks like a typo for "srv_serror_rate";
    # kept as-is because later cells may refer to the column by this name.
    "srv_error_rate", "rerror_rate", "srv_rerror_rate", "same_srv_rate",
    "diff_srv_rate", "srv_diff_host_rate", "dst_host_count", "dst_host_srv_count",
    "dst_host_same_srv_rate", "dst_host_diff_srv_rate", "dst_host_same_src_port_rate",
    "dst_host_srv_diff_host_rate", "dst_host_serror_rate", "dst_host_srv_serror_rate",
    "dst_host_rerror_rate", "dst_host_srv_rerror_rate", "attack", "difficulty",
]

# Text-valued feature columns that are replaced by integer codes.
_KDD_CATEGORICAL_COLUMNS = ("protocol_type", "service", "flag")


def prepate_dataset(df):
    """Name the columns, build a binary target and integer-encode text features.

    The input frame is not modified; all work happens on a copy, so the
    function can be called repeatedly on the same raw frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame with exactly 43 columns in the order given by
        ``_KDD_COLUMN_NAMES`` (41 features, the attack label, the difficulty
        score).

    Returns
    -------
    pandas.DataFrame
        A new frame with the 41 feature columns followed by ``class``:

        * ``class`` is ``1`` where the attack label equals ``"normal"`` and
          ``0`` for every other label. The mapping is fixed, so it does not
          depend on which labels happen to be present, and attack names that
          are not known in advance are still treated as attacks.
        * ``protocol_type``, ``service`` and ``flag`` are replaced by integer
          codes assigned in sorted order of the distinct values found in
          *this* frame. Two frames therefore share codes only when they
          contain the same set of values for a column.
        * ``attack`` and ``difficulty`` are dropped.

    Raises
    ------
    ValueError
        If ``df`` does not have the expected number of columns.
    """
    expected = len(_KDD_COLUMN_NAMES)
    if df.shape[1] != expected:
        raise ValueError(
            f"Expected {expected} columns, got {df.shape[1]}"
        )

    # Copy first: renaming columns or adding "class" on the caller's frame
    # would make a second call on the same raw data fail.
    prepared = df.copy()
    prepared.columns = list(_KDD_COLUMN_NAMES)

    # Binary target: normal traffic -> 1, any kind of attack -> 0.
    prepared["class"] = (prepared["attack"] == "normal").astype("int64")

    # The raw label and the difficulty score are not model inputs.
    prepared = prepared.drop(columns=["attack", "difficulty"])

    # Sorted factorization yields 0..k-1 codes in sorted order of the values.
    for column in _KDD_CATEGORICAL_COLUMNS:
        codes, _uniques = pd.factorize(prepared[column], sort=True)
        prepared[column] = codes

    return prepared



In [6]:
# Load each raw file and run it straight through the preparation step.
# NOTE(review): the file name suggests KDDTrain+_20Percent is drawn from
# KDDTrain+; if so, df_val overlaps df_train and validation scores will be
# optimistic -- confirm, and consider a held-out file instead.
df_train = prepate_dataset(DataLoading("./NSL-KDD/KDDTrain+.txt"))
df_val = prepate_dataset(DataLoading("./NSL-KDD/KDDTrain+_20Percent.txt"))

Loading the data
Loading the data


#### Pre-Process DATA

In [7]:
# Separate every prepared frame into a feature matrix (all columns except
# 'class') and a label vector ('class'). The *_test arrays are built from df_val.

x_train, y_train = df_train.drop(columns="class").to_numpy(), df_train["class"].to_numpy()
x_test, y_test = df_val.drop(columns="class").to_numpy(), df_val["class"].to_numpy()

# Quick sanity check of the four array shapes.
print(*(array.shape for array in (x_train, y_train, x_test, y_test)))

(125973, 41) (125973,) (25192, 41) (25192,)


In [8]:
# normalize the data
#
# Scale every feature column to [0, 1] with statistics taken from the training
# set only, then apply the *same* transform to the validation set.
# Dividing each matrix by its own whole-matrix norm (np.linalg.norm without an
# axis) gave the two sets different scale factors, and a single scalar divisor
# leaves small-valued columns negligible next to the largest-valued one.

feature_min = x_train.min(axis=0)
feature_span = x_train.max(axis=0) - feature_min
# Columns that are constant in the training set have zero span; divide by 1
# so they map to 0 instead of producing NaN.
feature_span = np.where(feature_span == 0, 1.0, feature_span)

x_train = (x_train - feature_min) / feature_span
x_test = (x_test - feature_min) / feature_span

In [9]:
# Add a trailing axis of length 1 so the arrays are 3-D, as the recurrent
# layers below require: (samples, features) -> (samples, features, 1).

x_train = x_train.reshape(*x_train.shape[:2], 1)
x_test = x_test.reshape(*x_test.shape[:2], 1)

#### BiGRU

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GRU, Bidirectional, Flatten,Embedding
from keras.utils import plot_model

2022-09-29 11:24:11.211889: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [12]:
# Binary classifier: two stacked bidirectional GRU layers over the
# (41, 1) input, a stack of fully connected layers, and a sigmoid output.
model = Sequential()

# First recurrent layer returns the full sequence so a second one can follow.
model.add(Bidirectional(GRU(64, return_sequences=True), input_shape=(41, 1)))
model.add(Dropout(0.2))

# Second recurrent layer returns only its final state.
model.add(Bidirectional(GRU(32)))
model.add(Dropout(0.2))

# Five identical Dense(64, relu) + Dropout(0.2) blocks.
for dense_block in range(5):
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))

# The output layer is fed directly by the last 64-unit block. The former
# Dense(1, relu) + Dropout(0.2) pair in front of it was removed: it squeezed
# everything through a single unit whose only activation was randomly zeroed
# during training and which can get stuck at zero output.
model.add(Dense(1, activation='sigmoid'))

In [13]:
# Print the layer-by-layer table (layer type, output shape, parameter count).
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_3 (Bidirectio  (None, 41, 128)          25728     
 nal)                                                            
                                                                 
 dropout_4 (Dropout)         (None, 41, 128)           0         
                                                                 
 bidirectional_4 (Bidirectio  (None, 64)               31104     
 nal)                                                            
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 64)                4160      
                                                                 
 dropout_6 (Dropout)         (None, 64)               

In [18]:
# Adam with a slowly decaying learning rate.
# The `decay=` keyword is rejected by the newer Keras optimizers, so the same
# rule (lr = 0.001 / (1 + 1e-6 * step)) is expressed as a schedule, which is
# accepted by both older and newer optimizer implementations.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=1e-6,
)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

# Keep the returned History object so the per-epoch metrics can be inspected
# or plotted after training.
history = model.fit(x_train,
                    y_train,
                    epochs=100,
                    validation_data=(x_test, y_test))



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78