# Deep Learning with CICIDS Dataset 

In [2]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.10.0-cp39-cp39-win_amd64.whl (455.9 MB)
     -------------------------------------- 455.9/455.9 MB 7.7 MB/s eta 0:00:00
Collecting protobuf<3.20,>=3.9.2
  Downloading protobuf-3.19.6-cp39-cp39-win_amd64.whl (895 kB)
     ------------------------------------- 895.9/895.9 kB 11.4 MB/s eta 0:00:00
Collecting tensorflow-io-gcs-filesystem>=0.23.1
  Downloading tensorflow_io_gcs_filesystem-0.27.0-cp39-cp39-win_amd64.whl (1.5 MB)
     ---------------------------------------- 1.5/1.5 MB 13.5 MB/s eta 0:00:00
Collecting flatbuffers>=2.0
  Downloading flatbuffers-22.10.26-py2.py3-none-any.whl (26 kB)
Collecting astunparse>=1.6.0
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting tensorboard<2.11,>=2.10
  Downloading tensorboard-2.10.1-py3-none-any.whl (5.9 MB)
     ---------------------------------------- 5.9/5.9 MB 14.4 MB/s eta 0:00:00
Collecting keras-preprocessing>=1.1.1
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-

In [3]:
import os
from os.path import join
import glob
import pandas as pd
import numpy as np
import time
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow.keras as keras

In [4]:
keras.__version__

'2.10.0'

In [5]:
%tensorflow_version

UsageError: Line magic function `%tensorflow_version` not found.


In [6]:
def display_metrics(y_test, y_pred, label_names):
  print('\nAccuracy: {:.2f}\n'.format(accuracy_score(y_test, y_pred)))

  print('Micro Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='micro')))
  print('Micro Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='micro')))
  print('Micro F1-score: {:.2f}\n'.format(f1_score(y_test, y_pred, average='micro')))

  print('Macro Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='macro')))
  print('Macro Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='macro')))
  print('Macro F1-score: {:.2f}\n'.format(f1_score(y_test, y_pred, average='macro')))

  print('Weighted Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='weighted')))
  print('Weighted Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='weighted')))
  print('Weighted F1-score: {:.2f}'.format(f1_score(y_test, y_pred, average='weighted')))

  print('\nClassification Report\n')
  print(classification_report(y_test, y_pred, target_names=label_names))

In [7]:
def display_all(df):
    with pd.option_context("display.max_rows", 100, "display.max_columns", 100): 
        print(df)

In [8]:
def make_value2index(attacks):
    #make dictionary
    attacks = sorted(attacks)
    d = {}
    counter=0
    for attack in attacks:
        d[attack] = counter
        counter+=1
    return d

In [9]:
# chganges label from string to integer/index
def encode_label(Y_str):
    labels_d = make_value2index(np.unique(Y_str))
    Y = [labels_d[y_str] for y_str  in Y_str]
    Y = np.array(Y)
    return np.array(Y)

In [10]:
# All columns
col_names = np.array(['dst sport count', 'src dport count', 'dst src count', 'dport count', 'sport count', 'dst host count','src host count','Source Port', 'Destination Port',
                      'Protocol', 'Flow Duration', 'Total Fwd Packets', 'Total Backward Packets', 'Total Length of Fwd Packets',
                      'Total Length of Bwd Packets', 'Fwd Packet Length Max', 'Fwd Packet Length Min', 'Fwd Packet Length Mean',
                      'Fwd Packet Length Std', 'Bwd Packet Length Max', 'Bwd Packet Length Min', 'Bwd Packet Length Mean', 'Bwd Packet Length Std',
                      'Flow Bytes/s', 'Flow Packets/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Total',
                      'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Total', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max',
                      'Bwd IAT Min', 'Fwd PSH Flags', 'Fwd URG Flags', 'Fwd Header Length', 'Bwd Header Length',
                      'Fwd Packets/s', 'Bwd Packets/s', 'Min Packet Length', 'Max Packet Length', 'Packet Length Mean', 'Packet Length Std',
                      'Packet Length Variance', 'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count', 'ACK Flag Count',
                      'URG Flag Count', 'CWE Flag Count', 'ECE Flag Count', 'Down/Up Ratio', 'Average Packet Size', 'Avg Fwd Segment Size',
                      'Avg Bwd Segment Size','Subflow Fwd Packets', 'Subflow Fwd Bytes',
                      'Subflow Bwd Packets', 'Subflow Bwd Bytes', 'Init_Win_bytes_forward', 'Init_Win_bytes_backward',
                      'act_data_pkt_fwd', 'min_seg_size_forward', 'Active Mean', 'Active Std', 'Active Max', 'Active Min', 'Idle Mean',
                      'Idle Std', 'Idle Max', 'Idle Min', 'Label'])

In [11]:
df_train = pd.read_csv('train_MachineLearningCVE.csv',names=col_names, skiprows=1)  

In [12]:
print('Train set size: ', df_train.shape)

Train set size:  (2264594, 79)


In [13]:
df_test = pd.read_csv('test_MachineLearningCVE.csv',names=col_names, skiprows=1)  
print('Test set size: ', df_test.shape)

Test set size:  (566149, 79)


In [14]:
df_train.head()

Unnamed: 0,dst sport count,src dport count,dst src count,dport count,sport count,dst host count,src host count,Source Port,Destination Port,Protocol,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,80,6018089.0,5,3,177,994,159,0,35.4,69.143329,...,20.0,275923.0,0.0,275923,275923,5742166.0,0.0,5742166,5742166,0
1,443,323049.0,8,6,531,3208,194,0,66.375,85.880211,...,20.0,0.0,0.0,0,0,0.0,0.0,0,0,0
2,80,39270118.0,9,10,898,3944,431,0,99.777778,187.795619,...,20.0,123253.6667,149943.928,296394,36667,10000000.0,300.561142,10000000,9999980,0
3,4848,43.0,1,1,2,6,2,2,2.0,0.0,...,24.0,0.0,0.0,0,0,0.0,0.0,0,0,10
4,80,5754816.0,3,1,12,0,6,0,4.0,3.464102,...,20.0,0.0,0.0,0,0,0.0,0.0,0,0,0


In [15]:
df_train['Label'].value_counts()

0     1818477
4      184858
10     127144
2      102421
3        8234
7        6350
11       4718
6        4637
5        4399
1        1573
12       1206
14        522
9          29
13         17
8           9
Name: Label, dtype: int64

In [16]:
df_train.describe()

Unnamed: 0,dst sport count,src dport count,dst src count,dport count,sport count,dst host count,src host count,Source Port,Destination Port,Protocol,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
count,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,...,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0,2264594.0
mean,8083.469,14793150.0,9.843101,11.03084,549.6153,17640.19,207.7673,18.6924,58.22619,68.99965,...,26.11347,81275.06,41106.07,152808.2,57990.99,8322365.0,505025.9,8702922.0,7925571.0,1.064424
std,18295.01,33662800.0,808.9885,1076.331,10532.9,2448270.0,718.5458,60.26335,186.3955,281.9866,...,6.577371,633100.9,390790.3,1014727.0,560037.5,23638230.0,4610524.0,24377020.0,23370790.0,2.576436
min,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,53.0,155.0,2.0,1.0,12.0,0.0,6.0,0.0,6.0,0.0,...,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,80.0,31316.0,2.0,2.0,62.0,122.0,37.0,2.0,34.0,0.0,...,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,443.0,3211786.0,5.0,4.0,187.0,482.0,81.0,36.0,50.0,26.16295,...,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,65535.0,120000000.0,219759.0,291922.0,12900000.0,655453000.0,24820.0,2325.0,5940.857,7049.469,...,138.0,110000000.0,74200000.0,110000000.0,110000000.0,120000000.0,76900000.0,120000000.0,120000000.0,14.0


In [17]:
print('Test set: ')
df_test['Label'].value_counts()

Test set: 


0     454620
4      46215
10     31786
2      25606
3       2059
7       1588
11      1179
6       1159
5       1100
1        393
12       301
14       130
9          7
13         4
8          2
Name: Label, dtype: int64

In [18]:
df_test.describe()

Unnamed: 0,dst sport count,src dport count,dst src count,dport count,sport count,dst host count,src host count,Source Port,Destination Port,Protocol,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
count,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0,...,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0,566149.0
mean,8023.536525,14755720.0,7.433398,7.845504,548.051,10252.44,206.930559,18.798663,58.104949,68.552052,...,26.122669,82656.36,41246.28,154679.9,59515.14,8290722.0,499116.3,8667073.0,7897870.0,1.064391
std,18237.981102,33617490.0,438.398407,583.054409,7455.954,1277379.0,711.715142,60.642342,184.868811,277.965989,...,6.579084,707207.1,403580.3,1069064.0,640787.3,23597440.0,4572702.0,24326310.0,23333910.0,2.576357
min,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,53.0,155.0,2.0,1.0,12.0,2.0,6.0,0.0,6.0,0.0,...,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,80.0,31316.0,2.0,2.0,62.0,124.0,37.0,2.0,34.0,0.0,...,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,443.0,3175237.0,5.0,4.0,187.0,480.0,81.0,36.0,50.0,26.162951,...,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,65533.0,120000000.0,199057.0,269619.0,2866110.0,584000000.0,24820.0,1983.0,4638.923469,7125.596846,...,126.0,107000000.0,63900000.0,107000000.0,107000000.0,120000000.0,75145020.0,120000000.0,120000000.0,14.0


In [19]:
df_label = df_train['Label']
data = df_train.drop(columns=['Label'])
Xtrain = data.values
y_train = encode_label(df_label.values)

In [20]:
df_label = df_test['Label']
data = df_test.drop(columns=['Label'])
Xtest = data.values
y_test = encode_label(df_label.values)

In [21]:
from sklearn.preprocessing import MinMaxScaler

In [22]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(Xtrain)
X_train

array([[1.22072175e-03, 5.01507425e-02, 1.82018402e-05, ...,
        0.00000000e+00, 4.78513833e-02, 4.78513833e-02],
       [6.75974670e-03, 2.69207504e-03, 3.18532204e-05, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.22072175e-03, 3.27250989e-01, 3.64036804e-05, ...,
        3.90846738e-06, 8.33333333e-02, 8.33331667e-02],
       ...,
       [1.14290074e-02, 3.58333339e-07, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.71915770e-02, 6.50000011e-07, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [9.97756924e-01, 6.32742594e-02, 0.00000000e+00, ...,
        0.00000000e+00, 6.30086083e-02, 6.30086083e-02]])

In [23]:
X_test = scaler.transform(Xtest)
X_test

array([[8.08728161e-04, 1.50000003e-06, 4.55046005e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.22072175e-03, 7.01777962e-01, 4.09541405e-05, ...,
        0.00000000e+00, 6.96666667e-01, 6.96666667e-01],
       [1.34279393e-03, 1.30000002e-06, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [1.22072175e-03, 1.16711919e-02, 9.10092010e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [8.08728161e-04, 6.83175011e-04, 1.36513802e-05, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [3.54467079e-02, 6.00000010e-07, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [24]:
from sklearn.preprocessing import StandardScaler

In [25]:
scaler = StandardScaler()

X_train = scaler.fit_transform(Xtrain)
X_test = scaler.transform(Xtest)

X_test

array([[-4.38943193e-01, -4.39445625e-01, -9.69494948e-03, ...,
        -1.09537648e-01, -3.57013384e-01, -3.39122989e-01],
       [-4.37467380e-01,  2.06222335e+00,  1.93944960e-04, ...,
        -1.09537648e-01,  3.07244625e+00,  3.23799222e+00],
       [-4.37030103e-01, -4.39446338e-01, -1.09310613e-02, ...,
        -1.09537648e-01, -3.57013384e-01, -3.39122989e-01],
       ...,
       [-4.37467380e-01, -3.97845903e-01, -8.45883767e-03, ...,
        -1.09537648e-01, -3.57013384e-01, -3.39122989e-01],
       [-4.38943193e-01, -4.37015613e-01, -7.22272587e-03, ...,
        -1.09537648e-01, -3.57013384e-01, -3.39122989e-01],
       [-3.14865627e-01, -4.39448833e-01, -1.09310613e-02, ...,
        -1.09537648e-01, -3.57013384e-01, -3.39122989e-01]])

In [26]:
from tensorflow.keras.utils import to_categorical

In [27]:
y_train_ada = y_train
y_test_ada = y_test

In [28]:
y_train = to_categorical(y_train, 15)
y_test = to_categorical(y_test, 15)

In [29]:
import tensorflow as tf
tf.compat.v1.disable_v2_behavior()
from sklearn.ensemble import RandomForestClassifier

#importing confusion matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn import metrics
from sklearn.metrics import accuracy_score

#importing accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import mean_squared_error,mean_absolute_error

Instructions for updating:
non-resource variables are not supported in the long term


In [30]:
labels_d = make_value2index(df_test['Label'])

In [31]:
print(labels_d)

{0: 454619, 1: 455012, 2: 480618, 3: 482677, 4: 528892, 5: 529992, 6: 531151, 7: 532739, 8: 532741, 9: 532748, 10: 564534, 11: 565713, 12: 566014, 13: 566018, 14: 566148}


In [32]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, BatchNormalization, Flatten, Dense, Activation,Dropout,MaxPooling1D
from tensorflow.keras.constraints import max_norm

In [33]:
#hyper-params
batch_size = 7500 # increasing batch size with more gpu added
input_dim = X_train.shape[1]
num_class = 15                   # 15 intrusion classes, including benign traffic class
num_epochs = 6
learning_rates = 1e-4
regularizations = 1e-3
optim = tf.keras.optimizers.Adam(lr=learning_rates, beta_1=0.9, beta_2=0.999, epsilon=1e-8)

print(input_dim)
print(num_class)

78
15


  super().__init__(name, **kwargs)


In [34]:
X_train.shape

(2264594, 78)

In [35]:
#X_train_r = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_train_r = np.zeros((len(X_train), input_dim, 1))
X_train_r[:, :, 0] = X_train[:, :input_dim]
print(X_train_r.shape)

(2264594, 78, 1)


In [36]:
X_test_r = np.zeros((len(X_test), input_dim, 1))
X_test_r[:, :, 0] = X_test[:, :input_dim]
print(X_test_r.shape)

(566149, 78, 1)


In [37]:
type(X_train_r)

numpy.ndarray

In [38]:
model = Sequential()

# input layer
model.add(Conv1D(filters=32, kernel_size=23, activation='relu', padding='same', kernel_initializer='he_uniform', input_shape=(78,1)))
model.add(Conv1D(filters=32, kernel_size=17, activation='relu', padding='same', kernel_initializer='he_uniform'))
model.add(MaxPooling1D(pool_size=2,strides=2))
model.add(Dropout(0.2))
model.add(BatchNormalization(axis=1))

model.add(Conv1D(filters=64, kernel_size=23, activation='relu', padding='same', kernel_initializer='he_uniform'))
model.add(Conv1D(filters=64, kernel_size=17, activation='relu', padding='same', kernel_initializer='he_uniform'))
model.add(MaxPooling1D(pool_size=2,strides=2))
model.add(Dropout(0.2))
model.add(BatchNormalization(axis=1)) 

model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_class))
model.add(Activation('softmax'))

model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 78, 32)            768       
                                                                 
 conv1d_1 (Conv1D)           (None, 78, 32)            17440     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 39, 32)           0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 39, 32)            0         
                                                                 
 batch_normalization (BatchN  (None, 39, 32)           156       
 ormalization)                                                   
                                                  

In [39]:
METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      #tf.keras.metrics.AUC(name='auc'),
]

metrics=METRICS

In [None]:
from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
import tensorflow.keras
import time
time_start = time.time()

nadam = Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)
model.compile(loss = "categorical_crossentropy",optimizer = "nadam", metrics = metrics)

#model.compile(
#      optimizer=tf.keras.optimizers.Adam(lr=1e-4),
#      loss=tf.keras.losses.BinaryCrossentropy(),
#      metrics=metrics)
  

history = model.fit(X_train_r, y_train, 
                    epochs=1, 
                    batch_size=batch_size, 
                    verbose=2)
time_end = time.time()
train_time = time_end - time_start
print("train_time:",train_time)

Train on 2264594 samples
