###### Creating the classes for the DRL algorithm

In [1]:
%matplotlib inline
import math
import random
import numpy as np
import pandas as pd
import matplotlib 
import matplotlib.pyplot as plt
import re
from collections import namedtuple
from itertools import count
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T


In [2]:
#Settings
# Passing None removes pandas' display limit, so every column and every row of a
# DataFrame is rendered instead of a truncated view.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [3]:
# Read every CSV in the working directory (the Monday-to-Friday capture files)
# and stack them into a single DataFrame.
import os

# os.listdir returns entries in arbitrary order; sorting makes the row order --
# and therefore the seeded train/test split further down -- reproducible.
filepaths = sorted(f for f in os.listdir(".") if f.endswith('.csv'))
# ignore_index gives the combined frame one unique 0..N-1 index instead of
# repeating each file's own row labels.
df = pd.concat(map(pd.read_csv, filepaths), ignore_index=True)
# Strip stray whitespace around the column headers (e.g. ' Label' -> 'Label').
df.columns = [c.strip() for c in df.columns]

In [4]:
df.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Fwd Header Length.1,Fwd Avg Bytes/Bulk,Fwd Avg Packets/Bulk,Fwd Avg Bulk Rate,Bwd Avg Bytes/Bulk,Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,22,166,1,1,0,0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,12048.19277,166.0,0.0,166,166,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,32,32,6024.096386,6024.096386,0,0,0.0,0.0,0.0,0,0,0,0,1,1,0,0,1,0.0,0.0,0.0,32,0,0,0,0,0,0,1,0,1,0,290,243,0,32,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
1,60148,83,1,2,0,0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,36144.57831,41.5,10.6066,49,34,0,0.0,0.0,0,0,49,49.0,0.0,49,49,0,0,0,0,32,64,12048.19277,24096.38554,0,0,0.0,0.0,0.0,0,0,0,0,1,1,0,0,2,0.0,0.0,0.0,32,0,0,0,0,0,0,1,0,2,0,243,290,0,32,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2,123,99947,1,1,48,48,48,48,48.0,0.0,48,48,48.0,0.0,960.50907,20.010606,99947.0,0.0,99947,99947,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,40,40,10.005303,10.005303,48,48,48.0,0.0,0.0,0,0,0,0,0,0,0,0,1,72.0,48.0,48.0,40,0,0,0,0,0,0,1,48,1,48,-1,-1,0,40,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
3,123,37017,1,1,48,48,48,48,48.0,0.0,48,48,48.0,0.0,2593.403031,54.02923,37017.0,0.0,37017,37017,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,32,32,27.014615,27.014615,48,48,48.0,0.0,0.0,0,0,0,0,0,0,0,0,1,72.0,48.0,48.0,32,0,0,0,0,0,0,1,48,1,48,-1,-1,0,32,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
4,0,111161336,147,0,0,0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,1.322402,761379.0137,2539814.0,13600000,0,111000000,761379.0137,2539814.273,13600000,0,0,0.0,0.0,0,0,0,0,0,0,0,0,1.322402,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,147,0,0,0,-1,-1,0,0,1753752.625,2123197.578,4822992,95,9463032.7,2657727.996,13600000,5700287,BENIGN


In [5]:
#checking for missing values in our dataset
df.isna().sum().sum()

1358

In [6]:
# Turn +inf / -inf into NaN so they can be dropped later together with the
# values that were already missing.
df = df.replace([np.inf, -np.inf], value=np.nan)
# Total NaN count after the replacement: 5734 (it was 1358 before).
df.isna().sum().sum()

5734

In [7]:
# Collect the name of every column that contains at least one NaN/null value,
# then remove all of those columns from the frame in a single drop.
deletecol = [name for name in df.columns if df[name].isnull().values.any()]
df = df.drop(columns=deletecol)

# Names of the columns that were dropped
deletecol

['Flow Bytes/s', 'Flow Packets/s']

In [8]:
df['Label'].value_counts()

BENIGN                        2273097
DoS Hulk                       231073
PortScan                       158930
DDoS                           128027
DoS GoldenEye                   10293
FTP-Patator                      7938
SSH-Patator                      5897
DoS slowloris                    5796
DoS Slowhttptest                 5499
Bot                              1966
Web Attack � Brute Force         1507
Web Attack � XSS                  652
Infiltration                       36
Web Attack � Sql Injection         21
Heartbleed                         11
Name: Label, dtype: int64

In [9]:
df.columns

Index(['Destination Port', 'Flow Duration', 'Total Fwd Packets',
       'Total Backward Packets', 'Total Length of Fwd Packets',
       'Total Length of Bwd Packets', 'Fwd Packet Length Max',
       'Fwd Packet Length Min', 'Fwd Packet Length Mean',
       'Fwd Packet Length Std', 'Bwd Packet Length Max',
       'Bwd Packet Length Min', 'Bwd Packet Length Mean',
       'Bwd Packet Length Std', 'Flow IAT Mean', 'Flow IAT Std',
       'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Total', 'Fwd IAT Mean',
       'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Total',
       'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min',
       'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags',
       'Fwd Header Length', 'Bwd Header Length', 'Fwd Packets/s',
       'Bwd Packets/s', 'Min Packet Length', 'Max Packet Length',
       'Packet Length Mean', 'Packet Length Std', 'Packet Length Variance',
       'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count',

In [10]:
df.shape

(2830743, 77)

# Separating categorical and numerical data

In [11]:
# Split the frame by dtype: numeric columns on one side, everything that is
# not numeric (here the string 'Label' column) on the other.
numerical_data = df.select_dtypes(include="number")
categorical_data = df.select_dtypes(exclude="number")

In [12]:
numerical_data.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Fwd Header Length.1,Fwd Avg Bytes/Bulk,Fwd Avg Packets/Bulk,Fwd Avg Bulk Rate,Bwd Avg Bytes/Bulk,Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
0,22,166,1,1,0,0,0,0,0.0,0.0,0,0,0.0,0.0,166.0,0.0,166,166,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,32,32,6024.096386,6024.096386,0,0,0.0,0.0,0.0,0,0,0,0,1,1,0,0,1,0.0,0.0,0.0,32,0,0,0,0,0,0,1,0,1,0,290,243,0,32,0.0,0.0,0,0,0.0,0.0,0,0
1,60148,83,1,2,0,0,0,0,0.0,0.0,0,0,0.0,0.0,41.5,10.6066,49,34,0,0.0,0.0,0,0,49,49.0,0.0,49,49,0,0,0,0,32,64,12048.19277,24096.38554,0,0,0.0,0.0,0.0,0,0,0,0,1,1,0,0,2,0.0,0.0,0.0,32,0,0,0,0,0,0,1,0,2,0,243,290,0,32,0.0,0.0,0,0,0.0,0.0,0,0
2,123,99947,1,1,48,48,48,48,48.0,0.0,48,48,48.0,0.0,99947.0,0.0,99947,99947,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,40,40,10.005303,10.005303,48,48,48.0,0.0,0.0,0,0,0,0,0,0,0,0,1,72.0,48.0,48.0,40,0,0,0,0,0,0,1,48,1,48,-1,-1,0,40,0.0,0.0,0,0,0.0,0.0,0,0
3,123,37017,1,1,48,48,48,48,48.0,0.0,48,48,48.0,0.0,37017.0,0.0,37017,37017,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,0,0,32,32,27.014615,27.014615,48,48,48.0,0.0,0.0,0,0,0,0,0,0,0,0,1,72.0,48.0,48.0,32,0,0,0,0,0,0,1,48,1,48,-1,-1,0,32,0.0,0.0,0,0,0.0,0.0,0,0
4,0,111161336,147,0,0,0,0,0,0.0,0.0,0,0,0.0,0.0,761379.0137,2539814.0,13600000,0,111000000,761379.0137,2539814.273,13600000,0,0,0.0,0.0,0,0,0,0,0,0,0,0,1.322402,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,147,0,0,0,-1,-1,0,0,1753752.625,2123197.578,4822992,95,9463032.7,2657727.996,13600000,5700287


In [13]:
# Show the label frame before and after re-indexing it to a clean 0..N-1 range
# (drop=True discards the old row labels instead of keeping them as a column).
print(categorical_data.head())
categorical_data = categorical_data.reset_index(drop=True)
print(categorical_data.head())

    Label
0  BENIGN
1  BENIGN
2  BENIGN
3  BENIGN
4  BENIGN
    Label
0  BENIGN
1  BENIGN
2  BENIGN
3  BENIGN
4  BENIGN


# Normalise numerical_data

In [14]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scaler configured to map each feature column onto the [0, 1] range.
scaler=MinMaxScaler(feature_range=(0,1))

In [15]:
# Fit the scaler on the numeric columns and replace them with their scaled values.
# fit_transform returns a bare array, so it is wrapped back into a DataFrame with
# the original column names; the new frame gets a fresh default 0..N-1 index.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so test-set minima/maxima influence the scaling -- confirm this
# is intended.
numerical_data = pd.DataFrame(scaler.fit_transform(numerical_data), columns=numerical_data.columns)

In [16]:
numerical_data.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Fwd Header Length.1,Fwd Avg Bytes/Bulk,Fwd Avg Packets/Bulk,Fwd Avg Bulk Rate,Bwd Avg Bytes/Bulk,Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
0,0.000336,1.491667e-06,0.0,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.491667e-06,0.0,1.491667e-06,1.5e-06,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,0.002008032,0.003012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.00641,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3e-06,0.0,0.00444,0.003723,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.9178,7.999999e-07,0.0,7e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.541666e-07,1.250775e-07,5.166666e-07,4e-07,0.0,0.0,0.0,0.0,9.999999e-08,4.083333e-07,4.083333e-07,0.0,4.083333e-07,4.083333e-07,0.0,0.0,0.0,0.0,0.999856,0.994592,0.004016064,0.012048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7e-06,0.0,0.003723,0.00444,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.001877,0.0008329999,0.0,3e-06,4e-06,7.323179e-08,0.001934,0.020645,0.00808,0.0,0.002458,0.016575,0.008275,0.0,0.0008329999,0.0,0.0008329999,0.0008330082,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,3.335101e-06,5e-06,0.033149,0.001934,0.014384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.018493,0.00808,0.008275,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,3e-06,7.323179e-08,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.001877,0.0003085833,0.0,3e-06,4e-06,7.323179e-08,0.001934,0.020645,0.00808,0.0,0.002458,0.016575,0.008275,0.0,0.0003085833,0.0,0.0003085833,0.0003085916,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,9.004872e-06,1.4e-05,0.033149,0.001934,0.014384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.018493,0.00808,0.008275,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,3e-06,7.323179e-08,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.9263445,0.000664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006344933,0.02995055,0.1133334,1.166667e-07,0.925,0.006345,0.03002,0.113333,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,4.408007e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.000664,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.015943,0.028615,0.043845,8.636364e-07,0.078859,0.034561,0.113333,0.047502


In [17]:
# Put the scaled features and the label column back side by side (axis=1).
# Rows are matched on the index and join='inner' keeps only index values present
# in both frames, so this relies on both frames sharing the same 0..N-1 index.
data = pd.concat([numerical_data, categorical_data], axis=1, join='inner')

In [18]:
data.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Fwd Header Length.1,Fwd Avg Bytes/Bulk,Fwd Avg Packets/Bulk,Fwd Avg Bulk Rate,Bwd Avg Bytes/Bulk,Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,0.000336,1.491667e-06,0.0,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.491667e-06,0.0,1.491667e-06,1.5e-06,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,0.002008032,0.003012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.00641,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3e-06,0.0,0.00444,0.003723,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
1,0.9178,7.999999e-07,0.0,7e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.541666e-07,1.250775e-07,5.166666e-07,4e-07,0.0,0.0,0.0,0.0,9.999999e-08,4.083333e-07,4.083333e-07,0.0,4.083333e-07,4.083333e-07,0.0,0.0,0.0,0.0,0.999856,0.994592,0.004016064,0.012048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7e-06,0.0,0.003723,0.00444,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
2,0.001877,0.0008329999,0.0,3e-06,4e-06,7.323179e-08,0.001934,0.020645,0.00808,0.0,0.002458,0.016575,0.008275,0.0,0.0008329999,0.0,0.0008329999,0.0008330082,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,3.335101e-06,5e-06,0.033149,0.001934,0.014384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.018493,0.00808,0.008275,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,3e-06,7.323179e-08,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
3,0.001877,0.0003085833,0.0,3e-06,4e-06,7.323179e-08,0.001934,0.020645,0.00808,0.0,0.002458,0.016575,0.008275,0.0,0.0003085833,0.0,0.0003085833,0.0003085916,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,9.004872e-06,1.4e-05,0.033149,0.001934,0.014384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.018493,0.00808,0.008275,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,3e-06,7.323179e-08,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
4,0.0,0.9263445,0.000664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006344933,0.02995055,0.1133334,1.166667e-07,0.925,0.006345,0.03002,0.113333,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,4.408007e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.000664,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.015943,0.028615,0.043845,8.636364e-07,0.078859,0.034561,0.113333,0.047502,BENIGN


# Adding a binary label column

In [19]:
# Binary target: 0 for benign traffic, 1 for every attack class.
normal_class = 'BENIGN'
output_column = 'Label_binary'
# Vectorised comparison instead of calling a Python lambda once per row; the
# result is the same int64 0/1 column, computed in a single pass.
data[output_column] = (data['Label'] != normal_class).astype('int64')

In [20]:
data.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Fwd Header Length.1,Fwd Avg Bytes/Bulk,Fwd Avg Packets/Bulk,Fwd Avg Bulk Rate,Bwd Avg Bytes/Bulk,Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label,Label_binary
0,0.000336,1.491667e-06,0.0,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.491667e-06,0.0,1.491667e-06,1.5e-06,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,0.002008032,0.003012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.00641,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3e-06,0.0,0.00444,0.003723,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN,0
1,0.9178,7.999999e-07,0.0,7e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.541666e-07,1.250775e-07,5.166666e-07,4e-07,0.0,0.0,0.0,0.0,9.999999e-08,4.083333e-07,4.083333e-07,0.0,4.083333e-07,4.083333e-07,0.0,0.0,0.0,0.0,0.999856,0.994592,0.004016064,0.012048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7e-06,0.0,0.003723,0.00444,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN,0
2,0.001877,0.0008329999,0.0,3e-06,4e-06,7.323179e-08,0.001934,0.020645,0.00808,0.0,0.002458,0.016575,0.008275,0.0,0.0008329999,0.0,0.0008329999,0.0008330082,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,3.335101e-06,5e-06,0.033149,0.001934,0.014384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.018493,0.00808,0.008275,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,3e-06,7.323179e-08,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN,0
3,0.001877,0.0003085833,0.0,3e-06,4e-06,7.323179e-08,0.001934,0.020645,0.00808,0.0,0.002458,0.016575,0.008275,0.0,0.0003085833,0.0,0.0003085833,0.0003085916,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,9.004872e-06,1.4e-05,0.033149,0.001934,0.014384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.018493,0.00808,0.008275,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,3e-06,7.323179e-08,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN,0
4,0.0,0.9263445,0.000664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006344933,0.02995055,0.1133334,1.166667e-07,0.925,0.006345,0.03002,0.113333,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,4.408007e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.000664,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.015943,0.028615,0.043845,8.636364e-07,0.078859,0.034561,0.113333,0.047502,BENIGN,0


# Label Encoder

In [21]:
from sklearn.preprocessing import LabelEncoder
# Replace the string class names in 'Label' with integer codes.
# NOTE(review): 'Label' is overwritten in place, so the 'Label_binary' cell above
# must not be re-run after this one (no integer code equals 'BENIGN', so every
# row would be marked as an attack).
encoder = LabelEncoder()
data['Label'] = encoder.fit_transform(data['Label'])

In [22]:
data['Label'].unique()

array([ 0,  9,  1, 10,  2,  7, 11,  6,  5,  4,  3,  8, 12, 14, 13])

In [23]:
data.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Fwd Header Length.1,Fwd Avg Bytes/Bulk,Fwd Avg Packets/Bulk,Fwd Avg Bulk Rate,Bwd Avg Bytes/Bulk,Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label,Label_binary
0,0.000336,1.491667e-06,0.0,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.491667e-06,0.0,1.491667e-06,1.5e-06,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,0.002008032,0.003012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.00641,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3e-06,0.0,0.00444,0.003723,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,0.9178,7.999999e-07,0.0,7e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.541666e-07,1.250775e-07,5.166666e-07,4e-07,0.0,0.0,0.0,0.0,9.999999e-08,4.083333e-07,4.083333e-07,0.0,4.083333e-07,4.083333e-07,0.0,0.0,0.0,0.0,0.999856,0.994592,0.004016064,0.012048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7e-06,0.0,0.003723,0.00444,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,0.001877,0.0008329999,0.0,3e-06,4e-06,7.323179e-08,0.001934,0.020645,0.00808,0.0,0.002458,0.016575,0.008275,0.0,0.0008329999,0.0,0.0008329999,0.0008330082,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,3.335101e-06,5e-06,0.033149,0.001934,0.014384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.018493,0.00808,0.008275,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,3e-06,7.323179e-08,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,0.001877,0.0003085833,0.0,3e-06,4e-06,7.323179e-08,0.001934,0.020645,0.00808,0.0,0.002458,0.016575,0.008275,0.0,0.0003085833,0.0,0.0003085833,0.0003085916,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,9.004872e-06,1.4e-05,0.033149,0.001934,0.014384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.018493,0.00808,0.008275,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4e-06,3e-06,7.323179e-08,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,0.0,0.9263445,0.000664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006344933,0.02995055,0.1133334,1.166667e-07,0.925,0.006345,0.03002,0.113333,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,4.408007e-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.000664,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.015943,0.028615,0.043845,8.636364e-07,0.078859,0.034561,0.113333,0.047502,0,0


In [24]:
data.shape

(2830743, 78)

# Replacing the multiclass labels to binary digits (0=Benign, 1 = Attack)

In [11]:
#label_df = df[' Label']
#print(label_df.unique())
#newlabel_df = label_df.replace({'BENIGN' : 0, 'Infiltration' : 1, 'Bot': 1, 'PortScan' : 1, 'DDoS' : 1, 'FTP-Patator' : 1, 'SSH-Patator' : 1, 'DoS slowloris' :1, 'DoS Slowhttptest' : 1, 'DoS Hulk' : 1, 'DoS GoldenEye' : 1, 'Heartbleed' : 1, 'Web Attack � Brute Force' : 1, 'Web Attack � XSS': 1, 'Web Attack � Sql Injection': 1 })
#print(newlabel_df.value_counts())
#df[' Label'] = newlabel_df
#print(df[' Label'].value_counts())

# Convert to Binary Class

In [11]:
#df['Label'] = df['Label'].apply(lambda x: re.sub('[^0-9a-zA-Z -]+', '', x))
#df['Label'] = df['Label'].apply(lambda x: re.sub(' +', ' ', x)) 

In [12]:
#df['Label'].unique()

array(['BENIGN', 'Infiltration', 'Bot', 'PortScan', 'DDoS', 'FTP-Patator',
       'SSH-Patator', 'DoS slowloris', 'DoS Slowhttptest', 'DoS Hulk',
       'DoS GoldenEye', 'Heartbleed', 'Web Attack Brute Force',
       'Web Attack XSS', 'Web Attack Sql Injection'], dtype=object)

# Dataset Partition 

In [26]:
# Feature matrix: every column except the two target columns.
X = data.drop(columns=['Label', 'Label_binary'])
# Targets: the binary label and the encoded multiclass label (saved for later use).
y_binarylabel = data['Label_binary']
y_multilabel = data['Label']

In [27]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing, with the binary label as the target;
# random_state fixes the shuffle so the split is repeatable.
# NOTE(review): no stratify= argument is passed, so the class proportions of the
# two parts are left to chance -- consider stratifying given the imbalanced labels.
X_train,X_test,y_train,y_test = train_test_split(X,y_binarylabel,test_size=0.3,random_state=4)

In [28]:
X_train.shape, X_test.shape

((1981520, 76), (849223, 76))

In [29]:
y_train.shape, y_test.shape

((1981520,), (849223,))

In [30]:
X_train.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Fwd Header Length.1,Fwd Avg Bytes/Bulk,Fwd Avg Packets/Bulk,Fwd Avg Bulk Rate,Bwd Avg Bytes/Bulk,Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
28177,0.001221,0.086922,1.8e-05,1.7e-05,3.7e-05,4.204725e-06,0.018251,0.0,0.015856,0.028151,0.074757,0.0,0.095026,0.091751,0.009658,0.039109,0.083329,4.833333e-07,0.08666667,0.02172975,0.05826,0.08332864,1.7e-06,0.085,0.02128273,0.058814,0.08333208,1.975e-06,0.0,0.0,0.0,0.0,0.999856,0.994592,1.597855e-07,2.396782e-07,0.0,0.058824,0.087909,0.11651,0.013567,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.00641,0.082885,0.015856,0.095026,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,1.8e-05,3.7e-05,1.7e-05,4.204725e-06,0.125015,0.001892,1.9e-05,1.0,0.003917,0.0,0.003917,0.003917,0.083329,0.0,0.083329,0.083329
209111,0.000809,0.000197,0.0,3e-06,6e-06,3.173378e-07,0.003223,0.034409,0.013466,0.0,0.01065,0.071823,0.035859,0.0,0.000197,0.0,0.000197,0.0001967666,0.0,0.0,0.0,0.0,9.999999e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999856,0.994592,1.412549e-05,2.118824e-05,0.055249,0.00838,0.036758,0.015619,0.000244,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.04726,0.013466,0.035859,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6e-06,3e-06,3.173378e-07,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
762215,0.000809,0.000464,1.4e-05,1.4e-05,1.4e-05,5.91957e-07,0.001773,0.018925,0.007406,0.0,0.004967,0.033494,0.016723,0.0,6.6e-05,0.000246,0.00046,1.5e-07,0.000461825,0.0001539417,0.000378,0.0004613833,1.333333e-07,0.0004628083,0.0001542694,0.000379,0.000462,4e-07,0.0,0.0,0.0,0.0,0.999856,0.994592,2.395195e-05,3.592793e-05,0.030387,0.003908,0.020244,0.005904,3.5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.019521,0.007406,0.016723,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,1.4e-05,1.4e-05,1.4e-05,5.91957e-07,0.0,0.0,1.4e-05,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2625115,0.000809,0.000201,5e-06,7e-06,5e-06,2.105414e-07,0.00137,0.014624,0.005723,0.0,0.003533,0.023826,0.011896,0.0,6.7e-05,0.000164,0.0002,1.416667e-07,2.5e-08,2.5e-08,0.0,2.5e-08,1.25e-07,2.5e-08,2.5e-08,0.0,2.5e-08,2.5e-08,0.0,0.0,0.0,0.0,0.999856,0.994592,2.771772e-05,4.157658e-05,0.023481,0.00278,0.014384,0.004052,1.6e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00641,0.015411,0.005723,0.011896,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,5e-06,5e-06,7e-06,2.105414e-07,0.0,0.0,5e-06,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2340337,0.001221,0.972192,9.1e-05,5.8e-05,0.000142,4.56173e-06,0.017446,0.0,0.014652,0.024133,0.038198,0.0,0.030322,0.03978,0.026276,0.053831,0.083318,2.916666e-07,0.975,0.04860962,0.05723,0.08333333,0.0001745083,0.9333333,0.05826748,0.055191,0.08333333,0.0001724,0.0,0.0,0.0,0.0,0.999856,0.994592,6.000184e-08,7.285938e-08,0.0,0.030056,0.037019,0.052909,0.002798,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.032566,0.014652,0.030322,0.999856,0.0,0.0,0.0,0.0,0.0,0.0,9.1e-05,0.000142,5.8e-05,4.56173e-06,0.125015,0.00563,9.4e-05,1.0,0.001763,0.006999,0.015999,0.000334,0.083136,0.000763,0.083318,0.081663


# Feature Elimination

In [32]:
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
import itertools

# Recursive feature elimination driven by a decision tree (fixed random_state):
# the estimator is refitted on a shrinking feature set until 20 features remain.
# verbose=1 prints one progress line per refit.
clf = DecisionTreeClassifier(random_state=0)
rfe = RFE(clf, n_features_to_select = 20,verbose=1)
rfe.fit(X_train, y_train)

Fitting estimator with 76 features.
Fitting estimator with 75 features.
Fitting estimator with 74 features.
Fitting estimator with 73 features.
Fitting estimator with 72 features.
Fitting estimator with 71 features.
Fitting estimator with 70 features.
Fitting estimator with 69 features.
Fitting estimator with 68 features.
Fitting estimator with 67 features.
Fitting estimator with 66 features.
Fitting estimator with 65 features.
Fitting estimator with 64 features.
Fitting estimator with 63 features.
Fitting estimator with 62 features.
Fitting estimator with 61 features.
Fitting estimator with 60 features.
Fitting estimator with 59 features.
Fitting estimator with 58 features.
Fitting estimator with 57 features.
Fitting estimator with 56 features.
Fitting estimator with 55 features.
Fitting estimator with 54 features.
Fitting estimator with 53 features.
Fitting estimator with 52 features.
Fitting estimator with 51 features.
Fitting estimator with 50 features.
Fitting estimator with 49 fe

RFE(estimator=DecisionTreeClassifier(random_state=0), n_features_to_select=20,
    verbose=1)

In [33]:
# Pair every RFE keep/drop flag with its column name, then list the names whose
# flag is set.
feature_map = list(itertools.zip_longest(rfe.get_support(), X_train.columns))
selected_features = [name for keep, name in feature_map if keep]
selected_features

['Destination Port',
 'Flow Duration',
 'Fwd Packet Length Max',
 'Bwd Packet Length Std',
 'Flow IAT Std',
 'Flow IAT Min',
 'Fwd IAT Mean',
 'Fwd IAT Min',
 'Bwd IAT Mean',
 'Bwd IAT Std',
 'Fwd Packets/s',
 'Packet Length Mean',
 'Packet Length Std',
 'PSH Flag Count',
 'Average Packet Size',
 'Avg Bwd Segment Size',
 'Subflow Fwd Packets',
 'Init_Win_bytes_forward',
 'Init_Win_bytes_backward',
 'Active Std']

In [34]:
# Keep only the RFE-selected columns in both splits.
# `a` is the boolean keep/drop mask in original column order; it is still
# defined in case a later cell reads it.
a = [keep for keep, _ in feature_map]
# Select by column name rather than by positional mask: the result is the same
# on the first run, and re-running the cell is a harmless no-op instead of
# failing because the 76-entry mask no longer matches the 20 remaining columns.
X_train = X_train.loc[:, selected_features]
X_test = X_test.loc[:, selected_features]

In [35]:
X_train.shape, X_test.shape

((1981520, 20), (849223, 20))

We have reduced the number of features from 76 to 20 by using the Recursive Feature Elimination (RFE) method.

In [41]:
# Plain NumPy arrays of the selected features; `states` is the number of
# feature columns and is passed to the network below as its input width.
X_train_arr = X_train.to_numpy()
X_test_arr = X_test.to_numpy()
states = X_train_arr.shape[-1]
print(states)

20


In [42]:
# Plain NumPy arrays of the labels; `num_actions` is the number of distinct
# label values found in the training labels.
y_train_arr = y_train.to_numpy()
y_test_arr = y_test.to_numpy()
num_actions = np.unique(y_train_arr).size
print(num_actions)

2


# DQN Class

In [43]:
class DQN(nn.Module):
    """Fully connected network producing one output value per action.

    Layer widths are states -> 10 -> 15 -> 2 -> num_actions, with a ReLU after
    every layer except the output layer.
    """

    def __init__(self, states, num_actions):
        """Create the four linear layers.

        Args:
            states: number of input features per sample.
            num_actions: number of output units.
        """
        super().__init__()

        # Layers are created in network order, which also fixes the order in
        # which their parameters are randomly initialised.
        self.fc1 = nn.Linear(states, 10)
        self.fc2 = nn.Linear(10, 15)
        self.fc3 = nn.Linear(15, 2)
        self.out = nn.Linear(2, num_actions)

    def forward(self, t):
        """Return the raw output-layer activations for the batch ``t``.

        Every dimension after the first is flattened before the first layer.
        """
        hidden = t.flatten(start_dim=1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return self.out(hidden)
    

# Experience Class

In [44]:
# One transition record: the state, the action taken, the resulting state and the reward.
Experience = namedtuple('Experience', ['state', 'action', 'next_state', 'reward'])

In [45]:
# Quick check of the field layout.
e = Experience(state=2, action=3, next_state=1, reward=4)
e

Experience(state=2, action=3, next_state=1, reward=4)

# Replay Memory Class

In [46]:
class ReplayMemory():
    """Fixed-capacity store of experiences; once full, new entries overwrite the oldest."""

    def __init__(self, capacity):
        """
        Args:
            capacity: maximum number of experiences held at once (an int).
        """
        self.capacity = capacity
        self.memory = []
        self.push_count = 0  # total number of pushes; selects the slot to overwrite

    def push(self, experience):
        """Store ``experience``, overwriting slot ``push_count % capacity`` once full."""
        # capacity is an int, so it is compared directly; calling len() on it
        # raised TypeError on every push.
        if len(self.memory) < self.capacity:
            self.memory.append(experience)
        else:
            self.memory[self.push_count % self.capacity] = experience
        self.push_count += 1

    def sample(self, batch_size):
        """Return ``batch_size`` stored experiences drawn at random without replacement.

        Raises:
            ValueError: if ``batch_size`` exceeds the number of stored experiences.
        """
        return random.sample(self.memory, batch_size)

    def can_provide_sample(self, batch_size):
        """Return True when at least ``batch_size`` experiences are stored."""
        return len(self.memory) >= batch_size

# Epsilon Greedy Strategy

This strategy helps the agent identify when to exploit and when to explore the environment. This is done based on the epsilon value that we assign.

We could also implement the Boltzmann strategy, which uses a temperature variable T that decays over time.

In [47]:
class EpsilonGreedyStrategy():
    """Exploration-rate schedule that decays exponentially from ``start`` towards ``end``."""

    def __init__(self, start, end, decay):
        """Store the initial rate, the limiting rate and the decay constant."""
        self.start, self.end, self.decay = start, end, decay

    def get_exploration_rate(self, current_step):
        """Return ``end + (start - end) * exp(-current_step * decay)``."""
        span = self.start - self.end
        attenuation = math.exp(-1. * current_step * self.decay)
        return self.end + span * attenuation

# Reinforcement Learning Agent

In [48]:
class Agent():
    """Epsilon-greedy action selector that counts the actions it has chosen."""

    def __init__(self, strategy, num_actions):
        """
        Args:
            strategy: object exposing ``get_exploration_rate(step)``.
            num_actions: number of discrete actions to choose from.
        """
        self.current_step = 0
        self.strategy = strategy
        self.num_actions = num_actions

    def select_action(self, state, policy_net):
        """Return an action index as a plain Python int.

        With probability equal to the current exploration rate a random action
        is returned; otherwise the argmax of ``policy_net(state)`` along dim 1.
        ``current_step`` is incremented on every call.
        """
        # Use the strategy owned by this agent; the previous code read a
        # notebook-level global named `strategy`, ignoring the constructor argument.
        rate = self.strategy.get_exploration_rate(self.current_step)
        self.current_step += 1

        if rate > random.random():
            return random.randrange(self.num_actions)  # explore
        else:
            with torch.no_grad():
                # .item() requires a single-element result, i.e. one state per call.
                return policy_net(state).argmax(dim=1).item()  # exploit

# Extract tensors

In [53]:
def extract_tensors(experiences):
    """Concatenate a batch of experiences field by field.

    Returns:
        A 4-tuple ``(states, actions, rewards, next_states)``; note that rewards
        come before next states here, unlike the ``Experience`` field order.
    """
    # Transpose the list of experiences into one Experience of per-field tuples.
    batch = Experience(*zip(*experiences))
    fields = (batch.state, batch.action, batch.reward, batch.next_state)
    return tuple(torch.cat(field) for field in fields)

# Calculating QValues 

In [None]:
class QValues():
    """Static helpers computing the Q-values that enter the DQN loss."""

    @staticmethod
    def get_current(policy_net, states, actions):
        """Return the policy-network output for the action taken in each state.

        ``actions`` gets a trailing dimension and is used as the gather index
        along dim 1, so the result has a single column.
        """
        return policy_net(states).gather(dim=1, index=actions.unsqueeze(-1))

    @staticmethod
    def get_next(target_net, next_states):
        """Return, for each next state, the largest target-network output.

        The result has one entry per row of the network output and carries no
        gradient, so it can be used as a fixed regression target.

        NOTE(review): terminal next states are not treated specially here;
        add masking if the environment has terminal states -- TODO confirm.
        """
        with torch.no_grad():
            # max(dim=1) returns (values, indices); only the values are needed.
            return target_net(next_states).max(dim=1)[0]

# Main Program

In [49]:
# Hyper-parameters: tune and experiment with different values.
batch_size = 256        # experiences sampled from replay memory per optimisation step
gamma = 0.999           # discount factor used in the Bellman equation
eps_start = 1           # starting value of the exploration rate
eps_end = 0.01          # value the exploration rate decays towards
eps_decay = 0.001       # decay constant of the exploration rate
target_update = 10      # how often, in episodes, the target network receives the policy network's weights
memory_size = 100_000   # replay-memory capacity
lr = 1e-4               # learning rate
num_episodes = 10_000   # number of training episodes



In [50]:
#Set up the network environment by calling the network class

In [51]:
# Exploration schedule, the agent that uses it, and the replay buffer.
memory = ReplayMemory(capacity=memory_size)
strategy = EpsilonGreedyStrategy(start=eps_start, end=eps_end, decay=eps_decay)
agent = Agent(strategy=strategy, num_actions=num_actions)

We need to create two objects of the DQN class: a policy_net object and a target_net object. The target_net object clones the DQN model and is used to predict the Q-values for the next states that are passed to the model. This yields the next Q-value, which is used in the Bellman calculation.

In [52]:
# Two networks with the same architecture; the policy network is created first.
policy_net, target_net = DQN(states, num_actions), DQN(states, num_actions)

# The target network starts as an exact copy of the policy network and is kept in eval mode.
initial_weights = policy_net.state_dict()
target_net.load_state_dict(initial_weights)
target_net.eval()

# Only the policy network's parameters are optimised.
optimizer = optim.Adam(policy_net.parameters(), lr=lr)

Now we create two nested for loops: the outer one iterates over episodes and the inner one over timesteps.
In each episode we have to reset the environment; at each timestep we take an action, observe the reward and the next state, and store the resulting experience tuple.


In [None]:
# Training loop: the outer loop runs episodes, the inner loop runs timesteps.
# Relies on `env` (get_state / take_action), which this notebook does not define yet.
episode_durations = []
for episode in range(num_episodes):
    #env.reset() reset the environment
    state = env.get_state()

    for timestep in count():
        action = agent.select_action(state, policy_net)
        reward = env.take_action(action)
        next_state = env.get_state()
        # select_action returns a plain int; it is stored as a 1-element int64
        # tensor so that extract_tensors can torch.cat the batch and gather()
        # receives a valid index.
        # NOTE(review): state / reward must likewise be tensors for torch.cat --
        # confirm against the environment implementation.
        memory.push(Experience(state, torch.tensor([action]), next_state, reward))
        state = next_state

        if memory.can_provide_sample(batch_size):
            experiences = memory.sample(batch_size)
            # Batch names are kept distinct from the global `states` (the int
            # input size used to build DQN) so re-running earlier cells still works.
            batch_states, batch_actions, batch_rewards, batch_next_states = extract_tensors(experiences)

            current_q_values = QValues.get_current(policy_net, batch_states, batch_actions)
            next_q_values = QValues.get_next(target_net, batch_next_states)
            # Bellman target: reward plus discounted best next Q-value.
            target_q_values = (next_q_values * gamma) + batch_rewards

            loss = F.mse_loss(current_q_values, target_q_values.unsqueeze(1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # NOTE(review): assumes the environment exposes a truthy `done` attribute
        # when an episode is over -- TODO confirm once the environment exists.
        # Without it this inner loop never ends, as before.
        if getattr(env, "done", False):
            episode_durations.append(timestep)
            break

    # Sync the target network once every `target_update` episodes (previously
    # this check sat inside the timestep loop and ran on every step).
    if episode % target_update == 0:
        target_net.load_state_dict(policy_net.state_dict())
