Import Libraries

In [1]:
#basic
import os
import math
import pandas as pd
import time
import datetime
from dateutil import parser
import numpy as np
from scipy.stats.stats import pearsonr, array
from scipy.spatial import distance
import json
from sklearn.decomposition import PCA
import random
import matplotlib.pyplot as plt

#modules
import importlib
import import_ipynb

Utility Functions

In [2]:
def randomListPick(list_obj, num):  
    result = []

    if (num >= len(list_obj)):
        return list_obj, []

    for i in range(num):
        list_indeces = list(range(len(list_obj)))
        index = random.choice(list_indeces)
        result.append(list_obj.pop(index))
        
    return result, list_obj

def randomListPickWithoutPop(list_obj, num):  
    result = []

    if (num >= len(list_obj)):
        return list_obj, []

    for i in range(num):
        list_indeces = list(range(len(list_obj)))
        index = random.choice(list_indeces)
        result.append(list_obj[index])
        
    return result

Genetic Algorithm

In [3]:
class GA_C:
    def __init__(self, init_pop, mutation_rate, crossover_rate, fitness_func):
        self.pop = init_pop
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.fitness_func = fitness_func
        self.size = len(init_pop)
        self.fitness = [1 for x in range(self.size)]
        
    def getPop(self):
        return self.pop
        
    def setFitness(self, fitness):
        self.fitness = fitness

    def breed(self, new_children, label):
        next_gen = new_children        
        if (label == "attack"):
            self.fitness = [(1.0 - x) for x in self.fitness]        
        elite, mutation, cross = self.getPopClasses(self.fitness, self.size - len(new_children))
        
        for kid in elite:
            next_gen.append(self.pop[kid])
        
        for kid in mutation:
            next_gen.append(self.mutate(self.pop[kid], self.fitness[kid]))
        
        cross_zip = list(zip(mutation, cross))
        for kids in cross_zip:
            next_gen.append(self.cross(self.pop[kids[0]], self.pop[kids[1]]))
            
    def mutate(self, gene, fitness):
        for i in range(len(gene)):
            #prob = random.randint(1,101) / 100.0
            #if (prob <= self.mutation_rate):
            prob = random.randint(1,101)
            if (prob > 50):
                gene[i] = gene[i] + (self.mutation_rate * gene[i])
            else:
                gene[i] = gene[i] - (self.mutation_rate * gene[i])
        return gene

    def cross(self, first, second):
        gene = second
        for i in range(len(gene)):
            prob = random.randint(1,101) / 100.0
            if (prob <= self.crossover_rate):
                gene[i] = first[i]
        return gene

    def getPopClasses(self, fitness, new_size):
        size = len(fitness)
        cat_size = math.ceil(new_size * 0.25)
        elite_size = new_size - (cat_size * 2)
        pop_fitness = fitness
        
        fitness.sort(reverse = True)
        fitness = fitness[:new_size]
        elite_cat = fitness[:elite_size]
        mutation_cat = fitness[elite_size:elite_size + cat_size]
        cross_cat = fitness[elite_size + cat_size:]
        
        elite = []
        mutation = []
        cross = []
        
        for i in range(size):
            if pop_fitness[i] in elite_cat:
                elite.append(i)
            elif pop_fitness[i] in mutation_cat:
                mutation.append(i)
            elif pop_fitness[i] in cross_cat:
                cross.append(i)
        
        return elite, mutation, cross

    def checkFitness(self, flow):
        if (self.fitness_func == "dist"):
            return self.fitnessDist(flow)
        elif (self.fitness_func == "diff"):
            return self.fitnessDiff(flow)
        
    def fitnessDiff(self, flow):
        diffs = []
        
        for i in range(self.size):
            diffs.append(self.checkDiff(flow, self.pop[i]))
        
        self.fitness = diffs
        return abs(min(diffs))

    def checkDiff(self, first, second):
        diffs = []
        gene_size = len(first)
        
        for i in range(gene_size):
            diffs.append(self.getDiff(first[i], second[i]))
        
        return (sum(diffs) / len(diffs))

    def getDiff(self, first, second):
        if (first == 0 or second == 0):
            if (first != second):
                return 1.0
            else:
                return 0.0
        else:
            return ((abs(first-second)) * 1.0 / first)

    def fitnessDist(self, flow):
        dists = []
        
        for i in range(self.size):
            dists.append(distance.euclidean(flow, self.pop[i]))
        
        self.fitness = dists
        return abs(min(dists))

    def checkDist(self, first, second):
        print("Distance fitness not defined yet")

Blue Team

In [4]:
class Blue_C:
    def __init__(self, init_pop, size, mutation_rate, crossover_rate, fitness_func):
        genome = GA_C(init_pop, mutation_rate, crossover_rate, fitness_func)
        self.genome = genome
        self.size = size

    def getPop(self):
        return self.genome.getPop()

    def breed(self, label):
        self.genome.breed([], label)

    def defend(self, flow):
        fitness = self.genome.checkFitness(flow)
        return fitness

Red Team

In [5]:
class Red_C:
    def __init__(self, data, init_pop, size, mutation_rate, crossover_rate, fitness_func):
        if len(init_pop) != size:
            extension, data = randomListPick(data, size - len(init_pop))
            init_pop = init_pop + extension

        genome = GA_C(init_pop, mutation_rate, crossover_rate, fitness_func)
        
        self.genome = genome
        self.data = data
        self.size = size

    def getPop(self):
        return self.genome.getPop()

    def breed(self, fitness_values, new_rate):
        new_children, self.data = randomListPick(self.data, new_rate)
        self.genome.setFitness(fitness_values)
        self.genome.breed(new_children, "")

Find strongly correlated features (pearson-cor-coeff >= 0.7)

In [6]:
def findCorrelatedFeatures(net_data):

    features = net_data.columns.values
    feature_size = len(net_data.columns.values)

    correlation_matrix = np.zeros((feature_size, feature_size))
    correlation_matrix = correlation_matrix - 1


    for i in range(feature_size - 1):
        for j in range(i+1, feature_size - 1):
            
            print(features[i], features[j])

            col_1 = net_data[[features[i]]].values
            col_2 = net_data[[features[j]]].values

            correlation_matrix[i][j] = 0
            corr, p = pearsonr(col_1, col_2)

            if math.isnan(corr[0]):
                correlation_matrix[i][j] = 0
                correlation_matrix[j][i] = 0
            else:
                correlation_matrix[i][j] = abs(corr[0])
                correlation_matrix[j][i] = abs(corr[0])

    #print(correlation_matrix)

    extra_features = {}

    for i in range(feature_size - 1):
        for j in range(i+1, feature_size - 1):

            if (correlation_matrix[i][j] >= 0.7) and (features[i] not in extra_features):
                extra_features[features[i]] = features[j]

    print(len(extra_features))
    print(json.dumps(extra_features, sort_keys=False, indent=4))

    return extra_features

Read Data

In [7]:
dataDir = "../data/processed"
filter_ = "Wednesday"

def readNetworkData(dataDir, filter_):
    dirList = os.listdir(dataDir)
    dataFiles = [os.path.join(dataDir, x) for x in dirList if filter_ in x]

    for file in dataFiles:
        print(file)
        data = pd.read_csv(file)
            
    return data

print("dataset Dirs: ", dataDir)
net_data = readNetworkData(dataDir, filter_)

dataset Dirs:  ../data/processed
../data/processed/Wednesday-28-02-2018_TrafficForML_CICFlowMeter.csv


  if (await self.run_code(code, result,  async_=asy)):


Print Data

In [8]:
print(len(net_data.index))
print(net_data["Label"].value_counts(), "\n")
print(len(net_data.columns.values))
print(net_data.columns.values, "\n")

613104
Benign           544200
Infilteration     68871
Label                33
Name: Label, dtype: int64 

80
['Dst Port' 'Protocol' 'Timestamp' 'Flow Duration' 'Tot Fwd Pkts'
 'Tot Bwd Pkts' 'TotLen Fwd Pkts' 'TotLen Bwd Pkts' 'Fwd Pkt Len Max'
 'Fwd Pkt Len Min' 'Fwd Pkt Len Mean' 'Fwd Pkt Len Std' 'Bwd Pkt Len Max'
 'Bwd Pkt Len Min' 'Bwd Pkt Len Mean' 'Bwd Pkt Len Std' 'Flow Byts/s'
 'Flow Pkts/s' 'Flow IAT Mean' 'Flow IAT Std' 'Flow IAT Max'
 'Flow IAT Min' 'Fwd IAT Tot' 'Fwd IAT Mean' 'Fwd IAT Std' 'Fwd IAT Max'
 'Fwd IAT Min' 'Bwd IAT Tot' 'Bwd IAT Mean' 'Bwd IAT Std' 'Bwd IAT Max'
 'Bwd IAT Min' 'Fwd PSH Flags' 'Bwd PSH Flags' 'Fwd URG Flags'
 'Bwd URG Flags' 'Fwd Header Len' 'Bwd Header Len' 'Fwd Pkts/s'
 'Bwd Pkts/s' 'Pkt Len Min' 'Pkt Len Max' 'Pkt Len Mean' 'Pkt Len Std'
 'Pkt Len Var' 'FIN Flag Cnt' 'SYN Flag Cnt' 'RST Flag Cnt' 'PSH Flag Cnt'
 'ACK Flag Cnt' 'URG Flag Cnt' 'CWE Flag Count' 'ECE Flag Cnt'
 'Down/Up Ratio' 'Pkt Size Avg' 'Fwd Seg Size Avg' 'Bwd Seg Size Avg

Remove trash from Data

In [9]:
if 'Timestamp' in net_data.columns:
        net_data = net_data.drop(columns=['Timestamp'])

net_data = net_data[(net_data.Label != 'Label')]
net_data = net_data[(net_data["Flow Byts/s"] != 'Infinity')]
net_data = net_data[(net_data["Flow Pkts/s"] != 'Infinity')]

net_data.index = [''] * len(net_data)

for col in net_data.columns.values:
    if(col != 'Label'):
        net_data[col] = pd.to_numeric(net_data[col])

Print Data after trash removal

In [10]:
pd.set_option('display.max_row', 200, 'display.max_columns', None)

print(len(net_data.index))
print(net_data["Label"].value_counts(), "\n")
print(len(net_data.columns.values))
print(net_data.columns.values, "\n")
display(net_data)

606902
Benign           538666
Infilteration     68236
Name: Label, dtype: int64 

79
['Dst Port' 'Protocol' 'Flow Duration' 'Tot Fwd Pkts' 'Tot Bwd Pkts'
 'TotLen Fwd Pkts' 'TotLen Bwd Pkts' 'Fwd Pkt Len Max' 'Fwd Pkt Len Min'
 'Fwd Pkt Len Mean' 'Fwd Pkt Len Std' 'Bwd Pkt Len Max' 'Bwd Pkt Len Min'
 'Bwd Pkt Len Mean' 'Bwd Pkt Len Std' 'Flow Byts/s' 'Flow Pkts/s'
 'Flow IAT Mean' 'Flow IAT Std' 'Flow IAT Max' 'Flow IAT Min'
 'Fwd IAT Tot' 'Fwd IAT Mean' 'Fwd IAT Std' 'Fwd IAT Max' 'Fwd IAT Min'
 'Bwd IAT Tot' 'Bwd IAT Mean' 'Bwd IAT Std' 'Bwd IAT Max' 'Bwd IAT Min'
 'Fwd PSH Flags' 'Bwd PSH Flags' 'Fwd URG Flags' 'Bwd URG Flags'
 'Fwd Header Len' 'Bwd Header Len' 'Fwd Pkts/s' 'Bwd Pkts/s' 'Pkt Len Min'
 'Pkt Len Max' 'Pkt Len Mean' 'Pkt Len Std' 'Pkt Len Var' 'FIN Flag Cnt'
 'SYN Flag Cnt' 'RST Flag Cnt' 'PSH Flag Cnt' 'ACK Flag Cnt'
 'URG Flag Cnt' 'CWE Flag Count' 'ECE Flag Cnt' 'Down/Up Ratio'
 'Pkt Size Avg' 'Fwd Seg Size Avg' 'Bwd Seg Size Avg' 'Fwd Byts/b Avg'
 'Fwd Pkts/b Avg'

Unnamed: 0,Dst Port,Protocol,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Min,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,CWE Flag Count,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Fwd Byts/b Avg,Fwd Pkts/b Avg,Fwd Blk Rate Avg,Bwd Byts/b Avg,Bwd Pkts/b Avg,Bwd Blk Rate Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
,443,6,94658,6,7,708,3718,387,0,118.000000,159.284651,1460,0,531.142857,673.118224,46757.801771,1.373365e+02,7.888167e+03,1.113004e+04,24325,0,72880,1.457600e+04,1.259038e+04,24385,363,72178,1.202967e+04,1.318926e+04,24718,0,0,0,0,0,132,152,6.338608e+01,73.950432,0,1460,316.142857,519.205881,269574.747253,0,0,1,1,0,0,0,1,1,340.461538,118.000000,531.142857,0,0,0,0,0,0,6,708,7,3718,8192,7484,3,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,443,6,206,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,9.708738e+03,2.060000e+02,0.000000e+00,206,206,206,2.060000e+02,0.000000e+00,206,206,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,40,0,9.708738e+03,0.000000,0,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,2,0,0,0,258,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,445,6,165505,3,1,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,2.416845e+01,5.516833e+04,9.547815e+04,165417,35,165505,8.275250e+04,1.169802e+05,165470,35,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,72,32,1.812634e+01,6.042114,0,0,0.000000,0.000000,0.000000,0,0,0,1,0,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,3,0,1,0,8192,8192,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,443,6,102429,6,7,708,3718,387,0,118.000000,159.284651,1460,0,531.142857,673.118224,43210.418924,1.269172e+02,8.535750e+03,1.095664e+04,24473,0,80271,1.605420e+04,1.426975e+04,31379,366,79733,1.328883e+04,1.475348e+04,30931,0,0,0,0,0,132,152,5.857716e+01,68.340021,0,1460,316.142857,519.205881,269574.747253,0,0,1,1,0,0,0,1,1,340.461538,118.000000,531.142857,0,0,0,0,0,0,6,708,7,3718,8192,7484,3,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,443,6,167,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,1.197605e+04,1.670000e+02,0.000000e+00,167,167,167,1.670000e+02,0.000000e+00,167,167,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,40,0,1.197605e+04,0.000000,0,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,2,0,0,0,258,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,443,6,164387,9,7,553,3773,202,0,61.444444,87.534438,1460,0,539.000000,655.432936,26315.949558,9.733130e+01,1.095913e+04,2.869127e+04,112846,0,164387,2.054838e+04,3.781120e+04,112846,12,38444,6.407333e+03,9.634419e+03,23267,0,0,0,0,0,192,152,5.474885e+01,42.582443,0,1460,254.470588,474.712955,225352.389706,0,0,1,1,0,0,0,1,0,270.375000,61.444444,539.000000,0,0,0,0,0,0,9,553,7,3773,8192,119,4,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,445,6,131411,3,1,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,3.043885e+01,4.380367e+04,7.442718e+04,129740,51,131411,6.570550e+04,9.063058e+04,129791,1620,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,72,32,2.282914e+01,7.609713,0,0,0.000000,0.000000,0.000000,0,0,0,1,0,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,3,0,1,0,8192,8192,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,445,6,279349,3,1,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,1.431901e+01,9.311633e+04,1.610449e+05,279075,60,279349,1.396745e+05,1.972269e+05,279135,214,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,72,32,1.073925e+01,3.579751,0,0,0.000000,0.000000,0.000000,0,0,0,1,0,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,3,0,1,0,8192,8192,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,443,6,20771523,6,8,708,3695,387,0,118.000000,159.284651,1460,0,461.875000,652.749446,211.972902,6.739997e-01,1.597809e+06,5.694143e+06,20548866,0,150247,3.004940e+04,2.664772e+04,51700,318,20724374,2.960625e+06,7.755745e+06,20548866,0,0,0,0,0,132,172,2.888570e-01,0.385143,0,1460,293.533333,507.597685,257655.409524,0,0,1,1,0,0,0,1,1,314.500000,118.000000,461.875000,0,0,0,0,0,0,6,708,8,3695,8192,7484,3,20,2.226560e+05,0.000000e+00,222656,222656,2.054887e+07,0.000000e+00,20548866,20548866,Benign
,443,6,201,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,9.950249e+03,2.010000e+02,0.000000e+00,201,201,201,2.010000e+02,0.000000e+00,201,201,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,40,0,9.950249e+03,0.000000,0,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,2,0,0,0,258,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign


Separate and print port 443 data

In [11]:
net_data = net_data[(net_data["Dst Port"] == 443)]
net_data = net_data[(net_data["Protocol"] == 6)]
if "Dst Port" in net_data.columns.values:
    net_data = net_data.drop(["Dst Port"], axis=1)

print(len(net_data.index))
print(net_data["Label"].value_counts(), "\n")
print(len(net_data.columns.values))
print(net_data.columns.values, "\n")
display(net_data)

146050
Benign           129065
Infilteration     16985
Name: Label, dtype: int64 

78
['Protocol' 'Flow Duration' 'Tot Fwd Pkts' 'Tot Bwd Pkts'
 'TotLen Fwd Pkts' 'TotLen Bwd Pkts' 'Fwd Pkt Len Max' 'Fwd Pkt Len Min'
 'Fwd Pkt Len Mean' 'Fwd Pkt Len Std' 'Bwd Pkt Len Max' 'Bwd Pkt Len Min'
 'Bwd Pkt Len Mean' 'Bwd Pkt Len Std' 'Flow Byts/s' 'Flow Pkts/s'
 'Flow IAT Mean' 'Flow IAT Std' 'Flow IAT Max' 'Flow IAT Min'
 'Fwd IAT Tot' 'Fwd IAT Mean' 'Fwd IAT Std' 'Fwd IAT Max' 'Fwd IAT Min'
 'Bwd IAT Tot' 'Bwd IAT Mean' 'Bwd IAT Std' 'Bwd IAT Max' 'Bwd IAT Min'
 'Fwd PSH Flags' 'Bwd PSH Flags' 'Fwd URG Flags' 'Bwd URG Flags'
 'Fwd Header Len' 'Bwd Header Len' 'Fwd Pkts/s' 'Bwd Pkts/s' 'Pkt Len Min'
 'Pkt Len Max' 'Pkt Len Mean' 'Pkt Len Std' 'Pkt Len Var' 'FIN Flag Cnt'
 'SYN Flag Cnt' 'RST Flag Cnt' 'PSH Flag Cnt' 'ACK Flag Cnt'
 'URG Flag Cnt' 'CWE Flag Count' 'ECE Flag Cnt' 'Down/Up Ratio'
 'Pkt Size Avg' 'Fwd Seg Size Avg' 'Bwd Seg Size Avg' 'Fwd Byts/b Avg'
 'Fwd Pkts/b Avg' 'Fwd Blk R

Unnamed: 0,Protocol,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Min,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,CWE Flag Count,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Fwd Byts/b Avg,Fwd Pkts/b Avg,Fwd Blk Rate Avg,Bwd Byts/b Avg,Bwd Pkts/b Avg,Bwd Blk Rate Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
,6,94658,6,7,708,3718,387,0,118.000000,159.284651,1460,0,531.142857,673.118224,46757.801771,137.336517,7.888167e+03,1.113004e+04,24325,0,72880,1.457600e+04,1.259038e+04,24385,363,72178,1.202967e+04,1.318926e+04,24718,0,0,0,0,0,132,152,63.386085,73.950432,0,1460,316.142857,519.205881,269574.747253,0,0,1,1,0,0,0,1,1,340.461538,118.000000,531.142857,0,0,0,0,0,0,6,708,7,3718,8192,7484,3,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,6,206,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,9708.737864,2.060000e+02,0.000000e+00,206,206,206,2.060000e+02,0.000000e+00,206,206,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,40,0,9708.737864,0.000000,0,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,2,0,0,0,258,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,6,102429,6,7,708,3718,387,0,118.000000,159.284651,1460,0,531.142857,673.118224,43210.418924,126.917182,8.535750e+03,1.095664e+04,24473,0,80271,1.605420e+04,1.426975e+04,31379,366,79733,1.328883e+04,1.475348e+04,30931,0,0,0,0,0,132,152,58.577161,68.340021,0,1460,316.142857,519.205881,269574.747253,0,0,1,1,0,0,0,1,1,340.461538,118.000000,531.142857,0,0,0,0,0,0,6,708,7,3718,8192,7484,3,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,6,167,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,11976.047904,1.670000e+02,0.000000e+00,167,167,167,1.670000e+02,0.000000e+00,167,167,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,40,0,11976.047904,0.000000,0,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,2,0,0,0,258,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,6,164387,9,7,553,3773,202,0,61.444444,87.534438,1460,0,539.000000,655.432936,26315.949558,97.331297,1.095913e+04,2.869127e+04,112846,0,164387,2.054838e+04,3.781120e+04,112846,12,38444,6.407333e+03,9.634419e+03,23267,0,0,0,0,0,192,152,54.748855,42.582443,0,1460,254.470588,474.712955,225352.389706,0,0,1,1,0,0,0,1,0,270.375000,61.444444,539.000000,0,0,0,0,0,0,9,553,7,3773,8192,119,4,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,6,20771523,6,8,708,3695,387,0,118.000000,159.284651,1460,0,461.875000,652.749446,211.972902,0.674000,1.597809e+06,5.694143e+06,20548866,0,150247,3.004940e+04,2.664772e+04,51700,318,20724374,2.960625e+06,7.755745e+06,20548866,0,0,0,0,0,132,172,0.288857,0.385143,0,1460,293.533333,507.597685,257655.409524,0,0,1,1,0,0,0,1,1,314.500000,118.000000,461.875000,0,0,0,0,0,0,6,708,8,3695,8192,7484,3,20,2.226560e+05,0.000000e+00,222656,222656,2.054887e+07,0.000000e+00,20548866,20548866,Benign
,6,201,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,9950.248756,2.010000e+02,0.000000e+00,201,201,201,2.010000e+02,0.000000e+00,201,201,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,40,0,9950.248756,0.000000,0,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,2,0,0,0,258,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,6,10466194,11,12,2008,4434,1078,0,182.545455,315.125170,1460,0,369.500000,539.481148,615.505503,2.197551,4.757361e+05,2.172614e+06,10202612,0,263419,2.634190e+04,2.739810e+04,70565,31,10421507,9.474097e+05,3.077904e+06,10227380,0,0,0,0,0,232,252,1.051003,1.146549,0,1460,268.416667,440.861402,194358.775362,0,0,1,1,0,0,0,1,1,280.086957,182.545455,369.500000,0,0,0,0,0,0,11,2008,12,4434,8192,8002,6,20,2.634190e+05,0.000000e+00,263419,263419,1.020261e+07,0.000000e+00,10202612,10202612,Benign
,6,143,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,13986.013986,1.430000e+02,0.000000e+00,143,143,143,1.430000e+02,0.000000e+00,143,143,0,0.000000e+00,0.000000e+00,0,0,0,0,0,0,40,0,13986.013986,0.000000,0,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0,0.000000,0.000000,0.000000,0,0,0,0,0,0,2,0,0,0,255,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,6,20634701,6,8,709,3695,388,0,118.166667,159.622576,1460,0,461.875000,652.749446,213.426887,0.678469,1.587285e+06,5.659180e+06,20421985,1,140514,2.810280e+04,2.479418e+04,47027,301,20589834,2.941405e+06,7.708266e+06,20421985,1,0,0,0,0,132,172,0.290772,0.387696,0,1460,293.600000,507.610903,257668.828571,0,0,1,1,0,0,0,1,1,314.571429,118.166667,461.875000,0,0,0,0,0,0,6,709,8,3695,8192,7483,3,20,2.124960e+05,0.000000e+00,212496,212496,2.042198e+07,0.000000e+00,20421985,20421985,Benign


In [12]:
extra_col = ["Protocol", "Bwd PSH Flags", "Bwd URG Flags", "Pkt Len Min", "Fwd Byts/b Avg", "Fwd Pkts/b Avg", "Fwd Blk Rate Avg",
                "Bwd Blk Rate Avg", "Bwd Pkts/b Avg", "Bwd Byts/b Avg", "Fwd URG Flags", "CWE Flag Count"]

for col in extra_col:
    if col in net_data.columns.values:
        net_data = net_data.drop([col], axis=1)

print(len(net_data.columns.values))
print(net_data.columns.values)
display(net_data)

66
['Flow Duration' 'Tot Fwd Pkts' 'Tot Bwd Pkts' 'TotLen Fwd Pkts'
 'TotLen Bwd Pkts' 'Fwd Pkt Len Max' 'Fwd Pkt Len Min' 'Fwd Pkt Len Mean'
 'Fwd Pkt Len Std' 'Bwd Pkt Len Max' 'Bwd Pkt Len Min' 'Bwd Pkt Len Mean'
 'Bwd Pkt Len Std' 'Flow Byts/s' 'Flow Pkts/s' 'Flow IAT Mean'
 'Flow IAT Std' 'Flow IAT Max' 'Flow IAT Min' 'Fwd IAT Tot' 'Fwd IAT Mean'
 'Fwd IAT Std' 'Fwd IAT Max' 'Fwd IAT Min' 'Bwd IAT Tot' 'Bwd IAT Mean'
 'Bwd IAT Std' 'Bwd IAT Max' 'Bwd IAT Min' 'Fwd PSH Flags'
 'Fwd Header Len' 'Bwd Header Len' 'Fwd Pkts/s' 'Bwd Pkts/s' 'Pkt Len Max'
 'Pkt Len Mean' 'Pkt Len Std' 'Pkt Len Var' 'FIN Flag Cnt' 'SYN Flag Cnt'
 'RST Flag Cnt' 'PSH Flag Cnt' 'ACK Flag Cnt' 'URG Flag Cnt'
 'ECE Flag Cnt' 'Down/Up Ratio' 'Pkt Size Avg' 'Fwd Seg Size Avg'
 'Bwd Seg Size Avg' 'Subflow Fwd Pkts' 'Subflow Fwd Byts'
 'Subflow Bwd Pkts' 'Subflow Bwd Byts' 'Init Fwd Win Byts'
 'Init Bwd Win Byts' 'Fwd Act Data Pkts' 'Fwd Seg Size Min' 'Active Mean'
 'Active Std' 'Active Max' 'Active Min' 'Idle Me

Unnamed: 0,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
,94658,6,7,708,3718,387,0,118.000000,159.284651,1460,0,531.142857,673.118224,46757.801771,137.336517,7.888167e+03,1.113004e+04,24325,0,72880,1.457600e+04,1.259038e+04,24385,363,72178,1.202967e+04,1.318926e+04,24718,0,0,132,152,63.386085,73.950432,1460,316.142857,519.205881,269574.747253,0,0,1,1,0,0,1,1,340.461538,118.000000,531.142857,6,708,7,3718,8192,7484,3,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,206,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,9708.737864,2.060000e+02,0.000000e+00,206,206,206,2.060000e+02,0.000000e+00,206,206,0,0.000000e+00,0.000000e+00,0,0,0,40,0,9708.737864,0.000000,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0.000000,0.000000,0.000000,2,0,0,0,258,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,102429,6,7,708,3718,387,0,118.000000,159.284651,1460,0,531.142857,673.118224,43210.418924,126.917182,8.535750e+03,1.095664e+04,24473,0,80271,1.605420e+04,1.426975e+04,31379,366,79733,1.328883e+04,1.475348e+04,30931,0,0,132,152,58.577161,68.340021,1460,316.142857,519.205881,269574.747253,0,0,1,1,0,0,1,1,340.461538,118.000000,531.142857,6,708,7,3718,8192,7484,3,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,167,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,11976.047904,1.670000e+02,0.000000e+00,167,167,167,1.670000e+02,0.000000e+00,167,167,0,0.000000e+00,0.000000e+00,0,0,0,40,0,11976.047904,0.000000,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0.000000,0.000000,0.000000,2,0,0,0,258,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,164387,9,7,553,3773,202,0,61.444444,87.534438,1460,0,539.000000,655.432936,26315.949558,97.331297,1.095913e+04,2.869127e+04,112846,0,164387,2.054838e+04,3.781120e+04,112846,12,38444,6.407333e+03,9.634419e+03,23267,0,0,192,152,54.748855,42.582443,1460,254.470588,474.712955,225352.389706,0,0,1,1,0,0,1,0,270.375000,61.444444,539.000000,9,553,7,3773,8192,119,4,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,20771523,6,8,708,3695,387,0,118.000000,159.284651,1460,0,461.875000,652.749446,211.972902,0.674000,1.597809e+06,5.694143e+06,20548866,0,150247,3.004940e+04,2.664772e+04,51700,318,20724374,2.960625e+06,7.755745e+06,20548866,0,0,132,172,0.288857,0.385143,1460,293.533333,507.597685,257655.409524,0,0,1,1,0,0,1,1,314.500000,118.000000,461.875000,6,708,8,3695,8192,7484,3,20,2.226560e+05,0.000000e+00,222656,222656,2.054887e+07,0.000000e+00,20548866,20548866,Benign
,201,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,9950.248756,2.010000e+02,0.000000e+00,201,201,201,2.010000e+02,0.000000e+00,201,201,0,0.000000e+00,0.000000e+00,0,0,0,40,0,9950.248756,0.000000,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0.000000,0.000000,0.000000,2,0,0,0,258,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,10466194,11,12,2008,4434,1078,0,182.545455,315.125170,1460,0,369.500000,539.481148,615.505503,2.197551,4.757361e+05,2.172614e+06,10202612,0,263419,2.634190e+04,2.739810e+04,70565,31,10421507,9.474097e+05,3.077904e+06,10227380,0,0,232,252,1.051003,1.146549,1460,268.416667,440.861402,194358.775362,0,0,1,1,0,0,1,1,280.086957,182.545455,369.500000,11,2008,12,4434,8192,8002,6,20,2.634190e+05,0.000000e+00,263419,263419,1.020261e+07,0.000000e+00,10202612,10202612,Benign
,143,2,0,0,0,0,0,0.000000,0.000000,0,0,0.000000,0.000000,0.000000,13986.013986,1.430000e+02,0.000000e+00,143,143,143,1.430000e+02,0.000000e+00,143,143,0,0.000000e+00,0.000000e+00,0,0,0,40,0,13986.013986,0.000000,0,0.000000,0.000000,0.000000,0,0,0,0,1,0,0,0,0.000000,0.000000,0.000000,2,0,0,0,255,-1,0,20,0.000000e+00,0.000000e+00,0,0,0.000000e+00,0.000000e+00,0,0,Benign
,20634701,6,8,709,3695,388,0,118.166667,159.622576,1460,0,461.875000,652.749446,213.426887,0.678469,1.587285e+06,5.659180e+06,20421985,1,140514,2.810280e+04,2.479418e+04,47027,301,20589834,2.941405e+06,7.708266e+06,20421985,1,0,132,172,0.290772,0.387696,1460,293.600000,507.610903,257668.828571,0,0,1,1,0,0,1,1,314.571429,118.166667,461.875000,6,709,8,3695,8192,7483,3,20,2.124960e+05,0.000000e+00,212496,212496,2.042198e+07,0.000000e+00,20421985,20421985,Benign


Feature minimization by removing correlated features

In [13]:
extra_features = findCorrelatedFeatures(net_data.drop(columns=['Label']))

for feature in extra_features:
    if feature in net_data.columns.values:
        net_data = net_data.drop(columns=[feature])

print(len(net_data.columns.values))
print(net_data.columns.values, "\n")
for col in net_data.columns.values:
    print(col)
    print(net_data[col].value_counts())
display(net_data)

Flow Duration Tot Fwd Pkts
Flow Duration Tot Bwd Pkts
Flow Duration TotLen Fwd Pkts
Flow Duration TotLen Bwd Pkts
Flow Duration Fwd Pkt Len Max
Flow Duration Fwd Pkt Len Min
Flow Duration Fwd Pkt Len Mean
Flow Duration Fwd Pkt Len Std
Flow Duration Bwd Pkt Len Max
Flow Duration Bwd Pkt Len Min
Flow Duration Bwd Pkt Len Mean
Flow Duration Bwd Pkt Len Std
Flow Duration Flow Byts/s
Flow Duration Flow Pkts/s
Flow Duration Flow IAT Mean
Flow Duration Flow IAT Std
Flow Duration Flow IAT Max
Flow Duration Flow IAT Min
Flow Duration Fwd IAT Tot
Flow Duration Fwd IAT Mean
Flow Duration Fwd IAT Std
Flow Duration Fwd IAT Max
Flow Duration Fwd IAT Min
Flow Duration Bwd IAT Tot
Flow Duration Bwd IAT Mean
Flow Duration Bwd IAT Std
Flow Duration Bwd IAT Max
Flow Duration Bwd IAT Min
Flow Duration Fwd PSH Flags
Flow Duration Fwd Header Len
Flow Duration Bwd Header Len
Flow Duration Fwd Pkts/s
Flow Duration Bwd Pkts/s
Flow Duration Pkt Len Max
Flow Duration Pkt Len Mean
Flow Duration Pkt Len Std
Flow D

TotLen Bwd Pkts Idle Max
Fwd Pkt Len Max Fwd Pkt Len Min
Fwd Pkt Len Max Fwd Pkt Len Mean
Fwd Pkt Len Max Fwd Pkt Len Std
Fwd Pkt Len Max Bwd Pkt Len Max
Fwd Pkt Len Max Bwd Pkt Len Min
Fwd Pkt Len Max Bwd Pkt Len Mean
Fwd Pkt Len Max Bwd Pkt Len Std
Fwd Pkt Len Max Flow Byts/s
Fwd Pkt Len Max Flow Pkts/s
Fwd Pkt Len Max Flow IAT Mean
Fwd Pkt Len Max Flow IAT Std
Fwd Pkt Len Max Flow IAT Max
Fwd Pkt Len Max Flow IAT Min
Fwd Pkt Len Max Fwd IAT Tot
Fwd Pkt Len Max Fwd IAT Mean
Fwd Pkt Len Max Fwd IAT Std
Fwd Pkt Len Max Fwd IAT Max
Fwd Pkt Len Max Fwd IAT Min
Fwd Pkt Len Max Bwd IAT Tot
Fwd Pkt Len Max Bwd IAT Mean
Fwd Pkt Len Max Bwd IAT Std
Fwd Pkt Len Max Bwd IAT Max
Fwd Pkt Len Max Bwd IAT Min
Fwd Pkt Len Max Fwd PSH Flags
Fwd Pkt Len Max Fwd Header Len
Fwd Pkt Len Max Bwd Header Len
Fwd Pkt Len Max Fwd Pkts/s
Fwd Pkt Len Max Bwd Pkts/s
Fwd Pkt Len Max Pkt Len Max
Fwd Pkt Len Max Pkt Len Mean
Fwd Pkt Len Max Pkt Len Std
Fwd Pkt Len Max Pkt Len Var
Fwd Pkt Len Max FIN Flag Cnt
Fwd Pk

Bwd Pkt Len Min ACK Flag Cnt
Bwd Pkt Len Min URG Flag Cnt
Bwd Pkt Len Min ECE Flag Cnt
Bwd Pkt Len Min Down/Up Ratio
Bwd Pkt Len Min Pkt Size Avg
Bwd Pkt Len Min Fwd Seg Size Avg
Bwd Pkt Len Min Bwd Seg Size Avg
Bwd Pkt Len Min Subflow Fwd Pkts
Bwd Pkt Len Min Subflow Fwd Byts
Bwd Pkt Len Min Subflow Bwd Pkts
Bwd Pkt Len Min Subflow Bwd Byts
Bwd Pkt Len Min Init Fwd Win Byts
Bwd Pkt Len Min Init Bwd Win Byts
Bwd Pkt Len Min Fwd Act Data Pkts
Bwd Pkt Len Min Fwd Seg Size Min
Bwd Pkt Len Min Active Mean
Bwd Pkt Len Min Active Std
Bwd Pkt Len Min Active Max
Bwd Pkt Len Min Active Min
Bwd Pkt Len Min Idle Mean
Bwd Pkt Len Min Idle Std
Bwd Pkt Len Min Idle Max
Bwd Pkt Len Mean Bwd Pkt Len Std
Bwd Pkt Len Mean Flow Byts/s
Bwd Pkt Len Mean Flow Pkts/s
Bwd Pkt Len Mean Flow IAT Mean
Bwd Pkt Len Mean Flow IAT Std
Bwd Pkt Len Mean Flow IAT Max
Bwd Pkt Len Mean Flow IAT Min
Bwd Pkt Len Mean Fwd IAT Tot
Bwd Pkt Len Mean Fwd IAT Mean
Bwd Pkt Len Mean Fwd IAT Std
Bwd Pkt Len Mean Fwd IAT Max
Bwd Pkt

Flow IAT Max Fwd IAT Max
Flow IAT Max Fwd IAT Min
Flow IAT Max Bwd IAT Tot
Flow IAT Max Bwd IAT Mean
Flow IAT Max Bwd IAT Std
Flow IAT Max Bwd IAT Max
Flow IAT Max Bwd IAT Min
Flow IAT Max Fwd PSH Flags
Flow IAT Max Fwd Header Len
Flow IAT Max Bwd Header Len
Flow IAT Max Fwd Pkts/s
Flow IAT Max Bwd Pkts/s
Flow IAT Max Pkt Len Max
Flow IAT Max Pkt Len Mean
Flow IAT Max Pkt Len Std
Flow IAT Max Pkt Len Var
Flow IAT Max FIN Flag Cnt
Flow IAT Max SYN Flag Cnt
Flow IAT Max RST Flag Cnt
Flow IAT Max PSH Flag Cnt
Flow IAT Max ACK Flag Cnt
Flow IAT Max URG Flag Cnt
Flow IAT Max ECE Flag Cnt
Flow IAT Max Down/Up Ratio
Flow IAT Max Pkt Size Avg
Flow IAT Max Fwd Seg Size Avg
Flow IAT Max Bwd Seg Size Avg
Flow IAT Max Subflow Fwd Pkts
Flow IAT Max Subflow Fwd Byts
Flow IAT Max Subflow Bwd Pkts
Flow IAT Max Subflow Bwd Byts
Flow IAT Max Init Fwd Win Byts
Flow IAT Max Init Bwd Win Byts
Flow IAT Max Fwd Act Data Pkts
Flow IAT Max Fwd Seg Size Min
Flow IAT Max Active Mean
Flow IAT Max Active Std
Flow 

Bwd IAT Mean ACK Flag Cnt
Bwd IAT Mean URG Flag Cnt
Bwd IAT Mean ECE Flag Cnt
Bwd IAT Mean Down/Up Ratio
Bwd IAT Mean Pkt Size Avg
Bwd IAT Mean Fwd Seg Size Avg
Bwd IAT Mean Bwd Seg Size Avg
Bwd IAT Mean Subflow Fwd Pkts
Bwd IAT Mean Subflow Fwd Byts
Bwd IAT Mean Subflow Bwd Pkts
Bwd IAT Mean Subflow Bwd Byts
Bwd IAT Mean Init Fwd Win Byts
Bwd IAT Mean Init Bwd Win Byts
Bwd IAT Mean Fwd Act Data Pkts
Bwd IAT Mean Fwd Seg Size Min
Bwd IAT Mean Active Mean
Bwd IAT Mean Active Std
Bwd IAT Mean Active Max
Bwd IAT Mean Active Min
Bwd IAT Mean Idle Mean
Bwd IAT Mean Idle Std
Bwd IAT Mean Idle Max
Bwd IAT Std Bwd IAT Max
Bwd IAT Std Bwd IAT Min
Bwd IAT Std Fwd PSH Flags
Bwd IAT Std Fwd Header Len
Bwd IAT Std Bwd Header Len
Bwd IAT Std Fwd Pkts/s
Bwd IAT Std Bwd Pkts/s
Bwd IAT Std Pkt Len Max
Bwd IAT Std Pkt Len Mean
Bwd IAT Std Pkt Len Std
Bwd IAT Std Pkt Len Var
Bwd IAT Std FIN Flag Cnt
Bwd IAT Std SYN Flag Cnt
Bwd IAT Std RST Flag Cnt
Bwd IAT Std PSH Flag Cnt
Bwd IAT Std ACK Flag Cnt
Bwd IA

Pkt Len Max Fwd Seg Size Min
Pkt Len Max Active Mean
Pkt Len Max Active Std
Pkt Len Max Active Max
Pkt Len Max Active Min
Pkt Len Max Idle Mean
Pkt Len Max Idle Std
Pkt Len Max Idle Max
Pkt Len Mean Pkt Len Std
Pkt Len Mean Pkt Len Var
Pkt Len Mean FIN Flag Cnt
Pkt Len Mean SYN Flag Cnt
Pkt Len Mean RST Flag Cnt
Pkt Len Mean PSH Flag Cnt
Pkt Len Mean ACK Flag Cnt
Pkt Len Mean URG Flag Cnt
Pkt Len Mean ECE Flag Cnt
Pkt Len Mean Down/Up Ratio
Pkt Len Mean Pkt Size Avg
Pkt Len Mean Fwd Seg Size Avg
Pkt Len Mean Bwd Seg Size Avg
Pkt Len Mean Subflow Fwd Pkts
Pkt Len Mean Subflow Fwd Byts
Pkt Len Mean Subflow Bwd Pkts
Pkt Len Mean Subflow Bwd Byts
Pkt Len Mean Init Fwd Win Byts
Pkt Len Mean Init Bwd Win Byts
Pkt Len Mean Fwd Act Data Pkts
Pkt Len Mean Fwd Seg Size Min
Pkt Len Mean Active Mean
Pkt Len Mean Active Std
Pkt Len Mean Active Max
Pkt Len Mean Active Min
Pkt Len Mean Idle Mean
Pkt Len Mean Idle Std
Pkt Len Mean Idle Max
Pkt Len Std Pkt Len Var
Pkt Len Std FIN Flag Cnt
Pkt Len Std S

Subflow Fwd Pkts Fwd Act Data Pkts
Subflow Fwd Pkts Fwd Seg Size Min
Subflow Fwd Pkts Active Mean
Subflow Fwd Pkts Active Std
Subflow Fwd Pkts Active Max
Subflow Fwd Pkts Active Min
Subflow Fwd Pkts Idle Mean
Subflow Fwd Pkts Idle Std
Subflow Fwd Pkts Idle Max
Subflow Fwd Byts Subflow Bwd Pkts
Subflow Fwd Byts Subflow Bwd Byts
Subflow Fwd Byts Init Fwd Win Byts
Subflow Fwd Byts Init Bwd Win Byts
Subflow Fwd Byts Fwd Act Data Pkts
Subflow Fwd Byts Fwd Seg Size Min
Subflow Fwd Byts Active Mean
Subflow Fwd Byts Active Std
Subflow Fwd Byts Active Max
Subflow Fwd Byts Active Min
Subflow Fwd Byts Idle Mean
Subflow Fwd Byts Idle Std
Subflow Fwd Byts Idle Max
Subflow Bwd Pkts Subflow Bwd Byts
Subflow Bwd Pkts Init Fwd Win Byts
Subflow Bwd Pkts Init Bwd Win Byts
Subflow Bwd Pkts Fwd Act Data Pkts
Subflow Bwd Pkts Fwd Seg Size Min
Subflow Bwd Pkts Active Mean
Subflow Bwd Pkts Active Std
Subflow Bwd Pkts Active Max
Subflow Bwd Pkts Active Min
Subflow Bwd Pkts Idle Mean
Subflow Bwd Pkts Idle Std
S

Unnamed: 0,Fwd Pkt Len Min,Bwd Pkt Len Min,Flow IAT Min,Fwd IAT Min,Bwd IAT Min,Bwd Pkts/s,FIN Flag Cnt,SYN Flag Cnt,URG Flag Cnt,Down/Up Ratio,Fwd Seg Size Avg,Bwd Seg Size Avg,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Std,Active Min,Idle Std,Idle Max,Idle Min,Label
,0,0,0,363,0,73.950432,0,0,0,1,118.000000,531.142857,3718,8192,7484,3,20,0.000000e+00,0,0.000000e+00,0,0,Benign
,0,0,206,206,0,0.000000,0,0,0,0,0.000000,0.000000,0,258,-1,0,20,0.000000e+00,0,0.000000e+00,0,0,Benign
,0,0,0,366,0,68.340021,0,0,0,1,118.000000,531.142857,3718,8192,7484,3,20,0.000000e+00,0,0.000000e+00,0,0,Benign
,0,0,167,167,0,0.000000,0,0,0,0,0.000000,0.000000,0,258,-1,0,20,0.000000e+00,0,0.000000e+00,0,0,Benign
,0,0,0,12,0,42.582443,0,0,0,0,61.444444,539.000000,3773,8192,119,4,20,0.000000e+00,0,0.000000e+00,0,0,Benign
,0,0,0,318,0,0.385143,0,0,0,1,118.000000,461.875000,3695,8192,7484,3,20,0.000000e+00,222656,0.000000e+00,20548866,20548866,Benign
,0,0,201,201,0,0.000000,0,0,0,0,0.000000,0.000000,0,258,-1,0,20,0.000000e+00,0,0.000000e+00,0,0,Benign
,0,0,0,31,0,1.146549,0,0,0,1,182.545455,369.500000,4434,8192,8002,6,20,0.000000e+00,263419,0.000000e+00,10202612,10202612,Benign
,0,0,143,143,0,0.000000,0,0,0,0,0.000000,0.000000,0,255,-1,0,20,0.000000e+00,0,0.000000e+00,0,0,Benign
,0,0,1,301,1,0.387696,0,0,0,1,118.166667,461.875000,3695,8192,7483,3,20,0.000000e+00,212496,0.000000e+00,20421985,20421985,Benign


Separate benign and infiltration data

In [14]:
benign_data = net_data[(net_data["Label"] == 'Benign')]
if 'Label' in benign_data.columns:
        benign_data = benign_data.drop(columns=['Label'])

infilteration_data = net_data[(net_data["Label"] == 'Infilteration')]
if 'Label' in infilteration_data.columns:
        infilteration_data = infilteration_data.drop(columns=['Label'])

print(len(benign_data.columns.values))
print(benign_data.columns.values, "\n")
print(net_data["Label"].value_counts())
display(benign_data)
display(infilteration_data)

22
['Fwd Pkt Len Min' 'Bwd Pkt Len Min' 'Flow IAT Min' 'Fwd IAT Min'
 'Bwd IAT Min' 'Bwd Pkts/s' 'FIN Flag Cnt' 'SYN Flag Cnt' 'URG Flag Cnt'
 'Down/Up Ratio' 'Fwd Seg Size Avg' 'Bwd Seg Size Avg' 'Subflow Bwd Byts'
 'Init Fwd Win Byts' 'Init Bwd Win Byts' 'Fwd Act Data Pkts'
 'Fwd Seg Size Min' 'Active Std' 'Active Min' 'Idle Std' 'Idle Max'
 'Idle Min'] 

Benign           129065
Infilteration     16985
Name: Label, dtype: int64


Unnamed: 0,Fwd Pkt Len Min,Bwd Pkt Len Min,Flow IAT Min,Fwd IAT Min,Bwd IAT Min,Bwd Pkts/s,FIN Flag Cnt,SYN Flag Cnt,URG Flag Cnt,Down/Up Ratio,Fwd Seg Size Avg,Bwd Seg Size Avg,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Std,Active Min,Idle Std,Idle Max,Idle Min
,0,0,0,363,0,73.950432,0,0,0,1,118.000000,531.142857,3718,8192,7484,3,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,206,206,0,0.000000,0,0,0,0,0.000000,0.000000,0,258,-1,0,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,0,366,0,68.340021,0,0,0,1,118.000000,531.142857,3718,8192,7484,3,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,167,167,0,0.000000,0,0,0,0,0.000000,0.000000,0,258,-1,0,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,0,12,0,42.582443,0,0,0,0,61.444444,539.000000,3773,8192,119,4,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,0,318,0,0.385143,0,0,0,1,118.000000,461.875000,3695,8192,7484,3,20,0.000000e+00,222656,0.000000e+00,20548866,20548866
,0,0,201,201,0,0.000000,0,0,0,0,0.000000,0.000000,0,258,-1,0,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,0,31,0,1.146549,0,0,0,1,182.545455,369.500000,4434,8192,8002,6,20,0.000000e+00,263419,0.000000e+00,10202612,10202612
,0,0,143,143,0,0.000000,0,0,0,0,0.000000,0.000000,0,255,-1,0,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,1,301,1,0.387696,0,0,0,1,118.166667,461.875000,3695,8192,7483,3,20,0.000000e+00,212496,0.000000e+00,20421985,20421985


Unnamed: 0,Fwd Pkt Len Min,Bwd Pkt Len Min,Flow IAT Min,Fwd IAT Min,Bwd IAT Min,Bwd Pkts/s,FIN Flag Cnt,SYN Flag Cnt,URG Flag Cnt,Down/Up Ratio,Fwd Seg Size Avg,Bwd Seg Size Avg,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Std,Active Min,Idle Std,Idle Max,Idle Min
,0,0,13,13,0,0.000000,0,1,0,0,25.666667,0.000000,0,256,-1,1,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,19,19,0,0.000000,0,1,0,0,25.666667,0.000000,0,256,-1,1,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,14,14,0,0.000000,0,1,0,0,25.666667,0.000000,0,258,-1,1,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,9,9,0,0.000000,0,1,0,0,25.666667,0.000000,0,254,-1,1,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,9,9,0,0.000000,0,1,0,0,25.666667,0.000000,0,258,-1,1,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,16,16,0,0.000000,0,1,0,0,25.666667,0.000000,0,256,-1,1,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,17,17,0,0.000000,0,1,0,0,25.666667,0.000000,0,254,-1,1,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,14,14,0,0.000000,0,1,0,0,25.666667,0.000000,0,258,-1,1,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,15,15,0,0.000000,0,1,0,0,25.666667,0.000000,0,256,-1,1,20,0.000000e+00,0,0.000000e+00,0,0
,0,0,15,15,0,0.000000,0,1,0,0,25.666667,0.000000,0,253,-1,1,20,0.000000e+00,0,0.000000e+00,0,0


Get data in lists

In [15]:
ben_list = benign_data.values.tolist()
inf_list = infilteration_data.values.tolist()
#print(ben_list, "\n\n\n")
#print(inf_list)

Purple Team variables definitions

In [21]:
def purpleTeam(ben_list, inf_list, thresh, fitness_func, pop_size, mutation_rate, crossover_rate, mix_rate, new_rate, iterations, print_every):    
    attack_acc = []
    benign_acc = []
    total_acc = []
    fitness_log = []
    precision = []
    recall = []
    
    blue_init_pop, inf_list = randomListPick(inf_list, pop_size)
    blue = Blue_C(blue_init_pop, pop_size, mutation_rate, crossover_rate, fitness_func)
    red_init_pop, inf_list = randomListPick(inf_list, pop_size)
    red = Red_C(inf_list, red_init_pop, pop_size, mutation_rate, crossover_rate, fitness_func)
    
    for i in range(iterations):
        red_pop = red.getPop()
        red_pop_it = 0
        red_fitness = []
        red_flag = True
        n_attack = 0
        n_benign = 0
        n_attack_correct = 0
        n_benign_correct = 0
        while red_pop_it < pop_size:
            mix_prob = random.randint(1,101) / 100.0
            if mix_prob <= mix_rate:
                next_flow = randomListPickWithoutPop(ben_list, 1)
                next_flow = next_flow[0]
                red_flag = False
                n_benign += 1
            else:
                #print("iteration ", i, len(red_pop), pop_size, red_pop_it)
                next_flow = red_pop[red_pop_it]
                red_pop_it += 1
                red_flag = True
                n_attack += 1

            temp_red_fitness = blue.defend(next_flow)
            fitness_log.append(temp_red_fitness)
            if red_flag:
                red_fitness.append(temp_red_fitness)
                blue.breed("attack")
                if (temp_red_fitness < thresh):
                    n_attack_correct += 1
            else:
                blue.breed("benign")
                if (temp_red_fitness >= thresh):
                    n_benign_correct += 1

        red.breed(red_fitness, new_rate)

        #cur_attack_acc = n_attack_correct * 1.0 / n_attack
        #cur_benign_acc = n_benign_correct * 1.0 / n_benign
        cur_total_acc = (n_attack_correct + n_benign_correct) * 1.0 / (n_attack + n_benign)
        #if (cur_benign_acc < 0.1):
        #    cur_attack_acc = 0.0
        #    cur_benign_acc = 0.0
        #    cur_total_acc = 0.0
        #attack_acc.append(cur_attack_acc)
        #benign_acc.append(cur_benign_acc)
        total_acc.append(cur_total_acc)
        precision.append(n_attack_correct * 1.0 / (n_attack_correct + n_benign - n_benign_correct))
        recall.append(n_attack_correct * 1.0 / (n_attack_correct + n_attack - n_attack_correct))
        
    #print("attack acc")
    #plt.figure()
    #plt.plot(attack_acc)

    #print("benign acc")
    #plt.figure()
    #plt.plot(benign_acc)

    #print("total acc")
    #plt.figure()
    #plt.title("total acc: " + str(thresh))
    #plt.plot(total_acc)
    
    #print("fitness log")
    #plt.figure()
    #plt.plot(fitness_log)
    
    return total_acc[len(total_acc)-1], precision[len(precision)-1], recall[len(recall)-1]

Purple Team begins

In [17]:
pop_size = 300
mutation_rate = 0.01
crossover_rate = 0.7
mix_rate = 0.001
new_rate = 15
iterations = 40
print_every = 1
thresh = 100

In [None]:
results = []

for mutation_ in np.arange(0.01, 0.11, 0.01):
    for crossover_ in np.arange(0.1, 1.0, 0.1):
        for diff_ in range(1000, 30000, 500):
            for it in range(10, 45, 5):
                ben_list = benign_data.values.tolist()
                inf_list = infilteration_data.values.tolist()
                acc, prec, recall = purpleTeam(ben_list, inf_list, diff_, "diff", pop_size, mutation_, crossover_, mix_rate, new_rate, it, print_every)
                result = [mutation_, crossover_, diff_, it, acc, prec, recall]
                results.append(result)
                print(result)

[0.01, 0.1, 1000, 10, 0.6966666666666667, 1.0, 0.6966666666666667]
[0.01, 0.1, 1000, 15, 0.6966666666666667, 1.0, 0.6966666666666667]
[0.01, 0.1, 1000, 20, 0.7, 1.0, 0.7]
[0.01, 0.1, 1000, 25, 0.6733333333333333, 1.0, 0.6733333333333333]
[0.01, 0.1, 1000, 30, 0.71, 1.0, 0.71]
[0.01, 0.1, 1000, 35, 0.7033333333333334, 1.0, 0.7033333333333334]
[0.01, 0.1, 1000, 40, 0.6833333333333333, 1.0, 0.6833333333333333]
[0.01, 0.1, 1500, 10, 0.65, 1.0, 0.65]
[0.01, 0.1, 1500, 15, 0.6833333333333333, 1.0, 0.6833333333333333]
[0.01, 0.1, 1500, 20, 0.6966666666666667, 1.0, 0.6966666666666667]
[0.01, 0.1, 1500, 25, 0.7033333333333334, 1.0, 0.7033333333333334]
[0.01, 0.1, 1500, 30, 0.6933333333333334, 1.0, 0.6933333333333334]
[0.01, 0.1, 1500, 35, 0.7366666666666667, 1.0, 0.7366666666666667]
[0.01, 0.1, 1500, 40, 0.66, 1.0, 0.66]
[0.01, 0.1, 2000, 10, 0.6633333333333333, 1.0, 0.6633333333333333]
[0.01, 0.1, 2000, 15, 0.7, 1.0, 0.7]
[0.01, 0.1, 2000, 20, 0.6766666666666666, 1.0, 0.6766666666666666]
[0.01

In [None]:
print("thresh test")
plt.figure()
plt.plot(accs)

In [None]:
def purpleTeamTest(ben_list, inf_list, thresh, fitness_func, pop_size, mutation_rate, crossover_rate, mix_rate, new_rate, iterations, print_every, test_it):
    attack_acc = []
    benign_acc = []
    total_acc = []
    fitness_log = []
    
    blue_init_pop, inf_list = randomListPick(inf_list, pop_size)
    blue = Blue_C(blue_init_pop, pop_size, mutation_rate, crossover_rate, fitness_func)
    red_init_pop, inf_list = randomListPick(inf_list, pop_size)
    red = Red_C(inf_list, red_init_pop, pop_size, mutation_rate, crossover_rate, fitness_func)
    
    for i in range(iterations):
        red_pop = red.getPop()
        red_pop_it = 0
        red_fitness = []
        red_flag = True
        n_attack = 0
        n_benign = 0
        n_attack_correct = 0
        n_benign_correct = 0
        while red_pop_it < pop_size:
            mix_prob = random.randint(1,101) / 100.0
            if mix_prob <= mix_rate:
                next_flow = randomListPickWithoutPop(ben_list, 1)
                next_flow = next_flow[0]
                red_flag = False
                n_benign += 1
            else:
                #print("iteration ", i, len(red_pop), pop_size, red_pop_it)
                next_flow = red_pop[red_pop_it]
                red_pop_it += 1
                red_flag = True
                n_attack += 1

            temp_red_fitness = blue.defend(next_flow)
            fitness_log.append(temp_red_fitness)
            if red_flag:
                red_fitness.append(temp_red_fitness)
                blue.breed("attack")
                if (temp_red_fitness < thresh):
                    n_attack_correct += 1
            else:
                blue.breed("benign")
                if (temp_red_fitness >= thresh):
                    n_benign_correct += 1

        red.breed(red_fitness, new_rate)

        cur_attack_acc = n_attack_correct * 1.0 / n_attack
        cur_benign_acc = n_benign_correct * 1.0 / n_benign
        cur_total_acc = (n_attack_correct + n_benign_correct) * 1.0 / (n_attack + n_benign)
        
        if(cur_attack_acc > 0.9):
            cur_attack_acc = 0
            cur_benign_acc = 0
            cur_total_acc = 0
            

        attack_acc.append(cur_attack_acc)
        benign_acc.append(cur_benign_acc)
        total_acc.append(cur_total_acc)
        
    print("attack acc")
    plt.figure()
    plt.title("attack acc: " + str(thresh))
    plt.plot(attack_acc)

    print("benign acc")
    plt.figure()
    plt.title("benign acc: " + str(thresh))
    plt.plot(benign_acc)

    print("total acc")
    plt.figure()
    plt.title("total acc: " + str(thresh))
    plt.plot(total_acc)
    print("Best breed: ", total_acc.index(max(total_acc)))
    
    ### TESTING ###
    n_attack = 0
    n_benign = 0
    n_attack_correct = 0
    n_benign_correct = 0

    for i in range(test_it):
        mix_prob = random.randint(1,101) / 100.0
        if mix_prob <= mix_rate:
            next_flow = randomListPickWithoutPop(ben_list, 1)
            next_flow = next_flow[0]
            red_flag = False
            n_benign += 1
        else:
            next_flow = randomListPickWithoutPop(inf_list, 1)
            next_flow = next_flow[0]
            red_flag = True
            n_attack += 1

        temp_red_fitness = blue.defend(next_flow)
        if red_flag:
            if (temp_red_fitness < thresh):
                n_attack_correct += 1
        else:
            if (temp_red_fitness >= thresh):
                n_benign_correct += 1


    attack_acc = n_attack_correct * 1.0 / n_attack
    benign_acc = n_benign_correct * 1.0 / n_benign
    total_acc = (n_attack_correct + n_benign_correct) * 1.0 / (n_attack + n_benign)

    print("attack acc: ", attack_acc)
    print("benign acc: ", benign_acc)
    print("total acc: ", total_acc)

    ga_red_acc = 0
    red_pop = red.getPop()
    for next_flow in red_pop:
        temp_red_fitness = blue.defend(next_flow)
        if (temp_red_fitness < thresh):
            ga_red_acc += 1
    print("evolved attack acc: ", (ga_red_acc * 1.0) / len(red_pop))

iterations = 13
thresh = 3500
ben_list = benign_data.values.tolist()
inf_list = infilteration_data.values.tolist()
purpleTeamTest(ben_list, inf_list, thresh, "diff", pop_size, mutation_rate, crossover_rate, mix_rate, new_rate, iterations, print_every, 1000)

Red Blue declarations

In [None]:
accs = []

for i in range(1000, 100000, 500):
    ben_list = benign_data.values.tolist()
    inf_list = infilteration_data.values.tolist()
    avg_acc = purpleTeam(ben_list, inf_list, i, "dist", pop_size, mutation_rate, crossover_rate, mix_rate, new_rate, iterations, print_every)
    accs.append(avg_acc)
    print("thresh test: ", i, avg_acc)

Red Blue Trains

In [None]:
print("thresh test")
plt.figure()
plt.plot(accs)

In [None]:
accs = []

for i in range(100, 1001, 100):
    ben_list = benign_data.values.tolist()
    inf_list = infilteration_data.values.tolist()
    avg_acc = purpleTeam(ben_list, inf_list, i, "dist", pop_size, mutation_rate, crossover_rate, mix_rate, new_rate, iterations, print_every)
    accs.append(avg_acc)
    print("thresh test: ", i, avg_acc)

In [None]:
print("thresh test")
plt.figure()
plt.plot(accs)

# Reinforcement Learning

In [None]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

In [None]:
ENV_NAME = 'CartPole-v0'

# Get the environment and extract the number of actions available in the Cartpole problem
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

In [None]:
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

In [None]:
policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this slows down training quite a lot. 
dqn.fit(env, nb_steps=5000, visualize=True, verbose=2)

In [None]:
dqn.test(env, nb_episodes=5, visualize=True)