In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
import time
import warnings
import random
from matplotlib import pyplot
from tensorflow.keras import Model
from tensorflow.keras import Sequential
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, Input, Dropout, Activation, Dense, MaxPooling2D, Flatten, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adadelta
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Silence two scikit-learn warning categories for the rest of the session.
# The filters are registered in the same order as before.
for ignored_category in (
    sklearn.exceptions.UndefinedMetricWarning,
    sklearn.exceptions.ConvergenceWarning,
):
    warnings.filterwarnings("ignore", category=ignored_category)

In [3]:
# Load a random ~5% subsample of the combined IoT-23 CSV.
# The stdlib RNG is seeded first so the same rows are drawn on every
# Restart & Run All; without the seed each run trains on a different subset.
SAMPLE_SEED = 100
SAMPLE_FRACTION = 0.05
random.seed(SAMPLE_SEED)

# skiprows is called once per row index: row 0 (the header) is always kept,
# every data row is kept with probability SAMPLE_FRACTION.
df = pd.read_csv(
    "../../preprocessing/iot23_combined.csv",
    skiprows=lambda x: x > 0 and random.random() >= SAMPLE_FRACTION,
)
# Drop the "Unnamed: 0" column that comes in from the CSV.
df = df.drop(columns=["Unnamed: 0"])
# Treat empty / whitespace-only string cells as missing values.
df = df.replace(r'^\s*$', np.nan, regex=True)
df

Unnamed: 0,duration,orig_bytes,resp_bytes,missed_bytes,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes,label,proto_icmp,...,conn_state_RSTOS0,conn_state_RSTR,conn_state_RSTRH,conn_state_S0,conn_state_S1,conn_state_S2,conn_state_S3,conn_state_SF,conn_state_SH,conn_state_SHR
0,0.000000,0,0,0.0,1.0,60.0,0.0,0.0,PartOfAHorizontalPortScan,0,...,0,0,0,1,0,0,0,0,0,0
1,2.998806,0,0,0.0,3.0,180.0,0.0,0.0,PartOfAHorizontalPortScan,0,...,0,0,0,1,0,0,0,0,0,0
2,0.000000,0,0,0.0,1.0,60.0,0.0,0.0,PartOfAHorizontalPortScan,0,...,0,0,0,1,0,0,0,0,0,0
3,0.000000,0,0,0.0,1.0,60.0,0.0,0.0,PartOfAHorizontalPortScan,0,...,0,0,0,1,0,0,0,0,0,0
4,0.000000,0,0,0.0,1.0,60.0,0.0,0.0,PartOfAHorizontalPortScan,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67016,0.000000,0,0,0.0,0.0,0.0,0.0,0.0,DDoS,0,...,0,0,0,0,0,0,0,0,0,0
67017,0.000000,0,0,0.0,0.0,0.0,0.0,0.0,DDoS,0,...,0,0,0,0,0,0,0,0,0,0
67018,0.000000,0,0,0.0,0.0,0.0,0.0,0.0,DDoS,0,...,0,0,0,0,0,0,0,0,0,0
67019,0.000000,0,0,0.0,0.0,0.0,0.0,0.0,DDoS,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Label strings listed in the order of the integer code (1..12) each receives.
label_names = [
    'PartOfAHorizontalPortScan',
    'Benign',
    'Attack',
    'C&C',
    'Okiru',
    'C&C-HeartBeat',
    'DDoS',
    'C&C-Torii',
    'C&C-FileDownload',
    'FileDownload',
    'C&C-HeartBeat-FileDownload',
    'C&C-Mirai',
]
# One boolean mask per label, paired positionally with its integer code.
conditions = [df.label == name for name in label_names]
choices = list(range(1, len(label_names) + 1))
# Rows whose label matches none of the names get np.select's default of 0.
df['trueLabel'] = np.select(conditions, choices)

In [5]:
# Numeric connection statistics plus the one-hot protocol / connection-state
# indicator columns used as model inputs.
feature_columns = [
    'duration', 'orig_bytes', 'resp_bytes', 'missed_bytes',
    'orig_pkts', 'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes',
    'proto_icmp', 'proto_tcp', 'proto_udp',
    'conn_state_OTH', 'conn_state_REJ', 'conn_state_RSTO',
    'conn_state_RSTOS0', 'conn_state_RSTR', 'conn_state_RSTRH',
    'conn_state_S0', 'conn_state_S1', 'conn_state_S2', 'conn_state_S3',
    'conn_state_SF', 'conn_state_SH', 'conn_state_SHR',
]
X = df[feature_columns]

# The target is the integer code pd.factorize assigns to each distinct value
# of df['label'] (the 'trueLabel' column is not used here), reshaped into a
# single-column 2-D array.
label_codes = pd.factorize(df['label'])[0]
Y = label_codes.reshape(-1, 1)

In [6]:
# Rescale every feature column to the [0, 1] range spanned by its own
# minimum and maximum over all rows of X.
scaler = MinMaxScaler()
scaler.fit(X)
normalized_x = scaler.transform(X)
normalized_x

array([[2.49879313e-04, 5.37145690e-09, 6.07939692e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [9.99218898e-04, 5.37145690e-09, 6.07939692e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.49879313e-04, 5.37145690e-09, 6.07939692e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [2.49879313e-04, 5.37145690e-09, 6.07939692e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.49879313e-04, 5.37145690e-09, 6.07939692e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.49879313e-04, 5.37145690e-09, 6.07939692e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [7]:
# Split the raw features first, then fit the scaler on the training rows only.
# Fitting MinMaxScaler on the full dataset before splitting leaks the test
# set's min/max into the training features and makes the held-out score
# optimistic. The row assignment is the same as splitting pre-scaled data,
# because it depends only on random_state and the number of samples.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=100, test_size=0.2)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
# Test rows reuse the training min/max, so values may fall slightly
# outside [0, 1]; that is expected.
X_test = scaler.transform(X_test)


In [8]:
# Create an empty Keras Sequential model (a plain linear stack of layers).
model = Sequential()

In [9]:
# Build the whole network in this cell so that re-running it always yields a
# fresh model. Calling model.add(...) on a model created in another cell
# appends a second copy of every layer each time the cell is executed again.
#
# Architecture: five ReLU hidden layers of decreasing width, with 20% dropout
# after every hidden layer except the first, and a 12-way softmax output (one
# unit per label name enumerated for 'trueLabel').
model = Sequential([
    # Explicit Input layer replaces the deprecated `input_dim=` argument;
    # its width follows the feature matrix instead of a hard-coded 24.
    Input(shape=(X_train.shape[1],)),
    Dense(2000, activation='relu'),
    Dense(1500, activation='relu'),
    Dropout(0.2),
    Dense(800, activation='relu'),
    Dropout(0.2),
    Dense(400, activation='relu'),
    Dropout(0.2),
    Dense(150, activation='relu'),
    Dropout(0.2),
    Dense(12, activation='softmax'),
])
# The sparse loss takes integer class ids directly (no one-hot encoding of Y).
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [10]:
# Train on the training split: 100 passes over the data in mini-batches of 64.
model.fit(
    x=X_train,
    y=Y_train,
    epochs=100,
    batch_size=64,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100

KeyboardInterrupt: 