In [1]:
####################################################################
############# All the Dependencies #################################
####################################################################
import pandas as pd
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM,MaxPooling2D,MaxPooling1D,Convolution1D
from keras.layers import Dense,Activation , Dropout,GRU, Bidirectional, Flatten,Conv1D
import plotly.graph_objects as go
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,precision_score,accuracy_score,roc_auc_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from plotly.offline import plot
import plotly.offline as py

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
train_df= pd.read_csv(r'C:\Users\mkahs\Repository\CICIDS\ProcessedDataset\train_MachineLearningCVE.csv')
test_df= pd.read_csv(r'C:\Users\mkahs\Repository\CICIDS\ProcessedDataset\test_MachineLearningCVE.csv')

In [4]:
print(train_df.shape)
print(test_df.shape)

(2263508, 80)
(565877, 80)


In [5]:
col_list= train_df.columns.to_list()
col_list

['Unnamed: 0',
 'Destination Port',
 'Flow Duration',
 'Total Fwd Packets',
 'Total Backward Packets',
 'Total Length of Fwd Packets',
 'Total Length of Bwd Packets',
 'Fwd Packet Length Max',
 'Fwd Packet Length Min',
 'Fwd Packet Length Mean',
 ' Fwd Packet Length Std',
 'Bwd Packet Length Max',
 'Bwd Packet Length Min',
 'Bwd Packet Length Mean',
 'Bwd Packet Length Std',
 'Flow Bytes/s',
 ' Flow Packets/s',
 'Flow IAT Mean',
 'Flow IAT Std',
 'Flow IAT Max',
 'Flow IAT Min',
 'Fwd IAT Total',
 'Fwd IAT Mean',
 'Fwd IAT Std',
 'Fwd IAT Max',
 'Fwd IAT Min',
 'Bwd IAT Total',
 'Bwd IAT Mean',
 'Bwd IAT Std',
 'Bwd IAT Max',
 'Bwd IAT Min',
 'Fwd PSH Flags',
 'Bwd PSH Flags',
 'Fwd URG Flags',
 'Bwd URG Flags',
 'Fwd Header Length',
 'Bwd Header Length',
 'Fwd Packets/s',
 'Bwd Packets/s',
 'Min Packet Length',
 'Max Packet Length',
 'Packet Length Mean',
 'Packet Length Std',
 'Packet Length Variance',
 'FIN Flag Count',
 'SYN Flag Count',
 'RST Flag Count',
 'PSH Flag Count',
 'ACK 

In [6]:
train_df.drop('Unnamed: 0', axis=1, inplace=True) 
test_df.drop('Unnamed: 0', axis=1, inplace=True) 
print(train_df.shape)
print(test_df.shape)

(2263508, 79)
(565877, 79)


In [7]:
train_df = train_df.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
x_train = train_df.iloc[:, train_df.columns != 'Label']
x_train.shape

(2263508, 78)

In [8]:
test_df = test_df.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
x_test = test_df.iloc[:, test_df.columns != 'Label']
x_test.shape

(565877, 78)

In [9]:
y_train = train_df[['Label']]
y_test = test_df[['Label']]
print(y_train.shape)
print(y_test.shape)

(2263508, 1)
(565877, 1)


In [10]:
y_train.value_counts()

Label                   
BENIGN                      1818282
DoS Hulk                     184043
PortScan                     127284
DDoS                         102225
DoS GoldenEye                  8284
FTP-Patator                    6345
SSH-Patator                    4707
DoS slowloris                  4633
DoS Slowhttptest               4370
Bot                            1564
Web Attack-Brute Force         1210
Web Attack-XSS                  507
Infiltration                     30
Web Attack-Sql Injection         15
Heartbleed                        9
dtype: int64

In [11]:
y_test.value_counts()

Label                   
BENIGN                      454406
DoS Hulk                     46081
PortScan                     31646
DDoS                         25802
DoS GoldenEye                 2009
FTP-Patator                   1593
SSH-Patator                   1190
DoS slowloris                 1163
DoS Slowhttptest              1129
Bot                            402
Web Attack-Brute Force         297
Web Attack-XSS                 145
Infiltration                     6
Web Attack-Sql Injection         6
Heartbleed                       2
dtype: int64

In [12]:
from sklearn.preprocessing import LabelEncoder
target_train=y_train['Label'].values.tolist()
label_encoder = LabelEncoder()
y_train = np.array(label_encoder.fit_transform(target_train))


In [13]:
y_train

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [14]:
print(set(y_train))

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}


In [15]:
print(len(y_train))

2263508


In [16]:
target_test=y_test['Label'].values.tolist()
label_encoder = LabelEncoder()
y_test = np.array(label_encoder.fit_transform(target_test))
print(set(y_test))
print(len(y_test))

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}
565877


In [17]:
scalar=MinMaxScaler()
x_train=scalar.fit_transform(x_train)
x_test=scalar.fit_transform(x_test)

In [18]:
####################################################################
############# Training Hybrid Model ################################
####################################################################
hybrid = Sequential()
hybrid.add(Convolution1D(128, 3, padding="same",activation="relu",input_shape=(78, 1)))
hybrid.add(Convolution1D(128, 3, padding="same", activation="relu"))
hybrid.add(MaxPooling1D(pool_size=(2)))
hybrid.add(Convolution1D(128, 3, padding="same", activation="relu"))
hybrid.add(Convolution1D(128, 3, padding="same", activation="relu"))
hybrid.add(MaxPooling1D(pool_size=(2)))
hybrid.add(LSTM(256))
hybrid.add(Dropout(0.1))
hybrid.add(Dense(15, activation="softmax"))
hybrid.compile(loss="sparse_categorical_crossentropy", optimizer="adam",metrics=['accuracy'])
hybrid.fit(x_train.reshape(len(x_train), len(x_train[0]),1), y_train,epochs=10,verbose=1,batch_size = 500)
hybrid.save(r"C:\Users\mkahs\Repository\CICIDS\Model\CNN_LSTM.h5")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [21]:
loss, accuracy = hybrid.evaluate(x_test, y_test)
print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100))


Loss: 0.38, Accuracy: 96.93%


In [24]:

from sklearn.metrics import classification_report
y_pred = hybrid.predict(x_test, batch_size=1024, verbose=False)
y_pred = np.argmax(y_pred, axis=1)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.96      1.00      0.98    454406
           1       1.00      0.39      0.56       402
           2       1.00      1.00      1.00     25802
           3       1.00      0.97      0.99      2009
           4       0.99      1.00      0.99     46081
           5       0.96      0.95      0.96      1129
           6       0.95      0.98      0.97      1163
           7       1.00      1.00      1.00      1593
           8       1.00      1.00      1.00         2
           9       0.00      0.00      0.00         6
          10       1.00      0.51      0.68     31646
          11       1.00      0.51      0.68      1190
          12       0.54      0.89      0.67       297
          13       0.00      0.00      0.00         6
          14       1.00      0.03      0.05       145

    accuracy                           0.97    565877
   macro avg       0.83      0.68      0.70    565877
weighted avg       0.97   

In [19]:
####################################################################
############# Training GRU Model ################################
####################################################################
gru = Sequential()
gru.add(GRU(100, activation='relu', return_sequences=True, input_dim = 78))
gru.add(GRU(100, activation='relu'))
gru.add(Dense(15, activation = 'softmax'))
gru.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
gru.fit(x_train.reshape(len(x_train),1, len(x_train[0])), y_train,epochs=10,verbose=1,batch_size = 500)
gru.save(r"C:\Users\mkahs\Repository\CICIDS\Model\GRU.h5")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
loss, accuracy = gru.evaluate(x_test, y_test)
print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100))



ValueError: in user code:

    File "C:\Users\mkahs\anaconda3\envs\RTX2080\lib\site-packages\keras\engine\training.py", line 1525, in test_function  *
        return step_function(self, iterator)
    File "C:\Users\mkahs\anaconda3\envs\RTX2080\lib\site-packages\keras\engine\training.py", line 1514, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\mkahs\anaconda3\envs\RTX2080\lib\site-packages\keras\engine\training.py", line 1507, in run_step  **
        outputs = model.test_step(data)
    File "C:\Users\mkahs\anaconda3\envs\RTX2080\lib\site-packages\keras\engine\training.py", line 1471, in test_step
        y_pred = self(x, training=False)
    File "C:\Users\mkahs\anaconda3\envs\RTX2080\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\mkahs\anaconda3\envs\RTX2080\lib\site-packages\keras\engine\input_spec.py", line 214, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" '

    ValueError: Exception encountered when calling layer "sequential_1" (type Sequential).
    
    Input 0 of layer "gru" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 78)
    
    Call arguments received:
      • inputs=tf.Tensor(shape=(None, 78), dtype=float32)
      • training=False
      • mask=None
