# **Initialization**

## **Connecting GDrive**

In [None]:
# Mount Google Drive at /content/drive; the dataset CSVs and saved models
# loaded later in this notebook are read from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## **Testing TensorFlow and GPU**

In [None]:
# Abort early unless TensorFlow reports the first GPU; otherwise show the
# device name and the TensorFlow version in use.
import tensorflow as tf

device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
    print(tf.__version__)
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0
2.5.0


## **Importing Libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
np.random.seed(1337)  # for reproducibility
from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Dropout, Activation, Embedding, LSTM, SimpleRNN, GRU
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import regularizers
from keras.utils.np_utils import to_categorical
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score, precision_recall_curve, roc_auc_score, roc_curve
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import LabelEncoder

%matplotlib inline


# **Binary Classification Evaluation**

## **Binary Data Preparation**

In [None]:
## Loading Dataset
# Path fix: the original literal started with a doubled slash ("//content/..."),
# unlike every other Drive path used in this notebook.
dataset = pd.read_csv("/content/drive/MyDrive/M.Tech Project/Dataset and Codes/CIC IDS 2018/cic-ids-2018-preprocessed.csv")
# 'Unnamed: 0' is presumably a saved index column and 'category' the multiclass
# label (TODO confirm); after dropping both, the last column is the target.
dataset = dataset.drop(['Unnamed: 0', 'category'],axis=1)
## Separating Independent (X, features) and Dependent (Y, target) Variables ##
X = dataset.iloc[:, :-1]
Y = dataset.iloc[:,-1]
## Train Test Split for X and Y ##
# NOTE(review): no random_state is passed, so reproducing this split relies on
# the global np.random.seed(1337) call in the imports cell and on running the
# cells in order. Confirm it matches the split the saved models were trained on.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y)
##Normalizing the X_test
# Normalizer rescales each sample (row) on its own, so fitting it on the test
# split does not learn any statistics from the test data.
scaler = Normalizer().fit(X_test)
X_test = scaler.transform(X_test)
np.set_printoptions(precision=3)
##Converting the labels to an array
# X_test is already a NumPy array after transform(); copying it again with
# np.array() only duplicated a large matrix in memory, so only Y_test is converted.
Y_test = np.array(Y_test)

In [None]:
# Load the saved autoencoder for the binary task from Drive and print its
# layer-by-layer summary.
AutoEncoder = load_model('/content/drive/MyDrive/M.Tech Project/Dataset and Codes/Saved Models/AutoEncoder5LayerBinary.h5')
AutoEncoder.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 78)]              0         
_________________________________________________________________
dense (Dense)                (None, 64)                5056      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_3 (Dense)              (None, 78)                5070      
Total params: 14,318
Trainable params: 14,318
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Reconstruction check: the normalized test features are passed as both the
# input and the target, so the returned value is the reconstruction loss.
AutoEncoder.evaluate(X_test, X_test)



1.0846559234778397e-05

In [None]:
# Encoder half of the autoencoder: reuse its first three layers (the input
# layer and the two layers that follow it) as a standalone Sequential model.
encoder = Sequential(AutoEncoder.layers[:3])

In [None]:
# Replace the normalized test features with their encoder output and show the
# resulting feature/label shapes.
# NOTE(review): X_test is overwritten in place, so running this cell a second
# time would feed already-encoded data back into the encoder.
X_test = encoder.predict(X_test)
print(X_test.shape, Y_test.shape)

(642670, 32) (642670,)


## **Model Evaluation**

In [None]:
# Load the saved binary classifier (trained on encoded features, judging by
# its file name - TODO confirm) and print its layer summary.
model = load_model('/content/drive/MyDrive/M.Tech Project/Dataset and Codes/Saved Models/AutoEncoder5LayeredDNN5layeredBinary.h5')
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 1024)              33792     
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 768)               787200    
_________________________________________________________________
dropout_1 (Dropout)          (None, 768)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)               393728    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)              

In [None]:
# Round the classifier outputs to hard labels, give them the same shape as
# Y_test, and print per-class precision / recall / F1.
Y_predicted = np.round(model.predict(X_test)).reshape(Y_test.shape)
print(classification_report(Y_test, Y_predicted))

              precision    recall  f1-score   support

           0       0.89      0.99      0.94    208521
           1       1.00      0.94      0.97    434149

    accuracy                           0.96    642670
   macro avg       0.94      0.97      0.95    642670
weighted avg       0.96      0.96      0.96    642670



In [None]:
# Confusion matrix for the binary task: rows are true labels, columns are
# predicted labels.
cm = confusion_matrix(Y_test, Y_predicted)
print(cm)

[[207246   1275]
 [ 26301 407848]]


# **Multiclass Classification Evaluation**

## **Multiclass Data Preparation**

In [None]:
## Loading the dataset and discarding the columns that are not used here
dataset = pd.read_csv(
    "/content/drive/MyDrive/M.Tech Project/Dataset and Codes/CIC IDS 2018/cic-ids-2018-multiclass.csv"
).drop(columns=['Unnamed: 0', 'label'])

## Splitting the Dataset: every column except the last is a feature, the last one is the target
X, Y = dataset.iloc[:, :-1], dataset.iloc[:, -1]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, stratify=Y, test_size=0.2)

## Normalizing the X_test (the training split is left untouched in this cell)
scaler = Normalizer()
X_test = scaler.fit_transform(X_test)

## Converting the test labels to a NumPy array
Y_test = np.array(Y_test)

In [None]:
from imblearn.over_sampling import SMOTE



In [None]:
## Oversampling for better results
# Requested sample count after resampling for class labels 1, 2 and 11.
# NOTE(review): SMOTE is applied to the *test* split here, so the metrics for
# these classes are computed partly on synthetic samples - confirm this is intended.
strategy = dict.fromkeys((1, 2, 11), 2000)
oversample = SMOTE(sampling_strategy=strategy)
X_test, Y_test = oversample.fit_resample(X_test, Y_test)
print(X_test.shape, Y_test.shape)



(439964, 78) (439964,)




In [None]:
## Converting Output Labels to One-Hot (Categorical) Format
# to_categorical turns each integer class label into a one-hot row vector.
# The training-label conversion below is left disabled in this notebook.
# Y_train = to_categorical(Y_train)
Y_test = to_categorical(Y_test)

In [None]:
# Show the shapes of the resampled feature matrix and the one-hot label matrix.
print(X_test.shape, Y_test.shape)

(439964, 78) (439964, 13)


In [None]:
## Loading the AutoEncoder for Compressing Test Data
# Note: this rebinds the AutoEncoder name that earlier held the binary-task model.
AutoEncoder = load_model('/content/drive/MyDrive/M.Tech Project/Dataset and Codes/Saved Models/AutoEncoder5LayerSMOTE.h5')
AutoEncoder.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 78)]              0         
_________________________________________________________________
dense (Dense)                (None, 64)                5056      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_3 (Dense)              (None, 78)                5070      
Total params: 14,318
Trainable params: 14,318
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Reconstruction check on the resampled test features: the same matrix is
# passed as input and as target, so the returned value is the reconstruction loss.
AutoEncoder.evaluate(X_test, X_test)



5.220545062911697e-05

In [None]:
## Extracting the Encoder Module
# The encoder is the autoencoder's first three layers (the input layer plus
# the two layers after it), wrapped in a standalone Sequential model.
encoder = Sequential(AutoEncoder.layers[:3])

In [None]:
## Encoding the Testing Data
# Only the test split is encoded; the training-split line below is left disabled.
# The result is stored under a new name, so X_test keeps the un-encoded features.
# X_train_encoded = encoder.predict(X_train)
X_test_encoded = encoder.predict(X_test)

In [None]:
# Show the shapes of the encoded test features and the one-hot test labels.
print(X_test_encoded.shape, Y_test.shape)

(439964, 32) (439964, 13)


## **Multiclass Model Evaluation**

In [None]:
## Loading the Multiclass Classifier
# Note: this rebinds the `model` name that earlier held the binary classifier.
model = load_model('/content/drive/MyDrive/M.Tech Project/Dataset and Codes/Saved Models/AutoEncoder5LayeredSMOTEDNN5layeredMulticlassSMOTEMORECLASS.h5')
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_18 (Dense)             (None, 1024)              33792     
_________________________________________________________________
dropout_15 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense_19 (Dense)             (None, 768)               787200    
_________________________________________________________________
dropout_16 (Dropout)         (None, 768)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 512)               393728    
_________________________________________________________________
dropout_17 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 256)              

In [None]:
# Evaluate the classifier on the encoded test features against the one-hot labels.
# The result is a two-element list: the loss followed by the compiled metric
# (presumably accuracy - TODO confirm against the training notebook).
model.evaluate(X_test_encoded, Y_test)



[0.1092715635895729, 0.9522188305854797]

In [None]:
## Generating Predictions
# Take the highest-scoring class for every sample and one-hot encode it, so
# Y_predicted keeps the same (n_samples, n_classes) layout as Y_test and each
# row contains exactly one predicted class.
# The previous np.round() thresholding left a row all-zero whenever no class
# score reached 0.5; argmax-based code (such as the confusion-matrix cell)
# then silently counted those rows as class 0.
# Assumes the model emits one score per class (TODO confirm output layer).
Y_scores = model.predict(X_test_encoded)
Y_predicted = to_categorical(Y_scores.argmax(axis=1), num_classes=Y_test.shape[1])

In [None]:
## Generating Classification Report on the Predicted vs Actual Value
# Both arguments are 2-D indicator matrices (one column per class), so the
# report is produced in multilabel form, including micro and samples averages.
print(classification_report(Y_test, Y_predicted))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     57238
           1       0.77      0.81      0.79      2000
           2       0.84      0.89      0.86      2000
           3       1.00      1.00      1.00    137203
           4       1.00      1.00      1.00       346
           5       0.94      0.99      0.97      8302
           6       1.00      1.00      1.00     92382
           7       0.74      0.52      0.61     27978
           8       0.91      0.99      0.95      2198
           9       0.71      0.86      0.78     38671
          10       1.00      0.97      0.98     32128
          11       0.76      0.85      0.80      2000
          12       1.00      1.00      1.00     37518

   micro avg       0.95      0.95      0.95    439964
   macro avg       0.90      0.91      0.90    439964
weighted avg       0.95      0.95      0.95    439964
 samples avg       0.95      0.95      0.95    439964



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
## Generating the Confusion Matrix
# argmax(axis=1) converts each indicator row back to a class index; rows are
# true classes and columns are predicted classes.
cm = confusion_matrix(Y_test.argmax(axis=1), Y_predicted.argmax(axis=1))
print(cm)

[[ 57217      1      1      0      0      2      8      0      0      0
       8      1      0]
 [    11   1627    182      0      0      0      0      0      0      0
       3    177      0]
 [    16    140   1771      0      0      0      0      0      0      0
       0     73      0]
 [     0      0      7 137195      0      0      0      0      0      0
       0      1      0]
 [     0      0      0      0    345      0      0      0      0      0
       0      0      1]
 [    18      1      0      0      0   8226      2      0     11      0
       1     43      0]
 [    55      0      0      0      0     10  92018      0    154      0
      59     86      0]
 [     0      0      0      0      0      0      0  14573      0  13405
       0      0      0]
 [     2      0      0      0      0      2      0      0   2166      0
       3     25      0]
 [     0      0      0      0      0      0      0   5223      0  33448
       0      0      0]
 [    45    212    107      9      0    