In [1]:
import os.path
import pickle
from glob import glob

import joblib
import matplotlib.pyplot as plt
import nfstream
import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn import metrics
from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the CIC training split and separate the features from the target column.
train_dataset_path = 'C:/Users/katsa/OneDrive/Jupyter_files/cic_nb15_hybrid/cic_12_feat_train_dataset.csv'
CICDataset_train = pd.read_csv(train_dataset_path)

# Select the features by column name rather than by position, so the split does
# not silently depend on 'Label' being the last column of the CSV.
# NOTE(review): the 'Unnamed: 0' column (it looks like a row index written out by
# an earlier to_csv call) stays in X_train and is therefore fed to the model as a
# feature -- confirm whether it should be excluded. Excluding it would have to be
# done for every dataset that is passed to the pipeline, not only this one.
X_train = CICDataset_train.drop(columns='Label')
y_train = CICDataset_train['Label']

In [3]:
# Preview the first five rows to sanity-check the column layout and the labels.
CICDataset_train.head(n=5)

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Mean,Bwd Packet Length Mean,Flow Bytes/s,Flow Packets/s,Fwd Packets/s,Bwd Packets/s,Label
0,80,249,2,0,0,0,0.0,0.0,0.0,8032.129,8032.129,0.0,ATTACK
1,80,1,2,0,0,0,0.0,0.0,0.0,2000000.0,2000000.0,0.0,ATTACK
2,55209,15,1,1,0,0,0.0,0.0,0.0,133333.3,66666.67,66666.66667,BENIGN
3,36778,3,2,0,0,0,0.0,0.0,0.0,666666.7,666666.7,0.0,BENIGN
4,54347,25,1,1,0,0,0.0,0.0,0.0,80000.0,40000.0,40000.0,BENIGN


In [4]:
# Bundle preprocessing and the classifier into a single estimator so that the
# imputation and scaling fitted on the training data are re-applied unchanged
# whenever the pipeline predicts.
pipe = make_pipeline(
    SimpleImputer(),   # default strategy: fill missing values with the column mean
    StandardScaler(),  # standardise each feature before it reaches the network
    # random_state fixes the weight initialisation and batch shuffling, so a
    # Restart & Run All reproduces the same model and the same metrics.
    MLPClassifier(max_iter=1000, random_state=42),
)

In [5]:
# Load the CIC test split and separate the features from the target column.
test_dataset_path = 'C:/Users/katsa/OneDrive/Jupyter_files/cic_nb15_hybrid/cic_12_feat_test_dataset.csv'
CICDataset_test = pd.read_csv(test_dataset_path)

# Select the features by column name rather than by position, so the split does
# not silently depend on 'Label' being the last column of the CSV.
# NOTE(review): if this file has the same layout as the training CSV, an
# 'Unnamed: 0' row-index column is kept among the features -- confirm that this
# is intended.
X_test = CICDataset_test.drop(columns='Label')
y_test = CICDataset_test['Label']

In [6]:
# Fit every step of the pipeline on the CIC training split.
# The frames are converted to plain NumPy arrays before being handed over.
train_features = X_train.to_numpy()
train_labels = y_train.to_numpy()
pipe.fit(train_features, train_labels)

In [7]:
# Score the fitted pipeline on the current X_test / y_test.
predictions = pipe.predict(X_test.to_numpy())
true_labels = y_test.to_numpy()

# Overall accuracy first, then the per-class precision / recall / F1 table.
print(metrics.accuracy_score(true_labels, predictions))
print(metrics.classification_report(true_labels, predictions))

0.9882504221510983
              precision    recall  f1-score   support

      ATTACK       0.97      0.97      0.97     27790
      BENIGN       0.99      0.99      0.99    113747

    accuracy                           0.99    141537
   macro avg       0.98      0.98      0.98    141537
weighted avg       0.99      0.99      0.99    141537



In [8]:
# Load the NB15 test set used for the cross-dataset evaluation.
nb_test_dataset_path = (
    'C:/Users/katsa/OneDrive/Jupyter_files/shallow_models_cic_nb15/nb_12_feat_test_dataset.csv'
)
nb_test_dataset = pd.read_csv(nb_test_dataset_path)

# NOTE(review): column names differ from the CIC files and the value ranges in the
# preview look different too (e.g. 'dur' here vs. 'Flow Duration' there) -- confirm
# that units and column order line up with what the pipeline was trained on.
nb_test_dataset.head(n=5)

Unnamed: 0,dsport,dur,Spkts,Dpkts,sbytes,dbytes,smeansz,dmeansz,flow_bytes/s,flow_packets/s,fwd_packets/s,bwd_packets/s,Label
0,53.0,1.089,2,2,130,162,65,81,268135900.0,3673095.0,1836547.0,1836547.0,BENIGN
1,80.0,282.741,10,6,912,268,91,45,4173431.0,56588.89,35368.06,21220.83,ATTACK
2,5190.0,6.367,22,24,1920,4312,87,180,978796900.0,7224753.0,3455316.0,3769436.0,BENIGN
3,111.0,87.319,4,4,568,320,142,80,10169610.0,91618.09,45809.04,45809.04,BENIGN
4,53.0,1.062,2,2,146,178,73,89,305084700.0,3766478.0,1883239.0,1883239.0,BENIGN


In [9]:
# Split the NB15 frame into ground-truth labels and model inputs.
# Note: this rebinds X_test / y_test, which held the CIC test split until now.
y_test = nb_test_dataset['Label']
X_test = nb_test_dataset.drop(columns='Label')

In [10]:
# Re-score the same fitted pipeline on whatever X_test / y_test currently hold.
predictions = pipe.predict(X_test.to_numpy())
true_labels = y_test.to_numpy()

# Print the per-class report as well as accuracy: with imbalanced classes the
# accuracy figure alone can hide poor recall on the minority class.
print(metrics.accuracy_score(true_labels, predictions))
print(metrics.classification_report(true_labels, predictions))

0.8647369146276648
              precision    recall  f1-score   support

      ATTACK       0.12      0.01      0.02     12859
      BENIGN       0.87      0.99      0.93     88743

    accuracy                           0.86    101602
   macro avg       0.49      0.50      0.47    101602
weighted avg       0.78      0.86      0.81    101602



In [11]:
# Persist the fitted artefacts so they can be reused outside this notebook.
# The complete pipeline (imputer + scaler + MLP) goes to "MLPmodel".
joblib.dump(pipe, "MLPmodel")

# The fitted StandardScaler is also saved on its own. It is looked up by the step
# name that make_pipeline generates (the lower-cased class name) instead of by
# position, so inserting or reordering pipeline steps raises a KeyError rather
# than silently writing a different transformer to the "scaler" file.
joblib.dump(pipe.named_steps["standardscaler"], "scaler")

['scaler']