In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import pickle
from imblearn.over_sampling import RandomOverSampler
from glob import glob
import nfstream
import matplotlib.pyplot as plt
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
import os.path
from sklearn.neural_network import MLPClassifier

In [2]:
# Columns read straight from the dataset, one per line so diffs stay small.
selected_features = [
    # Destination port, packet counts and byte totals
    'Destination Port',
    'Total Fwd Packets',
    'Total Backward Packets',
    'Total Length of Fwd Packets',
    'Total Length of Bwd Packets',
    # Forward packet length columns
    'Fwd Packet Length Max',
    'Fwd Packet Length Min',
    'Fwd Packet Length Mean',
    'Fwd Packet Length Std',
    # Backward packet length columns
    'Bwd Packet Length Max',
    'Bwd Packet Length Min',
    'Bwd Packet Length Mean',
    'Bwd Packet Length Std',
    # Flow IAT columns
    'Flow IAT Mean',
    'Flow IAT Std',
    'Flow IAT Max',
    'Flow IAT Min',
    # Forward IAT columns
    'Fwd IAT Mean',
    'Fwd IAT Std',
    'Fwd IAT Max',
    'Fwd IAT Min',
    # Backward IAT columns
    'Bwd IAT Mean',
    'Bwd IAT Std',
    'Bwd IAT Max',
    'Bwd IAT Min',
    # Overall packet length columns
    'Min Packet Length',
    'Max Packet Length',
    'Packet Length Mean',
    'Packet Length Std',
    # Flag count columns
    'FIN Flag Count',
    'SYN Flag Count',
    'RST Flag Count',
    'PSH Flag Count',
    'ACK Flag Count',
    'URG Flag Count',
    'CWE Flag Count',
    'ECE Flag Count',
]

# Per-direction PSH/URG flag columns appended to the base selection.
added_features = [
    'Fwd PSH Flags',
    'Fwd URG Flags',
    'Bwd PSH Flags',
    'Bwd URG Flags',
]

# Currently excluded from the model input: 'Active Max', 'Idle Max'.

computed_features = [
    'Flow Duration',
    'Flow Bytes/s',
    'Flow Packets/s',
    'Fwd Packets/s',
    'Bwd Packets/s',
    'Packet Length Variance',
]

# Final, ordered list of model input columns: base, then added, then computed.
selected_features_total = [
    *selected_features,
    *added_features,
    *computed_features,
]
    

In [3]:
# Fixed seed for the classifier. Without it the MLP's random weight
# initialisation and batch sampling differ on every run, so the metrics
# reported further down cannot be reproduced after a kernel restart.
RANDOM_STATE = 42

# Preprocessing and classifier chained into a single estimator:
#   1. SimpleImputer  - fills missing values (library default strategy: column mean)
#   2. StandardScaler - standardises each feature column
#   3. MLPClassifier  - neural-network classifier, capped at 1000 training iterations
pipe = make_pipeline(
    SimpleImputer(),
    StandardScaler(),
    MLPClassifier(max_iter=1000, random_state=RANDOM_STATE),
)

In [4]:
# Read the training CSV, then separate the model input columns from the
# 'Label' target column.
train_dataset_path = 'C:/Users/katsa/OneDrive/Jupyter_files/shallow_models_online/cic_train_sample_binary.csv'
CICDataset_train = pd.read_csv(train_dataset_path)

X_train = CICDataset_train[selected_features_total]
y_train = CICDataset_train['Label']

In [5]:
# Read the test CSV, then separate the model input columns from the
# 'Label' target column (same column selection as the training split).
test_dataset_path = 'C:/Users/katsa/OneDrive/Jupyter_files/shallow_models_online/cic_test_sample_binary.csv'
CICDataset_test = pd.read_csv(test_dataset_path)

X_test = CICDataset_test[selected_features_total]
y_test = CICDataset_test['Label']

In [6]:
# Fit the pipeline on the training split. Data is passed as plain NumPy
# arrays rather than DataFrames.
pipe.fit(X_train.to_numpy(), y_train.to_numpy())

# Predict on the test split and keep the true labels as an array for scoring.
predictions = pipe.predict(X_test.to_numpy())
y_true = y_test.to_numpy()

# Overall accuracy first, then the per-class precision / recall / F1 table.
accuracy = metrics.accuracy_score(y_true, predictions)
print(accuracy)
report = metrics.classification_report(y_true, predictions)
print(report)

0.986706656210037
              precision    recall  f1-score   support

      ATTACK       0.96      0.98      0.97     55763
      BENIGN       0.99      0.99      0.99    227311

    accuracy                           0.99    283074
   macro avg       0.97      0.98      0.98    283074
weighted avg       0.99      0.99      0.99    283074



In [7]:
import joblib

# Save the whole pipeline, and separately its scaler step.
# make_pipeline names each step after its lowercased class name, so
# "standardscaler" is the same object as positional index 1.
scaler_step = pipe.named_steps["standardscaler"]

joblib.dump(pipe, "MLPmodel")
# Kept as the last expression so the cell still displays the written filename.
joblib.dump(scaler_step, "scaler")

['scaler']