In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

df = pd.read_csv('Scenario-A-merged_5s.csv')
df.columns = df.columns.str.strip()
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])
df.drop(['Source IP', 'Destination IP'], axis=1, inplace=True)

X = df.drop('label', axis=1)
y = df['label']

X.replace([np.inf, -np.inf], np.nan, inplace=True)
X.dropna(inplace=True)
y = y[X.index]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(),
    "Gradient Boost": GradientBoostingClassifier(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Perceptron": Perceptron()
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"\n{name} Results")
    
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))



Random Forest Results
Accuracy: 0.9950112840004751
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      2986
           1       1.00      1.00      1.00     13852

    accuracy                           1.00     16838
   macro avg       0.99      0.99      0.99     16838
weighted avg       1.00      1.00      1.00     16838


SVM Results
Accuracy: 0.8812804371065447
              precision    recall  f1-score   support

           0       0.80      0.44      0.57      2986
           1       0.89      0.98      0.93     13852

    accuracy                           0.88     16838
   macro avg       0.84      0.71      0.75     16838
weighted avg       0.87      0.88      0.87     16838


Gradient Boost Results
Accuracy: 0.9861622520489369
              precision    recall  f1-score   support

           0       0.97      0.95      0.96      2986
           1       0.99      0.99      0.99     13852

    accuracy                       

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



XGBoost Results
Accuracy: 0.9991091578572276
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2986
           1       1.00      1.00      1.00     13852

    accuracy                           1.00     16838
   macro avg       1.00      1.00      1.00     16838
weighted avg       1.00      1.00      1.00     16838



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Logistic Regression Results
Accuracy: 0.9061646276279843
              precision    recall  f1-score   support

           0       0.75      0.71      0.73      2986
           1       0.94      0.95      0.94     13852

    accuracy                           0.91     16838
   macro avg       0.84      0.83      0.84     16838
weighted avg       0.90      0.91      0.91     16838


Perceptron Results
Accuracy: 0.8131013184463713
              precision    recall  f1-score   support

           0       0.49      0.90      0.63      2986
           1       0.97      0.80      0.87     13852

    accuracy                           0.81     16838
   macro avg       0.73      0.85      0.75     16838
weighted avg       0.89      0.81      0.83     16838



In [2]:
import joblib
joblib.dump(model, 'merged.pkl')

['merged.pkl']