In [1]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
# Load cleaned + feature-engineered dataset
df = pd.read_csv("../data/cleaned_cicids2017.csv")

# Features and labels: drop both label columns so the target cannot leak into X
X = df.drop(columns=["Label", "LabelEncoded"])
y = df["LabelEncoded"]

# Split BEFORE scaling: fitting the scaler on the full dataset would let
# test-set mean/variance leak into preprocessing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Work on explicit copies so the column assignments below do not touch X
# (X stays unscaled) and do not trigger SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize numeric columns using statistics learned from the training rows only;
# the test rows are transformed with those same statistics.
numeric_cols = X.select_dtypes(include=[np.number]).columns
scaler = StandardScaler()
X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

print("Data ready for training ✅")


Data ready for training ✅


In [3]:
# Build a 100-tree forest (fixed seed, n_jobs=-1 for parallel training) and
# fit it on the training split in one expression; fit() returns the estimator.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)

print("Random Forest trained ✅")


Random Forest trained ✅


In [4]:
# Score the held-out split with the trained forest
y_pred = rf_model.predict(X_test)

# Confusion matrix of true labels against predicted labels
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", cm)

# Per-class precision / recall / F1.
# NOTE(review): the display names are applied positionally to the sorted labels,
# which assumes LabelEncoded uses 0 = BENIGN and 1 = ATTACK — confirm against the
# preprocessing notebook.
class_names = ["BENIGN", "ATTACK"]
cr = classification_report(y_true=y_test, y_pred=y_pred, target_names=class_names)
print("\nClassification Report:\n", cr)


Confusion Matrix:
 [[453960    305]
 [   299 111012]]

Classification Report:
               precision    recall  f1-score   support

      BENIGN       1.00      1.00      1.00    454265
      ATTACK       1.00      1.00      1.00    111311

    accuracy                           1.00    565576
   macro avg       1.00      1.00      1.00    565576
weighted avg       1.00      1.00      1.00    565576



In [5]:
# Save model in models/ folder
models_dir = Path("../models")
# joblib.dump does not create missing parent directories, so make sure it exists
models_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(rf_model, models_dir / "ids_random_forest.pkl")
# The forest was trained on standardized features, so anything that loads the
# model for inference also needs the fitted scaler; persist it next to the model.
joblib.dump(scaler, models_dir / "ids_scaler.pkl")
print("Trained model saved in models/ ✅")


Trained model saved in models/ ✅
