In [None]:
# This notebook trains and tests on CIC-DDoS2019 only

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline

In [None]:
# Load cleaned dataset

# Upper bound on the number of rows kept for training/evaluation; adjust to
# trade runtime against sample size.
SAMPLE_SIZE = 200000

df = pd.read_csv("CICDDoS2019-SVM-ready.csv")
# DataFrame.sample (replace=False) raises ValueError when n exceeds the number
# of rows, so clamp the request to what the file actually contains.
df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=42)
# Features are every column except the target; the target is the "Label" column.
X = df.drop(columns=["Label"])
y = df["Label"]

In [None]:
# META COLUMNS (same as 2017 and 2018)

# Columns treated as "metadata" and removed for the NO-META experiment.
META_COLS = [
    "Dst Port",
    "Init Fwd Win Byts",
    "Init Bwd Win Byts",
    "Fwd Act Data Pkts",
    "Fwd Seg Size Min",
    "Subflow Fwd Byts",
    "Active Mean", "Active Max", "Active Min",
    "Idle Mean", "Idle Max", "Idle Min"
]

# Split into META vs NO-META feature sets
X_meta = X.copy()                # keep all features

# drop(..., errors="ignore") silently skips names that are not in X. Report
# them so a column-naming mismatch cannot quietly make the NO-META set
# identical to the META set.
missing_meta_cols = [col for col in META_COLS if col not in X.columns]
if missing_meta_cols:
    print("WARNING: META columns not found in dataset:", missing_meta_cols)

X_nometa = X.drop(columns=META_COLS, errors="ignore")  # remove metadata

print("Original feature count:", X.shape[1])
print("META feature count:", X_meta.shape[1])
print("NO-META feature count:", X_nometa.shape[1])

In [None]:
# Train-test split

def split_data(X, y, test_size=0.2, random_state=42):
    """Return a stratified train/test split of ``X`` and ``y``.

    Parameters
    ----------
    X : features, forwarded unchanged to ``train_test_split``.
    y : labels; also passed as ``stratify`` so the split is stratified on
        the label values.
    test_size : forwarded to ``train_test_split``. Defaults to 0.2.
    random_state : seed forwarded to ``train_test_split``. Defaults to 42.

    Returns
    -------
    The ``train_test_split`` result, in the order
    ``X_train, X_test, y_train, y_test``.
    """
    return train_test_split(
        X, y,
        test_size=test_size,
        random_state=random_state,
        stratify=y
    )

X_train_meta, X_test_meta, y_train, y_test = split_data(X_meta, y)

# Build the NO-META partitions from the row labels of the META split instead
# of running a second split and discarding its label outputs. This makes the
# alignment with y_train / y_test explicit rather than dependent on both
# calls reproducing the same shuffle.
X_train_nometa = X_nometa.loc[X_train_meta.index]
X_test_nometa = X_nometa.loc[X_test_meta.index]

In [None]:
# Build SVM model

def build_svm():
    """Build an unfitted scaling + linear SVM pipeline.

    Returns
    -------
    Pipeline
        Two steps: ``"scaler"`` (``StandardScaler``) followed by ``"svm"``
        (``LinearSVC`` with balanced class weights and ``max_iter=5000``).
        A new pipeline is created on every call, so each experiment is
        fitted independently.
    """
    return Pipeline([
        ("scaler", StandardScaler()),
        ("svm", LinearSVC(
            class_weight="balanced",
            # The LinearSVC docs advise dual=False when n_samples > n_features,
            # which holds for the sampled flow data used in this notebook.
            dual=False,
            # Only consulted by the dual solver; pinned so results stay
            # reproducible if `dual` is ever switched back.
            random_state=42,
            max_iter=5000
        ))
    ])

In [None]:
# Train WITH_META

# Fit a new pipeline on the full feature set, then predict the held-out rows.
svm_meta = build_svm().fit(X_train_meta, y_train)
pred_meta = svm_meta.predict(X_test_meta)

# Banner first, then the confusion matrix and the report (4 decimal places).
print(
    "\n========================",
    "=== WITH META RESULTS ===",
    "========================",
    sep="\n",
)
cm_meta = confusion_matrix(y_test, pred_meta)
print(cm_meta)
report_meta = classification_report(y_test, pred_meta, digits=4)
print(report_meta)

In [None]:
# Train NO_META

# Fit a new pipeline on the NO-META feature set, then predict the held-out rows.
svm_nometa = build_svm().fit(X_train_nometa, y_train)
pred_nometa = svm_nometa.predict(X_test_nometa)

# Banner first, then the confusion matrix and the report (4 decimal places).
print(
    "\n========================",
    "=== NO META RESULTS ===",
    "========================",
    sep="\n",
)
cm_nometa = confusion_matrix(y_test, pred_nometa)
print(cm_nometa)
report_nometa = classification_report(y_test, pred_nometa, digits=4)
print(report_nometa)