In [43]:
import pandas as pd

# Locations of the two UNSW-NB15 CSV splits on the local machine.
train_path = r"C:\Users\kbc_k\comp3000\UNSW_NB15_training-set1.csv"
test_path  = r"C:\Users\kbc_k\comp3000\UNSW_NB15_testing-set2.csv"

# Read both splits (train first, then test) into DataFrames.
train_df, test_df = map(pd.read_csv, (train_path, test_path))

# Report (rows, columns) for each split.
for split_name, frame in (("Train", train_df), ("Test", test_df)):
    print(f"{split_name} shape:", frame.shape)

# Show the column index so the label/feature names can be checked by eye.
print("\nTrain columns:")
print(train_df.columns)

# UNSW-NB15 usually ships both `label` and `attack_cat`; only `label` is
# summarised here, and only when the column is actually present.
has_label = "label" in train_df.columns
if not has_label:
    print("\nNo 'label' column found. Check column names above.")
else:
    print("\nTrain label counts:")
    print(train_df["label"].value_counts())



Train shape: (175341, 45)
Test shape: (82332, 45)

Train columns:
Index(['id', 'dur', 'proto', 'service', 'state', 'spkts', 'dpkts', 'sbytes',
       'dbytes', 'rate', 'sttl', 'dttl', 'sload', 'dload', 'sloss', 'dloss',
       'sinpkt', 'dinpkt', 'sjit', 'djit', 'swin', 'stcpb', 'dtcpb', 'dwin',
       'tcprtt', 'synack', 'ackdat', 'smean', 'dmean', 'trans_depth',
       'response_body_len', 'ct_srv_src', 'ct_state_ttl', 'ct_dst_ltm',
       'ct_src_dport_ltm', 'ct_dst_sport_ltm', 'ct_dst_src_ltm',
       'is_ftp_login', 'ct_ftp_cmd', 'ct_flw_http_mthd', 'ct_src_ltm',
       'ct_srv_dst', 'is_sm_ips_ports', 'attack_cat', 'label'],
      dtype='object')

Train label counts:
label
1    119341
0     56000
Name: count, dtype: int64


In [45]:
import os
import pandas as pd
import numpy as np
import joblib

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import IsolationForest
from sklearn.metrics import confusion_matrix, classification_report

# ---------------------------------------------------------------------------
# Fit an IsolationForest on the normal (label == 0) rows of the training CSV,
# score the test CSV as a binary anomaly detector, print the confusion matrix
# and classification report, and persist the fitted pipeline with joblib.
# ---------------------------------------------------------------------------

# Paths: everything is derived from one base directory so the folder that gets
# created and the folder the model is written into cannot drift apart.
base_dir = r"C:\Users\kbc_k\comp3000"
models_dir = os.path.join(base_dir, "models")
train_path = os.path.join(base_dir, "UNSW_NB15_training-set1.csv")
test_path  = os.path.join(base_dir, "UNSW_NB15_testing-set2.csv")
model_path = os.path.join(models_dir, "unsw_isoforest.pkl")

# Ensure models folder exists (done up front so a bad location fails early,
# before any training time is spent).
os.makedirs(models_dir, exist_ok=True)

# Load data
train_df = pd.read_csv(train_path)
test_df  = pd.read_csv(test_path)

# Label column
label_col = "label"

# Fail with a clear message if either split lacks the label column; the
# indexing below would raise the same exception type with less context.
for split_name, frame in (("train", train_df), ("test", test_df)):
    if label_col not in frame.columns:
        raise KeyError(f"'{label_col}' column missing from the {split_name} CSV")

# Drop cols not used for features (errors="ignore" tolerates absent ones)
drop_cols = [label_col, "attack_cat", "id"]

X_train = train_df.drop(columns=drop_cols, errors="ignore")
y_train = train_df[label_col]

X_test = test_df.drop(columns=drop_cols, errors="ignore")
y_test = test_df[label_col]

# Identify numeric + categorical.
# Numeric/bool columns are selected explicitly and every remaining column is
# treated as categorical, so category/string-typed columns are one-hot encoded
# rather than being handed to StandardScaler (which cannot scale them).
numeric_cols = X_train.select_dtypes(include=["number", "bool"]).columns.tolist()
numeric_set = set(numeric_cols)
categorical_cols = [col for col in X_train.columns if col not in numeric_set]

# Preprocessor: one-hot encode categoricals (categories unseen at fit time are
# ignored at predict time) and standardise numerics.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
        ("num", StandardScaler(), numeric_cols),
    ],
    remainder="drop"
)

# Model
# NOTE(review): `contamination` sets the score threshold from the *fitted*
# data. Because fitting uses normal rows only, roughly 10% of normal training
# traffic is labelled anomalous by construction -- confirm this is intended.
iso = IsolationForest(n_estimators=200, contamination=0.1, random_state=42)

# Pipeline
clf = Pipeline(steps=[
    ("preprocess", preprocessor),
    ("model", iso)
])

# Train on NORMAL only (label == 0)
normal_train = train_df[train_df[label_col] == 0]
X_normal = normal_train.drop(columns=drop_cols, errors="ignore")

if X_normal.empty:
    raise ValueError("No rows with label == 0 in the training set; nothing to fit on")

clf.fit(X_normal)
print(" Model trained!")

# Predict on test
raw_preds = clf.predict(X_test)            # 1 normal, -1 anomaly
pred_anomaly = np.where(raw_preds == -1, 1, 0)   # remap to 1 = anomaly, 0 = normal

# labels=[0, 1] pins the matrix to a 2x2 layout (rows = true, cols = predicted)
# even if one class happens to be absent from y_test / pred_anomaly.
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, pred_anomaly, labels=[0, 1]))

print("\nClassification Report:")
print(classification_report(y_test, pred_anomaly))

# Save model (the whole pipeline, so preprocessing travels with the estimator)
joblib.dump(clf, model_path)

print("\n Model saved successfully at:")
print(model_path)


 Model trained!

Confusion Matrix:
[[31279  5721]
 [22290 23042]]

Classification Report:
              precision    recall  f1-score   support

           0       0.58      0.85      0.69     37000
           1       0.80      0.51      0.62     45332

    accuracy                           0.66     82332
   macro avg       0.69      0.68      0.66     82332
weighted avg       0.70      0.66      0.65     82332


 Model saved successfully at:
C:\Users\kbc_k\comp3000\models\unsw_isoforest.pkl


In [47]:
import os
# Create the model output directory; a no-op when it already exists.
models_dir = r"C:\Users\kbc_k\comp3000\models"
os.makedirs(models_dir, exist_ok=True)
print("models folder ready!")


models folder ready!


In [49]:
import joblib

# Reload the persisted pipeline from disk to confirm the saved artifact is readable.
saved_model_path = r"C:\Users\kbc_k\comp3000\models\unsw_isoforest.pkl"
model = joblib.load(saved_model_path)
print(" Model loaded!")

 Model loaded!
