In [37]:
import joblib
import pandas as pd
import numpy as np

iso_model = joblib.load("../models/iso_detector.pkl")
rf = joblib.load("../models/rf_classifier.pkl")
scaler = joblib.load("../models/scaler.pkl")


In [38]:
column_names = [
        "duration","protocol_type","service","flag","src_bytes","dst_bytes","land",
        "wrong_fragment","urgent","hot","num_failed_logins","logged_in",
        "num_compromised","root_shell","su_attempted","num_root","num_file_creations",
        "num_shells","num_access_files","num_outbound_cmds","is_host_login",
        "is_guest_login","count","srv_count","serror_rate","srv_serror_rate",
        "rerror_rate","srv_rerror_rate","same_srv_rate","diff_srv_rate",
        "srv_diff_host_rate","dst_host_count","dst_host_srv_count",
        "dst_host_same_srv_rate","dst_host_diff_srv_rate",
        "dst_host_same_src_port_rate","dst_host_srv_diff_host_rate",
        "dst_host_serror_rate","dst_host_srv_serror_rate",
        "dst_host_rerror_rate","dst_host_srv_rerror_rate",
        "label","difficulty"
    ]


test_df = pd.read_csv("../data/KDDTest+.txt", header=None)
test_df.columns = column_names

test_df = test_df.drop(columns=["difficulty"])

In [39]:
test_df["binary_label"] = test_df["label"].apply(
    lambda x: 0 if x == "normal" else 1
)


In [40]:
dos_attacks = ["back","land","neptune","pod","smurf","teardrop"]
probe_attacks = ["ipsweep","nmap","portsweep","satan"]
r2l_attacks = ["ftp_write","guess_passwd","imap","multihop",
               "phf","spy","warezclient","warezmaster"]

def map_attack(label):
    if label in dos_attacks:
        return "DoS"
    elif label in probe_attacks:
        return "Probe"
    elif label in r2l_attacks:
        return "R2L"
    else:
        return "normal"

test_df["category"] = test_df["label"].apply(map_attack)


In [41]:
X_test_bin = test_df.drop(columns=["label","category","binary_label"])

X_test_bin = pd.get_dummies(
    X_test_bin,
    columns=["protocol_type","service","flag"]
)


In [42]:
train_df = pd.read_csv("../data/KDDTrain+.txt", header=None)
train_df.columns = column_names
train_df = train_df.drop(columns=["difficulty"])

train_X_bin = train_df.drop(columns=["label"])
train_X_bin = pd.get_dummies(
    train_X_bin,
    columns=["protocol_type","service","flag"]
)

train_X_bin, X_test_bin = train_X_bin.align(
    X_test_bin,
    join="left",
    axis=1,
    fill_value=0
)


In [43]:
X_test_bin_scaled = scaler.transform(X_test_bin)


In [44]:
anomaly_pred = iso_model.predict(X_test_bin_scaled)
anomaly_pred = np.where(anomaly_pred == 1, 0, 1)
# anomaly detection


In [45]:
test_attack = test_df[test_df["category"] != "normal"].copy()

X_test_cat = test_attack.drop(columns=["label","category","binary_label"])

X_test_cat = pd.get_dummies(
    X_test_cat,
    columns=["protocol_type","service","flag"]
)

# align with training classifier columns
train_attack = train_df[train_df["label"] != "normal"].copy()
train_attack["category"] = train_attack["label"].apply(map_attack)

X_train_cat = train_attack.drop(columns=["label","category"])
X_train_cat = pd.get_dummies(
    X_train_cat,
    columns=["protocol_type","service","flag"]
)

X_train_cat, X_test_cat = X_train_cat.align(
    X_test_cat,
    join="left",
    axis=1,
    fill_value=0
)
##category classification

In [46]:
##now have to move the only the anomaly to the classifier 
attack_indices = np.where(anomaly_pred == 1)[0]
X_stage2 = X_test_cat.iloc[:len(attack_indices)]
category_pred = rf.predict(X_stage2)


In [47]:
final_pred = np.array(["normal"] * len(test_df))
for idx, cat in zip(attack_indices, category_pred):
    final_pred[idx] = cat
##here firstly initializing all as normal then only which are attack cllasifying them

In [48]:
from sklearn.metrics import classification_report
true_labels = test_df["category"].values
print(classification_report(true_labels, final_pred))


              precision    recall  f1-score   support

         DoS       0.53      0.53      0.53      5741
       Probe       0.11      0.18      0.14      1106
         R2L       0.02      0.02      0.02      2199
      normal       0.78      0.78      0.78     13498

    accuracy                           0.61     22544
   macro avg       0.36      0.38      0.37     22544
weighted avg       0.61      0.61      0.61     22544

