In [1]:
import pandas as pd

df = pd.read_csv("UNSW_NB15_training-set.csv")
print("Dataset shape:", df.shape)
df.head()

Dataset shape: (82332, 45)


Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat,label
0,1,1.1e-05,udp,-,INT,2,0,496,0,90909.0902,...,1,2,0,0,0,1,2,0,Normal,0
1,2,8e-06,udp,-,INT,2,0,1762,0,125000.0003,...,1,2,0,0,0,1,2,0,Normal,0
2,3,5e-06,udp,-,INT,2,0,1068,0,200000.0051,...,1,3,0,0,0,1,3,0,Normal,0
3,4,6e-06,udp,-,INT,2,0,900,0,166666.6608,...,1,3,0,0,0,2,3,0,Normal,0
4,5,1e-05,udp,-,INT,2,0,2126,0,100000.0025,...,1,3,0,0,0,2,3,0,Normal,0


In [2]:
import numpy as np

df_numeric = df.select_dtypes(include=[np.number])

print("Original dataset shape:", df.shape)
print("Numeric-only dataset shape:", df_numeric.shape)

Original dataset shape: (82332, 45)
Numeric-only dataset shape: (82332, 41)


In [3]:
X = df_numeric.drop('label', axis=1)
y = df_numeric['label']

print("Features shape (X):", X.shape)
print("Labels shape (y):", y.shape)


Features shape (X): (82332, 40)
Labels shape (y): (82332,)


In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42
)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)


Training set shape: (57632, 40)
Testing set shape: (24700, 40)


In [5]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)

model.fit(X_train, y_train)

print("Model training completed")


Model training completed


In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))


Accuracy: 0.9989068825910932
Precision: 0.998305334512231
Recall: 0.9997048623920903
F1 Score: 0.9990046082949309

Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11147
           1       1.00      1.00      1.00     13553

    accuracy                           1.00     24700
   macro avg       1.00      1.00      1.00     24700
weighted avg       1.00      1.00      1.00     24700



In [7]:
sample = X_test.iloc[0:1]
prediction = model.predict(sample)[0]
confidence = model.predict_proba(sample).max()

if prediction == 1:
    print(f"ðŸš¨ Threat Detected | Confidence: {confidence:.2f}")
    print("Explanation: Abnormal network traffic pattern identified by the ML model.")
else:
    print(f"âœ… Normal Traffic | Confidence: {confidence:.2f}")


âœ… Normal Traffic | Confidence: 1.00
