In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, precision_score, confusion_matrix
import keras

In [None]:
# Load the dataset
# Read the UNSW-NB15 CSV, discard the "id" and "attack_cat" columns, and then
# remove every row that still contains a missing value.
df = (
    pd.read_csv("../../../data/UNSW_NB15/UNSW_NB15.csv")
    .drop(columns=["id", "attack_cat"])
    .dropna()
)

In [None]:
# Extract categorical and numerical columns
# Columns are grouped purely by dtype: object columns are treated as
# categorical, float64/int64 columns as numeric. The target column "label" is
# taken out of the numeric group so it is not listed among the numeric features.
categorical_cols = df.select_dtypes(include=['object']).columns
numeric_cols = (
    df.select_dtypes(include=['float64', 'int64'])
    .columns
    .drop(['label'])
)

In [None]:
# Encode categorical variables
# Each categorical column is replaced, in place, by integer codes produced by
# its own freshly fitted LabelEncoder (one encoder per column).
for col in categorical_cols:
    # Skip names that are no longer present in the frame.
    if col not in df.columns:
        continue
    label_encoder = LabelEncoder()
    df[col] = label_encoder.fit_transform(df[col])

In [None]:
# Separate the target ("label") from the feature columns
y = df['label']
x = df.drop(columns=['label'])

# Split the dataset
# 80/20 train/test split, shuffled with a fixed seed and stratified on the
# target so both partitions keep the same label proportions.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=y,
)

In [None]:
# Apply standard scaling to numeric columns
# The scaler learns its statistics from the training split only; the same
# fitted scaler is then applied to both splits so the test data never
# influences the scaling parameters.
scaler = StandardScaler().fit(x_train[numeric_cols])
x_train[numeric_cols] = scaler.transform(x_train[numeric_cols])
x_test[numeric_cols] = scaler.transform(x_test[numeric_cols])

In [None]:
# Model definition
# Seed Python's `random`, NumPy and the Keras backend before any layer is
# created, so that weight initialisation, dropout masks and batch shuffling are
# repeatable across "Restart Kernel -> Run All" (as far as the backend allows).
# The value matches the random_state used for the train/test split.
keras.utils.set_random_seed(42)

# One input unit per feature column of the training frame.
input_shape = (x_train.shape[1],)
# One output unit per distinct target value.
# NOTE(review): softmax + sparse_categorical_crossentropy assumes the target
# values are the integer class ids 0 .. classes-1 -- confirm for this dataset.
classes = len(y.unique())

model = keras.Sequential([
    # input layer
    keras.layers.Input(shape=input_shape),
    # hidden layers: three fully connected ReLU layers of decreasing width
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(96, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    # randomly zero 25% of the last hidden layer's activations during training
    keras.layers.Dropout(0.25),
    # output layer: one probability per class
    keras.layers.Dense(classes, activation='softmax')
])

# The sparse categorical loss takes integer class ids directly (no one-hot encoding).
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Model training
# Fit the compiled network on the training split: 10 passes over the data,
# processed in mini-batches of 32 samples.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
)

In [None]:
# Make predictions
# The network outputs one score per class for every test row; the predicted
# label is the index of the largest score in each row.
y_pred = np.argmax(model.predict(x_test), axis=1)

# Evaluate the model
# Collect the scores one by one, comparing the held-out labels with the
# predicted labels, then print the whole collection.
metrics = {}
metrics["accuracy"] = accuracy_score(y_test, y_pred)
metrics["f1_score"] = f1_score(y_test, y_pred)
metrics["precision"] = precision_score(y_test, y_pred)
metrics["confusion_matrix"] = confusion_matrix(y_test, y_pred)
print(metrics)