In [2]:
%pip install imblearn

Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Collecting imbalanced-learn (from imblearn)
  Downloading imbalanced_learn-0.13.0-py3-none-any.whl.metadata (8.8 kB)
Collecting scikit-learn<2,>=1.3.2 (from imbalanced-learn->imblearn)
  Downloading scikit_learn-1.6.1-cp311-cp311-win_amd64.whl.metadata (15 kB)
Collecting sklearn-compat<1,>=0.1 (from imbalanced-learn->imblearn)
  Downloading sklearn_compat-0.1.3-py3-none-any.whl.metadata (18 kB)
Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)
Downloading imbalanced_learn-0.13.0-py3-none-any.whl (238 kB)
Downloading scikit_learn-1.6.1-cp311-cp311-win_amd64.whl (11.1 MB)
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/1

DEPRECATION: Loading egg at c:\users\emada\appdata\local\programs\python\python311\lib\site-packages\googletrans-3.0.0-py3.11.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330
  You can safely remove it manually.

[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import xgboost as xgb
from imblearn.over_sampling import SMOTE
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    roc_curve,
    auc,
    precision_recall_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

ModuleNotFoundError: No module named 'imblearn'

In [None]:


# Load dataset
df = pd.read_csv("network_traffic.csv")  # Replace with actual dataset
X = df.drop(columns=["Label"])
y = df["Label"]

# Encode labels as consecutive integers (0 .. n_classes - 1)
le = LabelEncoder()
y = le.fit_transform(y)

# Split data FIRST, on the original samples. Scaling or oversampling before the
# split leaks test-set statistics into preprocessing and places SMOTE-generated
# points (interpolated from training neighbours) into the evaluation set, which
# inflates every metric reported afterwards.
# NOTE: stratifying on the raw labels requires at least 2 samples per class.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features: learn mean/variance from the training split only and
# reuse those statistics for the held-out split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Handle class imbalance using SMOTE on the training split only; the test split
# keeps its natural class distribution so the evaluation stays realistic.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train)
X_train, y_train = X_resampled, y_resampled

# Autoencoder Model with Weighted Loss
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable across kernel restarts.
keras.utils.set_random_seed(42)

input_dim = X_train.shape[1]
encoding_dim = 16  # width of the bottleneck used later as the feature vector

input_layer = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation="relu")(input_layer)
# Linear output layer: the reconstruction targets are StandardScaler outputs
# (zero-centred, unbounded), which a sigmoid restricted to (0, 1) cannot
# reproduce.
decoded = Dense(input_dim, activation="linear")(encoded)

# Full model is used for training; `encoder` shares the same first layer and
# exposes only the bottleneck activations.
autoencoder = Model(input_layer, decoded)
encoder = Model(input_layer, encoded)


# Weighted Loss Function
def custom_loss(y_true, y_pred):
    """Weighted mean squared error: target entries equal to 1 count 10x.

    The weighting is applied per element *before* averaging over the feature
    axis. The previous version multiplied the already-reduced per-sample MSE
    (shape ``(batch,)``) by an element-wise weight matrix (shape
    ``(batch, features)``), which does not broadcast correctly, and built the
    weights with ``np.where``, which cannot operate on symbolic tensors while
    Keras traces the loss.

    Args:
        y_true: Target tensor, shape ``(batch, features)``.
        y_pred: Model output, same shape as ``y_true``.

    Returns:
        Tensor of shape ``(batch,)`` holding one weighted MSE value per sample.

    NOTE(review): in this notebook the autoencoder is fitted with the feature
    matrix as its own target, so ``y_true == 1`` matches feature values equal
    to 1, not minority-class rows. If per-class weighting is the goal, pass
    ``sample_weight`` to ``fit`` instead -- confirm the intent.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    # 10 where the target equals 1, otherwise 1; built in y_pred's dtype so the
    # product below never mixes float precisions.
    weights = 1.0 + 9.0 * tf.cast(tf.equal(y_true, 1.0), y_pred.dtype)
    return tf.reduce_mean(weights * tf.square(y_true - y_pred), axis=-1)


# Train the autoencoder to reproduce its own input (the target is the input).
training_options = {
    "epochs": 50,
    "batch_size": 256,
    "shuffle": True,
    # NOTE(review): reconstruction loss is monitored on the test split here;
    # confirm this is intended rather than a dedicated validation split.
    "validation_data": (X_test, X_test),
}
autoencoder.compile(loss=custom_loss, optimizer="adam")
autoencoder.fit(X_train, X_train, **training_options)

# Feature extraction: run both splits through the bottleneck-only model, in the
# same order as before (train first, then test).
X_train_encoded, X_test_encoded = (
    encoder.predict(split) for split in (X_train, X_test)
)

# Calculate class weights for XGBoost: n_samples / (n_classes * class_count),
# so rarer classes receive proportionally larger weights.
unique_classes, class_index, class_counts = np.unique(
    y_train, return_inverse=True, return_counts=True
)
total_samples = len(y_train)
class_weights = {
    cls: total_samples / (len(unique_classes) * count)
    for cls, count in zip(unique_classes, class_counts)
}

# Expand the per-class weights to one weight per training row.
# `class_index` maps every row of y_train to its position in `unique_classes`.
sample_weights = np.array([class_weights[cls] for cls in unique_classes])[
    class_index.reshape(-1)
]

# XGBoost Model with Class Weights
# `scale_pos_weight` is a single float that only applies to binary objectives,
# so a per-class list is not a valid value for it. Per-class weighting is
# supplied through `sample_weight` at fit time instead.
xgb_model = xgb.XGBClassifier(
    objective="multi:softmax",
    num_class=len(unique_classes),
    eval_metric="mlogloss",
)
xgb_model.fit(X_train_encoded, y_train, sample_weight=sample_weights)

# Score the held-out split: class probabilities feed the curve plots, hard
# labels feed the summary metrics.
y_pred_prob = xgb_model.predict_proba(X_test_encoded)
y_pred = xgb_model.predict(X_test_encoded)

# Headline metrics
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)

# Confusion matrix heatmap: rows are true labels, columns are predictions.
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
plt.show()

# ROC curves, one-vs-rest: each class is scored against all others using its
# own probability column.
fig, ax = plt.subplots(figsize=(8, 6))
for class_idx, _ in enumerate(unique_classes):
    is_class = y_test == class_idx
    fpr, tpr, _ = roc_curve(is_class, y_pred_prob[:, class_idx])
    roc_auc = auc(fpr, tpr)
    ax.plot(fpr, tpr, label=f"Class {class_idx} (AUC = {roc_auc:.2f})")

# Diagonal reference line for a chance-level classifier
ax.plot([0, 1], [0, 1], linestyle="--", color="gray")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve")
ax.legend()
plt.show()

# Precision-recall curves, one-vs-rest, drawn per class on shared axes.
fig, ax = plt.subplots(figsize=(8, 6))
for class_idx, _ in enumerate(unique_classes):
    is_class = y_test == class_idx
    precision, recall, _ = precision_recall_curve(
        is_class, y_pred_prob[:, class_idx]
    )
    ax.plot(recall, precision, label=f"Class {class_idx}")

ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision-Recall Curve")
ax.legend()
plt.show()

# Persist every fitted artifact: the bottleneck encoder, the classifier, and
# the two preprocessing objects.
encoder.save("autoencoder_encoder.h5")
xgb_model.save_model("xgboost_model.json")
for fitted_obj, out_path in ((scaler, "scaler.pkl"), (le, "label_encoder.pkl")):
    joblib.dump(fitted_obj, out_path)

print("Models saved successfully.")
