In [1]:
import pandas as pd
import os
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline as ImbPipeline

In [2]:
train_data = pd.read_csv('/Users/marlenawasiak/Desktop/Data_Collection/NSL_KDD_Train.csv')
test_data = pd.read_csv('/Users/marlenawasiak/Desktop/Data_Collection/NSL_KDD_Test.csv')

In [5]:
X_train = train_data.iloc[:, :-1]
y_train = train_data.iloc[:, -1]
X_test = test_data.iloc[:, :-1]
y_test = test_data.iloc[:, -1]

# Align columns to keep only the common columns
common_columns = X_train.columns.intersection(X_test.columns)
X_train = X_train[common_columns]
X_test = X_test[common_columns]

In [7]:
# Encode labels with combined categories
all_labels = pd.concat([y_train, y_test], axis=0)
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)


In [9]:
# Encode categorical features with consistent categories
categorical_columns = X_train.select_dtypes(include=['object']).columns
for col in categorical_columns:
    combined_categories = pd.concat([X_train[col], X_test[col]], axis=0).astype("category").cat.categories
    X_train[col] = pd.Categorical(X_train[col], categories=combined_categories)
    X_test[col] = pd.Categorical(X_test[col], categories=combined_categories)
    
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col].astype(str))
    X_test[col] = le.transform(X_test[col].astype(str))
    
from sklearn.preprocessing import StandardScaler
# Define clipping thresholds based on the 99th percentile of the training data
clip_thresholds = {
    "0.1": X_train["0.1"].quantile(0.99),
    "0.2": X_train["0.2"].quantile(0.99)
}

# Clip the training and test data at these thresholds
X_train_clipped = X_train.copy()
X_test_clipped = X_test.copy()

for col, threshold in clip_thresholds.items():
    X_train_clipped[col] = X_train[col].clip(upper=threshold)
    X_test_clipped[col] = X_test[col].clip(upper=threshold)

from sklearn.preprocessing import RobustScaler

# Fit RobustScaler on the training data
scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train_clipped)
X_test_scaled = scaler.transform(X_test_clipped)

# Verify the new scaled ranges
print("Scaled feature ranges after RobustScaler:")
print("Train set min:", X_train_scaled.min(), "max:", X_train_scaled.max())
print("Test set min:", X_test_scaled.min(), "max:", X_test_scaled.max())

from sklearn.preprocessing import QuantileTransformer
import numpy as np

# Apply QuantileTransformer with a normal distribution
scaler = QuantileTransformer(output_distribution='normal')  # Alternatively, try 'uniform' if normal doesn't work well
X_train_scaled = scaler.fit_transform(X_train_clipped)
X_test_scaled = scaler.transform(X_test_clipped)

# Optional: Clip values to remove any remaining extreme values
X_train_scaled = np.clip(X_train_scaled, -3, 3)  # Adjust the range as needed
X_test_scaled = np.clip(X_test_scaled, -3, 3)

# Verify the scaled ranges
print("Scaled feature ranges after QuantileTransformer and clipping:")
print("Train set min:", X_train_scaled.min(), "max:", X_train_scaled.max())
print("Test set min:", X_test_scaled.min(), "max:", X_test_scaled.max())


Scaled feature ranges after RobustScaler:
Train set min: -1.0989010989010988 max: 42908.0
Test set min: -1.0989010989010988 max: 57715.0
Scaled feature ranges after QuantileTransformer and clipping:
Train set min: -3.0 max: 3.0
Test set min: -3.0 max: 3.0


In [122]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.utils.class_weight import compute_class_weight

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train_encoded), y=y_train_encoded)
class_weight_dict = {i: weight for i, weight in enumerate(class_weights)}

# Define the neural network model
# Experiment with a deeper model
model = Sequential()
model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_scaled, y_train_encoded, epochs=150, batch_size=64, validation_data=(X_test_scaled, y_test_encoded), class_weight=class_weight_dict)


Epoch 1/150


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1969/1969[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 517us/step - accuracy: 0.8514 - loss: 42.3309 - val_accuracy: 0.4778 - val_loss: 7.1463
Epoch 2/150
[1m1969/1969[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 486us/step - accuracy: 0.9143 - loss: 9.9232 - val_accuracy: 0.4743 - val_loss: 7.4399
Epoch 3/150
[1m1969/1969[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 486us/step - accuracy: 0.9210 - loss: 9.0000 - val_accuracy: 0.4027 - val_loss: 9.9748
Epoch 4/150
[1m1969/1969[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 490us/step - accuracy: 0.9280 - loss: 8.2712 - val_accuracy: 0.3435 - val_loss: 10.1673
Epoch 5/150
[1m1969/1969[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 505us/step - accuracy: 0.9283 - loss: 8.5306 - val_accuracy: 0.3939 - val_loss: 9.1332
Epoch 6/150
[1m1969/1969[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 485us/step - accuracy: 0.9311 - loss: 7.9169 - val_accuracy: 0.4222 - val_loss: 9.8036
Epoch 7/

In [124]:
# Evaluate the model
y_pred_encoded = model.predict(X_test_scaled).argmax(axis=1)

# Calculate accuracy
test_accuracy = accuracy_score(y_test_encoded, y_pred_encoded)
print("Test Accuracy:", test_accuracy)

# Generate classification report with specified labels to handle all classes
class_report = classification_report(
    y_test_encoded, 
    y_pred_encoded, 
    labels=range(len(label_encoder.classes_)), 
    target_names=label_encoder.classes_, 
    zero_division=1
)
print("Classification Report:\n", class_report)


[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 318us/step
Test Accuracy: 0.32830590427183604
Classification Report:
                  precision    recall  f1-score   support

        apache2       1.00      0.00      0.00       737
           back       1.00      0.00      0.00       359
buffer_overflow       0.00      0.00      0.00        20
      ftp_write       1.00      0.00      0.00         3
   guess_passwd       1.00      0.00      0.00      1231
     httptunnel       1.00      0.00      0.00       133
           imap       0.00      0.00      0.00         1
        ipsweep       1.00      0.00      0.00       141
           land       1.00      0.00      0.00         7
     loadmodule       1.00      0.00      0.00         2
       mailbomb       1.00      0.00      0.00       293
          mscan       1.00      0.00      0.00       996
       multihop       0.00      0.00      0.00        18
          named       1.00      0.00      0.00        17
        

In [132]:
# FGSM Attack with Higher Perturbation Strength
print("\n--- FGSM Attack with Higher Epsilon ---")
for eps in [2.0, 3.0]:  # Test stronger perturbations
    fgsm = FastGradientMethod(estimator=classifier, eps=eps)
    X_test_fgsm_adv = fgsm.generate(x=X_test_scaled)
    X_test_fgsm_adv = np.clip(X_test_fgsm_adv, -3, 3)  # Clip to valid range

    # Evaluate FGSM
    y_pred_fgsm_encoded = model.predict(X_test_fgsm_adv).argmax(axis=1)
    fgsm_accuracy = accuracy_score(y_test_encoded, y_pred_fgsm_encoded)
    print(f"FGSM Adversarial Accuracy (eps={eps}):", fgsm_accuracy)
    print("FGSM Classification Report:\n", 
          classification_report(y_test_encoded, y_pred_fgsm_encoded,zero_division=1))


--- FGSM Attack with Higher Epsilon ---
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 252us/step
FGSM Adversarial Accuracy (eps=2.0): 0.2431797010158364
FGSM Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.00      0.00       737
           1       1.00      0.00      0.00       359
           2       1.00      0.00      0.00        20
           3       1.00      0.00      0.00         3
           4       1.00      0.00      0.00      1231
           5       1.00      0.00      0.00       133
           6       0.00      0.00      0.00         1
           7       0.00      0.00      0.00       141
           8       1.00      0.00      0.00         7
           9       1.00      0.00      0.00         2
          10       1.00      0.00      0.00       293
          11       1.00      0.00      0.00       996
          12       0.00      0.00      0.00        18
          13       1.00      0.00      0

In [138]:
# PGD Attack with More Iterations
print("\n--- PGD Attack with Increased Iterations ---")
pgd = ProjectedGradientDescent(
    estimator=classifier,
    norm=np.inf,
    eps=3.0,              # Increased epsilon
    eps_step=0.02,        # Smaller step size for fine perturbations
    max_iter=200,         # More iterations for stronger attack
    targeted=False
)
X_test_pgd_adv = pgd.generate(x=X_test_scaled)
X_test_pgd_adv = np.clip(X_test_pgd_adv, -3, 3)

# Evaluate PGD
y_pred_pgd_encoded = model.predict(X_test_pgd_adv).argmax(axis=1)
pgd_accuracy = accuracy_score(y_test_encoded, y_pred_pgd_encoded)
print("PGD Adversarial Accuracy (stronger):", pgd_accuracy)
print("PGD Classification Report:\n", 
      classification_report(y_test_encoded, y_pred_pgd_encoded,zero_division=1))



--- PGD Attack with Increased Iterations ---


PGD - Batches: 0it [00:00, ?it/s]

[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256us/step
PGD Adversarial Accuracy (stronger): 0.1356075056558577
PGD Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.00      0.00       737
           1       1.00      0.00      0.00       359
           2       1.00      0.00      0.00        20
           3       1.00      0.00      0.00         3
           4       1.00      0.00      0.00      1231
           5       1.00      0.00      0.00       133
           6       1.00      0.00      0.00         1
           7       0.00      0.00      0.00       141
           8       1.00      0.00      0.00         7
           9       1.00      0.00      0.00         2
          10       1.00      0.00      0.00       293
          11       1.00      0.00      0.00       996
          12       0.00      0.00      0.00        18
          13       1.00      0.00      0.00        17
          14       0.25     

In [140]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from art.attacks.evasion import FastGradientMethod, ProjectedGradientDescent, CarliniL2Method
from art.estimators.classification import TensorFlowV2Classifier

# Assuming your model and data are already loaded and preprocessed:
# model, X_test_scaled, y_test_encoded

# Wrap the trained Keras model in an ART classifier
classifier = TensorFlowV2Classifier(
    model=model,
    nb_classes=len(label_encoder.classes_),
    input_shape=(X_train_scaled.shape[1],),
    loss_object=tf.keras.losses.SparseCategoricalCrossentropy()
)

# Critical features
critical_features = [19, 23, 2, 26, 20]  # Indices of critical features

# Create a static mask for the critical features
static_mask = np.zeros_like(X_test_scaled)
static_mask[:, critical_features] = 1  # Mask only the critical features

# Function to apply the static mask
def apply_static_mask(X_original, X_adv, mask):
    """
    Applies a static mask to perturb only selected features.
    Args:
        X_original: The original input data.
        X_adv: The adversarial examples.
        mask: A mask indicating which features to perturb (1 = perturb, 0 = keep original).
    Returns:
        Masked adversarial examples.
    """
    return X_original + (X_adv - X_original) * mask




In [144]:
# FGSM Attack with Static Mask
print("\n--- FGSM Attack with Static Mask ---")
for eps in [3.0, 4.0]:  # Test with varying epsilon values
    fgsm = FastGradientMethod(estimator=classifier, eps=eps)
    X_test_fgsm_adv = fgsm.generate(x=X_test_scaled)
    X_test_fgsm_adv = apply_static_mask(X_test_scaled, X_test_fgsm_adv, static_mask)
    X_test_fgsm_adv = np.clip(X_test_fgsm_adv, -3, 3)  # Clip to valid range

    # Evaluate FGSM with masking
    y_pred_fgsm_encoded = model.predict(X_test_fgsm_adv).argmax(axis=1)
    fgsm_accuracy = accuracy_score(y_test_encoded, y_pred_fgsm_encoded)
    print(f"FGSM Adversarial Accuracy (eps={eps}):", fgsm_accuracy)
    print("FGSM Classification Report:\n", 
          classification_report(y_test_encoded, y_pred_fgsm_encoded,zero_division=1))


--- FGSM Attack with Static Mask ---
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283us/step
FGSM Adversarial Accuracy (eps=3.0): 0.09688151532626536
FGSM Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.00      0.00       737
           1       1.00      0.00      0.00       359
           2       0.00      0.00      0.00        20
           3       1.00      0.00      0.00         3
           4       1.00      0.00      0.00      1231
           5       1.00      0.00      0.00       133
           6       1.00      0.00      0.00         1
           7       0.00      0.00      0.00       141
           8       0.00      0.00      0.00         7
           9       1.00      0.00      0.00         2
          10       1.00      0.00      0.00       293
          11       1.00      0.00      0.00       996
          12       0.00      0.00      0.00        18
          13       1.00      0.00      0.0

In [146]:
# PGD Attack with Static Mask
print("\n--- PGD Attack with Static Mask ---")
pgd = ProjectedGradientDescent(
    estimator=classifier,
    norm=np.inf,
    eps=3.0,
    eps_step=0.02,
    max_iter=100,
    targeted=False
)
X_test_pgd_adv = pgd.generate(x=X_test_scaled)
X_test_pgd_adv = apply_static_mask(X_test_scaled, X_test_pgd_adv, static_mask)
X_test_pgd_adv = np.clip(X_test_pgd_adv, -3, 3)


# Evaluate PGD with masking
y_pred_pgd_encoded = model.predict(X_test_pgd_adv).argmax(axis=1)
pgd_accuracy = accuracy_score(y_test_encoded, y_pred_pgd_encoded)
print("PGD Adversarial Accuracy (with masking):", pgd_accuracy)
print("PGD Classification Report:\n", 
      classification_report(y_test_encoded, y_pred_pgd_encoded,zero_division=1))


--- PGD Attack with Static Mask ---


PGD - Batches: 0it [00:00, ?it/s]

2024-11-17 22:45:55.174217: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 265us/step
PGD Adversarial Accuracy (with masking): 0.20134853391296634
PGD Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.00      0.00       737
           1       1.00      0.00      0.00       359
           2       1.00      0.00      0.00        20
           3       1.00      0.00      0.00         3
           4       1.00      0.00      0.00      1231
           5       1.00      0.00      0.00       133
           6       1.00      0.00      0.00         1
           7       1.00      0.00      0.00       141
           8       0.00      0.00      0.00         7
           9       1.00      0.00      0.00         2
          10       1.00      0.00      0.00       293
          11       1.00      0.00      0.00       996
          12       0.00      0.00      0.00        18
          13       1.00      0.00      0.00        17
          14       0.27

In [148]:
print("\n--- Carlini & Wagner Attack with Static Mask ---")
cw = CarliniL2Method(
    classifier=classifier,
    confidence=2.0,
    targeted=False,
    max_iter=100,
    learning_rate=0.01,
    binary_search_steps=5
)
X_test_cw_adv = cw.generate(x=X_test_scaled[:200])  # Subset for C&W due to computational cost
X_test_cw_adv = apply_static_mask(X_test_scaled[:200], X_test_cw_adv, static_mask[:200])
X_test_cw_adv = np.clip(X_test_cw_adv, -3, 3)

# Evaluate C&W with masking
y_pred_cw_encoded = model.predict(X_test_cw_adv).argmax(axis=1)
cw_accuracy = accuracy_score(y_test_encoded[:200], y_pred_cw_encoded)
print("C&W Adversarial Accuracy (with masking):", cw_accuracy)
print("C&W Classification Report:\n", 
      classification_report(y_test_encoded[:200], y_pred_cw_encoded,zero_division=1))


--- Carlini & Wagner Attack with Static Mask ---


C&W L_2:   0%|          | 0/200 [00:00<?, ?it/s]

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 528us/step
C&W Adversarial Accuracy (with masking): 0.205
C&W Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.00      0.00         3
           1       1.00      0.00      0.00         1
           2       1.00      0.00      0.00         1
           4       1.00      0.00      0.00        10
           5       1.00      0.00      0.00         1
           7       1.00      0.00      0.00         1
          10       1.00      0.00      0.00         1
          11       1.00      0.00      0.00        10
          14       0.27      0.23      0.24        53
          15       1.00      0.00      0.00         2
          16       0.28      0.34      0.31        85
          19       1.00      0.00      0.00         2
          20       0.00      1.00      0.00         0
          21       1.00      0.00      0.00        10
          22       1.00      0.00      0.

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np
import tensorflow as tf

# Assume X_train and y_train are your training data (features and labels), already preprocessed
# Assume X_test and y_test are your test data (features and labels), already preprocessed

# Step 1: Scale the Data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

from sklearn.preprocessing import QuantileTransformer

# Apply QuantileTransformer to transform features to a normal distribution
quantile_transformer = QuantileTransformer(output_distribution='normal', random_state=42)
X_train_scaled = quantile_transformer.fit_transform(X_train)
X_test_scaled = quantile_transformer.transform(X_test)

from sklearn.preprocessing import LabelEncoder
import numpy as np

# Concatenate y_train and y_test to include all labels
all_labels = np.concatenate([y_train, y_test])

# Fit LabelEncoder on the combined labels
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

# Transform y_train and y_test using the fitted encoder
y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

num_classes = len(np.unique(all_labels))  # Total number of unique classes in y_train and y_test
print("Number of unique classes:", num_classes)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Redefine the model with the correct number of classes
mlp_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(num_classes, activation='softmax')  # Ensure output layer matches the number of unique classes
])

# Compile the model
mlp_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Step 4: Train the Model
history = mlp_model.fit(X_train_scaled, y_train_encoded, epochs=50, batch_size=32, validation_data=(X_test_scaled, y_test_encoded))


Number of unique classes: 40
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 435us/step - accuracy: 0.9322 - loss: 0.2611 - val_accuracy: 0.3338 - val_loss: 5.7909
Epoch 2/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 423us/step - accuracy: 0.9673 - loss: 0.0933 - val_accuracy: 0.4062 - val_loss: 6.0013
Epoch 3/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 447us/step - accuracy: 0.9676 - loss: 0.0913 - val_accuracy: 0.2329 - val_loss: 6.8171
Epoch 4/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 449us/step - accuracy: 0.9712 - loss: 0.0795 - val_accuracy: 0.3936 - val_loss: 6.7951
Epoch 5/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 449us/step - accuracy: 0.9738 - loss: 0.0741 - val_accuracy: 0.3312 - val_loss: 8.4143
Epoch 6/50
[1m3937/3937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 442us/step - accuracy: 0.9771 - loss: 0.0676 - val_accuracy: 0.0919 - val_loss: 9.1451
Epoch 7/50
[1m

In [19]:
# Evaluate the MLP model on clean data using encoded labels for y_test
y_pred = mlp_model.predict(X_test_scaled).argmax(axis=1)
clean_accuracy = accuracy_score(y_test_encoded, y_pred)
clean_class_report = classification_report(y_test_encoded, y_pred, zero_division=1)
clean_conf_matrix = confusion_matrix(y_test_encoded, y_pred)
# Decode the integer labels back to the original class names
y_test_labels = label_encoder.inverse_transform(y_test_encoded)
y_pred_labels = label_encoder.inverse_transform(y_pred)
from sklearn.metrics import classification_report, confusion_matrix

# Generate the classification report with the original attack names
decoded_class_report = classification_report(y_test_labels, y_pred_labels, zero_division=1)
decoded_conf_matrix = confusion_matrix(y_test_labels, y_pred_labels)
print("MLP Model Accuracy on Clean Data:", clean_accuracy)
print("Classification Report on Clean Data with Attack Names:\n", decoded_class_report)


[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 280us/step
MLP Model Accuracy on Clean Data: 0.2957459078206095
Classification Report on Clean Data with Attack Names:
                  precision    recall  f1-score   support

        apache2       1.00      0.00      0.00       737
           back       1.00      0.00      0.00       359
buffer_overflow       1.00      0.00      0.00        20
      ftp_write       1.00      0.00      0.00         3
   guess_passwd       0.00      0.00      0.00      1231
     httptunnel       1.00      0.00      0.00       133
           imap       1.00      0.00      0.00         1
        ipsweep       0.00      0.00      0.00       141
           land       1.00      0.00      0.00         7
     loadmodule       1.00      0.00      0.00         2
       mailbomb       1.00      0.00      0.00       293
          mscan       1.00      0.00      0.00       996
       multihop       0.00      0.00      0.00        18
          named

In [21]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from art.attacks.evasion import FastGradientMethod, ProjectedGradientDescent, CarliniL2Method
from art.estimators.classification import TensorFlowV2Classifier

# Wrap the trained MLP model in an ART classifier
classifier = TensorFlowV2Classifier(
    model=mlp_model,
    nb_classes=len(label_encoder.classes_),
    input_shape=(X_train_scaled.shape[1],),
    loss_object=tf.keras.losses.SparseCategoricalCrossentropy()
)

# Define the critical features
critical_features = [19, 23, 26, 2, 20, 27]  # From SHAP analysis

# Create a dynamic mask targeting the critical features
def create_static_mask(data, critical_features):
    """
    Creates a mask with 1s for critical features and 0s elsewhere.
    Args:
        data: Dataset (numpy array or DataFrame).
        critical_features: List of critical feature indices.
    Returns:
        Mask of the same shape as data, with 1s for critical features.
    """
    mask = np.zeros_like(data)  # Initialize a mask of zeros
    mask[:, critical_features] = 1  # Target critical features
    return mask

# Generate the static mask for the test data
static_mask = create_static_mask(X_test_scaled, critical_features)

# Apply the mask to adversarial examples
def apply_static_mask(X_original, X_adv, mask):
    """
    Applies a static mask to perturb only selected features.
    Args:
        X_original: Original input data.
        X_adv: Adversarial examples.
        mask: Mask indicating which features to perturb (1 = perturb, 0 = keep original).
    Returns:
        Masked adversarial examples.
    """
    return X_original + (X_adv - X_original) * mask

In [23]:
# FGSM Attack with Dynamic Masking
print("\n--- FGSM Attack with Dynamic Masking ---")
for eps in [3.0]:  # Test with varying epsilon values
    fgsm = FastGradientMethod(estimator=classifier, eps=eps)
    X_test_fgsm_adv = fgsm.generate(x=X_test_scaled)
    X_test_fgsm_adv = apply_static_mask(X_test_scaled, X_test_fgsm_adv, static_mask)
    X_test_fgsm_adv = np.clip(X_test_fgsm_adv, -3, 3)  # Clip to valid range

    # Evaluate FGSM with masking
    y_pred_fgsm_encoded = mlp_model.predict(X_test_fgsm_adv).argmax(axis=1)
    fgsm_accuracy = accuracy_score(y_test_encoded, y_pred_fgsm_encoded)
    print(f"FGSM Adversarial Accuracy (eps={eps}):", fgsm_accuracy)
    print("FGSM Classification Report:\n", 
          classification_report(y_test_encoded, y_pred_fgsm_encoded,zero_division=1))


--- FGSM Attack with Dynamic Masking ---
[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 248us/step
FGSM Adversarial Accuracy (eps=3.0): 0.33815375060994546
FGSM Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.00      0.00       737
           1       0.00      0.00      0.00       359
           2       1.00      0.00      0.00        20
           3       1.00      0.00      0.00         3
           4       1.00      0.00      0.00      1231
           5       1.00      0.00      0.00       133
           6       1.00      0.00      0.00         1
           7       0.00      0.00      0.00       141
           8       0.00      0.00      0.00         7
           9       1.00      0.00      0.00         2
          10       1.00      0.00      0.00       293
          11       1.00      0.00      0.00       996
          12       1.00      0.00      0.00        18
          13       1.00      0.00     

In [112]:

# PGD Attack with Dynamic Masking
print("\n--- PGD Attack with Dynamic Masking ---")
pgd = ProjectedGradientDescent(
    estimator=classifier,
    norm=np.inf,
    eps=3.0,
    eps_step=0.02,
    max_iter=100,
    targeted=False
)
X_test_pgd_adv = pgd.generate(x=X_test_scaled)
X_test_pgd_adv = apply_static_mask(X_test_scaled, X_test_pgd_adv, static_mask)
X_test_pgd_adv = np.clip(X_test_pgd_adv, -3, 3)

# Evaluate PGD with masking
y_pred_pgd_encoded = mlp_model.predict(X_test_pgd_adv).argmax(axis=1)
pgd_accuracy = accuracy_score(y_test_encoded, y_pred_pgd_encoded)
print("PGD Adversarial Accuracy (with masking):", pgd_accuracy)
print("PGD Classification Report:\n", 
      classification_report(y_test_encoded, y_pred_pgd_encoded,zero_division=1))


--- PGD Attack with Dynamic Masking ---


PGD - Batches: 0it [00:00, ?it/s]

[1m705/705[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 267us/step
PGD Adversarial Accuracy (with masking): 0.36068846205030386
PGD Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       737
           1       0.00      0.00      0.00       359
           2       0.00      0.00      0.00        20
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00      1231
           5       0.00      0.00      0.00       133
           6       0.00      0.00      0.00         1
           7       0.00      0.00      0.00       141
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         2
          10       0.00      0.00      0.00       293
          11       0.00      0.00      0.00       996
          12       0.00      0.00      0.00        18
          13       0.00      0.00      0.00        17
          14       0.62

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
# C&W Attack with Dynamic Masking
print("\n--- Carlini & Wagner Attack with Dynamic Masking ---")
cw = CarliniL2Method(
    classifier=classifier,
    confidence=2.0,
    targeted=False,
    max_iter=100,
    learning_rate=0.01,
    binary_search_steps=5
)
X_test_cw_adv = cw.generate(x=X_test_scaled[:200])  # Subset for C&W due to computational cost
X_test_cw_adv = apply_static_mask(X_test_scaled[:200], X_test_cw_adv, static_mask[:200])
X_test_cw_adv = np.clip(X_test_cw_adv, -3, 3)

# Evaluate C&W with masking
y_pred_cw_encoded = mlp_model.predict(X_test_cw_adv).argmax(axis=1)
cw_accuracy = accuracy_score(y_test_encoded[:200], y_pred_cw_encoded)
print("C&W Adversarial Accuracy (with masking):", cw_accuracy)
print("C&W Classification Report:\n", 
      classification_report(y_test_encoded[:200], y_pred_cw_encoded,zero_division=1))


--- Carlini & Wagner Attack with Dynamic Masking ---


C&W L_2:   0%|          | 0/200 [00:00<?, ?it/s]

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 542us/step
C&W Adversarial Accuracy (with masking): 0.27
C&W Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.00      0.00         3
           1       1.00      0.00      0.00         1
           2       1.00      0.00      0.00         1
           4       1.00      0.00      0.00        10
           5       1.00      0.00      0.00         1
           7       1.00      0.00      0.00         1
          10       1.00      0.00      0.00         1
          11       1.00      0.00      0.00        10
          14       0.31      0.23      0.26        53
          15       1.00      0.00      0.00         2
          16       0.53      0.49      0.51        85
          19       1.00      0.00      0.00         2
          20       0.00      1.00      0.00         0
          21       1.00      0.00      0.00        10
          22       1.00      0.00      0.0