In [35]:
import pandas as pd
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [None]:

# Directory that holds the two UNSW-NB15 CSV splits.
# Set the UNSW_NB15_DIR environment variable to run the notebook on another
# machine; when it is unset the original location is used, so existing runs
# behave exactly as before.
DATA_DIR = os.environ.get("UNSW_NB15_DIR", "/Users/marlenawasiak/Desktop/Data_Collection")

train_data = pd.read_csv(os.path.join(DATA_DIR, "UNSW_NB15_training-set.csv"))
test_data = pd.read_csv(os.path.join(DATA_DIR, "UNSW_NB15_testing-set.csv"))



In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report

# ---- Features / target -------------------------------------------------------
# Everything except 'attack_cat' is used as a feature.
# NOTE(review): if the CSV also carries a row-id column or a binary attack flag
# (e.g. `id` / `label`), they stay in the feature matrix and may leak the
# target -- confirm against the file's column list before trusting the scores.
categorical_features = ['proto', 'service', 'state']
y_train = train_data['attack_cat']
y_test = test_data['attack_cat']

# One-hot encode the categorical columns; the test frame is then aligned to the
# training columns (dummies missing from the test split are filled with 0,
# dummies that only exist in the test split are dropped).
X_train = pd.get_dummies(
    train_data.drop(columns=['attack_cat']),
    columns=categorical_features,
    drop_first=True,
)
X_test = pd.get_dummies(
    test_data.drop(columns=['attack_cat']),
    columns=categorical_features,
    drop_first=True,
).reindex(columns=X_train.columns, fill_value=0)

# ---- Integer class codes (encoder is fitted on the training labels only) -----
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# ---- Oversample with SMOTE ---------------------------------------------------
smote = SMOTE(sampling_strategy='all', random_state=42)
X_train_augmented, y_train_augmented = smote.fit_resample(X_train, y_train_encoded)

print(f"Original training data shape: {X_train.shape}, {y_train_encoded.shape}")
print(f"Augmented training data shape: {X_train_augmented.shape}, {y_train_augmented.shape}")

# ---- Standardise (statistics come from the oversampled training matrix) ------
scaler = StandardScaler().fit(X_train_augmented)
X_train_scaled = scaler.transform(X_train_augmented)
X_test_scaled = scaler.transform(X_test)

# ---- Fit the forest and predict on the untouched test split ------------------
rf_model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
    max_depth=6,
    min_samples_split=10,
).fit(X_train_scaled, y_train_augmented)
y_pred = rf_model.predict(X_test_scaled)

# Map integer codes back to the category names for readable reports.
y_test_decoded, y_pred_decoded = (
    label_encoder.inverse_transform(codes) for codes in (y_test_encoded, y_pred)
)

print("Confusion Matrix:", confusion_matrix(y_test_decoded, y_pred_decoded), sep="\n")

print("\nClassification Report:", classification_report(y_test_decoded, y_pred_decoded), sep="\n")


Original training data shape: (175341, 193), (175341,)
Augmented training data shape: (560000, 193), (560000,)
Confusion Matrix:
[[    0   326   287     1    63     0     0     0     0     0]
 [    0   231   287    12    19     0    16     3    11     4]
 [   69  2404   389   498   102     0   217    78   164   168]
 [  245  2410   637  4807   564     7   561   142   455  1304]
 [    0   671   569     9  3490     0   659   253   321    90]
 [    5    35     5   216   117 18095    83    74   122   119]
 [ 1519     1     0    30  4852     0 29540   449   468   141]
 [    2   306    12     4    62     0   163  2458   110   379]
 [    0     0     0     0     9     0    30    58   281     0]
 [    0     0     0     0     1     0     0     2     7    34]]

Classification Report:
                precision    recall  f1-score   support

      Analysis       0.00      0.00      0.00       677
      Backdoor       0.04      0.40      0.07       583
           DoS       0.18      0.10      0.12  

In [None]:
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Take the raw string labels straight from the loaded frames instead of from
# y_train / y_test: the XGBoost cell further down rebinds those two names to
# integer codes, so re-running this cell afterwards would otherwise fit the
# encoder on integers and the reports below would show numeric class names.
train_labels = train_data['attack_cat']
test_labels = test_data['attack_cat']

# Fit on the union of both splits so that transforming the test labels can
# never fail on a category that is absent from the training split.
all_labels = np.concatenate((train_labels, test_labels))
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)
y_train_encoded = label_encoder.transform(train_labels)
y_test_encoded = label_encoder.transform(test_labels)

# rf_model was trained on codes produced by an encoder fitted on the training
# labels alone. A category that exists only in the test split changes the
# sorted class list, shifts the integer codes, and makes the decoded forest
# predictions below point at the wrong class names -- surface that loudly.
labels_only_in_test = sorted(set(test_labels) - set(train_labels))
if labels_only_in_test:
    print(
        "WARNING: categories present only in the test split "
        f"{labels_only_in_test}; decoded Random Forest predictions may be mislabelled."
    )

# Column positions (in the scaled feature matrix) that receive the extra
# uniform perturbation in the attack below.
critical_feature_indices = [6, 9, 7, 5, 4]
def extreme_targeted_attack_v3(X, critical_feature_indices, epsilon_critical=2.5, epsilon_noise=0.3, iterations=10, random_state=None):
    """
    Repeatedly add random perturbations to a feature matrix.

    On every iteration each column listed in ``critical_feature_indices``
    receives uniform noise drawn from ``[-epsilon_critical, epsilon_critical)``;
    afterwards *every* column (the critical ones included) receives Gaussian
    noise with standard deviation ``epsilon_noise``. The perturbation is purely
    random: no model is queried.

    Args:
    - X: 2-D array-like of numeric features, one row per sample. Not modified.
    - critical_feature_indices: Column indices that get the extra uniform noise.
    - epsilon_critical: Half-width of the uniform noise for critical columns.
    - epsilon_noise: Standard deviation of the Gaussian noise added to all columns.
    - iterations: Number of times both noise passes are applied.
    - random_state: Optional integer seed for reproducible perturbations.
      ``None`` (default) draws from NumPy's global random state, as before.
    Returns:
    - X_perturbed: New floating-point ndarray with the same shape as ``X``.
    """
    # Work on a float copy: integer input would reject the in-place float add
    # and a DataFrame does not support [:, idx] indexing. Existing floating
    # dtypes are kept unchanged.
    X_arr = np.asarray(X)
    float_dtype = X_arr.dtype if np.issubdtype(X_arr.dtype, np.floating) else np.float64
    X_perturbed = X_arr.astype(float_dtype)  # astype copies by default

    # The np.random module and RandomState expose the same uniform/normal API,
    # so the unseeded path is identical to the previous behaviour.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    n_rows, n_cols = X_perturbed.shape

    for _ in range(iterations):
        # Pass 1: strong uniform noise on the critical columns only.
        for feature_idx in critical_feature_indices:
            X_perturbed[:, feature_idx] += rng.uniform(-epsilon_critical, epsilon_critical, size=n_rows)
        # Pass 2: Gaussian background noise on all columns.
        for feature_idx in range(n_cols):
            X_perturbed[:, feature_idx] += rng.normal(0, epsilon_noise, size=n_rows)

    return X_perturbed

# The attack draws from NumPy's global random state; seed it so the perturbed
# matrix -- and therefore the metrics printed below -- are the same on every run.
np.random.seed(42)
X_test_perturbed_extreme = extreme_targeted_attack_v3(X_test_scaled, critical_feature_indices, epsilon_critical=2.7, epsilon_noise=0.5, iterations=10)

# Score the already-trained forest on the perturbed test matrix and map the
# integer codes back to category names for the reports.
y_pred_perturbed_extreme = rf_model.predict(X_test_perturbed_extreme)
y_pred_perturbed_extreme_decoded = label_encoder.inverse_transform(y_pred_perturbed_extreme)
y_test_decoded = label_encoder.inverse_transform(y_test_encoded)

print("Confusion Matrix After Extreme Targeted Attack (Random Forest):")
print(confusion_matrix(y_test_decoded, y_pred_perturbed_extreme_decoded))

print("\nClassification Report After Extreme Targeted Attack (Random Forest):")
print(classification_report(y_test_decoded, y_pred_perturbed_extreme_decoded))



Confusion Matrix After Extreme Targeted Attack (Random Forest):
[[    8     6    17    58    82     3   482     1     2    18]
 [    3     5    15    43    93     9   393     0     1    21]
 [   21    28    60   298   285    13  3227     0    13   144]
 [   38    34    97  1143   688    26  8343     5    10   748]
 [   28    30    72   552   862    31  4067     1    15   404]
 [   12     8   295   700  3617  2431 11603     8    16   181]
 [   16    12    57   713   653    11 35023     3    13   499]
 [   10    18    28   289   268     6  2585     3    15   274]
 [    2     2     4    30    29     2   287     0     1    21]
 [    1     0     0     5     3     0    31     0     0     4]]

Classification Report After Extreme Targeted Attack (Random Forest):
                precision    recall  f1-score   support

      Analysis       0.06      0.01      0.02       677
      Backdoor       0.03      0.01      0.01       583
           DoS       0.09      0.01      0.03      4089
      Expl

In [60]:

from art.estimators.classification import SklearnClassifier
from art.attacks.evasion import BoundaryAttack

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from imblearn.over_sampling import SMOTE
import xgboost as xgb

# ---- Features / target -------------------------------------------------------
# NOTE(review): only 'attack_cat' is dropped. If the CSV also carries a row-id
# column or a binary attack flag (e.g. `id` / `label`), they remain as features
# and may leak the target -- confirm against the file's column list.
X_train = train_data.drop(columns=['attack_cat'])
y_train = train_data['attack_cat']
X_test = test_data.drop(columns=['attack_cat'])
y_test = test_data['attack_cat']

# One-hot encode the categorical columns, then align the test frame to the
# training columns (missing dummies become 0, test-only dummies are dropped).
categorical_features = ['proto', 'service', 'state']
X_train = pd.get_dummies(X_train, columns=categorical_features, drop_first=True)
X_test = pd.get_dummies(X_test, columns=categorical_features, drop_first=True)
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

# From here on y_train / y_test hold integer class codes, not the original
# strings; the attack-evaluation cell below relies on the encoded y_test.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

# Oversample the training split with SMOTE (default sampling strategy).
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

print(f"Original training data shape: {X_train.shape}, {y_train.shape}")
print(f"Resampled training data shape: {X_train_resampled.shape}, {y_train_resampled.shape}")

# Standardise using statistics of the oversampled training matrix.
scaler = StandardScaler()
X_train_resampled = scaler.fit_transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)

# `use_label_encoder` was dropped from the constructor call: the installed
# XGBoost reports it as an unused parameter, so passing it only emitted a
# warning and had no effect on training.
xgb_model = xgb.XGBClassifier(random_state=42, eval_metric='mlogloss')
xgb_model.fit(X_train_resampled, y_train_resampled)

# ---- Evaluate on the untouched test split ------------------------------------
y_pred = xgb_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
conf_matrix = confusion_matrix(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_rep)
print("Confusion Matrix:")
print(conf_matrix)


Original training data shape: (175341, 193), (175341,)
Resampled training data shape: (560000, 193), (560000,)


Parameters: { "use_label_encoder" } are not used.



Accuracy: 0.6119856192003109
Classification Report:
                precision    recall  f1-score   support

      Analysis       0.08      0.13      0.10       677
      Backdoor       0.02      0.22      0.03       583
           DoS       0.05      0.07      0.06      4089
      Exploits       0.52      0.49      0.50     11132
       Fuzzers       0.32      0.69      0.44      6062
       Generic       1.00      0.01      0.02     18871
        Normal       1.00      1.00      1.00     37000
Reconnaissance       0.49      0.79      0.60      3496
     Shellcode       0.24      0.93      0.39       378
         Worms       0.14      0.75      0.24        44

      accuracy                           0.61     82332
     macro avg       0.39      0.51      0.34     82332
  weighted avg       0.80      0.61      0.58     82332

Confusion Matrix:
[[   89   192   188   208     0     0     0     0     0     0]
 [   89   127   193   162     5     0     0     0     7     0]
 [  274  2574   2

In [None]:
def dynamic_targeted_attack(X, critical_feature_indices, initial_epsilon=0.1, max_iterations=20, random_state=None):
    """
    Randomly perturb selected columns with a step size that grows each iteration.

    At iteration ``i`` (1-based) the base step is
    ``initial_epsilon * i / max_iterations``. Every row of every critical
    column is shifted by that step with a random sign, scaled by a random
    factor in ``[1, 2)``. After each iteration the matrix is clipped to the
    per-column minimum and maximum of the original ``X``, so perturbed values
    never leave the observed range. The perturbation is purely random: no
    model is queried.

    Args:
    - X: 2-D array-like of numeric features, one row per sample. Not modified.
    - critical_feature_indices: Indices of the columns to be perturbed.
    - initial_epsilon: Step magnitude reached at the final iteration.
    - max_iterations: Number of perturbation rounds; values <= 0 return an
      unmodified copy.
    - random_state: Optional integer seed for reproducible perturbations.
      ``None`` (default) draws from NumPy's global random state, as before.

    Returns:
    - X_perturbed: New floating-point ndarray with the same shape as ``X``.
    """
    # Work on a float copy: integer input would reject the in-place float add
    # and a DataFrame does not support [:, idx] indexing. Existing floating
    # dtypes are kept unchanged.
    X_arr = np.asarray(X)
    float_dtype = X_arr.dtype if np.issubdtype(X_arr.dtype, np.floating) else np.float64
    X_perturbed = X_arr.astype(float_dtype)  # astype copies by default

    n_rows = X_perturbed.shape[0]
    # Nothing to perturb; also avoids min()/max() on a zero-row array, which raises.
    if max_iterations <= 0 or n_rows == 0:
        return X_perturbed

    # The np.random module and RandomState expose the same choice/uniform API,
    # so the unseeded path is identical to the previous behaviour.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # The clipping bounds depend only on the original data: compute them once
    # instead of on every iteration.
    lower_bounds = X_arr.min(axis=0)
    upper_bounds = X_arr.max(axis=0)

    epsilon = initial_epsilon
    for iteration in range(1, max_iterations + 1):
        # Linear ramp from epsilon / max_iterations up to epsilon.
        epsilon_step = epsilon * (iteration / max_iterations)

        for feature_idx in critical_feature_indices:
            signed_step = rng.choice([-epsilon_step, epsilon_step], size=n_rows)
            X_perturbed[:, feature_idx] += signed_step * rng.uniform(1, 2, size=n_rows)

        # Keep every value inside the range observed in the original data.
        np.clip(X_perturbed, lower_bounds, upper_bounds, out=X_perturbed)

    return X_perturbed

# Column positions (in the scaled feature matrix) perturbed by the attack.
# NOTE(review): index 0 is whatever column comes first in the CSV -- confirm it
# is a genuine feature and not a row identifier.
critical_features = [6, 9, 7, 8, 0]
initial_epsilon = 4.0
max_iterations = 20

# The attack draws from NumPy's global random state; seed it so the perturbed
# matrix -- and therefore the metrics printed below -- are the same on every run.
np.random.seed(42)
X_test_perturbed = dynamic_targeted_attack(X_test_scaled, critical_features, initial_epsilon=initial_epsilon, max_iterations=max_iterations)

y_pred_perturbed = xgb_model.predict(X_test_perturbed)
conf_matrix_perturbed = confusion_matrix(y_test, y_pred_perturbed)
# zero_division=0 reports the same 0.0 scores for classes that are never
# predicted, but without the undefined-metric warnings the default emits.
classification_report_perturbed = classification_report(
    y_test,
    y_pred_perturbed,
    target_names=label_encoder.classes_,
    zero_division=0,
)
print("Confusion Matrix After Stronger Attack (XGBoost):")
print(conf_matrix_perturbed)
print("\nClassification Report After Stronger Targeted Attack (XGBoost):")
print(classification_report_perturbed)



Confusion Matrix After Stronger Attack (XGBoost):
[[   41   182   215   210    29     0     0     0     0     0]
 [   38    89   203   224    28     0     0     0     0     1]
 [  446  2303   416   603   193     0     0    21     3   104]
 [  454  3992  1480  3513   967     0     0   109    11   606]
 [  119   775   961  1384  2777     0     0    13    15    18]
 [    2   128  2634 10487  5559     0     0    17     1    43]
 [    0     0     0     0     0     0 37000     0     0     0]
 [   53   742   383   925   778     0     0   440    47   128]
 [    0    78    44   133   108     0     0     2     9     4]
 [    0     9     6     8     6     0     0     0     0    15]]

Classification Report After Stronger Targeted Attack (XGBoost):
                precision    recall  f1-score   support

      Analysis       0.04      0.06      0.04       677
      Backdoor       0.01      0.15      0.02       583
           DoS       0.07      0.10      0.08      4089
      Exploits       0.20    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
