In [58]:
import warnings
warnings.filterwarnings("ignore")

In [59]:
import gc
import numpy as np
import pandas as pd
import tensorflow as tf
import xgboost as xgb

from collections import defaultdict
from sklearn.preprocessing import OneHotEncoder, label_binarize
from sklearn.metrics import accuracy_score, classification_report, balanced_accuracy_score, roc_auc_score, average_precision_score, fbeta_score, matthews_corrcoef
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, Input, Conv1D, Lambda, Flatten

In [60]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df


def import_data(file):
    """create a dataframe and optimize its memory usage"""
    df = pd.read_csv(file, parse_dates=True, keep_date_col=True)
    df = reduce_mem_usage(df)
    return df

In [61]:
def create_convnext_like_feature_extractor(input_dim, sliding_window_size=3):
    input_layer = Input(shape=(input_dim,))
    reshaped_input = Lambda(lambda x: tf.expand_dims(x, axis=-1))(input_layer)
    x = reshaped_input

    # Simulated ConvNeXt block
    x = Conv1D(filters=int(max(16, input_dim * 2)), kernel_size=sliding_window_size, padding='same', activation='gelu', strides=1)(x)
    x = LayerNormalization()(x)
    x = Conv1D(filters=int(max(16, input_dim)), kernel_size=sliding_window_size, padding='same', activation='gelu', strides=1)(x)
    x = LayerNormalization()(x)
    x = Conv1D(filters=int(max(16, input_dim // 2)), kernel_size=sliding_window_size, padding='same', activation='gelu', strides=1)(x)
    x = LayerNormalization()(x)
    x = Conv1D(filters=int(max(16, input_dim // 4)), kernel_size=sliding_window_size, padding='same', activation='gelu', strides=1)(x)
    x = LayerNormalization()(x)
    x = Conv1D(filters=int(max(16, input_dim // 8)), kernel_size=sliding_window_size, padding='same', activation='gelu', strides=1)(x)
    x = LayerNormalization()(x)

    # Flatten the output for feature extraction
    feature_output = Flatten()(x)

    # Reconstruction head for self-supervised learning
    reconstruction_output = Dense(input_dim, activation='linear')(feature_output)

    # Create the model
    model = Model(inputs=input_layer, outputs=reconstruction_output)
    return model

In [62]:
def focal_loss_multiclass(alpha=0.25, gamma=2.0):
    def loss_function(y_pred, dtrain):
        y_true = dtrain.get_label().astype(int)
        y_true_one_hot = np.eye(y_pred.shape[1])[y_true]
        
        p_t = np.sum(y_true_one_hot * y_pred, axis=1, keepdims=True)
        modulating_factor = (1 - p_t) ** gamma
        alpha_weight = np.sum(y_true_one_hot * alpha, axis=1, keepdims=True)
        
        grad = -alpha_weight * modulating_factor * (y_true_one_hot - y_pred)
        hess = alpha_weight * modulating_factor * y_pred * (1 - y_pred) * (gamma * (y_true_one_hot - y_pred) + 1)
        
        return grad, hess
    
    return loss_function

In [63]:
def run_model(train_data_path, test_data_path, is_string_labels = False, label_mapping = None):

    # Load and Prepare Training Data
    train_data = pd.read_csv(train_data_path)
    train_data = train_data.sample(frac=1).reset_index(drop=True)  # Shuffle
    train_X = train_data.drop(['label'], axis=1)
    if (is_string_labels):
        train_y = train_data['label'].map(label_mapping)
    else:
        train_y = train_data['label'] - 1
    del train_data
    gc.collect()

    # Load and Prepare Test Data (this will not be used in training)
    test_data = pd.read_csv(test_data_path)
    test_data = test_data.sample(frac=1).reset_index(drop=True)  # Shuffle
    test_X = test_data.drop(['label'], axis=1)
    if (is_string_labels):
        test_y = test_data['label'].map(label_mapping)
    else:
        test_y = test_data['label'] - 1
    test_y_binarized = label_binarize(test_y, classes=range(len(np.unique(test_y))))
    del test_data
    gc.collect()

    # Number of runs for averaging results
    num_runs = 50

    # Initialize storage for metrics
    metrics_storage = defaultdict(list)

    # Define XGBoost parameters
    params = {
        "num_class": len(train_y.unique()),
        "eval_metric": "mlogloss",
        "max_depth": 6,
        "eta": 0.3,
        "seed": 42
    }

    # Train the Model with Validation Split N tines for more accurate metrics
    #print("Verbose output only for first run...")
    verbose_run = 0
    for run in range(num_runs):
        print(f"Run {run + 1}/{num_runs} started...")
                
        # Train the NN feature extractor
        feature_extractor = create_convnext_like_feature_extractor(train_X.shape[1])
        feature_extractor.compile(optimizer='adam', loss='categorical_focal_crossentropy', metrics=['accuracy'])
        feature_extractor.fit(train_X, train_X, epochs=3, batch_size=32, verbose=0)  # Self-supervised training

        # Extract features
        feature_extractor_truncated = Model(inputs=feature_extractor.input, outputs=feature_extractor.layers[-2].output)
        train_features = feature_extractor_truncated.predict(train_X, verbose=0)
        test_features = feature_extractor_truncated.predict(test_X, verbose=0)
        del feature_extractor
        gc.collect()

        train_combined = np.hstack((train_X, train_features))
        test_combined = np.hstack((test_X, test_features))
        del train_features
        del test_features
        gc.collect()

        # Convert data to DMatrix
        dtrain = xgb.DMatrix(data=train_combined, label=train_y)
        dtest = xgb.DMatrix(data=test_combined, label=test_y)
        del train_combined
        del test_combined
        gc.collect()

        # Train XGBoost
        num_round = 100
        bst = xgb.train(params, dtrain, num_round, obj=focal_loss_multiclass())

        # Make predictions
        test_predictions = bst.predict(dtest)
        del dtrain
        del dtest
        gc.collect()

        # Compute metrics
        accuracy = accuracy_score(test_y, test_predictions)
        balanced_acc = balanced_accuracy_score(test_y, test_predictions)
        f2 = fbeta_score(test_y, test_predictions, beta=2, average='weighted')
        mcc = matthews_corrcoef(test_y, test_predictions)

        # Store metrics
        metrics_storage['test_accuracy'].append(accuracy)
        metrics_storage['balanced_accuracy'].append(balanced_acc)
        metrics_storage['f2'].append(f2)
        metrics_storage['mcc'].append(mcc)

        # Store classification report metrics
        report = classification_report(test_y, test_predictions, output_dict=True)
        for label, values in report.items():
            # Check if the value is a dictionary (e.g., 'precision', 'recall', 'f1-score')
            if isinstance(values, dict):
                for metric, value in values.items():
                    metrics_storage[f"{label}_{metric}"].append(value)
            else:
                # Handle scalar values (like 'accuracy')
                metrics_storage[label].append(values)

        # Average the metrics over all successful runs
        print(f"\nAggregated Metrics for {run+1} runs:")
        for metric, values in metrics_storage.items():
            avg_value = np.mean(values)
            print(f"{metric}: {avg_value:.4f}")

    gc.collect()

In [64]:
run_model("shuttle_bsmote.csv", "shuttle_test.csv", is_string_labels = False)

Run 1/50 started...

Aggregated Metrics for 1 runs:
test_accuracy: 0.9929
balanced_accuracy: 0.8826
f2: 0.9931
mcc: 0.9802
0_precision: 0.9984
0_recall: 0.9963
0_f1-score: 0.9973
0_support: 9117.0000
1_precision: 0.3448
1_recall: 1.0000
1_f1-score: 0.5128
1_support: 10.0000
2_precision: 0.7273
2_recall: 0.7059
2_f1-score: 0.7164
2_support: 34.0000
3_precision: 0.9837
3_recall: 0.9809
3_f1-score: 0.9823
3_support: 1781.0000
4_precision: 0.9985
4_recall: 0.9954
4_f1-score: 0.9969
4_support: 653.0000
5_precision: 0.3333
5_recall: 0.5000
5_f1-score: 0.4000
5_support: 2.0000
6_precision: 0.3000
6_recall: 1.0000
6_f1-score: 0.4615
6_support: 3.0000
accuracy: 0.9929
macro avg_precision: 0.6694
macro avg_recall: 0.8826
macro avg_f1-score: 0.7239
macro avg_support: 11600.0000
weighted avg_precision: 0.9945
weighted avg_recall: 0.9929
weighted avg_f1-score: 0.9935
weighted avg_support: 11600.0000
Run 2/50 started...

Aggregated Metrics for 2 runs:
test_accuracy: 0.9887
balanced_accuracy: 0.9001


In [None]:
run_model("covtype_bsmote.csv", "covtype_test.csv", is_string_labels = False)

Run 1/5 started...

Aggregated Metrics for 1 runs:
test_accuracy: 0.6604
balanced_accuracy: 0.4941
f2: 0.6505
mcc: 0.4412
0_precision: 0.6860
0_recall: 0.6050
0_f1-score: 0.6429
0_support: 42368.0000
1_precision: 0.6884
1_recall: 0.8010
1_f1-score: 0.7404
1_support: 56661.0000
2_precision: 0.6724
2_recall: 0.2577
2_f1-score: 0.3726
2_support: 7151.0000
3_precision: 0.1530
3_recall: 0.9745
3_f1-score: 0.2645
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.5959
5_recall: 0.0251
5_f1-score: 0.0481
5_support: 3473.0000
6_precision: 0.5044
6_recall: 0.7955
6_f1-score: 0.6173
6_support: 4102.0000
accuracy: 0.6604
macro avg_precision: 0.4714
macro avg_recall: 0.4941
macro avg_f1-score: 0.3837
macro avg_support: 116203.0000
weighted avg_precision: 0.6635
weighted avg_recall: 0.6604
weighted avg_f1-score: 0.6429
weighted avg_support: 116203.0000
Run 2/5 started...

Aggregated Metrics for 2 runs:
test_accuracy: 0.6554
balanced_accur

In [None]:
labels_map = {
    'normal.': 0, 'satan.': 1, 'ipsweep.': 2, 'portsweep.': 3, 'nmap.': 4,
    'back.': 5, 'warezclient.': 6, 'teardrop.': 7, 'pod.': 8, 'guess_passwd.': 9,
    'buffer_overflow.': 10, 'land.': 11, 'warezmaster.': 12, 'imap.': 13, 'rootkit.': 14,
    'loadmodule.': 15, 'multihop.': 16, 'ftp_write.': 17, 'phf.': 18, 'perl.': 19, 'spy.': 20
}

run_model("kdd_bsmote.csv", "kdd_test.csv", is_string_labels = True, label_mapping=labels_map)

Run 1/50 started...


Aggregated Metrics for 1 runs:
test_accuracy: 0.9380
balanced_accuracy: 0.3080
f2: 0.9385
mcc: 0.3369
0_precision: 0.9724
0_recall: 0.9666
0_f1-score: 0.9694
0_support: 194557.0000
1_precision: 0.0000
1_recall: 0.0000
1_f1-score: 0.0000
1_support: 3178.0000
2_precision: 0.7918
2_recall: 0.1006
2_f1-score: 0.1785
2_support: 2496.0000
3_precision: 0.8383
3_recall: 0.9784
3_f1-score: 0.9030
3_support: 2083.0000
4_precision: 1.0000
4_recall: 0.0713
4_f1-score: 0.1331
4_support: 463.0000
5_precision: 0.9909
5_recall: 0.9841
5_f1-score: 0.9875
5_support: 441.0000
6_precision: 0.0251
6_recall: 0.7010
6_f1-score: 0.0484
6_support: 204.0000
7_precision: 0.6256
7_recall: 0.6480
7_f1-score: 0.6366
7_support: 196.0000
8_precision: 1.0000
8_recall: 0.0189
8_f1-score: 0.0370
8_support: 53.0000
9_precision: 0.0000
9_recall: 0.0000
9_f1-score: 0.0000
9_support: 11.0000
10_precision: 0.0106
10_recall: 0.5000
10_f1-score: 0.0208
10_support: 6.0000
11_precision: 0.2000
11_recall: 0.

In [None]:
labels_map = {
    'Normal': 0, 'Darknet_Audio-Streaming': 1, 'Darknet_Chat': 2, 'Darknet_File-Transfer': 3, 'Darknet_VOIP': 4,
    'Darknet_Video-Streaming': 5, 'Darknet_Email': 6, 'Darknet_Browsing': 7, 'Darknet_P2P': 8
}

run_model("darknet_bsmote.csv", "darknet_test.csv", is_string_labels = True, label_mapping = labels_map)

Run 1/10 started...


Aggregated Metrics for 1 runs:
test_accuracy: 0.9633
balanced_accuracy: 0.8125
f2: 0.9641
mcc: 0.8740
0_precision: 0.9918
0_recall: 0.9736
0_f1-score: 0.9826
0_support: 26862.0000
1_precision: 0.9981
1_recall: 0.9721
1_f1-score: 0.9849
1_support: 2657.0000
2_precision: 0.8303
2_recall: 0.9912
2_f1-score: 0.9036
2_support: 908.0000
3_precision: 0.8946
3_recall: 0.9598
3_f1-score: 0.9261
3_support: 522.0000
4_precision: 0.0988
4_recall: 0.1092
4_f1-score: 0.1037
4_support: 293.0000
5_precision: 0.4865
5_recall: 0.8067
5_f1-score: 0.6070
5_support: 269.0000
6_precision: 0.6522
6_recall: 0.7759
6_f1-score: 0.7087
6_support: 116.0000
7_precision: 0.2945
7_recall: 0.9057
7_f1-score: 0.4444
7_support: 53.0000
8_precision: 0.7059
8_recall: 0.8182
8_f1-score: 0.7579
8_support: 44.0000
accuracy: 0.9633
macro avg_precision: 0.6614
macro avg_recall: 0.8125
macro avg_f1-score: 0.7132
macro avg_support: 31724.0000
weighted avg_precision: 0.9708
weighted avg_recall: 0.9633
weigh