In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import gc
import numpy as np
import pandas as pd
import tensorflow as tf

from collections import defaultdict
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report, balanced_accuracy_score, roc_auc_score, average_precision_score, fbeta_score, matthews_corrcoef
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, Input, Conv1D, Lambda, Flatten, Concatenate

In [3]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df


def import_data(file):
    """create a dataframe and optimize its memory usage"""
    df = pd.read_csv(file, parse_dates=True, keep_date_col=True)
    df = reduce_mem_usage(df)
    return df

In [4]:
def create_random_combination_layer(input_layer, combination_size, num_combinations, input_dim):
    outputs = []
    
    for _ in range(num_combinations):
        # First random feature selection
        indices_1 = np.random.choice(input_dim, combination_size, replace=False)
        indices_tensor_1 = tf.constant(indices_1, dtype=tf.int32)
        
        # First feature selection using Lambda layer
        slice_layer_1 = Lambda(
            lambda x: tf.gather(x, indices_tensor_1, axis=1),  # Gather selected features
        )(input_layer)

        # Apply Dense layers on the final selected subset
        reshaped_features = Lambda(lambda x: tf.reshape(x, (-1, combination_size)))(slice_layer_1)
        selected_features = Dense(16, activation='gelu')(reshaped_features)
        outputs.append(selected_features)
    
    # Concatenate the outputs from all the random feature combinations
    return Concatenate()(outputs)

In [5]:
def create_convnext_like_model(input_dim, num_classes, sliding_window_size=3):
    # Input layer
    input_layer = Input(shape=(input_dim,))
    
    # Reshape input to be compatible with Conv1D using Lambda layer
    reshaped_input = Lambda(lambda x: tf.expand_dims(x, axis=-1))(input_layer)  # Shape: (batch_size, input_dim, 1)
    x = reshaped_input
    
    # Simulate ConvNeXt block using random combination layers
    x = create_random_combination_layer(input_layer=x, combination_size=sliding_window_size, num_combinations=int(max(16, input_dim * 2)), input_dim=input_dim)
    x = LayerNormalization()(x)
    x = create_random_combination_layer(input_layer=x, combination_size=sliding_window_size, num_combinations=int(max(16, input_dim)), input_dim=int(max(16, input_dim * 2)))
    x = LayerNormalization()(x)
    x = create_random_combination_layer(input_layer=x, combination_size=sliding_window_size, num_combinations=int(max(16, input_dim // 2)), input_dim=int(max(16, input_dim)))
    x = LayerNormalization()(x)
    x = create_random_combination_layer(input_layer=x, combination_size=sliding_window_size, num_combinations=int(max(16, input_dim // 4)), input_dim=int(max(16, input_dim // 2)))
    x = LayerNormalization()(x)
    x = create_random_combination_layer(input_layer=x, combination_size=sliding_window_size, num_combinations=int(max(16, input_dim // 8)), input_dim=int(max(16, input_dim // 4)))
    x = LayerNormalization()(x)
    
    # Flatten the output of the final layer
    x = Flatten()(x)
    
    # Global pooling and classification head
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.2)(x)
    output_layer = Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [8]:
def run_model(train_data_path, test_data_path, is_string_labels = False, label_mapping = None):

    # Initialize the one-hot encoder for the target
    encoder = OneHotEncoder(sparse_output=False)

    # Load and Prepare Training Data
    train_data = pd.read_csv(train_data_path)
    train_data = train_data.sample(frac=1).reset_index(drop=True)  # Shuffle
    if (is_string_labels):
        train_data['label'] = train_data['label'].map(label_mapping)
    train_X = train_data.drop(columns=['label']).values
    train_y = train_data['label'].values
    train_y = encoder.fit_transform(train_y.reshape(-1, 1))
    del train_data
    gc.collect()

    # Load and Prepare Test Data (this will not be used in training)
    test_data = pd.read_csv(test_data_path)
    test_data = test_data.sample(frac=1).reset_index(drop=True)  # Shuffle
    if (is_string_labels):
        test_data['label'] = test_data['label'].map(label_mapping)
    test_X = test_data.drop(columns=['label']).values
    test_y = test_data['label'].values
    test_y = encoder.transform(test_y.reshape(-1, 1))
    del test_data
    gc.collect()

    # EarlyStopping Callback (optional, to avoid overfitting)
    early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

    # Number of runs for averaging results
    num_runs = 50

    # Initialize storage for metrics
    metrics_storage = defaultdict(list)

    # Train the Model with Validation Split N tines for more accurate metrics
    print("Verbose output only for first run...")
    verbose_run = 1
    for run in range(num_runs):
        
        # Model is defined separately in each run, since the random combination layers
        # must be randomly initialized each time. Otherwise, the "random" indices stay the same
        # throughout all runs
        model = create_convnext_like_model(train_X.shape[1], test_y.shape[1])

        print(f"Run {run + 1}/{num_runs} started...")
        history = model.fit(
            train_X, train_y, 
            epochs=100, 
            batch_size=int(train_X.shape[0] * 0.01),
            callbacks=[early_stopping],
            verbose=verbose_run
        )
        verbose_run = 0 # Suppress detailed output for multiple runs

        test_loss, test_acc = model.evaluate(test_X, test_y, verbose=0)
        y_pred = model.predict(test_X, verbose=0)
        y_pred_classes = y_pred.argmax(axis=1)
        y_true_classes = test_y.argmax(axis=1)
        del model
        gc.collect()

        # Compute metrics
        balanced_acc = balanced_accuracy_score(y_true_classes, y_pred_classes)
        roc_auc = roc_auc_score(test_y, y_pred, multi_class='ovr')  # `test_y` is fine here for AUC
        pr_auc = average_precision_score(test_y, y_pred, average='weighted')
        f2 = fbeta_score(y_true_classes, y_pred_classes, beta=2, average='weighted')
        mcc = matthews_corrcoef(y_true_classes, y_pred_classes)

        # Store metrics
        metrics_storage['test_loss'].append(test_loss)
        metrics_storage['test_accuracy'].append(test_acc)
        metrics_storage['balanced_accuracy'].append(balanced_acc)
        metrics_storage['roc_auc'].append(roc_auc)
        metrics_storage['pr_auc'].append(pr_auc)
        metrics_storage['f2'].append(f2)
        metrics_storage['mcc'].append(mcc)

        # Store classification report metrics
        report = classification_report(y_true_classes, y_pred_classes, output_dict=True)
        for label, values in report.items():
            # Check if the value is a dictionary (e.g., 'precision', 'recall', 'f1-score')
            if isinstance(values, dict):
                for metric, value in values.items():
                    metrics_storage[f"{label}_{metric}"].append(value)
            else:
                # Handle scalar values (like 'accuracy')
                metrics_storage[label].append(values)

        # Average the metrics over all successful runs
        print(f"\nAggregated Metrics for {run+1} runs:")
        for metric, values in metrics_storage.items():
            avg_value = np.mean(values)
            print(f"{metric}: {avg_value:.4f}")

    gc.collect()
        

In [None]:
run_model("/kaggle/input/ma-datasets/shuttle_train.csv", "/kaggle/input/ma-datasets/shuttle_test.csv", is_string_labels = False)

Run 1/50 started...

Aggregated Metrics for 1 runs:
test_loss: 0.0050
test_accuracy: 0.9987
balanced_accuracy: 0.6724
roc_auc: 0.9993
pr_auc: 0.9993
f2: 0.9986
mcc: 0.9964
0_precision: 0.9991
0_recall: 0.9999
0_f1-score: 0.9995
0_support: 9117.0000
1_precision: 1.0000
1_recall: 0.8000
1_f1-score: 0.8889
1_support: 10.0000
2_precision: 0.9394
2_recall: 0.9118
2_f1-score: 0.9254
2_support: 34.0000
3_precision: 0.9983
3_recall: 0.9994
3_f1-score: 0.9989
3_support: 1781.0000
4_precision: 0.9985
4_recall: 0.9954
4_f1-score: 0.9969
4_support: 653.0000
5_precision: 0.0000
5_recall: 0.0000
5_f1-score: 0.0000
5_support: 2.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 3.0000
accuracy: 0.9987
macro avg_precision: 0.7050
macro avg_recall: 0.6724
macro avg_f1-score: 0.6871
macro avg_support: 11600.0000
weighted avg_precision: 0.9984
weighted avg_recall: 0.9987
weighted avg_f1-score: 0.9985
weighted avg_support: 11600.0000
Run 2/50 started...

Aggregated Metrics for 2 runs:

In [None]:
run_model("/kaggle/input/ma-datasets/covtype_train.csv", "/kaggle/input/ma-datasets/covtype_test.csv", is_string_labels = False)

Run 1/50 started...



Aggregated Metrics for 1 runs:
test_loss: 1.1533
test_accuracy: 0.4889
balanced_accuracy: 0.1511
roc_auc: 0.6460
pr_auc: 0.4380
f2: 0.4070
mcc: 0.0521
0_precision: 0.0000
0_recall: 0.0000
0_f1-score: 0.0000
0_support: 42368.0000
1_precision: 0.4907
1_recall: 0.9947
1_f1-score: 0.6572
1_support: 56661.0000
2_precision: 0.3366
2_recall: 0.0629
2_f1-score: 0.1060
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0000
5_recall: 0.0000
5_f1-score: 0.0000
5_support: 3473.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 4102.0000
accuracy: 0.4889
macro avg_precision: 0.1182
macro avg_recall: 0.1511
macro avg_f1-score: 0.1090
macro avg_support: 116203.0000
weighted avg_precision: 0.2600
weighted avg_recall: 0.4889
weighted avg_f1-score: 0.3270
weighted avg_support: 116203.0000


Run 2/50 started...



Aggregated Metrics for 2 runs:
test_loss: 1.0865
test_accuracy: 0.5072
balanced_accuracy: 0.1931
roc_auc: 0.7081
pr_auc: 0.4615
f2: 0.4483
mcc: 0.1145
0_precision: 0.2465
0_recall: 0.1012
0_f1-score: 0.1435
0_support: 42368.0000
1_precision: 0.5091
1_recall: 0.9231
1_f1-score: 0.6543
1_support: 56661.0000
2_precision: 0.4565
2_recall: 0.3272
2_f1-score: 0.3449
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0000
5_recall: 0.0000
5_f1-score: 0.0000
5_support: 3473.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 4102.0000
accuracy: 0.5072
macro avg_precision: 0.1731
macro avg_recall: 0.1931
macro avg_f1-score: 0.1632
macro avg_support: 116203.0000
weighted avg_precision: 0.3662
weighted avg_recall: 0.5072
weighted avg_f1-score: 0.3926
weighted avg_support: 116203.0000


Run 3/50 started...



Aggregated Metrics for 3 runs:
test_loss: 1.1187
test_accuracy: 0.5033
balanced_accuracy: 0.1841
roc_auc: 0.6476
pr_auc: 0.4367
f2: 0.4372
mcc: 0.1116
0_precision: 0.1643
0_recall: 0.0675
0_f1-score: 0.0956
0_support: 42368.0000
1_precision: 0.5038
1_recall: 0.9477
1_f1-score: 0.6562
1_support: 56661.0000
2_precision: 0.5252
2_recall: 0.2659
2_f1-score: 0.3085
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.1808
5_recall: 0.0074
5_f1-score: 0.0142
5_support: 3473.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 4102.0000
accuracy: 0.5033
macro avg_precision: 0.1963
macro avg_recall: 0.1841
macro avg_f1-score: 0.1535
macro avg_support: 116203.0000
weighted avg_precision: 0.3433
weighted avg_recall: 0.5033
weighted avg_f1-score: 0.3742
weighted avg_support: 116203.0000


Run 4/50 started...



Aggregated Metrics for 4 runs:
test_loss: 1.1317
test_accuracy: 0.5028
balanced_accuracy: 0.1751
roc_auc: 0.6246
pr_auc: 0.4271
f2: 0.4339
mcc: 0.1071
0_precision: 0.2614
0_recall: 0.0609
0_f1-score: 0.0909
0_support: 42368.0000
1_precision: 0.5028
1_recall: 0.9601
1_f1-score: 0.6586
1_support: 56661.0000
2_precision: 0.3939
2_recall: 0.1994
2_f1-score: 0.2314
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.1356
5_recall: 0.0055
5_f1-score: 0.0107
5_support: 3473.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 4102.0000
accuracy: 0.5028
macro avg_precision: 0.1848
macro avg_recall: 0.1751
macro avg_f1-score: 0.1416
macro avg_support: 116203.0000
weighted avg_precision: 0.3688
weighted avg_recall: 0.5028
weighted avg_f1-score: 0.3688
weighted avg_support: 116203.0000


Run 5/50 started...



Aggregated Metrics for 5 runs:
test_loss: 1.1362
test_accuracy: 0.4997
balanced_accuracy: 0.1687
roc_auc: 0.6183
pr_auc: 0.4215
f2: 0.4277
mcc: 0.0857
0_precision: 0.2092
0_recall: 0.0487
0_f1-score: 0.0727
0_support: 42368.0000
1_precision: 0.4997
1_recall: 0.9680
1_f1-score: 0.6580
1_support: 56661.0000
2_precision: 0.3151
2_recall: 0.1595
2_f1-score: 0.1851
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.1085
5_recall: 0.0044
5_f1-score: 0.0085
5_support: 3473.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 4102.0000
accuracy: 0.4997
macro avg_precision: 0.1618
macro avg_recall: 0.1687
macro avg_f1-score: 0.1320
macro avg_support: 116203.0000
weighted avg_precision: 0.3426
weighted avg_recall: 0.4997
weighted avg_f1-score: 0.3590
weighted avg_support: 116203.0000


Run 6/50 started...



Aggregated Metrics for 6 runs:
test_loss: 1.1471
test_accuracy: 0.4980
balanced_accuracy: 0.1650
roc_auc: 0.5997
pr_auc: 0.4148
f2: 0.4239
mcc: 0.0790
0_precision: 0.1743
0_recall: 0.0406
0_f1-score: 0.0606
0_support: 42368.0000
1_precision: 0.4979
1_recall: 0.9733
1_f1-score: 0.6577
1_support: 56661.0000
2_precision: 0.3586
2_recall: 0.1375
2_f1-score: 0.1630
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0904
5_recall: 0.0037
5_f1-score: 0.0071
5_support: 3473.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 4102.0000
accuracy: 0.4980
macro avg_precision: 0.1602
macro avg_recall: 0.1650
macro avg_f1-score: 0.1269
macro avg_support: 116203.0000
weighted avg_precision: 0.3311
weighted avg_recall: 0.4980
weighted avg_f1-score: 0.3531
weighted avg_support: 116203.0000


Run 7/50 started...



Aggregated Metrics for 7 runs:
test_loss: 1.1492
test_accuracy: 0.5002
balanced_accuracy: 0.1686
roc_auc: 0.5963
pr_auc: 0.4135
f2: 0.4275
mcc: 0.0883
0_precision: 0.2386
0_recall: 0.0474
0_f1-score: 0.0741
0_support: 42368.0000
1_precision: 0.4996
1_recall: 0.9724
1_f1-score: 0.6592
1_support: 56661.0000
2_precision: 0.3074
2_recall: 0.1179
2_f1-score: 0.1397
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0775
5_recall: 0.0032
5_f1-score: 0.0061
5_support: 3473.0000
6_precision: 0.0587
6_recall: 0.0391
6_f1-score: 0.0470
6_support: 4102.0000
accuracy: 0.5002
macro avg_precision: 0.1688
macro avg_recall: 0.1686
macro avg_f1-score: 0.1323
macro avg_support: 116203.0000
weighted avg_precision: 0.3539
weighted avg_recall: 0.5002
weighted avg_f1-score: 0.3589
weighted avg_support: 116203.0000


Run 8/50 started...



Aggregated Metrics for 8 runs:
test_loss: 1.1515
test_accuracy: 0.5031
balanced_accuracy: 0.1689
roc_auc: 0.6012
pr_auc: 0.4172
f2: 0.4317
mcc: 0.0985
0_precision: 0.3056
0_recall: 0.0565
0_f1-score: 0.0909
0_support: 42368.0000
1_precision: 0.5006
1_recall: 0.9722
1_f1-score: 0.6601
1_support: 56661.0000
2_precision: 0.3421
2_recall: 0.1163
2_f1-score: 0.1445
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0678
5_recall: 0.0028
5_f1-score: 0.0053
5_support: 3473.0000
6_precision: 0.0514
6_recall: 0.0343
6_f1-score: 0.0411
6_support: 4102.0000
accuracy: 0.5031
macro avg_precision: 0.1811
macro avg_recall: 0.1689
macro avg_f1-score: 0.1346
macro avg_support: 116203.0000
weighted avg_precision: 0.3804
weighted avg_recall: 0.5031
weighted avg_f1-score: 0.3655
weighted avg_support: 116203.0000


Run 9/50 started...



Aggregated Metrics for 9 runs:
test_loss: 1.1539
test_accuracy: 0.5020
balanced_accuracy: 0.1678
roc_auc: 0.5958
pr_auc: 0.4164
f2: 0.4295
mcc: 0.0970
0_precision: 0.2716
0_recall: 0.0503
0_f1-score: 0.0808
0_support: 42368.0000
1_precision: 0.4996
1_recall: 0.9749
1_f1-score: 0.6599
1_support: 56661.0000
2_precision: 0.3691
2_recall: 0.1151
2_f1-score: 0.1483
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0603
5_recall: 0.0025
5_f1-score: 0.0047
5_support: 3473.0000
6_precision: 0.1568
6_recall: 0.0321
6_f1-score: 0.0398
6_support: 4102.0000
accuracy: 0.5020
macro avg_precision: 0.1939
macro avg_recall: 0.1678
macro avg_f1-score: 0.1334
macro avg_support: 116203.0000
weighted avg_precision: 0.3727
weighted avg_recall: 0.5020
weighted avg_f1-score: 0.3619
weighted avg_support: 116203.0000


Run 10/50 started...



Aggregated Metrics for 10 runs:
test_loss: 1.1557
test_accuracy: 0.5036
balanced_accuracy: 0.1682
roc_auc: 0.5931
pr_auc: 0.4155
f2: 0.4331
mcc: 0.1008
0_precision: 0.3060
0_recall: 0.0620
0_f1-score: 0.0990
0_support: 42368.0000
1_precision: 0.5003
1_recall: 0.9697
1_f1-score: 0.6594
1_support: 56661.0000
2_precision: 0.3865
2_recall: 0.1144
2_f1-score: 0.1515
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0542
5_recall: 0.0022
5_f1-score: 0.0043
5_support: 3473.0000
6_precision: 0.1411
6_recall: 0.0289
6_f1-score: 0.0359
6_support: 4102.0000
accuracy: 0.5036
macro avg_precision: 0.1983
macro avg_recall: 0.1682
macro avg_f1-score: 0.1357
macro avg_support: 116203.0000
weighted avg_precision: 0.3859
weighted avg_recall: 0.5036
weighted avg_f1-score: 0.3683
weighted avg_support: 116203.0000


Run 11/50 started...



Aggregated Metrics for 11 runs:
test_loss: 1.1198
test_accuracy: 0.5182
balanced_accuracy: 0.1817
roc_auc: 0.6215
pr_auc: 0.4376
f2: 0.4530
mcc: 0.1313
0_precision: 0.3378
0_recall: 0.1123
0_f1-score: 0.1478
0_support: 42368.0000
1_precision: 0.5172
1_recall: 0.9549
1_f1-score: 0.6668
1_support: 56661.0000
2_precision: 0.4016
2_recall: 0.1683
2_f1-score: 0.1942
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0493
5_recall: 0.0020
5_f1-score: 0.0039
5_support: 3473.0000
6_precision: 0.1787
6_recall: 0.0342
6_f1-score: 0.0463
6_support: 4102.0000
accuracy: 0.5182
macro avg_precision: 0.2121
macro avg_recall: 0.1817
macro avg_f1-score: 0.1513
macro avg_support: 116203.0000
weighted avg_precision: 0.4079
weighted avg_recall: 0.5182
weighted avg_f1-score: 0.3927
weighted avg_support: 116203.0000


Run 12/50 started...



Aggregated Metrics for 12 runs:
test_loss: 1.1248
test_accuracy: 0.5156
balanced_accuracy: 0.1784
roc_auc: 0.6199
pr_auc: 0.4356
f2: 0.4488
mcc: 0.1204
0_precision: 0.3097
0_recall: 0.1030
0_f1-score: 0.1354
0_support: 42368.0000
1_precision: 0.5147
1_recall: 0.9587
1_f1-score: 0.6659
1_support: 56661.0000
2_precision: 0.3681
2_recall: 0.1543
2_f1-score: 0.1780
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0452
5_recall: 0.0018
5_f1-score: 0.0036
5_support: 3473.0000
6_precision: 0.1638
6_recall: 0.0313
6_f1-score: 0.0424
6_support: 4102.0000
accuracy: 0.5156
macro avg_precision: 0.2002
macro avg_recall: 0.1784
macro avg_f1-score: 0.1465
macro avg_support: 116203.0000
weighted avg_precision: 0.3937
weighted avg_recall: 0.5156
weighted avg_f1-score: 0.3866
weighted avg_support: 116203.0000


Run 13/50 started...



Aggregated Metrics for 13 runs:
test_loss: 1.1205
test_accuracy: 0.5159
balanced_accuracy: 0.1821
roc_auc: 0.6239
pr_auc: 0.4347
f2: 0.4484
mcc: 0.1275
0_precision: 0.2858
0_recall: 0.0950
0_f1-score: 0.1250
0_support: 42368.0000
1_precision: 0.5147
1_recall: 0.9610
1_f1-score: 0.6667
1_support: 56661.0000
2_precision: 0.3842
2_recall: 0.1879
2_f1-score: 0.2092
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0417
5_recall: 0.0017
5_f1-score: 0.0033
5_support: 3473.0000
6_precision: 0.1512
6_recall: 0.0289
6_f1-score: 0.0392
6_support: 4102.0000
accuracy: 0.5159
macro avg_precision: 0.1968
macro avg_recall: 0.1821
macro avg_f1-score: 0.1491
macro avg_support: 116203.0000
weighted avg_precision: 0.3854
weighted avg_recall: 0.5159
weighted avg_f1-score: 0.3850
weighted avg_support: 116203.0000


Run 14/50 started...



Aggregated Metrics for 14 runs:
test_loss: 1.1260
test_accuracy: 0.5140
balanced_accuracy: 0.1799
roc_auc: 0.6173
pr_auc: 0.4310
f2: 0.4455
mcc: 0.1224
0_precision: 0.2654
0_recall: 0.0882
0_f1-score: 0.1161
0_support: 42368.0000
1_precision: 0.5129
1_recall: 0.9635
1_f1-score: 0.6660
1_support: 56661.0000
2_precision: 0.3933
2_recall: 0.1792
2_f1-score: 0.2026
2_support: 7151.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 549.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 1899.0000
5_precision: 0.0387
5_recall: 0.0016
5_f1-score: 0.0030
5_support: 3473.0000
6_precision: 0.1404
6_recall: 0.0269
6_f1-score: 0.0364
6_support: 4102.0000
accuracy: 0.5140
macro avg_precision: 0.1930
macro avg_recall: 0.1799
macro avg_f1-score: 0.1463
macro avg_support: 116203.0000
weighted avg_precision: 0.3772
weighted avg_recall: 0.5140
weighted avg_f1-score: 0.3809
weighted avg_support: 116203.0000


In [None]:
labels_map = {
    'normal.': 0, 'satan.': 1, 'ipsweep.': 2, 'portsweep.': 3, 'nmap.': 4,
    'back.': 5, 'warezclient.': 6, 'teardrop.': 7, 'pod.': 8, 'guess_passwd.': 9,
    'buffer_overflow.': 10, 'land.': 11, 'warezmaster.': 12, 'imap.': 13, 'rootkit.': 14,
    'loadmodule.': 15, 'multihop.': 16, 'ftp_write.': 17, 'phf.': 18, 'perl.': 19, 'spy.': 20
}

run_model("kdd_train.csv", "kdd_test.csv", is_string_labels = True, label_mapping=labels_map)

Memory usage of dataframe is 789.51 MB
Memory usage after optimization is: 196.60 MB
Decreased by 75.1%
Memory usage of dataframe is 197.38 MB
Memory usage after optimization is: 49.15 MB
Decreased by 75.1%
Verbose output only for first run...

Run 1/50 started...
Epoch 1/100
[1m 64/101[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m35:19[0m 57s/step - accuracy: 0.8767 - loss: 0.6338

In [None]:
labels_map = {
    'Normal': 0, 'Darknet_Audio-Streaming': 1, 'Darknet_Chat': 2, 'Darknet_File-Transfer': 3, 'Darknet_VOIP': 4,
    'Darknet_Video-Streaming': 5, 'Darknet_Email': 6, 'Darknet_Browsing': 7, 'Darknet_P2P': 8
}

run_model("/kaggle/input/ma-datasets/darknet_train.csv", "/kaggle/input/ma-datasets/darknet_test.csv", is_string_labels = True, label_mapping = labels_map)

Run 1/50 started...



Aggregated Metrics for 1 runs:
test_loss: 0.6083
test_accuracy: 0.8467
balanced_accuracy: 0.1111
roc_auc: 0.6815
pr_auc: 0.8051
f2: 0.8172
mcc: 0.0000
0_precision: 0.8467
0_recall: 1.0000
0_f1-score: 0.9170
0_support: 26862.0000
1_precision: 0.0000
1_recall: 0.0000
1_f1-score: 0.0000
1_support: 2657.0000
2_precision: 0.0000
2_recall: 0.0000
2_f1-score: 0.0000
2_support: 908.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 522.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 293.0000
5_precision: 0.0000
5_recall: 0.0000
5_f1-score: 0.0000
5_support: 269.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 116.0000
7_precision: 0.0000
7_recall: 0.0000
7_f1-score: 0.0000
7_support: 53.0000
8_precision: 0.0000
8_recall: 0.0000
8_f1-score: 0.0000
8_support: 44.0000
accuracy: 0.8467
macro avg_precision: 0.0941
macro avg_recall: 0.1111
macro avg_f1-score: 0.1019
macro avg_support: 31724.0000
weighted avg_precision: 0.7170
weigh

Run 2/50 started...



Aggregated Metrics for 2 runs:
test_loss: 0.5731
test_accuracy: 0.8576
balanced_accuracy: 0.1530
roc_auc: 0.7269
pr_auc: 0.8064
f2: 0.8321
mcc: 0.1776
0_precision: 0.8565
0_recall: 0.9999
0_f1-score: 0.9226
0_support: 26862.0000
1_precision: 0.5000
1_recall: 0.1274
1_f1-score: 0.2031
1_support: 2657.0000
2_precision: 0.0000
2_recall: 0.0000
2_f1-score: 0.0000
2_support: 908.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 522.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 293.0000
5_precision: 0.0000
5_recall: 0.0000
5_f1-score: 0.0000
5_support: 269.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 116.0000
7_precision: 0.0000
7_recall: 0.0000
7_f1-score: 0.0000
7_support: 53.0000
8_precision: 0.2340
8_recall: 0.2500
8_f1-score: 0.2418
8_support: 44.0000
accuracy: 0.8576
macro avg_precision: 0.1767
macro avg_recall: 0.1530
macro avg_f1-score: 0.1519
macro avg_support: 31724.0000
weighted avg_precision: 0.7674
weigh

Run 3/50 started...



Aggregated Metrics for 3 runs:
test_loss: 0.5331
test_accuracy: 0.8576
balanced_accuracy: 0.1531
roc_auc: 0.7708
pr_auc: 0.8247
f2: 0.8355
mcc: 0.2297
0_precision: 0.8652
0_recall: 0.9907
0_f1-score: 0.9235
0_support: 26862.0000
1_precision: 0.5042
1_recall: 0.2208
1_f1-score: 0.2867
1_support: 2657.0000
2_precision: 0.0000
2_recall: 0.0000
2_f1-score: 0.0000
2_support: 908.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 522.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 293.0000
5_precision: 0.0000
5_recall: 0.0000
5_f1-score: 0.0000
5_support: 269.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 116.0000
7_precision: 0.0000
7_recall: 0.0000
7_f1-score: 0.0000
7_support: 53.0000
8_precision: 0.1560
8_recall: 0.1667
8_f1-score: 0.1612
8_support: 44.0000
accuracy: 0.8576
macro avg_precision: 0.1695
macro avg_recall: 0.1531
macro avg_f1-score: 0.1524
macro avg_support: 31724.0000
weighted avg_precision: 0.7750
weigh

Run 4/50 started...



Aggregated Metrics for 4 runs:
test_loss: 0.5228
test_accuracy: 0.8611
balanced_accuracy: 0.1543
roc_auc: 0.7633
pr_auc: 0.8305
f2: 0.8404
mcc: 0.2696
0_precision: 0.8684
0_recall: 0.9897
0_f1-score: 0.9249
0_support: 26862.0000
1_precision: 0.5644
1_recall: 0.2736
1_f1-score: 0.3518
1_support: 2657.0000
2_precision: 0.0000
2_recall: 0.0000
2_f1-score: 0.0000
2_support: 908.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 522.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 293.0000
5_precision: 0.0000
5_recall: 0.0000
5_f1-score: 0.0000
5_support: 269.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 116.0000
7_precision: 0.0000
7_recall: 0.0000
7_f1-score: 0.0000
7_support: 53.0000
8_precision: 0.1170
8_recall: 0.1250
8_f1-score: 0.1209
8_support: 44.0000
accuracy: 0.8611
macro avg_precision: 0.1722
macro avg_recall: 0.1543
macro avg_f1-score: 0.1553
macro avg_support: 31724.0000
weighted avg_precision: 0.7827
weigh

Run 5/50 started...



Aggregated Metrics for 5 runs:
test_loss: 0.5278
test_accuracy: 0.8582
balanced_accuracy: 0.1456
roc_auc: 0.7547
pr_auc: 0.8292
f2: 0.8357
mcc: 0.2156
0_precision: 0.8641
0_recall: 0.9918
0_f1-score: 0.9234
0_support: 26862.0000
1_precision: 0.4515
1_recall: 0.2189
1_f1-score: 0.2814
1_support: 2657.0000
2_precision: 0.0000
2_recall: 0.0000
2_f1-score: 0.0000
2_support: 908.0000
3_precision: 0.0000
3_recall: 0.0000
3_f1-score: 0.0000
3_support: 522.0000
4_precision: 0.0000
4_recall: 0.0000
4_f1-score: 0.0000
4_support: 293.0000
5_precision: 0.0000
5_recall: 0.0000
5_f1-score: 0.0000
5_support: 269.0000
6_precision: 0.0000
6_recall: 0.0000
6_f1-score: 0.0000
6_support: 116.0000
7_precision: 0.0000
7_recall: 0.0000
7_f1-score: 0.0000
7_support: 53.0000
8_precision: 0.0936
8_recall: 0.1000
8_f1-score: 0.0967
8_support: 44.0000
accuracy: 0.8582
macro avg_precision: 0.1566
macro avg_recall: 0.1456
macro avg_f1-score: 0.1446
macro avg_support: 31724.0000
weighted avg_precision: 0.7696
weigh