In [4]:
import pandas as pd
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import fbeta_score, roc_auc_score, average_precision_score, matthews_corrcoef
import numpy as np
from collections import defaultdict

In [5]:
def reduce_mem_usage(df, allow_float16=True):
    """Downcast the columns of ``df`` to smaller dtypes and report the saving.

    The frame is modified in place (columns are reassigned) and also returned.

    Per-column rules:
      * object / string columns are converted to ``category``;
      * NumPy integer columns (signed or unsigned) are cast to the smallest
        signed integer type among int8/int16/int32/int64 that holds both the
        column minimum and maximum; columns that fit none are left unchanged;
      * NumPy float columns are cast to the smallest float type whose range
        holds the column minimum and maximum, falling back to float64;
      * every other dtype (bool, datetime, category, pandas extension types)
        is left untouched.

    Args:
        df: pandas DataFrame to shrink.
        allow_float16: when True (the default, matching the historical
            behaviour) float columns may be stored as float16.  float16 keeps
            only about three significant decimal digits, so pass False to use
            float32 as the smallest float type.

    Returns:
        The same DataFrame object, with downcast columns.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32) if allow_float16 else (np.float32,)

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object or isinstance(col_type, pd.StringDtype):
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy integer/float columns are downcast.  Inspecting the
        # dtype kind (instead of the dtype's name prefix) keeps bool, unsigned
        # and extension-typed columns out of the float branch, and skips
        # datetime/category columns whose min/max cannot be compared with
        # numeric limits.
        if not isinstance(col_type, np.dtype) or col_type.kind not in 'iuf':
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind in 'iu':
            # min()/max() of an empty column is NaN: nothing to size against.
            if pd.isna(c_min) or pd.isna(c_max):
                continue
            # Plain Python ints make the range checks exact for every width.
            c_min, c_max = int(c_min), int(c_max)
            for candidate in int_candidates:
                bounds = np.iinfo(candidate)
                # Inclusive bounds: the limits themselves are representable.
                if bounds.min <= c_min and c_max <= bounds.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                bounds = np.finfo(candidate)
                if bounds.min <= c_min and c_max <= bounds.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Reached for out-of-range values and for NaN min/max
                # (comparisons with NaN are always False).
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df


def import_data(file):
    """Read a CSV file into a DataFrame and shrink its memory footprint.

    Args:
        file: path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The loaded DataFrame after it has been passed through
        ``reduce_mem_usage`` (which downcasts columns in place).
    """
    # `keep_date_col` was removed from this call: it only has an effect when
    # `parse_dates` combines several columns into one, which cannot happen with
    # `parse_dates=True`, and the keyword is deprecated since pandas 2.2.
    df = pd.read_csv(file, parse_dates=True)
    return reduce_mem_usage(df)

# Shuttle

In [6]:
# Load the training and test datasets
train_data = pd.read_csv("shuttle_train.csv")
test_data = pd.read_csv("shuttle_test.csv")

# Parameters
num_runs = 5   # Number of independent train/evaluate repetitions
base_seed = 0  # Run i uses seed base_seed + i, so re-running the cell reproduces the same numbers

# Per-run metric accumulators
accuracy_scores = []
f2_scores = []
auc_roc_scores = []
auc_pr_scores = []
mcc_scores = []

# Running sums of the classification-report entries, keyed by report row then metric name
class_metrics = defaultdict(lambda: defaultdict(float))

# The test split is the same for every run and none of the metrics below depend on
# row order, so features and labels are extracted once instead of once per run.
test_y = test_data['label'].to_numpy()
test_X = test_data.drop(columns=['label']).to_numpy()

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    run_seed = base_seed + run

    # Shuffle a copy of the training rows with a per-run seed; the loaded frame is left untouched
    train_shuffled = train_data.sample(frac=1, random_state=run_seed).reset_index(drop=True)
    train_y = train_shuffled['label'].to_numpy()
    train_X = train_shuffled.drop(columns=['label']).to_numpy()

    # Train the model (training log is shown for the first run only).
    # NOTE(review): eval_set is the training data itself, so early stopping is driven by
    # training logloss rather than a held-out validation split -- confirm this is intended.
    clf = TabNetClassifier(seed=run_seed, verbose=1 if run == 0 else 0)
    clf.fit(train_X, train_y, eval_set=[(train_X, train_y)], eval_metric=['logloss'])

    # Make predictions
    test_probabilities = clf.predict_proba(test_X)  # Per-class probabilities
    test_predictions = clf.predict(test_X)          # Discrete class predictions

    # Accuracy
    accuracy = accuracy_score(test_y, test_predictions)
    accuracy_scores.append(accuracy)

    # F2-Score (weighted by class support)
    f2_score = fbeta_score(test_y, test_predictions, beta=2, average='weighted')
    f2_scores.append(f2_score)

    # AUC-ROC (One-vs-Rest)
    auc_roc = roc_auc_score(test_y, test_probabilities, multi_class='ovr')
    auc_roc_scores.append(auc_roc)

    # AUC-PR (weighted by class support)
    auc_pr = average_precision_score(test_y, test_probabilities, average='weighted')
    auc_pr_scores.append(auc_pr)

    # Matthews correlation coefficient
    mcc = matthews_corrcoef(test_y, test_predictions)
    mcc_scores.append(mcc)

    # Classification report
    report = classification_report(test_y, test_predictions, output_dict=True, zero_division=0)

    # Accumulate every dict-valued entry: the per-class rows plus the 'macro avg' and
    # 'weighted avg' rows. Only the scalar 'accuracy' entry is skipped.
    for class_label, metrics in report.items():
        if isinstance(metrics, dict):
            for metric_name, metric_value in metrics.items():
                class_metrics[class_label][metric_name] += metric_value

# Calculate averages over the runs
average_accuracy = np.mean(accuracy_scores)
average_f2_score = np.mean(f2_scores)
average_auc_roc = np.mean(auc_roc_scores)
average_auc_pr = np.mean(auc_pr_scores)
average_mcc = np.mean(mcc_scores)

# Average the accumulated classification-report metrics
average_class_metrics = {
    class_label: {metric_name: metric_value / num_runs for metric_name, metric_value in metrics.items()}
    for class_label, metrics in class_metrics.items()
}

# Output results
print(f"Average Test Accuracy: {average_accuracy:.4f}")
print(f"Average F2-Score (Weighted): {average_f2_score:.4f}")
print(f"Average AUC-ROC (One-vs-Rest): {average_auc_roc:.4f}")
print(f"Average AUC-PR: {average_auc_pr:.4f}")
print(f"Average Matthews Correlation Coefficient: {average_mcc:.4f}")

print("\nAverage Classification Report:")
for class_label, metrics in average_class_metrics.items():
    print(f"Class {class_label}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")


Run 1/5




epoch 0  | loss: 0.5214  | val_0_logloss: 0.06544 |  0:00:04s
epoch 1  | loss: 0.05223 | val_0_logloss: 0.02861 |  0:00:10s
epoch 2  | loss: 0.03198 | val_0_logloss: 0.02694 |  0:00:15s
epoch 3  | loss: 0.04257 | val_0_logloss: 0.08513 |  0:00:20s
epoch 4  | loss: 0.03237 | val_0_logloss: 0.02389 |  0:00:26s
epoch 5  | loss: 0.02714 | val_0_logloss: 0.02176 |  0:00:31s
epoch 6  | loss: 0.02316 | val_0_logloss: 0.01996 |  0:00:36s
epoch 7  | loss: 0.02064 | val_0_logloss: 0.01694 |  0:00:42s
epoch 8  | loss: 0.01606 | val_0_logloss: 0.01603 |  0:00:46s
epoch 9  | loss: 0.01342 | val_0_logloss: 0.01474 |  0:00:51s
epoch 10 | loss: 0.01426 | val_0_logloss: 0.01286 |  0:00:56s
epoch 11 | loss: 0.01864 | val_0_logloss: 0.01677 |  0:01:01s
epoch 12 | loss: 0.01391 | val_0_logloss: 0.01291 |  0:01:06s
epoch 13 | loss: 0.01278 | val_0_logloss: 0.01126 |  0:01:11s
epoch 14 | loss: 0.0118  | val_0_logloss: 0.01642 |  0:01:16s
epoch 15 | loss: 0.01131 | val_0_logloss: 0.01249 |  0:01:23s
epoch 16



Run 2/5

Early stopping occurred at epoch 43 with best_epoch = 33 and best_val_0_logloss = 0.01127




Run 3/5

Early stopping occurred at epoch 25 with best_epoch = 15 and best_val_0_logloss = 0.01281




Run 4/5

Early stopping occurred at epoch 24 with best_epoch = 14 and best_val_0_logloss = 0.01494




Run 5/5

Early stopping occurred at epoch 31 with best_epoch = 21 and best_val_0_logloss = 0.01452




Average Test Accuracy: 0.9969
Average F2-Score (Weighted): 0.9966
Average AUC-ROC (One-vs-Rest): 0.9926
Average AUC-PR: 0.9976
Average Matthews Correlation Coefficient: 0.9913

Average Classification Report:
Class 1:
  precision: 0.9983
  recall: 0.9998
  f1-score: 0.9990
  support: 9117.0000
Class 2:
  precision: 0.5967
  recall: 0.3800
  f1-score: 0.4154
  support: 10.0000
Class 3:
  precision: 0.8043
  recall: 0.3471
  f1-score: 0.4821
  support: 34.0000
Class 4:
  precision: 0.9954
  recall: 1.0000
  f1-score: 0.9977
  support: 1781.0000
Class 5:
  precision: 0.9891
  recall: 0.9982
  f1-score: 0.9936
  support: 653.0000
Class 6:
  precision: 0.3000
  recall: 0.2000
  f1-score: 0.2333
  support: 2.0000
Class 7:
  precision: 0.2000
  recall: 0.0667
  f1-score: 0.1000
  support: 3.0000
Class macro avg:
  precision: 0.6977
  recall: 0.5702
  f1-score: 0.6030
  support: 11600.0000
Class weighted avg:
  precision: 0.9961
  recall: 0.9969
  f1-score: 0.9961
  support: 11600.0000


# Covertype

In [None]:
# Load the training and test datasets
train_data = pd.read_csv("covtype_train.csv")
test_data = pd.read_csv("covtype_test.csv")

# Parameters
num_runs = 5   # Number of independent train/evaluate repetitions
base_seed = 0  # Run i uses seed base_seed + i, so re-running the cell reproduces the same numbers

# Per-run metric accumulators
accuracy_scores = []
f2_scores = []
auc_roc_scores = []
auc_pr_scores = []
mcc_scores = []

# Running sums of the classification-report entries, keyed by report row then metric name
class_metrics = defaultdict(lambda: defaultdict(float))

# The test split is the same for every run and none of the metrics below depend on
# row order, so features and labels are extracted once instead of once per run.
test_y = test_data['label'].to_numpy()
test_X = test_data.drop(columns=['label']).to_numpy()

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    run_seed = base_seed + run

    # Shuffle a copy of the training rows with a per-run seed; the loaded frame is left untouched
    train_shuffled = train_data.sample(frac=1, random_state=run_seed).reset_index(drop=True)
    train_y = train_shuffled['label'].to_numpy()
    train_X = train_shuffled.drop(columns=['label']).to_numpy()

    # Train the model (training log is shown for the first run only).
    # NOTE(review): eval_set is the training data itself, so early stopping is driven by
    # training logloss rather than a held-out validation split -- confirm this is intended.
    clf = TabNetClassifier(seed=run_seed, verbose=1 if run == 0 else 0)
    clf.fit(train_X, train_y, eval_set=[(train_X, train_y)], eval_metric=['logloss'])

    # Make predictions
    test_probabilities = clf.predict_proba(test_X)  # Per-class probabilities
    test_predictions = clf.predict(test_X)          # Discrete class predictions

    # Accuracy
    accuracy = accuracy_score(test_y, test_predictions)
    accuracy_scores.append(accuracy)

    # F2-Score (weighted by class support)
    f2_score = fbeta_score(test_y, test_predictions, beta=2, average='weighted')
    f2_scores.append(f2_score)

    # AUC-ROC (One-vs-Rest)
    auc_roc = roc_auc_score(test_y, test_probabilities, multi_class='ovr')
    auc_roc_scores.append(auc_roc)

    # AUC-PR (weighted by class support)
    auc_pr = average_precision_score(test_y, test_probabilities, average='weighted')
    auc_pr_scores.append(auc_pr)

    # Matthews correlation coefficient
    mcc = matthews_corrcoef(test_y, test_predictions)
    mcc_scores.append(mcc)

    # Classification report
    report = classification_report(test_y, test_predictions, output_dict=True, zero_division=0)

    # Accumulate every dict-valued entry: the per-class rows plus the 'macro avg' and
    # 'weighted avg' rows. Only the scalar 'accuracy' entry is skipped.
    for class_label, metrics in report.items():
        if isinstance(metrics, dict):
            for metric_name, metric_value in metrics.items():
                class_metrics[class_label][metric_name] += metric_value

# Calculate averages over the runs
average_accuracy = np.mean(accuracy_scores)
average_f2_score = np.mean(f2_scores)
average_auc_roc = np.mean(auc_roc_scores)
average_auc_pr = np.mean(auc_pr_scores)
average_mcc = np.mean(mcc_scores)

# Average the accumulated classification-report metrics
average_class_metrics = {
    class_label: {metric_name: metric_value / num_runs for metric_name, metric_value in metrics.items()}
    for class_label, metrics in class_metrics.items()
}

# Output results
print(f"Average Test Accuracy: {average_accuracy:.4f}")
print(f"Average F2-Score (Weighted): {average_f2_score:.4f}")
print(f"Average AUC-ROC (One-vs-Rest): {average_auc_roc:.4f}")
print(f"Average AUC-PR: {average_auc_pr:.4f}")
print(f"Average Matthews Correlation Coefficient: {average_mcc:.4f}")

print("\nAverage Classification Report:")
for class_label, metrics in average_class_metrics.items():
    print(f"Class {class_label}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")


Run 1/5


KeyboardInterrupt: 

# KDD

In [6]:
# Load the training and test datasets (import_data also downcasts the columns to save memory)
train_data = import_data("kdd_train.csv")
test_data = import_data("kdd_test.csv")

# Parameters
num_runs = 5   # Number of independent train/evaluate repetitions
base_seed = 0  # Run i uses seed base_seed + i, so re-running the cell reproduces the same numbers

# Per-run metric accumulators
accuracy_scores = []
f2_scores = []
auc_roc_scores = []
auc_pr_scores = []
mcc_scores = []

# Running sums of the classification-report entries, keyed by report row then metric name
class_metrics = defaultdict(lambda: defaultdict(float))

# The test split is the same for every run and none of the metrics below depend on
# row order, so features and labels are extracted once instead of once per run.
# import_data turns object columns into 'category'; .to_numpy() yields a plain ndarray
# for such a column, whereas .values would return a pandas Categorical.
test_y = test_data['label'].to_numpy()
test_X = test_data.drop(columns=['label']).to_numpy()

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    run_seed = base_seed + run

    # Shuffle a copy of the training rows with a per-run seed; the loaded frame is left untouched
    train_shuffled = train_data.sample(frac=1, random_state=run_seed).reset_index(drop=True)
    train_y = train_shuffled['label'].to_numpy()
    train_X = train_shuffled.drop(columns=['label']).to_numpy()

    # Train the model (training log is shown for the first run only).
    # NOTE(review): eval_set is the training data itself, so early stopping is driven by
    # training logloss rather than a held-out validation split -- confirm this is intended.
    clf = TabNetClassifier(seed=run_seed, verbose=1 if run == 0 else 0)
    clf.fit(train_X, train_y, eval_set=[(train_X, train_y)], eval_metric=['logloss'])

    # Make predictions
    test_probabilities = clf.predict_proba(test_X)  # Per-class probabilities
    test_predictions = clf.predict(test_X)          # Discrete class predictions

    # Accuracy
    accuracy = accuracy_score(test_y, test_predictions)
    accuracy_scores.append(accuracy)

    # F2-Score (weighted by class support)
    f2_score = fbeta_score(test_y, test_predictions, beta=2, average='weighted')
    f2_scores.append(f2_score)

    # AUC-ROC (One-vs-Rest)
    auc_roc = roc_auc_score(test_y, test_probabilities, multi_class='ovr')
    auc_roc_scores.append(auc_roc)

    # AUC-PR (weighted by class support)
    auc_pr = average_precision_score(test_y, test_probabilities, average='weighted')
    auc_pr_scores.append(auc_pr)

    # Matthews correlation coefficient
    mcc = matthews_corrcoef(test_y, test_predictions)
    mcc_scores.append(mcc)

    # Classification report
    report = classification_report(test_y, test_predictions, output_dict=True, zero_division=0)

    # Accumulate every dict-valued entry: the per-class rows plus the 'macro avg' and
    # 'weighted avg' rows. Only the scalar 'accuracy' entry is skipped.
    for class_label, metrics in report.items():
        if isinstance(metrics, dict):
            for metric_name, metric_value in metrics.items():
                class_metrics[class_label][metric_name] += metric_value

# Calculate averages over the runs
average_accuracy = np.mean(accuracy_scores)
average_f2_score = np.mean(f2_scores)
average_auc_roc = np.mean(auc_roc_scores)
average_auc_pr = np.mean(auc_pr_scores)
average_mcc = np.mean(mcc_scores)

# Average the accumulated classification-report metrics
average_class_metrics = {
    class_label: {metric_name: metric_value / num_runs for metric_name, metric_value in metrics.items()}
    for class_label, metrics in class_metrics.items()
}

# Output results
print(f"Average Test Accuracy: {average_accuracy:.4f}")
print(f"Average F2-Score (Weighted): {average_f2_score:.4f}")
print(f"Average AUC-ROC (One-vs-Rest): {average_auc_roc:.4f}")
print(f"Average AUC-PR: {average_auc_pr:.4f}")
print(f"Average Matthews Correlation Coefficient: {average_mcc:.4f}")

print("\nAverage Classification Report:")
for class_label, metrics in average_class_metrics.items():
    print(f"Class {class_label}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")




Test Accuracy: 0.9966
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                  precision    recall  f1-score   support

           back.       0.74      0.03      0.06       441
buffer_overflow.       0.00      0.00      0.00         6
      ftp_write.       0.00      0.00      0.00         2
   guess_passwd.       1.00      0.91      0.95        11
           imap.       0.50      0.50      0.50         2
        ipsweep.       0.99      0.99      0.99      2496
           land.       0.00      0.00      0.00         4
     loadmodule.       0.00      0.00      0.00         2
       multihop.       0.00      0.00      0.00         1
           nmap.       0.99      0.92      0.96       463
         normal.       1.00      1.00      1.00    194557
           perl.       0.00      0.00      0.00         1
            phf.       0.00      0.00      0.00         1
            pod.       1.00      0.66      0.80        53
      portsweep.       1.00      0.98      0.99      2083
        rootkit.       0.00      0.00      0.00         2
          sat

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Darknet

In [7]:
# Load the training and test datasets
train_data = pd.read_csv("darknet_train.csv")
test_data = pd.read_csv("darknet_test.csv")

# Parameters
num_runs = 5   # Number of independent train/evaluate repetitions
base_seed = 0  # Run i uses seed base_seed + i, so re-running the cell reproduces the same numbers

# Per-run metric accumulators
accuracy_scores = []
f2_scores = []
auc_roc_scores = []
auc_pr_scores = []
mcc_scores = []

# Running sums of the classification-report entries, keyed by report row then metric name
class_metrics = defaultdict(lambda: defaultdict(float))

# The test split is the same for every run and none of the metrics below depend on
# row order, so features and labels are extracted once instead of once per run.
test_y = test_data['label'].to_numpy()
test_X = test_data.drop(columns=['label']).to_numpy()

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    run_seed = base_seed + run

    # Shuffle a copy of the training rows with a per-run seed; the loaded frame is left untouched
    train_shuffled = train_data.sample(frac=1, random_state=run_seed).reset_index(drop=True)
    train_y = train_shuffled['label'].to_numpy()
    train_X = train_shuffled.drop(columns=['label']).to_numpy()

    # Train the model (training log is shown for the first run only).
    # NOTE(review): eval_set is the training data itself, so early stopping is driven by
    # training logloss rather than a held-out validation split -- confirm this is intended.
    clf = TabNetClassifier(seed=run_seed, verbose=1 if run == 0 else 0)
    clf.fit(train_X, train_y, eval_set=[(train_X, train_y)], eval_metric=['logloss'])

    # Make predictions
    test_probabilities = clf.predict_proba(test_X)  # Per-class probabilities
    test_predictions = clf.predict(test_X)          # Discrete class predictions

    # Accuracy
    accuracy = accuracy_score(test_y, test_predictions)
    accuracy_scores.append(accuracy)

    # F2-Score (weighted by class support)
    f2_score = fbeta_score(test_y, test_predictions, beta=2, average='weighted')
    f2_scores.append(f2_score)

    # AUC-ROC (One-vs-Rest)
    auc_roc = roc_auc_score(test_y, test_probabilities, multi_class='ovr')
    auc_roc_scores.append(auc_roc)

    # AUC-PR (weighted by class support)
    auc_pr = average_precision_score(test_y, test_probabilities, average='weighted')
    auc_pr_scores.append(auc_pr)

    # Matthews correlation coefficient
    mcc = matthews_corrcoef(test_y, test_predictions)
    mcc_scores.append(mcc)

    # Classification report
    report = classification_report(test_y, test_predictions, output_dict=True, zero_division=0)

    # Accumulate every dict-valued entry: the per-class rows plus the 'macro avg' and
    # 'weighted avg' rows. Only the scalar 'accuracy' entry is skipped.
    for class_label, metrics in report.items():
        if isinstance(metrics, dict):
            for metric_name, metric_value in metrics.items():
                class_metrics[class_label][metric_name] += metric_value

# Calculate averages over the runs
average_accuracy = np.mean(accuracy_scores)
average_f2_score = np.mean(f2_scores)
average_auc_roc = np.mean(auc_roc_scores)
average_auc_pr = np.mean(auc_pr_scores)
average_mcc = np.mean(mcc_scores)

# Average the accumulated classification-report metrics
average_class_metrics = {
    class_label: {metric_name: metric_value / num_runs for metric_name, metric_value in metrics.items()}
    for class_label, metrics in class_metrics.items()
}

# Output results
print(f"Average Test Accuracy: {average_accuracy:.4f}")
print(f"Average F2-Score (Weighted): {average_f2_score:.4f}")
print(f"Average AUC-ROC (One-vs-Rest): {average_auc_roc:.4f}")
print(f"Average AUC-PR: {average_auc_pr:.4f}")
print(f"Average Matthews Correlation Coefficient: {average_mcc:.4f}")

print("\nAverage Classification Report:")
for class_label, metrics in average_class_metrics.items():
    print(f"Class {class_label}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")




Test Accuracy: 0.9989
Classification Report:
                         precision    recall  f1-score   support

Darknet_Audio-Streaming       1.00      1.00      1.00      2657
       Darknet_Browsing       0.95      0.70      0.80        53
           Darknet_Chat       1.00      1.00      1.00       908
          Darknet_Email       1.00      0.99      1.00       116
  Darknet_File-Transfer       1.00      1.00      1.00       522
            Darknet_P2P       1.00      0.98      0.99        44
           Darknet_VOIP       0.96      0.98      0.97       293
Darknet_Video-Streaming       0.98      1.00      0.99       269
                 Normal       1.00      1.00      1.00     26862

               accuracy                           1.00     31724
              macro avg       0.99      0.96      0.97     31724
           weighted avg       1.00      1.00      1.00     31724

