# Shuttle

In [4]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

from pytorch_tabnet.tab_model import TabNetClassifier

# Load the training dataset
train_data = pd.read_csv("shuttle_train.csv")
train_data = train_data.sample(frac=1).reset_index(drop=True)  # Shuffle

# Load the test dataset
test_data = pd.read_csv("shuttle_test.csv")
test_data = test_data.sample(frac=1).reset_index(drop=True)  # Shuffle

# Split features and labels
train_y = train_data['label'].values
train_X = train_data.drop(['label'], axis=1).values

test_y = test_data['label'].values
test_X = test_data.drop(['label'], axis=1).values

clf = TabNetClassifier(verbose=0)
clf.fit(train_X, train_y)

test_predictions = clf.predict(test_X)

# Calculate accuracy
accuracy = accuracy_score(test_y, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification Report
print("Classification Report:")
print(classification_report(test_y, test_predictions))



Test Accuracy: 0.9972
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00      9117
           2       1.00      0.70      0.82        10
           3       0.77      0.29      0.43        34
           4       1.00      1.00      1.00      1781
           5       0.99      1.00      1.00       653
           6       1.00      0.50      0.67         2
           7       0.00      0.00      0.00         3

    accuracy                           1.00     11600
   macro avg       0.82      0.64      0.70     11600
weighted avg       1.00      1.00      1.00     11600



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Runtime: 5m 31s

# Covertype

In [5]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

from pytorch_tabnet.tab_model import TabNetClassifier

# Load the training dataset
train_data = pd.read_csv("covtype_train.csv")
train_data = train_data.sample(frac=1).reset_index(drop=True)  # Shuffle

# Load the test dataset
test_data = pd.read_csv("covtype_test.csv")
test_data = test_data.sample(frac=1).reset_index(drop=True)  # Shuffle

# Split features and labels
train_y = train_data['label'].values
train_X = train_data.drop(['label'], axis=1).values

test_y = test_data['label'].values
test_X = test_data.drop(['label'], axis=1).values

clf = TabNetClassifier(verbose=0)
clf.fit(train_X, train_y)

test_predictions = clf.predict(test_X)

# Calculate accuracy
accuracy = accuracy_score(test_y, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification Report
print("Classification Report:")
print(classification_report(test_y, test_predictions))



Test Accuracy: 0.8636
Classification Report:
              precision    recall  f1-score   support

           1       0.91      0.82      0.86     42368
           2       0.85      0.93      0.89     56661
           3       0.82      0.84      0.83      7151
           4       0.79      0.65      0.71       549
           5       0.76      0.49      0.60      1899
           6       0.70      0.63      0.66      3473
           7       0.91      0.85      0.88      4102

    accuracy                           0.86    116203
   macro avg       0.82      0.74      0.77    116203
weighted avg       0.86      0.86      0.86    116203



Runtime: 59m 37s

# KDD

In [6]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

from pytorch_tabnet.tab_model import TabNetClassifier

# Load the training dataset
train_data = pd.read_csv("kdd_train.csv")
train_data = train_data.sample(frac=1).reset_index(drop=True)  # Shuffle

# Load the test dataset
test_data = pd.read_csv("kdd_test.csv")
test_data = test_data.sample(frac=1).reset_index(drop=True)  # Shuffle

# Split features and labels
train_y = train_data['label'].values
train_X = train_data.drop(['label'], axis=1).values

test_y = test_data['label'].values
test_X = test_data.drop(['label'], axis=1).values

clf = TabNetClassifier(verbose=0)
clf.fit(train_X, train_y)

test_predictions = clf.predict(test_X)

# Calculate accuracy
accuracy = accuracy_score(test_y, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification Report
print("Classification Report:")
print(classification_report(test_y, test_predictions))



Test Accuracy: 0.9966
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                  precision    recall  f1-score   support

           back.       0.74      0.03      0.06       441
buffer_overflow.       0.00      0.00      0.00         6
      ftp_write.       0.00      0.00      0.00         2
   guess_passwd.       1.00      0.91      0.95        11
           imap.       0.50      0.50      0.50         2
        ipsweep.       0.99      0.99      0.99      2496
           land.       0.00      0.00      0.00         4
     loadmodule.       0.00      0.00      0.00         2
       multihop.       0.00      0.00      0.00         1
           nmap.       0.99      0.92      0.96       463
         normal.       1.00      1.00      1.00    194557
           perl.       0.00      0.00      0.00         1
            phf.       0.00      0.00      0.00         1
            pod.       1.00      0.66      0.80        53
      portsweep.       1.00      0.98      0.99      2083
        rootkit.       0.00      0.00      0.00         2
          sat

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Runtime: 134m 49s

# Darknet

In [7]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

from pytorch_tabnet.tab_model import TabNetClassifier

# Load the training dataset
train_data = pd.read_csv("darknet_train.csv")
train_data = train_data.sample(frac=1).reset_index(drop=True)  # Shuffle

# Load the test dataset
test_data = pd.read_csv("darknet_test.csv")
test_data = test_data.sample(frac=1).reset_index(drop=True)  # Shuffle

# Split features and labels
train_y = train_data['label'].values
train_X = train_data.drop(['label'], axis=1).values

test_y = test_data['label'].values
test_X = test_data.drop(['label'], axis=1).values

clf = TabNetClassifier(verbose=0)
clf.fit(train_X, train_y)

test_predictions = clf.predict(test_X)

# Calculate accuracy
accuracy = accuracy_score(test_y, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification Report
print("Classification Report:")
print(classification_report(test_y, test_predictions))



Test Accuracy: 0.9989
Classification Report:
                         precision    recall  f1-score   support

Darknet_Audio-Streaming       1.00      1.00      1.00      2657
       Darknet_Browsing       0.95      0.70      0.80        53
           Darknet_Chat       1.00      1.00      1.00       908
          Darknet_Email       1.00      0.99      1.00       116
  Darknet_File-Transfer       1.00      1.00      1.00       522
            Darknet_P2P       1.00      0.98      0.99        44
           Darknet_VOIP       0.96      0.98      0.97       293
Darknet_Video-Streaming       0.98      1.00      0.99       269
                 Normal       1.00      1.00      1.00     26862

               accuracy                           1.00     31724
              macro avg       0.99      0.96      0.97     31724
           weighted avg       1.00      1.00      1.00     31724



Runtime: 19m 43s