# Shuttle

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

from catboost import CatBoostClassifier

# Train a CatBoost classifier on the Shuttle training split and report
# accuracy plus per-class metrics on the separate test split.

# Fixed seed so the shuffles below give the same row order on every run.
SEED = 42

# Load the training dataset
train_data = pd.read_csv("shuttle_train.csv")
train_data = train_data.sample(frac=1, random_state=SEED).reset_index(drop=True)  # Shuffle

# Load the test dataset
test_data = pd.read_csv("shuttle_test.csv")
test_data = test_data.sample(frac=1, random_state=SEED).reset_index(drop=True)  # Shuffle

# Split features and labels
train_y = train_data['label']
train_X = train_data.drop(['label'], axis=1)

test_y = test_data['label']
test_X = test_data.drop(['label'], axis=1)

# NOTE(review): learning_rate=1 looks unusually high -- confirm it is intentional.
model = CatBoostClassifier(learning_rate=1, depth=6)
fit_model = model.fit(X=train_X, y=train_y, verbose=False)

# Predict from the feature-only frame. test_data still carries the 'label'
# column, so its columns do not match what the model was trained on.
test_predictions = fit_model.predict(test_X)

# Calculate accuracy
accuracy = accuracy_score(test_y, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification Report
print("Classification Report:")
print(classification_report(test_y, test_predictions))

Test Accuracy: 0.9998
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00      9117
           2       1.00      1.00      1.00        10
           3       0.97      1.00      0.99        34
           4       1.00      1.00      1.00      1781
           5       1.00      1.00      1.00       653
           6       1.00      1.00      1.00         2
           7       1.00      1.00      1.00         3

    accuracy                           1.00     11600
   macro avg       1.00      1.00      1.00     11600
weighted avg       1.00      1.00      1.00     11600



Runtime: 24.4s

# Covertype

In [8]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

from catboost import CatBoostClassifier

# Train a CatBoost classifier on the Covertype training split and report
# accuracy plus per-class metrics on the separate test split.

# Fixed seed so the shuffles below give the same row order on every run.
SEED = 42

# Load the training dataset
train_data = pd.read_csv("covtype_train.csv")
train_data = train_data.sample(frac=1, random_state=SEED).reset_index(drop=True)  # Shuffle

# Load the test dataset
test_data = pd.read_csv("covtype_test.csv")
test_data = test_data.sample(frac=1, random_state=SEED).reset_index(drop=True)  # Shuffle

# Split features and labels
train_y = train_data['label']
train_X = train_data.drop(['label'], axis=1)

test_y = test_data['label']
test_X = test_data.drop(['label'], axis=1)

# NOTE(review): learning_rate=1 looks unusually high -- confirm it is intentional.
model = CatBoostClassifier(learning_rate=1, depth=6)
fit_model = model.fit(X=train_X, y=train_y, verbose=False)

# Predict from the feature-only frame. test_data still carries the 'label'
# column, so its columns do not match what the model was trained on.
test_predictions = fit_model.predict(test_X)

# Calculate accuracy
accuracy = accuracy_score(test_y, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification Report
print("Classification Report:")
print(classification_report(test_y, test_predictions))

Test Accuracy: 0.7372
Classification Report:
              precision    recall  f1-score   support

           1       0.73      0.73      0.73     42368
           2       0.77      0.77      0.77     56661
           3       0.73      0.74      0.74      7151
           4       0.64      0.68      0.66       549
           5       0.38      0.39      0.38      1899
           6       0.50      0.50      0.50      3473
           7       0.73      0.72      0.73      4102

    accuracy                           0.74    116203
   macro avg       0.64      0.65      0.64    116203
weighted avg       0.74      0.74      0.74    116203



Runtime: 4m 36.5s

# KDD

In [3]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

from catboost import CatBoostClassifier

# Train a CatBoost classifier on the KDD training split and report accuracy
# plus per-class metrics on the separate test split. String labels are
# converted to the numeric ids in label_mapping before training.

# Fixed seed so the shuffles below give the same row order on every run.
SEED = 42

label_mapping = {
    'normal.': 0.0, 'satan.': 1.0, 'ipsweep.': 2.0, 'portsweep.': 3.0, 'nmap.': 4.0,
    'back.': 5.0, 'warezclient.': 6.0, 'teardrop.': 7.0, 'pod.': 8.0, 'guess_passwd.': 9.0,
    'buffer_overflow.': 10.0, 'land.': 11.0, 'warezmaster.': 12.0, 'imap.': 13.0, 'rootkit.': 14.0,
    'loadmodule.': 15.0, 'multihop.': 16.0, 'ftp_write.': 17.0, 'phf.': 18.0, 'perl.': 19.0, 'spy.': 20.0
}

# Load the training dataset
train_data = pd.read_csv("kdd_train.csv")
train_labels = train_data['label'].map(label_mapping)  # Convert labels to numeric values
if train_labels.isna().any():
    # .map() yields NaN for any label missing from label_mapping; fail with a clear message.
    unknown = sorted(train_data.loc[train_labels.isna(), 'label'].astype(str).unique())
    raise ValueError(f"kdd_train.csv has labels not covered by label_mapping: {unknown}")
train_data['label'] = train_labels
train_data = train_data.sample(frac=1, random_state=SEED).reset_index(drop=True)  # Shuffle

# Load the test dataset
test_data = pd.read_csv("kdd_test.csv")
test_labels = test_data['label'].map(label_mapping)  # Convert labels to numeric values
if test_labels.isna().any():
    unknown = sorted(test_data.loc[test_labels.isna(), 'label'].astype(str).unique())
    raise ValueError(f"kdd_test.csv has labels not covered by label_mapping: {unknown}")
test_data['label'] = test_labels
test_data = test_data.sample(frac=1, random_state=SEED).reset_index(drop=True)  # Shuffle

# Split features and labels
train_y = train_data['label']
train_X = train_data.drop(['label'], axis=1)

test_y = test_data['label']
test_X = test_data.drop(['label'], axis=1)

# NOTE(review): learning_rate=1 looks unusually high -- confirm it is intentional.
model = CatBoostClassifier(learning_rate=1, depth=6)
fit_model = model.fit(X=train_X, y=train_y, verbose=False)

# Predict from the feature-only frame. test_data still carries the 'label'
# column, so its columns do not match what the model was trained on.
test_predictions = fit_model.predict(test_X)

# Calculate accuracy
accuracy = accuracy_score(test_y, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification Report
# Passing `labels` ties each name in target_names to its numeric id, so the rows
# stay correctly labelled even when a class is absent from the test split.
# zero_division=0 reports 0.00 for classes that are never predicted without
# emitting an undefined-metric warning for each of them.
print("Classification Report:")
print(classification_report(
    test_y,
    test_predictions,
    labels=list(label_mapping.values()),
    target_names=list(label_mapping.keys()),
    zero_division=0,
))

Test Accuracy: 0.9967
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                  precision    recall  f1-score   support

         normal.       1.00      1.00      1.00    194557
          satan.       0.97      0.98      0.98      3178
        ipsweep.       0.98      0.98      0.98      2496
      portsweep.       0.99      0.98      0.98      2083
           nmap.       0.89      0.85      0.87       463
           back.       0.95      0.97      0.96       441
    warezclient.       0.53      0.47      0.50       204
       teardrop.       0.99      0.98      0.98       196
            pod.       0.71      0.32      0.44        53
   guess_passwd.       0.00      0.00      0.00        11
buffer_overflow.       0.00      0.00      0.00         6
           land.       0.00      0.00      0.00         4
    warezmaster.       0.00      0.00      0.00         4
           imap.       0.00      0.00      0.00         2
        rootkit.       0.00      0.00      0.00         2
     loadmodule.       0.00      0.00      0.00         2
       multih

Runtime: 74m 35s

# Darknet

In [4]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

from catboost import CatBoostClassifier

# Train a CatBoost classifier on the Darknet training split and report accuracy
# plus per-class metrics on the separate test split. String labels are
# converted to the numeric ids in label_mapping before training.

# Fixed seed so the shuffles below give the same row order on every run.
SEED = 42

# Define label mapping for Darknet dataset labels
label_mapping = {
    'Normal': 0.0, 'Darknet_Audio-Streaming': 1.0, 'Darknet_Chat': 2.0, 'Darknet_File-Transfer': 3.0, 'Darknet_VOIP': 4.0,
    'Darknet_Video-Streaming': 5.0, 'Darknet_Email': 6.0, 'Darknet_Browsing': 7.0, 'Darknet_P2P': 8.0
}

# Load the training dataset
train_data = pd.read_csv("darknet_train.csv")
train_labels = train_data['label'].map(label_mapping)  # Convert labels to numeric values
if train_labels.isna().any():
    # .map() yields NaN for any label missing from label_mapping; fail with a clear message.
    unknown = sorted(train_data.loc[train_labels.isna(), 'label'].astype(str).unique())
    raise ValueError(f"darknet_train.csv has labels not covered by label_mapping: {unknown}")
train_data['label'] = train_labels
train_data = train_data.sample(frac=1, random_state=SEED).reset_index(drop=True)  # Shuffle

# Load the test dataset
test_data = pd.read_csv("darknet_test.csv")
test_labels = test_data['label'].map(label_mapping)  # Convert labels to numeric values
if test_labels.isna().any():
    unknown = sorted(test_data.loc[test_labels.isna(), 'label'].astype(str).unique())
    raise ValueError(f"darknet_test.csv has labels not covered by label_mapping: {unknown}")
test_data['label'] = test_labels
test_data = test_data.sample(frac=1, random_state=SEED).reset_index(drop=True)  # Shuffle

# Split features and labels
train_y = train_data['label']
train_X = train_data.drop(['label'], axis=1)

test_y = test_data['label']
test_X = test_data.drop(['label'], axis=1)

# NOTE(review): learning_rate=1 looks unusually high -- confirm it is intentional.
model = CatBoostClassifier(learning_rate=1, depth=6)
fit_model = model.fit(X=train_X, y=train_y, verbose=False)

# Predict from the feature-only frame. test_data still carries the 'label'
# column, so its columns do not match what the model was trained on.
test_predictions = fit_model.predict(test_X)

# Calculate accuracy
accuracy = accuracy_score(test_y, test_predictions)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification Report
# Passing `labels` ties each name in target_names to its numeric id, so the rows
# stay correctly labelled even when a class is absent from the test split.
# zero_division=0 reports 0.00 for classes that are never predicted without
# emitting an undefined-metric warning for each of them.
print("Classification Report:")
print(classification_report(
    test_y,
    test_predictions,
    labels=list(label_mapping.values()),
    target_names=list(label_mapping.keys()),
    zero_division=0,
))

Test Accuracy: 0.9951
Classification Report:
                         precision    recall  f1-score   support

                 Normal       1.00      1.00      1.00     26862
Darknet_Audio-Streaming       0.99      0.99      0.99      2657
           Darknet_Chat       0.99      0.99      0.99       908
  Darknet_File-Transfer       0.98      0.96      0.97       522
           Darknet_VOIP       0.96      0.99      0.97       293
Darknet_Video-Streaming       0.88      0.91      0.89       269
          Darknet_Email       0.97      0.96      0.96       116
       Darknet_Browsing       0.80      0.83      0.81        53
            Darknet_P2P       0.95      0.86      0.90        44

               accuracy                           1.00     31724
              macro avg       0.95      0.94      0.94     31724
           weighted avg       1.00      1.00      1.00     31724



Runtime: 4m