In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!ls "/content/drive/MyDrive/CiC-DataSet"
!ls "/content/drive/MyDrive/CiC-DataSet/Complete_Dataset"
!ls "/content/drive/MyDrive/CiC-DataSet/Complete_Dataset/csv"

Complete_Dataset  merged_filtered.csv
 csv   example	'README - README.pdf'   supplementary
 CICIoT2023  'README_csv - README.pdf'


In [None]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import warnings
from sklearn.tree import DecisionTreeClassifier
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler, StandardScaler


drive.mount('/content/drive', force_remount=True)

DATASET_DIRECTORY = '/content/drive/MyDrive/CiC-DataSet/Complete_Dataset/csv/CICIoT2023'

try:
    df_sets = [k for k in os.listdir(DATASET_DIRECTORY) if k.endswith('.csv')]
    if not df_sets:
        print("No CSV files found in the specified directory.")
    df_sets.sort()
except Exception as e:
    print(f"Error accessing directory: {e}")

training_sets = df_sets[:int(len(df_sets) * 0.8)]
test_sets = df_sets[int(len(df_sets) * 0.8):]

X_columns = [
    'flow_duration', 'Header_Length', 'Protocol Type', 'Duration',
    'Rate', 'Srate', 'Drate', 'fin_flag_number', 'syn_flag_number',
    'rst_flag_number', 'psh_flag_number', 'ack_flag_number',
    'ece_flag_number', 'cwr_flag_number', 'ack_count',
    'syn_count', 'fin_count', 'urg_count', 'rst_count',
    'HTTP', 'HTTPS', 'DNS', 'Telnet', 'SMTP', 'SSH', 'IRC', 'TCP',
    'UDP', 'DHCP', 'ARP', 'ICMP', 'IPv', 'LLC', 'Tot sum', 'Min',
    'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number', 'Magnitue',
    'Radius', 'Covariance', 'Variance', 'Weight',
]
y_column = 'label'

scaler = StandardScaler()

for train_set in tqdm(training_sets, desc="Processing Training Sets"):
    file_path = os.path.join(DATASET_DIRECTORY, train_set)
    try:
        for chunk in pd.read_csv(file_path, usecols=X_columns, chunksize=5000):
            scaler.partial_fit(chunk)
    except Exception as e:
        print(f"Error processing file {train_set}: {e}")


classifier = DecisionTreeClassifier(random_state=42)

for train_set in tqdm(training_sets, desc="Training Model"):
    file_path = os.path.join(DATASET_DIRECTORY, train_set)
    try:
        for chunk in pd.read_csv(file_path, usecols=X_columns + [y_column], chunksize=5000):
            X_train = scaler.transform(chunk[X_columns])
            y_train = chunk[y_column]
            classifier.fit(X_train, y_train)
    except Exception as e:
        print(f"Error training on file {train_set}: {e}")

for test_set in tqdm(test_sets, desc="Testing Model"):
    file_path = os.path.join(DATASET_DIRECTORY, test_set)
    try:
        for chunk in pd.read_csv(file_path, usecols=X_columns + [y_column], chunksize=5000):
            X_test = scaler.transform(chunk[X_columns])
            y_test = chunk[y_column]
            predictions = classifier.predict(X_test)

    except Exception as e:
        print(f"Error testing on file {test_set}: {e}")


Mounted at /content/drive


Processing Training Sets: 100%|██████████| 135/135 [05:03<00:00,  2.24s/it]
Training Model: 100%|██████████| 135/135 [12:52<00:00,  5.72s/it]
Testing Model: 100%|██████████| 34/34 [01:37<00:00,  2.88s/it]


# Decision Tree [Classification: 34 (33+1) classes]

In [None]:
import os
from tqdm import tqdm
from sklearn.tree import DecisionTreeClassifier

ML_models = [
    DecisionTreeClassifier(random_state=42),
]

ML_names = [
    "DecisionTree",
]

for train_set in tqdm(training_sets, desc="Training Models"):

    file_path = os.path.join(DATASET_DIRECTORY, train_set)
    try:

        d = pd.read_csv(file_path)


        d[X_columns] = scaler.transform(d[X_columns])

        for model, model_name in zip(ML_models, ML_names):
            print(f"Training {model_name} on {train_set}...")
            model.fit(d[X_columns], d[y_column])

        del d
    except Exception as e:
        print(f"Error processing {train_set}: {e}")

Training Models:   0%|          | 0/135 [00:00<?, ?it/s]

Training DecisionTree on part-00000-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   1%|          | 1/135 [00:08<18:06,  8.11s/it]

Training DecisionTree on part-00001-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   1%|▏         | 2/135 [00:13<13:53,  6.26s/it]

Training DecisionTree on part-00002-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   2%|▏         | 3/135 [00:19<13:57,  6.35s/it]

Training DecisionTree on part-00003-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   3%|▎         | 4/135 [00:25<13:56,  6.39s/it]

Training DecisionTree on part-00004-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   4%|▎         | 5/135 [00:31<12:49,  5.92s/it]

Training DecisionTree on part-00005-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   4%|▍         | 6/135 [00:37<13:20,  6.20s/it]

Training DecisionTree on part-00006-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   5%|▌         | 7/135 [00:42<12:27,  5.84s/it]

Training DecisionTree on part-00007-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   6%|▌         | 8/135 [00:48<11:51,  5.61s/it]

Training DecisionTree on part-00008-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   7%|▋         | 9/135 [00:55<12:40,  6.04s/it]

Training DecisionTree on part-00009-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   7%|▋         | 10/135 [00:59<11:51,  5.69s/it]

Training DecisionTree on part-00010-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   8%|▊         | 11/135 [01:07<12:40,  6.13s/it]

Training DecisionTree on part-00011-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   9%|▉         | 12/135 [01:12<11:54,  5.81s/it]

Training DecisionTree on part-00012-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  10%|▉         | 13/135 [01:17<11:41,  5.75s/it]

Training DecisionTree on part-00013-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  10%|█         | 14/135 [01:24<12:08,  6.02s/it]

Training DecisionTree on part-00014-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  11%|█         | 15/135 [01:29<11:12,  5.61s/it]

Training DecisionTree on part-00015-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  12%|█▏        | 16/135 [01:36<12:14,  6.17s/it]

Training DecisionTree on part-00016-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  13%|█▎        | 17/135 [01:41<11:36,  5.90s/it]

Training DecisionTree on part-00017-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  13%|█▎        | 18/135 [01:47<11:31,  5.91s/it]

Training DecisionTree on part-00018-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  14%|█▍        | 19/135 [01:53<11:35,  6.00s/it]

Training DecisionTree on part-00019-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  15%|█▍        | 20/135 [01:58<10:55,  5.70s/it]

Training DecisionTree on part-00020-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  16%|█▌        | 21/135 [02:05<11:26,  6.02s/it]

Training DecisionTree on part-00021-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  16%|█▋        | 22/135 [02:10<10:42,  5.69s/it]

Training DecisionTree on part-00022-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  17%|█▋        | 23/135 [02:16<10:57,  5.87s/it]

Training DecisionTree on part-00023-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  18%|█▊        | 24/135 [02:23<11:18,  6.11s/it]

Training DecisionTree on part-00024-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  19%|█▊        | 25/135 [02:28<10:34,  5.77s/it]

Training DecisionTree on part-00025-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  19%|█▉        | 26/135 [02:40<13:58,  7.70s/it]

Training DecisionTree on part-00026-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  20%|██        | 27/135 [02:46<13:01,  7.24s/it]

Training DecisionTree on part-00027-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  21%|██        | 28/135 [02:52<12:04,  6.77s/it]

Training DecisionTree on part-00028-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  21%|██▏       | 29/135 [02:58<11:17,  6.39s/it]

Training DecisionTree on part-00029-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  22%|██▏       | 30/135 [03:05<11:35,  6.62s/it]

Training DecisionTree on part-00030-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  23%|██▎       | 31/135 [03:10<10:49,  6.25s/it]

Training DecisionTree on part-00031-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  24%|██▎       | 32/135 [03:16<10:41,  6.23s/it]

Training DecisionTree on part-00032-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  24%|██▍       | 33/135 [03:22<10:12,  6.01s/it]

Training DecisionTree on part-00033-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  25%|██▌       | 34/135 [03:34<13:20,  7.93s/it]

Training DecisionTree on part-00034-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  26%|██▌       | 35/135 [03:39<11:50,  7.10s/it]

Training DecisionTree on part-00035-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  27%|██▋       | 36/135 [03:47<11:45,  7.12s/it]

Training DecisionTree on part-00036-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  27%|██▋       | 37/135 [03:58<13:39,  8.36s/it]

Training DecisionTree on part-00037-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  28%|██▊       | 38/135 [04:03<12:12,  7.55s/it]

Training DecisionTree on part-00038-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  29%|██▉       | 39/135 [04:09<10:55,  6.83s/it]

Training DecisionTree on part-00039-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  30%|██▉       | 40/135 [04:20<13:12,  8.34s/it]

Training DecisionTree on part-00040-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  30%|███       | 41/135 [04:26<11:36,  7.41s/it]

Training DecisionTree on part-00041-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  31%|███       | 42/135 [04:38<13:35,  8.77s/it]

Training DecisionTree on part-00042-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  32%|███▏      | 43/135 [04:44<12:09,  7.93s/it]

Training DecisionTree on part-00043-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  33%|███▎      | 44/135 [04:49<11:04,  7.30s/it]

Training DecisionTree on part-00044-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  33%|███▎      | 45/135 [04:55<09:57,  6.64s/it]

Training DecisionTree on part-00045-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  34%|███▍      | 46/135 [05:02<10:06,  6.81s/it]

Training DecisionTree on part-00046-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  35%|███▍      | 47/135 [05:07<09:23,  6.40s/it]

Training DecisionTree on part-00047-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  36%|███▌      | 48/135 [05:15<09:43,  6.71s/it]

Training DecisionTree on part-00048-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  36%|███▋      | 49/135 [05:20<08:53,  6.20s/it]

Training DecisionTree on part-00049-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  37%|███▋      | 50/135 [05:27<09:17,  6.56s/it]

Training DecisionTree on part-00050-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  38%|███▊      | 51/135 [05:33<08:46,  6.26s/it]

Training DecisionTree on part-00051-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  39%|███▊      | 52/135 [05:39<08:43,  6.31s/it]

Training DecisionTree on part-00052-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  39%|███▉      | 53/135 [05:51<11:06,  8.13s/it]

Training DecisionTree on part-00053-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  40%|████      | 54/135 [05:58<10:28,  7.75s/it]

Training DecisionTree on part-00054-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  41%|████      | 55/135 [06:10<11:56,  8.95s/it]

Training DecisionTree on part-00055-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  41%|████▏     | 56/135 [06:17<10:49,  8.23s/it]

Training DecisionTree on part-00056-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  42%|████▏     | 57/135 [06:22<09:38,  7.42s/it]

Training DecisionTree on part-00057-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  43%|████▎     | 58/135 [06:35<11:29,  8.95s/it]

Training DecisionTree on part-00058-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  44%|████▎     | 59/135 [06:41<10:25,  8.24s/it]

Training DecisionTree on part-00059-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  44%|████▍     | 60/135 [06:47<09:26,  7.55s/it]

Training DecisionTree on part-00060-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  45%|████▌     | 61/135 [06:54<08:55,  7.23s/it]

Training DecisionTree on part-00061-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  46%|████▌     | 62/135 [07:00<08:29,  6.97s/it]

Training DecisionTree on part-00062-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  47%|████▋     | 63/135 [07:12<10:12,  8.51s/it]

Training DecisionTree on part-00063-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  47%|████▋     | 64/135 [07:17<08:53,  7.51s/it]

Training DecisionTree on part-00064-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  48%|████▊     | 65/135 [07:24<08:35,  7.37s/it]

Training DecisionTree on part-00065-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  49%|████▉     | 66/135 [07:30<07:57,  6.92s/it]

Training DecisionTree on part-00066-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  50%|████▉     | 67/135 [07:35<07:04,  6.25s/it]

Training DecisionTree on part-00067-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  50%|█████     | 68/135 [07:42<07:24,  6.63s/it]

Training DecisionTree on part-00068-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  51%|█████     | 69/135 [07:47<06:44,  6.13s/it]

Training DecisionTree on part-00069-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  52%|█████▏    | 70/135 [07:54<06:45,  6.25s/it]

Training DecisionTree on part-00070-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  53%|█████▎    | 71/135 [07:59<06:19,  5.93s/it]

Training DecisionTree on part-00071-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  53%|█████▎    | 72/135 [08:06<06:25,  6.11s/it]

Training DecisionTree on part-00072-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  54%|█████▍    | 73/135 [08:13<06:36,  6.39s/it]

Training DecisionTree on part-00073-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  55%|█████▍    | 74/135 [08:18<06:09,  6.06s/it]

Training DecisionTree on part-00074-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  56%|█████▌    | 75/135 [08:25<06:23,  6.40s/it]

Training DecisionTree on part-00075-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  56%|█████▋    | 76/135 [08:37<07:57,  8.09s/it]

Training DecisionTree on part-00076-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  57%|█████▋    | 77/135 [08:48<08:34,  8.87s/it]

Training DecisionTree on part-00077-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  58%|█████▊    | 78/135 [09:00<09:20,  9.83s/it]

Training DecisionTree on part-00078-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  59%|█████▊    | 79/135 [09:12<09:43, 10.42s/it]

Training DecisionTree on part-00079-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  59%|█████▉    | 80/135 [09:24<10:03, 10.97s/it]

Training DecisionTree on part-00080-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  60%|██████    | 81/135 [09:29<08:21,  9.28s/it]

Training DecisionTree on part-00081-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  61%|██████    | 82/135 [09:36<07:30,  8.51s/it]

Training DecisionTree on part-00082-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  61%|██████▏   | 83/135 [09:41<06:31,  7.53s/it]

Training DecisionTree on part-00083-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  62%|██████▏   | 84/135 [09:47<05:54,  6.96s/it]

Training DecisionTree on part-00084-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  63%|██████▎   | 85/135 [09:53<05:36,  6.74s/it]

Training DecisionTree on part-00085-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  64%|██████▎   | 86/135 [09:59<05:11,  6.36s/it]

Training DecisionTree on part-00086-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  64%|██████▍   | 87/135 [10:06<05:15,  6.58s/it]

Training DecisionTree on part-00087-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  65%|██████▌   | 88/135 [10:11<04:51,  6.21s/it]

Training DecisionTree on part-00088-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  66%|██████▌   | 89/135 [10:17<04:40,  6.10s/it]

Training DecisionTree on part-00089-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  67%|██████▋   | 90/135 [10:24<04:41,  6.26s/it]

Training DecisionTree on part-00090-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  67%|██████▋   | 91/135 [10:29<04:25,  6.04s/it]

Training DecisionTree on part-00091-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  68%|██████▊   | 92/135 [10:37<04:42,  6.57s/it]

Training DecisionTree on part-00092-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  69%|██████▉   | 93/135 [10:42<04:15,  6.09s/it]

Training DecisionTree on part-00093-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  70%|██████▉   | 94/135 [10:49<04:28,  6.56s/it]

Training DecisionTree on part-00094-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  70%|███████   | 95/135 [11:00<05:14,  7.86s/it]

Training DecisionTree on part-00095-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  71%|███████   | 96/135 [11:12<05:53,  9.05s/it]

Training DecisionTree on part-00096-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  72%|███████▏  | 97/135 [11:20<05:24,  8.54s/it]

Training DecisionTree on part-00097-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  73%|███████▎  | 98/135 [11:25<04:43,  7.66s/it]

Training DecisionTree on part-00098-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  73%|███████▎  | 99/135 [11:38<05:27,  9.08s/it]

Training DecisionTree on part-00099-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  74%|███████▍  | 100/135 [11:43<04:43,  8.11s/it]

Training DecisionTree on part-00100-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  75%|███████▍  | 101/135 [11:50<04:16,  7.55s/it]

Training DecisionTree on part-00101-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  76%|███████▌  | 102/135 [11:55<03:51,  7.02s/it]

Training DecisionTree on part-00102-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  76%|███████▋  | 103/135 [12:02<03:44,  7.00s/it]

Training DecisionTree on part-00103-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  77%|███████▋  | 104/135 [12:08<03:27,  6.70s/it]

Training DecisionTree on part-00104-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  78%|███████▊  | 105/135 [12:15<03:15,  6.53s/it]

Training DecisionTree on part-00105-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  79%|███████▊  | 106/135 [12:20<03:01,  6.26s/it]

Training DecisionTree on part-00106-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  79%|███████▉  | 107/135 [12:26<02:52,  6.18s/it]

Training DecisionTree on part-00107-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  80%|████████  | 108/135 [12:33<02:49,  6.29s/it]

Training DecisionTree on part-00108-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  81%|████████  | 109/135 [12:38<02:35,  5.98s/it]

Training DecisionTree on part-00109-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  81%|████████▏ | 110/135 [12:46<02:41,  6.46s/it]

Training DecisionTree on part-00110-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  82%|████████▏ | 111/135 [12:51<02:26,  6.12s/it]

Training DecisionTree on part-00111-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  83%|████████▎ | 112/135 [12:58<02:28,  6.48s/it]

Training DecisionTree on part-00112-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  84%|████████▎ | 113/135 [13:04<02:20,  6.38s/it]

Training DecisionTree on part-00113-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  84%|████████▍ | 114/135 [13:17<02:50,  8.13s/it]

Training DecisionTree on part-00114-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  85%|████████▌ | 115/135 [13:22<02:25,  7.27s/it]

Training DecisionTree on part-00115-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  86%|████████▌ | 116/135 [13:28<02:12,  6.95s/it]

Training DecisionTree on part-00116-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  87%|████████▋ | 117/135 [13:34<01:58,  6.58s/it]

Training DecisionTree on part-00117-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  87%|████████▋ | 118/135 [13:40<01:47,  6.34s/it]

Training DecisionTree on part-00118-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  88%|████████▊ | 119/135 [13:46<01:41,  6.34s/it]

Training DecisionTree on part-00119-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  89%|████████▉ | 120/135 [13:51<01:31,  6.10s/it]

Training DecisionTree on part-00120-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  90%|████████▉ | 121/135 [13:58<01:29,  6.37s/it]

Training DecisionTree on part-00121-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  90%|█████████ | 122/135 [14:04<01:19,  6.12s/it]

Training DecisionTree on part-00122-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  91%|█████████ | 123/135 [14:11<01:15,  6.26s/it]

Training DecisionTree on part-00123-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  92%|█████████▏| 124/135 [14:17<01:09,  6.31s/it]

Training DecisionTree on part-00124-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  93%|█████████▎| 125/135 [14:23<01:02,  6.21s/it]

Training DecisionTree on part-00125-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  93%|█████████▎| 126/135 [14:35<01:12,  8.04s/it]

Training DecisionTree on part-00126-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  94%|█████████▍| 127/135 [14:42<01:01,  7.68s/it]

Training DecisionTree on part-00127-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  95%|█████████▍| 128/135 [14:54<01:02,  8.88s/it]

Training DecisionTree on part-00128-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  96%|█████████▌| 129/135 [15:00<00:47,  7.99s/it]

Training DecisionTree on part-00129-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  96%|█████████▋| 130/135 [15:05<00:35,  7.15s/it]

Training DecisionTree on part-00130-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  97%|█████████▋| 131/135 [15:12<00:28,  7.24s/it]

Training DecisionTree on part-00131-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  98%|█████████▊| 132/135 [15:24<00:25,  8.65s/it]

Training DecisionTree on part-00132-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  99%|█████████▊| 133/135 [15:29<00:15,  7.59s/it]

Training DecisionTree on part-00133-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  99%|█████████▉| 134/135 [15:35<00:07,  7.02s/it]

Training DecisionTree on part-00134-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models: 100%|██████████| 135/135 [15:42<00:00,  6.98s/it]


In [None]:
import os
from tqdm import tqdm
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

ML_models = [
    DecisionTreeClassifier(random_state=42),
]

ML_names = [
    "DecisionTree",
]
for train_set in tqdm(training_sets, desc="Training Models"):
    file_path = os.path.join(DATASET_DIRECTORY, train_set)
    try:
        d_train = pd.read_csv(file_path)

        d_train[X_columns] = scaler.transform(d_train[X_columns])


        for model, model_name in zip(ML_models, ML_names):
            print(f"Training {model_name} on {train_set}...")
            model.fit(d_train[X_columns], d_train[y_column])

        del d_train
    except Exception as e:
        print(f"Error processing {train_set}: {e}")

y_test = []
preds = {i: [] for i in range(len(ML_models))}

for test_set in tqdm(test_sets, desc="Evaluating Models"):
    file_path = os.path.join(DATASET_DIRECTORY, test_set)
    try:
        d_test = pd.read_csv(file_path)

        d_test[X_columns] = scaler.transform(d_test[X_columns])

        y_test += list(d_test[y_column].values)

        for i, model in enumerate(ML_models):
            y_pred = list(model.predict(d_test[X_columns]))
            preds[i].extend(y_pred)

        del d_test
    except Exception as e:
        print(f"Error processing {test_set}: {e}")

Training Models:   0%|          | 0/135 [00:00<?, ?it/s]

Training DecisionTree on part-00000-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   1%|          | 1/135 [00:10<22:50, 10.22s/it]

Training DecisionTree on part-00001-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   1%|▏         | 2/135 [00:16<17:30,  7.90s/it]

Training DecisionTree on part-00002-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   2%|▏         | 3/135 [00:24<17:02,  7.75s/it]

Training DecisionTree on part-00003-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   3%|▎         | 4/135 [00:29<14:31,  6.65s/it]

Training DecisionTree on part-00004-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   4%|▎         | 5/135 [00:35<14:22,  6.63s/it]

Training DecisionTree on part-00005-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   4%|▍         | 6/135 [00:40<13:09,  6.12s/it]

Training DecisionTree on part-00006-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   5%|▌         | 7/135 [00:45<12:25,  5.82s/it]

Training DecisionTree on part-00007-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   6%|▌         | 8/135 [00:53<13:17,  6.28s/it]

Training DecisionTree on part-00008-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   7%|▋         | 9/135 [00:58<12:46,  6.08s/it]

Training DecisionTree on part-00009-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   7%|▋         | 10/135 [01:05<12:56,  6.21s/it]

Training DecisionTree on part-00010-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   8%|▊         | 11/135 [01:10<12:19,  5.96s/it]

Training DecisionTree on part-00011-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   9%|▉         | 12/135 [01:16<11:54,  5.81s/it]

Training DecisionTree on part-00012-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  10%|▉         | 13/135 [01:22<11:50,  5.82s/it]

Training DecisionTree on part-00013-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  10%|█         | 14/135 [01:27<11:14,  5.58s/it]

Training DecisionTree on part-00014-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  11%|█         | 15/135 [01:33<11:36,  5.80s/it]

Training DecisionTree on part-00015-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  12%|█▏        | 16/135 [01:39<11:33,  5.83s/it]

Training DecisionTree on part-00016-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  13%|█▎        | 17/135 [01:45<11:23,  5.79s/it]

Training DecisionTree on part-00017-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  13%|█▎        | 18/135 [01:51<11:38,  5.97s/it]

Training DecisionTree on part-00018-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  14%|█▍        | 19/135 [01:56<11:12,  5.79s/it]

Training DecisionTree on part-00019-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  15%|█▍        | 20/135 [02:03<11:38,  6.07s/it]

Training DecisionTree on part-00020-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  16%|█▌        | 21/135 [02:08<11:05,  5.84s/it]

Training DecisionTree on part-00021-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  16%|█▋        | 22/135 [02:14<10:47,  5.73s/it]

Training DecisionTree on part-00022-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  17%|█▋        | 23/135 [02:21<11:19,  6.07s/it]

Training DecisionTree on part-00023-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  18%|█▊        | 24/135 [02:33<14:43,  7.96s/it]

Training DecisionTree on part-00024-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  19%|█▊        | 25/135 [02:39<13:22,  7.30s/it]

Training DecisionTree on part-00025-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  19%|█▉        | 26/135 [02:51<16:10,  8.91s/it]

Training DecisionTree on part-00026-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  20%|██        | 27/135 [02:57<14:01,  7.79s/it]

Training DecisionTree on part-00027-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  21%|██        | 28/135 [03:03<13:09,  7.37s/it]

Training DecisionTree on part-00028-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  21%|██▏       | 29/135 [03:09<12:05,  6.85s/it]

Training DecisionTree on part-00029-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  22%|██▏       | 30/135 [03:16<12:08,  6.94s/it]

Training DecisionTree on part-00030-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  23%|██▎       | 31/135 [03:22<11:29,  6.63s/it]

Training DecisionTree on part-00031-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  24%|██▎       | 32/135 [03:27<10:36,  6.18s/it]

Training DecisionTree on part-00032-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  24%|██▍       | 33/135 [03:34<10:48,  6.36s/it]

Training DecisionTree on part-00033-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  25%|██▌       | 34/135 [03:46<13:40,  8.12s/it]

Training DecisionTree on part-00034-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  26%|██▌       | 35/135 [03:51<12:01,  7.21s/it]

Training DecisionTree on part-00035-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  27%|██▋       | 36/135 [03:57<11:22,  6.89s/it]

Training DecisionTree on part-00036-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  27%|██▋       | 37/135 [04:09<13:49,  8.46s/it]

Training DecisionTree on part-00037-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  28%|██▊       | 38/135 [04:15<12:37,  7.80s/it]

Training DecisionTree on part-00038-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  29%|██▉       | 39/135 [04:21<11:18,  7.06s/it]

Training DecisionTree on part-00039-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  30%|██▉       | 40/135 [04:33<13:33,  8.56s/it]

Training DecisionTree on part-00040-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  30%|███       | 41/135 [04:38<11:43,  7.49s/it]

Training DecisionTree on part-00041-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  31%|███       | 42/135 [04:50<13:50,  8.93s/it]

Training DecisionTree on part-00042-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  32%|███▏      | 43/135 [04:55<11:59,  7.82s/it]

Training DecisionTree on part-00043-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  33%|███▎      | 44/135 [05:02<11:24,  7.52s/it]

Training DecisionTree on part-00044-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  33%|███▎      | 45/135 [05:07<10:07,  6.75s/it]

Training DecisionTree on part-00045-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  34%|███▍      | 46/135 [05:14<10:11,  6.87s/it]

Training DecisionTree on part-00046-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  35%|███▍      | 47/135 [05:20<09:31,  6.49s/it]

Training DecisionTree on part-00047-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  36%|███▌      | 48/135 [05:26<09:27,  6.52s/it]

Training DecisionTree on part-00048-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  36%|███▋      | 49/135 [05:32<08:57,  6.25s/it]

Training DecisionTree on part-00049-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  37%|███▋      | 50/135 [05:38<08:39,  6.11s/it]

Training DecisionTree on part-00050-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  38%|███▊      | 51/135 [05:45<08:55,  6.37s/it]

Training DecisionTree on part-00051-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  39%|███▊      | 52/135 [05:51<08:35,  6.21s/it]

Training DecisionTree on part-00052-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  39%|███▉      | 53/135 [06:05<11:43,  8.58s/it]

Training DecisionTree on part-00053-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  40%|████      | 54/135 [06:11<10:38,  7.88s/it]

Training DecisionTree on part-00054-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  41%|████      | 55/135 [06:17<09:41,  7.27s/it]

Training DecisionTree on part-00055-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  41%|████▏     | 56/135 [06:22<08:39,  6.58s/it]

Training DecisionTree on part-00056-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  42%|████▏     | 57/135 [06:29<08:40,  6.67s/it]

Training DecisionTree on part-00057-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  43%|████▎     | 58/135 [06:40<10:20,  8.06s/it]

Training DecisionTree on part-00058-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  44%|████▎     | 59/135 [06:46<09:17,  7.33s/it]

Training DecisionTree on part-00059-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  44%|████▍     | 60/135 [06:51<08:26,  6.75s/it]

Training DecisionTree on part-00060-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  45%|████▌     | 61/135 [06:58<08:29,  6.89s/it]

Training DecisionTree on part-00061-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  46%|████▌     | 62/135 [07:04<07:53,  6.48s/it]

Training DecisionTree on part-00062-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  47%|████▋     | 63/135 [07:16<09:58,  8.32s/it]

Training DecisionTree on part-00063-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  47%|████▋     | 64/135 [07:23<09:09,  7.74s/it]

Training DecisionTree on part-00064-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  48%|████▊     | 65/135 [07:30<08:40,  7.44s/it]

Training DecisionTree on part-00065-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  49%|████▉     | 66/135 [07:35<08:01,  6.98s/it]

Training DecisionTree on part-00066-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  50%|████▉     | 67/135 [07:42<07:38,  6.74s/it]

Training DecisionTree on part-00067-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  50%|█████     | 68/135 [07:48<07:18,  6.55s/it]

Training DecisionTree on part-00068-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  51%|█████     | 69/135 [07:54<07:12,  6.55s/it]

Training DecisionTree on part-00069-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  52%|█████▏    | 70/135 [08:00<06:41,  6.18s/it]

Training DecisionTree on part-00070-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  53%|█████▎    | 71/135 [08:05<06:25,  6.03s/it]

Training DecisionTree on part-00071-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  53%|█████▎    | 72/135 [08:12<06:40,  6.35s/it]

Training DecisionTree on part-00072-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  54%|█████▍    | 73/135 [08:18<06:26,  6.24s/it]

Training DecisionTree on part-00073-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  55%|█████▍    | 74/135 [08:25<06:29,  6.39s/it]

Training DecisionTree on part-00074-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  56%|█████▌    | 75/135 [08:31<06:10,  6.18s/it]

Training DecisionTree on part-00075-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  56%|█████▋    | 76/135 [08:44<08:07,  8.27s/it]

Training DecisionTree on part-00076-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  57%|█████▋    | 77/135 [08:56<09:10,  9.48s/it]

Training DecisionTree on part-00077-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  58%|█████▊    | 78/135 [09:08<09:45, 10.28s/it]

Training DecisionTree on part-00078-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  59%|█████▊    | 79/135 [09:20<09:56, 10.66s/it]

Training DecisionTree on part-00079-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  59%|█████▉    | 80/135 [09:32<10:01, 10.93s/it]

Training DecisionTree on part-00080-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  60%|██████    | 81/135 [09:38<08:41,  9.66s/it]

Training DecisionTree on part-00081-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  61%|██████    | 82/135 [09:43<07:21,  8.33s/it]

Training DecisionTree on part-00082-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  61%|██████▏   | 83/135 [09:50<06:47,  7.84s/it]

Training DecisionTree on part-00083-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  62%|██████▏   | 84/135 [09:56<06:04,  7.15s/it]

Training DecisionTree on part-00084-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  63%|██████▎   | 85/135 [10:04<06:13,  7.47s/it]

Training DecisionTree on part-00085-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  64%|██████▎   | 86/135 [10:10<05:39,  6.93s/it]

Training DecisionTree on part-00086-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  64%|██████▍   | 87/135 [10:16<05:19,  6.65s/it]

Training DecisionTree on part-00087-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  65%|██████▌   | 88/135 [10:22<05:10,  6.60s/it]

Training DecisionTree on part-00088-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  66%|██████▌   | 89/135 [10:27<04:45,  6.21s/it]

Training DecisionTree on part-00089-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  67%|██████▋   | 90/135 [10:34<04:51,  6.48s/it]

Training DecisionTree on part-00090-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  67%|██████▋   | 91/135 [10:40<04:33,  6.21s/it]

Training DecisionTree on part-00091-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  68%|██████▊   | 92/135 [10:48<04:44,  6.61s/it]

Training DecisionTree on part-00092-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  69%|██████▉   | 93/135 [10:52<04:14,  6.05s/it]

Training DecisionTree on part-00093-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  70%|██████▉   | 94/135 [10:58<04:08,  6.05s/it]

Training DecisionTree on part-00094-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  70%|███████   | 95/135 [11:10<05:12,  7.82s/it]

Training DecisionTree on part-00095-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  71%|███████   | 96/135 [11:23<05:59,  9.22s/it]

Training DecisionTree on part-00096-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  72%|███████▏  | 97/135 [11:29<05:19,  8.40s/it]

Training DecisionTree on part-00097-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  73%|███████▎  | 98/135 [11:36<04:49,  7.82s/it]

Training DecisionTree on part-00098-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  73%|███████▎  | 99/135 [11:48<05:28,  9.12s/it]

Training DecisionTree on part-00099-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  74%|███████▍  | 100/135 [11:53<04:39,  7.99s/it]

Training DecisionTree on part-00100-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  75%|███████▍  | 101/135 [12:00<04:14,  7.48s/it]

Training DecisionTree on part-00101-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  76%|███████▌  | 102/135 [12:05<03:49,  6.96s/it]

Training DecisionTree on part-00102-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  76%|███████▋  | 103/135 [12:10<03:25,  6.41s/it]

Training DecisionTree on part-00103-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  77%|███████▋  | 104/135 [12:18<03:29,  6.75s/it]

Training DecisionTree on part-00104-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  78%|███████▊  | 105/135 [12:23<03:04,  6.16s/it]

Training DecisionTree on part-00105-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  79%|███████▊  | 106/135 [12:29<03:03,  6.32s/it]

Training DecisionTree on part-00106-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  79%|███████▉  | 107/135 [12:41<03:42,  7.96s/it]

Training DecisionTree on part-00107-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  80%|████████  | 108/135 [12:48<03:23,  7.52s/it]

Training DecisionTree on part-00108-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  81%|████████  | 109/135 [12:53<02:56,  6.81s/it]

Training DecisionTree on part-00109-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  81%|████████▏ | 110/135 [13:01<02:56,  7.07s/it]

Training DecisionTree on part-00110-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  82%|████████▏ | 111/135 [13:06<02:37,  6.56s/it]

Training DecisionTree on part-00111-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  83%|████████▎ | 112/135 [13:14<02:38,  6.90s/it]

Training DecisionTree on part-00112-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  84%|████████▎ | 113/135 [13:19<02:24,  6.59s/it]

Training DecisionTree on part-00113-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  84%|████████▍ | 114/135 [13:32<02:56,  8.41s/it]

Training DecisionTree on part-00114-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  85%|████████▌ | 115/135 [13:37<02:29,  7.49s/it]

Training DecisionTree on part-00115-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  86%|████████▌ | 116/135 [13:44<02:16,  7.16s/it]

Training DecisionTree on part-00116-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  87%|████████▋ | 117/135 [13:49<02:00,  6.68s/it]

Training DecisionTree on part-00117-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  87%|████████▋ | 118/135 [13:56<01:53,  6.69s/it]

Training DecisionTree on part-00118-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  88%|████████▊ | 119/135 [14:01<01:39,  6.19s/it]

Training DecisionTree on part-00119-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  89%|████████▉ | 120/135 [14:06<01:28,  5.90s/it]

Training DecisionTree on part-00120-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  90%|████████▉ | 121/135 [14:13<01:26,  6.15s/it]

Training DecisionTree on part-00121-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  90%|█████████ | 122/135 [14:19<01:17,  6.00s/it]

Training DecisionTree on part-00122-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  91%|█████████ | 123/135 [14:26<01:15,  6.33s/it]

Training DecisionTree on part-00123-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  92%|█████████▏| 124/135 [14:32<01:08,  6.18s/it]

Training DecisionTree on part-00124-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  93%|█████████▎| 125/135 [14:38<01:03,  6.30s/it]

Training DecisionTree on part-00125-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  93%|█████████▎| 126/135 [14:50<01:11,  7.92s/it]

Training DecisionTree on part-00126-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  94%|█████████▍| 127/135 [14:57<01:00,  7.59s/it]

Training DecisionTree on part-00127-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  95%|█████████▍| 128/135 [15:09<01:03,  9.02s/it]

Training DecisionTree on part-00128-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  96%|█████████▌| 129/135 [15:15<00:47,  7.92s/it]

Training DecisionTree on part-00129-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  96%|█████████▋| 130/135 [15:20<00:36,  7.28s/it]

Training DecisionTree on part-00130-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  97%|█████████▋| 131/135 [15:27<00:28,  7.21s/it]

Training DecisionTree on part-00131-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  98%|█████████▊| 132/135 [15:40<00:26,  8.78s/it]

Training DecisionTree on part-00132-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  99%|█████████▊| 133/135 [15:45<00:15,  7.72s/it]

Training DecisionTree on part-00133-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  99%|█████████▉| 134/135 [15:52<00:07,  7.38s/it]

Training DecisionTree on part-00134-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models: 100%|██████████| 135/135 [15:57<00:00,  7.10s/it]
Evaluating Models: 100%|██████████| 34/34 [01:34<00:00,  2.78s/it]


In [None]:
import os
from tqdm import tqdm
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

ML_models = [
    DecisionTreeClassifier(random_state=42),
]

ML_names = [
    "DecisionTree",
]

for train_set in tqdm(training_sets, desc="Training Models"):
    file_path = os.path.join(DATASET_DIRECTORY, train_set)
    try:
        d_train = pd.read_csv(file_path)

        d_train[X_columns] = scaler.transform(d_train[X_columns])
        for model, model_name in zip(ML_models, ML_names):
            print(f"Training {model_name} on {train_set}...")
            model.fit(d_train[X_columns], d_train[y_column])

        del d_train
    except Exception as e:
        print(f"Error processing {train_set}: {e}")
y_test = []
preds = {i: [] for i in range(len(ML_models))}

for test_set in tqdm(test_sets, desc="Evaluating Models"):
    file_path = os.path.join(DATASET_DIRECTORY, test_set)
    try:
        d_test = pd.read_csv(file_path)


        d_test[X_columns] = scaler.transform(d_test[X_columns])
        y_test += list(d_test[y_column].values)
        for i, model in enumerate(ML_models):
            y_pred = list(model.predict(d_test[X_columns]))
            preds[i].extend(y_pred)
        del d_test
    except Exception as e:
        print(f"Error processing {test_set}: {e}")

# Evaluation phase
for k, v in preds.items():
    y_pred = v
    print(f"##### {ML_names[k]} #####")
    print('Accuracy Score: ', accuracy_score(y_test, y_pred))
    print('Recall Score: ', recall_score(y_test, y_pred, average='macro'))
    print('Precision Score: ', precision_score(y_test, y_pred, average='macro'))
    print('F1 Score: ', f1_score(y_test, y_pred, average='macro'))
    print("\n")


Training Models:   0%|          | 0/135 [00:00<?, ?it/s]

Training DecisionTree on part-00000-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   1%|          | 1/135 [00:10<23:37, 10.57s/it]

Training DecisionTree on part-00001-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   1%|▏         | 2/135 [00:15<16:03,  7.24s/it]

Training DecisionTree on part-00002-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   2%|▏         | 3/135 [00:23<16:33,  7.53s/it]

Training DecisionTree on part-00003-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   3%|▎         | 4/135 [00:31<17:00,  7.79s/it]

Training DecisionTree on part-00004-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   4%|▎         | 5/135 [00:37<15:38,  7.22s/it]

Training DecisionTree on part-00005-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   4%|▍         | 6/135 [00:48<18:04,  8.41s/it]

Training DecisionTree on part-00006-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   5%|▌         | 7/135 [00:56<17:47,  8.34s/it]

Training DecisionTree on part-00007-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   6%|▌         | 8/135 [01:04<17:10,  8.12s/it]

Training DecisionTree on part-00008-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   7%|▋         | 9/135 [01:11<16:31,  7.87s/it]

Training DecisionTree on part-00009-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   7%|▋         | 10/135 [01:16<14:37,  7.02s/it]

Training DecisionTree on part-00010-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   8%|▊         | 11/135 [01:23<14:37,  7.08s/it]

Training DecisionTree on part-00011-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   9%|▉         | 12/135 [01:28<13:12,  6.44s/it]

Training DecisionTree on part-00012-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  10%|▉         | 13/135 [01:34<12:24,  6.10s/it]

Training DecisionTree on part-00013-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  10%|█         | 14/135 [01:41<12:55,  6.41s/it]

Training DecisionTree on part-00014-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  11%|█         | 15/135 [01:46<11:50,  5.92s/it]

Training DecisionTree on part-00015-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  12%|█▏        | 16/135 [01:53<12:46,  6.44s/it]

Training DecisionTree on part-00016-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  13%|█▎        | 17/135 [01:59<11:55,  6.06s/it]

Training DecisionTree on part-00017-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  13%|█▎        | 18/135 [02:04<11:20,  5.82s/it]

Training DecisionTree on part-00018-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  14%|█▍        | 19/135 [02:10<11:45,  6.08s/it]

Training DecisionTree on part-00019-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  15%|█▍        | 20/135 [02:16<11:08,  5.81s/it]

Training DecisionTree on part-00020-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  16%|█▌        | 21/135 [02:22<11:37,  6.12s/it]

Training DecisionTree on part-00021-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  16%|█▋        | 22/135 [02:27<10:49,  5.75s/it]

Training DecisionTree on part-00022-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  17%|█▋        | 23/135 [02:34<11:03,  5.92s/it]

Training DecisionTree on part-00023-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  18%|█▊        | 24/135 [02:41<11:36,  6.27s/it]

Training DecisionTree on part-00024-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  19%|█▊        | 25/135 [02:46<10:43,  5.85s/it]

Training DecisionTree on part-00025-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  19%|█▉        | 26/135 [02:58<14:18,  7.87s/it]

Training DecisionTree on part-00026-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  20%|██        | 27/135 [03:04<13:07,  7.29s/it]

Training DecisionTree on part-00027-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  21%|██        | 28/135 [03:10<12:07,  6.80s/it]

Training DecisionTree on part-00028-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  21%|██▏       | 29/135 [03:15<11:20,  6.42s/it]

Training DecisionTree on part-00029-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  22%|██▏       | 30/135 [03:22<11:36,  6.64s/it]

Training DecisionTree on part-00030-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  23%|██▎       | 31/135 [03:28<10:45,  6.21s/it]

Training DecisionTree on part-00031-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  24%|██▎       | 32/135 [03:34<10:27,  6.09s/it]

Training DecisionTree on part-00032-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  24%|██▍       | 33/135 [03:39<10:10,  5.99s/it]

Training DecisionTree on part-00033-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  25%|██▌       | 34/135 [03:51<13:05,  7.78s/it]

Training DecisionTree on part-00034-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  26%|██▌       | 35/135 [03:56<11:38,  6.98s/it]

Training DecisionTree on part-00035-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  27%|██▋       | 36/135 [04:03<11:13,  6.80s/it]

Training DecisionTree on part-00036-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  27%|██▋       | 37/135 [04:14<13:21,  8.18s/it]

Training DecisionTree on part-00037-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  28%|██▊       | 38/135 [04:21<12:24,  7.68s/it]

Training DecisionTree on part-00038-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  29%|██▉       | 39/135 [04:26<11:04,  6.92s/it]

Training DecisionTree on part-00039-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  30%|██▉       | 40/135 [04:38<13:24,  8.47s/it]

Training DecisionTree on part-00040-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  30%|███       | 41/135 [04:43<11:38,  7.43s/it]

Training DecisionTree on part-00041-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  31%|███       | 42/135 [04:55<13:45,  8.87s/it]

Training DecisionTree on part-00042-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  32%|███▏      | 43/135 [05:00<11:59,  7.82s/it]

Training DecisionTree on part-00043-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  33%|███▎      | 44/135 [05:07<11:17,  7.44s/it]

Training DecisionTree on part-00044-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  33%|███▎      | 45/135 [05:12<10:01,  6.68s/it]

Training DecisionTree on part-00045-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  34%|███▍      | 46/135 [05:19<10:01,  6.76s/it]

Training DecisionTree on part-00046-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  35%|███▍      | 47/135 [05:24<09:11,  6.27s/it]

Training DecisionTree on part-00047-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  36%|███▌      | 48/135 [05:30<08:59,  6.20s/it]

Training DecisionTree on part-00048-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  36%|███▋      | 49/135 [05:36<08:41,  6.07s/it]

Training DecisionTree on part-00049-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  37%|███▋      | 50/135 [05:42<08:26,  5.96s/it]

Training DecisionTree on part-00050-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  38%|███▊      | 51/135 [05:48<08:37,  6.17s/it]

Training DecisionTree on part-00051-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  39%|███▊      | 52/135 [05:55<08:36,  6.23s/it]

Training DecisionTree on part-00052-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  39%|███▉      | 53/135 [06:07<11:13,  8.22s/it]

Training DecisionTree on part-00053-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  40%|████      | 54/135 [06:13<10:08,  7.51s/it]

Training DecisionTree on part-00054-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  41%|████      | 55/135 [06:20<09:33,  7.17s/it]

Training DecisionTree on part-00055-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  41%|████▏     | 56/135 [06:24<08:30,  6.46s/it]

Training DecisionTree on part-00056-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  42%|████▏     | 57/135 [06:31<08:31,  6.55s/it]

Training DecisionTree on part-00057-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  43%|████▎     | 58/135 [06:42<09:55,  7.74s/it]

Training DecisionTree on part-00058-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  44%|████▎     | 59/135 [06:48<09:23,  7.41s/it]

Training DecisionTree on part-00059-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  44%|████▍     | 60/135 [06:54<08:32,  6.84s/it]

Training DecisionTree on part-00060-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  45%|████▌     | 61/135 [07:01<08:30,  6.90s/it]

Training DecisionTree on part-00061-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  46%|████▌     | 62/135 [07:06<07:48,  6.42s/it]

Training DecisionTree on part-00062-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  47%|████▋     | 63/135 [07:18<09:43,  8.11s/it]

Training DecisionTree on part-00063-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  47%|████▋     | 64/135 [07:23<08:33,  7.24s/it]

Training DecisionTree on part-00064-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  48%|████▊     | 65/135 [07:31<08:30,  7.29s/it]

Training DecisionTree on part-00065-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  49%|████▉     | 66/135 [07:36<07:43,  6.72s/it]

Training DecisionTree on part-00066-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  50%|████▉     | 67/135 [07:42<07:11,  6.34s/it]

Training DecisionTree on part-00067-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  50%|█████     | 68/135 [07:48<07:10,  6.43s/it]

Training DecisionTree on part-00068-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  51%|█████     | 69/135 [07:53<06:30,  5.92s/it]

Training DecisionTree on part-00069-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  52%|█████▏    | 70/135 [08:00<06:45,  6.23s/it]

Training DecisionTree on part-00070-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  53%|█████▎    | 71/135 [08:05<06:13,  5.84s/it]

Training DecisionTree on part-00071-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  53%|█████▎    | 72/135 [08:13<06:41,  6.38s/it]

Training DecisionTree on part-00072-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  54%|█████▍    | 73/135 [08:18<06:24,  6.21s/it]

Training DecisionTree on part-00073-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  55%|█████▍    | 74/135 [08:24<06:05,  6.00s/it]

Training DecisionTree on part-00074-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  56%|█████▌    | 75/135 [08:31<06:11,  6.19s/it]

Training DecisionTree on part-00075-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  56%|█████▋    | 76/135 [08:43<07:48,  7.95s/it]

Training DecisionTree on part-00076-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  57%|█████▋    | 77/135 [08:54<08:37,  8.92s/it]

Training DecisionTree on part-00077-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  58%|█████▊    | 78/135 [09:05<09:09,  9.63s/it]

Training DecisionTree on part-00078-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  59%|█████▊    | 79/135 [09:17<09:38, 10.33s/it]

Training DecisionTree on part-00079-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  59%|█████▉    | 80/135 [09:29<09:57, 10.86s/it]

Training DecisionTree on part-00080-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  60%|██████    | 81/135 [09:34<08:16,  9.19s/it]

Training DecisionTree on part-00081-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  61%|██████    | 82/135 [09:41<07:26,  8.42s/it]

Training DecisionTree on part-00082-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  61%|██████▏   | 83/135 [09:46<06:25,  7.41s/it]

Training DecisionTree on part-00083-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  62%|██████▏   | 84/135 [09:52<05:49,  6.85s/it]

Training DecisionTree on part-00084-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  63%|██████▎   | 85/135 [09:58<05:38,  6.76s/it]

Training DecisionTree on part-00085-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  64%|██████▎   | 86/135 [10:04<05:12,  6.37s/it]

Training DecisionTree on part-00086-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  64%|██████▍   | 87/135 [10:11<05:14,  6.56s/it]

Training DecisionTree on part-00087-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  65%|██████▌   | 88/135 [10:16<04:46,  6.10s/it]

Training DecisionTree on part-00088-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  66%|██████▌   | 89/135 [10:21<04:34,  5.97s/it]

Training DecisionTree on part-00089-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  67%|██████▋   | 90/135 [10:28<04:35,  6.11s/it]

Training DecisionTree on part-00090-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  67%|██████▋   | 91/135 [10:33<04:17,  5.86s/it]

Training DecisionTree on part-00091-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  68%|██████▊   | 92/135 [10:41<04:32,  6.33s/it]

Training DecisionTree on part-00092-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  69%|██████▉   | 93/135 [10:45<04:03,  5.80s/it]

Training DecisionTree on part-00093-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  70%|██████▉   | 94/135 [10:52<04:06,  6.02s/it]

Training DecisionTree on part-00094-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  70%|███████   | 95/135 [11:06<05:45,  8.64s/it]

Training DecisionTree on part-00095-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  71%|███████   | 96/135 [11:20<06:32, 10.07s/it]

Training DecisionTree on part-00096-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  72%|███████▏  | 97/135 [11:27<05:50,  9.22s/it]

Training DecisionTree on part-00097-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  73%|███████▎  | 98/135 [11:32<04:58,  8.07s/it]

Training DecisionTree on part-00098-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  73%|███████▎  | 99/135 [11:45<05:35,  9.32s/it]

Training DecisionTree on part-00099-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  74%|███████▍  | 100/135 [11:50<04:43,  8.10s/it]

Training DecisionTree on part-00100-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  75%|███████▍  | 101/135 [11:56<04:20,  7.65s/it]

Training DecisionTree on part-00101-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  76%|███████▌  | 102/135 [12:02<03:47,  6.90s/it]

Training DecisionTree on part-00102-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  76%|███████▋  | 103/135 [12:08<03:33,  6.67s/it]

Training DecisionTree on part-00103-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  77%|███████▋  | 104/135 [12:14<03:23,  6.56s/it]

Training DecisionTree on part-00104-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  78%|███████▊  | 105/135 [12:19<03:00,  6.03s/it]

Training DecisionTree on part-00105-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  79%|███████▊  | 106/135 [12:25<02:59,  6.21s/it]

Training DecisionTree on part-00106-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  79%|███████▉  | 107/135 [12:31<02:48,  6.01s/it]

Training DecisionTree on part-00107-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  80%|████████  | 108/135 [12:37<02:41,  5.98s/it]

Training DecisionTree on part-00108-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  81%|████████  | 109/135 [12:43<02:32,  5.87s/it]

Training DecisionTree on part-00109-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  81%|████████▏ | 110/135 [12:49<02:27,  5.89s/it]

Training DecisionTree on part-00110-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  82%|████████▏ | 111/135 [12:55<02:26,  6.11s/it]

Training DecisionTree on part-00111-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  83%|████████▎ | 112/135 [13:01<02:19,  6.07s/it]

Training DecisionTree on part-00112-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  84%|████████▎ | 113/135 [13:08<02:20,  6.36s/it]

Training DecisionTree on part-00113-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  84%|████████▍ | 114/135 [13:20<02:46,  7.95s/it]

Training DecisionTree on part-00114-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  85%|████████▌ | 115/135 [13:26<02:26,  7.33s/it]

Training DecisionTree on part-00115-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  86%|████████▌ | 116/135 [13:30<02:03,  6.52s/it]

Training DecisionTree on part-00116-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  87%|████████▋ | 117/135 [13:37<02:00,  6.67s/it]

Training DecisionTree on part-00117-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  87%|████████▋ | 118/135 [13:43<01:46,  6.28s/it]

Training DecisionTree on part-00118-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  88%|████████▊ | 119/135 [13:48<01:36,  6.02s/it]

Training DecisionTree on part-00119-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  89%|████████▉ | 120/135 [13:56<01:36,  6.44s/it]

Training DecisionTree on part-00120-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  90%|████████▉ | 121/135 [14:04<01:39,  7.11s/it]

Training DecisionTree on part-00121-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  90%|█████████ | 122/135 [14:10<01:28,  6.77s/it]

Training DecisionTree on part-00122-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  91%|█████████ | 123/135 [14:16<01:17,  6.45s/it]

Training DecisionTree on part-00123-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  92%|█████████▏| 124/135 [14:23<01:13,  6.70s/it]

Training DecisionTree on part-00124-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  93%|█████████▎| 125/135 [14:29<01:03,  6.33s/it]

Training DecisionTree on part-00125-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  93%|█████████▎| 126/135 [14:41<01:13,  8.14s/it]

Training DecisionTree on part-00126-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  94%|█████████▍| 127/135 [14:47<00:59,  7.50s/it]

Training DecisionTree on part-00127-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  95%|█████████▍| 128/135 [14:59<01:00,  8.71s/it]

Training DecisionTree on part-00128-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  96%|█████████▌| 129/135 [15:05<00:48,  8.14s/it]

Training DecisionTree on part-00129-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  96%|█████████▋| 130/135 [15:10<00:36,  7.21s/it]

Training DecisionTree on part-00130-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  97%|█████████▋| 131/135 [15:17<00:28,  7.09s/it]

Training DecisionTree on part-00131-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  98%|█████████▊| 132/135 [15:28<00:24,  8.29s/it]

Training DecisionTree on part-00132-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  99%|█████████▊| 133/135 [15:35<00:15,  7.84s/it]

Training DecisionTree on part-00133-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  99%|█████████▉| 134/135 [15:40<00:07,  7.07s/it]

Training DecisionTree on part-00134-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models: 100%|██████████| 135/135 [15:47<00:00,  7.02s/it]
Evaluating Models: 100%|██████████| 34/34 [01:30<00:00,  2.66s/it]


##### DecisionTree #####
Accuracy Score:  0.9919811693454289
Recall Score:  0.8103269213100808
Precision Score:  0.8062475944896136
F1 Score:  0.80638206531345




# Decision Tree [Classification: 8 (7+1)] classes

In [None]:
dict_7classes = {}
dict_7classes['DDoS-RSTFINFlood'] = 'DDoS'
dict_7classes['DDoS-PSHACK_Flood'] = 'DDoS'
dict_7classes['DDoS-SYN_Flood'] = 'DDoS'
dict_7classes['DDoS-UDP_Flood'] = 'DDoS'
dict_7classes['DDoS-TCP_Flood'] = 'DDoS'
dict_7classes['DDoS-ICMP_Flood'] = 'DDoS'
dict_7classes['DDoS-SynonymousIP_Flood'] = 'DDoS'
dict_7classes['DDoS-ACK_Fragmentation'] = 'DDoS'
dict_7classes['DDoS-UDP_Fragmentation'] = 'DDoS'
dict_7classes['DDoS-ICMP_Fragmentation'] = 'DDoS'
dict_7classes['DDoS-SlowLoris'] = 'DDoS'
dict_7classes['DDoS-HTTP_Flood'] = 'DDoS'

dict_7classes['DoS-UDP_Flood'] = 'DoS'
dict_7classes['DoS-SYN_Flood'] = 'DoS'
dict_7classes['DoS-TCP_Flood'] = 'DoS'
dict_7classes['DoS-HTTP_Flood'] = 'DoS'


dict_7classes['Mirai-greeth_flood'] = 'Mirai'
dict_7classes['Mirai-greip_flood'] = 'Mirai'
dict_7classes['Mirai-udpplain'] = 'Mirai'

dict_7classes['Recon-PingSweep'] = 'Recon'
dict_7classes['Recon-OSScan'] = 'Recon'
dict_7classes['Recon-PortScan'] = 'Recon'
dict_7classes['VulnerabilityScan'] = 'Recon'
dict_7classes['Recon-HostDiscovery'] = 'Recon'

dict_7classes['DNS_Spoofing'] = 'Spoofing'
dict_7classes['MITM-ArpSpoofing'] = 'Spoofing'

dict_7classes['BenignTraffic'] = 'Benign'

dict_7classes['BrowserHijacking'] = 'Web'
dict_7classes['Backdoor_Malware'] = 'Web'
dict_7classes['XSS'] = 'Web'
dict_7classes['Uploading_Attack'] = 'Web'
dict_7classes['SqlInjection'] = 'Web'
dict_7classes['CommandInjection'] = 'Web'


dict_7classes['DictionaryBruteForce'] = 'BruteForce'

In [None]:
import os
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
DATASET_DIRECTORY = '/content/drive/MyDrive/CiC-DataSet/Complete_Dataset/csv/CICIoT2023'

training_sets = [file for file in os.listdir(DATASET_DIRECTORY) if file.endswith('.csv')]
training_sets.sort()

scaler = StandardScaler()

if not training_sets:
    print("No CSV files found in the specified directory!")
else:
    print(f"Found {len(training_sets)} files: {training_sets}")

for train_set in tqdm(training_sets):

    file_path = os.path.join(DATASET_DIRECTORY, train_set)

    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue

    d = pd.read_csv(file_path)
    scaler.fit(d[X_columns])
    d[X_columns] = scaler.transform(d[X_columns])

    new_y = [dict_7classes.get(k, 'Unknown') for k in d[y_column]]
    d[y_column] = new_y

    for model in ML_models:
        model.fit(d[X_columns], d[y_column])

    del d


Found 169 files: ['part-00000-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00001-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00002-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00003-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00004-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00005-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00006-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00007-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00008-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00009-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00010-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00011-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00012-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00013-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00014-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00015-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00016-363d1ba3-8

100%|██████████| 169/169 [17:36<00:00,  6.25s/it]


In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
import os
import pandas as pd
from tqdm import tqdm

decision_tree = DecisionTreeClassifier()
scaler = StandardScaler()

for train_set in tqdm(training_sets):

    train_file_path = os.path.join(DATASET_DIRECTORY, train_set)
    d_train = pd.read_csv(train_file_path)

    scaler.fit(d_train[X_columns])
    d_train[X_columns] = scaler.transform(d_train[X_columns])

    d_train[y_column] = [dict_7classes[k] for k in d_train[y_column]]

    decision_tree.fit(d_train[X_columns], d_train[y_column])

    del d_train

y_test = []
preds = {0: []}

for test_set in tqdm(test_sets):

    test_file_path = os.path.join(DATASET_DIRECTORY, test_set)
    d_test = pd.read_csv(test_file_path)

    d_test[X_columns] = scaler.transform(d_test[X_columns])


    d_test[y_column] = [dict_7classes[k] for k in d_test[y_column]]


    y_test += list(d_test[y_column].values)


    y_pred = list(decision_tree.predict(d_test[X_columns]))
    preds[0] = preds[0] + y_pred


100%|██████████| 169/169 [17:44<00:00,  6.30s/it]
100%|██████████| 34/34 [01:41<00:00,  2.97s/it]


In [None]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

# Evaluate the Decision Tree
print("##### Decision Tree (7 classes) #####")
y_pred = preds[0]

# Calculate and print metrics
print('accuracy_score = ', accuracy_score(y_test, y_pred))
print('recall_score = ', recall_score(y_test, y_pred, average='macro'))
print('precision_score = ', precision_score(y_test, y_pred, average='macro'))
print('f1_score = ', f1_score(y_test, y_pred, average='macro'))
print()


##### Decision Tree (7 classes) #####
accuracy_score =  0.9940541544759313
recall_score =  0.8275701216174062
precision_score =  0.8312311790721518
f1_score =  0.829336162840749



# Decision Tree Classification: 2 (1+1) Classes


In [None]:
dict_2classes = {}
dict_2classes['DDoS-RSTFINFlood'] = 'Attack'
dict_2classes['DDoS-PSHACK_Flood'] = 'Attack'
dict_2classes['DDoS-SYN_Flood'] = 'Attack'
dict_2classes['DDoS-UDP_Flood'] = 'Attack'
dict_2classes['DDoS-TCP_Flood'] = 'Attack'
dict_2classes['DDoS-ICMP_Flood'] = 'Attack'
dict_2classes['DDoS-SynonymousIP_Flood'] = 'Attack'
dict_2classes['DDoS-ACK_Fragmentation'] = 'Attack'
dict_2classes['DDoS-UDP_Fragmentation'] = 'Attack'
dict_2classes['DDoS-ICMP_Fragmentation'] = 'Attack'
dict_2classes['DDoS-SlowLoris'] = 'Attack'
dict_2classes['DDoS-HTTP_Flood'] = 'Attack'

dict_2classes['DoS-UDP_Flood'] = 'Attack'
dict_2classes['DoS-SYN_Flood'] = 'Attack'
dict_2classes['DoS-TCP_Flood'] = 'Attack'
dict_2classes['DoS-HTTP_Flood'] = 'Attack'


dict_2classes['Mirai-greeth_flood'] = 'Attack'
dict_2classes['Mirai-greip_flood'] = 'Attack'
dict_2classes['Mirai-udpplain'] = 'Attack'

dict_2classes['Recon-PingSweep'] = 'Attack'
dict_2classes['Recon-OSScan'] = 'Attack'
dict_2classes['Recon-PortScan'] = 'Attack'
dict_2classes['VulnerabilityScan'] = 'Attack'
dict_2classes['Recon-HostDiscovery'] = 'Attack'

dict_2classes['DNS_Spoofing'] = 'Attack'
dict_2classes['MITM-ArpSpoofing'] = 'Attack'

dict_2classes['BenignTraffic'] = 'Benign'

dict_2classes['BrowserHijacking'] = 'Attack'
dict_2classes['Backdoor_Malware'] = 'Attack'
dict_2classes['XSS'] = 'Attack'
dict_2classes['Uploading_Attack'] = 'Attack'
dict_2classes['SqlInjection'] = 'Attack'
dict_2classes['CommandInjection'] = 'Attack'

dict_2classes['DictionaryBruteForce'] = 'Attack'

In [None]:
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm
import pandas as pd
import os
decision_tree = DecisionTreeClassifier()

for train_set in tqdm(training_sets):

    train_file_path = os.path.join(DATASET_DIRECTORY, train_set)
    d = pd.read_csv(train_file_path)

    d[X_columns] = scaler.transform(d[X_columns])

    new_y = [dict_2classes[k] for k in d[y_column]]
    d[y_column] = new_y

    decision_tree.fit(d[X_columns], d[y_column])
    del d


100%|██████████| 169/169 [14:24<00:00,  5.12s/it]


In [None]:
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm
import pandas as pd
import os

decision_tree = DecisionTreeClassifier()

for train_set in tqdm(training_sets):

    train_file_path = os.path.join(DATASET_DIRECTORY, train_set)
    d_train = pd.read_csv(train_file_path)

    d_train[X_columns] = scaler.fit_transform(d_train[X_columns])

    d_train[y_column] = [dict_2classes[k] for k in d_train[y_column]]

    decision_tree.fit(d_train[X_columns], d_train[y_column])

    del d_train

y_test = []
preds = {0: []}

for test_set in tqdm(test_sets):

    test_file_path = os.path.join(DATASET_DIRECTORY, test_set)
    d_test = pd.read_csv(test_file_path)

    d_test[X_columns] = scaler.transform(d_test[X_columns])

    d_test[y_column] = [dict_2classes[k] for k in d_test[y_column]]

    y_test += list(d_test[y_column].values)

    y_pred = list(decision_tree.predict(d_test[X_columns]))
    preds[0] = preds[0] + y_pred


100%|██████████| 169/169 [14:28<00:00,  5.14s/it]
100%|██████████| 34/34 [01:32<00:00,  2.73s/it]


In [None]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

y_pred = preds[0]

# Print evaluation metrics
print("##### Decision Tree (2 classes) #####")
print('accuracy_score: ', accuracy_score(y_test, y_pred))
print('recall_score: ', recall_score(y_test, y_pred, average='macro'))
print('precision_score: ', precision_score(y_test, y_pred, average='macro'))
print('f1_score: ', f1_score(y_test, y_pred, average='macro'))
print()


##### Decision Tree (2 classes) #####
accuracy_score:  0.9958889421547692
recall_score:  0.9550381776930854
precision_score:  0.9554638197525818
f1_score:  0.9552508939534676

