In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!ls "/content/drive/MyDrive/CiC-DataSet"
!ls "/content/drive/MyDrive/CiC-DataSet/Complete_Dataset"
!ls "/content/drive/MyDrive/CiC-DataSet/Complete_Dataset/csv"

Complete_Dataset  merged_filtered.csv
 csv   example	'README - README.pdf'   supplementary
 CICIoT2023  'README_csv - README.pdf'


In [None]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import warnings
from sklearn.ensemble import RandomForestClassifier
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Forcibly Mount Google Drive
drive.mount('/content/drive', force_remount=True)

# Define Dataset Directory
DATASET_DIRECTORY = '/content/drive/MyDrive/CiC-DataSet/Complete_Dataset/csv/CICIoT2023'

# Verify if the directory exists and list CSV files
try:
    df_sets = [k for k in os.listdir(DATASET_DIRECTORY) if k.endswith('.csv')]
    if not df_sets:
        print("No CSV files found in the specified directory.")
    df_sets.sort()
except Exception as e:
    print(f"Error accessing directory: {e}")

# Split into training and test sets
training_sets = df_sets[:int(len(df_sets) * 0.8)]
test_sets = df_sets[int(len(df_sets) * 0.8):]

# Define columns
X_columns = [
    'flow_duration', 'Header_Length', 'Protocol Type', 'Duration',
    'Rate', 'Srate', 'Drate', 'fin_flag_number', 'syn_flag_number',
    'rst_flag_number', 'psh_flag_number', 'ack_flag_number',
    'ece_flag_number', 'cwr_flag_number', 'ack_count',
    'syn_count', 'fin_count', 'urg_count', 'rst_count',
    'HTTP', 'HTTPS', 'DNS', 'Telnet', 'SMTP', 'SSH', 'IRC', 'TCP',
    'UDP', 'DHCP', 'ARP', 'ICMP', 'IPv', 'LLC', 'Tot sum', 'Min',
    'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number', 'Magnitue',
    'Radius', 'Covariance', 'Variance', 'Weight',
]
y_column = 'label'

# Initialize the scaler
scaler = StandardScaler()

# Fit scaler to training sets with error handling
for train_set in tqdm(training_sets, desc="Processing Training Sets"):
    file_path = os.path.join(DATASET_DIRECTORY, train_set)
    try:
        for chunk in pd.read_csv(file_path, usecols=X_columns, chunksize=5000):  # Reduced chunksize to 5000
            scaler.partial_fit(chunk)
    except Exception as e:
        print(f"Error processing file {train_set}: {e}")

# Initialize Random Forest Classifier
classifier = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)

# Training the Random Forest model on the data
for train_set in tqdm(training_sets, desc="Training Model"):
    file_path = os.path.join(DATASET_DIRECTORY, train_set)
    try:
        for chunk in pd.read_csv(file_path, usecols=X_columns + [y_column], chunksize=5000):  # Include y_column
            X_train = scaler.transform(chunk[X_columns])
            y_train = chunk[y_column]
            classifier.fit(X_train, y_train)
    except Exception as e:
        print(f"Error training on file {train_set}: {e}")

# Test the model on the test sets
for test_set in tqdm(test_sets, desc="Testing Model"):
    file_path = os.path.join(DATASET_DIRECTORY, test_set)
    try:
        for chunk in pd.read_csv(file_path, usecols=X_columns + [y_column], chunksize=5000):  # Include y_column
            X_test = scaler.transform(chunk[X_columns])
            y_test = chunk[y_column]
            predictions = classifier.predict(X_test)
            # Evaluate or log predictions here (e.g., accuracy metrics)
    except Exception as e:
        print(f"Error testing on file {test_set}: {e}")


Mounted at /content/drive


Processing Training Sets: 100%|██████████| 135/135 [04:51<00:00,  2.16s/it]
Training Model: 100%|██████████| 135/135 [1:51:25<00:00, 49.52s/it]
Testing Model: 100%|██████████| 34/34 [04:36<00:00,  8.14s/it]


## Random Forest: 34 Class (33+1 Classification)

In [None]:
import os
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier

# Define the Random Forest model
ML_models = [
    RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1),
]

# Assign the model name
ML_names = [
    "RandomForest",
]

# Iterate through each training set
for train_set in tqdm(training_sets, desc="Training Models"):
    # Ensure proper file path concatenation
    file_path = os.path.join(DATASET_DIRECTORY, train_set)
    try:
        # Load the dataset
        d = pd.read_csv(file_path)

        # Apply scaling transformation to the features
        d[X_columns] = scaler.transform(d[X_columns])

        # Train the Random Forest model
        for model, model_name in zip(ML_models, ML_names):
            print(f"Training {model_name} on {train_set}...")
            model.fit(d[X_columns], d[y_column])

        # Clear the data from memory
        del d
    except Exception as e:
        print(f"Error processing {train_set}: {e}")


Training Models:   0%|          | 0/135 [00:00<?, ?it/s]

Training RandomForest on part-00000-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   1%|          | 1/135 [00:39<1:27:41, 39.26s/it]

Training RandomForest on part-00001-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   1%|▏         | 2/135 [01:13<1:20:41, 36.40s/it]

Training RandomForest on part-00002-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   2%|▏         | 3/135 [02:01<1:32:02, 41.83s/it]

Training RandomForest on part-00003-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   3%|▎         | 4/135 [02:38<1:26:29, 39.61s/it]

Training RandomForest on part-00004-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   4%|▎         | 5/135 [03:15<1:23:57, 38.75s/it]

Training RandomForest on part-00005-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   4%|▍         | 6/135 [03:52<1:22:10, 38.22s/it]

Training RandomForest on part-00006-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   5%|▌         | 7/135 [04:31<1:21:51, 38.37s/it]

Training RandomForest on part-00007-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   6%|▌         | 8/135 [05:07<1:19:33, 37.59s/it]

Training RandomForest on part-00008-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   7%|▋         | 9/135 [05:50<1:22:22, 39.23s/it]

Training RandomForest on part-00009-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   7%|▋         | 10/135 [06:25<1:19:36, 38.21s/it]

Training RandomForest on part-00010-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   8%|▊         | 11/135 [07:07<1:20:59, 39.19s/it]

Training RandomForest on part-00011-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:   9%|▉         | 12/135 [07:42<1:17:41, 37.90s/it]

Training RandomForest on part-00012-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  10%|▉         | 13/135 [08:21<1:18:00, 38.37s/it]

Training RandomForest on part-00013-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  10%|█         | 14/135 [08:59<1:17:06, 38.24s/it]

Training RandomForest on part-00014-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  11%|█         | 15/135 [09:37<1:16:00, 38.01s/it]

Training RandomForest on part-00015-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  12%|█▏        | 16/135 [10:21<1:18:52, 39.77s/it]

Training RandomForest on part-00016-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  13%|█▎        | 17/135 [10:58<1:16:43, 39.02s/it]

Training RandomForest on part-00017-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  13%|█▎        | 18/135 [11:32<1:13:20, 37.61s/it]

Training RandomForest on part-00018-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  14%|█▍        | 19/135 [12:11<1:13:17, 37.91s/it]

Training RandomForest on part-00019-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  15%|█▍        | 20/135 [12:46<1:11:08, 37.12s/it]

Training RandomForest on part-00020-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  16%|█▌        | 21/135 [13:27<1:12:53, 38.36s/it]

Training RandomForest on part-00021-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  16%|█▋        | 22/135 [14:02<1:10:10, 37.26s/it]

Training RandomForest on part-00022-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  17%|█▋        | 23/135 [14:44<1:12:23, 38.78s/it]

Training RandomForest on part-00023-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  18%|█▊        | 24/135 [15:28<1:14:20, 40.19s/it]

Training RandomForest on part-00024-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  19%|█▊        | 25/135 [16:03<1:10:53, 38.67s/it]

Training RandomForest on part-00025-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  19%|█▉        | 26/135 [17:29<1:36:17, 53.00s/it]

Training RandomForest on part-00026-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  20%|██        | 27/135 [18:07<1:27:03, 48.37s/it]

Training RandomForest on part-00027-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  21%|██        | 28/135 [18:41<1:18:35, 44.07s/it]

Training RandomForest on part-00028-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  21%|██▏       | 29/135 [19:23<1:16:40, 43.40s/it]

Training RandomForest on part-00029-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  22%|██▏       | 30/135 [20:04<1:14:54, 42.81s/it]

Training RandomForest on part-00030-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  23%|██▎       | 31/135 [20:44<1:12:29, 41.82s/it]

Training RandomForest on part-00031-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  24%|██▎       | 32/135 [21:21<1:09:20, 40.40s/it]

Training RandomForest on part-00032-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  24%|██▍       | 33/135 [21:56<1:06:10, 38.92s/it]

Training RandomForest on part-00033-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  25%|██▌       | 34/135 [23:23<1:29:35, 53.22s/it]

Training RandomForest on part-00034-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  26%|██▌       | 35/135 [24:00<1:20:37, 48.38s/it]

Training RandomForest on part-00035-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  27%|██▋       | 36/135 [24:44<1:17:28, 46.95s/it]

Training RandomForest on part-00036-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  27%|██▋       | 37/135 [26:05<1:33:32, 57.27s/it]

Training RandomForest on part-00037-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  28%|██▊       | 38/135 [26:38<1:21:06, 50.17s/it]

Training RandomForest on part-00038-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  29%|██▉       | 39/135 [27:16<1:14:10, 46.36s/it]

Training RandomForest on part-00039-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  30%|██▉       | 40/135 [28:41<1:31:49, 57.99s/it]

Training RandomForest on part-00040-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  30%|███       | 41/135 [29:18<1:20:53, 51.64s/it]

Training RandomForest on part-00041-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  31%|███       | 42/135 [30:40<1:34:07, 60.72s/it]

Training RandomForest on part-00042-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  32%|███▏      | 43/135 [31:16<1:21:49, 53.36s/it]

Training RandomForest on part-00043-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  33%|███▎      | 44/135 [31:59<1:16:14, 50.27s/it]

Training RandomForest on part-00044-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  33%|███▎      | 45/135 [32:36<1:09:21, 46.24s/it]

Training RandomForest on part-00045-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  34%|███▍      | 46/135 [33:16<1:06:04, 44.54s/it]

Training RandomForest on part-00046-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  35%|███▍      | 47/135 [33:53<1:01:59, 42.26s/it]

Training RandomForest on part-00047-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  36%|███▌      | 48/135 [34:32<59:47, 41.23s/it]  

Training RandomForest on part-00048-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  36%|███▋      | 49/135 [35:07<56:11, 39.20s/it]

Training RandomForest on part-00049-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  37%|███▋      | 50/135 [35:50<57:12, 40.39s/it]

Training RandomForest on part-00050-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  38%|███▊      | 51/135 [36:27<55:06, 39.37s/it]

Training RandomForest on part-00051-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  39%|███▊      | 52/135 [37:08<55:07, 39.84s/it]

Training RandomForest on part-00052-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  39%|███▉      | 53/135 [38:30<1:11:49, 52.55s/it]

Training RandomForest on part-00053-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  40%|████      | 54/135 [39:12<1:06:42, 49.41s/it]

Training RandomForest on part-00054-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  41%|████      | 55/135 [39:49<1:00:52, 45.66s/it]

Training RandomForest on part-00055-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  41%|████▏     | 56/135 [40:26<56:30, 42.91s/it]  

Training RandomForest on part-00056-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  42%|████▏     | 57/135 [41:01<53:01, 40.78s/it]

Training RandomForest on part-00057-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  43%|████▎     | 58/135 [42:19<1:06:24, 51.75s/it]

Training RandomForest on part-00058-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  44%|████▎     | 59/135 [42:59<1:01:09, 48.28s/it]

Training RandomForest on part-00059-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  44%|████▍     | 60/135 [43:37<56:30, 45.21s/it]  

Training RandomForest on part-00060-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  45%|████▌     | 61/135 [44:18<54:17, 44.02s/it]

Training RandomForest on part-00061-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  46%|████▌     | 62/135 [44:59<52:13, 42.93s/it]

Training RandomForest on part-00062-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  47%|████▋     | 63/135 [46:24<1:06:46, 55.64s/it]

Training RandomForest on part-00063-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  47%|████▋     | 64/135 [47:04<1:00:18, 50.96s/it]

Training RandomForest on part-00064-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  48%|████▊     | 65/135 [47:48<56:59, 48.85s/it]  

Training RandomForest on part-00065-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  49%|████▉     | 66/135 [48:27<52:45, 45.88s/it]

Training RandomForest on part-00066-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  50%|████▉     | 67/135 [49:02<48:21, 42.66s/it]

Training RandomForest on part-00067-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  50%|█████     | 68/135 [49:48<48:43, 43.63s/it]

Training RandomForest on part-00068-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  51%|█████     | 69/135 [50:25<45:51, 41.70s/it]

Training RandomForest on part-00069-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  52%|█████▏    | 70/135 [51:02<43:38, 40.29s/it]

Training RandomForest on part-00070-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  53%|█████▎    | 71/135 [51:40<42:08, 39.51s/it]

Training RandomForest on part-00071-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  53%|█████▎    | 72/135 [52:26<43:38, 41.56s/it]

Training RandomForest on part-00072-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  54%|█████▍    | 73/135 [53:10<43:35, 42.18s/it]

Training RandomForest on part-00073-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  55%|█████▍    | 74/135 [53:46<41:14, 40.57s/it]

Training RandomForest on part-00074-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  56%|█████▌    | 75/135 [54:24<39:45, 39.76s/it]

Training RandomForest on part-00075-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  56%|█████▋    | 76/135 [55:46<51:27, 52.33s/it]

Training RandomForest on part-00076-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  57%|█████▋    | 77/135 [57:10<59:41, 61.75s/it]

Training RandomForest on part-00077-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  58%|█████▊    | 78/135 [58:30<1:03:59, 67.36s/it]

Training RandomForest on part-00078-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  59%|█████▊    | 79/135 [59:52<1:06:49, 71.60s/it]

Training RandomForest on part-00079-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  59%|█████▉    | 80/135 [1:01:16<1:09:03, 75.33s/it]

Training RandomForest on part-00080-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  60%|██████    | 81/135 [1:01:52<57:13, 63.58s/it]  

Training RandomForest on part-00081-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  61%|██████    | 82/135 [1:02:29<49:04, 55.56s/it]

Training RandomForest on part-00082-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  61%|██████▏   | 83/135 [1:03:05<43:03, 49.68s/it]

Training RandomForest on part-00083-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  62%|██████▏   | 84/135 [1:03:42<39:07, 46.03s/it]

Training RandomForest on part-00084-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  63%|██████▎   | 85/135 [1:04:19<35:56, 43.14s/it]

Training RandomForest on part-00085-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  64%|██████▎   | 86/135 [1:04:57<34:03, 41.71s/it]

Training RandomForest on part-00086-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  64%|██████▍   | 87/135 [1:05:36<32:47, 40.99s/it]

Training RandomForest on part-00087-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  65%|██████▌   | 88/135 [1:06:13<31:04, 39.66s/it]

Training RandomForest on part-00088-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  66%|██████▌   | 89/135 [1:06:52<30:13, 39.42s/it]

Training RandomForest on part-00089-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  67%|██████▋   | 90/135 [1:07:30<29:15, 39.01s/it]

Training RandomForest on part-00090-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  67%|██████▋   | 91/135 [1:08:09<28:45, 39.21s/it]

Training RandomForest on part-00091-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  68%|██████▊   | 92/135 [1:08:49<28:17, 39.47s/it]

Training RandomForest on part-00092-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  69%|██████▉   | 93/135 [1:09:23<26:23, 37.69s/it]

Training RandomForest on part-00093-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  70%|██████▉   | 94/135 [1:10:04<26:24, 38.64s/it]

Training RandomForest on part-00094-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  70%|███████   | 95/135 [1:11:22<33:40, 50.52s/it]

Training RandomForest on part-00095-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  71%|███████   | 96/135 [1:12:45<39:08, 60.21s/it]

Training RandomForest on part-00096-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  72%|███████▏  | 97/135 [1:13:27<34:37, 54.67s/it]

Training RandomForest on part-00097-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  73%|███████▎  | 98/135 [1:14:06<30:57, 50.20s/it]

Training RandomForest on part-00098-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  73%|███████▎  | 99/135 [1:15:30<36:04, 60.12s/it]

Training RandomForest on part-00099-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  74%|███████▍  | 100/135 [1:16:07<31:04, 53.26s/it]

Training RandomForest on part-00100-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  75%|███████▍  | 101/135 [1:16:43<27:10, 47.96s/it]

Training RandomForest on part-00101-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  76%|███████▌  | 102/135 [1:17:22<24:53, 45.27s/it]

Training RandomForest on part-00102-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  76%|███████▋  | 103/135 [1:17:58<22:47, 42.74s/it]

Training RandomForest on part-00103-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  77%|███████▋  | 104/135 [1:18:38<21:35, 41.78s/it]

Training RandomForest on part-00104-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  78%|███████▊  | 105/135 [1:19:13<19:52, 39.77s/it]

Training RandomForest on part-00105-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  79%|███████▊  | 106/135 [1:19:50<18:49, 38.94s/it]

Training RandomForest on part-00106-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  79%|███████▉  | 107/135 [1:20:33<18:42, 40.09s/it]

Training RandomForest on part-00107-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  80%|████████  | 108/135 [1:21:08<17:23, 38.64s/it]

Training RandomForest on part-00108-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  81%|████████  | 109/135 [1:21:44<16:20, 37.70s/it]

Training RandomForest on part-00109-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  81%|████████▏ | 110/135 [1:22:27<16:21, 39.28s/it]

Training RandomForest on part-00110-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  82%|████████▏ | 111/135 [1:23:04<15:28, 38.70s/it]

Training RandomForest on part-00111-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  83%|████████▎ | 112/135 [1:23:49<15:32, 40.54s/it]

Training RandomForest on part-00112-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  84%|████████▎ | 113/135 [1:24:30<14:55, 40.72s/it]

Training RandomForest on part-00113-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  84%|████████▍ | 114/135 [1:25:52<18:36, 53.17s/it]

Training RandomForest on part-00114-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  85%|████████▌ | 115/135 [1:26:30<16:13, 48.69s/it]

Training RandomForest on part-00115-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  86%|████████▌ | 116/135 [1:27:06<14:13, 44.93s/it]

Training RandomForest on part-00116-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  87%|████████▋ | 117/135 [1:27:46<13:00, 43.34s/it]

Training RandomForest on part-00117-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  87%|████████▋ | 118/135 [1:28:25<11:52, 41.90s/it]

Training RandomForest on part-00118-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  88%|████████▊ | 119/135 [1:29:00<10:39, 39.95s/it]

Training RandomForest on part-00119-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  89%|████████▉ | 120/135 [1:29:37<09:45, 39.03s/it]

Training RandomForest on part-00120-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  90%|████████▉ | 121/135 [1:30:17<09:12, 39.48s/it]

Training RandomForest on part-00121-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  90%|█████████ | 122/135 [1:30:55<08:26, 38.98s/it]

Training RandomForest on part-00122-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  91%|█████████ | 123/135 [1:31:35<07:51, 39.31s/it]

Training RandomForest on part-00123-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  92%|█████████▏| 124/135 [1:32:17<07:20, 40.08s/it]

Training RandomForest on part-00124-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  93%|█████████▎| 125/135 [1:32:56<06:38, 39.83s/it]

Training RandomForest on part-00125-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  93%|█████████▎| 126/135 [1:34:23<08:03, 53.78s/it]

Training RandomForest on part-00126-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  94%|█████████▍| 127/135 [1:35:00<06:31, 48.90s/it]

Training RandomForest on part-00127-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  95%|█████████▍| 128/135 [1:36:23<06:52, 58.96s/it]

Training RandomForest on part-00128-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  96%|█████████▌| 129/135 [1:37:04<05:22, 53.80s/it]

Training RandomForest on part-00129-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  96%|█████████▋| 130/135 [1:37:40<04:01, 48.25s/it]

Training RandomForest on part-00130-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  97%|█████████▋| 131/135 [1:38:22<03:05, 46.45s/it]

Training RandomForest on part-00131-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  98%|█████████▊| 132/135 [1:39:43<02:50, 56.81s/it]

Training RandomForest on part-00132-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  99%|█████████▊| 133/135 [1:40:26<01:45, 52.62s/it]

Training RandomForest on part-00133-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models:  99%|█████████▉| 134/135 [1:41:04<00:48, 48.18s/it]

Training RandomForest on part-00134-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Models: 100%|██████████| 135/135 [1:41:45<00:00, 45.22s/it]


In [None]:
import os
from tqdm import tqdm
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Define Random Forest model and its name
ML_models = [
    RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1),
]

ML_names = [
    "RandomForest",
]

# Train the Random Forest model on each training set
for train_set in tqdm(training_sets, desc="Training Random Forest"):
    file_path = os.path.join(DATASET_DIRECTORY, train_set)
    try:
        d_train = pd.read_csv(file_path)

        # Apply scaling transformation
        d_train[X_columns] = scaler.transform(d_train[X_columns])

        # Fit the Random Forest model
        for model, model_name in zip(ML_models, ML_names):
            print(f"Training {model_name} on {train_set}...")
            model.fit(d_train[X_columns], d_train[y_column])

        # Clear the training data from memory
        del d_train
    except Exception as e:
        print(f"Error processing {train_set}: {e}")

# Initialize y_test and preds to collect predictions
y_test = []
preds = {i: [] for i in range(len(ML_models))}

# Evaluate the Random Forest model on each test set
for test_set in tqdm(test_sets, desc="Evaluating Random Forest"):
    file_path = os.path.join(DATASET_DIRECTORY, test_set)
    try:
        d_test = pd.read_csv(file_path)

        # Apply scaling transformation
        d_test[X_columns] = scaler.transform(d_test[X_columns])

        # Collect true labels for y_test
        y_test += list(d_test[y_column].values)

        # Predict with the Random Forest model and accumulate predictions
        for i, model in enumerate(ML_models):
            y_pred = list(model.predict(d_test[X_columns]))
            preds[i].extend(y_pred)  # Extend list for each model with new predictions

        # Clear the test data from memory
        del d_test
    except Exception as e:
        print(f"Error processing {test_set}: {e}")

# Now y_test contains true labels, and preds holds predictions for the Random Forest model


Training Random Forest:   0%|          | 0/135 [00:00<?, ?it/s]

Training RandomForest on part-00000-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   1%|          | 1/135 [00:37<1:24:36, 37.88s/it]

Training RandomForest on part-00001-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   1%|▏         | 2/135 [01:14<1:21:44, 36.88s/it]

Training RandomForest on part-00002-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   2%|▏         | 3/135 [02:01<1:32:07, 41.87s/it]

Training RandomForest on part-00003-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   3%|▎         | 4/135 [02:38<1:26:53, 39.80s/it]

Training RandomForest on part-00004-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   4%|▎         | 5/135 [03:15<1:23:49, 38.69s/it]

Training RandomForest on part-00005-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   4%|▍         | 6/135 [03:54<1:23:26, 38.81s/it]

Training RandomForest on part-00006-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   5%|▌         | 7/135 [04:30<1:21:20, 38.13s/it]

Training RandomForest on part-00007-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   6%|▌         | 8/135 [05:11<1:22:06, 38.79s/it]

Training RandomForest on part-00008-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   7%|▋         | 9/135 [05:51<1:22:13, 39.16s/it]

Training RandomForest on part-00009-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   7%|▋         | 10/135 [06:27<1:20:04, 38.44s/it]

Training RandomForest on part-00010-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   8%|▊         | 11/135 [07:08<1:20:45, 39.08s/it]

Training RandomForest on part-00011-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:   9%|▉         | 12/135 [07:43<1:17:22, 37.74s/it]

Training RandomForest on part-00012-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  10%|▉         | 13/135 [08:21<1:17:08, 37.94s/it]

Training RandomForest on part-00013-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  10%|█         | 14/135 [09:00<1:17:09, 38.26s/it]

Training RandomForest on part-00014-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  11%|█         | 15/135 [09:33<1:13:09, 36.58s/it]

Training RandomForest on part-00015-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  12%|█▏        | 16/135 [10:17<1:16:53, 38.76s/it]

Training RandomForest on part-00016-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  13%|█▎        | 17/135 [10:53<1:14:36, 37.93s/it]

Training RandomForest on part-00017-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  13%|█▎        | 18/135 [11:32<1:14:35, 38.25s/it]

Training RandomForest on part-00018-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  14%|█▍        | 19/135 [12:07<1:12:27, 37.48s/it]

Training RandomForest on part-00019-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  15%|█▍        | 20/135 [12:43<1:10:59, 37.04s/it]

Training RandomForest on part-00020-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  16%|█▌        | 21/135 [13:21<1:10:53, 37.31s/it]

Training RandomForest on part-00021-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  16%|█▋        | 22/135 [13:56<1:08:39, 36.45s/it]

Training RandomForest on part-00022-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  17%|█▋        | 23/135 [14:39<1:11:44, 38.43s/it]

Training RandomForest on part-00023-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  18%|█▊        | 24/135 [15:22<1:13:53, 39.94s/it]

Training RandomForest on part-00024-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  19%|█▊        | 25/135 [15:57<1:10:19, 38.36s/it]

Training RandomForest on part-00025-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  19%|█▉        | 26/135 [17:21<1:34:28, 52.00s/it]

Training RandomForest on part-00026-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  20%|██        | 27/135 [18:00<1:26:30, 48.06s/it]

Training RandomForest on part-00027-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  21%|██        | 28/135 [18:33<1:17:58, 43.72s/it]

Training RandomForest on part-00028-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  21%|██▏       | 29/135 [19:15<1:16:07, 43.09s/it]

Training RandomForest on part-00029-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  22%|██▏       | 30/135 [19:56<1:14:12, 42.41s/it]

Training RandomForest on part-00030-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  23%|██▎       | 31/135 [20:32<1:10:29, 40.67s/it]

Training RandomForest on part-00031-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  24%|██▎       | 32/135 [21:11<1:08:50, 40.10s/it]

Training RandomForest on part-00032-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  24%|██▍       | 33/135 [21:46<1:05:41, 38.65s/it]

Training RandomForest on part-00033-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  25%|██▌       | 34/135 [23:13<1:29:12, 53.00s/it]

Training RandomForest on part-00034-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  26%|██▌       | 35/135 [23:49<1:20:00, 48.00s/it]

Training RandomForest on part-00035-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  27%|██▋       | 36/135 [24:32<1:16:54, 46.61s/it]

Training RandomForest on part-00036-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  27%|██▋       | 37/135 [25:53<1:32:35, 56.69s/it]

Training RandomForest on part-00037-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  28%|██▊       | 38/135 [26:26<1:20:18, 49.68s/it]

Training RandomForest on part-00038-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  29%|██▉       | 39/135 [27:05<1:14:18, 46.44s/it]

Training RandomForest on part-00039-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  30%|██▉       | 40/135 [28:27<1:30:20, 57.06s/it]

Training RandomForest on part-00040-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  30%|███       | 41/135 [29:04<1:20:13, 51.21s/it]

Training RandomForest on part-00041-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  31%|███       | 42/135 [30:28<1:34:17, 60.84s/it]

Training RandomForest on part-00042-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  32%|███▏      | 43/135 [31:02<1:21:18, 53.03s/it]

Training RandomForest on part-00043-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  33%|███▎      | 44/135 [31:43<1:14:51, 49.36s/it]

Training RandomForest on part-00044-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  33%|███▎      | 45/135 [32:19<1:07:53, 45.26s/it]

Training RandomForest on part-00045-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  34%|███▍      | 46/135 [32:59<1:04:50, 43.71s/it]

Training RandomForest on part-00046-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  35%|███▍      | 47/135 [33:39<1:02:19, 42.49s/it]

Training RandomForest on part-00047-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  36%|███▌      | 48/135 [34:16<59:23, 40.96s/it]  

Training RandomForest on part-00048-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  36%|███▋      | 49/135 [34:50<55:39, 38.83s/it]

Training RandomForest on part-00049-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  37%|███▋      | 50/135 [35:31<55:54, 39.46s/it]

Training RandomForest on part-00050-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  38%|███▊      | 51/135 [36:07<53:49, 38.44s/it]

Training RandomForest on part-00051-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  39%|███▊      | 52/135 [36:50<55:03, 39.80s/it]

Training RandomForest on part-00052-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  39%|███▉      | 53/135 [38:11<1:11:31, 52.33s/it]

Training RandomForest on part-00053-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  40%|████      | 54/135 [38:51<1:05:33, 48.57s/it]

Training RandomForest on part-00054-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  41%|████      | 55/135 [39:28<59:54, 44.93s/it]  

Training RandomForest on part-00055-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  41%|████▏     | 56/135 [40:06<56:35, 42.99s/it]

Training RandomForest on part-00056-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  42%|████▏     | 57/135 [40:42<53:08, 40.88s/it]

Training RandomForest on part-00057-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  43%|████▎     | 58/135 [42:00<1:06:39, 51.95s/it]

Training RandomForest on part-00058-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  44%|████▎     | 59/135 [42:38<1:00:34, 47.82s/it]

Training RandomForest on part-00059-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  44%|████▍     | 60/135 [43:18<56:58, 45.58s/it]  

Training RandomForest on part-00060-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  45%|████▌     | 61/135 [43:59<54:28, 44.17s/it]

Training RandomForest on part-00061-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  46%|████▌     | 62/135 [44:39<51:59, 42.74s/it]

Training RandomForest on part-00062-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  47%|████▋     | 63/135 [45:58<1:04:31, 53.77s/it]

Training RandomForest on part-00063-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  47%|████▋     | 64/135 [46:37<58:13, 49.20s/it]  

Training RandomForest on part-00064-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  48%|████▊     | 65/135 [47:20<55:15, 47.37s/it]

Training RandomForest on part-00065-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  49%|████▉     | 66/135 [48:00<51:51, 45.09s/it]

Training RandomForest on part-00066-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  50%|████▉     | 67/135 [48:33<47:09, 41.61s/it]

Training RandomForest on part-00067-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  50%|█████     | 68/135 [49:17<47:10, 42.25s/it]

Training RandomForest on part-00068-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  51%|█████     | 69/135 [49:53<44:26, 40.40s/it]

Training RandomForest on part-00069-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  52%|█████▏    | 70/135 [50:32<43:16, 39.95s/it]

Training RandomForest on part-00070-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  53%|█████▎    | 71/135 [51:07<41:15, 38.67s/it]

Training RandomForest on part-00071-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  53%|█████▎    | 72/135 [51:53<42:55, 40.88s/it]

Training RandomForest on part-00072-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  54%|█████▍    | 73/135 [52:36<42:54, 41.52s/it]

Training RandomForest on part-00073-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  55%|█████▍    | 74/135 [53:15<41:12, 40.54s/it]

Training RandomForest on part-00074-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  56%|█████▌    | 75/135 [53:55<40:20, 40.35s/it]

Training RandomForest on part-00075-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  56%|█████▋    | 76/135 [55:17<52:08, 53.03s/it]

Training RandomForest on part-00076-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  57%|█████▋    | 77/135 [56:41<1:00:16, 62.35s/it]

Training RandomForest on part-00077-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  58%|█████▊    | 78/135 [58:00<1:03:57, 67.32s/it]

Training RandomForest on part-00078-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  59%|█████▊    | 79/135 [59:20<1:06:10, 70.91s/it]

Training RandomForest on part-00079-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  59%|█████▉    | 80/135 [1:00:46<1:09:24, 75.72s/it]

Training RandomForest on part-00080-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  60%|██████    | 81/135 [1:01:22<57:24, 63.80s/it]  

Training RandomForest on part-00081-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  61%|██████    | 82/135 [1:02:00<49:16, 55.79s/it]

Training RandomForest on part-00082-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  61%|██████▏   | 83/135 [1:02:35<43:10, 49.83s/it]

Training RandomForest on part-00083-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  62%|██████▏   | 84/135 [1:03:13<39:19, 46.26s/it]

Training RandomForest on part-00084-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  63%|██████▎   | 85/135 [1:03:50<36:01, 43.22s/it]

Training RandomForest on part-00085-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  64%|██████▎   | 86/135 [1:04:27<33:50, 41.44s/it]

Training RandomForest on part-00086-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  64%|██████▍   | 87/135 [1:05:06<32:42, 40.89s/it]

Training RandomForest on part-00087-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  65%|██████▌   | 88/135 [1:05:43<31:04, 39.67s/it]

Training RandomForest on part-00088-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  66%|██████▌   | 89/135 [1:06:22<30:13, 39.41s/it]

Training RandomForest on part-00089-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  67%|██████▋   | 90/135 [1:06:59<29:05, 38.79s/it]

Training RandomForest on part-00090-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  67%|██████▋   | 91/135 [1:07:38<28:30, 38.87s/it]

Training RandomForest on part-00091-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  68%|██████▊   | 92/135 [1:08:20<28:22, 39.59s/it]

Training RandomForest on part-00092-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  69%|██████▉   | 93/135 [1:08:53<26:20, 37.62s/it]

Training RandomForest on part-00093-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  70%|██████▉   | 94/135 [1:09:34<26:26, 38.69s/it]

Training RandomForest on part-00094-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  70%|███████   | 95/135 [1:10:53<33:46, 50.66s/it]

Training RandomForest on part-00095-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  71%|███████   | 96/135 [1:12:14<38:52, 59.81s/it]

Training RandomForest on part-00096-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  72%|███████▏  | 97/135 [1:12:58<34:51, 55.04s/it]

Training RandomForest on part-00097-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  73%|███████▎  | 98/135 [1:13:35<30:42, 49.80s/it]

Training RandomForest on part-00098-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  73%|███████▎  | 99/135 [1:14:58<35:51, 59.77s/it]

Training RandomForest on part-00099-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  74%|███████▍  | 100/135 [1:15:36<31:00, 53.14s/it]

Training RandomForest on part-00100-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  75%|███████▍  | 101/135 [1:16:13<27:19, 48.21s/it]

Training RandomForest on part-00101-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  76%|███████▌  | 102/135 [1:16:50<24:46, 45.04s/it]

Training RandomForest on part-00102-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  76%|███████▋  | 103/135 [1:17:26<22:31, 42.23s/it]

Training RandomForest on part-00103-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  77%|███████▋  | 104/135 [1:18:07<21:38, 41.88s/it]

Training RandomForest on part-00104-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  78%|███████▊  | 105/135 [1:18:41<19:48, 39.61s/it]

Training RandomForest on part-00105-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  79%|███████▊  | 106/135 [1:19:19<18:55, 39.14s/it]

Training RandomForest on part-00106-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  79%|███████▉  | 107/135 [1:20:00<18:28, 39.59s/it]

Training RandomForest on part-00107-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  80%|████████  | 108/135 [1:20:35<17:14, 38.31s/it]

Training RandomForest on part-00108-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  81%|████████  | 109/135 [1:21:12<16:23, 37.84s/it]

Training RandomForest on part-00109-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  81%|████████▏ | 110/135 [1:21:54<16:20, 39.21s/it]

Training RandomForest on part-00110-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  82%|████████▏ | 111/135 [1:22:31<15:21, 38.41s/it]

Training RandomForest on part-00111-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  83%|████████▎ | 112/135 [1:23:17<15:37, 40.74s/it]

Training RandomForest on part-00112-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  84%|████████▎ | 113/135 [1:23:57<14:50, 40.46s/it]

Training RandomForest on part-00113-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  84%|████████▍ | 114/135 [1:25:20<18:36, 53.19s/it]

Training RandomForest on part-00114-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  85%|████████▌ | 115/135 [1:25:58<16:12, 48.60s/it]

Training RandomForest on part-00115-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  86%|████████▌ | 116/135 [1:26:35<14:16, 45.10s/it]

Training RandomForest on part-00116-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  87%|████████▋ | 117/135 [1:27:13<12:54, 43.01s/it]

Training RandomForest on part-00117-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  87%|████████▋ | 118/135 [1:27:52<11:52, 41.88s/it]

Training RandomForest on part-00118-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  88%|████████▊ | 119/135 [1:28:26<10:31, 39.47s/it]

Training RandomForest on part-00119-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  89%|████████▉ | 120/135 [1:29:05<09:48, 39.21s/it]

Training RandomForest on part-00120-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  90%|████████▉ | 121/135 [1:29:43<09:05, 38.99s/it]

Training RandomForest on part-00121-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  90%|█████████ | 122/135 [1:30:22<08:27, 39.02s/it]

Training RandomForest on part-00122-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  91%|█████████ | 123/135 [1:31:02<07:51, 39.30s/it]

Training RandomForest on part-00123-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  92%|█████████▏| 124/135 [1:31:42<07:15, 39.55s/it]

Training RandomForest on part-00124-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  93%|█████████▎| 125/135 [1:32:22<06:35, 39.55s/it]

Training RandomForest on part-00125-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  93%|█████████▎| 126/135 [1:33:47<07:59, 53.25s/it]

Training RandomForest on part-00126-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  94%|█████████▍| 127/135 [1:34:23<06:24, 48.08s/it]

Training RandomForest on part-00127-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  95%|█████████▍| 128/135 [1:35:45<06:48, 58.36s/it]

Training RandomForest on part-00128-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  96%|█████████▌| 129/135 [1:36:26<05:18, 53.03s/it]

Training RandomForest on part-00129-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  96%|█████████▋| 130/135 [1:37:01<03:58, 47.65s/it]

Training RandomForest on part-00130-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  97%|█████████▋| 131/135 [1:37:44<03:04, 46.11s/it]

Training RandomForest on part-00131-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  98%|█████████▊| 132/135 [1:39:04<02:48, 56.28s/it]

Training RandomForest on part-00132-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  99%|█████████▊| 133/135 [1:39:43<01:42, 51.16s/it]

Training RandomForest on part-00133-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest:  99%|█████████▉| 134/135 [1:40:21<00:47, 47.23s/it]

Training RandomForest on part-00134-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv...


Training Random Forest: 100%|██████████| 135/135 [1:41:00<00:00, 44.89s/it]
Evaluating Random Forest: 100%|██████████| 34/34 [04:49<00:00,  8.52s/it]


In [None]:
# Evaluation phase
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

for k, v in preds.items():
    y_pred = v
    print(f"##### {ML_names[k]} #####")
    print('Accuracy Score: ', accuracy_score(y_test, y_pred))
    print('Recall Score: ', recall_score(y_test, y_pred, average='macro'))
    print('Precision Score: ', precision_score(y_test, y_pred, average='macro'))
    print('F1 Score: ', f1_score(y_test, y_pred, average='macro'))
    print("\n")


##### RandomForest #####
Accuracy Score:  0.9914299206753163
Recall Score:  0.7004519929366664
Precision Score:  0.8085019266522151
F1 Score:  0.7088549720620101




## Random Forest 8 class (7+1) classification

In [None]:
dict_7classes = {}
dict_7classes['DDoS-RSTFINFlood'] = 'DDoS'
dict_7classes['DDoS-PSHACK_Flood'] = 'DDoS'
dict_7classes['DDoS-SYN_Flood'] = 'DDoS'
dict_7classes['DDoS-UDP_Flood'] = 'DDoS'
dict_7classes['DDoS-TCP_Flood'] = 'DDoS'
dict_7classes['DDoS-ICMP_Flood'] = 'DDoS'
dict_7classes['DDoS-SynonymousIP_Flood'] = 'DDoS'
dict_7classes['DDoS-ACK_Fragmentation'] = 'DDoS'
dict_7classes['DDoS-UDP_Fragmentation'] = 'DDoS'
dict_7classes['DDoS-ICMP_Fragmentation'] = 'DDoS'
dict_7classes['DDoS-SlowLoris'] = 'DDoS'
dict_7classes['DDoS-HTTP_Flood'] = 'DDoS'

dict_7classes['DoS-UDP_Flood'] = 'DoS'
dict_7classes['DoS-SYN_Flood'] = 'DoS'
dict_7classes['DoS-TCP_Flood'] = 'DoS'
dict_7classes['DoS-HTTP_Flood'] = 'DoS'


dict_7classes['Mirai-greeth_flood'] = 'Mirai'
dict_7classes['Mirai-greip_flood'] = 'Mirai'
dict_7classes['Mirai-udpplain'] = 'Mirai'

dict_7classes['Recon-PingSweep'] = 'Recon'
dict_7classes['Recon-OSScan'] = 'Recon'
dict_7classes['Recon-PortScan'] = 'Recon'
dict_7classes['VulnerabilityScan'] = 'Recon'
dict_7classes['Recon-HostDiscovery'] = 'Recon'

dict_7classes['DNS_Spoofing'] = 'Spoofing'
dict_7classes['MITM-ArpSpoofing'] = 'Spoofing'

dict_7classes['BenignTraffic'] = 'Benign'

dict_7classes['BrowserHijacking'] = 'Web'
dict_7classes['Backdoor_Malware'] = 'Web'
dict_7classes['XSS'] = 'Web'
dict_7classes['Uploading_Attack'] = 'Web'
dict_7classes['SqlInjection'] = 'Web'
dict_7classes['CommandInjection'] = 'Web'


dict_7classes['DictionaryBruteForce'] = 'BruteForce'