In [1]:
import os

import kagglehub
import numpy
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")

In [3]:
# Fetch the "hassan06/nslkdd" dataset through kagglehub; the returned value is
# used below as the directory that contains the train/test text files.
data_path = kagglehub.dataset_download("hassan06/nslkdd")

# Build both file paths in one pass so the two stay in sync with `data_path`.
train_data_path, test_data_path = (
    os.path.join(data_path, file_name)
    for file_name in ("KDDTrain+.txt", "KDDTest+.txt")
)

In [4]:
from data_loader import Dataset

# 43 column names, in file order, handed to the loader before reading.
# The count matches the 43 columns reported when the data is loaded.
columns = [
    "duration", "protocol_type", "service", "flag",
    "src_bytes", "dst_bytes", "land", "wrong_fragment", "urgent",
    "hot", "num_failed_logins", "logged_in", "num_compromised",
    "root_shell", "su_attempted", "num_root", "num_file_creations",
    "num_shells", "num_access_files", "num_outbound_cmds",
    "is_host_login", "is_guest_login",
    "count", "srv_count", "serror_rate", "srv_serror_rate",
    "rerror_rate", "srv_rerror_rate", "same_srv_rate", "diff_srv_rate",
    "srv_diff_host_rate",
    "dst_host_count", "dst_host_srv_count",
    "dst_host_same_srv_rate", "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate", "dst_host_srv_serror_rate",
    "dst_host_rerror_rate", "dst_host_srv_rerror_rate",
    "label", "difficulty",
]

# Column names must be registered before load_data() is called.
dataset = Dataset(train_data_path, test_data_path)
dataset.set_columns(columns)
dataset.load_data()

# Short aliases for the two frames exposed by the loader.
df, test_df = dataset.train_df, dataset.test_df

Loaded training data: (125973, 43)
Loaded test data: (22544, 43)


In [5]:
# Preview the first 20 rows of the training frame; as the cell's last
# expression the result is rendered as the cell output.
# NOTE(review): the recorded output below shows only 10 rows, so it may be
# stale relative to this call - re-run to confirm.
df.head(20)

Unnamed: 0,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,...,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label,difficulty
0,0,tcp,ftp_data,SF,491,0,0,0,0,0,...,0.17,0.03,0.17,0.0,0.0,0.0,0.05,0.0,normal,20
1,0,udp,other,SF,146,0,0,0,0,0,...,0.0,0.6,0.88,0.0,0.0,0.0,0.0,0.0,normal,15
2,0,tcp,private,S0,0,0,0,0,0,0,...,0.1,0.05,0.0,0.0,1.0,1.0,0.0,0.0,neptune,19
3,0,tcp,http,SF,232,8153,0,0,0,0,...,1.0,0.0,0.03,0.04,0.03,0.01,0.0,0.01,normal,21
4,0,tcp,http,SF,199,420,0,0,0,0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,normal,21
5,0,tcp,private,REJ,0,0,0,0,0,0,...,0.07,0.07,0.0,0.0,0.0,0.0,1.0,1.0,neptune,21
6,0,tcp,private,S0,0,0,0,0,0,0,...,0.04,0.05,0.0,0.0,1.0,1.0,0.0,0.0,neptune,21
7,0,tcp,private,S0,0,0,0,0,0,0,...,0.06,0.07,0.0,0.0,1.0,1.0,0.0,0.0,neptune,21
8,0,tcp,remote_job,S0,0,0,0,0,0,0,...,0.09,0.05,0.0,0.0,1.0,1.0,0.0,0.0,neptune,21
9,0,tcp,private,S0,0,0,0,0,0,0,...,0.05,0.06,0.0,0.0,1.0,1.0,0.0,0.0,neptune,21


In [6]:
# Separate the training frame into the feature table and the "label" target.
# "difficulty" is dropped as well, which is why the feature table reported in
# the next cell has 41 of the original 43 columns.
X_df, y_true = dataset.get_X_y(
    df,
    label_column="label",
    drop_cols=["difficulty"],
)

In [7]:
# Report the dimensions of the feature table and the target in one write.
print(f"X shape: {X_df.shape}\ny shape: {y_true.shape}")

X shape: (125973, 41)
y shape: (125973,)


In [8]:
from encoder import OneHotEncoderWrapper

# The three string-valued feature columns that get one-hot encoded.
categorical_columns_names = ["protocol_type", "service", "flag"]

# The keyword arguments are presumably forwarded to scikit-learn's
# OneHotEncoder - confirm in encoder.py.
one_hot_encoder = OneHotEncoderWrapper(
    categories="auto",
    drop=None,
    sparse_output=False,
    handle_unknown="ignore",
)

# Fit on the training features only, then encode those same features.
# In the recorded run this yields 84 one-hot columns (41 - 3 + 84 = 122).
one_hot_encoder.fit(X_df, categorical_columns_names)
X_train_df = one_hot_encoder.transform(X_df)

print(f"Transformed X shape: {X_train_df.shape}")

Fitted encoder on 3 categorical columns
Created 84 one-hot encoded features
Transformed X shape: (125973, 122)


In [9]:
from encoder import LabelEncoderWrapper

# Encoder for the target column; it is fitted on the training labels only.
label_encoder = LabelEncoderWrapper()

# fit() is the cell's last expression, so its return value is displayed
# (the recorded output shows a fitted LabelEncoderWrapper with 23 classes).
label_encoder.fit(y_true)

Fitted LabelEncoder on 23 classes
Classes: ['back', 'buffer_overflow', 'ftp_write', 'guess_passwd', 'imap', 'ipsweep', 'land', 'loadmodule', 'multihop', 'neptune', 'nmap', 'normal', 'perl', 'phf', 'pod', 'portsweep', 'rootkit', 'satan', 'smurf', 'spy', 'teardrop', 'warezclient', 'warezmaster']


LabelEncoderWrapper(fitted=True, num_classes=23)

In [10]:
# Show the first 10 raw (string) labels before they are encoded.
y_true.head(10)

0     normal
1     normal
2    neptune
3     normal
4     normal
5    neptune
6    neptune
7    neptune
8    neptune
9    neptune
Name: label, dtype: str

In [11]:
# Convert the string labels with the encoder fitted above.
# Presumably yields one integer class id per row - confirm in encoder.py.
y_true_encoded = label_encoder.transform(y_true)
# Displayed as the cell output; expected to keep one entry per training row.
y_true_encoded.shape


(125973,)

In [12]:
# Hold out a stratified 25% validation split from the encoded training data.
#
# train_test_split is imported explicitly from sklearn.model_selection in the
# imports cell. A bare `import sklearn` does not guarantee that the
# `model_selection` submodule is loaded on every scikit-learn version, so
# `sklearn.model_selection.train_test_split` only worked if something else had
# already imported the submodule.
X_train_final_df, X_val_final_df, y_train_encoded, y_val_encoded = train_test_split(
    X_train_df,
    y_true_encoded,
    test_size=0.25,
    random_state=35673,  # fixed seed so the split is reproducible across runs
    stratify=y_true_encoded,  # keep the class proportions equal in both splits
)

print("X_train_final_df shape:", X_train_final_df.shape)
print("X_val_final_df shape:", X_val_final_df.shape)
print("y_train_encoded shape:", y_train_encoded.shape)
print("y_val_encoded shape:", y_val_encoded.shape)

X_train_final_df shape: (94479, 122)
X_val_final_df shape: (31494, 122)
y_train_encoded shape: (94479,)
y_val_encoded shape: (31494,)


In [13]:
from model import RandomForestModel

# Baseline classifier; settings not listed here keep the wrapper's defaults
# (shown by get_params() further down).
random_forest_classifier = RandomForestModel(
    class_weight="balanced",
    random_state=875636,
    n_jobs=4,
    verbose=0,
)

In [14]:
# Disabled: n_jobs=4 and verbose=0 are already passed to the RandomForestModel constructor.
# random_forest_classifier.set_params(n_jobs=4, verbose=0)

In [15]:
# Display the classifier's current settings, including the defaults that were
# not passed explicitly to the constructor.
random_forest_classifier.get_params()

{'n_estimators': 100,
 'criterion': 'gini',
 'max_depth': None,
 'min_samples_split': 2,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'bootstrap': True,
 'class_weight': 'balanced',
 'random_state': 875636,
 'n_jobs': 4,
 'verbose': 0}

In [19]:
# Search space handed to tune_hyperparameters().
# In the recorded run only the tuple/list-valued entries show up among the
# per-trial parameters; the scalar entries do not.
# NOTE(review): random_state here (355677) differs from the value given to the
# RandomForestModel constructor (875636) - confirm which seed the tuned models
# actually use.
hyperparameter_choices = dict(
    # (low, high) tuples: sampled as integers; both bounds were hit in the log.
    # Lists: one of the listed options is picked per trial.
    n_estimators=(50, 500),
    criterion=["gini", "entropy"],
    max_depth=(10, 50),
    min_samples_split=(2, 20),
    min_samples_leaf=(1, 10),
    max_features=["sqrt", "log2"],
    # Scalars: presumably held constant for every trial - confirm in model.py.
    bootstrap=True,
    class_weight="balanced",
    random_state=355677,
    n_jobs=4,
    verbose=0,
)


In [20]:
# Long-running cell: the recorded run needed roughly 11 minutes for 50 trials.
# NOTE(review): the same validation split appears to drive both the search and
# the reported best score, so that score is likely optimistic - keep test_df
# for the final evaluation.
hyperparameter_results = random_forest_classifier.tune_hyperparameters(
    X_train=X_train_final_df,
    y_train=y_train_encoded,
    X_val=X_val_final_df,
    y_val=y_val_encoded,
    param_distributions=hyperparameter_choices,
    n_trials=50,
    metric="macro_f1",
)

[32m[I 2026-02-02 03:04:00,804][0m A new study created in memory with name: no-name-75bb1b7f-18eb-4ce4-b751-8b9a8f73b4a0[0m


Starting hyperparameter tuning with 50 trials...
Optimizing for: macro_f1


Best trial: 0. Best value: 0.7319:   2%|▏         | 1/50 [00:08<07:09,  8.77s/it]

[32m[I 2026-02-02 03:04:09,560][0m Trial 0 finished with value: 0.7318996858264397 and parameters: {'n_estimators': 352, 'criterion': 'entropy', 'max_depth': 26, 'min_samples_split': 4, 'min_samples_leaf': 7, 'max_features': 'log2'}. Best is trial 0 with value: 0.7318996858264397.[0m


Best trial: 1. Best value: 0.791255:   4%|▍         | 2/50 [00:13<05:19,  6.66s/it]

[32m[I 2026-02-02 03:04:14,759][0m Trial 1 finished with value: 0.7912545472180771 and parameters: {'n_estimators': 104, 'criterion': 'entropy', 'max_depth': 22, 'min_samples_split': 20, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.7912545472180771.[0m


Best trial: 1. Best value: 0.791255:   6%|▌         | 3/50 [00:16<03:50,  4.91s/it]

[32m[I 2026-02-02 03:04:17,586][0m Trial 2 finished with value: 0.7781363722368461 and parameters: {'n_estimators': 77, 'criterion': 'entropy', 'max_depth': 26, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 1 with value: 0.7912545472180771.[0m


Best trial: 1. Best value: 0.791255:   8%|▊         | 4/50 [00:32<07:04,  9.23s/it]

[32m[I 2026-02-02 03:04:33,432][0m Trial 3 finished with value: 0.7870647128009051 and parameters: {'n_estimators': 320, 'criterion': 'gini', 'max_depth': 36, 'min_samples_split': 12, 'min_samples_leaf': 10, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.7912545472180771.[0m


Best trial: 4. Best value: 0.822603:  10%|█         | 5/50 [00:57<11:02, 14.73s/it]

[32m[I 2026-02-02 03:04:57,897][0m Trial 4 finished with value: 0.82260280698438 and parameters: {'n_estimators': 497, 'criterion': 'gini', 'max_depth': 44, 'min_samples_split': 20, 'min_samples_leaf': 2, 'max_features': 'sqrt'}. Best is trial 4 with value: 0.82260280698438.[0m


Best trial: 4. Best value: 0.822603:  12%|█▏        | 6/50 [01:01<08:17, 11.32s/it]

[32m[I 2026-02-02 03:05:02,608][0m Trial 5 finished with value: 0.7369199877458179 and parameters: {'n_estimators': 146, 'criterion': 'gini', 'max_depth': 50, 'min_samples_split': 19, 'min_samples_leaf': 7, 'max_features': 'log2'}. Best is trial 4 with value: 0.82260280698438.[0m


Best trial: 4. Best value: 0.822603:  14%|█▍        | 7/50 [01:04<06:11,  8.65s/it]

[32m[I 2026-02-02 03:05:05,745][0m Trial 6 finished with value: 0.7198207225310288 and parameters: {'n_estimators': 98, 'criterion': 'entropy', 'max_depth': 38, 'min_samples_split': 13, 'min_samples_leaf': 8, 'max_features': 'log2'}. Best is trial 4 with value: 0.82260280698438.[0m


Best trial: 4. Best value: 0.822603:  16%|█▌        | 8/50 [01:09<05:10,  7.40s/it]

[32m[I 2026-02-02 03:05:10,477][0m Trial 7 finished with value: 0.7257199551257646 and parameters: {'n_estimators': 115, 'criterion': 'gini', 'max_depth': 11, 'min_samples_split': 9, 'min_samples_leaf': 9, 'max_features': 'sqrt'}. Best is trial 4 with value: 0.82260280698438.[0m


Best trial: 4. Best value: 0.822603:  18%|█▊        | 9/50 [01:12<04:11,  6.13s/it]

[32m[I 2026-02-02 03:05:13,812][0m Trial 8 finished with value: 0.7323199095036109 and parameters: {'n_estimators': 87, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 18, 'min_samples_leaf': 2, 'max_features': 'sqrt'}. Best is trial 4 with value: 0.82260280698438.[0m


Best trial: 9. Best value: 0.866919:  20%|██        | 10/50 [01:22<04:48,  7.21s/it]

[32m[I 2026-02-02 03:05:23,428][0m Trial 9 finished with value: 0.866919206064031 and parameters: {'n_estimators': 213, 'criterion': 'gini', 'max_depth': 27, 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  22%|██▏       | 11/50 [01:33<05:20,  8.21s/it]

[32m[I 2026-02-02 03:05:33,901][0m Trial 10 finished with value: 0.8326719910535658 and parameters: {'n_estimators': 229, 'criterion': 'gini', 'max_depth': 18, 'min_samples_split': 15, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  24%|██▍       | 12/50 [01:42<05:26,  8.60s/it]

[32m[I 2026-02-02 03:05:43,421][0m Trial 11 finished with value: 0.8346469474124465 and parameters: {'n_estimators': 212, 'criterion': 'gini', 'max_depth': 18, 'min_samples_split': 15, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  26%|██▌       | 13/50 [01:53<05:40,  9.22s/it]

[32m[I 2026-02-02 03:05:54,030][0m Trial 12 finished with value: 0.8283667930296698 and parameters: {'n_estimators': 234, 'criterion': 'gini', 'max_depth': 18, 'min_samples_split': 9, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  28%|██▊       | 14/50 [02:01<05:26,  9.07s/it]

[32m[I 2026-02-02 03:06:02,766][0m Trial 13 finished with value: 0.8189409946423943 and parameters: {'n_estimators': 194, 'criterion': 'gini', 'max_depth': 32, 'min_samples_split': 16, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  30%|███       | 15/50 [02:15<06:01, 10.33s/it]

[32m[I 2026-02-02 03:06:16,013][0m Trial 14 finished with value: 0.8336410229187798 and parameters: {'n_estimators': 297, 'criterion': 'gini', 'max_depth': 18, 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  32%|███▏      | 16/50 [02:33<07:09, 12.62s/it]

[32m[I 2026-02-02 03:06:33,951][0m Trial 15 finished with value: 0.8353160172433619 and parameters: {'n_estimators': 379, 'criterion': 'gini', 'max_depth': 29, 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  34%|███▍      | 17/50 [02:51<07:48, 14.21s/it]

[32m[I 2026-02-02 03:06:51,852][0m Trial 16 finished with value: 0.8350506781406746 and parameters: {'n_estimators': 394, 'criterion': 'gini', 'max_depth': 30, 'min_samples_split': 8, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  36%|███▌      | 18/50 [03:13<08:52, 16.65s/it]

[32m[I 2026-02-02 03:07:14,210][0m Trial 17 finished with value: 0.836478417599588 and parameters: {'n_estimators': 482, 'criterion': 'gini', 'max_depth': 36, 'min_samples_split': 6, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  38%|███▊      | 19/50 [03:29<08:32, 16.53s/it]

[32m[I 2026-02-02 03:07:30,457][0m Trial 18 finished with value: 0.8278769823284836 and parameters: {'n_estimators': 494, 'criterion': 'entropy', 'max_depth': 39, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_features': 'log2'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  40%|████      | 20/50 [03:50<08:59, 17.97s/it]

[32m[I 2026-02-02 03:07:51,775][0m Trial 19 finished with value: 0.7846436851277429 and parameters: {'n_estimators': 456, 'criterion': 'gini', 'max_depth': 42, 'min_samples_split': 6, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  42%|████▏     | 21/50 [04:03<07:54, 16.38s/it]

[32m[I 2026-02-02 03:08:04,443][0m Trial 20 finished with value: 0.8208562762261686 and parameters: {'n_estimators': 265, 'criterion': 'gini', 'max_depth': 34, 'min_samples_split': 12, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  44%|████▍     | 22/50 [04:23<08:06, 17.38s/it]

[32m[I 2026-02-02 03:08:24,137][0m Trial 21 finished with value: 0.836418600169521 and parameters: {'n_estimators': 424, 'criterion': 'gini', 'max_depth': 28, 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  46%|████▌     | 23/50 [04:44<08:19, 18.49s/it]

[32m[I 2026-02-02 03:08:45,236][0m Trial 22 finished with value: 0.8355360823495951 and parameters: {'n_estimators': 447, 'criterion': 'gini', 'max_depth': 25, 'min_samples_split': 7, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  48%|████▊     | 24/50 [05:04<08:12, 18.96s/it]

[32m[I 2026-02-02 03:09:05,281][0m Trial 23 finished with value: 0.7844917200618119 and parameters: {'n_estimators': 428, 'criterion': 'gini', 'max_depth': 32, 'min_samples_split': 4, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  50%|█████     | 25/50 [05:12<06:31, 15.67s/it]

[32m[I 2026-02-02 03:09:13,259][0m Trial 24 finished with value: 0.8047410277544038 and parameters: {'n_estimators': 165, 'criterion': 'gini', 'max_depth': 29, 'min_samples_split': 11, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  52%|█████▏    | 26/50 [05:31<06:39, 16.64s/it]

[32m[I 2026-02-02 03:09:32,188][0m Trial 25 finished with value: 0.8347844524142701 and parameters: {'n_estimators': 411, 'criterion': 'gini', 'max_depth': 22, 'min_samples_split': 13, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  54%|█████▍    | 27/50 [05:42<05:44, 14.96s/it]

[32m[I 2026-02-02 03:09:43,240][0m Trial 26 finished with value: 0.766339467725987 and parameters: {'n_estimators': 342, 'criterion': 'entropy', 'max_depth': 45, 'min_samples_split': 17, 'min_samples_leaf': 5, 'max_features': 'log2'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  56%|█████▌    | 28/50 [05:55<05:16, 14.40s/it]

[32m[I 2026-02-02 03:09:56,335][0m Trial 27 finished with value: 0.7975647033871639 and parameters: {'n_estimators': 269, 'criterion': 'gini', 'max_depth': 35, 'min_samples_split': 4, 'min_samples_leaf': 9, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  58%|█████▊    | 29/50 [06:18<05:57, 17.02s/it]

[32m[I 2026-02-02 03:10:19,435][0m Trial 28 finished with value: 0.8244712372539342 and parameters: {'n_estimators': 476, 'criterion': 'gini', 'max_depth': 40, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  60%|██████    | 30/50 [06:29<05:02, 15.12s/it]

[32m[I 2026-02-02 03:10:30,125][0m Trial 29 finished with value: 0.7240965739133742 and parameters: {'n_estimators': 364, 'criterion': 'entropy', 'max_depth': 25, 'min_samples_split': 2, 'min_samples_leaf': 7, 'max_features': 'log2'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  62%|██████▏   | 31/50 [06:44<04:48, 15.19s/it]

[32m[I 2026-02-02 03:10:45,483][0m Trial 30 finished with value: 0.8364535842301769 and parameters: {'n_estimators': 321, 'criterion': 'gini', 'max_depth': 27, 'min_samples_split': 4, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  64%|██████▍   | 32/50 [06:59<04:33, 15.20s/it]

[32m[I 2026-02-02 03:11:00,709][0m Trial 31 finished with value: 0.8339631634087133 and parameters: {'n_estimators': 333, 'criterion': 'gini', 'max_depth': 27, 'min_samples_split': 4, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  66%|██████▌   | 33/50 [07:20<04:43, 16.67s/it]

[32m[I 2026-02-02 03:11:20,821][0m Trial 32 finished with value: 0.8008095551253963 and parameters: {'n_estimators': 431, 'criterion': 'gini', 'max_depth': 22, 'min_samples_split': 6, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  68%|██████▊   | 34/50 [07:34<04:14, 15.88s/it]

[32m[I 2026-02-02 03:11:34,841][0m Trial 33 finished with value: 0.8257203713273388 and parameters: {'n_estimators': 299, 'criterion': 'gini', 'max_depth': 31, 'min_samples_split': 3, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  70%|███████   | 35/50 [07:53<04:15, 17.04s/it]

[32m[I 2026-02-02 03:11:54,571][0m Trial 34 finished with value: 0.8210766800610055 and parameters: {'n_estimators': 456, 'criterion': 'entropy', 'max_depth': 27, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  72%|███████▏  | 36/50 [08:12<04:05, 17.53s/it]

[32m[I 2026-02-02 03:12:13,263][0m Trial 35 finished with value: 0.8357506716528682 and parameters: {'n_estimators': 397, 'criterion': 'gini', 'max_depth': 23, 'min_samples_split': 8, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  74%|███████▍  | 37/50 [08:23<03:24, 15.71s/it]

[32m[I 2026-02-02 03:12:24,727][0m Trial 36 finished with value: 0.7184488271327291 and parameters: {'n_estimators': 359, 'criterion': 'gini', 'max_depth': 34, 'min_samples_split': 2, 'min_samples_leaf': 8, 'max_features': 'log2'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  76%|███████▌  | 38/50 [08:26<02:20, 11.74s/it]

[32m[I 2026-02-02 03:12:27,189][0m Trial 37 finished with value: 0.7767649092906536 and parameters: {'n_estimators': 53, 'criterion': 'entropy', 'max_depth': 28, 'min_samples_split': 13, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  78%|███████▊  | 39/50 [08:34<01:58, 10.79s/it]

[32m[I 2026-02-02 03:12:35,792][0m Trial 38 finished with value: 0.7976003503241009 and parameters: {'n_estimators': 173, 'criterion': 'gini', 'max_depth': 24, 'min_samples_split': 7, 'min_samples_leaf': 9, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  80%|████████  | 40/50 [08:44<01:44, 10.41s/it]

[32m[I 2026-02-02 03:12:45,309][0m Trial 39 finished with value: 0.7884975506539338 and parameters: {'n_estimators': 303, 'criterion': 'gini', 'max_depth': 15, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  82%|████████▏ | 41/50 [08:50<01:22,  9.21s/it]

[32m[I 2026-02-02 03:12:51,715][0m Trial 40 finished with value: 0.8350233146267751 and parameters: {'n_estimators': 134, 'criterion': 'gini', 'max_depth': 37, 'min_samples_split': 14, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  84%|████████▍ | 42/50 [09:08<01:34, 11.84s/it]

[32m[I 2026-02-02 03:13:09,681][0m Trial 41 finished with value: 0.8357506716528682 and parameters: {'n_estimators': 391, 'criterion': 'gini', 'max_depth': 23, 'min_samples_split': 8, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  86%|████████▌ | 43/50 [09:28<01:38, 14.05s/it]

[32m[I 2026-02-02 03:13:28,897][0m Trial 42 finished with value: 0.8348507470236719 and parameters: {'n_estimators': 419, 'criterion': 'gini', 'max_depth': 21, 'min_samples_split': 8, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  88%|████████▊ | 44/50 [09:49<01:38, 16.36s/it]

[32m[I 2026-02-02 03:13:50,619][0m Trial 43 finished with value: 0.8031741193225646 and parameters: {'n_estimators': 479, 'criterion': 'gini', 'max_depth': 20, 'min_samples_split': 10, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  90%|█████████ | 45/50 [10:09<01:26, 17.29s/it]

[32m[I 2026-02-02 03:14:10,082][0m Trial 44 finished with value: 0.8235599793328136 and parameters: {'n_estimators': 404, 'criterion': 'gini', 'max_depth': 26, 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  92%|█████████▏| 46/50 [10:21<01:03, 15.83s/it]

[32m[I 2026-02-02 03:14:22,525][0m Trial 45 finished with value: 0.8171160965277869 and parameters: {'n_estimators': 244, 'criterion': 'gini', 'max_depth': 33, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  94%|█████████▍| 47/50 [10:36<00:46, 15.53s/it]

[32m[I 2026-02-02 03:14:37,360][0m Trial 46 finished with value: 0.8353584572955424 and parameters: {'n_estimators': 322, 'criterion': 'gini', 'max_depth': 30, 'min_samples_split': 7, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  96%|█████████▌| 48/50 [10:53<00:32, 16.10s/it]

[32m[I 2026-02-02 03:14:54,769][0m Trial 47 finished with value: 0.7822277624398919 and parameters: {'n_estimators': 370, 'criterion': 'gini', 'max_depth': 36, 'min_samples_split': 18, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919:  98%|█████████▊| 49/50 [11:02<00:13, 13.84s/it]

[32m[I 2026-02-02 03:15:03,360][0m Trial 48 finished with value: 0.7684360015342455 and parameters: {'n_estimators': 202, 'criterion': 'entropy', 'max_depth': 14, 'min_samples_split': 20, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 9 with value: 0.866919206064031.[0m


Best trial: 9. Best value: 0.866919: 100%|██████████| 50/50 [11:16<00:00, 13.54s/it]

[32m[I 2026-02-02 03:15:17,788][0m Trial 49 finished with value: 0.7947448171511151 and parameters: {'n_estimators': 446, 'criterion': 'gini', 'max_depth': 28, 'min_samples_split': 5, 'min_samples_leaf': 5, 'max_features': 'log2'}. Best is trial 9 with value: 0.866919206064031.[0m

Best macro_f1: 0.8669
Best hyperparameters:
  n_estimators: 213
  criterion: gini
  max_depth: 27
  min_samples_split: 15
  min_samples_leaf: 6
  max_features: sqrt





In [21]:
# Display the tuning result; the recorded output shows a dict with the keys
# 'best_params', 'best_score' and 'study'.
hyperparameter_results

{'best_params': {'n_estimators': 213,
  'criterion': 'gini',
  'max_depth': 27,
  'min_samples_split': 15,
  'min_samples_leaf': 6,
  'max_features': 'sqrt'},
 'best_score': 0.866919206064031,
 'study': <optuna.study.study.Study at 0x202e46945c0>}

In [27]:
from optuna.visualization import (
    plot_optimization_history,
    plot_param_importances,
    plot_contour,
    plot_slice,
    plot_parallel_coordinate,
)
import plotly

# The Optuna study object stored by the tuning step; the plotting helpers
# imported above take it as their input.
study = hyperparameter_results["study"]

In [33]:
# Environment check: record which plotly version renders the figures below.
print(plotly.__version__)

6.5.2


In [None]:
# Build the hyperparameter-importance figure for the finished study and render
# it in the notebook.
fig = plot_param_importances(study)
fig.show()