In [18]:
from pathlib import Path
import optuna
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import f1_score

In [2]:
relpath_to_csv_folder = Path("../../datasets/CIC-IDS-2017/MachineLearningCSV/MachineLearningCVE")
df_train = pd.read_csv(relpath_to_csv_folder / "train.csv")
df_test = pd.read_csv(relpath_to_csv_folder / "test.csv")

### Encode Labels to numbers referring to the encoding in AlertNet training

In [8]:
labelnames2encodednums = {
    "BENIGN": "0",
    "Bot": "1",
    "DDoS": "2",
    "DoS": "3",
    "FTP-Patator": "4",
    "PortScan": "5",
    "SSH-Patator": "6",
    "Web": "7"
}
df_train = df_train.replace({"Label": labelnames2encodednums})
df_train["Label"] = df_train["Label"].astype("int64")
df_test = df_test.replace({"Label": labelnames2encodednums})
df_test["Label"] = df_test["Label"].astype("int64")

df_train.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,53,251,2,2,82,152,41,41,41.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,0
1,15837,2307203,1,5,1375,24,1375,1375,1375.0,0.0,...,20,0.0,0.0,0,0,0.0,0.0,0,0,0
2,53,357,2,2,90,206,45,45,45.0,0.0,...,32,0.0,0.0,0,0,0.0,0.0,0,0,0
3,62209,10051963,1,5,6,30,6,6,6.0,0.0,...,20,16498.0,0.0,16498,16498,10000000.0,0.0,10000000,10000000,0
4,58372,12009644,1,6,1375,30,1375,1375,1375.0,0.0,...,20,22971.0,0.0,22971,22971,12000000.0,0.0,12000000,12000000,0


### Split data into features and labels

In [4]:
train_data = df_train.drop("Label", axis=1).values
train_labels = df_train["Label"].values
test_data = df_test.drop("Label", axis=1).values
test_labels = df_test["Label"].values

### Define evaluation function for the optimization by optuna

In [5]:
def objective(trial, train_data, train_labels, test_data, test_labels):
    param = {}
    param['n_estimators'] = trial.suggest_int('n_estimators', 1, 100, log=True)
    param['max_depth'] = trial.suggest_int('max_depth', 1, 100, log=True)

    model = RandomForestClassifier(**param)
    model.fit(train_data, train_labels)
    
    preds =  model.predict(test_data)
    score = mae(preds, test_labels)

    return score

In [6]:
EXECUTE_OPTIMIZATION = True

if EXECUTE_OPTIMIZATION:
    study = optuna.create_study()
    # start optimization
    study.optimize(lambda trial: objective(trial, train_data, train_labels, test_data, test_labels), n_trials=100, show_progress_bar=True)
    # print the best parameter
    print(f'Best parameter:{study.best_params}')

    # initialize RandomForestClassifier with the best parameter
    model = RandomForestClassifier(**study.best_params)
    model.fit(train_data, train_labels)

# Best parameter:{'n_estimators': 52, 'max_depth': 59}

[32m[I 2022-07-04 20:32:49,649][0m A new study created in memory with name: no-name-777d8ca4-0923-434e-ad6d-4cd109b8671f[0m
  self._init_valid()
  1%|          | 1/100 [00:00<00:32,  3.04it/s]

[32m[I 2022-07-04 20:32:49,984][0m Trial 0 finished with value: 0.009128892946174643 and parameters: {'n_estimators': 2, 'max_depth': 32}. Best is trial 0 with value: 0.009128892946174643.[0m


  3%|▎         | 3/100 [00:02<01:10,  1.37it/s]

[32m[I 2022-07-04 20:32:51,783][0m Trial 1 finished with value: 0.6846669709630983 and parameters: {'n_estimators': 42, 'max_depth': 2}. Best is trial 0 with value: 0.009128892946174643.[0m
[32m[I 2022-07-04 20:32:51,962][0m Trial 2 finished with value: 1.083529370457498 and parameters: {'n_estimators': 6, 'max_depth': 1}. Best is trial 0 with value: 0.009128892946174643.[0m


  4%|▍         | 4/100 [00:06<03:36,  2.25s/it]

[32m[I 2022-07-04 20:32:56,546][0m Trial 3 finished with value: 0.19978231101436045 and parameters: {'n_estimators': 80, 'max_depth': 3}. Best is trial 0 with value: 0.009128892946174643.[0m
[32m[I 2022-07-04 20:32:56,612][0m Trial 4 finished with value: 1.083529370457498 and parameters: {'n_estimators': 1, 'max_depth': 1}. Best is trial 0 with value: 0.009128892946174643.[0m


  6%|▌         | 6/100 [00:12<03:56,  2.51s/it]

[32m[I 2022-07-04 20:33:02,040][0m Trial 5 finished with value: 0.004950668866963941 and parameters: {'n_estimators': 37, 'max_depth': 28}. Best is trial 5 with value: 0.004950668866963941.[0m


  7%|▋         | 7/100 [00:19<05:44,  3.71s/it]

[32m[I 2022-07-04 20:33:08,902][0m Trial 6 finished with value: 0.005512446894420842 and parameters: {'n_estimators': 51, 'max_depth': 32}. Best is trial 5 with value: 0.004950668866963941.[0m


  9%|▉         | 9/100 [00:19<03:03,  2.02s/it]

[32m[I 2022-07-04 20:33:09,299][0m Trial 7 finished with value: 1.083529370457498 and parameters: {'n_estimators': 14, 'max_depth': 1}. Best is trial 5 with value: 0.004950668866963941.[0m
[32m[I 2022-07-04 20:33:09,440][0m Trial 8 finished with value: 1.083529370457498 and parameters: {'n_estimators': 4, 'max_depth': 1}. Best is trial 5 with value: 0.004950668866963941.[0m


 10%|█         | 10/100 [00:21<02:41,  1.79s/it]

[32m[I 2022-07-04 20:33:10,668][0m Trial 9 finished with value: 0.6821740809662582 and parameters: {'n_estimators': 30, 'max_depth': 2}. Best is trial 5 with value: 0.004950668866963941.[0m


 11%|█         | 11/100 [00:22<02:44,  1.84s/it]

[32m[I 2022-07-04 20:33:12,643][0m Trial 10 finished with value: 0.01306133913837295 and parameters: {'n_estimators': 16, 'max_depth': 10}. Best is trial 5 with value: 0.004950668866963941.[0m


 12%|█▏        | 12/100 [00:36<07:40,  5.24s/it]

[32m[I 2022-07-04 20:33:25,971][0m Trial 11 finished with value: 0.004880446613531828 and parameters: {'n_estimators': 96, 'max_depth': 99}. Best is trial 11 with value: 0.004880446613531828.[0m


 13%|█▎        | 13/100 [00:48<10:45,  7.42s/it]

[32m[I 2022-07-04 20:33:38,567][0m Trial 12 finished with value: 0.004880446613531828 and parameters: {'n_estimators': 90, 'max_depth': 96}. Best is trial 11 with value: 0.004880446613531828.[0m


 14%|█▍        | 14/100 [01:01<12:47,  8.92s/it]

[32m[I 2022-07-04 20:33:51,025][0m Trial 13 finished with value: 0.004985779993679997 and parameters: {'n_estimators': 93, 'max_depth': 90}. Best is trial 11 with value: 0.004880446613531828.[0m


 15%|█▌        | 15/100 [01:03<09:41,  6.84s/it]

[32m[I 2022-07-04 20:33:52,969][0m Trial 14 finished with value: 0.006214669428741968 and parameters: {'n_estimators': 15, 'max_depth': 100}. Best is trial 11 with value: 0.004880446613531828.[0m


 16%|█▌        | 16/100 [01:15<11:43,  8.37s/it]

[32m[I 2022-07-04 20:34:04,939][0m Trial 15 finished with value: 0.005652891401285067 and parameters: {'n_estimators': 87, 'max_depth': 14}. Best is trial 11 with value: 0.004880446613531828.[0m


 17%|█▋        | 17/100 [01:18<09:37,  6.95s/it]

[32m[I 2022-07-04 20:34:08,575][0m Trial 16 finished with value: 0.005266669007408448 and parameters: {'n_estimators': 27, 'max_depth': 57}. Best is trial 11 with value: 0.004880446613531828.[0m


 18%|█▊        | 18/100 [01:20<07:10,  5.25s/it]

[32m[I 2022-07-04 20:34:09,842][0m Trial 17 finished with value: 0.005091113373828166 and parameters: {'n_estimators': 9, 'max_depth': 53}. Best is trial 11 with value: 0.004880446613531828.[0m


 19%|█▉        | 19/100 [01:28<08:07,  6.02s/it]

[32m[I 2022-07-04 20:34:17,671][0m Trial 18 finished with value: 0.005196446753976335 and parameters: {'n_estimators': 58, 'max_depth': 17}. Best is trial 11 with value: 0.004880446613531828.[0m


 20%|██        | 20/100 [01:28<05:44,  4.30s/it]

[32m[I 2022-07-04 20:34:17,944][0m Trial 19 finished with value: 0.2193392085952038 and parameters: {'n_estimators': 3, 'max_depth': 4}. Best is trial 11 with value: 0.004880446613531828.[0m


 21%|██        | 21/100 [01:30<04:49,  3.66s/it]

[32m[I 2022-07-04 20:34:20,124][0m Trial 20 finished with value: 0.04020224008988448 and parameters: {'n_estimators': 22, 'max_depth': 6}. Best is trial 11 with value: 0.004880446613531828.[0m


 22%|██▏       | 22/100 [01:36<05:40,  4.37s/it]

[32m[I 2022-07-04 20:34:26,129][0m Trial 21 finished with value: 0.005020891120396054 and parameters: {'n_estimators': 42, 'max_depth': 31}. Best is trial 11 with value: 0.004880446613531828.[0m


 23%|██▎       | 23/100 [01:45<07:17,  5.68s/it]

[32m[I 2022-07-04 20:34:34,884][0m Trial 22 finished with value: 0.005091113373828166 and parameters: {'n_estimators': 62, 'max_depth': 57}. Best is trial 11 with value: 0.004880446613531828.[0m


 24%|██▍       | 24/100 [01:58<09:56,  7.85s/it]

[32m[I 2022-07-04 20:34:47,799][0m Trial 23 finished with value: 0.004564446473087322 and parameters: {'n_estimators': 94, 'max_depth': 26}. Best is trial 23 with value: 0.004564446473087322.[0m


 25%|██▌       | 25/100 [02:11<11:58,  9.58s/it]

[32m[I 2022-07-04 20:35:01,401][0m Trial 24 finished with value: 0.004985779993679997 and parameters: {'n_estimators': 100, 'max_depth': 73}. Best is trial 23 with value: 0.004564446473087322.[0m


 27%|██▋       | 27/100 [02:20<08:03,  6.62s/it]

[32m[I 2022-07-04 20:35:10,381][0m Trial 25 finished with value: 0.005126224500544223 and parameters: {'n_estimators': 64, 'max_depth': 18}. Best is trial 23 with value: 0.004564446473087322.[0m
[32m[I 2022-07-04 20:35:10,525][0m Trial 26 finished with value: 0.015132895614620273 and parameters: {'n_estimators': 1, 'max_depth': 42}. Best is trial 23 with value: 0.004564446473087322.[0m


 28%|██▊       | 28/100 [02:22<06:03,  5.04s/it]

[32m[I 2022-07-04 20:35:11,887][0m Trial 27 finished with value: 0.006284891682174081 and parameters: {'n_estimators': 9, 'max_depth': 74}. Best is trial 23 with value: 0.004564446473087322.[0m


 29%|██▉       | 29/100 [02:25<05:14,  4.43s/it]

[32m[I 2022-07-04 20:35:14,871][0m Trial 28 finished with value: 0.004845335486815772 and parameters: {'n_estimators': 21, 'max_depth': 22}. Best is trial 23 with value: 0.004564446473087322.[0m


 30%|███       | 30/100 [02:27<04:33,  3.91s/it]

[32m[I 2022-07-04 20:35:17,566][0m Trial 29 finished with value: 0.017731118991608442 and parameters: {'n_estimators': 22, 'max_depth': 9}. Best is trial 23 with value: 0.004564446473087322.[0m


 31%|███       | 31/100 [02:28<03:14,  2.82s/it]

[32m[I 2022-07-04 20:35:17,862][0m Trial 30 finished with value: 0.009304448579754925 and parameters: {'n_estimators': 2, 'max_depth': 24}. Best is trial 23 with value: 0.004564446473087322.[0m


 32%|███▏      | 32/100 [02:37<05:14,  4.63s/it]

[32m[I 2022-07-04 20:35:26,704][0m Trial 31 finished with value: 0.005372002387556617 and parameters: {'n_estimators': 63, 'max_depth': 42}. Best is trial 23 with value: 0.004564446473087322.[0m


 33%|███▎      | 33/100 [02:42<05:27,  4.89s/it]

[32m[I 2022-07-04 20:35:32,204][0m Trial 32 finished with value: 0.004634668726519434 and parameters: {'n_estimators': 40, 'max_depth': 21}. Best is trial 23 with value: 0.004564446473087322.[0m


 34%|███▍      | 34/100 [02:48<05:38,  5.13s/it]

[32m[I 2022-07-04 20:35:37,898][0m Trial 33 finished with value: 0.006846669709630982 and parameters: {'n_estimators': 42, 'max_depth': 12}. Best is trial 23 with value: 0.004564446473087322.[0m


 35%|███▌      | 35/100 [02:51<04:55,  4.54s/it]

[32m[I 2022-07-04 20:35:41,069][0m Trial 34 finished with value: 0.04041290685018082 and parameters: {'n_estimators': 33, 'max_depth': 6}. Best is trial 23 with value: 0.004564446473087322.[0m


 36%|███▌      | 36/100 [02:54<04:22,  4.10s/it]

[32m[I 2022-07-04 20:35:44,140][0m Trial 35 finished with value: 0.00505600224711211 and parameters: {'n_estimators': 22, 'max_depth': 22}. Best is trial 23 with value: 0.004564446473087322.[0m


 37%|███▋      | 37/100 [03:01<05:07,  4.89s/it]

[32m[I 2022-07-04 20:35:50,860][0m Trial 36 finished with value: 0.005020891120396054 and parameters: {'n_estimators': 48, 'max_depth': 37}. Best is trial 23 with value: 0.004564446473087322.[0m


 38%|███▊      | 38/100 [03:11<06:38,  6.42s/it]

[32m[I 2022-07-04 20:36:00,863][0m Trial 37 finished with value: 0.004985779993679997 and parameters: {'n_estimators': 72, 'max_depth': 22}. Best is trial 23 with value: 0.004564446473087322.[0m


 39%|███▉      | 39/100 [03:12<04:57,  4.87s/it]

[32m[I 2022-07-04 20:36:02,126][0m Trial 38 finished with value: 0.02707067869807942 and parameters: {'n_estimators': 11, 'max_depth': 8}. Best is trial 23 with value: 0.004564446473087322.[0m


 40%|████      | 40/100 [03:17<04:56,  4.95s/it]

[32m[I 2022-07-04 20:36:07,237][0m Trial 39 finished with value: 0.005091113373828166 and parameters: {'n_estimators': 38, 'max_depth': 14}. Best is trial 23 with value: 0.004564446473087322.[0m


 41%|████      | 41/100 [03:21<04:27,  4.54s/it]

[32m[I 2022-07-04 20:36:10,828][0m Trial 40 finished with value: 0.08103648046065798 and parameters: {'n_estimators': 50, 'max_depth': 4}. Best is trial 23 with value: 0.004564446473087322.[0m


 42%|████▏     | 42/100 [03:32<06:18,  6.52s/it]

[32m[I 2022-07-04 20:36:21,964][0m Trial 41 finished with value: 0.004775113233383659 and parameters: {'n_estimators': 82, 'max_depth': 73}. Best is trial 23 with value: 0.004564446473087322.[0m


 43%|████▎     | 43/100 [03:42<07:21,  7.75s/it]

[32m[I 2022-07-04 20:36:32,576][0m Trial 42 finished with value: 0.004810224360099716 and parameters: {'n_estimators': 76, 'max_depth': 47}. Best is trial 23 with value: 0.004564446473087322.[0m


 44%|████▍     | 44/100 [03:53<07:54,  8.47s/it]

[32m[I 2022-07-04 20:36:42,721][0m Trial 43 finished with value: 0.004775113233383659 and parameters: {'n_estimators': 73, 'max_depth': 49}. Best is trial 23 with value: 0.004564446473087322.[0m


 45%|████▌     | 45/100 [04:03<08:11,  8.94s/it]

[32m[I 2022-07-04 20:36:52,769][0m Trial 44 finished with value: 0.005020891120396054 and parameters: {'n_estimators': 73, 'max_depth': 50}. Best is trial 23 with value: 0.004564446473087322.[0m


 46%|████▌     | 46/100 [04:13<08:28,  9.42s/it]

[32m[I 2022-07-04 20:37:03,297][0m Trial 45 finished with value: 0.005301780134124504 and parameters: {'n_estimators': 75, 'max_depth': 69}. Best is trial 23 with value: 0.004564446473087322.[0m


 47%|████▋     | 47/100 [04:20<07:42,  8.73s/it]

[32m[I 2022-07-04 20:37:10,424][0m Trial 46 finished with value: 0.00572311365471718 and parameters: {'n_estimators': 51, 'max_depth': 30}. Best is trial 23 with value: 0.004564446473087322.[0m


 48%|████▊     | 48/100 [04:24<06:20,  7.32s/it]

[32m[I 2022-07-04 20:37:14,458][0m Trial 47 finished with value: 0.005547558021136898 and parameters: {'n_estimators': 30, 'max_depth': 39}. Best is trial 23 with value: 0.004564446473087322.[0m


 49%|████▉     | 49/100 [04:35<07:04,  8.33s/it]

[32m[I 2022-07-04 20:37:25,144][0m Trial 48 finished with value: 0.005020891120396054 and parameters: {'n_estimators': 77, 'max_depth': 69}. Best is trial 23 with value: 0.004564446473087322.[0m


 50%|█████     | 50/100 [04:36<05:04,  6.08s/it]

[32m[I 2022-07-04 20:37:25,978][0m Trial 49 finished with value: 0.005898669288297461 and parameters: {'n_estimators': 5, 'max_depth': 48}. Best is trial 23 with value: 0.004564446473087322.[0m


 51%|█████     | 51/100 [04:43<05:16,  6.46s/it]

[32m[I 2022-07-04 20:37:33,321][0m Trial 50 finished with value: 0.004704890979951547 and parameters: {'n_estimators': 55, 'max_depth': 27}. Best is trial 23 with value: 0.004564446473087322.[0m


 52%|█████▏    | 52/100 [04:57<06:57,  8.69s/it]

[32m[I 2022-07-04 20:37:47,226][0m Trial 51 finished with value: 0.005231557880692392 and parameters: {'n_estimators': 100, 'max_depth': 34}. Best is trial 23 with value: 0.004564446473087322.[0m


 53%|█████▎    | 53/100 [05:04<06:29,  8.28s/it]

[32m[I 2022-07-04 20:37:54,540][0m Trial 52 finished with value: 0.004704890979951547 and parameters: {'n_estimators': 54, 'max_depth': 27}. Best is trial 23 with value: 0.004564446473087322.[0m


 54%|█████▍    | 54/100 [05:12<06:14,  8.13s/it]

[32m[I 2022-07-04 20:38:02,330][0m Trial 53 finished with value: 0.005617780274569011 and parameters: {'n_estimators': 57, 'max_depth': 26}. Best is trial 23 with value: 0.004564446473087322.[0m


 55%|█████▌    | 55/100 [05:19<05:43,  7.62s/it]

[32m[I 2022-07-04 20:38:08,770][0m Trial 54 finished with value: 0.004775113233383659 and parameters: {'n_estimators': 45, 'max_depth': 16}. Best is trial 23 with value: 0.004564446473087322.[0m


 56%|█████▌    | 56/100 [05:23<04:57,  6.76s/it]

[32m[I 2022-07-04 20:38:13,504][0m Trial 55 finished with value: 0.005020891120396054 and parameters: {'n_estimators': 33, 'max_depth': 18}. Best is trial 23 with value: 0.004564446473087322.[0m


 57%|█████▋    | 57/100 [05:30<04:44,  6.62s/it]

[32m[I 2022-07-04 20:38:19,800][0m Trial 56 finished with value: 0.005020891120396054 and parameters: {'n_estimators': 45, 'max_depth': 16}. Best is trial 23 with value: 0.004564446473087322.[0m


 58%|█████▊    | 58/100 [05:35<04:18,  6.15s/it]

[32m[I 2022-07-04 20:38:24,843][0m Trial 57 finished with value: 0.006776447456198869 and parameters: {'n_estimators': 40, 'max_depth': 11}. Best is trial 23 with value: 0.004564446473087322.[0m


 59%|█████▉    | 59/100 [05:39<03:43,  5.45s/it]

[32m[I 2022-07-04 20:38:28,675][0m Trial 58 finished with value: 0.005547558021136898 and parameters: {'n_estimators': 28, 'max_depth': 26}. Best is trial 23 with value: 0.004564446473087322.[0m


 60%|██████    | 60/100 [05:46<04:07,  6.19s/it]

[32m[I 2022-07-04 20:38:36,571][0m Trial 59 finished with value: 0.005091113373828166 and parameters: {'n_estimators': 59, 'max_depth': 59}. Best is trial 23 with value: 0.004564446473087322.[0m


 61%|██████    | 61/100 [05:53<04:09,  6.39s/it]

[32m[I 2022-07-04 20:38:43,431][0m Trial 60 finished with value: 0.004424001966223096 and parameters: {'n_estimators': 51, 'max_depth': 20}. Best is trial 60 with value: 0.004424001966223096.[0m


 62%|██████▏   | 62/100 [06:00<04:09,  6.57s/it]

[32m[I 2022-07-04 20:38:50,439][0m Trial 61 finished with value: 0.005898669288297461 and parameters: {'n_estimators': 52, 'max_depth': 13}. Best is trial 60 with value: 0.004424001966223096.[0m


 63%|██████▎   | 63/100 [06:05<03:45,  6.10s/it]

[32m[I 2022-07-04 20:38:55,422][0m Trial 62 finished with value: 0.004950668866963941 and parameters: {'n_estimators': 36, 'max_depth': 18}. Best is trial 60 with value: 0.004424001966223096.[0m


 64%|██████▍   | 64/100 [06:08<03:00,  5.02s/it]

[32m[I 2022-07-04 20:38:57,934][0m Trial 63 finished with value: 0.005407113514272673 and parameters: {'n_estimators': 18, 'max_depth': 20}. Best is trial 60 with value: 0.004424001966223096.[0m


 65%|██████▌   | 65/100 [06:11<02:38,  4.53s/it]

[32m[I 2022-07-04 20:39:01,326][0m Trial 64 finished with value: 0.005688002528001124 and parameters: {'n_estimators': 25, 'max_depth': 30}. Best is trial 60 with value: 0.004424001966223096.[0m


 66%|██████▌   | 66/100 [06:23<03:45,  6.63s/it]

[32m[I 2022-07-04 20:39:12,854][0m Trial 65 finished with value: 0.00505600224711211 and parameters: {'n_estimators': 82, 'max_depth': 32}. Best is trial 60 with value: 0.004424001966223096.[0m


 67%|██████▋   | 67/100 [06:35<04:35,  8.36s/it]

[32m[I 2022-07-04 20:39:25,233][0m Trial 66 finished with value: 0.005196446753976335 and parameters: {'n_estimators': 90, 'max_depth': 16}. Best is trial 60 with value: 0.004424001966223096.[0m


 68%|██████▊   | 68/100 [06:43<04:23,  8.25s/it]

[32m[I 2022-07-04 20:39:33,224][0m Trial 67 finished with value: 0.01450089533373126 and parameters: {'n_estimators': 64, 'max_depth': 10}. Best is trial 60 with value: 0.004424001966223096.[0m


 69%|██████▉   | 69/100 [06:52<04:25,  8.56s/it]

[32m[I 2022-07-04 20:39:42,504][0m Trial 68 finished with value: 0.004880446613531828 and parameters: {'n_estimators': 67, 'max_depth': 86}. Best is trial 60 with value: 0.004424001966223096.[0m


 70%|███████   | 70/100 [06:56<03:32,  7.08s/it]

[32m[I 2022-07-04 20:39:46,125][0m Trial 69 finished with value: 0.7473403321512587 and parameters: {'n_estimators': 85, 'max_depth': 2}. Best is trial 60 with value: 0.004424001966223096.[0m


 71%|███████   | 71/100 [07:04<03:35,  7.42s/it]

[32m[I 2022-07-04 20:39:54,344][0m Trial 70 finished with value: 0.005161335627260279 and parameters: {'n_estimators': 58, 'max_depth': 37}. Best is trial 60 with value: 0.004424001966223096.[0m


 72%|███████▏  | 72/100 [07:10<03:17,  7.05s/it]

[32m[I 2022-07-04 20:40:00,520][0m Trial 71 finished with value: 0.005477335767704786 and parameters: {'n_estimators': 44, 'max_depth': 24}. Best is trial 60 with value: 0.004424001966223096.[0m


 73%|███████▎  | 73/100 [07:24<04:02,  9.00s/it]

[32m[I 2022-07-04 20:40:14,073][0m Trial 72 finished with value: 0.005091113373828166 and parameters: {'n_estimators': 100, 'max_depth': 27}. Best is trial 60 with value: 0.004424001966223096.[0m


 74%|███████▍  | 74/100 [07:31<03:41,  8.51s/it]

[32m[I 2022-07-04 20:40:21,441][0m Trial 73 finished with value: 0.004072890699062533 and parameters: {'n_estimators': 52, 'max_depth': 59}. Best is trial 73 with value: 0.004072890699062533.[0m


 75%|███████▌  | 75/100 [07:36<03:02,  7.31s/it]

[32m[I 2022-07-04 20:40:25,942][0m Trial 74 finished with value: 0.004845335486815772 and parameters: {'n_estimators': 33, 'max_depth': 14}. Best is trial 73 with value: 0.004072890699062533.[0m


 76%|███████▌  | 76/100 [07:44<02:59,  7.49s/it]

[32m[I 2022-07-04 20:40:33,851][0m Trial 75 finished with value: 0.00533689126084056 and parameters: {'n_estimators': 56, 'max_depth': 63}. Best is trial 73 with value: 0.004072890699062533.[0m


 77%|███████▋  | 77/100 [07:49<02:37,  6.84s/it]

[32m[I 2022-07-04 20:40:39,164][0m Trial 76 finished with value: 0.005161335627260279 and parameters: {'n_estimators': 38, 'max_depth': 20}. Best is trial 73 with value: 0.004072890699062533.[0m


 78%|███████▊  | 78/100 [07:59<02:48,  7.64s/it]

[32m[I 2022-07-04 20:40:48,674][0m Trial 77 finished with value: 0.00533689126084056 and parameters: {'n_estimators': 66, 'max_depth': 80}. Best is trial 73 with value: 0.004072890699062533.[0m


 79%|███████▉  | 79/100 [08:05<02:33,  7.32s/it]

[32m[I 2022-07-04 20:40:55,247][0m Trial 78 finished with value: 0.005547558021136898 and parameters: {'n_estimators': 48, 'max_depth': 45}. Best is trial 73 with value: 0.004072890699062533.[0m


 80%|████████  | 80/100 [08:17<02:51,  8.57s/it]

[32m[I 2022-07-04 20:41:06,734][0m Trial 79 finished with value: 0.004740002106667603 and parameters: {'n_estimators': 84, 'max_depth': 54}. Best is trial 73 with value: 0.004072890699062533.[0m


 81%|████████  | 81/100 [08:20<02:13,  7.05s/it]

[32m[I 2022-07-04 20:41:10,245][0m Trial 80 finished with value: 0.004985779993679997 and parameters: {'n_estimators': 25, 'max_depth': 35}. Best is trial 73 with value: 0.004072890699062533.[0m


 82%|████████▏ | 82/100 [08:31<02:29,  8.29s/it]

[32m[I 2022-07-04 20:41:21,433][0m Trial 81 finished with value: 0.005091113373828166 and parameters: {'n_estimators': 80, 'max_depth': 40}. Best is trial 73 with value: 0.004072890699062533.[0m


 83%|████████▎ | 83/100 [08:38<02:11,  7.71s/it]

[32m[I 2022-07-04 20:41:27,786][0m Trial 82 finished with value: 0.005231557880692392 and parameters: {'n_estimators': 45, 'max_depth': 59}. Best is trial 73 with value: 0.004072890699062533.[0m


 84%|████████▍ | 84/100 [08:47<02:11,  8.23s/it]

[32m[I 2022-07-04 20:41:37,217][0m Trial 83 finished with value: 0.004810224360099716 and parameters: {'n_estimators': 70, 'max_depth': 97}. Best is trial 73 with value: 0.004072890699062533.[0m


 85%|████████▌ | 85/100 [08:54<01:59,  7.99s/it]

[32m[I 2022-07-04 20:41:44,645][0m Trial 84 finished with value: 0.005161335627260279 and parameters: {'n_estimators': 53, 'max_depth': 21}. Best is trial 73 with value: 0.004072890699062533.[0m


 86%|████████▌ | 86/100 [09:04<01:56,  8.31s/it]

[32m[I 2022-07-04 20:41:53,703][0m Trial 85 finished with value: 0.035427126856500825 and parameters: {'n_estimators': 87, 'max_depth': 7}. Best is trial 73 with value: 0.004072890699062533.[0m


 87%|████████▋ | 87/100 [09:04<01:17,  5.95s/it]

[32m[I 2022-07-04 20:41:54,148][0m Trial 86 finished with value: 0.00867244829886591 and parameters: {'n_estimators': 3, 'max_depth': 52}. Best is trial 73 with value: 0.004072890699062533.[0m


 88%|████████▊ | 88/100 [09:09<01:09,  5.80s/it]

[32m[I 2022-07-04 20:41:59,600][0m Trial 87 finished with value: 0.005161335627260279 and parameters: {'n_estimators': 39, 'max_depth': 78}. Best is trial 73 with value: 0.004072890699062533.[0m


 89%|████████▉ | 89/100 [09:19<01:17,  7.07s/it]

[32m[I 2022-07-04 20:42:09,622][0m Trial 88 finished with value: 0.004669779853235491 and parameters: {'n_estimators': 70, 'max_depth': 25}. Best is trial 73 with value: 0.004072890699062533.[0m


 90%|█████████ | 90/100 [09:32<01:25,  8.59s/it]

[32m[I 2022-07-04 20:42:21,767][0m Trial 89 finished with value: 0.005301780134124504 and parameters: {'n_estimators': 88, 'max_depth': 29}. Best is trial 73 with value: 0.004072890699062533.[0m


 91%|█████████ | 91/100 [09:41<01:19,  8.79s/it]

[32m[I 2022-07-04 20:42:31,031][0m Trial 90 finished with value: 0.005617780274569011 and parameters: {'n_estimators': 69, 'max_depth': 45}. Best is trial 73 with value: 0.004072890699062533.[0m


 92%|█████████▏| 92/100 [09:52<01:15,  9.42s/it]

[32m[I 2022-07-04 20:42:41,919][0m Trial 91 finished with value: 0.00533689126084056 and parameters: {'n_estimators': 78, 'max_depth': 67}. Best is trial 73 with value: 0.004072890699062533.[0m


 93%|█████████▎| 93/100 [10:00<01:03,  9.10s/it]

[32m[I 2022-07-04 20:42:50,256][0m Trial 92 finished with value: 0.005372002387556617 and parameters: {'n_estimators': 61, 'max_depth': 55}. Best is trial 73 with value: 0.004072890699062533.[0m


 94%|█████████▍| 94/100 [10:07<00:50,  8.46s/it]

[32m[I 2022-07-04 20:42:57,224][0m Trial 93 finished with value: 0.00572311365471718 and parameters: {'n_estimators': 50, 'max_depth': 25}. Best is trial 73 with value: 0.004072890699062533.[0m


 95%|█████████▌| 95/100 [10:14<00:40,  8.12s/it]

[32m[I 2022-07-04 20:43:04,560][0m Trial 94 finished with value: 0.005231557880692392 and parameters: {'n_estimators': 55, 'max_depth': 15}. Best is trial 73 with value: 0.004072890699062533.[0m


 96%|█████████▌| 96/100 [10:19<00:28,  7.11s/it]

[32m[I 2022-07-04 20:43:09,327][0m Trial 95 finished with value: 0.00505600224711211 and parameters: {'n_estimators': 35, 'max_depth': 19}. Best is trial 73 with value: 0.004072890699062533.[0m


 97%|█████████▋| 97/100 [10:25<00:20,  6.85s/it]

[32m[I 2022-07-04 20:43:15,569][0m Trial 96 finished with value: 0.004880446613531828 and parameters: {'n_estimators': 45, 'max_depth': 23}. Best is trial 73 with value: 0.004072890699062533.[0m


 98%|█████████▊| 98/100 [10:27<00:10,  5.34s/it]

[32m[I 2022-07-04 20:43:17,386][0m Trial 97 finished with value: 0.005898669288297461 and parameters: {'n_estimators': 13, 'max_depth': 43}. Best is trial 73 with value: 0.004072890699062533.[0m


 99%|█████████▉| 99/100 [10:41<00:07,  7.88s/it]

[32m[I 2022-07-04 20:43:31,186][0m Trial 98 finished with value: 0.005477335767704786 and parameters: {'n_estimators': 100, 'max_depth': 32}. Best is trial 73 with value: 0.004072890699062533.[0m


100%|██████████| 100/100 [10:45<00:00,  6.46s/it]


[32m[I 2022-07-04 20:43:35,349][0m Trial 99 finished with value: 0.00533689126084056 and parameters: {'n_estimators': 29, 'max_depth': 27}. Best is trial 73 with value: 0.004072890699062533.[0m
Best parameter:{'n_estimators': 52, 'max_depth': 59}


In [20]:
preds =  model.predict(test_data)
f1score = f1_score(preds, test_labels, average="weighted")
print(f"F1 score of optimized model: {f1score}")

F1 score of optimized model: 0.9970387875278569


In [15]:
df = pd.DataFrame()
df["FeatureName"] = df_train.drop("Label", axis=1).columns
df["Importance"] = model.feature_importances_
df.sort_values("Importance", ascending=False).head(n=20)

Unnamed: 0,FeatureName,Importance
0,Destination Port,0.092784
63,Subflow Fwd Bytes,0.038117
65,Subflow Bwd Bytes,0.035389
53,Avg Fwd Segment Size,0.034009
12,Bwd Packet Length Mean,0.032598
6,Fwd Packet Length Max,0.031366
4,Total Length of Fwd Packets,0.031111
69,min_seg_size_forward,0.030677
66,Init_Win_bytes_forward,0.029622
34,Fwd Header Length,0.029445
