# XGB Classifier with Filter-Based FS

Steven Sison | March 9, 2024

## Description

This document will be used to train a model using the reduced feature set obtain by using the wrapper-based method, forward feature selection. The model will be evaluated in terms of the usual metrics (accuracy, precision, F1-score, recall) as well as the training time. The model will also be stored for future evaluation purposes.

## Training the Model

### Preliminaries

#### 1. Loading the Dataset

In [12]:
import pandas as pd                     # For data transformation
import numpy as numpy                   # For scientific calculations
import seaborn as sns                   # For data visualizations
import matplotlib.pyplot as plt         # For plotting
import plotly.graph_objects as go       # For plotting
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, ConfusionMatrixDisplay
from xgboost import XGBClassifier, DMatrix, train
from sklearn.pipeline import Pipeline
import time
from datetime import datetime
import joblib
import os
import optuna
from sklearn.metrics import mean_squared_error # or any other metric
from sklearn.model_selection import train_test_split

dataset = pd.read_csv("../../../02_feature-engineering/final-datasets/binary_new_Bacud_unbalanced_lexical.csv")      # Loading the dataset

dataset.head()

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,url_type,url_length,url_ip_in_domain,url_domain_entropy,url_is_digits_in_domain,url_query_length,url_number_of_parameters,url_number_of_digits,url_string_entropy,url_is_https,...,has_swf_in_string,has_cgi_in_string,has_php_in_string,has_abuse_in_string,has_admin_in_string,has_bin_in_string,has_personal_in_string,has_update_in_string,has_verification_in_string,url_scheme
0,1,16,0,3.169925,0,0,0,0,3.375,0,...,0,0,0,0,0,0,0,0,0,0
1,0,35,0,2.807355,1,0,0,1,4.079143,0,...,0,0,0,0,0,0,0,0,0,0
2,0,31,0,2.921928,0,0,0,1,3.708093,0,...,0,0,0,0,0,0,0,0,0,0
3,1,88,0,2.896292,0,49,4,7,4.660343,0,...,0,0,1,0,0,0,0,0,0,27
4,1,235,0,3.405822,0,194,3,22,4.980518,0,...,0,0,1,0,0,0,0,0,0,27


In [13]:
dataset.head()

Unnamed: 0,url_type,url_length,url_ip_in_domain,url_domain_entropy,url_is_digits_in_domain,url_query_length,url_number_of_parameters,url_number_of_digits,url_string_entropy,url_is_https,...,has_swf_in_string,has_cgi_in_string,has_php_in_string,has_abuse_in_string,has_admin_in_string,has_bin_in_string,has_personal_in_string,has_update_in_string,has_verification_in_string,url_scheme
0,1,16,0,3.169925,0,0,0,0,3.375,0,...,0,0,0,0,0,0,0,0,0,0
1,0,35,0,2.807355,1,0,0,1,4.079143,0,...,0,0,0,0,0,0,0,0,0,0
2,0,31,0,2.921928,0,0,0,1,3.708093,0,...,0,0,0,0,0,0,0,0,0,0
3,1,88,0,2.896292,0,49,4,7,4.660343,0,...,0,0,1,0,0,0,0,0,0,27
4,1,235,0,3.405822,0,194,3,22,4.980518,0,...,0,0,1,0,0,0,0,0,0,27


In [14]:
x_train, x_test, y_train, y_test = train_test_split(dataset.drop(columns=['url_type']), dataset['url_type'], test_size = 0.2, random_state=42)

#### 2. Preprocessing (Balancing)

In [15]:
dataset['url_type'].value_counts()

url_type
0    724778
1    380244
Name: count, dtype: int64

#### 3. Removing Unnecessary Features

In [16]:
important_features_hybrid = ['url_is_digits_in_domain',
                             'url_query_length',
                             'url_number_of_parameters',
                             'url_string_entropy',
                             'url_path_length',
                             'url_host_length',
                             'get_tld',
                             'url_num_periods',
                             'has_exe_in_string',
                             'has_php_in_string',
                             'url_scheme']

print(len(important_features_hybrid))

X_test_lexical = x_test[important_features_hybrid]
X_train_lexical = x_train[important_features_hybrid]

11


#### Hyper-parameter Tuning

In [17]:
# Define the objective function for Optuna
def objective_lexical(trial):
    # Define the search space for hyperparameters
    param = {
        'objective': 'binary:hinge',
        'eval_metric': 'error',
        'eta': trial.suggest_float('eta', 0.01, 0.3),
        'n_estimators': 100000, # Fix the boosting round and use early stopping
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 10.0),
        'min_child_weight': trial.suggest_float('min_child_weight', 0.1, 10.0),
        'lambda': trial.suggest_float('lambda', 0.1, 10.0),
        'alpha': trial.suggest_float('alpha', 0.0, 10.0),
    }
    
    # Split the data into further training and validation sets (three sets are preferable)
    train_data, valid_data, train_target, valid_target = train_test_split(X_train_lexical, y_train, test_size=0.2, random_state=42)
    
    # Convert the data into DMatrix format
    dtrain = DMatrix(train_data, label=train_target)
    dvalid = DMatrix(valid_data, label=valid_target)
    
    # Define the pruning callback for early stopping
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, 'validation-error')
    
    # Train the model with early stopping
    model = train(param, dtrain, num_boost_round=100000, evals=[(dvalid, 'validation')], early_stopping_rounds=100, callbacks=[pruning_callback])
    
    # Make predictions on the test set
    dtest = DMatrix(valid_data)
    y_pred = model.predict(dtest)
    
    # Calculate the root mean squared error
    error = mean_squared_error(valid_target, y_pred, squared=False)
    
    return error

# Create an Optuna study and optimize the objective function
study_lexical = optuna.create_study(direction='minimize')
study_lexical.optimize(objective_lexical, n_trials=100) # Control the number of trials

# Print the best hyperparameters and the best RMSE
best_params_lexical = study_lexical.best_params
best_error_lexical = study_lexical.best_value
print("Best Hyperparameters (33 Features): ", best_params_lexical)
print("Best Error (33 Features): ", best_error_lexical)

[I 2024-04-08 12:48:29,829] A new study created in memory with name: no-name-50b0811c-d536-4724-9866-9fbc1e709507


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.39624
[2]	validation-error:0.18124
[3]	validation-error:0.11478
[4]	validation-error:0.09743
[5]	validation-error:0.09746
[6]	validation-error:0.09569
[7]	validation-error:0.09214
[8]	validation-error:0.09067
[9]	validation-error:0.08789
[10]	validation-error:0.08713
[11]	validation-error:0.08700
[12]	validation-error:0.08577
[13]	validation-error:0.08500
[14]	validation-error:0.08477
[15]	validation-error:0.08452
[16]	validation-error:0.08423
[17]	validation-error:0.08403
[18]	validation-error:0.08382
[19]	validation-error:0.08355
[20]	validation-error:0.08323
[21]	validation-error:0.08311
[22]	validation-error:0.08263
[23]	validation-error:0.08192
[24]	validation-error:0.08188
[25]	validation-error:0.08182
[26]	validation-error:0.08171
[27]	validation-error:0.08159
[28]	validation-error:0.08130
[29]	validation-error:0.08132
[30]	validation-error:0.08111
[31]	validation-error:0.08123
[32]	validation-error:0.08121
[33]	validation-error:0.08093
[34]	validation-err

[I 2024-04-08 12:48:53,978] Trial 0 finished with value: 0.27358956067315116 and parameters: {'eta': 0.2720624526840041, 'max_depth': 5, 'subsample': 0.8146910789806376, 'colsample_bytree': 0.8472236145430684, 'gamma': 7.3310254286969645, 'min_child_weight': 0.3448281386560015, 'lambda': 0.26597051366726254, 'alpha': 5.985054533852917}. Best is trial 0 with value: 0.27358956067315116.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653
[2]	validation-error:0.39237
[3]	validation-error:0.19832
[4]	validation-error:0.14047
[5]	validation-error:0.12177
[6]	validation-error:0.10808
[7]	validation-error:0.09502
[8]	validation-error:0.09061
[9]	validation-error:0.08692
[10]	validation-error:0.08541
[11]	validation-error:0.08407
[12]	validation-error:0.08262
[13]	validation-error:0.08269
[14]	validation-error:0.08207
[15]	validation-error:0.08156
[16]	validation-error:0.08121
[17]	validation-error:0.08062
[18]	validation-error:0.07996
[19]	validation-error:0.07990
[20]	validation-error:0.07928
[21]	validation-error:0.07939
[22]	validation-error:0.07922
[23]	validation-error:0.07901
[24]	validation-error:0.07892
[25]	validation-error:0.07881
[26]	validation-error:0.07875
[27]	validation-error:0.07875
[28]	validation-error:0.07858
[29]	validation-error:0.07854
[30]	validation-error:0.07845
[31]	validation-error:0.07824
[32]	validation-error:0.07819
[33]	validation-error:0.07802
[34]	validation-err

[I 2024-04-08 12:49:07,151] Trial 1 finished with value: 0.2741884289792247 and parameters: {'eta': 0.1758834227663109, 'max_depth': 7, 'subsample': 0.6286789318912325, 'colsample_bytree': 0.8143272203823202, 'gamma': 9.300426838213825, 'min_child_weight': 2.7991767491898925, 'lambda': 5.278108532074833, 'alpha': 1.2015175137423983}. Best is trial 0 with value: 0.27358956067315116.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653
[2]	validation-error:0.65653
[3]	validation-error:0.65653
[4]	validation-error:0.65653
[5]	validation-error:0.65653
[6]	validation-error:0.65653
[7]	validation-error:0.62741
[8]	validation-error:0.34168
[9]	validation-error:0.28578
[10]	validation-error:0.22912
[11]	validation-error:0.19170
[12]	validation-error:0.16835
[13]	validation-error:0.15111
[14]	validation-error:0.13106
[15]	validation-error:0.12154
[16]	validation-error:0.11552
[17]	validation-error:0.10915
[18]	validation-error:0.10569
[19]	validation-error:0.10379
[20]	validation-error:0.10336
[21]	validation-error:0.10155
[22]	validation-error:0.09927
[23]	validation-error:0.09818
[24]	validation-error:0.09616
[25]	validation-error:0.09506
[26]	validation-error:0.09319
[27]	validation-error:0.09289
[28]	validation-error:0.09216
[29]	validation-error:0.09165
[30]	validation-error:0.09114
[31]	validation-error:0.09025
[32]	validation-error:0.08987
[33]	validation-error:0.08957
[34]	validation-err

[I 2024-04-08 12:49:56,820] Trial 2 finished with value: 0.27498146318417055 and parameters: {'eta': 0.07318913478883499, 'max_depth': 5, 'subsample': 0.612371434252369, 'colsample_bytree': 0.6274519523359129, 'gamma': 3.9125721466446706, 'min_child_weight': 4.982607700775993, 'lambda': 6.004096386055464, 'alpha': 3.2698333516147695}. Best is trial 0 with value: 0.27358956067315116.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.27072
[2]	validation-error:0.12667
[3]	validation-error:0.10770
[4]	validation-error:0.09277
[5]	validation-error:0.08993
[6]	validation-error:0.08452
[7]	validation-error:0.08228
[8]	validation-error:0.08039
[9]	validation-error:0.07965
[10]	validation-error:0.07864
[11]	validation-error:0.07791
[12]	validation-error:0.07779
[13]	validation-error:0.07765
[14]	validation-error:0.07729
[15]	validation-error:0.07693
[16]	validation-error:0.07668
[17]	validation-error:0.07654
[18]	validation-error:0.07638
[19]	validation-error:0.07653
[20]	validation-error:0.07624
[21]	validation-error:0.07609
[22]	validation-error:0.07600
[23]	validation-error:0.07592
[24]	validation-error:0.07582
[25]	validation-error:0.07575
[26]	validation-error:0.07567
[27]	validation-error:0.07562
[28]	validation-error:0.07548
[29]	validation-error:0.07550
[30]	validation-error:0.07552
[31]	validation-error:0.07538
[32]	validation-error:0.07532
[33]	validation-erro

[I 2024-04-08 12:50:12,434] Trial 3 finished with value: 0.2709095876962938 and parameters: {'eta': 0.29310928003210684, 'max_depth': 8, 'subsample': 0.9694641191270407, 'colsample_bytree': 0.7685364956013545, 'gamma': 6.067108355118743, 'min_child_weight': 0.5499182663815725, 'lambda': 2.9488197275146737, 'alpha': 6.169417628260698}. Best is trial 3 with value: 0.2709095876962938.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653
[2]	validation-error:0.65653
[3]	validation-error:0.65653
[4]	validation-error:0.42677
[5]	validation-error:0.27070
[6]	validation-error:0.20420
[7]	validation-error:0.16162
[8]	validation-error:0.13703
[9]	validation-error:0.12315
[10]	validation-error:0.10708
[11]	validation-error:0.10215
[12]	validation-error:0.09671
[13]	validation-error:0.09494
[14]	validation-error:0.09070
[15]	validation-error:0.08744
[16]	validation-error:0.08669
[17]	validation-error:0.08600
[18]	validation-error:0.08496
[19]	validation-error:0.08468
[20]	validation-error:0.08422
[21]	validation-error:0.08401
[22]	validation-error:0.08384
[23]	validation-error:0.08370
[24]	validation-error:0.08357
[25]	validation-error:0.08313
[26]	validation-error:0.08315
[27]	validation-error:0.08293
[28]	validation-error:0.08285
[29]	validation-error:0.08285
[30]	validation-error:0.08245
[31]	validation-error:0.08229
[32]	validation-error:0.08223
[33]	validation-error:0.08222
[34]	validation-err

[I 2024-04-08 12:51:09,013] Trial 4 finished with value: 0.2719410722721638 and parameters: {'eta': 0.10963931590606006, 'max_depth': 6, 'subsample': 0.6716958263874167, 'colsample_bytree': 0.6394940095013921, 'gamma': 1.4287241346541202, 'min_child_weight': 2.0636945114832255, 'lambda': 5.329530021062506, 'alpha': 9.75741106611429}. Best is trial 3 with value: 0.2709095876962938.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27682
[2]	validation-error:0.13040
[3]	validation-error:0.10348
[4]	validation-error:0.09243
[5]	validation-error:0.08548
[6]	validation-error:0.08229
[7]	validation-error:0.08134
[8]	validation-error:0.07990
[9]	validation-error:0.07930
[10]	validation-error:0.07883
[11]	validation-error:0.07840
[12]	validation-error:0.07808
[13]	validation-error:0.07767
[14]	validation-error:0.07700
[15]	validation-error:0.07675
[16]	validation-error:0.07660
[17]	validation-error:0.07651
[18]	validation-error:0.07641
[19]	validation-error:0.07630
[20]	validation-error:0.07623
[21]	validation-error:0.07611
[22]	validation-error:0.07611
[23]	validation-error:0.07595
[24]	validation-error:0.07594
[25]	validation-error:0.07594
[26]	validation-error:0.07590
[27]	validation-error:0.07581
[28]	validation-error:0.07577
[29]	validation-error:0.07582
[30]	validation-error:0.07564
[31]	validation-error:0.07550
[32]	validation-error:0.07542
[33]	validation-error:0.07535
[34]	validation-err

[I 2024-04-08 12:51:32,793] Trial 5 finished with value: 0.2721074098194993 and parameters: {'eta': 0.2672364651289911, 'max_depth': 9, 'subsample': 0.7489085636768584, 'colsample_bytree': 0.9636840634637243, 'gamma': 8.26266627575154, 'min_child_weight': 7.522619424755581, 'lambda': 6.436414858253958, 'alpha': 7.111368346381455}. Best is trial 3 with value: 0.2709095876962938.


[0]	validation-error:0.65653
[1]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:51:33,262] Trial 6 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:51:33,746] Trial 7 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653


[I 2024-04-08 12:51:34,249] Trial 8 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653


[I 2024-04-08 12:51:34,727] Trial 9 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:51:35,255] Trial 10 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653
[1]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:51:35,756] Trial 11 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:51:36,279] Trial 12 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:51:36,828] Trial 13 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:51:37,350] Trial 14 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:51:37,868] Trial 15 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:51:38,446] Trial 16 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.28414
[2]	validation-error:0.13639
[3]	validation-error:0.10796
[4]	validation-error:0.08647
[5]	validation-error:0.08980
[6]	validation-error:0.08549
[7]	validation-error:0.08130
[8]	validation-error:0.08008
[9]	validation-error:0.07941
[10]	validation-error:0.07854
[11]	validation-error:0.07787
[12]	validation-error:0.07814
[13]	validation-error:0.07776
[14]	validation-error:0.07763
[15]	validation-error:0.07721
[16]	validation-error:0.07670
[17]	validation-error:0.07646
[18]	validation-error:0.07599
[19]	validation-error:0.07610
[20]	validation-error:0.07602
[21]	validation-error:0.07586
[22]	validation-error:0.07569
[23]	validation-error:0.07558
[24]	validation-error:0.07567
[25]	validation-error:0.07530
[26]	validation-error:0.07535
[27]	validation-error:0.07525
[28]	validation-error:0.07507
[29]	validation-error:0.07508
[30]	validation-error:0.07510
[31]	validation-error:0.07482
[32]	validation-error:0.07486
[33]	validation-error:0.07472
[34]	validation-err

[I 2024-04-08 12:52:00,717] Trial 17 finished with value: 0.2694231902709173 and parameters: {'eta': 0.2923757639924323, 'max_depth': 10, 'subsample': 0.8070226122927233, 'colsample_bytree': 0.5696186734815429, 'gamma': 3.4549132350669423, 'min_child_weight': 1.4814618119792746, 'lambda': 4.3900427543904526, 'alpha': 4.466080762836852}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27426
[2]	validation-error:0.13402
[3]	validation-error:0.10699
[4]	validation-error:0.08606
[5]	validation-error:0.08938
[6]	validation-error:0.08535
[7]	validation-error:0.08098
[8]	validation-error:0.07975
[9]	validation-error:0.07944
[10]	validation-error:0.07839
[11]	validation-error:0.07804
[12]	validation-error:0.07796
[13]	validation-error:0.07757
[14]	validation-error:0.07775
[15]	validation-error:0.07697
[16]	validation-error:0.07677
[17]	validation-error:0.07654
[18]	validation-error:0.07612
[19]	validation-error:0.07620
[20]	validation-error:0.07611
[21]	validation-error:0.07617
[22]	validation-error:0.07585
[23]	validation-error:0.07573
[24]	validation-error:0.07576
[25]	validation-error:0.07539
[26]	validation-error:0.07550
[27]	validation-error:0.07547
[28]	validation-error:0.07529
[29]	validation-error:0.07528
[30]	validation-error:0.07516
[31]	validation-error:0.07507
[32]	validation-error:0.07512
[33]	validation-error:0.07505
[34]	validation-err

[I 2024-04-08 12:52:11,455] Trial 18 finished with value: 0.2712955509941713 and parameters: {'eta': 0.2983351044738248, 'max_depth': 10, 'subsample': 0.9324811034331741, 'colsample_bytree': 0.5742261621979692, 'gamma': 6.319873298931879, 'min_child_weight': 3.2447885531766607, 'lambda': 3.049246098833004, 'alpha': 4.253884691285152}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.30030
[2]	validation-error:0.12919
[3]	validation-error:0.10604
[4]	validation-error:0.08930
[5]	validation-error:0.08817
[6]	validation-error:0.08202
[7]	validation-error:0.07980
[8]	validation-error:0.07890
[9]	validation-error:0.07807
[10]	validation-error:0.07760
[11]	validation-error:0.07712
[12]	validation-error:0.07713
[13]	validation-error:0.07673
[14]	validation-error:0.07660
[15]	validation-error:0.07637
[16]	validation-error:0.07634
[17]	validation-error:0.07625
[18]	validation-error:0.07609
[19]	validation-error:0.07624
[20]	validation-error:0.07609
[21]	validation-error:0.07586
[22]	validation-error:0.07572
[23]	validation-error:0.07539
[24]	validation-error:0.07526
[25]	validation-error:0.07518
[26]	validation-error:0.07522
[27]	validation-error:0.07515
[28]	validation-error:0.07508
[29]	validation-error:0.07512
[30]	validation-error:0.07489
[31]	validation-error:0.07476
[32]	validation-error:0.07473
[33]	validation-error:0.07473
[34]	validation-err

[I 2024-04-08 12:52:33,753] Trial 19 finished with value: 0.26998940425503876 and parameters: {'eta': 0.2844599314267772, 'max_depth': 9, 'subsample': 0.7984991853513749, 'colsample_bytree': 0.7781570370731278, 'gamma': 3.1969099237258867, 'min_child_weight': 6.155973426605742, 'lambda': 0.4533755796829171, 'alpha': 6.688789783191034}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653


[I 2024-04-08 12:52:34,301] Trial 20 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.28185
[2]	validation-error:0.12534
[3]	validation-error:0.10444
[4]	validation-error:0.08830
[5]	validation-error:0.08740
[6]	validation-error:0.08270
[7]	validation-error:0.08015
[8]	validation-error:0.07909
[9]	validation-error:0.07818
[10]	validation-error:0.07797
[11]	validation-error:0.07731
[12]	validation-error:0.07712
[13]	validation-error:0.07703
[14]	validation-error:0.07694
[15]	validation-error:0.07675
[16]	validation-error:0.07647
[17]	validation-error:0.07611
[18]	validation-error:0.07603
[19]	validation-error:0.07607
[20]	validation-error:0.07594
[21]	validation-error:0.07593
[22]	validation-error:0.07561
[23]	validation-error:0.07536
[24]	validation-error:0.07525
[25]	validation-error:0.07513
[26]	validation-error:0.07513
[27]	validation-error:0.07506
[28]	validation-error:0.07491
[29]	validation-error:0.07474
[30]	validation-error:0.07460
[31]	validation-error:0.07449
[32]	validation-error:0.07451
[33]	validation-error:0.07447
[34]	validation-err

[I 2024-04-08 12:52:51,952] Trial 21 finished with value: 0.2701045986223927 and parameters: {'eta': 0.2909454379733738, 'max_depth': 9, 'subsample': 0.8174692695391544, 'colsample_bytree': 0.7767635843851678, 'gamma': 2.5772325936471097, 'min_child_weight': 6.721938668174777, 'lambda': 4.317918001020513, 'alpha': 6.391951793319712}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:52:52,525] Trial 22 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653


[I 2024-04-08 12:52:53,098] Trial 23 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27649
[2]	validation-error:0.12493
[3]	validation-error:0.10103
[4]	validation-error:0.08664
[5]	validation-error:0.08493
[6]	validation-error:0.08108
[7]	validation-error:0.07940
[8]	validation-error:0.07817
[9]	validation-error:0.07756
[10]	validation-error:0.07720
[11]	validation-error:0.07672
[12]	validation-error:0.07648
[13]	validation-error:0.07625
[14]	validation-error:0.07602
[15]	validation-error:0.07560
[16]	validation-error:0.07568
[17]	validation-error:0.07561
[18]	validation-error:0.07533
[19]	validation-error:0.07521
[20]	validation-error:0.07511
[21]	validation-error:0.07503
[22]	validation-error:0.07500
[23]	validation-error:0.07491
[24]	validation-error:0.07470
[25]	validation-error:0.07465
[26]	validation-error:0.07499
[27]	validation-error:0.07494
[28]	validation-error:0.07469
[29]	validation-error:0.07477
[30]	validation-error:0.07456
[31]	validation-error:0.07469
[32]	validation-error:0.07459
[33]	validation-error:0.07456
[34]	validation-err

[I 2024-04-08 12:53:10,958] Trial 24 finished with value: 0.2700522436377714 and parameters: {'eta': 0.29217291755715674, 'max_depth': 10, 'subsample': 0.8452621947943822, 'colsample_bytree': 0.804337939634657, 'gamma': 1.946325345488133, 'min_child_weight': 6.689758880914652, 'lambda': 4.488717109533209, 'alpha': 7.958443124669587}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653


[I 2024-04-08 12:53:11,519] Trial 25 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:53:12,110] Trial 26 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.31244


[I 2024-04-08 12:53:12,794] Trial 27 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:53:13,359] Trial 28 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:53:13,910] Trial 29 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653


[I 2024-04-08 12:53:14,494] Trial 30 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.26507
[2]	validation-error:0.12233
[3]	validation-error:0.10485
[4]	validation-error:0.08820
[5]	validation-error:0.08740
[6]	validation-error:0.08230
[7]	validation-error:0.08049
[8]	validation-error:0.07906
[9]	validation-error:0.07835
[10]	validation-error:0.07791
[11]	validation-error:0.07738
[12]	validation-error:0.07719
[13]	validation-error:0.07678
[14]	validation-error:0.07655
[15]	validation-error:0.07656
[16]	validation-error:0.07641
[17]	validation-error:0.07624
[18]	validation-error:0.07612
[19]	validation-error:0.07616
[20]	validation-error:0.07601
[21]	validation-error:0.07585
[22]	validation-error:0.07567
[23]	validation-error:0.07567
[24]	validation-error:0.07538
[25]	validation-error:0.07512
[26]	validation-error:0.07516
[27]	validation-error:0.07516
[28]	validation-error:0.07510
[29]	validation-error:0.07516
[30]	validation-error:0.07499
[31]	validation-error:0.07477
[32]	validation-error:0.07477
[33]	validation-error:0.07474
[34]	validation-err

[I 2024-04-08 12:53:33,356] Trial 31 finished with value: 0.2696225492940148 and parameters: {'eta': 0.2944369373130159, 'max_depth': 9, 'subsample': 0.7876110568738819, 'colsample_bytree': 0.7835913524103175, 'gamma': 2.89328565387789, 'min_child_weight': 6.811072275293385, 'lambda': 4.364982665782495, 'alpha': 6.49101990656948}. Best is trial 17 with value: 0.2694231902709173.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.31457


[I 2024-04-08 12:53:34,007] Trial 32 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:53:34,620] Trial 33 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27580
[2]	validation-error:0.12499
[3]	validation-error:0.10259
[4]	validation-error:0.08574
[5]	validation-error:0.08524
[6]	validation-error:0.08102
[7]	validation-error:0.07953
[8]	validation-error:0.07858
[9]	validation-error:0.07783
[10]	validation-error:0.07728
[11]	validation-error:0.07683
[12]	validation-error:0.07654
[13]	validation-error:0.07624
[14]	validation-error:0.07629
[15]	validation-error:0.07606
[16]	validation-error:0.07599
[17]	validation-error:0.07586
[18]	validation-error:0.07576
[19]	validation-error:0.07559
[20]	validation-error:0.07533
[21]	validation-error:0.07537
[22]	validation-error:0.07531
[23]	validation-error:0.07526
[24]	validation-error:0.07536
[25]	validation-error:0.07530
[26]	validation-error:0.07544
[27]	validation-error:0.07533
[28]	validation-error:0.07523
[29]	validation-error:0.07524
[30]	validation-error:0.07510
[31]	validation-error:0.07516
[32]	validation-error:0.07513
[33]	validation-error:0.07507
[34]	validation-err

[I 2024-04-08 12:53:47,951] Trial 34 finished with value: 0.27126427713086226 and parameters: {'eta': 0.29620234169845405, 'max_depth': 10, 'subsample': 0.8428729839812299, 'colsample_bytree': 0.7121070299936285, 'gamma': 4.658644553453021, 'min_child_weight': 4.992944271072421, 'lambda': 2.632340835646435, 'alpha': 9.08272048690966}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.25234
[2]	validation-error:0.12783
[3]	validation-error:0.10913


[I 2024-04-08 12:53:48,616] Trial 35 pruned. Trial was pruned at iteration 3.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653


[I 2024-04-08 12:53:49,192] Trial 36 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653


[I 2024-04-08 12:53:49,728] Trial 37 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.65653


[I 2024-04-08 12:53:50,561] Trial 38 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.65653


[I 2024-04-08 12:53:51,226] Trial 39 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:53:51,831] Trial 40 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653


[I 2024-04-08 12:53:52,539] Trial 41 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.28000
[2]	validation-error:0.12399
[3]	validation-error:0.10068
[4]	validation-error:0.08904
[5]	validation-error:0.08467
[6]	validation-error:0.08215
[7]	validation-error:0.08015
[8]	validation-error:0.07902
[9]	validation-error:0.07865
[10]	validation-error:0.07798
[11]	validation-error:0.07736
[12]	validation-error:0.07723
[13]	validation-error:0.07688
[14]	validation-error:0.07646
[15]	validation-error:0.07623
[16]	validation-error:0.07610
[17]	validation-error:0.07613
[18]	validation-error:0.07579
[19]	validation-error:0.07558
[20]	validation-error:0.07558
[21]	validation-error:0.07541
[22]	validation-error:0.07539
[23]	validation-error:0.07524
[24]	validation-error:0.07530
[25]	validation-error:0.07510
[26]	validation-error:0.07525
[27]	validation-error:0.07517
[28]	validation-error:0.07503
[29]	validation-error:0.07497
[30]	validation-error:0.07496
[31]	validation-error:0.07480
[32]	validation-error:0.07481
[33]	validation-erro

[I 2024-04-08 12:54:13,284] Trial 42 finished with value: 0.2708051788637667 and parameters: {'eta': 0.29091851861244966, 'max_depth': 9, 'subsample': 0.8582843402803888, 'colsample_bytree': 0.8216421864855062, 'gamma': 4.208151587417322, 'min_child_weight': 6.661884948633597, 'lambda': 4.648613577716963, 'alpha': 5.1911364456844415}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.32349


[I 2024-04-08 12:54:13,837] Trial 43 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.24128
[2]	validation-error:0.12371
[3]	validation-error:0.10030
[4]	validation-error:0.08696
[5]	validation-error:0.08581
[6]	validation-error:0.08115
[7]	validation-error:0.07954
[8]	validation-error:0.07829
[9]	validation-error:0.07808
[10]	validation-error:0.07747
[11]	validation-error:0.07707
[12]	validation-error:0.07680
[13]	validation-error:0.07663
[14]	validation-error:0.07624
[15]	validation-error:0.07614
[16]	validation-error:0.07612
[17]	validation-error:0.07584
[18]	validation-error:0.07563
[19]	validation-error:0.07561
[20]	validation-error:0.07556
[21]	validation-error:0.07548
[22]	validation-error:0.07542
[23]	validation-error:0.07521
[24]	validation-error:0.07504
[25]	validation-error:0.07482
[26]	validation-error:0.07464
[27]	validation-error:0.07451
[28]	validation-error:0.07456
[29]	validation-error:0.07447
[30]	validation-error:0.07431
[31]	validation-error:0.07429
[32]	validation-error:0.07421
[33]	validation-error:0.07421
[34]	validation-err

[I 2024-04-08 12:54:28,190] Trial 44 finished with value: 0.27011506840159394 and parameters: {'eta': 0.29958194698071333, 'max_depth': 10, 'subsample': 0.8069459054365202, 'colsample_bytree': 0.7558220766181277, 'gamma': 2.9596101312463636, 'min_child_weight': 8.868109955609693, 'lambda': 3.4283323836809134, 'alpha': 8.137438683126348}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:54:28,674] Trial 45 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:54:29,206] Trial 46 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653


[I 2024-04-08 12:54:29,870] Trial 47 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.65653


[I 2024-04-08 12:54:30,776] Trial 48 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.65653


[I 2024-04-08 12:54:31,448] Trial 49 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.39894


[I 2024-04-08 12:54:32,142] Trial 50 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27969
[2]	validation-error:0.12478
[3]	validation-error:0.10248
[4]	validation-error:0.08804
[5]	validation-error:0.08587
[6]	validation-error:0.08136
[7]	validation-error:0.07939
[8]	validation-error:0.07837
[9]	validation-error:0.07808
[10]	validation-error:0.07756
[11]	validation-error:0.07706
[12]	validation-error:0.07710
[13]	validation-error:0.07649
[14]	validation-error:0.07623
[15]	validation-error:0.07608
[16]	validation-error:0.07607
[17]	validation-error:0.07582
[18]	validation-error:0.07562
[19]	validation-error:0.07584
[20]	validation-error:0.07572
[21]	validation-error:0.07536
[22]	validation-error:0.07521
[23]	validation-error:0.07511
[24]	validation-error:0.07491
[25]	validation-error:0.07504
[26]	validation-error:0.07505
[27]	validation-error:0.07495
[28]	validation-error:0.07487
[29]	validation-error:0.07478
[30]	validation-error:0.07472
[31]	validation-error:0.07467
[32]	validation-error:0.07463
[33]	validation-error:0.07459
[34]	validation-err

[I 2024-04-08 12:54:52,685] Trial 51 finished with value: 0.2697798339337325 and parameters: {'eta': 0.29067112238710524, 'max_depth': 10, 'subsample': 0.7983318243004245, 'colsample_bytree': 0.7368666813218258, 'gamma': 2.862781518526059, 'min_child_weight': 8.626389346453537, 'lambda': 3.9353162479095767, 'alpha': 8.122114102492583}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27708
[2]	validation-error:0.12431
[3]	validation-error:0.09943
[4]	validation-error:0.08857
[5]	validation-error:0.08535
[6]	validation-error:0.08138
[7]	validation-error:0.08000
[8]	validation-error:0.07853
[9]	validation-error:0.07772
[10]	validation-error:0.07730
[11]	validation-error:0.07681
[12]	validation-error:0.07642
[13]	validation-error:0.07619
[14]	validation-error:0.07581
[15]	validation-error:0.07554
[16]	validation-error:0.07544
[17]	validation-error:0.07555
[18]	validation-error:0.07542
[19]	validation-error:0.07546
[20]	validation-error:0.07531
[21]	validation-error:0.07529
[22]	validation-error:0.07522
[23]	validation-error:0.07510
[24]	validation-error:0.07489
[25]	validation-error:0.07482
[26]	validation-error:0.07485
[27]	validation-error:0.07478
[28]	validation-error:0.07458
[29]	validation-error:0.07457
[30]	validation-error:0.07452
[31]	validation-error:0.07440
[32]	validation-error:0.07444
[33]	validation-error:0.07437
[34]	validation-err

[I 2024-04-08 12:55:12,871] Trial 52 finished with value: 0.27073206872293354 and parameters: {'eta': 0.28837303387410074, 'max_depth': 10, 'subsample': 0.8611571243742221, 'colsample_bytree': 0.8652521522040127, 'gamma': 2.8039596480199935, 'min_child_weight': 8.388200159885308, 'lambda': 3.920507726428179, 'alpha': 8.471624087746186}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.28887


[I 2024-04-08 12:55:13,423] Trial 53 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:13,994] Trial 54 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27830


[I 2024-04-08 12:55:14,607] Trial 55 pruned. Trial was pruned at iteration 2.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.43934


[I 2024-04-08 12:55:15,183] Trial 56 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:15,707] Trial 57 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.65653


[I 2024-04-08 12:55:16,279] Trial 58 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:16,884] Trial 59 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27995


[I 2024-04-08 12:55:17,444] Trial 60 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653


[I 2024-04-08 12:55:18,010] Trial 61 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.24210
[2]	validation-error:0.12174
[3]	validation-error:0.10205
[4]	validation-error:0.08698
[5]	validation-error:0.08685
[6]	validation-error:0.08128
[7]	validation-error:0.07963
[8]	validation-error:0.07849
[9]	validation-error:0.07779
[10]	validation-error:0.07711
[11]	validation-error:0.07659
[12]	validation-error:0.07659
[13]	validation-error:0.07651
[14]	validation-error:0.07645
[15]	validation-error:0.07607
[16]	validation-error:0.07598
[17]	validation-error:0.07574
[18]	validation-error:0.07571
[19]	validation-error:0.07546
[20]	validation-error:0.07560
[21]	validation-error:0.07548
[22]	validation-error:0.07539
[23]	validation-error:0.07512
[24]	validation-error:0.07490
[25]	validation-error:0.07467
[26]	validation-error:0.07490
[27]	validation-error:0.07460
[28]	validation-error:0.07465
[29]	validation-error:0.07451
[30]	validation-error:0.07437
[31]	validation-error:0.07439
[32]	validation-error:0.07430
[33]	validation-error:0.07432
[34]	validation-err

[I 2024-04-08 12:55:35,245] Trial 62 finished with value: 0.27027206642031404 and parameters: {'eta': 0.29633632005634464, 'max_depth': 10, 'subsample': 0.7785552681635954, 'colsample_bytree': 0.8035286389646261, 'gamma': 2.925666767537389, 'min_child_weight': 8.70503518986941, 'lambda': 3.5867407741522124, 'alpha': 8.733526907111731}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.38993


[I 2024-04-08 12:55:35,805] Trial 63 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.24095
[2]	validation-error:0.12465
[3]	validation-error:0.10083
[4]	validation-error:0.08720
[5]	validation-error:0.08562
[6]	validation-error:0.08125
[7]	validation-error:0.07962
[8]	validation-error:0.07801
[9]	validation-error:0.07769
[10]	validation-error:0.07707
[11]	validation-error:0.07676
[12]	validation-error:0.07676
[13]	validation-error:0.07642
[14]	validation-error:0.07624
[15]	validation-error:0.07596
[16]	validation-error:0.07623
[17]	validation-error:0.07577
[18]	validation-error:0.07573
[19]	validation-error:0.07554
[20]	validation-error:0.07560
[21]	validation-error:0.07551
[22]	validation-error:0.07534
[23]	validation-error:0.07533
[24]	validation-error:0.07505
[25]	validation-error:0.07490
[26]	validation-error:0.07482
[27]	validation-error:0.07473
[28]	validation-error:0.07481
[29]	validation-error:0.07467
[30]	validation-error:0.07451
[31]	validation-error:0.07446
[32]	validation-error:0.07440
[33]	validation-error:0.07430
[34]	validation-err

[I 2024-04-08 12:55:41,274] Trial 64 pruned. Trial was pruned at iteration 115.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:41,815] Trial 65 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.33261


[I 2024-04-08 12:55:42,378] Trial 66 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:42,970] Trial 67 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:43,513] Trial 68 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.33205


[I 2024-04-08 12:55:44,035] Trial 69 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:44,567] Trial 70 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:45,120] Trial 71 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.23654
[2]	validation-error:0.11693
[3]	validation-error:0.09944
[4]	validation-error:0.08705
[5]	validation-error:0.08380
[6]	validation-error:0.08134
[7]	validation-error:0.07959
[8]	validation-error:0.07852
[9]	validation-error:0.07787
[10]	validation-error:0.07722
[11]	validation-error:0.07696
[12]	validation-error:0.07685
[13]	validation-error:0.07651
[14]	validation-error:0.07591
[15]	validation-error:0.07569
[16]	validation-error:0.07543
[17]	validation-error:0.07525
[18]	validation-error:0.07521
[19]	validation-error:0.07507
[20]	validation-error:0.07503
[21]	validation-error:0.07509
[22]	validation-error:0.07492
[23]	validation-error:0.07482
[24]	validation-error:0.07475
[25]	validation-error:0.07470
[26]	validation-error:0.07470
[27]	validation-error:0.07448
[28]	validation-error:0.07442
[29]	validation-error:0.07439
[30]	validation-error:0.07426
[31]	validation-error:0.07432
[32]	validation-error:0.07430
[33]	validation-error:0.07424
[34]	validation-err

[I 2024-04-08 12:55:58,053] Trial 72 pruned. Trial was pruned at iteration 334.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:58,609] Trial 73 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:55:59,143] Trial 74 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27676


[I 2024-04-08 12:55:59,730] Trial 75 pruned. Trial was pruned at iteration 2.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.31283


[I 2024-04-08 12:56:00,250] Trial 76 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:56:00,824] Trial 77 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.27685
[2]	validation-error:0.12357
[3]	validation-error:0.10414


[I 2024-04-08 12:56:01,554] Trial 78 pruned. Trial was pruned at iteration 4.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.33763


[I 2024-04-08 12:56:02,225] Trial 79 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653


[I 2024-04-08 12:56:03,020] Trial 80 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.28299


[I 2024-04-08 12:56:03,856] Trial 81 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.23908
[2]	validation-error:0.12130
[3]	validation-error:0.09795
[4]	validation-error:0.08759
[5]	validation-error:0.08451
[6]	validation-error:0.08118
[7]	validation-error:0.07965
[8]	validation-error:0.07841
[9]	validation-error:0.07796
[10]	validation-error:0.07736
[11]	validation-error:0.07691
[12]	validation-error:0.07674
[13]	validation-error:0.07636
[14]	validation-error:0.07609
[15]	validation-error:0.07590
[16]	validation-error:0.07565
[17]	validation-error:0.07553
[18]	validation-error:0.07521
[19]	validation-error:0.07532
[20]	validation-error:0.07516
[21]	validation-error:0.07487
[22]	validation-error:0.07481
[23]	validation-error:0.07472
[24]	validation-error:0.07471
[25]	validation-error:0.07465
[26]	validation-error:0.07443
[27]	validation-error:0.07445
[28]	validation-error:0.07436
[29]	validation-error:0.07429
[30]	validation-error:0.07439
[31]	validation-error:0.07425
[32]	validation-error:0.07426
[33]	validation-erro

[I 2024-04-08 12:56:19,820] Trial 82 finished with value: 0.2699475032065587 and parameters: {'eta': 0.2986070117169972, 'max_depth': 10, 'subsample': 0.8505679789618315, 'colsample_bytree': 0.8836112947236965, 'gamma': 3.1247277697799087, 'min_child_weight': 8.679992535506184, 'lambda': 4.4771171232021985, 'alpha': 8.875313112535297}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.23960
[2]	validation-error:0.11805
[3]	validation-error:0.09863
[4]	validation-error:0.08692
[5]	validation-error:0.08388
[6]	validation-error:0.08102
[7]	validation-error:0.07948
[8]	validation-error:0.07794
[9]	validation-error:0.07756
[10]	validation-error:0.07701
[11]	validation-error:0.07657
[12]	validation-error:0.07637
[13]	validation-error:0.07584
[14]	validation-error:0.07556
[15]	validation-error:0.07529
[16]	validation-error:0.07524
[17]	validation-error:0.07520
[18]	validation-error:0.07517
[19]	validation-error:0.07513
[20]	validation-error:0.07501
[21]	validation-error:0.07499
[22]	validation-error:0.07487
[23]	validation-error:0.07493
[24]	validation-error:0.07481
[25]	validation-error:0.07466
[26]	validation-error:0.07457
[27]	validation-error:0.07449
[28]	validation-error:0.07436
[29]	validation-error:0.07423
[30]	validation-error:0.07416
[31]	validation-error:0.07408
[32]	validation-error:0.07410
[33]	validation-error:0.07409
[34]	validation-err

[I 2024-04-08 12:56:36,099] Trial 83 finished with value: 0.2701360067426511 and parameters: {'eta': 0.2975860850559708, 'max_depth': 10, 'subsample': 0.952400625154703, 'colsample_bytree': 0.8369242957978932, 'gamma': 3.1516611811384383, 'min_child_weight': 8.9390814586167, 'lambda': 4.4207436464950405, 'alpha': 8.866696680647586}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:56:36,721] Trial 84 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.19047
[2]	validation-error:0.11082
[3]	validation-error:0.09500
[4]	validation-error:0.08580
[5]	validation-error:0.08155
[6]	validation-error:0.08000
[7]	validation-error:0.07897
[8]	validation-error:0.07789
[9]	validation-error:0.07785
[10]	validation-error:0.07742
[11]	validation-error:0.07679
[12]	validation-error:0.07709
[13]	validation-error:0.07664
[14]	validation-error:0.07612
[15]	validation-error:0.07589
[16]	validation-error:0.07557
[17]	validation-error:0.07547
[18]	validation-error:0.07525
[19]	validation-error:0.07507
[20]	validation-error:0.07511
[21]	validation-error:0.07497
[22]	validation-error:0.07492
[23]	validation-error:0.07470
[24]	validation-error:0.07466
[25]	validation-error:0.07444
[26]	validation-error:0.07446
[27]	validation-error:0.07443
[28]	validation-error:0.07438
[29]	validation-error:0.07442
[30]	validation-error:0.07433
[31]	validation-error:0.07431
[32]	validation-error:0.07428
[33]	validation-erro

[I 2024-04-08 12:56:42,000] Trial 85 pruned. Trial was pruned at iteration 85.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:56:42,667] Trial 86 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.23687


[I 2024-04-08 12:56:43,293] Trial 87 pruned. Trial was pruned at iteration 2.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.33824


[I 2024-04-08 12:56:43,956] Trial 88 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653


[I 2024-04-08 12:56:44,663] Trial 89 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:56:45,322] Trial 90 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.26190
[2]	validation-error:0.12255
[3]	validation-error:0.10326
[4]	validation-error:0.08810
[5]	validation-error:0.08678
[6]	validation-error:0.08158
[7]	validation-error:0.07986
[8]	validation-error:0.07853
[9]	validation-error:0.07797
[10]	validation-error:0.07730
[11]	validation-error:0.07685
[12]	validation-error:0.07655
[13]	validation-error:0.07609
[14]	validation-error:0.07581
[15]	validation-error:0.07559
[16]	validation-error:0.07571
[17]	validation-error:0.07561
[18]	validation-error:0.07576
[19]	validation-error:0.07572
[20]	validation-error:0.07535
[21]	validation-error:0.07525
[22]	validation-error:0.07515
[23]	validation-error:0.07493
[24]	validation-error:0.07477
[25]	validation-error:0.07466
[26]	validation-error:0.07462
[27]	validation-error:0.07460
[28]	validation-error:0.07462
[29]	validation-error:0.07464
[30]	validation-error:0.07464
[31]	validation-error:0.07469
[32]	validation-error:0.07470
[33]	validation-erro

[I 2024-04-08 12:57:05,175] Trial 91 finished with value: 0.27002082577440667 and parameters: {'eta': 0.2936730060051364, 'max_depth': 10, 'subsample': 0.7942902616552955, 'colsample_bytree': 0.8001595341687123, 'gamma': 2.9904309453298037, 'min_child_weight': 8.833622040786404, 'lambda': 3.6884423202450547, 'alpha': 8.796335910221604}. Best is trial 17 with value: 0.2694231902709173.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:57:05,761] Trial 92 pruned. Trial was pruned at iteration 1.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.



[1]	validation-error:0.27630
[2]	validation-error:0.12461
[3]	validation-error:0.10090
[4]	validation-error:0.08605
[5]	validation-error:0.08506
[6]	validation-error:0.08121
[7]	validation-error:0.07924
[8]	validation-error:0.07797
[9]	validation-error:0.07760
[10]	validation-error:0.07731
[11]	validation-error:0.07671
[12]	validation-error:0.07672
[13]	validation-error:0.07646
[14]	validation-error:0.07645
[15]	validation-error:0.07597
[16]	validation-error:0.07591
[17]	validation-error:0.07583
[18]	validation-error:0.07565
[19]	validation-error:0.07559
[20]	validation-error:0.07563
[21]	validation-error:0.07548
[22]	validation-error:0.07530
[23]	validation-error:0.07532
[24]	validation-error:0.07516
[25]	validation-error:0.07482
[26]	validation-error:0.07501
[27]	validation-error:0.07493
[28]	validation-error:0.07490
[29]	validation-error:0.07496
[30]	validation-error:0.07482
[31]	validation-error:0.07472
[32]	validation-error:0.07464
[33]	validation-error:0.07462
[34]	validation-err

[I 2024-04-08 12:57:12,989] Trial 93 pruned. Trial was pruned at iteration 143.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:57:13,654] Trial 94 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653


[I 2024-04-08 12:57:14,331] Trial 95 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653


[I 2024-04-08 12:57:15,021] Trial 96 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.24661
[2]	validation-error:0.11939
[3]	validation-error:0.10024


[I 2024-04-08 12:57:15,821] Trial 97 pruned. Trial was pruned at iteration 4.


[0]	validation-error:0.65653


Parameters: { "n_estimators" } are not used.

[I 2024-04-08 12:57:16,467] Trial 98 pruned. Trial was pruned at iteration 1.
Parameters: { "n_estimators" } are not used.



[0]	validation-error:0.65653
[1]	validation-error:0.27658
[2]	validation-error:0.12718


[I 2024-04-08 12:57:17,123] Trial 99 pruned. Trial was pruned at iteration 2.


Best Hyperparameters (33 Features):  {'eta': 0.2923757639924323, 'max_depth': 10, 'subsample': 0.8070226122927233, 'colsample_bytree': 0.5696186734815429, 'gamma': 3.4549132350669423, 'min_child_weight': 1.4814618119792746, 'lambda': 4.3900427543904526, 'alpha': 4.466080762836852}
Best Error (33 Features):  0.2694231902709173


#### Model Training

In [18]:
from sklearn.model_selection import KFold
from xgboost import DMatrix, train

# Initialize CV
cv = KFold(n_splits=10, shuffle=True, random_state=1)

best_params_lexical['objective'] = 'binary:hinge'
best_params_lexical['eval_metric'] = 'error'

# Convert the data into DMatrix format
lexical_train = DMatrix(X_train_lexical, label=y_train)
lexical_valid = DMatrix(X_test_lexical, label=y_test)

# Train the Model
xgb_classifier_lexical = train(best_params_lexical, lexical_train, num_boost_round=3000)
y_pred_lexical = xgb_classifier_lexical.predict(lexical_valid)

print("Model training done.")

Model training done.


#### Evaluation

In [20]:
# Classification Report
print(classification_report(y_test, y_pred_lexical))

              precision    recall  f1-score   support

           0       0.93      0.96      0.95    144867
           1       0.92      0.87      0.89     76138

    accuracy                           0.93    221005
   macro avg       0.93      0.91      0.92    221005
weighted avg       0.93      0.93      0.93    221005



In [22]:
'''# Confusion Matrix for 12 Features
cm_up = confusion_matrix(y_test, y_pred_lexical, labels=xgb_classifier_lexical.classes)
disp = ConfusionMatrixDisplay(confusion_matrix = cm_up, display_labels = xgb_classifier_lexical.classes)
disp.plot()
plt.show()'''

AttributeError: 'Booster' object has no attribute 'classes'

In [None]:
'''# Cross Validation Score
scores = cross_val_score(XGBClassifier(**params_gbm),
                        X_train, y_train, scoring='accuracy', cv=cv).mean()

print(scores)'''

"# Cross Validation Score\nscores = cross_val_score(XGBClassifier(**params_gbm),\n                        X_train, y_train, scoring='accuracy', cv=cv).mean()\n\nprint(scores)"

In [23]:
# Dumping the model
joblib.dump(xgb_classifier_lexical, 'xgb_hybrid_lexical.sav')

['xgb_hybrid_lexical.sav']

In [25]:
import lexical_generator_hybrid
import time

def xgb_predict_maliciousness(url):

    numerical_values = lexical_generator_hybrid.lexical_generator(url)
    # print(numerical_values)
    numerical_values = DMatrix(numerical_values)

    match xgb_classifier_lexical.predict(numerical_values):
        case 0:
            return "Benign"
        case 1:
            return "Malware"
        case 2:
            return "Phishing"
        case 3:
            return "Defacement"

url = "www.facebook.com/"
print("Current URL: "+url)

print("------------- Filter-Based (Lexical) -------------")
for i in range(15):
    start = time.perf_counter()
    prediction = xgb_predict_maliciousness(url)
    end = time.perf_counter()
    print("Trial "+str(i))
    print(prediction)
    print(end-start)

Current URL: www.facebook.com/
------------- Filter-Based (Lexical) -------------
Trial 0
Malware
0.01717310003004968
Trial 1
Malware
0.02909849997377023
Trial 2
Malware
0.022313499997835606
Trial 3
Malware
0.021470999985467643
Trial 4
Malware
0.020548800006508827
Trial 5
Malware
0.019322500040289015
Trial 6
Malware
0.021581600012723356
Trial 7
Malware
0.019691599998623133
Trial 8
Malware
0.01976180000929162
Trial 9
Malware
0.020910499966703355
Trial 10
Malware
0.021918100013863295
Trial 11
Malware
0.023274099978152663
Trial 12
Malware
0.021161200013011694
Trial 13
Malware
0.022343699994962662
Trial 14
Malware
0.023142099962569773
