In [4]:
import optuna
import pandas as pd
import numpy as np
import gc
import time
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder, MinMaxScaler, RobustScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import roc_auc_score
import xgboost as xgb

# Paths to datasets
train_path = r"C:\Users\paulo\OneDrive\Documents\kaggle_competition_2_datasets\train.csv"

def import_data(path, index_col=None):
    """Import data from a CSV file and optimize memory usage."""
    start_time = time.time()
    df = pd.read_csv(path, index_col=index_col)
    df = reduce_mem_usage(df)
    print(f"Data imported and memory optimized in {time.time() - start_time:.2f} seconds")
    print(f"Data shape: {df.shape}, Memory usage: {df.memory_usage().sum() / 1024**2:.2f} MB")
    return df

def reduce_mem_usage(df):
    """Iterate through all the columns of a dataframe and modify the data type to reduce memory usage."""
    start_mem = df.memory_usage().sum() / 1024**2
    print(f"Initial memory usage: {start_mem:.2f} MB")
    for col in df.columns:
        col_type = df[col].dtype
        if isinstance(col_type, pd.IntervalDtype):
            continue
        if pd.api.types.is_integer_dtype(col_type):
            c_min = df[col].min()
            c_max = df[col].max()
            if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                df[col] = df[col].astype(np.int8)
            elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                df[col] = df[col].astype(np.int16)
            elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                df[col] = df[col].astype(np.int32)
            elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                df[col] = df[col].astype(np.int64)  
        elif pd.api.types.is_float_dtype(col_type):
            c_min = df[col].min()
            c_max = df[col].max()
            if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                df[col] = df[col].astype(np.float32)
            else:
                df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    print(f"Memory usage after optimization: {end_mem:.2f} MB")
    return df

def feature_engineering(df):
    """Feature engineering on the dataset."""
    start_time = time.time()
    # Binning age and converting to categorical labels instead of intervals
    df.loc[:, 'Age_Type'] = pd.cut(df['Age'], bins=7, labels=False)
    df.loc[:, 'Vehicle_Age'] = pd.Categorical(df['Vehicle_Age']).codes
    df.loc[:, 'Vehicle_Damage'] = pd.Categorical(df['Vehicle_Damage']).codes
    df.loc[:, 'Previously_Insured'] = pd.Categorical(df['Previously_Insured']).codes

    df.loc[:, 'Age_x_Vehicle_Age'] = df['Age_Type'] * df['Vehicle_Age']
    df.loc[:, 'Age_x_Vehicle_Damage'] = df['Age_Type'] * df['Vehicle_Damage']
    df.loc[:, 'Age_x_Previously_Insured'] = df['Age_Type'] * df['Previously_Insured']

    fac_pre = ['Policy_Sales_Channel', 'Vehicle_Damage', 'Annual_Premium', 'Vintage', 'Age_Type']
    col_pre = []
    for i in fac_pre:
        df.loc[:, 'Previously_Insured_x_' + i] = pd.factorize(df['Previously_Insured'].astype(str) + df[i].astype(str))[0]
        col_pre.append('Previously_Insured_x_' + i)

    fac_pro = fac_pre[1:]
    col_pro = []
    for i in fac_pro:
        df.loc[:, 'Policy_Sales_Channel_x_' + i] = pd.factorize(df['Policy_Sales_Channel'].astype(str) + df[i].astype(str))[0]
        col_pro.append('Policy_Sales_Channel_x_' + i)
    print(f"Feature engineering completed in {time.time() - start_time:.2f} seconds")
    print(f"Data shape after feature engineering: {df.shape}, Memory usage: {df.memory_usage().sum() / 1024**2:.2f} MB")
    return df, col_pre, col_pro

# Load and optimize data
train_df = import_data(train_path, index_col='id')

# Create a stratified sample of 2% from the training data
start_time = time.time()
sample_fraction = 0.02
train_sample_df, _ = train_test_split(train_df, stratify=train_df['Response'], test_size=(1 - sample_fraction), random_state=42)
print(f"Stratified sample created in {time.time() - start_time:.2f} seconds")
print(f"Sample data shape: {train_sample_df.shape}, Memory usage: {train_sample_df.memory_usage().sum() / 1024**2:.2f} MB")

# Apply feature engineering to the sampled dataset
train_sample_df, col_pre, col_pro = feature_engineering(train_sample_df)

# Split the training data into training and validation sets
start_time = time.time()
X = train_sample_df.drop('Response', axis=1)
y = train_sample_df['Response']
X_train, X_valid, y_train, y_valid = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
print(f"Training and validation sets created in {time.time() - start_time:.2f} seconds")
print(f"Training set shape: {X_train.shape}, Memory usage: {X_train.memory_usage().sum() / 1024**2:.2f} MB")
print(f"Validation set shape: {X_valid.shape}, Memory usage: {X_valid.memory_usage().sum() / 1024**2:.2f} MB")

# Define the ColumnTransformer
start_time = time.time()
coltrans = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(sparse_output=False, dtype=np.float32), ['Gender', 'Vehicle_Damage']),
        ('minmax', MinMaxScaler(), ['Age', 'Region_Code', 'Previously_Insured', 'Policy_Sales_Channel', 'Vintage']),
        ('ordinal', OrdinalEncoder(categories=[[0, 1, 2]], dtype=np.float32), ['Vehicle_Age']),
        ('robust', RobustScaler(), ['Annual_Premium']),
        ('standard', StandardScaler(), ['Age_Type', 'Age_x_Vehicle_Age', 'Age_x_Vehicle_Damage', 'Age_x_Previously_Insured']),
        ('standard_2', StandardScaler(), col_pre + col_pro),
    ],
    remainder='passthrough'  # Keeps columns not specified in transformers
)

# Fit the transformer on the training data and transform both training and validation sets
X_train = coltrans.fit_transform(X_train)
X_valid = coltrans.transform(X_valid)
print(f"Data transformed in {time.time() - start_time:.2f} seconds")
print(f"Transformed training set shape: {X_train.shape}, Memory usage: {X_train.nbytes / 1024**2:.2f} MB")
print(f"Transformed validation set shape: {X_valid.shape}, Memory usage: {X_valid.nbytes / 1024**2:.2f} MB")

# Get feature names after transformation
feature_names = coltrans.get_feature_names_out()
gc.collect()

# Calculate the ratio for scale_pos_weight
ratio = len(train_sample_df[train_sample_df['Response'] == 0]) / len(train_sample_df[train_sample_df['Response'] == 1])
print(f"Scale pos weight ratio: {ratio}")

# Define the Optuna objective function
def objective(trial):
    params = {
        'random_state': 512,
        'objective': "binary:logistic",
        'eval_metric': 'auc',
        'max_depth': trial.suggest_int('max_depth', 7, 9),
        'min_child_weight': trial.suggest_int('min_child_weight', 12, 18),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 0.7),
        'gamma': trial.suggest_float('gamma', 0.05, 0.2),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 0.15),
        'subsample': trial.suggest_float('subsample', 0.75, 0.85),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 0.3),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.7, 0.9),
        'max_bin': trial.suggest_int('max_bin', 160000, 280000),
        'scale_pos_weight': ratio,
        # 'tree_method': 'hist',
        # 'device': 'cuda',
    }
    
    num_boost_round = trial.suggest_int('num_boost_round', 2500, 3500)
    
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dvalid = xgb.DMatrix(X_valid, label=y_valid)
    
    model = xgb.train(
        params,
        dtrain,
        num_boost_round=num_boost_round,
        evals=[(dtrain, 'train'), (dvalid, 'valid')],
        verbose_eval=100,
        early_stopping_rounds=10,
    )
    
    valid_preds = model.predict(dvalid, iteration_range=(0, model.best_iteration))
    auc_score = roc_auc_score(y_valid, valid_preds)
    
    return auc_score

# Create and run the Optuna study
print("Starting Optuna study")
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=25)

# Extract the best hyperparameters
best_params = study.best_params
best_params['random_state'] = 512
best_params['objective'] = "binary:logistic"
best_params['eval_metric'] = 'auc'
best_params['scale_pos_weight'] = ratio
best_params['tree_method'] = 'hist'
best_params['device'] = 'cuda'

# Output the best hyperparameters
print("Best hyperparameters:", best_params)


Initial memory usage: 1053.30 MB
Memory usage after optimization: 548.59 MB
Data imported and memory optimized in 7.38 seconds
Data shape: (11504798, 11), Memory usage: 548.59 MB
Stratified sample created in 7.51 seconds
Sample data shape: (230095, 11), Memory usage: 10.97 MB
Feature engineering completed in 1.17 seconds
Data shape after feature engineering: (230095, 24), Memory usage: 33.79 MB
Training and validation sets created in 0.10 seconds
Training set shape: (184076, 23), Memory usage: 26.86 MB
Validation set shape: (46019, 23), Memory usage: 6.71 MB


[I 2024-07-27 11:25:14,414] A new study created in memory with name: no-name-a8f730af-659e-42d4-9058-c0ff24ad9352


Data transformed in 0.26 seconds
Transformed training set shape: (184076, 25), Memory usage: 35.11 MB
Transformed validation set shape: (46019, 25), Memory usage: 8.78 MB
Scale pos weight ratio: 7.130278082046571
Starting Optuna study
[0]	train-auc:0.84792	valid-auc:0.84608
[100]	train-auc:0.89431	valid-auc:0.86989
[143]	train-auc:0.90244	valid-auc:0.87055


[I 2024-07-27 11:25:28,056] Trial 0 finished with value: 0.8705578606914601 and parameters: {'max_depth': 7, 'min_child_weight': 13, 'colsample_bytree': 0.6321667978993475, 'gamma': 0.07639815657030134, 'learning_rate': 0.12242598863448688, 'subsample': 0.8096162909662507, 'reg_alpha': 0.2780718793571905, 'reg_lambda': 0.8276102513823306, 'max_bin': 184854, 'num_boost_round': 3200}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85099	valid-auc:0.84932
[100]	train-auc:0.89138	valid-auc:0.86813
[148]	train-auc:0.90106	valid-auc:0.86911


[I 2024-07-27 11:25:42,214] Trial 1 finished with value: 0.8692353748779615 and parameters: {'max_depth': 7, 'min_child_weight': 14, 'colsample_bytree': 0.6625090382186193, 'gamma': 0.14912234017179782, 'learning_rate': 0.11017495761802458, 'subsample': 0.8149959122315166, 'reg_alpha': 0.2743359832809267, 'reg_lambda': 0.777550429471253, 'max_bin': 214912, 'num_boost_round': 2886}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85366	valid-auc:0.85039
[82]	train-auc:0.90135	valid-auc:0.86914


[I 2024-07-27 11:25:52,047] Trial 2 finished with value: 0.8691928392325521 and parameters: {'max_depth': 8, 'min_child_weight': 16, 'colsample_bytree': 0.661226767787537, 'gamma': 0.18875671926070248, 'learning_rate': 0.14506413372982133, 'subsample': 0.8173037859227296, 'reg_alpha': 0.16394808396766092, 'reg_lambda': 0.8447138750710668, 'max_bin': 220559, 'num_boost_round': 2689}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85110	valid-auc:0.84950
[83]	train-auc:0.88756	valid-auc:0.86791


[I 2024-07-27 11:26:00,522] Trial 3 finished with value: 0.8680707194449254 and parameters: {'max_depth': 7, 'min_child_weight': 16, 'colsample_bytree': 0.67312998560362, 'gamma': 0.16077502445620653, 'learning_rate': 0.12018002315176496, 'subsample': 0.8409837404093006, 'reg_alpha': 0.18919802172076325, 'reg_lambda': 0.7698573894460294, 'max_bin': 223591, 'num_boost_round': 3176}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85239	valid-auc:0.84791
[100]	train-auc:0.90425	valid-auc:0.86867
[101]	train-auc:0.90455	valid-auc:0.86877


[I 2024-07-27 11:26:14,409] Trial 4 finished with value: 0.8688483865259824 and parameters: {'max_depth': 9, 'min_child_weight': 15, 'colsample_bytree': 0.5564979244141043, 'gamma': 0.07127696596828474, 'learning_rate': 0.1010219342857838, 'subsample': 0.7585136536916725, 'reg_alpha': 0.0591635152691842, 'reg_lambda': 0.7023379696823856, 'max_bin': 180158, 'num_boost_round': 2587}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85400	valid-auc:0.85107
[100]	train-auc:0.90005	valid-auc:0.86962
[135]	train-auc:0.90906	valid-auc:0.87016


[I 2024-07-27 11:26:30,417] Trial 5 finished with value: 0.8702271057191038 and parameters: {'max_depth': 8, 'min_child_weight': 14, 'colsample_bytree': 0.6642627238423212, 'gamma': 0.055712900370231876, 'learning_rate': 0.10686422731691, 'subsample': 0.841366836468716, 'reg_alpha': 0.027470629212627246, 'reg_lambda': 0.7377511754519348, 'max_bin': 252713, 'num_boost_round': 2843}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85289	valid-auc:0.84756
[100]	train-auc:0.91357	valid-auc:0.86881
[108]	train-auc:0.91579	valid-auc:0.86890


[I 2024-07-27 11:26:45,426] Trial 6 finished with value: 0.8689288590728598 and parameters: {'max_depth': 9, 'min_child_weight': 13, 'colsample_bytree': 0.5113835134500873, 'gamma': 0.07916206972390805, 'learning_rate': 0.13110942460674438, 'subsample': 0.802292071514886, 'reg_alpha': 0.1343774802607736, 'reg_lambda': 0.8765317233299167, 'max_bin': 193055, 'num_boost_round': 3110}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85692	valid-auc:0.85080
[100]	train-auc:0.90659	valid-auc:0.86957
[145]	train-auc:0.91782	valid-auc:0.86988


[I 2024-07-27 11:27:05,819] Trial 7 finished with value: 0.869938282273486 and parameters: {'max_depth': 9, 'min_child_weight': 18, 'colsample_bytree': 0.6484594397295412, 'gamma': 0.08837799019423018, 'learning_rate': 0.10416535886099319, 'subsample': 0.8411733034463486, 'reg_alpha': 0.07117541464862424, 'reg_lambda': 0.7335431594409376, 'max_bin': 261679, 'num_boost_round': 3008}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85137	valid-auc:0.84864
[100]	train-auc:0.89767	valid-auc:0.86962
[142]	train-auc:0.90809	valid-auc:0.87040


[I 2024-07-27 11:27:22,745] Trial 8 finished with value: 0.8704584065608337 and parameters: {'max_depth': 8, 'min_child_weight': 13, 'colsample_bytree': 0.5856094050114243, 'gamma': 0.16845255721079913, 'learning_rate': 0.10017837798368663, 'subsample': 0.8432911537926918, 'reg_alpha': 0.2546014339140165, 'reg_lambda': 0.7192773747146983, 'max_bin': 229371, 'num_boost_round': 2987}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85000	valid-auc:0.84636
[82]	train-auc:0.89052	valid-auc:0.86959


[I 2024-07-27 11:27:32,547] Trial 9 finished with value: 0.8693874573757069 and parameters: {'max_depth': 8, 'min_child_weight': 18, 'colsample_bytree': 0.5080663646716673, 'gamma': 0.18604626880674824, 'learning_rate': 0.10112790838740564, 'subsample': 0.8315239723829354, 'reg_alpha': 0.12867376325871116, 'reg_lambda': 0.7221012499528079, 'max_bin': 278702, 'num_boost_round': 3220}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.84796	valid-auc:0.84655
[88]	train-auc:0.89211	valid-auc:0.86866


[I 2024-07-27 11:27:41,461] Trial 10 finished with value: 0.8687998819254436 and parameters: {'max_depth': 7, 'min_child_weight': 12, 'colsample_bytree': 0.6129122693983889, 'gamma': 0.10876184618404468, 'learning_rate': 0.1289589459892116, 'subsample': 0.7752105217442893, 'reg_alpha': 0.22434109583555342, 'reg_lambda': 0.8283990130132255, 'max_bin': 162692, 'num_boost_round': 3471}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.84793	valid-auc:0.84653
[100]	train-auc:0.89281	valid-auc:0.86943
[147]	train-auc:0.90302	valid-auc:0.86990


[I 2024-07-27 11:27:55,801] Trial 11 finished with value: 0.8700060420622442 and parameters: {'max_depth': 7, 'min_child_weight': 12, 'colsample_bytree': 0.5970670031368251, 'gamma': 0.12546855595137213, 'learning_rate': 0.1182380005049146, 'subsample': 0.7887914083074112, 'reg_alpha': 0.2967921646482643, 'reg_lambda': 0.7998798830515031, 'max_bin': 200062, 'num_boost_round': 3379}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85138	valid-auc:0.84851
[78]	train-auc:0.89941	valid-auc:0.86881


[I 2024-07-27 11:28:05,467] Trial 12 finished with value: 0.869166643246124 and parameters: {'max_depth': 8, 'min_child_weight': 13, 'colsample_bytree': 0.6028508453989734, 'gamma': 0.13487210514521342, 'learning_rate': 0.13918200299335287, 'subsample': 0.8169992800523892, 'reg_alpha': 0.24194804988594085, 'reg_lambda': 0.8871655483597912, 'max_bin': 227903, 'num_boost_round': 3316}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.84801	valid-auc:0.84639
[87]	train-auc:0.88843	valid-auc:0.86845


[I 2024-07-27 11:28:14,259] Trial 13 finished with value: 0.8685121441423648 and parameters: {'max_depth': 7, 'min_child_weight': 13, 'colsample_bytree': 0.5609130307577386, 'gamma': 0.09889764815668632, 'learning_rate': 0.11467488812306945, 'subsample': 0.8002482108715274, 'reg_alpha': 0.2447844416177144, 'reg_lambda': 0.8365838119004029, 'max_bin': 242299, 'num_boost_round': 2978}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85136	valid-auc:0.84831
[84]	train-auc:0.89782	valid-auc:0.86885


[I 2024-07-27 11:28:24,420] Trial 14 finished with value: 0.86885676976696 and parameters: {'max_depth': 8, 'min_child_weight': 14, 'colsample_bytree': 0.6235972010194363, 'gamma': 0.17098778304286638, 'learning_rate': 0.12425998443174774, 'subsample': 0.8267360682730625, 'reg_alpha': 0.21407571607657497, 'reg_lambda': 0.8051491844296226, 'max_bin': 164595, 'num_boost_round': 2777}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.84809	valid-auc:0.84630
[88]	train-auc:0.89271	valid-auc:0.86917


[I 2024-07-27 11:28:33,035] Trial 15 finished with value: 0.8692285194443475 and parameters: {'max_depth': 7, 'min_child_weight': 12, 'colsample_bytree': 0.5641534970246658, 'gamma': 0.1988901088557531, 'learning_rate': 0.13677507510348838, 'subsample': 0.8498157689158581, 'reg_alpha': 0.2938917317627166, 'reg_lambda': 0.7624308149954543, 'max_bin': 202199, 'num_boost_round': 3063}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85468	valid-auc:0.85037
[100]	train-auc:0.91018	valid-auc:0.86877
[102]	train-auc:0.91044	valid-auc:0.86876


[I 2024-07-27 11:28:48,268] Trial 16 finished with value: 0.868932332755218 and parameters: {'max_depth': 9, 'min_child_weight': 15, 'colsample_bytree': 0.6972500301056909, 'gamma': 0.05390813755210673, 'learning_rate': 0.1108221746397575, 'subsample': 0.7849411617496432, 'reg_alpha': 0.2635574499748503, 'reg_lambda': 0.8569243787483096, 'max_bin': 183304, 'num_boost_round': 3266}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85082	valid-auc:0.84792
[61]	train-auc:0.89152	valid-auc:0.86737


[I 2024-07-27 11:28:56,063] Trial 17 finished with value: 0.8673637999134447 and parameters: {'max_depth': 8, 'min_child_weight': 13, 'colsample_bytree': 0.5816944158161363, 'gamma': 0.1125369659011193, 'learning_rate': 0.12471615594033182, 'subsample': 0.750638399915373, 'reg_alpha': 0.1871411227417492, 'reg_lambda': 0.8104904151093101, 'max_bin': 239284, 'num_boost_round': 2936}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.84792	valid-auc:0.84637
[83]	train-auc:0.89293	valid-auc:0.86944


[I 2024-07-27 11:29:04,544] Trial 18 finished with value: 0.8694045193504901 and parameters: {'max_depth': 7, 'min_child_weight': 16, 'colsample_bytree': 0.6332580733070305, 'gamma': 0.14290819422261747, 'learning_rate': 0.14483279022867734, 'subsample': 0.8090034076006115, 'reg_alpha': 0.20227913269807762, 'reg_lambda': 0.7866525758082672, 'max_bin': 178052, 'num_boost_round': 3105}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.84986	valid-auc:0.84597
[100]	train-auc:0.89997	valid-auc:0.86896
[129]	train-auc:0.90747	valid-auc:0.86940


[I 2024-07-27 11:29:18,951] Trial 19 finished with value: 0.8695015263627319 and parameters: {'max_depth': 8, 'min_child_weight': 14, 'colsample_bytree': 0.5344415584469988, 'gamma': 0.16671360474643918, 'learning_rate': 0.11489621648865908, 'subsample': 0.7892593780306147, 'reg_alpha': 0.26471552831247364, 'reg_lambda': 0.7548717321908198, 'max_bin': 210777, 'num_boost_round': 3411}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85391	valid-auc:0.84950
[55]	train-auc:0.89931	valid-auc:0.86744


[I 2024-07-27 11:29:27,923] Trial 20 finished with value: 0.8675346210341689 and parameters: {'max_depth': 9, 'min_child_weight': 17, 'colsample_bytree': 0.5859850260475636, 'gamma': 0.1213292719873037, 'learning_rate': 0.14916867902548137, 'subsample': 0.829604282032402, 'reg_alpha': 0.10604968581201293, 'reg_lambda': 0.7053583808602726, 'max_bin': 235218, 'num_boost_round': 2783}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85432	valid-auc:0.85148
[84]	train-auc:0.89758	valid-auc:0.86880


[I 2024-07-27 11:29:39,121] Trial 21 finished with value: 0.8685880420224947 and parameters: {'max_depth': 8, 'min_child_weight': 14, 'colsample_bytree': 0.6887524005398123, 'gamma': 0.05834473876491367, 'learning_rate': 0.10746789833878534, 'subsample': 0.8457764734760957, 'reg_alpha': 0.04675879503997568, 'reg_lambda': 0.7402170072571728, 'max_bin': 255547, 'num_boost_round': 2839}. Best is trial 0 with value: 0.8705578606914601.


[0]	train-auc:0.85139	valid-auc:0.84815
[100]	train-auc:0.89852	valid-auc:0.86989
[149]	train-auc:0.91186	valid-auc:0.87028


[I 2024-07-27 11:29:58,153] Trial 22 finished with value: 0.8706047915190843 and parameters: {'max_depth': 8, 'min_child_weight': 13, 'colsample_bytree': 0.6392097679152726, 'gamma': 0.06286971658103871, 'learning_rate': 0.10596700150775383, 'subsample': 0.835578953888964, 'reg_alpha': 0.012132926499371949, 'reg_lambda': 0.7228323760773259, 'max_bin': 251957, 'num_boost_round': 2608}. Best is trial 22 with value: 0.8706047915190843.


[0]	train-auc:0.85130	valid-auc:0.84802
[100]	train-auc:0.89843	valid-auc:0.86917
[149]	train-auc:0.91021	valid-auc:0.87044


[I 2024-07-27 11:30:17,119] Trial 23 finished with value: 0.8704662031938266 and parameters: {'max_depth': 8, 'min_child_weight': 13, 'colsample_bytree': 0.6393593105569539, 'gamma': 0.06952885919307983, 'learning_rate': 0.10062024240087251, 'subsample': 0.8321686295855483, 'reg_alpha': 0.08947279884286907, 'reg_lambda': 0.7176284196421847, 'max_bin': 248766, 'num_boost_round': 2502}. Best is trial 22 with value: 0.8706047915190843.


[0]	train-auc:0.84799	valid-auc:0.84587
[100]	train-auc:0.89235	valid-auc:0.86951
[113]	train-auc:0.89538	valid-auc:0.86958


[I 2024-07-27 11:30:29,165] Trial 24 finished with value: 0.8695955084039474 and parameters: {'max_depth': 7, 'min_child_weight': 12, 'colsample_bytree': 0.6384227330614574, 'gamma': 0.07321218933002964, 'learning_rate': 0.11377049978691271, 'subsample': 0.8258517914788286, 'reg_alpha': 0.011673791271104898, 'reg_lambda': 0.8163719052518412, 'max_bin': 275684, 'num_boost_round': 2500}. Best is trial 22 with value: 0.8706047915190843.


Best hyperparameters: {'max_depth': 8, 'min_child_weight': 13, 'colsample_bytree': 0.6392097679152726, 'gamma': 0.06286971658103871, 'learning_rate': 0.10596700150775383, 'subsample': 0.835578953888964, 'reg_alpha': 0.012132926499371949, 'reg_lambda': 0.7228323760773259, 'max_bin': 251957, 'num_boost_round': 2608, 'random_state': 512, 'objective': 'binary:logistic', 'eval_metric': 'auc', 'scale_pos_weight': 7.130278082046571, 'tree_method': 'hist', 'device': 'cuda'}


In [5]:
import optuna.visualization as vis
import plotly.io as pio

# Optimization history
opt_history = vis.plot_optimization_history(study)
pio.write_image(opt_history, 'optimization_history.png')

# Hyperparameter importance
param_importance = vis.plot_param_importances(study)
pio.write_image(param_importance, 'param_importance.png')

# Parallel coordinate plot
parallel_coord = vis.plot_parallel_coordinate(study)
pio.write_image(parallel_coord, 'parallel_coordinate.png')

# Slice plot for each hyperparameter
slice_plot = vis.plot_slice(study)
pio.write_image(slice_plot, 'slice_plot.png')

# Hyperparameter pair plot
pair_plot = vis.plot_contour(study)
pio.write_image(pair_plot, 'pair_plot.png')

