In [1]:
import gc
import logging
import os
import warnings
from datetime import datetime
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import StandardScaler

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

# Build a timestamped log filename so each run writes to its own file.
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
log_filename = f'kaggle_submission_{current_time}.log'

# Configure logging to write to both the log file and the console.
# force=True replaces any handlers already attached to the root logger; without
# it basicConfig() does nothing once the root logger is configured, so re-running
# this cell would keep writing to the log file of the previous run.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_filename),
        logging.StreamHandler()  # This ensures logs are also output to the console
    ],
    force=True,
)

def reduce_mem_usage(df, verbose=True):
    """Shrink a DataFrame's memory footprint by downcasting its columns in place.

    Rules applied per column:

    * object / pandas string columns are converted to ``category``;
    * signed NumPy integer columns are cast to the smallest of
      int8 / int16 / int32 that holds the observed min and max (bounds are
      inclusive, and a column is never widened);
    * float columns wider than 32 bits are cast to float32 when the observed
      range fits, which rounds values to float32 precision;
    * every other dtype (bool, unsigned int, datetime, timedelta, category,
      nullable extension types) is left untouched.

    Args:
        df: DataFrame to optimise. It is modified in place.
        verbose: When true, log the memory usage before and after.

    Returns:
        The same DataFrame object, for call chaining.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        logging.info(f'Start memory usage of dataframe: {start_mem:.2f} MB')

    float32_limits = np.finfo(np.float32)

    for col in df.columns:
        col_type = df[col].dtype

        if pd.api.types.is_object_dtype(col_type) or isinstance(col_type, pd.StringDtype):
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy signed-int ('i') and float ('f') columns are downcast.
        # Checking dtype.kind keeps bool, unsigned, datetime and timedelta columns
        # out of the numeric branches, where they would be corrupted or raise.
        if not isinstance(col_type, np.dtype) or col_type.kind not in ('i', 'f'):
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind == 'i':
            for candidate in (np.int8, np.int16, np.int32):
                limits = np.iinfo(candidate)
                # For an empty column min/max are NaN, every comparison is False
                # and the column is left as it is.
                if limits.min <= c_min and c_max <= limits.max:
                    if np.dtype(candidate).itemsize < col_type.itemsize:
                        df[col] = df[col].astype(candidate)
                    break
        elif (
            col_type.itemsize > 4
            and float32_limits.min <= c_min
            and c_max <= float32_limits.max
        ):
            # All-NaN or infinite columns fail the range test and stay as they are.
            df[col] = df[col].astype(np.float32)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        logging.info(f'End memory usage of dataframe: {end_mem:.2f} MB')
        # Guard the percentage against a zero-sized starting frame.
        decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        logging.info(f'Decreased by {decrease:.1f}%')

    return df

def safe_map(df, column, mapping):
    """Replace the values of ``df[column]`` using ``mapping`` and warn about unmapped values.

    Distinct non-missing values of the column that are not keys of ``mapping``
    are logged once as a warning; after mapping they become missing values.
    Missing values already present in the column are not reported.

    Args:
        df: DataFrame holding the column. It is modified in place.
        column: Name of the column to map.
        mapping: Dict from original value to replacement value.

    Returns:
        The same DataFrame object, for call chaining.
    """
    # unique() keeps this proportional to the number of distinct values instead of
    # materialising one Python object per row; dropna() stops NaN/None from being
    # flagged as an "unknown category".
    observed_values = set(df[column].dropna().unique())
    unknown_categories = observed_values - set(mapping.keys())
    if unknown_categories:
        logging.warning(f"Unknown categories in column {column}: {unknown_categories}")
    df[column] = df[column].map(mapping)
    return df

def import_data(file, **kwargs):
    """Read a CSV file into a DataFrame and shrink its memory footprint.

    Args:
        file: Path or buffer accepted by ``pd.read_csv``.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.
            ``parse_dates`` defaults to ``True`` but may be overridden here.

    Returns:
        The loaded DataFrame after ``reduce_mem_usage`` has been applied.
    """
    # setdefault lets a caller pass its own parse_dates without a
    # "multiple values for keyword argument" TypeError.
    # keep_date_col is no longer passed: it only applied when parse_dates
    # combined several columns, and it is deprecated in recent pandas.
    kwargs.setdefault('parse_dates', True)
    df = pd.read_csv(file, **kwargs)
    return reduce_mem_usage(df)

def preprocess_data(df):
    """Encode the text-valued columns as integers and drop ``Driving_License``.

    ``Gender``, ``Vehicle_Damage`` and ``Vehicle_Age`` are mapped through
    ``safe_map``. The frame is modified in place and also returned.
    """
    column_encodings = (
        ('Gender', {'Male': 1, 'Female': 0}),
        ('Vehicle_Damage', {'Yes': 1, 'No': 0}),
        ('Vehicle_Age', {'< 1 Year': 0, '1-2 Year': 1, '> 2 Years': 2}),
    )
    for column_name, value_codes in column_encodings:
        df = safe_map(df, column_name, value_codes)

    df.drop(columns=['Driving_License'], inplace=True)
    return df

def feature_engineering(df):
    """Add interaction features pairing ``Previously_Insured`` with four other columns.

    For each of ``Annual_Premium``, ``Vehicle_Age``, ``Vehicle_Damage`` and
    ``Vintage`` a column ``Previously_Insured_<name>`` is added. It holds the
    combined value itself (``"<previously_insured>_<other>"``) as a pandas
    categorical.

    The combined value is stored instead of a ``pd.factorize`` code because
    factorize numbers values by order of first appearance. This function is
    called separately on the train and test frames, so codes would label the
    same combination differently in each frame. The combined string is
    identical wherever the combination occurs.

    Args:
        df: DataFrame containing the five source columns. Modified in place.

    Returns:
        The same DataFrame object with the four new columns appended.
    """
    insured = df['Previously_Insured'].astype(str)
    for partner in ('Annual_Premium', 'Vehicle_Age', 'Vehicle_Damage', 'Vintage'):
        # The separator keeps the two parts unambiguous.
        combined = insured + '_' + df[partner].astype(str)
        df[f'Previously_Insured_{partner}'] = combined.astype('category')
    return df

# Paths to datasets. The directory defaults to the original location and can be
# overridden with the KAGGLE_DATA_DIR environment variable so the notebook can
# run on another machine without editing this cell.
DATA_DIR = Path(os.environ.get(
    "KAGGLE_DATA_DIR",
    r"C:\Users\paulo\OneDrive\Documents\kaggle_competition_2_datasets",
))
train_path = DATA_DIR / "train.csv"
test_path = DATA_DIR / "test.csv"

# Load and optimize data
train_df = import_data(train_path, index_col='id')
test_df = import_data(test_path, index_col='id')

gc.collect()
print(f"DataFrame after import: {type(train_df)}")
logging.info("Data loaded successfully.")

# Apply preprocessing
train_df = preprocess_data(train_df)
test_df = preprocess_data(test_df)
print(f"DataFrame after preprocessing: {type(train_df)}")
logging.info("Data preprocessed successfully.")

# Apply feature engineering
train_df = feature_engineering(train_df)
test_df = feature_engineering(test_df)

gc.collect()
print(f"DataFrame after feature engineering: {type(train_df)}")
logging.info("Feature engineering completed successfully.")

# Normalize the continuous numeric columns. Region_Code and Policy_Sales_Channel
# are listed in categorical_cols below and are passed to CatBoost as string
# categories, so they are left unscaled rather than being turned into
# standardized floats and then stringified.
num_cols = ['Age', 'Annual_Premium', 'Vintage']
scaler = StandardScaler()
train_df[num_cols] = scaler.fit_transform(train_df[num_cols])
test_df[num_cols] = scaler.transform(test_df[num_cols])

# Separate the features from the target
X = train_df.drop('Response', axis=1)
y = train_df['Response']

# The model is later applied to test_df, so both frames must expose the same
# feature columns in the same order.
assert list(X.columns) == list(test_df.columns), "train/test feature columns differ"

# Ensure that categorical columns are in string format
categorical_cols=['Gender', 'Vehicle_Age', 'Vehicle_Damage', 'Policy_Sales_Channel', 'Region_Code', 'Previously_Insured_Annual_Premium', 'Previously_Insured_Vehicle_Age', 'Previously_Insured_Vehicle_Damage', 'Previously_Insured_Vintage']
X[categorical_cols] = X[categorical_cols].astype(str)
test_df[categorical_cols] = test_df[categorical_cols].astype(str)

def objective(trial):
    """Optuna objective: fit CatBoost on a fixed hold-out split and return validation AUC.

    Reads the module-level ``X``, ``y`` and ``categorical_cols``. The split uses
    a fixed ``random_state``, so every trial is scored on the same validation
    rows.

    When a trial ties or beats the best value recorded so far, its fitted model
    is written to ``catboost_model_trial_<trial.number>.pkl`` in the working
    directory, so the model of the study's best trial is always on disk.

    Args:
        trial: The ``optuna.trial.Trial`` used to sample hyper-parameters.

    Returns:
        ROC AUC of the fitted model on the validation split.
    """
    params = {
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'learning_rate': trial.suggest_float('learning_rate', 0.02, 0.2, log=True),  # Expanded upper limit
        'iterations': trial.suggest_int('iterations', 1500, 3000),  # Expanded upper limit
        'depth': trial.suggest_int('depth', 7, 12),  # Expanded upper limit
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 0.01, 10, log=True),
        'random_strength': trial.suggest_float('random_strength', 0.1, 2.0, log=True),  # Expanded upper limit
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.2, 2.0, log=True),  # Expanded upper limit
        'task_type': 'GPU',  # Ensure your environment supports GPU
        'eval_metric': 'AUC',
        'loss_function': 'Logloss',
        'random_seed': 42,
        'allow_writing_files': False,
        'verbose': 100  # Display log every 100 iterations
    }

    # The categorical columns of X are already strings (cast where X is built),
    # so the split frames go to Pool as they are; re-casting slices of X on
    # every trial only added copies.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

    train_pool = Pool(X_train, y_train, cat_features=categorical_cols)
    valid_pool = Pool(X_valid, y_valid, cat_features=categorical_cols)

    model = CatBoostClassifier(**params)
    model.fit(train_pool, eval_set=valid_pool, early_stopping_rounds=50, verbose=100)

    valid_preds = model.predict_proba(valid_pool)[:, 1]
    auc = roc_auc_score(y_valid, valid_preds)

    # Persist the model only when it is at least as good as the best so far.
    # study.best_value raises ValueError while no trial has completed yet.
    try:
        best_so_far = trial.study.best_value
    except ValueError:
        best_so_far = float('-inf')
    if auc >= best_so_far:
        joblib.dump(model, f'catboost_model_trial_{trial.number}.pkl')

    return auc

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=25)

print('Best parameters:', study.best_params)
print('Best AUC score:', study.best_value)


# Load the model persisted for the best trial. This expects
# catboost_model_trial_<n>.pkl to exist in the working directory.
# NOTE(review): joblib.load unpickles the file; only load files this notebook produced.
best_model = joblib.load(f'catboost_model_trial_{study.best_trial.number}.pkl')

# Ensure test_df categorical columns are in string format
test_df[categorical_cols] = test_df[categorical_cols].astype(str)

# Predict on the test set (probability of the positive class)
test_pool = Pool(test_df, cat_features=categorical_cols)
test_pred = best_model.predict_proba(test_pool)[:, 1]
joblib.dump(test_pred, 'test_pred_cat.pkl')

logging.info("Final CatBoost model and predictions saved.")

# Create a submission dataframe
submission = pd.DataFrame({
    'id': test_df.index,
    'Response': test_pred
})

# Save the submission file
submission_filename = f'submission_{current_time}.csv'
submission.to_csv(submission_filename, index=False)

logging.info(f"Submission file {submission_filename} created successfully.")
print(f"Submission file {submission_filename} created successfully.")

# Plot Optuna visualizations. Each plot function returns a figure, and a
# notebook cell renders only its last expression, so every figure is shown
# explicitly. plot_intermediate_values is left out of this loop: the objective
# reports no intermediate values, so that plot would be empty.
for make_figure in (
    optuna.visualization.plot_optimization_history,
    optuna.visualization.plot_parallel_coordinate,
    optuna.visualization.plot_contour,
    optuna.visualization.plot_param_importances,
    optuna.visualization.plot_slice,
    optuna.visualization.plot_edf,
):
    make_figure(study).show()


2024-07-26 07:18:19,139 - INFO - Start memory usage of dataframe: 1053.30 MB
2024-07-26 07:18:20,697 - INFO - End memory usage of dataframe: 318.18 MB
2024-07-26 07:18:20,698 - INFO - Decreased by 69.8%
2024-07-26 07:18:29,789 - INFO - Start memory usage of dataframe: 643.68 MB
2024-07-26 07:18:30,818 - INFO - End memory usage of dataframe: 204.81 MB
2024-07-26 07:18:30,819 - INFO - Decreased by 68.2%
2024-07-26 07:18:30,882 - INFO - Data loaded successfully.


DataFrame after import: <class 'pandas.core.frame.DataFrame'>


2024-07-26 07:18:32,614 - INFO - Data preprocessed successfully.


DataFrame after preprocessing: <class 'pandas.core.frame.DataFrame'>


2024-07-26 07:19:04,199 - INFO - Feature engineering completed successfully.


DataFrame after feature engineering: <class 'pandas.core.frame.DataFrame'>


[I 2024-07-26 07:19:40,210] A new study created in memory with name: no-name-a4c93c3f-fcf0-4527-b566-e83bd5cc37b2
Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8745203	best: 0.8745203 (0)	total: 798ms	remaining: 21m 15s
100:	test: 0.8908743	best: 0.8908743 (100)	total: 1m 57s	remaining: 28m 57s
200:	test: 0.8924599	best: 0.8924599 (200)	total: 3m 48s	remaining: 26m 28s
300:	test: 0.8931028	best: 0.8931028 (300)	total: 5m 40s	remaining: 24m 31s
400:	test: 0.8934097	best: 0.8934097 (400)	total: 7m 33s	remaining: 22m 36s
500:	test: 0.8936255	best: 0.8936255 (499)	total: 9m 28s	remaining: 20m 46s
600:	test: 0.8937769	best: 0.8937769 (600)	total: 11m 23s	remaining: 18m 56s
700:	test: 0.8938711	best: 0.8938711 (700)	total: 13m 20s	remaining: 17m 6s
800:	test: 0.8939485	best: 0.8939485 (800)	total: 15m 11s	remaining: 15m 9s
900:	test: 0.8940251	best: 0.8940251 (900)	total: 17m 7s	remaining: 13m 17s
1000:	test: 0.8940922	best: 0.8940922 (1000)	total: 19m	remaining: 11m 22s
1100:	test: 0.8941449	best: 0.8941449 (1099)	total: 20m 54s	remaining: 9m 28s
1200:	test: 0.8941909	best: 0.8941909 (1200)	total: 22m 48s	remaining: 7m 34s
1300:	test: 0

[I 2024-07-26 07:53:14,555] Trial 0 finished with value: 0.8943397990894207 and parameters: {'learning_rate': 0.058724672018912835, 'iterations': 1600, 'depth': 8, 'l2_leaf_reg': 0.236232108497618, 'random_strength': 0.5035006426410924, 'bagging_temperature': 1.8507711937950635}. Best is trial 0 with value: 0.8943397990894207.
Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8743468	best: 0.8743468 (0)	total: 842ms	remaining: 22m 46s
100:	test: 0.8904626	best: 0.8904626 (100)	total: 1m 53s	remaining: 28m 30s
200:	test: 0.8922451	best: 0.8922451 (200)	total: 3m 43s	remaining: 26m 18s
300:	test: 0.8929542	best: 0.8929542 (300)	total: 5m 35s	remaining: 24m 35s
400:	test: 0.8933381	best: 0.8933381 (400)	total: 7m 25s	remaining: 22m 37s
500:	test: 0.8936057	best: 0.8936057 (500)	total: 9m 16s	remaining: 20m 46s
600:	test: 0.8938060	best: 0.8938060 (600)	total: 11m 11s	remaining: 19m 3s
700:	test: 0.8939427	best: 0.8939427 (700)	total: 13m 6s	remaining: 17m 15s
800:	test: 0.8940524	best: 0.8940524 (800)	total: 14m 59s	remaining: 15m 24s
900:	test: 0.8941358	best: 0.8941358 (900)	total: 16m 54s	remaining: 13m 33s
1000:	test: 0.8942168	best: 0.8942171 (998)	total: 18m 46s	remaining: 11m 40s
1100:	test: 0.8942764	best: 0.8942764 (1100)	total: 20m 45s	remaining: 9m 51s
1200:	test: 0.8943236	best: 0.8943236 (1200)	total: 22m 42s	remaining: 7m 59s
1300:	tes

[I 2024-07-26 08:27:08,664] Trial 1 finished with value: 0.8944985885069372 and parameters: {'learning_rate': 0.04972056387848113, 'iterations': 1624, 'depth': 8, 'l2_leaf_reg': 7.042280242236321, 'random_strength': 1.505272608522372, 'bagging_temperature': 0.8851819675036686}. Best is trial 1 with value: 0.8944985885069372.
Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8766077	best: 0.8766077 (0)	total: 1.2s	remaining: 46m 46s
100:	test: 0.8930103	best: 0.8930103 (100)	total: 2m 11s	remaining: 48m 40s
200:	test: 0.8937623	best: 0.8937623 (200)	total: 4m 25s	remaining: 47m 1s
300:	test: 0.8940720	best: 0.8940727 (299)	total: 6m 42s	remaining: 45m 20s
400:	test: 0.8942117	best: 0.8942117 (400)	total: 9m 2s	remaining: 43m 38s
500:	test: 0.8942985	best: 0.8942985 (500)	total: 11m 26s	remaining: 41m 56s
600:	test: 0.8943391	best: 0.8943416 (596)	total: 13m 50s	remaining: 39m 56s
700:	test: 0.8943580	best: 0.8943582 (699)	total: 16m 11s	remaining: 37m 46s
bestTest = 0.8943659067
bestIteration = 727
Shrink model to first 728 iterations.


[I 2024-07-26 08:47:40,608] Trial 2 finished with value: 0.8943659528509935 and parameters: {'learning_rate': 0.12571708313809302, 'iterations': 2336, 'depth': 10, 'l2_leaf_reg': 0.3735097391522565, 'random_strength': 1.042267119667513, 'bagging_temperature': 0.9972892189660757}. Best is trial 1 with value: 0.8944985885069372.
Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8760353	best: 0.8760353 (0)	total: 1.05s	remaining: 27m 20s
100:	test: 0.8885390	best: 0.8885390 (100)	total: 2m 1s	remaining: 29m 23s
200:	test: 0.8905618	best: 0.8905618 (200)	total: 4m 10s	remaining: 28m 23s
300:	test: 0.8916518	best: 0.8916518 (300)	total: 6m 11s	remaining: 25m 59s
400:	test: 0.8923310	best: 0.8923310 (400)	total: 8m 10s	remaining: 23m 44s
500:	test: 0.8927190	best: 0.8927190 (500)	total: 10m 13s	remaining: 21m 44s
600:	test: 0.8929988	best: 0.8929988 (600)	total: 12m 13s	remaining: 19m 37s
700:	test: 0.8932222	best: 0.8932222 (700)	total: 14m 12s	remaining: 17m 32s
800:	test: 0.8933946	best: 0.8933946 (800)	total: 16m 14s	remaining: 15m 30s
900:	test: 0.8935252	best: 0.8935252 (900)	total: 18m 18s	remaining: 13m 30s
1000:	test: 0.8936295	best: 0.8936295 (1000)	total: 20m 23s	remaining: 11m 30s
1100:	test: 0.8937277	best: 0.8937277 (1100)	total: 22m 26s	remaining: 9m 28s
1200:	test: 0.8938079	best: 0.8938079 (1200)	total: 24m 34s	remaining: 7m 28s
1300:	

[I 2024-07-26 09:22:41,415] Trial 3 finished with value: 0.8940116588610421 and parameters: {'learning_rate': 0.02250491221636737, 'iterations': 1566, 'depth': 9, 'l2_leaf_reg': 0.02385109546525541, 'random_strength': 0.8881382074397476, 'bagging_temperature': 1.5739351631098442}. Best is trial 1 with value: 0.8944985885069372.
Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8745744	best: 0.8745744 (0)	total: 785ms	remaining: 31m 28s
100:	test: 0.8911423	best: 0.8911423 (100)	total: 1m 52s	remaining: 42m 50s
200:	test: 0.8925976	best: 0.8925976 (200)	total: 3m 40s	remaining: 40m 20s
300:	test: 0.8931723	best: 0.8931723 (300)	total: 5m 31s	remaining: 38m 39s
400:	test: 0.8934719	best: 0.8934719 (400)	total: 7m 23s	remaining: 36m 59s
500:	test: 0.8936751	best: 0.8936751 (500)	total: 9m 16s	remaining: 35m 17s
600:	test: 0.8938067	best: 0.8938067 (600)	total: 11m 10s	remaining: 33m 36s
700:	test: 0.8938908	best: 0.8938908 (700)	total: 13m 5s	remaining: 31m 52s
800:	test: 0.8939721	best: 0.8939721 (800)	total: 14m 58s	remaining: 30m 2s
900:	test: 0.8940462	best: 0.8940462 (900)	total: 16m 53s	remaining: 28m 15s
1000:	test: 0.8941086	best: 0.8941086 (1000)	total: 18m 47s	remaining: 26m 24s
1100:	test: 0.8941698	best: 0.8941698 (1100)	total: 20m 38s	remaining: 24m 30s
1200:	test: 0.8942213	best: 0.8942213 (1200)	total: 22m 34s	remaining: 22m 41s
1300:	

[I 2024-07-26 10:12:02,295] Trial 4 finished with value: 0.8945155699649273 and parameters: {'learning_rate': 0.06349295021099573, 'iterations': 2408, 'depth': 8, 'l2_leaf_reg': 6.558066260689411, 'random_strength': 0.16668278076405652, 'bagging_temperature': 1.9536030084235567}. Best is trial 4 with value: 0.8945155699649273.
Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8784861	best: 0.8784861 (0)	total: 1.54s	remaining: 1h 9m 45s
100:	test: 0.8917656	best: 0.8917656 (100)	total: 2m 24s	remaining: 1h 2m 30s
200:	test: 0.8931655	best: 0.8931655 (200)	total: 4m 49s	remaining: 1h 21s
300:	test: 0.8936716	best: 0.8936716 (300)	total: 7m 15s	remaining: 58m 13s
400:	test: 0.8939531	best: 0.8939531 (400)	total: 9m 45s	remaining: 56m 23s
500:	test: 0.8941549	best: 0.8941549 (500)	total: 12m 16s	remaining: 54m 17s
600:	test: 0.8942552	best: 0.8942552 (600)	total: 14m 48s	remaining: 52m 8s
700:	test: 0.8943477	best: 0.8943477 (700)	total: 17m 17s	remaining: 49m 44s
800:	test: 0.8944295	best: 0.8944295 (800)	total: 19m 50s	remaining: 47m 28s
900:	test: 0.8944832	best: 0.8944832 (900)	total: 22m 23s	remaining: 45m 8s
1000:	test: 0.8945314	best: 0.8945315 (995)	total: 24m 56s	remaining: 42m 45s
1100:	test: 0.8945639	best: 0.8945639 (1100)	total: 27m 32s	remaining: 40m 25s
1200:	test: 0.8945928	best: 0.8945928 (1199)	total: 30m 9s	remaining: 38m 3s
1300:

[I 2024-07-26 11:09:30,730] Trial 5 finished with value: 0.8946836272334897 and parameters: {'learning_rate': 0.057137503886414205, 'iterations': 2717, 'depth': 11, 'l2_leaf_reg': 8.035741529704495, 'random_strength': 0.20700394579791845, 'bagging_temperature': 0.6097883500569838}. Best is trial 5 with value: 0.8946836272334897.
Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8758387	best: 0.8758387 (0)	total: 860ms	remaining: 23m 18s
100:	test: 0.8924607	best: 0.8924607 (100)	total: 2m 4s	remaining: 31m 18s
200:	test: 0.8934503	best: 0.8934503 (200)	total: 4m 6s	remaining: 29m 8s
300:	test: 0.8938202	best: 0.8938202 (300)	total: 6m 12s	remaining: 27m 20s
400:	test: 0.8940289	best: 0.8940289 (400)	total: 8m 15s	remaining: 25m 15s
500:	test: 0.8941746	best: 0.8941746 (500)	total: 10m 22s	remaining: 23m 20s
600:	test: 0.8942956	best: 0.8942956 (600)	total: 12m 30s	remaining: 21m 21s
700:	test: 0.8943592	best: 0.8943592 (700)	total: 14m 41s	remaining: 19m 23s
800:	test: 0.8944144	best: 0.8944157 (795)	total: 16m 47s	remaining: 17m 18s
900:	test: 0.8944530	best: 0.8944533 (896)	total: 18m 52s	remaining: 15m 12s
1000:	test: 0.8944861	best: 0.8944861 (1000)	total: 20m 58s	remaining: 13m 7s
1100:	test: 0.8945168	best: 0.8945170 (1099)	total: 23m 5s	remaining: 11m 2s
1200:	test: 0.8945343	best: 0.8945344 (1193)	total: 25m 15s	remaining: 8m 57s
1300:	test

[I 2024-07-26 11:45:46,172] Trial 6 finished with value: 0.8945753043537477 and parameters: {'learning_rate': 0.0978598071930063, 'iterations': 1627, 'depth': 9, 'l2_leaf_reg': 0.7686900339824705, 'random_strength': 0.14779680737765388, 'bagging_temperature': 1.5323715408331573}. Best is trial 5 with value: 0.8946836272334897.
Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8745702	best: 0.8745702 (0)	total: 967ms	remaining: 39m 23s
100:	test: 0.8924800	best: 0.8924800 (100)	total: 1m 48s	remaining: 41m 53s
200:	test: 0.8933869	best: 0.8933869 (200)	total: 3m 35s	remaining: 40m 3s
300:	test: 0.8936837	best: 0.8936837 (300)	total: 5m 25s	remaining: 38m 41s
400:	test: 0.8939148	best: 0.8939148 (400)	total: 7m 16s	remaining: 37m 5s
500:	test: 0.8940447	best: 0.8940447 (500)	total: 9m 11s	remaining: 35m 40s
600:	test: 0.8941372	best: 0.8941372 (600)	total: 11m 7s	remaining: 34m 6s
700:	test: 0.8941977	best: 0.8941977 (700)	total: 12m 57s	remaining: 32m 14s
800:	test: 0.8942715	best: 0.8942715 (800)	total: 14m 50s	remaining: 30m 27s
900:	test: 0.8943210	best: 0.8943210 (899)	total: 16m 43s	remaining: 28m 40s
1000:	test: 0.8943794	best: 0.8943797 (998)	total: 18m 37s	remaining: 26m 52s
1100:	test: 0.8944187	best: 0.8944190 (1098)	total: 20m 31s	remaining: 25m 3s
1200:	test: 0.8944432	best: 0.8944432 (1200)	total: 22m 27s	remaining: 23m 15s
1300:	test

[I 2024-07-26 12:25:49,572] Trial 7 finished with value: 0.894537655445337 and parameters: {'learning_rate': 0.1141260425710353, 'iterations': 2445, 'depth': 8, 'l2_leaf_reg': 0.07421888942585612, 'random_strength': 0.12806884670491384, 'bagging_temperature': 1.85806413133347}. Best is trial 5 with value: 0.8946836272334897.
Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8766067	best: 0.8766067 (0)	total: 1.13s	remaining: 53m 21s
100:	test: 0.8898017	best: 0.8898017 (100)	total: 2m 15s	remaining: 1h 59s
200:	test: 0.8918211	best: 0.8918211 (200)	total: 4m 28s	remaining: 58m 15s
300:	test: 0.8926681	best: 0.8926681 (300)	total: 6m 37s	remaining: 55m 25s
400:	test: 0.8931499	best: 0.8931499 (400)	total: 8m 48s	remaining: 53m 12s
500:	test: 0.8934509	best: 0.8934509 (500)	total: 11m 1s	remaining: 51m 4s
600:	test: 0.8936722	best: 0.8936722 (600)	total: 13m 19s	remaining: 49m 14s
700:	test: 0.8938406	best: 0.8938406 (700)	total: 15m 37s	remaining: 47m 16s
800:	test: 0.8939602	best: 0.8939602 (800)	total: 17m 55s	remaining: 45m 13s
900:	test: 0.8940555	best: 0.8940555 (900)	total: 20m 18s	remaining: 43m 17s
1000:	test: 0.8941435	best: 0.8941435 (1000)	total: 22m 40s	remaining: 41m 14s
1100:	test: 0.8942133	best: 0.8942133 (1100)	total: 25m 3s	remaining: 39m 9s
1200:	test: 0.8942704	best: 0.8942704 (1200)	total: 27m 21s	remaining: 36m 55s
1300:	te

In [None]:
# Intermediate-value plot for `study` (created in the first cell). The recorded
# output of this cell is an Optuna warning that pruning must be set up for it.
optuna.visualization.plot_intermediate_values(study)


[W 2024-07-25 23:33:49,632] You need to set up the pruning feature to utilize `plot_intermediate_values()`


In [None]:
# Parallel-coordinate plot for `study` (created in the first cell).
optuna.visualization.plot_parallel_coordinate(study)


In [None]:
# Contour plot for `study` (created in the first cell).
optuna.visualization.plot_contour(study)


In [None]:
# Hyper-parameter importance plot for `study` (created in the first cell).
optuna.visualization.plot_param_importances(study)


In [None]:
# Slice plot for `study` (created in the first cell).
optuna.visualization.plot_slice(study)


In [None]:
# EDF plot for `study` (created in the first cell).
optuna.visualization.plot_edf(study)
