In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

train = pd.read_csv('/kaggle/input/titanic/train.csv')
test = pd.read_csv('/kaggle/input/titanic/test.csv')
gender_submssion = pd.read_csv('/kaggle/input/titanic/gender_submission.csv')

In [5]:
data = pd.concat([train,test],sort=False)
data.info()

data['Sex'] = data['Sex'].map({'male':0, 'female':1})
data['Fare'] = data['Fare'].fillna(np.mean(data['Fare']))
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].map({'S':0, 'C':1,'Q':2})

In [16]:
delete_columns = ['Name','PassengerId','SibSp','Parch','Ticket','Cabin']
data.drop(delete_columns,axis=1,inplace=True)

In [19]:
train = data[:len(train)]
test = data[len(train):]

In [20]:
y_train = train['Survived']
X_train = train.drop('Survived',axis=1)
X_test = test.drop('Survived',axis=1)

In [22]:
#lightGBMを使用
X_train, X_valid,y_train,y_valid = \
    train_test_split(X_train,y_train,test_size=0.3,random_state=0,stratify=y_train)

categorical_features = ['Embarked','Pclass','Sex']

import lightgbm as lgb
#学習用データセット
lgb_train = lgb.Dataset(X_train,y_train,categorical_feature=categorical_features)
#評価用データセット
lgb_eval = lgb.Dataset(X_valid,y_valid,categorical_feature=categorical_features)

params = {
    'objctive':'binary'
}

In [26]:
from lightgbm import early_stopping, log_evaluation

verbose_eval = 10

model = lgb.train(
    params,
    lgb_train,
    valid_sets=[lgb_train, lgb_eval],
    callbacks=[
        early_stopping(stopping_rounds=10),  # 早期停止のコールバック
        log_evaluation(verbose_eval)           # ログ表示のコールバック
    ],
    num_boost_round=1000
)

y_pred = model.predict(X_test, num_iteration=model.best_iteration)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002762 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] Start training from score 0.383628
Training until validation scores don't improve for 10 rounds
[10]	training's l2: 0.129911	valid_1's l2: 0.151774
[20]	training's l2: 0.109413	valid_1's l2: 0.142997
[30]	training's l2: 0.0984213	valid_1's l2: 0.13801
[40]	training's l2: 0.0893738	valid_1's l2: 0.134476
[50]	training's l2: 0.0832869	valid_1's l2: 0.133397
Early stopping, best iteration is:
[48]	training's l2: 0.0844034	valid_1's l2: 0.13304


In [28]:
#ハイパーパラメータ調整
import optuna
from sklearn.metrics import log_loss

def objective(trial):
    params = {
        'objective':'binary',
        'max_bin':trial.suggest_int('num_bin',255,500),#探索範囲、項目設定
        'learning_rate':0.05,
        'num_leaves':trial.suggest_int('num_leaves',32,128),
    }
    #学習用データセット
    lgb_train = lgb.Dataset(X_train,y_train,categorical_feature=categorical_features)
    #評価用データセット
    lgb_eval = lgb.Dataset(X_valid,y_valid,categorical_feature=categorical_features)
    model = lgb.train(
        params,
        lgb_train,
        valid_sets=[lgb_train, lgb_eval],
        callbacks=[
            early_stopping(stopping_rounds=10),  # 早期停止のコールバック
            log_evaluation(verbose_eval)           # ログ表示のコールバック
        ],
        num_boost_round=1000
    )
    y_pred_valid = model.predict(X_valid,num_iteration=model.best_iteration)

    score = log_loss(y_valid,y_pred_valid)
    return score

In [29]:
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(seed=0))
study.optimize(objective,n_trials=40)#最小化したい関数と実行回数を指定

[I 2024-12-08 08:03:59,158] A new study created in memory with name: no-name-9cfb6359-5919-4ee0-9c70-9659f194bca7


[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000783 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's binary_logloss: 0.456125
[40]	training's binary_logloss: 0.355475	valid_1's binary_logloss: 0.452295
[50]	training's binary_logloss: 0.332445	valid_1's binary_logloss: 0.449511
[60]	trai

[I 2024-12-08 08:03:59,638] Trial 0 finished with value: 0.4402736570449865 and parameters: {'num_bin': 390, 'num_leaves': 101}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000536 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's binary_logloss: 0.456125
[40]	training's binary_logloss: 0.3554

[I 2024-12-08 08:04:00,161] Trial 1 finished with value: 0.4402736570449865 and parameters: {'num_bin': 403, 'num_leaves': 84}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000423 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's

[I 2024-12-08 08:04:00,639] Trial 2 finished with value: 0.4402736570449865 and parameters: {'num_bin': 359, 'num_leaves': 94}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000463 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's binary_logloss: 0.456125
[40]	training's binary_logloss: 0.3554

[I 2024-12-08 08:04:01,184] Trial 3 finished with value: 0.4402736570449865 and parameters: {'num_bin': 362, 'num_leaves': 118}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000521 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's

[I 2024-12-08 08:04:01,692] Trial 4 finished with value: 0.4402736570449865 and parameters: {'num_bin': 492, 'num_leaves': 69}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000527 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:02,177] Trial 5 finished with value: 0.4402736570449865 and parameters: {'num_bin': 449, 'num_leaves': 83}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000676 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:02,649] Trial 6 finished with value: 0.4402736570449865 and parameters: {'num_bin': 394, 'num_leaves': 121}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000457 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's

[I 2024-12-08 08:04:03,131] Trial 7 finished with value: 0.4402736570449865 and parameters: {'num_bin': 272, 'num_leaves': 40}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000504 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:03,590] Trial 8 finished with value: 0.4402736570449865 and parameters: {'num_bin': 259, 'num_leaves': 112}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000520 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:05,000] Trial 9 finished with value: 0.4402736570449865 and parameters: {'num_bin': 446, 'num_leaves': 116}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003345 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:05,821] Trial 10 finished with value: 0.4402736570449865 and parameters: {'num_bin': 495, 'num_leaves': 109}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000745 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:06,323] Trial 11 finished with value: 0.4402736570449865 and parameters: {'num_bin': 368, 'num_leaves': 107}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000885 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's

[I 2024-12-08 08:04:06,807] Trial 12 finished with value: 0.4402736570449865 and parameters: {'num_bin': 284, 'num_leaves': 94}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000634 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:07,295] Trial 13 finished with value: 0.4402736570449865 and parameters: {'num_bin': 290, 'num_leaves': 123}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000725 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.3847

[I 2024-12-08 08:04:07,813] Trial 14 finished with value: 0.4402736570449865 and parameters: {'num_bin': 383, 'num_leaves': 72}. Best is trial 0 with value: 0.4402736570449865.


Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000485 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's binary_logloss: 0.456125
[40]	training's binary_logloss: 0.355475	valid_1's

[I 2024-12-08 08:04:08,314] Trial 15 finished with value: 0.4402736570449865 and parameters: {'num_bin': 320, 'num_leaves': 107}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000486 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:08,790] Trial 16 finished with value: 0.4402736570449865 and parameters: {'num_bin': 367, 'num_leaves': 87}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000591 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.4272

[I 2024-12-08 08:04:09,342] Trial 17 finished with value: 0.4402736570449865 and parameters: {'num_bin': 259, 'num_leaves': 91}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000840 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:09,855] Trial 18 finished with value: 0.4402736570449865 and parameters: {'num_bin': 405, 'num_leaves': 91}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000801 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's

[I 2024-12-08 08:04:10,341] Trial 19 finished with value: 0.4402736570449865 and parameters: {'num_bin': 487, 'num_leaves': 98}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000486 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:10,824] Trial 20 finished with value: 0.4402736570449865 and parameters: {'num_bin': 343, 'num_leaves': 74}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000555 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:11,319] Trial 21 finished with value: 0.4402736570449865 and parameters: {'num_bin': 426, 'num_leaves': 37}. Best is trial 0 with value: 0.4402736570449865.


[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000614 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's binary_logloss: 0.456125
[40]	training's binary_logloss: 0.355475	valid_1's binary_logloss: 0.452295
[50]	training's binary_logloss: 0.332445	valid_1's binary_logloss: 0.449511
[60]	trai

[I 2024-12-08 08:04:11,780] Trial 22 finished with value: 0.4402736570449865 and parameters: {'num_bin': 419, 'num_leaves': 97}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000638 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:12,269] Trial 23 finished with value: 0.4402736570449865 and parameters: {'num_bin': 306, 'num_leaves': 44}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:12,727] Trial 24 finished with value: 0.4402736570449865 and parameters: {'num_bin': 332, 'num_leaves': 67}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000707 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's

[I 2024-12-08 08:04:13,197] Trial 25 finished with value: 0.4402736570449865 and parameters: {'num_bin': 395, 'num_leaves': 74}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000676 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's binary_logloss: 0.456125
[40]	training's binary_logloss: 0.3554

[I 2024-12-08 08:04:13,685] Trial 26 finished with value: 0.4402736570449865 and parameters: {'num_bin': 498, 'num_leaves': 41}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000488 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:14,151] Trial 27 finished with value: 0.4402736570449865 and parameters: {'num_bin': 306, 'num_leaves': 47}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000775 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's

[I 2024-12-08 08:04:14,661] Trial 28 finished with value: 0.4402736570449865 and parameters: {'num_bin': 415, 'num_leaves': 56}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000476 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:15,209] Trial 29 finished with value: 0.4402736570449865 and parameters: {'num_bin': 369, 'num_leaves': 55}. Best is trial 0 with value: 0.4402736570449865.


[50]	training's binary_logloss: 0.332445	valid_1's binary_logloss: 0.449511
[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000526 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores do

[I 2024-12-08 08:04:15,666] Trial 30 finished with value: 0.4402736570449865 and parameters: {'num_bin': 294, 'num_leaves': 42}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000490 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:16,158] Trial 31 finished with value: 0.4402736570449865 and parameters: {'num_bin': 416, 'num_leaves': 45}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000658 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.3847

[I 2024-12-08 08:04:16,705] Trial 32 finished with value: 0.4402736570449865 and parameters: {'num_bin': 303, 'num_leaves': 67}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000500 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:17,192] Trial 33 finished with value: 0.4402736570449865 and parameters: {'num_bin': 456, 'num_leaves': 41}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000475 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:17,656] Trial 34 finished with value: 0.4402736570449865 and parameters: {'num_bin': 461, 'num_leaves': 41}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000523 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's

[I 2024-12-08 08:04:18,114] Trial 35 finished with value: 0.4402736570449865 and parameters: {'num_bin': 495, 'num_leaves': 77}. Best is trial 0 with value: 0.4402736570449865.


[50]	training's binary_logloss: 0.332445	valid_1's binary_logloss: 0.449511
[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002955 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores do

[I 2024-12-08 08:04:18,588] Trial 36 finished with value: 0.4402736570449865 and parameters: {'num_bin': 495, 'num_leaves': 90}. Best is trial 0 with value: 0.4402736570449865.


[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000509 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's binary_logloss: 0.479331
[30]	training's binary_logloss: 0.384728	valid_1's

[I 2024-12-08 08:04:19,037] Trial 37 finished with value: 0.4402736570449865 and parameters: {'num_bin': 436, 'num_leaves': 35}. Best is trial 0 with value: 0.4402736570449865.


[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000523 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's binary_logloss: 0.530856
[20]	training's binary_logloss: 0.427285	valid_1's

[I 2024-12-08 08:04:19,505] Trial 38 finished with value: 0.4402736570449865 and parameters: {'num_bin': 324, 'num_leaves': 43}. Best is trial 0 with value: 0.4402736570449865.


[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274
[LightGBM] [Info] Number of positive: 239, number of negative: 384
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000572 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 181
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.503739	valid_1's

[I 2024-12-08 08:04:19,961] Trial 39 finished with value: 0.4402736570449865 and parameters: {'num_bin': 327, 'num_leaves': 43}. Best is trial 0 with value: 0.4402736570449865.


[50]	training's binary_logloss: 0.332445	valid_1's binary_logloss: 0.449511
[60]	training's binary_logloss: 0.312621	valid_1's binary_logloss: 0.443413
[70]	training's binary_logloss: 0.294241	valid_1's binary_logloss: 0.440499
[80]	training's binary_logloss: 0.278628	valid_1's binary_logloss: 0.445379
Early stopping, best iteration is:
[72]	training's binary_logloss: 0.290827	valid_1's binary_logloss: 0.440274


In [30]:
#最適な値
study.best_params

{'num_bin': 390, 'num_leaves': 101}

In [33]:
#以降best_paramsでモデル作成
#クロスバリデーションを実装
from sklearn.model_selection import KFold

y_preds = []#X_testに対する予測値を格納
models = []#モデルを格納
off_train = np.zeros((len(X_train),))#X_valに対する予測値を格納
cv = KFold(n_splits=5,shuffle=True,random_state=0)

params = {
        'objective':'binary',
        'max_bin':390,
        'learning_rate':0.05,
        'num_leaves':101,
    }

for fold_id,(train_index,valid_index) in enumerate(cv.split(X_train)):
    # データの分割
    X_tr = X_train.iloc[train_index, :]
    X_val = X_train.iloc[valid_index, :]
    y_tr = y_train.iloc[train_index]
    y_val = y_train.iloc[valid_index]
    
    lgb_train = lgb.Dataset(X_tr,y_tr,categorical_feature = categorical_features)
    lgb_eval = lgb.Dataset(X_val,y_val,categorical_feature = categorical_features)

    model = lgb.train(
        params,
        lgb_train,
        valid_sets=[lgb_train, lgb_eval],
        callbacks=[
            early_stopping(stopping_rounds=10),  # 早期停止のコールバック
            log_evaluation(verbose_eval)           # ログ表示のコールバック
        ],
        num_boost_round=1000
    )

    off_train[valid_index] = model.predict(X_val,num_iteration = model.best_iteration)
    y_pred = model.predict(X_test,num_iteration = model.best_iteration)
    y_preds.append(y_pred)
    models.append(model)

[LightGBM] [Info] Number of positive: 195, number of negative: 303
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 163
[LightGBM] [Info] Number of data points in the train set: 498, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.391566 -> initscore=-0.440733
[LightGBM] [Info] Start training from score -0.440733
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.517405	valid_1's binary_logloss: 0.504583
[20]	training's binary_logloss: 0.441467	valid_1's binary_logloss: 0.435236
[30]	training's binary_logloss: 0.400172	valid_1's binary_logloss: 0.403086
[40]	training's binary_logloss: 0.373492	valid_1's binary_logloss: 0.387233
[50]	training's binary_logloss: 0.353067	valid_1's binary_logloss: 0.380903
[60]	trai

In [37]:
y_sub = np.mean(y_preds, axis=0)#平均を計算
y_sub = (y_sub > 0.5).astype(int)#0/1に変換

In [38]:
print(y_sub)#結果

[0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 0 1 0 0 0 0 0 0 0 1 0 0
 0 0 1 0 1 0 1 1 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 1 1 0 0 1 1 0 0 0
 1 0 0 1 0 1 1 0 0 0 0 0 1 0 1 1 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0
 1 1 1 1 0 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 1 0 1 0 0 0 0 0 1 1 0 1 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1
 0 1 0 0 0 0 0 1 0 1 0 1 1 0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 1 0 0 1 0 1 0 1 0
 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1
 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0
 1 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0
 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 1 0
 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 1 0 0 0 0
 0 1 1 1 1 1 0 1 0 0 0]
