In [21]:
import numpy as np
import pandas as pd


train = pd.read_csv('../data/train.csv')
test = pd.read_csv('../data/test.csv')
gender_submission = pd.read_csv('../data/gender_submission.csv')

data = pd.concat([train, test], sort=False)

data['Sex'].replace(['male', 'female'], [0, 1], inplace=True)
data['Embarked'].fillna(('S'), inplace=True)
data['Embarked'] = data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2}).astype(int)
data['Fare'].fillna(np.mean(data['Fare']), inplace=True)
data['Age'].fillna(data['Age'].median(), inplace=True)
data['FamilySize'] = data['Parch'] + data['SibSp'] + 1
data['IsAlone'] = 0
data.loc[data['FamilySize'] == 1, 'IsAlone'] = 1

In [22]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,FamilySize,IsAlone
0,1,0.0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171,7.25,,0,2,0
1,2,1.0,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599,71.2833,C85,1,2,0
2,3,1.0,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282,7.925,,0,1,1
3,4,1.0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803,53.1,C123,0,2,0
4,5,0.0,3,"Allen, Mr. William Henry",0,35.0,0,0,373450,8.05,,0,1,1


In [23]:
delete_columns = ['Name', 'PassengerId', 'Ticket', 'Cabin']
data.drop(delete_columns, axis=1, inplace=True)

train = data[:len(train)]
test = data[len(train):]

y_train = train['Survived']
X_train = train.drop('Survived', axis=1)
X_test = test.drop('Survived', axis=1)

In [24]:
X_train.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,FamilySize,IsAlone
0,3,0,22.0,1,0,7.25,0,2,0
1,1,1,38.0,1,0,71.2833,1,2,0
2,3,1,26.0,0,0,7.925,0,1,1
3,1,1,35.0,1,0,53.1,0,2,0
4,3,0,35.0,0,0,8.05,0,1,1


In [25]:
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.3, random_state=0, stratify=y_train
)

categorical_features = [
    "Embarked", "Pclass", "Sex"
]

In [26]:
params = {
    'objective': 'binary'
}

In [27]:
import lightgbm as lgb

lgb_train = lgb.Dataset(
    X_train, y_train, categorical_feature=categorical_features
)
lgb_eval = lgb.Dataset(
    X_valid, y_valid, reference=lgb_train, categorical_feature=categorical_features
)

model = lgb.train(params, lgb_train, valid_sets=[lgb_train, lgb_eval],verbose_eval=10, num_boost_round=1000, early_stopping_rounds=10)

y_pred = model.predict(
    X_test, num_iteration=model.best_iteration
)

[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.420386	valid_1's binary_logloss: 0.476708
[20]	training's binary_logloss: 0.34128	valid_1's binary_logloss: 0.437279
[30]	training's binary_logloss: 0.297138	valid_1's binary_logloss: 0.437636
Early stopping, best iteration is:
[23]	training's binary_logloss: 0.326948	valid_1's binary_logloss: 0.435703




In [28]:
import optuna
from sklearn.metrics import log_loss

In [29]:
def objective(trial):
    """
    ハイパーパラメータチューニング
    """
    params = {
        'objective': 'binary',
        'max_bin': trial.suggest_int('max_bin', 255, 500),
        'learning_rate': 0.05,
        'num_leaves': trial.suggest_int('num_leaves', 32, 128),
    }
    
    lgb_train = lgb.Dataset(X_train, y_train,
                                             categorical_feature=categorical_features)
    lgb_eval = lgb.Dataset(X_valid, y_valid, reference=lgb_train,
                                            categorical_feature=categorical_features)
    model = lgb.train(
        params,lgb_train,valid_sets=[lgb_train, lgb_eval],
        verbose_eval=10,
        num_boost_round=1000,
        early_stopping_rounds=10
    )
    y_pred_valid = model.predict(X_valid, num_iteration=model.best_iteration)
    score = log_loss(y_valid, y_pred_valid)

    return score

In [30]:
study = optuna.create_study(
    sampler=optuna.samplers.RandomSampler(seed=0)
)

study.optimize(objective, n_trials=40)

[32m[I 2021-11-07 01:12:10,457][0m A new study created in memory with name: no-name-f660174f-26ae-4ccd-a525-7dd14fda7f5d[0m
[32m[I 2021-11-07 01:12:10,605][0m Trial 0 finished with value: 0.43594874611252654 and parameters: {'max_bin': 390, 'num_leaves': 101}. Best is trial 0 with value: 0.43594874611252654.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282

[32m[I 2021-11-07 01:12:10,793][0m Trial 1 finished with value: 0.43594874611252654 and parameters: {'max_bin': 403, 'num_leaves': 84}. Best is trial 0 with value: 0.43594874611252654.[0m


[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]

[32m[I 2021-11-07 01:12:10,906][0m Trial 2 finished with value: 0.43594874611252654 and parameters: {'max_bin': 359, 'num_leaves': 94}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:11,031][0m Trial 3 finished with value: 0.43594874611252654 and parameters: {'max_bin': 362, 'num_leaves': 118}. Best is trial 0 with value: 0.43594874611252654.[0m


[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282

[32m[I 2021-11-07 01:12:11,206][0m Trial 4 finished with value: 0.43594874611252654 and parameters: {'max_bin': 492, 'num_leaves': 69}. Best is trial 0 with value: 0.43594874611252654.[0m


[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0

[32m[I 2021-11-07 01:12:11,370][0m Trial 5 finished with value: 0.43594874611252654 and parameters: {'max_bin': 449, 'num_leaves': 83}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:11,518][0m Trial 6 finished with value: 0.43594874611252654 and parameters: {'max_bin': 394, 'num_leaves': 121}. Best is trial 0 with value: 0.43594874611252654.[0m


[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0

[32m[I 2021-11-07 01:12:11,676][0m Trial 7 finished with value: 0.43594874611252654 and parameters: {'max_bin': 272, 'num_leaves': 40}. Best is trial 0 with value: 0.43594874611252654.[0m


[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score

[32m[I 2021-11-07 01:12:11,840][0m Trial 8 finished with value: 0.43594874611252654 and parameters: {'max_bin': 259, 'num_leaves': 112}. Best is trial 0 with value: 0.43594874611252654.[0m


[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0

[32m[I 2021-11-07 01:12:12,022][0m Trial 9 finished with value: 0.43594874611252654 and parameters: {'max_bin': 446, 'num_leaves': 116}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:12,145][0m Trial 10 finished with value: 0.43594874611252654 and parameters: {'max_bin': 495, 'num_leaves': 109}. Best is trial 0 with value: 0.43594874611252654.[0m



[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50

[32m[I 2021-11-07 01:12:12,351][0m Trial 11 finished with value: 0.43594874611252654 and parameters: {'max_bin': 368, 'num_leaves': 107}. Best is trial 0 with value: 0.43594874611252654.[0m


[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start trainin

[32m[I 2021-11-07 01:12:12,488][0m Trial 12 finished with value: 0.43594874611252654 and parameters: {'max_bin': 284, 'num_leaves': 94}. Best is trial 0 with value: 0.43594874611252654.[0m


[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start trainin

[32m[I 2021-11-07 01:12:12,886][0m Trial 13 finished with value: 0.43594874611252654 and parameters: {'max_bin': 290, 'num_leaves': 123}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:12,962][0m Trial 14 finished with value: 0.43594874611252654 and parameters: {'max_bin': 383, 'num_leaves': 72}. Best is trial 0 with value: 0.43594874611252654.[0m


[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]



[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465


[32m[I 2021-11-07 01:12:13,321][0m Trial 15 finished with value: 0.43594874611252654 and parameters: {'max_bin': 320, 'num_leaves': 107}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:13,427][0m Trial 16 finished with value: 0.43594874611252654 and parameters: {'max_bin': 367, 'num_leaves': 87}. Best is trial 0 with value: 0.43594874611252654.[0m


[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]

[32m[I 2021-11-07 01:12:13,591][0m Trial 17 finished with value: 0.43594874611252654 and parameters: {'max_bin': 259, 'num_leaves': 91}. Best is trial 0 with value: 0.43594874611252654.[0m



[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 

[32m[I 2021-11-07 01:12:13,762][0m Trial 18 finished with value: 0.43594874611252654 and parameters: {'max_bin': 405, 'num_leaves': 91}. Best is trial 0 with value: 0.43594874611252654.[0m



[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 

[32m[I 2021-11-07 01:12:13,939][0m Trial 19 finished with value: 0.43594874611252654 and parameters: {'max_bin': 487, 'num_leaves': 98}. Best is trial 0 with value: 0.43594874611252654.[0m



[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 

[32m[I 2021-11-07 01:12:14,124][0m Trial 20 finished with value: 0.43594874611252654 and parameters: {'max_bin': 343, 'num_leaves': 74}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:14,257][0m Trial 21 finished with value: 0.43594874611252654 and parameters: {'max_bin': 426, 'num_leaves': 37}. Best is trial 0 with value: 0.43594874611252654.[0m



[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 

[32m[I 2021-11-07 01:12:14,329][0m Trial 22 finished with value: 0.43594874611252654 and parameters: {'max_bin': 419, 'num_leaves': 97}. Best is trial 0 with value: 0.43594874611252654.[0m



[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 1

[32m[I 2021-11-07 01:12:14,516][0m Trial 23 finished with value: 0.43594874611252654 and parameters: {'max_bin': 306, 'num_leaves': 44}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:14,654][0m Trial 24 finished with value: 0.43594874611252654 and parameters: {'max_bin': 332, 'num_leaves': 67}. Best is trial 0 with value: 0.43594874611252654.[0m



[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 

[32m[I 2021-11-07 01:12:14,814][0m Trial 25 finished with value: 0.43594874611252654 and parameters: {'max_bin': 395, 'num_leaves': 74}. Best is trial 0 with value: 0.43594874611252654.[0m



[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 1

[32m[I 2021-11-07 01:12:14,941][0m Trial 26 finished with value: 0.43594874611252654 and parameters: {'max_bin': 498, 'num_leaves': 41}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:15,073][0m Trial 27 finished with value: 0.43594874611252654 and parameters: {'max_bin': 306, 'num_leaves': 47}. Best is trial 0 with value: 0.43594874611252654.[0m


[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0

[32m[I 2021-11-07 01:12:15,273][0m Trial 28 finished with value: 0.43594874611252654 and parameters: {'max_bin': 415, 'num_leaves': 56}. Best is trial 0 with value: 0.43594874611252654.[0m


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can se

[32m[I 2021-11-07 01:12:15,424][0m Trial 29 finished with value: 0.43594874611252654 and parameters: {'max_bin': 369, 'num_leaves': 55}. Best is trial 0 with value: 0.43594874611252654.[0m



[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start traini

[32m[I 2021-11-07 01:12:15,605][0m Trial 30 finished with value: 0.43594874611252654 and parameters: {'max_bin': 294, 'num_leaves': 42}. Best is trial 0 with value: 0.43594874611252654.[0m


[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008

[32m[I 2021-11-07 01:12:15,816][0m Trial 31 finished with value: 0.43594874611252654 and parameters: {'max_bin': 416, 'num_leaves': 45}. Best is trial 0 with value: 0.43594874611252654.[0m



[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 1

[32m[I 2021-11-07 01:12:15,924][0m Trial 32 finished with value: 0.43594874611252654 and parameters: {'max_bin': 303, 'num_leaves': 67}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:16,084][0m Trial 33 finished with value: 0.43594874611252654 and parameters: {'max_bin': 456, 'num_leaves': 41}. Best is trial 0 with value: 0.43594874611252654.[0m


[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]

[32m[I 2021-11-07 01:12:16,254][0m Trial 34 finished with value: 0.43594874611252654 and parameters: {'max_bin': 461, 'num_leaves': 41}. Best is trial 0 with value: 0.43594874611252654.[0m
[32m[I 2021-11-07 01:12:16,360][0m Trial 35 finished with value: 0.43594874611252654 and parameters: {'max_bin': 495, 'num_leaves': 77}. Best is trial 0 with value: 0.43594874611252654.[0m


[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10

[32m[I 2021-11-07 01:12:16,556][0m Trial 36 finished with value: 0.43594874611252654 and parameters: {'max_bin': 495, 'num_leaves': 90}. Best is trial 0 with value: 0.43594874611252654.[0m


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can se

[32m[I 2021-11-07 01:12:16,684][0m Trial 37 finished with value: 0.43594874611252654 and parameters: {'max_bin': 436, 'num_leaves': 35}. Best is trial 0 with value: 0.43594874611252654.[0m


[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505696	valid_1's binary_logloss: 0.532008
[20]	training's binary_logloss: 0.422967	valid_1's binary_logloss: 0.478904
[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features

[32m[I 2021-11-07 01:12:16,878][0m Trial 38 finished with value: 0.43594874611252654 and parameters: {'max_bin': 324, 'num_leaves': 43}. Best is trial 0 with value: 0.43594874611252654.[0m


[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949
[LightGBM] [Info] Number of positive: 239, number of negative: 384
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 205
[LightGBM] [Info] Number of data points in the train set: 623, number of used features: 9
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.383628 -> initscore=-0.474179
[LightGBM] [Info] Start training from score -0.474179
Training until validation scores don't improve for 10 rounds
[10]

[32m[I 2021-11-07 01:12:17,097][0m Trial 39 finished with value: 0.43594874611252654 and parameters: {'max_bin': 327, 'num_leaves': 43}. Best is trial 0 with value: 0.43594874611252654.[0m


[30]	training's binary_logloss: 0.373496	valid_1's binary_logloss: 0.448045
[40]	training's binary_logloss: 0.342723	valid_1's binary_logloss: 0.439465
[50]	training's binary_logloss: 0.318708	valid_1's binary_logloss: 0.438633
[60]	training's binary_logloss: 0.299108	valid_1's binary_logloss: 0.436609
[70]	training's binary_logloss: 0.282689	valid_1's binary_logloss: 0.437435
Early stopping, best iteration is:
[61]	training's binary_logloss: 0.2972	valid_1's binary_logloss: 0.435949


In [31]:
y_pred = (y_pred > 0.5).astype(int)

sub = pd.read_csv('../data/gender_submission.csv')

sub['Survived'] = y_pred
sub.to_csv('submission_lightgbm_optuna.csv', index=False)

sub.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,0
