In [1]:
# Set this to the file name of this notebook.
# It is used further down to work out where the project's code directory is.
notebook_name = 'run_lgb.ipynb'

In [3]:
import os
import sys

# Locate the project's ../code directory and make it importable.
# abspath() resolves notebook_name against the current working directory, so
# for a bare file name notebook_dir is simply the directory the kernel runs in.
notebook_dir = os.path.dirname(os.path.abspath(notebook_name))
code_dir = os.path.abspath(os.path.join(notebook_dir, '../code'))

# Guard the append so that re-running this cell does not keep adding duplicate
# entries to sys.path.
if code_dir not in sys.path:
    sys.path.append(code_dir)

In [8]:
# Import the required libraries and project modules
import pandas as pd
import numpy as np

from runner import Runner
from runner_gfs import GFSRunner
from runner_optuna import OptunaRunner
from util import Logger, Util, Submission
from model_lgb import ModelLGB
from easyplot import easyplot

# Never truncate columns when a DataFrame is displayed (None removes the cap).
pd.options.display.max_columns = None

# Project logger from util.
logger = Logger()

In [5]:
# Load the training and test tables; the paths are relative to the working
# directory of the notebook kernel.
train, test = (
    pd.read_csv('../input/train.csv'),
    pd.read_csv('../input/test.csv'),
)

# Preview the first rows (head() shows 5 rows by default).
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [10]:
# Explore the training data with the project-local easyplot helper.
# In the recorded run this rendered an HTML heading plus a tabbed widget with
# axis dropdowns built from the DataFrame's columns.
# NOTE(review): what each tab offers is defined in the easyplot module — check there.
ep = easyplot(train)
ep.display_all()

HTML(value='\n            <h2 style="text-align:left; margin-bottom: 0;">各種グラフ</h2>\n            <hr style="bo…

Tab(children=(VBox(children=(HBox(children=(Dropdown(description='x軸:', options=('PassengerId', 'Survived', 'P…

In [None]:
# Light preprocessing: drop unused columns and integer-encode the categorical columns

def create_features(df):
    """Return a copy of ``df`` with Sex, Embarked and Pclass integer-encoded.

    The codes come from fixed lookup tables instead of from the categories that
    happen to be present in ``df``. Deriving them per frame (as
    ``astype('category').cat.codes`` does) can give the same value different
    codes in train and test whenever one frame lacks a category. The tables
    reproduce the sorted-category codes of the full data set, so results are
    unchanged when every category is present.

    Args:
        df: DataFrame containing the columns ``Sex``, ``Embarked`` and
            ``Pclass``. It is not modified.

    Returns:
        A new DataFrame in which those three columns are ``int8`` codes.
        Missing or unrecognised values are encoded as ``-1``.

    Raises:
        KeyError: if one of the three columns is absent from ``df``.
    """
    code_tables = {
        'Sex': {'female': 0, 'male': 1},
        'Embarked': {'C': 0, 'Q': 1, 'S': 2},
        'Pclass': {1: 0, 2: 1, 3: 2},
    }

    # Work on a copy so the caller's frame is left untouched.
    encoded = df.copy()
    for column, codes in code_tables.items():
        # map() yields NaN for anything outside the table; -1 mirrors the code
        # that categorical encoding assigns to missing values.
        encoded[column] = encoded[column].map(codes).fillna(-1).astype('int8')
    return encoded

# Identifier / free-text columns that are not used as model features.
DROP_COLS = ['PassengerId', 'Name', 'Ticket', 'Cabin']

# Build the feature frames under new names instead of overwriting `train` and
# `test`. The raw frames stay intact, so this cell can be re-run safely
# (dropping the same columns from an already-reduced frame raises KeyError).
train_features = create_features(train.drop(columns=DROP_COLS))
test_features = create_features(test.drop(columns=DROP_COLS))

y_train = train_features['Survived']
X_train = train_features.drop(columns='Survived')
X_test = test_features.copy()  # the test set has no target column

# Sanity check: train and test must expose the same features in the same order.
assert list(X_train.columns) == list(X_test.columns), 'train/test feature columns differ'

print("X_train data types:\n", X_train.dtypes)
print("\nX_test data types:\n", X_test.dtypes)


X_train data types:
 Pclass         int8
Sex            int8
Age         float64
SibSp         int64
Parch         int64
Fare        float64
Embarked       int8
dtype: object

X_test data types:
 Pclass         int8
Sex            int8
Age         float64
SibSp         int64
Parch         int64
Fare        float64
Embarked       int8
dtype: object


In [15]:
# LightGBM hyperparameters passed to the Runner below.
params_lgb = dict(
    # To choose the device explicitly, add device_type='cuda' for GPU
    # (or 'cpu' for CPU); it is left unset here.
    objective='binary',      # binary classification
    metric='auc',            # evaluation metric reported in the training log
    boosting_type='gbdt',
    learning_rate=0.01,
    # Upper bound on boosting rounds; in the recorded run early stopping ended
    # every fold long before this limit.
    num_iterations=5000,
    verbosity=-1,
)

In [16]:
# Bundle the model class, its hyperparameters and the prepared data into the
# project's cross-validation runner.
runner = Runner(
    run_name='lgb',        # label that appears in the log lines of this run
    model_cls=ModelLGB,
    params=params_lgb,
    train_x=X_train,
    train_y=y_train,
    test_x=X_test,
)

In [17]:
# Run cross-validated training, then cross-validated prediction.
# In the recorded run there were 4 folds, each fold stopped early after 50
# rounds without improvement of the validation AUC, and the overall CV score
# (0.8827) equals the mean of the four per-fold scores.
# NOTE(review): the log shows one prediction pass per fold; how those per-fold
# predictions are combined is decided inside Runner.run_predict_cv — confirm there.
runner.run_train_cv()
runner.run_predict_cv()

[2025-06-05 14:32:40] - lgb - start training cv
[2025-06-05 14:32:40] - lgb fold 0 - start training


Training until validation scores don't improve for 50 rounds
[50]	train's auc: 0.911098	eval's auc: 0.874297
[100]	train's auc: 0.920839	eval's auc: 0.888065
[150]	train's auc: 0.928115	eval's auc: 0.901535
[200]	train's auc: 0.933881	eval's auc: 0.90422
[250]	train's auc: 0.942756	eval's auc: 0.911296


[2025-06-05 14:32:42] - lgb fold 0 - end training - score 0.9140238704177324
[2025-06-05 14:32:42] - lgb fold 1 - start training


[300]	train's auc: 0.949199	eval's auc: 0.913257
Early stopping, best iteration is:
[284]	train's auc: 0.946543	eval's auc: 0.914024
Training until validation scores don't improve for 50 rounds
[50]	train's auc: 0.923572	eval's auc: 0.855967
[100]	train's auc: 0.927872	eval's auc: 0.867425
[150]	train's auc: 0.932494	eval's auc: 0.871075


[2025-06-05 14:32:43] - lgb fold 1 - end training - score 0.8758275335257172


[200]	train's auc: 0.938164	eval's auc: 0.875064
Early stopping, best iteration is:
[191]	train's auc: 0.936628	eval's auc: 0.875828


[2025-06-05 14:32:43] - lgb fold 2 - start training


Training until validation scores don't improve for 50 rounds


[2025-06-05 14:32:43] - lgb fold 2 - end training - score 0.848115769818367
[2025-06-05 14:32:43] - lgb fold 3 - start training


[50]	train's auc: 0.918021	eval's auc: 0.843363
Early stopping, best iteration is:
[13]	train's auc: 0.905909	eval's auc: 0.848116
Training until validation scores don't improve for 50 rounds
[50]	train's auc: 0.906105	eval's auc: 0.879176
[100]	train's auc: 0.92352	eval's auc: 0.886217
[150]	train's auc: 0.93194	eval's auc: 0.892271


[2025-06-05 14:32:44] - lgb fold 3 - end training - score 0.8929154143409188
[2025-06-05 14:32:44] - lgb - end training cv - score 0.8827206470256838
name:lgb	score:0.8827206470256838	score0:0.9140238704177324	score1:0.8758275335257172	score2:0.848115769818367	score3:0.8929154143409188
[2025-06-05 14:32:44] - lgb - start prediction cv
[2025-06-05 14:32:44] - lgb - start prediction fold:0
[2025-06-05 14:32:44] - lgb - end prediction fold:0
[2025-06-05 14:32:44] - lgb - start prediction fold:1
[2025-06-05 14:32:44] - lgb - end prediction fold:1
[2025-06-05 14:32:44] - lgb - start prediction fold:2
[2025-06-05 14:32:44] - lgb - end prediction fold:2
[2025-06-05 14:32:44] - lgb - start prediction fold:3
[2025-06-05 14:32:44] - lgb - end prediction fold:3
[2025-06-05 14:32:44] - lgb - end prediction cv


[200]	train's auc: 0.936794	eval's auc: 0.88845
Early stopping, best iteration is:
[170]	train's auc: 0.934084	eval's auc: 0.892915


In [18]:
# Build the submission from the 'lgb-cv' run.
# NOTE(review): 'lgb-cv' presumably refers to the CV predictions produced by the
# runner created with run_name='lgb' — confirm the naming convention and the
# output location in util.Submission.
Submission.create_submission('lgb-cv')