In [1]:
from pathlib import Path
from typing import List, Dict, Union
import os
import sys

import optuna
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import log_loss, accuracy_score
from sklearn.model_selection import cross_validate, StratifiedKFold, validation_curve
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier, early_stopping

sys.path.append('../')
from src.models.utils import fix_seed
from src.data.submission import to_submission
from src.data.prepare import Create5FoldDataFrame

In [2]:
class LGMObjective:
    """Optuna objective: mean cross-validated log loss of a LightGBM-style classifier.

    Each call samples one hyperparameter set from ``trial``, applies it to
    ``model`` via ``set_params``, refits the model on every CV split with early
    stopping monitored on the held-out part, and returns the mean log loss over
    the held-out parts (lower is better).

    Args:
        model: Estimator exposing ``set_params``, ``fit`` (accepting
            ``eval_set``, ``callbacks`` and ``eval_metric``) and
            ``predict_proba``. The same instance is reused and refitted for
            every split and every trial.
        X: Feature table; rows are selected with ``.iloc``.
        y: Labels aligned with ``X``; rows are selected with ``.iloc``.
        cv: Splitter exposing ``split(X, y)``. ``None`` (default) means
            ``StratifiedKFold(n_splits=5, shuffle=True, random_state=0)``.
        stopping_rounds: Patience forwarded to ``early_stopping``.
    """

    def __init__(
        self,
        model,
        X,
        y,
        cv=None,
        stopping_rounds=50
    ):
        self.model = model
        self.X = X
        self.y = y
        # Build the default splitter per instance instead of once in the
        # signature, so instances never share a splitter object.
        if cv is None:
            cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
        self.cv = cv
        self.stopping_rounds = stopping_rounds

    def __call__(self, trial):
        """Sample hyperparameters from ``trial`` and return the mean CV log loss."""
        params = {
            'num_leaves': trial.suggest_int('num_leaves', 10, 200),
            'subsample_freq': trial.suggest_int('subsample_freq', 1, 5),
            'subsample': trial.suggest_float('subsample', 0.1, 1),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 1),
            'min_child_samples': trial.suggest_int('min_child_samples', 1, 60),
            'max_depth': trial.suggest_int('max_depth', 1, 30),
            'reg_alpha': trial.suggest_float('reg_alpha', 0.0001, 1, log=True),
            # Must be registered under its own name: re-using 'reg_alpha' here
            # would hand back the reg_alpha value and leave 'reg_lambda' out of
            # the trial's recorded params.
            'reg_lambda': trial.suggest_float('reg_lambda', 0.0001, 1, log=True),
        }
        self.model.set_params(**params)
        scores = []
        for train_idx, valid_idx in self.cv.split(self.X, self.y):
            X_valid = self.X.iloc[valid_idx]
            y_valid = self.y.iloc[valid_idx]
            self.model.fit(
                X=self.X.iloc[train_idx],
                y=self.y.iloc[train_idx],
                eval_set=(X_valid, y_valid),
                callbacks=[early_stopping(stopping_rounds=self.stopping_rounds, verbose=False)],
                eval_metric='binary_logloss'
            )
            # Probability of the positive class (column 1) on the held-out part.
            y_pred_proba = self.model.predict_proba(X_valid)[:, 1]
            scores.append(log_loss(y_valid, y_pred_proba))
        return np.mean(scores)


class RFObjective:
    """Optuna objective: cross-validated log loss of a RandomForest-style classifier.

    Each call samples one hyperparameter set from ``trial``, applies it to
    ``model`` via ``set_params`` and scores it with ``cross_validate`` using
    the ``'neg_log_loss'`` scorer. The sign is flipped so the returned value is
    a loss to be minimized.

    Args:
        model: Estimator exposing ``set_params`` and usable by
            ``cross_validate``. The same instance is reused for every trial.
        X: Feature table passed to ``cross_validate``.
        y: Labels passed to ``cross_validate``.
        cv: Forwarded unchanged to ``cross_validate(cv=...)``. Defaults to 3,
            the value that used to be hard-coded.
    """

    def __init__(self, model, X, y, cv=3):
        self.model = model
        self.X = X
        self.y = y
        self.cv = cv

    def __call__(self, trial):
        """Sample hyperparameters from ``trial`` and return the mean CV log loss."""
        params = {
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 100),
            'max_depth': trial.suggest_int('max_depth', 2, 200),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 100),
            'max_features': trial.suggest_float('max_features', 0.1, 1),
            'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 10, 200)
        }
        self.model.set_params(**params)
        scores = cross_validate(
            self.model, self.X, self.y, scoring='neg_log_loss', cv=self.cv
        )
        # 'neg_log_loss' is a negated loss; negate the mean to get a positive loss.
        return -np.mean(scores['test_score'])


def train_and_eval_lgm(X_train, y_train, X_valid, y_valid, X_test, params, stopping_rounds=50):
    """Fit an ``LGBMClassifier`` with early stopping and predict on the test set.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_valid: Features used only as the early-stopping evaluation set.
        y_valid: Labels used only as the early-stopping evaluation set.
        X_test: Features to predict on.
        params: Keyword arguments forwarded to ``LGBMClassifier``.
        stopping_rounds: Patience forwarded to ``early_stopping``.

    Returns:
        A tuple ``(proba, model)``: column 1 of ``model.predict_proba(X_test)``
        (the positive-class probability) and the fitted model.
    """
    model = LGBMClassifier(**params)
    model.fit(
        X=X_train,
        y=y_train,
        eval_set=(X_valid, y_valid),
        callbacks=[early_stopping(stopping_rounds=stopping_rounds, verbose=True)],
        eval_metric='binary_logloss'
    )
    # The previous hard-label prediction on X_valid was never used, so it has
    # been dropped; callers score the validation set from the returned model.
    y_pred_test = model.predict_proba(X_test)
    return y_pred_test[:, 1], model

## 学習用データ作成用処理

1. ./dataset/processed/5fold_stratified_mmbt_seed_0/ 配下のfoldごとのtrain.csv, valid.csv, test.csvを読み込み、辞書として保持(データ構造は下記)

    ```
    {
        "fold_1": {
            "train": 学習用データフレーム,
            "valid": 検証用データフレーム,
            "test": テストデータフレーム(提出用データ),
        },
        ...
        "fold_5": fold_1と同様の構成
    }
    ```
2. ./dataset/processed/配下の以下のcsvをidをキーとして手順1で取得したデータフレームと外部結合

    - train(test)_has_person.csv: 文章中に人名を表す単語が出現したか

    - train(test)_od_counts.csv: yolov5で画像ごとに抜き出した物体数

    - train(test)_tfidf_vector.csv: tfidfベクトルの統計量
    
    - train(test)_text_len.csv: 文章の長さ

    - train(test)_similarity.csv: CLIPを使用した画像と文章のcosine類似度(あまり関係なさそうなので外してもいいかも)

3. ./dataset/csv/train.csvを使用して、学習用データフレーム, 検証用データフレームにラベルを付与

4. 学習用, 検証用, テスト用データフレームから, idを抜く。学習用, 検証用データフレームにおいては、X: 特徴量のみのデータフレーム, y: ラベル としておく。

最終的に、以下のようなデータが得られる。

```
{
    "fold_1": {
        "train": {
            "X": 特徴量データフレーム,
            "y": ラベル,
        },
        "valid": {
            "X": 特徴量データフレーム,
            "y": ラベル,
        },
        "test": {
            "X": 特徴量データフレーム
        },
    },
    ...
    "fold_5": fold_1と同様の構成
}
```

In [3]:
# Extra feature tables stored as train_<name>.csv / test_<name>.csv under
# ../dataset/processed/. Both path lists are derived from this one ordered list
# so the train and test sides cannot drift out of sync.
FEATURE_NAMES = ['has_person', 'od_counts', 'text_len', 'tfidf_vector', 'similarity']
train_feature_csv_path_list = [
    f'../dataset/processed/train_{feature_name}.csv' for feature_name in FEATURE_NAMES
]
test_feature_csv_path_list = [
    f'../dataset/processed/test_{feature_name}.csv' for feature_name in FEATURE_NAMES
]

# Project helper (src.data.prepare). It receives the fold directory, the
# labelled train CSV and the two feature-path lists; calling the instance
# builds the nested fold dict.
# NOTE(review): the exact merge/label logic lives in Create5FoldDataFrame and is
# not visible here; the expected result layout is described in the markdown cell
# above this one.
create_train_valid_test_dict = Create5FoldDataFrame(
    '../dataset/processed/5fold_stratified_mmbt_seed_0/',
    '../dataset/csv/train.csv',
    train_feature_csv_path_list,
    test_feature_csv_path_list
)
train_valid_test_dict = create_train_valid_test_dict()

In [3]:
# Sanity checks on the nested fold dict before any training:
#   * the top level holds exactly fold_1 .. fold_5,
#   * every X table has the same columns in the same order as fold_1/train,
#   * no X table (and no y for the labelled phases) contains missing values.
assert ['fold_1', 'fold_2', 'fold_3', 'fold_4', 'fold_5'] == sorted(train_valid_test_dict.keys()), '第一階層のkeyが想定通りか'
column_list = train_valid_test_dict['fold_1']['train']['X'].columns.to_list()
for fold_name, fold_data in train_valid_test_dict.items():
    for phase, phase_data in fold_data.items():
        assert phase_data['X'].columns.to_list() == column_list, '全てのカラムの順番が同じか'
        # .isna().to_numpy().any() checks every cell directly and does not
        # depend on how np.sum reduces a DataFrame when no axis is given.
        assert not phase_data['X'].isna().to_numpy().any(), \
            f'missing values found in X of {fold_name}/{phase}'
        # Only the labelled phases are checked for y.
        if phase != 'test':
            assert not phase_data['y'].isna().to_numpy().any(), \
                f'missing values found in y of {fold_name}/{phase}'


## 学習(lightGBM)

- モデル: lightGBM

- 手順(下記をfold数分実施する。)

    1. 学習用データにおいて、cross validation結果からハイパーパラメータを求める

    2. 学習用データ, 検証用データ(early stopping用)でモデル学習

    3. テストデータで予測して、submission用csv作成

In [4]:
# --- Run configuration for the LightGBM experiment ---
SEED = 0
N_TRIAL = 50
base_params = dict(learning_rate=0.01, random_state=SEED, n_estimators=20000)
stopping_rounds = 100

# Per-fold accumulators: test-set probabilities, fitted models, validation log loss.
y_pred_result, models, log_loss_list = [], [], []

for fold_name, fold_data in train_valid_test_dict.items():
    print(f'fold: {fold_name}')
    X_fold_train, y_fold_train = fold_data['train']['X'], fold_data['train']['y']
    X_fold_valid, y_fold_valid = fold_data['valid']['X'], fold_data['valid']['y']

    # Step 1: hyperparameter search with Optuna, cross-validating inside the
    # fold's training split only.
    model = LGBMClassifier(**base_params)
    objective = LGMObjective(
        model,
        X_fold_train,
        y_fold_train,
        stopping_rounds=stopping_rounds
    )
    study = optuna.create_study(
        sampler=optuna.samplers.TPESampler(seed=SEED),
        direction='minimize'
    )
    study.optimize(objective, n_trials=N_TRIAL)
    best_trial = study.best_trial
    best_params = best_trial.params
    best_score = best_trial.value
    print(f'最適パラメータ {best_params}\nスコア {best_score}')

    # Step 2: refit on the whole training split, early-stopping on the fold's
    # validation split. base_params is merged last, so it wins on any key clash.
    y_pred_proba, model = train_and_eval_lgm(
        X_fold_train,
        y_fold_train,
        X_fold_valid,
        y_fold_valid,
        fold_data['test']['X'],
        {**best_params, **base_params},
        stopping_rounds=stopping_rounds
    )

    # Step 3: record validation log loss and keep the fold's test predictions.
    y_val_pred = model.predict_proba(X_fold_valid)[:, 1]
    loss = log_loss(y_fold_valid, y_val_pred)
    log_loss_list.append(loss)
    y_pred_result.append(y_pred_proba)
    models.append(model)

# Average the test-set probabilities across folds and report mean/std of the
# per-fold validation log loss.
y_pred_result = np.mean(y_pred_result, axis=0)
print(np.mean(log_loss_list), np.std(log_loss_list))
# NOTE(review): to_submission is a project helper; presumably it writes the
# averaged predictions to the given CSV using the sample submission as a
# template -- confirm in src.data.submission.
to_submission('../dataset/csv/sample_submission.csv', y_pred_result, '../results/turned_lgm_with_features.csv')

[32m[I 2022-10-07 08:36:02,505][0m A new study created in memory with name: no-name-537ff7c5-aee3-44d1-85dd-abe4f9b27854[0m


fold: fold_3


[32m[I 2022-10-07 08:38:12,004][0m Trial 0 finished with value: 0.5380312494618316 and parameters: {'num_leaves': 114, 'subsample_freq': 4, 'subsample': 0.6424870384644795, 'colsample_bytree': 0.5903948646972071, 'min_child_samples': 26, 'max_depth': 20, 'reg_alpha': 0.0056279320474151686, 'reg_lambda': 0.36905577292137587}. Best is trial 0 with value: 0.5380312494618316.[0m
[32m[I 2022-10-07 08:40:35,540][0m Trial 1 finished with value: 0.5392664278789595 and parameters: {'num_leaves': 194, 'subsample_freq': 2, 'subsample': 0.8125525342743981, 'colsample_bytree': 0.5760054277776141, 'min_child_samples': 35, 'max_depth': 28, 'reg_alpha': 0.00019237305096546508, 'reg_lambda': 0.00022310905607443037}. Best is trial 0 with value: 0.5380312494618316.[0m
[32m[I 2022-10-07 08:41:42,238][0m Trial 2 finished with value: 0.5377345585220241 and parameters: {'num_leaves': 13, 'subsample_freq': 5, 'subsample': 0.8003410758548655, 'colsample_bytree': 0.8830109334221372, 'min_child_samples':

最適パラメータ {'num_leaves': 70, 'subsample_freq': 2, 'subsample': 0.6131770933760917, 'colsample_bytree': 0.4947413621160883, 'min_child_samples': 60, 'max_depth': 4, 'reg_alpha': 0.000684710557668405, 'reg_lambda': 0.00044181257379025503}
スコア 0.5358754853490163
Training until validation scores don't improve for 100 rounds


[32m[I 2022-10-07 09:22:20,749][0m A new study created in memory with name: no-name-44ee1505-8857-444c-bea0-2552ce3db255[0m


Early stopping, best iteration is:
[126]	valid_0's binary_logloss: 0.63945
fold: fold_1


[32m[I 2022-10-07 09:24:17,728][0m Trial 0 finished with value: 0.5268678003189899 and parameters: {'num_leaves': 114, 'subsample_freq': 4, 'subsample': 0.6424870384644795, 'colsample_bytree': 0.5903948646972071, 'min_child_samples': 26, 'max_depth': 20, 'reg_alpha': 0.0056279320474151686, 'reg_lambda': 0.36905577292137587}. Best is trial 0 with value: 0.5268678003189899.[0m
[32m[I 2022-10-07 09:26:41,581][0m Trial 1 finished with value: 0.526256306816663 and parameters: {'num_leaves': 194, 'subsample_freq': 2, 'subsample': 0.8125525342743981, 'colsample_bytree': 0.5760054277776141, 'min_child_samples': 35, 'max_depth': 28, 'reg_alpha': 0.00019237305096546508, 'reg_lambda': 0.00022310905607443037}. Best is trial 1 with value: 0.526256306816663.[0m
[32m[I 2022-10-07 09:27:44,922][0m Trial 2 finished with value: 0.5267816168160981 and parameters: {'num_leaves': 13, 'subsample_freq': 5, 'subsample': 0.8003410758548655, 'colsample_bytree': 0.8830109334221372, 'min_child_samples': 5

最適パラメータ {'num_leaves': 196, 'subsample_freq': 4, 'subsample': 0.7653372214584715, 'colsample_bytree': 0.1352690130288886, 'min_child_samples': 17, 'max_depth': 4, 'reg_alpha': 0.0015295398277813735, 'reg_lambda': 0.0002984770033451218}
スコア 0.5236324103358526
Training until validation scores don't improve for 100 rounds


[32m[I 2022-10-07 10:01:34,257][0m A new study created in memory with name: no-name-7b4c7d47-97bd-413b-a417-2e9fd716e0ee[0m


Early stopping, best iteration is:
[108]	valid_0's binary_logloss: 0.644246
fold: fold_5


[32m[I 2022-10-07 10:03:31,877][0m Trial 0 finished with value: 0.5642762724778482 and parameters: {'num_leaves': 114, 'subsample_freq': 4, 'subsample': 0.6424870384644795, 'colsample_bytree': 0.5903948646972071, 'min_child_samples': 26, 'max_depth': 20, 'reg_alpha': 0.0056279320474151686, 'reg_lambda': 0.36905577292137587}. Best is trial 0 with value: 0.5642762724778482.[0m
[32m[I 2022-10-07 10:05:38,508][0m Trial 1 finished with value: 0.5661273746089118 and parameters: {'num_leaves': 194, 'subsample_freq': 2, 'subsample': 0.8125525342743981, 'colsample_bytree': 0.5760054277776141, 'min_child_samples': 35, 'max_depth': 28, 'reg_alpha': 0.00019237305096546508, 'reg_lambda': 0.00022310905607443037}. Best is trial 0 with value: 0.5642762724778482.[0m
[32m[I 2022-10-07 10:07:02,590][0m Trial 2 finished with value: 0.5652443170050153 and parameters: {'num_leaves': 13, 'subsample_freq': 5, 'subsample': 0.8003410758548655, 'colsample_bytree': 0.8830109334221372, 'min_child_samples':

最適パラメータ {'num_leaves': 34, 'subsample_freq': 2, 'subsample': 0.5521811708659964, 'colsample_bytree': 0.4710910954564057, 'min_child_samples': 26, 'max_depth': 7, 'reg_alpha': 0.02024983706622042, 'reg_lambda': 0.7998811455731946}
スコア 0.5632476226691987
Training until validation scores don't improve for 100 rounds


[32m[I 2022-10-07 10:37:28,345][0m A new study created in memory with name: no-name-2dca60e9-8f20-4aee-b437-f562e203e704[0m


Early stopping, best iteration is:
[119]	valid_0's binary_logloss: 0.640907
fold: fold_4


[32m[I 2022-10-07 10:39:33,315][0m Trial 0 finished with value: 0.5427412708408965 and parameters: {'num_leaves': 114, 'subsample_freq': 4, 'subsample': 0.6424870384644795, 'colsample_bytree': 0.5903948646972071, 'min_child_samples': 26, 'max_depth': 20, 'reg_alpha': 0.0056279320474151686, 'reg_lambda': 0.36905577292137587}. Best is trial 0 with value: 0.5427412708408965.[0m
[32m[I 2022-10-07 10:42:01,490][0m Trial 1 finished with value: 0.5440965808711645 and parameters: {'num_leaves': 194, 'subsample_freq': 2, 'subsample': 0.8125525342743981, 'colsample_bytree': 0.5760054277776141, 'min_child_samples': 35, 'max_depth': 28, 'reg_alpha': 0.00019237305096546508, 'reg_lambda': 0.00022310905607443037}. Best is trial 0 with value: 0.5427412708408965.[0m
[32m[I 2022-10-07 10:43:08,564][0m Trial 2 finished with value: 0.5428775657373013 and parameters: {'num_leaves': 13, 'subsample_freq': 5, 'subsample': 0.8003410758548655, 'colsample_bytree': 0.8830109334221372, 'min_child_samples':

最適パラメータ {'num_leaves': 80, 'subsample_freq': 2, 'subsample': 0.3728565525188909, 'colsample_bytree': 0.4461359344653275, 'min_child_samples': 60, 'max_depth': 5, 'reg_alpha': 0.22676501116213518, 'reg_lambda': 0.0006740460328341064}
スコア 0.5394715008850011
Training until validation scores don't improve for 100 rounds


[32m[I 2022-10-07 11:13:07,028][0m A new study created in memory with name: no-name-244627ea-a2fe-4969-a889-6594a3666f45[0m


Early stopping, best iteration is:
[113]	valid_0's binary_logloss: 0.644115
fold: fold_2


[32m[I 2022-10-07 11:14:51,119][0m Trial 0 finished with value: 0.5794643349263292 and parameters: {'num_leaves': 114, 'subsample_freq': 4, 'subsample': 0.6424870384644795, 'colsample_bytree': 0.5903948646972071, 'min_child_samples': 26, 'max_depth': 20, 'reg_alpha': 0.0056279320474151686, 'reg_lambda': 0.36905577292137587}. Best is trial 0 with value: 0.5794643349263292.[0m
[32m[I 2022-10-07 11:16:52,759][0m Trial 1 finished with value: 0.5796519197414434 and parameters: {'num_leaves': 194, 'subsample_freq': 2, 'subsample': 0.8125525342743981, 'colsample_bytree': 0.5760054277776141, 'min_child_samples': 35, 'max_depth': 28, 'reg_alpha': 0.00019237305096546508, 'reg_lambda': 0.00022310905607443037}. Best is trial 0 with value: 0.5794643349263292.[0m
[32m[I 2022-10-07 11:17:34,954][0m Trial 2 finished with value: 0.5796956043078756 and parameters: {'num_leaves': 13, 'subsample_freq': 5, 'subsample': 0.8003410758548655, 'colsample_bytree': 0.8830109334221372, 'min_child_samples':

最適パラメータ {'num_leaves': 196, 'subsample_freq': 4, 'subsample': 0.7653372214584715, 'colsample_bytree': 0.1352690130288886, 'min_child_samples': 17, 'max_depth': 4, 'reg_alpha': 0.0015295398277813735, 'reg_lambda': 0.0002984770033451218}
スコア 0.5780812515863076
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[121]	valid_0's binary_logloss: 0.650532
0.6438499458142858 0.0038182486379954345


## 学習(RandomForest)

- モデル: RandomForest

- 手順(下記をfold数分実施する。)

    1. 学習用データにおいて、cross validation結果からハイパーパラメータを求める

    2. 学習用データ, 検証用データ(log loss計算用)でモデル学習

    3. テストデータで予測して、submission用csv作成

In [4]:
# --- Run configuration for the RandomForest experiment ---
SEED = 0
N_TRIAL = 10
base_params = dict(random_state=SEED, n_estimators=500, n_jobs=-1)

# Per-fold accumulators: test-set probabilities and validation log loss.
y_pred_result, log_loss_list = [], []

for fold_name, fold_data in train_valid_test_dict.items():
    print(f'fold: {fold_name}')
    X_fold_train = fold_data['train']['X']
    y_fold_train = fold_data['train']['y']

    # Step 1: hyperparameter search with Optuna, cross-validating inside the
    # fold's training split only.
    model = RandomForestClassifier(**base_params)
    objective = RFObjective(
        model,
        X_fold_train,
        y_fold_train,
    )
    study = optuna.create_study(
        sampler=optuna.samplers.TPESampler(seed=SEED),
        direction='minimize'
    )
    study.optimize(objective, n_trials=N_TRIAL)
    best_trial = study.best_trial
    best_params = best_trial.params
    best_score = best_trial.value
    print(f'最適パラメータ {best_params}\nスコア {best_score}')

    # Step 2: refit a fresh forest on the whole training split with the best
    # parameters. best_params is merged last, so it wins on any key clash.
    model = RandomForestClassifier(**{**base_params, **best_params})
    model.fit(X_fold_train, y_fold_train)

    # Step 3: positive-class probabilities for the validation and test sets.
    # The validation split is used only to compute the reported log loss.
    y_val_pred = model.predict_proba(fold_data['valid']['X'])[:, 1]
    y_pred_proba = model.predict_proba(fold_data['test']['X'])[:, 1]
    loss = log_loss(fold_data['valid']['y'], y_val_pred)
    log_loss_list.append(loss)
    y_pred_result.append(y_pred_proba)

# Average the test-set probabilities across folds and report mean/std of the
# per-fold validation log loss.
y_pred_result = np.mean(y_pred_result, axis=0)
print(np.mean(log_loss_list), np.std(log_loss_list))
# NOTE(review): to_submission is a project helper; presumably it writes the
# averaged predictions to the given CSV using the sample submission as a
# template -- confirm in src.data.submission.
to_submission('../dataset/csv/sample_submission.csv', y_pred_result, '../results/turned_RFC_with_features.csv')

[32m[I 2022-10-10 00:00:37,281][0m A new study created in memory with name: no-name-d7eeeb24-113b-41ed-adfb-8abda96201fa[0m


fold: fold_3


[32m[I 2022-10-10 00:08:34,008][0m Trial 0 finished with value: 0.5488132123121616 and parameters: {'min_samples_split': 56, 'max_depth': 144, 'min_samples_leaf': 61, 'max_features': 0.5903948646972071, 'max_leaf_nodes': 90}. Best is trial 0 with value: 0.5488132123121616.[0m
[32m[I 2022-10-10 00:20:34,803][0m Trial 1 finished with value: 0.5502984745688081 and parameters: {'min_samples_split': 65, 'max_depth': 89, 'min_samples_leaf': 90, 'max_features': 0.9672964844509263, 'max_leaf_nodes': 83}. Best is trial 0 with value: 0.5488132123121616.[0m
[32m[I 2022-10-10 00:29:53,575][0m Trial 2 finished with value: 0.5567393482259129 and parameters: {'min_samples_split': 80, 'max_depth': 107, 'min_samples_leaf': 57, 'max_features': 0.933036974463395, 'max_leaf_nodes': 23}. Best is trial 0 with value: 0.5488132123121616.[0m
[32m[I 2022-10-10 00:38:17,704][0m Trial 3 finished with value: 0.5517770372673181 and parameters: {'min_samples_split': 10, 'max_depth': 6, 'min_samples_leaf':

最適パラメータ {'min_samples_split': 56, 'max_depth': 144, 'min_samples_leaf': 61, 'max_features': 0.5903948646972071, 'max_leaf_nodes': 90}
スコア 0.5488132123121616


[32m[I 2022-10-10 01:23:04,250][0m A new study created in memory with name: no-name-c6936667-1904-4786-8435-a786eae940cd[0m


fold: fold_1


[32m[I 2022-10-10 01:30:51,192][0m Trial 0 finished with value: 0.5342149747948985 and parameters: {'min_samples_split': 56, 'max_depth': 144, 'min_samples_leaf': 61, 'max_features': 0.5903948646972071, 'max_leaf_nodes': 90}. Best is trial 0 with value: 0.5342149747948985.[0m
[32m[I 2022-10-10 01:42:32,243][0m Trial 1 finished with value: 0.5355417451678419 and parameters: {'min_samples_split': 65, 'max_depth': 89, 'min_samples_leaf': 90, 'max_features': 0.9672964844509263, 'max_leaf_nodes': 83}. Best is trial 0 with value: 0.5342149747948985.[0m
[32m[I 2022-10-10 01:51:39,912][0m Trial 2 finished with value: 0.5413501108980824 and parameters: {'min_samples_split': 80, 'max_depth': 107, 'min_samples_leaf': 57, 'max_features': 0.933036974463395, 'max_leaf_nodes': 23}. Best is trial 0 with value: 0.5342149747948985.[0m
[32m[I 2022-10-10 01:59:57,602][0m Trial 3 finished with value: 0.5368758712838156 and parameters: {'min_samples_split': 10, 'max_depth': 6, 'min_samples_leaf':

最適パラメータ {'min_samples_split': 56, 'max_depth': 144, 'min_samples_leaf': 61, 'max_features': 0.5903948646972071, 'max_leaf_nodes': 90}
スコア 0.5342149747948985


[32m[I 2022-10-10 02:44:06,799][0m A new study created in memory with name: no-name-28547d92-1d78-42e6-8567-9fc38d5d5f24[0m


fold: fold_5


[32m[I 2022-10-10 02:51:51,781][0m Trial 0 finished with value: 0.5702629077140556 and parameters: {'min_samples_split': 56, 'max_depth': 144, 'min_samples_leaf': 61, 'max_features': 0.5903948646972071, 'max_leaf_nodes': 90}. Best is trial 0 with value: 0.5702629077140556.[0m
[32m[I 2022-10-10 03:03:27,672][0m Trial 1 finished with value: 0.5713555838473706 and parameters: {'min_samples_split': 65, 'max_depth': 89, 'min_samples_leaf': 90, 'max_features': 0.9672964844509263, 'max_leaf_nodes': 83}. Best is trial 0 with value: 0.5702629077140556.[0m
[32m[I 2022-10-10 03:12:39,687][0m Trial 2 finished with value: 0.575409897553408 and parameters: {'min_samples_split': 80, 'max_depth': 107, 'min_samples_leaf': 57, 'max_features': 0.933036974463395, 'max_leaf_nodes': 23}. Best is trial 0 with value: 0.5702629077140556.[0m
[32m[I 2022-10-10 03:20:57,561][0m Trial 3 finished with value: 0.5720978923105774 and parameters: {'min_samples_split': 10, 'max_depth': 6, 'min_samples_leaf': 

最適パラメータ {'min_samples_split': 56, 'max_depth': 144, 'min_samples_leaf': 61, 'max_features': 0.5903948646972071, 'max_leaf_nodes': 90}
スコア 0.5702629077140556


[32m[I 2022-10-10 04:05:20,238][0m A new study created in memory with name: no-name-45262d88-3ac0-487b-b1c2-d022961108a9[0m


fold: fold_4


[32m[I 2022-10-10 04:13:06,946][0m Trial 0 finished with value: 0.5520307317885959 and parameters: {'min_samples_split': 56, 'max_depth': 144, 'min_samples_leaf': 61, 'max_features': 0.5903948646972071, 'max_leaf_nodes': 90}. Best is trial 0 with value: 0.5520307317885959.[0m
[32m[I 2022-10-10 04:24:51,378][0m Trial 1 finished with value: 0.5532697746333031 and parameters: {'min_samples_split': 65, 'max_depth': 89, 'min_samples_leaf': 90, 'max_features': 0.9672964844509263, 'max_leaf_nodes': 83}. Best is trial 0 with value: 0.5520307317885959.[0m
[32m[I 2022-10-10 04:34:06,541][0m Trial 2 finished with value: 0.5590056667320659 and parameters: {'min_samples_split': 80, 'max_depth': 107, 'min_samples_leaf': 57, 'max_features': 0.933036974463395, 'max_leaf_nodes': 23}. Best is trial 0 with value: 0.5520307317885959.[0m
[32m[I 2022-10-10 04:42:23,746][0m Trial 3 finished with value: 0.5548165646218598 and parameters: {'min_samples_split': 10, 'max_depth': 6, 'min_samples_leaf':

最適パラメータ {'min_samples_split': 56, 'max_depth': 144, 'min_samples_leaf': 61, 'max_features': 0.5903948646972071, 'max_leaf_nodes': 90}
スコア 0.5520307317885959


[32m[I 2022-10-10 05:27:00,498][0m A new study created in memory with name: no-name-06ae5983-c8bb-4381-aab3-11957d3fa9ce[0m


fold: fold_2


[32m[I 2022-10-10 05:34:55,353][0m Trial 0 finished with value: 0.5821342047643232 and parameters: {'min_samples_split': 56, 'max_depth': 144, 'min_samples_leaf': 61, 'max_features': 0.5903948646972071, 'max_leaf_nodes': 90}. Best is trial 0 with value: 0.5821342047643232.[0m
[32m[I 2022-10-10 05:46:43,383][0m Trial 1 finished with value: 0.5824440671607337 and parameters: {'min_samples_split': 65, 'max_depth': 89, 'min_samples_leaf': 90, 'max_features': 0.9672964844509263, 'max_leaf_nodes': 83}. Best is trial 0 with value: 0.5821342047643232.[0m
[32m[I 2022-10-10 05:55:53,388][0m Trial 2 finished with value: 0.5843843181485414 and parameters: {'min_samples_split': 80, 'max_depth': 107, 'min_samples_leaf': 57, 'max_features': 0.933036974463395, 'max_leaf_nodes': 23}. Best is trial 0 with value: 0.5821342047643232.[0m
[32m[I 2022-10-10 06:04:10,843][0m Trial 3 finished with value: 0.5825731666445948 and parameters: {'min_samples_split': 10, 'max_depth': 6, 'min_samples_leaf':

最適パラメータ {'min_samples_split': 37, 'max_depth': 88, 'min_samples_leaf': 70, 'max_features': 0.15420292446634287, 'max_leaf_nodes': 137}
スコア 0.5814857052732891
0.6609619193312888 0.006896035222634583
