# exp070

In [1]:
import os
import sys
import traceback
import gc
import time
import random
import pickle
import pathlib
import subprocess
from dataclasses import dataclass
from collections import defaultdict

import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.model_selection import GroupKFold
import lightgbm as lgb
import optuna.integration.lightgbm as optuna_lgb

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import itertools

import warnings
warnings.simplefilter('ignore')



In [2]:
@dataclass
class Cfg:
    mode = "local_cv" # "local_cv" or "kaggle_inf" 
    exp_name = "exp070_hyp_opt"
    input_dir = "/mnt/predict-student-performance-from-game-play/input/"
    output_dir = "/mnt/predict-student-performance-from-game-play/output/"
    prep_dir = "/mnt/predict-student-performance-from-game-play/prep/"
    seed = 42
    n_splits = 5
    best_threshold = 0.630 # local_cvの結果を入れる
cfg = Cfg()

if cfg.mode == "local_cv":
    os.makedirs(os.path.join(cfg.output_dir, cfg.exp_name), exist_ok=True)
    os.makedirs(os.path.join(cfg.output_dir, cfg.exp_name, "cache"), exist_ok=True)

elif cfg.mode == "kaggle_inf":
    import jo_wilder_310

In [3]:
params = {
    'objective': 'binary', 
    'boosting': 'gbdt', 
    'learning_rate': 0.1, 
    'metric': 'binary_logloss', 
    'seed': cfg.seed
} 

In [4]:
level_group_list = ['0-4', '5-12', '13-22']
level_group_map = {
    "q1":"0-4", "q2":"0-4", "q3":"0-4",
    "q4":"5-12", "q5":"5-12", "q6":"5-12", "q7":"5-12", "q8":"5-12", "q9":"5-12", "q10":"5-12", "q11":"5-12", "q12":"5-12", "q13":"5-12",
    "q14":"13-22", "q15":"13-22", "q16":"13-22", "q17":"13-22", "q18":"13-22"  
}

In [5]:
with open(cfg.prep_dir + 'cat_col_lists.pkl', 'rb') as f:
    cat_col_lists = pickle.load(f) 

In [6]:
def transform_labels_df_train(labels_):
    """
    labelsデータを整形する
    """
    labels = labels_.copy()
    labels["question"] = labels["session_id"].apply(lambda x: x.split("_")[1].replace("q", "")).astype(int)
    labels["session_id"] = labels["session_id"].apply(lambda x: x.split("_")[0]).astype(int)

    # trainの特徴量と結合するためにquestionに対応するlabel_groupを列として設けておく
    labels["level_group"] = ""
    labels.loc[labels["question"]<=3, "level_group"] = "0-4"
    labels.loc[(labels["question"]>=4)&(labels["question"]<=13), "level_group"] = "5-12"
    labels.loc[labels["question"]>=14, "level_group"] = "13-22"

    return labels


def transform_labels_df_inf(labels_):
    """
    labelsデータを整形する
    """
    labels = labels_.copy()
    labels["question"] = labels["session_id"].apply(lambda x: x.split("_")[1].replace("q", "")).astype(int)
    labels["session_id"] = labels["session_id"].apply(lambda x: x.split("_")[0]).astype(int)

    return labels

In [7]:
class FeaturesTrain:
    def __init__(self, sessions_df, labels):
        self.sessions_df = sessions_df.sort_values(["session_id", "level_group", "elapsed_time"], ignore_index=True)
        self.features = self.sessions_df[["session_id", "level_group"]].drop_duplicates().copy()
        self.result = labels
        self.group = sessions_df["level_group"].values[0]

    def _prep(self):
        self.sessions_df["time_diff"] = self.sessions_df["elapsed_time"] - self.sessions_df.groupby(["session_id", "level_group"])["elapsed_time"].shift(1)

    def _total_record_cnt(self):
        """level_groupごとのレコード数
        """
        add_features = self.sessions_df.groupby(["session_id", "level_group"])["index"].count().reset_index().rename(columns={"index":f"{self.group}_record_cnt"})
        self.features = self.features.merge(add_features, on=["session_id", "level_group"], how="left")

    def _group_elapsed_time(self):
        """level_groupごと、epapsed_timeのmax - min（経過時間）
        """
        add_features = self.sessions_df.groupby(["session_id", "level_group"])["elapsed_time"].agg([max,min]).reset_index()
        add_features[f"{self.group}_group_elapsed_time"] = add_features["max"] - add_features["min"]
        add_features[f"{self.group}_group_elapsed_time"] = add_features[f"{self.group}_group_elapsed_time"].astype(np.float32)
        add_features = add_features[["session_id", "level_group", f"{self.group}_group_elapsed_time"]].copy()
        self.features = self.features.merge(add_features, on=["session_id", "level_group"], how="left")

    def _cat_record_cnt(self, cat_col):
        """level_groupごと、各{cat}のレコード数
        """
        cat_list = cat_col_lists[self.group][cat_col]
        add_features = self.sessions_df.groupby(["session_id", "level_group", cat_col])["index"].count().reset_index().rename(columns={"index":"cnt"})
        for cat in cat_list:
            feat_name = f"{self.group}_{cat_col}_{str(cat)}_record_cnt"
            tmp = add_features[add_features[cat_col]==cat][["session_id", "level_group", "cnt"]].copy()
            if len(tmp) > 0:
                tmp = tmp.rename(columns={"cnt": feat_name})
                self.features = self.features.merge(tmp, on=["session_id", "level_group"], how="left")
                self.features[feat_name] = self.features[feat_name].fillna(0)
            else:
                self.features[feat_name] = 0

    def _cat_col_nunique(self, cat_col):
        """level_groupごと、[col]のユニーク数
        """
        add_features = self.sessions_df.dropna(subset=[cat_col]).drop_duplicates(["session_id", "level_group", cat_col])
        add_features = add_features.groupby(["session_id", "level_group"])["index"].count().reset_index().rename(columns={"index":f"{self.group}_{cat_col}_nunique"})
        self.features = self.features.merge(add_features, on=["session_id", "level_group"], how="left")        

    def _agg_features(self, val_cols, aggs):
        new_cols = [f"{self.group}_{v}_{a}" for v,a in itertools.product(val_cols, aggs)]
        add_features = self.sessions_df.groupby(["session_id", "level_group"])[val_cols].agg(aggs).reset_index()
        add_features.columns = ["session_id", "level_group"] + new_cols
        add_features[new_cols] = add_features[new_cols].astype(np.float32)
        self.features = self.features.merge(add_features, on=["session_id", "level_group"], how="left")

    def _cat_agg_features(self, val_cols, aggs, cat_col, not_use_cats=None):
        add_features = self.sessions_df.groupby(["session_id", "level_group", cat_col])[val_cols].agg(aggs).reset_index()

        if not_use_cats is not None:
            cat_list = [c for c in cat_col_lists[self.group][cat_col] if c not in not_use_cats]
        else:
            cat_list = cat_col_lists[self.group][cat_col]

        for cat in cat_list:
            new_cols = [f"{self.group}_{cat_col}_{cat}_{v}_{a}" for v,a in itertools.product(val_cols, aggs)]
            tmp = add_features[add_features[cat_col]==cat].copy()
            if len(tmp) > 0:
                tmp.columns = ["session_id", "level_group", cat_col] + new_cols
                tmp = tmp.drop(columns=[cat_col])
                self.features = self.features.merge(tmp, on=["session_id", "level_group"], how="left")
                self.features[new_cols] = self.features[new_cols].fillna(-1)
            else:
                self.features[new_cols] = -1
            self.features[new_cols] = self.features[new_cols].astype(np.float32)

    def get_train(self):
        self._prep()
        self._total_record_cnt()
        self._group_elapsed_time()
        self._cat_record_cnt("event_name")
        self._cat_record_cnt("name")
        self._cat_record_cnt("page")
        self._cat_record_cnt("level")
        self._cat_record_cnt("room_fqid")
        self._cat_record_cnt("fqid")
        self._cat_record_cnt("text_fqid")
        self._cat_col_nunique("text")
        self._cat_col_nunique("text_fqid")
        self._cat_col_nunique("room_fqid")
        self._cat_col_nunique("fqid")

        self._agg_features(val_cols=["room_coor_x", "room_coor_y", "screen_coor_x", "screen_coor_y"], 
                           aggs=["mean"])
        self._agg_features(val_cols=["time_diff", "hover_duration"], 
                           aggs=["mean", "max", "min", "std", "sum"])
        
        self._agg_features(val_cols=["elapsed_time", "index"], 
                           aggs=["max", "min"])

        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="event_name")
        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="room_fqid")
        
        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="fqid")
        self._cat_agg_features(val_cols=["elapsed_time"],
                               aggs=["max", "min"],
                               cat_col="fqid")

        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="text_fqid")
        self._cat_agg_features(val_cols=["elapsed_time", "index"],
                               aggs=["max", "min"],
                               cat_col="text_fqid")
        
        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="level")
        self._cat_agg_features(val_cols=["elapsed_time", "index"],
                               aggs=["max", "min"],
                               cat_col="level")
        
        self._cat_agg_features(val_cols=["room_coor_x", "room_coor_y", "screen_coor_x", "screen_coor_y"],
                               aggs=["mean"],
                               cat_col="event_name",
                               not_use_cats=['checkpoint', 'map_hover', 'object_hover'])        
        self._cat_agg_features(val_cols=["room_coor_x", "room_coor_y", "screen_coor_x", "screen_coor_y"],
                               aggs=["mean"],
                               cat_col="name")
        
        self.result = self.result.merge(self.features, on=["session_id", "level_group"], how="left")
        return self.result

In [8]:
class FeaturesInf:
    def __init__(self, sessions_df, labels):
        self.sessions_df = sessions_df.sort_values(["elapsed_time"], ignore_index=True)
        self.result = labels
        self.group = sessions_df["level_group"].values[0]
        self.use_cols = [
            "elapsed_time", "event_name", "name", "level", "page", "index",
            "room_coor_x", "room_coor_y", "screen_coor_x", "screen_coor_y",
            "hover_duration", "text", "fqid", "room_fqid", "text_fqid"
        ]

    def _prep(self):
        # dataframeの各列をnumpy arrayで保持
        self.sessions = {}
        for c in self.use_cols:
            self.sessions[c] = self.sessions_df[c].values
        self.sessions["time_diff"] = self.sessions["elapsed_time"] - self.sessions_df["elapsed_time"].shift(1).values

    def _total_record_cnt(self):
        """level_groupごとのレコード数
        """
        add_feature = len(self.sessions["elapsed_time"])
        self.result[f"{self.group}_record_cnt"] = add_feature

    def _group_elapsed_time(self):
        """level_groupごと、epapsed_timeのmax - min（経過時間）
        """
        add_feature = np.max(self.sessions["elapsed_time"]) - np.min(self.sessions["elapsed_time"])
        self.result[f"{self.group}_group_elapsed_time"] = np.float32(add_feature)

    def _cat_record_cnt(self, cat_col):
        """level_groupごと、各{cat}のレコード数
        """
        cat_list = cat_col_lists[self.group][cat_col]
        for cat in cat_list:
            feat_name = f"{self.group}_{cat_col}_{str(cat)}_record_cnt"
            add_feature = (self.sessions[cat_col] == cat).astype(int).sum()
            self.result[feat_name] = add_feature

    def _cat_col_nunique(self, cat_col):
        """level_groupごと、[col]のユニーク数
        """
        self.result[f"{self.group}_{cat_col}_nunique"] = self.sessions_df[cat_col].dropna().nunique()       

    def _agg_features(self, val_cols, aggs):
        for val_col, agg in itertools.product(val_cols, aggs):
            feat_name = f"{self.group}_{val_col}_{agg}"
            if agg == "mean":
                add_feature = np.nanmean(self.sessions[val_col])
            elif agg == "max":
                add_feature = np.nanmax(self.sessions[val_col])
            elif agg == "min":
                add_feature = np.nanmin(self.sessions[val_col])
            elif agg == "std":
                add_feature = np.nanstd(self.sessions[val_col], ddof=1)
            elif agg == "sum":
                add_feature = np.nansum(self.sessions[val_col])
            self.result[feat_name] = np.float32(add_feature)

    def _cat_agg_features(self, val_cols, aggs, cat_col, not_use_cats=None):
        if not_use_cats is not None:
            cat_list = [c for c in cat_col_lists[self.group][cat_col] if c not in not_use_cats]
        else:
            cat_list = cat_col_lists[self.group][cat_col]

        for cat in cat_list:
            idx = self.sessions[cat_col] == cat
        
            if idx.sum() == 0:
                for val_col, agg in itertools.product(val_cols, aggs):
                    feat_name = f"{self.group}_{cat_col}_{cat}_{val_col}_{agg}"
                    self.result[feat_name] = np.float32(-1)
            else:
                for val_col, agg in itertools.product(val_cols, aggs):
                    feat_name = f"{self.group}_{cat_col}_{cat}_{val_col}_{agg}"
                    tmp = self.sessions[val_col][idx]
                    if agg == "mean":
                        add_feature = np.nanmean(tmp)
                    elif agg == "max":
                        add_feature = np.nanmax(tmp)
                    elif agg == "min":
                        add_feature = np.nanmin(tmp)
                    elif agg == "std":
                        add_feature = np.nanstd(tmp, ddof=1)
                    elif agg == "sum":
                        add_feature = np.nansum(tmp)
                    if np.isnan(add_feature):
                        self.result[feat_name] = np.float32(-1)
                    else:
                        self.result[feat_name] = np.float32(add_feature)

    def get_test(self):
        self._prep()
        self._total_record_cnt()
        self._group_elapsed_time()
        self._cat_record_cnt("event_name")
        self._cat_record_cnt("name")
        self._cat_record_cnt("page")
        self._cat_record_cnt("level")
        self._cat_record_cnt("room_fqid")
        self._cat_record_cnt("fqid")
        self._cat_record_cnt("text_fqid")
        self._cat_col_nunique("text")
        self._cat_col_nunique("text_fqid")
        self._cat_col_nunique("room_fqid")
        self._cat_col_nunique("fqid")

        self._agg_features(val_cols=["room_coor_x", "room_coor_y", "screen_coor_x", "screen_coor_y"], 
                           aggs=["mean"])
        self._agg_features(val_cols=["time_diff", "hover_duration"], 
                           aggs=["mean", "max", "min", "std", "sum"])

        self._agg_features(val_cols=["elapsed_time", "index"], 
                           aggs=["max", "min"])

        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="event_name")
        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="room_fqid")
        
        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="fqid")
        self._cat_agg_features(val_cols=["elapsed_time"],
                               aggs=["max", "min"],
                               cat_col="fqid")

        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="text_fqid")
        self._cat_agg_features(val_cols=["elapsed_time", "index"],
                               aggs=["max", "min"],
                               cat_col="text_fqid")
        
        self._cat_agg_features(val_cols=["time_diff"],
                               aggs=["mean", "max", "min", "std", "sum"],
                               cat_col="level")
        self._cat_agg_features(val_cols=["elapsed_time", "index"],
                               aggs=["max", "min"],
                               cat_col="level")
        
        self._cat_agg_features(val_cols=["room_coor_x", "room_coor_y", "screen_coor_x", "screen_coor_y"],
                               aggs=["mean"],
                               cat_col="event_name",
                               not_use_cats=['checkpoint', 'map_hover', 'object_hover'])        
        self._cat_agg_features(val_cols=["room_coor_x", "room_coor_y", "screen_coor_x", "screen_coor_y"],
                               aggs=["mean"],
                               cat_col="name")
        return self.result

In [9]:
def get_train_dataset(sessions, labels):
    # labelデータの整形
    labels = transform_labels_df_train(labels)

    # 特徴量生成
    feat = FeaturesTrain(sessions, labels)
    train = feat.get_train()
    train["question"] = train["question"].astype("category")

    return train

def get_test_dataset(sessions, labels):
    # labelデータの整形
    labels = transform_labels_df_inf(labels)

    # 特徴量生成
    feat = FeaturesInf(sessions, labels)
    test = feat.get_test()
    test["question"] = test["question"].astype("category")

    return test   

In [10]:
def calc_metrics(oof):
    logloss = log_loss(oof["correct"], oof["pred"])

    # find best th
    scores = []; thresholds = []
    best_score = 0; best_threshold = 0

    for threshold in np.arange(0.4,0.81,0.01):
        preds = (oof["pred"].values>threshold).astype(int)
        m = f1_score(oof["correct"].values, preds, average='macro')   
        scores.append(m)
        thresholds.append(threshold)
        if m>best_score:
            best_score = m
            best_threshold = threshold
    print("logloss", format(logloss, ".6f"))
    print("best_score", format(best_score, ".6f"))
    print("best_threshold", format(best_threshold, ".3f"))

    # Q別スコア
    print("---"*10)
    for q in range(18):
        q = q + 1
        preds = (oof[oof["question"]==q]["pred"].values>threshold).astype(int)
        m = f1_score(oof[oof["question"]==q]["correct"].values, preds, average='macro')
        print(f"Q{q} : F1 = {format(m, '.6f')}")
    return best_threshold

In [11]:
def run_train():
    oofs = []
    prev_features_df = None # 次のlevel_groupで特徴量を使うための保持データ。0-4は前のlevel_groupがないので初期値はNone
    dfs = []
    for group in level_group_list:
        print(group)
        # データ読み込み
        train_sessions = pd.read_csv(cfg.prep_dir + f"train{group}.csv")
        labels = pd.read_csv(cfg.prep_dir + f"train_labels{group}.csv")
        train_group = get_train_dataset(train_sessions, labels)

        # 一つ前のlevel_groupの特徴量を追加
        if prev_features_df is not None:
            train_group = train_group.merge(prev_features_df, on=["session_id"], how="left")
        else:
            pass

        # 前のlevel_groupのquestionパートの経過時間特徴量
        if group == "5-12":
            train_group["0-4_question_duration_time"] = train_group["5-12_elapsed_time_min"] - train_group["0-4_elapsed_time_max"]
            train_group["0-4_question_duration_index"] = train_group["5-12_index_min"] - train_group["0-4_index_max"]
        elif group == "13-22":
            train_group["5-12_question_duration_time"] = train_group["13-22_elapsed_time_min"] - train_group["5-12_elapsed_time_max"]
            train_group["5-12_question_duration_index"] = train_group["13-22_index_min"] - train_group["5-12_index_max"]
    
        target = "correct"
        not_use_cols = [target, "session_id", "level_group"]
        features = [c for c in train_group.columns if c not in not_use_cols]

        # 次のlevel_groupで使う用に特徴量を保持
        prev_features_df = train_group[["session_id"]+features].groupby("session_id").head(1).drop(columns="question")

        dfs.append(train_group)
    train = pd.concat(dfs, ignore_index=True)
    # concatするとcategory型がリセットされてしまうので再度cast
    train["question"] = train["question"].astype("category")

    target = "correct"
    not_use_cols = [target, "session_id", "level_group"]
    features = [c for c in train.columns if c not in not_use_cols]    

    gkf = GroupKFold(n_splits=cfg.n_splits)
    fis = []
    
    for i, (tr_idx, vl_idx) in enumerate(gkf.split(train[features], train[target], train["session_id"])):
        oof_groups = []
        print(f"fold : {i}")
        tr_x, tr_y = train.iloc[tr_idx][features], train.iloc[tr_idx][target]
        vl_x, vl_y = train.iloc[vl_idx][features], train.iloc[vl_idx][target]
        tr_data = lgb.Dataset(tr_x, label=tr_y)
        vl_data = lgb.Dataset(vl_x, label=vl_y)

        model = lgb.train(params, tr_data, valid_sets=[tr_data, vl_data],
                        num_boost_round=20000, early_stopping_rounds=100, verbose_eval=100)
        # モデル出力
        model.save_model(cfg.output_dir + f"{cfg.exp_name}/{cfg.exp_name}_model_{i}.lgb")
    
        # valid_pred
        oof_fold = train.iloc[vl_idx].copy()
        oof_fold["pred"] = model.predict(vl_x, num_iteration=model.best_iteration)
        oofs.append(oof_fold)

        # 特徴量重要度
        fi_fold = pd.DataFrame()
        fi_fold["feature"] = model.feature_name()
        fi_fold["importance"] = model.feature_importance(importance_type="gain")
        fi_fold["fold"] = i
        fis.append(fi_fold)

    fi = pd.concat(fis)    
    fi = fi.groupby("feature")["importance"].mean().reset_index()
    fi = fi.sort_values("importance", ascending=False).reset_index(drop=True)
    fi.to_csv(cfg.output_dir + f"{cfg.exp_name}/fi.csv", index=False)

    # cv
    oof = pd.concat(oofs)
    best_threshold = calc_metrics(oof)
    cfg.best_threshold = best_threshold
    oof[["session_id", "question", "pred", "correct"]].to_csv(cfg.output_dir + f"{cfg.exp_name}/oof.csv.gz", compression="gzip", index=False)

In [12]:
def get_mock_iter_train():
    """trainデータのiter分割を適用したtest_sample
    """
    test = pd.read_csv(cfg.input_dir + "_old/test.csv")
    sub = pd.read_csv(cfg.input_dir + "_old/sample_submission.csv")
    sub["level_group"] = sub["session_level"].apply(lambda x: x.split("_")[-1])
    
    # groupbyでiter作るときにgroup_levelの順番が崩れないように
    test["level_group2"] = test["level_group"].str.replace("13-22", "6")
    sub["level_group2"] = sub["level_group"].str.replace("13-22", "6")

    tests = [df[1].drop(columns=["session_level", "level_group2"]).reset_index(drop=True) for df in test.groupby("level_group2")]
    subs = [df[1].drop(columns=["session_level", "level_group2"]).reset_index(drop=True) for df in sub.groupby("level_group2")]
    return zip(tests, subs)

def get_mock_iter_test():
    """testデータのiter分割を適用したtest_sample
    """
    test = pd.read_csv(cfg.input_dir + "_old/test.csv")
    sub = pd.read_csv(cfg.input_dir + "_old/sample_submission.csv")
    
    # groupbyでiter作るときにgroup_levelの順番が崩れないように
    test["session_level"] = test["session_level"].str.replace("13-22", "6")
    sub["session_level"] = sub["session_level"].str.replace("13-22", "6")

    tests = [df[1].drop(columns="session_level").reset_index(drop=True) for df in test.groupby("session_level")]
    subs = [df[1].drop(columns="session_level").reset_index(drop=True) for df in sub.groupby("session_level")]
    return zip(tests, subs)

In [13]:
def inference(mode):
    if mode == "local_cv":
        # time series apiを模したiterをモックとして用意する
        iter_test = get_mock_iter_test()
        start_time = time.time()
    elif mode == "kaggle_inf":
        env = jo_wilder_310.make_env()
        iter_test = env.iter_test()
        
    models = []
    for i in range(cfg.n_splits):
        if mode == "local_cv":
            model_path = cfg.output_dir + f"{cfg.exp_name}/{cfg.exp_name}_model_{i}.lgb"
        elif mode == "kaggle_inf":
            model_path = f"/kaggle/input/jo-wilder-{cfg.exp_name}/{cfg.exp_name}_model_{i}.lgb"
        models.append(lgb.Booster(model_file=model_path))
    use_features = models[0].feature_name()
    
    prev_features_df = None
    for (test_sessions, sample_submission) in iter_test:
        level_group = test_sessions["level_group"].values[0]
        test = get_test_dataset(test_sessions, sample_submission)
        preds = np.zeros(len(test))

        if level_group == "0-4":
            pass
        else:
            test = test.merge(prev_features_df, on=["session_id"], how="left")

        # 前のlevel_groupのquestionパートの経過時間特徴量
        if level_group == "5-12":
            test["0-4_question_duration_time"] = test["5-12_elapsed_time_min"] - test["0-4_elapsed_time_max"]
            test["0-4_question_duration_index"] = test["5-12_index_min"] - test["0-4_index_max"]
        elif level_group == "13-22":
            test["5-12_question_duration_time"] = test["13-22_elapsed_time_min"] - test["5-12_elapsed_time_max"]
            test["5-12_question_duration_index"] = test["13-22_index_min"] - test["5-12_index_max"]

        target = "correct"
        not_use_cols = [target, "session_id", "level_group"]
        features = [c for c in test.columns if c not in not_use_cols]

        prev_features_df = test[["session_id"]+features].groupby("session_id").head(1).drop(columns="question")
        
        # そのlevel_group時点で存在しない列を追加
        complement_features = list(set(use_features) - set(test.columns.tolist()))
        test[complement_features] = np.nan

        for i in range(cfg.n_splits):
            model = models[i]
            preds += model.predict(test[use_features], num_iteration=model.best_iteration) / cfg.n_splits
        test["pred"] = preds
        preds = (preds>cfg.best_threshold).astype(int)
        sample_submission["correct"] = preds

        if mode == "local_cv":
            print(sample_submission["correct"].values)
        elif mode == "kaggle_inf":
            env.predict(sample_submission)
    if mode == "local_cv":
        process_time = format(time.time() - start_time, ".1f")
        print("sample_inf処理時間 : ", process_time, "秒")

In [14]:
prev_features_df = None # 次のlevel_groupで特徴量を使うための保持データ。0-4は前のlevel_groupがないので初期値はNone
dfs = []
for group in ["0-4", "5-12"]:
    print(group)
    # データ読み込み
    train_sessions = pd.read_csv(cfg.prep_dir + f"train{group}.csv")
    labels = pd.read_csv(cfg.prep_dir + f"train_labels{group}.csv")
    train_group = get_train_dataset(train_sessions, labels)

    # 一つ前のlevel_groupの特徴量を追加
    if prev_features_df is not None:
        train_group = train_group.merge(prev_features_df, on=["session_id"], how="left")
    else:
        pass

    # 前のlevel_groupのquestionパートの経過時間特徴量
    if group == "5-12":
        train_group["0-4_question_duration_time"] = train_group["5-12_elapsed_time_min"] - train_group["0-4_elapsed_time_max"]
        train_group["0-4_question_duration_index"] = train_group["5-12_index_min"] - train_group["0-4_index_max"]
    elif group == "13-22":
        train_group["5-12_question_duration_time"] = train_group["13-22_elapsed_time_min"] - train_group["5-12_elapsed_time_max"]
        train_group["5-12_question_duration_index"] = train_group["13-22_index_min"] - train_group["5-12_index_max"]

    target = "correct"
    not_use_cols = [target, "session_id", "level_group"]
    features = [c for c in train_group.columns if c not in not_use_cols]

    # 次のlevel_groupで使う用に特徴量を保持
    prev_features_df = train_group[["session_id"]+features].groupby("session_id").head(1).drop(columns="question")

    dfs.append(train_group)
train = pd.concat(dfs, ignore_index=True)
# concatするとcategory型がリセットされてしまうので再度cast
train_group["question"] = train["question"].astype("category")

target = "correct"
not_use_cols = [target, "session_id", "level_group"]
features = [c for c in train_group.columns if c not in not_use_cols]
gkf = GroupKFold(n_splits=cfg.n_splits)

for i, (tr_idx, vl_idx) in enumerate(gkf.split(train_group[features], train_group[target], train_group["session_id"])):
    oof_groups = []
    print(f"fold : {i}")
    tr_x, tr_y = train_group.iloc[tr_idx][features], train_group.iloc[tr_idx][target]
    vl_x, vl_y = train_group.iloc[vl_idx][features], train_group.iloc[vl_idx][target]
    tr_data = lgb.Dataset(tr_x, label=tr_y)
    vl_data = lgb.Dataset(vl_x, label=vl_y)
    break

model = optuna_lgb.train(params, tr_data, valid_sets=[tr_data, vl_data],
                         num_boost_round=20000, early_stopping_rounds=100, verbose_eval=100)

0-4
5-12
fold : 0


[32m[I 2023-05-30 23:57:11,566][0m A new study created in memory with name: no-name-ffd5848f-ff56-4e9e-a063-4c503780a156[0m
feature_fraction, val_score: inf:   0%|          | 0/7 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.507636	valid_1's binary_logloss: 0.535095
[200]	valid_0's binary_logloss: 0.485936	valid_1's binary_logloss: 0.534311


feature_fraction, val_score: 0.534095:  14%|#4        | 1/7 [03:43<22:23, 223.93s/it][32m[I 2023-05-31 00:00:55,535][0m Trial 0 finished with value: 0.5340947129714599 and parameters: {'feature_fraction': 0.4}. Best is trial 0 with value: 0.5340947129714599.[0m
feature_fraction, val_score: 0.534095:  14%|#4        | 1/7 [03:43<22:23, 223.93s/it]

Early stopping, best iteration is:
[153]	valid_0's binary_logloss: 0.494665	valid_1's binary_logloss: 0.534095
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.505179	valid_1's binary_logloss: 0.535473
[200]	valid_0's binary_logloss: 0.483392	valid_1's binary_logloss: 0.534903
[300]	valid_0's binary_logloss: 0.466536	valid_1's binary_logloss: 0.53531


feature_fraction, val_score: 0.534095:  29%|##8       | 2/7 [10:41<28:08, 337.80s/it]

Early stopping, best iteration is:
[202]	valid_0's binary_logloss: 0.483022	valid_1's binary_logloss: 0.534846


[32m[I 2023-05-31 00:07:53,525][0m Trial 1 finished with value: 0.5348459448227766 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 0 with value: 0.5340947129714599.[0m
feature_fraction, val_score: 0.534095:  29%|##8       | 2/7 [10:41<28:08, 337.80s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.505704	valid_1's binary_logloss: 0.535801
[200]	valid_0's binary_logloss: 0.484094	valid_1's binary_logloss: 0.535541


feature_fraction, val_score: 0.534095:  43%|####2     | 3/7 [15:54<21:45, 326.33s/it]

Early stopping, best iteration is:
[145]	valid_0's binary_logloss: 0.4947	valid_1's binary_logloss: 0.535146


[32m[I 2023-05-31 00:13:06,153][0m Trial 2 finished with value: 0.5351458536456757 and parameters: {'feature_fraction': 0.8}. Best is trial 0 with value: 0.5340947129714599.[0m
feature_fraction, val_score: 0.534095:  43%|####2     | 3/7 [15:54<21:45, 326.33s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.506268	valid_1's binary_logloss: 0.535571
[200]	valid_0's binary_logloss: 0.484312	valid_1's binary_logloss: 0.53501
[300]	valid_0's binary_logloss: 0.46826	valid_1's binary_logloss: 0.53566


feature_fraction, val_score: 0.534095:  57%|#####7    | 4/7 [21:07<16:03, 321.08s/it]

Early stopping, best iteration is:
[212]	valid_0's binary_logloss: 0.482165	valid_1's binary_logloss: 0.534997


[32m[I 2023-05-31 00:18:19,255][0m Trial 3 finished with value: 0.534996699955248 and parameters: {'feature_fraction': 0.6}. Best is trial 0 with value: 0.5340947129714599.[0m
feature_fraction, val_score: 0.534095:  57%|#####7    | 4/7 [21:07<16:03, 321.08s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.506043	valid_1's binary_logloss: 0.535823
[200]	valid_0's binary_logloss: 0.483963	valid_1's binary_logloss: 0.535058
[300]	valid_0's binary_logloss: 0.467876	valid_1's binary_logloss: 0.535592


feature_fraction, val_score: 0.534095:  71%|#######1  | 5/7 [27:54<11:44, 352.20s/it]

Early stopping, best iteration is:
[218]	valid_0's binary_logloss: 0.480838	valid_1's binary_logloss: 0.534967


[32m[I 2023-05-31 00:25:06,588][0m Trial 4 finished with value: 0.5349667318015209 and parameters: {'feature_fraction': 0.7}. Best is trial 0 with value: 0.5340947129714599.[0m
feature_fraction, val_score: 0.534095:  71%|#######1  | 5/7 [27:55<11:44, 352.20s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.504972	valid_1's binary_logloss: 0.53576
[200]	valid_0's binary_logloss: 0.482762	valid_1's binary_logloss: 0.535058


feature_fraction, val_score: 0.534095:  86%|########5 | 6/7 [33:40<05:50, 350.19s/it]

Early stopping, best iteration is:
[150]	valid_0's binary_logloss: 0.492894	valid_1's binary_logloss: 0.534905


[32m[I 2023-05-31 00:30:52,891][0m Trial 5 finished with value: 0.5349049289675214 and parameters: {'feature_fraction': 1.0}. Best is trial 0 with value: 0.5340947129714599.[0m
feature_fraction, val_score: 0.534095:  86%|########5 | 6/7 [33:41<05:50, 350.19s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.50672	valid_1's binary_logloss: 0.535851
[200]	valid_0's binary_logloss: 0.48504	valid_1's binary_logloss: 0.53574


feature_fraction, val_score: 0.534095: 100%|##########| 7/7 [37:46<00:00, 316.05s/it]

Early stopping, best iteration is:
[130]	valid_0's binary_logloss: 0.499076	valid_1's binary_logloss: 0.535511


[32m[I 2023-05-31 00:34:58,659][0m Trial 6 finished with value: 0.5355109116637381 and parameters: {'feature_fraction': 0.5}. Best is trial 0 with value: 0.5340947129714599.[0m
feature_fraction, val_score: 0.534095: 100%|##########| 7/7 [37:47<00:00, 323.87s/it]
num_leaves, val_score: 0.534095:   0%|          | 0/20 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.424789	valid_1's binary_logloss: 0.537046
[200]	valid_0's binary_logloss: 0.378527	valid_1's binary_logloss: 0.539578


num_leaves, val_score: 0.534095:   5%|5         | 1/20 [12:04<3:49:34, 724.95s/it][32m[I 2023-05-31 00:47:03,651][0m Trial 7 finished with value: 0.5368133979903189 and parameters: {'num_leaves': 195}. Best is trial 7 with value: 0.5368133979903189.[0m
num_leaves, val_score: 0.534095:   5%|5         | 1/20 [12:04<3:49:34, 724.95s/it]

Early stopping, best iteration is:
[109]	valid_0's binary_logloss: 0.419574	valid_1's binary_logloss: 0.536813
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.467977	valid_1's binary_logloss: 0.53586


num_leaves, val_score: 0.534095:  10%|#         | 2/20 [17:09<2:23:13, 477.44s/it]

[200]	valid_0's binary_logloss: 0.430946	valid_1's binary_logloss: 0.53712
Early stopping, best iteration is:
[100]	valid_0's binary_logloss: 0.467977	valid_1's binary_logloss: 0.53586


[32m[I 2023-05-31 00:52:08,213][0m Trial 8 finished with value: 0.5358598323393428 and parameters: {'num_leaves': 91}. Best is trial 8 with value: 0.5358598323393428.[0m
num_leaves, val_score: 0.534095:  10%|#         | 2/20 [17:09<2:23:13, 477.44s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.424254	valid_1's binary_logloss: 0.537654


num_leaves, val_score: 0.534095:  15%|#5        | 3/20 [24:30<2:10:36, 460.97s/it]

Early stopping, best iteration is:
[73]	valid_0's binary_logloss: 0.445161	valid_1's binary_logloss: 0.537467


[32m[I 2023-05-31 00:59:29,518][0m Trial 9 finished with value: 0.5374671113773618 and parameters: {'num_leaves': 196}. Best is trial 8 with value: 0.5358598323393428.[0m
num_leaves, val_score: 0.534095:  15%|#5        | 3/20 [24:30<2:10:36, 460.97s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.418101	valid_1's binary_logloss: 0.537375


num_leaves, val_score: 0.534095:  20%|##        | 4/20 [31:42<1:59:51, 449.44s/it]

Early stopping, best iteration is:
[74]	valid_0's binary_logloss: 0.438247	valid_1's binary_logloss: 0.537162


[32m[I 2023-05-31 01:06:41,763][0m Trial 10 finished with value: 0.5371615552603703 and parameters: {'num_leaves': 216}. Best is trial 8 with value: 0.5358598323393428.[0m
num_leaves, val_score: 0.534095:  20%|##        | 4/20 [31:43<1:59:51, 449.44s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.474756	valid_1's binary_logloss: 0.535694
[200]	valid_0's binary_logloss: 0.439759	valid_1's binary_logloss: 0.536089


num_leaves, val_score: 0.534095:  25%|##5       | 5/20 [36:23<1:37:11, 388.78s/it]

Early stopping, best iteration is:
[143]	valid_0's binary_logloss: 0.456833	valid_1's binary_logloss: 0.535439


[32m[I 2023-05-31 01:11:22,670][0m Trial 11 finished with value: 0.5354389282588291 and parameters: {'num_leaves': 78}. Best is trial 11 with value: 0.5354389282588291.[0m
num_leaves, val_score: 0.534095:  25%|##5       | 5/20 [36:24<1:37:11, 388.78s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.516156	valid_1's binary_logloss: 0.535869
[200]	valid_0's binary_logloss: 0.498177	valid_1's binary_logloss: 0.534384
[300]	valid_0's binary_logloss: 0.485693	valid_1's binary_logloss: 0.534599


num_leaves, val_score: 0.534095:  30%|###       | 6/20 [42:33<1:29:14, 382.45s/it]

Early stopping, best iteration is:
[214]	valid_0's binary_logloss: 0.496206	valid_1's binary_logloss: 0.534184


[32m[I 2023-05-31 01:17:32,790][0m Trial 12 finished with value: 0.5341838679562143 and parameters: {'num_leaves': 22}. Best is trial 12 with value: 0.5341838679562143.[0m
num_leaves, val_score: 0.534095:  30%|###       | 6/20 [42:34<1:29:14, 382.45s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.506767	valid_1's binary_logloss: 0.534953
[200]	valid_0's binary_logloss: 0.484515	valid_1's binary_logloss: 0.533585
[300]	valid_0's binary_logloss: 0.468642	valid_1's binary_logloss: 0.533368


num_leaves, val_score: 0.533245:  35%|###5      | 7/20 [50:42<1:30:24, 417.24s/it][32m[I 2023-05-31 01:25:41,201][0m Trial 13 finished with value: 0.5332450356212636 and parameters: {'num_leaves': 32}. Best is trial 13 with value: 0.5332450356212636.[0m
num_leaves, val_score: 0.533245:  35%|###5      | 7/20 [50:42<1:30:24, 417.24s/it]

Early stopping, best iteration is:
[235]	valid_0's binary_logloss: 0.478202	valid_1's binary_logloss: 0.533245
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.43626	valid_1's binary_logloss: 0.536241
[200]	valid_0's binary_logloss: 0.392696	valid_1's binary_logloss: 0.539063


num_leaves, val_score: 0.533245:  40%|####      | 8/20 [1:01:05<1:36:33, 482.80s/it]

Early stopping, best iteration is:
[101]	valid_0's binary_logloss: 0.435862	valid_1's binary_logloss: 0.536142


[32m[I 2023-05-31 01:36:04,744][0m Trial 14 finished with value: 0.5361417367024051 and parameters: {'num_leaves': 163}. Best is trial 13 with value: 0.5332450356212636.[0m
num_leaves, val_score: 0.533245:  40%|####      | 8/20 [1:01:06<1:36:33, 482.80s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.520349	valid_1's binary_logloss: 0.536278
[200]	valid_0's binary_logloss: 0.504503	valid_1's binary_logloss: 0.535177
[300]	valid_0's binary_logloss: 0.492918	valid_1's binary_logloss: 0.534706
[400]	valid_0's binary_logloss: 0.483439	valid_1's binary_logloss: 0.534974


num_leaves, val_score: 0.533245:  45%|####5     | 9/20 [1:08:16<1:25:30, 466.41s/it]

Early stopping, best iteration is:
[303]	valid_0's binary_logloss: 0.492632	valid_1's binary_logloss: 0.534678


[32m[I 2023-05-31 01:43:15,136][0m Trial 15 finished with value: 0.5346775983945998 and parameters: {'num_leaves': 18}. Best is trial 13 with value: 0.5332450356212636.[0m
num_leaves, val_score: 0.533245:  45%|####5     | 9/20 [1:08:16<1:25:30, 466.41s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.465717	valid_1's binary_logloss: 0.535042
[200]	valid_0's binary_logloss: 0.42795	valid_1's binary_logloss: 0.536257


num_leaves, val_score: 0.533245:  50%|#####     | 10/20 [1:13:27<1:09:44, 418.42s/it]

Early stopping, best iteration is:
[125]	valid_0's binary_logloss: 0.454007	valid_1's binary_logloss: 0.534946


[32m[I 2023-05-31 01:48:26,123][0m Trial 16 finished with value: 0.5349458040387735 and parameters: {'num_leaves': 95}. Best is trial 13 with value: 0.5332450356212636.[0m
num_leaves, val_score: 0.533245:  50%|#####     | 10/20 [1:13:27<1:09:44, 418.42s/it]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466141	valid_1's binary_logloss: 0.533015


num_leaves, val_score: 0.533009:  55%|#####5    | 11/20 [1:17:50<55:38, 370.97s/it]  [32m[I 2023-05-31 01:52:49,079][0m Trial 17 finished with value: 0.5330088247443111 and parameters: {'num_leaves': 48}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009:  55%|#####5    | 11/20 [1:17:50<55:38, 370.97s/it]

Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.47225	valid_1's binary_logloss: 0.533009
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.487281	valid_1's binary_logloss: 0.535772
[200]	valid_0's binary_logloss: 0.456103	valid_1's binary_logloss: 0.535933


num_leaves, val_score: 0.533009:  60%|######    | 12/20 [1:22:17<45:13, 339.22s/it][32m[I 2023-05-31 01:57:15,706][0m Trial 18 finished with value: 0.5355907648661028 and parameters: {'num_leaves': 58}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009:  60%|######    | 12/20 [1:22:17<45:13, 339.22s/it]

Early stopping, best iteration is:
[124]	valid_0's binary_logloss: 0.478264	valid_1's binary_logloss: 0.535591
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.542749	valid_1's binary_logloss: 0.544969
[200]	valid_0's binary_logloss: 0.535549	valid_1's binary_logloss: 0.540074
[300]	valid_0's binary_logloss: 0.531398	valid_1's binary_logloss: 0.53848
[400]	valid_0's binary_logloss: 0.528022	valid_1's binary_logloss: 0.53764
[500]	valid_0's binary_logloss: 0.525018	valid_1's binary_logloss: 

num_leaves, val_score: 0.533009:  65%|######5   | 13/20 [1:32:15<48:44, 417.82s/it][32m[I 2023-05-31 02:07:14,369][0m Trial 19 finished with value: 0.5358669599321315 and parameters: {'num_leaves': 4}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009:  65%|######5   | 13/20 [1:32:15<48:44, 417.82s/it]

Early stopping, best iteration is:
[1241]	valid_0's binary_logloss: 0.50862	valid_1's binary_logloss: 0.535867
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.4521	valid_1's binary_logloss: 0.536011
[200]	valid_0's binary_logloss: 0.410878	valid_1's binary_logloss: 0.537622


num_leaves, val_score: 0.533009:  70%|#######   | 14/20 [1:37:30<38:40, 386.83s/it][32m[I 2023-05-31 02:12:29,588][0m Trial 20 finished with value: 0.5359761789224295 and parameters: {'num_leaves': 123}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009:  70%|#######   | 14/20 [1:37:30<38:40, 386.83s/it]

Early stopping, best iteration is:
[101]	valid_0's binary_logloss: 0.451654	valid_1's binary_logloss: 0.535976
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.493501	valid_1's binary_logloss: 0.534881
[200]	valid_0's binary_logloss: 0.464937	valid_1's binary_logloss: 0.53409
[300]	valid_0's binary_logloss: 0.44521	valid_1's binary_logloss: 0.53449


num_leaves, val_score: 0.533009:  75%|#######5  | 15/20 [1:42:56<30:42, 368.44s/it][32m[I 2023-05-31 02:17:55,426][0m Trial 21 finished with value: 0.5339669167328602 and parameters: {'num_leaves': 49}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009:  75%|#######5  | 15/20 [1:42:56<30:42, 368.44s/it]

Early stopping, best iteration is:
[213]	valid_0's binary_logloss: 0.461923	valid_1's binary_logloss: 0.533967
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.450458	valid_1's binary_logloss: 0.535928


num_leaves, val_score: 0.533009:  80%|########  | 16/20 [1:47:22<22:29, 337.41s/it][32m[I 2023-05-31 02:22:20,750][0m Trial 22 finished with value: 0.5358236669686496 and parameters: {'num_leaves': 127}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009:  80%|########  | 16/20 [1:47:22<22:29, 337.41s/it]

Early stopping, best iteration is:
[93]	valid_0's binary_logloss: 0.455074	valid_1's binary_logloss: 0.535824
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466141	valid_1's binary_logloss: 0.533015


num_leaves, val_score: 0.533009:  85%|########5 | 17/20 [1:54:35<18:18, 366.32s/it][32m[I 2023-05-31 02:29:34,306][0m Trial 23 finished with value: 0.5330088247443111 and parameters: {'num_leaves': 48}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009:  85%|########5 | 17/20 [1:54:35<18:18, 366.32s/it]

Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.47225	valid_1's binary_logloss: 0.533009
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.406982	valid_1's binary_logloss: 0.537309


num_leaves, val_score: 0.533009:  90%|######### | 18/20 [2:02:57<13:34, 407.18s/it][32m[I 2023-05-31 02:37:56,604][0m Trial 24 finished with value: 0.5370257129106043 and parameters: {'num_leaves': 255}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009:  90%|######### | 18/20 [2:02:57<13:34, 407.18s/it]

Early stopping, best iteration is:
[81]	valid_0's binary_logloss: 0.422388	valid_1's binary_logloss: 0.537026
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.486575	valid_1's binary_logloss: 0.534936


num_leaves, val_score: 0.533009:  95%|#########5| 19/20 [2:06:54<05:56, 356.05s/it][32m[I 2023-05-31 02:41:53,556][0m Trial 25 finished with value: 0.5349355885113765 and parameters: {'num_leaves': 59}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009:  95%|#########5| 19/20 [2:06:54<05:56, 356.05s/it]

[200]	valid_0's binary_logloss: 0.45496	valid_1's binary_logloss: 0.535644
Early stopping, best iteration is:
[100]	valid_0's binary_logloss: 0.486575	valid_1's binary_logloss: 0.534936
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.458435	valid_1's binary_logloss: 0.535654
[200]	valid_0's binary_logloss: 0.419811	valid_1's binary_logloss: 0.53645


num_leaves, val_score: 0.533009: 100%|##########| 20/20 [2:11:59<00:00, 340.60s/it][32m[I 2023-05-31 02:46:58,135][0m Trial 26 finished with value: 0.5353040696245952 and parameters: {'num_leaves': 110}. Best is trial 17 with value: 0.5330088247443111.[0m
num_leaves, val_score: 0.533009: 100%|##########| 20/20 [2:11:59<00:00, 395.97s/it]


Early stopping, best iteration is:
[120]	valid_0's binary_logloss: 0.448715	valid_1's binary_logloss: 0.535304


bagging, val_score: 0.533009:   0%|          | 0/10 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494319	valid_1's binary_logloss: 0.535851
[200]	valid_0's binary_logloss: 0.466548	valid_1's binary_logloss: 0.535487


bagging, val_score: 0.533009:  10%|#         | 1/10 [05:15<47:19, 315.52s/it][32m[I 2023-05-31 02:52:13,699][0m Trial 27 finished with value: 0.5353405121998125 and parameters: {'bagging_fraction': 0.8244333618923735, 'bagging_freq': 1}. Best is trial 27 with value: 0.5353405121998125.[0m
bagging, val_score: 0.533009:  10%|#         | 1/10 [05:15<47:19, 315.52s/it]

Early stopping, best iteration is:
[192]	valid_0's binary_logloss: 0.46836	valid_1's binary_logloss: 0.535341
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.495711	valid_1's binary_logloss: 0.536795
[200]	valid_0's binary_logloss: 0.469076	valid_1's binary_logloss: 0.53784


bagging, val_score: 0.533009:  20%|##        | 2/10 [08:13<31:19, 234.90s/it][32m[I 2023-05-31 02:55:12,167][0m Trial 28 finished with value: 0.536728035972462 and parameters: {'bagging_fraction': 0.5189924746422235, 'bagging_freq': 7}. Best is trial 27 with value: 0.5353405121998125.[0m
bagging, val_score: 0.533009:  20%|##        | 2/10 [08:14<31:19, 234.90s/it]

Early stopping, best iteration is:
[120]	valid_0's binary_logloss: 0.489184	valid_1's binary_logloss: 0.536728
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.535322
[200]	valid_0's binary_logloss: 0.466194	valid_1's binary_logloss: 0.534875


bagging, val_score: 0.533009:  30%|###       | 3/10 [13:20<31:13, 267.63s/it][32m[I 2023-05-31 03:00:18,736][0m Trial 29 finished with value: 0.5345457409823166 and parameters: {'bagging_fraction': 0.95687119283154, 'bagging_freq': 7}. Best is trial 29 with value: 0.5345457409823166.[0m
bagging, val_score: 0.533009:  30%|###       | 3/10 [13:20<31:13, 267.63s/it]

Early stopping, best iteration is:
[162]	valid_0's binary_logloss: 0.475331	valid_1's binary_logloss: 0.534546
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.495984	valid_1's binary_logloss: 0.537791
[200]	valid_0's binary_logloss: 0.470246	valid_1's binary_logloss: 0.539492


bagging, val_score: 0.533009:  40%|####      | 4/10 [16:03<22:38, 226.41s/it][32m[I 2023-05-31 03:03:01,960][0m Trial 30 finished with value: 0.5377056576536495 and parameters: {'bagging_fraction': 0.4185398883666168, 'bagging_freq': 4}. Best is trial 29 with value: 0.5345457409823166.[0m
bagging, val_score: 0.533009:  40%|####      | 4/10 [16:03<22:38, 226.41s/it]

Early stopping, best iteration is:
[127]	valid_0's binary_logloss: 0.487406	valid_1's binary_logloss: 0.537706
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494372	valid_1's binary_logloss: 0.535475
[200]	valid_0's binary_logloss: 0.466174	valid_1's binary_logloss: 0.535548


bagging, val_score: 0.533009:  50%|#####     | 5/10 [21:04<21:05, 253.07s/it][32m[I 2023-05-31 03:08:02,296][0m Trial 31 finished with value: 0.535188430634139 and parameters: {'bagging_fraction': 0.9092961359244282, 'bagging_freq': 1}. Best is trial 29 with value: 0.5345457409823166.[0m
bagging, val_score: 0.533009:  50%|#####     | 5/10 [21:04<21:05, 253.07s/it]

Early stopping, best iteration is:
[152]	valid_0's binary_logloss: 0.477895	valid_1's binary_logloss: 0.535188
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494366	valid_1's binary_logloss: 0.535302
[200]	valid_0's binary_logloss: 0.46676	valid_1's binary_logloss: 0.535378


bagging, val_score: 0.533009:  60%|######    | 6/10 [24:31<15:49, 237.40s/it][32m[I 2023-05-31 03:11:29,281][0m Trial 32 finished with value: 0.5349168164279218 and parameters: {'bagging_fraction': 0.8518794259079707, 'bagging_freq': 2}. Best is trial 29 with value: 0.5345457409823166.[0m
bagging, val_score: 0.533009:  60%|######    | 6/10 [24:31<15:49, 237.40s/it]

Early stopping, best iteration is:
[119]	valid_0's binary_logloss: 0.487974	valid_1's binary_logloss: 0.534917
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494218	valid_1's binary_logloss: 0.535145
[200]	valid_0's binary_logloss: 0.466395	valid_1's binary_logloss: 0.534448


bagging, val_score: 0.533009:  70%|#######   | 7/10 [29:05<12:28, 249.42s/it][32m[I 2023-05-31 03:16:03,449][0m Trial 33 finished with value: 0.5342380107212333 and parameters: {'bagging_fraction': 0.8852695324765545, 'bagging_freq': 6}. Best is trial 33 with value: 0.5342380107212333.[0m
bagging, val_score: 0.533009:  70%|#######   | 7/10 [29:05<12:28, 249.42s/it]

Early stopping, best iteration is:
[167]	valid_0's binary_logloss: 0.474162	valid_1's binary_logloss: 0.534238
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.495321	valid_1's binary_logloss: 0.53739
[200]	valid_0's binary_logloss: 0.46874	valid_1's binary_logloss: 0.537915


bagging, val_score: 0.533009:  80%|########  | 8/10 [32:23<07:46, 233.17s/it][32m[I 2023-05-31 03:19:21,841][0m Trial 34 finished with value: 0.5370851343683625 and parameters: {'bagging_fraction': 0.5507662302556335, 'bagging_freq': 3}. Best is trial 33 with value: 0.5342380107212333.[0m
bagging, val_score: 0.533009:  80%|########  | 8/10 [32:23<07:46, 233.17s/it]

Early stopping, best iteration is:
[120]	valid_0's binary_logloss: 0.488625	valid_1's binary_logloss: 0.537085
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.496033	valid_1's binary_logloss: 0.538481


bagging, val_score: 0.533009:  90%|######### | 9/10 [35:32<03:39, 219.37s/it][32m[I 2023-05-31 03:22:30,862][0m Trial 35 finished with value: 0.5381850268247642 and parameters: {'bagging_fraction': 0.4170659406660186, 'bagging_freq': 2}. Best is trial 33 with value: 0.5342380107212333.[0m
bagging, val_score: 0.533009:  90%|######### | 9/10 [35:32<03:39, 219.37s/it]

Early stopping, best iteration is:
[82]	valid_0's binary_logloss: 0.50262	valid_1's binary_logloss: 0.538185
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49509	valid_1's binary_logloss: 0.535008
[200]	valid_0's binary_logloss: 0.467966	valid_1's binary_logloss: 0.535498


bagging, val_score: 0.533009: 100%|##########| 10/10 [41:16<00:00, 257.68s/it][32m[I 2023-05-31 03:28:14,333][0m Trial 36 finished with value: 0.5345146988476355 and parameters: {'bagging_fraction': 0.6906578134945391, 'bagging_freq': 7}. Best is trial 33 with value: 0.5342380107212333.[0m
bagging, val_score: 0.533009: 100%|##########| 10/10 [41:16<00:00, 247.62s/it]


Early stopping, best iteration is:
[130]	valid_0's binary_logloss: 0.485148	valid_1's binary_logloss: 0.534515


feature_fraction_stage2, val_score: 0.533009:   0%|          | 0/3 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.493475	valid_1's binary_logloss: 0.535248
[200]	valid_0's binary_logloss: 0.465153	valid_1's binary_logloss: 0.535179


feature_fraction_stage2, val_score: 0.533009:  33%|###3      | 1/3 [05:01<10:02, 301.31s/it][32m[I 2023-05-31 03:33:15,675][0m Trial 37 finished with value: 0.5348178662824535 and parameters: {'feature_fraction': 0.44800000000000006}. Best is trial 37 with value: 0.5348178662824535.[0m
feature_fraction_stage2, val_score: 0.533009:  33%|###3      | 1/3 [05:01<10:02, 301.31s/it]

Early stopping, best iteration is:
[144]	valid_0's binary_logloss: 0.479476	valid_1's binary_logloss: 0.534818
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.493241	valid_1's binary_logloss: 0.535585
[200]	valid_0's binary_logloss: 0.464464	valid_1's binary_logloss: 0.535216


feature_fraction_stage2, val_score: 0.533009:  67%|######6   | 2/3 [09:58<04:59, 299.12s/it][32m[I 2023-05-31 03:38:13,267][0m Trial 38 finished with value: 0.5351360315439739 and parameters: {'feature_fraction': 0.48000000000000004}. Best is trial 37 with value: 0.5348178662824535.[0m
feature_fraction_stage2, val_score: 0.533009:  67%|######6   | 2/3 [09:58<04:59, 299.12s/it]

Early stopping, best iteration is:
[193]	valid_0's binary_logloss: 0.466012	valid_1's binary_logloss: 0.535136
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.493684	valid_1's binary_logloss: 0.534928
[200]	valid_0's binary_logloss: 0.465534	valid_1's binary_logloss: 0.534726


feature_fraction_stage2, val_score: 0.533009: 100%|##########| 3/3 [17:22<00:00, 364.94s/it][32m[I 2023-05-31 03:45:36,530][0m Trial 39 finished with value: 0.5346574431221167 and parameters: {'feature_fraction': 0.41600000000000004}. Best is trial 39 with value: 0.5346574431221167.[0m
feature_fraction_stage2, val_score: 0.533009: 100%|##########| 3/3 [17:22<00:00, 347.40s/it]


Early stopping, best iteration is:
[190]	valid_0's binary_logloss: 0.467793	valid_1's binary_logloss: 0.534657


regularization_factors, val_score: 0.533009:   0%|          | 0/20 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494122	valid_1's binary_logloss: 0.534869
[200]	valid_0's binary_logloss: 0.465943	valid_1's binary_logloss: 0.53471


regularization_factors, val_score: 0.533009:   5%|5         | 1/20 [04:15<1:20:51, 255.35s/it][32m[I 2023-05-31 03:49:51,911][0m Trial 40 finished with value: 0.5343071899806007 and parameters: {'lambda_l1': 0.0604554962111354, 'lambda_l2': 3.869954801736793e-08}. Best is trial 40 with value: 0.5343071899806007.[0m
regularization_factors, val_score: 0.533009:   5%|5         | 1/20 [04:15<1:20:51, 255.35s/it]

Early stopping, best iteration is:
[132]	valid_0's binary_logloss: 0.483392	valid_1's binary_logloss: 0.534307
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.496355	valid_1's binary_logloss: 0.534797
[200]	valid_0's binary_logloss: 0.469875	valid_1's binary_logloss: 0.534344


regularization_factors, val_score: 0.533009:  10%|#         | 2/20 [08:07<1:12:30, 241.67s/it][32m[I 2023-05-31 03:53:44,010][0m Trial 41 finished with value: 0.5343145393423903 and parameters: {'lambda_l1': 4.9975706902437915, 'lambda_l2': 1.9012424041685485e-06}. Best is trial 40 with value: 0.5343071899806007.[0m
regularization_factors, val_score: 0.533009:  10%|#         | 2/20 [08:07<1:12:30, 241.67s/it]

Early stopping, best iteration is:
[146]	valid_0's binary_logloss: 0.48222	valid_1's binary_logloss: 0.534315
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494152	valid_1's binary_logloss: 0.534225
[200]	valid_0's binary_logloss: 0.466165	valid_1's binary_logloss: 0.534286


regularization_factors, val_score: 0.533009:  15%|#5        | 3/20 [12:20<1:09:53, 246.69s/it][32m[I 2023-05-31 03:57:56,673][0m Trial 42 finished with value: 0.5338199521903195 and parameters: {'lambda_l1': 0.00039376838225038495, 'lambda_l2': 0.002499037071924892}. Best is trial 42 with value: 0.5338199521903195.[0m
regularization_factors, val_score: 0.533009:  15%|#5        | 3/20 [12:20<1:09:53, 246.69s/it]

Early stopping, best iteration is:
[125]	valid_0's binary_logloss: 0.485836	valid_1's binary_logloss: 0.53382
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494159	valid_1's binary_logloss: 0.53474
[200]	valid_0's binary_logloss: 0.466644	valid_1's binary_logloss: 0.534017


regularization_factors, val_score: 0.533009:  20%|##        | 4/20 [20:06<1:28:56, 333.52s/it][32m[I 2023-05-31 04:05:43,307][0m Trial 43 finished with value: 0.5339580689095228 and parameters: {'lambda_l1': 1.6327842809598889e-06, 'lambda_l2': 0.12222223687206561}. Best is trial 42 with value: 0.5338199521903195.[0m
regularization_factors, val_score: 0.533009:  20%|##        | 4/20 [20:06<1:28:56, 333.52s/it]

Early stopping, best iteration is:
[195]	valid_0's binary_logloss: 0.467679	valid_1's binary_logloss: 0.533958
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466141	valid_1's binary_logloss: 0.533015


regularization_factors, val_score: 0.533009:  25%|##5       | 5/20 [26:57<1:30:20, 361.38s/it][32m[I 2023-05-31 04:12:34,067][0m Trial 44 finished with value: 0.5330088082772017 and parameters: {'lambda_l1': 0.00015569864847923652, 'lambda_l2': 1.972108672067178e-05}. Best is trial 44 with value: 0.5330088082772017.[0m
regularization_factors, val_score: 0.533009:  25%|##5       | 5/20 [26:57<1:30:20, 361.38s/it]

Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.47225	valid_1's binary_logloss: 0.533009
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494372	valid_1's binary_logloss: 0.534432
[200]	valid_0's binary_logloss: 0.46632	valid_1's binary_logloss: 0.53375


regularization_factors, val_score: 0.533009:  30%|###       | 6/20 [31:49<1:18:46, 337.63s/it][32m[I 2023-05-31 04:17:25,615][0m Trial 45 finished with value: 0.5335700065176225 and parameters: {'lambda_l1': 0.340061975132354, 'lambda_l2': 0.18377744502257684}. Best is trial 44 with value: 0.5330088082772017.[0m
regularization_factors, val_score: 0.533009:  30%|###       | 6/20 [31:49<1:18:46, 337.63s/it]

Early stopping, best iteration is:
[156]	valid_0's binary_logloss: 0.477009	valid_1's binary_logloss: 0.53357
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494152	valid_1's binary_logloss: 0.534225
[200]	valid_0's binary_logloss: 0.466094	valid_1's binary_logloss: 0.534324


regularization_factors, val_score: 0.533009:  35%|###5      | 7/20 [35:53<1:06:33, 307.17s/it][32m[I 2023-05-31 04:21:30,062][0m Trial 46 finished with value: 0.5338199699103042 and parameters: {'lambda_l1': 2.8664020497040848e-05, 'lambda_l2': 0.002540819891025607}. Best is trial 44 with value: 0.5330088082772017.[0m
regularization_factors, val_score: 0.533009:  35%|###5      | 7/20 [35:53<1:06:33, 307.17s/it]

Early stopping, best iteration is:
[125]	valid_0's binary_logloss: 0.485836	valid_1's binary_logloss: 0.53382
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49432	valid_1's binary_logloss: 0.534541
[200]	valid_0's binary_logloss: 0.466085	valid_1's binary_logloss: 0.534406


regularization_factors, val_score: 0.533009:  40%|####      | 8/20 [42:31<1:07:11, 335.98s/it][32m[I 2023-05-31 04:28:07,716][0m Trial 47 finished with value: 0.5340980247287268 and parameters: {'lambda_l1': 0.3631575115070118, 'lambda_l2': 2.565058627813059e-07}. Best is trial 44 with value: 0.5330088082772017.[0m
regularization_factors, val_score: 0.533009:  40%|####      | 8/20 [42:31<1:07:11, 335.98s/it]

Early stopping, best iteration is:
[140]	valid_0's binary_logloss: 0.481153	valid_1's binary_logloss: 0.534098
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466142	valid_1's binary_logloss: 0.533015


regularization_factors, val_score: 0.533009:  45%|####5     | 9/20 [47:18<58:49, 320.84s/it]  [32m[I 2023-05-31 04:32:55,277][0m Trial 48 finished with value: 0.5330087828067551 and parameters: {'lambda_l1': 0.0004765648129632002, 'lambda_l2': 5.355668054611892e-06}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  45%|####5     | 9/20 [47:18<58:49, 320.84s/it]

Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.472251	valid_1's binary_logloss: 0.533009
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49576	valid_1's binary_logloss: 0.534248
[200]	valid_0's binary_logloss: 0.469615	valid_1's binary_logloss: 0.533533


regularization_factors, val_score: 0.533009:  50%|#####     | 10/20 [54:28<59:05, 354.56s/it][32m[I 2023-05-31 04:40:05,346][0m Trial 49 finished with value: 0.5335060741159852 and parameters: {'lambda_l1': 5.0585963953619845e-06, 'lambda_l2': 7.694730802665455}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  50%|#####     | 10/20 [54:28<59:05, 354.56s/it]

Early stopping, best iteration is:
[157]	valid_0's binary_logloss: 0.479152	valid_1's binary_logloss: 0.533506
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466141	valid_1's binary_logloss: 0.533015


regularization_factors, val_score: 0.533009:  55%|#####5    | 11/20 [59:06<49:39, 331.08s/it][32m[I 2023-05-31 04:44:43,200][0m Trial 50 finished with value: 0.5330088247405984 and parameters: {'lambda_l1': 1.783781087925491e-08, 'lambda_l2': 1.0588809037037295e-08}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  55%|#####5    | 11/20 [59:06<49:39, 331.08s/it]

Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.47225	valid_1's binary_logloss: 0.533009
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494284	valid_1's binary_logloss: 0.534009
[200]	valid_0's binary_logloss: 0.466159	valid_1's binary_logloss: 0.533866


regularization_factors, val_score: 0.533009:  60%|######    | 12/20 [1:03:12<40:40, 305.10s/it][32m[I 2023-05-31 04:48:48,858][0m Trial 51 finished with value: 0.5333409497547723 and parameters: {'lambda_l1': 0.0018428821961065651, 'lambda_l2': 5.790872075589118e-06}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  60%|######    | 12/20 [1:03:12<40:40, 305.10s/it]

Early stopping, best iteration is:
[153]	valid_0's binary_logloss: 0.477721	valid_1's binary_logloss: 0.533341
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494241	valid_1's binary_logloss: 0.534137
[200]	valid_0's binary_logloss: 0.466246	valid_1's binary_logloss: 0.533916


regularization_factors, val_score: 0.533009:  65%|######5   | 13/20 [1:07:38<34:12, 293.27s/it][32m[I 2023-05-31 04:53:14,921][0m Trial 52 finished with value: 0.5335338955179935 and parameters: {'lambda_l1': 0.003142473301171557, 'lambda_l2': 3.168975415346845e-05}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  65%|######5   | 13/20 [1:07:38<34:12, 293.27s/it]

Early stopping, best iteration is:
[165]	valid_0's binary_logloss: 0.474338	valid_1's binary_logloss: 0.533534
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466141	valid_1's binary_logloss: 0.533015


regularization_factors, val_score: 0.533009:  70%|#######   | 14/20 [1:11:49<28:03, 280.54s/it][32m[I 2023-05-31 04:57:26,036][0m Trial 53 finished with value: 0.5330088073872115 and parameters: {'lambda_l1': 8.954444081826908e-05, 'lambda_l2': 6.246467255033147e-05}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  70%|#######   | 14/20 [1:11:49<28:03, 280.54s/it]

Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.47225	valid_1's binary_logloss: 0.533009
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494271	valid_1's binary_logloss: 0.534305
[200]	valid_0's binary_logloss: 0.46599	valid_1's binary_logloss: 0.534114


regularization_factors, val_score: 0.533009:  75%|#######5  | 15/20 [1:17:51<25:24, 305.00s/it][32m[I 2023-05-31 05:03:27,721][0m Trial 54 finished with value: 0.533749693563732 and parameters: {'lambda_l1': 0.004541415190106224, 'lambda_l2': 0.0005286712575090093}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  75%|#######5  | 15/20 [1:17:51<25:24, 305.00s/it]

Early stopping, best iteration is:
[131]	valid_0's binary_logloss: 0.483885	valid_1's binary_logloss: 0.53375
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466141	valid_1's binary_logloss: 0.533015


regularization_factors, val_score: 0.533009:  80%|########  | 16/20 [1:25:23<23:17, 349.38s/it][32m[I 2023-05-31 05:11:00,160][0m Trial 55 finished with value: 0.5330088216614665 and parameters: {'lambda_l1': 3.4739364682166064e-05, 'lambda_l2': 5.949031026001032e-07}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  80%|########  | 16/20 [1:25:23<23:17, 349.38s/it]

Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.47225	valid_1's binary_logloss: 0.533009
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466141	valid_1's binary_logloss: 0.533015


regularization_factors, val_score: 0.533009:  85%|########5 | 17/20 [1:33:02<19:06, 382.25s/it][32m[I 2023-05-31 05:18:38,853][0m Trial 56 finished with value: 0.533008817024269 and parameters: {'lambda_l1': 6.880018130833114e-07, 'lambda_l2': 4.97176851039701e-05}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  85%|########5 | 17/20 [1:33:02<19:06, 382.25s/it]

Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.47225	valid_1's binary_logloss: 0.533009
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466142	valid_1's binary_logloss: 0.533015


regularization_factors, val_score: 0.533009:  90%|######### | 18/20 [1:36:48<11:10, 335.46s/it][32m[I 2023-05-31 05:22:25,388][0m Trial 57 finished with value: 0.5330087910340476 and parameters: {'lambda_l1': 0.00039042774207132403, 'lambda_l2': 2.1400000378852668e-07}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  90%|######### | 18/20 [1:36:48<11:10, 335.46s/it]

Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.472251	valid_1's binary_logloss: 0.533009
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49414	valid_1's binary_logloss: 0.534703
[200]	valid_0's binary_logloss: 0.465648	valid_1's binary_logloss: 0.534954


regularization_factors, val_score: 0.533009:  95%|#########5| 19/20 [1:43:04<05:47, 347.53s/it][32m[I 2023-05-31 05:28:41,045][0m Trial 58 finished with value: 0.5345640148440186 and parameters: {'lambda_l1': 0.020259121814818877, 'lambda_l2': 1.856907255001664e-07}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009:  95%|#########5| 19/20 [1:43:04<05:47, 347.53s/it]

Early stopping, best iteration is:
[112]	valid_0's binary_logloss: 0.48992	valid_1's binary_logloss: 0.534564
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.49429	valid_1's binary_logloss: 0.534004
[200]	valid_0's binary_logloss: 0.466142	valid_1's binary_logloss: 0.533015


regularization_factors, val_score: 0.533009: 100%|##########| 20/20 [1:47:30<00:00, 323.20s/it][32m[I 2023-05-31 05:33:07,526][0m Trial 59 finished with value: 0.5330087902477733 and parameters: {'lambda_l1': 0.0003956555200109906, 'lambda_l2': 2.3882055188566842e-06}. Best is trial 48 with value: 0.5330087828067551.[0m
regularization_factors, val_score: 0.533009: 100%|##########| 20/20 [1:47:30<00:00, 322.55s/it]


Early stopping, best iteration is:
[174]	valid_0's binary_logloss: 0.472251	valid_1's binary_logloss: 0.533009


min_data_in_leaf, val_score: 0.533009:   0%|          | 0/5 [00:00<?, ?it/s]

[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494481	valid_1's binary_logloss: 0.534517
[200]	valid_0's binary_logloss: 0.466739	valid_1's binary_logloss: 0.533905


min_data_in_leaf, val_score: 0.533009:  20%|##        | 1/5 [07:11<28:46, 431.55s/it][32m[I 2023-05-31 05:40:19,109][0m Trial 60 finished with value: 0.5337455757712775 and parameters: {'min_child_samples': 100}. Best is trial 60 with value: 0.5337455757712775.[0m
min_data_in_leaf, val_score: 0.533009:  20%|##        | 1/5 [07:11<28:46, 431.55s/it]

Early stopping, best iteration is:
[190]	valid_0's binary_logloss: 0.468856	valid_1's binary_logloss: 0.533746
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494221	valid_1's binary_logloss: 0.5344
[200]	valid_0's binary_logloss: 0.466351	valid_1's binary_logloss: 0.533855


min_data_in_leaf, val_score: 0.533009:  40%|####      | 2/5 [11:35<16:38, 332.71s/it][32m[I 2023-05-31 05:44:42,635][0m Trial 61 finished with value: 0.5336968987105085 and parameters: {'min_child_samples': 25}. Best is trial 61 with value: 0.5336968987105085.[0m
min_data_in_leaf, val_score: 0.533009:  40%|####      | 2/5 [11:35<16:38, 332.71s/it]

Early stopping, best iteration is:
[141]	valid_0's binary_logloss: 0.480927	valid_1's binary_logloss: 0.533697
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494221	valid_1's binary_logloss: 0.534532
[200]	valid_0's binary_logloss: 0.466187	valid_1's binary_logloss: 0.534321


min_data_in_leaf, val_score: 0.533009:  60%|######    | 3/5 [17:54<11:48, 354.23s/it][32m[I 2023-05-31 05:51:02,472][0m Trial 62 finished with value: 0.5338224270090719 and parameters: {'min_child_samples': 50}. Best is trial 61 with value: 0.5336968987105085.[0m
min_data_in_leaf, val_score: 0.533009:  60%|######    | 3/5 [17:54<11:48, 354.23s/it]

Early stopping, best iteration is:
[128]	valid_0's binary_logloss: 0.48487	valid_1's binary_logloss: 0.533822
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494142	valid_1's binary_logloss: 0.534797
[200]	valid_0's binary_logloss: 0.466068	valid_1's binary_logloss: 0.534519


min_data_in_leaf, val_score: 0.533009:  80%|########  | 4/5 [22:01<05:11, 311.58s/it][32m[I 2023-05-31 05:55:08,673][0m Trial 63 finished with value: 0.5342254577526487 and parameters: {'min_child_samples': 5}. Best is trial 61 with value: 0.5336968987105085.[0m
min_data_in_leaf, val_score: 0.533009:  80%|########  | 4/5 [22:01<05:11, 311.58s/it]

Early stopping, best iteration is:
[137]	valid_0's binary_logloss: 0.481901	valid_1's binary_logloss: 0.534225
[LightGBM] [Info] Number of positive: 122655, number of negative: 65835
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 472995
[LightGBM] [Info] Number of data points in the train set: 188490, number of used features: 5185
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.650724 -> initscore=0.622224
[LightGBM] [Info] Start training from score 0.622224
Training until validation scores don't improve for 100 rounds
[100]	valid_0's binary_logloss: 0.494275	valid_1's binary_logloss: 0.534388
[200]	valid_0's binary_logloss: 0.46577	valid_1's binary_logloss: 0.534298


min_data_in_leaf, val_score: 0.533009: 100%|##########| 5/5 [29:18<00:00, 356.87s/it][32m[I 2023-05-31 06:02:25,834][0m Trial 64 finished with value: 0.5338909136644174 and parameters: {'min_child_samples': 10}. Best is trial 61 with value: 0.5336968987105085.[0m
min_data_in_leaf, val_score: 0.533009: 100%|##########| 5/5 [29:18<00:00, 351.66s/it]

Early stopping, best iteration is:
[143]	valid_0's binary_logloss: 0.48013	valid_1's binary_logloss: 0.533891





In [15]:
print(model.params)

{'objective': 'binary', 'boosting': 'gbdt', 'learning_rate': 0.1, 'metric': 'binary_logloss', 'seed': 42, 'feature_pre_filter': False, 'lambda_l1': 0.0004765648129632002, 'lambda_l2': 5.355668054611892e-06, 'num_leaves': 48, 'feature_fraction': 0.4, 'bagging_fraction': 1.0, 'bagging_freq': 0, 'min_child_samples': 20, 'num_iterations': 20000, 'early_stopping_round': 100, 'categorical_column': [0]}


In [None]:
model.