# exp026

In [1]:
import os
import sys
import itertools
import datetime
from dateutil.relativedelta import relativedelta
from dotenv import load_dotenv
load_dotenv()
sys.path.append(os.getenv('UTILS_PATH'))

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
from optuna.integration import lightgbm as optuna_lgb
import matplotlib.pyplot as plt
import seaborn as sns

import line_notify

In [2]:
import builtins
import types

def imports():
    """Yield ``(name, value)`` for every module or callable in this module's globals.

    These are the only global names a function wrapped by ``noglobal`` can see.
    """
    # Snapshot the items so the generator is not broken if a global is added
    # while it is being consumed.
    for name, val in list(globals().items()):
        # Imported modules, plus anything exposing __call__ (functions, classes, ...).
        if isinstance(val, types.ModuleType) or hasattr(val, '__call__'):
            yield name, val


def noglobal(f):
    '''
    Rebuild ``f`` so that its globals contain only modules and callables.

    Plain global variables (DataFrames, constants, ...) are not visible to the
    returned function, so an accidental reference to notebook state raises
    ``NameError`` instead of silently being used.

    ref: https://gist.github.com/raven38/4e4c3c7a179283c441f575d6e375510c

    Args:
        f (function): function to wrap.

    Returns:
        function: a copy of ``f`` bound to the restricted globals.
    '''
    restricted_globals = dict(imports())
    # __builtins__ is only collected by imports() when it is a module; make sure
    # built-in functions stay reachable when it is not.
    restricted_globals.setdefault('__builtins__', builtins)

    wrapped = types.FunctionType(f.__code__,
                                 restricted_globals,
                                 f.__name__,
                                 f.__defaults__,
                                 f.__closure__
                                 )
    # The constructor call above does not carry keyword-only defaults or the
    # function metadata, so copy them explicitly.
    wrapped.__kwdefaults__ = f.__kwdefaults__
    wrapped.__qualname__ = f.__qualname__
    wrapped.__module__ = f.__module__
    wrapped.__doc__ = f.__doc__
    wrapped.__dict__.update(f.__dict__)
    return wrapped

In [3]:
# Random seed; passed to LightGBM through params["seed"].
SEED = 42

ディレクトリ設定

In [4]:
# Data locations come from the environment (load_dotenv() has already read .env).
INPUT_DIR = os.getenv('INPUT_DIR')
OUTPUT_DIR = os.getenv('OUTPUT_DIR')
# Fail fast with a readable message; otherwise the string concatenations that
# build the paths would die with an opaque "NoneType + str" TypeError.
if INPUT_DIR is None or OUTPUT_DIR is None:
    raise RuntimeError('INPUT_DIR and OUTPUT_DIR must be set in the environment (e.g. via .env)')
# The experiment name is fixed by hand (an earlier version derived it from __file__).
exp_name = 'exp026'
# Paths are built by plain concatenation, so OUTPUT_DIR must end with a path separator.
os.makedirs(OUTPUT_DIR + exp_name, exist_ok=True)

データ読み込み

In [5]:
# Raw input tables. Every articles column is read as object, and article_id in
# the transactions is read as str; t_dat is parsed as a datetime.
articles = pd.read_csv(
    INPUT_DIR + 'articles.csv',
    dtype='object',
)
customers = pd.read_csv(
    INPUT_DIR + 'customers.csv',
)
transactions = pd.read_csv(
    INPUT_DIR + 'transactions_train.csv',
    dtype={'article_id': 'str'},
    parse_dates=['t_dat'],
)
sample = pd.read_csv(
    INPUT_DIR + 'sample_submission.csv',
)

# 前処理

In [6]:
# Unify the spelling of the "no newsletter" label ('None' -> 'NONE')
customers['fashion_news_frequency'] = customers['fashion_news_frequency'].str.replace('None', 'NONE')
# Attach customer and article attributes to every transaction row
transactions = (
    transactions
    .merge(customers, on='customer_id', how='left')
    .merge(articles, on='article_id', how='left')
)

# データセット作成（レコメンド→対象データセット作成→特徴量エンジニアリング）

In [7]:
@noglobal
def get_customer_frequent(history, n=12, timedelta=None):
    """Pick, for every customer, the articles they bought most often.

    Args:
        history (dataframe): transactions to aggregate
            (uses ``customer_id``, ``article_id`` and ``t_dat``)
        n (int): maximum number of articles kept per customer
        timedelta (dateutil.relativedelta): when given, only rows whose ``t_dat`` is
            within ``timedelta`` of the latest ``t_dat`` in ``history`` are counted

    Returns:
        dataframe: ``customer_id`` / ``article_id`` pairs, at most ``n`` per customer
    """
    window = history
    if timedelta is not None:
        # Keep only the trailing window that ends at the latest transaction date
        cutoff = history['t_dat'].max() - timedelta
        window = history[history['t_dat'] >= cutoff].copy()

    # Number of purchases per (customer, article) pair
    pair_counts = (
        window.groupby(['customer_id', 'article_id'])['t_dat']
        .count()
        .reset_index()
        .rename(columns={'t_dat': 'cnt'})
    )
    # Most frequent first inside each customer, then cut to the top n
    ranked = pair_counts.sort_values(['customer_id', 'cnt'], ascending=False)
    top_pairs = ranked.groupby('customer_id').head(n)
    return top_pairs[['customer_id', 'article_id']]

@noglobal
def get_popular_article(history, n=12, timedelta=None):
    """Pick the articles with the highest overall purchase count.

    Args:
        history (dataframe): transactions to aggregate (uses ``article_id`` and ``t_dat``)
        n (int): number of articles to return
        timedelta (dateutil.relativedelta): when given, only rows whose ``t_dat`` is
            within ``timedelta`` of the latest ``t_dat`` in ``history`` are counted

    Returns:
        list: article ids ordered from most to least purchased
    """
    if timedelta is not None:
        # Restrict to the trailing window that ends at the latest transaction date
        cutoff = history['t_dat'].max() - timedelta
        history = history[history['t_dat'] >= cutoff].copy()

    # Purchase count per article
    purchase_counts = (
        history.groupby('article_id')['t_dat']
        .count()
        .reset_index()
        .rename(columns={'t_dat': 'cnt'})
    )
    best_sellers = purchase_counts.sort_values(['cnt'], ascending=False).head(n)
    return list(best_sellers['article_id'].values)

@noglobal
def get_reccomend(target_customer_id, history):
    """Build the recommendation candidates for the given customers.

    For the whole history and for the trailing week, month and year, the candidates
    are each customer's most frequently bought articles plus the overall most
    popular articles paired with every target customer.

    Args:
        target_customer_id (list): customer ids to generate candidates for
        history (dataframe): transactions used to derive the candidates

    Returns:
        dataframe: unique ``customer_id`` / ``article_id`` candidate pairs,
            restricted to ``target_customer_id``
    """
    n = 12
    # None means "use the whole history"; the others are trailing windows
    windows = [None, relativedelta(weeks=1), relativedelta(months=1), relativedelta(years=1)]

    # Collect the pieces and concatenate once: DataFrame.append no longer exists in
    # pandas 2.x and re-copied the accumulated frame on every iteration
    candidates = []
    for td in windows:
        customer_freq = get_customer_frequent(history, n, td)
        popular_article = get_popular_article(history, n, td)
        # Every combination of target customer and popular article
        popular_pairs = pd.DataFrame(itertools.product(target_customer_id, popular_article),
                                     columns=['customer_id', 'article_id'])
        candidates.append(customer_freq)
        candidates.append(popular_pairs)

    result = pd.concat(candidates).drop_duplicates()
    # customer_freq covers every customer in history; keep the requested ones only
    result = result[result['customer_id'].isin(target_customer_id)].copy()

    return result

In [8]:
@noglobal
def add_labels(recom_result, history):
    """Flag each recommended pair with whether it appears in ``history``.

    Args:
        recom_result (dataframe): recommendation candidates
            (``customer_id`` / ``article_id``)
        history (dataframe): transactions of the target period

    Returns:
        dataframe: ``recom_result`` with a ``buy`` column
            (1 if the pair was purchased, 0 otherwise)
    """
    pair_keys = ['customer_id', 'article_id']
    # One row per purchased pair, marked as a positive
    purchased = history[pair_keys].drop_duplicates()
    purchased['buy'] = 1
    labelled = recom_result.merge(purchased, on=pair_keys, how='left')
    # Pairs with no match in history were not purchased
    labelled['buy'] = labelled['buy'].fillna(0)
    return labelled


In [9]:
@noglobal
def make_article_features(articles):
    """Label-encode the categorical article attributes.

    Args:
        articles (dataframe): article master; left unmodified

    Returns:
        dataframe: ``article_id`` plus one integer-encoded column per attribute
    """
    le_cols = ['product_type_name', 'product_group_name', 'graphical_appearance_name',
               'colour_group_name', 'perceived_colour_value_name', 'department_name',
               'index_name', 'index_group_name', 'section_name', 'garment_group_name']
    # Encode a copy: writing the codes back into `articles` would overwrite the
    # caller's frame and make the notebook state depend on how often this ran
    encoded = articles[['article_id'] + le_cols].copy()
    for c in le_cols:
        le = LabelEncoder()
        # Missing values are encoded as the empty-string class
        encoded[c] = le.fit_transform(encoded[c].fillna(''))
    return encoded

@noglobal
def make_article_tran_features(history):
    """Aggregate the transaction history per article.

    Args:
        history (dataframe): transactions with ``article_id``, ``t_dat``,
            ``price`` and ``age``

    Returns:
        dataframe: one row per article with the purchase count, days since the
            latest / first purchase (relative to the latest ``t_dat`` in
            ``history``), price max/min/mean and buyer-age max/min/mean/std
    """
    latest_date = history['t_dat'].max()
    df = history.groupby('article_id').agg(
        article_total_cnt=('t_dat', 'count'),
        article_total_latest_buy=('t_dat', 'max'),
        article_total_1st_buy=('t_dat', 'min'),
        article_price_max=('price', 'max'),
        article_price_min=('price', 'min'),
        article_price_mean=('price', 'mean'),
        article_age_max=('age', 'max'),
        article_age_min=('age', 'min'),
        article_age_mean=('age', 'mean'),
        article_age_std=('age', 'std'),
    ).reset_index()
    # Turn the first / latest purchase dates into "days before the latest date"
    for date_col in ('article_total_1st_buy', 'article_total_latest_buy'):
        df[date_col] = (latest_date - df[date_col]).dt.days
    return df


@noglobal
def make_customer_features(customers):
    """Label-encode the categorical customer attributes.

    Args:
        customers (dataframe): customer master; left unmodified

    Returns:
        dataframe: copy of ``customers`` with ``club_member_status``,
            ``fashion_news_frequency`` and ``postal_code`` replaced by integer codes
    """
    le_cols = ['club_member_status', 'fashion_news_frequency', 'postal_code']
    # Encode a copy: writing the codes back into `customers` would overwrite the
    # caller's frame and make the notebook state depend on how often this ran
    encoded = customers.copy()
    for c in le_cols:
        le = LabelEncoder()
        # Missing values are encoded as the empty-string class
        encoded[c] = le.fit_transform(encoded[c].fillna(''))
    return encoded

@noglobal
def make_customer_tran_features(history):
    """Aggregate the transaction history per customer.

    Args:
        history (dataframe): transactions with ``customer_id``, ``t_dat`` and ``price``

    Returns:
        dataframe: one row per customer with the purchase count, days since the
            latest / first purchase (relative to the latest ``t_dat`` in
            ``history``) and price max/min/mean
    """
    latest_date = history['t_dat'].max()
    df = history.groupby('customer_id').agg(
        customer_total_cnt=('t_dat', 'count'),
        customer_total_latest_buy=('t_dat', 'max'),
        customer_total_1st_buy=('t_dat', 'min'),
        customer_price_max=('price', 'max'),
        customer_price_min=('price', 'min'),
        customer_price_mean=('price', 'mean'),
    ).reset_index()
    # Turn the first / latest purchase dates into "days before the latest date"
    for date_col in ('customer_total_1st_buy', 'customer_total_latest_buy'):
        df[date_col] = (latest_date - df[date_col]).dt.days
    return df

@noglobal
def make_customer_article_features(target, history):
    """Aggregate the history of each (customer, article) pair found in ``target``.

    Args:
        target (dataframe): ``customer_id`` / ``article_id`` pairs to describe
        history (dataframe): transactions with ``customer_id``, ``article_id``
            and ``t_dat``

    Returns:
        dataframe: one row per pair that occurs in ``history``, with the number of
            purchases and the days since the first / latest purchase (relative to
            the latest ``t_dat`` in ``history``)
    """
    pair_keys = ['customer_id', 'article_id']
    latest_date = history['t_dat'].max()

    # The inner join keeps only pairs that were actually bought before
    pair_history = target.merge(history, on=pair_keys, how='inner')
    df = pair_history.groupby(pair_keys).agg({'t_dat': ['count', 'min', 'max']}).reset_index()
    df.columns = pair_keys + ['count', '1st_buy_date_diff', 'latest_buy_date_diff']

    for date_col in ('1st_buy_date_diff', 'latest_buy_date_diff'):
        df[date_col] = (latest_date - df[date_col]).dt.days
    return df

@noglobal
def add_features(df, history, articles, customers):
    """Left-join every feature table onto the candidate rows.

    Args:
        df (dataframe): candidate rows with ``customer_id`` and ``article_id``
        history (dataframe): transactions the aggregate features are computed from
        articles (dataframe): article master
        customers (dataframe): customer master

    Returns:
        dataframe: ``df`` with article, customer and customer-article features added
    """
    pair_keys = ['customer_id', 'article_id']
    # All tables are built before the first merge, in the order they are joined
    feature_tables = [
        (make_article_features(articles), ['article_id']),
        (make_article_tran_features(history), ['article_id']),
        (make_customer_features(customers), ['customer_id']),
        (make_customer_tran_features(history), ['customer_id']),
        (make_customer_article_features(df[pair_keys], history), ['article_id', 'customer_id']),
    ]
    for table, join_keys in feature_tables:
        df = df.merge(table, on=join_keys, how='left')
    return df
    

In [10]:
# Build the test (inference) dataset: candidates for every customer in the sample
# submission, with features computed from the full transaction history
target_id = sample['customer_id'].tolist()
recom = get_reccomend(target_id, transactions)
ml_test = add_features(recom, transactions, articles, customers)

# レコメンド商品を購入するかどうかの2値分類モデル

In [11]:
def apk(y_true, y_pred, K=12):
    """Average precision at K for each (truth, prediction) pair.

    Args:
        y_true (sequence): per-sample collections of relevant items
        y_pred (sequence): per-sample ranked predictions; each must hold at most
            ``K`` items without duplicates
        K (int): cut-off rank

    Returns:
        list: one AP@K value per sample; 0.0 for a sample with no relevant items

    Raises:
        AssertionError: if the two inputs differ in length, or a prediction list
            is longer than ``K`` or contains duplicates
    """
    assert(len(y_true) == len(y_pred))
    apks = []
    for y_i_true, y_i_pred in zip(y_true, y_pred):
        # Check the number of predictions and that none is repeated
        assert(len(y_i_pred) <= K)
        assert(len(set(y_i_pred)) == len(y_i_pred))

        # Nothing relevant: score 0 instead of dividing by zero below
        if len(y_i_true) == 0:
            apks.append(0.0)
            continue

        relevant = set(y_i_true)  # constant-time membership checks
        sum_precision = 0.0
        num_hits = 0.0
        for rank, p in enumerate(y_i_pred, start=1):
            if p in relevant:
                num_hits += 1
                sum_precision += num_hits / rank
        apks.append(sum_precision / min(len(y_i_true), K))
    return apks

In [12]:
# Label column (the purchase flag attached by add_labels)
target = 'buy'
# Identifier and label columns are excluded from the model inputs
not_use_cols = ['customer_id', 'article_id', target]
# Every remaining column of ml_test is used as a feature
features = [c for c in ml_test.columns if c not in not_use_cols]

In [13]:
# LightGBM settings for the binary "is this candidate bought?" classifier
params = dict(
    objective="binary",
    boosting="gbdt",
    learning_rate=0.01,
    metric="binary_logloss",
    seed=SEED,
)

In [14]:
# Roll forward one week at a time to build training data and validate
# NOTE(review): the loop body ends with an unconditional `break`, so only the first
# iteration runs and the dates are never shifted; n_iter, test_pred, fi and scores
# are initialised here but not used in the visible part of this cell.
n_iter = 3
train_start = datetime.datetime(2020,9,9)
valid_start = datetime.datetime(2020,9,16)
valid_end = datetime.datetime(2020,9,22)
test_pred = np.zeros(len(ml_test))
fi = pd.DataFrame()
scores = []
for i in range(n_iter):
    # Build the training data: candidates come from everything before train_start,
    # labels from the purchases in [train_start, valid_start)
    history_tran = transactions[transactions['t_dat'] < train_start].copy()
    target_tran = transactions[(transactions['t_dat'] >= train_start) & (transactions['t_dat'] < valid_start)].copy()
    target_id = target_tran['customer_id'].unique().tolist()
    recom = get_reccomend(target_id, history_tran)
    ml_train = add_labels(recom, target_tran)
    ml_train = add_features(ml_train, history_tran, articles, customers)

    # Build the validation data: candidates come from everything before valid_start,
    # labels from the purchases in [valid_start, valid_end]
    history_tran = transactions[transactions['t_dat'] < valid_start].copy()
    target_tran = transactions[(transactions['t_dat'] >= valid_start) & (transactions['t_dat'] <= valid_end)].copy()
    target_id = target_tran['customer_id'].unique().tolist()
    recom = get_reccomend(target_id, history_tran)
    ml_valid = add_labels(recom, target_tran)
    ml_valid = add_features(ml_valid, history_tran, articles, customers)
    
    # Training inputs
    tr_x, tr_y = ml_train[features], ml_train[target]
    vl_x, vl_y = ml_valid[features], ml_valid[target]
    tr_data = lgb.Dataset(tr_x, label=tr_y)
    vl_data = lgb.Dataset(vl_x, label=vl_y)
    break
# Train through Optuna's LightGBM integration on the single fold built above,
# evaluating on both the training and the validation set.
# NOTE(review): early_stopping_rounds / verbose_eval are passed as keyword
# arguments; confirm the installed LightGBM / Optuna versions still accept them.
model = optuna_lgb.train(params, tr_data, valid_sets=[tr_data, vl_data],
                num_boost_round=20000, early_stopping_rounds=100,verbose_eval=1000)

[32m[I 2022-02-23 11:25:07,260][0m A new study created in memory with name: no-name-177cca2b-4309-4399-acc5-aabca976b252[0m


[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds
[1000]	valid_0's binary_logloss: 0.0215707	valid_1's binary_logloss: 0.0244567


feature_fraction, val_score: 0.024419:  14%|#4        | 1/7 [02:17<13:45, 137.57s/it][32m[I 2022-02-23 11:27:24,834][0m Trial 0 finished with value: 0.024419216297994664 and parameters: {'feature_fraction': 0.7}. Best is trial 0 with value: 0.024419216297994664.[0m
feature_fraction, val_score: 0.024419:  14%|#4        | 1/7 [02:17<13:45, 137.57s/it]

Early stopping, best iteration is:
[1693]	valid_0's binary_logloss: 0.0207474	valid_1's binary_logloss: 0.0244192




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds
[1000]	valid_0's binary_logloss: 0.0215907	valid_1's binary_logloss: 0.024435


feature_fraction, val_score: 0.024419:  29%|##8       | 2/7 [03:46<09:03, 108.70s/it][32m[I 2022-02-23 11:28:53,326][0m Trial 1 finished with value: 0.024431407570134532 and parameters: {'feature_fraction': 0.6}. Best is trial 0 with value: 0.024419216297994664.[0m
feature_fraction, val_score: 0.024419:  29%|##8       | 2/7 [03:46<09:03, 108.70s/it]

Early stopping, best iteration is:
[1134]	valid_0's binary_logloss: 0.0214201	valid_1's binary_logloss: 0.0244314




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


feature_fraction, val_score: 0.024419:  43%|####2     | 3/7 [04:56<06:04, 91.02s/it] [32m[I 2022-02-23 11:30:03,315][0m Trial 2 finished with value: 0.02445313272777726 and parameters: {'feature_fraction': 1.0}. Best is trial 0 with value: 0.024419216297994664.[0m
feature_fraction, val_score: 0.024419:  43%|####2     | 3/7 [04:56<06:04, 91.02s/it]

Early stopping, best iteration is:
[811]	valid_0's binary_logloss: 0.021804	valid_1's binary_logloss: 0.0244531




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds
[1000]	valid_0's binary_logloss: 0.021698	valid_1's binary_logloss: 0.0244234


feature_fraction, val_score: 0.024419:  57%|#####7    | 4/7 [06:09<04:12, 84.21s/it][32m[I 2022-02-23 11:31:17,070][0m Trial 3 finished with value: 0.024421568809689674 and parameters: {'feature_fraction': 0.4}. Best is trial 0 with value: 0.024419216297994664.[0m
feature_fraction, val_score: 0.024419:  57%|#####7    | 4/7 [06:09<04:12, 84.21s/it]

Early stopping, best iteration is:
[1032]	valid_0's binary_logloss: 0.0216597	valid_1's binary_logloss: 0.0244216




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds
[1000]	valid_0's binary_logloss: 0.0215431	valid_1's binary_logloss: 0.0244406


feature_fraction, val_score: 0.024419:  71%|#######1  | 5/7 [08:03<03:09, 94.75s/it][32m[I 2022-02-23 11:33:10,518][0m Trial 4 finished with value: 0.024425477711968967 and parameters: {'feature_fraction': 0.8}. Best is trial 0 with value: 0.024419216297994664.[0m
feature_fraction, val_score: 0.024419:  71%|#######1  | 5/7 [08:03<03:09, 94.75s/it]

Early stopping, best iteration is:
[1255]	valid_0's binary_logloss: 0.0212352	valid_1's binary_logloss: 0.0244255




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds
[1000]	valid_0's binary_logloss: 0.0215132	valid_1's binary_logloss: 0.0244498


feature_fraction, val_score: 0.024419:  86%|########5 | 6/7 [09:41<01:35, 95.84s/it][32m[I 2022-02-23 11:34:48,478][0m Trial 5 finished with value: 0.024438748385271918 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 0 with value: 0.024419216297994664.[0m
feature_fraction, val_score: 0.024419:  86%|########5 | 6/7 [09:41<01:35, 95.84s/it]

Early stopping, best iteration is:
[1300]	valid_0's binary_logloss: 0.0211387	valid_1's binary_logloss: 0.0244387




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds
[1000]	valid_0's binary_logloss: 0.0216316	valid_1's binary_logloss: 0.0244466


feature_fraction, val_score: 0.024419: 100%|##########| 7/7 [11:08<00:00, 93.18s/it][32m[I 2022-02-23 11:36:16,164][0m Trial 6 finished with value: 0.0244448161551729 and parameters: {'feature_fraction': 0.5}. Best is trial 0 with value: 0.024419216297994664.[0m
feature_fraction, val_score: 0.024419: 100%|##########| 7/7 [11:08<00:00, 95.56s/it]


Early stopping, best iteration is:
[926]	valid_0's binary_logloss: 0.0217233	valid_1's binary_logloss: 0.0244448




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024419:   5%|5         | 1/20 [01:23<26:32, 83.83s/it][32m[I 2022-02-23 11:37:39,999][0m Trial 7 finished with value: 0.024423360844425448 and parameters: {'num_leaves': 163}. Best is trial 7 with value: 0.024423360844425448.[0m
num_leaves, val_score: 0.024419:   5%|5         | 1/20 [01:23<26:32, 83.83s/it]

Early stopping, best iteration is:
[553]	valid_0's binary_logloss: 0.01818	valid_1's binary_logloss: 0.0244234




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024419:  10%|#         | 2/20 [02:45<24:49, 82.78s/it][32m[I 2022-02-23 11:39:02,037][0m Trial 8 finished with value: 0.02442369925915596 and parameters: {'num_leaves': 43}. Best is trial 7 with value: 0.024423360844425448.[0m
num_leaves, val_score: 0.024419:  10%|#         | 2/20 [02:45<24:49, 82.78s/it]

Early stopping, best iteration is:
[725]	valid_0's binary_logloss: 0.0214068	valid_1's binary_logloss: 0.0244237




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024393:  15%|#5        | 3/20 [04:14<24:12, 85.45s/it][32m[I 2022-02-23 11:40:30,673][0m Trial 9 finished with value: 0.024392927278511032 and parameters: {'num_leaves': 175}. Best is trial 9 with value: 0.024392927278511032.[0m
num_leaves, val_score: 0.024393:  15%|#5        | 3/20 [04:14<24:12, 85.45s/it]

Early stopping, best iteration is:
[575]	valid_0's binary_logloss: 0.0177775	valid_1's binary_logloss: 0.0243929




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds
[1000]	valid_0's binary_logloss: 0.0234941	valid_1's binary_logloss: 0.0247594
[2000]	valid_0's binary_logloss: 0.0232396	valid_1's binary_logloss: 0.0246173
[3000]	valid_0's binary_logloss: 0.0230892	valid_1's binary_logloss: 0.0245387
[4000]	valid_0's binary_logloss: 0.0229778	valid_1's binary_logloss: 0.0245073
[5000]	valid_0's binary_logloss: 0.0228811	valid_1's binary_logloss: 0.0244882
[6000]	valid_0's binary_logloss: 0.0227995	valid_1's binary_logloss: 0.0244677
[7000]

num_leaves, val_score: 0.024393:  20%|##        | 4/20 [12:05<1:03:19, 237.47s/it][32m[I 2022-02-23 11:48:21,191][0m Trial 10 finished with value: 0.024430856410499684 and parameters: {'num_leaves': 4}. Best is trial 9 with value: 0.024392927278511032.[0m
num_leaves, val_score: 0.024393:  20%|##        | 4/20 [12:05<1:03:19, 237.47s/it]

Early stopping, best iteration is:
[9564]	valid_0's binary_logloss: 0.0225631	valid_1's binary_logloss: 0.0244309




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024393:  25%|##5       | 5/20 [13:26<45:18, 181.21s/it]  [32m[I 2022-02-23 11:49:42,637][0m Trial 11 finished with value: 0.02439436801072206 and parameters: {'num_leaves': 235}. Best is trial 9 with value: 0.024392927278511032.[0m
num_leaves, val_score: 0.024393:  25%|##5       | 5/20 [13:26<45:18, 181.21s/it]

Early stopping, best iteration is:
[547]	valid_0's binary_logloss: 0.0166086	valid_1's binary_logloss: 0.0243944




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024393:  30%|###       | 6/20 [14:44<34:05, 146.12s/it][32m[I 2022-02-23 11:51:00,639][0m Trial 12 finished with value: 0.024411255548268132 and parameters: {'num_leaves': 191}. Best is trial 9 with value: 0.024392927278511032.[0m
num_leaves, val_score: 0.024393:  30%|###       | 6/20 [14:44<34:05, 146.12s/it]

Early stopping, best iteration is:
[555]	valid_0's binary_logloss: 0.0175005	valid_1's binary_logloss: 0.0244113




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024393:  35%|###5      | 7/20 [16:20<28:04, 129.59s/it][32m[I 2022-02-23 11:52:36,188][0m Trial 13 finished with value: 0.024436447599428514 and parameters: {'num_leaves': 247}. Best is trial 9 with value: 0.024392927278511032.[0m
num_leaves, val_score: 0.024393:  35%|###5      | 7/20 [16:20<28:04, 129.59s/it]

Early stopping, best iteration is:
[581]	valid_0's binary_logloss: 0.0161557	valid_1's binary_logloss: 0.0244364




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds
[1000]	valid_0's binary_logloss: 0.0203863	valid_1's binary_logloss: 0.0244068


num_leaves, val_score: 0.024393:  40%|####      | 8/20 [18:00<24:02, 120.18s/it][32m[I 2022-02-23 11:54:16,232][0m Trial 14 finished with value: 0.02440447588316906 and parameters: {'num_leaves': 55}. Best is trial 9 with value: 0.024392927278511032.[0m
num_leaves, val_score: 0.024393:  40%|####      | 8/20 [18:00<24:02, 120.18s/it]

Early stopping, best iteration is:
[962]	valid_0's binary_logloss: 0.0204649	valid_1's binary_logloss: 0.0244045




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024393:  45%|####5     | 9/20 [19:34<20:33, 112.15s/it][32m[I 2022-02-23 11:55:50,710][0m Trial 15 finished with value: 0.02440422630077587 and parameters: {'num_leaves': 133}. Best is trial 9 with value: 0.024392927278511032.[0m
num_leaves, val_score: 0.024393:  45%|####5     | 9/20 [19:34<20:33, 112.15s/it]

Early stopping, best iteration is:
[675]	valid_0's binary_logloss: 0.0184774	valid_1's binary_logloss: 0.0244042




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024393:  50%|#####     | 10/20 [20:59<17:18, 103.88s/it][32m[I 2022-02-23 11:57:16,094][0m Trial 16 finished with value: 0.024402590335897895 and parameters: {'num_leaves': 198}. Best is trial 9 with value: 0.024392927278511032.[0m
num_leaves, val_score: 0.024393:  50%|#####     | 10/20 [20:59<17:18, 103.88s/it]

Early stopping, best iteration is:
[508]	valid_0's binary_logloss: 0.0176211	valid_1's binary_logloss: 0.0244026




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368:  55%|#####5    | 11/20 [22:22<14:36, 97.41s/it] [32m[I 2022-02-23 11:58:38,831][0m Trial 17 finished with value: 0.024368176909803013 and parameters: {'num_leaves': 100}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368:  55%|#####5    | 11/20 [22:22<14:36, 97.41s/it]

Early stopping, best iteration is:
[597]	valid_0's binary_logloss: 0.0197127	valid_1's binary_logloss: 0.0243682




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368:  60%|######    | 12/20 [23:29<11:44, 88.04s/it][32m[I 2022-02-23 11:59:45,428][0m Trial 18 finished with value: 0.02439462153106917 and parameters: {'num_leaves': 105}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368:  60%|######    | 12/20 [23:29<11:44, 88.04s/it]

Early stopping, best iteration is:
[551]	valid_0's binary_logloss: 0.0197096	valid_1's binary_logloss: 0.0243946




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368:  65%|######5   | 13/20 [24:48<09:58, 85.43s/it][32m[I 2022-02-23 12:01:04,859][0m Trial 19 finished with value: 0.02438050943020348 and parameters: {'num_leaves': 104}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368:  65%|######5   | 13/20 [24:48<09:58, 85.43s/it]

Early stopping, best iteration is:
[567]	valid_0's binary_logloss: 0.0196914	valid_1's binary_logloss: 0.0243805




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368:  70%|#######   | 14/20 [26:22<08:47, 88.00s/it][32m[I 2022-02-23 12:02:38,796][0m Trial 20 finished with value: 0.0243885304550499 and parameters: {'num_leaves': 96}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368:  70%|#######   | 14/20 [26:22<08:47, 88.00s/it]

Early stopping, best iteration is:
[744]	valid_0's binary_logloss: 0.0193542	valid_1's binary_logloss: 0.0243885




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368:  75%|#######5  | 15/20 [27:47<07:14, 86.99s/it][32m[I 2022-02-23 12:04:03,452][0m Trial 21 finished with value: 0.02440424187915946 and parameters: {'num_leaves': 77}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368:  75%|#######5  | 15/20 [27:47<07:14, 86.99s/it]

Early stopping, best iteration is:
[671]	valid_0's binary_logloss: 0.0202076	valid_1's binary_logloss: 0.0244042




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368:  80%|########  | 16/20 [28:59<05:30, 82.60s/it][32m[I 2022-02-23 12:05:15,855][0m Trial 22 finished with value: 0.024406115246554692 and parameters: {'num_leaves': 123}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368:  80%|########  | 16/20 [28:59<05:30, 82.60s/it]

Early stopping, best iteration is:
[590]	valid_0's binary_logloss: 0.019094	valid_1's binary_logloss: 0.0244061




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368:  85%|########5 | 17/20 [30:33<04:17, 85.98s/it][32m[I 2022-02-23 12:06:49,698][0m Trial 23 finished with value: 0.024397774974191933 and parameters: {'num_leaves': 148}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368:  85%|########5 | 17/20 [30:33<04:17, 85.98s/it]

Early stopping, best iteration is:
[643]	valid_0's binary_logloss: 0.0181795	valid_1's binary_logloss: 0.0243978




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368:  90%|######### | 18/20 [31:42<02:41, 80.97s/it][32m[I 2022-02-23 12:07:58,988][0m Trial 24 finished with value: 0.024499348258796617 and parameters: {'num_leaves': 24}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368:  90%|######### | 18/20 [31:42<02:41, 80.97s/it]

Early stopping, best iteration is:
[838]	valid_0's binary_logloss: 0.0221519	valid_1's binary_logloss: 0.0244993




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368:  95%|#########5| 19/20 [33:09<01:22, 82.63s/it][32m[I 2022-02-23 12:09:25,482][0m Trial 25 finished with value: 0.02440424187915946 and parameters: {'num_leaves': 77}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368:  95%|#########5| 19/20 [33:09<01:22, 82.63s/it]

Early stopping, best iteration is:
[671]	valid_0's binary_logloss: 0.0202076	valid_1's binary_logloss: 0.0244042




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


num_leaves, val_score: 0.024368: 100%|##########| 20/20 [34:31<00:00, 82.43s/it][32m[I 2022-02-23 12:10:47,469][0m Trial 26 finished with value: 0.02437900408190972 and parameters: {'num_leaves': 120}. Best is trial 17 with value: 0.024368176909803013.[0m
num_leaves, val_score: 0.024368: 100%|##########| 20/20 [34:31<00:00, 103.57s/it]


Early stopping, best iteration is:
[553]	valid_0's binary_logloss: 0.0192978	valid_1's binary_logloss: 0.024379




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342:  10%|#         | 1/10 [01:32<13:49, 92.18s/it][32m[I 2022-02-23 12:12:19,658][0m Trial 27 finished with value: 0.02434247609211936 and parameters: {'bagging_fraction': 0.7235734670479185, 'bagging_freq': 1}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342:  10%|#         | 1/10 [01:32<13:49, 92.18s/it]

Early stopping, best iteration is:
[750]	valid_0's binary_logloss: 0.0188549	valid_1's binary_logloss: 0.0243425




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342:  20%|##        | 2/10 [03:05<12:22, 92.81s/it][32m[I 2022-02-23 12:13:52,902][0m Trial 28 finished with value: 0.02435109787302415 and parameters: {'bagging_fraction': 0.6009066611077654, 'bagging_freq': 1}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342:  20%|##        | 2/10 [03:05<12:22, 92.81s/it]

Early stopping, best iteration is:
[869]	valid_0's binary_logloss: 0.018283	valid_1's binary_logloss: 0.0243511




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342:  30%|###       | 3/10 [04:12<09:26, 80.89s/it][32m[I 2022-02-23 12:14:59,613][0m Trial 29 finished with value: 0.024385840860172876 and parameters: {'bagging_fraction': 0.49206865364722224, 'bagging_freq': 2}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342:  30%|###       | 3/10 [04:12<09:26, 80.89s/it]

Early stopping, best iteration is:
[668]	valid_0's binary_logloss: 0.0190355	valid_1's binary_logloss: 0.0243858




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342:  40%|####      | 4/10 [05:39<08:21, 83.55s/it][32m[I 2022-02-23 12:16:27,240][0m Trial 30 finished with value: 0.02443995819023783 and parameters: {'bagging_fraction': 0.6093307355472419, 'bagging_freq': 6}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342:  40%|####      | 4/10 [05:39<08:21, 83.55s/it]

Early stopping, best iteration is:
[894]	valid_0's binary_logloss: 0.0182345	valid_1's binary_logloss: 0.02444




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342:  50%|#####     | 5/10 [06:48<06:30, 78.15s/it][32m[I 2022-02-23 12:17:35,815][0m Trial 31 finished with value: 0.024368432989062844 and parameters: {'bagging_fraction': 0.8511710583480203, 'bagging_freq': 5}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342:  50%|#####     | 5/10 [06:48<06:30, 78.15s/it]

Early stopping, best iteration is:
[551]	valid_0's binary_logloss: 0.0197349	valid_1's binary_logloss: 0.0243684




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342:  60%|######    | 6/10 [07:56<04:58, 74.60s/it][32m[I 2022-02-23 12:18:43,532][0m Trial 32 finished with value: 0.024358658527162805 and parameters: {'bagging_fraction': 0.6988151714601163, 'bagging_freq': 6}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342:  60%|######    | 6/10 [07:56<04:58, 74.60s/it]

Early stopping, best iteration is:
[633]	valid_0's binary_logloss: 0.0193202	valid_1's binary_logloss: 0.0243587




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342:  70%|#######   | 7/10 [09:14<03:47, 75.78s/it][32m[I 2022-02-23 12:20:01,734][0m Trial 33 finished with value: 0.02435062295860429 and parameters: {'bagging_fraction': 0.6836070828146776, 'bagging_freq': 4}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342:  70%|#######   | 7/10 [09:14<03:47, 75.78s/it]

Early stopping, best iteration is:
[802]	valid_0's binary_logloss: 0.0186384	valid_1's binary_logloss: 0.0243506




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342:  80%|########  | 8/10 [10:35<02:34, 77.44s/it][32m[I 2022-02-23 12:21:22,725][0m Trial 34 finished with value: 0.024382888258661382 and parameters: {'bagging_fraction': 0.7539676448595583, 'bagging_freq': 7}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342:  80%|########  | 8/10 [10:35<02:34, 77.44s/it]

Early stopping, best iteration is:
[640]	valid_0's binary_logloss: 0.0193176	valid_1's binary_logloss: 0.0243829




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342:  90%|######### | 9/10 [12:19<01:25, 85.77s/it][32m[I 2022-02-23 12:23:06,829][0m Trial 35 finished with value: 0.024405820956094962 and parameters: {'bagging_fraction': 0.7879897575656343, 'bagging_freq': 3}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342:  90%|######### | 9/10 [12:19<01:25, 85.77s/it]

Early stopping, best iteration is:
[770]	valid_0's binary_logloss: 0.0187979	valid_1's binary_logloss: 0.0244058




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


bagging, val_score: 0.024342: 100%|##########| 10/10 [13:41<00:00, 84.75s/it][32m[I 2022-02-23 12:24:29,273][0m Trial 36 finished with value: 0.024419759429573556 and parameters: {'bagging_fraction': 0.5475997995869051, 'bagging_freq': 2}. Best is trial 27 with value: 0.02434247609211936.[0m
bagging, val_score: 0.024342: 100%|##########| 10/10 [13:41<00:00, 82.18s/it]


Early stopping, best iteration is:
[843]	valid_0's binary_logloss: 0.0183587	valid_1's binary_logloss: 0.0244198




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


feature_fraction_stage2, val_score: 0.024342:  17%|#6        | 1/6 [01:25<07:06, 85.25s/it][32m[I 2022-02-23 12:25:54,534][0m Trial 37 finished with value: 0.024391909248331036 and parameters: {'feature_fraction': 0.7799999999999999}. Best is trial 37 with value: 0.024391909248331036.[0m
feature_fraction_stage2, val_score: 0.024342:  17%|#6        | 1/6 [01:25<07:06, 85.25s/it]

Early stopping, best iteration is:
[842]	valid_0's binary_logloss: 0.0184363	valid_1's binary_logloss: 0.0243919




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


feature_fraction_stage2, val_score: 0.024342:  33%|###3      | 2/6 [02:47<05:32, 83.23s/it][32m[I 2022-02-23 12:27:16,340][0m Trial 38 finished with value: 0.0243745730072653 and parameters: {'feature_fraction': 0.748}. Best is trial 38 with value: 0.0243745730072653.[0m
feature_fraction_stage2, val_score: 0.024342:  33%|###3      | 2/6 [02:47<05:32, 83.23s/it]

Early stopping, best iteration is:
[824]	valid_0's binary_logloss: 0.01852	valid_1's binary_logloss: 0.0243746




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


feature_fraction_stage2, val_score: 0.024342:  50%|#####     | 3/6 [04:09<04:08, 82.73s/it][32m[I 2022-02-23 12:28:38,482][0m Trial 39 finished with value: 0.02437220387733463 and parameters: {'feature_fraction': 0.652}. Best is trial 39 with value: 0.02437220387733463.[0m
feature_fraction_stage2, val_score: 0.024342:  50%|#####     | 3/6 [04:09<04:08, 82.73s/it]

Early stopping, best iteration is:
[671]	valid_0's binary_logloss: 0.0192089	valid_1's binary_logloss: 0.0243722




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


feature_fraction_stage2, val_score: 0.024342:  67%|######6   | 4/6 [05:44<02:55, 87.86s/it][32m[I 2022-02-23 12:30:14,209][0m Trial 40 finished with value: 0.024356768262204228 and parameters: {'feature_fraction': 0.6839999999999999}. Best is trial 40 with value: 0.024356768262204228.[0m
feature_fraction_stage2, val_score: 0.024342:  67%|######6   | 4/6 [05:44<02:55, 87.86s/it]

Early stopping, best iteration is:
[781]	valid_0's binary_logloss: 0.0187566	valid_1's binary_logloss: 0.0243568




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


feature_fraction_stage2, val_score: 0.024342:  83%|########3 | 5/6 [07:15<01:28, 88.90s/it][32m[I 2022-02-23 12:31:44,957][0m Trial 41 finished with value: 0.024408253245012803 and parameters: {'feature_fraction': 0.62}. Best is trial 40 with value: 0.024356768262204228.[0m
feature_fraction_stage2, val_score: 0.024342:  83%|########3 | 5/6 [07:15<01:28, 88.90s/it]

Early stopping, best iteration is:
[750]	valid_0's binary_logloss: 0.0189501	valid_1's binary_logloss: 0.0244083




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


feature_fraction_stage2, val_score: 0.024342: 100%|##########| 6/6 [08:45<00:00, 89.30s/it][32m[I 2022-02-23 12:33:15,015][0m Trial 42 finished with value: 0.024342476092119363 and parameters: {'feature_fraction': 0.716}. Best is trial 42 with value: 0.024342476092119363.[0m
feature_fraction_stage2, val_score: 0.024342: 100%|##########| 6/6 [08:45<00:00, 87.62s/it]


Early stopping, best iteration is:
[750]	valid_0's binary_logloss: 0.0188549	valid_1's binary_logloss: 0.0243425




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024342:   5%|5         | 1/20 [01:18<24:47, 78.29s/it][32m[I 2022-02-23 12:34:33,310][0m Trial 43 finished with value: 0.02434970723600465 and parameters: {'lambda_l1': 0.004976801386049323, 'lambda_l2': 3.6975737730464346e-07}. Best is trial 43 with value: 0.02434970723600465.[0m
regularization_factors, val_score: 0.024342:   5%|5         | 1/20 [01:18<24:47, 78.29s/it]

Early stopping, best iteration is:
[771]	valid_0's binary_logloss: 0.0187689	valid_1's binary_logloss: 0.0243497




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024342:  10%|#         | 2/20 [02:52<26:22, 87.93s/it][32m[I 2022-02-23 12:36:07,988][0m Trial 44 finished with value: 0.024363917829280157 and parameters: {'lambda_l1': 1.705898979238511e-07, 'lambda_l2': 1.000034951470484e-07}. Best is trial 43 with value: 0.02434970723600465.[0m
regularization_factors, val_score: 0.024342:  10%|#         | 2/20 [02:52<26:22, 87.93s/it]

Early stopping, best iteration is:
[751]	valid_0's binary_logloss: 0.0188456	valid_1's binary_logloss: 0.0243639




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024332:  15%|#5        | 3/20 [04:09<23:30, 82.95s/it][32m[I 2022-02-23 12:37:25,005][0m Trial 45 finished with value: 0.02433211996489611 and parameters: {'lambda_l1': 5.506472853480445e-06, 'lambda_l2': 0.02504859156083246}. Best is trial 45 with value: 0.02433211996489611.[0m
regularization_factors, val_score: 0.024332:  15%|#5        | 3/20 [04:09<23:30, 82.95s/it]

Early stopping, best iteration is:
[784]	valid_0's binary_logloss: 0.0190265	valid_1's binary_logloss: 0.0243321




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024332:  20%|##        | 4/20 [05:56<24:39, 92.44s/it][32m[I 2022-02-23 12:39:11,992][0m Trial 46 finished with value: 0.024397189515499653 and parameters: {'lambda_l1': 0.9086148802067959, 'lambda_l2': 2.912818501880647e-07}. Best is trial 45 with value: 0.02433211996489611.[0m
regularization_factors, val_score: 0.024332:  20%|##        | 4/20 [05:56<24:39, 92.44s/it]

Early stopping, best iteration is:
[852]	valid_0's binary_logloss: 0.0189324	valid_1's binary_logloss: 0.0243972




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024332:  25%|##5       | 5/20 [07:10<21:22, 85.49s/it][32m[I 2022-02-23 12:40:25,152][0m Trial 47 finished with value: 0.02435414748880942 and parameters: {'lambda_l1': 0.026607587591229078, 'lambda_l2': 1.0288719996065516e-07}. Best is trial 45 with value: 0.02433211996489611.[0m
regularization_factors, val_score: 0.024332:  25%|##5       | 5/20 [07:10<21:22, 85.49s/it]

Early stopping, best iteration is:
[690]	valid_0's binary_logloss: 0.0190856	valid_1's binary_logloss: 0.0243541




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  30%|###       | 6/20 [08:27<19:20, 82.88s/it][32m[I 2022-02-23 12:41:42,972][0m Trial 48 finished with value: 0.024328416353054803 and parameters: {'lambda_l1': 2.652545580792273e-07, 'lambda_l2': 4.127044972448657e-07}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  30%|###       | 6/20 [08:27<19:20, 82.88s/it]

Early stopping, best iteration is:
[764]	valid_0's binary_logloss: 0.0187971	valid_1's binary_logloss: 0.0243284




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  35%|###5      | 7/20 [10:03<18:50, 86.95s/it][32m[I 2022-02-23 12:43:18,293][0m Trial 49 finished with value: 0.02433128511719284 and parameters: {'lambda_l1': 9.280465055709417e-05, 'lambda_l2': 0.011513663296755959}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  35%|###5      | 7/20 [10:03<18:50, 86.95s/it]

Early stopping, best iteration is:
[791]	valid_0's binary_logloss: 0.0188719	valid_1's binary_logloss: 0.0243313




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  40%|####      | 8/20 [11:35<17:41, 88.48s/it][32m[I 2022-02-23 12:44:50,048][0m Trial 50 finished with value: 0.024342127426292978 and parameters: {'lambda_l1': 0.01601041121120325, 'lambda_l2': 0.002257086288892145}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  40%|####      | 8/20 [11:35<17:41, 88.48s/it]

Early stopping, best iteration is:
[750]	valid_0's binary_logloss: 0.0188723	valid_1's binary_logloss: 0.0243421




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  45%|####5     | 9/20 [12:52<15:35, 85.01s/it][32m[I 2022-02-23 12:46:07,443][0m Trial 51 finished with value: 0.024364980720272093 and parameters: {'lambda_l1': 1.3743690946107495e-07, 'lambda_l2': 9.232734394855335e-06}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  45%|####5     | 9/20 [12:52<15:35, 85.01s/it]

Early stopping, best iteration is:
[751]	valid_0's binary_logloss: 0.0188321	valid_1's binary_logloss: 0.024365




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  50%|#####     | 10/20 [14:24<14:30, 87.05s/it][32m[I 2022-02-23 12:47:39,046][0m Trial 52 finished with value: 0.024416877246554003 and parameters: {'lambda_l1': 0.0036000975371223744, 'lambda_l2': 1.8758095950132456}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  50%|#####     | 10/20 [14:24<14:30, 87.05s/it]

Early stopping, best iteration is:
[807]	valid_0's binary_logloss: 0.0208153	valid_1's binary_logloss: 0.0244169




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  55%|#####5    | 11/20 [15:46<12:49, 85.51s/it][32m[I 2022-02-23 12:49:01,079][0m Trial 53 finished with value: 0.024358359927866308 and parameters: {'lambda_l1': 7.569917733590331e-06, 'lambda_l2': 3.54225778149218e-05}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  55%|#####5    | 11/20 [15:46<12:49, 85.51s/it]

Early stopping, best iteration is:
[820]	valid_0's binary_logloss: 0.0186024	valid_1's binary_logloss: 0.0243584




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  60%|######    | 12/20 [17:13<11:29, 86.20s/it][32m[I 2022-02-23 12:50:28,846][0m Trial 54 finished with value: 0.02437793975897554 and parameters: {'lambda_l1': 5.247813885092218e-05, 'lambda_l2': 0.11512585127618073}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  60%|######    | 12/20 [17:13<11:29, 86.20s/it]

Early stopping, best iteration is:
[891]	valid_0's binary_logloss: 0.0192544	valid_1's binary_logloss: 0.0243779




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  65%|######5   | 13/20 [18:27<09:36, 82.38s/it][32m[I 2022-02-23 12:51:42,427][0m Trial 55 finished with value: 0.02434940667888221 and parameters: {'lambda_l1': 1.6600194644148547e-08, 'lambda_l2': 0.0005538835742353322}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  65%|######5   | 13/20 [18:27<09:36, 82.38s/it]

Early stopping, best iteration is:
[702]	valid_0's binary_logloss: 0.0190358	valid_1's binary_logloss: 0.0243494




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  70%|#######   | 14/20 [20:01<08:36, 86.02s/it][32m[I 2022-02-23 12:53:16,870][0m Trial 56 finished with value: 0.024344539389967064 and parameters: {'lambda_l1': 0.00011778888445128869, 'lambda_l2': 1.4739634457807534e-05}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  70%|#######   | 14/20 [20:01<08:36, 86.02s/it]

Early stopping, best iteration is:
[781]	valid_0's binary_logloss: 0.018721	valid_1's binary_logloss: 0.0243445




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  75%|#######5  | 15/20 [21:33<07:19, 87.87s/it][32m[I 2022-02-23 12:54:49,017][0m Trial 57 finished with value: 0.024334717625448442 and parameters: {'lambda_l1': 1.0325044196075442e-06, 'lambda_l2': 0.008413098466700342}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  75%|#######5  | 15/20 [21:33<07:19, 87.87s/it]

Early stopping, best iteration is:
[783]	valid_0's binary_logloss: 0.0188568	valid_1's binary_logloss: 0.0243347




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  80%|########  | 16/20 [23:31<06:26, 96.69s/it][32m[I 2022-02-23 12:56:46,181][0m Trial 58 finished with value: 0.024415223693659102 and parameters: {'lambda_l1': 0.0004885596519296439, 'lambda_l2': 7.742691363055127}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  80%|########  | 16/20 [23:31<06:26, 96.69s/it]

Early stopping, best iteration is:
[757]	valid_0's binary_logloss: 0.0213392	valid_1's binary_logloss: 0.0244152




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  85%|########5 | 17/20 [24:56<04:40, 93.42s/it][32m[I 2022-02-23 12:58:12,002][0m Trial 59 finished with value: 0.024422460606308302 and parameters: {'lambda_l1': 0.47924500925251207, 'lambda_l2': 1.524306146522353e-08}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  85%|########5 | 17/20 [24:56<04:40, 93.42s/it]

Early stopping, best iteration is:
[795]	valid_0's binary_logloss: 0.0188356	valid_1's binary_logloss: 0.0244225




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  90%|######### | 18/20 [26:38<03:11, 95.80s/it][32m[I 2022-02-23 12:59:53,344][0m Trial 60 finished with value: 0.02439211214956318 and parameters: {'lambda_l1': 1.186813936768744e-08, 'lambda_l2': 0.2380541918489892}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  90%|######### | 18/20 [26:38<03:11, 95.80s/it]

Early stopping, best iteration is:
[826]	valid_0's binary_logloss: 0.0198296	valid_1's binary_logloss: 0.0243921




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328:  95%|#########5| 19/20 [27:54<01:29, 89.84s/it][32m[I 2022-02-23 13:01:09,314][0m Trial 61 finished with value: 0.0243630351946252 and parameters: {'lambda_l1': 8.0168976644923e-06, 'lambda_l2': 9.846316670418355e-05}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328:  95%|#########5| 19/20 [27:54<01:29, 89.84s/it]

Early stopping, best iteration is:
[748]	valid_0's binary_logloss: 0.0188475	valid_1's binary_logloss: 0.024363




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


regularization_factors, val_score: 0.024328: 100%|##########| 20/20 [29:11<00:00, 86.01s/it][32m[I 2022-02-23 13:02:26,403][0m Trial 62 finished with value: 0.024359196360765605 and parameters: {'lambda_l1': 0.0006620434213245456, 'lambda_l2': 0.0006507600237914236}. Best is trial 48 with value: 0.024328416353054803.[0m
regularization_factors, val_score: 0.024328: 100%|##########| 20/20 [29:11<00:00, 87.57s/it]


Early stopping, best iteration is:
[751]	valid_0's binary_logloss: 0.0188516	valid_1's binary_logloss: 0.0243592




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 0.024328:  20%|##        | 1/5 [01:12<04:50, 72.63s/it][32m[I 2022-02-23 13:03:39,037][0m Trial 63 finished with value: 0.024347519690985753 and parameters: {'min_child_samples': 25}. Best is trial 63 with value: 0.024347519690985753.[0m
min_data_in_leaf, val_score: 0.024328:  20%|##        | 1/5 [01:12<04:50, 72.63s/it]

Early stopping, best iteration is:
[708]	valid_0's binary_logloss: 0.0191394	valid_1's binary_logloss: 0.0243475




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 0.024328:  40%|####      | 2/5 [02:22<03:33, 71.15s/it][32m[I 2022-02-23 13:04:49,146][0m Trial 64 finished with value: 0.024615242131034273 and parameters: {'min_child_samples': 5}. Best is trial 63 with value: 0.024347519690985753.[0m
min_data_in_leaf, val_score: 0.024328:  40%|####      | 2/5 [02:22<03:33, 71.15s/it]

Early stopping, best iteration is:
[413]	valid_0's binary_logloss: 0.0195996	valid_1's binary_logloss: 0.0246152




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 0.024328:  60%|######    | 3/5 [03:46<02:33, 76.82s/it][32m[I 2022-02-23 13:06:12,728][0m Trial 65 finished with value: 0.024331969993790854 and parameters: {'min_child_samples': 50}. Best is trial 65 with value: 0.024331969993790854.[0m
min_data_in_leaf, val_score: 0.024328:  60%|######    | 3/5 [03:46<02:33, 76.82s/it]

Early stopping, best iteration is:
[787]	valid_0's binary_logloss: 0.0192489	valid_1's binary_logloss: 0.024332




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 0.024317:  80%|########  | 4/5 [05:09<01:19, 79.22s/it][32m[I 2022-02-23 13:07:35,610][0m Trial 66 finished with value: 0.024316836225889173 and parameters: {'min_child_samples': 100}. Best is trial 66 with value: 0.024316836225889173.[0m
min_data_in_leaf, val_score: 0.024317:  80%|########  | 4/5 [05:09<01:19, 79.22s/it]

Early stopping, best iteration is:
[787]	valid_0's binary_logloss: 0.0196511	valid_1's binary_logloss: 0.0243168




[LightGBM] [Info] Number of positive: 12998, number of negative: 2977593
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4864
[LightGBM] [Info] Number of data points in the train set: 2990591, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004346 -> initscore=-5.434075
[LightGBM] [Info] Start training from score -5.434075
Training until validation scores don't improve for 100 rounds


min_data_in_leaf, val_score: 0.024317: 100%|##########| 5/5 [06:51<00:00, 87.67s/it][32m[I 2022-02-23 13:09:18,275][0m Trial 67 finished with value: 0.02446794383542016 and parameters: {'min_child_samples': 10}. Best is trial 66 with value: 0.024316836225889173.[0m
min_data_in_leaf, val_score: 0.024317: 100%|##########| 5/5 [06:51<00:00, 82.37s/it]

Early stopping, best iteration is:
[784]	valid_0's binary_logloss: 0.0182872	valid_1's binary_logloss: 0.0244679





In [15]:
# Show the parameter dict stored on `model` so the settings actually used are recorded in the notebook.
# NOTE(review): `model` is created in an earlier cell not shown here — presumably the booster
# returned by the Optuna LightGBM tuning run logged above; confirm before relying on these values.
model.params

{'objective': 'binary',
 'boosting': 'gbdt',
 'learning_rate': 0.01,
 'metric': 'binary_logloss',
 'seed': 42,
 'feature_pre_filter': False,
 'lambda_l1': 2.652545580792273e-07,
 'lambda_l2': 4.127044972448657e-07,
 'num_leaves': 100,
 'feature_fraction': 0.7,
 'bagging_fraction': 0.7235734670479185,
 'bagging_freq': 1,
 'min_child_samples': 100,
 'num_iterations': 20000,
 'early_stopping_round': 100}