# exp034

In [55]:
import os
import sys
import gc
import itertools
import pickle
import pathlib
import datetime
from dateutil.relativedelta import relativedelta
from dotenv import load_dotenv
load_dotenv()
sys.path.append(os.getenv('UTILS_PATH'))

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
import matplotlib.pyplot as plt
import seaborn as sns
import optuna

import line_notify

In [56]:
import builtins
import types

def imports():
    for name, val in globals().items():
        # module imports
        if isinstance(val, types.ModuleType):
            yield name, val

            # functions / callables
        if hasattr(val, '__call__'):
            yield name, val


def noglobal(f):
    '''
    ref: https://gist.github.com/raven38/4e4c3c7a179283c441f575d6e375510c
    '''
    return types.FunctionType(f.__code__,
                              dict(imports()),
                              f.__name__,
                              f.__defaults__,
                              f.__closure__
                              )

In [57]:
SEED = 42
N_ITER = 1
RUN_INF = False # 推論処理を行うか
BATCH_SIZE = int(5e5)

In [58]:
Ns = {}
Ns['cf_a'] = 12
Ns['ctf_a'] = 12
Ns['atfd_a'] = 12
Ns['atfp_a'] = 12
Ns['pa_a'] = 12

Ns['cf_w'] = 12
Ns['ctf_w'] = 12
Ns['atfd_w'] = 12
Ns['atfp_w'] = 12
Ns['pa_w'] = 12

Ns['cf_m'] = 12
Ns['ctf_m'] = 12
Ns['atfd_m'] = 12
Ns['atfp_m'] = 12
Ns['pa_m'] = 12

Ns['cf_y'] = 12
Ns['ctf_y'] = 12
Ns['atfd_y'] = 12
Ns['atfp_y'] = 12
Ns['pa_y'] = 12

In [59]:
score = 0.0025


ディレクトリ設定

In [60]:
INPUT_DIR = os.getenv('INPUT_DIR')
OUTPUT_DIR = os.getenv('OUTPUT_DIR')
#exp_name = os.path.dirname(__file__).split('/')[-1]
#exp_name = 'exp034'
#os.makedirs(OUTPUT_DIR + exp_name, exist_ok=True)

データ読み込み

In [61]:
articles = pd.read_csv(INPUT_DIR + 'articles.csv', dtype='object')
customers = pd.read_csv(INPUT_DIR + 'customers.csv')
transactions = pd.read_csv(INPUT_DIR + 'transactions_train.csv', dtype={'article_id':'str'}, parse_dates=['t_dat'])
sample = pd.read_csv(INPUT_DIR + 'sample_submission.csv')

# 前処理

In [62]:
ALL_CUSTOMER = customers['customer_id'].unique().tolist()
ALL_ARTICLE = articles['article_id'].unique().tolist()

customer_ids = dict(list(enumerate(ALL_CUSTOMER)))
article_ids = dict(list(enumerate(ALL_ARTICLE)))

customer_map = {u: uidx for uidx, u in customer_ids.items()}
article_map = {i: iidx for iidx, i in article_ids.items()}

articles['article_id'] = articles['article_id'].map(article_map)
customers['customer_id'] = customers['customer_id'].map(customer_map)
transactions['article_id'] = transactions['article_id'].map(article_map)
transactions['customer_id'] = transactions['customer_id'].map(customer_map)
sample['customer_id'] = sample['customer_id'].map(customer_map)

In [63]:
# 名寄せ
customers['fashion_news_frequency'] = customers['fashion_news_frequency'].str.replace('None','NONE')

In [64]:
customers['age10'] = str((customers['age'] // 10) * 10)
customers.loc[customers['age'].isnull(), 'age10'] = np.nan

In [65]:
# label_encoding
le_cols = ['product_type_name', 'product_group_name', 'graphical_appearance_name',
            'colour_group_name', 'perceived_colour_value_name', 'perceived_colour_master_name', 'department_name',
            'index_name', 'index_group_name', 'section_name', 'garment_group_name']
for c in le_cols:
    le = LabelEncoder()
    articles[c] = le.fit_transform(articles[c].fillna(''))


le_cols = ['club_member_status', 'fashion_news_frequency', 'postal_code', 'age10']
for c in le_cols:
    le = LabelEncoder()
    customers[c] = le.fit_transform(customers[c].fillna(''))

In [66]:
customers['customer_type'] = customers['FN'].fillna(0).astype(int).astype(str) + \
                             customers['Active'].fillna(0).astype(int).astype(str) + \
                             customers['club_member_status'].fillna(0).astype(int).astype(str) + \
                             customers['fashion_news_frequency'].fillna(0).astype(int).astype(str) + \
                             customers['age10'].fillna(0).astype(int).astype(str)

le = LabelEncoder()
customers['customer_type'] = le.fit_transform(customers['customer_type'])

In [67]:
# transactionに紐づけ
transactions = transactions.merge(customers, on='customer_id', how='left')
transactions = transactions.merge(articles, on='article_id', how='left')

# データセット作成（レコメンド→対象データセット作成→特徴量エンジニアリング）

In [68]:
@noglobal
def get_customer_frequent(history, n=12, timedelta=None):
    """顧客ごと商品の購入数をカウントし上位の商品を抽出

    Args:
        history (dataframe): 集計対象の実績データ
        n (int): レコメンド対象とする数
        timedelta (dateutil.relativedelta): 指定された場合、実績データの終端からtimedelta分のデータを取得する

    Returns:
        dataframe: 抽出結果
    """
    if timedelta is not None:
        st_date = history['t_dat'].max() - timedelta
        history = history[history['t_dat']>=st_date].copy()
        
    customer_agg = history.groupby(['customer_id', 'article_id'])['t_dat'].count().reset_index()
    customer_agg = customer_agg.rename(columns={'t_dat':'cnt'})
    customer_agg = customer_agg.sort_values(['customer_id', 'cnt'], ascending=False)
    result = customer_agg.groupby('customer_id').head(n)
    return result[['customer_id', 'article_id']]

@noglobal
def get_popular_article(history, n=12, timedelta=None):
    """全体の購入数をカウントし上位の商品を抽出

    Args:
        history (dataframe): 集計対象の実績データ
        n (int): レコメンド対象とする数
        timedelta (dateutil.relativedelta): 指定された場合、実績データの終端からtimedelta分のデータを取得する

    Returns:
        list: 抽出結果
    """
    # 全体の購入数量
    if timedelta is not None:
        st_date = history['t_dat'].max() - timedelta
        history = history[history['t_dat']>=st_date].copy()

    total_agg = history.groupby('article_id')['t_dat'].count().reset_index()
    total_agg = total_agg.rename(columns={'t_dat':'cnt'})
    total_agg = total_agg.sort_values(['cnt'], ascending=False)
    total_agg = total_agg.head(n)
    result = list(total_agg['article_id'].values)
    return result

@noglobal
def get_customer_type_frequent(history, n=12, timedelta=None):
    if timedelta is not None:
        st_date = history['t_dat'].max() - timedelta
        history = history[history['t_dat']>=st_date].copy()

    result = history[['customer_id', 'customer_type']].drop_duplicates().copy()
    agg = history.groupby(['customer_type', 'article_id'])['t_dat'].count().reset_index()
    agg = agg.rename(columns={'t_dat':'cnt'})
    agg = agg.sort_values(['customer_type', 'cnt'], ascending=False)
    agg = agg.groupby('customer_type').head(n)
    result = result.merge(agg[['customer_type', 'article_id']], on='customer_type', how='left')
    return result[['customer_id', 'article_id']]

@noglobal
def get_article_type_frequent(history, col, n=12, timedelta=None):
    if timedelta is not None:
        st_date = history['t_dat'].max() - timedelta
        history = history[history['t_dat']>=st_date].copy()

    result = history.groupby(['customer_id', col])['t_dat'].count().reset_index()
    result = result.rename(columns={'t_dat':'cnt'})
    result = result.sort_values(['customer_id', 'cnt'], ascending=False)
    result = result.groupby(['customer_id']).head(1)[['customer_id', col]]

    agg = history.groupby([col, 'article_id'])['t_dat'].count().reset_index()
    agg = agg.rename(columns={'t_dat':'cnt'})
    agg = agg.sort_values([col, 'cnt'], ascending=False)
    agg = agg.groupby(col).head(n)
    result = result.merge(agg[[col, 'article_id']], on=col, how='left')
    return result[['customer_id', 'article_id']]

@noglobal
def get_reccomend(target_customer_id, history, Ns):
    result = pd.DataFrame()
    
    td = None
    result = result.append(get_customer_frequent(history, Ns['cf_a'], td))
    result = result.append(get_customer_type_frequent(history, Ns['ctf_a'], td))
    result = result.append(get_article_type_frequent(history, 'department_name', Ns['atfd_a'], td))
    result = result.append(get_article_type_frequent(history, 'perceived_colour_master_name', Ns['atfp_a'], td))
    popular_article = get_popular_article(history, Ns['pa_a'], td)
    # customerとpopular articleの全組み合わせでdataframe作成
    popular_article = pd.DataFrame(itertools.product(target_customer_id, popular_article), columns=['customer_id', 'article_id'])
    result = result.append(popular_article)
    result = result.drop_duplicates()

    td = relativedelta(weeks=1)
    result = result.append(get_customer_frequent(history, Ns['cf_w'], td))
    result = result.append(get_customer_type_frequent(history, Ns['ctf_w'], td))
    result = result.append(get_article_type_frequent(history, 'department_name', Ns['atfd_w'], td))
    result = result.append(get_article_type_frequent(history, 'perceived_colour_master_name', Ns['atfp_w'], td))
    popular_article = get_popular_article(history, Ns['pa_w'], td)
    # customerとpopular articleの全組み合わせでdataframe作成
    popular_article = pd.DataFrame(itertools.product(target_customer_id, popular_article), columns=['customer_id', 'article_id'])
    result = result.append(popular_article)
    result = result.drop_duplicates()

    td = relativedelta(months=1)
    result = result.append(get_customer_frequent(history, Ns['cf_m'], td))
    result = result.append(get_customer_type_frequent(history, Ns['ctf_m'], td))
    result = result.append(get_article_type_frequent(history, 'department_name', Ns['atfd_m'], td))
    result = result.append(get_article_type_frequent(history, 'perceived_colour_master_name', Ns['atfp_m'], td))
    popular_article = get_popular_article(history, Ns['pa_m'], td)
    # customerとpopular articleの全組み合わせでdataframe作成
    popular_article = pd.DataFrame(itertools.product(target_customer_id, popular_article), columns=['customer_id', 'article_id'])
    result = result.append(popular_article)
    result = result.drop_duplicates()

    td = relativedelta(years=1)
    result = result.append(get_customer_frequent(history, Ns['cf_y'], td))
    result = result.append(get_customer_type_frequent(history, Ns['ctf_y'], td))
    result = result.append(get_article_type_frequent(history, 'department_name', Ns['atfd_y'], td))
    result = result.append(get_article_type_frequent(history, 'perceived_colour_master_name', Ns['atfp_y'], td))
    popular_article = get_popular_article(history, Ns['pa_y'], td)
    # customerとpopular articleの全組み合わせでdataframe作成
    popular_article = pd.DataFrame(itertools.product(target_customer_id, popular_article), columns=['customer_id', 'article_id'])
    result = result.append(popular_article)
    result = result.drop_duplicates()

    result = result[result['customer_id'].isin(target_customer_id)].copy()

    return result

In [69]:
@noglobal
def add_labels(recom_result, history):
    """レコメンドしたデータが学習期間で購入されたかどうかのフラグを付与する

    Args:
        recom_result (_type_): レコメンド結果
        train_tran (_type_): 学習期間のトランザクションデータ

    Returns:
        _type_: 学習期間での購入フラグを付与したレコメンド結果
    """
    history = history[['customer_id', 'article_id']].drop_duplicates()
    history['buy'] = 1
    recom_result = recom_result.merge(history, on=['customer_id', 'article_id'], how='left')
    recom_result['buy'] = recom_result['buy'].fillna(0)
    return recom_result


In [70]:
@noglobal
def make_article_features(articles):
    cols = ['product_type_name', 'product_group_name', 'graphical_appearance_name',
            'colour_group_name', 'perceived_colour_value_name', 'perceived_colour_master_name', 'department_name',
            'index_name', 'index_group_name', 'section_name', 'garment_group_name']
    return articles[['article_id']+cols]

@noglobal
def make_article_tran_features(history):
    df = history.groupby('article_id').agg({'t_dat':['count', 'max', 'min'],
                                            'price':['max', 'min', 'mean'], 
                                            'age':['max', 'min', 'mean', 'std']}).reset_index()
    df.columns = ['article_id','article_total_cnt', 'article_total_latest_buy', 'article_total_1st_buy', 'article_price_max', 'article_price_min', 'article_price_mean', 'article_age_max', 'article_age_min', 'article_age_mean', 'article_age_std']
    df['article_total_1st_buy'] = (history['t_dat'].max() - df['article_total_1st_buy']).dt.days
    df['article_total_latest_buy'] = (history['t_dat'].max() - df['article_total_latest_buy']).dt.days
    return df


@noglobal
def make_customer_features(customers):
    return customers

@noglobal
def make_customer_tran_features(history):
    df = history.groupby('customer_id').agg({'t_dat':['count', 'max', 'min'],
                                            'price':['max', 'min', 'mean']}).reset_index()
    df.columns = ['customer_id','customer_total_cnt', 'customer_total_latest_buy', 'customer_total_1st_buy', 'customer_price_max', 'customer_price_min', 'customer_price_mean']
    df['customer_total_1st_buy'] = (history['t_dat'].max() - df['customer_total_1st_buy']).dt.days
    df['customer_total_latest_buy'] = (history['t_dat'].max() - df['customer_total_latest_buy']).dt.days
    return df

@noglobal
def make_customer_article_features(target, history):
    df = target.merge(history, on=['customer_id', 'article_id'], how='inner')
    df = df.groupby(['customer_id', 'article_id']).agg({'t_dat':['count', 'min', 'max']}).reset_index()
    df.columns = ['customer_id', 'article_id', 'count', '1st_buy_date_diff', 'latest_buy_date_diff']
    df['1st_buy_date_diff'] = (history['t_dat'].max() - df['1st_buy_date_diff']).dt.days
    df['latest_buy_date_diff'] = (history['t_dat'].max() - df['latest_buy_date_diff']).dt.days
    return df

@noglobal
def add_same_article_type_rate(target, history, col):
    add_data = history[['customer_id', col]].copy()
    add_data['total'] = add_data.groupby('customer_id').transform('count')
    add_data = add_data.groupby(['customer_id', col])['total'].agg(['max', 'count']).reset_index()
    add_data[f'{col}_customer_buy_rate'] = add_data['count'] / add_data['max']
    target = target.merge(add_data[['customer_id', col, f'{col}_customer_buy_rate']], on=['customer_id', col], how='left')
    return target

    

@noglobal
def add_features(df, history, articles, customers):
    df = df.merge(make_article_features(articles), on=['article_id'], how='left')
    df = df.merge(make_article_tran_features(history), on=['article_id'], how='left')
    df = df.merge(make_customer_features(customers), on=['customer_id'], how='left')
    df = df.merge(make_customer_tran_features(history), on=['customer_id'], how='left')
    df = df.merge(make_customer_article_features(df[['customer_id', 'article_id']], history), on=['article_id', 'customer_id'], how='left')

    cols = ['product_type_name', 'product_group_name', 'graphical_appearance_name',
            'colour_group_name', 'perceived_colour_value_name', 'perceived_colour_master_name', 'department_name',
            'index_name', 'index_group_name', 'section_name', 'garment_group_name']

    for c in cols:
        df = add_same_article_type_rate(df, history, c)

    return df
    

# レコメンド商品を購入するかどうかの2値分類モデル

In [71]:
def apk(y_true, y_pred, K=12):
    assert(len(y_true) == len(y_pred))
    apks = []
    for idx in range(len(y_true)):
        y_i_true = y_true[idx]
        y_i_pred = y_pred[idx]

        # 予測値の数と重複の確認
        assert(len(y_i_pred) <= K)
        assert(len(np.unique(y_i_pred)) == len(y_i_pred))

        sum_precision = 0.0
        num_hits = 0.0

        for i, p in enumerate(y_i_pred):
            if p in y_i_true:
                num_hits += 1
                precision = num_hits / (i+1)
                sum_precision += precision
        apk = sum_precision / min(len(y_i_true), K)
        apks.append(apk)
    return apks

In [72]:
@noglobal
def run_train(transactions, articles, customers, Ns):
    # 1週ずつローリングして学習データを生成し検証
    train_start = datetime.datetime(2020,9,9)
    valid_start = datetime.datetime(2020,9,16)
    valid_end = datetime.datetime(2020,9,22)

    # 学習データの作成
    history_tran = transactions[transactions['t_dat'] < train_start].copy()
    target_tran = transactions[(transactions['t_dat'] >= train_start) & (transactions['t_dat'] < valid_start)].copy()
    target_id = target_tran['customer_id'].unique().tolist()
    recom = get_reccomend(target_id, history_tran, Ns)
    ml_train = add_labels(recom, target_tran)
    ml_train = add_features(ml_train, history_tran, articles, customers)

    # 評価データの作成
    history_tran = transactions[transactions['t_dat'] < valid_start].copy()
    target_tran = transactions[(transactions['t_dat'] >= valid_start) & (transactions['t_dat'] <= valid_end)].copy()
    target_id = target_tran['customer_id'].unique().tolist()
    recom = get_reccomend(target_id, history_tran, Ns)
    ml_valid = add_labels(recom, target_tran)
    ml_valid = add_features(ml_valid, history_tran, articles, customers)

    target = 'buy'
    not_use_cols = ['customer_id', 'article_id', target]
    features = [c for c in ml_train.columns if c not in not_use_cols]

    params = {
    "objective": "binary",
    "boosting" : "gbdt",
    "learning_rate": 0.01,
    "metric": "binary_logloss",
    "seed": 42
}

    # 学習
    tr_x, tr_y = ml_train[features], ml_train[target]
    vl_x, vl_y = ml_valid[features], ml_valid[target]
    tr_data = lgb.Dataset(tr_x, label=tr_y)
    vl_data = lgb.Dataset(vl_x, label=vl_y)
    model = lgb.train(params, tr_data, valid_sets=[tr_data, vl_data],
                    num_boost_round=20000, early_stopping_rounds=100,verbose_eval=1000)

    # cv
    vl_pred = model.predict(vl_x, num_iteration=model.best_iteration)
    # 正解データ作成
    valid = transactions[(transactions['t_dat'] >= valid_start) & (transactions['t_dat'] <= valid_end)].copy()
    valid = valid[['customer_id', 'article_id']].drop_duplicates()
    valid = valid.groupby('customer_id')['article_id'].apply(list).reset_index()
    valid = valid.sort_values('customer_id').reset_index(drop=True)
    # 2値分類の出力を元に12個選定
    valid_pred = ml_valid[['customer_id', 'article_id']].copy()
    valid_pred['prob'] = vl_pred
    valid_pred = valid_pred.sort_values(['customer_id', 'prob'], ascending=False)
    valid_pred = valid_pred.groupby('customer_id').head(12)
    valid_pred = valid_pred.groupby('customer_id')['article_id'].apply(list).reset_index()
    valid_pred = valid_pred.sort_values('customer_id').reset_index(drop=True)
    assert(valid['customer_id'].tolist() == valid_pred['customer_id'].tolist())
    # MAP@12
    score = np.mean(apk(valid['article_id'].tolist(), valid_pred['article_id'].tolist()))

    return score

In [73]:
def objective(trial):
    cf_a = trial.suggest_int('cf_a', 0, 24)
    ctf_a = trial.suggest_int('ctf_a', 0, 24)
    atfd_a = trial.suggest_int('atfd_a', 0, 24)
    atfp_a = trial.suggest_int('atfp_a', 0, 24)
    pa_a = trial.suggest_int('pa_a', 0, 24)

    cf_w = trial.suggest_int('cf_w', 0, 24)
    ctf_w = trial.suggest_int('ctf_w', 0, 24)
    atfd_w = trial.suggest_int('atfd_w', 0, 24)
    atfp_w = trial.suggest_int('atfp_w', 0, 24)
    pa_w = trial.suggest_int('pa_w', 0, 24)

    cf_m = trial.suggest_int('cf_m', 0, 24)
    ctf_m = trial.suggest_int('ctf_m', 0, 24)
    atfd_m = trial.suggest_int('atfd_m', 0, 24)
    atfp_m = trial.suggest_int('atfp_m', 0, 24)
    pa_m = trial.suggest_int('pa_m', 0, 24)

    cf_y = trial.suggest_int('cf_y', 0, 24)
    ctf_y = trial.suggest_int('ctf_y', 0, 24)
    atfd_y = trial.suggest_int('atfd_y', 0, 24)
    atfp_y = trial.suggest_int('atfp_y', 0, 24)
    pa_y = trial.suggest_int('pa_y', 0, 24)

    Ns['cf_a'] = cf_a
    Ns['ctf_a'] = ctf_a
    Ns['atfd_a'] = atfd_a
    Ns['atfp_a'] = atfp_a
    Ns['pa_a'] = pa_a

    Ns['cf_w'] = cf_w
    Ns['ctf_w'] = ctf_w
    Ns['atfd_w'] = atfd_w
    Ns['atfp_w'] = atfp_w
    Ns['pa_w'] = pa_w

    Ns['cf_m'] = cf_m
    Ns['ctf_m'] = ctf_m
    Ns['atfd_m'] = atfd_m
    Ns['atfp_m'] = atfp_m
    Ns['pa_m'] = pa_m

    Ns['cf_y'] = cf_y
    Ns['ctf_y'] = ctf_y
    Ns['atfd_y'] = atfd_y
    Ns['atfp_y'] = atfp_y
    Ns['pa_y'] = pa_y

    total_n = ctf_a + pa_a + \
              ctf_w + pa_w + \
              ctf_m + pa_m + \
              ctf_y + pa_y

    if total_n > 12:
        score = run_train(transactions, articles, customers, Ns)
    else:
        score = 0.0
    message = f'{Ns}\n{score}'
    line_notify.send(message)

    return score

In [74]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, timeout=12*60*60)

[32m[I 2022-03-05 23:35:34,571][0m A new study created in memory with name: no-name-5711e405-32d4-4709-ad61-3f5670a29693[0m


[LightGBM] [Info] Number of positive: 18494, number of negative: 6454551
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7678
[LightGBM] [Info] Number of data points in the train set: 6473045, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002857 -> initscore=-5.855094
[LightGBM] [Info] Start training from score -5.855094
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[789]	training's binary_logloss: 0.0156292	valid_1's binary_logloss: 0.0175252


[32m[I 2022-03-05 23:51:40,855][0m Trial 0 finished with value: 0.030452073847616577 and parameters: {'cf_a': 12, 'ctf_a': 20, 'atfd_a': 6, 'atfp_a': 18, 'pa_a': 10, 'cf_w': 2, 'ctf_w': 14, 'atfd_w': 11, 'atfp_w': 6, 'pa_w': 15, 'cf_m': 14, 'ctf_m': 3, 'atfd_m': 17, 'atfp_m': 6, 'pa_m': 12, 'cf_y': 23, 'ctf_y': 8, 'atfd_y': 14, 'atfp_y': 9, 'pa_y': 21}. Best is trial 0 with value: 0.030452073847616577.[0m


[LightGBM] [Info] Number of positive: 17827, number of negative: 5737420
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7649
[LightGBM] [Info] Number of data points in the train set: 5755247, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003098 -> initscore=-5.774051
[LightGBM] [Info] Start training from score -5.774051
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0164761	valid_1's binary_logloss: 0.0189033
[2000]	training's binary_logloss: 0.015446	valid_1's binary_logloss: 0.0188845
Early stopping, best iteration is:
[2069]	training's binary_logloss: 0.0153834	valid_1's binary_logloss: 0.0188838


[32m[I 2022-03-06 00:06:22,870][0m Trial 1 finished with value: 0.03088291661301478 and parameters: {'cf_a': 5, 'ctf_a': 3, 'atfd_a': 17, 'atfp_a': 6, 'pa_a': 11, 'cf_w': 15, 'ctf_w': 8, 'atfd_w': 14, 'atfp_w': 23, 'pa_w': 17, 'cf_m': 5, 'ctf_m': 1, 'atfd_m': 2, 'atfp_m': 4, 'pa_m': 14, 'cf_y': 23, 'ctf_y': 21, 'atfd_y': 8, 'atfp_y': 5, 'pa_y': 2}. Best is trial 1 with value: 0.03088291661301478.[0m


[LightGBM] [Info] Number of positive: 18525, number of negative: 6104509
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7647
[LightGBM] [Info] Number of data points in the train set: 6123034, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003025 -> initscore=-5.797662
[LightGBM] [Info] Start training from score -5.797662
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0163202	valid_1's binary_logloss: 0.0186329
[2000]	training's binary_logloss: 0.0153456	valid_1's binary_logloss: 0.0186038
Early stopping, best iteration is:
[2552]	training's binary_logloss: 0.0149001	valid_1's binary_logloss: 0.0185936


[32m[I 2022-03-06 00:24:13,099][0m Trial 2 finished with value: 0.030664596025918128 and parameters: {'cf_a': 6, 'ctf_a': 21, 'atfd_a': 6, 'atfp_a': 13, 'pa_a': 5, 'cf_w': 15, 'ctf_w': 11, 'atfd_w': 5, 'atfp_w': 13, 'pa_w': 10, 'cf_m': 6, 'ctf_m': 20, 'atfd_m': 15, 'atfp_m': 18, 'pa_m': 19, 'cf_y': 23, 'ctf_y': 1, 'atfd_y': 3, 'atfp_y': 4, 'pa_y': 24}. Best is trial 1 with value: 0.03088291661301478.[0m


[LightGBM] [Info] Number of positive: 19015, number of negative: 6412356
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7651
[LightGBM] [Info] Number of data points in the train set: 6431371, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002957 -> initscore=-5.820754
[LightGBM] [Info] Start training from score -5.820754
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0159736	valid_1's binary_logloss: 0.0182275
Early stopping, best iteration is:
[1496]	training's binary_logloss: 0.0154822	valid_1's binary_logloss: 0.0182149


[32m[I 2022-03-06 00:39:00,002][0m Trial 3 finished with value: 0.030918865428753658 and parameters: {'cf_a': 5, 'ctf_a': 10, 'atfd_a': 11, 'atfp_a': 17, 'pa_a': 9, 'cf_w': 13, 'ctf_w': 7, 'atfd_w': 21, 'atfp_w': 2, 'pa_w': 17, 'cf_m': 10, 'ctf_m': 18, 'atfd_m': 6, 'atfp_m': 7, 'pa_m': 19, 'cf_y': 18, 'ctf_y': 2, 'atfd_y': 19, 'atfp_y': 0, 'pa_y': 9}. Best is trial 3 with value: 0.030918865428753658.[0m


[LightGBM] [Info] Number of positive: 19162, number of negative: 7092925
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7646
[LightGBM] [Info] Number of data points in the train set: 7112087, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002694 -> initscore=-5.913924
[LightGBM] [Info] Start training from score -5.913924
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0147255	valid_1's binary_logloss: 0.016405
Early stopping, best iteration is:
[1460]	training's binary_logloss: 0.0143025	valid_1's binary_logloss: 0.016388


[32m[I 2022-03-06 00:55:48,419][0m Trial 4 finished with value: 0.030731669139141115 and parameters: {'cf_a': 13, 'ctf_a': 24, 'atfd_a': 4, 'atfp_a': 8, 'pa_a': 14, 'cf_w': 9, 'ctf_w': 20, 'atfd_w': 21, 'atfp_w': 22, 'pa_w': 3, 'cf_m': 5, 'ctf_m': 1, 'atfd_m': 16, 'atfp_m': 12, 'pa_m': 20, 'cf_y': 11, 'ctf_y': 14, 'atfd_y': 23, 'atfp_y': 22, 'pa_y': 4}. Best is trial 3 with value: 0.030918865428753658.[0m


[LightGBM] [Info] Number of positive: 15720, number of negative: 5851020
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7690
[LightGBM] [Info] Number of data points in the train set: 5866740, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002680 -> initscore=-5.919437
[LightGBM] [Info] Start training from score -5.919437
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[621]	training's binary_logloss: 0.0143103	valid_1's binary_logloss: 0.016134


[32m[I 2022-03-06 01:08:43,893][0m Trial 5 finished with value: 0.03044174796231609 and parameters: {'cf_a': 24, 'ctf_a': 16, 'atfd_a': 20, 'atfp_a': 12, 'pa_a': 10, 'cf_w': 14, 'ctf_w': 19, 'atfd_w': 0, 'atfp_w': 6, 'pa_w': 4, 'cf_m': 1, 'ctf_m': 22, 'atfd_m': 10, 'atfp_m': 1, 'pa_m': 9, 'cf_y': 12, 'ctf_y': 13, 'atfd_y': 13, 'atfp_y': 5, 'pa_y': 5}. Best is trial 3 with value: 0.030918865428753658.[0m


[LightGBM] [Info] Number of positive: 21156, number of negative: 7065685
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7684
[LightGBM] [Info] Number of data points in the train set: 7086841, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002985 -> initscore=-5.811082
[LightGBM] [Info] Start training from score -5.811082
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.016509	valid_1's binary_logloss: 0.0186416
[2000]	training's binary_logloss: 0.0156195	valid_1's binary_logloss: 0.018622
Early stopping, best iteration is:
[2525]	training's binary_logloss: 0.015228	valid_1's binary_logloss: 0.0186163


[32m[I 2022-03-06 01:26:42,892][0m Trial 6 finished with value: 0.03092889964023639 and parameters: {'cf_a': 14, 'ctf_a': 17, 'atfd_a': 3, 'atfp_a': 19, 'pa_a': 18, 'cf_w': 16, 'ctf_w': 8, 'atfd_w': 19, 'atfp_w': 6, 'pa_w': 19, 'cf_m': 17, 'ctf_m': 19, 'atfd_m': 22, 'atfp_m': 19, 'pa_m': 21, 'cf_y': 24, 'ctf_y': 3, 'atfd_y': 5, 'atfp_y': 5, 'pa_y': 19}. Best is trial 6 with value: 0.03092889964023639.[0m


[LightGBM] [Info] Number of positive: 18852, number of negative: 6626692
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7598
[LightGBM] [Info] Number of data points in the train set: 6645544, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002837 -> initscore=-5.862242
[LightGBM] [Info] Start training from score -5.862242
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.015434	valid_1's binary_logloss: 0.0177632
Early stopping, best iteration is:
[1226]	training's binary_logloss: 0.0152112	valid_1's binary_logloss: 0.017758


[32m[I 2022-03-06 01:42:19,782][0m Trial 7 finished with value: 0.030352707062643654 and parameters: {'cf_a': 8, 'ctf_a': 19, 'atfd_a': 11, 'atfp_a': 21, 'pa_a': 10, 'cf_w': 3, 'ctf_w': 7, 'atfd_w': 10, 'atfp_w': 9, 'pa_w': 17, 'cf_m': 13, 'ctf_m': 17, 'atfd_m': 4, 'atfp_m': 7, 'pa_m': 23, 'cf_y': 1, 'ctf_y': 23, 'atfd_y': 15, 'atfp_y': 20, 'pa_y': 2}. Best is trial 6 with value: 0.03092889964023639.[0m


[LightGBM] [Info] Number of positive: 16454, number of negative: 6716099
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7676
[LightGBM] [Info] Number of data points in the train set: 6732553, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002444 -> initscore=-6.011694
[LightGBM] [Info] Start training from score -6.011694
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0128511	valid_1's binary_logloss: 0.0145799
[2000]	training's binary_logloss: 0.0119388	valid_1's binary_logloss: 0.0145576
Early stopping, best iteration is:
[2893]	training's binary_logloss: 0.0112869	valid_1's binary_logloss: 0.0145507


[32m[I 2022-03-06 02:03:12,352][0m Trial 8 finished with value: 0.030688852165571018 and parameters: {'cf_a': 18, 'ctf_a': 19, 'atfd_a': 24, 'atfp_a': 7, 'pa_a': 13, 'cf_w': 14, 'ctf_w': 2, 'atfd_w': 7, 'atfp_w': 14, 'pa_w': 6, 'cf_m': 3, 'ctf_m': 11, 'atfd_m': 7, 'atfp_m': 1, 'pa_m': 4, 'cf_y': 22, 'ctf_y': 13, 'atfd_y': 18, 'atfp_y': 11, 'pa_y': 21}. Best is trial 6 with value: 0.03092889964023639.[0m


[LightGBM] [Info] Number of positive: 18805, number of negative: 7192411
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7685
[LightGBM] [Info] Number of data points in the train set: 7211216, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002608 -> initscore=-5.946659
[LightGBM] [Info] Start training from score -5.946659
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[495]	training's binary_logloss: 0.0146355	valid_1's binary_logloss: 0.0153137


[32m[I 2022-03-06 02:17:33,138][0m Trial 9 finished with value: 0.030684287396694442 and parameters: {'cf_a': 19, 'ctf_a': 21, 'atfd_a': 13, 'atfp_a': 21, 'pa_a': 8, 'cf_w': 15, 'ctf_w': 15, 'atfd_w': 4, 'atfp_w': 6, 'pa_w': 2, 'cf_m': 14, 'ctf_m': 23, 'atfd_m': 13, 'atfp_m': 4, 'pa_m': 16, 'cf_y': 22, 'ctf_y': 16, 'atfd_y': 20, 'atfp_y': 16, 'pa_y': 4}. Best is trial 6 with value: 0.03092889964023639.[0m


[LightGBM] [Info] Number of positive: 20657, number of negative: 6490414
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7663
[LightGBM] [Info] Number of data points in the train set: 6511071, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003173 -> initscore=-5.750027
[LightGBM] [Info] Start training from score -5.750027
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0174866	valid_1's binary_logloss: 0.0200918
[2000]	training's binary_logloss: 0.0165759	valid_1's binary_logloss: 0.0200824
Early stopping, best iteration is:
[1901]	training's binary_logloss: 0.016655	valid_1's binary_logloss: 0.0200821


[32m[I 2022-03-06 02:34:31,488][0m Trial 10 finished with value: 0.030855775996823393 and parameters: {'cf_a': 14, 'ctf_a': 11, 'atfd_a': 0, 'atfp_a': 1, 'pa_a': 22, 'cf_w': 23, 'ctf_w': 0, 'atfd_w': 16, 'atfp_w': 0, 'pa_w': 23, 'cf_m': 22, 'ctf_m': 12, 'atfd_m': 24, 'atfp_m': 24, 'pa_m': 24, 'cf_y': 6, 'ctf_y': 7, 'atfd_y': 0, 'atfp_y': 15, 'pa_y': 15}. Best is trial 6 with value: 0.03092889964023639.[0m


[LightGBM] [Info] Number of positive: 20704, number of negative: 6562399
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7623
[LightGBM] [Info] Number of data points in the train set: 6583103, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003145 -> initscore=-5.758785
[LightGBM] [Info] Start training from score -5.758785
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0172884	valid_1's binary_logloss: 0.0197155
Early stopping, best iteration is:
[1020]	training's binary_logloss: 0.0172675	valid_1's binary_logloss: 0.0197151


[32m[I 2022-03-06 02:47:59,952][0m Trial 11 finished with value: 0.03055388858650307 and parameters: {'cf_a': 0, 'ctf_a': 10, 'atfd_a': 11, 'atfp_a': 16, 'pa_a': 19, 'cf_w': 21, 'ctf_w': 6, 'atfd_w': 24, 'atfp_w': 0, 'pa_w': 23, 'cf_m': 20, 'ctf_m': 16, 'atfd_m': 24, 'atfp_m': 13, 'pa_m': 19, 'cf_y': 17, 'ctf_y': 0, 'atfd_y': 8, 'atfp_y': 1, 'pa_y': 11}. Best is trial 6 with value: 0.03092889964023639.[0m


[LightGBM] [Info] Number of positive: 19326, number of negative: 6167678
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7640
[LightGBM] [Info] Number of data points in the train set: 6187004, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003124 -> initscore=-5.765626
[LightGBM] [Info] Start training from score -5.765626
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0169345	valid_1's binary_logloss: 0.0194162
Early stopping, best iteration is:
[1682]	training's binary_logloss: 0.0162545	valid_1's binary_logloss: 0.0194008


[32m[I 2022-03-06 03:08:17,821][0m Trial 12 finished with value: 0.03086517507102227 and parameters: {'cf_a': 2, 'ctf_a': 7, 'atfd_a': 1, 'atfp_a': 24, 'pa_a': 17, 'cf_w': 9, 'ctf_w': 4, 'atfd_w': 19, 'atfp_w': 3, 'pa_w': 19, 'cf_m': 18, 'ctf_m': 17, 'atfd_m': 19, 'atfp_m': 20, 'pa_m': 0, 'cf_y': 17, 'ctf_y': 5, 'atfd_y': 8, 'atfp_y': 0, 'pa_y': 11}. Best is trial 6 with value: 0.03092889964023639.[0m


[LightGBM] [Info] Number of positive: 17300, number of negative: 5201827
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7667
[LightGBM] [Info] Number of data points in the train set: 5219127, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003315 -> initscore=-5.706059
[LightGBM] [Info] Start training from score -5.706059
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0174127	valid_1's binary_logloss: 0.0203179
[2000]	training's binary_logloss: 0.0163491	valid_1's binary_logloss: 0.0202928
Early stopping, best iteration is:
[2108]	training's binary_logloss: 0.016245	valid_1's binary_logloss: 0.0202912


[32m[I 2022-03-06 03:28:01,979][0m Trial 13 finished with value: 0.03105930675658314 and parameters: {'cf_a': 9, 'ctf_a': 14, 'atfd_a': 8, 'atfp_a': 16, 'pa_a': 0, 'cf_w': 19, 'ctf_w': 9, 'atfd_w': 18, 'atfp_w': 10, 'pa_w': 13, 'cf_m': 9, 'ctf_m': 8, 'atfd_m': 8, 'atfp_m': 11, 'pa_m': 17, 'cf_y': 16, 'ctf_y': 4, 'atfd_y': 3, 'atfp_y': 8, 'pa_y': 15}. Best is trial 13 with value: 0.03105930675658314.[0m


[LightGBM] [Info] Number of positive: 16615, number of negative: 4918403
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7661
[LightGBM] [Info] Number of data points in the train set: 4935018, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003367 -> initscore=-5.690433
[LightGBM] [Info] Start training from score -5.690433
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0175061	valid_1's binary_logloss: 0.0201136
Early stopping, best iteration is:
[1636]	training's binary_logloss: 0.0167458	valid_1's binary_logloss: 0.0200923


[32m[I 2022-03-06 03:46:24,826][0m Trial 14 finished with value: 0.030871956380870303 and parameters: {'cf_a': 10, 'ctf_a': 13, 'atfd_a': 7, 'atfp_a': 12, 'pa_a': 0, 'cf_w': 19, 'ctf_w': 11, 'atfd_w': 17, 'atfp_w': 17, 'pa_w': 10, 'cf_m': 9, 'ctf_m': 7, 'atfd_m': 9, 'atfp_m': 14, 'pa_m': 8, 'cf_y': 15, 'ctf_y': 9, 'atfd_y': 4, 'atfp_y': 9, 'pa_y': 16}. Best is trial 13 with value: 0.03105930675658314.[0m


[LightGBM] [Info] Number of positive: 18257, number of negative: 5748609
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7679
[LightGBM] [Info] Number of data points in the train set: 5766866, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003166 -> initscore=-5.752165
[LightGBM] [Info] Start training from score -5.752165
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0169126	valid_1's binary_logloss: 0.0196224
Early stopping, best iteration is:
[1024]	training's binary_logloss: 0.0168844	valid_1's binary_logloss: 0.0196212


[32m[I 2022-03-06 04:00:29,250][0m Trial 15 finished with value: 0.030947477501238257 and parameters: {'cf_a': 17, 'ctf_a': 14, 'atfd_a': 3, 'atfp_a': 20, 'pa_a': 2, 'cf_w': 19, 'ctf_w': 16, 'atfd_w': 14, 'atfp_w': 9, 'pa_w': 13, 'cf_m': 17, 'ctf_m': 8, 'atfd_m': 21, 'atfp_m': 17, 'pa_m': 16, 'cf_y': 8, 'ctf_y': 4, 'atfd_y': 5, 'atfp_y': 7, 'pa_y': 16}. Best is trial 13 with value: 0.03105930675658314.[0m


[LightGBM] [Info] Number of positive: 16851, number of negative: 5448260
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7686
[LightGBM] [Info] Number of data points in the train set: 5465111, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003083 -> initscore=-5.778642
[LightGBM] [Info] Start training from score -5.778642
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[860]	training's binary_logloss: 0.0162626	valid_1's binary_logloss: 0.0189722


[32m[I 2022-03-06 04:21:03,822][0m Trial 16 finished with value: 0.030790251266374677 and parameters: {'cf_a': 19, 'ctf_a': 15, 'atfd_a': 8, 'atfp_a': 24, 'pa_a': 0, 'cf_w': 24, 'ctf_w': 24, 'atfd_w': 13, 'atfp_w': 11, 'pa_w': 12, 'cf_m': 24, 'ctf_m': 7, 'atfd_m': 0, 'atfp_m': 10, 'pa_m': 16, 'cf_y': 7, 'ctf_y': 5, 'atfd_y': 0, 'atfp_y': 8, 'pa_y': 15}. Best is trial 13 with value: 0.03105930675658314.[0m


[LightGBM] [Info] Number of positive: 17591, number of negative: 6088724
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7687
[LightGBM] [Info] Number of data points in the train set: 6106315, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002881 -> initscore=-5.846806
[LightGBM] [Info] Start training from score -5.846806
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0152766	valid_1's binary_logloss: 0.0177272
Early stopping, best iteration is:
[1791]	training's binary_logloss: 0.0144702	valid_1's binary_logloss: 0.017709


[32m[I 2022-03-06 04:37:37,355][0m Trial 17 finished with value: 0.031030497303215533 and parameters: {'cf_a': 23, 'ctf_a': 2, 'atfd_a': 14, 'atfp_a': 14, 'pa_a': 4, 'cf_w': 19, 'ctf_w': 18, 'atfd_w': 15, 'atfp_w': 17, 'pa_w': 8, 'cf_m': 17, 'ctf_m': 8, 'atfd_m': 12, 'atfp_m': 17, 'pa_m': 11, 'cf_y': 9, 'ctf_y': 9, 'atfd_y': 11, 'atfp_y': 14, 'pa_y': 8}. Best is trial 13 with value: 0.03105930675658314.[0m


[LightGBM] [Info] Number of positive: 17575, number of negative: 6235909
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7676
[LightGBM] [Info] Number of data points in the train set: 6253484, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002810 -> initscore=-5.871602
[LightGBM] [Info] Start training from score -5.871602
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.014954	valid_1's binary_logloss: 0.01731
[2000]	training's binary_logloss: 0.0140048	valid_1's binary_logloss: 0.0172827
Early stopping, best iteration is:
[2277]	training's binary_logloss: 0.0137784	valid_1's binary_logloss: 0.0172809


[32m[I 2022-03-06 04:53:50,514][0m Trial 18 finished with value: 0.031072679108807847 and parameters: {'cf_a': 23, 'ctf_a': 1, 'atfd_a': 15, 'atfp_a': 14, 'pa_a': 5, 'cf_w': 10, 'ctf_w': 19, 'atfd_w': 23, 'atfp_w': 19, 'pa_w': 7, 'cf_m': 9, 'ctf_m': 9, 'atfd_m': 12, 'atfp_m': 23, 'pa_m': 10, 'cf_y': 1, 'ctf_y': 10, 'atfd_y': 10, 'atfp_y': 14, 'pa_y': 8}. Best is trial 18 with value: 0.031072679108807847.[0m


[LightGBM] [Info] Number of positive: 17272, number of negative: 5913319
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7644
[LightGBM] [Info] Number of data points in the train set: 5930591, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002912 -> initscore=-5.835876
[LightGBM] [Info] Start training from score -5.835876
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0155321	valid_1's binary_logloss: 0.0180296
[2000]	training's binary_logloss: 0.0145418	valid_1's binary_logloss: 0.0180073
Early stopping, best iteration is:
[2074]	training's binary_logloss: 0.0144739	valid_1's binary_logloss: 0.0180067


[32m[I 2022-03-06 05:10:45,237][0m Trial 19 finished with value: 0.030781340199113154 and parameters: {'cf_a': 9, 'ctf_a': 5, 'atfd_a': 16, 'atfp_a': 10, 'pa_a': 6, 'cf_w': 9, 'ctf_w': 23, 'atfd_w': 24, 'atfp_w': 19, 'pa_w': 7, 'cf_m': 9, 'ctf_m': 14, 'atfd_m': 10, 'atfp_m': 24, 'pa_m': 6, 'cf_y': 2, 'ctf_y': 18, 'atfd_y': 10, 'atfp_y': 18, 'pa_y': 7}. Best is trial 18 with value: 0.031072679108807847.[0m


[LightGBM] [Info] Number of positive: 16931, number of negative: 6199667
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7669
[LightGBM] [Info] Number of data points in the train set: 6216598, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002724 -> initscore=-5.903105
[LightGBM] [Info] Start training from score -5.903105
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[759]	training's binary_logloss: 0.0146615	valid_1's binary_logloss: 0.0163656


[32m[I 2022-03-06 05:23:30,526][0m Trial 20 finished with value: 0.030684366536082638 and parameters: {'cf_a': 22, 'ctf_a': 8, 'atfd_a': 19, 'atfp_a': 15, 'pa_a': 2, 'cf_w': 5, 'ctf_w': 13, 'atfd_w': 21, 'atfp_w': 16, 'pa_w': 10, 'cf_m': 11, 'ctf_m': 4, 'atfd_m': 7, 'atfp_m': 10, 'pa_m': 3, 'cf_y': 5, 'ctf_y': 11, 'atfd_y': 16, 'atfp_y': 12, 'pa_y': 13}. Best is trial 18 with value: 0.031072679108807847.[0m


[LightGBM] [Info] Number of positive: 17631, number of negative: 6302981
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7671
[LightGBM] [Info] Number of data points in the train set: 6320612, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002789 -> initscore=-5.879119
[LightGBM] [Info] Start training from score -5.879119
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0148562	valid_1's binary_logloss: 0.0172237
Early stopping, best iteration is:
[1633]	training's binary_logloss: 0.0142274	valid_1's binary_logloss: 0.0172159


[32m[I 2022-03-06 05:39:52,927][0m Trial 21 finished with value: 0.030953151558952875 and parameters: {'cf_a': 24, 'ctf_a': 1, 'atfd_a': 14, 'atfp_a': 14, 'pa_a': 4, 'cf_w': 19, 'ctf_w': 20, 'atfd_w': 18, 'atfp_w': 20, 'pa_w': 7, 'cf_m': 7, 'ctf_m': 10, 'atfd_m': 13, 'atfp_m': 21, 'pa_m': 11, 'cf_y': 11, 'ctf_y': 10, 'atfd_y': 11, 'atfp_y': 13, 'pa_y': 8}. Best is trial 18 with value: 0.031072679108807847.[0m


[LightGBM] [Info] Number of positive: 16104, number of negative: 5200727
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7704
[LightGBM] [Info] Number of data points in the train set: 5216831, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003087 -> initscore=-5.777486
[LightGBM] [Info] Start training from score -5.777486
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0159369	valid_1's binary_logloss: 0.018775
[2000]	training's binary_logloss: 0.0148583	valid_1's binary_logloss: 0.0187623
Early stopping, best iteration is:
[2059]	training's binary_logloss: 0.014799	valid_1's binary_logloss: 0.0187614


[32m[I 2022-03-06 05:58:48,806][0m Trial 22 finished with value: 0.030962863262539702 and parameters: {'cf_a': 21, 'ctf_a': 0, 'atfd_a': 9, 'atfp_a': 10, 'pa_a': 3, 'cf_w': 11, 'ctf_w': 17, 'atfd_w': 16, 'atfp_w': 17, 'pa_w': 5, 'cf_m': 15, 'ctf_m': 9, 'atfd_m': 11, 'atfp_m': 15, 'pa_m': 10, 'cf_y': 3, 'ctf_y': 7, 'atfd_y': 10, 'atfp_y': 14, 'pa_y': 0}. Best is trial 18 with value: 0.031072679108807847.[0m


[LightGBM] [Info] Number of positive: 17869, number of negative: 6171783
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7673
[LightGBM] [Info] Number of data points in the train set: 6189652, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002887 -> initscore=-5.844676
[LightGBM] [Info] Start training from score -5.844676
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0154379	valid_1's binary_logloss: 0.016764
[2000]	training's binary_logloss: 0.0144743	valid_1's binary_logloss: 0.0167407
Early stopping, best iteration is:
[2592]	training's binary_logloss: 0.0140062	valid_1's binary_logloss: 0.0167337


[32m[I 2022-03-06 06:23:01,446][0m Trial 23 finished with value: 0.030505435489408064 and parameters: {'cf_a': 21, 'ctf_a': 3, 'atfd_a': 15, 'atfp_a': 16, 'pa_a': 6, 'cf_w': 6, 'ctf_w': 17, 'atfd_w': 22, 'atfp_w': 20, 'pa_w': 0, 'cf_m': 12, 'ctf_m': 5, 'atfd_m': 13, 'atfp_m': 22, 'pa_m': 14, 'cf_y': 0, 'ctf_y': 6, 'atfd_y': 7, 'atfp_y': 17, 'pa_y': 9}. Best is trial 18 with value: 0.031072679108807847.[0m


[LightGBM] [Info] Number of positive: 16965, number of negative: 5638894
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7681
[LightGBM] [Info] Number of data points in the train set: 5655859, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003000 -> initscore=-5.806291
[LightGBM] [Info] Start training from score -5.806291
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0157205	valid_1's binary_logloss: 0.0181586
[2000]	training's binary_logloss: 0.0146577	valid_1's binary_logloss: 0.0181317
Early stopping, best iteration is:
[2209]	training's binary_logloss: 0.0144714	valid_1's binary_logloss: 0.0181302


[32m[I 2022-03-06 06:38:10,264][0m Trial 24 finished with value: 0.0309984125711945 and parameters: {'cf_a': 16, 'ctf_a': 4, 'atfd_a': 18, 'atfp_a': 3, 'pa_a': 7, 'cf_w': 17, 'ctf_w': 21, 'atfd_w': 19, 'atfp_w': 15, 'pa_w': 8, 'cf_m': 8, 'ctf_m': 14, 'atfd_m': 8, 'atfp_m': 16, 'pa_m': 7, 'cf_y': 14, 'ctf_y': 11, 'atfd_y': 12, 'atfp_y': 10, 'pa_y': 13}. Best is trial 18 with value: 0.031072679108807847.[0m


[LightGBM] [Info] Number of positive: 15882, number of negative: 5085215
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7686
[LightGBM] [Info] Number of data points in the train set: 5101097, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003113 -> initscore=-5.768906
[LightGBM] [Info] Start training from score -5.768906
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0160505	valid_1's binary_logloss: 0.0186982
[2000]	training's binary_logloss: 0.0149715	valid_1's binary_logloss: 0.0186757
Early stopping, best iteration is:
[1961]	training's binary_logloss: 0.0150098	valid_1's binary_logloss: 0.0186752


[32m[I 2022-03-06 06:57:32,623][0m Trial 25 finished with value: 0.0310817736793081 and parameters: {'cf_a': 22, 'ctf_a': 7, 'atfd_a': 9, 'atfp_a': 10, 'pa_a': 0, 'cf_w': 11, 'ctf_w': 10, 'atfd_w': 9, 'atfp_w': 12, 'pa_w': 9, 'cf_m': 11, 'ctf_m': 6, 'atfd_m': 5, 'atfp_m': 11, 'pa_m': 12, 'cf_y': 9, 'ctf_y': 16, 'atfd_y': 2, 'atfp_y': 19, 'pa_y': 6}. Best is trial 25 with value: 0.0310817736793081.[0m


[LightGBM] [Info] Number of positive: 16415, number of negative: 5252382
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7670
[LightGBM] [Info] Number of data points in the train set: 5268797, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003116 -> initscore=-5.768241
[LightGBM] [Info] Start training from score -5.768241
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0163489	valid_1's binary_logloss: 0.0191343
Early stopping, best iteration is:
[1670]	training's binary_logloss: 0.0155899	valid_1's binary_logloss: 0.0191198


[32m[I 2022-03-06 07:17:41,315][0m Trial 26 finished with value: 0.0309485941465493 and parameters: {'cf_a': 20, 'ctf_a': 6, 'atfd_a': 9, 'atfp_a': 4, 'pa_a': 0, 'cf_w': 11, 'ctf_w': 11, 'atfd_w': 8, 'atfp_w': 10, 'pa_w': 14, 'cf_m': 3, 'ctf_m': 5, 'atfd_m': 4, 'atfp_m': 10, 'pa_m': 14, 'cf_y': 4, 'ctf_y': 18, 'atfd_y': 2, 'atfp_y': 23, 'pa_y': 6}. Best is trial 25 with value: 0.0310817736793081.[0m


[LightGBM] [Info] Number of positive: 17402, number of negative: 6157703
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 6175105, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002818 -> initscore=-5.868874
[LightGBM] [Info] Start training from score -5.868874
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[678]	training's binary_logloss: 0.0153075	valid_1's binary_logloss: 0.0174608


[32m[I 2022-03-06 07:31:13,545][0m Trial 27 finished with value: 0.030784283501174993 and parameters: {'cf_a': 11, 'ctf_a': 8, 'atfd_a': 22, 'atfp_a': 10, 'pa_a': 2, 'cf_w': 7, 'ctf_w': 9, 'atfd_w': 9, 'atfp_w': 12, 'pa_w': 11, 'cf_m': 12, 'ctf_m': 13, 'atfd_m': 4, 'atfp_m': 9, 'pa_m': 17, 'cf_y': 20, 'ctf_y': 16, 'atfd_y': 2, 'atfp_y': 19, 'pa_y': 11}. Best is trial 25 with value: 0.0310817736793081.[0m


[LightGBM] [Info] Number of positive: 18186, number of negative: 6227461
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7657
[LightGBM] [Info] Number of data points in the train set: 6245647, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002912 -> initscore=-5.836072
[LightGBM] [Info] Start training from score -5.836072
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0156303	valid_1's binary_logloss: 0.0180415
Early stopping, best iteration is:
[981]	training's binary_logloss: 0.0156408	valid_1's binary_logloss: 0.0180072


[32m[I 2022-03-06 07:44:23,493][0m Trial 28 finished with value: 0.0309925886790288 and parameters: {'cf_a': 15, 'ctf_a': 12, 'atfd_a': 12, 'atfp_a': 10, 'pa_a': 1, 'cf_w': 12, 'ctf_w': 4, 'atfd_w': 12, 'atfp_w': 24, 'pa_w': 15, 'cf_m': 10, 'ctf_m': 6, 'atfd_m': 5, 'atfp_m': 13, 'pa_m': 13, 'cf_y': 14, 'ctf_y': 19, 'atfd_y': 6, 'atfp_y': 22, 'pa_y': 19}. Best is trial 25 with value: 0.0310817736793081.[0m


[LightGBM] [Info] Number of positive: 14868, number of negative: 4665401
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7653
[LightGBM] [Info] Number of data points in the train set: 4680269, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003177 -> initscore=-5.748718
[LightGBM] [Info] Start training from score -5.748718
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0165388	valid_1's binary_logloss: 0.0191214
Early stopping, best iteration is:
[1655]	training's binary_logloss: 0.0157701	valid_1's binary_logloss: 0.0191069


[32m[I 2022-03-06 07:57:25,414][0m Trial 29 finished with value: 0.03002079221659077 and parameters: {'cf_a': 8, 'ctf_a': 0, 'atfd_a': 5, 'atfp_a': 17, 'pa_a': 4, 'cf_w': 1, 'ctf_w': 13, 'atfd_w': 11, 'atfp_w': 9, 'pa_w': 9, 'cf_m': 7, 'ctf_m': 3, 'atfd_m': 2, 'atfp_m': 8, 'pa_m': 12, 'cf_y': 9, 'ctf_y': 15, 'atfd_y': 1, 'atfp_y': 20, 'pa_y': 18}. Best is trial 25 with value: 0.0310817736793081.[0m


[LightGBM] [Info] Number of positive: 17710, number of negative: 5619610
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7678
[LightGBM] [Info] Number of data points in the train set: 5637320, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003142 -> initscore=-5.759888
[LightGBM] [Info] Start training from score -5.759888
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0166408	valid_1's binary_logloss: 0.0189154
Early stopping, best iteration is:
[1894]	training's binary_logloss: 0.0157088	valid_1's binary_logloss: 0.0189009


[32m[I 2022-03-06 08:11:55,905][0m Trial 30 finished with value: 0.031112422090567846 and parameters: {'cf_a': 12, 'ctf_a': 9, 'atfd_a': 8, 'atfp_a': 12, 'pa_a': 2, 'cf_w': 8, 'ctf_w': 15, 'atfd_w': 23, 'atfp_w': 13, 'pa_w': 14, 'cf_m': 15, 'ctf_m': 10, 'atfd_m': 15, 'atfp_m': 4, 'pa_m': 5, 'cf_y': 20, 'ctf_y': 21, 'atfd_y': 4, 'atfp_y': 12, 'pa_y': 13}. Best is trial 30 with value: 0.031112422090567846.[0m


[LightGBM] [Info] Number of positive: 17565, number of negative: 5573959
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7671
[LightGBM] [Info] Number of data points in the train set: 5591524, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003141 -> initscore=-5.759953
[LightGBM] [Info] Start training from score -5.759953
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0166276	valid_1's binary_logloss: 0.0189513
[2000]	training's binary_logloss: 0.0156231	valid_1's binary_logloss: 0.0189265
[3000]	training's binary_logloss: 0.0147952	valid_1's binary_logloss: 0.0188966
Early stopping, best iteration is:
[3114]	training's binary_logloss: 0.0147039	valid_1's binary_logloss: 0.018896


[32m[I 2022-03-06 08:30:58,784][0m Trial 31 finished with value: 0.03143208627505985 and parameters: {'cf_a': 12, 'ctf_a': 9, 'atfd_a': 8, 'atfp_a': 12, 'pa_a': 2, 'cf_w': 8, 'ctf_w': 14, 'atfd_w': 23, 'atfp_w': 12, 'pa_w': 12, 'cf_m': 15, 'ctf_m': 10, 'atfd_m': 15, 'atfp_m': 11, 'pa_m': 5, 'cf_y': 19, 'ctf_y': 22, 'atfd_y': 3, 'atfp_y': 11, 'pa_y': 13}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 18123, number of negative: 5888080
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7671
[LightGBM] [Info] Number of data points in the train set: 5906203, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003068 -> initscore=-5.783503
[LightGBM] [Info] Start training from score -5.783503
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0163766	valid_1's binary_logloss: 0.0185467
[2000]	training's binary_logloss: 0.0154	valid_1's binary_logloss: 0.0185154
Early stopping, best iteration is:
[2363]	training's binary_logloss: 0.0150847	valid_1's binary_logloss: 0.0185093


[32m[I 2022-03-06 08:48:35,923][0m Trial 32 finished with value: 0.03128298089039722 and parameters: {'cf_a': 12, 'ctf_a': 8, 'atfd_a': 9, 'atfp_a': 12, 'pa_a': 3, 'cf_w': 4, 'ctf_w': 14, 'atfd_w': 23, 'atfp_w': 13, 'pa_w': 15, 'cf_m': 14, 'ctf_m': 10, 'atfd_m': 18, 'atfp_m': 4, 'pa_m': 5, 'cf_y': 18, 'ctf_y': 24, 'atfd_y': 4, 'atfp_y': 12, 'pa_y': 13}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 18231, number of negative: 5842328
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7680
[LightGBM] [Info] Number of data points in the train set: 5860559, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003111 -> initscore=-5.769761
[LightGBM] [Info] Start training from score -5.769761
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0166498	valid_1's binary_logloss: 0.0188574
Early stopping, best iteration is:
[1113]	training's binary_logloss: 0.0165249	valid_1's binary_logloss: 0.0188509


[32m[I 2022-03-06 09:01:33,639][0m Trial 33 finished with value: 0.030984696871091788 and parameters: {'cf_a': 12, 'ctf_a': 9, 'atfd_a': 10, 'atfp_a': 8, 'pa_a': 2, 'cf_w': 3, 'ctf_w': 14, 'atfd_w': 23, 'atfp_w': 13, 'pa_w': 15, 'cf_m': 15, 'ctf_m': 11, 'atfd_m': 17, 'atfp_m': 4, 'pa_m': 4, 'cf_y': 20, 'ctf_y': 24, 'atfd_y': 4, 'atfp_y': 11, 'pa_y': 13}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 18021, number of negative: 5649110
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7673
[LightGBM] [Info] Number of data points in the train set: 5667131, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003180 -> initscore=-5.747716
[LightGBM] [Info] Start training from score -5.747716
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[765]	training's binary_logloss: 0.0171785	valid_1's binary_logloss: 0.0191914


[32m[I 2022-03-06 09:14:15,632][0m Trial 34 finished with value: 0.030986481109733306 and parameters: {'cf_a': 11, 'ctf_a': 6, 'atfd_a': 7, 'atfp_a': 12, 'pa_a': 3, 'cf_w': 7, 'ctf_w': 15, 'atfd_w': 6, 'atfp_w': 12, 'pa_w': 18, 'cf_m': 15, 'ctf_m': 10, 'atfd_m': 18, 'atfp_m': 5, 'pa_m': 1, 'cf_y': 19, 'ctf_y': 21, 'atfd_y': 6, 'atfp_y': 12, 'pa_y': 10}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 17283, number of negative: 5201769
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7671
[LightGBM] [Info] Number of data points in the train set: 5219052, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003312 -> initscore=-5.707031
[LightGBM] [Info] Start training from score -5.707031
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[867]	training's binary_logloss: 0.0175943	valid_1's binary_logloss: 0.0198176


[32m[I 2022-03-06 09:27:10,421][0m Trial 35 finished with value: 0.031150422023169963 and parameters: {'cf_a': 6, 'ctf_a': 9, 'atfd_a': 6, 'atfp_a': 6, 'pa_a': 8, 'cf_w': 5, 'ctf_w': 12, 'atfd_w': 21, 'atfp_w': 15, 'pa_w': 12, 'cf_m': 20, 'ctf_m': 1, 'atfd_m': 15, 'atfp_m': 0, 'pa_m': 5, 'cf_y': 21, 'ctf_y': 22, 'atfd_y': 2, 'atfp_y': 16, 'pa_y': 13}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 18054, number of negative: 5521869
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7672
[LightGBM] [Info] Number of data points in the train set: 5539923, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003259 -> initscore=-5.723104
[LightGBM] [Info] Start training from score -5.723104
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0173418	valid_1's binary_logloss: 0.0194705
[2000]	training's binary_logloss: 0.0163152	valid_1's binary_logloss: 0.0194478
Early stopping, best iteration is:
[2541]	training's binary_logloss: 0.0158301	valid_1's binary_logloss: 0.0194354


[32m[I 2022-03-06 09:44:48,101][0m Trial 36 finished with value: 0.03090104615388013 and parameters: {'cf_a': 6, 'ctf_a': 11, 'atfd_a': 6, 'atfp_a': 5, 'pa_a': 12, 'cf_w': 0, 'ctf_w': 13, 'atfd_w': 21, 'atfp_w': 15, 'pa_w': 16, 'cf_m': 20, 'ctf_m': 1, 'atfd_m': 15, 'atfp_m': 0, 'pa_m': 6, 'cf_y': 21, 'ctf_y': 21, 'atfd_y': 4, 'atfp_y': 16, 'pa_y': 13}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 19005, number of negative: 5717193
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7656
[LightGBM] [Info] Number of data points in the train set: 5736198, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003313 -> initscore=-5.706531
[LightGBM] [Info] Start training from score -5.706531
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0178055	valid_1's binary_logloss: 0.0201768
Early stopping, best iteration is:
[1340]	training's binary_logloss: 0.017452	valid_1's binary_logloss: 0.0201614


[32m[I 2022-03-06 09:58:06,370][0m Trial 37 finished with value: 0.031094676302717787 and parameters: {'cf_a': 3, 'ctf_a': 9, 'atfd_a': 5, 'atfp_a': 7, 'pa_a': 8, 'cf_w': 4, 'ctf_w': 12, 'atfd_w': 22, 'atfp_w': 15, 'pa_w': 21, 'cf_m': 21, 'ctf_m': 0, 'atfd_m': 15, 'atfp_m': 2, 'pa_m': 2, 'cf_y': 24, 'ctf_y': 22, 'atfd_y': 0, 'atfp_y': 10, 'pa_y': 24}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 17812, number of negative: 5516205
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7700
[LightGBM] [Info] Number of data points in the train set: 5534017, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003219 -> initscore=-5.735573
[LightGBM] [Info] Start training from score -5.735573
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0170565	valid_1's binary_logloss: 0.0193847
Early stopping, best iteration is:
[1493]	training's binary_logloss: 0.0165272	valid_1's binary_logloss: 0.0193672


[32m[I 2022-03-06 10:12:38,298][0m Trial 38 finished with value: 0.03142532631206851 and parameters: {'cf_a': 13, 'ctf_a': 12, 'atfd_a': 3, 'atfp_a': 2, 'pa_a': 6, 'cf_w': 7, 'ctf_w': 15, 'atfd_w': 20, 'atfp_w': 8, 'pa_w': 12, 'cf_m': 19, 'ctf_m': 15, 'atfd_m': 20, 'atfp_m': 2, 'pa_m': 5, 'cf_y': 19, 'ctf_y': 24, 'atfd_y': 8, 'atfp_y': 7, 'pa_y': 17}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 17574, number of negative: 5234755
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7660
[LightGBM] [Info] Number of data points in the train set: 5252329, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003346 -> initscore=-5.696655
[LightGBM] [Info] Start training from score -5.696655
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.017709	valid_1's binary_logloss: 0.020066
Early stopping, best iteration is:
[1754]	training's binary_logloss: 0.0168636	valid_1's binary_logloss: 0.0200395


[32m[I 2022-03-06 10:27:21,145][0m Trial 39 finished with value: 0.031022195380691106 and parameters: {'cf_a': 7, 'ctf_a': 12, 'atfd_a': 4, 'atfp_a': 1, 'pa_a': 8, 'cf_w': 5, 'ctf_w': 12, 'atfd_w': 19, 'atfp_w': 7, 'pa_w': 11, 'cf_m': 19, 'ctf_m': 21, 'atfd_m': 19, 'atfp_m': 2, 'pa_m': 7, 'cf_y': 18, 'ctf_y': 24, 'atfd_y': 7, 'atfp_y': 3, 'pa_y': 17}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 17822, number of negative: 5452879
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7689
[LightGBM] [Info] Number of data points in the train set: 5470701, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003258 -> initscore=-5.723465
[LightGBM] [Info] Start training from score -5.723465
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0172353	valid_1's binary_logloss: 0.0194364
[2000]	training's binary_logloss: 0.0161724	valid_1's binary_logloss: 0.0194167
Early stopping, best iteration is:
[2622]	training's binary_logloss: 0.0156136	valid_1's binary_logloss: 0.019411


[32m[I 2022-03-06 10:42:49,598][0m Trial 40 finished with value: 0.031108517208634008 and parameters: {'cf_a': 14, 'ctf_a': 16, 'atfd_a': 2, 'atfp_a': 2, 'pa_a': 11, 'cf_w': 2, 'ctf_w': 14, 'atfd_w': 21, 'atfp_w': 4, 'pa_w': 12, 'cf_m': 23, 'ctf_m': 15, 'atfd_m': 21, 'atfp_m': 6, 'pa_m': 3, 'cf_y': 22, 'ctf_y': 19, 'atfd_y': 6, 'atfp_y': 6, 'pa_y': 20}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 18032, number of negative: 5675568
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7678
[LightGBM] [Info] Number of data points in the train set: 5693600, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003167 -> initscore=-5.751778
[LightGBM] [Info] Start training from score -5.751778
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0168399	valid_1's binary_logloss: 0.0192399
Early stopping, best iteration is:
[1785]	training's binary_logloss: 0.0160273	valid_1's binary_logloss: 0.0192249


[32m[I 2022-03-06 10:57:22,513][0m Trial 41 finished with value: 0.031052078475755054 and parameters: {'cf_a': 12, 'ctf_a': 10, 'atfd_a': 7, 'atfp_a': 0, 'pa_a': 6, 'cf_w': 7, 'ctf_w': 16, 'atfd_w': 24, 'atfp_w': 13, 'pa_w': 14, 'cf_m': 18, 'ctf_m': 12, 'atfd_m': 16, 'atfp_m': 3, 'pa_m': 5, 'cf_y': 20, 'ctf_y': 23, 'atfd_y': 3, 'atfp_y': 10, 'pa_y': 22}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 18198, number of negative: 5602425
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7691
[LightGBM] [Info] Number of data points in the train set: 5620623, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003238 -> initscore=-5.729643
[LightGBM] [Info] Start training from score -5.729643
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.0172468	valid_1's binary_logloss: 0.0194979
Early stopping, best iteration is:
[1058]	training's binary_logloss: 0.0171829	valid_1's binary_logloss: 0.0194961


[32m[I 2022-03-06 11:11:25,576][0m Trial 42 finished with value: 0.031014275080320317 and parameters: {'cf_a': 13, 'ctf_a': 9, 'atfd_a': 5, 'atfp_a': 8, 'pa_a': 5, 'cf_w': 8, 'ctf_w': 15, 'atfd_w': 20, 'atfp_w': 7, 'pa_w': 16, 'cf_m': 16, 'ctf_m': 18, 'atfd_m': 17, 'atfp_m': 0, 'pa_m': 5, 'cf_y': 18, 'ctf_y': 22, 'atfd_y': 5, 'atfp_y': 12, 'pa_y': 14}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 18189, number of negative: 6129221
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7649
[LightGBM] [Info] Number of data points in the train set: 6147410, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.002959 -> initscore=-5.820006
[LightGBM] [Info] Start training from score -5.820006
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.015849	valid_1's binary_logloss: 0.0181282
[2000]	training's binary_logloss: 0.0148679	valid_1's binary_logloss: 0.0180996
Early stopping, best iteration is:
[2351]	training's binary_logloss: 0.0145838	valid_1's binary_logloss: 0.0180963


[32m[I 2022-03-06 11:26:58,229][0m Trial 43 finished with value: 0.03080334973536638 and parameters: {'cf_a': 4, 'ctf_a': 11, 'atfd_a': 3, 'atfp_a': 5, 'pa_a': 7, 'cf_w': 5, 'ctf_w': 16, 'atfd_w': 22, 'atfp_w': 14, 'pa_w': 12, 'cf_m': 14, 'ctf_m': 13, 'atfd_m': 19, 'atfp_m': 6, 'pa_m': 8, 'cf_y': 19, 'ctf_y': 22, 'atfd_y': 24, 'atfp_y': 4, 'pa_y': 12}. Best is trial 31 with value: 0.03143208627505985.[0m


[LightGBM] [Info] Number of positive: 17945, number of negative: 5955185
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7676
[LightGBM] [Info] Number of data points in the train set: 5973130, number of used features: 49
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.003004 -> initscore=-5.804706
[LightGBM] [Info] Start training from score -5.804706
Training until validation scores don't improve for 100 rounds
[1000]	training's binary_logloss: 0.016038	valid_1's binary_logloss: 0.0181074
[2000]	training's binary_logloss: 0.0150581	valid_1's binary_logloss: 0.0180929
Early stopping, best iteration is:
[1980]	training's binary_logloss: 0.0150751	valid_1's binary_logloss: 0.0180893


[32m[I 2022-03-06 11:41:59,239][0m Trial 44 finished with value: 0.0309393399301375 and parameters: {'cf_a': 16, 'ctf_a': 7, 'atfd_a': 10, 'atfp_a': 12, 'pa_a': 9, 'cf_w': 3, 'ctf_w': 11, 'atfd_w': 23, 'atfp_w': 11, 'pa_w': 14, 'cf_m': 13, 'ctf_m': 16, 'atfd_m': 14, 'atfp_m': 3, 'pa_m': 3, 'cf_y': 21, 'ctf_y': 20, 'atfd_y': 1, 'atfp_y': 15, 'pa_y': 17}. Best is trial 31 with value: 0.03143208627505985.[0m


In [75]:
print(study.best_params)

{'cf_a': 12, 'ctf_a': 9, 'atfd_a': 8, 'atfp_a': 12, 'pa_a': 2, 'cf_w': 8, 'ctf_w': 14, 'atfd_w': 23, 'atfp_w': 12, 'pa_w': 12, 'cf_m': 15, 'ctf_m': 10, 'atfd_m': 15, 'atfp_m': 11, 'pa_m': 5, 'cf_y': 19, 'ctf_y': 22, 'atfd_y': 3, 'atfp_y': 11, 'pa_y': 13}
