In [41]:
import gc
import os
import sys
import math
import random
import warnings
import pickle
from pathlib import Path
from joblib import Parallel, delayed
from statsmodels.tsa.deterministic import (CalendarFourier,
                                           CalendarSeasonality,
                                           CalendarTimeTrend,
                                           DeterministicProcess)
import optuna
from tqdm import tqdm
import os
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# from pandarallel import pandarallel
# pandarallel.initialize()
warnings.simplefilter("ignore")
import ctypes as ct
from datetime import timedelta

from sklearn.metrics import mean_absolute_error

import statistics as st
import lightgbm as lgbm
from scipy.stats import norm

In [42]:
sys.path.append('../../')
import src.utils as utils

## Parameters

In [43]:
targets_cols = ['playerId', 'target1', 'target2', 'target3', 'target4', 'date']
players_cols = ['playerId', 'primaryPositionName', 'birthCity', 'DOY', 'mlbDebutYear', 'DebutAge', 'heightInches', 'weight', 'playerForTestSetAndFuturePreds']
rosters_cols = ['playerId', 'teamId', 'status', 'date']
salaries_cols = ['teamId', 'salary', 'year']
standings_cols = ['teamId', 'wildCardRank', 'sportGamesBack', 'date']
transactions_cols = ['playerId', 'transaction_flag', 'date']
scores_cols = ['playerId', 'battingOrder', 'gamesPlayedBatting', 'flyOuts',
       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',
       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',
       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',
       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',
       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',
       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',
       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',
       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',
       'groundOutsPitching', 'runsPitching', 'doublesPitching',
       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',
       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',
       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',
       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',
       'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls',
       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',
       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',
       'inheritedRunnersScored', 'catchersInterferencePitching',
       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',
       'assists', 'putOuts', 'errors', 'chances', 'date']

feature_cols1 = ['week_day','label_playerId', 'label_primaryPositionName', 'label_teamId',
       'label_status', 'battingOrder', 'gamesPlayedBatting', 'flyOuts',
       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',
       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',
       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',
       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',
       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',
       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',
       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',
       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',
       'groundOutsPitching', 'runsPitching', 'doublesPitching',
       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',
       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',
       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',
       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',
       'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls',
       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',
       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',
       'inheritedRunnersScored', 'catchersInterferencePitching',
       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',
       'assists', 'putOuts', 'errors', 'chances',
        "target1_mean","target1_median","target1_std","target1_min","target1_max","target1_skew","target1_kurt",
         "target2_mean","target2_median","target2_std","target2_min","target2_max","target2_skew","target2_kurt",
        "target3_mean","target3_median","target3_std","target3_min","target3_max","target3_skew","target3_kurt",
        "target4_mean","target4_median","target4_std","target4_min","target4_max","target4_skew","target4_kurt", 
        'season_info', 'wildCardRank'] 

feature_cols2 = ['label_playerId', 'label_primaryPositionName', 'label_teamId',
       'label_status', 'battingOrder', 'gamesPlayedBatting', 'flyOuts',
       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',
       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',
       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',
       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',
       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',
       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',
       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',
       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',
       'groundOutsPitching', 'runsPitching', 'doublesPitching',
       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',
       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',
       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',
       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',
       'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls',
       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',
       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',
       'inheritedRunnersScored', 'catchersInterferencePitching',
       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',
       'assists', 'putOuts', 'errors', 'chances',
        "target1_mean","target1_median","target1_std","target1_min","target1_max","target1_skew","target1_kurt",
         "target2_mean","target2_median","target2_std","target2_min","target2_max","target2_skew","target2_kurt",
        "target3_mean","target3_median","target3_std","target3_min","target3_max","target3_skew","target3_kurt",
        "target4_mean","target4_median","target4_std","target4_min","target4_max","target4_skew","target4_kurt", 
        'season_info', 'wildCardRank'] 

feature_cols3 = ['week_day','label_playerId', 'label_primaryPositionName', 'label_teamId',
       'label_status', 'battingOrder', 'gamesPlayedBatting', 'flyOuts',
       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',
       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',
       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',
       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',
       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',
       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',
       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',
       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',
       'groundOutsPitching', 'runsPitching', 'doublesPitching',
       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',
       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',
       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',
       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',
       'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls',
       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',
       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',
       'inheritedRunnersScored', 'catchersInterferencePitching',
       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',
       'assists', 'putOuts', 'errors', 'chances',
        "target1_mean","target1_median","target1_std","target1_min","target1_max","target1_skew","target1_kurt",
         "target2_mean","target2_median","target2_std","target2_min","target2_max","target2_skew","target2_kurt",
        "target3_mean","target3_median","target3_std","target3_min","target3_max","target3_skew","target3_kurt",
        "target4_mean","target4_median","target4_std","target4_min","target4_max","target4_skew","target4_kurt", 
        'season_info', 'wildCardRank'] 

feature_cols4 = ['week_day', 'annual_day', 'month', 'label_playerId', 'label_primaryPositionName', 'label_teamId', 'label_birthCity',
                'DOY', 'mlbDebutYear', 'DebutAge', 'heightInches', 'weight',
       'label_status', 'battingOrder', 'gamesPlayedBatting', 'flyOuts',
       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',
       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',
       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',
       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',
       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',
       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',
       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',
       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',
       'groundOutsPitching', 'runsPitching', 'doublesPitching',
       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',
       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',
       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',
       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',
       'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls',
       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',
       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',
       'inheritedRunnersScored', 'catchersInterferencePitching',
       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',
       'assists', 'putOuts', 'errors', 'chances',
        "target1_mean","target1_median","target1_std","target1_min","target1_max","target1_skew","target1_kurt",
         "target2_mean","target2_median","target2_std","target2_min","target2_max","target2_skew","target2_kurt",
        "target3_mean","target3_median","target3_std","target3_min","target3_max","target3_skew","target3_kurt",
        "target4_mean","target4_median","target4_std","target4_min","target4_max","target4_skew","target4_kurt", 
        'season_info', 'wildCardRank'] 

In [44]:
# Experiment-level constants.
EXP_NUM = 57  # presumably the experiment id; not referenced in the cells shown here
NFOLDS = 5  # presumably a fold count; not referenced in the cells shown here (a single date split is used below)
SEED = 42  # passed to utils.set_seed and used as the LightGBM random_state

In [45]:
# Seed through the project helper; which libraries it covers is defined in src.utils (not visible here).
utils.set_seed(SEED)

## Directories

In [46]:
# Root of the competition data. Set the MLB_DATA_DIR environment variable to
# run the notebook on another machine; the original absolute path stays the
# default so existing setups keep working unchanged.
DATA_DIR = Path(os.environ.get(
    "MLB_DATA_DIR",
    "/home/knikaido/work/MLB-Player-Digital-Engagement-Forecasting/data/",
))
MAIN_DATA_DIR = DATA_DIR / 'mlb-player-digital-engagement-forecasting'
TRAIN_DIR = MAIN_DATA_DIR / 'train'
# Output location, relative to the notebook's working directory.
OUTPUT_DIR = Path('./output/')

In [47]:
# Static player metadata.
players = pd.read_csv(MAIN_DATA_DIR / 'players.csv')

# Daily training tables.
rosters = pd.read_csv(TRAIN_DIR / 'rosters_train.csv')
targets = pd.read_csv(TRAIN_DIR / 'nextDayPlayerEngagement_train.csv')
scores = pd.read_csv(TRAIN_DIR / 'playerBoxScores_train.csv')
# Collapse multiple box-score rows for the same player and date into a single
# row by summing. numeric_only=True is spelled out because pandas >= 2.0 no
# longer drops non-numeric columns silently; older versions behave the same
# with or without it.
# NOTE(review): the sum also adds up identifier-like columns (teamId,
# jerseyNum, positionCode, battingOrder, ...) whenever a player has more than
# one row on a date; confirm that is acceptable for battingOrder, which is
# used as a feature.
scores = scores.groupby(['playerId', 'date']).sum(numeric_only=True).reset_index()
seasons = pd.read_csv(MAIN_DATA_DIR / 'seasons.csv')
salaries = pd.read_csv(MAIN_DATA_DIR / 'mlbSalaries.csv')
teams = pd.read_csv(MAIN_DATA_DIR / 'teams.csv')

standings = pd.read_csv(TRAIN_DIR / 'standings_train.csv')
playerTwitterFollowers = pd.read_csv(TRAIN_DIR / 'playerTwitterFollowers_train.csv')

# events = pd.read_csv(TRAIN_DIR / 'events_train.csv')
# events = events.groupby(['gameDate']).sum().reset_index()


In [48]:
scores

Unnamed: 0,playerId,date,home,gamePk,teamId,jerseyNum,positionCode,battingOrder,gamesPlayedBatting,flyOuts,...,sacBuntsPitching,sacFliesPitching,saves,holds,blownSaves,assists,putOuts,errors,chances,index
0,112526,20180402,0,529469,140,40.0,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,91
1,112526,20180408,1,529546,140,40.0,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,97
2,112526,20180410,1,529565,140,40.0,1,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,99
3,112526,20180415,0,529640,140,40.0,1,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,104
4,112526,20180421,1,529718,140,40.0,1,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182945,685503,20210409,1,634478,140,35.0,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1194
182946,685503,20210414,0,634496,140,35.0,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1199
182947,685503,20210419,0,634536,140,35.0,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1204
182948,685503,20210425,0,634393,140,35.0,1,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1210


In [49]:
# Missing season dates become the placeholder '0000-00-00', which the loop
# below turns into the integer 0.
seasons = seasons.fillna('0000-00-00')
# Every column after the first is converted from 'YYYY-MM-DD' text to a
# YYYYMMDD integer so it can be compared with the integer `date` columns.
# Not re-runnable as is: on a second run the columns are already integers and
# the .str accessor fails.
for c_ in seasons.columns[1:]:
    seasons[c_] = seasons[c_].str.replace('-', '').astype(int)

In [50]:
# First and last date with box-score rows inside each calendar window.
# A bound of None leaves that side of the window open. 2017 has a fixed
# full-year range instead of one derived from the box scores.
score_dates_sorted = scores['date'].sort_values()
calendar_windows = [
    (None, 20181231),
    (20190101, 20191231),
    (20200101, 20201231),
    (20210101, None),
]

game_dates_range = [np.array([20170101, 20171231])]
for window_start, window_end in calendar_windows:
    in_window = pd.Series(True, index=score_dates_sorted.index)
    if window_start is not None:
        in_window &= score_dates_sorted >= window_start
    if window_end is not None:
        in_window &= score_dates_sorted <= window_end
    # Dates are sorted, so positions 0 and -1 are the earliest and latest.
    game_dates_range.append(score_dates_sorted[in_window].iloc[[0, -1]].values)
game_dates_range = np.array(game_dates_range)

In [51]:
# Attach the first/last game date of each window to the seasons table.
# The assignment is positional: it assumes `seasons` has exactly one row per
# entry of game_dates_range, in the same order (2017 through 2021).
seasons['gameStartDate'] = game_dates_range[:, 0]
seasons['gameEndDate'] = game_dates_range[:, 1]

In [52]:
seasons

Unnamed: 0,seasonId,seasonStartDate,seasonEndDate,preSeasonStartDate,preSeasonEndDate,regularSeasonStartDate,regularSeasonEndDate,lastDate1stHalf,allStarDate,firstDate2ndHalf,postSeasonStartDate,postSeasonEndDate,gameStartDate,gameEndDate
0,2017,20170402,20171101,20170222,20170401,20170402,20171001,20170709,20170711,20170714,20171003,20171101,20170101,20171231
1,2018,20180329,20181028,20180221,20180327,20180329,20181001,20180715,20180717,20180719,20181002,20181028,20180329,20181028
2,2019,20190320,20191030,20190221,20190326,20190320,20190929,20190707,20190709,20190711,20191001,20191030,20190320,20191030
3,2020,20200723,20201028,20200221,20200722,20200723,20200927,20200825,0,20200826,20200929,20201028,20200723,20201027
4,2021,20210228,20211031,20210228,20210330,20210401,20211003,20210711,20210713,20210715,20211004,20211031,20210401,20210430


In [53]:
# Left-join roster info (teamId, status) onto every target row; rows with no
# roster entry for that player and date keep NaN in both columns.
targets_train = targets.merge(rosters[rosters_cols], on=['playerId', 'date'], how='left')

In [54]:
# Keep only rows dated 2021-04-01 or later; the per-player and per-team target
# statistics below are computed from this window only.
targets_train = targets_train[(targets_train['date'] >= 20210401)]

In [55]:
targets_train

Unnamed: 0,engagementMetricsDate,playerId,target1,target2,target3,target4,index,date,teamId,status
2444346,2021-04-02,624415,2.810640,3.998462,0.136137,10.012438,1186,20210401,141.0,Active
2444347,2021-04-02,656548,0.000000,0.153787,0.068889,0.062189,1186,20210401,146.0,Reassigned to Minors
2444348,2021-04-02,400284,0.000971,0.010985,0.021323,3.182007,1186,20210401,,
2444349,2021-04-02,606157,0.002736,1.642226,0.021323,0.549337,1186,20210401,113.0,Active
2444350,2021-04-02,665620,0.020473,0.411929,0.549469,0.435323,1186,20210401,147.0,Reassigned to Minors
...,...,...,...,...,...,...,...,...,...,...
2506171,2021-05-01,451661,0.000000,0.013314,0.000000,0.625925,1215,20210430,,
2506172,2021-05-01,519301,0.000131,0.003329,0.000000,0.216229,1215,20210430,,
2506173,2021-05-01,527055,0.000000,0.019971,0.000000,0.273131,1215,20210430,,
2506174,2021-05-01,543484,0.000131,0.056586,0.000000,1.024240,1215,20210430,,


In [56]:
# Distinct players present in that window.
playerId_list = targets_train['playerId'].unique()

In [57]:
def calc_corr(df):
    """Return the pairwise Pearson correlations of ``df``'s columns as a flat list.

    The strictly lower triangle of the correlation matrix is read row by row,
    so for columns (c0, c1, c2, c3) the values come out in the order
    (c1,c0), (c2,c0), (c2,c1), (c3,c0), (c3,c1), (c3,c2).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are correlated with each other.

    Returns
    -------
    list
        ``n * (n - 1) / 2`` correlation values for ``n`` columns; an empty
        list when there are fewer than two columns.
    """
    # Build the correlation matrix.
    corr_mat = df.corr(method='pearson')

    # Number of rows (equal to the number of columns).
    n = corr_mat.shape[0]

    # range(i) already stops before the diagonal, so no i == j guard is needed.
    return [corr_mat.iloc[i, j] for i in range(n) for j in range(i)]

In [58]:
def calc_probs(pid,df,temp):
    """Append one row of target summary statistics for ``pid`` to ``df``.

    For each of target1..target4 the row holds, in this order: mean, median,
    population standard deviation (``np.std``), min, max, skew and kurtosis
    (``Series.skew`` / ``Series.kurt``). The six pairwise correlations between
    the targets, as returned by ``calc_corr``, follow.

    Parameters
    ----------
    pid
        Identifier stored in the first column of the new row.
    df : pandas.DataFrame
        Accumulator frame; it is modified in place. Its columns must match the
        layout described above (identifier, 28 statistics, 6 correlations).
    temp : pandas.DataFrame
        Rows belonging to ``pid``; must contain the four target columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new row stored at label ``len(df)``.
    """
    targets=['target1','target2','target3','target4']
    row = [pid]
    for target in targets:
        observed = temp[target].tolist()
        row.extend([
            np.mean(observed),
            st.median(observed),
            np.std(observed),
            min(observed),
            max(observed),
            temp[target].skew(),
            temp[target].kurt(),
        ])

    # Pairwise correlations between the four targets close the row.
    row.extend(calc_corr(temp[targets]))

    # Writing at label len(df) appends the row to the caller's frame in place.
    df.loc[len(df)] = row
    return df

    

### Create the DataFrame that collects one row of target statistics per player
# Column layout expected by calc_probs: identifier, 7 statistics for each of
# the 4 targets, then the 6 pairwise target correlations.
column_names = ["playerId", "target1_mean","target1_median","target1_std","target1_min","target1_max","target1_skew","target1_kurt",
                "target2_mean","target2_median","target2_std","target2_min","target2_max","target2_skew","target2_kurt",
                "target3_mean","target3_median","target3_std","target3_min","target3_max","target3_skew","target3_kurt",
                "target4_mean","target4_median","target4_std","target4_min","target4_max","target4_skew","target4_kurt",
                'tgt1_2_corr', 'tgt1_3_corr', 'tgt2_3_corr', 'tgt1_4_corr', 'tgt2_4_corr', 'tgt3_4_corr']
player_target_probs = pd.DataFrame(columns = column_names)
    
# calc_probs appends to player_target_probs in place and returns that same
# object, so player_target_stats ends up as another name for it.
for pid in tqdm(playerId_list):
    temp = targets_train[targets_train['playerId'] == pid]
    player_target_stats=calc_probs(pid,player_target_probs,temp)

100%|██████████| 2061/2061 [00:46<00:00, 44.71it/s]


In [59]:
# Distinct teams in the same window; rows without a roster match (NaN teamId) are ignored.
teamId_list = targets_train['teamId'].dropna().unique()

In [60]:
# Same statistics as for players, but aggregated over every row of a team.
# The frame reuses column_names, so its first column is still called
# 'playerId' even though it holds team ids.
team_target_probs = pd.DataFrame(columns = column_names)
    
# `pid` is a teamId in this loop; calc_probs fills team_target_probs in place
# and returns it, so team_target_stats is another name for the same frame.
for pid in tqdm(teamId_list):
    temp = targets_train[targets_train['teamId'] == pid]
    team_target_stats=calc_probs(pid,team_target_probs,temp)

100%|██████████| 30/30 [00:00<00:00, 42.06it/s]


In [61]:
# Prefix every statistic column of the team table with "team_" so the names do
# not clash with the per-player statistics after merging. The first column
# (the identifier) keeps its name. Columns that already carry the prefix are
# left alone, so re-running this cell does not produce "team_team_..." names.
c_ = list(team_target_stats.columns)
c_ = c_[:1] + [word if word.startswith("team_") else "team_" + word for word in c_[1:]]
team_target_stats.columns = c_

In [62]:
# Add the team-level statistic columns to every feature set. Lists are
# extended in place, and names that are already present are skipped so that
# re-running this cell does not duplicate features.
team_stat_feature_cols = c_[1:]
for feature_cols in (feature_cols1, feature_cols2, feature_cols3, feature_cols4):
    missing_cols = [col for col in team_stat_feature_cols if col not in feature_cols]
    feature_cols.extend(missing_cols)

In [63]:
# Column names of the per-player statistics table, as a plain list (not used in the cells shown here).
data_names=player_target_stats.columns.values.tolist()

In [64]:
# Derive year-level features from the players table.
birth_dates = pd.to_datetime(players['DOB'], format="%Y-%m-%d")
debut_dates = pd.to_datetime(players['mlbDebutDate'], format="%Y-%m-%d")

players['DOY'] = birth_dates.dt.year  # year of birth, despite the column name
players['mlbDebutYear'] = debut_dates.dt.year
# Debut year minus birth year: a whole-year approximation of the debut age.
players['DebutAge'] = players['mlbDebutYear'] - players['DOY']

In [65]:
# Create the training dataset: start from the targets and join every feature
# table onto it.
train = targets[targets_cols].merge(players[players_cols], on=['playerId'], how='left')
train = train.merge(rosters[rosters_cols], on=['playerId', 'date'], how='left')
train = train.merge(scores[scores_cols], on=['playerId', 'date'], how='left')
# Inner join: players without a row in player_target_stats (i.e. without any
# target row dated >= 20210401) are dropped from train.
# NOTE(review): player_target_stats and team_target_stats are computed from
# targets dated >= 20210401, which is the same window later used for
# validation, and they are joined onto all dates. Confirm this use of
# validation-period targets as features is intended.
train = train.merge(player_target_stats, how='inner', left_on=["playerId"],right_on=["playerId"])
train = train.merge(standings[standings_cols], on=['teamId', 'date'], how='left')
# The team table's key column is still named 'playerId' (it holds team ids),
# hence right_on=["playerId"]; the suffix renames that clashing column on the
# right-hand side.
train = train.merge(team_target_stats, how='left', left_on=["teamId"],right_on=["playerId"], suffixes=('', 'team_'))

In [66]:
# label encoding
def _codes_by_first_appearance(column):
    """Map each distinct value of ``column`` to an integer in order of first appearance."""
    return {value: code for code, value in enumerate(column.unique())}


# The mapping dictionaries are kept as notebook-level names.
player2num = _codes_by_first_appearance(train['playerId'])
position2num = _codes_by_first_appearance(train['primaryPositionName'])
birthCityn2num = _codes_by_first_appearance(train['birthCity'])
teamid2num = _codes_by_first_appearance(train['teamId'])
status2num = _codes_by_first_appearance(train['status'])

# Each encoded column is stored next to its source as 'label_<source>'.
for raw_col, mapping in (
    ('playerId', player2num),
    ('primaryPositionName', position2num),
    ('birthCity', birthCityn2num),
    ('teamId', teamid2num),
    ('status', status2num),
):
    train['label_' + raw_col] = train[raw_col].map(mapping)

In [67]:
# Calendar features derived from the integer YYYYMMDD date.
date_ = pd.to_datetime(train['date'], format="%Y%m%d")
# Days elapsed since 1 January of the same year, as a float (1 January -> 0.0).
train['annual_day'] = (date_.dt.dayofyear - 1).astype('float64')
train['week_day'] = date_.dt.weekday
train['month'] = date_.dt.month

In [68]:
# 1 when the row has box-score data (battingOrder present), otherwise 0.
train['gameday'] = train['battingOrder'].notna().astype(int)

In [69]:
# Order rows by player and then chronologically; the run counting below relies on this order.
train = train.sort_values(['playerId', 'date'])

In [70]:
def count_consecutive_items_n_cols(df, col_name_list, output_col):
    """Number the rows inside each run of consecutive identical values.

    A new run starts whenever any column in ``col_name_list`` differs from the
    previous row. Rows are numbered 1, 2, 3, ... within a run and the number is
    written to ``df[output_col]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to annotate; it is modified in place.
    col_name_list : list of str
        Columns whose combined value defines a run.
    output_col : str
        Name of the column that receives the running count.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``output_col`` added.
    """
    run_ids_per_column = []
    for name in col_name_list:
        # True wherever the value changes from the previous row; the cumulative
        # sum turns those change points into a run id.
        starts_new_run = df[name] != df[name].shift(1)
        run_ids_per_column.append(starts_new_run.cumsum().tolist())

    # One string key per row that combines the run ids of all columns.
    group_keys = [
        "_".join(str(run_id) for run_id in row_ids)
        for row_ids in zip(*run_ids_per_column)
    ]

    position_in_run = df.groupby(group_keys).cumcount()
    df[output_col] = position_in_run + 1
    return df

In [71]:
# Number the rows inside each run of consecutive rows that share the same
# playerId and gameday flag (train is sorted by playerId, then date).
train=count_consecutive_items_n_cols(train,['playerId','gameday'],'daysSinceLastGame')
# Game days themselves get 0, so only the non-game runs keep their 1, 2, 3, ... count.
train.loc[train['gameday']==1,'daysSinceLastGame']=0

In [72]:
# Date of the last game row for every player in train.
train_game = train.loc[train['gameday'] == 1]
train_last_game = (
    train_game
    .drop_duplicates(subset='playerId', keep='last')
    .loc[:, ['playerId', 'date']]
    .rename(columns={'date': 'lastdate'})
)
train_player_unique = pd.DataFrame({'playerId': train['playerId'].unique()})
# Players with no game row at all get the placeholder date 20171231.
train_last_game = (
    train_player_unique
    .merge(train_last_game, on=['playerId'], how='left')
    .fillna(20171231)
)

In [73]:
train_last_game.sort_values('lastdate')

Unnamed: 0,playerId,lastdate
856,593590,20171231.0
1995,670462,20171231.0
2000,670764,20171231.0
1765,661269,20171231.0
1932,667674,20171231.0
...,...,...
473,542932,20210430.0
1480,641856,20210430.0
1481,641857,20210430.0
483,543037,20210430.0


In [74]:
def extract_season(date_raw, season_start_end):
    """Count, for every date, how many [start, end] ranges contain it.

    Parameters
    ----------
    date_raw : pandas.Series
        Dates to classify, comparable with the values in ``season_start_end``.
    season_start_end : pandas.DataFrame
        One row per range; the first column is the inclusive start and the
        second column the inclusive end.

    Returns
    -------
    pandas.Series or int
        Per-date count of matching ranges (0/1 when ranges do not overlap);
        the plain integer 0 when ``season_start_end`` has no rows.
    """
    # sum() starts from the integer 0, which is also the result for an empty table.
    return sum(
        ((date_raw >= bounds[0]) & (date_raw <= bounds[1])) * 1
        for bounds in season_start_end.itertuples(index=False, name=None)
    )

In [75]:
# Build `season_info`, a single code per row:
#   0 = outside every period, 1 = pre-season, 2 = regular season,
#   3 = post-season, 4 = special day (last day of the 1st half, All-Star date,
#   first day of the 2nd half).
on_preseason_idxes = extract_season(train['date'], seasons[['preSeasonStartDate', 'preSeasonEndDate']])
on_season_idxes = extract_season(train['date'], seasons[['regularSeasonStartDate', 'regularSeasonEndDate']]) * 2
on_postseason_idxes = extract_season(train['date'], seasons[['postSeasonStartDate', 'postSeasonEndDate']]) * 3

# Missing special dates were converted to 0 earlier and never match a real date.
special_days = seasons['lastDate1stHalf'].to_list() + seasons['allStarDate'].to_list() + seasons['firstDate2ndHalf'].to_list()
special_idxes = 0
for day in special_days:
    special_idxes += (train['date'] == day) * 4

# Later assignments overwrite earlier ones, so the priority is
# special day > post-season > regular season > pre-season.
# on_total_season_idxes is the same object as on_preseason_idxes, which is
# therefore modified as well.
on_total_season_idxes = on_preseason_idxes
on_total_season_idxes[on_season_idxes==2] = 2
on_total_season_idxes[on_postseason_idxes==3] = 3
on_total_season_idxes[special_idxes==4] = 4

train['season_info'] = on_total_season_idxes

In [76]:
## Keep only rows whose date falls between seasonStartDate and seasonEndDate
## of some season.
on_whole_idxes = extract_season(train['date'], seasons[['seasonStartDate', 'seasonEndDate']])
train = train[on_whole_idxes == 1].reset_index(drop=True)

In [77]:
# Keep only players flagged with playerForTestSetAndFuturePreds in the players table.
train = train[train['playerForTestSetAndFuturePreds']==True].reset_index(drop=True)

In [78]:
train

Unnamed: 0,playerId,target1,target2,target3,target4,date,primaryPositionName,birthCity,DOY,mlbDebutYear,...,label_primaryPositionName,label_birthCity,label_teamId,label_status,annual_day,week_day,month,gameday,daysSinceLastGame,season_info
0,405395,1.916353,21.512858,0.078997,7.389917,20180329,First Base,Santo Domingo,1980,2001.0,...,1,52,8,0,87.0,3,3,1,0,2
1,405395,2.431219,38.280184,0.080521,10.307364,20180330,First Base,Santo Domingo,1980,2001.0,...,1,52,8,0,88.0,4,3,1,0,2
2,405395,4.890499,18.149785,0.196634,19.190820,20180331,First Base,Santo Domingo,1980,2001.0,...,1,52,8,0,89.0,5,3,1,0,2
3,405395,0.061685,21.539123,0.215855,28.610586,20180401,First Base,Santo Domingo,1980,2001.0,...,1,52,8,0,90.0,6,4,1,0,2
4,405395,0.021304,19.234421,0.217329,20.419970,20180402,First Base,Santo Domingo,1980,2001.0,...,1,52,8,0,91.0,0,4,1,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711008,685503,0.044617,1.224728,0.009437,0.737463,20210426,Pitcher,Hiroshima,1992,2021.0,...,0,902,7,0,115.0,0,4,0,1,2
711009,685503,0.019123,1.178880,0.013161,0.790301,20210427,Pitcher,Hiroshima,1992,2021.0,...,0,902,7,0,116.0,1,4,0,2,2
711010,685503,0.015799,4.323489,0.002350,0.970273,20210428,Pitcher,Hiroshima,1992,2021.0,...,0,902,7,0,117.0,2,4,0,3,2
711011,685503,0.018770,31.946021,0.305491,5.938273,20210429,Pitcher,Hiroshima,1992,2021.0,...,0,902,7,0,118.0,3,4,0,4,2


In [79]:
# def fit_lgbm(x_train, y_train, x_valid, y_valid, params: dict=None, verbose=100):
#     oof_pred = np.zeros(len(y_valid), dtype=np.float32)
#     model = lgbm.LGBMRegressor(**params)
#     model.fit(x_train, y_train, 
#         eval_set=[(X_train, y_train), (x_valid, y_valid)],  
#         early_stopping_rounds=verbose, 
#         verbose=verbose)
#     oof_pred = model.predict(x_valid)
#     oof_pred = np.clip(oof_pred, 0, 100)
#     score = mean_absolute_error(oof_pred, y_valid)
#     print('mae:', score)
#     return oof_pred, model, score


In [80]:
def fit_lgbm(x_train, y_train, x_valid, y_valid, learning_rates, verbose=100):
    """Build an Optuna objective that trains one LightGBM MAE regressor per trial.

    Despite its name, this function fits nothing itself. It returns a closure
    to be passed to ``study.optimize``.

    Parameters
    ----------
    x_train, y_train
        Training features and target.
    x_valid, y_valid
        Validation features and target, used for early stopping and scoring.
    learning_rates : float
        Learning rate applied in every trial. The name is plural for backward
        compatibility; callers pass a single value (e.g. ``learning_rates[0]``).
    verbose : int, default 100
        Used both as the logging period and as the early-stopping patience.

    Returns
    -------
    callable
        ``opt(trial) -> float`` returning the negative MAE of the validation
        predictions clipped to [0, 100], so the study must use
        ``direction='maximize'``.
    """
    def opt(trial):
        # NOTE(review): per the LightGBM parameter documentation, `subsample`
        # is an alias of `bagging_fraction` and `colsample_bytree` an alias of
        # `feature_fraction`. Both members of each pair are searched here, so
        # only one of them can take effect. Confirm which, and consider
        # dropping the other from the search space.
        params = {
                'random_state': SEED,
                'objective':'mae',
                'n_estimators': 10000,
                # Honour the caller-supplied rate instead of a hard-coded 0.1.
                'learning_rate': learning_rates,
                'max_depth': trial.suggest_int('max_depth', 1, 20),
                'min_child_weight': trial.suggest_int('min_child_weight', 1, 20),
                'subsample': trial.suggest_discrete_uniform('subsample', 0.5, 0.9, 0.1),
                'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.5, 0.9, 0.1),
                'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-3, 1e3),
                'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-3, 1e3),
                'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
                'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
                'bagging_freq': trial.suggest_int('bagging_freq', 1, 10)
        }

        model_opt = lgbm.LGBMRegressor(**params)

        # n_estimators is only an upper bound; training stops once the
        # validation score has not improved for `verbose` rounds.
        model_opt.fit(x_train, y_train,
            eval_set=[(x_train, y_train), (x_valid, y_valid)],
            early_stopping_rounds=verbose,
            verbose=verbose)
        oof_pred = model_opt.predict(x_valid)
        # Predictions are limited to the range [0, 100] before scoring.
        oof_pred = np.clip(oof_pred, 0, 100)
        # (y_true, y_pred) order; MAE is symmetric, so the value is unchanged.
        score = mean_absolute_error(y_valid, oof_pred)
        # Negated so that a larger objective value means a smaller error.
        return -score
    return opt

In [81]:
# train_X is the full train frame (same object, not a copy); feature columns
# are selected later, when the study is launched.
train_X = train
train_y = train[['target1', 'target2', 'target3', 'target4']]
# Boolean train/validation masks, one entry per split.
tr_idxs = []
val_idxs = []

In [82]:
# tr_idx = (train['date'].astype(int) < 20200801)
# val_idx = (train['date'].astype(int) >= 20200801) & (train['date'].astype(int) < 20200901)
# tr_idxs.append(tr_idx)
# val_idxs.append(val_idx)

# tr_idx = (train['date'].astype(int) < 20200901)
# val_idx = (train['date'].astype(int) >= 20200901) & (train['date'].astype(int) < 20201001)
# tr_idxs.append(tr_idx)
# val_idxs.append(val_idx)

# tr_idx = (train['date'].astype(int) < 20201001)
# val_idx = (train['date'].astype(int) >= 20201001) & (train['date'].astype(int) < 20201028)
# tr_idxs.append(tr_idx)
# val_idxs.append(val_idx)

# tr_idx = (train['date'].astype(int) < 20210228)
# val_idx = (train['date'].astype(int) >= 20210228) & (train['date'].astype(int) < 20210401)
# tr_idxs.append(tr_idx)
# val_idxs.append(val_idx)

# Single hold-out split: train on everything before 2021-04-01 and validate on
# the remaining rows. Re-running this cell without resetting tr_idxs/val_idxs
# appends the same masks again.
tr_idx = (train['date'].astype(int) < 20210401)
val_idx = ~tr_idx
tr_idxs.append(tr_idx)
val_idxs.append(val_idx)

In [83]:
# Pick the split to use (only one is registered above).
idx = 0

tr_idx = tr_idxs[idx]
val_idx = val_idxs[idx]

# Materialise the train/validation frames; indexes are reset so that features
# and targets of each part stay aligned row by row.
x_train = train_X.loc[tr_idx].reset_index(drop=True)
y_train = train_y.loc[tr_idx].reset_index(drop=True)
x_valid = train_X.loc[val_idx].reset_index(drop=True)
y_valid = train_y.loc[val_idx].reset_index(drop=True)

In [84]:
# One learning rate per target (target1..target4); the study below passes learning_rates[0] for target1.
learning_rates = [0.1, 0.1, 0.1, 0.1]

In [None]:
# Hyper-parameter search for target1 on feature set 1.
# The objective returns the negative MAE, hence direction='maximize'.
# The sampler is seeded explicitly: Optuna's default sampler is otherwise
# unseeded, so the sequence of trials would differ on every run.
study1 = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study1.optimize(fit_lgbm(x_train[feature_cols1], y_train['target1'], x_valid[feature_cols1], y_valid['target1'], learning_rates[0]), n_trials=100)

print('Number of finished trials:', len(study1.trials))
print('Best trial:', study1.best_trial.params)

[32m[I 2021-07-09 02:10:24,284][0m A new study created in memory with name: no-name-d835c5a1-a978-4751-9a71-2aab5b862eac[0m


Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.889669	valid_1's l1: 1.22059
[200]	training's l1: 0.889667	valid_1's l1: 1.22059


[32m[I 2021-07-09 02:10:31,343][0m Trial 0 finished with value: -1.2205864675842983 and parameters: {'max_depth': 14, 'min_child_weight': 4, 'subsample': 0.8, 'colsample_bytree': 0.7, 'reg_lambda': 0.18836623095314597, 'reg_alpha': 0.0032418627050351378, 'feature_fraction': 0.42534082402377577, 'bagging_fraction': 0.508080027008323, 'bagging_freq': 2}. Best is trial 0 with value: -1.2205864675842983.[0m


Early stopping, best iteration is:
[116]	training's l1: 0.889669	valid_1's l1: 1.22059
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.889928	valid_1's l1: 1.21995
[200]	training's l1: 0.889601	valid_1's l1: 1.2192
[300]	training's l1: 0.889592	valid_1's l1: 1.21913
[400]	training's l1: 0.889585	valid_1's l1: 1.21913


[32m[I 2021-07-09 02:10:47,029][0m Trial 1 finished with value: -1.2191175178071816 and parameters: {'max_depth': 10, 'min_child_weight': 5, 'subsample': 0.6, 'colsample_bytree': 0.7, 'reg_lambda': 34.66863161028365, 'reg_alpha': 0.0011391713820332165, 'feature_fraction': 0.6698083924910014, 'bagging_fraction': 0.9108178210880399, 'bagging_freq': 4}. Best is trial 1 with value: -1.2191175178071816.[0m


Early stopping, best iteration is:
[393]	training's l1: 0.889586	valid_1's l1: 1.21912
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.890524	valid_1's l1: 1.22053


[32m[I 2021-07-09 02:10:52,095][0m Trial 2 finished with value: -1.2205299765877653 and parameters: {'max_depth': 20, 'min_child_weight': 5, 'subsample': 0.7, 'colsample_bytree': 0.8, 'reg_lambda': 0.6210530114246657, 'reg_alpha': 0.9793791903767944, 'feature_fraction': 0.9090074344930096, 'bagging_fraction': 0.41871837115702587, 'bagging_freq': 3}. Best is trial 1 with value: -1.2191175178071816.[0m


Early stopping, best iteration is:
[60]	training's l1: 0.890528	valid_1's l1: 1.22053
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.868813	valid_1's l1: 1.19104
[200]	training's l1: 0.866346	valid_1's l1: 1.18633
[300]	training's l1: 0.864191	valid_1's l1: 1.18323
[400]	training's l1: 0.862483	valid_1's l1: 1.18081
[500]	training's l1: 0.862064	valid_1's l1: 1.18041
[600]	training's l1: 0.862062	valid_1's l1: 1.1804


[32m[I 2021-07-09 02:11:10,431][0m Trial 3 finished with value: -1.1803332959096229 and parameters: {'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.6, 'colsample_bytree': 0.7, 'reg_lambda': 1.7656600481758704, 'reg_alpha': 0.047429947398004255, 'feature_fraction': 0.761611848593557, 'bagging_fraction': 0.9838033870010923, 'bagging_freq': 8}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[517]	training's l1: 0.862063	valid_1's l1: 1.1804
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.887761	valid_1's l1: 1.21706


[32m[I 2021-07-09 02:11:18,635][0m Trial 4 finished with value: -1.2170580661601196 and parameters: {'max_depth': 20, 'min_child_weight': 11, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 0.07335148961671999, 'reg_alpha': 0.6056460180027443, 'feature_fraction': 0.43463087401992884, 'bagging_fraction': 0.9662460919721929, 'bagging_freq': 10}. Best is trial 3 with value: -1.1803332959096229.[0m


[200]	training's l1: 0.88776	valid_1's l1: 1.21706
Early stopping, best iteration is:
[100]	training's l1: 0.887761	valid_1's l1: 1.21706
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.878328	valid_1's l1: 1.20049
[200]	training's l1: 0.874012	valid_1's l1: 1.19294


[32m[I 2021-07-09 02:11:25,305][0m Trial 5 finished with value: -1.1928852092193598 and parameters: {'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.9, 'colsample_bytree': 0.8, 'reg_lambda': 12.166903014318995, 'reg_alpha': 0.02245351235416841, 'feature_fraction': 0.40832038130920806, 'bagging_fraction': 0.5596366644925742, 'bagging_freq': 10}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[152]	training's l1: 0.874044	valid_1's l1: 1.19291
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.888048	valid_1's l1: 1.21673
[200]	training's l1: 0.888046	valid_1's l1: 1.21673
[300]	training's l1: 0.887904	valid_1's l1: 1.21672
[400]	training's l1: 0.887866	valid_1's l1: 1.21668


[32m[I 2021-07-09 02:11:40,117][0m Trial 6 finished with value: -1.2166586007238163 and parameters: {'max_depth': 19, 'min_child_weight': 5, 'subsample': 0.6, 'colsample_bytree': 0.8, 'reg_lambda': 0.0010731609644032611, 'reg_alpha': 0.03675948374419034, 'feature_fraction': 0.993981588626634, 'bagging_fraction': 0.521777792900718, 'bagging_freq': 2}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[348]	training's l1: 0.887878	valid_1's l1: 1.21666
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.892917	valid_1's l1: 1.22472
[200]	training's l1: 0.892914	valid_1's l1: 1.22472


[32m[I 2021-07-09 02:11:48,243][0m Trial 7 finished with value: -1.2247020942957842 and parameters: {'max_depth': 17, 'min_child_weight': 18, 'subsample': 0.8, 'colsample_bytree': 0.5, 'reg_lambda': 0.09307017228978781, 'reg_alpha': 326.24182222124807, 'feature_fraction': 0.5773643225857719, 'bagging_fraction': 0.4784331003271958, 'bagging_freq': 9}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[181]	training's l1: 0.892914	valid_1's l1: 1.22471
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.99286	valid_1's l1: 1.384


[32m[I 2021-07-09 02:11:52,419][0m Trial 8 finished with value: -1.383994068913908 and parameters: {'max_depth': 1, 'min_child_weight': 8, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 0.39527605196585186, 'reg_alpha': 0.0245303001518635, 'feature_fraction': 0.8551155582475718, 'bagging_fraction': 0.47662812726535403, 'bagging_freq': 3}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[61]	training's l1: 0.992869	valid_1's l1: 1.38399
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.889459	valid_1's l1: 1.21877
[200]	training's l1: 0.889458	valid_1's l1: 1.21877
[300]	training's l1: 0.889452	valid_1's l1: 1.21878


[32m[I 2021-07-09 02:12:00,819][0m Trial 9 finished with value: -1.218764157234154 and parameters: {'max_depth': 16, 'min_child_weight': 9, 'subsample': 0.7, 'colsample_bytree': 0.7, 'reg_lambda': 0.2143984013628337, 'reg_alpha': 0.9826859631042147, 'feature_fraction': 0.42956611343859497, 'bagging_fraction': 0.4850684982754531, 'bagging_freq': 10}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[248]	training's l1: 0.889457	valid_1's l1: 1.21877
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.996514	valid_1's l1: 1.38792


[32m[I 2021-07-09 02:12:04,679][0m Trial 10 finished with value: -1.387919128866123 and parameters: {'max_depth': 1, 'min_child_weight': 16, 'subsample': 0.5, 'colsample_bytree': 0.9, 'reg_lambda': 843.4963543854902, 'reg_alpha': 143.74967153817147, 'feature_fraction': 0.7772381402724479, 'bagging_fraction': 0.8253032044933035, 'bagging_freq': 7}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[41]	training's l1: 0.996524	valid_1's l1: 1.38792
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.879336	valid_1's l1: 1.20597
[200]	training's l1: 0.878123	valid_1's l1: 1.20315


[32m[I 2021-07-09 02:12:13,649][0m Trial 11 finished with value: -1.203108006894472 and parameters: {'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.8, 'reg_lambda': 17.899131391404417, 'reg_alpha': 0.035676091665573915, 'feature_fraction': 0.612771297266933, 'bagging_fraction': 0.6746856454613729, 'bagging_freq': 7}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[160]	training's l1: 0.878143	valid_1's l1: 1.20313
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.873871	valid_1's l1: 1.19608
[200]	training's l1: 0.870774	valid_1's l1: 1.18984


[32m[I 2021-07-09 02:12:21,193][0m Trial 12 finished with value: -1.189792270919451 and parameters: {'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_lambda': 10.564353382224171, 'reg_alpha': 20.063198340740485, 'feature_fraction': 0.7507984581157352, 'bagging_fraction': 0.6578433652470348, 'bagging_freq': 8}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[197]	training's l1: 0.870774	valid_1's l1: 1.18984
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.88148	valid_1's l1: 1.20671
[200]	training's l1: 0.878704	valid_1's l1: 1.19996
[300]	training's l1: 0.876362	valid_1's l1: 1.19473


[32m[I 2021-07-09 02:12:31,008][0m Trial 13 finished with value: -1.1946496246521547 and parameters: {'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_lambda': 305.0076340231093, 'reg_alpha': 18.522840705369788, 'feature_fraction': 0.7624648208238721, 'bagging_fraction': 0.7053714150132205, 'bagging_freq': 7}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[265]	training's l1: 0.876477	valid_1's l1: 1.19471
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.886708	valid_1's l1: 1.2174
[200]	training's l1: 0.875882	valid_1's l1: 1.1961
[300]	training's l1: 0.871442	valid_1's l1: 1.18861
[400]	training's l1: 0.868956	valid_1's l1: 1.18474
[500]	training's l1: 0.868477	valid_1's l1: 1.18404


[32m[I 2021-07-09 02:12:42,965][0m Trial 14 finished with value: -1.183938059092993 and parameters: {'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 3.5514993261150765, 'reg_alpha': 11.29704240731941, 'feature_fraction': 0.7355396929975226, 'bagging_fraction': 0.6418466119967524, 'bagging_freq': 8}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[472]	training's l1: 0.86848	valid_1's l1: 1.18403
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.908347	valid_1's l1: 1.24954
[200]	training's l1: 0.904511	valid_1's l1: 1.24159
[300]	training's l1: 0.902432	valid_1's l1: 1.23712
[400]	training's l1: 0.901071	valid_1's l1: 1.23384
[500]	training's l1: 0.900161	valid_1's l1: 1.23142
[600]	training's l1: 0.899565	valid_1's l1: 1.23004
[700]	training's l1: 0.89909	valid_1's l1: 1.22905
[800]	training's l1: 0.898215	valid_1's l1: 1.22662
[900]	training's l1: 0.897875	valid_1's l1: 1.22601
[1000]	training's l1: 0.897687	valid_1's l1: 1.22541
[1100]	training's l1: 0.897246	valid_1's l1: 1.22433
[1200]	training's l1: 0.897187	valid_1's l1: 1.22394
[1300]	training's l1: 0.897118	valid_1's l1: 1.22371
[1400]	training's l1: 0.897014	valid_1's l1: 1.22349
[1500]	training's l1: 0.896998	valid_1's l1: 1.22343
[1600]	training's l1: 0.896986	valid_1's l1: 1.22341
Early stopping

[32m[I 2021-07-09 02:13:23,254][0m Trial 15 finished with value: -1.2233415267717402 and parameters: {'max_depth': 3, 'min_child_weight': 13, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 0.004753846269644635, 'reg_alpha': 13.825108556575072, 'feature_fraction': 0.8362521019234783, 'bagging_fraction': 0.781863178977326, 'bagging_freq': 5}. Best is trial 3 with value: -1.1803332959096229.[0m


Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.888308	valid_1's l1: 1.21902


[32m[I 2021-07-09 02:13:28,965][0m Trial 16 finished with value: -1.2190126823047518 and parameters: {'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.5, 'colsample_bytree': 0.6, 'reg_lambda': 3.6193104444515725, 'reg_alpha': 0.18814072595737838, 'feature_fraction': 0.5285022023512945, 'bagging_fraction': 0.602939823914551, 'bagging_freq': 8}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[65]	training's l1: 0.88831	valid_1's l1: 1.21902
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.889776	valid_1's l1: 1.22099


[32m[I 2021-07-09 02:13:34,499][0m Trial 17 finished with value: -1.2209714170629944 and parameters: {'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.7, 'colsample_bytree': 0.6, 'reg_lambda': 92.04294109247373, 'reg_alpha': 6.606766926938739, 'feature_fraction': 0.6800935015436903, 'bagging_fraction': 0.7639887579823147, 'bagging_freq': 6}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[68]	training's l1: 0.889777	valid_1's l1: 1.22098
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.914476	valid_1's l1: 1.25616
[200]	training's l1: 0.910069	valid_1's l1: 1.24629
[300]	training's l1: 0.907917	valid_1's l1: 1.24092
[400]	training's l1: 0.906981	valid_1's l1: 1.2389
[500]	training's l1: 0.906372	valid_1's l1: 1.2381
[600]	training's l1: 0.906228	valid_1's l1: 1.23767
[700]	training's l1: 0.90601	valid_1's l1: 1.23721
[800]	training's l1: 0.905405	valid_1's l1: 1.23562
[900]	training's l1: 0.904977	valid_1's l1: 1.23497
[1000]	training's l1: 0.904678	valid_1's l1: 1.23378
[1100]	training's l1: 0.904557	valid_1's l1: 1.23342
[1200]	training's l1: 0.904435	valid_1's l1: 1.23297
[1300]	training's l1: 0.903836	valid_1's l1: 1.23143
[1400]	training's l1: 0.903407	valid_1's l1: 1.23047
[1500]	training's l1: 0.903329	valid_1's l1: 1.23009
[1600]	training's l1: 0.903244	valid_1's l1: 1.2299
Early stopping, b

[32m[I 2021-07-09 02:14:14,000][0m Trial 18 finished with value: -1.2298391279060148 and parameters: {'max_depth': 3, 'min_child_weight': 7, 'subsample': 0.6, 'colsample_bytree': 0.5, 'reg_lambda': 1.8186653286652683, 'reg_alpha': 74.95698663805994, 'feature_fraction': 0.9277993187093772, 'bagging_fraction': 0.8776450904006262, 'bagging_freq': 9}. Best is trial 3 with value: -1.1803332959096229.[0m


Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.907971	valid_1's l1: 1.24819
[200]	training's l1: 0.901329	valid_1's l1: 1.23586
[300]	training's l1: 0.899269	valid_1's l1: 1.23204
[400]	training's l1: 0.897665	valid_1's l1: 1.22957
[500]	training's l1: 0.897414	valid_1's l1: 1.22932
[600]	training's l1: 0.896591	valid_1's l1: 1.22649
[700]	training's l1: 0.896196	valid_1's l1: 1.22558
[800]	training's l1: 0.895947	valid_1's l1: 1.22557
Early stopping, best iteration is:
[706]	training's l1: 0.896147	valid_1's l1: 1.22551


[32m[I 2021-07-09 02:14:35,303][0m Trial 19 finished with value: -1.2254379332140686 and parameters: {'max_depth': 3, 'min_child_weight': 14, 'subsample': 0.5, 'colsample_bytree': 0.6, 'reg_lambda': 0.015091573598729768, 'reg_alpha': 984.341243605552, 'feature_fraction': 0.7188056310821775, 'bagging_fraction': 0.9853100613607253, 'bagging_freq': 6}. Best is trial 3 with value: -1.1803332959096229.[0m


Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.889065	valid_1's l1: 1.22071
[200]	training's l1: 0.888538	valid_1's l1: 1.21901


[32m[I 2021-07-09 02:14:43,317][0m Trial 20 finished with value: -1.2190061724676375 and parameters: {'max_depth': 8, 'min_child_weight': 7, 'subsample': 0.6, 'colsample_bytree': 0.7, 'reg_lambda': 2.609744318222277, 'reg_alpha': 3.6604766344557502, 'feature_fraction': 0.6184066516243281, 'bagging_fraction': 0.6051139077043651, 'bagging_freq': 8}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[154]	training's l1: 0.888543	valid_1's l1: 1.21901
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.887961	valid_1's l1: 1.21977
[200]	training's l1: 0.877405	valid_1's l1: 1.19979
[300]	training's l1: 0.874752	valid_1's l1: 1.19565
[400]	training's l1: 0.874056	valid_1's l1: 1.19483
[500]	training's l1: 0.872684	valid_1's l1: 1.19227
[600]	training's l1: 0.872007	valid_1's l1: 1.19043


[32m[I 2021-07-09 02:14:58,675][0m Trial 21 finished with value: -1.1902510090970522 and parameters: {'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.7, 'colsample_bytree': 0.7, 'reg_lambda': 6.391575165754375, 'reg_alpha': 47.866847113447974, 'feature_fraction': 0.7918520769741856, 'bagging_fraction': 0.675637886508909, 'bagging_freq': 8}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[599]	training's l1: 0.872007	valid_1's l1: 1.19043
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.87407	valid_1's l1: 1.197
[200]	training's l1: 0.868717	valid_1's l1: 1.18615


[32m[I 2021-07-09 02:15:05,819][0m Trial 22 finished with value: -1.1861058724571831 and parameters: {'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 62.035128317021254, 'reg_alpha': 0.16446469647306913, 'feature_fraction': 0.7200362442239546, 'bagging_fraction': 0.6267724207351644, 'bagging_freq': 9}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[199]	training's l1: 0.868718	valid_1's l1: 1.18615
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.995851	valid_1's l1: 1.38723


[32m[I 2021-07-09 02:15:09,696][0m Trial 23 finished with value: -1.3872287953407352 and parameters: {'max_depth': 1, 'min_child_weight': 3, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 76.6881066825584, 'reg_alpha': 0.16939957538900938, 'feature_fraction': 0.8281234545436655, 'bagging_fraction': 0.7304271593775721, 'bagging_freq': 9}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[48]	training's l1: 0.99586	valid_1's l1: 1.38723
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.889739	valid_1's l1: 1.21999


[32m[I 2021-07-09 02:15:14,195][0m Trial 24 finished with value: -1.2199782221933038 and parameters: {'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 1.0938025207555315, 'reg_alpha': 0.006524369281893509, 'feature_fraction': 0.6911742826054641, 'bagging_fraction': 0.6010955102386437, 'bagging_freq': 9}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[54]	training's l1: 0.889766	valid_1's l1: 1.21998
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.886674	valid_1's l1: 1.21449
[200]	training's l1: 0.881278	valid_1's l1: 1.20487
[300]	training's l1: 0.877526	valid_1's l1: 1.1972
[400]	training's l1: 0.876581	valid_1's l1: 1.19565


[32m[I 2021-07-09 02:15:24,780][0m Trial 25 finished with value: -1.1955804521077233 and parameters: {'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 134.76387464183898, 'reg_alpha': 0.17102824808657535, 'feature_fraction': 0.7242175020668234, 'bagging_fraction': 0.6383877423178669, 'bagging_freq': 7}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[396]	training's l1: 0.876581	valid_1's l1: 1.19565
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.890903	valid_1's l1: 1.21963


[32m[I 2021-07-09 02:15:29,596][0m Trial 26 finished with value: -1.2196189849985777 and parameters: {'max_depth': 12, 'min_child_weight': 20, 'subsample': 0.7, 'colsample_bytree': 0.7, 'reg_lambda': 689.5415563537233, 'reg_alpha': 3.130062633305253, 'feature_fraction': 0.6413079995172754, 'bagging_fraction': 0.5607913580324828, 'bagging_freq': 5}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[59]	training's l1: 0.890975	valid_1's l1: 1.21963
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.885137	valid_1's l1: 1.21108
[200]	training's l1: 0.885005	valid_1's l1: 1.21067
[300]	training's l1: 0.884362	valid_1's l1: 1.20954


[32m[I 2021-07-09 02:15:42,584][0m Trial 27 finished with value: -1.2095183046659654 and parameters: {'max_depth': 7, 'min_child_weight': 7, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 29.950264372408956, 'reg_alpha': 0.26777798145558485, 'feature_fraction': 0.546021513646801, 'bagging_fraction': 0.8010979499989405, 'bagging_freq': 10}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[258]	training's l1: 0.884373	valid_1's l1: 1.20952
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.928783	valid_1's l1: 1.28081
[200]	training's l1: 0.927608	valid_1's l1: 1.2784
[300]	training's l1: 0.92377	valid_1's l1: 1.27043
[400]	training's l1: 0.917019	valid_1's l1: 1.25628
[500]	training's l1: 0.913472	valid_1's l1: 1.24795
[600]	training's l1: 0.912399	valid_1's l1: 1.24559
[700]	training's l1: 0.910977	valid_1's l1: 1.24271
[800]	training's l1: 0.910467	valid_1's l1: 1.24153
[900]	training's l1: 0.909996	valid_1's l1: 1.24001
[1000]	training's l1: 0.909507	valid_1's l1: 1.23884
[1100]	training's l1: 0.90903	valid_1's l1: 1.23794
[1200]	training's l1: 0.908931	valid_1's l1: 1.23761
[1300]	training's l1: 0.908832	valid_1's l1: 1.23738
[1400]	training's l1: 0.908463	valid_1's l1: 1.23612
[1500]	training's l1: 0.908444	valid_1's l1: 1.23608
[1600]	training's l1: 0.908417	valid_1's l1: 1.23605
[1700]	training

[32m[I 2021-07-09 02:16:29,029][0m Trial 28 finished with value: -1.2336306547747804 and parameters: {'max_depth': 2, 'min_child_weight': 3, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 4.690308878404186, 'reg_alpha': 0.0809111596016405, 'feature_fraction': 0.9051348261098391, 'bagging_fraction': 0.711032309349378, 'bagging_freq': 9}. Best is trial 3 with value: -1.1803332959096229.[0m


Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.888882	valid_1's l1: 1.21733


[32m[I 2021-07-09 02:16:33,410][0m Trial 29 finished with value: -1.2173204655244174 and parameters: {'max_depth': 11, 'min_child_weight': 4, 'subsample': 0.7, 'colsample_bytree': 0.7, 'reg_lambda': 216.24771115405198, 'reg_alpha': 0.0052998787186547365, 'feature_fraction': 0.805544492374863, 'bagging_fraction': 0.40781158604314977, 'bagging_freq': 6}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[54]	training's l1: 0.888908	valid_1's l1: 1.21732
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.872863	valid_1's l1: 1.19414
[200]	training's l1: 0.868779	valid_1's l1: 1.18548
[300]	training's l1: 0.866676	valid_1's l1: 1.18358


[32m[I 2021-07-09 02:16:42,481][0m Trial 30 finished with value: -1.1834956503537892 and parameters: {'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 38.34733929486844, 'reg_alpha': 0.0016667928010995034, 'feature_fraction': 0.7288865753074063, 'bagging_fraction': 0.7446559333549093, 'bagging_freq': 8}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[243]	training's l1: 0.866738	valid_1's l1: 1.18356
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.870895	valid_1's l1: 1.19664
[200]	training's l1: 0.867265	valid_1's l1: 1.18932


[32m[I 2021-07-09 02:16:51,286][0m Trial 31 finished with value: -1.1892816239831705 and parameters: {'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 37.306917856223734, 'reg_alpha': 0.0012919677973788478, 'feature_fraction': 0.7321876684492915, 'bagging_fraction': 0.8434329849265094, 'bagging_freq': 8}. Best is trial 3 with value: -1.1803332959096229.[0m


[300]	training's l1: 0.867136	valid_1's l1: 1.18933
Early stopping, best iteration is:
[201]	training's l1: 0.867265	valid_1's l1: 1.18932
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.888346	valid_1's l1: 1.21849
[200]	training's l1: 0.8883	valid_1's l1: 1.21841
[300]	training's l1: 0.888208	valid_1's l1: 1.21828


[32m[I 2021-07-09 02:17:03,422][0m Trial 32 finished with value: -1.2182712838493206 and parameters: {'max_depth': 9, 'min_child_weight': 4, 'subsample': 0.6, 'colsample_bytree': 0.7, 'reg_lambda': 48.57545093345537, 'reg_alpha': 0.0019336430681930123, 'feature_fraction': 0.6487433726991865, 'bagging_fraction': 0.9461911424713632, 'bagging_freq': 7}. Best is trial 3 with value: -1.1803332959096229.[0m


Early stopping, best iteration is:
[283]	training's l1: 0.888212	valid_1's l1: 1.21827
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.86668	valid_1's l1: 1.18596
[200]	training's l1: 0.862736	valid_1's l1: 1.17945
[300]	training's l1: 0.862727	valid_1's l1: 1.17944


[32m[I 2021-07-09 02:17:11,775][0m Trial 33 finished with value: -1.1793677862004224 and parameters: {'max_depth': 5, 'min_child_weight': 6, 'subsample': 0.6, 'colsample_bytree': 0.6, 'reg_lambda': 1.1383185071781075, 'reg_alpha': 0.012764145059440355, 'feature_fraction': 0.7012860634574578, 'bagging_fraction': 0.7471153709762581, 'bagging_freq': 8}. Best is trial 33 with value: -1.1793677862004224.[0m


Early stopping, best iteration is:
[217]	training's l1: 0.862733	valid_1's l1: 1.17944
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.883804	valid_1's l1: 1.21248
[200]	training's l1: 0.87684	valid_1's l1: 1.19836
[300]	training's l1: 0.870794	valid_1's l1: 1.18828
[400]	training's l1: 0.870187	valid_1's l1: 1.18732
[500]	training's l1: 0.869105	valid_1's l1: 1.18511
[600]	training's l1: 0.868546	valid_1's l1: 1.18412
[700]	training's l1: 0.867137	valid_1's l1: 1.18192
[800]	training's l1: 0.865615	valid_1's l1: 1.17915
[900]	training's l1: 0.864969	valid_1's l1: 1.17732
[1000]	training's l1: 0.864023	valid_1's l1: 1.17563
[1100]	training's l1: 0.862117	valid_1's l1: 1.17247


[32m[I 2021-07-09 02:17:38,903][0m Trial 34 finished with value: -1.172220649660644 and parameters: {'max_depth': 4, 'min_child_weight': 11, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 0.8690091455411256, 'reg_alpha': 0.008667831982483666, 'feature_fraction': 0.8745181698887139, 'bagging_fraction': 0.7305921908873649, 'bagging_freq': 8}. Best is trial 34 with value: -1.172220649660644.[0m


Early stopping, best iteration is:
[1069]	training's l1: 0.862136	valid_1's l1: 1.17243
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.885307	valid_1's l1: 1.21191
[200]	training's l1: 0.884252	valid_1's l1: 1.21142
[300]	training's l1: 0.884195	valid_1's l1: 1.21129


[32m[I 2021-07-09 02:17:48,720][0m Trial 35 finished with value: -1.2112694374111566 and parameters: {'max_depth': 7, 'min_child_weight': 10, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 0.5856351402497225, 'reg_alpha': 0.009781716867747762, 'feature_fraction': 0.8732876368414042, 'bagging_fraction': 0.7547991847805674, 'bagging_freq': 7}. Best is trial 34 with value: -1.172220649660644.[0m


Early stopping, best iteration is:
[216]	training's l1: 0.884204	valid_1's l1: 1.21128
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.924378	valid_1's l1: 1.2714
[200]	training's l1: 0.917459	valid_1's l1: 1.25667
[300]	training's l1: 0.915044	valid_1's l1: 1.25065
[400]	training's l1: 0.913234	valid_1's l1: 1.24673
[500]	training's l1: 0.912454	valid_1's l1: 1.24519
[600]	training's l1: 0.911448	valid_1's l1: 1.24307
[700]	training's l1: 0.91091	valid_1's l1: 1.24229
[800]	training's l1: 0.910256	valid_1's l1: 1.24032
[900]	training's l1: 0.910042	valid_1's l1: 1.23969
[1000]	training's l1: 0.909977	valid_1's l1: 1.23967
[1100]	training's l1: 0.909559	valid_1's l1: 1.23895
[1200]	training's l1: 0.909359	valid_1's l1: 1.23814
[1300]	training's l1: 0.908968	valid_1's l1: 1.23712
[1400]	training's l1: 0.908922	valid_1's l1: 1.23701


[32m[I 2021-07-09 02:18:25,346][0m Trial 36 finished with value: -1.2369188155325395 and parameters: {'max_depth': 2, 'min_child_weight': 11, 'subsample': 0.5, 'colsample_bytree': 0.5, 'reg_lambda': 0.028390852247173762, 'reg_alpha': 0.002818548911323487, 'feature_fraction': 0.9719403469729916, 'bagging_fraction': 0.8960593260153361, 'bagging_freq': 6}. Best is trial 34 with value: -1.172220649660644.[0m


Early stopping, best iteration is:
[1378]	training's l1: 0.908936	valid_1's l1: 1.23698
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.887281	valid_1's l1: 1.21677
[200]	training's l1: 0.887058	valid_1's l1: 1.21603
[300]	training's l1: 0.885917	valid_1's l1: 1.21431
[400]	training's l1: 0.885395	valid_1's l1: 1.21348


[32m[I 2021-07-09 02:18:38,386][0m Trial 37 finished with value: -1.2134377557785134 and parameters: {'max_depth': 7, 'min_child_weight': 12, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 0.22398963069298183, 'reg_alpha': 0.01525450743246661, 'feature_fraction': 0.8083617321500236, 'bagging_fraction': 0.8425797131459605, 'bagging_freq': 8}. Best is trial 34 with value: -1.172220649660644.[0m


Early stopping, best iteration is:
[322]	training's l1: 0.885411	valid_1's l1: 1.21345
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.880121	valid_1's l1: 1.20415
[200]	training's l1: 0.878466	valid_1's l1: 1.20059
[300]	training's l1: 0.876789	valid_1's l1: 1.19839
[400]	training's l1: 0.876562	valid_1's l1: 1.19812
[500]	training's l1: 0.876554	valid_1's l1: 1.19812
[600]	training's l1: 0.876543	valid_1's l1: 1.19812
Early stopping, best iteration is:
[506]	training's l1: 0.876551	valid_1's l1: 1.19812


[32m[I 2021-07-09 02:18:55,289][0m Trial 38 finished with value: -1.1981018008088153 and parameters: {'max_depth': 6, 'min_child_weight': 6, 'subsample': 0.7, 'colsample_bytree': 0.5, 'reg_lambda': 0.8507250629971012, 'reg_alpha': 0.0010521585305374483, 'feature_fraction': 0.9463490632294804, 'bagging_fraction': 0.8095661885045261, 'bagging_freq': 10}. Best is trial 34 with value: -1.172220649660644.[0m


Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.878688	valid_1's l1: 1.20588
[200]	training's l1: 0.873029	valid_1's l1: 1.19543
[300]	training's l1: 0.871879	valid_1's l1: 1.19316
[400]	training's l1: 0.871089	valid_1's l1: 1.19228
[500]	training's l1: 0.870511	valid_1's l1: 1.19132
[600]	training's l1: 0.868701	valid_1's l1: 1.18673
[700]	training's l1: 0.866475	valid_1's l1: 1.18337
[800]	training's l1: 0.866161	valid_1's l1: 1.18317
[900]	training's l1: 0.864393	valid_1's l1: 1.17973
[1000]	training's l1: 0.863702	valid_1's l1: 1.17838
[1100]	training's l1: 0.862096	valid_1's l1: 1.17557
[1200]	training's l1: 0.861403	valid_1's l1: 1.17457
[1300]	training's l1: 0.86021	valid_1's l1: 1.17232
[1400]	training's l1: 0.85916	valid_1's l1: 1.17021
[1500]	training's l1: 0.858233	valid_1's l1: 1.1691
Early stopping, best iteration is:
[1485]	training's l1: 0.858439	valid_1's l1: 1.16894


[32m[I 2021-07-09 02:19:26,205][0m Trial 39 finished with value: -1.16870092375586 and parameters: {'max_depth': 4, 'min_child_weight': 9, 'subsample': 0.6, 'colsample_bytree': 0.7, 'reg_lambda': 0.054975369188135864, 'reg_alpha': 0.004155002965710786, 'feature_fraction': 0.8672836142067211, 'bagging_fraction': 0.7404507123539454, 'bagging_freq': 1}. Best is trial 39 with value: -1.16870092375586.[0m


Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.926567	valid_1's l1: 1.27588
[200]	training's l1: 0.919109	valid_1's l1: 1.26001
[300]	training's l1: 0.915311	valid_1's l1: 1.25153
[400]	training's l1: 0.913761	valid_1's l1: 1.24824
[500]	training's l1: 0.912405	valid_1's l1: 1.2458
[600]	training's l1: 0.911587	valid_1's l1: 1.24447
[700]	training's l1: 0.910908	valid_1's l1: 1.2426
[800]	training's l1: 0.910124	valid_1's l1: 1.24041
[900]	training's l1: 0.909796	valid_1's l1: 1.23945


[32m[I 2021-07-09 02:19:52,927][0m Trial 40 finished with value: -1.239318785718085 and parameters: {'max_depth': 2, 'min_child_weight': 9, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 0.09242132474683425, 'reg_alpha': 0.05780745483193941, 'feature_fraction': 0.995583062208503, 'bagging_fraction': 0.86383803922666, 'bagging_freq': 4}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[885]	training's l1: 0.909807	valid_1's l1: 1.23939
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.879702	valid_1's l1: 1.20524
[200]	training's l1: 0.874008	valid_1's l1: 1.19483
[300]	training's l1: 0.872345	valid_1's l1: 1.192
[400]	training's l1: 0.870731	valid_1's l1: 1.1887
[500]	training's l1: 0.868897	valid_1's l1: 1.18451
[600]	training's l1: 0.868767	valid_1's l1: 1.18418
[700]	training's l1: 0.868764	valid_1's l1: 1.18419


[32m[I 2021-07-09 02:20:07,445][0m Trial 41 finished with value: -1.1839973088845908 and parameters: {'max_depth': 4, 'min_child_weight': 9, 'subsample': 0.6, 'colsample_bytree': 0.7, 'reg_lambda': 0.01836132053246509, 'reg_alpha': 0.004597176157687432, 'feature_fraction': 0.8521477799042371, 'bagging_fraction': 0.7296874401106479, 'bagging_freq': 1}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[622]	training's l1: 0.868766	valid_1's l1: 1.18418
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.868458	valid_1's l1: 1.1883
[200]	training's l1: 0.865128	valid_1's l1: 1.18157
[300]	training's l1: 0.863269	valid_1's l1: 1.17923
[400]	training's l1: 0.863224	valid_1's l1: 1.17921


[32m[I 2021-07-09 02:20:18,195][0m Trial 42 finished with value: -1.1791440860007278 and parameters: {'max_depth': 5, 'min_child_weight': 10, 'subsample': 0.6, 'colsample_bytree': 0.7, 'reg_lambda': 0.040896323446048564, 'reg_alpha': 0.014467615873770817, 'feature_fraction': 0.8860242570534644, 'bagging_fraction': 0.752879682136265, 'bagging_freq': 1}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[358]	training's l1: 0.863229	valid_1's l1: 1.17921
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.912263	valid_1's l1: 1.25464
[200]	training's l1: 0.907458	valid_1's l1: 1.24484
[300]	training's l1: 0.905716	valid_1's l1: 1.24104
[400]	training's l1: 0.904429	valid_1's l1: 1.2378
[500]	training's l1: 0.903319	valid_1's l1: 1.23524
[600]	training's l1: 0.902428	valid_1's l1: 1.23318
[700]	training's l1: 0.902193	valid_1's l1: 1.23236
[800]	training's l1: 0.901847	valid_1's l1: 1.23177
[900]	training's l1: 0.901444	valid_1's l1: 1.23066
[1000]	training's l1: 0.901217	valid_1's l1: 1.22998
[1100]	training's l1: 0.901202	valid_1's l1: 1.22997
[1200]	training's l1: 0.90098	valid_1's l1: 1.2296
[1300]	training's l1: 0.900543	valid_1's l1: 1.22839
[1400]	training's l1: 0.900453	valid_1's l1: 1.22809


[32m[I 2021-07-09 02:20:48,345][0m Trial 43 finished with value: -1.2280141969118286 and parameters: {'max_depth': 3, 'min_child_weight': 12, 'subsample': 0.7, 'colsample_bytree': 0.8, 'reg_lambda': 0.038259196322637565, 'reg_alpha': 0.011036668879458318, 'feature_fraction': 0.8905884828442846, 'bagging_fraction': 0.9346439510062103, 'bagging_freq': 1}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[1317]	training's l1: 0.900479	valid_1's l1: 1.22807
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.882154	valid_1's l1: 1.20803
[200]	training's l1: 0.879353	valid_1's l1: 1.20202
[300]	training's l1: 0.879229	valid_1's l1: 1.20168
[400]	training's l1: 0.877737	valid_1's l1: 1.19916


[32m[I 2021-07-09 02:21:07,234][0m Trial 44 finished with value: -1.1991526348080936 and parameters: {'max_depth': 6, 'min_child_weight': 8, 'subsample': 0.6, 'colsample_bytree': 0.7, 'reg_lambda': 0.37275701644432463, 'reg_alpha': 0.020211772779693892, 'feature_fraction': 0.9406268254258019, 'bagging_fraction': 0.788141596798027, 'bagging_freq': 2}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[368]	training's l1: 0.877751	valid_1's l1: 1.19916
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.888722	valid_1's l1: 1.21684


[32m[I 2021-07-09 02:21:14,265][0m Trial 45 finished with value: -1.2168399721116945 and parameters: {'max_depth': 13, 'min_child_weight': 10, 'subsample': 0.8, 'colsample_bytree': 0.7, 'reg_lambda': 0.006779299715869684, 'reg_alpha': 0.03982650228886026, 'feature_fraction': 0.8728992252503053, 'bagging_fraction': 0.6784686585790504, 'bagging_freq': 2}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[56]	training's l1: 0.888727	valid_1's l1: 1.21684
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.889933	valid_1's l1: 1.22054
[200]	training's l1: 0.889833	valid_1's l1: 1.22055


[32m[I 2021-07-09 02:21:22,962][0m Trial 46 finished with value: -1.2204654594677733 and parameters: {'max_depth': 9, 'min_child_weight': 14, 'subsample': 0.6, 'colsample_bytree': 0.8, 'reg_lambda': 0.0010810934288944022, 'reg_alpha': 0.0036579462663066317, 'feature_fraction': 0.7700513245620928, 'bagging_fraction': 0.7704740293071086, 'bagging_freq': 3}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[149]	training's l1: 0.8899	valid_1's l1: 1.22047
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.925196	valid_1's l1: 1.27299
[200]	training's l1: 0.918543	valid_1's l1: 1.25838
[300]	training's l1: 0.915043	valid_1's l1: 1.25047
[400]	training's l1: 0.913235	valid_1's l1: 1.24714
[500]	training's l1: 0.91244	valid_1's l1: 1.24517
[600]	training's l1: 0.91133	valid_1's l1: 1.24285
[700]	training's l1: 0.910374	valid_1's l1: 1.24054
[800]	training's l1: 0.909893	valid_1's l1: 1.23958
[900]	training's l1: 0.909417	valid_1's l1: 1.23779
[1000]	training's l1: 0.909176	valid_1's l1: 1.23701
[1100]	training's l1: 0.908944	valid_1's l1: 1.23666
[1200]	training's l1: 0.908689	valid_1's l1: 1.23631
[1300]	training's l1: 0.908634	valid_1's l1: 1.23628
[1400]	training's l1: 0.908594	valid_1's l1: 1.23617
[1500]	training's l1: 0.908438	valid_1's l1: 1.23564
[1600]	training's l1: 0.907974	valid_1's l1: 1.235
[1700]	training's 

[32m[I 2021-07-09 02:22:01,854][0m Trial 47 finished with value: -1.23318558150036 and parameters: {'max_depth': 2, 'min_child_weight': 12, 'subsample': 0.7, 'colsample_bytree': 0.7, 'reg_lambda': 0.16651561010772595, 'reg_alpha': 0.008757618355291718, 'feature_fraction': 0.9649015741766198, 'bagging_fraction': 0.6985592522318135, 'bagging_freq': 1}. Best is trial 39 with value: -1.16870092375586.[0m


Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.883278	valid_1's l1: 1.21113
[200]	training's l1: 0.877726	valid_1's l1: 1.20126
[300]	training's l1: 0.875834	valid_1's l1: 1.1985
[400]	training's l1: 0.874898	valid_1's l1: 1.19626
[500]	training's l1: 0.874836	valid_1's l1: 1.19623
[600]	training's l1: 0.87384	valid_1's l1: 1.19475
[700]	training's l1: 0.872938	valid_1's l1: 1.19277
[800]	training's l1: 0.872569	valid_1's l1: 1.19236
[900]	training's l1: 0.8717	valid_1's l1: 1.19059
[1000]	training's l1: 0.87113	valid_1's l1: 1.18997
[1100]	training's l1: 0.871122	valid_1's l1: 1.18996
[1200]	training's l1: 0.871119	valid_1's l1: 1.18996
[1300]	training's l1: 0.871115	valid_1's l1: 1.18997
Early stopping, best iteration is:
[1241]	training's l1: 0.871119	valid_1's l1: 1.18996


[32m[I 2021-07-09 02:22:51,074][0m Trial 48 finished with value: -1.1898018174172498 and parameters: {'max_depth': 4, 'min_child_weight': 6, 'subsample': 0.6, 'colsample_bytree': 0.7, 'reg_lambda': 1.6213292252513025, 'reg_alpha': 0.08689717514516168, 'feature_fraction': 0.8344892359112204, 'bagging_fraction': 0.9994394082758608, 'bagging_freq': 3}. Best is trial 39 with value: -1.16870092375586.[0m


Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.869247	valid_1's l1: 1.19087
[200]	training's l1: 0.863127	valid_1's l1: 1.18004
[300]	training's l1: 0.862584	valid_1's l1: 1.17941


[32m[I 2021-07-09 02:22:59,299][0m Trial 49 finished with value: -1.1793568955934053 and parameters: {'max_depth': 5, 'min_child_weight': 8, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 0.047107930706906144, 'reg_alpha': 0.028694575076088524, 'feature_fraction': 0.9124553583703207, 'bagging_fraction': 0.7137733329125708, 'bagging_freq': 1}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[245]	training's l1: 0.862642	valid_1's l1: 1.1794
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.886371	valid_1's l1: 1.21696
[200]	training's l1: 0.885684	valid_1's l1: 1.21554
[300]	training's l1: 0.88558	valid_1's l1: 1.2154
[400]	training's l1: 0.885578	valid_1's l1: 1.2154


[32m[I 2021-07-09 02:23:09,080][0m Trial 50 finished with value: -1.2153907027823374 and parameters: {'max_depth': 7, 'min_child_weight': 8, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_lambda': 0.003087928081993416, 'reg_alpha': 0.0023152903346295374, 'feature_fraction': 0.9228910630470383, 'bagging_fraction': 0.7118784421387493, 'bagging_freq': 1}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[313]	training's l1: 0.88558	valid_1's l1: 1.2154
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.868635	valid_1's l1: 1.19059
[200]	training's l1: 0.863767	valid_1's l1: 1.18295
[300]	training's l1: 0.86277	valid_1's l1: 1.18141
[400]	training's l1: 0.860978	valid_1's l1: 1.17848


[32m[I 2021-07-09 02:23:28,080][0m Trial 51 finished with value: -1.1784138844827468 and parameters: {'max_depth': 5, 'min_child_weight': 10, 'subsample': 0.5, 'colsample_bytree': 0.9, 'reg_lambda': 0.048624075564785735, 'reg_alpha': 0.02802318112620915, 'feature_fraction': 0.8923548080646257, 'bagging_fraction': 0.7392130252392374, 'bagging_freq': 2}. Best is trial 39 with value: -1.16870092375586.[0m


Early stopping, best iteration is:
[396]	training's l1: 0.860979	valid_1's l1: 1.17848
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.869515	valid_1's l1: 1.19085
[200]	training's l1: 0.865216	valid_1's l1: 1.18451
[300]	training's l1: 0.860848	valid_1's l1: 1.17488
[400]	training's l1: 0.858652	valid_1's l1: 1.1719
[500]	training's l1: 0.856144	valid_1's l1: 1.16864
[600]	training's l1: 0.855851	valid_1's l1: 1.16793


[32m[I 2021-07-09 02:23:52,932][0m Trial 52 finished with value: -1.1678252180570403 and parameters: {'max_depth': 5, 'min_child_weight': 11, 'subsample': 0.5, 'colsample_bytree': 0.9, 'reg_lambda': 0.13496725043565078, 'reg_alpha': 0.02792071939062328, 'feature_fraction': 0.8892147014842748, 'bagging_fraction': 0.7340556657562571, 'bagging_freq': 2}. Best is trial 52 with value: -1.1678252180570403.[0m


Early stopping, best iteration is:
[563]	training's l1: 0.855856	valid_1's l1: 1.16792
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.880541	valid_1's l1: 1.20341
[200]	training's l1: 0.879861	valid_1's l1: 1.20184


[32m[I 2021-07-09 02:24:03,565][0m Trial 53 finished with value: -1.2018204355534583 and parameters: {'max_depth': 6, 'min_child_weight': 11, 'subsample': 0.5, 'colsample_bytree': 0.9, 'reg_lambda': 0.04811139586604816, 'reg_alpha': 0.02686573024802533, 'feature_fraction': 0.8976418469319403, 'bagging_fraction': 0.6888871646470076, 'bagging_freq': 2}. Best is trial 52 with value: -1.1678252180570403.[0m


Early stopping, best iteration is:
[167]	training's l1: 0.879918	valid_1's l1: 1.20183
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.912183	valid_1's l1: 1.25458
[200]	training's l1: 0.898572	valid_1's l1: 1.23163
[300]	training's l1: 0.893314	valid_1's l1: 1.22114
[400]	training's l1: 0.888917	valid_1's l1: 1.21438
[500]	training's l1: 0.888196	valid_1's l1: 1.21232
[600]	training's l1: 0.886599	valid_1's l1: 1.20825
[700]	training's l1: 0.886287	valid_1's l1: 1.20793
[800]	training's l1: 0.885576	valid_1's l1: 1.20652
[900]	training's l1: 0.885001	valid_1's l1: 1.20545
[1000]	training's l1: 0.884018	valid_1's l1: 1.20286
[1100]	training's l1: 0.883824	valid_1's l1: 1.20285


[32m[I 2021-07-09 02:24:23,790][0m Trial 54 finished with value: -1.20262575934462 and parameters: {'max_depth': 3, 'min_child_weight': 10, 'subsample': 0.5, 'colsample_bytree': 0.9, 'reg_lambda': 0.1378980403635952, 'reg_alpha': 0.4366962327821339, 'feature_fraction': 0.8700098797650899, 'bagging_fraction': 0.7193837618448439, 'bagging_freq': 1}. Best is trial 52 with value: -1.1678252180570403.[0m


Early stopping, best iteration is:
[1032]	training's l1: 0.883957	valid_1's l1: 1.20272
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 0.885039	valid_1's l1: 1.2141
[200]	training's l1: 0.877388	valid_1's l1: 1.20203
[300]	training's l1: 0.874791	valid_1's l1: 1.19697
[400]	training's l1: 0.874078	valid_1's l1: 1.19603
[500]	training's l1: 0.873457	valid_1's l1: 1.19435
[600]	training's l1: 0.872544	valid_1's l1: 1.19197


In [None]:
# Hyperparameter search for the target2 model: 100 Optuna trials, maximizing
# the value returned by the objective.
# NOTE(review): presumably fit_lgbm(...) returns the per-trial objective
# callable that Optuna invokes -- confirm against its definition.
study2 = optuna.create_study(direction='maximize')
study2.optimize(
    fit_lgbm(
        x_train[feature_cols2],
        y_train['target2'],
        x_valid[feature_cols2],
        y_valid['target2'],
        learning_rates[1],
    ),
    n_trials=100,
)

print('Number of finished trials:', len(study2.trials))
print('Best trial:', study2.best_params)

In [None]:
# Hyperparameter search for the target3 model: 100 Optuna trials, maximizing
# the value returned by the objective.
# NOTE(review): presumably fit_lgbm(...) returns the per-trial objective
# callable that Optuna invokes -- confirm against its definition.
study3 = optuna.create_study(direction='maximize')
study3.optimize(
    fit_lgbm(
        x_train[feature_cols3],
        y_train['target3'],
        x_valid[feature_cols3],
        y_valid['target3'],
        learning_rates[2],
    ),
    n_trials=100,
)

print('Number of finished trials:', len(study3.trials))
print('Best trial:', study3.best_params)

In [None]:
# Hyperparameter search for the target4 model: 100 Optuna trials, maximizing
# the value returned by the objective.
# NOTE(review): presumably fit_lgbm(...) returns the per-trial objective
# callable that Optuna invokes -- confirm against its definition.
study4 = optuna.create_study(direction='maximize')
study4.optimize(
    fit_lgbm(
        x_train[feature_cols4],
        y_train['target4'],
        x_valid[feature_cols4],
        y_valid['target4'],
        learning_rates[3],
    ),
    n_trials=100,
)

print('Number of finished trials:', len(study4.trials))
print('Best trial:', study4.best_params)

In [93]:
# Show the hyperparameters of the best trial found for target1.
study1.best_params

{'max_depth': 4,
 'min_child_weight': 18,
 'subsample': 0.6,
 'colsample_bytree': 0.9,
 'reg_lambda': 0.11467861995039172,
 'reg_alpha': 0.06424319118759443,
 'feature_fraction': 0.9638843704312544,
 'bagging_fraction': 0.6767148669241133,
 'bagging_freq': 4}

In [94]:
# Show the hyperparameters of the best trial found for target2.
study2.best_params

{'max_depth': 12,
 'min_child_weight': 4,
 'subsample': 0.8,
 'colsample_bytree': 0.6,
 'reg_lambda': 3.6789038580429465,
 'reg_alpha': 0.001295149012810786,
 'feature_fraction': 0.5155482034139958,
 'bagging_fraction': 0.711248241796355,
 'bagging_freq': 10}

In [95]:
# Show the hyperparameters of the best trial found for target3.
study3.best_params

{'max_depth': 5,
 'min_child_weight': 8,
 'subsample': 0.6,
 'colsample_bytree': 0.8,
 'reg_lambda': 0.08386857038129647,
 'reg_alpha': 0.14416671920586854,
 'feature_fraction': 0.9779531931779787,
 'bagging_fraction': 0.457776092650538,
 'bagging_freq': 4}

In [96]:
# Show the hyperparameters of the best trial found for target4.
study4.best_params

{'max_depth': 6,
 'min_child_weight': 13,
 'subsample': 0.8,
 'colsample_bytree': 0.6,
 'reg_lambda': 0.0376556735585165,
 'reg_alpha': 0.001510757076303828,
 'feature_fraction': 0.4335923168511571,
 'bagging_fraction': 0.9186369126073344,
 'bagging_freq': 10}