# MLB Player Digital Engagement Forecasting Part 2


This is *MLB Digital Engagement Forecasting* competition held by Kaggle. This notebook will:
- load and join the data,
- create a feature set,
- tune hyperparameter,
- trained model.





In [1]:
import gc
import sys
import warnings
from joblib import Parallel, delayed
from pathlib import Path
from optuna.samplers import TPESampler
import multiprocessing
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.utils import resample
import lightgbm as lgb
from sklearn.metrics import accuracy_score
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from statsmodels.tsa.deterministic import (CalendarFourier,
                                           CalendarSeasonality,
                                           CalendarTimeTrend,
                                           DeterministicProcess)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers.experimental.preprocessing import StringLookup
from lightgbm import  LGBMRegressor
warnings.simplefilter("ignore")

# Set Matplotlib defaults
plt.style.use("seaborn-whitegrid")
plt.rc("figure", autolayout=True, figsize=(11, 5))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=14,
    titlepad=10,
)
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)

# 1. Create Training Data #

### Read and extract dataframes

In [2]:
# Helper function to unpack json found in daily data
def unpack_json(json_str):
    return pd.DataFrame() if pd.isna(json_str) else pd.read_json(json_str)


def unpack_data(data, dfs=None, n_jobs=-1):
    if dfs is not None:
        data = data.loc[:, dfs]
    unnested_dfs = {}
    for name, column in data.iteritems():
        daily_dfs = Parallel(n_jobs=n_jobs)(
            delayed(unpack_json)(item) for date, item in column.iteritems())
        df = pd.concat(daily_dfs)
        unnested_dfs[name] = df
    return unnested_dfs

We use features from following tables: seasons,teams,players,and awards

In [3]:
data_dir = Path('../input/mlb-player-digital-engagement-forecasting/')

df_names = ['seasons', 'teams', 'players', 'awards']

for name in df_names:
    globals()[name] = pd.read_csv(data_dir / f"{name}.csv")

kaggle_data_tabs = widgets.Tab()
# Add Output widgets for each pandas DF as tabs' children
kaggle_data_tabs.children = list([widgets.Output() for df_name in df_names])

for index in range(0, len(df_names)):
    # Rename tab bar titles to df names
    kaggle_data_tabs.set_title(index, df_names[index])
    
    # Display corresponding table output for this tab name
    with kaggle_data_tabs.children[index]:
        display(eval(df_names[index]))

display(kaggle_data_tabs)
del teams

Tab(children=(Output(), Output(), Output(), Output()), _titles={'0': 'seasons', '1': 'teams', '2': 'players', …

### Players tables(Library containing high level information about all MLB players in this dataset):
Creating dummies for players birthcountry. Features used in the model:birthyear, birthcountry, and primary position code

In [4]:
players_1=players[players['playerForTestSetAndFuturePreds']==True].copy()
players_1.loc[players_1['playerForTestSetAndFuturePreds']==True,"birthCountry"].unique()
players_1.loc[(players_1['birthCountry']=='Taiwan')|(players_1['birthCountry']=='South Korea')|(players_1['birthCountry']=='Japan')|(players_1['birthCountry']=="China"),"birthCountry"]="Asia"
players_1.loc[(players_1['birthCountry']=='Germany')|(players_1['birthCountry']=='Netherlands'),'birthCountry']='Europe'
players_1.loc[(players_1['birthCountry']!='USA')&(players_1['birthCountry']!='Europe')&(players_1['birthCountry']!='Asia'),'birthCountry']='America'
players_1=players_1[['playerId','birthCountry','DOB','heightInches','weight','primaryPositionCode']]
players_1=pd.get_dummies(players_1,columns=['birthCountry','primaryPositionCode'])
players_1['DOB'] = pd.PeriodIndex(players_1.DOB, freq='D')
players_1['birthyear']=players_1.DOB.dt.year
players_1.drop('DOB',axis=1,inplace=True)

The training data is a time-indexed collection of nested JSON fields containing information about each player. Our target is the `nextDayPlayerEngagement` column, while the remaining columns could be used to construct features.

In [5]:
%%time
# Define dataframes to load from training set
dfs = [
    'nextDayPlayerEngagement',  # targets
    'playerBoxScores',  # features
    # Other dataframes available for features:
    'games',
    'rosters',
    #'teamBoxScores',
    'transactions',
    'standings',
    'awards',
    # 'events',
    'playerTwitterFollowers',
]

# Read training data
training = pd.read_csv(
    data_dir / 'train.csv',
    usecols=['date'] + dfs,
)

# Convert training data date field to datetime type
training['date'] = pd.to_datetime(training['date'], format="%Y%m%d")
training = training.set_index('date').to_period('D')
print(training.info())

<class 'pandas.core.frame.DataFrame'>
PeriodIndex: 1216 entries, 2018-01-01 to 2021-04-30
Freq: D
Data columns (total 8 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   nextDayPlayerEngagement  1216 non-null   object
 1   games                    639 non-null    object
 2   rosters                  1216 non-null   object
 3   playerBoxScores          538 non-null    object
 4   transactions             1103 non-null   object
 5   standings                531 non-null    object
 6   awards                   294 non-null    object
 7   playerTwitterFollowers   40 non-null     object
dtypes: object(8)
memory usage: 85.5+ KB
None
CPU times: user 19.2 s, sys: 6.97 s, total: 26.2 s
Wall time: 1min


### Quick overview of the training table

In [6]:
training.head()

Unnamed: 0_level_0,nextDayPlayerEngagement,games,rosters,playerBoxScores,transactions,standings,awards,playerTwitterFollowers
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-01,"[{""engagementMetricsDate"":""2018-01-02"",""player...",,"[{""playerId"":400121,""gameDate"":""2018-01-01"",""t...",,"[{""transactionId"":340732,""playerId"":547348,""pl...",,,"[{""date"":""2018-01-01"",""playerId"":545361,""playe..."
2018-01-02,"[{""engagementMetricsDate"":""2018-01-03"",""player...",,"[{""playerId"":134181,""gameDate"":""2018-01-02"",""t...",,"[{""transactionId"":339458,""playerId"":621173,""pl...",,,
2018-01-03,"[{""engagementMetricsDate"":""2018-01-04"",""player...",,"[{""playerId"":425492,""gameDate"":""2018-01-03"",""t...",,"[{""transactionId"":347527,""playerId"":572389,""pl...",,,
2018-01-04,"[{""engagementMetricsDate"":""2018-01-05"",""player...",,"[{""playerId"":282332,""gameDate"":""2018-01-04"",""t...",,"[{""transactionId"":339549,""playerId"":545343,""pl...",,,
2018-01-05,"[{""engagementMetricsDate"":""2018-01-06"",""player...",,"[{""playerId"":282332,""gameDate"":""2018-01-05"",""t...",,"[{""transactionId"":341195,""playerId"":628336,""pl...",,,


### Unpack Json file with created equation

In [7]:
%time
# Unpack nested dataframes and store in dictionary `training_dfs`
training_dfs = unpack_data(training, dfs=dfs)
print('\n', training_dfs.keys())

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 6.91 µs

 dict_keys(['nextDayPlayerEngagement', 'playerBoxScores', 'games', 'rosters', 'transactions', 'standings', 'awards', 'playerTwitterFollowers'])


### Games Table:
Transfer game/row data structure to team/row structure.Features newly created for the model:wins difference,score difference 


In [8]:
def make_games(dfs:dict): 
    home=dfs['games'].copy()
    if "gameDate" in  home.columns:
        home=home.rename(columns={'homeId':'teamId','homeWins':'teamWins','awayWins':'oppWins','homeScore':'teamScore','awayScore':'oppScore','homeWinner':'teamWinner',"gameDate":"date"})
        home=home[['teamId','teamWins','oppWins','teamScore','oppScore','teamWinner',"date","gameType"]]
        away=dfs['games'].copy()
        away=away.rename(columns={'awayId':'teamId','homeWins':'oppWins','awayWins':'teamWins','homeScore':'oppScore','awayScore':'teamScore','awayWinner':'teamWinner',"gameDate":"date"})
        away=away[['teamId','teamWins','oppWins','teamScore','oppScore','teamWinner',"date","gameType"]]
        away
        games=pd.concat([home,away],axis=0)
        games["winsdiff"]=games['teamWins']-games['oppWins']
        games["scorediff"]=games['teamScore']-games['oppScore']
        games=games.drop(['teamWins','oppWins','teamScore','oppScore'],axis=1)
        games = games.groupby(['date', 'teamId'], as_index=False).mean()
        games['date']=pd.PeriodIndex(games.date,freq='D')
    else:
        games=dfs['games'].copy()
        games[['teamId']]=np.nan
        games[['date']]=np.nan
    return games

### Include players' average target value of the previous year as features

In [9]:
targ = training_dfs['nextDayPlayerEngagement'].copy()
targets = ["target1", "target2", "target3", "target4"]
targ  = targ .astype({name: np.float32 for name  in targets})
targ = targ.rename(columns={'engagementMetricsDate': 'date'})
targ['date'] = pd.to_datetime(targ['date'])
targ = targ.set_index('date').to_period('D')
targ.index = targ.index - 1
targ=targ.reset_index()

In [10]:
targ['seasonId']=targ.date.dt.year+1
targ.drop(['date'],axis=1,inplace=True)
targ=targ.groupby(by=['seasonId','playerId'],as_index=False).mean()
targ=targ.loc[(targ.seasonId!=2022),:]
# Match target dates to feature dates and create date index
targ=targ.rename(columns={"target1":"meantarget1","target2":"meantarget2","target3":"meantarget3","target4":"meantarget4"})
targ = targ.astype({'playerId': str})

### Rosters: player_id, team_id, status_code,game_date

In [11]:
def make_rosters(dfs:dict):
    rosters=dfs['rosters'].copy()
    if "playerId" in rosters.columns:
        rosters=rosters[['playerId','gameDate','teamId','statusCode']]
        rosters=pd.get_dummies(rosters,columns=['statusCode'])
        rosters=rosters.rename(columns={'gameDate':'date'})
        rosters['date']=pd.PeriodIndex(rosters.date,freq='D')
    else:
        rosters[['playerId']]=np.nan
        rosters[['date']]=np.nan
        rosters[['teamId']]=np.nan
    return rosters

### twitter followers: player_id, date, numberof followers

In [12]:

def make_playerTwitterFollowers(dfs:dict):
    twitfollowers=dfs['playerTwitterFollowers'].copy()
    if 'playerId' in  twitfollowers.columns:
        twitfollowers=twitfollowers[['date','playerId','numberOfFollowers']]
        twitfollowers['date']=pd.PeriodIndex(twitfollowers.date,freq='D')
    else:
        twitfollowers[['playerId']]=np.nan
        twitfollowers[['date']]=np.nan
    return twitfollowers


### Team_Standing:'teamid','gamedate','divisionRank','leagueRank','wildCardRank','pct','wins','eliminated','wildCardEliminated'

In [13]:
standings_feature=['divisionRank','leagueRank','wildCardRank','pct','wins','eliminationNumber','wildCardEliminationNumber'
       ]
def make_standings(dfs:dict,standings_feature):
    standings=dfs['standings'].copy()
    if 'teamId' in standings.columns: 
        standings=standings[['teamId','gameDate']+standings_feature]
        standings=standings.rename(columns={'gameDate':'date'})
        standings['date']=pd.PeriodIndex(standings.date,freq='D')
        standings['eliminated']=standings['eliminationNumber']=='E'
        standings['wildeliminated']=standings['wildCardEliminationNumber']=='E'
        standings=standings.drop(['eliminationNumber','wildCardEliminationNumber'],axis=1)
    else:
        standings[['teamId']]=np.nan
        standings[['date']]=np.nan
    return standings

# Previous year award

In [14]:
awardhist=training_dfs['awards'].copy()

In [15]:
awards=['All-MLB First Team', 'All-MLB Second Team','NL Comeback Player of the Year',
       'Edgar Martinez Outstanding DH of the Year',
       'AL Comeback Player of the Year', 'The Hutch Award','AL Cy Young', 'NL Cy Young', 'AL MVP',
       'NL MVP','MLB Players Choice NL Comeback Player',
       'MLB Players Choice NL Outstanding Pitcher',
       'MLB Players Choice NL Outstanding Rookie',
       'MLB Players Choice NL Outstanding Player',
       'Rawlings NL Platinum Glove', 'Babe Ruth Award',
       'MLB Players Choice Player of the Year',
       'MLB Players Choice Man of the Year',
       'MLB Players Choice AL Outstanding Player',
       'MLB Players Choice AL Comeback Player',
       'MLB Players Choice AL Outstanding Pitcher',
       'MLB Players Choice AL Outstanding Rookie', 'Wilson Defensive Player of the Year',
       'Wilson MLB Defensive Player of the Year', 'AL Silver Slugger',
       'NL Silver Slugger', 'MLBPAA Heart and Hustle Award',
       'Rawlings AL Platinum Glove', 'Roberto Clemente Award', 'NL Hank Aaron Award',
       'AL Hank Aaron Award', 'Trevor Hoffman NL Reliever of the Year',
       'Mariano Rivera AL Reliever of the Year',
       'World Series Championship', 'World Series MVP','Baseball America Minor League Player of the Year',
       'Baseball America Major League Rookie of the Year',
       'Baseball America Major League Player of the Year','Home Run Derby Winner', 'All-Star MVP', 'AL All-Star',
       'NL All-Star',]


In [16]:
awardhist=training_dfs['awards'].copy()
awardhist=awardhist[['awardSeason','playerId','awardName']]
awardhist=awardhist.loc[awardhist.awardName.isin(awards),:]
awardhist=awardhist.rename(columns={'awardSeason':'seasonId'})
awardhist['seasonId']=awardhist['seasonId']+1
awardhist=pd.get_dummies(awardhist,'awardName')
awardhist=awardhist.groupby(['playerId','seasonId'],as_index=False).sum()
awardhist['playerId']=[str(a) for a in awardhist.playerId]

In [17]:
#features = [
#    "hits",
#    'doubles', 
#    'triples',
#    "strikeOuts",
#    "homeRuns",
#    "runsScored",
#    "stolenBases",
#    "strikes",
#    "flyOuts",
#    "groundOuts",
#    "errors",
#    "rbi",
#    "baseOnBalls",
#    "gamesPlayedPitching", 
#    'gamesStartedPitching',
#    'completeGamesPitching', 
#    'shutoutsPitching', 
#    'winsPitching',
#    'lossesPitching',
#    'runsPitching', 
#    'homeRunsPitching', 
#    'strikeOutsPitching',
#    'baseOnBallsPitching',
#    'inningsPitched',
#    'teamName',
#    'positionName',
#]


### game_date features (Box Score Statistics)

In [18]:
features = ['home',
    'gamesPlayedBatting', 'flyOuts',
       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',
       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',
       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',
       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',
       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',
       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',
       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',
       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',
       'groundOutsPitching', 'runsPitching', 'doublesPitching',
       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',
       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',
       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',
       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',
       'earnedRuns', 'battersFaced',  'pitchesThrown', 'balls',
       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',
       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',
       'inheritedRunnersScored', 'catchersInterferencePitching',
       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',
       'assists', 'putOuts', 'errors', 'chances','battingOrder',
    'positionName'
]

In [19]:
gc.collect
del training

In [20]:
pids_test = players.playerId.loc[
    players.playerForTestSetAndFuturePreds.fillna(False)
].astype(str)
pids_test.shape

(1187,)

In [21]:
# Players in the test set. We'll filter our data for only this set of players
pids_test = players.playerId.loc[
    players.playerForTestSetAndFuturePreds.fillna(False)
].astype(str)

# Name of target columns
targets = ["target1", "target2", "target3", "target4"]

def make_playerBoxScores(dfs: dict, features):
    X = dfs['playerBoxScores'].copy()
    if 'playerId' in X.columns:
        X = dfs['playerBoxScores'].copy()
        X = X[['gameDate', 'playerId'] + features]
        X = X.rename(columns={'gameDate': 'date'})
        X['date'] = pd.PeriodIndex(X.date, freq='D')
        X=X[features+['date','playerId']]
        X['battingOrder']=X['battingOrder'].fillna(0)
        X.loc[X['battingOrder']==0,'battingOrder']=10009
        X['starting']=X['battingOrder']%100
        X['starting']=X['starting']==0
        X['order']=X['battingOrder']//100
        X=pd.get_dummies(X,columns=['order'])
        X.drop(['battingOrder'],axis=1,inplace=True)
        # Set dtypes
        X = X.astype({'playerId': str,'starting': int})
        X['positionName']=X['positionName'].fillna(value="X")
        # Create date index
        X=pd.get_dummies(data=X,columns=['positionName'])
        # Aggregate multiple games per day by summing
        X = X.groupby(['date', 'playerId'], as_index=False).mean()
    else:
        X[['playerId']]=np.nan
        X[['date']]=np.nan
    return X

# Set dtypes
#def make_playerTwitterFollowers(dfs:dict):
#    X=dfs['playerTwitterFollowers'].copy()
#    X=X[['date', 'playerId','numberOfFollowers']]
#    X['date'] = pd.PeriodIndex(X.date, freq='D')
##    # Aggregate multiple games per day by summing
#    X = X.groupby(['date', 'playerId'], as_index=False).sum()
#    return X

def make_targets(training_dfs: dict):
    Y = training_dfs['nextDayPlayerEngagement'].copy()
    # Set dtypes
    Y = Y.astype({name: np.float32 for name  in targets})
    Y = Y.astype({'playerId': str})
    # Match target dates to feature dates and create date index
    Y = Y.rename(columns={'engagementMetricsDate': 'date'})
    Y['date'] = pd.to_datetime(Y['date'])
    Y = Y.set_index('date').to_period('D')
    Y.index = Y.index - 1
    return Y.reset_index()


#def join_datasets(dfs):
#    dfs = [x.pivot(index='date', columns='playerId') for x in dfs]
#    df = pd.concat(dfs, axis=1).stack().reset_index('playerId')
#    return df

#def make_trainX(test_dfs: dict, features):
#    X = make_playerBoxScores(test_dfs, features)
#    X = X.merge(pids_test, how='right')
    #fourier_terms = CalendarFourier(freq='A', order=4)
    #deterministic = DeterministicProcess(
    #    index=X.index,
    #    order=0,
    #    seasonal=False,  # set to True for weekly seasonality
    #    additional_terms=[fourier_terms],
    #)
    #X = pd.concat([X, deterministic.in_sample()], axis=1)
 #   return X
#def make_trainy(training_dfs: dict,
#                      targets):

#    Y = make_targets(training_dfs)
    # Filter for players in test set
#    Y=Y.loc[Y.playerId.isin(pids_test), :]
    # Convert from long to wide format
    # Restore features and target
    # Fill missing values in features
#    return Y
    # Create train / validation splits
#X,Y=make_trainX(training_dfs,features),make_trainy(training_dfs,targets)

### Merging All Using Tables (test set start from 2021 June)
Before merging the tables, we create new features of "season part" since the atmosphere at the beginning of the season and that at the end of the season should be very different. We use 2021 season as test set, and 2019,2020 as training set.

In [22]:
def make_training_data(training_dfs: dict,
                       features,
                       targets,
                       fourier=4,
                       test_size=30):
    # Process dataframes
    X = make_playerBoxScores(training_dfs, features)
    standings=make_standings(training_dfs,standings_feature)
    games=make_games(training_dfs)
    rosters=make_rosters(training_dfs)
    #transactions=make_transactions(training_dfs)
    TwitterFollowers=make_playerTwitterFollowers(training_dfs)
    Y = make_targets(training_dfs)
    rosters['playerId']=[str(x) for x in rosters.playerId]
    players_1['playerId']=[str(x) for x in players_1.playerId]
    #transactions['playerId']=[str(x) for x in transactions.playerId]
    TwitterFollowers['playerId']=[str(x) for x in TwitterFollowers.playerId]
    # Merge for processing
    Y =Y.loc[Y.playerId.isin(pids_test), :]
    df = Y.merge(X,how='left',on=['date','playerId'])
    # Filter for players in test set
    df['seasonId']=df.date.dt.year
    df=df.loc[(df.seasonId!=2018),:]
    df=df.merge(awardhist,how='left',on=['seasonId','playerId'])
    df=df.merge(targ,how='left',on=['seasonId','playerId'])
    unique=pd.DataFrame({'date':df.date.unique()})
    unique['seasonId']=unique.date.dt.year
    unique=unique.merge(seasons,on='seasonId')
    unique['seasonPart'] = np.select(
        [
        unique['date'] < unique['preSeasonStartDate'], 
        unique['date'] <unique['regularSeasonStartDate'],
        unique['date'] <= unique['lastDate1stHalf'],
        unique['date'] < unique['firstDate2ndHalf'],
        unique['date'] <= unique['regularSeasonEndDate'],
        unique['date'] < unique['postSeasonStartDate'],
       unique['date'] <= unique['postSeasonEndDate'],
        unique['date'] >unique['postSeasonEndDate']
        ], 
        [
        'Offseason',
        'Preseason',
        'Reg Season 1st Half',
        'All-Star Break',
        'Reg Season 2nd Half',
        'Between Reg and Postseason',
        'Postseason',
        'Offseason'
        ], 
        default = np.nan
        )
    unique=unique[['date','seasonPart']]
    df=df.merge(unique,how='left',on='date')
    df=df.loc[df.seasonPart!='Offseason',:]
    #df=df.loc[df.seasonPart!='Preseason',:]
    #df=df.loc[df.seasonPart!='Postseason',:]
    #df=df.loc[df.seasonPart!='Offseason',:]
    df=df.drop('seasonId',axis=1)
    df=df.merge(rosters,how='left',on=['date','playerId'])
    #df=df.merge(transactions,how='left',on=['date','playerId'])
    df=df.merge(TwitterFollowers,how='left',on=['date','playerId'])
    df=df.merge(standings,how='left',on=['date','teamId'])
    df=df.merge(games,how='left',on=['date','teamId'])
    df=df.merge(players_1,how='left',on=['playerId'])
    #df['seasonId']=df.date.dt.year
    #df['age']=df['seasonId']-df['birthyear']
    #df.drop('seasonId',axis=1, inplace=True)
    df=pd.get_dummies(df,columns=['teamId'])
    # Convert from long to wide format
    #df = df.pivot(columns="playerId")
    # Restore features and targets
    df['seasonId']=df.date.dt.year
    df['age']=df['seasonId']-df['birthyear']
    train_df=df.loc[(df.seasonId!=2021),:]
    test_df=df.loc[(df.seasonId==2021),:]
    train_df=pd.get_dummies(train_df,columns=['seasonPart'])
    test_df=pd.get_dummies(test_df,columns=['seasonPart'])
    missing_cols = set( train_df.columns ) - set(test_df.columns )
    for c in missing_cols:
        test_df[c] = 0
    y_train = train_df[targets]
    y_valid = test_df[targets]
    # Fill missing values in features
    X_train =train_df.drop(targets+['date','playerId','birthyear','seasonId'],axis=1)
    X_train.fillna(0, inplace=True)
    X_valid =test_df.drop(targets+['date','playerId','birthyear','seasonId'],axis=1)
    X_valid.fillna(0, inplace=True)
    # Create temporal features
    # Create train / validation splits,:]
    #X_train,X_valid=X.loc[train_id,:],X.loc[test_id,:]
    #y_train,y_valid=Y.loc[train_id,:],Y.loc[test_id,:]
    return X_train, X_valid, y_train, y_valid

In [23]:
X_train, X_valid, y_train, y_valid=make_training_data(training_dfs,
                       features,
                       targets
                       )

In [24]:
X_train.columns

Index(['home', 'gamesPlayedBatting', 'flyOuts', 'groundOuts', 'runsScored',
       'doubles', 'triples', 'homeRuns', 'strikeOuts', 'baseOnBalls',
       ...
       'teamId_146.0', 'teamId_147.0', 'teamId_158.0', 'age',
       'seasonPart_All-Star Break', 'seasonPart_Between Reg and Postseason',
       'seasonPart_Postseason', 'seasonPart_Preseason',
       'seasonPart_Reg Season 1st Half', 'seasonPart_Reg Season 2nd Half'],
      dtype='object', length=215)

In [25]:
del training_dfs 

In [26]:
columns=X_train.columns
uint8=[col for col in columns if X_train[col].dtype == "uint8"]

In [27]:
gc.collect()

18692

In [28]:
#history_df = pd.DataFrame(history.history)
#history_df.loc[:, ['loss', 'val_loss']].plot()

# Optimize Lgbm 4-1 #

Below is how lgbm for taget 1 is tuned and traned. Models for target 2, 3 and 4 are tuned and trained in similar process.

In [29]:
y_valid_1=y_valid['target1']
y_train_1=y_train['target1']
columns=X_train.columns.tolist()
columns
uint8=[col for col in columns if X_train[col].dtype == "uint8"]
X_train = X_train.astype({name: np.int for name  in uint8})
X_valid = X_valid.astype({name: np.int for name  in uint8})
X_train=X_train.astype({name:np.int for name in ["wildeliminated","eliminated"]})
X_valid=X_valid.astype({name:np.int for name in ["wildeliminated","eliminated"]})
del y_train, y_valid

In [30]:
def objective_1(trial):
    params = {
       'boosting_type': 'gbdt',
       'importance_type': 'gain',
       'reg_alpha': trial.suggest_uniform('reg_alpha', 0.1, 0.9),
       'reg_lambda': trial.suggest_uniform('reg_lambda', 0.1, 0.9),
       'num_leaves': trial.suggest_int('num_leaves', 2, 256),
       'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
       'subsample': trial.suggest_uniform('subsample', 0.4, 1.0),
       'subsample_freq': trial.suggest_int('subsample_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'learning_rate':trial.suggest_loguniform('learning_rate', 1e-5, 1e-0),
        'num_threads': multiprocessing.cpu_count(),
        'objective':'mae',
        'n_estimators':1000 
          }
    
    gbm = lgb.LGBMRegressor(**params)
    gbm.fit(X_train, y_train_1,
    eval_set=[(X_valid, y_valid_1)],
    eval_metric='mae',
    early_stopping_rounds=50,verbose=200
    )
    y_pred = gbm.predict(X_valid)
# eval
    mae= mean_absolute_error(y_valid_1, y_pred)
    return mae


# Tune Hyperparameter with optuna #


In [31]:
study = optuna.create_study(direction = "minimize", sampler = TPESampler(seed=101))
study.optimize(objective_1, n_trials =100, n_jobs = multiprocessing.cpu_count(), timeout=60*60*2)

[32m[I 2022-01-01 17:16:24,838][0m A new study created in memory with name: no-name-548e9620-1f52-4e4a-ac13-bc790421a547[0m


Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[92]	valid_0's l1: 1.36968


[32m[I 2022-01-01 17:17:33,237][0m Trial 0 finished with value: 1.3696771614509051 and parameters: {'reg_alpha': 0.6458726980136842, 'reg_lambda': 0.518393241506194, 'num_leaves': 236, 'feature_fraction': 0.8704137221114486, 'subsample': 0.5677980742805127, 'subsample_freq': 5, 'min_child_samples': 60, 'learning_rate': 0.15020749050530585}. Best is trial 0 with value: 1.3696771614509051.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.44741
[200]	valid_0's l1: 1.3683
[200]	valid_0's l1: 1.47658
Early stopping, best iteration is:
[216]	valid_0's l1: 1.36771


[32m[I 2022-01-01 17:18:15,504][0m Trial 3 finished with value: 1.367711365127603 and parameters: {'reg_alpha': 0.21806577703683105, 'reg_lambda': 0.19884363674994265, 'num_leaves': 197, 'feature_fraction': 0.5333030501617965, 'subsample': 0.6053689817274363, 'subsample_freq': 5, 'min_child_samples': 43, 'learning_rate': 0.16057447780325268}. Best is trial 3 with value: 1.367711365127603.[0m


Training until validation scores don't improve for 50 rounds
[400]	valid_0's l1: 1.42699
[200]	valid_0's l1: 1.37989
[400]	valid_0's l1: 1.47641
[200]	valid_0's l1: 1.37673
Early stopping, best iteration is:
[168]	valid_0's l1: 1.3763


[32m[I 2022-01-01 17:19:30,463][0m Trial 5 finished with value: 1.376301685323312 and parameters: {'reg_alpha': 0.6498810277306861, 'reg_lambda': 0.19143140605705666, 'num_leaves': 144, 'feature_fraction': 0.7366711030518323, 'subsample': 0.7917723218520532, 'subsample_freq': 6, 'min_child_samples': 11, 'learning_rate': 0.5600671514412197}. Best is trial 3 with value: 1.367711365127603.[0m


Training until validation scores don't improve for 50 rounds
[400]	valid_0's l1: 1.37835
[600]	valid_0's l1: 1.41338
[600]	valid_0's l1: 1.47623
[600]	valid_0's l1: 1.37467
[800]	valid_0's l1: 1.40384
Early stopping, best iteration is:
[686]	valid_0's l1: 1.37424


[32m[I 2022-01-01 17:21:25,651][0m Trial 4 finished with value: 1.3742443689462789 and parameters: {'reg_alpha': 0.3643530092156637, 'reg_lambda': 0.4956863201624625, 'num_leaves': 56, 'feature_fraction': 0.5494762016872186, 'subsample': 0.9381420690227269, 'subsample_freq': 5, 'min_child_samples': 100, 'learning_rate': 0.0353381260825993}. Best is trial 3 with value: 1.367711365127603.[0m


[200]	valid_0's l1: 1.47647
Training until validation scores don't improve for 50 rounds
[800]	valid_0's l1: 1.47604
[1000]	valid_0's l1: 1.39728
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.39728


[32m[I 2022-01-01 17:22:19,026][0m Trial 1 finished with value: 1.3972781224405761 and parameters: {'reg_alpha': 0.4036738228123923, 'reg_lambda': 0.5616150532598799, 'num_leaves': 135, 'feature_fraction': 0.9276567124034819, 'subsample': 0.47828309937373026, 'subsample_freq': 4, 'min_child_samples': 75, 'learning_rate': 0.0013990490210092953}. Best is trial 3 with value: 1.367711365127603.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.47641
[1000]	valid_0's l1: 1.47584
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.47584
[400]	valid_0's l1: 1.47618


[32m[I 2022-01-01 17:23:31,106][0m Trial 2 finished with value: 1.4758433890497322 and parameters: {'reg_alpha': 0.8708470406561415, 'reg_lambda': 0.544428870797115, 'num_leaves': 197, 'feature_fraction': 0.860569330703177, 'subsample': 0.6084816706054401, 'subsample_freq': 7, 'min_child_samples': 62, 'learning_rate': 1.0294145610720608e-05}. Best is trial 3 with value: 1.367711365127603.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.47571
[400]	valid_0's l1: 1.47608
[200]	valid_0's l1: 1.37637
[600]	valid_0's l1: 1.47588
[600]	valid_0's l1: 1.47572
[400]	valid_0's l1: 1.37239
[400]	valid_0's l1: 1.47454
[600]	valid_0's l1: 1.36971
[800]	valid_0's l1: 1.47535
[800]	valid_0's l1: 1.47558
[800]	valid_0's l1: 1.3669
[600]	valid_0's l1: 1.47327
Early stopping, best iteration is:
[862]	valid_0's l1: 1.36668


[32m[I 2022-01-01 17:27:34,693][0m Trial 9 finished with value: 1.3666786314278736 and parameters: {'reg_alpha': 0.8239295814203415, 'reg_lambda': 0.30033533234794263, 'num_leaves': 68, 'feature_fraction': 0.550011442141727, 'subsample': 0.675488010131891, 'subsample_freq': 5, 'min_child_samples': 46, 'learning_rate': 0.08971854080630787}. Best is trial 9 with value: 1.3666786314278736.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's l1: 1.47496
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.47496


[32m[I 2022-01-01 17:28:08,837][0m Trial 7 finished with value: 1.4749562083192853 and parameters: {'reg_alpha': 0.7811496106246775, 'reg_lambda': 0.631511871126627, 'num_leaves': 89, 'feature_fraction': 0.6582226343069593, 'subsample': 0.7793383552462401, 'subsample_freq': 5, 'min_child_samples': 39, 'learning_rate': 2.2043740746469567e-05}. Best is trial 9 with value: 1.3666786314278736.[0m


Training until validation scores don't improve for 50 rounds
[800]	valid_0's l1: 1.47195
[1000]	valid_0's l1: 1.47527
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.47527
[200]	valid_0's l1: 1.4744


[32m[I 2022-01-01 17:29:12,518][0m Trial 6 finished with value: 1.4752727875346934 and parameters: {'reg_alpha': 0.770370142403116, 'reg_lambda': 0.5504008628424154, 'num_leaves': 191, 'feature_fraction': 0.46660797506241913, 'subsample': 0.871507693200507, 'subsample_freq': 5, 'min_child_samples': 45, 'learning_rate': 1.670438360572384e-05}. Best is trial 9 with value: 1.3666786314278736.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.46966
Early stopping, best iteration is:
[134]	valid_0's l1: 1.39241


[32m[I 2022-01-01 17:29:56,364][0m Trial 12 finished with value: 1.3924096144743117 and parameters: {'reg_alpha': 0.7658415262838038, 'reg_lambda': 0.5818117461971393, 'num_leaves': 19, 'feature_fraction': 0.8800651821254576, 'subsample': 0.6917226372713785, 'subsample_freq': 4, 'min_child_samples': 64, 'learning_rate': 0.05361925355880613}. Best is trial 9 with value: 1.3666786314278736.[0m


[1000]	valid_0's l1: 1.4706
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.4706
Training until validation scores don't improve for 50 rounds


[32m[I 2022-01-01 17:30:21,786][0m Trial 8 finished with value: 1.4706009480347593 and parameters: {'reg_alpha': 0.8311617249331799, 'reg_lambda': 0.14298939718712964, 'num_leaves': 193, 'feature_fraction': 0.8610394902396532, 'subsample': 0.7001454011234036, 'subsample_freq': 4, 'min_child_samples': 95, 'learning_rate': 5.811887298622751e-05}. Best is trial 9 with value: 1.3666786314278736.[0m


[400]	valid_0's l1: 1.4621
[400]	valid_0's l1: 1.47169
Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.46456
[600]	valid_0's l1: 1.455
[200]	valid_0's l1: 1.41732
[400]	valid_0's l1: 1.45126
[600]	valid_0's l1: 1.46884
[800]	valid_0's l1: 1.44854
[600]	valid_0's l1: 1.44086
[400]	valid_0's l1: 1.395
[800]	valid_0's l1: 1.46595
[1000]	valid_0's l1: 1.44271
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.44271
[800]	valid_0's l1: 1.43326


[32m[I 2022-01-01 17:34:01,679][0m Trial 11 finished with value: 1.4427136699475704 and parameters: {'reg_alpha': 0.27630277825838406, 'reg_lambda': 0.5648472075506311, 'num_leaves': 151, 'feature_fraction': 0.6604152145353299, 'subsample': 0.6229284150304926, 'subsample_freq': 1, 'min_child_samples': 31, 'learning_rate': 0.00035318535114302465}. Best is trial 9 with value: 1.3666786314278736.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.3853
[1000]	valid_0's l1: 1.42724
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.42724


[32m[I 2022-01-01 17:34:44,399][0m Trial 13 finished with value: 1.4272444862073894 and parameters: {'reg_alpha': 0.8989418724308356, 'reg_lambda': 0.8754043910322111, 'num_leaves': 7, 'feature_fraction': 0.6742919695840572, 'subsample': 0.4898712498034121, 'subsample_freq': 1, 'min_child_samples': 11, 'learning_rate': 0.0014332685106404253}. Best is trial 9 with value: 1.3666786314278736.[0m


[1000]	valid_0's l1: 1.46319
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.46319
Training until validation scores don't improve for 50 rounds


[32m[I 2022-01-01 17:35:08,247][0m Trial 10 finished with value: 1.4631933142853026 and parameters: {'reg_alpha': 0.7730180675757875, 'reg_lambda': 0.8559520747507688, 'num_leaves': 101, 'feature_fraction': 0.6011940444582544, 'subsample': 0.8106962562644959, 'subsample_freq': 4, 'min_child_samples': 18, 'learning_rate': 0.00013644747394413474}. Best is trial 9 with value: 1.3666786314278736.[0m


Early stopping, best iteration is:
[112]	valid_0's l1: 1.37353


[32m[I 2022-01-01 17:35:16,615][0m Trial 15 finished with value: 1.3735308383211682 and parameters: {'reg_alpha': 0.12344275112459041, 'reg_lambda': 0.3225918637284936, 'num_leaves': 89, 'feature_fraction': 0.40072871702642926, 'subsample': 0.4423650758766987, 'subsample_freq': 1, 'min_child_samples': 20, 'learning_rate': 0.815268867580768}. Best is trial 9 with value: 1.3666786314278736.[0m


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[54]	valid_0's l1: 1.38612


[32m[I 2022-01-01 17:35:21,141][0m Trial 16 finished with value: 1.3861243312756384 and parameters: {'reg_alpha': 0.1051921229907643, 'reg_lambda': 0.3007900375108899, 'num_leaves': 91, 'feature_fraction': 0.40790091362527153, 'subsample': 0.4282637038726166, 'subsample_freq': 7, 'min_child_samples': 27, 'learning_rate': 0.9746413339103317}. Best is trial 9 with value: 1.3666786314278736.[0m


Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
[800]	valid_0's l1: 1.37955
[200]	valid_0's l1: 1.38671
[200]	valid_0's l1: 1.38929
[1000]	valid_0's l1: 1.37665
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.37665
[200]	valid_0's l1: 1.37722


[32m[I 2022-01-01 17:37:19,944][0m Trial 14 finished with value: 1.3766463148414267 and parameters: {'reg_alpha': 0.15908380460328583, 'reg_lambda': 0.3041653508273083, 'num_leaves': 83, 'feature_fraction': 0.4341848282496436, 'subsample': 0.4423260513670303, 'subsample_freq': 1, 'min_child_samples': 27, 'learning_rate': 0.004772812394085137}. Best is trial 9 with value: 1.3666786314278736.[0m


Training until validation scores don't improve for 50 rounds
[400]	valid_0's l1: 1.3717
[400]	valid_0's l1: 1.37435
[400]	valid_0's l1: 1.3673
[200]	valid_0's l1: 1.37875
[600]	valid_0's l1: 1.36616
[600]	valid_0's l1: 1.36595
[600]	valid_0's l1: 1.36914
Early stopping, best iteration is:
[578]	valid_0's l1: 1.36588


[32m[I 2022-01-01 17:40:24,826][0m Trial 19 finished with value: 1.3658844202219236 and parameters: {'reg_alpha': 0.19525166164367225, 'reg_lambda': 0.36348573015731167, 'num_leaves': 249, 'feature_fraction': 0.5155216306590316, 'subsample': 0.5498724790068206, 'subsample_freq': 3, 'min_child_samples': 48, 'learning_rate': 0.01504753251315197}. Best is trial 19 with value: 1.3658844202219236.[0m


Training until validation scores don't improve for 50 rounds
[400]	valid_0's l1: 1.36896
[800]	valid_0's l1: 1.36456
[800]	valid_0's l1: 1.36788
[200]	valid_0's l1: 1.40109
[1000]	valid_0's l1: 1.36366
Did not meet early stopping. Best iteration is:
[993]	valid_0's l1: 1.36365
[600]	valid_0's l1: 1.3673
[400]	valid_0's l1: 1.38729


[32m[I 2022-01-01 17:42:48,788][0m Trial 17 finished with value: 1.3636509262585537 and parameters: {'reg_alpha': 0.1468152491830631, 'reg_lambda': 0.29848378478322757, 'num_leaves': 255, 'feature_fraction': 0.4314757304268292, 'subsample': 0.4065835150956014, 'subsample_freq': 2, 'min_child_samples': 23, 'learning_rate': 0.009499627105679701}. Best is trial 17 with value: 1.3636509262585537.[0m


[1000]	valid_0's l1: 1.3673
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.3673
Training until validation scores don't improve for 50 rounds


[32m[I 2022-01-01 17:43:14,956][0m Trial 18 finished with value: 1.3673003647195572 and parameters: {'reg_alpha': 0.17391939826452557, 'reg_lambda': 0.29165897706216204, 'num_leaves': 236, 'feature_fraction': 0.4997122874280101, 'subsample': 0.6942436201020596, 'subsample_freq': 7, 'min_child_samples': 78, 'learning_rate': 0.008923366622985795}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.38129
[800]	valid_0's l1: 1.36658
[800]	valid_0's l1: 1.38025
Early stopping, best iteration is:
[885]	valid_0's l1: 1.36642
[200]	valid_0's l1: 1.38344
[200]	valid_0's l1: 1.37852


[32m[I 2022-01-01 17:45:05,696][0m Trial 20 finished with value: 1.366420428918404 and parameters: {'reg_alpha': 0.5825948818913593, 'reg_lambda': 0.3915865541615743, 'num_leaves': 248, 'feature_fraction': 0.5142398751337091, 'subsample': 0.6894326139921833, 'subsample_freq': 3, 'min_child_samples': 78, 'learning_rate': 0.013589543577481282}. Best is trial 17 with value: 1.3636509262585537.[0m


[1000]	valid_0's l1: 1.37977
Did not meet early stopping. Best iteration is:
[999]	valid_0's l1: 1.37977
Training until validation scores don't improve for 50 rounds


[32m[I 2022-01-01 17:45:24,187][0m Trial 21 finished with value: 1.3797745127014283 and parameters: {'reg_alpha': 0.5202012208738831, 'reg_lambda': 0.41197288186166825, 'num_leaves': 44, 'feature_fraction': 0.5108834410678623, 'subsample': 0.5391862774256679, 'subsample_freq': 2, 'min_child_samples': 78, 'learning_rate': 0.008308061866202206}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[400]	valid_0's l1: 1.37067
[400]	valid_0's l1: 1.36903
[200]	valid_0's l1: 1.37646
[200]	valid_0's l1: 1.37138
[600]	valid_0's l1: 1.36698
[600]	valid_0's l1: 1.36784
Early stopping, best iteration is:
[323]	valid_0's l1: 1.36709


[32m[I 2022-01-01 17:48:27,521][0m Trial 25 finished with value: 1.36708794329125 and parameters: {'reg_alpha': 0.5325225775564438, 'reg_lambda': 0.4125735218346755, 'num_leaves': 253, 'feature_fraction': 0.7580639168156124, 'subsample': 0.5249949447982142, 'subsample_freq': 2, 'min_child_samples': 89, 'learning_rate': 0.020334829909390804}. Best is trial 17 with value: 1.3636509262585537.[0m


[400]	valid_0's l1: 1.36692
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[738]	valid_0's l1: 1.36648
[800]	valid_0's l1: 1.36662


[32m[I 2022-01-01 17:49:27,987][0m Trial 23 finished with value: 1.3664782317310242 and parameters: {'reg_alpha': 0.2978732420260235, 'reg_lambda': 0.4039679788581028, 'num_leaves': 256, 'feature_fraction': 0.7682654441527632, 'subsample': 0.5262552321626466, 'subsample_freq': 2, 'min_child_samples': 36, 'learning_rate': 0.013489611296941484}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.44178
[600]	valid_0's l1: 1.36485
[1000]	valid_0's l1: 1.36581
Did not meet early stopping. Best iteration is:
[994]	valid_0's l1: 1.3658
[200]	valid_0's l1: 1.4381


[32m[I 2022-01-01 17:51:09,143][0m Trial 22 finished with value: 1.3658036734591013 and parameters: {'reg_alpha': 0.31120502146391116, 'reg_lambda': 0.4091401151096294, 'num_leaves': 254, 'feature_fraction': 0.4850011820434019, 'subsample': 0.5338913536238782, 'subsample_freq': 2, 'min_child_samples': 80, 'learning_rate': 0.010851751311132963}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[400]	valid_0's l1: 1.41873
[800]	valid_0's l1: 1.36421
[400]	valid_0's l1: 1.4142
[200]	valid_0's l1: 1.43861
[600]	valid_0's l1: 1.40398
[1000]	valid_0's l1: 1.36384
Did not meet early stopping. Best iteration is:
[988]	valid_0's l1: 1.36384


[32m[I 2022-01-01 17:53:18,495][0m Trial 24 finished with value: 1.3638410190556767 and parameters: {'reg_alpha': 0.5710083832398855, 'reg_lambda': 0.41200965710307963, 'num_leaves': 256, 'feature_fraction': 0.4725786491082933, 'subsample': 0.5276452641176206, 'subsample_freq': 2, 'min_child_samples': 89, 'learning_rate': 0.015443257036456158}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.40003
[400]	valid_0's l1: 1.41497
[800]	valid_0's l1: 1.39406
[200]	valid_0's l1: 1.43871
[800]	valid_0's l1: 1.39099
[1000]	valid_0's l1: 1.38745
Did not meet early stopping. Best iteration is:
[997]	valid_0's l1: 1.38743
[600]	valid_0's l1: 1.40078


[32m[I 2022-01-01 17:55:53,843][0m Trial 26 finished with value: 1.3874281784993163 and parameters: {'reg_alpha': 0.6187742269692744, 'reg_lambda': 0.41358624349928713, 'num_leaves': 246, 'feature_fraction': 0.5978749804734976, 'subsample': 0.4045218244131166, 'subsample_freq': 3, 'min_child_samples': 53, 'learning_rate': 0.0018012148371784043}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[400]	valid_0's l1: 1.4151
[1000]	valid_0's l1: 1.385
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.385
[800]	valid_0's l1: 1.39162


[32m[I 2022-01-01 17:56:55,003][0m Trial 27 finished with value: 1.3849975393041523 and parameters: {'reg_alpha': 0.6086115049747198, 'reg_lambda': 0.7243580945947596, 'num_leaves': 221, 'feature_fraction': 0.5928680734817174, 'subsample': 0.40409633220887076, 'subsample_freq': 3, 'min_child_samples': 55, 'learning_rate': 0.0020576068934840845}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.42422
[600]	valid_0's l1: 1.40059
[1000]	valid_0's l1: 1.38565
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.38565
[200]	valid_0's l1: 1.46382


[32m[I 2022-01-01 17:58:41,182][0m Trial 28 finished with value: 1.385645062071598 and parameters: {'reg_alpha': 0.4107777518397125, 'reg_lambda': 0.7015114823309212, 'num_leaves': 220, 'feature_fraction': 0.6018980087447388, 'subsample': 0.4060171105030418, 'subsample_freq': 3, 'min_child_samples': 52, 'learning_rate': 0.00202018555950724}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[800]	valid_0's l1: 1.39116
[400]	valid_0's l1: 1.39911
[200]	valid_0's l1: 1.36606
[400]	valid_0's l1: 1.45153
[1000]	valid_0's l1: 1.38472
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.38472
[600]	valid_0's l1: 1.38632


[32m[I 2022-01-01 18:00:43,960][0m Trial 29 finished with value: 1.3847189107995277 and parameters: {'reg_alpha': 0.4283378640885256, 'reg_lambda': 0.46359566688496745, 'num_leaves': 219, 'feature_fraction': 0.4482463703071291, 'subsample': 0.41101632649207015, 'subsample_freq': 3, 'min_child_samples': 86, 'learning_rate': 0.0021356112957845318}. Best is trial 17 with value: 1.3636509262585537.[0m


Early stopping, best iteration is:
[281]	valid_0's l1: 1.36557
Training until validation scores don't improve for 50 rounds


[32m[I 2022-01-01 18:00:55,229][0m Trial 32 finished with value: 1.3655659880601498 and parameters: {'reg_alpha': 0.686217836770617, 'reg_lambda': 0.49102426516769304, 'num_leaves': 175, 'feature_fraction': 0.4485290038399632, 'subsample': 0.48471355714486497, 'subsample_freq': 2, 'min_child_samples': 69, 'learning_rate': 0.2570921655682015}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.44142
[800]	valid_0's l1: 1.37892
[200]	valid_0's l1: 1.36555
[200]	valid_0's l1: 1.36718
Early stopping, best iteration is:
[264]	valid_0's l1: 1.36644


[32m[I 2022-01-01 18:02:57,291][0m Trial 34 finished with value: 1.3664391518971408 and parameters: {'reg_alpha': 0.6937481133014644, 'reg_lambda': 0.47723306872152654, 'num_leaves': 172, 'feature_fraction': 0.4720472320093235, 'subsample': 0.483439429392874, 'subsample_freq': 2, 'min_child_samples': 69, 'learning_rate': 0.1875642488551682}. Best is trial 17 with value: 1.3636509262585537.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's l1: 1.37442
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.37442
[400]	valid_0's l1: 1.36223
[800]	valid_0's l1: 1.43306
Early stopping, best iteration is:
[394]	valid_0's l1: 1.36222


[32m[I 2022-01-01 18:03:41,162][0m Trial 33 finished with value: 1.36222459369548 and parameters: {'reg_alpha': 0.26590361227273873, 'reg_lambda': 0.2397083380611361, 'num_leaves': 168, 'feature_fraction': 0.4774585948755324, 'subsample': 0.4878808300517289, 'subsample_freq': 2, 'min_child_samples': 69, 'learning_rate': 0.21334481421245585}. Best is trial 33 with value: 1.36222459369548.[0m
[32m[I 2022-01-01 18:03:45,232][0m Trial 30 finished with value: 1.3744248752487338 and parameters: {'reg_alpha': 0.43458910865436473, 'reg_lambda': 0.6641415094158114, 'num_leaves': 224, 'feature_fraction': 0.4289389304900821, 'subsample': 0.40052936605705025, 'subsample_freq': 2, 'min_child_samples': 89, 'learning_rate': 0.003406775440901421}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[130]	valid_0's l1: 1.36644


[32m[I 2022-01-01 18:04:08,164][0m Trial 35 finished with value: 1.3664405151887231 and parameters: {'reg_alpha': 0.6813101060195931, 'reg_lambda': 0.25978003905547453, 'num_leaves': 118, 'feature_fraction': 0.40082244909466225, 'subsample': 0.4836019768721076, 'subsample_freq': 2, 'min_child_samples': 85, 'learning_rate': 0.24323978946438066}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[48]	valid_0's l1: 1.36801


[32m[I 2022-01-01 18:04:46,709][0m Trial 38 finished with value: 1.368005081892687 and parameters: {'reg_alpha': 0.47896877445227415, 'reg_lambda': 0.1992257063999871, 'num_leaves': 175, 'feature_fraction': 0.978253442310012, 'subsample': 0.5970832115911309, 'subsample_freq': 1, 'min_child_samples': 68, 'learning_rate': 0.32140577922232105}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's l1: 1.42568
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.42568
Early stopping, best iteration is:
[140]	valid_0's l1: 1.36677


[32m[I 2022-01-01 18:05:07,439][0m Trial 36 finished with value: 1.3667707852958448 and parameters: {'reg_alpha': 0.4634812386974973, 'reg_lambda': 0.25676102379649784, 'num_leaves': 168, 'feature_fraction': 0.4365302087609301, 'subsample': 0.470791037770198, 'subsample_freq': 2, 'min_child_samples': 71, 'learning_rate': 0.29776459548664436}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.36678


[32m[I 2022-01-01 18:05:29,301][0m Trial 31 finished with value: 1.4256816184749734 and parameters: {'reg_alpha': 0.43883947884656926, 'reg_lambda': 0.2366667145297071, 'num_leaves': 217, 'feature_fraction': 0.452561855577949, 'subsample': 0.509187828027861, 'subsample_freq': 2, 'min_child_samples': 68, 'learning_rate': 0.0006363271961719922}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[219]	valid_0's l1: 1.36619


[32m[I 2022-01-01 18:05:59,331][0m Trial 37 finished with value: 1.3661908875039144 and parameters: {'reg_alpha': 0.49126924346219963, 'reg_lambda': 0.23559749048839956, 'num_leaves': 167, 'feature_fraction': 0.4012756743187959, 'subsample': 0.4754660969688822, 'subsample_freq': 1, 'min_child_samples': 65, 'learning_rate': 0.27357096712874684}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.374
[200]	valid_0's l1: 1.37376
[200]	valid_0's l1: 1.37385
[400]	valid_0's l1: 1.37104
[200]	valid_0's l1: 1.37427
[400]	valid_0's l1: 1.37029
[400]	valid_0's l1: 1.37306
Early stopping, best iteration is:
[471]	valid_0's l1: 1.3709


[32m[I 2022-01-01 18:07:26,414][0m Trial 40 finished with value: 1.3708988968110882 and parameters: {'reg_alpha': 0.2505768057425307, 'reg_lambda': 0.13041215565851633, 'num_leaves': 125, 'feature_fraction': 0.5586474730851144, 'subsample': 0.5705900657879798, 'subsample_freq': 1, 'min_child_samples': 59, 'learning_rate': 0.07703600498076192}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[400]	valid_0's l1: 1.37276
[600]	valid_0's l1: 1.37162
[600]	valid_0's l1: 1.36929
[200]	valid_0's l1: 1.3724
[800]	valid_0's l1: 1.37073
Early stopping, best iteration is:
[634]	valid_0's l1: 1.36901
[600]	valid_0's l1: 1.37172


[32m[I 2022-01-01 18:09:00,742][0m Trial 39 finished with value: 1.3690108261697496 and parameters: {'reg_alpha': 0.6982766353112716, 'reg_lambda': 0.2335670540738529, 'num_leaves': 163, 'feature_fraction': 0.5555438680618651, 'subsample': 0.5752334339643528, 'subsample_freq': 2, 'min_child_samples': 69, 'learning_rate': 0.07625359638078243}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's l1: 1.3697
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.3697
[400]	valid_0's l1: 1.37165
[800]	valid_0's l1: 1.37033


[32m[I 2022-01-01 18:09:51,886][0m Trial 41 finished with value: 1.3696975499132098 and parameters: {'reg_alpha': 0.3599608506104316, 'reg_lambda': 0.15095676745139047, 'num_leaves': 130, 'feature_fraction': 0.5586703221525307, 'subsample': 0.5881687432402005, 'subsample_freq': 1, 'min_child_samples': 99, 'learning_rate': 0.03632990142740309}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's l1: 1.36943
Did not meet early stopping. Best iteration is:
[990]	valid_0's l1: 1.36943
[200]	valid_0's l1: 1.36943
[600]	valid_0's l1: 1.36998


[32m[I 2022-01-01 18:10:50,219][0m Trial 42 finished with value: 1.3694326855942642 and parameters: {'reg_alpha': 0.24942231484312335, 'reg_lambda': 0.11685000690598935, 'num_leaves': 125, 'feature_fraction': 0.5509113756461252, 'subsample': 0.574196573044532, 'subsample_freq': 6, 'min_child_samples': 59, 'learning_rate': 0.03742110899414778}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.37157
[800]	valid_0's l1: 1.36954
[400]	valid_0's l1: 1.3669
[200]	valid_0's l1: 1.40532
[1000]	valid_0's l1: 1.3689
Did not meet early stopping. Best iteration is:
[999]	valid_0's l1: 1.3689
[400]	valid_0's l1: 1.36984


[32m[I 2022-01-01 18:12:54,573][0m Trial 43 finished with value: 1.3689001081145857 and parameters: {'reg_alpha': 0.36893129119547036, 'reg_lambda': 0.3568288755653135, 'num_leaves': 148, 'feature_fraction': 0.5658632695730778, 'subsample': 0.6338870524146257, 'subsample_freq': 6, 'min_child_samples': 100, 'learning_rate': 0.03620315965672327}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.36581
[400]	valid_0's l1: 1.38403
[600]	valid_0's l1: 1.36855
Early stopping, best iteration is:
[145]	valid_0's l1: 1.3697


[32m[I 2022-01-01 18:14:15,285][0m Trial 47 finished with value: 1.3696964775222893 and parameters: {'reg_alpha': 0.3292605506311819, 'reg_lambda': 0.4549563149304393, 'num_leaves': 198, 'feature_fraction': 0.4928720716797217, 'subsample': 0.4583584025954381, 'subsample_freq': 3, 'min_child_samples': 84, 'learning_rate': 0.5707956702854403}. Best is trial 33 with value: 1.36222459369548.[0m


[800]	valid_0's l1: 1.36517
Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.3759
[800]	valid_0's l1: 1.36742
[1000]	valid_0's l1: 1.36442
Did not meet early stopping. Best iteration is:
[992]	valid_0's l1: 1.36442


[32m[I 2022-01-01 18:15:57,645][0m Trial 44 finished with value: 1.3644168648537494 and parameters: {'reg_alpha': 0.3351338275137726, 'reg_lambda': 0.35081127041033255, 'num_leaves': 200, 'feature_fraction': 0.48299845366994676, 'subsample': 0.6454119953599754, 'subsample_freq': 3, 'min_child_samples': 94, 'learning_rate': 0.03784504549653343}. Best is trial 33 with value: 1.36222459369548.[0m


[200]	valid_0's l1: 1.39728
Training until validation scores don't improve for 50 rounds
[800]	valid_0's l1: 1.37094
[1000]	valid_0's l1: 1.36655
Did not meet early stopping. Best iteration is:
[983]	valid_0's l1: 1.36654


[32m[I 2022-01-01 18:16:45,327][0m Trial 45 finished with value: 1.3665426460032943 and parameters: {'reg_alpha': 0.3301485612314563, 'reg_lambda': 0.34444729111269395, 'num_leaves': 196, 'feature_fraction': 0.47311265211075515, 'subsample': 0.6301085582425556, 'subsample_freq': 3, 'min_child_samples': 83, 'learning_rate': 0.028623411775722283}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.36799
[1000]	valid_0's l1: 1.36866
Did not meet early stopping. Best iteration is:
[999]	valid_0's l1: 1.36866
[400]	valid_0's l1: 1.37795


[32m[I 2022-01-01 18:17:56,455][0m Trial 46 finished with value: 1.3686639827546914 and parameters: {'reg_alpha': 0.33253385059561624, 'reg_lambda': 0.3543688201694821, 'num_leaves': 199, 'feature_fraction': 0.48329633566999675, 'subsample': 0.4468506971165938, 'subsample_freq': 3, 'min_child_samples': 74, 'learning_rate': 0.00560847925906099}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.37141
Early stopping, best iteration is:
[346]	valid_0's l1: 1.36598


[32m[I 2022-01-01 18:18:34,106][0m Trial 49 finished with value: 1.3659813880247702 and parameters: {'reg_alpha': 0.54495796010142, 'reg_lambda': 0.34190311268775847, 'num_leaves': 201, 'feature_fraction': 0.4674963871039012, 'subsample': 0.49924264042180916, 'subsample_freq': 3, 'min_child_samples': 90, 'learning_rate': 0.136852418491591}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.37008
[200]	valid_0's l1: 1.3658
[400]	valid_0's l1: 1.36776
Early stopping, best iteration is:
[386]	valid_0's l1: 1.36771


[32m[I 2022-01-01 18:19:57,618][0m Trial 50 finished with value: 1.3677054910940383 and parameters: {'reg_alpha': 0.5517363849038636, 'reg_lambda': 0.526776895627785, 'num_leaves': 185, 'feature_fraction': 0.42420867949589247, 'subsample': 0.74610999596699, 'subsample_freq': 4, 'min_child_samples': 95, 'learning_rate': 0.11252743681002082}. Best is trial 33 with value: 1.36222459369548.[0m


[200]	valid_0's l1: 1.36689
Training until validation scores don't improve for 50 rounds
[800]	valid_0's l1: 1.36723
[400]	valid_0's l1: 1.36414
[400]	valid_0's l1: 1.36487
[200]	valid_0's l1: 1.36951
Early stopping, best iteration is:
[200]	valid_0's l1: 1.36951


[32m[I 2022-01-01 18:21:59,661][0m Trial 53 finished with value: 1.3695131231057918 and parameters: {'reg_alpha': 0.13887359863979687, 'reg_lambda': 0.50557855908422, 'num_leaves': 235, 'feature_fraction': 0.6273254017957162, 'subsample': 0.6600270663641752, 'subsample_freq': 2, 'min_child_samples': 6, 'learning_rate': 0.052359073879569645}. Best is trial 33 with value: 1.36222459369548.[0m


Early stopping, best iteration is:
[516]	valid_0's l1: 1.36299
[1000]	valid_0's l1: 1.36633
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.36633
Training until validation scores don't improve for 50 rounds


[32m[I 2022-01-01 18:22:16,214][0m Trial 51 finished with value: 1.3629944795679225 and parameters: {'reg_alpha': 0.552536625161997, 'reg_lambda': 0.5191748617736902, 'num_leaves': 233, 'feature_fraction': 0.4230307967479837, 'subsample': 0.7559552980504192, 'subsample_freq': 4, 'min_child_samples': 95, 'learning_rate': 0.10490869149867478}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.36374


[32m[I 2022-01-01 18:22:33,683][0m Trial 48 finished with value: 1.36632778138624 and parameters: {'reg_alpha': 0.13654590959904223, 'reg_lambda': 0.5212554936455452, 'num_leaves': 235, 'feature_fraction': 0.47814759099837834, 'subsample': 0.5098536299191213, 'subsample_freq': 2, 'min_child_samples': 74, 'learning_rate': 0.007051731957608121}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[612]	valid_0's l1: 1.36374


[32m[I 2022-01-01 18:23:05,667][0m Trial 52 finished with value: 1.3637366088943605 and parameters: {'reg_alpha': 0.13866767529842924, 'reg_lambda': 0.5074828692572885, 'num_leaves': 185, 'feature_fraction': 0.42081946043798774, 'subsample': 0.75176447773664, 'subsample_freq': 4, 'min_child_samples': 95, 'learning_rate': 0.10854770807552584}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[48]	valid_0's l1: 1.37021


[32m[I 2022-01-01 18:23:28,305][0m Trial 56 finished with value: 1.370214310841447 and parameters: {'reg_alpha': 0.7259650241329847, 'reg_lambda': 0.4597179782869165, 'num_leaves': 182, 'feature_fraction': 0.41836229154796606, 'subsample': 0.8459465441322342, 'subsample_freq': 4, 'min_child_samples': 95, 'learning_rate': 0.5228484636663959}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.40415
[200]	valid_0's l1: 1.37388
Early stopping, best iteration is:
[145]	valid_0's l1: 1.36773


[32m[I 2022-01-01 18:24:58,952][0m Trial 58 finished with value: 1.3677326622973935 and parameters: {'reg_alpha': 0.2149171329338304, 'reg_lambda': 0.5699378161211236, 'num_leaves': 209, 'feature_fraction': 0.5300916073021922, 'subsample': 0.7423611856552548, 'subsample_freq': 5, 'min_child_samples': 92, 'learning_rate': 0.10539451328302336}. Best is trial 33 with value: 1.36222459369548.[0m


[200]	valid_0's l1: 1.37281
Training until validation scores don't improve for 50 rounds
[400]	valid_0's l1: 1.38373
[400]	valid_0's l1: 1.36937
[400]	valid_0's l1: 1.36994
Early stopping, best iteration is:
[507]	valid_0's l1: 1.36871


[32m[I 2022-01-01 18:26:47,313][0m Trial 55 finished with value: 1.3687107489788988 and parameters: {'reg_alpha': 0.7456266810386778, 'reg_lambda': 0.5982257610254118, 'num_leaves': 208, 'feature_fraction': 0.5231413877799465, 'subsample': 0.7507183400924201, 'subsample_freq': 4, 'min_child_samples': 95, 'learning_rate': 0.021227271110480218}. Best is trial 33 with value: 1.36222459369548.[0m


[200]	valid_0's l1: 1.37323
[600]	valid_0's l1: 1.37496
Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.36889
[400]	valid_0's l1: 1.36818
[800]	valid_0's l1: 1.37064
[200]	valid_0's l1: 1.36749
[800]	valid_0's l1: 1.3677
[600]	valid_0's l1: 1.3672
[1000]	valid_0's l1: 1.36857
Did not meet early stopping. Best iteration is:
[999]	valid_0's l1: 1.36855
[400]	valid_0's l1: 1.36495


[32m[I 2022-01-01 18:30:16,977][0m Trial 54 finished with value: 1.3685540062275572 and parameters: {'reg_alpha': 0.20968174881690466, 'reg_lambda': 0.4373280945703868, 'num_leaves': 209, 'feature_fraction': 0.5323948100360304, 'subsample': 0.5533280548848861, 'subsample_freq': 2, 'min_child_samples': 96, 'learning_rate': 0.005703146884456339}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's l1: 1.36682
Did not meet early stopping. Best iteration is:
[998]	valid_0's l1: 1.36682
[800]	valid_0's l1: 1.36602


[32m[I 2022-01-01 18:31:20,452][0m Trial 57 finished with value: 1.3668232333183858 and parameters: {'reg_alpha': 0.23468151203931964, 'reg_lambda': 0.6185263479773775, 'num_leaves': 206, 'feature_fraction': 0.41269983924922027, 'subsample': 0.8477141944407555, 'subsample_freq': 4, 'min_child_samples': 95, 'learning_rate': 0.023616498660961244}. Best is trial 33 with value: 1.36222459369548.[0m


[600]	valid_0's l1: 1.36398
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[596]	valid_0's l1: 1.36397


[32m[I 2022-01-01 18:32:02,189][0m Trial 60 finished with value: 1.363974170676233 and parameters: {'reg_alpha': 0.17323445777669294, 'reg_lambda': 0.4352594975924873, 'num_leaves': 231, 'feature_fraction': 0.4041785728218242, 'subsample': 0.7803740387917737, 'subsample_freq': 5, 'min_child_samples': 98, 'learning_rate': 0.06390981490481232}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.37286
[1000]	valid_0's l1: 1.36519
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.36519


[32m[I 2022-01-01 18:32:45,008][0m Trial 59 finished with value: 1.3651869475155236 and parameters: {'reg_alpha': 0.17301442805108186, 'reg_lambda': 0.3771162474671369, 'num_leaves': 209, 'feature_fraction': 0.401534131167551, 'subsample': 0.737768133568794, 'subsample_freq': 4, 'min_child_samples': 97, 'learning_rate': 0.0223096725507844}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.36745
[200]	valid_0's l1: 1.36396
Early stopping, best iteration is:
[262]	valid_0's l1: 1.36723


[32m[I 2022-01-01 18:33:52,078][0m Trial 62 finished with value: 1.3672265104564343 and parameters: {'reg_alpha': 0.16880856155537927, 'reg_lambda': 0.18083381819025698, 'num_leaves': 230, 'feature_fraction': 0.8136005504475776, 'subsample': 0.7960522513024504, 'subsample_freq': 5, 'min_child_samples': 81, 'learning_rate': 0.052153361430329934}. Best is trial 33 with value: 1.36222459369548.[0m


[400]	valid_0's l1: 1.36716
Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.36748
[400]	valid_0's l1: 1.36256
[200]	valid_0's l1: 1.3684
[600]	valid_0's l1: 1.36546
[400]	valid_0's l1: 1.36576
[400]	valid_0's l1: 1.36658
[600]	valid_0's l1: 1.36148
[800]	valid_0's l1: 1.36407
[600]	valid_0's l1: 1.36468
[600]	valid_0's l1: 1.36562
[800]	valid_0's l1: 1.36089
[1000]	valid_0's l1: 1.36364
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.36364
[800]	valid_0's l1: 1.3646
[800]	valid_0's l1: 1.36384


[32m[I 2022-01-01 18:38:39,612][0m Trial 61 finished with value: 1.3636401438580377 and parameters: {'reg_alpha': 0.5716275636828269, 'reg_lambda': 0.3814917992148481, 'num_leaves': 242, 'feature_fraction': 0.4044425149867289, 'subsample': 0.8144528008817438, 'subsample_freq': 5, 'min_child_samples': 100, 'learning_rate': 0.018596908971900496}. Best is trial 33 with value: 1.36222459369548.[0m


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[920]	valid_0's l1: 1.36065
Early stopping, best iteration is:
[845]	valid_0's l1: 1.3634
Early stopping, best iteration is:
[913]	valid_0's l1: 1.36445


[32m[I 2022-01-01 18:39:30,744][0m Trial 63 finished with value: 1.3606505292543247 and parameters: {'reg_alpha': 0.10650029101278541, 'reg_lambda': 0.5399843503291755, 'num_leaves': 244, 'feature_fraction': 0.4487727288089311, 'subsample': 0.7873138460840563, 'subsample_freq': 5, 'min_child_samples': 100, 'learning_rate': 0.05678266844862463}. Best is trial 63 with value: 1.3606505292543247.[0m
[32m[I 2022-01-01 18:39:31,665][0m Trial 64 finished with value: 1.3633995276270925 and parameters: {'reg_alpha': 0.10247372660937354, 'reg_lambda': 0.32552037267802736, 'num_leaves': 231, 'feature_fraction': 0.45713080080487434, 'subsample': 0.7841736352834945, 'subsample_freq': 5, 'min_child_samples': 90, 'learning_rate': 0.05023204875623388}. Best is trial 63 with value: 1.3606505292543247.[0m
[32m[I 2022-01-01 18:39:39,157][0m Trial 65 finished with value: 1.3644475338136586 and parameters: {'reg_alpha': 0.10973532975075717, 'reg_lambda': 0.429655431010162, 'num_leaves': 154, 'featu

Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.36454
Early stopping, best iteration is:
[227]	valid_0's l1: 1.36453


[32m[I 2022-01-01 18:41:04,680][0m Trial 66 finished with value: 1.3645341553576875 and parameters: {'reg_alpha': 0.5892405164352204, 'reg_lambda': 0.3189936956949263, 'num_leaves': 246, 'feature_fraction': 0.4529566256879017, 'subsample': 0.9169130140967466, 'subsample_freq': 5, 'min_child_samples': 100, 'learning_rate': 0.15789671339964043}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.36423
Early stopping, best iteration is:
[145]	valid_0's l1: 1.36539


[32m[I 2022-01-01 18:41:27,734][0m Trial 69 finished with value: 1.3653892168279778 and parameters: {'reg_alpha': 0.1010974811946315, 'reg_lambda': 0.28312066354026433, 'num_leaves': 245, 'feature_fraction': 0.43754742605819585, 'subsample': 0.9172128341068824, 'subsample_freq': 6, 'min_child_samples': 87, 'learning_rate': 0.1619920875637345}. Best is trial 63 with value: 1.3606505292543247.[0m


[200]	valid_0's l1: 1.3764
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[233]	valid_0's l1: 1.36385


[32m[I 2022-01-01 18:42:03,305][0m Trial 68 finished with value: 1.3638460037932432 and parameters: {'reg_alpha': 0.10795583154535618, 'reg_lambda': 0.5512548963542798, 'num_leaves': 244, 'feature_fraction': 0.4483724556114654, 'subsample': 0.9321270931957355, 'subsample_freq': 5, 'min_child_samples': 100, 'learning_rate': 0.16270665579150714}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[46]	valid_0's l1: 1.37108


[32m[I 2022-01-01 18:42:20,221][0m Trial 71 finished with value: 1.3710812432133361 and parameters: {'reg_alpha': 0.5126161861923347, 'reg_lambda': 0.5408368012411403, 'num_leaves': 240, 'feature_fraction': 0.4240617930505432, 'subsample': 0.8186116252110311, 'subsample_freq': 6, 'min_child_samples': 92, 'learning_rate': 0.4652655409179009}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.37213
Early stopping, best iteration is:
[207]	valid_0's l1: 1.37031


[32m[I 2022-01-01 18:43:12,476][0m Trial 70 finished with value: 1.3703091937487941 and parameters: {'reg_alpha': 0.6400019156025365, 'reg_lambda': 0.5292820884923438, 'num_leaves': 241, 'feature_fraction': 0.4345232629165273, 'subsample': 0.8155244003530668, 'subsample_freq': 6, 'min_child_samples': 37, 'learning_rate': 0.4800936270523864}. Best is trial 63 with value: 1.3606505292543247.[0m


[400]	valid_0's l1: 1.36709
Early stopping, best iteration is:
[109]	valid_0's l1: 1.36669


[32m[I 2022-01-01 18:43:22,156][0m Trial 72 finished with value: 1.3666918710061318 and parameters: {'reg_alpha': 0.628767360292332, 'reg_lambda': 0.2716198113610046, 'num_leaves': 226, 'feature_fraction': 0.4238167540777431, 'subsample': 0.812145392859102, 'subsample_freq': 6, 'min_child_samples': 92, 'learning_rate': 0.4352548550191212}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.38212
[600]	valid_0's l1: 1.36548
[200]	valid_0's l1: 1.37656
[200]	valid_0's l1: 1.38825
[400]	valid_0's l1: 1.37099
[800]	valid_0's l1: 1.36496
[400]	valid_0's l1: 1.3687
[400]	valid_0's l1: 1.37392
[600]	valid_0's l1: 1.36834
[1000]	valid_0's l1: 1.36423
Did not meet early stopping. Best iteration is:
[999]	valid_0's l1: 1.36423


[32m[I 2022-01-01 18:48:09,940][0m Trial 67 finished with value: 1.3642278115294484 and parameters: {'reg_alpha': 0.10673525181140767, 'reg_lambda': 0.5320691967123405, 'num_leaves': 247, 'feature_fraction': 0.452599762269588, 'subsample': 0.9301519872133592, 'subsample_freq': 6, 'min_child_samples': 100, 'learning_rate': 0.01501452689297023}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.36777
[800]	valid_0's l1: 1.36743
[600]	valid_0's l1: 1.36877
Early stopping, best iteration is:
[644]	valid_0's l1: 1.36762


[32m[I 2022-01-01 18:49:31,626][0m Trial 74 finished with value: 1.3676220428868535 and parameters: {'reg_alpha': 0.5664107896396184, 'reg_lambda': 0.37966149460831067, 'num_leaves': 256, 'feature_fraction': 0.5009625887967097, 'subsample': 0.8401134695446879, 'subsample_freq': 4, 'min_child_samples': 87, 'learning_rate': 0.015872487935990994}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's l1: 1.36644
Did not meet early stopping. Best iteration is:
[993]	valid_0's l1: 1.36644
[200]	valid_0's l1: 1.38522
[800]	valid_0's l1: 1.36751


[32m[I 2022-01-01 18:50:36,122][0m Trial 73 finished with value: 1.3664415443299405 and parameters: {'reg_alpha': 0.6332851211755144, 'reg_lambda': 0.48495110120012336, 'num_leaves': 228, 'feature_fraction': 0.5002447261131258, 'subsample': 0.7670452942214506, 'subsample_freq': 4, 'min_child_samples': 20, 'learning_rate': 0.01203102054172353}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.38728
[400]	valid_0's l1: 1.36994
[1000]	valid_0's l1: 1.36689
Did not meet early stopping. Best iteration is:
[993]	valid_0's l1: 1.36689


[32m[I 2022-01-01 18:52:26,145][0m Trial 75 finished with value: 1.366894284408248 and parameters: {'reg_alpha': 0.5710974866909118, 'reg_lambda': 0.3801826671993318, 'num_leaves': 254, 'feature_fraction': 0.4996566068276567, 'subsample': 0.7664635096849644, 'subsample_freq': 4, 'min_child_samples': 20, 'learning_rate': 0.00916513805810802}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.4197
[400]	valid_0's l1: 1.37167
[600]	valid_0's l1: 1.36657
[200]	valid_0's l1: 1.42538
[400]	valid_0's l1: 1.3947
[800]	valid_0's l1: 1.36589
[600]	valid_0's l1: 1.36749
[400]	valid_0's l1: 1.40109
[600]	valid_0's l1: 1.38289
[1000]	valid_0's l1: 1.36526
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.36526
[800]	valid_0's l1: 1.3665


[32m[I 2022-01-01 18:56:55,070][0m Trial 76 finished with value: 1.3652574548779162 and parameters: {'reg_alpha': 0.5662580197635636, 'reg_lambda': 0.31294961358612927, 'num_leaves': 254, 'feature_fraction': 0.4986374043782351, 'subsample': 0.7601975127004388, 'subsample_freq': 4, 'min_child_samples': 42, 'learning_rate': 0.010796725358123153}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.38862
[800]	valid_0's l1: 1.37628
[1000]	valid_0's l1: 1.36614
Did not meet early stopping. Best iteration is:
[989]	valid_0's l1: 1.36614
[200]	valid_0's l1: 1.36775
[800]	valid_0's l1: 1.3816


[32m[I 2022-01-01 18:58:46,053][0m Trial 77 finished with value: 1.36613783369983 and parameters: {'reg_alpha': 0.1458271275568584, 'reg_lambda': 0.3163590849218324, 'num_leaves': 256, 'feature_fraction': 0.5046114378050534, 'subsample': 0.8809384274778272, 'subsample_freq': 5, 'min_child_samples': 89, 'learning_rate': 0.009544652591839401}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's l1: 1.37178
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.37178
[400]	valid_0's l1: 1.36603
Early stopping, best iteration is:
[350]	valid_0's l1: 1.36598


[32m[I 2022-01-01 18:59:51,762][0m Trial 80 finished with value: 1.3659830085557372 and parameters: {'reg_alpha': 0.14063480099506823, 'reg_lambda': 0.5020146391046423, 'num_leaves': 217, 'feature_fraction': 0.46398413937907934, 'subsample': 0.7218999030717528, 'subsample_freq': 5, 'min_child_samples': 88, 'learning_rate': 0.08885804279983325}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds


[32m[I 2022-01-01 19:00:11,331][0m Trial 78 finished with value: 1.3717838800831148 and parameters: {'reg_alpha': 0.592018495413787, 'reg_lambda': 0.30420353262810307, 'num_leaves': 256, 'feature_fraction': 0.4641523360765551, 'subsample': 0.7617104865943612, 'subsample_freq': 5, 'min_child_samples': 90, 'learning_rate': 0.0037404690013564793}. Best is trial 63 with value: 1.3606505292543247.[0m


[1000]	valid_0's l1: 1.37677
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.37677
Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.42589


[32m[I 2022-01-01 19:00:40,485][0m Trial 79 finished with value: 1.376773717248256 and parameters: {'reg_alpha': 0.5960753484534225, 'reg_lambda': 0.31812825371316844, 'num_leaves': 187, 'feature_fraction': 0.46398679009379246, 'subsample': 0.7257232790269337, 'subsample_freq': 5, 'min_child_samples': 89, 'learning_rate': 0.0032529637199889727}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.37408
[200]	valid_0's l1: 1.417
[400]	valid_0's l1: 1.40288
[200]	valid_0's l1: 1.36664
[400]	valid_0's l1: 1.37246
Early stopping, best iteration is:
[277]	valid_0's l1: 1.36492
[600]	valid_0's l1: 1.39029
[400]	valid_0's l1: 1.39446


[32m[I 2022-01-01 19:03:23,575][0m Trial 84 finished with value: 1.3649231806008209 and parameters: {'reg_alpha': 0.6605377663014773, 'reg_lambda': 0.5596875024059725, 'num_leaves': 240, 'feature_fraction': 0.44327223232066854, 'subsample': 0.7887718005405501, 'subsample_freq': 5, 'min_child_samples': 92, 'learning_rate': 0.11327789175629176}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.37052
[800]	valid_0's l1: 1.38336
[600]	valid_0's l1: 1.38302
[800]	valid_0's l1: 1.36836
[200]	valid_0's l1: 1.36899
[1000]	valid_0's l1: 1.36737
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.36737
[1000]	valid_0's l1: 1.3788
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.3788


[32m[I 2022-01-01 19:06:24,818][0m Trial 83 finished with value: 1.367366882177527 and parameters: {'reg_alpha': 0.19089086602744032, 'reg_lambda': 0.20022864471639298, 'num_leaves': 138, 'feature_fraction': 0.4098547036110898, 'subsample': 0.7844268395552147, 'subsample_freq': 5, 'min_child_samples': 81, 'learning_rate': 0.03014439275067332}. Best is trial 63 with value: 1.3606505292543247.[0m


[400]	valid_0's l1: 1.3671
[800]	valid_0's l1: 1.37632


[32m[I 2022-01-01 19:06:35,988][0m Trial 81 finished with value: 1.378797348319981 and parameters: {'reg_alpha': 0.6591521711057484, 'reg_lambda': 0.220604151925884, 'num_leaves': 158, 'feature_fraction': 0.40043901720758035, 'subsample': 0.7166626413078596, 'subsample_freq': 5, 'min_child_samples': 15, 'learning_rate': 0.0033238597770563175}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
[600]	valid_0's l1: 1.36556
[1000]	valid_0's l1: 1.37245
Did not meet early stopping. Best iteration is:
[1000]	valid_0's l1: 1.37245
[200]	valid_0's l1: 1.36957


[32m[I 2022-01-01 19:08:30,631][0m Trial 82 finished with value: 1.3724458695655122 and parameters: {'reg_alpha': 0.6014366911877553, 'reg_lambda': 0.5941563713000693, 'num_leaves': 189, 'feature_fraction': 0.4018053569448113, 'subsample': 0.7850389706347479, 'subsample_freq': 5, 'min_child_samples': 81, 'learning_rate': 0.00422621359291065}. Best is trial 63 with value: 1.3606505292543247.[0m


[200]	valid_0's l1: 1.3694
Training until validation scores don't improve for 50 rounds
[800]	valid_0's l1: 1.36347
Early stopping, best iteration is:
[78]	valid_0's l1: 1.37579


[32m[I 2022-01-01 19:09:21,239][0m Trial 88 finished with value: 1.3757895761977306 and parameters: {'reg_alpha': 0.12405282314945962, 'reg_lambda': 0.4720231062280345, 'num_leaves': 110, 'feature_fraction': 0.7175894440048359, 'subsample': 0.8808174939262922, 'subsample_freq': 4, 'min_child_samples': 76, 'learning_rate': 0.21571346200529148}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[840]	valid_0's l1: 1.36341
[400]	valid_0's l1: 1.36717
[400]	valid_0's l1: 1.36741


[32m[I 2022-01-01 19:10:17,148][0m Trial 85 finished with value: 1.3634100402717944 and parameters: {'reg_alpha': 0.11506008650504464, 'reg_lambda': 0.550133563381692, 'num_leaves': 238, 'feature_fraction': 0.41392311867780696, 'subsample': 0.7043590047825592, 'subsample_freq': 5, 'min_child_samples': 83, 'learning_rate': 0.05266042474409279}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
[200]	valid_0's l1: 1.37023
[600]	valid_0's l1: 1.36571
[600]	valid_0's l1: 1.36503
[200]	valid_0's l1: 1.36661
[400]	valid_0's l1: 1.36853
[400]	valid_0's l1: 1.36407
[800]	valid_0's l1: 1.36396
[800]	valid_0's l1: 1.36473
Early stopping, best iteration is:
[854]	valid_0's l1: 1.36368
Early stopping, best iteration is:
[854]	valid_0's l1: 1.36429
[600]	valid_0's l1: 1.36722
[600]	valid_0's l1: 1.36229


[32m[I 2022-01-01 19:14:33,310][0m Trial 86 finished with value: 1.3642865452549342 and parameters: {'reg_alpha': 0.12634937303356303, 'reg_lambda': 0.5869490988069395, 'num_leaves': 235, 'feature_fraction': 0.4320592236234365, 'subsample': 0.9753643331240774, 'subsample_freq': 5, 'min_child_samples': 49, 'learning_rate': 0.04534854708936275}. Best is trial 63 with value: 1.3606505292543247.[0m
[32m[I 2022-01-01 19:14:33,498][0m Trial 87 finished with value: 1.3636797357929649 and parameters: {'reg_alpha': 0.5224783988127659, 'reg_lambda': 0.5960231788192423, 'num_leaves': 235, 'feature_fraction': 0.4394942222608906, 'subsample': 0.956150892276277, 'subsample_freq': 5, 'min_child_samples': 97, 'learning_rate': 0.04843124283772629}. Best is trial 63 with value: 1.3606505292543247.[0m


Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
[800]	valid_0's l1: 1.3614
[800]	valid_0's l1: 1.36625
[200]	valid_0's l1: 1.36921
[200]	valid_0's l1: 1.36707
Early stopping, best iteration is:
[870]	valid_0's l1: 1.36122


[32m[I 2022-01-01 19:16:48,563][0m Trial 90 finished with value: 1.3612208063757187 and parameters: {'reg_alpha': 0.2779030350511046, 'reg_lambda': 0.6547784482327931, 'num_leaves': 234, 'feature_fraction': 0.42991824278657514, 'subsample': 0.6731063392492177, 'subsample_freq': 5, 'min_child_samples': 85, 'learning_rate': 0.050176456925167166}. Best is trial 63 with value: 1.3606505292543247.[0m


[400]	valid_0's l1: 1.36593
[1000]	valid_0's l1: 1.36575
Did not meet early stopping. Best iteration is:
[983]	valid_0's l1: 1.36575
[400]	valid_0's l1: 1.36327


[32m[I 2022-01-01 19:17:39,257][0m Trial 89 finished with value: 1.3657455976361363 and parameters: {'reg_alpha': 0.518869722409942, 'reg_lambda': 0.5561299954250023, 'num_leaves': 215, 'feature_fraction': 0.4398138986239549, 'subsample': 0.9971950701249148, 'subsample_freq': 5, 'min_child_samples': 97, 'learning_rate': 0.04617546597694592}. Best is trial 63 with value: 1.3606505292543247.[0m


Early stopping, best iteration is:
[501]	valid_0's l1: 1.36534


[32m[I 2022-01-01 19:18:08,855][0m Trial 92 finished with value: 1.3653373327910887 and parameters: {'reg_alpha': 0.5233943849117308, 'reg_lambda': 0.6574312920648473, 'num_leaves': 223, 'feature_fraction': 0.415818035498764, 'subsample': 0.6835335104004079, 'subsample_freq': 5, 'min_child_samples': 62, 'learning_rate': 0.06567255049078936}. Best is trial 63 with value: 1.3606505292543247.[0m


[600]	valid_0's l1: 1.36216
[800]	valid_0's l1: 1.36154
Early stopping, best iteration is:
[845]	valid_0's l1: 1.36144


[32m[I 2022-01-01 19:18:50,815][0m Trial 91 finished with value: 1.3614368791086748 and parameters: {'reg_alpha': 0.5099134364493241, 'reg_lambda': 0.6147456683798871, 'num_leaves': 217, 'feature_fraction': 0.4177823693174164, 'subsample': 0.6733939314852503, 'subsample_freq': 4, 'min_child_samples': 85, 'learning_rate': 0.045583999269574}. Best is trial 63 with value: 1.3606505292543247.[0m


## Train with tuned lgbm ##


In [32]:
Class_1 = LGBMRegressor(
    task = 'train',
    boosting_type = 'gbdt',
    objective = 'mae',
    random_state = 101,
    importance_type = 'gain',
    n_estimators = 1000,
    n_jobs = multiprocessing.cpu_count(),
    metric = 'mae'
)
Class_1.set_params(
    **study.best_trial.params
)
Class_1.fit(
    X_train, 
    y_train_1,
    eval_set = [(X_valid, y_valid_1)],
    eval_names = ['valid'],
    early_stopping_rounds = 50,verbose=200
)

Training until validation scores don't improve for 50 rounds
[200]	valid's l1: 1.36564
[400]	valid's l1: 1.363
[600]	valid's l1: 1.36172
[800]	valid's l1: 1.36085
Early stopping, best iteration is:
[797]	valid's l1: 1.36085


LGBMRegressor(feature_fraction=0.4487727288089311, importance_type='gain',
              learning_rate=0.05678266844862463, metric='mae',
              min_child_samples=100, n_estimators=1000, n_jobs=4,
              num_leaves=244, objective='mae', random_state=101,
              reg_alpha=0.10650029101278541, reg_lambda=0.5399843503291755,
              subsample=0.7873138460840563, subsample_freq=5, task='train')

In [33]:
import joblib
joblib_file = "Class_1.pkl"  
joblib.dump(Class_1, joblib_file)



['Class_1.pkl']