In [278]:
## Library Import

# Basic
import re
import warnings
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 100)
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'

# Preprocessing
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Scaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer

# AutoML optuna library
import optuna
import optuna.integration.lightgbm as lgb

# Linear Model
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

# Tree Model
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# Metric
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score

In [279]:
# data load
df = pd.read_csv('original_full_data.csv', index_col = 0)
mmdf = pd.read_csv('bloomberg_map_index_20200130.csv', index_col = 0)

In [280]:
# From the date the target data is present ~
df = df.loc[9132 :, :]

In [281]:
df.reset_index(inplace = True, drop = True)

In [282]:
# Class generates derived variables & shifting target variable

class Builder:

    def __init__(self, input_df, map_df, target_day):
        self.df = input_df
        self.mdf = map_df
        self.lag_days = [1, 5, 20, 60, 120, 260] # lagging day variation [1day 1week 1month 3month 6month 1year]
        self.target_day = target_day # ex) 1, 260


    # shifting function

    def targeter(self, target_column):
        self.df[target_column] = self.df[target_column].shift(-1*int(self.target_day))

        return self.df
    
    # generating derived columns function : return ex) 20060107/20060101

    def returner(self, target_column):      

        col_name =  []
        # creating new column name
        for i in self.lag_days:
            col_name.append(target_column + str(i))
            
        # appending lagged columns
        for i, j in enumerate(self.lag_days):
            self.df[locals()['col_name'][i]] = self.df[target_column] / self.df[target_column].shift(periods = j, axis = 0)

        return self.df 

    # generating derived function : minus ex) 20060107 - 20060101
    
    def differ(self, target_column):

        col_name = []
        for i in self.lag_days:
            col_name.append(target_column + str(i))
        for i, j in enumerate(self.lag_days):
            self.df[locals()['col_name'][i]] = self.df[target_column] - self.df[target_column].shift(periods = j, axis = 0)
        return self.df

    # generating derived function : volume * close  -> convert to return 

    def producter(self, target_column):

        col_name = (str(target_column).split('_'))[0]       
        self.df[locals()['col_name'] + '_product'] = self.df[col_name + '_volume'] * self.df[col_name + '_close'] # j volume과 j close를 곱해서 

        return self.df
    
    # executing functions
    
    def execution(self):
        for i, j in enumerate(self.mdf['ticker']): # mdf에서 ticker 확인
            if j in self.df.columns:
                if self.mdf.loc[i, :][1] == 'product':  # key값이 product인 경우
                    self.producter(j)
                    col = str(j).split('_')[0]
                    self.df[col + '_product'] = self.df[col + '_product'] / self.df[col + '_product'].shift(1)
                    
                #elif self.mdf.loc[i, :][1] == 'minus': # key값이 minus인 경우
                #    self.differ(j)

                #elif self.mdf.loc[i, :][1] == 'change': # key값이 change인 경우
                #    self.returner(j)


                elif self.mdf.loc[i, :][1] == 'target': # key값이 target인 경우 (현재 KRXsemiconductor_change)
                    self.targeter(j)

                else:
                    pass
            else:
                pass
                            
        return self.df


    # side : creating auto correlation graph function

    def acf_cal(self, target):
        data = pd.DataFrame(self.df[target])
        data = data.interpolate(method = 'cubic', limit_area = 'inside').fillna(method = 'ffill').fillna(method = 'bfill')
        acf = sm.tsa.acf(data)      
        plt.stem(acf)
        plt.show()

In [283]:
newdf = df.copy()

In [284]:
len(newdf.columns)

417

In [285]:
# date preprocessing

newdf['date'] = pd.to_datetime(newdf['date'])
newdf.set_index('date', inplace = True)

In [286]:
# fillna cubic -> ffill -> bfill

newdf = newdf.interpolate(method = 'cubic', limit_area = 'inside')
newdf = newdf.fillna(method = 'ffill')
newdf = newdf.fillna(method = 'bfill')

In [287]:
# check na value

newdf.isna().sum().sum()

0

In [288]:
# class load

cla = Builder(newdf, mmdf, 120)

In [289]:
# execution function
# 새로운 데이터프레임 생성 
cla.execution()

Unnamed: 0_level_0,GDPCQOQIndex,GDPCYOYIndex,GDPCTOT%Index,GPDITOC%Index,GPGSTOC%Index,RGCDCIPIIndex,GDPCUR$Index,GDPCURYIndex,GDPPIQQIndex,GDPCPCECIndex,CPIYOYIndex,CPIXYOYIndex,PCEDEFYIndex,PCECYOYIndex,CPICHNGIndex,CPUPXCHGIndex,FDIDFDMOIndex,FDIDSGMOIndex,FDIUFDYOIndex,FDIUSGYOIndex,USURTOTIndex,USUDMAERIndex,INJCJCIndex,INJCSPIndex,NFPTCHIndex,...,AMD_product,APPLE_product,AppliedMaterials_product,Aspeed_product,Dell_product,Facebook_product,Google_product,HPE_product,Intel_product,LamResearch_product,MediaTek_product,Micron_product,Microsoft_product,Nuvoton_product,Nvidia_product,Philadelphia_product,QCOM_product,SamsungElectronics_product,ShinEtsuChemical_product,Siltronic_product,SKhynix_product,Sumco_product,TokyoElectron_product,TSMC_product,UMC_product
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2006-01-02,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.700000,8.400000,326.0,2561.0,278.000000,...,,,,,,,,,,,,,,,,,,,,,,,,,
2006-01-03,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.700000,8.400000,326.0,2561.0,278.000000,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.862009,1.000000,1.000000,1.048766,1.000000,1.000000,1.000000,1.000000
2006-01-04,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.700000,8.400000,326.0,2561.0,278.000000,...,0.682030,0.771197,1.026287,1.000000,1.000000,1.000000,1.223552,1.000000,0.829714,0.967572,1.000000,1.380558,0.743634,1.000000,1.109161,0.967923,1.097724,2.307115,1.000000,1.000000,1.549763,1.000000,1.000000,0.788606,0.581332
2006-01-05,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.700000,8.400000,326.0,2561.0,278.000000,...,1.264163,0.718678,1.094089,1.000000,1.000000,1.000000,0.710449,1.000000,1.204864,0.824310,1.000000,0.901202,0.836536,1.000000,1.294707,1.168471,0.642743,0.878133,1.000000,1.000000,0.913503,1.000000,4.149537,0.835657,2.780952
2006-01-06,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.700000,8.400000,326.0,2561.0,278.000000,...,0.966656,1.606885,1.514631,1.000000,1.000000,1.000000,1.685084,1.000000,0.833842,1.282477,1.000000,0.947752,2.059981,1.000000,0.980556,1.047620,2.572150,0.504080,1.000000,1.000000,0.596882,1.000000,1.117321,1.553101,1.545230
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-10-27,1.9,2.0,2.9,-1.5,2.0,69.0,21525.82,3.7,1.7,2.2,1.7,2.4,1.3,1.66611,0.0,0.1,-0.3,-0.3,1.4,2.0,3.543428,6.908116,218.0,1690.0,128.639710,...,0.900501,1.010875,1.764097,0.820641,0.808665,1.410060,1.415943,0.954658,0.776571,2.092578,0.829932,1.022473,1.347411,1.486079,0.898487,0.954025,1.223596,-7.016632,1.443049,0.882592,0.724875,0.591650,1.185079,0.728242,8.957344
2019-10-28,1.9,2.0,2.9,-1.5,2.0,69.0,21525.82,3.7,1.7,2.2,1.7,2.4,1.3,1.66611,0.0,0.1,-0.3,-0.3,1.4,2.0,3.556001,6.928135,218.0,1690.0,128.219228,...,1.213115,1.395378,1.417193,0.968297,1.516745,1.109701,1.114263,1.105058,0.581300,1.697246,3.008876,0.982582,1.005550,0.918295,0.709275,0.988696,0.869484,3.268112,0.911087,0.984864,0.598464,0.436636,1.103782,1.596400,-4.887075
2019-10-29,1.9,2.0,2.9,-1.5,2.0,69.0,21525.82,3.7,1.7,2.2,1.7,2.4,1.3,1.66611,0.0,0.1,-0.3,-0.3,1.4,2.0,3.569604,6.950086,218.0,1690.0,127.968071,...,1.506408,1.445075,0.717119,1.250066,1.680148,1.011607,0.793725,1.167880,0.557896,0.832619,1.914615,0.789692,0.580119,0.417745,0.626504,1.023202,0.493114,1.174228,0.410159,1.089446,0.832789,1.231150,1.131356,1.864625,2.523049
2019-10-30,1.9,2.0,2.9,-1.5,2.0,69.0,21525.82,3.7,1.7,2.2,1.7,2.4,1.3,1.66611,0.0,0.1,-0.3,-0.3,1.4,2.0,3.584262,6.974023,218.0,1690.0,127.892806,...,0.997037,0.871637,0.660472,0.599521,0.656067,2.089199,0.552940,0.991999,0.843633,0.559295,0.826001,1.056501,0.899376,0.506841,0.975763,0.925346,0.793448,1.165541,0.758679,1.083804,1.150234,0.694483,1.647300,0.877406,0.370442


In [290]:
# fill na for shifted data

newdf = cla.df.fillna(method = 'ffill').fillna(method = 'bfill')

In [291]:
# check na

newdf.isna().sum().sum()

0

In [292]:
# features

len(newdf.columns)

442

In [293]:
newdf.drop('SamsungElectronics_product', inplace = True, axis = 1)

In [294]:
newdf.to_csv('googleautomltest_120_200226.csv')

# Scaling

In [178]:
newdf.reset_index(inplace = True)

In [179]:
newdf.head()

Unnamed: 0,date,GDPCQOQIndex,GDPCYOYIndex,GDPCTOT%Index,GPDITOC%Index,GPGSTOC%Index,RGCDCIPIIndex,GDPCUR$Index,GDPCURYIndex,GDPPIQQIndex,GDPCPCECIndex,CPIYOYIndex,CPIXYOYIndex,PCEDEFYIndex,PCECYOYIndex,CPICHNGIndex,CPUPXCHGIndex,FDIDFDMOIndex,FDIDSGMOIndex,FDIUFDYOIndex,FDIUSGYOIndex,USURTOTIndex,USUDMAERIndex,INJCJCIndex,INJCSPIndex,...,AMD_product,APPLE_product,AppliedMaterials_product,Aspeed_product,Dell_product,Facebook_product,Google_product,HPE_product,Intel_product,LamResearch_product,MediaTek_product,Micron_product,Microsoft_product,Nuvoton_product,Nvidia_product,Philadelphia_product,QCOM_product,SamsungElectronics_product,ShinEtsuChemical_product,Siltronic_product,SKhynix_product,Sumco_product,TokyoElectron_product,TSMC_product,UMC_product
0,2006-01-02,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.862009,1.0,1.0,1.048766,1.0,1.0,1.0,1.0
1,2006-01-03,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.862009,1.0,1.0,1.048766,1.0,1.0,1.0,1.0
2,2006-01-04,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,...,0.68203,0.771197,1.026287,1.0,1.0,1.0,1.223552,1.0,0.829714,0.967572,1.0,1.380558,0.743634,1.0,1.109161,0.967923,1.097724,2.307115,1.0,1.0,1.549763,1.0,1.0,0.788606,0.581332
3,2006-01-05,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,...,1.264163,0.718678,1.094089,1.0,1.0,1.0,0.710449,1.0,1.204864,0.82431,1.0,0.901202,0.836536,1.0,1.294707,1.168471,0.642743,0.878133,1.0,1.0,0.913503,1.0,4.149537,0.835657,2.780952
4,2006-01-06,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,...,0.966656,1.606885,1.514631,1.0,1.0,1.0,1.685084,1.0,0.833842,1.282477,1.0,0.947752,2.059981,1.0,0.980556,1.04762,2.57215,0.50408,1.0,1.0,0.596882,1.0,1.117321,1.553101,1.54523


In [180]:
date = newdf.date

In [181]:
newdf.drop('date', axis = True, inplace = True)

In [182]:
# extract target
y_target = newdf['KRXsemiconductor_change']

In [215]:
# target drop
newdf.drop(['KRXsemiconductor_change'], axis = 1, inplace = True)


In [216]:
# 삼성전자가 이상하게 안됨 점검 필요 자꾸 infinity가 뜨므로 일단 제거
newdf.drop('SamsungElectronics_product', inplace = True, axis = 1)

In [185]:
# 이 지점에서 각자 잡은 feature를 넣어주자


newdf = newdf[['KOSPI200_change', 'Dell_volume', 'KOIMPTIIndex120', 'USWHTOTIndex120', 'MTSLRL$Index1', 'LEINWCNIndex5', 'SKLIMORDIndex260', 'KOIPMCIndex5', 'CONCCONFIndex60', 'OUTFGAFIndex20', 'KOHHDIndex20', 'QCOM_change', 'OEKRN022Index1', 'KOIMTOTIndex60', 'KOBPFINIndex260', 'EMPRGBCIIndex120', 'MPMIJPMAIndex20', 'ShinEtsuChemical_change', 'COMFCOMFIndex120', 'ShinEtsuChemical_close', 'KODIBALIndex260', 'MPMIUSMAIndex5', 'KOECSEMQIndex', 'KOBPFINIndex60', 'MPMIUSSAIndex120', 'FRNTTOTLIndex120', 'KOFDITIndex', 'INJCJCIndex20', 'KOWDRIndex5', 'MPMIJPMAIndex5', 'USDollarIndex_change', 'MTSLRL$Index5', 'KWCDCCurncy1', 'NAPMNEWOIndex5', 'KOPSIYIndex', 'Nvidia_product', 'KOBPFINIndex1', 'Intel_product', 'SKLIMORDIndex120', 'Facebook_product', 'KODSDISCIndex', 'LamResearch_change', 'UMC_change', 'LEIMNOIndex60', 'KOBSNMCIndex120', 'KORSTIndex260', 'EMDINP1MIndex1', 'AppliedMaterials_volume', 'KOHSTRIndex', 'Philadelphia_product', 'TSMC_volume', 'KOHHLIndex1', 'USWHTOTIndex1', 'KOWDRIndex120', 'LEIAVGWIndex5', 'USTBIMPIndex5', 'KOWDRIndex60', 'FRNTTNETIndex60', 'Micron_product', 'TSMC_change', 'TokyoElectron_change', 'g2', 'Micron_volume', 'KOTRBALIndex5', 'ShinEtsuChemical_volume', 'SKLISVCIIndex60', 'SKLIWNRSIndex1', 'NAPMPMIIndex5', 'APPLE_product', 'KOBPCAIndex260', 'SKLISVCIIndex120', 'KOIPOPSMIndex', 'MediaTek_change', 'KOBPCAIndex', 'KOBONTLIndex5', 'AMD_product', 'KOEXTOTIndex20', 'SKhynix_change', 'NAPMNMIIndex1', 'SKLILIIndex1', 'KOFDITIndex1', 'FRNTTNETIndex260', 'MAPMINDXIndex1', 'KOFDITIndex5', 'AppliedMaterials_product', 'KOEXTOTIndex260', 'KOSPI200_volume', 'IPIndex120', 'KOHHDIndex1', 'Microsoft_product', 'KOMSM2YIndex', 'KOCGCGSMIndex', 'SKhynix_product', 'UMC_product', 'USTBTOTIndex20', 'KOGFBALIndex1', 'OEKRN022Index5', 'KOHCTTLIndex', 'LEIWKIJIndex60', 'EMPRGBCIIndex20', 'SKLICONEIndex120', 'Amazon_product', 'FRNTTOTLIndex1', 'SKLINBARIndex5', 'EUR_KRW_volume', 'JPY_KRW_volume', 'USDollarIndex_volume', 'USD_KRW_volume', 'PIDSPINXIndex120', 'EOKOS002Index', 'KOEXPTIIndex20', 'KOEXPTIYIndex', 'LEIAVGWIndex1', 'KOULMGFIndex120', 'PITLCHNGIndex', 'KOBSNMCIndex5', 'KOBSNMCIndex20', 'ShinEtsuChemical_product', 'Google_product', 'TokyoElectron_product', 'GVSK10YRIndex', 'SKBSICIndex60', 'UMC_volume', 'GDPPIQQIndex', 'INJCSPIndex5', 'EMPRGBCIIndex5', 'SKLILIIndex60', 'SKCITTLIndex20', 'Aspeed_close', 'SKLIMORDIndex60', 'AWHTOTLIndex60', 'CONCCONFIndex20', 'Amazon_volume', 'TokyoElectron_volume']]

KeyError: "['LEIAVGWIndex5', 'KOIMPTIIndex120', 'KOHHLIndex1', 'KOEXPTIIndex20', 'KOEXTOTIndex260', 'SKLIMORDIndex260', 'KOHHDIndex1', 'KOBPFINIndex60', 'INJCSPIndex5', 'SKLINBARIndex5', 'KODIBALIndex260', 'INJCJCIndex20', 'FRNTTOTLIndex120', 'KORSTIndex260', 'EMPRGBCIIndex5', 'SKLILIIndex60', 'FRNTTNETIndex260', 'USWHTOTIndex1', 'SKLIMORDIndex120', 'SKBSICIndex60', 'MPMIUSSAIndex120', 'CONCCONFIndex60', 'COMFCOMFIndex120', 'MTSLRL$Index1', 'OEKRN022Index5', 'MPMIJPMAIndex20', 'NAPMNMIIndex1', 'KWCDCCurncy1', 'KOBSNMCIndex5', 'KOFDITIndex5', 'KOULMGFIndex120', 'SKCITTLIndex20', 'KOWDRIndex5', 'KOBPFINIndex1', 'KOHHDIndex20', 'LEIMNOIndex60', 'CONCCONFIndex20', 'SKLISVCIIndex60', 'SKLIWNRSIndex1', 'KOIMTOTIndex60', 'KOWDRIndex120', 'KOWDRIndex60', 'MTSLRL$Index5', 'SKLISVCIIndex120', 'KOBPFINIndex260', 'SKLICONEIndex120', 'OUTFGAFIndex20', 'NAPMNEWOIndex5', 'KOIPMCIndex5', 'KOBPCAIndex260', 'NAPMPMIIndex5', 'FRNTTOTLIndex1', 'PIDSPINXIndex120', 'EMDINP1MIndex1', 'KOBSNMCIndex20', 'AWHTOTLIndex60', 'USTBTOTIndex20', 'KOEXTOTIndex20', 'LEIAVGWIndex1', 'KOTRBALIndex5', 'LEINWCNIndex5', 'FRNTTNETIndex60', 'IPIndex120', 'KOBSNMCIndex120', 'SKLIMORDIndex60', 'MPMIUSMAIndex5', 'OEKRN022Index1', 'USWHTOTIndex120', 'USTBIMPIndex5', 'EMPRGBCIIndex20', 'KOFDITIndex1', 'KOGFBALIndex1', 'MPMIJPMAIndex5', 'KOBONTLIndex5', 'MAPMINDXIndex1', 'EMPRGBCIIndex120', 'SKLILIIndex1', 'LEIWKIJIndex60'] not in index"

In [130]:
len(newdf.columns)

110

Unnamed: 0,GDPCQOQIndex,GDPCYOYIndex,GDPCTOT%Index,GPDITOC%Index,GPGSTOC%Index,RGCDCIPIIndex,GDPCUR$Index,GDPCURYIndex,GDPPIQQIndex,GDPCPCECIndex,CPIYOYIndex,CPIXYOYIndex,PCEDEFYIndex,PCECYOYIndex,CPICHNGIndex,CPUPXCHGIndex,FDIDFDMOIndex,FDIDSGMOIndex,FDIUFDYOIndex,FDIUSGYOIndex,USURTOTIndex,USUDMAERIndex,INJCJCIndex,INJCSPIndex,NFPTCHIndex,...,Amazon_product,AMD_product,APPLE_product,AppliedMaterials_product,Aspeed_product,Dell_product,Facebook_product,Google_product,HPE_product,Intel_product,LamResearch_product,MediaTek_product,Micron_product,Microsoft_product,Nuvoton_product,Nvidia_product,Philadelphia_product,QCOM_product,ShinEtsuChemical_product,Siltronic_product,SKhynix_product,Sumco_product,TokyoElectron_product,TSMC_product,UMC_product
0,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.048766,1.0,1.0,1.0,1.0
1,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.048766,1.0,1.0,1.0,1.0
2,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,0.990868,0.68203,0.771197,1.026287,1.0,1.0,1.0,1.223552,1.0,0.829714,0.967572,1.0,1.380558,0.743634,1.0,1.109161,0.967923,1.097724,1.0,1.0,1.549763,1.0,1.0,0.788606,0.581332
3,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,0.735048,1.264163,0.718678,1.094089,1.0,1.0,1.0,0.710449,1.0,1.204864,0.82431,1.0,0.901202,0.836536,1.0,1.294707,1.168471,0.642743,1.0,1.0,0.913503,1.0,4.149537,0.835657,2.780952
4,5.4,3.4,4.5,6.1,5.2,102.7,13603.93,6.6,2.8,2.3,4.0,2.1,3.2,2.06622,0.6,0.2,0.1,0.3,2.6,2.2,4.7,8.4,326.0,2561.0,278.0,...,1.139744,0.966656,1.606885,1.514631,1.0,1.0,1.0,1.685084,1.0,0.833842,1.282477,1.0,0.947752,2.059981,1.0,0.980556,1.04762,2.57215,1.0,1.0,0.596882,1.0,1.117321,1.553101,1.54523


In [131]:
# scaling 하기전에 columns 뽑기 
col_name = newdf.columns

In [132]:
# scaling과 동시에 변환
#newdf = PowerTransformer().fit_transform(newdf)
newdf = StandardScaler().fit_transform(newdf)
#newdf = newdf.apply(lambda x : np.log1p(x))
#newdf = MinMaxScaler().fit_transfrom(newdf)

In [133]:
# scaling하면 colname사라지므로 다시 생성
newdf = pd.DataFrame(newdf, columns = col_name)

In [134]:
len(newdf.columns)

110

In [135]:
newdf.isna().sum().sum()

0

In [136]:
# extract X_data
X_data = newdf.copy()

In [137]:
# random split과 not random split
#X_train, X_test, y_train, y_test = train_test_split(X_data, y_target, test_size = 0.3, random_state = 156)
X_train, X_test, y_train, y_test = train_test_split(X_data, y_target, test_size = 0.2, shuffle = False)

# Linear Feature Selection
## 안돌려도 무방

In [138]:
'''
## Feature Selection Code
# Forward stepwise feature selection
# linear model 기준으로 유효 feature 뽑기. 돌리지 않아도 되는 코드

np.random.seed(0)

feature_name = list(X_train.columns)
feature_selected = []
score = []

for i in range(X_train.shape[1]):
    score_temp_list = np.zeros(len(feature_name))
    for j in range(len(feature_name)):
        temp = feature_selected.copy() 
        temp.append(feature_name[j])
        x_temp = X_train[temp]
        lr_temp = LinearRegression() 
        lr_temp.fit(x_temp, y_train)
        predict_temp = lr_temp.predict(X_test[temp])
        r2_temp = r2_score(y_test, predict_temp)
        score_temp = r2_temp
        score_temp_list[j] = score_temp 
        
    temp = feature_name[score_temp_list.argmax()]
    feature_selected.append(temp)
    feature_name.remove(temp)
    score.append(score_temp_list.max())
    print("%02d Selected: "%i, feature_selected)
    print("%02d Score : "%i, np.round(10000*np.array(score))/10000)
        
# finally selected features
feature_selected_final = feature_selected[:(np.array(score).argmax() + 1)]

SyntaxError: EOF while scanning triple-quoted string literal (<ipython-input-138-dcc4a0f6e932>, line 33)

# Metric Class

In [139]:
# metric 구성

from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score

class CustomMetric():

    def __init__(self, pred, real):
            self.pred = pred
            self.real = real
            self.thr25 = np.percentile(self.real, 25)
            self.thr75 = np.percentile(self.real, 75)
            self.df = pd.DataFrame({'pred' : self.pred, 'real' : self.real})
        
    def issame(self, data1, data2):
        if data1 > 0 and data2 > 0 : 
            return 'T'
        elif data1 > 0 and data2 < 0 :
            return 'F'
        elif data1 < 0 and data2 > 0 :
            return 'F'
        elif data1 < 0 and data2 < 0 :
            return 'T'
        elif data1 == 0 or data2 == 0 :
            return 0
        else :
            return 'notcal'

    def getouter(self, data1, data2): #quantile_25 = np.percentile(values, 25)

        if data1 > 0 and data2 >= self.thr75 :
            return 'T'
        elif data1 < 0 and data2 <= self.thr25 :
            return 'T'
        elif data1 >= 0 and data2 <= self.thr25 :
            return 'F'
        elif data1 <= 0 and data2 >= self.thr75: 
            return 'F'
        else:
            return 'notcal'
        
    def makedf(self):
        self.df['TF'] = self.df.apply(lambda x : self.issame(x['pred'], x['real']), axis = 1)
        self.df['thrTF'] = self.df.apply(lambda x : self.getouter(x['pred'], x['real']), axis = 1)
        
        return self.df
        
    def execution(self):
        mdf = pd.DataFrame()
        mdf['CORR'] = [self.df['real'].corr(self.df['pred'], method = 'pearson')]
        mdf['R2'] = ["{0:0.4f}".format(r2_score(self.df['real'], self.df['pred']))]
        mdf['MAE'] = ["{0:0.4f}".format(mean_absolute_error(self.df['pred'], self.df['real']))]
        mdf['RMSE'] = ["{0:0.4f}".format(np.sqrt(mean_squared_error(self.df['pred'], self.df['real'])))]
        mdf['ACR'] = [sum(self.df['TF'] == 'T')/len(self.df['TF'])]
        
        mdf['threshACR'] = [sum(self.df['thrTF'] == 'T') / sum(self.df['thrTF'] != 'notcal')]
        
        return mdf     

# Linear Model

In [140]:
# linear model select
lr_reg = LinearRegression()
#ridge = Ridge(alpha = 100)
#lasso = Lasso(alpha = 100)

In [141]:
lr_reg.fit(X_train, y_train)
y_pred = lr_reg.predict(X_test)

# Tree Model

In [98]:
dt_reg = DecisionTreeRegressor(random_state = 0, max_depth = 100)
rf_reg = RandomForestRegressor(random_state = 0, n_estimators = 1000)
gb_reg = GradientBoostingRegressor(random_state = 0, n_estimators = 1000)
xgb_reg = XGBRegressor(n_estimators = 1000)
lgb_reg = LGBMRegressor(n_estimators = 3000)

In [None]:
lgb_reg.fit(X_train, y_train)

In [44]:
y_pred = lgb_reg.predict(X_test)

### Metric function 실행 

In [142]:
metr = CustomMetric(y_pred, y_test)
metr.makedf()
metr.execution()

Unnamed: 0,CORR,R2,MAE,RMSE,ACR,threshACR
0,0.315524,0.0987,1.4147,1.8702,0.570722,0.609467


# Feature Importance Visualization

Linear Model은 회귀계수로 나오므로 아래와 같이 시각화

In [None]:
coef_imp = pd.DataFrame(sorted(zip(lr_reg.coef_, X_train.columns)), columns = ['Value', 'Feature'])
coef_imp.sort_values(by = 'Value', ascending = False)
plt.figure(figsize = (10, 30))
sns.barplot(x = 'Value', y = 'Feature', data = coef_imp.sort_values(by = 'Value', ascending = False))
plt.title('Linear Regressor Features')
plt.tight_layout()
plt.show()

Tree Model은 feature importance가 따로 뽑히므로 아래와 같이 시각화

In [None]:
feat_imp = pd.DataFrame(sorted(zip(lgb_reg.feature_importances_, X_train.columns)), columns = ['Value', 'Feature'])
feat_imp.sort_values(by = 'Value', ascending = False)
plt.figure(figsize = (10, 30))
sns.barplot(x = 'Value', y = 'Feature', data = feat_imp.sort_values(by = 'Value', ascending = False))
plt.title('LightGBM Features')
plt.tight_layout()
plt.savefig('lgbm_importances_1.png')
plt.show()

# 번외 : Optuna AutoML을 통한 LightGBM Hyperparameter tuning 예시

In [183]:
import sklearn.ensemble
import sklearn.model_selection
from sklearn.metrics import r2_score
import optuna
import optuna.integration.lightgbm as lgb

In [184]:
dtrain = lgb.Dataset(X_train, label=y_train)
dval = lgb.Dataset(X_test, label=y_test)

params = {
    'objective': 'regression',
    'metric': 'l2',
    'verbosity': -1,
    'boosting_type': 'gbdt',
}

best_params, tuning_history = dict(), list()

model = lgb.train(params,
                  dtrain,
                  valid_sets=[dtrain, dval],
                  best_params=best_params,
                  tuning_history=tuning_history,
                  verbose_eval=100,
                  early_stopping_rounds=100,
                  )

prediction = np.rint(model.predict(X_test, num_iteration=model.best_iteration))
r2_score = r2_score(y_test, prediction)

print('Number of finished trials: {}'.format(len(tuning_history)))
print('Best params:', best_params)
print('  r2_score = {}'.format(r2_score))
print('  Params: ')
for key, value in best_params.items():
    print('    {}: {}'.format(key, value))

tune_feature_fraction, val_score: inf:   0%|          | 0/7 [00:00<?, ?it/s]

Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.743039:  14%|#4        | 1/7 [00:00<00:03,  1.96it/s][32m[I 2020-02-11 18:02:45,383][0m Finished trial#0 resulted in value: 3.743039295223581. Current best value is 3.743039295223581 with parameters: {'feature_fraction': 0.4}.[0m
tune_feature_fraction, val_score: 3.743039:  14%|#4        | 1/7 [00:00<00:03,  1.96it/s]

[100]	training's l2: 1.14427	valid_1's l2: 4.37173
Early stopping, best iteration is:
[2]	training's l2: 4.35093	valid_1's l2: 3.74304
Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.743039:  29%|##8       | 2/7 [00:01<00:02,  1.98it/s][32m[I 2020-02-11 18:02:45,881][0m Finished trial#1 resulted in value: 3.7694221469647315. Current best value is 3.743039295223581 with parameters: {'feature_fraction': 0.4}.[0m
tune_feature_fraction, val_score: 3.743039:  29%|##8       | 2/7 [00:01<00:02,  1.98it/s]

[100]	training's l2: 1.13166	valid_1's l2: 4.4361
Early stopping, best iteration is:
[1]	training's l2: 4.45106	valid_1's l2: 3.76942
Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.743039:  43%|####2     | 3/7 [00:01<00:02,  1.84it/s][32m[I 2020-02-11 18:02:46,511][0m Finished trial#2 resulted in value: 3.7865627785977276. Current best value is 3.743039295223581 with parameters: {'feature_fraction': 0.4}.[0m
tune_feature_fraction, val_score: 3.743039:  43%|####2     | 3/7 [00:01<00:02,  1.84it/s]

[100]	training's l2: 1.10724	valid_1's l2: 4.3306
Early stopping, best iteration is:
[1]	training's l2: 4.45355	valid_1's l2: 3.78656
Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.743039:  57%|#####7    | 4/7 [00:02<00:01,  1.73it/s][32m[I 2020-02-11 18:02:47,164][0m Finished trial#3 resulted in value: 3.7565527076227063. Current best value is 3.743039295223581 with parameters: {'feature_fraction': 0.4}.[0m
tune_feature_fraction, val_score: 3.743039:  57%|#####7    | 4/7 [00:02<00:01,  1.73it/s]

[100]	training's l2: 1.06745	valid_1's l2: 4.42483
Early stopping, best iteration is:
[2]	training's l2: 4.34275	valid_1's l2: 3.75655
Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.743039:  71%|#######1  | 5/7 [00:02<00:01,  1.73it/s][32m[I 2020-02-11 18:02:47,748][0m Finished trial#4 resulted in value: 3.7675234068430754. Current best value is 3.743039295223581 with parameters: {'feature_fraction': 0.4}.[0m
tune_feature_fraction, val_score: 3.743039:  71%|#######1  | 5/7 [00:02<00:01,  1.73it/s]

[100]	training's l2: 1.04923	valid_1's l2: 4.19544
Early stopping, best iteration is:
[2]	training's l2: 4.33672	valid_1's l2: 3.76752
Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.743039:  86%|########5 | 6/7 [00:03<00:00,  1.53it/s][32m[I 2020-02-11 18:02:48,572][0m Finished trial#5 resulted in value: 3.848364833873613. Current best value is 3.743039295223581 with parameters: {'feature_fraction': 0.4}.[0m
tune_feature_fraction, val_score: 3.743039:  86%|########5 | 6/7 [00:03<00:00,  1.53it/s]

[100]	training's l2: 1.05643	valid_1's l2: 4.68182
Early stopping, best iteration is:
[1]	training's l2: 4.45704	valid_1's l2: 3.84836
Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.743039: 100%|##########| 7/7 [00:04<00:00,  1.61it/s][32m[I 2020-02-11 18:02:49,118][0m Finished trial#6 resulted in value: 3.8606053544201333. Current best value is 3.743039295223581 with parameters: {'feature_fraction': 0.4}.[0m
tune_feature_fraction, val_score: 3.743039: 100%|##########| 7/7 [00:04<00:00,  1.63it/s]
tune_num_leaves, val_score: 3.743039:   0%|          | 0/20 [00:00<?, ?it/s]

[100]	training's l2: 0.989447	valid_1's l2: 4.47646
Early stopping, best iteration is:
[1]	training's l2: 4.45269	valid_1's l2: 3.86061
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:   5%|5         | 1/20 [00:01<00:23,  1.21s/it][32m[I 2020-02-11 18:02:50,387][0m Finished trial#0 resulted in value: 3.735220592924571. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:   5%|5         | 1/20 [00:01<00:23,  1.21s/it]

[100]	training's l2: 0.107172	valid_1's l2: 4.48901
Early stopping, best iteration is:
[2]	training's l2: 4.1154	valid_1's l2: 3.73522
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:  10%|#         | 2/20 [00:02<00:21,  1.17s/it][32m[I 2020-02-11 18:02:51,472][0m Finished trial#1 resulted in value: 3.735220592924571. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:  10%|#         | 2/20 [00:02<00:21,  1.17s/it]

[100]	training's l2: 0.107172	valid_1's l2: 4.48901
Early stopping, best iteration is:
[2]	training's l2: 4.1154	valid_1's l2: 3.73522
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:  15%|#5        | 3/20 [00:03<00:20,  1.20s/it][32m[I 2020-02-11 18:02:52,743][0m Finished trial#2 resulted in value: 3.7504955549707115. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:  15%|#5        | 3/20 [00:03<00:20,  1.20s/it]

[100]	training's l2: 0.23057	valid_1's l2: 4.37326
Early stopping, best iteration is:
[2]	training's l2: 4.19343	valid_1's l2: 3.7505
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:  20%|##        | 4/20 [00:04<00:18,  1.17s/it][32m[I 2020-02-11 18:02:53,827][0m Finished trial#3 resulted in value: 3.735220592924571. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:  20%|##        | 4/20 [00:04<00:18,  1.17s/it]

[100]	training's l2: 0.107172	valid_1's l2: 4.48901
Early stopping, best iteration is:
[2]	training's l2: 4.1154	valid_1's l2: 3.73522
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:  25%|##5       | 5/20 [00:05<00:16,  1.07s/it][32m[I 2020-02-11 18:02:54,688][0m Finished trial#4 resulted in value: 3.756219050882487. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:  25%|##5       | 5/20 [00:05<00:16,  1.07s/it]

[100]	training's l2: 0.197672	valid_1's l2: 4.16885
Early stopping, best iteration is:
[2]	training's l2: 4.18319	valid_1's l2: 3.75622
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:  30%|###       | 6/20 [00:06<00:15,  1.10s/it][32m[I 2020-02-11 18:02:55,854][0m Finished trial#5 resulted in value: 3.768136207598261. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:  30%|###       | 6/20 [00:06<00:15,  1.10s/it]

[100]	training's l2: 0.163086	valid_1's l2: 4.47864
Early stopping, best iteration is:
[2]	training's l2: 4.16611	valid_1's l2: 3.76814
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:  35%|###5      | 7/20 [00:08<00:18,  1.39s/it][32m[I 2020-02-11 18:02:57,898][0m Finished trial#6 resulted in value: 3.735220592924571. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:  35%|###5      | 7/20 [00:08<00:18,  1.39s/it]

[100]	training's l2: 0.107172	valid_1's l2: 4.48901
Early stopping, best iteration is:
[2]	training's l2: 4.1154	valid_1's l2: 3.73522
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:  40%|####      | 8/20 [00:10<00:18,  1.55s/it][32m[I 2020-02-11 18:02:59,837][0m Finished trial#7 resulted in value: 3.735220592924571. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:  40%|####      | 8/20 [00:10<00:18,  1.55s/it]

[100]	training's l2: 0.107172	valid_1's l2: 4.48901
Early stopping, best iteration is:
[2]	training's l2: 4.1154	valid_1's l2: 3.73522
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:  45%|####5     | 9/20 [00:11<00:15,  1.45s/it][32m[I 2020-02-11 18:03:01,046][0m Finished trial#8 resulted in value: 3.740980363405558. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:  45%|####5     | 9/20 [00:11<00:15,  1.45s/it]

[100]	training's l2: 0.108789	valid_1's l2: 4.5437
Early stopping, best iteration is:
[2]	training's l2: 4.1219	valid_1's l2: 3.74098
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.735221:  50%|#####     | 10/20 [00:13<00:13,  1.38s/it][32m[I 2020-02-11 18:03:02,288][0m Finished trial#9 resulted in value: 3.7685868161301714. Current best value is 3.735220592924571 with parameters: {'num_leaves': 214}.[0m
tune_num_leaves, val_score: 3.735221:  50%|#####     | 10/20 [00:13<00:13,  1.38s/it]

[100]	training's l2: 0.15994	valid_1's l2: 4.60279
Early stopping, best iteration is:
[2]	training's l2: 4.16203	valid_1's l2: 3.76859
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.731570:  55%|#####5    | 11/20 [00:13<00:09,  1.07s/it][32m[I 2020-02-11 18:03:02,613][0m Finished trial#10 resulted in value: 3.731569542412721. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570:  60%|######    | 12/20 [00:13<00:06,  1.27it/s][32m[I 2020-02-11 18:03:02,736][0m Finished trial#11 resulted in value: 3.7518110998846352. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570:  60%|######    | 12/20 [00:13<00:06,  1.27it/s]

[100]	training's l2: 2.19025	valid_1's l2: 4.31079
Early stopping, best iteration is:
[1]	training's l2: 4.50694	valid_1's l2: 3.73157
Training until validation scores don't improve for 100 rounds
[100]	training's l2: 4.32042	valid_1's l2: 3.88483
Early stopping, best iteration is:
[2]	training's l2: 4.57488	valid_1's l2: 3.75181
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.731570:  65%|######5   | 13/20 [00:13<00:04,  1.68it/s][32m[I 2020-02-11 18:03:02,883][0m Finished trial#12 resulted in value: 3.7494616911379484. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570:  65%|######5   | 13/20 [00:13<00:04,  1.68it/s]

[100]	training's l2: 3.7473	valid_1's l2: 4.32062
Early stopping, best iteration is:
[2]	training's l2: 4.53344	valid_1's l2: 3.74946
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.731570:  70%|#######   | 14/20 [00:15<00:04,  1.22it/s][32m[I 2020-02-11 18:03:04,218][0m Finished trial#13 resulted in value: 3.735220592924571. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570:  70%|#######   | 14/20 [00:15<00:04,  1.22it/s]

[100]	training's l2: 0.107172	valid_1's l2: 4.48901
Early stopping, best iteration is:
[2]	training's l2: 4.1154	valid_1's l2: 3.73522
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.731570:  75%|#######5  | 15/20 [00:15<00:03,  1.30it/s][32m[I 2020-02-11 18:03:04,872][0m Finished trial#14 resulted in value: 3.7389696293551546. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570:  75%|#######5  | 15/20 [00:15<00:03,  1.30it/s]

[100]	training's l2: 0.753086	valid_1's l2: 4.57753
Early stopping, best iteration is:
[2]	training's l2: 4.29803	valid_1's l2: 3.73897
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.731570:  80%|########  | 16/20 [00:17<00:04,  1.04s/it][32m[I 2020-02-11 18:03:06,564][0m Finished trial#15 resulted in value: 3.735220592924571. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570:  80%|########  | 16/20 [00:17<00:04,  1.04s/it]

[100]	training's l2: 0.107172	valid_1's l2: 4.48901
Early stopping, best iteration is:
[2]	training's l2: 4.1154	valid_1's l2: 3.73522
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.731570:  85%|########5 | 17/20 [00:17<00:02,  1.13it/s][32m[I 2020-02-11 18:03:07,062][0m Finished trial#16 resulted in value: 3.739077006227947. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570:  85%|########5 | 17/20 [00:17<00:02,  1.13it/s]

[100]	training's l2: 0.781549	valid_1's l2: 4.33439
Early stopping, best iteration is:
[2]	training's l2: 4.3024	valid_1's l2: 3.73908
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.731570:  90%|######### | 18/20 [00:19<00:02,  1.04s/it][32m[I 2020-02-11 18:03:08,489][0m Finished trial#17 resulted in value: 3.739571691030669. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570:  90%|######### | 18/20 [00:19<00:02,  1.04s/it]

[100]	training's l2: 0.112003	valid_1's l2: 4.57667
Early stopping, best iteration is:
[2]	training's l2: 4.12728	valid_1's l2: 3.73957
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.731570:  95%|#########5| 19/20 [00:20<00:01,  1.21s/it][32m[I 2020-02-11 18:03:10,086][0m Finished trial#18 resulted in value: 3.735220592924571. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570:  95%|#########5| 19/20 [00:20<00:01,  1.21s/it]

[100]	training's l2: 0.107172	valid_1's l2: 4.48901
Early stopping, best iteration is:
[2]	training's l2: 4.1154	valid_1's l2: 3.73522
Training until validation scores don't improve for 100 rounds


tune_num_leaves, val_score: 3.731570: 100%|##########| 20/20 [00:21<00:00,  1.02s/it][32m[I 2020-02-11 18:03:10,675][0m Finished trial#19 resulted in value: 3.750199068394421. Current best value is 3.731569542412721 with parameters: {'num_leaves': 15}.[0m
tune_num_leaves, val_score: 3.731570: 100%|##########| 20/20 [00:21<00:00,  1.08s/it]
tune_bagging_fraction_and_bagging_freq, val_score: 3.731570:   0%|          | 0/10 [00:00<?, ?it/s]

[100]	training's l2: 0.642406	valid_1's l2: 4.41874
Early stopping, best iteration is:
[2]	training's l2: 4.27882	valid_1's l2: 3.7502
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.704491:  10%|#         | 1/10 [00:00<00:01,  4.92it/s][32m[I 2020-02-11 18:03:10,936][0m Finished trial#0 resulted in value: 3.704490809895457. Current best value is 3.704490809895457 with parameters: {'bagging_fraction': 0.9846476358806338, 'bagging_freq': 5}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.704491:  10%|#         | 1/10 [00:00<00:01,  4.92it/s]

[100]	training's l2: 2.17483	valid_1's l2: 4.11138
Early stopping, best iteration is:
[2]	training's l2: 4.44053	valid_1's l2: 3.70449
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  20%|##        | 2/10 [00:00<00:01,  4.42it/s][32m[I 2020-02-11 18:03:11,216][0m Finished trial#1 resulted in value: 3.693085549207284. Current best value is 3.693085549207284 with parameters: {'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  20%|##        | 2/10 [00:00<00:01,  4.42it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  30%|###       | 3/10 [00:00<00:02,  3.45it/s][32m[I 2020-02-11 18:03:11,655][0m Finished trial#2 resulted in value: 3.7747270526961327. Current best value is 3.693085549207284 with parameters: {'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  30%|###       | 3/10 [00:00<00:02,  3.45it/s]

[100]	training's l2: 2.24708	valid_1's l2: 4.36171
Early stopping, best iteration is:
[1]	training's l2: 4.51632	valid_1's l2: 3.77473
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  40%|####      | 4/10 [00:01<00:01,  3.52it/s][32m[I 2020-02-11 18:03:11,929][0m Finished trial#3 resulted in value: 3.7391765177587026. Current best value is 3.693085549207284 with parameters: {'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  40%|####      | 4/10 [00:01<00:01,  3.52it/s]

[100]	training's l2: 2.14561	valid_1's l2: 4.29109
Early stopping, best iteration is:
[1]	training's l2: 4.51101	valid_1's l2: 3.73918
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  50%|#####     | 5/10 [00:01<00:01,  3.47it/s][32m[I 2020-02-11 18:03:12,222][0m Finished trial#4 resulted in value: 3.7687333055249015. Current best value is 3.693085549207284 with parameters: {'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  50%|#####     | 5/10 [00:01<00:01,  3.47it/s]

[100]	training's l2: 2.15463	valid_1's l2: 4.58127
Early stopping, best iteration is:
[2]	training's l2: 4.43508	valid_1's l2: 3.76873
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  60%|######    | 6/10 [00:01<00:01,  3.70it/s][32m[I 2020-02-11 18:03:12,457][0m Finished trial#5 resulted in value: 3.767117485799114. Current best value is 3.693085549207284 with parameters: {'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  60%|######    | 6/10 [00:01<00:01,  3.70it/s]

[100]	training's l2: 2.18257	valid_1's l2: 4.2411
Early stopping, best iteration is:
[1]	training's l2: 4.50745	valid_1's l2: 3.76712
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  70%|#######   | 7/10 [00:02<00:00,  3.59it/s][32m[I 2020-02-11 18:03:12,758][0m Finished trial#6 resulted in value: 3.7255405576165215. Current best value is 3.693085549207284 with parameters: {'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  70%|#######   | 7/10 [00:02<00:00,  3.59it/s]

[100]	training's l2: 2.15296	valid_1's l2: 4.18
Early stopping, best iteration is:
[2]	training's l2: 4.42798	valid_1's l2: 3.72554
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  80%|########  | 8/10 [00:02<00:00,  3.59it/s][32m[I 2020-02-11 18:03:13,029][0m Finished trial#7 resulted in value: 3.7720268532453587. Current best value is 3.693085549207284 with parameters: {'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  80%|########  | 8/10 [00:02<00:00,  3.59it/s]

[100]	training's l2: 2.1925	valid_1's l2: 4.42254
Early stopping, best iteration is:
[2]	training's l2: 4.45791	valid_1's l2: 3.77203
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  90%|######### | 9/10 [00:02<00:00,  3.68it/s][32m[I 2020-02-11 18:03:13,283][0m Finished trial#8 resulted in value: 3.6971380704784944. Current best value is 3.693085549207284 with parameters: {'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.693086:  90%|######### | 9/10 [00:02<00:00,  3.68it/s]

[100]	training's l2: 2.19251	valid_1's l2: 4.24742
Early stopping, best iteration is:
[2]	training's l2: 4.44539	valid_1's l2: 3.69714
Training until validation scores don't improve for 100 rounds


tune_bagging_fraction_and_bagging_freq, val_score: 3.693086: 100%|##########| 10/10 [00:02<00:00,  3.59it/s][32m[I 2020-02-11 18:03:13,578][0m Finished trial#9 resulted in value: 3.7136995447104506. Current best value is 3.693085549207284 with parameters: {'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1}.[0m
tune_bagging_fraction_and_bagging_freq, val_score: 3.693086: 100%|##########| 10/10 [00:02<00:00,  3.45it/s]
tune_feature_fraction, val_score: 3.693086:   0%|          | 0/3 [00:00<?, ?it/s]

[100]	training's l2: 2.15005	valid_1's l2: 4.27549
Early stopping, best iteration is:
[2]	training's l2: 4.43851	valid_1's l2: 3.7137
Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.693086:  33%|###3      | 1/3 [00:00<00:00,  4.72it/s][32m[I 2020-02-11 18:03:13,848][0m Finished trial#0 resulted in value: 3.693085549207284. Current best value is 3.693085549207284 with parameters: {'feature_fraction': 0.41600000000000004}.[0m
tune_feature_fraction, val_score: 3.693086:  33%|###3      | 1/3 [00:00<00:00,  4.72it/s]

[100]	training's l2: 2.16136	valid_1's l2: 4.55744
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.693086:  67%|######6   | 2/3 [00:00<00:00,  3.61it/s][32m[I 2020-02-11 18:03:14,274][0m Finished trial#1 resulted in value: 3.75545761318674. Current best value is 3.693085549207284 with parameters: {'feature_fraction': 0.41600000000000004}.[0m
tune_feature_fraction, val_score: 3.693086:  67%|######6   | 2/3 [00:00<00:00,  3.61it/s]

[100]	training's l2: 2.12625	valid_1's l2: 4.4212
Early stopping, best iteration is:
[1]	training's l2: 4.50849	valid_1's l2: 3.75546
Training until validation scores don't improve for 100 rounds


tune_feature_fraction, val_score: 3.693086: 100%|##########| 3/3 [00:00<00:00,  3.65it/s][32m[I 2020-02-11 18:03:14,542][0m Finished trial#2 resulted in value: 3.75545761318674. Current best value is 3.693085549207284 with parameters: {'feature_fraction': 0.41600000000000004}.[0m
tune_feature_fraction, val_score: 3.693086: 100%|##########| 3/3 [00:00<00:00,  3.12it/s]
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:   0%|          | 0/20 [00:00<?, ?it/s]

[100]	training's l2: 2.13224	valid_1's l2: 4.36267
Early stopping, best iteration is:
[1]	training's l2: 4.50849	valid_1's l2: 3.75546
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:   5%|5         | 1/20 [00:00<00:04,  4.13it/s][32m[I 2020-02-11 18:03:14,842][0m Finished trial#0 resulted in value: 3.6931224435506356. Current best value is 3.6931224435506356 with parameters: {'lambda_l1': 0.0004091354760566695, 'lambda_l2': 0.015454082789431823}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:   5%|5         | 1/20 [00:00<00:04,  4.13it/s]

[100]	training's l2: 2.20321	valid_1's l2: 4.42422
Early stopping, best iteration is:
[2]	training's l2: 4.43786	valid_1's l2: 3.69312
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  10%|#         | 2/20 [00:00<00:04,  4.11it/s][32m[I 2020-02-11 18:03:15,086][0m Finished trial#1 resulted in value: 3.69964872383405. Current best value is 3.6931224435506356 with parameters: {'lambda_l1': 0.0004091354760566695, 'lambda_l2': 0.015454082789431823}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  10%|#         | 2/20 [00:00<00:04,  4.11it/s]

[100]	training's l2: 2.22339	valid_1's l2: 4.38409
Early stopping, best iteration is:
[2]	training's l2: 4.4443	valid_1's l2: 3.69965
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  15%|#5        | 3/20 [00:00<00:04,  4.17it/s][32m[I 2020-02-11 18:03:15,318][0m Finished trial#2 resulted in value: 3.7723058642600455. Current best value is 3.6931224435506356 with parameters: {'lambda_l1': 0.0004091354760566695, 'lambda_l2': 0.015454082789431823}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  15%|#5        | 3/20 [00:00<00:04,  4.17it/s]

[100]	training's l2: 2.37843	valid_1's l2: 4.30682
Early stopping, best iteration is:
[2]	training's l2: 4.47339	valid_1's l2: 3.77231
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  20%|##        | 4/20 [00:00<00:03,  4.14it/s][32m[I 2020-02-11 18:03:15,563][0m Finished trial#3 resulted in value: 3.6930856496798548. Current best value is 3.6930856496798548 with parameters: {'lambda_l1': 1.2746027537109919e-08, 'lambda_l2': 4.267659803128042e-05}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  20%|##        | 4/20 [00:01<00:03,  4.14it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  25%|##5       | 5/20 [00:01<00:03,  3.77it/s][32m[I 2020-02-11 18:03:15,883][0m Finished trial#4 resulted in value: 3.69390744817483. Current best value is 3.6930856496798548 with parameters: {'lambda_l1': 1.2746027537109919e-08, 'lambda_l2': 4.267659803128042e-05}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  25%|##5       | 5/20 [00:01<00:03,  3.77it/s]

[100]	training's l2: 2.17227	valid_1's l2: 4.32682
Early stopping, best iteration is:
[2]	training's l2: 4.43961	valid_1's l2: 3.69391
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  30%|###       | 6/20 [00:01<00:03,  3.70it/s][32m[I 2020-02-11 18:03:16,175][0m Finished trial#5 resulted in value: 3.696966717255604. Current best value is 3.6930856496798548 with parameters: {'lambda_l1': 1.2746027537109919e-08, 'lambda_l2': 4.267659803128042e-05}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  30%|###       | 6/20 [00:01<00:03,  3.70it/s]

[100]	training's l2: 2.20741	valid_1's l2: 4.41957
Early stopping, best iteration is:
[2]	training's l2: 4.44121	valid_1's l2: 3.69697
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  35%|###5      | 7/20 [00:01<00:03,  3.27it/s][32m[I 2020-02-11 18:03:16,557][0m Finished trial#6 resulted in value: 3.752889718813778. Current best value is 3.6930856496798548 with parameters: {'lambda_l1': 1.2746027537109919e-08, 'lambda_l2': 4.267659803128042e-05}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  35%|###5      | 7/20 [00:02<00:03,  3.27it/s]

[100]	training's l2: 2.25461	valid_1's l2: 4.23827
Early stopping, best iteration is:
[2]	training's l2: 4.4547	valid_1's l2: 3.75289
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  40%|####      | 8/20 [00:02<00:03,  3.41it/s][32m[I 2020-02-11 18:03:16,819][0m Finished trial#7 resulted in value: 3.6932798470758272. Current best value is 3.6930856496798548 with parameters: {'lambda_l1': 1.2746027537109919e-08, 'lambda_l2': 4.267659803128042e-05}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  40%|####      | 8/20 [00:02<00:03,  3.41it/s]

[100]	training's l2: 2.16689	valid_1's l2: 4.47396
Early stopping, best iteration is:
[2]	training's l2: 4.43811	valid_1's l2: 3.69328
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  45%|####5     | 9/20 [00:02<00:03,  3.34it/s][32m[I 2020-02-11 18:03:17,134][0m Finished trial#8 resulted in value: 3.7017072062649756. Current best value is 3.6930856496798548 with parameters: {'lambda_l1': 1.2746027537109919e-08, 'lambda_l2': 4.267659803128042e-05}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  45%|####5     | 9/20 [00:02<00:03,  3.34it/s]

[100]	training's l2: 2.21972	valid_1's l2: 4.15326
Early stopping, best iteration is:
[2]	training's l2: 4.44716	valid_1's l2: 3.70171
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  50%|#####     | 10/20 [00:02<00:02,  3.47it/s][32m[I 2020-02-11 18:03:17,394][0m Finished trial#9 resulted in value: 3.693126416548805. Current best value is 3.6930856496798548 with parameters: {'lambda_l1': 1.2746027537109919e-08, 'lambda_l2': 4.267659803128042e-05}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  50%|#####     | 10/20 [00:02<00:02,  3.47it/s]

[100]	training's l2: 2.20334	valid_1's l2: 4.42413
Early stopping, best iteration is:
[2]	training's l2: 4.43786	valid_1's l2: 3.69313
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  55%|#####5    | 11/20 [00:03<00:02,  3.59it/s][32m[I 2020-02-11 18:03:17,653][0m Finished trial#10 resulted in value: 3.693085549950353. Current best value is 3.693085549950353 with parameters: {'lambda_l1': 1.872072499566791e-08, 'lambda_l2': 3.0537428441761945e-07}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  55%|#####5    | 11/20 [00:03<00:02,  3.59it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  60%|######    | 12/20 [00:03<00:02,  3.30it/s][32m[I 2020-02-11 18:03:18,012][0m Finished trial#11 resulted in value: 3.6930855497933504. Current best value is 3.6930855497933504 with parameters: {'lambda_l1': 1.8281151832217688e-08, 'lambda_l2': 2.3890353551064293e-07}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  60%|######    | 12/20 [00:03<00:02,  3.30it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  65%|######5   | 13/20 [00:03<00:01,  3.52it/s][32m[I 2020-02-11 18:03:18,251][0m Finished trial#12 resulted in value: 3.693085549295667. Current best value is 3.693085549295667 with parameters: {'lambda_l1': 1.4674443979062974e-08, 'lambda_l2': 2.9426028891628908e-08}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  65%|######5   | 13/20 [00:03<00:01,  3.52it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  70%|#######   | 14/20 [00:03<00:01,  3.66it/s][32m[I 2020-02-11 18:03:18,497][0m Finished trial#13 resulted in value: 3.693085551280851. Current best value is 3.693085549295667 with parameters: {'lambda_l1': 1.4674443979062974e-08, 'lambda_l2': 2.9426028891628908e-08}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  70%|#######   | 14/20 [00:03<00:01,  3.66it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  75%|#######5  | 15/20 [00:04<00:01,  3.81it/s][32m[I 2020-02-11 18:03:18,740][0m Finished trial#14 resulted in value: 3.693085551473689. Current best value is 3.693085549295667 with parameters: {'lambda_l1': 1.4674443979062974e-08, 'lambda_l2': 2.9426028891628908e-08}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  75%|#######5  | 15/20 [00:04<00:01,  3.81it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  80%|########  | 16/20 [00:04<00:01,  3.68it/s][32m[I 2020-02-11 18:03:19,029][0m Finished trial#15 resulted in value: 3.693085549368055. Current best value is 3.693085549295667 with parameters: {'lambda_l1': 1.4674443979062974e-08, 'lambda_l2': 2.9426028891628908e-08}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  80%|########  | 16/20 [00:04<00:01,  3.68it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  85%|########5 | 17/20 [00:04<00:00,  3.86it/s][32m[I 2020-02-11 18:03:19,261][0m Finished trial#16 resulted in value: 3.693085564903099. Current best value is 3.693085549295667 with parameters: {'lambda_l1': 1.4674443979062974e-08, 'lambda_l2': 2.9426028891628908e-08}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  85%|########5 | 17/20 [00:04<00:00,  3.86it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  90%|######### | 18/20 [00:04<00:00,  3.86it/s][32m[I 2020-02-11 18:03:19,523][0m Finished trial#17 resulted in value: 3.6930855879777025. Current best value is 3.693085549295667 with parameters: {'lambda_l1': 1.4674443979062974e-08, 'lambda_l2': 2.9426028891628908e-08}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  90%|######### | 18/20 [00:04<00:00,  3.86it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  95%|#########5| 19/20 [00:05<00:00,  3.68it/s][32m[I 2020-02-11 18:03:19,827][0m Finished trial#18 resulted in value: 3.693085602211528. Current best value is 3.693085549295667 with parameters: {'lambda_l1': 1.4674443979062974e-08, 'lambda_l2': 2.9426028891628908e-08}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086:  95%|#########5| 19/20 [00:05<00:00,  3.68it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_lambda_l1_and_lambda_l2, val_score: 3.693086: 100%|##########| 20/20 [00:05<00:00,  3.75it/s][32m[I 2020-02-11 18:03:20,073][0m Finished trial#19 resulted in value: 3.6930855494426478. Current best value is 3.693085549295667 with parameters: {'lambda_l1': 1.4674443979062974e-08, 'lambda_l2': 2.9426028891628908e-08}.[0m
tune_lambda_l1_and_lambda_l2, val_score: 3.693086: 100%|##########| 20/20 [00:05<00:00,  3.62it/s]
tune_min_child_samples, val_score: 3.693086:   0%|          | 0/5 [00:00<?, ?it/s]

[100]	training's l2: 2.20261	valid_1's l2: 4.42469
Early stopping, best iteration is:
[2]	training's l2: 4.43778	valid_1's l2: 3.69309
Training until validation scores don't improve for 100 rounds


tune_min_child_samples, val_score: 3.673303:  20%|##        | 1/5 [00:00<00:00,  4.66it/s][32m[I 2020-02-11 18:03:20,343][0m Finished trial#0 resulted in value: 3.6733028125293257. Current best value is 3.6733028125293257 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 3.673303:  20%|##        | 1/5 [00:00<00:00,  4.66it/s]

[100]	training's l2: 1.96797	valid_1's l2: 4.58294
Early stopping, best iteration is:
[2]	training's l2: 4.39761	valid_1's l2: 3.6733
Training until validation scores don't improve for 100 rounds


tune_min_child_samples, val_score: 3.673303:  40%|####      | 2/5 [00:00<00:00,  4.36it/s][32m[I 2020-02-11 18:03:20,607][0m Finished trial#1 resulted in value: 3.696103143346446. Current best value is 3.6733028125293257 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 3.673303:  40%|####      | 2/5 [00:00<00:00,  4.36it/s]

[100]	training's l2: 2.03812	valid_1's l2: 4.34099
Early stopping, best iteration is:
[2]	training's l2: 4.42925	valid_1's l2: 3.6961
Training until validation scores don't improve for 100 rounds


tune_min_child_samples, val_score: 3.673303:  60%|######    | 3/5 [00:00<00:00,  4.36it/s][32m[I 2020-02-11 18:03:20,842][0m Finished trial#2 resulted in value: 3.767396359996281. Current best value is 3.6733028125293257 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 3.673303:  60%|######    | 3/5 [00:00<00:00,  4.36it/s]

[100]	training's l2: 2.23093	valid_1's l2: 4.53729
Early stopping, best iteration is:
[2]	training's l2: 4.44043	valid_1's l2: 3.7674
Training until validation scores don't improve for 100 rounds


tune_min_child_samples, val_score: 3.673303:  80%|########  | 4/5 [00:01<00:00,  3.90it/s][32m[I 2020-02-11 18:03:21,156][0m Finished trial#3 resulted in value: 3.7783779775951953. Current best value is 3.6733028125293257 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 3.673303:  80%|########  | 4/5 [00:01<00:00,  3.90it/s]

[100]	training's l2: 2.43349	valid_1's l2: 4.29193
Early stopping, best iteration is:
[1]	training's l2: 4.52346	valid_1's l2: 3.77838
Training until validation scores don't improve for 100 rounds
[100]	training's l2: 2.72726	valid_1's l2: 4.30335


tune_min_child_samples, val_score: 3.673303: 100%|##########| 5/5 [00:01<00:00,  4.15it/s][32m[I 2020-02-11 18:03:21,367][0m Finished trial#4 resulted in value: 3.7465455442848334. Current best value is 3.6733028125293257 with parameters: {'min_child_samples': 5}.[0m
tune_min_child_samples, val_score: 3.673303: 100%|##########| 5/5 [00:01<00:00,  3.87it/s]

Early stopping, best iteration is:
[2]	training's l2: 4.48874	valid_1's l2: 3.74655
Number of finished trials: 65
Best params: {'lambda_l1': 0.0, 'lambda_l2': 0.0, 'num_leaves': 15, 'feature_fraction': 0.4, 'bagging_fraction': 0.7225765831000759, 'bagging_freq': 1, 'min_child_samples': 5}
  r2_score = -0.0458964390586587
  Params: 
    lambda_l1: 0.0
    lambda_l2: 0.0
    num_leaves: 15
    feature_fraction: 0.4
    bagging_fraction: 0.7225765831000759
    bagging_freq: 1
    min_child_samples: 5





### Tuning된 값 입력하여 모델 선언

In [57]:
lgb_reg = LGBMRegressor(lambda_l1 = 0.6885870703417802, lambda_l2 = 1.1741445421730667, num_leaves = 33, feature_fraction = 0.8999999999999999, bagging_fraction = 1.0, bagging_freq = 0, min_child_samples = 20)

In [58]:
lgb_reg.fit(X_train, y_train)

LGBMRegressor(bagging_fraction=0.4999, bagging_freq=6, boosting_type='gbdt',
              class_weight=None, colsample_bytree=1.0, feature_fraction=0.616,
              importance_type='split', lambda_l1=0, lambda_l2=0,
              learning_rate=0.1, max_depth=-1, min_child_samples=20,
              min_child_weight=0.001, min_split_gain=0.0, n_estimators=100,
              n_jobs=-1, num_leaves=50, objective=None, random_state=None,
              reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,
              subsample_for_bin=200000, subsample_freq=0)

In [59]:
y_pred = lgb_reg.predict(X_test)