## Please enter the path of your top-level project folder
Folder name: SUBMISSION MODEL

In [1]:
# Root folder of the project. The other directory variables are built by
# appending sub-folder names to this string, so it must end with a '/'.
dir_ = 'E:/Seminararbeit/Code/A1/' # input only here

#### Setting the other directories

In [2]:
# Sub-folders of the project root. The names are appended to `dir_` verbatim,
# so `dir_` has to end with a path separator.
raw_data_dir = f'{dir_}2. data/'
processed_data_dir = f'{dir_}2. data/processed/'
log_dir = f'{dir_}4. logs/'
model_dir = f'{dir_}5. models/'

In [3]:
####################################################################################
##################### 1-3. recursive model by store & dept #########################
####################################################################################

In [4]:
# Run label; not referenced anywhere in the visible part of this notebook.
ver = 'priv'
# Offset in 28-day steps: END_TRAIN is computed as 1941 - 28*KKK.
KKK = 0

In [5]:
# Stores to process. Currently left out of the run:
# 'CA_1', 'CA_2', 'CA_3', 'CA_4', 'TX_1', 'TX_2', 'TX_3'
STORES = [
    'WI_1',
    'WI_2',
    'WI_3',
]

# Departments to process; one model is trained per (store, department) pair.
DEPTS = [
    'HOBBIES_1',
    'HOBBIES_2',
    'HOUSEHOLD_1',
    'HOUSEHOLD_2',
    'FOODS_1',
    'FOODS_2',
    'FOODS_3',
]

In [6]:
import numpy as np
import pandas as pd
import os, sys, gc, time, warnings, pickle, psutil, random

from multiprocessing import Pool

warnings.filterwarnings('ignore')

In [7]:
########################### Helpers
#################################################################################
## Seeder
def seed_everything(seed=0):
    """Seed Python's ``random`` module and NumPy's global RNG with ``seed``."""
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

    
## Multiprocess Runs
def df_parallelize_run(func, t_split):
    """Apply ``func`` to each item of ``t_split`` in a process pool and join the results column-wise.

    Parameters
    ----------
    func : callable
        Called once per item of ``t_split``; each call must return something
        ``pd.concat(..., axis=1)`` accepts. It is sent to worker processes by
        ``Pool.map``.
    t_split : sequence
        Work items. Must support ``len()``.

    Returns
    -------
    pandas.DataFrame
        The per-item results concatenated along ``axis=1``; an empty
        DataFrame when ``t_split`` is empty.
    """
    # Nothing to do: avoid asking for a pool with zero workers.
    if len(t_split) == 0:
        return pd.DataFrame()

    # Never start more workers than there are work items.
    num_cores = min(N_CORES, len(t_split))

    # The context manager releases the worker processes even when a worker
    # raises; all results have already been collected when the block exits.
    with Pool(num_cores) as pool:
        frames = pool.map(func, t_split)

    return pd.concat(frames, axis=1)

In [8]:
########################### Helper to load data by store ID
#################################################################################
# Read data
def get_data_by_store(store, dept):
    """Build the feature frame for a single store/department pair.

    Reads the pickles behind ``BASE``, ``PRICE``, ``CALENDAR``, ``MEAN_ENC``
    and ``LAGS``, keeps the rows with ``d >= START_TRAIN`` that belong to
    ``store`` and ``dept``, and attaches the mean-encoding and lag columns
    for exactly those rows.

    Parameters
    ----------
    store : value compared against the ``store_id`` column.
    dept : value compared against the ``dept_id`` column.

    Returns
    -------
    (DataFrame, list)
        The frame with columns ``['id', 'd', TARGET] + features`` and a fresh
        0..n-1 index, and ``features``: every column name that is not listed
        in ``remove_features``.
    """
    # Read and concatenate the basic feature parts side by side. The leading
    # columns of the price and calendar parts are skipped (presumably keys
    # already present in the base part -- TODO confirm).
    grid = pd.concat(
        [
            pd.read_pickle(BASE),
            pd.read_pickle(PRICE).iloc[:, 2:],
            pd.read_pickle(CALENDAR).iloc[:, 2:],
        ],
        axis=1,
    )

    # Restrict to the training window and to the requested segment.
    wanted_rows = (
        (grid['d'] >= START_TRAIN)
        & (grid['store_id'] == store)
        & (grid['dept_id'] == dept)
    )
    grid = grid[wanted_rows]

    # Mean encodings, limited to the rows that survived the filter.
    mean_enc = pd.read_pickle(MEAN_ENC)[mean_features]
    mean_enc = mean_enc[mean_enc.index.isin(grid.index)]

    # Lag features, again limited to the surviving rows.
    lag_part = pd.read_pickle(LAGS).iloc[:, 3:]
    lag_part = lag_part[lag_part.index.isin(grid.index)]

    grid = pd.concat([grid, mean_enc], axis=1)
    del mean_enc

    grid = pd.concat([grid, lag_part], axis=1)
    del lag_part

    # Everything that is not an identifier/target column is a model feature.
    features = [name for name in grid.columns if name not in remove_features]
    selected = ['id', 'd', TARGET] + features
    grid = grid[selected].reset_index(drop=True)

    return grid, features

# Recombine Test set after training
def get_base_test():
    """Reassemble the per-store/department test frames into one DataFrame.

    For every combination of ``STORES`` and ``DEPTS`` the pickle
    ``processed_data_dir + 'test_<store>_<dept>.pkl'`` is read and tagged
    with ``store_id`` and ``dept_id`` columns.

    Returns
    -------
    pandas.DataFrame
        All frames stacked row-wise with a fresh 0..n-1 index; an empty
        DataFrame when there is no store/department combination.
    """
    frames = []

    for store_id in STORES:
        for dept_id in DEPTS:
            temp_df = pd.read_pickle(processed_data_dir+'test_'+store_id+'_'+dept_id+'.pkl')
            temp_df['store_id'] = store_id
            temp_df['dept_id'] = dept_id
            frames.append(temp_df)

    # pd.concat rejects an empty list; keep returning an empty frame instead.
    if not frames:
        return pd.DataFrame()

    # One concatenation at the end instead of re-copying the growing frame
    # on every iteration.
    return pd.concat(frames, ignore_index=True)


########################### Helper to make dynamic rolling lags
#################################################################################
def make_lag(LAG_DAY):
    """Return the target shifted by ``LAG_DAY`` rows within each ``id``.

    Reads the module-level ``base_test`` frame (not defined in this cell;
    presumably the result of ``get_base_test`` -- verify against the caller),
    which must contain the columns ``'id'`` and ``TARGET``.

    Parameters
    ----------
    LAG_DAY : int
        Number of rows to shift inside each ``id`` group.

    Returns
    -------
    pandas.DataFrame
        A single float16 column named ``'sales_lag_<LAG_DAY>'`` that shares
        the index of ``base_test``.
    """
    col_name = 'sales_lag_'+str(LAG_DAY)
    # Built-in group-wise shift: same result as transform(lambda x: x.shift(n))
    # without calling a Python function per group and without writing a new
    # column into a slice of base_test.
    shifted = base_test.groupby(['id'])[TARGET].shift(LAG_DAY)
    return shifted.astype(np.float16).to_frame(col_name)


def make_lag_roll(LAG_DAY):
    """Return a shifted rolling mean of the target, computed within each ``id``.

    Reads the module-level ``base_test`` frame (not defined in this cell;
    presumably the result of ``get_base_test`` -- verify against the caller),
    which must contain the columns ``'id'`` and ``TARGET``.

    Parameters
    ----------
    LAG_DAY : sequence
        ``LAG_DAY[0]`` is the shift in rows, ``LAG_DAY[1]`` the rolling
        window length.

    Returns
    -------
    pandas.DataFrame
        A single column named ``'rolling_mean_tmp_<shift>_<window>'`` that
        shares the index of ``base_test``.
    """
    shift_day = LAG_DAY[0]
    roll_wind = LAG_DAY[1]
    col_name = 'rolling_mean_tmp_'+str(shift_day)+'_'+str(roll_wind)
    # Build the result straight from the transform output instead of adding a
    # column to a column-subset of base_test (chained assignment on a slice).
    rolled = base_test.groupby(['id'])[TARGET].transform(
        lambda x: x.shift(shift_day).rolling(roll_wind).mean()
    )
    return rolled.to_frame(col_name)

In [9]:
########################### Model params
#################################################################################
import lightgbm as lgb
# Parameters handed to lgb.train for every store/department model.
# A 'seed' entry is added later, in the "Vars" cell.
lgb_params = dict(
    boosting_type='gbdt',
    objective='tweedie',
    tweedie_variance_power=1.1,
    metric='rmse',
    subsample=0.5,
    subsample_freq=1,
    learning_rate=0.015,
    num_leaves=255,          # 2**8 - 1
    min_data_in_leaf=255,    # 2**8 - 1
    feature_fraction=0.5,
    max_bin=100,
    n_estimators=3000,
    boost_from_average=False,
    verbose=1,
)

In [10]:
########################### Vars
#################################################################################
# Version tag appended to the saved model file names.
VER = 1
# One seed for the Python/NumPy RNGs and for LightGBM.
SEED = 42
seed_everything(SEED)
lgb_params.update(seed=SEED)
# Upper bound for the number of worker processes in df_parallelize_run.
N_CORES = psutil.cpu_count()


# Limits and constants
TARGET = 'sales'               # name of the label column
START_TRAIN = 700              # rows with d < START_TRAIN are dropped when loading
END_TRAIN = 1941 - 28 * KKK    # last 'd' value used for training
P_HORIZON = 28                 # length of the validation / prediction window in 'd' units
USE_AUX = False                # not referenced in the visible part of this notebook

# Columns that are never used as model features.
remove_features = [
    'id', 'cat_id', 'state_id', 'store_id', 'dept_id',
    'date', 'wm_yr_wk', 'd', TARGET,
]
# Columns taken from the mean-encoding pickle.
mean_features = [
    'enc_item_id_store_id_mean',
    'enc_item_id_store_id_std',
]

# Input locations
ORIGINAL = raw_data_dir
BASE = f'{processed_data_dir}processedgrid_part_1.pkl'
PRICE = f'{processed_data_dir}processedgrid_part_2.pkl'
CALENDAR = f'{processed_data_dir}processedgrid_part_3.pkl'
LAGS = f'{processed_data_dir}processedlags_df_28.pkl'
MEAN_ENC = f'{processed_data_dir}processedmean_encoding_df.pkl'


# Splits for lag creation
SHIFT_DAY = 28
N_LAGS = 15
# Consecutive lag offsets SHIFT_DAY .. SHIFT_DAY + N_LAGS - 1.
LAGS_SPLIT = list(range(SHIFT_DAY, SHIFT_DAY + N_LAGS))
# [shift, window] pairs; this is the shape make_lag_roll reads as
# LAG_DAY[0] / LAG_DAY[1] (presumably these are its inputs -- verify at the call site).
ROLS_SPLIT = [
    [shift, window]
    for shift in (1, 7, 14)
    for window in (7, 14, 30, 60)
]

In [11]:
########################### Train Models
#################################################################################
# Train one LightGBM model per (store, department) pair. For each pair the
# loop also writes the frame that is later used for prediction to
# processed_data_dir and the fitted model to model_dir.
for store_id in STORES:
    # NOTE: `state_id` holds a department id (it iterates DEPTS). The name is
    # kept unchanged because it is a notebook-level variable.
    for state_id in DEPTS:
        print('Train', store_id, state_id)

        grid_df, features_columns = get_data_by_store(store_id, state_id)

        # Rows up to END_TRAIN are used for fitting.
        train_mask = grid_df['d']<=END_TRAIN
        # The last P_HORIZON days of the training rows; this is a subset of
        # the training data, so the logged rmse is not an out-of-sample score.
        valid_mask = train_mask&(grid_df['d']>(END_TRAIN-P_HORIZON))
        # The 100 days before END_TRAIN plus the P_HORIZON days after it.
        preds_mask = (grid_df['d']>(END_TRAIN-100)) & (grid_df['d'] <= END_TRAIN+P_HORIZON)

        train_data = lgb.Dataset(grid_df[train_mask][features_columns],
                           label=grid_df[train_mask][TARGET])

        valid_data = lgb.Dataset(grid_df[valid_mask][features_columns],
                           label=grid_df[valid_mask][TARGET])

        # Keep only the prediction window and drop the '_tmp_' columns.
        keep_cols = [col for col in list(grid_df) if '_tmp_' not in col]
        grid_df = grid_df.loc[preds_mask, keep_cols].reset_index(drop=True)

        # Blank out the target after END_TRAIN. Assigning through .loc avoids
        # writing into the array returned by `.values`, which is read-only
        # when pandas copy-on-write is active.
        grid_df.loc[grid_df['d'] > END_TRAIN, TARGET] = np.nan

        grid_df.to_pickle(processed_data_dir+'test_'+store_id+'_'+state_id+'.pkl')
        del grid_df

        seed_everything(SEED)
        print('Starte Training:', store_id, state_id)
        estimator = lgb.train(lgb_params,
                              train_data,
                              valid_sets = [valid_data],
                              # log the validation metric after every iteration
                              callbacks=[lgb.log_evaluation(1, True)]
                              )

        # Show the 25 features with the highest importance.
        display(pd.DataFrame({'name':estimator.feature_name(),
                              'imp':estimator.feature_importance()}).sort_values('imp',ascending=False).head(25))

        model_name = model_dir+'lgb_model_'+store_id+'_'+state_id+'_v'+str(VER)+'.bin'
        # Close the file handle deterministically once the model is written.
        with open(model_name, 'wb') as model_file:
            pickle.dump(estimator, model_file)

        del train_data, valid_data, estimator
        gc.collect()

        # Feature list of the most recently trained model.
        MODEL_FEATURES = features_columns

Train WI_1 HOBBIES_1
Starte Training: WI_1 HOBBIES_1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.034191 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4738
[LightGBM] [Info] Number of data points in the train set: 489351, number of used features: 66
[1]	valid_0's rmse: 2.14737
[2]	valid_0's rmse: 2.13657
[3]	valid_0's rmse: 2.12618
[4]	valid_0's rmse: 2.1158
[5]	valid_0's rmse: 2.10531
[6]	valid_0's rmse: 2.09498
[7]	valid_0's rmse: 2.08477
[8]	valid_0's rmse: 2.07447
[9]	valid_0's rmse: 2.06432
[10]	valid_0's rmse: 2.05408
[11]	valid_0's rmse: 2.04436
[12]	valid_0's rmse: 2.03471
[13]	valid_0's rmse: 2.02488
[14]	valid_0's rmse: 2.01548
[15]	valid_0's rmse: 2.00566
[16]	valid_0's rmse: 1.99601
[17]	valid_0's rmse: 1.98698
[18]	valid_0's rmse: 1.97779
[19]	valid_0's rmse: 1.96898
[20]	valid_0's rmse: 1.95999
[21]	valid_0's rmse: 1.

Unnamed: 0,name,imp
0,item_id,94263
21,tm_w,23653
20,tm_d,22307
52,rolling_mean_180,19470
54,rolling_mean_tmp_1_7,19308
49,rolling_std_30,18718
53,rolling_std_180,18365
45,rolling_std_7,18255
47,rolling_std_14,17938
51,rolling_std_60,17908


Train WI_1 HOBBIES_2
Starte Training: WI_1 HOBBIES_2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009553 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3285
[LightGBM] [Info] Number of data points in the train set: 168440, number of used features: 66
[1]	valid_0's rmse: 1.04146
[2]	valid_0's rmse: 1.03233
[3]	valid_0's rmse: 1.02347
[4]	valid_0's rmse: 1.01481
[5]	valid_0's rmse: 1.0063
[6]	valid_0's rmse: 0.997996
[7]	valid_0's rmse: 0.989907
[8]	valid_0's rmse: 0.982118
[9]	valid_0's rmse: 0.974421
[10]	valid_0's rmse: 0.966835
[11]	valid_0's rmse: 0.959591
[12]	valid_0's rmse: 0.952409
[13]	valid_0's rmse: 0.945498
[14]	valid_0's rmse: 0.938727
[15]	valid_0's rmse: 0.93211
[16]	valid_0's rmse: 0.925783
[17]	valid_0's rmse: 0.919657
[18]	valid_0's rmse: 0.913557
[19]	valid_0's rmse: 0.907661
[20]	valid_0's rmse: 0.902
[21]	valid_0

Unnamed: 0,name,imp
0,item_id,76262
52,rolling_mean_180,36536
53,rolling_std_180,36531
21,tm_w,36031
20,tm_d,35109
51,rolling_std_60,31656
49,rolling_std_30,25241
50,rolling_mean_60,22385
25,tm_dw,21243
65,rolling_mean_tmp_14_60,20500


Train WI_1 HOUSEHOLD_1
Starte Training: WI_1 HOUSEHOLD_1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.027443 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4650
[LightGBM] [Info] Number of data points in the train set: 596596, number of used features: 66
[1]	valid_0's rmse: 1.78301
[2]	valid_0's rmse: 1.77351
[3]	valid_0's rmse: 1.76473
[4]	valid_0's rmse: 1.75591
[5]	valid_0's rmse: 1.74664
[6]	valid_0's rmse: 1.73751
[7]	valid_0's rmse: 1.72878
[8]	valid_0's rmse: 1.71974
[9]	valid_0's rmse: 1.71083
[10]	valid_0's rmse: 1.70238
[11]	valid_0's rmse: 1.69393
[12]	valid_0's rmse: 1.68569
[13]	valid_0's rmse: 1.67704
[14]	valid_0's rmse: 1.66883
[15]	valid_0's rmse: 1.66057
[16]	valid_0's rmse: 1.65229
[17]	valid_0's rmse: 1.64447
[18]	valid_0's rmse: 1.63636
[19]	valid_0's rmse: 1.62898
[20]	valid_0's rmse: 1.62137
[21]	valid_0's rms

Unnamed: 0,name,imp
0,item_id,118129
21,tm_w,25238
20,tm_d,23498
52,rolling_mean_180,20652
13,event_name_1,19697
53,rolling_std_180,19629
49,rolling_std_30,19162
47,rolling_std_14,18310
51,rolling_std_60,18191
45,rolling_std_7,17222


Train WI_1 HOUSEHOLD_2
Starte Training: WI_1 HOUSEHOLD_2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.037339 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4061
[LightGBM] [Info] Number of data points in the train set: 597177, number of used features: 66
[1]	valid_0's rmse: 0.992813
[2]	valid_0's rmse: 0.984002
[3]	valid_0's rmse: 0.975533
[4]	valid_0's rmse: 0.967194
[5]	valid_0's rmse: 0.959003
[6]	valid_0's rmse: 0.950999
[7]	valid_0's rmse: 0.943338
[8]	valid_0's rmse: 0.93571
[9]	valid_0's rmse: 0.928308
[10]	valid_0's rmse: 0.921089
[11]	valid_0's rmse: 0.914094
[12]	valid_0's rmse: 0.90729
[13]	valid_0's rmse: 0.900527
[14]	valid_0's rmse: 0.894055
[15]	valid_0's rmse: 0.887651
[16]	valid_0's rmse: 0.881374
[17]	valid_0's rmse: 0.875321
[18]	valid_0's rmse: 0.869346
[19]	valid_0's rmse: 0.863627
[20]	valid_0's rmse: 0.857956


Unnamed: 0,name,imp
0,item_id,117169
21,tm_w,29727
52,rolling_mean_180,29225
20,tm_d,28221
53,rolling_std_180,27815
51,rolling_std_60,25061
49,rolling_std_30,22430
50,rolling_mean_60,19201
13,event_name_1,18228
65,rolling_mean_tmp_14_60,16747


Train WI_1 FOODS_1
Starte Training: WI_1 FOODS_1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009504 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4437
[LightGBM] [Info] Number of data points in the train set: 259405, number of used features: 66
[1]	valid_0's rmse: 2.98556
[2]	valid_0's rmse: 2.97097
[3]	valid_0's rmse: 2.95675
[4]	valid_0's rmse: 2.94304
[5]	valid_0's rmse: 2.92767
[6]	valid_0's rmse: 2.91233
[7]	valid_0's rmse: 2.89667
[8]	valid_0's rmse: 2.88081
[9]	valid_0's rmse: 2.86488
[10]	valid_0's rmse: 2.84991
[11]	valid_0's rmse: 2.83418
[12]	valid_0's rmse: 2.81853
[13]	valid_0's rmse: 2.80268
[14]	valid_0's rmse: 2.78702
[15]	valid_0's rmse: 2.77156
[16]	valid_0's rmse: 2.7553
[17]	valid_0's rmse: 2.74008
[18]	valid_0's rmse: 2.72412
[19]	valid_0's rmse: 2.70887
[20]	valid_0's rmse: 2.69302
[21]	valid_0's rmse: 2.6782

Unnamed: 0,name,imp
0,item_id,95793
21,tm_w,25620
20,tm_d,24226
52,rolling_mean_180,21207
53,rolling_std_180,19962
47,rolling_std_14,19323
45,rolling_std_7,19068
49,rolling_std_30,18841
54,rolling_mean_tmp_1_7,18451
51,rolling_std_60,17895


Train WI_1 FOODS_2
Starte Training: WI_1 FOODS_2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.018512 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4640
[LightGBM] [Info] Number of data points in the train set: 446926, number of used features: 66
[1]	valid_0's rmse: 2.43361
[2]	valid_0's rmse: 2.42123
[3]	valid_0's rmse: 2.40904
[4]	valid_0's rmse: 2.39678
[5]	valid_0's rmse: 2.38411
[6]	valid_0's rmse: 2.37167
[7]	valid_0's rmse: 2.35949
[8]	valid_0's rmse: 2.34704
[9]	valid_0's rmse: 2.33476
[10]	valid_0's rmse: 2.32285
[11]	valid_0's rmse: 2.311
[12]	valid_0's rmse: 2.29915
[13]	valid_0's rmse: 2.28676
[14]	valid_0's rmse: 2.27509
[15]	valid_0's rmse: 2.26307
[16]	valid_0's rmse: 2.25138
[17]	valid_0's rmse: 2.2399
[18]	valid_0's rmse: 2.22824
[19]	valid_0's rmse: 2.21712
[20]	valid_0's rmse: 2.20578
[21]	valid_0's rmse: 2.19511


Unnamed: 0,name,imp
0,item_id,114002
20,tm_d,22928
21,tm_w,22858
52,rolling_mean_180,22850
53,rolling_std_180,19317
51,rolling_std_60,17734
45,rolling_std_7,17602
54,rolling_mean_tmp_1_7,17359
47,rolling_std_14,17078
50,rolling_mean_60,16846


Train WI_1 FOODS_3
Starte Training: WI_1 FOODS_3
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.034817 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5521
[LightGBM] [Info] Number of data points in the train set: 937495, number of used features: 66
[1]	valid_0's rmse: 3.77751
[2]	valid_0's rmse: 3.75806
[3]	valid_0's rmse: 3.73882
[4]	valid_0's rmse: 3.71962
[5]	valid_0's rmse: 3.69896
[6]	valid_0's rmse: 3.67804
[7]	valid_0's rmse: 3.65699
[8]	valid_0's rmse: 3.63535
[9]	valid_0's rmse: 3.61334
[10]	valid_0's rmse: 3.59181
[11]	valid_0's rmse: 3.5697
[12]	valid_0's rmse: 3.54754
[13]	valid_0's rmse: 3.52457
[14]	valid_0's rmse: 3.50198
[15]	valid_0's rmse: 3.47885
[16]	valid_0's rmse: 3.45592
[17]	valid_0's rmse: 3.43342
[18]	valid_0's rmse: 3.41044
[19]	valid_0's rmse: 3.38783
[20]	valid_0's rmse: 3.36478
[21]	valid_0's rmse: 3.3432

Unnamed: 0,name,imp
0,item_id,121943
13,event_name_1,25733
21,tm_w,22313
20,tm_d,21012
54,rolling_mean_tmp_1_7,18194
52,rolling_mean_180,18073
53,rolling_std_180,16810
45,rolling_std_7,16683
51,rolling_std_60,15858
49,rolling_std_30,15456


Train WI_2 HOBBIES_1
Starte Training: WI_2 HOBBIES_1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.029939 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4681
[LightGBM] [Info] Number of data points in the train set: 485806, number of used features: 66
[1]	valid_0's rmse: 1.69774
[2]	valid_0's rmse: 1.6901
[3]	valid_0's rmse: 1.68254
[4]	valid_0's rmse: 1.67525
[5]	valid_0's rmse: 1.66827
[6]	valid_0's rmse: 1.66131
[7]	valid_0's rmse: 1.65428
[8]	valid_0's rmse: 1.64746
[9]	valid_0's rmse: 1.6409
[10]	valid_0's rmse: 1.63455
[11]	valid_0's rmse: 1.62834
[12]	valid_0's rmse: 1.62213
[13]	valid_0's rmse: 1.61605
[14]	valid_0's rmse: 1.61013
[15]	valid_0's rmse: 1.60452
[16]	valid_0's rmse: 1.59879
[17]	valid_0's rmse: 1.59329
[18]	valid_0's rmse: 1.58801
[19]	valid_0's rmse: 1.58277
[20]	valid_0's rmse: 1.57753
[21]	valid_0's rmse: 1.5

Unnamed: 0,name,imp
0,item_id,97057
21,tm_w,23569
20,tm_d,23525
52,rolling_mean_180,23195
53,rolling_std_180,21565
51,rolling_std_60,20086
49,rolling_std_30,19860
54,rolling_mean_tmp_1_7,17831
47,rolling_std_14,17696
50,rolling_mean_60,17451


Train WI_2 HOBBIES_2
Starte Training: WI_2 HOBBIES_2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008675 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3557
[LightGBM] [Info] Number of data points in the train set: 168415, number of used features: 66
[1]	valid_0's rmse: 1.07387
[2]	valid_0's rmse: 1.06418
[3]	valid_0's rmse: 1.05478
[4]	valid_0's rmse: 1.04567
[5]	valid_0's rmse: 1.03684
[6]	valid_0's rmse: 1.02805
[7]	valid_0's rmse: 1.01953
[8]	valid_0's rmse: 1.01127
[9]	valid_0's rmse: 1.00318
[10]	valid_0's rmse: 0.995394
[11]	valid_0's rmse: 0.987803
[12]	valid_0's rmse: 0.980301
[13]	valid_0's rmse: 0.973077
[14]	valid_0's rmse: 0.96593
[15]	valid_0's rmse: 0.958954
[16]	valid_0's rmse: 0.952183
[17]	valid_0's rmse: 0.945692
[18]	valid_0's rmse: 0.939419
[19]	valid_0's rmse: 0.933071
[20]	valid_0's rmse: 0.926997
[21]	valid_0

Unnamed: 0,name,imp
0,item_id,74573
53,rolling_std_180,37440
52,rolling_mean_180,37290
20,tm_d,36456
21,tm_w,36135
51,rolling_std_60,31737
49,rolling_std_30,24507
50,rolling_mean_60,22133
25,tm_dw,21345
65,rolling_mean_tmp_14_60,19936


Train WI_2 HOUSEHOLD_1
Starte Training: WI_2 HOUSEHOLD_1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.025520 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5111
[LightGBM] [Info] Number of data points in the train set: 597300, number of used features: 66
[1]	valid_0's rmse: 3.72016
[2]	valid_0's rmse: 3.7009
[3]	valid_0's rmse: 3.68146
[4]	valid_0's rmse: 3.66172
[5]	valid_0's rmse: 3.64094
[6]	valid_0's rmse: 3.62003
[7]	valid_0's rmse: 3.59865
[8]	valid_0's rmse: 3.57656
[9]	valid_0's rmse: 3.55432
[10]	valid_0's rmse: 3.53266
[11]	valid_0's rmse: 3.51002
[12]	valid_0's rmse: 3.4867
[13]	valid_0's rmse: 3.46337
[14]	valid_0's rmse: 3.44019
[15]	valid_0's rmse: 3.41657
[16]	valid_0's rmse: 3.39257
[17]	valid_0's rmse: 3.3697
[18]	valid_0's rmse: 3.34587
[19]	valid_0's rmse: 3.32272
[20]	valid_0's rmse: 3.2986
[21]	valid_0's rmse: 3

Unnamed: 0,name,imp
0,item_id,111261
21,tm_w,24027
20,tm_d,23212
13,event_name_1,19515
54,rolling_mean_tmp_1_7,18829
52,rolling_mean_180,18800
53,rolling_std_180,17724
45,rolling_std_7,17626
47,rolling_std_14,16560
58,rolling_mean_tmp_7_7,16394


Train WI_2 HOUSEHOLD_2
Starte Training: WI_2 HOUSEHOLD_2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.032381 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4099
[LightGBM] [Info] Number of data points in the train set: 597321, number of used features: 66
[1]	valid_0's rmse: 1.07941
[2]	valid_0's rmse: 1.06941
[3]	valid_0's rmse: 1.05971
[4]	valid_0's rmse: 1.0502
[5]	valid_0's rmse: 1.04085
[6]	valid_0's rmse: 1.03177
[7]	valid_0's rmse: 1.02286
[8]	valid_0's rmse: 1.01412
[9]	valid_0's rmse: 1.00559
[10]	valid_0's rmse: 0.997317
[11]	valid_0's rmse: 0.989169
[12]	valid_0's rmse: 0.981221
[13]	valid_0's rmse: 0.973398
[14]	valid_0's rmse: 0.965785
[15]	valid_0's rmse: 0.958286
[16]	valid_0's rmse: 0.95101
[17]	valid_0's rmse: 0.943925
[18]	valid_0's rmse: 0.936946
[19]	valid_0's rmse: 0.930161
[20]	valid_0's rmse: 0.92353
[21]	valid

Unnamed: 0,name,imp
0,item_id,106778
21,tm_w,31912
20,tm_d,30979
52,rolling_mean_180,29318
53,rolling_std_180,28145
51,rolling_std_60,23945
49,rolling_std_30,20316
50,rolling_mean_60,19519
13,event_name_1,17941
25,tm_dw,17927


Train WI_2 FOODS_1
Starte Training: WI_2 FOODS_1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.019239 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4779
[LightGBM] [Info] Number of data points in the train set: 259157, number of used features: 66
[1]	valid_0's rmse: 5.70489
[2]	valid_0's rmse: 5.6849
[3]	valid_0's rmse: 5.66495
[4]	valid_0's rmse: 5.64623
[5]	valid_0's rmse: 5.62335
[6]	valid_0's rmse: 5.60048
[7]	valid_0's rmse: 5.5758
[8]	valid_0's rmse: 5.55043
[9]	valid_0's rmse: 5.52424
[10]	valid_0's rmse: 5.50005
[11]	valid_0's rmse: 5.47234
[12]	valid_0's rmse: 5.44479
[13]	valid_0's rmse: 5.41675
[14]	valid_0's rmse: 5.38752
[15]	valid_0's rmse: 5.3582
[16]	valid_0's rmse: 5.32782
[17]	valid_0's rmse: 5.29947
[18]	valid_0's rmse: 5.26945
[19]	valid_0's rmse: 5.24046
[20]	valid_0's rmse: 5.20932
[21]	valid_0's rmse: 5.18179


Unnamed: 0,name,imp
0,item_id,89315
21,tm_w,24414
20,tm_d,24054
52,rolling_mean_180,21219
53,rolling_std_180,19457
49,rolling_std_30,18669
54,rolling_mean_tmp_1_7,18613
47,rolling_std_14,18512
45,rolling_std_7,18433
51,rolling_std_60,17717


Train WI_2 FOODS_2
Starte Training: WI_2 FOODS_2
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.108685 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5487
[LightGBM] [Info] Number of data points in the train set: 446608, number of used features: 66
[1]	valid_0's rmse: 6.49622
[2]	valid_0's rmse: 6.47133
[3]	valid_0's rmse: 6.44549
[4]	valid_0's rmse: 6.41873
[5]	valid_0's rmse: 6.39073
[6]	valid_0's rmse: 6.36197
[7]	valid_0's rmse: 6.33119
[8]	valid_0's rmse: 6.29981
[9]	valid_0's rmse: 6.26691
[10]	valid_0's rmse: 6.23435
[11]	valid_0's rmse: 6.19976
[12]	valid_0's rmse: 6.16421
[13]	valid_0's rmse: 6.12898
[14]	valid_0's rmse: 6.09214
[15]	valid_0's rmse: 6.05513
[16]	valid_0's rmse: 6.01688
[17]	valid_0's rmse: 5.9782
[18]	valid_0's rmse: 5.9387
[19]	valid_0's rmse: 5.89899
[20]	valid_0's rmse: 5.8586
[21]	valid_0's rmse: 5.81876
[22]	valid_0's rmse: 5.77714
[23]	valid_0's rmse: 5.736
[24]	val

Unnamed: 0,name,imp
0,item_id,108662
21,tm_w,21516
20,tm_d,20382
52,rolling_mean_180,20111
54,rolling_mean_tmp_1_7,18785
45,rolling_std_7,16656
53,rolling_std_180,16638
13,event_name_1,16129
58,rolling_mean_tmp_7_7,15831
55,rolling_mean_tmp_1_14,15546


Train WI_2 FOODS_3
Starte Training: WI_2 FOODS_3
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.039115 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5878
[LightGBM] [Info] Number of data points in the train set: 940527, number of used features: 66
[1]	valid_0's rmse: 7.12919
[2]	valid_0's rmse: 7.10378
[3]	valid_0's rmse: 7.07756
[4]	valid_0's rmse: 7.05082
[5]	valid_0's rmse: 7.02152
[6]	valid_0's rmse: 6.99164
[7]	valid_0's rmse: 6.95985
[8]	valid_0's rmse: 6.92702
[9]	valid_0's rmse: 6.89329
[10]	valid_0's rmse: 6.86028
[11]	valid_0's rmse: 6.82455
[12]	valid_0's rmse: 6.7877
[13]	valid_0's rmse: 6.75135
[14]	valid_0's rmse: 6.71327
[15]	valid_0's rmse: 6.6756
[16]	valid_0's rmse: 6.63679
[17]	valid_0's rmse: 6.59846
[18]	valid_0's rmse: 6.55799
[19]	valid_0's rmse: 6.51863
[20]	valid_0's rmse: 6.47817
[21]	valid_0's rmse: 6.44089

Unnamed: 0,name,imp
0,item_id,105655
13,event_name_1,22843
21,tm_w,18311
54,rolling_mean_tmp_1_7,18225
20,tm_d,18034
58,rolling_mean_tmp_7_7,16794
52,rolling_mean_180,15503
62,rolling_mean_tmp_14_7,15313
45,rolling_std_7,15194
36,sales_lag_35,14345


Train WI_3 HOBBIES_1
Starte Training: WI_3 HOBBIES_1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.031471 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4665
[LightGBM] [Info] Number of data points in the train set: 487692, number of used features: 66
[1]	valid_0's rmse: 1.70928
[2]	valid_0's rmse: 1.70232
[3]	valid_0's rmse: 1.69553
[4]	valid_0's rmse: 1.68909
[5]	valid_0's rmse: 1.68257
[6]	valid_0's rmse: 1.67616
[7]	valid_0's rmse: 1.66986
[8]	valid_0's rmse: 1.6638
[9]	valid_0's rmse: 1.6579
[10]	valid_0's rmse: 1.65208
[11]	valid_0's rmse: 1.64633
[12]	valid_0's rmse: 1.6408
[13]	valid_0's rmse: 1.63534
[14]	valid_0's rmse: 1.63025
[15]	valid_0's rmse: 1.62484
[16]	valid_0's rmse: 1.61977
[17]	valid_0's rmse: 1.61493
[18]	valid_0's rmse: 1.60999
[19]	valid_0's rmse: 1.60521
[20]	valid_0's rmse: 1.60056
[21]	valid_0's rmse: 1.59

Unnamed: 0,name,imp
0,item_id,94409
21,tm_w,23167
20,tm_d,22146
52,rolling_mean_180,21896
53,rolling_std_180,21098
51,rolling_std_60,20620
49,rolling_std_30,20296
47,rolling_std_14,19045
55,rolling_mean_tmp_1_14,18007
50,rolling_mean_60,17703


Train WI_3 HOBBIES_2
Starte Training: WI_3 HOBBIES_2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009548 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3213
[LightGBM] [Info] Number of data points in the train set: 166272, number of used features: 66
[1]	valid_0's rmse: 0.987609
[2]	valid_0's rmse: 0.977558
[3]	valid_0's rmse: 0.967665
[4]	valid_0's rmse: 0.95794
[5]	valid_0's rmse: 0.948407
[6]	valid_0's rmse: 0.939115
[7]	valid_0's rmse: 0.930196
[8]	valid_0's rmse: 0.921396
[9]	valid_0's rmse: 0.912869
[10]	valid_0's rmse: 0.904521
[11]	valid_0's rmse: 0.896432
[12]	valid_0's rmse: 0.888549
[13]	valid_0's rmse: 0.880883
[14]	valid_0's rmse: 0.873267
[15]	valid_0's rmse: 0.86585
[16]	valid_0's rmse: 0.858615
[17]	valid_0's rmse: 0.851723
[18]	valid_0's rmse: 0.84492
[19]	valid_0's rmse: 0.838087
[20]	valid_0's rmse: 0.831605
[21]	

Unnamed: 0,name,imp
0,item_id,69224
20,tm_d,39230
53,rolling_std_180,39194
21,tm_w,39051
52,rolling_mean_180,37827
51,rolling_std_60,30821
25,tm_dw,23019
49,rolling_std_30,22810
50,rolling_mean_60,21814
65,rolling_mean_tmp_14_60,19810


Train WI_3 HOUSEHOLD_1
Starte Training: WI_3 HOUSEHOLD_1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.025694 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4743
[LightGBM] [Info] Number of data points in the train set: 600617, number of used features: 66
[1]	valid_0's rmse: 2.28545
[2]	valid_0's rmse: 2.27202
[3]	valid_0's rmse: 2.25897
[4]	valid_0's rmse: 2.24614
[5]	valid_0's rmse: 2.23264
[6]	valid_0's rmse: 2.21905
[7]	valid_0's rmse: 2.2058
[8]	valid_0's rmse: 2.19249
[9]	valid_0's rmse: 2.17911
[10]	valid_0's rmse: 2.16639
[11]	valid_0's rmse: 2.15348
[12]	valid_0's rmse: 2.14048
[13]	valid_0's rmse: 2.12739
[14]	valid_0's rmse: 2.11452
[15]	valid_0's rmse: 2.10168
[16]	valid_0's rmse: 2.08903
[17]	valid_0's rmse: 2.07645
[18]	valid_0's rmse: 2.06383
[19]	valid_0's rmse: 2.05196
[20]	valid_0's rmse: 2.03971
[21]	valid_0's rmse

Unnamed: 0,name,imp
0,item_id,116125
21,tm_w,24756
20,tm_d,24155
52,rolling_mean_180,20640
13,event_name_1,19898
53,rolling_std_180,19216
54,rolling_mean_tmp_1_7,17735
49,rolling_std_30,17600
51,rolling_std_60,17148
47,rolling_std_14,16907


Train WI_3 HOUSEHOLD_2
Starte Training: WI_3 HOUSEHOLD_2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.036561 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4174
[LightGBM] [Info] Number of data points in the train set: 592231, number of used features: 66
[1]	valid_0's rmse: 1.0965
[2]	valid_0's rmse: 1.08653
[3]	valid_0's rmse: 1.07671
[4]	valid_0's rmse: 1.06718
[5]	valid_0's rmse: 1.05753
[6]	valid_0's rmse: 1.0482
[7]	valid_0's rmse: 1.03905
[8]	valid_0's rmse: 1.03004
[9]	valid_0's rmse: 1.02116
[10]	valid_0's rmse: 1.01261
[11]	valid_0's rmse: 1.00417
[12]	valid_0's rmse: 0.995788
[13]	valid_0's rmse: 0.987655
[14]	valid_0's rmse: 0.979758
[15]	valid_0's rmse: 0.97192
[16]	valid_0's rmse: 0.964337
[17]	valid_0's rmse: 0.957207
[18]	valid_0's rmse: 0.949916
[19]	valid_0's rmse: 0.942927
[20]	valid_0's rmse: 0.93599
[21]	valid_0'

Unnamed: 0,name,imp
0,item_id,111810
21,tm_w,31851
20,tm_d,30595
52,rolling_mean_180,30383
53,rolling_std_180,29316
51,rolling_std_60,24985
49,rolling_std_30,20823
50,rolling_mean_60,19716
13,event_name_1,18021
25,tm_dw,17494


Train WI_3 FOODS_1
Starte Training: WI_3 FOODS_1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.017068 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4431
[LightGBM] [Info] Number of data points in the train set: 259031, number of used features: 66
[1]	valid_0's rmse: 3.40622
[2]	valid_0's rmse: 3.38936
[3]	valid_0's rmse: 3.3736
[4]	valid_0's rmse: 3.35925
[5]	valid_0's rmse: 3.34164
[6]	valid_0's rmse: 3.3236
[7]	valid_0's rmse: 3.30468
[8]	valid_0's rmse: 3.28574
[9]	valid_0's rmse: 3.26625
[10]	valid_0's rmse: 3.24924
[11]	valid_0's rmse: 3.22953
[12]	valid_0's rmse: 3.20984
[13]	valid_0's rmse: 3.19071
[14]	valid_0's rmse: 3.17107
[15]	valid_0's rmse: 3.15181
[16]	valid_0's rmse: 3.13184
[17]	valid_0's rmse: 3.11471
[18]	valid_0's rmse: 3.09474
[19]	valid_0's rmse: 3.07697
[20]	valid_0's rmse: 3.0578
[21]	valid_0's rmse: 3.04156


Unnamed: 0,name,imp
0,item_id,92298
21,tm_w,26288
20,tm_d,25905
52,rolling_mean_180,23160
53,rolling_std_180,21435
49,rolling_std_30,19520
51,rolling_std_60,19427
47,rolling_std_14,18467
50,rolling_mean_60,18352
54,rolling_mean_tmp_1_7,17219


Train WI_3 FOODS_2
Starte Training: WI_3 FOODS_2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.032477 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4720
[LightGBM] [Info] Number of data points in the train set: 451841, number of used features: 66
[1]	valid_0's rmse: 2.74034
[2]	valid_0's rmse: 2.72582
[3]	valid_0's rmse: 2.71143
[4]	valid_0's rmse: 2.69747
[5]	valid_0's rmse: 2.68258
[6]	valid_0's rmse: 2.66772
[7]	valid_0's rmse: 2.65258
[8]	valid_0's rmse: 2.63747
[9]	valid_0's rmse: 2.62213
[10]	valid_0's rmse: 2.60723
[11]	valid_0's rmse: 2.59176
[12]	valid_0's rmse: 2.57623
[13]	valid_0's rmse: 2.56074
[14]	valid_0's rmse: 2.54519
[15]	valid_0's rmse: 2.53006
[16]	valid_0's rmse: 2.51504
[17]	valid_0's rmse: 2.50018
[18]	valid_0's rmse: 2.48472
[19]	valid_0's rmse: 2.46973
[20]	valid_0's rmse: 2.45459
[21]	valid_0's rmse: 2.440

Unnamed: 0,name,imp
0,item_id,112121
21,tm_w,23445
20,tm_d,22968
52,rolling_mean_180,21799
53,rolling_std_180,19162
51,rolling_std_60,17536
49,rolling_std_30,16890
47,rolling_std_14,16781
11,price_momentum_m,16528
50,rolling_mean_60,16375


Train WI_3 FOODS_3
Starte Training: WI_3 FOODS_3
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042344 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5906
[LightGBM] [Info] Number of data points in the train set: 932368, number of used features: 66
[1]	valid_0's rmse: 6.42821
[2]	valid_0's rmse: 6.40475
[3]	valid_0's rmse: 6.38032
[4]	valid_0's rmse: 6.35504
[5]	valid_0's rmse: 6.32769
[6]	valid_0's rmse: 6.299
[7]	valid_0's rmse: 6.26916
[8]	valid_0's rmse: 6.23785
[9]	valid_0's rmse: 6.20548
[10]	valid_0's rmse: 6.17266
[11]	valid_0's rmse: 6.13807
[12]	valid_0's rmse: 6.10232
[13]	valid_0's rmse: 6.06544
[14]	valid_0's rmse: 6.02766
[15]	valid_0's rmse: 5.98888
[16]	valid_0's rmse: 5.94897
[17]	valid_0's rmse: 5.90952
[18]	valid_0's rmse: 5.86794
[19]	valid_0's rmse: 5.82692
[20]	valid_0's rmse: 5.78461
[21]	valid_0's rmse: 5.7431


Unnamed: 0,name,imp
0,item_id,118371
13,event_name_1,24853
21,tm_w,21009
20,tm_d,20303
54,rolling_mean_tmp_1_7,17600
52,rolling_mean_180,16711
53,rolling_std_180,15877
58,rolling_mean_tmp_7_7,15515
45,rolling_std_7,15498
62,rolling_mean_tmp_14_7,14430
