### Recruit Visitor Forecasting - Gradient Boosted Trees (with lgbm)

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [17]:
#Contibutions from: DSEverything - Mean Mix - Math, Geo, Harmonic
#https://www.kaggle.com/dongxu027/mean-mix-math-geo-harmonic-lb-0-493 
#JdPaletto - Surprised Yet? - Part2
#https://www.kaggle.com/jdpaletto/surprised-yet-part2-lb-0-503
#hklee - weighted mean comparisons
#https://www.kaggle.com/zeemeen/weighted-mean-comparisons-lb-0-497-1st

#Load required libraries
import glob, re
import numpy as np
import pandas as pd
from sklearn import *
from datetime import datetime
import lightgbm as lgb

In [3]:
data = {
    'tra': pd.read_csv('air_visit_data.csv'),
    'as': pd.read_csv('air_store_info.csv'),
    'hs': pd.read_csv('hpg_store_info.csv'),
    'ar': pd.read_csv('air_reserve.csv'),
    'hr': pd.read_csv('hpg_reserve.csv'),
    'id': pd.read_csv('store_id_relation.csv'),
    'tes': pd.read_csv('sample_submission.csv'),
    'hol': pd.read_csv('date_info.csv').rename(columns={'calendar_date':'visit_date'})
    }

In [4]:
data['hr'] = pd.merge(data['hr'], data['id'], how='inner', on=['hpg_store_id'])
data['hr'].head(5)

Unnamed: 0,hpg_store_id,visit_datetime,reserve_datetime,reserve_visitors,air_store_id
0,hpg_878cc70b1abc76f7,2016-01-01 19:00:00,2016-01-01 15:00:00,4,air_db80363d35f10926
1,hpg_878cc70b1abc76f7,2016-01-02 19:00:00,2016-01-02 14:00:00,2,air_db80363d35f10926
2,hpg_878cc70b1abc76f7,2016-01-03 18:00:00,2016-01-02 20:00:00,6,air_db80363d35f10926
3,hpg_878cc70b1abc76f7,2016-01-06 20:00:00,2016-01-04 22:00:00,3,air_db80363d35f10926
4,hpg_878cc70b1abc76f7,2016-01-11 18:00:00,2016-01-11 14:00:00,2,air_db80363d35f10926


In [5]:
for df in ['ar','hr']:
    data[df]['visit_datetime'] = pd.to_datetime(data[df]['visit_datetime'])
    data[df]['visit_datetime'] = data[df]['visit_datetime'].dt.date
    data[df]['reserve_datetime'] = pd.to_datetime(data[df]['reserve_datetime'])
    data[df]['reserve_datetime'] = data[df]['reserve_datetime'].dt.date
    data[df]['reserve_datetime_diff'] = data[df].apply(lambda r: (r['visit_datetime'] - r['reserve_datetime']).days, axis=1)
    tmp1 = data[df].groupby(['air_store_id','visit_datetime'], as_index=False)[['reserve_datetime_diff', 'reserve_visitors']].sum().rename(columns={'visit_datetime':'visit_date', 'reserve_datetime_diff': 'rs1', 'reserve_visitors':'rv1'})
    tmp2 = data[df].groupby(['air_store_id','visit_datetime'], as_index=False)[['reserve_datetime_diff', 'reserve_visitors']].mean().rename(columns={'visit_datetime':'visit_date', 'reserve_datetime_diff': 'rs2', 'reserve_visitors':'rv2'})
    data[df] = pd.merge(tmp1, tmp2, how='inner', on=['air_store_id','visit_date'])

data['ar'].head(5)
data['hr'].head(5)

Unnamed: 0,air_store_id,visit_date,rs1,rv1,rs2,rv2
0,air_00a91d42b08b08d9,2016-10-31,0,2,0.0,2.0
1,air_00a91d42b08b08d9,2016-12-05,4,9,4.0,9.0
2,air_00a91d42b08b08d9,2016-12-14,6,18,6.0,18.0
3,air_00a91d42b08b08d9,2016-12-17,6,2,6.0,2.0
4,air_00a91d42b08b08d9,2016-12-20,2,4,2.0,4.0


Unnamed: 0,air_store_id,visit_date,rs1,rv1,rs2,rv2
0,air_00a91d42b08b08d9,2016-01-14,3,2,3.0,2.0
1,air_00a91d42b08b08d9,2016-01-15,6,4,6.0,4.0
2,air_00a91d42b08b08d9,2016-01-16,3,2,3.0,2.0
3,air_00a91d42b08b08d9,2016-01-22,3,2,3.0,2.0
4,air_00a91d42b08b08d9,2016-01-29,6,5,6.0,5.0


In [6]:
data['tra']['visit_date'] = pd.to_datetime(data['tra']['visit_date'])
data['tra']['dow'] = data['tra']['visit_date'].dt.dayofweek
data['tra']['year'] = data['tra']['visit_date'].dt.year
data['tra']['month'] = data['tra']['visit_date'].dt.month
data['tra']['visit_date'] = data['tra']['visit_date'].dt.date

data['tes']['visit_date'] = data['tes']['id'].map(lambda x: str(x).split('_')[2])
data['tes']['air_store_id'] = data['tes']['id'].map(lambda x: '_'.join(x.split('_')[:2]))
data['tes']['visit_date'] = pd.to_datetime(data['tes']['visit_date'])
data['tes']['dow'] = data['tes']['visit_date'].dt.dayofweek
data['tes']['year'] = data['tes']['visit_date'].dt.year
data['tes']['month'] = data['tes']['visit_date'].dt.month
data['tes']['visit_date'] = data['tes']['visit_date'].dt.date

data['tra'].head(5)
data['tes'].head(5)

unique_stores = data['tes']['air_store_id'].unique()
stores = pd.concat([pd.DataFrame({'air_store_id': unique_stores, 'dow': [i]*len(unique_stores)}) for i in range(7)], axis=0, ignore_index=True).reset_index(drop=True)

print(unique_stores)
stores.head(5)

Unnamed: 0,air_store_id,visit_date,visitors,dow,year,month
0,air_ba937bf13d40fb24,2016-01-13,25,2,2016,1
1,air_ba937bf13d40fb24,2016-01-14,32,3,2016,1
2,air_ba937bf13d40fb24,2016-01-15,29,4,2016,1
3,air_ba937bf13d40fb24,2016-01-16,22,5,2016,1
4,air_ba937bf13d40fb24,2016-01-18,6,0,2016,1


Unnamed: 0,id,visitors,visit_date,air_store_id,dow,year,month
0,air_00a91d42b08b08d9_2017-04-23,0,2017-04-23,air_00a91d42b08b08d9,6,2017,4
1,air_00a91d42b08b08d9_2017-04-24,0,2017-04-24,air_00a91d42b08b08d9,0,2017,4
2,air_00a91d42b08b08d9_2017-04-25,0,2017-04-25,air_00a91d42b08b08d9,1,2017,4
3,air_00a91d42b08b08d9_2017-04-26,0,2017-04-26,air_00a91d42b08b08d9,2,2017,4
4,air_00a91d42b08b08d9_2017-04-27,0,2017-04-27,air_00a91d42b08b08d9,3,2017,4


['air_00a91d42b08b08d9' 'air_0164b9927d20bcc3' 'air_0241aa3964b7f861'
 'air_0328696196e46f18' 'air_034a3d5b40d5b1b1' 'air_036d4f1ee7285390'
 'air_0382c794b73b51ad' 'air_03963426c9312048' 'air_04341b588bde96cd'
 'air_049f6d5b402a31b2' 'air_04cae7c1bc9b2a0b' 'air_0585011fa179bcce'
 'air_05c325d315cc17f5' 'air_0647f17b4dc041c8' 'air_064e203265ee5753'
 'air_066f0221b8a4d533' 'air_06f95ac5c33aca10' 'air_0728814bd98f7367'
 'air_0768ab3910f7967f' 'air_07b314d83059c4d2' 'air_07bb665f9cdfbdfb'
 'air_082908692355165e' 'air_083ddc520ea47e1e' 'air_0845d8395f30c6bb'
 'air_084d98859256acf0' 'air_0867f7bebad6a649' 'air_08ba8cd01b3ba010'
 'air_08cb3c4ee6cd6a22' 'air_08ef81d5b7a0d13f' 'air_08f994758a1e76d4'
 'air_09040f6df960ddb8' 'air_0919d54f0c9a24b8' 'air_09661c0f3259cc04'
 'air_09a845d5b5944b01' 'air_09fd1f5c58583141' 'air_0a74a5408a0b8642'
 'air_0b184ec04c741a6a' 'air_0b1e72d2d4422b20' 'air_0b9038300f8b2b50'
 'air_0e1eae99b8723bc1' 'air_0e7c11b9abc50163' 'air_0f0cdeee6c9bf3d7'
 'air_0f2f96335f2748

Unnamed: 0,air_store_id,dow
0,air_00a91d42b08b08d9,0
1,air_0164b9927d20bcc3,0
2,air_0241aa3964b7f861,0
3,air_0328696196e46f18,0
4,air_034a3d5b40d5b1b1,0


In [7]:
tmp = data['tra'].groupby(['air_store_id','dow'], as_index=False)['visitors'].min().rename(columns={'visitors':'min_visitors'})
stores = pd.merge(stores, tmp, how='left', on=['air_store_id','dow']) 
tmp = data['tra'].groupby(['air_store_id','dow'], as_index=False)['visitors'].mean().rename(columns={'visitors':'mean_visitors'})
stores = pd.merge(stores, tmp, how='left', on=['air_store_id','dow'])
tmp = data['tra'].groupby(['air_store_id','dow'], as_index=False)['visitors'].median().rename(columns={'visitors':'median_visitors'})
stores = pd.merge(stores, tmp, how='left', on=['air_store_id','dow'])
tmp = data['tra'].groupby(['air_store_id','dow'], as_index=False)['visitors'].max().rename(columns={'visitors':'max_visitors'})
stores = pd.merge(stores, tmp, how='left', on=['air_store_id','dow'])
tmp = data['tra'].groupby(['air_store_id','dow'], as_index=False)['visitors'].count().rename(columns={'visitors':'count_observations'})
stores = pd.merge(stores, tmp, how='left', on=['air_store_id','dow']) 

stores = pd.merge(stores, data['as'], how='left', on=['air_store_id']) 
stores.head(5)

Unnamed: 0,air_store_id,dow,min_visitors,mean_visitors,median_visitors,max_visitors,count_observations,air_genre_name,air_area_name,latitude,longitude
0,air_00a91d42b08b08d9,0,1.0,22.457143,19.0,47.0,35.0,Italian/French,Tōkyō-to Chiyoda-ku Kudanminami,35.694003,139.753595
1,air_0164b9927d20bcc3,0,2.0,7.5,6.0,19.0,20.0,Italian/French,Tōkyō-to Minato-ku Shibakōen,35.658068,139.751599
2,air_0241aa3964b7f861,0,2.0,8.920635,8.0,23.0,63.0,Izakaya,Tōkyō-to Taitō-ku Higashiueno,35.712607,139.779996
3,air_0328696196e46f18,0,2.0,6.416667,4.0,27.0,12.0,Dining bar,Ōsaka-fu Ōsaka-shi Nakanochō,34.701279,135.52809
4,air_034a3d5b40d5b1b1,0,1.0,11.864865,10.0,66.0,37.0,Cafe/Sweets,Ōsaka-fu Ōsaka-shi Ōhiraki,34.692337,135.472229


In [8]:
stores['air_genre_name'] = stores['air_genre_name'].map(lambda x: str(str(x).replace('/',' ')))
stores['air_area_name'] = stores['air_area_name'].map(lambda x: str(str(x).replace('-',' ')))
lbl = preprocessing.LabelEncoder()
for i in range(10):
    stores['air_genre_name'+str(i)] = lbl.fit_transform(stores['air_genre_name'].map(lambda x: str(str(x).split(' ')[i]) if len(str(x).split(' '))>i else ''))
    stores['air_area_name'+str(i)] = lbl.fit_transform(stores['air_area_name'].map(lambda x: str(str(x).split(' ')[i]) if len(str(x).split(' '))>i else ''))
stores['air_genre_name'] = lbl.fit_transform(stores['air_genre_name'])
stores['air_area_name'] = lbl.fit_transform(stores['air_area_name'])

stores.head(5)

Unnamed: 0,air_store_id,dow,min_visitors,mean_visitors,median_visitors,max_visitors,count_observations,air_genre_name,air_area_name,latitude,...,air_genre_name5,air_area_name5,air_genre_name6,air_area_name6,air_genre_name7,air_area_name7,air_genre_name8,air_area_name8,air_genre_name9,air_area_name9
0,air_00a91d42b08b08d9,0,1.0,22.457143,19.0,47.0,35.0,6,44,35.694003,...,0,0,0,0,0,0,0,0,0,0
1,air_0164b9927d20bcc3,0,2.0,7.5,6.0,19.0,20.0,6,62,35.658068,...,0,0,0,0,0,0,0,0,0,0
2,air_0241aa3964b7f861,0,2.0,8.920635,8.0,23.0,63.0,7,82,35.712607,...,0,0,0,0,0,0,0,0,0,0
3,air_0328696196e46f18,0,2.0,6.416667,4.0,27.0,12.0,4,98,34.701279,...,0,0,0,0,0,0,0,0,0,0
4,air_034a3d5b40d5b1b1,0,1.0,11.864865,10.0,66.0,37.0,2,102,34.692337,...,0,0,0,0,0,0,0,0,0,0


In [9]:
list(stores)

['air_store_id',
 'dow',
 'min_visitors',
 'mean_visitors',
 'median_visitors',
 'max_visitors',
 'count_observations',
 'air_genre_name',
 'air_area_name',
 'latitude',
 'longitude',
 'air_genre_name0',
 'air_area_name0',
 'air_genre_name1',
 'air_area_name1',
 'air_genre_name2',
 'air_area_name2',
 'air_genre_name3',
 'air_area_name3',
 'air_genre_name4',
 'air_area_name4',
 'air_genre_name5',
 'air_area_name5',
 'air_genre_name6',
 'air_area_name6',
 'air_genre_name7',
 'air_area_name7',
 'air_genre_name8',
 'air_area_name8',
 'air_genre_name9',
 'air_area_name9']

In [10]:
data['hol']['visit_date'] = pd.to_datetime(data['hol']['visit_date'])
data['hol']['day_of_week'] = lbl.fit_transform(data['hol']['day_of_week'])
data['hol']['visit_date'] = data['hol']['visit_date'].dt.date
train = pd.merge(data['tra'], data['hol'], how='left', on=['visit_date']) 
test = pd.merge(data['tes'], data['hol'], how='left', on=['visit_date']) 

train = pd.merge(train, stores, how='left', on=['air_store_id','dow']) 
test = pd.merge(test, stores, how='left', on=['air_store_id','dow'])

for df in ['ar','hr']:
    train = pd.merge(train, data[df], how='left', on=['air_store_id','visit_date']) 
    test = pd.merge(test, data[df], how='left', on=['air_store_id','visit_date'])

train['id'] = train.apply(lambda r: '_'.join([str(r['air_store_id']), str(r['visit_date'])]), axis=1)

train.head(5)
test.head(5)

Unnamed: 0,air_store_id,visit_date,visitors,dow,year,month,day_of_week,holiday_flg,min_visitors,mean_visitors,...,air_area_name9,rs1_x,rv1_x,rs2_x,rv2_x,rs1_y,rv1_y,rs2_y,rv2_y,id
0,air_ba937bf13d40fb24,2016-01-13,25,2,2016,1,6,0,7.0,23.84375,...,0.0,,,,,,,,,air_ba937bf13d40fb24_2016-01-13
1,air_ba937bf13d40fb24,2016-01-14,32,3,2016,1,4,0,2.0,20.292308,...,0.0,,,,,,,,,air_ba937bf13d40fb24_2016-01-14
2,air_ba937bf13d40fb24,2016-01-15,29,4,2016,1,0,0,4.0,34.738462,...,0.0,,,,,,,,,air_ba937bf13d40fb24_2016-01-15
3,air_ba937bf13d40fb24,2016-01-16,22,5,2016,1,2,0,6.0,27.651515,...,0.0,,,,,,,,,air_ba937bf13d40fb24_2016-01-16
4,air_ba937bf13d40fb24,2016-01-18,6,0,2016,1,1,0,2.0,13.754386,...,0.0,,,,,,,,,air_ba937bf13d40fb24_2016-01-18


Unnamed: 0,id,visitors,visit_date,air_store_id,dow,year,month,day_of_week,holiday_flg,min_visitors,...,air_genre_name9,air_area_name9,rs1_x,rv1_x,rs2_x,rv2_x,rs1_y,rv1_y,rs2_y,rv2_y
0,air_00a91d42b08b08d9_2017-04-23,0,2017-04-23,air_00a91d42b08b08d9,6,2017,4,3,0,2.0,...,0,0,,,,,,,,
1,air_00a91d42b08b08d9_2017-04-24,0,2017-04-24,air_00a91d42b08b08d9,0,2017,4,1,0,1.0,...,0,0,,,,,,,,
2,air_00a91d42b08b08d9_2017-04-25,0,2017-04-25,air_00a91d42b08b08d9,1,2017,4,5,0,1.0,...,0,0,,,,,,,,
3,air_00a91d42b08b08d9_2017-04-26,0,2017-04-26,air_00a91d42b08b08d9,2,2017,4,6,0,15.0,...,0,0,,,,,,,,
4,air_00a91d42b08b08d9_2017-04-27,0,2017-04-27,air_00a91d42b08b08d9,3,2017,4,4,0,15.0,...,0,0,,,,,,,,


In [11]:
train['total_reserv_sum'] = train['rv1_x'] + train['rv1_y']
train['total_reserv_mean'] = (train['rv2_x'] + train['rv2_y']) / 2
train['total_reserv_dt_diff_mean'] = (train['rs2_x'] + train['rs2_y']) / 2

test['total_reserv_sum'] = test['rv1_x'] + test['rv1_y']
test['total_reserv_mean'] = (test['rv2_x'] + test['rv2_y']) / 2
test['total_reserv_dt_diff_mean'] = (test['rs2_x'] + test['rs2_y']) / 2

train.head(5)
test.head(5)

Unnamed: 0,air_store_id,visit_date,visitors,dow,year,month,day_of_week,holiday_flg,min_visitors,mean_visitors,...,rs2_x,rv2_x,rs1_y,rv1_y,rs2_y,rv2_y,id,total_reserv_sum,total_reserv_mean,total_reserv_dt_diff_mean
0,air_ba937bf13d40fb24,2016-01-13,25,2,2016,1,6,0,7.0,23.84375,...,,,,,,,air_ba937bf13d40fb24_2016-01-13,,,
1,air_ba937bf13d40fb24,2016-01-14,32,3,2016,1,4,0,2.0,20.292308,...,,,,,,,air_ba937bf13d40fb24_2016-01-14,,,
2,air_ba937bf13d40fb24,2016-01-15,29,4,2016,1,0,0,4.0,34.738462,...,,,,,,,air_ba937bf13d40fb24_2016-01-15,,,
3,air_ba937bf13d40fb24,2016-01-16,22,5,2016,1,2,0,6.0,27.651515,...,,,,,,,air_ba937bf13d40fb24_2016-01-16,,,
4,air_ba937bf13d40fb24,2016-01-18,6,0,2016,1,1,0,2.0,13.754386,...,,,,,,,air_ba937bf13d40fb24_2016-01-18,,,


Unnamed: 0,id,visitors,visit_date,air_store_id,dow,year,month,day_of_week,holiday_flg,min_visitors,...,rv1_x,rs2_x,rv2_x,rs1_y,rv1_y,rs2_y,rv2_y,total_reserv_sum,total_reserv_mean,total_reserv_dt_diff_mean
0,air_00a91d42b08b08d9_2017-04-23,0,2017-04-23,air_00a91d42b08b08d9,6,2017,4,3,0,2.0,...,,,,,,,,,,
1,air_00a91d42b08b08d9_2017-04-24,0,2017-04-24,air_00a91d42b08b08d9,0,2017,4,1,0,1.0,...,,,,,,,,,,
2,air_00a91d42b08b08d9_2017-04-25,0,2017-04-25,air_00a91d42b08b08d9,1,2017,4,5,0,1.0,...,,,,,,,,,,
3,air_00a91d42b08b08d9_2017-04-26,0,2017-04-26,air_00a91d42b08b08d9,2,2017,4,6,0,15.0,...,,,,,,,,,,
4,air_00a91d42b08b08d9_2017-04-27,0,2017-04-27,air_00a91d42b08b08d9,3,2017,4,4,0,15.0,...,,,,,,,,,,


In [12]:
train['date_int'] = train['visit_date'].apply(lambda x: x.strftime('%Y%m%d')).astype(int)
test['date_int'] = test['visit_date'].apply(lambda x: x.strftime('%Y%m%d')).astype(int)
train['var_max_lat'] = train['latitude'].max() - train['latitude']
train['var_max_long'] = train['longitude'].max() - train['longitude']
test['var_max_lat'] = test['latitude'].max() - test['latitude']
test['var_max_long'] = test['longitude'].max() - test['longitude']

train.head(5)
test.head(5)

Unnamed: 0,air_store_id,visit_date,visitors,dow,year,month,day_of_week,holiday_flg,min_visitors,mean_visitors,...,rv1_y,rs2_y,rv2_y,id,total_reserv_sum,total_reserv_mean,total_reserv_dt_diff_mean,date_int,var_max_lat,var_max_long
0,air_ba937bf13d40fb24,2016-01-13,25,2,2016,1,6,0,7.0,23.84375,...,,,,air_ba937bf13d40fb24_2016-01-13,,,,20160113,8.362564,4.521799
1,air_ba937bf13d40fb24,2016-01-14,32,3,2016,1,4,0,2.0,20.292308,...,,,,air_ba937bf13d40fb24_2016-01-14,,,,20160114,8.362564,4.521799
2,air_ba937bf13d40fb24,2016-01-15,29,4,2016,1,0,0,4.0,34.738462,...,,,,air_ba937bf13d40fb24_2016-01-15,,,,20160115,8.362564,4.521799
3,air_ba937bf13d40fb24,2016-01-16,22,5,2016,1,2,0,6.0,27.651515,...,,,,air_ba937bf13d40fb24_2016-01-16,,,,20160116,8.362564,4.521799
4,air_ba937bf13d40fb24,2016-01-18,6,0,2016,1,1,0,2.0,13.754386,...,,,,air_ba937bf13d40fb24_2016-01-18,,,,20160118,8.362564,4.521799


Unnamed: 0,id,visitors,visit_date,air_store_id,dow,year,month,day_of_week,holiday_flg,min_visitors,...,rs1_y,rv1_y,rs2_y,rv2_y,total_reserv_sum,total_reserv_mean,total_reserv_dt_diff_mean,date_int,var_max_lat,var_max_long
0,air_00a91d42b08b08d9_2017-04-23,0,2017-04-23,air_00a91d42b08b08d9,6,2017,4,3,0,2.0,...,,,,,,,,20170423,8.326629,4.519803
1,air_00a91d42b08b08d9_2017-04-24,0,2017-04-24,air_00a91d42b08b08d9,0,2017,4,1,0,1.0,...,,,,,,,,20170424,8.326629,4.519803
2,air_00a91d42b08b08d9_2017-04-25,0,2017-04-25,air_00a91d42b08b08d9,1,2017,4,5,0,1.0,...,,,,,,,,20170425,8.326629,4.519803
3,air_00a91d42b08b08d9_2017-04-26,0,2017-04-26,air_00a91d42b08b08d9,2,2017,4,6,0,15.0,...,,,,,,,,20170426,8.326629,4.519803
4,air_00a91d42b08b08d9_2017-04-27,0,2017-04-27,air_00a91d42b08b08d9,3,2017,4,4,0,15.0,...,,,,,,,,20170427,8.326629,4.519803


In [13]:
train['lon_plus_lat'] = train['longitude'] + train['latitude'] 
test['lon_plus_lat'] = test['longitude'] + test['latitude']

lbl = preprocessing.LabelEncoder()
train['air_store_id2'] = lbl.fit_transform(train['air_store_id'])
test['air_store_id2'] = lbl.transform(test['air_store_id'])

col = [c for c in train if c not in ['id', 'air_store_id', 'visit_date','visitors']]
train = train.fillna(-1)
test = test.fillna(-1)

In [14]:
train.head(5)
test.head(5)

Unnamed: 0,air_store_id,visit_date,visitors,dow,year,month,day_of_week,holiday_flg,min_visitors,mean_visitors,...,rv2_y,id,total_reserv_sum,total_reserv_mean,total_reserv_dt_diff_mean,date_int,var_max_lat,var_max_long,lon_plus_lat,air_store_id2
0,air_ba937bf13d40fb24,2016-01-13,25,2,2016,1,6,0,7.0,23.84375,...,-1.0,air_ba937bf13d40fb24_2016-01-13,-1.0,-1.0,-1.0,20160113,8.362564,4.521799,175.409667,603
1,air_ba937bf13d40fb24,2016-01-14,32,3,2016,1,4,0,2.0,20.292308,...,-1.0,air_ba937bf13d40fb24_2016-01-14,-1.0,-1.0,-1.0,20160114,8.362564,4.521799,175.409667,603
2,air_ba937bf13d40fb24,2016-01-15,29,4,2016,1,0,0,4.0,34.738462,...,-1.0,air_ba937bf13d40fb24_2016-01-15,-1.0,-1.0,-1.0,20160115,8.362564,4.521799,175.409667,603
3,air_ba937bf13d40fb24,2016-01-16,22,5,2016,1,2,0,6.0,27.651515,...,-1.0,air_ba937bf13d40fb24_2016-01-16,-1.0,-1.0,-1.0,20160116,8.362564,4.521799,175.409667,603
4,air_ba937bf13d40fb24,2016-01-18,6,0,2016,1,1,0,2.0,13.754386,...,-1.0,air_ba937bf13d40fb24_2016-01-18,-1.0,-1.0,-1.0,20160118,8.362564,4.521799,175.409667,603


Unnamed: 0,id,visitors,visit_date,air_store_id,dow,year,month,day_of_week,holiday_flg,min_visitors,...,rs2_y,rv2_y,total_reserv_sum,total_reserv_mean,total_reserv_dt_diff_mean,date_int,var_max_lat,var_max_long,lon_plus_lat,air_store_id2
0,air_00a91d42b08b08d9_2017-04-23,0,2017-04-23,air_00a91d42b08b08d9,6,2017,4,3,0,2.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,20170423,8.326629,4.519803,175.447598,0
1,air_00a91d42b08b08d9_2017-04-24,0,2017-04-24,air_00a91d42b08b08d9,0,2017,4,1,0,1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,20170424,8.326629,4.519803,175.447598,0
2,air_00a91d42b08b08d9_2017-04-25,0,2017-04-25,air_00a91d42b08b08d9,1,2017,4,5,0,1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,20170425,8.326629,4.519803,175.447598,0
3,air_00a91d42b08b08d9_2017-04-26,0,2017-04-26,air_00a91d42b08b08d9,2,2017,4,6,0,15.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,20170426,8.326629,4.519803,175.447598,0
4,air_00a91d42b08b08d9_2017-04-27,0,2017-04-27,air_00a91d42b08b08d9,3,2017,4,4,0,15.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,20170427,8.326629,4.519803,175.447598,0


In [15]:
#RMSE function for evaluation
def RMSLE(y, pred):
    return metrics.mean_squared_error(y, pred)**0.5

In [20]:
#Run models for Gradient Boosted tree, lgbm and knn
params = {}
params['application'] = 'regression'
params['boosting'] = 'gbdt'
params['learning_rate'] = 0.01
params['num_leaves'] = 32
params['min_sum_hessian_in_leaf'] = 1e-2
params['min_gain_to_split'] = 0
params['bagging_fraction'] = 0.8
params['feature_fraction'] = 0.8
params['num_threads'] = 4
params['metric'] = 'rmse'

from sklearn.model_selection import train_test_split
X_train, X_valid = train_test_split(train, test_size=0.05, random_state=42, shuffle=False)

X = X_train[col]
y = np.log1p(X_train['visitors'].values)
d_train = lgb.Dataset(X, y)

X = X_valid[col]
y = np.log1p(X_valid['visitors'].values)
d_valid = lgb.Dataset(X, y)

watchlist = [d_train, d_valid]

model1 = ensemble.GradientBoostingRegressor(learning_rate=0.1, random_state=3, n_estimators=200, subsample=0.8, 
                      max_depth =10)
model2 = neighbors.KNeighborsRegressor(n_jobs=-1, n_neighbors=4)
model3 = lgb.train(params, train_set=d_train, num_boost_round=50000, valid_sets=watchlist,
verbose_eval=10)

model1.fit(train[col], np.log1p(train['visitors'].values))
model2.fit(train[col], np.log1p(train['visitors'].values))

preds1 = model1.predict(train[col])
preds2 = model2.predict(train[col])
preds3 = model3.predict(train[col])

print('RMSE GradientBoostingRegressor: ', RMSLE(np.log1p(train['visitors'].values), preds1))
print('RMSE KNeighborsRegressor: ', RMSLE(np.log1p(train['visitors'].values), preds2))
print('RMSE LGBMRegressor: ', RMSLE(np.log1p(train['visitors'].values), preds3))

[10]	training's rmse: 0.76324	valid_1's rmse: 0.765292
[20]	training's rmse: 0.724673	valid_1's rmse: 0.730534
[30]	training's rmse: 0.691867	valid_1's rmse: 0.701307
[40]	training's rmse: 0.663143	valid_1's rmse: 0.675826
[50]	training's rmse: 0.638571	valid_1's rmse: 0.653839
[60]	training's rmse: 0.617639	valid_1's rmse: 0.635325
[70]	training's rmse: 0.599861	valid_1's rmse: 0.619599
[80]	training's rmse: 0.58482	valid_1's rmse: 0.606347
[90]	training's rmse: 0.572156	valid_1's rmse: 0.595299
[100]	training's rmse: 0.561606	valid_1's rmse: 0.586079
[110]	training's rmse: 0.552616	valid_1's rmse: 0.578308
[120]	training's rmse: 0.545062	valid_1's rmse: 0.571789
[130]	training's rmse: 0.538733	valid_1's rmse: 0.566275
[140]	training's rmse: 0.533507	valid_1's rmse: 0.561763
[150]	training's rmse: 0.529032	valid_1's rmse: 0.557898
[160]	training's rmse: 0.525255	valid_1's rmse: 0.554713
[170]	training's rmse: 0.522054	valid_1's rmse: 0.55195
[180]	training's rmse: 0.519375	valid_1's r

[1450]	training's rmse: 0.48509	valid_1's rmse: 0.53012
[1460]	training's rmse: 0.485006	valid_1's rmse: 0.530107
[1470]	training's rmse: 0.484939	valid_1's rmse: 0.53013
[1480]	training's rmse: 0.484856	valid_1's rmse: 0.530124
[1490]	training's rmse: 0.484768	valid_1's rmse: 0.530152
[1500]	training's rmse: 0.484679	valid_1's rmse: 0.530168
[1510]	training's rmse: 0.484596	valid_1's rmse: 0.530157
[1520]	training's rmse: 0.484513	valid_1's rmse: 0.530153
[1530]	training's rmse: 0.484423	valid_1's rmse: 0.530141
[1540]	training's rmse: 0.484334	valid_1's rmse: 0.530133
[1550]	training's rmse: 0.484265	valid_1's rmse: 0.530112
[1560]	training's rmse: 0.48416	valid_1's rmse: 0.530108
[1570]	training's rmse: 0.48408	valid_1's rmse: 0.530109
[1580]	training's rmse: 0.484019	valid_1's rmse: 0.530107
[1590]	training's rmse: 0.483949	valid_1's rmse: 0.530088
[1600]	training's rmse: 0.483865	valid_1's rmse: 0.530092
[1610]	training's rmse: 0.483806	valid_1's rmse: 0.530082
[1620]	training's r

[2870]	training's rmse: 0.47565	valid_1's rmse: 0.529395
[2880]	training's rmse: 0.475588	valid_1's rmse: 0.529394
[2890]	training's rmse: 0.475526	valid_1's rmse: 0.529379
[2900]	training's rmse: 0.47548	valid_1's rmse: 0.529378
[2910]	training's rmse: 0.475435	valid_1's rmse: 0.529375
[2920]	training's rmse: 0.475374	valid_1's rmse: 0.529375
[2930]	training's rmse: 0.475323	valid_1's rmse: 0.529368
[2940]	training's rmse: 0.475268	valid_1's rmse: 0.529357
[2950]	training's rmse: 0.475204	valid_1's rmse: 0.529354
[2960]	training's rmse: 0.475151	valid_1's rmse: 0.529356
[2970]	training's rmse: 0.475079	valid_1's rmse: 0.529359
[2980]	training's rmse: 0.475024	valid_1's rmse: 0.529343
[2990]	training's rmse: 0.474965	valid_1's rmse: 0.529338
[3000]	training's rmse: 0.474904	valid_1's rmse: 0.529328
[3010]	training's rmse: 0.474834	valid_1's rmse: 0.529321
[3020]	training's rmse: 0.474779	valid_1's rmse: 0.529313
[3030]	training's rmse: 0.474724	valid_1's rmse: 0.529324
[3040]	training'

[4290]	training's rmse: 0.468576	valid_1's rmse: 0.529184
[4300]	training's rmse: 0.468532	valid_1's rmse: 0.529192
[4310]	training's rmse: 0.468489	valid_1's rmse: 0.529191
[4320]	training's rmse: 0.468461	valid_1's rmse: 0.52919
[4330]	training's rmse: 0.468426	valid_1's rmse: 0.529182
[4340]	training's rmse: 0.468374	valid_1's rmse: 0.529175
[4350]	training's rmse: 0.468344	valid_1's rmse: 0.529181
[4360]	training's rmse: 0.468309	valid_1's rmse: 0.529179
[4370]	training's rmse: 0.468281	valid_1's rmse: 0.529184
[4380]	training's rmse: 0.468241	valid_1's rmse: 0.529182
[4390]	training's rmse: 0.4682	valid_1's rmse: 0.529171
[4400]	training's rmse: 0.46815	valid_1's rmse: 0.529165
[4410]	training's rmse: 0.468109	valid_1's rmse: 0.52915
[4420]	training's rmse: 0.468073	valid_1's rmse: 0.529147
[4430]	training's rmse: 0.468036	valid_1's rmse: 0.529138
[4440]	training's rmse: 0.468005	valid_1's rmse: 0.52914
[4450]	training's rmse: 0.467979	valid_1's rmse: 0.529123
[4460]	training's rm

[5710]	training's rmse: 0.462947	valid_1's rmse: 0.529219
[5720]	training's rmse: 0.462916	valid_1's rmse: 0.529219
[5730]	training's rmse: 0.462887	valid_1's rmse: 0.529227
[5740]	training's rmse: 0.462846	valid_1's rmse: 0.529216
[5750]	training's rmse: 0.462817	valid_1's rmse: 0.529212
[5760]	training's rmse: 0.462774	valid_1's rmse: 0.529196
[5770]	training's rmse: 0.462739	valid_1's rmse: 0.529192
[5780]	training's rmse: 0.462703	valid_1's rmse: 0.529175
[5790]	training's rmse: 0.462664	valid_1's rmse: 0.529171
[5800]	training's rmse: 0.462621	valid_1's rmse: 0.529177
[5810]	training's rmse: 0.462575	valid_1's rmse: 0.529175
[5820]	training's rmse: 0.462536	valid_1's rmse: 0.529177
[5830]	training's rmse: 0.462498	valid_1's rmse: 0.529184
[5840]	training's rmse: 0.462463	valid_1's rmse: 0.529174
[5850]	training's rmse: 0.462425	valid_1's rmse: 0.529171
[5860]	training's rmse: 0.462395	valid_1's rmse: 0.529179
[5870]	training's rmse: 0.462358	valid_1's rmse: 0.529169
[5880]	trainin

[7130]	training's rmse: 0.457835	valid_1's rmse: 0.529188
[7140]	training's rmse: 0.4578	valid_1's rmse: 0.529189
[7150]	training's rmse: 0.457767	valid_1's rmse: 0.529179
[7160]	training's rmse: 0.457736	valid_1's rmse: 0.529184
[7170]	training's rmse: 0.457706	valid_1's rmse: 0.52919
[7180]	training's rmse: 0.457677	valid_1's rmse: 0.529191
[7190]	training's rmse: 0.45764	valid_1's rmse: 0.529171
[7200]	training's rmse: 0.457606	valid_1's rmse: 0.529166
[7210]	training's rmse: 0.457575	valid_1's rmse: 0.529161
[7220]	training's rmse: 0.457534	valid_1's rmse: 0.52916
[7230]	training's rmse: 0.457501	valid_1's rmse: 0.529178
[7240]	training's rmse: 0.457459	valid_1's rmse: 0.529183
[7250]	training's rmse: 0.457422	valid_1's rmse: 0.529178
[7260]	training's rmse: 0.457387	valid_1's rmse: 0.529172
[7270]	training's rmse: 0.457354	valid_1's rmse: 0.529178
[7280]	training's rmse: 0.457311	valid_1's rmse: 0.529168
[7290]	training's rmse: 0.457277	valid_1's rmse: 0.529169
[7300]	training's r

[8550]	training's rmse: 0.453286	valid_1's rmse: 0.529265
[8560]	training's rmse: 0.453252	valid_1's rmse: 0.529269
[8570]	training's rmse: 0.453229	valid_1's rmse: 0.529285
[8580]	training's rmse: 0.453204	valid_1's rmse: 0.529285
[8590]	training's rmse: 0.453174	valid_1's rmse: 0.529286
[8600]	training's rmse: 0.453155	valid_1's rmse: 0.529294
[8610]	training's rmse: 0.453134	valid_1's rmse: 0.529299
[8620]	training's rmse: 0.4531	valid_1's rmse: 0.529311
[8630]	training's rmse: 0.453059	valid_1's rmse: 0.529318
[8640]	training's rmse: 0.453025	valid_1's rmse: 0.529322
[8650]	training's rmse: 0.45299	valid_1's rmse: 0.52932
[8660]	training's rmse: 0.452949	valid_1's rmse: 0.529329
[8670]	training's rmse: 0.452922	valid_1's rmse: 0.529329
[8680]	training's rmse: 0.452899	valid_1's rmse: 0.529337
[8690]	training's rmse: 0.452879	valid_1's rmse: 0.529336
[8700]	training's rmse: 0.452852	valid_1's rmse: 0.529345
[8710]	training's rmse: 0.452828	valid_1's rmse: 0.529355
[8720]	training's 

[9970]	training's rmse: 0.449313	valid_1's rmse: 0.52949
[9980]	training's rmse: 0.449279	valid_1's rmse: 0.529491
[9990]	training's rmse: 0.449242	valid_1's rmse: 0.529489
[10000]	training's rmse: 0.449216	valid_1's rmse: 0.529491
[10010]	training's rmse: 0.449184	valid_1's rmse: 0.529498
[10020]	training's rmse: 0.449159	valid_1's rmse: 0.529505
[10030]	training's rmse: 0.449137	valid_1's rmse: 0.5295
[10040]	training's rmse: 0.449111	valid_1's rmse: 0.529504
[10050]	training's rmse: 0.449078	valid_1's rmse: 0.529504
[10060]	training's rmse: 0.449052	valid_1's rmse: 0.529499
[10070]	training's rmse: 0.44902	valid_1's rmse: 0.529505
[10080]	training's rmse: 0.448999	valid_1's rmse: 0.529506
[10090]	training's rmse: 0.44897	valid_1's rmse: 0.529506
[10100]	training's rmse: 0.448948	valid_1's rmse: 0.52951
[10110]	training's rmse: 0.448918	valid_1's rmse: 0.529511
[10120]	training's rmse: 0.448895	valid_1's rmse: 0.529508
[10130]	training's rmse: 0.448875	valid_1's rmse: 0.529514
[10140

[11370]	training's rmse: 0.445546	valid_1's rmse: 0.529764
[11380]	training's rmse: 0.445519	valid_1's rmse: 0.529755
[11390]	training's rmse: 0.44549	valid_1's rmse: 0.529753
[11400]	training's rmse: 0.445465	valid_1's rmse: 0.529756
[11410]	training's rmse: 0.445435	valid_1's rmse: 0.529751
[11420]	training's rmse: 0.445411	valid_1's rmse: 0.529752
[11430]	training's rmse: 0.445384	valid_1's rmse: 0.529757
[11440]	training's rmse: 0.445362	valid_1's rmse: 0.52976
[11450]	training's rmse: 0.445333	valid_1's rmse: 0.529785
[11460]	training's rmse: 0.445309	valid_1's rmse: 0.529832
[11470]	training's rmse: 0.445282	valid_1's rmse: 0.529842
[11480]	training's rmse: 0.445256	valid_1's rmse: 0.529852
[11490]	training's rmse: 0.445229	valid_1's rmse: 0.529855
[11500]	training's rmse: 0.445204	valid_1's rmse: 0.529859
[11510]	training's rmse: 0.44518	valid_1's rmse: 0.529856
[11520]	training's rmse: 0.44516	valid_1's rmse: 0.529853
[11530]	training's rmse: 0.445139	valid_1's rmse: 0.529851
[

[12770]	training's rmse: 0.442002	valid_1's rmse: 0.530171
[12780]	training's rmse: 0.441977	valid_1's rmse: 0.530167
[12790]	training's rmse: 0.441948	valid_1's rmse: 0.530147
[12800]	training's rmse: 0.441927	valid_1's rmse: 0.530148
[12810]	training's rmse: 0.441905	valid_1's rmse: 0.530146
[12820]	training's rmse: 0.441877	valid_1's rmse: 0.530148
[12830]	training's rmse: 0.441853	valid_1's rmse: 0.530151
[12840]	training's rmse: 0.441822	valid_1's rmse: 0.530153
[12850]	training's rmse: 0.441793	valid_1's rmse: 0.530153
[12860]	training's rmse: 0.44177	valid_1's rmse: 0.530156
[12870]	training's rmse: 0.441747	valid_1's rmse: 0.530167
[12880]	training's rmse: 0.441726	valid_1's rmse: 0.530172
[12890]	training's rmse: 0.441701	valid_1's rmse: 0.530177
[12900]	training's rmse: 0.441675	valid_1's rmse: 0.530182
[12910]	training's rmse: 0.441656	valid_1's rmse: 0.530181
[12920]	training's rmse: 0.441631	valid_1's rmse: 0.530183
[12930]	training's rmse: 0.441613	valid_1's rmse: 0.53018

[14170]	training's rmse: 0.438478	valid_1's rmse: 0.530345
[14180]	training's rmse: 0.438448	valid_1's rmse: 0.530349
[14190]	training's rmse: 0.438422	valid_1's rmse: 0.530356
[14200]	training's rmse: 0.438395	valid_1's rmse: 0.530358
[14210]	training's rmse: 0.438365	valid_1's rmse: 0.530355
[14220]	training's rmse: 0.438342	valid_1's rmse: 0.530357
[14230]	training's rmse: 0.438308	valid_1's rmse: 0.530353
[14240]	training's rmse: 0.438287	valid_1's rmse: 0.530357
[14250]	training's rmse: 0.438259	valid_1's rmse: 0.530358
[14260]	training's rmse: 0.438234	valid_1's rmse: 0.530349
[14270]	training's rmse: 0.438211	valid_1's rmse: 0.530335
[14280]	training's rmse: 0.438191	valid_1's rmse: 0.530343
[14290]	training's rmse: 0.438167	valid_1's rmse: 0.530348
[14300]	training's rmse: 0.43815	valid_1's rmse: 0.530349
[14310]	training's rmse: 0.438127	valid_1's rmse: 0.530349
[14320]	training's rmse: 0.438105	valid_1's rmse: 0.530347
[14330]	training's rmse: 0.438079	valid_1's rmse: 0.53034

[15570]	training's rmse: 0.435331	valid_1's rmse: 0.530658
[15580]	training's rmse: 0.435302	valid_1's rmse: 0.530673
[15590]	training's rmse: 0.435272	valid_1's rmse: 0.530675
[15600]	training's rmse: 0.435247	valid_1's rmse: 0.530671
[15610]	training's rmse: 0.435219	valid_1's rmse: 0.530675
[15620]	training's rmse: 0.435187	valid_1's rmse: 0.530674
[15630]	training's rmse: 0.435161	valid_1's rmse: 0.530676
[15640]	training's rmse: 0.435125	valid_1's rmse: 0.530678
[15650]	training's rmse: 0.435097	valid_1's rmse: 0.530688
[15660]	training's rmse: 0.435077	valid_1's rmse: 0.530689
[15670]	training's rmse: 0.435053	valid_1's rmse: 0.530702
[15680]	training's rmse: 0.435028	valid_1's rmse: 0.530705
[15690]	training's rmse: 0.435001	valid_1's rmse: 0.53071
[15700]	training's rmse: 0.43498	valid_1's rmse: 0.530705
[15710]	training's rmse: 0.43496	valid_1's rmse: 0.53071
[15720]	training's rmse: 0.434939	valid_1's rmse: 0.530711
[15730]	training's rmse: 0.434914	valid_1's rmse: 0.530712
[

[16970]	training's rmse: 0.432181	valid_1's rmse: 0.530956
[16980]	training's rmse: 0.432158	valid_1's rmse: 0.530967
[16990]	training's rmse: 0.432127	valid_1's rmse: 0.530972
[17000]	training's rmse: 0.432107	valid_1's rmse: 0.530968
[17010]	training's rmse: 0.432077	valid_1's rmse: 0.530976
[17020]	training's rmse: 0.432053	valid_1's rmse: 0.530979
[17030]	training's rmse: 0.432025	valid_1's rmse: 0.530976
[17040]	training's rmse: 0.431998	valid_1's rmse: 0.530969
[17050]	training's rmse: 0.431976	valid_1's rmse: 0.530975
[17060]	training's rmse: 0.431954	valid_1's rmse: 0.530975
[17070]	training's rmse: 0.431932	valid_1's rmse: 0.530978
[17080]	training's rmse: 0.431916	valid_1's rmse: 0.530992
[17090]	training's rmse: 0.431895	valid_1's rmse: 0.530994
[17100]	training's rmse: 0.431871	valid_1's rmse: 0.531003
[17110]	training's rmse: 0.431844	valid_1's rmse: 0.530998
[17120]	training's rmse: 0.431822	valid_1's rmse: 0.530999
[17130]	training's rmse: 0.431806	valid_1's rmse: 0.5310

[18370]	training's rmse: 0.429161	valid_1's rmse: 0.531164
[18380]	training's rmse: 0.429146	valid_1's rmse: 0.53117
[18390]	training's rmse: 0.429133	valid_1's rmse: 0.531174
[18400]	training's rmse: 0.429113	valid_1's rmse: 0.531174
[18410]	training's rmse: 0.429095	valid_1's rmse: 0.531181
[18420]	training's rmse: 0.429074	valid_1's rmse: 0.531186
[18430]	training's rmse: 0.429057	valid_1's rmse: 0.53119
[18440]	training's rmse: 0.429039	valid_1's rmse: 0.531197
[18450]	training's rmse: 0.429022	valid_1's rmse: 0.531204
[18460]	training's rmse: 0.428998	valid_1's rmse: 0.531205
[18470]	training's rmse: 0.428977	valid_1's rmse: 0.531206
[18480]	training's rmse: 0.42896	valid_1's rmse: 0.531204
[18490]	training's rmse: 0.428942	valid_1's rmse: 0.531205
[18500]	training's rmse: 0.428927	valid_1's rmse: 0.531208
[18510]	training's rmse: 0.428909	valid_1's rmse: 0.531206
[18520]	training's rmse: 0.428883	valid_1's rmse: 0.531214
[18530]	training's rmse: 0.428857	valid_1's rmse: 0.531224


[19770]	training's rmse: 0.426276	valid_1's rmse: 0.531306
[19780]	training's rmse: 0.426248	valid_1's rmse: 0.531301
[19790]	training's rmse: 0.426225	valid_1's rmse: 0.531299
[19800]	training's rmse: 0.426201	valid_1's rmse: 0.531299
[19810]	training's rmse: 0.426181	valid_1's rmse: 0.5313
[19820]	training's rmse: 0.426168	valid_1's rmse: 0.531306
[19830]	training's rmse: 0.426147	valid_1's rmse: 0.531308
[19840]	training's rmse: 0.426132	valid_1's rmse: 0.531315
[19850]	training's rmse: 0.426111	valid_1's rmse: 0.531325
[19860]	training's rmse: 0.42609	valid_1's rmse: 0.531325
[19870]	training's rmse: 0.426071	valid_1's rmse: 0.531329
[19880]	training's rmse: 0.426049	valid_1's rmse: 0.531334
[19890]	training's rmse: 0.426027	valid_1's rmse: 0.531338
[19900]	training's rmse: 0.426007	valid_1's rmse: 0.531346
[19910]	training's rmse: 0.425982	valid_1's rmse: 0.531347
[19920]	training's rmse: 0.425963	valid_1's rmse: 0.531344
[19930]	training's rmse: 0.425939	valid_1's rmse: 0.531339


[21170]	training's rmse: 0.42356	valid_1's rmse: 0.531503
[21180]	training's rmse: 0.423537	valid_1's rmse: 0.531509
[21190]	training's rmse: 0.423518	valid_1's rmse: 0.531511
[21200]	training's rmse: 0.4235	valid_1's rmse: 0.531514
[21210]	training's rmse: 0.42347	valid_1's rmse: 0.531519
[21220]	training's rmse: 0.423452	valid_1's rmse: 0.531515
[21230]	training's rmse: 0.423436	valid_1's rmse: 0.531512
[21240]	training's rmse: 0.423425	valid_1's rmse: 0.531511
[21250]	training's rmse: 0.423408	valid_1's rmse: 0.531514
[21260]	training's rmse: 0.423393	valid_1's rmse: 0.531512
[21270]	training's rmse: 0.423378	valid_1's rmse: 0.531508
[21280]	training's rmse: 0.423355	valid_1's rmse: 0.531502
[21290]	training's rmse: 0.423339	valid_1's rmse: 0.531501
[21300]	training's rmse: 0.423323	valid_1's rmse: 0.531498
[21310]	training's rmse: 0.423307	valid_1's rmse: 0.531498
[21320]	training's rmse: 0.423285	valid_1's rmse: 0.531493
[21330]	training's rmse: 0.423271	valid_1's rmse: 0.531492
[

[22570]	training's rmse: 0.421118	valid_1's rmse: 0.531716
[22580]	training's rmse: 0.421092	valid_1's rmse: 0.531711
[22590]	training's rmse: 0.421071	valid_1's rmse: 0.531709
[22600]	training's rmse: 0.421052	valid_1's rmse: 0.531708
[22610]	training's rmse: 0.421041	valid_1's rmse: 0.531711
[22620]	training's rmse: 0.421023	valid_1's rmse: 0.531711
[22630]	training's rmse: 0.421005	valid_1's rmse: 0.531712
[22640]	training's rmse: 0.420991	valid_1's rmse: 0.531711
[22650]	training's rmse: 0.420973	valid_1's rmse: 0.53171
[22660]	training's rmse: 0.42096	valid_1's rmse: 0.531714
[22670]	training's rmse: 0.420936	valid_1's rmse: 0.531718
[22680]	training's rmse: 0.420922	valid_1's rmse: 0.531717
[22690]	training's rmse: 0.420905	valid_1's rmse: 0.531718
[22700]	training's rmse: 0.420885	valid_1's rmse: 0.531726
[22710]	training's rmse: 0.420867	valid_1's rmse: 0.531726
[22720]	training's rmse: 0.420848	valid_1's rmse: 0.53173
[22730]	training's rmse: 0.420826	valid_1's rmse: 0.531733


[23970]	training's rmse: 0.418355	valid_1's rmse: 0.53186
[23980]	training's rmse: 0.418341	valid_1's rmse: 0.531866
[23990]	training's rmse: 0.418324	valid_1's rmse: 0.531867
[24000]	training's rmse: 0.418307	valid_1's rmse: 0.531867
[24010]	training's rmse: 0.418287	valid_1's rmse: 0.531865
[24020]	training's rmse: 0.418265	valid_1's rmse: 0.531865
[24030]	training's rmse: 0.41824	valid_1's rmse: 0.531869
[24040]	training's rmse: 0.418223	valid_1's rmse: 0.531868
[24050]	training's rmse: 0.418205	valid_1's rmse: 0.531868
[24060]	training's rmse: 0.418188	valid_1's rmse: 0.531875
[24070]	training's rmse: 0.41817	valid_1's rmse: 0.53187
[24080]	training's rmse: 0.418155	valid_1's rmse: 0.531874
[24090]	training's rmse: 0.418136	valid_1's rmse: 0.53187
[24100]	training's rmse: 0.418118	valid_1's rmse: 0.531872
[24110]	training's rmse: 0.418098	valid_1's rmse: 0.531873
[24120]	training's rmse: 0.418079	valid_1's rmse: 0.53187
[24130]	training's rmse: 0.418064	valid_1's rmse: 0.531868
[24

[25370]	training's rmse: 0.416006	valid_1's rmse: 0.532114
[25380]	training's rmse: 0.415989	valid_1's rmse: 0.532116
[25390]	training's rmse: 0.415974	valid_1's rmse: 0.532117
[25400]	training's rmse: 0.415956	valid_1's rmse: 0.532121
[25410]	training's rmse: 0.415937	valid_1's rmse: 0.532125
[25420]	training's rmse: 0.415921	valid_1's rmse: 0.532123
[25430]	training's rmse: 0.415903	valid_1's rmse: 0.532125
[25440]	training's rmse: 0.415886	valid_1's rmse: 0.532128
[25450]	training's rmse: 0.41587	valid_1's rmse: 0.532132
[25460]	training's rmse: 0.415854	valid_1's rmse: 0.532131
[25470]	training's rmse: 0.415831	valid_1's rmse: 0.532133
[25480]	training's rmse: 0.415818	valid_1's rmse: 0.532132
[25490]	training's rmse: 0.415796	valid_1's rmse: 0.532129
[25500]	training's rmse: 0.415773	valid_1's rmse: 0.532128
[25510]	training's rmse: 0.415755	valid_1's rmse: 0.532127
[25520]	training's rmse: 0.415739	valid_1's rmse: 0.532132
[25530]	training's rmse: 0.415727	valid_1's rmse: 0.53213

[26770]	training's rmse: 0.413593	valid_1's rmse: 0.532275
[26780]	training's rmse: 0.413571	valid_1's rmse: 0.532278
[26790]	training's rmse: 0.413555	valid_1's rmse: 0.532288
[26800]	training's rmse: 0.413535	valid_1's rmse: 0.532293
[26810]	training's rmse: 0.413513	valid_1's rmse: 0.532295
[26820]	training's rmse: 0.413495	valid_1's rmse: 0.532302
[26830]	training's rmse: 0.413482	valid_1's rmse: 0.53231
[26840]	training's rmse: 0.413465	valid_1's rmse: 0.532308
[26850]	training's rmse: 0.413441	valid_1's rmse: 0.532305
[26860]	training's rmse: 0.413431	valid_1's rmse: 0.532305
[26870]	training's rmse: 0.413416	valid_1's rmse: 0.532303
[26880]	training's rmse: 0.413397	valid_1's rmse: 0.532301
[26890]	training's rmse: 0.413378	valid_1's rmse: 0.532298
[26900]	training's rmse: 0.413356	valid_1's rmse: 0.532299
[26910]	training's rmse: 0.413337	valid_1's rmse: 0.532292
[26920]	training's rmse: 0.413323	valid_1's rmse: 0.532293
[26930]	training's rmse: 0.413308	valid_1's rmse: 0.53229

[28170]	training's rmse: 0.411232	valid_1's rmse: 0.532447
[28180]	training's rmse: 0.411216	valid_1's rmse: 0.53245
[28190]	training's rmse: 0.411194	valid_1's rmse: 0.532446
[28200]	training's rmse: 0.411178	valid_1's rmse: 0.53245
[28210]	training's rmse: 0.41116	valid_1's rmse: 0.532449
[28220]	training's rmse: 0.411143	valid_1's rmse: 0.532439
[28230]	training's rmse: 0.411132	valid_1's rmse: 0.532447
[28240]	training's rmse: 0.411113	valid_1's rmse: 0.532443
[28250]	training's rmse: 0.411093	valid_1's rmse: 0.532449
[28260]	training's rmse: 0.411071	valid_1's rmse: 0.532447
[28270]	training's rmse: 0.411051	valid_1's rmse: 0.532445
[28280]	training's rmse: 0.411033	valid_1's rmse: 0.532448
[28290]	training's rmse: 0.411016	valid_1's rmse: 0.532447
[28300]	training's rmse: 0.410996	valid_1's rmse: 0.532456
[28310]	training's rmse: 0.410974	valid_1's rmse: 0.532458
[28320]	training's rmse: 0.410955	valid_1's rmse: 0.532458
[28330]	training's rmse: 0.410936	valid_1's rmse: 0.532459


[29570]	training's rmse: 0.408929	valid_1's rmse: 0.532645
[29580]	training's rmse: 0.408914	valid_1's rmse: 0.532648
[29590]	training's rmse: 0.408898	valid_1's rmse: 0.532655
[29600]	training's rmse: 0.408881	valid_1's rmse: 0.532659
[29610]	training's rmse: 0.408866	valid_1's rmse: 0.532659
[29620]	training's rmse: 0.40885	valid_1's rmse: 0.532658
[29630]	training's rmse: 0.40883	valid_1's rmse: 0.532658
[29640]	training's rmse: 0.408817	valid_1's rmse: 0.532663
[29650]	training's rmse: 0.408802	valid_1's rmse: 0.532662
[29660]	training's rmse: 0.408784	valid_1's rmse: 0.532665
[29670]	training's rmse: 0.40877	valid_1's rmse: 0.532666
[29680]	training's rmse: 0.408753	valid_1's rmse: 0.532671
[29690]	training's rmse: 0.408735	valid_1's rmse: 0.532672
[29700]	training's rmse: 0.408721	valid_1's rmse: 0.532678
[29710]	training's rmse: 0.408702	valid_1's rmse: 0.532678
[29720]	training's rmse: 0.408682	valid_1's rmse: 0.532678
[29730]	training's rmse: 0.40866	valid_1's rmse: 0.532689
[

[30970]	training's rmse: 0.406617	valid_1's rmse: 0.53284
[30980]	training's rmse: 0.406609	valid_1's rmse: 0.53284
[30990]	training's rmse: 0.406597	valid_1's rmse: 0.53284
[31000]	training's rmse: 0.406582	valid_1's rmse: 0.532846
[31010]	training's rmse: 0.406564	valid_1's rmse: 0.532846
[31020]	training's rmse: 0.406552	valid_1's rmse: 0.532846
[31030]	training's rmse: 0.406536	valid_1's rmse: 0.532848
[31040]	training's rmse: 0.406523	valid_1's rmse: 0.532845
[31050]	training's rmse: 0.406513	valid_1's rmse: 0.532847
[31060]	training's rmse: 0.406499	valid_1's rmse: 0.532853
[31070]	training's rmse: 0.406485	valid_1's rmse: 0.532854
[31080]	training's rmse: 0.406468	valid_1's rmse: 0.532861
[31090]	training's rmse: 0.406453	valid_1's rmse: 0.532861
[31100]	training's rmse: 0.406441	valid_1's rmse: 0.532865
[31110]	training's rmse: 0.406428	valid_1's rmse: 0.532867
[31120]	training's rmse: 0.406425	valid_1's rmse: 0.532866
[31130]	training's rmse: 0.406418	valid_1's rmse: 0.532868


[32370]	training's rmse: 0.404662	valid_1's rmse: 0.533059
[32380]	training's rmse: 0.404648	valid_1's rmse: 0.533062
[32390]	training's rmse: 0.404636	valid_1's rmse: 0.533064
[32400]	training's rmse: 0.404622	valid_1's rmse: 0.533061
[32410]	training's rmse: 0.404607	valid_1's rmse: 0.533067
[32420]	training's rmse: 0.404595	valid_1's rmse: 0.533067
[32430]	training's rmse: 0.404585	valid_1's rmse: 0.533065
[32440]	training's rmse: 0.404574	valid_1's rmse: 0.533067
[32450]	training's rmse: 0.404564	valid_1's rmse: 0.533068
[32460]	training's rmse: 0.404548	valid_1's rmse: 0.533068
[32470]	training's rmse: 0.404539	valid_1's rmse: 0.533063
[32480]	training's rmse: 0.404528	valid_1's rmse: 0.533067
[32490]	training's rmse: 0.404515	valid_1's rmse: 0.533071
[32500]	training's rmse: 0.404499	valid_1's rmse: 0.533075
[32510]	training's rmse: 0.404485	valid_1's rmse: 0.533081
[32520]	training's rmse: 0.404469	valid_1's rmse: 0.533089
[32530]	training's rmse: 0.404456	valid_1's rmse: 0.5330

[33770]	training's rmse: 0.40261	valid_1's rmse: 0.533168
[33780]	training's rmse: 0.402594	valid_1's rmse: 0.533165
[33790]	training's rmse: 0.40258	valid_1's rmse: 0.533166
[33800]	training's rmse: 0.402565	valid_1's rmse: 0.533162
[33810]	training's rmse: 0.402549	valid_1's rmse: 0.533166
[33820]	training's rmse: 0.402533	valid_1's rmse: 0.53317
[33830]	training's rmse: 0.40251	valid_1's rmse: 0.533168
[33840]	training's rmse: 0.402495	valid_1's rmse: 0.533167
[33850]	training's rmse: 0.402481	valid_1's rmse: 0.533167
[33860]	training's rmse: 0.402462	valid_1's rmse: 0.533163
[33870]	training's rmse: 0.402445	valid_1's rmse: 0.53316
[33880]	training's rmse: 0.40243	valid_1's rmse: 0.533159
[33890]	training's rmse: 0.402415	valid_1's rmse: 0.533156
[33900]	training's rmse: 0.402402	valid_1's rmse: 0.533157
[33910]	training's rmse: 0.402391	valid_1's rmse: 0.533155
[33920]	training's rmse: 0.402378	valid_1's rmse: 0.533154
[33930]	training's rmse: 0.402365	valid_1's rmse: 0.533154
[33

[35170]	training's rmse: 0.400579	valid_1's rmse: 0.533245
[35180]	training's rmse: 0.400567	valid_1's rmse: 0.533249
[35190]	training's rmse: 0.400555	valid_1's rmse: 0.533252
[35200]	training's rmse: 0.400541	valid_1's rmse: 0.533255
[35210]	training's rmse: 0.400531	valid_1's rmse: 0.533259
[35220]	training's rmse: 0.400514	valid_1's rmse: 0.533262
[35230]	training's rmse: 0.400503	valid_1's rmse: 0.533263
[35240]	training's rmse: 0.400488	valid_1's rmse: 0.533268
[35250]	training's rmse: 0.400475	valid_1's rmse: 0.533275
[35260]	training's rmse: 0.400461	valid_1's rmse: 0.533279
[35270]	training's rmse: 0.400443	valid_1's rmse: 0.533279
[35280]	training's rmse: 0.400431	valid_1's rmse: 0.533282
[35290]	training's rmse: 0.400418	valid_1's rmse: 0.533282
[35300]	training's rmse: 0.400401	valid_1's rmse: 0.533283
[35310]	training's rmse: 0.400387	valid_1's rmse: 0.533286
[35320]	training's rmse: 0.400374	valid_1's rmse: 0.533287
[35330]	training's rmse: 0.400358	valid_1's rmse: 0.5332

[36570]	training's rmse: 0.398533	valid_1's rmse: 0.533409
[36580]	training's rmse: 0.398519	valid_1's rmse: 0.533407
[36590]	training's rmse: 0.398506	valid_1's rmse: 0.533407
[36600]	training's rmse: 0.398493	valid_1's rmse: 0.533408
[36610]	training's rmse: 0.398478	valid_1's rmse: 0.533399
[36620]	training's rmse: 0.398463	valid_1's rmse: 0.533403
[36630]	training's rmse: 0.39845	valid_1's rmse: 0.533399
[36640]	training's rmse: 0.398437	valid_1's rmse: 0.533397
[36650]	training's rmse: 0.398424	valid_1's rmse: 0.533404
[36660]	training's rmse: 0.398411	valid_1's rmse: 0.533402
[36670]	training's rmse: 0.398397	valid_1's rmse: 0.533405
[36680]	training's rmse: 0.398384	valid_1's rmse: 0.5334
[36690]	training's rmse: 0.398372	valid_1's rmse: 0.533403
[36700]	training's rmse: 0.398357	valid_1's rmse: 0.533392
[36710]	training's rmse: 0.398344	valid_1's rmse: 0.533394
[36720]	training's rmse: 0.39833	valid_1's rmse: 0.533402
[36730]	training's rmse: 0.398316	valid_1's rmse: 0.5334
[36

[37970]	training's rmse: 0.396631	valid_1's rmse: 0.533588
[37980]	training's rmse: 0.396622	valid_1's rmse: 0.533589
[37990]	training's rmse: 0.396607	valid_1's rmse: 0.533588
[38000]	training's rmse: 0.396595	valid_1's rmse: 0.533593
[38010]	training's rmse: 0.39658	valid_1's rmse: 0.533591
[38020]	training's rmse: 0.396565	valid_1's rmse: 0.533592
[38030]	training's rmse: 0.396549	valid_1's rmse: 0.533585
[38040]	training's rmse: 0.396531	valid_1's rmse: 0.533585
[38050]	training's rmse: 0.396518	valid_1's rmse: 0.533584
[38060]	training's rmse: 0.396504	valid_1's rmse: 0.533584
[38070]	training's rmse: 0.39649	valid_1's rmse: 0.533584
[38080]	training's rmse: 0.396473	valid_1's rmse: 0.533582
[38090]	training's rmse: 0.396459	valid_1's rmse: 0.533581
[38100]	training's rmse: 0.396448	valid_1's rmse: 0.533588
[38110]	training's rmse: 0.396437	valid_1's rmse: 0.533587
[38120]	training's rmse: 0.396421	valid_1's rmse: 0.533589
[38130]	training's rmse: 0.396404	valid_1's rmse: 0.533595

[39370]	training's rmse: 0.394603	valid_1's rmse: 0.533768
[39380]	training's rmse: 0.394593	valid_1's rmse: 0.533771
[39390]	training's rmse: 0.394576	valid_1's rmse: 0.533768
[39400]	training's rmse: 0.394561	valid_1's rmse: 0.533768
[39410]	training's rmse: 0.394549	valid_1's rmse: 0.533778
[39420]	training's rmse: 0.394537	valid_1's rmse: 0.53378
[39430]	training's rmse: 0.394523	valid_1's rmse: 0.533781
[39440]	training's rmse: 0.394508	valid_1's rmse: 0.533781
[39450]	training's rmse: 0.394499	valid_1's rmse: 0.533783
[39460]	training's rmse: 0.394484	valid_1's rmse: 0.533789
[39470]	training's rmse: 0.394475	valid_1's rmse: 0.533792
[39480]	training's rmse: 0.394462	valid_1's rmse: 0.533792
[39490]	training's rmse: 0.394453	valid_1's rmse: 0.533789
[39500]	training's rmse: 0.394442	valid_1's rmse: 0.533797
[39510]	training's rmse: 0.39443	valid_1's rmse: 0.533799
[39520]	training's rmse: 0.394419	valid_1's rmse: 0.533803
[39530]	training's rmse: 0.394402	valid_1's rmse: 0.533811

[40770]	training's rmse: 0.392632	valid_1's rmse: 0.534079
[40780]	training's rmse: 0.392616	valid_1's rmse: 0.534081
[40790]	training's rmse: 0.392605	valid_1's rmse: 0.534081
[40800]	training's rmse: 0.392586	valid_1's rmse: 0.534079
[40810]	training's rmse: 0.392572	valid_1's rmse: 0.534079
[40820]	training's rmse: 0.392556	valid_1's rmse: 0.53408
[40830]	training's rmse: 0.392546	valid_1's rmse: 0.534079
[40840]	training's rmse: 0.392534	valid_1's rmse: 0.534081
[40850]	training's rmse: 0.39252	valid_1's rmse: 0.534097
[40860]	training's rmse: 0.392508	valid_1's rmse: 0.534102
[40870]	training's rmse: 0.392491	valid_1's rmse: 0.534106
[40880]	training's rmse: 0.392477	valid_1's rmse: 0.534103
[40890]	training's rmse: 0.392462	valid_1's rmse: 0.534106
[40900]	training's rmse: 0.392451	valid_1's rmse: 0.534108
[40910]	training's rmse: 0.392442	valid_1's rmse: 0.53411
[40920]	training's rmse: 0.392429	valid_1's rmse: 0.534115
[40930]	training's rmse: 0.392407	valid_1's rmse: 0.534121


[42170]	training's rmse: 0.390691	valid_1's rmse: 0.534295
[42180]	training's rmse: 0.390683	valid_1's rmse: 0.534293
[42190]	training's rmse: 0.390678	valid_1's rmse: 0.534291
[42200]	training's rmse: 0.390666	valid_1's rmse: 0.534297
[42210]	training's rmse: 0.390654	valid_1's rmse: 0.534297
[42220]	training's rmse: 0.390641	valid_1's rmse: 0.534295
[42230]	training's rmse: 0.390628	valid_1's rmse: 0.534298
[42240]	training's rmse: 0.39062	valid_1's rmse: 0.534297
[42250]	training's rmse: 0.390607	valid_1's rmse: 0.534305
[42260]	training's rmse: 0.390592	valid_1's rmse: 0.534306
[42270]	training's rmse: 0.390577	valid_1's rmse: 0.534307
[42280]	training's rmse: 0.390562	valid_1's rmse: 0.53431
[42290]	training's rmse: 0.390551	valid_1's rmse: 0.534313
[42300]	training's rmse: 0.390541	valid_1's rmse: 0.534311
[42310]	training's rmse: 0.390524	valid_1's rmse: 0.534309
[42320]	training's rmse: 0.390513	valid_1's rmse: 0.534309
[42330]	training's rmse: 0.390496	valid_1's rmse: 0.53431


[43570]	training's rmse: 0.388912	valid_1's rmse: 0.534391
[43580]	training's rmse: 0.388893	valid_1's rmse: 0.53439
[43590]	training's rmse: 0.388882	valid_1's rmse: 0.534392
[43600]	training's rmse: 0.388862	valid_1's rmse: 0.534395
[43610]	training's rmse: 0.388845	valid_1's rmse: 0.534398
[43620]	training's rmse: 0.38883	valid_1's rmse: 0.534399
[43630]	training's rmse: 0.38882	valid_1's rmse: 0.534397
[43640]	training's rmse: 0.388808	valid_1's rmse: 0.534396
[43650]	training's rmse: 0.388797	valid_1's rmse: 0.534395
[43660]	training's rmse: 0.388776	valid_1's rmse: 0.534391
[43670]	training's rmse: 0.38876	valid_1's rmse: 0.534391
[43680]	training's rmse: 0.388749	valid_1's rmse: 0.534395
[43690]	training's rmse: 0.388735	valid_1's rmse: 0.534396
[43700]	training's rmse: 0.388716	valid_1's rmse: 0.534396
[43710]	training's rmse: 0.388701	valid_1's rmse: 0.534392
[43720]	training's rmse: 0.388688	valid_1's rmse: 0.534393
[43730]	training's rmse: 0.388681	valid_1's rmse: 0.534394
[

[44970]	training's rmse: 0.387122	valid_1's rmse: 0.534557
[44980]	training's rmse: 0.387113	valid_1's rmse: 0.534558
[44990]	training's rmse: 0.387101	valid_1's rmse: 0.534561
[45000]	training's rmse: 0.387091	valid_1's rmse: 0.534558
[45010]	training's rmse: 0.387083	valid_1's rmse: 0.534557
[45020]	training's rmse: 0.38707	valid_1's rmse: 0.534554
[45030]	training's rmse: 0.387056	valid_1's rmse: 0.534552
[45040]	training's rmse: 0.387039	valid_1's rmse: 0.534558
[45050]	training's rmse: 0.387028	valid_1's rmse: 0.534562
[45060]	training's rmse: 0.387021	valid_1's rmse: 0.534562
[45070]	training's rmse: 0.387007	valid_1's rmse: 0.534567
[45080]	training's rmse: 0.386997	valid_1's rmse: 0.534565
[45090]	training's rmse: 0.386985	valid_1's rmse: 0.534569
[45100]	training's rmse: 0.386975	valid_1's rmse: 0.534569
[45110]	training's rmse: 0.386965	valid_1's rmse: 0.534574
[45120]	training's rmse: 0.386956	valid_1's rmse: 0.534566
[45130]	training's rmse: 0.386944	valid_1's rmse: 0.53457

[46370]	training's rmse: 0.385434	valid_1's rmse: 0.534648
[46380]	training's rmse: 0.38542	valid_1's rmse: 0.534646
[46390]	training's rmse: 0.385411	valid_1's rmse: 0.534647
[46400]	training's rmse: 0.385398	valid_1's rmse: 0.534649
[46410]	training's rmse: 0.385386	valid_1's rmse: 0.534649
[46420]	training's rmse: 0.385369	valid_1's rmse: 0.53465
[46430]	training's rmse: 0.385352	valid_1's rmse: 0.534653
[46440]	training's rmse: 0.385334	valid_1's rmse: 0.534648
[46450]	training's rmse: 0.385323	valid_1's rmse: 0.534655
[46460]	training's rmse: 0.385309	valid_1's rmse: 0.534653
[46470]	training's rmse: 0.385289	valid_1's rmse: 0.53465
[46480]	training's rmse: 0.385275	valid_1's rmse: 0.534651
[46490]	training's rmse: 0.385268	valid_1's rmse: 0.534649
[46500]	training's rmse: 0.385258	valid_1's rmse: 0.534651
[46510]	training's rmse: 0.385243	valid_1's rmse: 0.534645
[46520]	training's rmse: 0.385223	valid_1's rmse: 0.534643
[46530]	training's rmse: 0.385207	valid_1's rmse: 0.534636


[47770]	training's rmse: 0.383478	valid_1's rmse: 0.534588
[47780]	training's rmse: 0.383465	valid_1's rmse: 0.534592
[47790]	training's rmse: 0.383449	valid_1's rmse: 0.5346
[47800]	training's rmse: 0.383431	valid_1's rmse: 0.534601
[47810]	training's rmse: 0.383414	valid_1's rmse: 0.534604
[47820]	training's rmse: 0.383401	valid_1's rmse: 0.534604
[47830]	training's rmse: 0.383386	valid_1's rmse: 0.5346
[47840]	training's rmse: 0.383371	valid_1's rmse: 0.534604
[47850]	training's rmse: 0.383355	valid_1's rmse: 0.534599
[47860]	training's rmse: 0.383339	valid_1's rmse: 0.534599
[47870]	training's rmse: 0.383322	valid_1's rmse: 0.534591
[47880]	training's rmse: 0.383301	valid_1's rmse: 0.534589
[47890]	training's rmse: 0.383288	valid_1's rmse: 0.534592
[47900]	training's rmse: 0.383269	valid_1's rmse: 0.534588
[47910]	training's rmse: 0.383254	valid_1's rmse: 0.534589
[47920]	training's rmse: 0.383243	valid_1's rmse: 0.534588
[47930]	training's rmse: 0.383233	valid_1's rmse: 0.534592
[

[49170]	training's rmse: 0.381709	valid_1's rmse: 0.534659
[49180]	training's rmse: 0.381696	valid_1's rmse: 0.534656
[49190]	training's rmse: 0.381678	valid_1's rmse: 0.534657
[49200]	training's rmse: 0.381663	valid_1's rmse: 0.534661
[49210]	training's rmse: 0.381653	valid_1's rmse: 0.534657
[49220]	training's rmse: 0.381641	valid_1's rmse: 0.534662
[49230]	training's rmse: 0.381629	valid_1's rmse: 0.534659
[49240]	training's rmse: 0.381615	valid_1's rmse: 0.534656
[49250]	training's rmse: 0.381598	valid_1's rmse: 0.53466
[49260]	training's rmse: 0.381583	valid_1's rmse: 0.534659
[49270]	training's rmse: 0.381564	valid_1's rmse: 0.534662
[49280]	training's rmse: 0.381547	valid_1's rmse: 0.534658
[49290]	training's rmse: 0.381527	valid_1's rmse: 0.534658
[49300]	training's rmse: 0.381516	valid_1's rmse: 0.53466
[49310]	training's rmse: 0.381502	valid_1's rmse: 0.534659
[49320]	training's rmse: 0.38149	valid_1's rmse: 0.534661
[49330]	training's rmse: 0.381479	valid_1's rmse: 0.53466
[

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=10, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=200, presort='auto', random_state=3,
             subsample=0.8, verbose=0, warm_start=False)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=-1, n_neighbors=4, p=2,
          weights='uniform')

RMSE GradientBoostingRegressor:  0.39762162831
RMSE KNeighborsRegressor:  0.419451716114
RMSE LGBMRegressor:  0.389782127403


In [21]:
preds1t = model1.predict(test[col])
preds2t = model2.predict(test[col])
preds3t = model3.predict(test[col])

test['visitors'] = 0.5*preds1t+0.5*preds3t
test['visitors'] = np.expm1(test['visitors']).clip(lower=0.)
sub1 = test[['id','visitors']].copy()
sub1.head(5)
test['visitors'] = 0.3*preds1t+0.3*preds2t+0.4*preds3t
test['visitors'] = np.expm1(test['visitors']).clip(lower=0.)
sub2 = test[['id','visitors']].copy()
sub2.head(5)

Unnamed: 0,id,visitors
0,air_00a91d42b08b08d9_2017-04-23,2.1039
1,air_00a91d42b08b08d9_2017-04-24,20.934192
2,air_00a91d42b08b08d9_2017-04-25,30.482635
3,air_00a91d42b08b08d9_2017-04-26,26.992753
4,air_00a91d42b08b08d9_2017-04-27,28.172828


Unnamed: 0,id,visitors
0,air_00a91d42b08b08d9_2017-04-23,2.449946
1,air_00a91d42b08b08d9_2017-04-24,22.545993
2,air_00a91d42b08b08d9_2017-04-25,30.828327
3,air_00a91d42b08b08d9_2017-04-26,27.408919
4,air_00a91d42b08b08d9_2017-04-27,29.669246


In [22]:
#Getting csvs for submission
sub1[['id', 'visitors']].to_csv('lgbsubmission1.csv', index=False)
sub2[['id', 'visitors']].to_csv('lgbsubmission2.csv', index=False)

In [23]:
#Adding mean weighted results to ensemble and checking (optional)

air_visit_data = pd.read_csv('air_visit_data.csv')
air_store_info = pd.read_csv('air_store_info.csv')
hpg_store_info = pd.read_csv('hpg_store_info.csv')
air_reserve = pd.read_csv('air_reserve.csv')
hpg_reserve = pd.read_csv('hpg_reserve.csv')
store_id_relation = pd.read_csv('store_id_relation.csv')
sample_submission = pd.read_csv('sample_submission.csv')
date_info = pd.read_csv('date_info.csv')

In [24]:
wkend_holidays = date_info.apply(
    (lambda x:(x.day_of_week=='Sunday' or x.day_of_week=='Saturday') and x.holiday_flg==1), axis=1)
date_info.loc[wkend_holidays, 'holiday_flg'] = 0
date_info['weight'] = ((date_info.index + 1) / len(date_info)) ** 5  

visit_data = air_visit_data.merge(date_info, left_on='visit_date', right_on='calendar_date', how='left')
visit_data.drop('calendar_date', axis=1, inplace=True)
visit_data['visitors'] = visit_data.visitors.map(pd.np.log1p)

wmean = lambda x:( (x.weight * x.visitors).sum() / x.weight.sum() )
visitors = visit_data.groupby(['air_store_id', 'day_of_week', 'holiday_flg']).apply(wmean).reset_index()
visitors.rename(columns={0:'visitors'}, inplace=True)

sample_submission['air_store_id'] = sample_submission.id.map(lambda x: '_'.join(x.split('_')[:-1]))
sample_submission['calendar_date'] = sample_submission.id.map(lambda x: x.split('_')[2])
sample_submission.drop('visitors', axis=1, inplace=True)
sample_submission = sample_submission.merge(date_info, on='calendar_date', how='left')
sample_submission = sample_submission.merge(visitors, on=[
    'air_store_id', 'day_of_week', 'holiday_flg'], how='left')

missings = sample_submission.visitors.isnull()
sample_submission.loc[missings, 'visitors'] = sample_submission[missings].merge(
    visitors[visitors.holiday_flg==0], on=('air_store_id', 'day_of_week'), 
    how='left')['visitors_y'].values

missings = sample_submission.visitors.isnull()
sample_submission.loc[missings, 'visitors'] = sample_submission[missings].merge(
    visitors[['air_store_id', 'visitors']].groupby('air_store_id').mean().reset_index(), 
    on='air_store_id', how='left')['visitors_y'].values

sample_submission['visitors'] = sample_submission.visitors.map(pd.np.expm1)

In [25]:
sub3 = sample_submission[['id', 'visitors']].copy()
sub4= pd.merge(sub1, sub3, on='id', how='inner')
sub5= pd.merge(sub2, sub3, on='id', how='inner')

In [26]:
sub4['visitors'] = 0.7*sub4['visitors_x'] + 0.3*sub4['visitors_y']* 1.1
sub5['visitors'] = 0.7*sub5['visitors_x'] + 0.3*sub5['visitors_y']* 1.1
sub4[['id', 'visitors']].to_csv('lgbsubmission3.csv', index=False)
sub5[['id', 'visitors']].to_csv('lgbsubmission4.csv', index=False)