In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
import pickle
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion, TransformerMixin
import gc

from os import path
from sklearn.preprocessing import LabelEncoder
from pandas.core.dtypes.dtypes import CategoricalDtype
from tqdm import tqdm
from datetime import date 
import holidays
import lightgbm as lgb


warnings.simplefilter('ignore')
sns.set()

%matplotlib inline

# label encoding
le = LabelEncoder()

ModuleNotFoundError: No module named 'sklearn'

In [2]:
class ConvertToDatetime(TransformerMixin):
        
    def transform(self, df, **transform_params):
        if 'timestamp' in df.columns:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [3]:
file_dtype = {
    'train': {'building_id': np.int16, 'meter': np.int8, 'meter_reading': np.float32},
    'test': {'building_id': np.int16, 'meter': np.int8},
    'building_metadata': {'site_id': np.int8, 'building_id': np.uint16, 'square_feet': np.float32, 'year_built': np.float16, 'floor_count': np.float16},
    'weather_test' : {'site_id': np.int8, 'air_temperature': np.float16, 'cloud_coverage': np.float16, 'dew_temperature': np.float16,
                     'precip_depth_1_hr': np.float16, 'sea_level_pressure': np.float16, 'wind_direction': np.float16, 'wind_speed': np.float16},
    'weather_train' : {'site_id': np.int8, 'air_temperature': np.float16, 'cloud_coverage': np.float16, 'dew_temperature': np.float16,
                     'precip_depth_1_hr': np.float16, 'sea_level_pressure': np.float16, 'wind_direction': np.float16, 'wind_speed': np.float16}
}

def loadFile(name):
    for dir_path in ['../input/ashrae-energy-prediction/','../input/_ashrae-energy-prediction/']:
        if path.exists(dir_path + name + '.csv'):
            return  ConvertToDatetime().transform(
                pd.read_csv(dir_path + name + '.csv', dtype=file_dtype[name]))
        


In [4]:
building = loadFile('building_metadata')
weather_train = loadFile('weather_train')
weather_test = loadFile('weather_test')
train = loadFile('train')
test = loadFile('test')



In [5]:
# align weather
def weatherSiteOffsets():
    weather = pd.concat([weather_train,weather_test],ignore_index=True)
    weather['timestamp'] = pd.to_datetime(weather['timestamp'])
    weather_key = ['site_id', 'timestamp']

    temp_skeleton = weather[weather_key + ['air_temperature']].drop_duplicates(subset=weather_key).sort_values(by=weather_key).copy()

    # calculate ranks of hourly temperatures within date/site_id chunks
    temp_skeleton['temp_rank'] = temp_skeleton.groupby(['site_id', temp_skeleton.timestamp.dt.date])['air_temperature'].rank('average')
    
    # create a dataframe of site_ids (0-16) x mean hour rank of temperature within day (0-23)
    df_2d = temp_skeleton.groupby(['site_id', temp_skeleton.timestamp.dt.hour])['temp_rank'].mean().unstack(level=1)

    # Subtract the columnID of temperature peak by 14, getting the timestamp alignment gap.
    site_ids_offsets = pd.Series(df_2d.values.argmax(axis=1) - 14)
    site_ids_offsets.index.name = 'site_id'
    return site_ids_offsets

site_time_offsets_df = weatherSiteOffsets()

def alignWeather(df):
        df['offset'] = df.site_id.map(site_time_offsets_df)
        df['timestamp_aligned'] = (df.timestamp - pd.to_timedelta(df.offset, unit='H'))
        df['timestamp'] = df['timestamp_aligned']
        del df['timestamp_aligned'], df['offset']
        gc.collect()
        return df

weather_train = alignWeather(weather_train)
weather_test = alignWeather(weather_test)

del site_time_offsets_df

In [48]:
# See holiday notebook to generate
holiday_df = None
if path.exists('../input/holiday-pickle/holiday_df.pickle'):
    holiday_df = pd.read_pickle('../input/holiday-pickle/holiday_df.pickle')
elif path.exists('../input/ashrae-energy-prediction/holiday_df.pickle'):
    holiday_df = pd.read_pickle('../input/ashrae-energy-prediction/holiday_df.pickle')

In [7]:
# "As you can see above, this data looks weired until May 20. It is 
# reported in this discussion by @barnwellguy that All electricity
# meter is 0 until May 20 for site_id == 0. Let's remove these data 
# from training data."
# https://www.kaggle.com/kaushal2896/ashrae-eda-fe-lightgbm-1-13
class RmS0M0(TransformerMixin):
  
    def transform(self, df, **transform_params):
        return df.query('not (building_id <= 104 & meter == 0 & timestamp <= "2016-05-20")')

    def fit(self, X, y=None, **fit_params):
        return self
    


In [None]:
# TODO: write filter to remove any 0 meter reading that continue more than N days (try 3)
# Also we need to account for this by meter

In [8]:
# TODO: try both for direction
class ImputeWeather(TransformerMixin):

    def __init__(self, method:str='linear', gap_limit:int=None, limit_direction:str='forward'):
        self._method = method
        self._gap_limit = gap_limit
        self._limit_direction = limit_direction
        
    def transform(self, weather_df, **transform_params):
        grouped_weather_df = weather_df.groupby('site_id').apply(lambda group: group.interpolate(method=self._method, limit=self._gap_limit, limit_direction=self._limit_direction))
        if 'cloud_coverage' in grouped_weather_df.columns:
            grouped_weather_df['cloud_coverage'] = grouped_weather_df['cloud_coverage'].round(decimals=0).clip(0,8)
        grouped_weather_df.reset_index(inplace=True)
        weather_df = grouped_weather_df.drop(['index'], axis=1)
        gc.collect()
        return weather_df

    def fit(self, X, y=None, **fit_params):
        return self
print(weather_train.head(20))
print(ImputeWeather().transform(weather_train.head(20)))

    site_id           timestamp  air_temperature  cloud_coverage  \
0         0 2015-12-31 19:00:00        25.000000             6.0   
1         0 2015-12-31 20:00:00        24.406250             NaN   
2         0 2015-12-31 21:00:00        22.796875             2.0   
3         0 2015-12-31 22:00:00        21.093750             2.0   
4         0 2015-12-31 23:00:00        20.000000             2.0   
5         0 2016-01-01 00:00:00        19.406250             NaN   
6         0 2016-01-01 01:00:00        21.093750             6.0   
7         0 2016-01-01 02:00:00        21.093750             NaN   
8         0 2016-01-01 03:00:00        20.593750             NaN   
9         0 2016-01-01 04:00:00        21.093750             NaN   
10        0 2016-01-01 05:00:00        21.093750             NaN   
11        0 2016-01-01 06:00:00        20.593750             NaN   
12        0 2016-01-01 07:00:00        18.906250             6.0   
13        0 2016-01-01 08:00:00        20.000000

In [9]:
# TODO: rename to rolling
class AddWeatherLags(TransformerMixin):
    
    def __init__(self, window):
        self._window = window
        
    def transform(self, weather_df, **transform_params):
        group_df = weather_df.groupby(['site_id'])
        cols = ['air_temperature', 'cloud_coverage', 'dew_temperature', 'precip_depth_1_hr', 'sea_level_pressure', 'wind_direction', 'wind_speed']
        rolled = group_df[cols].rolling(window=self._window, min_periods=0)
        lag_mean = rolled.mean().reset_index().astype(np.float16)
        lag_max = rolled.max().reset_index().astype(np.float16)
        lag_min = rolled.min().reset_index().astype(np.float16)
        lag_std = rolled.std().reset_index().astype(np.float16)
        for col in cols:
            weather_df[f'{col}_mean_lag{self._window}'] = lag_mean[col]
            weather_df[f'{col}_max_lag{self._window}'] = lag_max[col]
            weather_df[f'{col}_min_lag{self._window}'] = lag_min[col]
            weather_df[f'{col}_std_lag{self._window}'] = lag_std[col]
        del group_df, rolled
        gc.collect()
        return weather_df

    def fit(self, X, y=None, **fit_params):
        return self
    
print(AddWeatherLags(72).transform(weather_train.head(20)))

    site_id           timestamp  air_temperature  cloud_coverage  \
0         0 2015-12-31 19:00:00        25.000000             6.0   
1         0 2015-12-31 20:00:00        24.406250             NaN   
2         0 2015-12-31 21:00:00        22.796875             2.0   
3         0 2015-12-31 22:00:00        21.093750             2.0   
4         0 2015-12-31 23:00:00        20.000000             2.0   
5         0 2016-01-01 00:00:00        19.406250             NaN   
6         0 2016-01-01 01:00:00        21.093750             6.0   
7         0 2016-01-01 02:00:00        21.093750             NaN   
8         0 2016-01-01 03:00:00        20.593750             NaN   
9         0 2016-01-01 04:00:00        21.093750             NaN   
10        0 2016-01-01 05:00:00        21.093750             NaN   
11        0 2016-01-01 06:00:00        20.593750             NaN   
12        0 2016-01-01 07:00:00        18.906250             6.0   
13        0 2016-01-01 08:00:00        20.000000

[20 rows x 37 columns]


In [10]:
class AddBuilding(TransformerMixin):

    def __init__(self, building_df):
        self._b_df = building_df
        
    def transform(self, df, **transform_params):
        return df.merge(_b_df, on='building_id', how='left')

    def fit(self, X, y=None, **fit_params):
        return self

In [None]:
# TODO: try rolling with power

In [11]:
    
# https://www.kaggle.com/c/ashrae-energy-prediction/discussion/114483#latest-660771
# https://www.kaggle.com/c/ashrae-energy-prediction/discussion/114874#latest-660970
class AddHolidays(TransformerMixin):
    def transform(self, df, **transform_params):
        df = df.merge(holiday_df, on=['building_id','meter','timestamp','site_id'], how='left')
        return df

    def fit(self, X, y=None, **fit_params):
        return self
# Test 
addHolidays = AddHolidays()
print(addHolidays.transform(train.head(2000).merge(building, on='building_id', how='left'))[['building_id','timestamp','holiday']])

      building_id  timestamp         holiday
0               0 2016-01-01  New Year's Day
1               1 2016-01-01  New Year's Day
2               2 2016-01-01  New Year's Day
3               3 2016-01-01  New Year's Day
4               4 2016-01-01  New Year's Day
5               5 2016-01-01  New Year's Day
6               6 2016-01-01  New Year's Day
7               7 2016-01-01  New Year's Day
8               8 2016-01-01  New Year's Day
9               9 2016-01-01  New Year's Day
10             10 2016-01-01  New Year's Day
11             11 2016-01-01  New Year's Day
12             12 2016-01-01  New Year's Day
13             13 2016-01-01  New Year's Day
14             14 2016-01-01  New Year's Day
15             15 2016-01-01  New Year's Day
16             16 2016-01-01  New Year's Day
17             17 2016-01-01  New Year's Day
18             18 2016-01-01  New Year's Day
19             19 2016-01-01  New Year's Day
20             20 2016-01-01  New Year's Day
21        

In [49]:
class RmHolidays(TransformerMixin):
    def transform(self, df, **transform_params):
        if holiday_df is not None:
            df = df.merge(holiday_df, on=['building_id','meter','timestamp','site_id'], how='left')
            df = df.drop(df[df['holiday'].notnull()].index)
            df = df.drop(['holiday'], axis=1)
            gc.collect()
        else:
            print("Warning: Holiday DF is missing")
        return df

    def fit(self, X, y=None, **fit_params):
        return self

# Test you should see the new years removed
rmHolidays = RmHolidays()
print(rmHolidays.transform(train.head(100000).merge(building, on='building_id', how='left')))

       building_id  meter           timestamp  meter_reading  site_id  \
0                0      0 2016-01-01 00:00:00       0.000000        0   
1                1      0 2016-01-01 00:00:00       0.000000        0   
2                2      0 2016-01-01 00:00:00       0.000000        0   
3                3      0 2016-01-01 00:00:00       0.000000        0   
4                4      0 2016-01-01 00:00:00       0.000000        0   
5                5      0 2016-01-01 00:00:00       0.000000        0   
6                6      0 2016-01-01 00:00:00       0.000000        0   
7                7      0 2016-01-01 00:00:00       0.000000        0   
8                8      0 2016-01-01 00:00:00       0.000000        0   
9                9      0 2016-01-01 00:00:00       0.000000        0   
10              10      0 2016-01-01 00:00:00       0.000000        0   
11              11      0 2016-01-01 00:00:00       0.000000        0   
12              12      0 2016-01-01 00:00:00      

[100000 rows x 11 columns]


In [13]:
class LogSquareFeet(TransformerMixin):
        
    def transform(self, df, **transform_params):
        df['log_square_feet'] = np.float16(np.log(df['square_feet']))
        return df

    def fit(self, X, y=None, **fit_params):
        return self
print(building.head(20)['square_feet'])

0       7432.0
1       2720.0
2       5376.0
3      23685.0
4     116607.0
5       8000.0
6      27926.0
7     121074.0
8      60809.0
9      27000.0
10    370773.0
11     49073.0
12     37100.0
13     99380.0
14     86250.0
15     83957.0
16     54644.0
17     15250.0
18    111891.0
19     18717.0
Name: square_feet, dtype: float32


In [14]:
class SetCatTypes(TransformerMixin):
        
    def transform(self, df, **transform_params):
        df['primary_use']= df['primary_use'].astype('category')
        df['meter'] = df["meter"].astype('category')
        df['site_id'] = df["site_id"].astype('category')
        df['building_id'] = df['building_id'].astype('category')
        gc.collect()
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [15]:
class ImputeCloudCoverage(TransformerMixin):
        
    def transform(self, df, **transform_params):
        # set age of building to mediam of site_id
        # else if set ot overall median
        median = df['cloud_coverage'].median()
        # Set all year_built NaNs to site mean for year_built
        for i, i_median in df.groupby(['site_id'])['cloud_coverage'].median().items():
            # TODO add in +9 as a NAN
            if not np.isnan(i_median):
                df.loc[(df['cloud_coverage'].isnull()) & (df['site_id'] == i), 'cloud_coverage'] = i_median
            else:
                df.loc[(df['cloud_coverage'].isnull()) & (df['site_id'] == i), 'cloud_coverage'] = median
        df['cloud_coverage'] = np.uint8(df['cloud_coverage'])
        df['cloud_coverage'] = df['cloud_coverage']
        del median
        gc.collect()
        return df
        
    def fit(self, X, y=None, **fit_params):
        return self


In [None]:
# TODO: Play with scaling cloud coverage

In [16]:
class CloudTimeCat(TransformerMixin):
        
    def transform(self, df, **transform_params):
        tempDf = df[['cloud_coverage', 'hour']].astype('int')
        tempDf['cloud_coverage'] = (tempDf['cloud_coverage']).astype('int')
        tempDf['hour'] = (tempDf['hour']).astype('int')
        tempDf = tempDf.astype('str')
        df['cloud_time_cat'] = 'c' + tempDf['cloud_coverage'] + 't' + tempDf['hour']
        df['cloud_time_cat'] = df['cloud_time_cat'].astype('category')
        del tempDf
        gc.collect()
        return df
        
    def fit(self, X, y=None, **fit_params):
        return self


In [17]:
class DropCols(TransformerMixin):

    def __init__(self, drop_cols):
        self._drop_cols = drop_cols
        
    def transform(self, df, **transform_params):
        df = df.drop(self._drop_cols, axis=1)
        gc.collect()
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [18]:
class ImputeYearBuilt(TransformerMixin):

    def transform(self, df, **transform_params):
        # revisit the choice of median vs anything else
        year_built_median = df['year_built'].median()
        # Set all year_built NaNs to site mean for year_built
        for i, i_median in df.groupby(['site_id'])['year_built'].median().items():
            if not np.isnan(i_median):
                df.loc[(df['year_built'].isnull()) & (df['site_id'] == i), 'year_built'] = i_median
            else:
                df.loc[(df['year_built'].isnull()) & (df['site_id'] == i), 'year_built'] = year_built_median
        df['building_age'] = np.uint8(df['year_built']-1900)
        del year_built_median
        gc.collect()
        return df

    def fit(self, X, y=None, **fit_params):
        return self


In [19]:
class AddMeterDummies(TransformerMixin):
        
    def transform(self, df_a, **transform_params):
        df = df_a
        for i in range(4):
            df['_meter_'+str(i)] = (df['building_id'].isin(
                train.loc[train['meter'] == i].building_id.unique()))
        return df
        
    def fit(self, X, y=None, **fit_params):
        return self

In [20]:
class AddTimeFeatures(TransformerMixin):
        
    def transform(self, df_a, **transform_params):
        # TODO: try week of year as numerical 
        df = df_a
        df['dayofweek'] = df['timestamp'].dt.dayofweek.astype('category') # vs weekend?
        #df['weekday'] = df['timestamp'].dt.weekday.astype('category')
        #df['dayofweek_hour'] = (df['timestamp'].dt.dayofweek * 24) + df['timestamp'].dt.hour
        #df['dayofweek_hour'] = df['dayofweek_hour'].astype('category')
        #df['week'] = df['timestamp'].dt.week.astype('category')
        df['hour'] = df['timestamp'].dt.hour.astype('category')
        return df
        
    def fit(self, X, y=None, **fit_params):
        return self

In [21]:
class AddRelativeHumidity(TransformerMixin):
        
    def transform(self, df_a, **transform_params):
        df = df_a
        # code here
        return df
        
    def fit(self, X, y=None, **fit_params):
        return self

In [22]:
class FillMean(TransformerMixin):

    def __init__(self, cols):
        self._cols = cols
        
    def transform(self, df, **transform_params):
        for col in self._cols:
            df[col] = df[col].fillna(df[col].mean())
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [23]:
class FillZeros(TransformerMixin):

    def __init__(self, cols):
        self._cols = cols
        
    def transform(self, df, **transform_params):
        for col in self._cols:
            df[col] = df[col].fillna(0)
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [24]:
class FillMedian(TransformerMixin):

    def __init__(self, cols):
        self._cols = cols
        
    def transform(self, df, **transform_params):
        for col in self._cols:
            df[col] = df[col].fillna(df[col].median())
        return df

    def fit(self, X, y=None, **fit_params):
        return self


In [25]:
class FillPopular(TransformerMixin):

    def __init__(self, cols):
        self._cols = cols
        
    def transform(self, df, **transform_params):
        for col in self._cols:
            df[col] = df[col].fillna(df[col].value_counts()[0])
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [26]:
class MarkNaNs(TransformerMixin):
        
    def transform(self, df, **transform_params):
        for col in  df.columns[df.isna().any()].tolist():
            df['_' + col + '_nan' ] = df[col].isnull()
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [27]:
class GC(TransformerMixin):
        
    def transform(self, df, **transform_params):
        gc.collect()
        return df

    def fit(self, X, y=None, **fit_params):
        return self

In [28]:
# declare model
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, mean_squared_log_error, mean_squared_error
from lightgbm import LGBMRegressor


def rmsle(y, y_pred):
    # hack to prevent negative numbers
    return np.sqrt(mean_squared_log_error(y, y_pred.clip(0)))

def rmse(y, y_pred):
    # hack to prevent negative numbers
    return mean_squared_error(y, y_pred.clip(0))

def rmsee(y, y_pred):
    # hack to prevent negative numbers
    return np.sqrt(mean_squared_log_error(np.expm1(y.clip(0)), np.expm1(y_pred.clip(0))))
    
rmsle_scorer = make_scorer(
    lambda y_true, y_pred : rmsle(y_true, y_pred), 
    greater_is_better=False)

rmse_scorer = make_scorer(
    lambda y_true, y_pred : rmsle(y_true, y_pred), 
    greater_is_better=False)

rmsee_scorer = make_scorer(
    lambda y_true, y_pred : rmsee(y_true, y_pred), 
    greater_is_better=False)

def lbm_rmsle(y_true, y_pred):
    return 'RMSLE', np.sqrt(np.mean(np.power(np.log1p(y_pred) - np.log1p(y_true), 2))), False

# rob's custome function to do RMSLE while in the log1p space
def lbm_rmslee(y_true, y_pred):
    return 'RMSLEE', np.sqrt(np.mean(np.power(y_pred - y_true, 2))), False



In [29]:
weather_pipes = Pipeline(
    steps=[
        #('convertToDatetime', ConvertToDatetime()),
        ('imputeWeather', ImputeWeather()),
        ('fillMean',FillMean(['air_temperature','dew_temperature'
                              , 'precip_depth_1_hr', 'sea_level_pressure'])),
        ('imputeCloudCoverage', ImputeCloudCoverage()),
        ('addWeatherLags3', AddWeatherLags(3)),
        ('addWeatherLags72', AddWeatherLags(72)),
    ]
)

building_pipes = Pipeline(
    steps=[
        ('logSquareFeet', LogSquareFeet()),
        ('imputeYearBuilt', ImputeYearBuilt()),
        ('fillMean',FillMean(['floor_count'])),
        ('dropClos', DropCols(['square_feet', 'year_built'])),
    ]
)


# pre_a_pipes is for preprocessing that doesn't change impute
# values
x_pipes = Pipeline(
    steps=[
        #('markNans',MarkNaNs()),
        #('convertToDatetime', ConvertToDatetime()),
        ('rmS0M0', RmS0M0()),
        #('addHolidays', AddHolidays()),
        #('rmHolidays', RmHolidays()), called manually in fold
        ('addRelativeHumidity',AddRelativeHumidity()),
        ('addTimeFeatures', AddTimeFeatures()),
        ('setCatTypes', SetCatTypes()),
        ('fillMean',FillMean([])),
        ('fillZeros',FillZeros([])),
        ('dropCols', DropCols(['timestamp'])),
        ('GC', GC())
    ]
)

In [30]:
def getOutsideFoldXY(train_index):
    X = train.iloc[train_index].drop('meter_reading', axis=1)
    X_buildings = building[building['building_id'].isin(X['building_id'].unique())]
    X_weather = building[building['building_id'].isin(X['building_id'].unique())]
    X = x_pipes.transform(
        X
            .merge(building_pipes.transform(X_buildings), on='building_id', how='left')
            .merge(weather_train_trans, on=['site_id', 'timestamp'], how='left')
        )
    f_train_y = np.log1p(train.iloc[train_index]['meter_reading'])
    print(X.columns)
    return X,f_train_y



def getInFoldXY(train_index):
    X = train.iloc[train_index]
    X_buildings = building[building['building_id'].isin(X['building_id'].unique())]
    X = X.merge(building_pipes.transform(X_buildings), on='building_id', how='left')
    X_weather = weather_train[
        (weather_train['site_id'].isin(X['site_id'].unique())) 
         & (weather_train['timestamp'].isin(X['timestamp'].unique())) 
    ]
    X = x_pipes.transform(
        rmHolidays.transform(
            X.merge(weather_pipes.transform(X_weather), how='left')))
    return X

sample_train_X = getInFoldXY(train.sample(n=20,  random_state=42).index)
print(sample_train_X)

   building_id meter  meter_reading site_id                    primary_use  \
0         1324     1       0.000000      14  Entertainment/public assembly   
1         1013     0      32.000099      10                      Education   
2          229     1     567.655029       2                      Education   
3          217     3       0.000000       2                      Education   
4         1434     0      65.750000      15                      Education   
5         1047     0      90.983299      12                Public services   
6          911     1     295.063995       9                      Education   
7         1039     0      16.900000      12                      Education   
8          265     0     128.369995       2                         Office   
9          896     0     300.000000       9                      Education   
10         973     0     247.000000       9                         Office   
11         813     0      10.958300       8                Publi

In [31]:
# model fit params
gbm_params = {
    'n_estimators' : 500, # for accuracy use large numbers like 6000 
    'max_depth' : 9,
    'learning_rate': 0.1,
    'bagging_fraction': 0.1, # TODO: try 0.9
    
    'feature_fraction' : 0.9,
    'bagging_freq': 5,
    'subsample' : 0.1,  # 
    'subsample_freq' : 1,
    'num_leaves' : 20,
    'metric':'rmse',
    'lambda_l1' : 1,  # Try defaults
    'lambda_l2': 1, # Try defaults
    'verbose': 100
}

In [33]:
## cross val mdoels



# this stratified strategy from
# https://www.kaggle.com/isaienkov/lightgbm-fe-1-19/notebook
folds = 5
kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=42)

def cvTrainEnsemble(train, gbm_params):
    meter_models = [ [], [], [], [] ]
    for train_index, val_index in kf.split(train, train['building_id']):
        f_train = getInFoldXY(train_index)
        f_val = getInFoldXY(val_index)
        for i in range(4):
            f_train_m = f_train[f_train['meter'] == i]
            f_val_m = f_val[f_val['meter'] == i]
            gbm_params_m = gbm_params
            if i == 0:
                gbm_params_m['n_estimators']=500
            elif i == 1:
                gbm_params_m['learning_rate']=0.05
                gbm_params_m['bagging_fraction']=0.05
            elif i == 2:
                gbm_params_m['learning_rate']=0.05
                gbm_params_m['bagging_fraction']=0.08
            else:
                gbm_params_m['learning_rate']=0.03
                gbm_params_m['bagging_fraction']=0.09                
            gbm = LGBMRegressor(**gbm_params_m)
            gbm.fit(f_train_m.drop('meter_reading', axis=1), np.log1p(f_train_m['meter_reading']),
                eval_set=[(f_val_m.drop('meter_reading', axis=1), np.log1p(f_val_m['meter_reading']))],
                # https://www.kaggle.com/c/ashrae-energy-prediction/discussion/114722#latest-660848
                # eval_metric=lbm_rmslee,
                early_stopping_rounds=20)
            meter_models[i].append(gbm)
            del f_train_m, f_val_m, gbm
            gc.collect()
        del f_train, f_val
        gc.collect()
    return meter_models


In [34]:
%%time
meter_models = cvTrainEnsemble(train, gbm_params)

[1]	valid_0's rmse: 1.46621
Training until validation scores don't improve for 20 rounds
[2]	valid_0's rmse: 1.36041
[3]	valid_0's rmse: 1.26609
[4]	valid_0's rmse: 1.18325
[5]	valid_0's rmse: 1.11098
[6]	valid_0's rmse: 1.05872
[7]	valid_0's rmse: 1.00196
[8]	valid_0's rmse: 0.952217
[9]	valid_0's rmse: 0.908818
[10]	valid_0's rmse: 0.871351
[11]	valid_0's rmse: 0.846817
[12]	valid_0's rmse: 0.816523
[13]	valid_0's rmse: 0.790894
[14]	valid_0's rmse: 0.768847
[15]	valid_0's rmse: 0.750301
[16]	valid_0's rmse: 0.734739
[17]	valid_0's rmse: 0.720429
[18]	valid_0's rmse: 0.708115
[19]	valid_0's rmse: 0.697256
[20]	valid_0's rmse: 0.687343
[21]	valid_0's rmse: 0.679643
[22]	valid_0's rmse: 0.67141
[23]	valid_0's rmse: 0.664146
[24]	valid_0's rmse: 0.658155
[25]	valid_0's rmse: 0.652445
[26]	valid_0's rmse: 0.647583
[27]	valid_0's rmse: 0.643188
[28]	valid_0's rmse: 0.639107
[29]	valid_0's rmse: 0.634173
[30]	valid_0's rmse: 0.630941
[31]	valid_0's rmse: 0.626381
[32]	valid_0's rmse: 0.622

[267]	valid_0's rmse: 0.46046
[268]	valid_0's rmse: 0.459817
[269]	valid_0's rmse: 0.459547
[270]	valid_0's rmse: 0.45948
[271]	valid_0's rmse: 0.459239
[272]	valid_0's rmse: 0.459159
[273]	valid_0's rmse: 0.459037
[274]	valid_0's rmse: 0.458889
[275]	valid_0's rmse: 0.458728
[276]	valid_0's rmse: 0.458283
[277]	valid_0's rmse: 0.458071
[278]	valid_0's rmse: 0.45795
[279]	valid_0's rmse: 0.457779
[280]	valid_0's rmse: 0.457614
[281]	valid_0's rmse: 0.457017
[282]	valid_0's rmse: 0.456862
[283]	valid_0's rmse: 0.456542
[284]	valid_0's rmse: 0.456446
[285]	valid_0's rmse: 0.45634
[286]	valid_0's rmse: 0.456204
[287]	valid_0's rmse: 0.456071
[288]	valid_0's rmse: 0.455962
[289]	valid_0's rmse: 0.455773
[290]	valid_0's rmse: 0.455548
[291]	valid_0's rmse: 0.455353
[292]	valid_0's rmse: 0.455215
[293]	valid_0's rmse: 0.455028
[294]	valid_0's rmse: 0.454914
[295]	valid_0's rmse: 0.454652
[296]	valid_0's rmse: 0.454539
[297]	valid_0's rmse: 0.454296
[298]	valid_0's rmse: 0.454053
[299]	valid_

[30]	valid_0's rmse: 1.4171
[31]	valid_0's rmse: 1.40516
[32]	valid_0's rmse: 1.39364
[33]	valid_0's rmse: 1.38641
[34]	valid_0's rmse: 1.3797
[35]	valid_0's rmse: 1.37071
[36]	valid_0's rmse: 1.36096
[37]	valid_0's rmse: 1.35166
[38]	valid_0's rmse: 1.34462
[39]	valid_0's rmse: 1.33705
[40]	valid_0's rmse: 1.32901
[41]	valid_0's rmse: 1.32204
[42]	valid_0's rmse: 1.31581
[43]	valid_0's rmse: 1.31011
[44]	valid_0's rmse: 1.30335
[45]	valid_0's rmse: 1.29844
[46]	valid_0's rmse: 1.29284
[47]	valid_0's rmse: 1.28756
[48]	valid_0's rmse: 1.28166
[49]	valid_0's rmse: 1.27729
[50]	valid_0's rmse: 1.27159
[51]	valid_0's rmse: 1.26661
[52]	valid_0's rmse: 1.26297
[53]	valid_0's rmse: 1.26031
[54]	valid_0's rmse: 1.25632
[55]	valid_0's rmse: 1.25282
[56]	valid_0's rmse: 1.24937
[57]	valid_0's rmse: 1.24638
[58]	valid_0's rmse: 1.24258
[59]	valid_0's rmse: 1.23909
[60]	valid_0's rmse: 1.23541
[61]	valid_0's rmse: 1.23263
[62]	valid_0's rmse: 1.22972
[63]	valid_0's rmse: 1.227
[64]	valid_0's rms

[306]	valid_0's rmse: 1.06174
[307]	valid_0's rmse: 1.06127
[308]	valid_0's rmse: 1.06096
[309]	valid_0's rmse: 1.0607
[310]	valid_0's rmse: 1.06042
[311]	valid_0's rmse: 1.06019
[312]	valid_0's rmse: 1.05999
[313]	valid_0's rmse: 1.0599
[314]	valid_0's rmse: 1.0598
[315]	valid_0's rmse: 1.05926
[316]	valid_0's rmse: 1.05908
[317]	valid_0's rmse: 1.05885
[318]	valid_0's rmse: 1.05874
[319]	valid_0's rmse: 1.05854
[320]	valid_0's rmse: 1.05827
[321]	valid_0's rmse: 1.05761
[322]	valid_0's rmse: 1.05705
[323]	valid_0's rmse: 1.0563
[324]	valid_0's rmse: 1.05572
[325]	valid_0's rmse: 1.05521
[326]	valid_0's rmse: 1.05494
[327]	valid_0's rmse: 1.05466
[328]	valid_0's rmse: 1.05416
[329]	valid_0's rmse: 1.05373
[330]	valid_0's rmse: 1.05348
[331]	valid_0's rmse: 1.05329
[332]	valid_0's rmse: 1.05313
[333]	valid_0's rmse: 1.05291
[334]	valid_0's rmse: 1.05249
[335]	valid_0's rmse: 1.05227
[336]	valid_0's rmse: 1.05205
[337]	valid_0's rmse: 1.0518
[338]	valid_0's rmse: 1.05115
[339]	valid_0's

[78]	valid_0's rmse: 1.30906
[79]	valid_0's rmse: 1.30672
[80]	valid_0's rmse: 1.30505
[81]	valid_0's rmse: 1.30292
[82]	valid_0's rmse: 1.30112
[83]	valid_0's rmse: 1.29918
[84]	valid_0's rmse: 1.29789
[85]	valid_0's rmse: 1.29668
[86]	valid_0's rmse: 1.29443
[87]	valid_0's rmse: 1.29239
[88]	valid_0's rmse: 1.29104
[89]	valid_0's rmse: 1.28852
[90]	valid_0's rmse: 1.28689
[91]	valid_0's rmse: 1.28567
[92]	valid_0's rmse: 1.28448
[93]	valid_0's rmse: 1.28362
[94]	valid_0's rmse: 1.28221
[95]	valid_0's rmse: 1.28097
[96]	valid_0's rmse: 1.27905
[97]	valid_0's rmse: 1.27745
[98]	valid_0's rmse: 1.27627
[99]	valid_0's rmse: 1.27548
[100]	valid_0's rmse: 1.27419
[101]	valid_0's rmse: 1.27292
[102]	valid_0's rmse: 1.27186
[103]	valid_0's rmse: 1.27079
[104]	valid_0's rmse: 1.27004
[105]	valid_0's rmse: 1.26888
[106]	valid_0's rmse: 1.26748
[107]	valid_0's rmse: 1.26637
[108]	valid_0's rmse: 1.26573
[109]	valid_0's rmse: 1.26499
[110]	valid_0's rmse: 1.2631
[111]	valid_0's rmse: 1.26205
[11

[352]	valid_0's rmse: 1.14931
[353]	valid_0's rmse: 1.14919
[354]	valid_0's rmse: 1.14902
[355]	valid_0's rmse: 1.14889
[356]	valid_0's rmse: 1.14869
[357]	valid_0's rmse: 1.14843
[358]	valid_0's rmse: 1.14827
[359]	valid_0's rmse: 1.14807
[360]	valid_0's rmse: 1.14739
[361]	valid_0's rmse: 1.14723
[362]	valid_0's rmse: 1.14683
[363]	valid_0's rmse: 1.14664
[364]	valid_0's rmse: 1.14638
[365]	valid_0's rmse: 1.14624
[366]	valid_0's rmse: 1.14607
[367]	valid_0's rmse: 1.14595
[368]	valid_0's rmse: 1.14589
[369]	valid_0's rmse: 1.14574
[370]	valid_0's rmse: 1.14558
[371]	valid_0's rmse: 1.14533
[372]	valid_0's rmse: 1.14495
[373]	valid_0's rmse: 1.14477
[374]	valid_0's rmse: 1.14452
[375]	valid_0's rmse: 1.14426
[376]	valid_0's rmse: 1.14407
[377]	valid_0's rmse: 1.14336
[378]	valid_0's rmse: 1.14325
[379]	valid_0's rmse: 1.14301
[380]	valid_0's rmse: 1.14282
[381]	valid_0's rmse: 1.14216
[382]	valid_0's rmse: 1.14201
[383]	valid_0's rmse: 1.14178
[384]	valid_0's rmse: 1.14151
[385]	vali

[124]	valid_0's rmse: 1.37804
[125]	valid_0's rmse: 1.37705
[126]	valid_0's rmse: 1.37592
[127]	valid_0's rmse: 1.37534
[128]	valid_0's rmse: 1.37453
[129]	valid_0's rmse: 1.37383
[130]	valid_0's rmse: 1.373
[131]	valid_0's rmse: 1.37222
[132]	valid_0's rmse: 1.37166
[133]	valid_0's rmse: 1.37084
[134]	valid_0's rmse: 1.37023
[135]	valid_0's rmse: 1.36962
[136]	valid_0's rmse: 1.36902
[137]	valid_0's rmse: 1.36839
[138]	valid_0's rmse: 1.36774
[139]	valid_0's rmse: 1.36734
[140]	valid_0's rmse: 1.36688
[141]	valid_0's rmse: 1.36629
[142]	valid_0's rmse: 1.36583
[143]	valid_0's rmse: 1.36539
[144]	valid_0's rmse: 1.36481
[145]	valid_0's rmse: 1.36439
[146]	valid_0's rmse: 1.36359
[147]	valid_0's rmse: 1.36283
[148]	valid_0's rmse: 1.36213
[149]	valid_0's rmse: 1.36135
[150]	valid_0's rmse: 1.36081
[151]	valid_0's rmse: 1.35998
[152]	valid_0's rmse: 1.35956
[153]	valid_0's rmse: 1.359
[154]	valid_0's rmse: 1.35847
[155]	valid_0's rmse: 1.35804
[156]	valid_0's rmse: 1.35746
[157]	valid_0'

[398]	valid_0's rmse: 1.2827
[399]	valid_0's rmse: 1.28243
[400]	valid_0's rmse: 1.28227
[401]	valid_0's rmse: 1.28211
[402]	valid_0's rmse: 1.28199
[403]	valid_0's rmse: 1.28186
[404]	valid_0's rmse: 1.28161
[405]	valid_0's rmse: 1.28124
[406]	valid_0's rmse: 1.28109
[407]	valid_0's rmse: 1.28099
[408]	valid_0's rmse: 1.28059
[409]	valid_0's rmse: 1.28041
[410]	valid_0's rmse: 1.28004
[411]	valid_0's rmse: 1.2799
[412]	valid_0's rmse: 1.27974
[413]	valid_0's rmse: 1.27964
[414]	valid_0's rmse: 1.2795
[415]	valid_0's rmse: 1.2794
[416]	valid_0's rmse: 1.2792
[417]	valid_0's rmse: 1.27892
[418]	valid_0's rmse: 1.27885
[419]	valid_0's rmse: 1.27866
[420]	valid_0's rmse: 1.27848
[421]	valid_0's rmse: 1.27834
[422]	valid_0's rmse: 1.27829
[423]	valid_0's rmse: 1.27798
[424]	valid_0's rmse: 1.27783
[425]	valid_0's rmse: 1.27765
[426]	valid_0's rmse: 1.27746
[427]	valid_0's rmse: 1.27722
[428]	valid_0's rmse: 1.27702
[429]	valid_0's rmse: 1.27687
[430]	valid_0's rmse: 1.27671
[431]	valid_0's

[166]	valid_0's rmse: 0.583065
[167]	valid_0's rmse: 0.582777
[168]	valid_0's rmse: 0.582478
[169]	valid_0's rmse: 0.582163
[170]	valid_0's rmse: 0.581893
[171]	valid_0's rmse: 0.581051
[172]	valid_0's rmse: 0.580236
[173]	valid_0's rmse: 0.579339
[174]	valid_0's rmse: 0.578748
[175]	valid_0's rmse: 0.578306
[176]	valid_0's rmse: 0.577376
[177]	valid_0's rmse: 0.577039
[178]	valid_0's rmse: 0.576613
[179]	valid_0's rmse: 0.576362
[180]	valid_0's rmse: 0.575946
[181]	valid_0's rmse: 0.575128
[182]	valid_0's rmse: 0.57467
[183]	valid_0's rmse: 0.574341
[184]	valid_0's rmse: 0.57363
[185]	valid_0's rmse: 0.57331
[186]	valid_0's rmse: 0.572979
[187]	valid_0's rmse: 0.572535
[188]	valid_0's rmse: 0.57199
[189]	valid_0's rmse: 0.57148
[190]	valid_0's rmse: 0.571139
[191]	valid_0's rmse: 0.570527
[192]	valid_0's rmse: 0.569795
[193]	valid_0's rmse: 0.569502
[194]	valid_0's rmse: 0.56883
[195]	valid_0's rmse: 0.568219
[196]	valid_0's rmse: 0.567951
[197]	valid_0's rmse: 0.567698
[198]	valid_0'

[430]	valid_0's rmse: 0.507762
[431]	valid_0's rmse: 0.507616
[432]	valid_0's rmse: 0.507456
[433]	valid_0's rmse: 0.507281
[434]	valid_0's rmse: 0.507131
[435]	valid_0's rmse: 0.506818
[436]	valid_0's rmse: 0.506692
[437]	valid_0's rmse: 0.506567
[438]	valid_0's rmse: 0.506432
[439]	valid_0's rmse: 0.506301
[440]	valid_0's rmse: 0.50619
[441]	valid_0's rmse: 0.506052
[442]	valid_0's rmse: 0.505931
[443]	valid_0's rmse: 0.505811
[444]	valid_0's rmse: 0.505729
[445]	valid_0's rmse: 0.505556
[446]	valid_0's rmse: 0.505346
[447]	valid_0's rmse: 0.505183
[448]	valid_0's rmse: 0.505037
[449]	valid_0's rmse: 0.504839
[450]	valid_0's rmse: 0.504648
[451]	valid_0's rmse: 0.504572
[452]	valid_0's rmse: 0.504436
[453]	valid_0's rmse: 0.504328
[454]	valid_0's rmse: 0.504218
[455]	valid_0's rmse: 0.504061
[456]	valid_0's rmse: 0.503873
[457]	valid_0's rmse: 0.503725
[458]	valid_0's rmse: 0.503545
[459]	valid_0's rmse: 0.50341
[460]	valid_0's rmse: 0.503243
[461]	valid_0's rmse: 0.50311
[462]	valid

[200]	valid_0's rmse: 1.09898
[201]	valid_0's rmse: 1.09816
[202]	valid_0's rmse: 1.09787
[203]	valid_0's rmse: 1.09769
[204]	valid_0's rmse: 1.09741
[205]	valid_0's rmse: 1.09714
[206]	valid_0's rmse: 1.09648
[207]	valid_0's rmse: 1.09541
[208]	valid_0's rmse: 1.09496
[209]	valid_0's rmse: 1.09454
[210]	valid_0's rmse: 1.09411
[211]	valid_0's rmse: 1.09354
[212]	valid_0's rmse: 1.09305
[213]	valid_0's rmse: 1.09234
[214]	valid_0's rmse: 1.09177
[215]	valid_0's rmse: 1.09149
[216]	valid_0's rmse: 1.09118
[217]	valid_0's rmse: 1.091
[218]	valid_0's rmse: 1.09081
[219]	valid_0's rmse: 1.0906
[220]	valid_0's rmse: 1.09036
[221]	valid_0's rmse: 1.09005
[222]	valid_0's rmse: 1.08977
[223]	valid_0's rmse: 1.08947
[224]	valid_0's rmse: 1.08924
[225]	valid_0's rmse: 1.08904
[226]	valid_0's rmse: 1.08876
[227]	valid_0's rmse: 1.08829
[228]	valid_0's rmse: 1.08805
[229]	valid_0's rmse: 1.08776
[230]	valid_0's rmse: 1.08749
[231]	valid_0's rmse: 1.08717
[232]	valid_0's rmse: 1.08672
[233]	valid_0

[474]	valid_0's rmse: 1.02487
[475]	valid_0's rmse: 1.02479
[476]	valid_0's rmse: 1.02475
[477]	valid_0's rmse: 1.02454
[478]	valid_0's rmse: 1.02432
[479]	valid_0's rmse: 1.02419
[480]	valid_0's rmse: 1.02414
[481]	valid_0's rmse: 1.02406
[482]	valid_0's rmse: 1.02398
[483]	valid_0's rmse: 1.02381
[484]	valid_0's rmse: 1.02372
[485]	valid_0's rmse: 1.02367
[486]	valid_0's rmse: 1.02329
[487]	valid_0's rmse: 1.02314
[488]	valid_0's rmse: 1.02285
[489]	valid_0's rmse: 1.0226
[490]	valid_0's rmse: 1.02246
[491]	valid_0's rmse: 1.0222
[492]	valid_0's rmse: 1.02206
[493]	valid_0's rmse: 1.02185
[494]	valid_0's rmse: 1.02176
[495]	valid_0's rmse: 1.02172
[496]	valid_0's rmse: 1.02148
[497]	valid_0's rmse: 1.0213
[498]	valid_0's rmse: 1.02115
[499]	valid_0's rmse: 1.02107
[500]	valid_0's rmse: 1.02088
Did not meet early stopping. Best iteration is:
[500]	valid_0's rmse: 1.02088
[1]	valid_0's rmse: 2.54728
Training until validation scores don't improve for 20 rounds
[2]	valid_0's rmse: 2.4660

[247]	valid_0's rmse: 1.18309
[248]	valid_0's rmse: 1.18293
[249]	valid_0's rmse: 1.18276
[250]	valid_0's rmse: 1.18262
[251]	valid_0's rmse: 1.18209
[252]	valid_0's rmse: 1.1817
[253]	valid_0's rmse: 1.18141
[254]	valid_0's rmse: 1.18098
[255]	valid_0's rmse: 1.18054
[256]	valid_0's rmse: 1.18028
[257]	valid_0's rmse: 1.18009
[258]	valid_0's rmse: 1.17992
[259]	valid_0's rmse: 1.17976
[260]	valid_0's rmse: 1.1795
[261]	valid_0's rmse: 1.17935
[262]	valid_0's rmse: 1.17913
[263]	valid_0's rmse: 1.17887
[264]	valid_0's rmse: 1.1785
[265]	valid_0's rmse: 1.17809
[266]	valid_0's rmse: 1.17782
[267]	valid_0's rmse: 1.17749
[268]	valid_0's rmse: 1.1771
[269]	valid_0's rmse: 1.1768
[270]	valid_0's rmse: 1.17656
[271]	valid_0's rmse: 1.17606
[272]	valid_0's rmse: 1.1756
[273]	valid_0's rmse: 1.17512
[274]	valid_0's rmse: 1.1747
[275]	valid_0's rmse: 1.17429
[276]	valid_0's rmse: 1.17393
[277]	valid_0's rmse: 1.17371
[278]	valid_0's rmse: 1.17343
[279]	valid_0's rmse: 1.17323
[280]	valid_0's r

[17]	valid_0's rmse: 1.98442
[18]	valid_0's rmse: 1.95901
[19]	valid_0's rmse: 1.93501
[20]	valid_0's rmse: 1.9117
[21]	valid_0's rmse: 1.8892
[22]	valid_0's rmse: 1.86829
[23]	valid_0's rmse: 1.84866
[24]	valid_0's rmse: 1.82924
[25]	valid_0's rmse: 1.81047
[26]	valid_0's rmse: 1.79273
[27]	valid_0's rmse: 1.77547
[28]	valid_0's rmse: 1.75944
[29]	valid_0's rmse: 1.7441
[30]	valid_0's rmse: 1.72927
[31]	valid_0's rmse: 1.71474
[32]	valid_0's rmse: 1.70091
[33]	valid_0's rmse: 1.69208
[34]	valid_0's rmse: 1.68371
[35]	valid_0's rmse: 1.67157
[36]	valid_0's rmse: 1.65964
[37]	valid_0's rmse: 1.64828
[38]	valid_0's rmse: 1.64144
[39]	valid_0's rmse: 1.63089
[40]	valid_0's rmse: 1.621
[41]	valid_0's rmse: 1.61123
[42]	valid_0's rmse: 1.60202
[43]	valid_0's rmse: 1.59327
[44]	valid_0's rmse: 1.58476
[45]	valid_0's rmse: 1.57958
[46]	valid_0's rmse: 1.5717
[47]	valid_0's rmse: 1.56432
[48]	valid_0's rmse: 1.55724
[49]	valid_0's rmse: 1.55036
[50]	valid_0's rmse: 1.54398
[51]	valid_0's rmse:

[294]	valid_0's rmse: 1.30468
[295]	valid_0's rmse: 1.30447
[296]	valid_0's rmse: 1.30419
[297]	valid_0's rmse: 1.30386
[298]	valid_0's rmse: 1.30367
[299]	valid_0's rmse: 1.30348
[300]	valid_0's rmse: 1.30334
[301]	valid_0's rmse: 1.30312
[302]	valid_0's rmse: 1.30272
[303]	valid_0's rmse: 1.30246
[304]	valid_0's rmse: 1.30226
[305]	valid_0's rmse: 1.30178
[306]	valid_0's rmse: 1.30157
[307]	valid_0's rmse: 1.30137
[308]	valid_0's rmse: 1.30105
[309]	valid_0's rmse: 1.30073
[310]	valid_0's rmse: 1.30045
[311]	valid_0's rmse: 1.30027
[312]	valid_0's rmse: 1.30008
[313]	valid_0's rmse: 1.29991
[314]	valid_0's rmse: 1.29967
[315]	valid_0's rmse: 1.29947
[316]	valid_0's rmse: 1.29935
[317]	valid_0's rmse: 1.29919
[318]	valid_0's rmse: 1.29905
[319]	valid_0's rmse: 1.29889
[320]	valid_0's rmse: 1.29873
[321]	valid_0's rmse: 1.29847
[322]	valid_0's rmse: 1.29826
[323]	valid_0's rmse: 1.2981
[324]	valid_0's rmse: 1.29802
[325]	valid_0's rmse: 1.2978
[326]	valid_0's rmse: 1.29764
[327]	valid_

[64]	valid_0's rmse: 0.707066
[65]	valid_0's rmse: 0.70377
[66]	valid_0's rmse: 0.700686
[67]	valid_0's rmse: 0.697747
[68]	valid_0's rmse: 0.694879
[69]	valid_0's rmse: 0.692825
[70]	valid_0's rmse: 0.690228
[71]	valid_0's rmse: 0.68771
[72]	valid_0's rmse: 0.685183
[73]	valid_0's rmse: 0.682779
[74]	valid_0's rmse: 0.680586
[75]	valid_0's rmse: 0.678247
[76]	valid_0's rmse: 0.675768
[77]	valid_0's rmse: 0.673463
[78]	valid_0's rmse: 0.671273
[79]	valid_0's rmse: 0.669277
[80]	valid_0's rmse: 0.6672
[81]	valid_0's rmse: 0.665213
[82]	valid_0's rmse: 0.663335
[83]	valid_0's rmse: 0.661494
[84]	valid_0's rmse: 0.65977
[85]	valid_0's rmse: 0.658314
[86]	valid_0's rmse: 0.656704
[87]	valid_0's rmse: 0.655089
[88]	valid_0's rmse: 0.653592
[89]	valid_0's rmse: 0.652107
[90]	valid_0's rmse: 0.65064
[91]	valid_0's rmse: 0.649263
[92]	valid_0's rmse: 0.647948
[93]	valid_0's rmse: 0.64666
[94]	valid_0's rmse: 0.645133
[95]	valid_0's rmse: 0.643899
[96]	valid_0's rmse: 0.642274
[97]	valid_0's rm

[330]	valid_0's rmse: 0.528844
[331]	valid_0's rmse: 0.528636
[332]	valid_0's rmse: 0.528436
[333]	valid_0's rmse: 0.528172
[334]	valid_0's rmse: 0.527865
[335]	valid_0's rmse: 0.527664
[336]	valid_0's rmse: 0.527383
[337]	valid_0's rmse: 0.527121
[338]	valid_0's rmse: 0.52681
[339]	valid_0's rmse: 0.526636
[340]	valid_0's rmse: 0.526362
[341]	valid_0's rmse: 0.526214
[342]	valid_0's rmse: 0.526057
[343]	valid_0's rmse: 0.525936
[344]	valid_0's rmse: 0.525716
[345]	valid_0's rmse: 0.525558
[346]	valid_0's rmse: 0.525329
[347]	valid_0's rmse: 0.524895
[348]	valid_0's rmse: 0.52464
[349]	valid_0's rmse: 0.524407
[350]	valid_0's rmse: 0.524207
[351]	valid_0's rmse: 0.523993
[352]	valid_0's rmse: 0.52385
[353]	valid_0's rmse: 0.523681
[354]	valid_0's rmse: 0.523563
[355]	valid_0's rmse: 0.523324
[356]	valid_0's rmse: 0.523119
[357]	valid_0's rmse: 0.522953
[358]	valid_0's rmse: 0.522602
[359]	valid_0's rmse: 0.522457
[360]	valid_0's rmse: 0.522044
[361]	valid_0's rmse: 0.521771
[362]	valid

[97]	valid_0's rmse: 1.1637
[98]	valid_0's rmse: 1.1624
[99]	valid_0's rmse: 1.16073
[100]	valid_0's rmse: 1.15954
[101]	valid_0's rmse: 1.15866
[102]	valid_0's rmse: 1.1576
[103]	valid_0's rmse: 1.15654
[104]	valid_0's rmse: 1.15533
[105]	valid_0's rmse: 1.15441
[106]	valid_0's rmse: 1.15297
[107]	valid_0's rmse: 1.15195
[108]	valid_0's rmse: 1.15066
[109]	valid_0's rmse: 1.14945
[110]	valid_0's rmse: 1.14844
[111]	valid_0's rmse: 1.14708
[112]	valid_0's rmse: 1.14597
[113]	valid_0's rmse: 1.14485
[114]	valid_0's rmse: 1.14374
[115]	valid_0's rmse: 1.14286
[116]	valid_0's rmse: 1.14211
[117]	valid_0's rmse: 1.14146
[118]	valid_0's rmse: 1.14078
[119]	valid_0's rmse: 1.1395
[120]	valid_0's rmse: 1.13891
[121]	valid_0's rmse: 1.13796
[122]	valid_0's rmse: 1.13709
[123]	valid_0's rmse: 1.13644
[124]	valid_0's rmse: 1.13577
[125]	valid_0's rmse: 1.1351
[126]	valid_0's rmse: 1.13474
[127]	valid_0's rmse: 1.13393
[128]	valid_0's rmse: 1.13343
[129]	valid_0's rmse: 1.13289
[130]	valid_0's rm

[371]	valid_0's rmse: 1.04579
[372]	valid_0's rmse: 1.04562
[373]	valid_0's rmse: 1.04552
[374]	valid_0's rmse: 1.04538
[375]	valid_0's rmse: 1.04509
[376]	valid_0's rmse: 1.04488
[377]	valid_0's rmse: 1.04431
[378]	valid_0's rmse: 1.04406
[379]	valid_0's rmse: 1.04393
[380]	valid_0's rmse: 1.0438
[381]	valid_0's rmse: 1.04355
[382]	valid_0's rmse: 1.04337
[383]	valid_0's rmse: 1.04319
[384]	valid_0's rmse: 1.04307
[385]	valid_0's rmse: 1.04292
[386]	valid_0's rmse: 1.04284
[387]	valid_0's rmse: 1.04272
[388]	valid_0's rmse: 1.04251
[389]	valid_0's rmse: 1.04244
[390]	valid_0's rmse: 1.04236
[391]	valid_0's rmse: 1.04214
[392]	valid_0's rmse: 1.04181
[393]	valid_0's rmse: 1.04152
[394]	valid_0's rmse: 1.04141
[395]	valid_0's rmse: 1.04102
[396]	valid_0's rmse: 1.04083
[397]	valid_0's rmse: 1.04067
[398]	valid_0's rmse: 1.04052
[399]	valid_0's rmse: 1.04041
[400]	valid_0's rmse: 1.04021
[401]	valid_0's rmse: 1.04004
[402]	valid_0's rmse: 1.03984
[403]	valid_0's rmse: 1.03907
[404]	valid

[144]	valid_0's rmse: 1.23641
[145]	valid_0's rmse: 1.23591
[146]	valid_0's rmse: 1.23556
[147]	valid_0's rmse: 1.23525
[148]	valid_0's rmse: 1.2344
[149]	valid_0's rmse: 1.234
[150]	valid_0's rmse: 1.23333
[151]	valid_0's rmse: 1.23264
[152]	valid_0's rmse: 1.23182
[153]	valid_0's rmse: 1.23054
[154]	valid_0's rmse: 1.22976
[155]	valid_0's rmse: 1.22896
[156]	valid_0's rmse: 1.22814
[157]	valid_0's rmse: 1.22745
[158]	valid_0's rmse: 1.22688
[159]	valid_0's rmse: 1.22594
[160]	valid_0's rmse: 1.22546
[161]	valid_0's rmse: 1.22474
[162]	valid_0's rmse: 1.22389
[163]	valid_0's rmse: 1.22297
[164]	valid_0's rmse: 1.22228
[165]	valid_0's rmse: 1.22138
[166]	valid_0's rmse: 1.22069
[167]	valid_0's rmse: 1.22027
[168]	valid_0's rmse: 1.21993
[169]	valid_0's rmse: 1.21922
[170]	valid_0's rmse: 1.21888
[171]	valid_0's rmse: 1.21862
[172]	valid_0's rmse: 1.21788
[173]	valid_0's rmse: 1.21732
[174]	valid_0's rmse: 1.2171
[175]	valid_0's rmse: 1.21636
[176]	valid_0's rmse: 1.21599
[177]	valid_0'

[418]	valid_0's rmse: 1.13343
[419]	valid_0's rmse: 1.13326
[420]	valid_0's rmse: 1.13312
[421]	valid_0's rmse: 1.13297
[422]	valid_0's rmse: 1.13248
[423]	valid_0's rmse: 1.13231
[424]	valid_0's rmse: 1.13219
[425]	valid_0's rmse: 1.13202
[426]	valid_0's rmse: 1.13187
[427]	valid_0's rmse: 1.13171
[428]	valid_0's rmse: 1.13158
[429]	valid_0's rmse: 1.13144
[430]	valid_0's rmse: 1.13136
[431]	valid_0's rmse: 1.13108
[432]	valid_0's rmse: 1.13072
[433]	valid_0's rmse: 1.13054
[434]	valid_0's rmse: 1.13024
[435]	valid_0's rmse: 1.13012
[436]	valid_0's rmse: 1.12992
[437]	valid_0's rmse: 1.12975
[438]	valid_0's rmse: 1.12954
[439]	valid_0's rmse: 1.12935
[440]	valid_0's rmse: 1.12899
[441]	valid_0's rmse: 1.12886
[442]	valid_0's rmse: 1.12864
[443]	valid_0's rmse: 1.12853
[444]	valid_0's rmse: 1.12841
[445]	valid_0's rmse: 1.1283
[446]	valid_0's rmse: 1.12818
[447]	valid_0's rmse: 1.12808
[448]	valid_0's rmse: 1.12794
[449]	valid_0's rmse: 1.12785
[450]	valid_0's rmse: 1.12777
[451]	valid

[191]	valid_0's rmse: 1.34303
[192]	valid_0's rmse: 1.34275
[193]	valid_0's rmse: 1.34262
[194]	valid_0's rmse: 1.34187
[195]	valid_0's rmse: 1.3414
[196]	valid_0's rmse: 1.34075
[197]	valid_0's rmse: 1.3403
[198]	valid_0's rmse: 1.33978
[199]	valid_0's rmse: 1.33894
[200]	valid_0's rmse: 1.33846
[201]	valid_0's rmse: 1.33833
[202]	valid_0's rmse: 1.33806
[203]	valid_0's rmse: 1.33763
[204]	valid_0's rmse: 1.33737
[205]	valid_0's rmse: 1.337
[206]	valid_0's rmse: 1.33621
[207]	valid_0's rmse: 1.33603
[208]	valid_0's rmse: 1.33542
[209]	valid_0's rmse: 1.33498
[210]	valid_0's rmse: 1.33456
[211]	valid_0's rmse: 1.3341
[212]	valid_0's rmse: 1.33377
[213]	valid_0's rmse: 1.33356
[214]	valid_0's rmse: 1.33331
[215]	valid_0's rmse: 1.33292
[216]	valid_0's rmse: 1.33256
[217]	valid_0's rmse: 1.33215
[218]	valid_0's rmse: 1.33174
[219]	valid_0's rmse: 1.33142
[220]	valid_0's rmse: 1.3311
[221]	valid_0's rmse: 1.33073
[222]	valid_0's rmse: 1.33039
[223]	valid_0's rmse: 1.3301
[224]	valid_0's r

[465]	valid_0's rmse: 1.27174
[466]	valid_0's rmse: 1.27144
[467]	valid_0's rmse: 1.27125
[468]	valid_0's rmse: 1.27122
[469]	valid_0's rmse: 1.271
[470]	valid_0's rmse: 1.27085
[471]	valid_0's rmse: 1.27062
[472]	valid_0's rmse: 1.27051
[473]	valid_0's rmse: 1.27046
[474]	valid_0's rmse: 1.27028
[475]	valid_0's rmse: 1.27013
[476]	valid_0's rmse: 1.26999
[477]	valid_0's rmse: 1.26976
[478]	valid_0's rmse: 1.26964
[479]	valid_0's rmse: 1.26956
[480]	valid_0's rmse: 1.26944
[481]	valid_0's rmse: 1.26922
[482]	valid_0's rmse: 1.2691
[483]	valid_0's rmse: 1.26894
[484]	valid_0's rmse: 1.26877
[485]	valid_0's rmse: 1.26863
[486]	valid_0's rmse: 1.26854
[487]	valid_0's rmse: 1.26828
[488]	valid_0's rmse: 1.26813
[489]	valid_0's rmse: 1.26795
[490]	valid_0's rmse: 1.26775
[491]	valid_0's rmse: 1.26757
[492]	valid_0's rmse: 1.26734
[493]	valid_0's rmse: 1.26718
[494]	valid_0's rmse: 1.26702
[495]	valid_0's rmse: 1.26666
[496]	valid_0's rmse: 1.26657
[497]	valid_0's rmse: 1.26649
[498]	valid_0

[230]	valid_0's rmse: 0.552516
[231]	valid_0's rmse: 0.552186
[232]	valid_0's rmse: 0.551875
[233]	valid_0's rmse: 0.551504
[234]	valid_0's rmse: 0.551205
[235]	valid_0's rmse: 0.550891
[236]	valid_0's rmse: 0.550669
[237]	valid_0's rmse: 0.550439
[238]	valid_0's rmse: 0.550188
[239]	valid_0's rmse: 0.549957
[240]	valid_0's rmse: 0.549585
[241]	valid_0's rmse: 0.549392
[242]	valid_0's rmse: 0.548905
[243]	valid_0's rmse: 0.548689
[244]	valid_0's rmse: 0.548304
[245]	valid_0's rmse: 0.548063
[246]	valid_0's rmse: 0.547684
[247]	valid_0's rmse: 0.547383
[248]	valid_0's rmse: 0.547077
[249]	valid_0's rmse: 0.546837
[250]	valid_0's rmse: 0.546402
[251]	valid_0's rmse: 0.546082
[252]	valid_0's rmse: 0.545622
[253]	valid_0's rmse: 0.545243
[254]	valid_0's rmse: 0.544959
[255]	valid_0's rmse: 0.544637
[256]	valid_0's rmse: 0.544463
[257]	valid_0's rmse: 0.544222
[258]	valid_0's rmse: 0.5441
[259]	valid_0's rmse: 0.543599
[260]	valid_0's rmse: 0.543352
[261]	valid_0's rmse: 0.54307
[262]	valid

[495]	valid_0's rmse: 0.498843
[496]	valid_0's rmse: 0.498774
[497]	valid_0's rmse: 0.498652
[498]	valid_0's rmse: 0.498266
[499]	valid_0's rmse: 0.497719
[500]	valid_0's rmse: 0.497649
Did not meet early stopping. Best iteration is:
[500]	valid_0's rmse: 0.497649
[1]	valid_0's rmse: 2.44817
Training until validation scores don't improve for 20 rounds
[2]	valid_0's rmse: 2.37061
[3]	valid_0's rmse: 2.29832
[4]	valid_0's rmse: 2.23025
[5]	valid_0's rmse: 2.1673
[6]	valid_0's rmse: 2.12674
[7]	valid_0's rmse: 2.06993
[8]	valid_0's rmse: 2.01736
[9]	valid_0's rmse: 1.96761
[10]	valid_0's rmse: 1.92072
[11]	valid_0's rmse: 1.87683
[12]	valid_0's rmse: 1.83622
[13]	valid_0's rmse: 1.79832
[14]	valid_0's rmse: 1.76296
[15]	valid_0's rmse: 1.72959
[16]	valid_0's rmse: 1.69868
[17]	valid_0's rmse: 1.66921
[18]	valid_0's rmse: 1.64244
[19]	valid_0's rmse: 1.61688
[20]	valid_0's rmse: 1.59367
[21]	valid_0's rmse: 1.57088
[22]	valid_0's rmse: 1.55021
[23]	valid_0's rmse: 1.53051
[24]	valid_0's rm

[267]	valid_0's rmse: 1.07689
[268]	valid_0's rmse: 1.07618
[269]	valid_0's rmse: 1.07595
[270]	valid_0's rmse: 1.07581
[271]	valid_0's rmse: 1.07539
[272]	valid_0's rmse: 1.075
[273]	valid_0's rmse: 1.07477
[274]	valid_0's rmse: 1.07445
[275]	valid_0's rmse: 1.07408
[276]	valid_0's rmse: 1.07384
[277]	valid_0's rmse: 1.07367
[278]	valid_0's rmse: 1.07337
[279]	valid_0's rmse: 1.07322
[280]	valid_0's rmse: 1.07311
[281]	valid_0's rmse: 1.07277
[282]	valid_0's rmse: 1.07229
[283]	valid_0's rmse: 1.0717
[284]	valid_0's rmse: 1.07138
[285]	valid_0's rmse: 1.07118
[286]	valid_0's rmse: 1.0708
[287]	valid_0's rmse: 1.0705
[288]	valid_0's rmse: 1.07009
[289]	valid_0's rmse: 1.06976
[290]	valid_0's rmse: 1.06935
[291]	valid_0's rmse: 1.06903
[292]	valid_0's rmse: 1.0687
[293]	valid_0's rmse: 1.0684
[294]	valid_0's rmse: 1.06812
[295]	valid_0's rmse: 1.06798
[296]	valid_0's rmse: 1.06774
[297]	valid_0's rmse: 1.06756
[298]	valid_0's rmse: 1.06733
[299]	valid_0's rmse: 1.06669
[300]	valid_0's r

[37]	valid_0's rmse: 1.43588
[38]	valid_0's rmse: 1.43136
[39]	valid_0's rmse: 1.42459
[40]	valid_0's rmse: 1.41799
[41]	valid_0's rmse: 1.41122
[42]	valid_0's rmse: 1.40528
[43]	valid_0's rmse: 1.39971
[44]	valid_0's rmse: 1.39495
[45]	valid_0's rmse: 1.39219
[46]	valid_0's rmse: 1.38748
[47]	valid_0's rmse: 1.38291
[48]	valid_0's rmse: 1.37891
[49]	valid_0's rmse: 1.3744
[50]	valid_0's rmse: 1.37061
[51]	valid_0's rmse: 1.36644
[52]	valid_0's rmse: 1.36301
[53]	valid_0's rmse: 1.36121
[54]	valid_0's rmse: 1.35794
[55]	valid_0's rmse: 1.35463
[56]	valid_0's rmse: 1.35156
[57]	valid_0's rmse: 1.34902
[58]	valid_0's rmse: 1.34649
[59]	valid_0's rmse: 1.34399
[60]	valid_0's rmse: 1.34144
[61]	valid_0's rmse: 1.33911
[62]	valid_0's rmse: 1.33702
[63]	valid_0's rmse: 1.33484
[64]	valid_0's rmse: 1.33256
[65]	valid_0's rmse: 1.33033
[66]	valid_0's rmse: 1.32912
[67]	valid_0's rmse: 1.32605
[68]	valid_0's rmse: 1.32359
[69]	valid_0's rmse: 1.32033
[70]	valid_0's rmse: 1.31816
[71]	valid_0's 

[313]	valid_0's rmse: 1.15861
[314]	valid_0's rmse: 1.15832
[315]	valid_0's rmse: 1.15788
[316]	valid_0's rmse: 1.1576
[317]	valid_0's rmse: 1.15739
[318]	valid_0's rmse: 1.15721
[319]	valid_0's rmse: 1.15706
[320]	valid_0's rmse: 1.15687
[321]	valid_0's rmse: 1.15659
[322]	valid_0's rmse: 1.15634
[323]	valid_0's rmse: 1.15611
[324]	valid_0's rmse: 1.15568
[325]	valid_0's rmse: 1.15548
[326]	valid_0's rmse: 1.15529
[327]	valid_0's rmse: 1.15503
[328]	valid_0's rmse: 1.15429
[329]	valid_0's rmse: 1.15411
[330]	valid_0's rmse: 1.15397
[331]	valid_0's rmse: 1.15363
[332]	valid_0's rmse: 1.15328
[333]	valid_0's rmse: 1.15297
[334]	valid_0's rmse: 1.15274
[335]	valid_0's rmse: 1.15252
[336]	valid_0's rmse: 1.15243
[337]	valid_0's rmse: 1.15231
[338]	valid_0's rmse: 1.15117
[339]	valid_0's rmse: 1.15097
[340]	valid_0's rmse: 1.15069
[341]	valid_0's rmse: 1.15061
[342]	valid_0's rmse: 1.15048
[343]	valid_0's rmse: 1.1504
[344]	valid_0's rmse: 1.15016
[345]	valid_0's rmse: 1.15011
[346]	valid_

[85]	valid_0's rmse: 1.42539
[86]	valid_0's rmse: 1.42312
[87]	valid_0's rmse: 1.42081
[88]	valid_0's rmse: 1.41876
[89]	valid_0's rmse: 1.4169
[90]	valid_0's rmse: 1.41512
[91]	valid_0's rmse: 1.41363
[92]	valid_0's rmse: 1.412
[93]	valid_0's rmse: 1.41052
[94]	valid_0's rmse: 1.40898
[95]	valid_0's rmse: 1.40759
[96]	valid_0's rmse: 1.40608
[97]	valid_0's rmse: 1.40467
[98]	valid_0's rmse: 1.40347
[99]	valid_0's rmse: 1.40216
[100]	valid_0's rmse: 1.40088
[101]	valid_0's rmse: 1.39957
[102]	valid_0's rmse: 1.3982
[103]	valid_0's rmse: 1.3971
[104]	valid_0's rmse: 1.39593
[105]	valid_0's rmse: 1.39487
[106]	valid_0's rmse: 1.39367
[107]	valid_0's rmse: 1.39274
[108]	valid_0's rmse: 1.39171
[109]	valid_0's rmse: 1.39085
[110]	valid_0's rmse: 1.3902
[111]	valid_0's rmse: 1.3889
[112]	valid_0's rmse: 1.38775
[113]	valid_0's rmse: 1.38662
[114]	valid_0's rmse: 1.38595
[115]	valid_0's rmse: 1.38476
[116]	valid_0's rmse: 1.38395
[117]	valid_0's rmse: 1.38324
[118]	valid_0's rmse: 1.38257
[1

[359]	valid_0's rmse: 1.29048
[360]	valid_0's rmse: 1.29027
[361]	valid_0's rmse: 1.29004
[362]	valid_0's rmse: 1.28986
[363]	valid_0's rmse: 1.28961
[364]	valid_0's rmse: 1.28934
[365]	valid_0's rmse: 1.28911
[366]	valid_0's rmse: 1.28903
[367]	valid_0's rmse: 1.28872
[368]	valid_0's rmse: 1.28862
[369]	valid_0's rmse: 1.28853
[370]	valid_0's rmse: 1.28827
[371]	valid_0's rmse: 1.28796
[372]	valid_0's rmse: 1.28776
[373]	valid_0's rmse: 1.28744
[374]	valid_0's rmse: 1.28714
[375]	valid_0's rmse: 1.28693
[376]	valid_0's rmse: 1.28675
[377]	valid_0's rmse: 1.2866
[378]	valid_0's rmse: 1.28637
[379]	valid_0's rmse: 1.28601
[380]	valid_0's rmse: 1.28566
[381]	valid_0's rmse: 1.28549
[382]	valid_0's rmse: 1.28535
[383]	valid_0's rmse: 1.28516
[384]	valid_0's rmse: 1.28498
[385]	valid_0's rmse: 1.2849
[386]	valid_0's rmse: 1.28464
[387]	valid_0's rmse: 1.28442
[388]	valid_0's rmse: 1.28399
[389]	valid_0's rmse: 1.28389
[390]	valid_0's rmse: 1.28369
[391]	valid_0's rmse: 1.28342
[392]	valid_

[128]	valid_0's rmse: 0.604711
[129]	valid_0's rmse: 0.603743
[130]	valid_0's rmse: 0.602324
[131]	valid_0's rmse: 0.601886
[132]	valid_0's rmse: 0.601297
[133]	valid_0's rmse: 0.600292
[134]	valid_0's rmse: 0.599014
[135]	valid_0's rmse: 0.598523
[136]	valid_0's rmse: 0.597892
[137]	valid_0's rmse: 0.597388
[138]	valid_0's rmse: 0.596418
[139]	valid_0's rmse: 0.595329
[140]	valid_0's rmse: 0.594876
[141]	valid_0's rmse: 0.594229
[142]	valid_0's rmse: 0.593565
[143]	valid_0's rmse: 0.592316
[144]	valid_0's rmse: 0.591671
[145]	valid_0's rmse: 0.591264
[146]	valid_0's rmse: 0.590608
[147]	valid_0's rmse: 0.589491
[148]	valid_0's rmse: 0.588559
[149]	valid_0's rmse: 0.588095
[150]	valid_0's rmse: 0.586968
[151]	valid_0's rmse: 0.586616
[152]	valid_0's rmse: 0.585875
[153]	valid_0's rmse: 0.585352
[154]	valid_0's rmse: 0.584923
[155]	valid_0's rmse: 0.583982
[156]	valid_0's rmse: 0.583513
[157]	valid_0's rmse: 0.583094
[158]	valid_0's rmse: 0.582333
[159]	valid_0's rmse: 0.581963
[160]	va

[393]	valid_0's rmse: 0.513511
[394]	valid_0's rmse: 0.513288
[395]	valid_0's rmse: 0.513019
[396]	valid_0's rmse: 0.512874
[397]	valid_0's rmse: 0.512662
[398]	valid_0's rmse: 0.512428
[399]	valid_0's rmse: 0.512289
[400]	valid_0's rmse: 0.512055
[401]	valid_0's rmse: 0.511886
[402]	valid_0's rmse: 0.511774
[403]	valid_0's rmse: 0.511522
[404]	valid_0's rmse: 0.511408
[405]	valid_0's rmse: 0.51127
[406]	valid_0's rmse: 0.511091
[407]	valid_0's rmse: 0.510971
[408]	valid_0's rmse: 0.510827
[409]	valid_0's rmse: 0.510688
[410]	valid_0's rmse: 0.510614
[411]	valid_0's rmse: 0.510445
[412]	valid_0's rmse: 0.510219
[413]	valid_0's rmse: 0.51006
[414]	valid_0's rmse: 0.509864
[415]	valid_0's rmse: 0.509673
[416]	valid_0's rmse: 0.509554
[417]	valid_0's rmse: 0.509418
[418]	valid_0's rmse: 0.509287
[419]	valid_0's rmse: 0.509165
[420]	valid_0's rmse: 0.509056
[421]	valid_0's rmse: 0.508933
[422]	valid_0's rmse: 0.508772
[423]	valid_0's rmse: 0.508662
[424]	valid_0's rmse: 0.508502
[425]	vali

[162]	valid_0's rmse: 1.11469
[163]	valid_0's rmse: 1.11436
[164]	valid_0's rmse: 1.11372
[165]	valid_0's rmse: 1.11313
[166]	valid_0's rmse: 1.11268
[167]	valid_0's rmse: 1.11196
[168]	valid_0's rmse: 1.11153
[169]	valid_0's rmse: 1.11089
[170]	valid_0's rmse: 1.1101
[171]	valid_0's rmse: 1.10965
[172]	valid_0's rmse: 1.10898
[173]	valid_0's rmse: 1.10834
[174]	valid_0's rmse: 1.10804
[175]	valid_0's rmse: 1.10731
[176]	valid_0's rmse: 1.10688
[177]	valid_0's rmse: 1.10655
[178]	valid_0's rmse: 1.10626
[179]	valid_0's rmse: 1.10593
[180]	valid_0's rmse: 1.10541
[181]	valid_0's rmse: 1.10474
[182]	valid_0's rmse: 1.10441
[183]	valid_0's rmse: 1.10388
[184]	valid_0's rmse: 1.10359
[185]	valid_0's rmse: 1.10323
[186]	valid_0's rmse: 1.10272
[187]	valid_0's rmse: 1.10188
[188]	valid_0's rmse: 1.1015
[189]	valid_0's rmse: 1.10116
[190]	valid_0's rmse: 1.1007
[191]	valid_0's rmse: 1.10036
[192]	valid_0's rmse: 1.10004
[193]	valid_0's rmse: 1.09964
[194]	valid_0's rmse: 1.09918
[195]	valid_0

[436]	valid_0's rmse: 1.02977
[437]	valid_0's rmse: 1.0296
[438]	valid_0's rmse: 1.02938
[439]	valid_0's rmse: 1.02924
[440]	valid_0's rmse: 1.02899
[441]	valid_0's rmse: 1.02862
[442]	valid_0's rmse: 1.02844
[443]	valid_0's rmse: 1.02831
[444]	valid_0's rmse: 1.02816
[445]	valid_0's rmse: 1.02798
[446]	valid_0's rmse: 1.02777
[447]	valid_0's rmse: 1.02765
[448]	valid_0's rmse: 1.02752
[449]	valid_0's rmse: 1.02742
[450]	valid_0's rmse: 1.02724
[451]	valid_0's rmse: 1.02704
[452]	valid_0's rmse: 1.02684
[453]	valid_0's rmse: 1.02666
[454]	valid_0's rmse: 1.02651
[455]	valid_0's rmse: 1.02645
[456]	valid_0's rmse: 1.02634
[457]	valid_0's rmse: 1.0262
[458]	valid_0's rmse: 1.02603
[459]	valid_0's rmse: 1.02574
[460]	valid_0's rmse: 1.0256
[461]	valid_0's rmse: 1.02499
[462]	valid_0's rmse: 1.02488
[463]	valid_0's rmse: 1.02471
[464]	valid_0's rmse: 1.02432
[465]	valid_0's rmse: 1.02425
[466]	valid_0's rmse: 1.02412
[467]	valid_0's rmse: 1.02398
[468]	valid_0's rmse: 1.02386
[469]	valid_0

[209]	valid_0's rmse: 1.20473
[210]	valid_0's rmse: 1.20415
[211]	valid_0's rmse: 1.20373
[212]	valid_0's rmse: 1.20305
[213]	valid_0's rmse: 1.2026
[214]	valid_0's rmse: 1.20207
[215]	valid_0's rmse: 1.20164
[216]	valid_0's rmse: 1.20129
[217]	valid_0's rmse: 1.2007
[218]	valid_0's rmse: 1.20011
[219]	valid_0's rmse: 1.19973
[220]	valid_0's rmse: 1.19942
[221]	valid_0's rmse: 1.1991
[222]	valid_0's rmse: 1.19882
[223]	valid_0's rmse: 1.19854
[224]	valid_0's rmse: 1.19814
[225]	valid_0's rmse: 1.19788
[226]	valid_0's rmse: 1.19763
[227]	valid_0's rmse: 1.19641
[228]	valid_0's rmse: 1.19599
[229]	valid_0's rmse: 1.1955
[230]	valid_0's rmse: 1.19516
[231]	valid_0's rmse: 1.19506
[232]	valid_0's rmse: 1.19439
[233]	valid_0's rmse: 1.19376
[234]	valid_0's rmse: 1.19358
[235]	valid_0's rmse: 1.19322
[236]	valid_0's rmse: 1.19303
[237]	valid_0's rmse: 1.19164
[238]	valid_0's rmse: 1.19132
[239]	valid_0's rmse: 1.19106
[240]	valid_0's rmse: 1.19066
[241]	valid_0's rmse: 1.19035
[242]	valid_0'

[483]	valid_0's rmse: 1.13138
[484]	valid_0's rmse: 1.13131
[485]	valid_0's rmse: 1.13121
[486]	valid_0's rmse: 1.13095
[487]	valid_0's rmse: 1.13069
[488]	valid_0's rmse: 1.13054
[489]	valid_0's rmse: 1.13039
[490]	valid_0's rmse: 1.13025
[491]	valid_0's rmse: 1.13003
[492]	valid_0's rmse: 1.12977
[493]	valid_0's rmse: 1.12965
[494]	valid_0's rmse: 1.12952
[495]	valid_0's rmse: 1.12938
[496]	valid_0's rmse: 1.1293
[497]	valid_0's rmse: 1.12921
[498]	valid_0's rmse: 1.12906
[499]	valid_0's rmse: 1.12897
[500]	valid_0's rmse: 1.12883
Did not meet early stopping. Best iteration is:
[500]	valid_0's rmse: 1.12883
[1]	valid_0's rmse: 2.55404
Training until validation scores don't improve for 20 rounds
[2]	valid_0's rmse: 2.50621
[3]	valid_0's rmse: 2.46074
[4]	valid_0's rmse: 2.41711
[5]	valid_0's rmse: 2.3753
[6]	valid_0's rmse: 2.34707
[7]	valid_0's rmse: 2.30717
[8]	valid_0's rmse: 2.2687
[9]	valid_0's rmse: 2.23246
[10]	valid_0's rmse: 2.19705
[11]	valid_0's rmse: 2.16365
[12]	valid_0's

[256]	valid_0's rmse: 1.31109
[257]	valid_0's rmse: 1.31074
[258]	valid_0's rmse: 1.31044
[259]	valid_0's rmse: 1.31017
[260]	valid_0's rmse: 1.30988
[261]	valid_0's rmse: 1.30972
[262]	valid_0's rmse: 1.30942
[263]	valid_0's rmse: 1.30924
[264]	valid_0's rmse: 1.30894
[265]	valid_0's rmse: 1.30877
[266]	valid_0's rmse: 1.30866
[267]	valid_0's rmse: 1.30838
[268]	valid_0's rmse: 1.30813
[269]	valid_0's rmse: 1.30773
[270]	valid_0's rmse: 1.30737
[271]	valid_0's rmse: 1.30707
[272]	valid_0's rmse: 1.30682
[273]	valid_0's rmse: 1.30659
[274]	valid_0's rmse: 1.30615
[275]	valid_0's rmse: 1.3059
[276]	valid_0's rmse: 1.30563
[277]	valid_0's rmse: 1.30552
[278]	valid_0's rmse: 1.30536
[279]	valid_0's rmse: 1.30518
[280]	valid_0's rmse: 1.30506
[281]	valid_0's rmse: 1.30487
[282]	valid_0's rmse: 1.30467
[283]	valid_0's rmse: 1.30449
[284]	valid_0's rmse: 1.30434
[285]	valid_0's rmse: 1.30394
[286]	valid_0's rmse: 1.30362
[287]	valid_0's rmse: 1.30347
[288]	valid_0's rmse: 1.30327
[289]	valid

In [35]:
for i in range(4):
    print('meter: '+ str(i))
    for model in meter_models[i]:
        print(model.best_score_['valid_0']['rmse'])


meter: 0
0.42627665698789113
0.49676590592147607
0.4992672347887715
0.4976493263439429
0.4957624670950901
meter: 1
1.01959150215646
1.0208772022038057
1.0224185968084838
1.0227118868666978
1.018449919716836
meter: 2
1.1192404948383146
1.1194080310568875
1.1197819545421337
1.116363786910713
1.1288256973092121
meter: 3
1.2653070230898402
1.266976922189613
1.2662716178369082
1.2648836301441395
1.263143006906069


In [36]:
# Importance rank for first model in cross val models
for i in range(4):
    print('meter: '+ str(i))
    imprtc_df = pd.DataFrame()
    imprtc_df['feature'] = sample_train_X.drop('meter_reading', axis=1).columns   
    imprtc_df['importance'] = meter_models[i][0].feature_importances_
    imprtc_df.sort_values('importance', ascending=False, inplace= True)
    print(imprtc_df)


meter: 0
                          feature  importance
0                     building_id        3102
71                           hour         393
29     precip_depth_1_hr_std_lag3         383
43      air_temperature_max_lag72         356
5                 log_square_feet         294
44      air_temperature_min_lag72         281
42     air_temperature_mean_lag72         278
52      dew_temperature_min_lag72         276
59   sea_level_pressure_max_lag72         266
60   sea_level_pressure_min_lag72         224
51      dew_temperature_max_lag72         198
54   precip_depth_1_hr_mean_lag72         177
50     dew_temperature_mean_lag72         163
62      wind_direction_mean_lag72         157
57    precip_depth_1_hr_std_lag72         157
2                         site_id         155
65       wind_direction_std_lag72         154
66          wind_speed_mean_lag72         149
55    precip_depth_1_hr_max_lag72         148
67           wind_speed_max_lag72         145
45      air_temperature_s

[72 rows x 2 columns]
meter: 3
                          feature  importance
0                     building_id        3375
71                           hour         732
42     air_temperature_mean_lag72         346
44      air_temperature_min_lag72         316
5                 log_square_feet         296
43      air_temperature_max_lag72         234
52      dew_temperature_min_lag72         210
59   sea_level_pressure_max_lag72         189
50     dew_temperature_mean_lag72         173
62      wind_direction_mean_lag72         160
16       air_temperature_min_lag3         158
60   sea_level_pressure_min_lag72         153
54   precip_depth_1_hr_mean_lag72         151
29     precip_depth_1_hr_std_lag3         150
45      air_temperature_std_lag72         144
57    precip_depth_1_hr_std_lag72         136
58  sea_level_pressure_mean_lag72         135
15       air_temperature_max_lag3         133
14      air_temperature_mean_lag3         133
51      dew_temperature_max_lag72         128
66 

In [37]:
# %%time
# ## Single fit single model

# gbm = LGBMRegressor(**gbm_params)
# f_train_X, f_train_y = getInFoldXY(train.index)
# gbm.fit(f_train_X, f_train_y)

In [38]:
# Generate test_X
test_X = x_pipes.transform(
    test
        .merge(building_pipes.transform(building), on='building_id', how='left').drop(['row_id'], axis=1)
        .merge(weather_pipes.transform(weather_test), on=['site_id', 'timestamp'], how='left')
    )

print(test_X.sample(n=20,  random_state=42))
print(test_X.shape)
print(test_X.dtypes)

         building_id meter site_id                    primary_use  \
3573457          173     0       2                      Education   
8315486          222     1       2  Entertainment/public assembly   
40305643        1354     2      15                      Education   
16083617         712     0       5                      Education   
37204119        1344     2      15                      Education   
32144852        1119     1      13                         Office   
5105044          249     0       2  Entertainment/public assembly   
36982844        1303     1      14                     Healthcare   
20487823         945     2       9                         Office   
8404196          217     1       2                      Education   
6889602          241     0       2  Entertainment/public assembly   
16963616         784     0       6                      Education   
39666699        1381     2      15                         Office   
26802058        1179     1      13

[20 rows x 72 columns]
(41697600, 72)
building_id                      category
meter                            category
site_id                          category
primary_use                      category
floor_count                       float16
log_square_feet                   float16
building_age                        uint8
air_temperature                   float16
cloud_coverage                    float64
dew_temperature                   float16
precip_depth_1_hr                 float16
sea_level_pressure                float16
wind_direction                    float16
wind_speed                        float16
air_temperature_mean_lag3         float16
air_temperature_max_lag3          float16
air_temperature_min_lag3          float16
air_temperature_std_lag3          float16
cloud_coverage_mean_lag3          float16
cloud_coverage_max_lag3           float16
cloud_coverage_min_lag3           float16
cloud_coverage_std_lag3           float16
dew_temperature_mean_lag3         floa

In [39]:
def predMeters(test_X):
    test_y = test_X[['meter']] 
    test_y['meter_reading'] = np.nan
    for i in range(4):
        X = test_X[test_X['meter'] == i]
        if X.shape[0] > 0:
            preds = np.expm1(sum([model.predict(X) for model in meter_models[i]])/folds)
            test_y.loc[test_y['meter'] == i, 'meter_reading'] = preds
    return test_y['meter_reading'].tolist()
    
print(predMeters(test_X.sample(n=20,  random_state=42)))    


[155.86914324054328, 9.161408466501603, 50.5329895827927, 1.955456933438529, 1117.9930205655742, 8.753386683405793, 334.0877642340085, 1047.9033188592055, 360.70628523453104, 77.02413245123641, 44.59347946174745, 815.7054079380478, 1010.0334751106095, 37.37729451514661, 61.42518238082995, 56.237187050302715, 14.772930734819782, 77.34541380077053, 48.902045215067524, 172.89980764847536]


In [40]:
# Predict using cross val models ensemble 
i=0
res=[]
step_size = 50000
for j in tqdm(range(int(np.ceil(test_X.shape[0]/50000)))):
    res.append(predMeters(test_X.iloc[i:i+step_size]))
    i+=step_size
    gc.collect()


100%|████████████████████████████████████████████████████████████████████████████████| 834/834 [48:08<00:00,  4.00s/it]


In [41]:
# Save using cross val models ensemble 
res = np.concatenate(res)
print(len(res))
submission = pd.read_csv('../input/ashrae-energy-prediction/sample_submission.csv')
submission['meter_reading'] = res
submission.loc[submission['meter_reading']<0, 'meter_reading'] = 0
submission.to_csv('submission_meter.csv.zip', index=False)
submission.shape

41697600


(41697600, 2)

In [None]:
# # Predict single model fit
# i=0
# res=[]
# step_size = 50000
# for j in tqdm(range(int(np.ceil(test_X.shape[0]/50000)))):
#    #res.append(np.expm1(sum([model.predict(test_X.iloc[i:i+step_size]) for model in models])/folds))
#    res.append(np.expm1(gbm.predict(test_X.iloc[i:i+step_size])))
#    i+=step_size
    