In [5]:
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import KFold, StratifiedKFold, GroupKFold
from tqdm import tqdm_notebook as tqdm
from sklearn.metrics import mean_squared_error
import gc
from sklearn import metrics

In [6]:
from pandas.api.types import is_datetime64_any_dtype as is_datetime
from pandas.api.types import is_categorical_dtype

def reduce_mem_usage(df, use_float16=False):
    """
    Iterate through all the columns of a dataframe and modify the data type to reduce memory usage.        
    """
    
    start_mem = df.memory_usage().sum() / 1024**2
    print("Memory usage of dataframe is {:.2f} MB".format(start_mem))
    
    for col in df.columns:
        if is_datetime(df[col]) or is_categorical_dtype(df[col]):
            continue
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == "int":
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if use_float16 and c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype("category")

    end_mem = df.memory_usage().sum() / 1024**2
    print("Memory usage after optimization is: {:.2f} MB".format(end_mem))
    print("Decreased by {:.1f}%".format(100 * (start_mem - end_mem) / start_mem))
    
    return df

In [7]:
df_drop = reduce_mem_usage(pd.read_csv('../output/outlier_remove.csv'))
target = df_drop.logerror
features = df_drop.drop(['logerror'], axis = 1)

Memory usage of dataframe is 58.98 MB
Memory usage after optimization is: 23.37 MB
Decreased by 60.4%


In [8]:
params = {
    'iterations': 400,
    'learning_rate': 0.035,
    'depth': 7,
    'verbose': 20,
#     'l2_leaf_reg': 1000,
    'task_type': 'GPU',
    'loss_function': 'MAE',
    'eval_metric': 'MAE',
    'random_seed': 0,
}
features.fillna(-999, inplace=True)
categorical_features = ['airconditioningtypeid', 'hashottuborspa', 'heatingorsystemtypeid', 
                       'pooltypeid2', 'propertylandusetypeid', 'fips', 'regionidcounty', 
                       'buildingqualitytypeid_fill', 'regionidcity_fill', 'year', 
                       'regionidneighborhood_fill', 'taxdelinquencyflag']
for i in categorical_features:
    if features[i].dtype == 'float':
        features[i] = features[i].astype('int')

In [9]:
NFOLDS = 5
columns = features.columns
kf = StratifiedKFold(n_splits=5, shuffle=False, random_state=2319)
splits=kf.split(features,df_drop['month'])
y_oof = np.zeros(features.shape[0])
score = 0
out_folder_train_prediction= pd.DataFrame()
feature_importance_df = pd.DataFrame()

models = []

categorical_features = ['airconditioningtypeid', 'hashottuborspa', 'heatingorsystemtypeid', 
                       'pooltypeid2', 'propertylandusetypeid', 'fips', 'regionidcounty', 
                       'buildingqualitytypeid_fill', 'regionidcity_fill', 'year', 
                       'regionidneighborhood_fill', 'taxdelinquencyflag']

for fold_n, (train_index, valid_index) in enumerate(splits):
    X_tr=features.iloc[train_index]
    y_tr=target.iloc[train_index]
    X_val=features.iloc[valid_index]
    y_val=target.iloc[valid_index]
    
    model = CatBoostRegressor(**params)
        
    model.fit(X_tr, y_tr, eval_set=(X_val, y_val), cat_features=categorical_features, verbose=True)

    y_pred_valid = model.predict(X_val)
    y_oof[valid_index] = y_pred_valid
    print(f"Fold {fold_n + 1} | rmse: {np.sqrt(mean_squared_error(y_val, y_pred_valid))}")
    
    score += np.sqrt(mean_squared_error(y_val, y_pred_valid)) / NFOLDS
    
    oof_preds=pd.DataFrame()
    oof_preds['train_index']=valid_index
    oof_preds['TARGET']= y_pred_valid
    oof_preds["folder"]=fold_n + 1
    out_folder_train_prediction = pd.concat([out_folder_train_prediction, oof_preds], axis=0)
    
    
    fold_importance_df = pd.DataFrame()
    fold_importance_df['feature']=columns
    fold_importance_df['importance']=model.get_feature_importance()
    feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
    
    models.append(model)
    del X_val,X_tr,y_val,y_tr
    gc.collect()
    
print(f"\nMean rmse = {score}")

0:	learn: 0.0529196	test: 0.0525006	best: 0.0525006 (0)	total: 26.3ms	remaining: 10.5s
1:	learn: 0.0527719	test: 0.0523477	best: 0.0523477 (1)	total: 45.6ms	remaining: 9.07s
2:	learn: 0.0526742	test: 0.0522711	best: 0.0522711 (2)	total: 64.8ms	remaining: 8.57s
3:	learn: 0.0525929	test: 0.0522111	best: 0.0522111 (3)	total: 85.8ms	remaining: 8.49s
4:	learn: 0.0525570	test: 0.0521717	best: 0.0521717 (4)	total: 105ms	remaining: 8.28s
5:	learn: 0.0525152	test: 0.0521429	best: 0.0521429 (5)	total: 123ms	remaining: 8.05s
6:	learn: 0.0524650	test: 0.0521003	best: 0.0521003 (6)	total: 144ms	remaining: 8.09s
7:	learn: 0.0524325	test: 0.0520721	best: 0.0520721 (7)	total: 167ms	remaining: 8.17s
8:	learn: 0.0524117	test: 0.0520597	best: 0.0520597 (8)	total: 188ms	remaining: 8.16s
9:	learn: 0.0523857	test: 0.0520450	best: 0.0520450 (9)	total: 205ms	remaining: 8s
10:	learn: 0.0523680	test: 0.0520389	best: 0.0520389 (10)	total: 225ms	remaining: 7.96s
11:	learn: 0.0523281	test: 0.0520116	best: 0.052011

94:	learn: 0.0515091	test: 0.0518316	best: 0.0518277 (81)	total: 1.88s	remaining: 6.04s
95:	learn: 0.0515069	test: 0.0518319	best: 0.0518277 (81)	total: 1.9s	remaining: 6.01s
96:	learn: 0.0514936	test: 0.0518330	best: 0.0518277 (81)	total: 1.92s	remaining: 5.99s
97:	learn: 0.0514900	test: 0.0518332	best: 0.0518277 (81)	total: 1.94s	remaining: 5.97s
98:	learn: 0.0514855	test: 0.0518296	best: 0.0518277 (81)	total: 1.96s	remaining: 5.96s
99:	learn: 0.0514791	test: 0.0518306	best: 0.0518277 (81)	total: 1.98s	remaining: 5.94s
100:	learn: 0.0514729	test: 0.0518285	best: 0.0518277 (81)	total: 2s	remaining: 5.93s
101:	learn: 0.0514702	test: 0.0518288	best: 0.0518277 (81)	total: 2.02s	remaining: 5.92s
102:	learn: 0.0514659	test: 0.0518280	best: 0.0518277 (81)	total: 2.04s	remaining: 5.89s
103:	learn: 0.0514587	test: 0.0518282	best: 0.0518277 (81)	total: 2.06s	remaining: 5.87s
104:	learn: 0.0514500	test: 0.0518282	best: 0.0518277 (81)	total: 2.08s	remaining: 5.84s
105:	learn: 0.0514463	test: 0.0

193:	learn: 0.0509116	test: 0.0518150	best: 0.0518108 (170)	total: 3.74s	remaining: 3.98s
194:	learn: 0.0509080	test: 0.0518161	best: 0.0518108 (170)	total: 3.77s	remaining: 3.96s
195:	learn: 0.0509043	test: 0.0518161	best: 0.0518108 (170)	total: 3.79s	remaining: 3.95s
196:	learn: 0.0509029	test: 0.0518158	best: 0.0518108 (170)	total: 3.81s	remaining: 3.93s
197:	learn: 0.0508960	test: 0.0518125	best: 0.0518108 (170)	total: 3.83s	remaining: 3.91s
198:	learn: 0.0508929	test: 0.0518126	best: 0.0518108 (170)	total: 3.86s	remaining: 3.9s
199:	learn: 0.0508838	test: 0.0518109	best: 0.0518108 (170)	total: 3.88s	remaining: 3.88s
200:	learn: 0.0508820	test: 0.0518103	best: 0.0518103 (200)	total: 3.92s	remaining: 3.88s
201:	learn: 0.0508759	test: 0.0518123	best: 0.0518103 (200)	total: 3.93s	remaining: 3.86s
202:	learn: 0.0508735	test: 0.0518129	best: 0.0518103 (200)	total: 3.95s	remaining: 3.84s
203:	learn: 0.0508714	test: 0.0518120	best: 0.0518103 (200)	total: 3.97s	remaining: 3.82s
204:	learn:

293:	learn: 0.0504809	test: 0.0517923	best: 0.0517910 (292)	total: 5.61s	remaining: 2.02s
294:	learn: 0.0504710	test: 0.0517920	best: 0.0517910 (292)	total: 5.63s	remaining: 2s
295:	learn: 0.0504664	test: 0.0517904	best: 0.0517904 (295)	total: 5.64s	remaining: 1.98s
296:	learn: 0.0504618	test: 0.0517893	best: 0.0517893 (296)	total: 5.66s	remaining: 1.96s
297:	learn: 0.0504596	test: 0.0517884	best: 0.0517884 (297)	total: 5.68s	remaining: 1.94s
298:	learn: 0.0504560	test: 0.0517870	best: 0.0517870 (298)	total: 5.7s	remaining: 1.93s
299:	learn: 0.0504529	test: 0.0517867	best: 0.0517867 (299)	total: 5.72s	remaining: 1.91s
300:	learn: 0.0504501	test: 0.0517878	best: 0.0517867 (299)	total: 5.74s	remaining: 1.89s
301:	learn: 0.0504467	test: 0.0517886	best: 0.0517867 (299)	total: 5.76s	remaining: 1.87s
302:	learn: 0.0504396	test: 0.0517888	best: 0.0517867 (299)	total: 5.78s	remaining: 1.85s
303:	learn: 0.0504355	test: 0.0517878	best: 0.0517867 (299)	total: 5.8s	remaining: 1.83s
304:	learn: 0.0

392:	learn: 0.0500366	test: 0.0517772	best: 0.0517740 (348)	total: 7.46s	remaining: 133ms
393:	learn: 0.0500303	test: 0.0517771	best: 0.0517740 (348)	total: 7.48s	remaining: 114ms
394:	learn: 0.0500275	test: 0.0517768	best: 0.0517740 (348)	total: 7.5s	remaining: 94.9ms
395:	learn: 0.0500242	test: 0.0517763	best: 0.0517740 (348)	total: 7.52s	remaining: 76ms
396:	learn: 0.0500204	test: 0.0517777	best: 0.0517740 (348)	total: 7.54s	remaining: 57ms
397:	learn: 0.0500125	test: 0.0517786	best: 0.0517740 (348)	total: 7.55s	remaining: 38ms
398:	learn: 0.0500066	test: 0.0517794	best: 0.0517740 (348)	total: 7.58s	remaining: 19ms
399:	learn: 0.0500022	test: 0.0517774	best: 0.0517740 (348)	total: 7.6s	remaining: 0us
bestTest = 0.05177400815
bestIteration = 348
Shrink model to first 349 iterations.
Fold 1 | rmse: 0.0818421673609999
0:	learn: 0.0528847	test: 0.0525446	best: 0.0525446 (0)	total: 19.7ms	remaining: 7.88s
1:	learn: 0.0527662	test: 0.0524093	best: 0.0524093 (1)	total: 39.6ms	remaining: 7.

88:	learn: 0.0515645	test: 0.0517402	best: 0.0517402 (88)	total: 1.69s	remaining: 5.92s
89:	learn: 0.0515534	test: 0.0517356	best: 0.0517356 (89)	total: 1.71s	remaining: 5.89s
90:	learn: 0.0515454	test: 0.0517357	best: 0.0517356 (89)	total: 1.73s	remaining: 5.88s
91:	learn: 0.0515388	test: 0.0517354	best: 0.0517354 (91)	total: 1.75s	remaining: 5.86s
92:	learn: 0.0515371	test: 0.0517368	best: 0.0517354 (91)	total: 1.77s	remaining: 5.84s
93:	learn: 0.0515300	test: 0.0517349	best: 0.0517349 (93)	total: 1.79s	remaining: 5.83s
94:	learn: 0.0515232	test: 0.0517331	best: 0.0517331 (94)	total: 1.81s	remaining: 5.8s
95:	learn: 0.0515188	test: 0.0517321	best: 0.0517321 (95)	total: 1.82s	remaining: 5.77s
96:	learn: 0.0515151	test: 0.0517291	best: 0.0517291 (96)	total: 1.84s	remaining: 5.75s
97:	learn: 0.0515108	test: 0.0517273	best: 0.0517273 (97)	total: 1.86s	remaining: 5.72s
98:	learn: 0.0515019	test: 0.0517249	best: 0.0517249 (98)	total: 1.88s	remaining: 5.7s
99:	learn: 0.0514904	test: 0.05172

182:	learn: 0.0510179	test: 0.0517064	best: 0.0517004 (176)	total: 3.58s	remaining: 4.25s
183:	learn: 0.0510121	test: 0.0517066	best: 0.0517004 (176)	total: 3.6s	remaining: 4.23s
184:	learn: 0.0510054	test: 0.0517040	best: 0.0517004 (176)	total: 3.63s	remaining: 4.22s
185:	learn: 0.0510022	test: 0.0517052	best: 0.0517004 (176)	total: 3.65s	remaining: 4.2s
186:	learn: 0.0509940	test: 0.0517088	best: 0.0517004 (176)	total: 3.67s	remaining: 4.18s
187:	learn: 0.0509881	test: 0.0517083	best: 0.0517004 (176)	total: 3.69s	remaining: 4.16s
188:	learn: 0.0509806	test: 0.0517098	best: 0.0517004 (176)	total: 3.71s	remaining: 4.14s
189:	learn: 0.0509757	test: 0.0517086	best: 0.0517004 (176)	total: 3.73s	remaining: 4.12s
190:	learn: 0.0509724	test: 0.0517095	best: 0.0517004 (176)	total: 3.75s	remaining: 4.11s
191:	learn: 0.0509650	test: 0.0517109	best: 0.0517004 (176)	total: 3.77s	remaining: 4.09s
192:	learn: 0.0509610	test: 0.0517097	best: 0.0517004 (176)	total: 3.79s	remaining: 4.07s
193:	learn: 

275:	learn: 0.0505129	test: 0.0516967	best: 0.0516967 (275)	total: 5.44s	remaining: 2.44s
276:	learn: 0.0505073	test: 0.0516978	best: 0.0516967 (275)	total: 5.46s	remaining: 2.42s
277:	learn: 0.0505002	test: 0.0517004	best: 0.0516967 (275)	total: 5.48s	remaining: 2.4s
278:	learn: 0.0504970	test: 0.0517008	best: 0.0516967 (275)	total: 5.5s	remaining: 2.38s
279:	learn: 0.0504930	test: 0.0517046	best: 0.0516967 (275)	total: 5.51s	remaining: 2.36s
280:	learn: 0.0504911	test: 0.0517050	best: 0.0516967 (275)	total: 5.53s	remaining: 2.34s
281:	learn: 0.0504874	test: 0.0517048	best: 0.0516967 (275)	total: 5.56s	remaining: 2.33s
282:	learn: 0.0504824	test: 0.0517046	best: 0.0516967 (275)	total: 5.58s	remaining: 2.31s
283:	learn: 0.0504784	test: 0.0517043	best: 0.0516967 (275)	total: 5.6s	remaining: 2.29s
284:	learn: 0.0504774	test: 0.0517048	best: 0.0516967 (275)	total: 5.62s	remaining: 2.27s
285:	learn: 0.0504743	test: 0.0517056	best: 0.0516967 (275)	total: 5.64s	remaining: 2.25s
286:	learn: 0

374:	learn: 0.0500710	test: 0.0517117	best: 0.0516967 (275)	total: 7.34s	remaining: 489ms
375:	learn: 0.0500675	test: 0.0517111	best: 0.0516967 (275)	total: 7.35s	remaining: 469ms
376:	learn: 0.0500601	test: 0.0517138	best: 0.0516967 (275)	total: 7.37s	remaining: 450ms
377:	learn: 0.0500555	test: 0.0517128	best: 0.0516967 (275)	total: 7.39s	remaining: 430ms
378:	learn: 0.0500477	test: 0.0517150	best: 0.0516967 (275)	total: 7.41s	remaining: 410ms
379:	learn: 0.0500443	test: 0.0517174	best: 0.0516967 (275)	total: 7.42s	remaining: 391ms
380:	learn: 0.0500423	test: 0.0517187	best: 0.0516967 (275)	total: 7.44s	remaining: 371ms
381:	learn: 0.0500388	test: 0.0517166	best: 0.0516967 (275)	total: 7.45s	remaining: 351ms
382:	learn: 0.0500329	test: 0.0517155	best: 0.0516967 (275)	total: 7.47s	remaining: 332ms
383:	learn: 0.0500271	test: 0.0517172	best: 0.0516967 (275)	total: 7.49s	remaining: 312ms
384:	learn: 0.0500235	test: 0.0517191	best: 0.0516967 (275)	total: 7.51s	remaining: 293ms
385:	learn

72:	learn: 0.0513142	test: 0.0533701	best: 0.0533665 (66)	total: 1.46s	remaining: 6.55s
73:	learn: 0.0513093	test: 0.0533696	best: 0.0533665 (66)	total: 1.48s	remaining: 6.53s
74:	learn: 0.0513051	test: 0.0533700	best: 0.0533665 (66)	total: 1.5s	remaining: 6.5s
75:	learn: 0.0512942	test: 0.0533667	best: 0.0533665 (66)	total: 1.52s	remaining: 6.47s
76:	learn: 0.0512926	test: 0.0533668	best: 0.0533665 (66)	total: 1.53s	remaining: 6.44s
77:	learn: 0.0512892	test: 0.0533646	best: 0.0533646 (77)	total: 1.55s	remaining: 6.42s
78:	learn: 0.0512789	test: 0.0533635	best: 0.0533635 (78)	total: 1.57s	remaining: 6.39s
79:	learn: 0.0512704	test: 0.0533584	best: 0.0533584 (79)	total: 1.59s	remaining: 6.36s
80:	learn: 0.0512545	test: 0.0533590	best: 0.0533584 (79)	total: 1.61s	remaining: 6.34s
81:	learn: 0.0512462	test: 0.0533591	best: 0.0533584 (79)	total: 1.63s	remaining: 6.33s
82:	learn: 0.0512372	test: 0.0533632	best: 0.0533584 (79)	total: 1.65s	remaining: 6.31s
83:	learn: 0.0512311	test: 0.05336

167:	learn: 0.0507023	test: 0.0533419	best: 0.0533351 (157)	total: 3.33s	remaining: 4.59s
168:	learn: 0.0506952	test: 0.0533401	best: 0.0533351 (157)	total: 3.35s	remaining: 4.58s
169:	learn: 0.0506935	test: 0.0533406	best: 0.0533351 (157)	total: 3.38s	remaining: 4.57s
170:	learn: 0.0506899	test: 0.0533404	best: 0.0533351 (157)	total: 3.41s	remaining: 4.56s
171:	learn: 0.0506746	test: 0.0533440	best: 0.0533351 (157)	total: 3.44s	remaining: 4.56s
172:	learn: 0.0506697	test: 0.0533428	best: 0.0533351 (157)	total: 3.46s	remaining: 4.55s
173:	learn: 0.0506638	test: 0.0533449	best: 0.0533351 (157)	total: 3.48s	remaining: 4.53s
174:	learn: 0.0506621	test: 0.0533447	best: 0.0533351 (157)	total: 3.5s	remaining: 4.5s
175:	learn: 0.0506556	test: 0.0533467	best: 0.0533351 (157)	total: 3.52s	remaining: 4.48s
176:	learn: 0.0506508	test: 0.0533481	best: 0.0533351 (157)	total: 3.54s	remaining: 4.46s
177:	learn: 0.0506386	test: 0.0533481	best: 0.0533351 (157)	total: 3.56s	remaining: 4.43s
178:	learn: 

265:	learn: 0.0502302	test: 0.0533559	best: 0.0533351 (157)	total: 5.25s	remaining: 2.65s
266:	learn: 0.0502241	test: 0.0533561	best: 0.0533351 (157)	total: 5.27s	remaining: 2.63s
267:	learn: 0.0502192	test: 0.0533571	best: 0.0533351 (157)	total: 5.29s	remaining: 2.6s
268:	learn: 0.0502136	test: 0.0533580	best: 0.0533351 (157)	total: 5.3s	remaining: 2.58s
269:	learn: 0.0502103	test: 0.0533573	best: 0.0533351 (157)	total: 5.32s	remaining: 2.56s
270:	learn: 0.0502089	test: 0.0533572	best: 0.0533351 (157)	total: 5.34s	remaining: 2.54s
271:	learn: 0.0502033	test: 0.0533585	best: 0.0533351 (157)	total: 5.36s	remaining: 2.52s
272:	learn: 0.0502023	test: 0.0533591	best: 0.0533351 (157)	total: 5.38s	remaining: 2.5s
273:	learn: 0.0501994	test: 0.0533589	best: 0.0533351 (157)	total: 5.39s	remaining: 2.48s
274:	learn: 0.0501948	test: 0.0533590	best: 0.0533351 (157)	total: 5.41s	remaining: 2.46s
275:	learn: 0.0501924	test: 0.0533599	best: 0.0533351 (157)	total: 5.43s	remaining: 2.44s
276:	learn: 0

357:	learn: 0.0498078	test: 0.0533563	best: 0.0533351 (157)	total: 7.13s	remaining: 837ms
358:	learn: 0.0498031	test: 0.0533529	best: 0.0533351 (157)	total: 7.15s	remaining: 817ms
359:	learn: 0.0497987	test: 0.0533522	best: 0.0533351 (157)	total: 7.17s	remaining: 797ms
360:	learn: 0.0497906	test: 0.0533500	best: 0.0533351 (157)	total: 7.19s	remaining: 777ms
361:	learn: 0.0497863	test: 0.0533476	best: 0.0533351 (157)	total: 7.21s	remaining: 757ms
362:	learn: 0.0497814	test: 0.0533489	best: 0.0533351 (157)	total: 7.23s	remaining: 737ms
363:	learn: 0.0497694	test: 0.0533484	best: 0.0533351 (157)	total: 7.25s	remaining: 717ms
364:	learn: 0.0497675	test: 0.0533492	best: 0.0533351 (157)	total: 7.26s	remaining: 696ms
365:	learn: 0.0497594	test: 0.0533522	best: 0.0533351 (157)	total: 7.28s	remaining: 676ms
366:	learn: 0.0497522	test: 0.0533535	best: 0.0533351 (157)	total: 7.3s	remaining: 656ms
367:	learn: 0.0497427	test: 0.0533518	best: 0.0533351 (157)	total: 7.31s	remaining: 636ms
368:	learn:

55:	learn: 0.0518437	test: 0.0516412	best: 0.0516403 (54)	total: 1.05s	remaining: 6.44s
56:	learn: 0.0518334	test: 0.0516451	best: 0.0516403 (54)	total: 1.07s	remaining: 6.42s
57:	learn: 0.0518275	test: 0.0516448	best: 0.0516403 (54)	total: 1.09s	remaining: 6.41s
58:	learn: 0.0518235	test: 0.0516427	best: 0.0516403 (54)	total: 1.1s	remaining: 6.39s
59:	learn: 0.0518183	test: 0.0516427	best: 0.0516403 (54)	total: 1.12s	remaining: 6.35s
60:	learn: 0.0518045	test: 0.0516405	best: 0.0516403 (54)	total: 1.14s	remaining: 6.32s
61:	learn: 0.0517970	test: 0.0516370	best: 0.0516370 (61)	total: 1.16s	remaining: 6.31s
62:	learn: 0.0517884	test: 0.0516337	best: 0.0516337 (62)	total: 1.18s	remaining: 6.29s
63:	learn: 0.0517814	test: 0.0516341	best: 0.0516337 (62)	total: 1.19s	remaining: 6.27s
64:	learn: 0.0517796	test: 0.0516334	best: 0.0516334 (64)	total: 1.21s	remaining: 6.25s
65:	learn: 0.0517705	test: 0.0516344	best: 0.0516334 (64)	total: 1.23s	remaining: 6.24s
66:	learn: 0.0517629	test: 0.0516

155:	learn: 0.0511639	test: 0.0515962	best: 0.0515941 (150)	total: 3.13s	remaining: 4.89s
156:	learn: 0.0511608	test: 0.0515956	best: 0.0515941 (150)	total: 3.15s	remaining: 4.87s
157:	learn: 0.0511529	test: 0.0515967	best: 0.0515941 (150)	total: 3.17s	remaining: 4.85s
158:	learn: 0.0511501	test: 0.0515970	best: 0.0515941 (150)	total: 3.19s	remaining: 4.83s
159:	learn: 0.0511469	test: 0.0515980	best: 0.0515941 (150)	total: 3.2s	remaining: 4.81s
160:	learn: 0.0511437	test: 0.0515990	best: 0.0515941 (150)	total: 3.22s	remaining: 4.79s
161:	learn: 0.0511424	test: 0.0515987	best: 0.0515941 (150)	total: 3.24s	remaining: 4.76s
162:	learn: 0.0511342	test: 0.0516013	best: 0.0515941 (150)	total: 3.27s	remaining: 4.75s
163:	learn: 0.0511309	test: 0.0516019	best: 0.0515941 (150)	total: 3.28s	remaining: 4.72s
164:	learn: 0.0511232	test: 0.0516015	best: 0.0515941 (150)	total: 3.3s	remaining: 4.7s
165:	learn: 0.0511195	test: 0.0516013	best: 0.0515941 (150)	total: 3.32s	remaining: 4.67s
166:	learn: 0

251:	learn: 0.0506840	test: 0.0516087	best: 0.0515941 (150)	total: 4.99s	remaining: 2.93s
252:	learn: 0.0506791	test: 0.0516092	best: 0.0515941 (150)	total: 5.01s	remaining: 2.91s
253:	learn: 0.0506720	test: 0.0516096	best: 0.0515941 (150)	total: 5.03s	remaining: 2.89s
254:	learn: 0.0506657	test: 0.0516104	best: 0.0515941 (150)	total: 5.05s	remaining: 2.87s
255:	learn: 0.0506615	test: 0.0516113	best: 0.0515941 (150)	total: 5.07s	remaining: 2.85s
256:	learn: 0.0506578	test: 0.0516086	best: 0.0515941 (150)	total: 5.09s	remaining: 2.83s
257:	learn: 0.0506526	test: 0.0516109	best: 0.0515941 (150)	total: 5.11s	remaining: 2.81s
258:	learn: 0.0506470	test: 0.0516122	best: 0.0515941 (150)	total: 5.13s	remaining: 2.79s
259:	learn: 0.0506422	test: 0.0516133	best: 0.0515941 (150)	total: 5.15s	remaining: 2.77s
260:	learn: 0.0506365	test: 0.0516131	best: 0.0515941 (150)	total: 5.17s	remaining: 2.75s
261:	learn: 0.0506303	test: 0.0516130	best: 0.0515941 (150)	total: 5.19s	remaining: 2.73s
262:	learn

352:	learn: 0.0502208	test: 0.0516139	best: 0.0515941 (150)	total: 6.88s	remaining: 917ms
353:	learn: 0.0502177	test: 0.0516142	best: 0.0515941 (150)	total: 6.9s	remaining: 897ms
354:	learn: 0.0502144	test: 0.0516151	best: 0.0515941 (150)	total: 6.92s	remaining: 877ms
355:	learn: 0.0502033	test: 0.0516125	best: 0.0515941 (150)	total: 6.94s	remaining: 858ms
356:	learn: 0.0501983	test: 0.0516128	best: 0.0515941 (150)	total: 6.96s	remaining: 839ms
357:	learn: 0.0501957	test: 0.0516133	best: 0.0515941 (150)	total: 6.98s	remaining: 819ms
358:	learn: 0.0501845	test: 0.0516157	best: 0.0515941 (150)	total: 7s	remaining: 800ms
359:	learn: 0.0501805	test: 0.0516163	best: 0.0515941 (150)	total: 7.03s	remaining: 781ms
360:	learn: 0.0501779	test: 0.0516162	best: 0.0515941 (150)	total: 7.04s	remaining: 761ms
361:	learn: 0.0501656	test: 0.0516136	best: 0.0515941 (150)	total: 7.07s	remaining: 742ms
362:	learn: 0.0501586	test: 0.0516108	best: 0.0515941 (150)	total: 7.08s	remaining: 722ms
363:	learn: 0.

45:	learn: 0.0517632	test: 0.0523691	best: 0.0523691 (45)	total: 834ms	remaining: 6.42s
46:	learn: 0.0517559	test: 0.0523656	best: 0.0523656 (46)	total: 850ms	remaining: 6.38s
47:	learn: 0.0517487	test: 0.0523650	best: 0.0523650 (47)	total: 870ms	remaining: 6.38s
48:	learn: 0.0517371	test: 0.0523619	best: 0.0523619 (48)	total: 889ms	remaining: 6.37s
49:	learn: 0.0517283	test: 0.0523618	best: 0.0523618 (49)	total: 909ms	remaining: 6.36s
50:	learn: 0.0517185	test: 0.0523588	best: 0.0523588 (50)	total: 930ms	remaining: 6.36s
51:	learn: 0.0517120	test: 0.0523552	best: 0.0523552 (51)	total: 950ms	remaining: 6.36s
52:	learn: 0.0516991	test: 0.0523555	best: 0.0523552 (51)	total: 970ms	remaining: 6.35s
53:	learn: 0.0516908	test: 0.0523553	best: 0.0523552 (51)	total: 991ms	remaining: 6.35s
54:	learn: 0.0516796	test: 0.0523561	best: 0.0523552 (51)	total: 1.01s	remaining: 6.33s
55:	learn: 0.0516684	test: 0.0523549	best: 0.0523549 (55)	total: 1.03s	remaining: 6.31s
56:	learn: 0.0516573	test: 0.052

143:	learn: 0.0510819	test: 0.0523299	best: 0.0523136 (131)	total: 2.71s	remaining: 4.82s
144:	learn: 0.0510768	test: 0.0523293	best: 0.0523136 (131)	total: 2.73s	remaining: 4.8s
145:	learn: 0.0510757	test: 0.0523294	best: 0.0523136 (131)	total: 2.75s	remaining: 4.79s
146:	learn: 0.0510743	test: 0.0523293	best: 0.0523136 (131)	total: 2.77s	remaining: 4.76s
147:	learn: 0.0510648	test: 0.0523311	best: 0.0523136 (131)	total: 2.79s	remaining: 4.74s
148:	learn: 0.0510601	test: 0.0523290	best: 0.0523136 (131)	total: 2.8s	remaining: 4.72s
149:	learn: 0.0510549	test: 0.0523277	best: 0.0523136 (131)	total: 2.82s	remaining: 4.7s
150:	learn: 0.0510480	test: 0.0523235	best: 0.0523136 (131)	total: 2.84s	remaining: 4.68s
151:	learn: 0.0510417	test: 0.0523244	best: 0.0523136 (131)	total: 2.86s	remaining: 4.66s
152:	learn: 0.0510386	test: 0.0523248	best: 0.0523136 (131)	total: 2.87s	remaining: 4.64s
153:	learn: 0.0510366	test: 0.0523249	best: 0.0523136 (131)	total: 2.89s	remaining: 4.62s
154:	learn: 0

245:	learn: 0.0506027	test: 0.0523299	best: 0.0523136 (131)	total: 4.59s	remaining: 2.87s
246:	learn: 0.0505976	test: 0.0523286	best: 0.0523136 (131)	total: 4.61s	remaining: 2.85s
247:	learn: 0.0505938	test: 0.0523304	best: 0.0523136 (131)	total: 4.62s	remaining: 2.83s
248:	learn: 0.0505877	test: 0.0523318	best: 0.0523136 (131)	total: 4.64s	remaining: 2.81s
249:	learn: 0.0505866	test: 0.0523325	best: 0.0523136 (131)	total: 4.66s	remaining: 2.8s
250:	learn: 0.0505794	test: 0.0523342	best: 0.0523136 (131)	total: 4.68s	remaining: 2.78s
251:	learn: 0.0505760	test: 0.0523344	best: 0.0523136 (131)	total: 4.7s	remaining: 2.76s
252:	learn: 0.0505725	test: 0.0523383	best: 0.0523136 (131)	total: 4.72s	remaining: 2.74s
253:	learn: 0.0505674	test: 0.0523384	best: 0.0523136 (131)	total: 4.74s	remaining: 2.73s
254:	learn: 0.0505602	test: 0.0523409	best: 0.0523136 (131)	total: 4.76s	remaining: 2.71s
255:	learn: 0.0505545	test: 0.0523406	best: 0.0523136 (131)	total: 4.78s	remaining: 2.69s
256:	learn: 

341:	learn: 0.0501565	test: 0.0523571	best: 0.0523136 (131)	total: 6.45s	remaining: 1.09s
342:	learn: 0.0501547	test: 0.0523560	best: 0.0523136 (131)	total: 6.46s	remaining: 1.07s
343:	learn: 0.0501535	test: 0.0523572	best: 0.0523136 (131)	total: 6.48s	remaining: 1.05s
344:	learn: 0.0501504	test: 0.0523566	best: 0.0523136 (131)	total: 6.5s	remaining: 1.03s
345:	learn: 0.0501466	test: 0.0523572	best: 0.0523136 (131)	total: 6.51s	remaining: 1.02s
346:	learn: 0.0501403	test: 0.0523534	best: 0.0523136 (131)	total: 6.54s	remaining: 998ms
347:	learn: 0.0501373	test: 0.0523506	best: 0.0523136 (131)	total: 6.55s	remaining: 979ms
348:	learn: 0.0501337	test: 0.0523515	best: 0.0523136 (131)	total: 6.57s	remaining: 960ms
349:	learn: 0.0501279	test: 0.0523565	best: 0.0523136 (131)	total: 6.59s	remaining: 942ms
350:	learn: 0.0501233	test: 0.0523544	best: 0.0523136 (131)	total: 6.62s	remaining: 924ms
351:	learn: 0.0501194	test: 0.0523533	best: 0.0523136 (131)	total: 6.63s	remaining: 905ms
352:	learn:

In [14]:
df_sub_2016 = pd.read_csv('../output/final_sub_2016.csv').drop_duplicates('parcelid')
df_sub_2016['year'] = 0
results = {}
for month in [10,11,12]:
    df_sub_2016['month'] = month
    for i in models:
        if  month not in results.keys():
            results[month] = i.predict(df_sub_2016) / len(models)
        else:
            results[month] += i.predict(df_sub_2016) / len(models)
for i in results.keys():
    df_sub_2016[str(i)] = results[i]
df_sub = pd.read_csv('../Resources/sample_submission.csv')
df_sub = df_sub.rename(columns = {'ParcelId': 'parcelid'})
df_sub = pd.merge(df_sub[['parcelid']], df_sub_2016[['parcelid', '10', '11', '12']].drop_duplicates('parcelid'),
                  how = 'left', on = 'parcelid')
df_sub = df_sub.rename(columns = {'10': '201610', '11': '201611', '12': '201612'}).drop_duplicates('parcelid')
del df_sub_2016
gc.collect()
df_sub_2017 = pd.read_csv('../output/final_sub_2017.csv')
df_sub_2017['year'] = 1
results = {}
for month in [10,11,12]:
    df_sub_2017['month'] = month
    for i in models:
        if  month not in results.keys():
            results[month] = i.predict(df_sub_2017) / len(models)
        else:
            results[month] += i.predict(df_sub_2017) / len(models)
for i in results.keys():
    df_sub_2017[str(i)] = results[i]
df_sub = pd.merge(df_sub[['parcelid', '201610', '201611', '201612']], 
                  df_sub_2017[['parcelid', '10', '11', '12']].drop_duplicates('parcelid'), 
                  how = 'left', on = 'parcelid')
df_sub = df_sub.rename(columns = {'10': '201710', '11': '201711', '12': '201712'})
del df_sub_2017
gc.collect()
df_sub.to_csv('../output/submission/cat5_opt.csv', index = False)