* This notebook uses data from https://www.drivendata.org/competitions/44/dengai-predicting-disease-spread/.
* It contains Experiment with LightGBM

In [1]:
import pandas as pd
import numpy as np

In [2]:
train_data = pd.read_csv('data/dengue_features_train.csv')
#train_data['week_start_date'] = pd.to_datetime(train_data.week_start_date)
X_train = train_data.drop('week_start_date',axis=1)
X_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1456 entries, 0 to 1455
Data columns (total 23 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   city                                   1456 non-null   object 
 1   year                                   1456 non-null   int64  
 2   weekofyear                             1456 non-null   int64  
 3   ndvi_ne                                1262 non-null   float64
 4   ndvi_nw                                1404 non-null   float64
 5   ndvi_se                                1434 non-null   float64
 6   ndvi_sw                                1434 non-null   float64
 7   precipitation_amt_mm                   1443 non-null   float64
 8   reanalysis_air_temp_k                  1446 non-null   float64
 9   reanalysis_avg_temp_k                  1446 non-null   float64
 10  reanalysis_dew_point_temp_k            1446 non-null   float64
 11  rean

In [3]:
labels = pd.read_csv('data/dengue_labels_train.csv')
labels.head()
merged = X_train.merge(labels,on=['city','year','weekofyear'])
y_train = merged['total_cases']
X_train = merged.drop('total_cases',axis=1)
X_train.head()

Unnamed: 0,city,year,weekofyear,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,reanalysis_avg_temp_k,reanalysis_dew_point_temp_k,reanalysis_max_air_temp_k,reanalysis_min_air_temp_k,reanalysis_precip_amt_kg_per_m2,reanalysis_relative_humidity_percent,reanalysis_sat_precip_amt_mm,reanalysis_specific_humidity_g_per_kg,reanalysis_tdtr_k,station_avg_temp_c,station_diur_temp_rng_c,station_max_temp_c,station_min_temp_c,station_precip_mm
0,sj,1990,18,0.1226,0.103725,0.198483,0.177617,12.42,297.572857,297.742857,292.414286,299.8,295.9,32.0,73.365714,12.42,14.012857,2.628571,25.442857,6.9,29.4,20.0,16.0
1,sj,1990,19,0.1699,0.142175,0.162357,0.155486,22.82,298.211429,298.442857,293.951429,300.9,296.4,17.94,77.368571,22.82,15.372857,2.371429,26.714286,6.371429,31.7,22.2,8.6
2,sj,1990,20,0.03225,0.172967,0.1572,0.170843,34.54,298.781429,298.878571,295.434286,300.5,297.3,26.1,82.052857,34.54,16.848571,2.3,26.714286,6.485714,32.2,22.8,41.4
3,sj,1990,21,0.128633,0.245067,0.227557,0.235886,15.36,298.987143,299.228571,295.31,301.4,297.0,13.9,80.337143,15.36,16.672857,2.428571,27.471429,6.771429,33.3,23.3,4.0
4,sj,1990,22,0.1962,0.2622,0.2512,0.24734,7.52,299.518571,299.664286,295.821429,301.9,297.5,12.2,80.46,7.52,17.21,3.014286,28.942857,9.371429,35.0,23.9,5.8


In [4]:
X_train['year'] = X_train['year'].astype('object')
X_train['weekofyear'] = X_train['weekofyear'].astype('object')

X_train['week_year'] = X_train['city']+"_"+X_train['weekofyear'].astype('str')+"_"+X_train['year'].astype('str')
X_train = X_train.drop(['city','year','weekofyear'],axis=1)
X_train.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 1456 entries, 0 to 1455
Data columns (total 21 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   ndvi_ne                                1262 non-null   float64
 1   ndvi_nw                                1404 non-null   float64
 2   ndvi_se                                1434 non-null   float64
 3   ndvi_sw                                1434 non-null   float64
 4   precipitation_amt_mm                   1443 non-null   float64
 5   reanalysis_air_temp_k                  1446 non-null   float64
 6   reanalysis_avg_temp_k                  1446 non-null   float64
 7   reanalysis_dew_point_temp_k            1446 non-null   float64
 8   reanalysis_max_air_temp_k              1446 non-null   float64
 9   reanalysis_min_air_temp_k              1446 non-null   float64
 10  reanalysis_precip_amt_kg_per_m2        1446 non-null   float64
 11  rean

In [5]:
num_col = [col for col in X_train.columns if X_train[col].dtype=='int' or X_train[col].dtype=='float64']
cat_col = [col for col in X_train.columns if X_train[col].dtype=='object']


In [7]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

num_processor = Pipeline(steps=[
    ('imputer',SimpleImputer(strategy='median')),
    ('normalizer',Normalizer())
])
cat_processor = Pipeline(steps=[
    ('encoder',OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(transformers=[
    ('num',num_processor,num_col),
    ('cat',cat_processor,cat_col)
])


In [8]:
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.model_selection import cross_val_score
model = CatBoostRegressor()
pipeline = Pipeline(steps=[
    ('processor',preprocessor),
    ('model',model)
])

score = -1*cross_val_score(pipeline,X_train,y_train,cv=3,scoring='neg_mean_absolute_error')
print(f'MSE Loss mean = {np.mean(score)} max = {np.max(score)}')

Learning rate set to 0.038819
0:	learn: 18.9352355	total: 66.8ms	remaining: 1m 6s
1:	learn: 18.7508256	total: 73ms	remaining: 36.4s
2:	learn: 18.5904891	total: 77.9ms	remaining: 25.9s
3:	learn: 18.4120068	total: 82.4ms	remaining: 20.5s
4:	learn: 18.2255596	total: 87.3ms	remaining: 17.4s
5:	learn: 18.1014428	total: 92ms	remaining: 15.2s
6:	learn: 17.9741768	total: 96.6ms	remaining: 13.7s
7:	learn: 17.7928498	total: 101ms	remaining: 12.5s
8:	learn: 17.7062603	total: 105ms	remaining: 11.6s
9:	learn: 17.5501158	total: 109ms	remaining: 10.8s
10:	learn: 17.4041720	total: 113ms	remaining: 10.2s
11:	learn: 17.2596396	total: 118ms	remaining: 9.68s
12:	learn: 17.1090204	total: 122ms	remaining: 9.26s
13:	learn: 17.0013853	total: 126ms	remaining: 8.88s
14:	learn: 16.8902517	total: 130ms	remaining: 8.55s
15:	learn: 16.7726058	total: 135ms	remaining: 8.31s
16:	learn: 16.6736295	total: 139ms	remaining: 8.06s
17:	learn: 16.5424007	total: 144ms	remaining: 7.83s
18:	learn: 16.4341680	total: 148ms	remain

157:	learn: 12.4041321	total: 772ms	remaining: 4.11s
158:	learn: 12.3966111	total: 777ms	remaining: 4.11s
159:	learn: 12.3881922	total: 781ms	remaining: 4.1s
160:	learn: 12.3788593	total: 785ms	remaining: 4.09s
161:	learn: 12.3668403	total: 789ms	remaining: 4.08s
162:	learn: 12.3382362	total: 794ms	remaining: 4.08s
163:	learn: 12.3278622	total: 798ms	remaining: 4.07s
164:	learn: 12.3156638	total: 803ms	remaining: 4.07s
165:	learn: 12.3083105	total: 808ms	remaining: 4.06s
166:	learn: 12.2987735	total: 812ms	remaining: 4.05s
167:	learn: 12.2890666	total: 816ms	remaining: 4.04s
168:	learn: 12.2796733	total: 820ms	remaining: 4.03s
169:	learn: 12.2713676	total: 825ms	remaining: 4.03s
170:	learn: 12.2611499	total: 830ms	remaining: 4.02s
171:	learn: 12.2499648	total: 834ms	remaining: 4.02s
172:	learn: 12.2419799	total: 839ms	remaining: 4.01s
173:	learn: 12.2338077	total: 844ms	remaining: 4s
174:	learn: 12.2144803	total: 849ms	remaining: 4s
175:	learn: 12.1701075	total: 853ms	remaining: 4s
176

323:	learn: 10.7681071	total: 1.55s	remaining: 3.23s
324:	learn: 10.7618681	total: 1.55s	remaining: 3.22s
325:	learn: 10.7554197	total: 1.55s	remaining: 3.22s
326:	learn: 10.7490489	total: 1.56s	remaining: 3.21s
327:	learn: 10.7428709	total: 1.56s	remaining: 3.21s
328:	learn: 10.7367283	total: 1.57s	remaining: 3.2s
329:	learn: 10.7306062	total: 1.57s	remaining: 3.19s
330:	learn: 10.6945685	total: 1.58s	remaining: 3.19s
331:	learn: 10.6884419	total: 1.58s	remaining: 3.18s
332:	learn: 10.6823602	total: 1.59s	remaining: 3.18s
333:	learn: 10.6761153	total: 1.59s	remaining: 3.17s
334:	learn: 10.6700980	total: 1.59s	remaining: 3.17s
335:	learn: 10.6641067	total: 1.6s	remaining: 3.16s
336:	learn: 10.6318228	total: 1.6s	remaining: 3.15s
337:	learn: 10.6258690	total: 1.61s	remaining: 3.15s
338:	learn: 10.6199405	total: 1.61s	remaining: 3.14s
339:	learn: 10.6140272	total: 1.62s	remaining: 3.14s
340:	learn: 10.6081589	total: 1.62s	remaining: 3.13s
341:	learn: 10.6023138	total: 1.62s	remaining: 3.

496:	learn: 9.3139012	total: 2.32s	remaining: 2.35s
497:	learn: 9.3099976	total: 2.33s	remaining: 2.34s
498:	learn: 9.3060992	total: 2.33s	remaining: 2.34s
499:	learn: 9.2852915	total: 2.33s	remaining: 2.33s
500:	learn: 9.2813806	total: 2.34s	remaining: 2.33s
501:	learn: 9.2774855	total: 2.34s	remaining: 2.33s
502:	learn: 9.2736091	total: 2.35s	remaining: 2.32s
503:	learn: 9.2697500	total: 2.35s	remaining: 2.31s
504:	learn: 9.2658999	total: 2.36s	remaining: 2.31s
505:	learn: 9.2620545	total: 2.36s	remaining: 2.31s
506:	learn: 9.2582636	total: 2.37s	remaining: 2.3s
507:	learn: 9.2544327	total: 2.37s	remaining: 2.3s
508:	learn: 9.2506104	total: 2.38s	remaining: 2.29s
509:	learn: 9.2468027	total: 2.38s	remaining: 2.29s
510:	learn: 9.2430054	total: 2.38s	remaining: 2.28s
511:	learn: 9.2392151	total: 2.39s	remaining: 2.28s
512:	learn: 9.2354279	total: 2.39s	remaining: 2.27s
513:	learn: 9.2316916	total: 2.4s	remaining: 2.27s
514:	learn: 9.2279107	total: 2.4s	remaining: 2.26s
515:	learn: 9.22

669:	learn: 8.2961121	total: 3.09s	remaining: 1.52s
670:	learn: 8.2932197	total: 3.1s	remaining: 1.52s
671:	learn: 8.2903384	total: 3.1s	remaining: 1.51s
672:	learn: 8.2874616	total: 3.11s	remaining: 1.51s
673:	learn: 8.2846314	total: 3.11s	remaining: 1.5s
674:	learn: 8.2714663	total: 3.12s	remaining: 1.5s
675:	learn: 8.2685960	total: 3.12s	remaining: 1.5s
676:	learn: 8.2657779	total: 3.13s	remaining: 1.49s
677:	learn: 8.2629226	total: 3.13s	remaining: 1.49s
678:	learn: 8.2597741	total: 3.13s	remaining: 1.48s
679:	learn: 8.2569273	total: 3.14s	remaining: 1.48s
680:	learn: 8.2540893	total: 3.14s	remaining: 1.47s
681:	learn: 8.2401778	total: 3.15s	remaining: 1.47s
682:	learn: 8.2373765	total: 3.15s	remaining: 1.46s
683:	learn: 8.2345391	total: 3.16s	remaining: 1.46s
684:	learn: 8.2317129	total: 3.16s	remaining: 1.45s
685:	learn: 8.2288931	total: 3.17s	remaining: 1.45s
686:	learn: 8.2086551	total: 3.17s	remaining: 1.44s
687:	learn: 8.2058237	total: 3.17s	remaining: 1.44s
688:	learn: 8.203

847:	learn: 7.3478212	total: 3.87s	remaining: 694ms
848:	learn: 7.3456074	total: 3.88s	remaining: 690ms
849:	learn: 7.3433724	total: 3.88s	remaining: 685ms
850:	learn: 7.3411447	total: 3.89s	remaining: 681ms
851:	learn: 7.3389218	total: 3.89s	remaining: 676ms
852:	learn: 7.3367269	total: 3.9s	remaining: 672ms
853:	learn: 7.3345367	total: 3.9s	remaining: 667ms
854:	learn: 7.3323713	total: 3.91s	remaining: 662ms
855:	learn: 7.3301642	total: 3.91s	remaining: 658ms
856:	learn: 7.3280068	total: 3.92s	remaining: 653ms
857:	learn: 7.3258075	total: 3.92s	remaining: 649ms
858:	learn: 7.3236408	total: 3.92s	remaining: 644ms
859:	learn: 7.3214947	total: 3.93s	remaining: 640ms
860:	learn: 7.3193540	total: 3.93s	remaining: 635ms
861:	learn: 7.3171976	total: 3.94s	remaining: 631ms
862:	learn: 7.3150116	total: 3.94s	remaining: 626ms
863:	learn: 7.3128599	total: 3.95s	remaining: 621ms
864:	learn: 7.2943997	total: 3.95s	remaining: 617ms
865:	learn: 7.2922348	total: 3.96s	remaining: 612ms
866:	learn: 7.

8:	learn: 45.2486047	total: 44.9ms	remaining: 4.94s
9:	learn: 44.8985449	total: 50.9ms	remaining: 5.03s
10:	learn: 44.3705956	total: 55.6ms	remaining: 5s
11:	learn: 43.7107472	total: 60.2ms	remaining: 4.96s
12:	learn: 43.2169064	total: 64.8ms	remaining: 4.92s
13:	learn: 42.9941868	total: 69.6ms	remaining: 4.9s
14:	learn: 42.5720382	total: 74.6ms	remaining: 4.9s
15:	learn: 42.3447263	total: 79.3ms	remaining: 4.88s
16:	learn: 42.2631215	total: 84.7ms	remaining: 4.9s
17:	learn: 41.9972287	total: 89.2ms	remaining: 4.87s
18:	learn: 41.3770457	total: 93.7ms	remaining: 4.84s
19:	learn: 41.1685619	total: 98.5ms	remaining: 4.83s
20:	learn: 40.9407080	total: 103ms	remaining: 4.8s
21:	learn: 40.8618061	total: 108ms	remaining: 4.79s
22:	learn: 40.3894451	total: 112ms	remaining: 4.76s
23:	learn: 40.3232121	total: 117ms	remaining: 4.76s
24:	learn: 40.1419100	total: 122ms	remaining: 4.75s
25:	learn: 39.7701694	total: 126ms	remaining: 4.72s
26:	learn: 39.6209360	total: 131ms	remaining: 4.72s
27:	learn

179:	learn: 28.5460432	total: 822ms	remaining: 3.75s
180:	learn: 28.5172864	total: 828ms	remaining: 3.74s
181:	learn: 28.4829511	total: 832ms	remaining: 3.74s
182:	learn: 28.4401042	total: 837ms	remaining: 3.74s
183:	learn: 28.4066191	total: 842ms	remaining: 3.73s
184:	learn: 28.3666935	total: 847ms	remaining: 3.73s
185:	learn: 28.3332690	total: 852ms	remaining: 3.73s
186:	learn: 28.3022555	total: 856ms	remaining: 3.72s
187:	learn: 28.2628499	total: 860ms	remaining: 3.71s
188:	learn: 28.2301185	total: 865ms	remaining: 3.71s
189:	learn: 28.1972642	total: 870ms	remaining: 3.71s
190:	learn: 28.1585753	total: 874ms	remaining: 3.7s
191:	learn: 28.1286551	total: 878ms	remaining: 3.7s
192:	learn: 28.0971297	total: 883ms	remaining: 3.69s
193:	learn: 28.0588838	total: 888ms	remaining: 3.69s
194:	learn: 28.0207334	total: 892ms	remaining: 3.68s
195:	learn: 27.9849831	total: 897ms	remaining: 3.68s
196:	learn: 27.9475175	total: 902ms	remaining: 3.68s
197:	learn: 27.9111014	total: 907ms	remaining: 3

351:	learn: 23.7577192	total: 1.6s	remaining: 2.94s
352:	learn: 23.7381516	total: 1.6s	remaining: 2.94s
353:	learn: 23.6845610	total: 1.61s	remaining: 2.94s
354:	learn: 23.6650812	total: 1.61s	remaining: 2.94s
355:	learn: 23.6457035	total: 1.62s	remaining: 2.93s
356:	learn: 23.6263911	total: 1.62s	remaining: 2.93s
357:	learn: 23.6071336	total: 1.63s	remaining: 2.92s
358:	learn: 23.5879749	total: 1.63s	remaining: 2.92s
359:	learn: 23.5689148	total: 1.64s	remaining: 2.91s
360:	learn: 23.5499113	total: 1.64s	remaining: 2.91s
361:	learn: 23.5309631	total: 1.65s	remaining: 2.9s
362:	learn: 23.5120865	total: 1.65s	remaining: 2.9s
363:	learn: 23.4932654	total: 1.66s	remaining: 2.89s
364:	learn: 23.4745325	total: 1.66s	remaining: 2.89s
365:	learn: 23.4558153	total: 1.66s	remaining: 2.88s
366:	learn: 23.4371424	total: 1.67s	remaining: 2.88s
367:	learn: 23.4185475	total: 1.67s	remaining: 2.88s
368:	learn: 23.3999959	total: 1.68s	remaining: 2.87s
369:	learn: 23.3815100	total: 1.68s	remaining: 2.8

522:	learn: 20.3069448	total: 2.37s	remaining: 2.17s
523:	learn: 20.2943924	total: 2.38s	remaining: 2.16s
524:	learn: 20.2818692	total: 2.38s	remaining: 2.16s
525:	learn: 20.2474943	total: 2.39s	remaining: 2.15s
526:	learn: 20.2349880	total: 2.39s	remaining: 2.15s
527:	learn: 20.2225339	total: 2.4s	remaining: 2.14s
528:	learn: 20.2101225	total: 2.4s	remaining: 2.14s
529:	learn: 20.1977401	total: 2.41s	remaining: 2.13s
530:	learn: 20.1853797	total: 2.41s	remaining: 2.13s
531:	learn: 20.1730653	total: 2.42s	remaining: 2.13s
532:	learn: 20.1608022	total: 2.42s	remaining: 2.12s
533:	learn: 20.1485785	total: 2.43s	remaining: 2.12s
534:	learn: 20.1363789	total: 2.43s	remaining: 2.11s
535:	learn: 20.0936189	total: 2.44s	remaining: 2.11s
536:	learn: 20.0814253	total: 2.44s	remaining: 2.1s
537:	learn: 20.0692677	total: 2.45s	remaining: 2.1s
538:	learn: 20.0571555	total: 2.45s	remaining: 2.1s
539:	learn: 20.0450844	total: 2.46s	remaining: 2.09s
540:	learn: 20.0330652	total: 2.46s	remaining: 2.09

678:	learn: 18.3057641	total: 3.14s	remaining: 1.48s
679:	learn: 18.2966280	total: 3.14s	remaining: 1.48s
680:	learn: 18.2875042	total: 3.15s	remaining: 1.47s
681:	learn: 18.2783971	total: 3.15s	remaining: 1.47s
682:	learn: 18.2692994	total: 3.15s	remaining: 1.46s
683:	learn: 18.2602200	total: 3.16s	remaining: 1.46s
684:	learn: 18.2511625	total: 3.16s	remaining: 1.46s
685:	learn: 18.2421315	total: 3.17s	remaining: 1.45s
686:	learn: 18.2331173	total: 3.17s	remaining: 1.45s
687:	learn: 18.2241185	total: 3.18s	remaining: 1.44s
688:	learn: 18.2151328	total: 3.18s	remaining: 1.44s
689:	learn: 18.2061612	total: 3.19s	remaining: 1.43s
690:	learn: 18.1972010	total: 3.19s	remaining: 1.43s
691:	learn: 18.1882589	total: 3.2s	remaining: 1.42s
692:	learn: 18.1793383	total: 3.2s	remaining: 1.42s
693:	learn: 18.1704401	total: 3.21s	remaining: 1.41s
694:	learn: 18.1244186	total: 3.21s	remaining: 1.41s
695:	learn: 18.1154955	total: 3.22s	remaining: 1.4s
696:	learn: 18.1066031	total: 3.22s	remaining: 1.

848:	learn: 16.4571794	total: 3.92s	remaining: 697ms
849:	learn: 16.4503271	total: 3.92s	remaining: 692ms
850:	learn: 16.4434942	total: 3.93s	remaining: 687ms
851:	learn: 16.4367360	total: 3.93s	remaining: 683ms
852:	learn: 16.4299246	total: 3.94s	remaining: 678ms
853:	learn: 16.4235541	total: 3.94s	remaining: 674ms
854:	learn: 16.4167651	total: 3.95s	remaining: 669ms
855:	learn: 16.4099884	total: 3.95s	remaining: 665ms
856:	learn: 16.4032815	total: 3.96s	remaining: 660ms
857:	learn: 16.3965362	total: 3.96s	remaining: 655ms
858:	learn: 16.3343160	total: 3.96s	remaining: 651ms
859:	learn: 16.3275681	total: 3.97s	remaining: 646ms
860:	learn: 16.3208833	total: 3.97s	remaining: 642ms
861:	learn: 16.3141537	total: 3.98s	remaining: 637ms
862:	learn: 16.3074397	total: 3.98s	remaining: 632ms
863:	learn: 16.2551504	total: 3.99s	remaining: 628ms
864:	learn: 16.2483888	total: 3.99s	remaining: 623ms
865:	learn: 16.2417287	total: 4s	remaining: 619ms
866:	learn: 16.2350396	total: 4s	remaining: 614ms

9:	learn: 49.2651122	total: 47.2ms	remaining: 4.67s
10:	learn: 49.1335052	total: 51.9ms	remaining: 4.67s
11:	learn: 48.8866984	total: 56.1ms	remaining: 4.62s
12:	learn: 48.8018038	total: 60.1ms	remaining: 4.57s
13:	learn: 48.5010029	total: 64.8ms	remaining: 4.56s
14:	learn: 48.4288430	total: 69ms	remaining: 4.53s
15:	learn: 48.3356145	total: 73.3ms	remaining: 4.5s
16:	learn: 48.2412192	total: 77.9ms	remaining: 4.51s
17:	learn: 48.1645682	total: 82.3ms	remaining: 4.49s
18:	learn: 48.0901762	total: 86.8ms	remaining: 4.48s
19:	learn: 48.0025247	total: 90.9ms	remaining: 4.46s
20:	learn: 47.7626549	total: 96.3ms	remaining: 4.49s
21:	learn: 47.6832933	total: 101ms	remaining: 4.48s
22:	learn: 47.6178498	total: 105ms	remaining: 4.48s
23:	learn: 47.5013334	total: 110ms	remaining: 4.48s
24:	learn: 47.3270381	total: 115ms	remaining: 4.47s
25:	learn: 47.0848619	total: 119ms	remaining: 4.47s
26:	learn: 47.0080431	total: 123ms	remaining: 4.45s
27:	learn: 46.9193209	total: 129ms	remaining: 4.46s
28:	

178:	learn: 36.2946762	total: 821ms	remaining: 3.77s
179:	learn: 36.2509603	total: 826ms	remaining: 3.76s
180:	learn: 36.2144634	total: 831ms	remaining: 3.76s
181:	learn: 36.1781074	total: 835ms	remaining: 3.75s
182:	learn: 36.1408374	total: 840ms	remaining: 3.75s
183:	learn: 36.0978884	total: 844ms	remaining: 3.74s
184:	learn: 36.0437554	total: 849ms	remaining: 3.74s
185:	learn: 35.9983464	total: 854ms	remaining: 3.73s
186:	learn: 35.9551611	total: 858ms	remaining: 3.73s
187:	learn: 35.9036715	total: 863ms	remaining: 3.73s
188:	learn: 35.8619890	total: 867ms	remaining: 3.72s
189:	learn: 35.8184781	total: 872ms	remaining: 3.72s
190:	learn: 35.7692515	total: 877ms	remaining: 3.71s
191:	learn: 35.7276471	total: 881ms	remaining: 3.71s
192:	learn: 35.6848352	total: 886ms	remaining: 3.7s
193:	learn: 35.6425615	total: 890ms	remaining: 3.7s
194:	learn: 35.5924245	total: 895ms	remaining: 3.7s
195:	learn: 35.5441171	total: 900ms	remaining: 3.69s
196:	learn: 35.4959855	total: 905ms	remaining: 3.

343:	learn: 29.7455494	total: 1.6s	remaining: 3.05s
344:	learn: 29.7225784	total: 1.61s	remaining: 3.05s
345:	learn: 29.6997244	total: 1.61s	remaining: 3.05s
346:	learn: 29.6769543	total: 1.62s	remaining: 3.05s
347:	learn: 29.6542493	total: 1.62s	remaining: 3.04s
348:	learn: 29.6315805	total: 1.63s	remaining: 3.04s
349:	learn: 29.6089969	total: 1.63s	remaining: 3.04s
350:	learn: 29.5865337	total: 1.64s	remaining: 3.03s
351:	learn: 29.5641435	total: 1.64s	remaining: 3.03s
352:	learn: 29.4352445	total: 1.65s	remaining: 3.02s
353:	learn: 29.4128713	total: 1.65s	remaining: 3.02s
354:	learn: 29.3907095	total: 1.66s	remaining: 3.02s
355:	learn: 29.3687013	total: 1.67s	remaining: 3.01s
356:	learn: 29.3467879	total: 1.67s	remaining: 3.01s
357:	learn: 29.3249598	total: 1.68s	remaining: 3s
358:	learn: 29.3032230	total: 1.68s	remaining: 3s
359:	learn: 29.2815605	total: 1.69s	remaining: 3s
360:	learn: 29.1999590	total: 1.69s	remaining: 2.99s
361:	learn: 29.1783693	total: 1.7s	remaining: 2.99s
362:

502:	learn: 25.9018483	total: 2.38s	remaining: 2.35s
503:	learn: 25.8875287	total: 2.38s	remaining: 2.34s
504:	learn: 25.8732353	total: 2.39s	remaining: 2.34s
505:	learn: 25.8589712	total: 2.39s	remaining: 2.33s
506:	learn: 25.8447212	total: 2.4s	remaining: 2.33s
507:	learn: 25.8304887	total: 2.4s	remaining: 2.33s
508:	learn: 25.8163264	total: 2.41s	remaining: 2.32s
509:	learn: 25.8021853	total: 2.41s	remaining: 2.32s
510:	learn: 25.7880926	total: 2.42s	remaining: 2.31s
511:	learn: 25.7740220	total: 2.42s	remaining: 2.31s
512:	learn: 25.7599840	total: 2.43s	remaining: 2.3s
513:	learn: 25.7459520	total: 2.43s	remaining: 2.3s
514:	learn: 25.7319560	total: 2.44s	remaining: 2.29s
515:	learn: 25.7180147	total: 2.44s	remaining: 2.29s
516:	learn: 25.6458598	total: 2.45s	remaining: 2.29s
517:	learn: 25.6319178	total: 2.45s	remaining: 2.28s
518:	learn: 25.5642907	total: 2.46s	remaining: 2.28s
519:	learn: 25.5503771	total: 2.46s	remaining: 2.27s
520:	learn: 25.5365518	total: 2.47s	remaining: 2.2

660:	learn: 23.1530444	total: 3.15s	remaining: 1.62s
661:	learn: 23.1172491	total: 3.16s	remaining: 1.61s
662:	learn: 23.1068714	total: 3.16s	remaining: 1.61s
663:	learn: 23.0965229	total: 3.17s	remaining: 1.6s
664:	learn: 23.0862097	total: 3.17s	remaining: 1.6s
665:	learn: 23.0759337	total: 3.18s	remaining: 1.59s
666:	learn: 23.0656876	total: 3.18s	remaining: 1.59s
667:	learn: 23.0554530	total: 3.19s	remaining: 1.58s
668:	learn: 23.0170048	total: 3.19s	remaining: 1.58s
669:	learn: 22.9613853	total: 3.2s	remaining: 1.57s
670:	learn: 22.9511254	total: 3.2s	remaining: 1.57s
671:	learn: 22.9409177	total: 3.21s	remaining: 1.56s
672:	learn: 22.9307588	total: 3.21s	remaining: 1.56s
673:	learn: 22.8697824	total: 3.22s	remaining: 1.55s
674:	learn: 22.8596001	total: 3.22s	remaining: 1.55s
675:	learn: 22.8494783	total: 3.23s	remaining: 1.55s
676:	learn: 22.8393809	total: 3.23s	remaining: 1.54s
677:	learn: 22.8293255	total: 3.24s	remaining: 1.54s
678:	learn: 22.8192962	total: 3.24s	remaining: 1.5

816:	learn: 20.7375193	total: 3.93s	remaining: 880ms
817:	learn: 20.7296650	total: 3.93s	remaining: 875ms
818:	learn: 20.7218252	total: 3.94s	remaining: 870ms
819:	learn: 20.7140086	total: 3.94s	remaining: 866ms
820:	learn: 20.7062009	total: 3.95s	remaining: 861ms
821:	learn: 20.6984060	total: 3.95s	remaining: 856ms
822:	learn: 20.6630148	total: 3.96s	remaining: 851ms
823:	learn: 20.6552275	total: 3.96s	remaining: 846ms
824:	learn: 20.6474501	total: 3.97s	remaining: 842ms
825:	learn: 20.6396910	total: 3.97s	remaining: 837ms
826:	learn: 20.6319475	total: 3.98s	remaining: 832ms
827:	learn: 20.6242172	total: 3.98s	remaining: 827ms
828:	learn: 20.6165060	total: 3.99s	remaining: 823ms
829:	learn: 20.6088119	total: 3.99s	remaining: 818ms
830:	learn: 20.6011365	total: 4s	remaining: 813ms
831:	learn: 20.5934778	total: 4s	remaining: 808ms
832:	learn: 20.5858253	total: 4.01s	remaining: 804ms
833:	learn: 20.5781908	total: 4.01s	remaining: 799ms
834:	learn: 20.5705684	total: 4.02s	remaining: 794ms

979:	learn: 18.9415006	total: 4.71s	remaining: 96.1ms
980:	learn: 18.8988380	total: 4.71s	remaining: 91.3ms
981:	learn: 18.8927481	total: 4.72s	remaining: 86.5ms
982:	learn: 18.8704022	total: 4.72s	remaining: 81.7ms
983:	learn: 18.8643199	total: 4.73s	remaining: 76.9ms
984:	learn: 18.8582673	total: 4.73s	remaining: 72.1ms
985:	learn: 18.8269534	total: 4.74s	remaining: 67.3ms
986:	learn: 18.8208928	total: 4.74s	remaining: 62.5ms
987:	learn: 18.8148651	total: 4.75s	remaining: 57.6ms
988:	learn: 18.7858689	total: 4.75s	remaining: 52.8ms
989:	learn: 18.7798356	total: 4.75s	remaining: 48ms
990:	learn: 18.7738346	total: 4.76s	remaining: 43.2ms
991:	learn: 18.7678537	total: 4.76s	remaining: 38.4ms
992:	learn: 18.7248135	total: 4.77s	remaining: 33.6ms
993:	learn: 18.7188367	total: 4.77s	remaining: 28.8ms
994:	learn: 18.7128882	total: 4.78s	remaining: 24ms
995:	learn: 18.7069588	total: 4.78s	remaining: 19.2ms
996:	learn: 18.7010405	total: 4.79s	remaining: 14.4ms
997:	learn: 18.6951297	total: 4.

In [9]:
model = pipeline.fit(X_train,y_train)

Learning rate set to 0.04173
0:	learn: 43.3232699	total: 7.61ms	remaining: 7.6s
1:	learn: 42.8046647	total: 12.8ms	remaining: 6.38s
2:	learn: 42.5261359	total: 18.7ms	remaining: 6.21s
3:	learn: 42.2169397	total: 24.7ms	remaining: 6.15s
4:	learn: 42.1011102	total: 30.3ms	remaining: 6.04s
5:	learn: 41.8800876	total: 36.4ms	remaining: 6.02s
6:	learn: 41.8063918	total: 42ms	remaining: 5.96s
7:	learn: 41.5444171	total: 47.5ms	remaining: 5.89s
8:	learn: 41.4425427	total: 53.2ms	remaining: 5.86s
9:	learn: 41.1838253	total: 58.7ms	remaining: 5.81s
10:	learn: 40.9325805	total: 64.4ms	remaining: 5.79s
11:	learn: 40.7545798	total: 70ms	remaining: 5.76s
12:	learn: 40.6687925	total: 75.5ms	remaining: 5.73s
13:	learn: 40.4440187	total: 80.8ms	remaining: 5.69s
14:	learn: 40.1972031	total: 86ms	remaining: 5.65s
15:	learn: 40.1405686	total: 91.3ms	remaining: 5.61s
16:	learn: 40.0493450	total: 96.5ms	remaining: 5.58s
17:	learn: 39.9770864	total: 102ms	remaining: 5.58s
18:	learn: 39.7868224	total: 108ms	

171:	learn: 30.0667662	total: 971ms	remaining: 4.67s
172:	learn: 30.0266671	total: 978ms	remaining: 4.67s
173:	learn: 29.9865412	total: 983ms	remaining: 4.67s
174:	learn: 29.9570538	total: 989ms	remaining: 4.66s
175:	learn: 29.9214145	total: 994ms	remaining: 4.65s
176:	learn: 29.8818064	total: 1000ms	remaining: 4.65s
177:	learn: 29.8482425	total: 1s	remaining: 4.64s
178:	learn: 29.8159818	total: 1.01s	remaining: 4.64s
179:	learn: 29.7745345	total: 1.02s	remaining: 4.63s
180:	learn: 29.7411382	total: 1.02s	remaining: 4.62s
181:	learn: 29.7002501	total: 1.03s	remaining: 4.62s
182:	learn: 29.6701084	total: 1.03s	remaining: 4.61s
183:	learn: 29.6316417	total: 1.04s	remaining: 4.6s
184:	learn: 29.5921306	total: 1.04s	remaining: 4.59s
185:	learn: 29.5528851	total: 1.05s	remaining: 4.59s
186:	learn: 29.5183720	total: 1.05s	remaining: 4.58s
187:	learn: 29.4791162	total: 1.06s	remaining: 4.58s
188:	learn: 29.4400786	total: 1.07s	remaining: 4.58s
189:	learn: 29.4032008	total: 1.07s	remaining: 4.

336:	learn: 25.1791205	total: 1.95s	remaining: 3.83s
337:	learn: 25.1596975	total: 1.95s	remaining: 3.83s
338:	learn: 25.1404109	total: 1.96s	remaining: 3.82s
339:	learn: 25.1212449	total: 1.97s	remaining: 3.81s
340:	learn: 25.0404337	total: 1.97s	remaining: 3.81s
341:	learn: 25.0213330	total: 1.98s	remaining: 3.8s
342:	learn: 25.0023601	total: 1.98s	remaining: 3.8s
343:	learn: 24.9834814	total: 1.99s	remaining: 3.79s
344:	learn: 24.9646958	total: 1.99s	remaining: 3.79s
345:	learn: 24.9459787	total: 2s	remaining: 3.78s
346:	learn: 24.9273576	total: 2s	remaining: 3.77s
347:	learn: 24.9087967	total: 2.01s	remaining: 3.77s
348:	learn: 24.8902907	total: 2.02s	remaining: 3.76s
349:	learn: 24.8718514	total: 2.02s	remaining: 3.75s
350:	learn: 24.8534616	total: 2.03s	remaining: 3.75s
351:	learn: 24.8351782	total: 2.03s	remaining: 3.75s
352:	learn: 24.8169284	total: 2.04s	remaining: 3.74s
353:	learn: 24.7987164	total: 2.04s	remaining: 3.73s
354:	learn: 24.7805808	total: 2.05s	remaining: 3.73s
3

502:	learn: 22.1582566	total: 2.93s	remaining: 2.9s
503:	learn: 22.0989134	total: 2.94s	remaining: 2.89s
504:	learn: 22.0866199	total: 2.94s	remaining: 2.89s
505:	learn: 22.0744057	total: 2.95s	remaining: 2.88s
506:	learn: 22.0622370	total: 2.96s	remaining: 2.88s
507:	learn: 22.0500760	total: 2.96s	remaining: 2.87s
508:	learn: 22.0379351	total: 2.97s	remaining: 2.86s
509:	learn: 22.0258330	total: 2.98s	remaining: 2.86s
510:	learn: 22.0137734	total: 2.98s	remaining: 2.85s
511:	learn: 22.0017456	total: 2.99s	remaining: 2.85s
512:	learn: 21.9897523	total: 2.99s	remaining: 2.84s
513:	learn: 21.9777873	total: 3s	remaining: 2.83s
514:	learn: 21.9658308	total: 3s	remaining: 2.83s
515:	learn: 21.9538966	total: 3.01s	remaining: 2.82s
516:	learn: 21.9419948	total: 3.02s	remaining: 2.82s
517:	learn: 21.9301349	total: 3.02s	remaining: 2.81s
518:	learn: 21.8659525	total: 3.03s	remaining: 2.81s
519:	learn: 21.8540839	total: 3.03s	remaining: 2.8s
520:	learn: 21.8422557	total: 3.04s	remaining: 2.79s
5

669:	learn: 20.1423032	total: 3.92s	remaining: 1.93s
670:	learn: 20.1335058	total: 3.92s	remaining: 1.92s
671:	learn: 20.1247267	total: 3.93s	remaining: 1.92s
672:	learn: 20.1159651	total: 3.94s	remaining: 1.91s
673:	learn: 20.1072143	total: 3.94s	remaining: 1.91s
674:	learn: 20.0984767	total: 3.95s	remaining: 1.9s
675:	learn: 20.0897477	total: 3.95s	remaining: 1.9s
676:	learn: 20.0810309	total: 3.96s	remaining: 1.89s
677:	learn: 20.0723434	total: 3.96s	remaining: 1.88s
678:	learn: 20.0636735	total: 3.97s	remaining: 1.88s
679:	learn: 20.0550194	total: 3.98s	remaining: 1.87s
680:	learn: 20.0463834	total: 3.98s	remaining: 1.86s
681:	learn: 20.0377632	total: 3.99s	remaining: 1.86s
682:	learn: 19.9835631	total: 3.99s	remaining: 1.85s
683:	learn: 19.9749185	total: 4s	remaining: 1.85s
684:	learn: 19.9663017	total: 4s	remaining: 1.84s
685:	learn: 19.9577099	total: 4.01s	remaining: 1.83s
686:	learn: 19.9491564	total: 4.01s	remaining: 1.83s
687:	learn: 19.9406172	total: 4.02s	remaining: 1.82s
6

843:	learn: 18.4045618	total: 4.91s	remaining: 907ms
844:	learn: 18.3979963	total: 4.91s	remaining: 902ms
845:	learn: 18.3914440	total: 4.92s	remaining: 896ms
846:	learn: 18.3849038	total: 4.92s	remaining: 890ms
847:	learn: 18.3783849	total: 4.93s	remaining: 884ms
848:	learn: 18.3718731	total: 4.94s	remaining: 878ms
849:	learn: 18.3653750	total: 4.94s	remaining: 872ms
850:	learn: 18.3588933	total: 4.95s	remaining: 867ms
851:	learn: 18.3524177	total: 4.96s	remaining: 861ms
852:	learn: 18.3038641	total: 4.96s	remaining: 855ms
853:	learn: 18.2973685	total: 4.97s	remaining: 849ms
854:	learn: 18.2773547	total: 4.97s	remaining: 844ms
855:	learn: 18.2708951	total: 4.98s	remaining: 838ms
856:	learn: 18.2644489	total: 4.99s	remaining: 832ms
857:	learn: 18.2580162	total: 4.99s	remaining: 826ms
858:	learn: 18.2515965	total: 5s	remaining: 821ms
859:	learn: 18.2451862	total: 5s	remaining: 815ms
860:	learn: 18.2387851	total: 5.01s	remaining: 809ms
861:	learn: 18.2323909	total: 5.02s	remaining: 803ms

In [10]:
X_test = pd.read_csv('data/dengue_features_test.csv')
X_test.head()
X_test = X_test.drop('week_start_date',axis=1)
X_test['year'] = X_test['year'].astype('object')
X_test['weekofyear'] = X_test['weekofyear'].astype('object')

X_test['week_year'] = X_test['city']+"_"+X_test['weekofyear'].astype('str')+"_"+X_test['year'].astype('str')
X_test = X_test.drop(['city','year','weekofyear'],axis=1)

In [11]:
X_test['total_cases'] = model.predict(X_test)

In [12]:
X_test.head()

Unnamed: 0,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,reanalysis_avg_temp_k,reanalysis_dew_point_temp_k,reanalysis_max_air_temp_k,reanalysis_min_air_temp_k,reanalysis_precip_amt_kg_per_m2,reanalysis_relative_humidity_percent,reanalysis_sat_precip_amt_mm,reanalysis_specific_humidity_g_per_kg,reanalysis_tdtr_k,station_avg_temp_c,station_diur_temp_rng_c,station_max_temp_c,station_min_temp_c,station_precip_mm,week_year,total_cases
0,-0.0189,-0.0189,0.102729,0.0912,78.6,298.492857,298.55,294.527143,301.1,296.4,25.37,78.781429,78.6,15.918571,3.128571,26.528571,7.057143,33.3,21.7,75.2,sj_18_2008,18.335732
1,-0.018,-0.0124,0.082043,0.072314,12.56,298.475714,298.557143,294.395714,300.8,296.7,21.83,78.23,12.56,15.791429,2.571429,26.071429,5.557143,30.0,22.2,34.3,sj_19_2008,17.421449
2,-0.0015,,0.151083,0.091529,3.66,299.455714,299.357143,295.308571,302.2,296.4,4.12,78.27,3.66,16.674286,4.428571,27.928571,7.785714,32.8,22.8,3.0,sj_20_2008,17.533843
3,,-0.019867,0.124329,0.125686,0.0,299.69,299.728571,294.402857,303.0,296.9,2.2,73.015714,0.0,15.775714,4.342857,28.057143,6.271429,33.3,24.4,0.3,sj_21_2008,23.177118
4,0.0568,0.039833,0.062267,0.075914,0.76,299.78,299.671429,294.76,302.3,297.3,4.36,74.084286,0.76,16.137143,3.542857,27.614286,7.085714,33.3,23.3,84.1,sj_22_2008,21.467871


In [13]:
submission = pd.DataFrame(X_test.week_year.str.split('_').tolist(),
                                 columns = ['city','weekofyear','year'])

In [14]:
submission['total_cases'] = X_test['total_cases'].astype('int64')
submission = submission[['city','year','weekofyear','total_cases']]
submission

Unnamed: 0,city,year,weekofyear,total_cases
0,sj,2008,18,18
1,sj,2008,19,17
2,sj,2008,20,17
3,sj,2008,21,23
4,sj,2008,22,21
...,...,...,...,...
411,iq,2013,22,6
412,iq,2013,23,5
413,iq,2013,24,5
414,iq,2013,25,3


In [15]:
submission.to_csv('submission.csv', index=False)