* This notebook uses data from https://www.drivendata.org/competitions/44/dengai-predicting-disease-spread/.
* It contains Experiment with LightGBM

In [1]:
import pandas as pd
import numpy as np

In [2]:
train_data = pd.read_csv('data/dengue_features_train.csv')
#train_data['week_start_date'] = pd.to_datetime(train_data.week_start_date)
X_train = train_data.drop('week_start_date',axis=1)
X_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1456 entries, 0 to 1455
Data columns (total 23 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   city                                   1456 non-null   object 
 1   year                                   1456 non-null   int64  
 2   weekofyear                             1456 non-null   int64  
 3   ndvi_ne                                1262 non-null   float64
 4   ndvi_nw                                1404 non-null   float64
 5   ndvi_se                                1434 non-null   float64
 6   ndvi_sw                                1434 non-null   float64
 7   precipitation_amt_mm                   1443 non-null   float64
 8   reanalysis_air_temp_k                  1446 non-null   float64
 9   reanalysis_avg_temp_k                  1446 non-null   float64
 10  reanalysis_dew_point_temp_k            1446 non-null   float64
 11  rean

In [3]:
labels = pd.read_csv('data/dengue_labels_train.csv')
labels.head()
merged = X_train.merge(labels,on=['city','year','weekofyear'])
y_train = merged['total_cases']
X_train = merged.drop('total_cases',axis=1)
X_train.head()

Unnamed: 0,city,year,weekofyear,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,reanalysis_avg_temp_k,...,reanalysis_precip_amt_kg_per_m2,reanalysis_relative_humidity_percent,reanalysis_sat_precip_amt_mm,reanalysis_specific_humidity_g_per_kg,reanalysis_tdtr_k,station_avg_temp_c,station_diur_temp_rng_c,station_max_temp_c,station_min_temp_c,station_precip_mm
0,sj,1990,18,0.1226,0.103725,0.198483,0.177617,12.42,297.572857,297.742857,...,32.0,73.365714,12.42,14.012857,2.628571,25.442857,6.9,29.4,20.0,16.0
1,sj,1990,19,0.1699,0.142175,0.162357,0.155486,22.82,298.211429,298.442857,...,17.94,77.368571,22.82,15.372857,2.371429,26.714286,6.371429,31.7,22.2,8.6
2,sj,1990,20,0.03225,0.172967,0.1572,0.170843,34.54,298.781429,298.878571,...,26.1,82.052857,34.54,16.848571,2.3,26.714286,6.485714,32.2,22.8,41.4
3,sj,1990,21,0.128633,0.245067,0.227557,0.235886,15.36,298.987143,299.228571,...,13.9,80.337143,15.36,16.672857,2.428571,27.471429,6.771429,33.3,23.3,4.0
4,sj,1990,22,0.1962,0.2622,0.2512,0.24734,7.52,299.518571,299.664286,...,12.2,80.46,7.52,17.21,3.014286,28.942857,9.371429,35.0,23.9,5.8


In [4]:
X_train['year'] = X_train['year'].astype('object')
X_train['weekofyear'] = X_train['weekofyear'].astype('object')

X_train['week_year'] = X_train['city']+"_"+X_train['weekofyear'].astype('str')+"_"+X_train['year'].astype('str')
X_train = X_train.drop(['city','year','weekofyear'],axis=1)
X_train.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 1456 entries, 0 to 1455
Data columns (total 21 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   ndvi_ne                                1262 non-null   float64
 1   ndvi_nw                                1404 non-null   float64
 2   ndvi_se                                1434 non-null   float64
 3   ndvi_sw                                1434 non-null   float64
 4   precipitation_amt_mm                   1443 non-null   float64
 5   reanalysis_air_temp_k                  1446 non-null   float64
 6   reanalysis_avg_temp_k                  1446 non-null   float64
 7   reanalysis_dew_point_temp_k            1446 non-null   float64
 8   reanalysis_max_air_temp_k              1446 non-null   float64
 9   reanalysis_min_air_temp_k              1446 non-null   float64
 10  reanalysis_precip_amt_kg_per_m2        1446 non-null   float64
 11  rean

In [5]:
num_col = [col for col in X_train.columns if X_train[col].dtype=='int' or X_train[col].dtype=='float64']
cat_col = [col for col in X_train.columns if X_train[col].dtype=='object']


In [7]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

num_processor = Pipeline(steps=[
    ('imputer',SimpleImputer(strategy='median')),
    ('normalizer',Normalizer())
])
cat_processor = Pipeline(steps=[
    ('encoder',OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(transformers=[
    ('num',num_processor,num_col),
    ('cat',cat_processor,cat_col)
])


In [37]:
from catboost import CatBoostRegressor
from sklearn.model_selection import cross_val_score
model = CatBoostRegressor(loss_function='MAE')
pipeline = Pipeline(steps=[
    ('processor',preprocessor),
    ('model',model)
])

score = -1*cross_val_score(pipeline,X_train,y_train,cv=3,scoring='neg_mean_absolute_error')
print(f'MSE Loss mean = {np.mean(score)} max = {np.max(score)}')

0:	learn: 10.3720093	total: 5.88ms	remaining: 5.88s
1:	learn: 10.3041586	total: 11.1ms	remaining: 5.55s
2:	learn: 10.2364337	total: 15.6ms	remaining: 5.17s
3:	learn: 10.1869038	total: 20.3ms	remaining: 5.05s
4:	learn: 10.1373297	total: 24.7ms	remaining: 4.92s
5:	learn: 10.0750751	total: 29ms	remaining: 4.81s
6:	learn: 10.0238650	total: 33.3ms	remaining: 4.73s
7:	learn: 9.9663190	total: 37.7ms	remaining: 4.67s
8:	learn: 9.9176438	total: 42ms	remaining: 4.63s
9:	learn: 9.8757391	total: 46.4ms	remaining: 4.59s
10:	learn: 9.8193022	total: 50.8ms	remaining: 4.57s
11:	learn: 9.7638274	total: 55.1ms	remaining: 4.54s
12:	learn: 9.7223137	total: 59.5ms	remaining: 4.51s
13:	learn: 9.6762948	total: 63.8ms	remaining: 4.49s
14:	learn: 9.6330732	total: 68ms	remaining: 4.46s
15:	learn: 9.5923256	total: 72.8ms	remaining: 4.48s
16:	learn: 9.5482761	total: 77.7ms	remaining: 4.49s
17:	learn: 9.5231876	total: 82.2ms	remaining: 4.48s
18:	learn: 9.4847466	total: 86.6ms	remaining: 4.47s
19:	learn: 9.4517966	

173:	learn: 7.4870900	total: 781ms	remaining: 3.71s
174:	learn: 7.4814027	total: 786ms	remaining: 3.71s
175:	learn: 7.4772487	total: 790ms	remaining: 3.7s
176:	learn: 7.4725073	total: 795ms	remaining: 3.69s
177:	learn: 7.4688340	total: 799ms	remaining: 3.69s
178:	learn: 7.4637006	total: 804ms	remaining: 3.69s
179:	learn: 7.4533985	total: 808ms	remaining: 3.68s
180:	learn: 7.4431551	total: 813ms	remaining: 3.68s
181:	learn: 7.4376187	total: 817ms	remaining: 3.67s
182:	learn: 7.4281030	total: 821ms	remaining: 3.67s
183:	learn: 7.4231542	total: 825ms	remaining: 3.66s
184:	learn: 7.4186693	total: 829ms	remaining: 3.65s
185:	learn: 7.4111655	total: 834ms	remaining: 3.65s
186:	learn: 7.4088026	total: 838ms	remaining: 3.64s
187:	learn: 7.4050907	total: 842ms	remaining: 3.64s
188:	learn: 7.3949886	total: 846ms	remaining: 3.63s
189:	learn: 7.3881505	total: 850ms	remaining: 3.63s
190:	learn: 7.3816143	total: 854ms	remaining: 3.62s
191:	learn: 7.3780270	total: 859ms	remaining: 3.61s
192:	learn: 7

346:	learn: 6.4062673	total: 1.55s	remaining: 2.93s
347:	learn: 6.3992104	total: 1.56s	remaining: 2.92s
348:	learn: 6.3968430	total: 1.56s	remaining: 2.92s
349:	learn: 6.3881788	total: 1.57s	remaining: 2.92s
350:	learn: 6.3793848	total: 1.57s	remaining: 2.91s
351:	learn: 6.3691693	total: 1.58s	remaining: 2.91s
352:	learn: 6.3671777	total: 1.58s	remaining: 2.9s
353:	learn: 6.3656591	total: 1.59s	remaining: 2.9s
354:	learn: 6.3613532	total: 1.59s	remaining: 2.89s
355:	learn: 6.3572603	total: 1.6s	remaining: 2.89s
356:	learn: 6.3512358	total: 1.6s	remaining: 2.89s
357:	learn: 6.3469470	total: 1.61s	remaining: 2.88s
358:	learn: 6.3419365	total: 1.61s	remaining: 2.88s
359:	learn: 6.3401733	total: 1.61s	remaining: 2.87s
360:	learn: 6.3357346	total: 1.62s	remaining: 2.87s
361:	learn: 6.3337965	total: 1.62s	remaining: 2.86s
362:	learn: 6.3307814	total: 1.63s	remaining: 2.86s
363:	learn: 6.3265636	total: 1.63s	remaining: 2.85s
364:	learn: 6.3172286	total: 1.64s	remaining: 2.85s
365:	learn: 6.31

519:	learn: 5.6791754	total: 2.33s	remaining: 2.15s
520:	learn: 5.6767788	total: 2.34s	remaining: 2.15s
521:	learn: 5.6753349	total: 2.34s	remaining: 2.14s
522:	learn: 5.6735324	total: 2.35s	remaining: 2.14s
523:	learn: 5.6723582	total: 2.35s	remaining: 2.13s
524:	learn: 5.6709845	total: 2.35s	remaining: 2.13s
525:	learn: 5.6668064	total: 2.36s	remaining: 2.12s
526:	learn: 5.6636860	total: 2.36s	remaining: 2.12s
527:	learn: 5.6608091	total: 2.37s	remaining: 2.12s
528:	learn: 5.6590732	total: 2.37s	remaining: 2.11s
529:	learn: 5.6532530	total: 2.37s	remaining: 2.1s
530:	learn: 5.6475275	total: 2.38s	remaining: 2.1s
531:	learn: 5.6442138	total: 2.38s	remaining: 2.1s
532:	learn: 5.6389535	total: 2.39s	remaining: 2.09s
533:	learn: 5.6366184	total: 2.39s	remaining: 2.09s
534:	learn: 5.6345057	total: 2.4s	remaining: 2.08s
535:	learn: 5.6292758	total: 2.4s	remaining: 2.08s
536:	learn: 5.6260927	total: 2.4s	remaining: 2.07s
537:	learn: 5.6196019	total: 2.41s	remaining: 2.07s
538:	learn: 5.6146

692:	learn: 5.1848585	total: 3.1s	remaining: 1.38s
693:	learn: 5.1821353	total: 3.11s	remaining: 1.37s
694:	learn: 5.1793612	total: 3.11s	remaining: 1.37s
695:	learn: 5.1789458	total: 3.12s	remaining: 1.36s
696:	learn: 5.1783528	total: 3.12s	remaining: 1.36s
697:	learn: 5.1776525	total: 3.13s	remaining: 1.35s
698:	learn: 5.1756067	total: 3.13s	remaining: 1.35s
699:	learn: 5.1755227	total: 3.14s	remaining: 1.34s
700:	learn: 5.1724275	total: 3.14s	remaining: 1.34s
701:	learn: 5.1640670	total: 3.15s	remaining: 1.33s
702:	learn: 5.1626716	total: 3.15s	remaining: 1.33s
703:	learn: 5.1577431	total: 3.15s	remaining: 1.33s
704:	learn: 5.1554783	total: 3.16s	remaining: 1.32s
705:	learn: 5.1535969	total: 3.16s	remaining: 1.32s
706:	learn: 5.1516108	total: 3.17s	remaining: 1.31s
707:	learn: 5.1499175	total: 3.17s	remaining: 1.31s
708:	learn: 5.1472588	total: 3.17s	remaining: 1.3s
709:	learn: 5.1429612	total: 3.18s	remaining: 1.3s
710:	learn: 5.1422713	total: 3.18s	remaining: 1.29s
711:	learn: 5.1

866:	learn: 4.8204179	total: 3.88s	remaining: 596ms
867:	learn: 4.8169741	total: 3.89s	remaining: 591ms
868:	learn: 4.8158952	total: 3.89s	remaining: 587ms
869:	learn: 4.8137876	total: 3.9s	remaining: 582ms
870:	learn: 4.8128180	total: 3.9s	remaining: 578ms
871:	learn: 4.8119774	total: 3.9s	remaining: 573ms
872:	learn: 4.8068391	total: 3.91s	remaining: 569ms
873:	learn: 4.8060421	total: 3.91s	remaining: 564ms
874:	learn: 4.8049681	total: 3.92s	remaining: 560ms
875:	learn: 4.8046414	total: 3.92s	remaining: 555ms
876:	learn: 4.8040174	total: 3.93s	remaining: 551ms
877:	learn: 4.8023111	total: 3.93s	remaining: 546ms
878:	learn: 4.8011185	total: 3.93s	remaining: 542ms
879:	learn: 4.8001587	total: 3.94s	remaining: 537ms
880:	learn: 4.7989675	total: 3.94s	remaining: 533ms
881:	learn: 4.7968724	total: 3.95s	remaining: 528ms
882:	learn: 4.7961873	total: 3.95s	remaining: 524ms
883:	learn: 4.7940352	total: 3.96s	remaining: 519ms
884:	learn: 4.7921903	total: 3.96s	remaining: 515ms
885:	learn: 4.7

28:	learn: 19.4718040	total: 135ms	remaining: 4.51s
29:	learn: 19.4103772	total: 140ms	remaining: 4.53s
30:	learn: 19.3642406	total: 145ms	remaining: 4.55s
31:	learn: 19.2917423	total: 150ms	remaining: 4.54s
32:	learn: 19.2371383	total: 154ms	remaining: 4.53s
33:	learn: 19.1772277	total: 159ms	remaining: 4.52s
34:	learn: 19.1276118	total: 163ms	remaining: 4.5s
35:	learn: 19.0857800	total: 168ms	remaining: 4.51s
36:	learn: 19.0447673	total: 173ms	remaining: 4.5s
37:	learn: 19.0002245	total: 177ms	remaining: 4.49s
38:	learn: 18.9681057	total: 182ms	remaining: 4.48s
39:	learn: 18.9406539	total: 186ms	remaining: 4.47s
40:	learn: 18.9040017	total: 191ms	remaining: 4.46s
41:	learn: 18.8678153	total: 195ms	remaining: 4.45s
42:	learn: 18.8298065	total: 200ms	remaining: 4.45s
43:	learn: 18.7807371	total: 204ms	remaining: 4.44s
44:	learn: 18.7299589	total: 209ms	remaining: 4.43s
45:	learn: 18.6846471	total: 214ms	remaining: 4.43s
46:	learn: 18.6584182	total: 218ms	remaining: 4.42s
47:	learn: 18.

225:	learn: 15.7745388	total: 1.09s	remaining: 3.73s
226:	learn: 15.7427302	total: 1.09s	remaining: 3.73s
227:	learn: 15.7324271	total: 1.1s	remaining: 3.73s
228:	learn: 15.7291254	total: 1.1s	remaining: 3.72s
229:	learn: 15.7237261	total: 1.11s	remaining: 3.72s
230:	learn: 15.7072631	total: 1.11s	remaining: 3.71s
231:	learn: 15.6996018	total: 1.12s	remaining: 3.71s
232:	learn: 15.6969763	total: 1.12s	remaining: 3.7s
233:	learn: 15.6704977	total: 1.13s	remaining: 3.69s
234:	learn: 15.6670511	total: 1.13s	remaining: 3.69s
235:	learn: 15.6568596	total: 1.14s	remaining: 3.68s
236:	learn: 15.6455572	total: 1.14s	remaining: 3.68s
237:	learn: 15.6364496	total: 1.15s	remaining: 3.67s
238:	learn: 15.6227277	total: 1.15s	remaining: 3.67s
239:	learn: 15.6063337	total: 1.16s	remaining: 3.66s
240:	learn: 15.6020294	total: 1.16s	remaining: 3.65s
241:	learn: 15.5988214	total: 1.16s	remaining: 3.65s
242:	learn: 15.5955893	total: 1.17s	remaining: 3.64s
243:	learn: 15.5865286	total: 1.17s	remaining: 3.

399:	learn: 14.1235079	total: 1.86s	remaining: 2.79s
400:	learn: 14.1207038	total: 1.86s	remaining: 2.78s
401:	learn: 14.1184687	total: 1.87s	remaining: 2.78s
402:	learn: 14.1151392	total: 1.87s	remaining: 2.77s
403:	learn: 14.1109510	total: 1.88s	remaining: 2.77s
404:	learn: 14.0935087	total: 1.88s	remaining: 2.77s
405:	learn: 14.0820310	total: 1.89s	remaining: 2.76s
406:	learn: 14.0773965	total: 1.89s	remaining: 2.75s
407:	learn: 14.0654142	total: 1.9s	remaining: 2.75s
408:	learn: 14.0606573	total: 1.9s	remaining: 2.75s
409:	learn: 14.0430196	total: 1.9s	remaining: 2.74s
410:	learn: 14.0385474	total: 1.91s	remaining: 2.73s
411:	learn: 14.0347405	total: 1.91s	remaining: 2.73s
412:	learn: 14.0269812	total: 1.92s	remaining: 2.72s
413:	learn: 14.0241813	total: 1.92s	remaining: 2.72s
414:	learn: 14.0189436	total: 1.92s	remaining: 2.71s
415:	learn: 14.0129170	total: 1.93s	remaining: 2.71s
416:	learn: 14.0094157	total: 1.93s	remaining: 2.7s
417:	learn: 14.0057864	total: 1.94s	remaining: 2.7

576:	learn: 12.9882864	total: 2.63s	remaining: 1.93s
577:	learn: 12.9858845	total: 2.64s	remaining: 1.93s
578:	learn: 12.9840883	total: 2.65s	remaining: 1.92s
579:	learn: 12.9787783	total: 2.65s	remaining: 1.92s
580:	learn: 12.9755600	total: 2.65s	remaining: 1.91s
581:	learn: 12.9696358	total: 2.66s	remaining: 1.91s
582:	learn: 12.9663644	total: 2.66s	remaining: 1.9s
583:	learn: 12.9650992	total: 2.67s	remaining: 1.9s
584:	learn: 12.9622408	total: 2.67s	remaining: 1.9s
585:	learn: 12.9540256	total: 2.67s	remaining: 1.89s
586:	learn: 12.9514973	total: 2.68s	remaining: 1.89s
587:	learn: 12.9496438	total: 2.68s	remaining: 1.88s
588:	learn: 12.9483775	total: 2.69s	remaining: 1.88s
589:	learn: 12.9456260	total: 2.69s	remaining: 1.87s
590:	learn: 12.9432499	total: 2.7s	remaining: 1.87s
591:	learn: 12.9414094	total: 2.7s	remaining: 1.86s
592:	learn: 12.9385235	total: 2.7s	remaining: 1.86s
593:	learn: 12.9362909	total: 2.71s	remaining: 1.85s
594:	learn: 12.9351172	total: 2.71s	remaining: 1.85s

755:	learn: 12.1687145	total: 3.41s	remaining: 1.1s
756:	learn: 12.1654510	total: 3.42s	remaining: 1.1s
757:	learn: 12.1637872	total: 3.42s	remaining: 1.09s
758:	learn: 12.1539387	total: 3.43s	remaining: 1.09s
759:	learn: 12.1409220	total: 3.43s	remaining: 1.08s
760:	learn: 12.1373299	total: 3.43s	remaining: 1.08s
761:	learn: 12.1317758	total: 3.44s	remaining: 1.07s
762:	learn: 12.1291339	total: 3.44s	remaining: 1.07s
763:	learn: 12.1232644	total: 3.45s	remaining: 1.06s
764:	learn: 12.1205504	total: 3.45s	remaining: 1.06s
765:	learn: 12.1186802	total: 3.46s	remaining: 1.05s
766:	learn: 12.1175505	total: 3.46s	remaining: 1.05s
767:	learn: 12.1158666	total: 3.46s	remaining: 1.05s
768:	learn: 12.1118300	total: 3.47s	remaining: 1.04s
769:	learn: 12.1097978	total: 3.47s	remaining: 1.04s
770:	learn: 12.1064126	total: 3.48s	remaining: 1.03s
771:	learn: 12.1028913	total: 3.48s	remaining: 1.03s
772:	learn: 12.1010773	total: 3.48s	remaining: 1.02s
773:	learn: 12.1003884	total: 3.49s	remaining: 1

935:	learn: 11.4369898	total: 4.19s	remaining: 286ms
936:	learn: 11.4357954	total: 4.19s	remaining: 282ms
937:	learn: 11.4305862	total: 4.2s	remaining: 277ms
938:	learn: 11.4289697	total: 4.2s	remaining: 273ms
939:	learn: 11.4271523	total: 4.21s	remaining: 269ms
940:	learn: 11.4252782	total: 4.21s	remaining: 264ms
941:	learn: 11.4169003	total: 4.22s	remaining: 260ms
942:	learn: 11.4157850	total: 4.22s	remaining: 255ms
943:	learn: 11.4102903	total: 4.22s	remaining: 251ms
944:	learn: 11.4068404	total: 4.23s	remaining: 246ms
945:	learn: 11.4053573	total: 4.23s	remaining: 242ms
946:	learn: 11.4042994	total: 4.24s	remaining: 237ms
947:	learn: 11.4036995	total: 4.24s	remaining: 233ms
948:	learn: 11.3970626	total: 4.25s	remaining: 228ms
949:	learn: 11.3890088	total: 4.25s	remaining: 224ms
950:	learn: 11.3779815	total: 4.25s	remaining: 219ms
951:	learn: 11.3721158	total: 4.26s	remaining: 215ms
952:	learn: 11.3715263	total: 4.26s	remaining: 210ms
953:	learn: 11.3662832	total: 4.27s	remaining: 2

100:	learn: 21.3722415	total: 448ms	remaining: 3.98s
101:	learn: 21.3622735	total: 453ms	remaining: 3.98s
102:	learn: 21.3436062	total: 458ms	remaining: 3.99s
103:	learn: 21.3198658	total: 462ms	remaining: 3.98s
104:	learn: 21.2936509	total: 467ms	remaining: 3.98s
105:	learn: 21.2845090	total: 472ms	remaining: 3.98s
106:	learn: 21.2799425	total: 477ms	remaining: 3.98s
107:	learn: 21.2668328	total: 481ms	remaining: 3.98s
108:	learn: 21.2543351	total: 486ms	remaining: 3.97s
109:	learn: 21.2451068	total: 490ms	remaining: 3.96s
110:	learn: 21.2302347	total: 494ms	remaining: 3.96s
111:	learn: 21.2172079	total: 499ms	remaining: 3.95s
112:	learn: 21.2043509	total: 503ms	remaining: 3.95s
113:	learn: 21.1909011	total: 508ms	remaining: 3.95s
114:	learn: 21.1792200	total: 512ms	remaining: 3.94s
115:	learn: 21.1634688	total: 516ms	remaining: 3.93s
116:	learn: 21.1492948	total: 521ms	remaining: 3.93s
117:	learn: 21.1319860	total: 525ms	remaining: 3.92s
118:	learn: 21.1075713	total: 529ms	remaining:

276:	learn: 18.9579456	total: 1.23s	remaining: 3.21s
277:	learn: 18.9481347	total: 1.23s	remaining: 3.2s
278:	learn: 18.9370791	total: 1.24s	remaining: 3.2s
279:	learn: 18.9155732	total: 1.24s	remaining: 3.2s
280:	learn: 18.9074594	total: 1.25s	remaining: 3.19s
281:	learn: 18.8921105	total: 1.25s	remaining: 3.19s
282:	learn: 18.8797067	total: 1.26s	remaining: 3.19s
283:	learn: 18.8721966	total: 1.26s	remaining: 3.18s
284:	learn: 18.8636169	total: 1.27s	remaining: 3.18s
285:	learn: 18.8501606	total: 1.27s	remaining: 3.18s
286:	learn: 18.8408867	total: 1.28s	remaining: 3.17s
287:	learn: 18.8233143	total: 1.28s	remaining: 3.17s
288:	learn: 18.8113199	total: 1.28s	remaining: 3.16s
289:	learn: 18.7892709	total: 1.29s	remaining: 3.16s
290:	learn: 18.7810702	total: 1.29s	remaining: 3.15s
291:	learn: 18.7755912	total: 1.3s	remaining: 3.15s
292:	learn: 18.7723345	total: 1.3s	remaining: 3.14s
293:	learn: 18.7647261	total: 1.31s	remaining: 3.14s
294:	learn: 18.7582840	total: 1.31s	remaining: 3.13

453:	learn: 17.2961484	total: 2.01s	remaining: 2.42s
454:	learn: 17.2813076	total: 2.02s	remaining: 2.41s
455:	learn: 17.2724995	total: 2.02s	remaining: 2.41s
456:	learn: 17.2665672	total: 2.02s	remaining: 2.41s
457:	learn: 17.2549895	total: 2.03s	remaining: 2.4s
458:	learn: 17.2497108	total: 2.03s	remaining: 2.4s
459:	learn: 17.2463348	total: 2.04s	remaining: 2.39s
460:	learn: 17.2411913	total: 2.04s	remaining: 2.39s
461:	learn: 17.2366709	total: 2.05s	remaining: 2.38s
462:	learn: 17.2270398	total: 2.05s	remaining: 2.38s
463:	learn: 17.2242517	total: 2.06s	remaining: 2.38s
464:	learn: 17.2175947	total: 2.06s	remaining: 2.37s
465:	learn: 17.2112218	total: 2.06s	remaining: 2.37s
466:	learn: 17.2014409	total: 2.07s	remaining: 2.36s
467:	learn: 17.1902681	total: 2.07s	remaining: 2.36s
468:	learn: 17.1869383	total: 2.08s	remaining: 2.35s
469:	learn: 17.1758163	total: 2.08s	remaining: 2.35s
470:	learn: 17.1694851	total: 2.09s	remaining: 2.34s
471:	learn: 17.1562653	total: 2.09s	remaining: 2

625:	learn: 15.9357228	total: 2.79s	remaining: 1.67s
626:	learn: 15.9243927	total: 2.79s	remaining: 1.66s
627:	learn: 15.9217635	total: 2.8s	remaining: 1.66s
628:	learn: 15.9201964	total: 2.8s	remaining: 1.65s
629:	learn: 15.9126513	total: 2.81s	remaining: 1.65s
630:	learn: 15.9062791	total: 2.81s	remaining: 1.64s
631:	learn: 15.9056209	total: 2.82s	remaining: 1.64s
632:	learn: 15.8992460	total: 2.82s	remaining: 1.64s
633:	learn: 15.8960960	total: 2.82s	remaining: 1.63s
634:	learn: 15.8944960	total: 2.83s	remaining: 1.63s
635:	learn: 15.8930324	total: 2.83s	remaining: 1.62s
636:	learn: 15.8911563	total: 2.84s	remaining: 1.62s
637:	learn: 15.8745877	total: 2.84s	remaining: 1.61s
638:	learn: 15.8701931	total: 2.85s	remaining: 1.61s
639:	learn: 15.8676475	total: 2.85s	remaining: 1.6s
640:	learn: 15.8519953	total: 2.85s	remaining: 1.6s
641:	learn: 15.8278919	total: 2.86s	remaining: 1.59s
642:	learn: 15.8230345	total: 2.86s	remaining: 1.59s
643:	learn: 15.8100714	total: 2.87s	remaining: 1.5

791:	learn: 14.8151585	total: 3.57s	remaining: 937ms
792:	learn: 14.8134999	total: 3.57s	remaining: 932ms
793:	learn: 14.8061475	total: 3.58s	remaining: 928ms
794:	learn: 14.8050353	total: 3.58s	remaining: 924ms
795:	learn: 14.8023314	total: 3.59s	remaining: 920ms
796:	learn: 14.7953908	total: 3.59s	remaining: 915ms
797:	learn: 14.7937038	total: 3.6s	remaining: 911ms
798:	learn: 14.7824492	total: 3.6s	remaining: 907ms
799:	learn: 14.7784891	total: 3.61s	remaining: 903ms
800:	learn: 14.7708658	total: 3.62s	remaining: 898ms
801:	learn: 14.7652569	total: 3.62s	remaining: 894ms
802:	learn: 14.7615472	total: 3.63s	remaining: 890ms
803:	learn: 14.7605341	total: 3.63s	remaining: 885ms
804:	learn: 14.7571812	total: 3.64s	remaining: 881ms
805:	learn: 14.7552890	total: 3.64s	remaining: 877ms
806:	learn: 14.7370067	total: 3.65s	remaining: 872ms
807:	learn: 14.7272852	total: 3.65s	remaining: 868ms
808:	learn: 14.7236970	total: 3.66s	remaining: 863ms
809:	learn: 14.7222698	total: 3.66s	remaining: 8

983:	learn: 13.9464069	total: 4.55s	remaining: 74ms
984:	learn: 13.9438180	total: 4.55s	remaining: 69.4ms
985:	learn: 13.9433014	total: 4.56s	remaining: 64.7ms
986:	learn: 13.9414957	total: 4.57s	remaining: 60.1ms
987:	learn: 13.9409724	total: 4.57s	remaining: 55.5ms
988:	learn: 13.9400474	total: 4.58s	remaining: 50.9ms
989:	learn: 13.9343278	total: 4.58s	remaining: 46.3ms
990:	learn: 13.9339361	total: 4.59s	remaining: 41.7ms
991:	learn: 13.9324614	total: 4.59s	remaining: 37ms
992:	learn: 13.9291289	total: 4.6s	remaining: 32.4ms
993:	learn: 13.9258688	total: 4.6s	remaining: 27.8ms
994:	learn: 13.9212961	total: 4.61s	remaining: 23.2ms
995:	learn: 13.9130654	total: 4.61s	remaining: 18.5ms
996:	learn: 13.9091703	total: 4.62s	remaining: 13.9ms
997:	learn: 13.9073112	total: 4.62s	remaining: 9.26ms
998:	learn: 13.9014879	total: 4.63s	remaining: 4.63ms
999:	learn: 13.8938743	total: 4.63s	remaining: 0us
MSE Loss mean = 22.762331252507682 max = 36.218904527727325


In [27]:
model = pipeline.fit(X_train,y_train)

0:	learn: 19.7764070	total: 9.69ms	remaining: 9.68s
1:	learn: 19.6647239	total: 18ms	remaining: 8.96s
2:	learn: 19.5345503	total: 23.8ms	remaining: 7.92s
3:	learn: 19.4238128	total: 29.6ms	remaining: 7.36s
4:	learn: 19.3205995	total: 35.6ms	remaining: 7.09s
5:	learn: 19.2344350	total: 42ms	remaining: 6.96s
6:	learn: 19.1515531	total: 48.6ms	remaining: 6.89s
7:	learn: 19.0713801	total: 54.6ms	remaining: 6.76s
8:	learn: 19.0064909	total: 60.9ms	remaining: 6.7s
9:	learn: 18.9265155	total: 66.4ms	remaining: 6.57s
10:	learn: 18.8434933	total: 72ms	remaining: 6.47s
11:	learn: 18.7716692	total: 77.8ms	remaining: 6.4s
12:	learn: 18.6964653	total: 83.4ms	remaining: 6.33s
13:	learn: 18.6216067	total: 89.6ms	remaining: 6.31s
14:	learn: 18.5558722	total: 95.2ms	remaining: 6.25s
15:	learn: 18.4945377	total: 101ms	remaining: 6.2s
16:	learn: 18.4270062	total: 106ms	remaining: 6.14s
17:	learn: 18.3647673	total: 112ms	remaining: 6.09s
18:	learn: 18.3133125	total: 117ms	remaining: 6.05s
19:	learn: 18.26

172:	learn: 15.6850526	total: 987ms	remaining: 4.72s
173:	learn: 15.6756688	total: 992ms	remaining: 4.71s
174:	learn: 15.6607390	total: 998ms	remaining: 4.71s
175:	learn: 15.6454424	total: 1s	remaining: 4.7s
176:	learn: 15.6430769	total: 1.01s	remaining: 4.7s
177:	learn: 15.6302970	total: 1.01s	remaining: 4.69s
178:	learn: 15.6240856	total: 1.02s	remaining: 4.68s
179:	learn: 15.6132801	total: 1.03s	remaining: 4.68s
180:	learn: 15.6029109	total: 1.03s	remaining: 4.67s
181:	learn: 15.5955922	total: 1.04s	remaining: 4.66s
182:	learn: 15.5881079	total: 1.04s	remaining: 4.65s
183:	learn: 15.5798828	total: 1.05s	remaining: 4.64s
184:	learn: 15.5692674	total: 1.05s	remaining: 4.64s
185:	learn: 15.5612994	total: 1.06s	remaining: 4.63s
186:	learn: 15.5494488	total: 1.06s	remaining: 4.62s
187:	learn: 15.5455957	total: 1.07s	remaining: 4.62s
188:	learn: 15.5361952	total: 1.07s	remaining: 4.61s
189:	learn: 15.5277078	total: 1.08s	remaining: 4.6s
190:	learn: 15.5230485	total: 1.08s	remaining: 4.6s


343:	learn: 14.4665038	total: 1.97s	remaining: 3.75s
344:	learn: 14.4628374	total: 1.97s	remaining: 3.75s
345:	learn: 14.4594730	total: 1.98s	remaining: 3.74s
346:	learn: 14.4567005	total: 1.99s	remaining: 3.74s
347:	learn: 14.4515478	total: 1.99s	remaining: 3.73s
348:	learn: 14.4459566	total: 2s	remaining: 3.73s
349:	learn: 14.4419344	total: 2s	remaining: 3.72s
350:	learn: 14.4380253	total: 2.01s	remaining: 3.71s
351:	learn: 14.4303136	total: 2.02s	remaining: 3.71s
352:	learn: 14.4246266	total: 2.02s	remaining: 3.71s
353:	learn: 14.3879327	total: 2.03s	remaining: 3.7s
354:	learn: 14.3845964	total: 2.03s	remaining: 3.69s
355:	learn: 14.3818483	total: 2.04s	remaining: 3.69s
356:	learn: 14.3754740	total: 2.04s	remaining: 3.68s
357:	learn: 14.3681127	total: 2.05s	remaining: 3.68s
358:	learn: 14.3590299	total: 2.06s	remaining: 3.67s
359:	learn: 14.3533082	total: 2.06s	remaining: 3.67s
360:	learn: 14.3419066	total: 2.07s	remaining: 3.66s
361:	learn: 14.3379607	total: 2.07s	remaining: 3.65s


514:	learn: 13.5128379	total: 2.95s	remaining: 2.77s
515:	learn: 13.5103432	total: 2.95s	remaining: 2.77s
516:	learn: 13.5077377	total: 2.96s	remaining: 2.76s
517:	learn: 13.4986017	total: 2.96s	remaining: 2.76s
518:	learn: 13.4970013	total: 2.97s	remaining: 2.75s
519:	learn: 13.4902670	total: 2.97s	remaining: 2.75s
520:	learn: 13.4887681	total: 2.98s	remaining: 2.74s
521:	learn: 13.4751026	total: 2.98s	remaining: 2.73s
522:	learn: 13.4618696	total: 2.99s	remaining: 2.73s
523:	learn: 13.4535632	total: 3s	remaining: 2.72s
524:	learn: 13.4507952	total: 3s	remaining: 2.71s
525:	learn: 13.4444933	total: 3.01s	remaining: 2.71s
526:	learn: 13.4388727	total: 3.01s	remaining: 2.7s
527:	learn: 13.4355498	total: 3.02s	remaining: 2.7s
528:	learn: 13.4338775	total: 3.02s	remaining: 2.69s
529:	learn: 13.4214788	total: 3.03s	remaining: 2.69s
530:	learn: 13.4202001	total: 3.03s	remaining: 2.68s
531:	learn: 13.4069620	total: 3.04s	remaining: 2.67s
532:	learn: 13.3964605	total: 3.04s	remaining: 2.67s
5

695:	learn: 12.7184055	total: 3.93s	remaining: 1.72s
696:	learn: 12.7173966	total: 3.94s	remaining: 1.71s
697:	learn: 12.7101558	total: 3.94s	remaining: 1.71s
698:	learn: 12.7094086	total: 3.95s	remaining: 1.7s
699:	learn: 12.7015385	total: 3.95s	remaining: 1.69s
700:	learn: 12.6970739	total: 3.96s	remaining: 1.69s
701:	learn: 12.6909249	total: 3.96s	remaining: 1.68s
702:	learn: 12.6885043	total: 3.97s	remaining: 1.68s
703:	learn: 12.6861496	total: 3.97s	remaining: 1.67s
704:	learn: 12.6844341	total: 3.98s	remaining: 1.67s
705:	learn: 12.6801435	total: 3.98s	remaining: 1.66s
706:	learn: 12.6727211	total: 3.99s	remaining: 1.65s
707:	learn: 12.6723379	total: 4s	remaining: 1.65s
708:	learn: 12.6682653	total: 4s	remaining: 1.64s
709:	learn: 12.6677970	total: 4s	remaining: 1.64s
710:	learn: 12.6659458	total: 4.01s	remaining: 1.63s
711:	learn: 12.6574799	total: 4.01s	remaining: 1.62s
712:	learn: 12.6549393	total: 4.02s	remaining: 1.62s
713:	learn: 12.6529580	total: 4.03s	remaining: 1.61s
714

868:	learn: 12.1849364	total: 4.93s	remaining: 743ms
869:	learn: 12.1837264	total: 4.93s	remaining: 737ms
870:	learn: 12.1819757	total: 4.94s	remaining: 731ms
871:	learn: 12.1798710	total: 4.94s	remaining: 726ms
872:	learn: 12.1759079	total: 4.95s	remaining: 720ms
873:	learn: 12.1705958	total: 4.96s	remaining: 714ms
874:	learn: 12.1653723	total: 4.96s	remaining: 709ms
875:	learn: 12.1624996	total: 4.97s	remaining: 703ms
876:	learn: 12.1602576	total: 4.97s	remaining: 697ms
877:	learn: 12.1585143	total: 4.98s	remaining: 692ms
878:	learn: 12.1546774	total: 4.98s	remaining: 686ms
879:	learn: 12.1527414	total: 4.99s	remaining: 680ms
880:	learn: 12.1472105	total: 4.99s	remaining: 674ms
881:	learn: 12.1442956	total: 5s	remaining: 669ms
882:	learn: 12.1368219	total: 5s	remaining: 663ms
883:	learn: 12.1319786	total: 5.01s	remaining: 657ms
884:	learn: 12.1287941	total: 5.01s	remaining: 652ms
885:	learn: 12.1254342	total: 5.02s	remaining: 646ms
886:	learn: 12.1201344	total: 5.03s	remaining: 640ms

In [28]:
X_test = pd.read_csv('data/dengue_features_test.csv')
X_test.head()
X_test = X_test.drop('week_start_date',axis=1)
X_test['year'] = X_test['year'].astype('object')
X_test['weekofyear'] = X_test['weekofyear'].astype('object')

X_test['week_year'] = X_test['city']+"_"+X_test['weekofyear'].astype('str')+"_"+X_test['year'].astype('str')
X_test = X_test.drop(['city','year','weekofyear'],axis=1)

In [29]:
X_test['total_cases'] = model.predict(X_test)

In [30]:
X_test.head()

Unnamed: 0,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,reanalysis_avg_temp_k,reanalysis_dew_point_temp_k,reanalysis_max_air_temp_k,reanalysis_min_air_temp_k,...,reanalysis_sat_precip_amt_mm,reanalysis_specific_humidity_g_per_kg,reanalysis_tdtr_k,station_avg_temp_c,station_diur_temp_rng_c,station_max_temp_c,station_min_temp_c,station_precip_mm,week_year,total_cases
0,-0.0189,-0.0189,0.102729,0.0912,78.6,298.492857,298.55,294.527143,301.1,296.4,...,78.6,15.918571,3.128571,26.528571,7.057143,33.3,21.7,75.2,sj_18_2008,12.770209
1,-0.018,-0.0124,0.082043,0.072314,12.56,298.475714,298.557143,294.395714,300.8,296.7,...,12.56,15.791429,2.571429,26.071429,5.557143,30.0,22.2,34.3,sj_19_2008,10.636212
2,-0.0015,,0.151083,0.091529,3.66,299.455714,299.357143,295.308571,302.2,296.4,...,3.66,16.674286,4.428571,27.928571,7.785714,32.8,22.8,3.0,sj_20_2008,23.102483
3,,-0.019867,0.124329,0.125686,0.0,299.69,299.728571,294.402857,303.0,296.9,...,0.0,15.775714,4.342857,28.057143,6.271429,33.3,24.4,0.3,sj_21_2008,22.566008
4,0.0568,0.039833,0.062267,0.075914,0.76,299.78,299.671429,294.76,302.3,297.3,...,0.76,16.137143,3.542857,27.614286,7.085714,33.3,23.3,84.1,sj_22_2008,33.002181


In [31]:
submission = pd.DataFrame(X_test.week_year.str.split('_').tolist(),
                                 columns = ['city','weekofyear','year'])

In [32]:
submission['total_cases'] = X_test['total_cases'].astype('int64')
submission = submission[['city','year','weekofyear','total_cases']]
submission

Unnamed: 0,city,year,weekofyear,total_cases
0,sj,2008,18,12
1,sj,2008,19,10
2,sj,2008,20,23
3,sj,2008,21,22
4,sj,2008,22,33
...,...,...,...,...
411,iq,2013,22,6
412,iq,2013,23,4
413,iq,2013,24,1
414,iq,2013,25,4


In [34]:
submission.to_csv('submission_catboost.csv', index=False)