In [145]:
import pandas as pd
import numpy as np
%matplotlib inline
# from keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
# from keras.models import Sequential
# from keras.layers import Dense
# from keras.layers import LSTM
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Dropout
from tensorflow.keras.models import Sequential
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline

import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# Iterating over the list (rather than indexing [0]) keeps this cell runnable on a
# CPU-only machine, where the list is empty, and applies the same setting to every
# GPU that is present.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

In [146]:
# Never truncate columns when a frame is displayed.
pd.set_option('display.max_columns', None)

# Training features and labels live in separate files; merge them on the
# column names they have in common (the default behaviour of merge).
features_train = pd.read_csv('dengue_features_train.csv')
labels_train = pd.read_csv('dengue_labels_train.csv')
train = features_train.merge(labels_train)

# The test features are indexed by their parsed week start date right at load time.
test = pd.read_csv(
    'dengue_features_test.csv',
    index_col='week_start_date',
    parse_dates=True,
)

sample_submission = pd.read_csv('submission_format.csv')

In [147]:
# Index the training frame by a datetime week_start_date, matching how `test` was loaded.
train = train.set_index('week_start_date')
train.index = pd.to_datetime(train.index)

# The calendar columns are dropped from both frames.
calendar_columns = ['year', 'weekofyear']
train = train.drop(columns=calendar_columns)
test = test.drop(columns=calendar_columns)

In [148]:
# Preview the first rows of the merged training frame, now indexed by week_start_date.
train.head()

Unnamed: 0_level_0,city,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,reanalysis_avg_temp_k,reanalysis_dew_point_temp_k,reanalysis_max_air_temp_k,reanalysis_min_air_temp_k,reanalysis_precip_amt_kg_per_m2,reanalysis_relative_humidity_percent,reanalysis_sat_precip_amt_mm,reanalysis_specific_humidity_g_per_kg,reanalysis_tdtr_k,station_avg_temp_c,station_diur_temp_rng_c,station_max_temp_c,station_min_temp_c,station_precip_mm,total_cases
week_start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1990-04-30,sj,0.1226,0.103725,0.198483,0.177617,12.42,297.572857,297.742857,292.414286,299.8,295.9,32.0,73.365714,12.42,14.012857,2.628571,25.442857,6.9,29.4,20.0,16.0,4
1990-05-07,sj,0.1699,0.142175,0.162357,0.155486,22.82,298.211429,298.442857,293.951429,300.9,296.4,17.94,77.368571,22.82,15.372857,2.371429,26.714286,6.371429,31.7,22.2,8.6,5
1990-05-14,sj,0.03225,0.172967,0.1572,0.170843,34.54,298.781429,298.878571,295.434286,300.5,297.3,26.1,82.052857,34.54,16.848571,2.3,26.714286,6.485714,32.2,22.8,41.4,4
1990-05-21,sj,0.128633,0.245067,0.227557,0.235886,15.36,298.987143,299.228571,295.31,301.4,297.0,13.9,80.337143,15.36,16.672857,2.428571,27.471429,6.771429,33.3,23.3,4.0,3
1990-05-28,sj,0.1962,0.2622,0.2512,0.24734,7.52,299.518571,299.664286,295.821429,301.9,297.5,12.2,80.46,7.52,17.21,3.014286,28.942857,9.371429,35.0,23.9,5.8,6


In [149]:
# Chronological hold-out split per city. The training slice ends exactly where the
# validation slice begins, so no row belongs to both sets. This matters because the
# forecasting cell seeds its input window with the last rows of the training data
# and scores its first prediction against the first validation row; the two sets
# must therefore be adjacent, not overlapping.
SJ_TRAIN_ROWS = 832
IQ_TRAIN_ROWS = 416

# Select each city's rows and drop the now-constant 'city' column. drop() returns a
# new frame, so nothing is mutated in place on a slice of `train`.
sj_rows = train[train['city'] == 'sj'].drop(columns=['city'])
iq_rows = train[train['city'] == 'iq'].drop(columns=['city'])

# NOTE(review): only the training frames are rounded to 2 decimals; the validation
# frames keep full precision. Confirm this asymmetry is intended.
train_sj = sj_rows.iloc[:SJ_TRAIN_ROWS].round(2)
val_sj = sj_rows.iloc[SJ_TRAIN_ROWS:]

train_iq = iq_rows.iloc[:IQ_TRAIN_ROWS].round(2)
val_iq = iq_rows.iloc[IQ_TRAIN_ROWS:]

In [150]:
def _build_preprocess_pipeline():
    """Return an unfitted imputer -> min-max scaler pipeline.

    The step names are kept as 'encoder' (the imputer) and 'model' (the scaler)
    because a later cell retrieves the scaler through ``named_steps.model``.
    """
    steps = [
        ('encoder', SimpleImputer()),
        ('model', MinMaxScaler()),
    ]
    return Pipeline(steps)


sj_preprocess_pipeline = _build_preprocess_pipeline()
iq_preprocess_pipeline = _build_preprocess_pipeline()

# San Juan is modelled on the total_cases column alone, so its pipeline is fitted
# on that single column. The pipeline is fitted on training rows only and then
# applied unchanged to the validation rows.
sj_case_columns = ['total_cases']
train_sj_scaled = sj_preprocess_pipeline.fit_transform(train_sj[sj_case_columns])
val_sj_scaled = sj_preprocess_pipeline.transform(val_sj[sj_case_columns])

# Iquitos keeps every column of its frames.
train_iq_scaled = iq_preprocess_pipeline.fit_transform(train_iq)
val_iq_scaled = iq_preprocess_pipeline.transform(val_iq)


In [151]:
# Number of consecutive timesteps in each input window handed to the model.
length_sj = 52
# Number of windows per batch.
batch_size_sj = 1

# The scaled series is both the input and the target: each sample is a window of
# `length_sj` rows paired with the row that immediately follows it.
generator = TimeseriesGenerator(
    data=train_sj_scaled,
    targets=train_sj_scaled,
    length=length_sj,
    batch_size=batch_size_sj,
    shuffle=False,
)


In [152]:
# What does the first batch look like?
# Sanity check: pull the first batch and show its input window next to its target.
first_batch = generator[0]
X, y = first_batch

print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n {y}')

Given the Array: 
[0.00867679 0.01084599 0.00867679 0.00650759 0.01301518 0.00433839
 0.00867679 0.01084599 0.02169197 0.01301518 0.01735358 0.00433839
 0.01301518 0.03687636 0.04989154 0.02819957 0.04555315 0.06073753
 0.05206074 0.04338395 0.0867679  0.05856833 0.09110629 0.07158351
 0.09327549 0.0802603  0.12364425 0.15401302 0.09544469 0.12147505
 0.11496746 0.11279826 0.10195228 0.05639913 0.05856833 0.04555315
 0.04555315 0.05639913 0.07375271 0.0802603  0.03687636 0.04121475
 0.05422993 0.03904555 0.04555315 0.03687636 0.03687636 0.03470716
 0.03470716 0.03253796 0.04989154 0.03470716]
Predict this y: 
 [[0.03687636]]


In [153]:
from tensorflow.keras import backend as K


def root_mean_squared_error(y_true, y_pred):
    """Root-mean-squared error between targets and predictions (training loss).

    Args:
        y_true: tensor of target values.
        y_pred: tensor of model outputs, broadcast-compatible with ``y_true``.

    Returns:
        Scalar tensor equal to sqrt(mean((y_pred - y_true) ** 2)) over all elements.
    """
    return K.sqrt(K.mean(K.square(y_pred - y_true)))


# Bidirectional LSTM over windows of `length_sj` timesteps, followed by a dense
# layer that emits one value per input feature (the next timestep).
model = Sequential()

# input_shape is declared on the Bidirectional wrapper, the layer Sequential
# actually receives first. Declared on the inner LSTM it is not seen by
# Sequential, the model stays unbuilt until the first fit call, and
# model.summary() cannot be used beforehand.
model.add(Bidirectional(LSTM(52, activation='tanh'),
                        input_shape=(length_sj, train_sj_scaled.shape[1])))
model.add(Dense(train_sj_scaled.shape[1]))
model.compile(optimizer='adam', loss=root_mean_squared_error)

In [154]:
# model.summary()

In [155]:
from tensorflow.keras.callbacks import EarlyStopping

# Validation windows are built the same way as the training windows, but from the
# scaled hold-out rows.
validation_generator = TimeseriesGenerator(
    data=val_sj_scaled,
    targets=val_sj_scaled,
    length=length_sj,
    batch_size=batch_size_sj,
    shuffle=False,
)

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [156]:
# Model.fit accepts the generator directly; fit_generator is deprecated.
# The epoch count is only an upper bound: early stopping on val_loss decides when
# training actually ends. The History object is kept so the loss curves can be
# inspected afterwards.
history = model.fit(
    generator,
    epochs=9999,
    validation_data=validation_generator,
    callbacks=[early_stop],
)

Epoch 1/9999
Epoch 2/9999
Epoch 3/9999
Epoch 4/9999
Epoch 5/9999
Epoch 6/9999
Epoch 7/9999
Epoch 8/9999
Epoch 9/9999
Epoch 10/9999
Epoch 11/9999
Epoch 12/9999
Epoch 13/9999
Epoch 14/9999
Epoch 15/9999
Epoch 16/9999
Epoch 17/9999
Epoch 18/9999
Epoch 19/9999
Epoch 20/9999
Epoch 21/9999
Epoch 22/9999
Epoch 23/9999
Epoch 24/9999
Epoch 25/9999
Epoch 26/9999


<tensorflow.python.keras.callbacks.History at 0x2ce7e00d308>

In [157]:

# Recursive multi-step forecast: start from the last `length_sj` scaled training
# rows, predict one step, slide the window forward by appending that prediction,
# and repeat once per validation row.
n_features = train_sj_scaled.shape[1]

first_eval_batch = train_sj_scaled[-length_sj:]
current_batch = first_eval_batch.reshape((1, length_sj, n_features))

test_predictions = []
for step in range(len(val_sj)):
    # predict() returns one row per batch entry; the batch holds a single window.
    current_pred = model.predict(current_batch)[0]
    test_predictions.append(current_pred)

    # Drop the oldest timestep and append the new prediction as the newest one.
    newest_step = current_pred.reshape((1, 1, n_features))
    current_batch = np.concatenate((current_batch[:, 1:, :], newest_step), axis=1)

In [158]:
# Stack the per-step prediction vectors into one 2-D array (one row per forecast step).
test_predictions_np = np.asarray(test_predictions)

In [159]:
# Column 0 of the stacked forecasts, still in scaled units.
test_predictions_np[:, 0]

array([0.0086325 , 0.01062397, 0.01260706, 0.01464124, 0.0167148 ,
       0.01880207, 0.02085591, 0.02284581, 0.02475389, 0.0265401 ,
       0.02818285, 0.02967738, 0.03100898, 0.0321903 , 0.03323029,
       0.03408096, 0.03480609, 0.0353791 , 0.03583276, 0.03618838,
       0.0364674 , 0.03665981, 0.03680613, 0.03687101, 0.03688344,
       0.03685574, 0.03680082, 0.03672044, 0.03663418, 0.03653886,
       0.03642761, 0.03632633, 0.03621841, 0.03612359, 0.0360275 ,
       0.03594192, 0.03585498, 0.0357733 , 0.03571061, 0.03565358,
       0.03560273, 0.03556535, 0.03552866, 0.0354979 , 0.03548106,
       0.03546629, 0.03545485, 0.03544772, 0.03544672, 0.03544636,
       0.03544888, 0.03545384, 0.03545968, 0.03546621, 0.03547315,
       0.03548019, 0.03548707, 0.03549355, 0.03549946, 0.0355047 ,
       0.03550916, 0.03551283, 0.03551568, 0.03551776, 0.03551909,
       0.03551972, 0.03551973, 0.0355192 , 0.03551821, 0.03551684,
       0.03551518, 0.0355133 , 0.03551127, 0.03550916, 0.03550

In [160]:
# Column 0 of the scaled validation targets, for comparison with the forecasts above.
val_sj_scaled[:, 0]


array([0.00650759, 0.01518438, 0.00650759, 0.01084599, 0.02386117,
       0.01084599, 0.01084599, 0.01301518, 0.01301518, 0.00867679,
       0.00867679, 0.01735358, 0.03036876, 0.02603037, 0.03470716,
       0.02169197, 0.03470716, 0.03904555, 0.03253796, 0.04989154,
       0.03687636, 0.07158351, 0.03253796, 0.02819957, 0.02386117,
       0.03036876, 0.03687636, 0.04121475, 0.04338395, 0.02603037,
       0.04555315, 0.01518438, 0.04121475, 0.02169197, 0.02819957,
       0.02169197, 0.01735358, 0.04555315, 0.02386117, 0.01952278,
       0.03036876, 0.03036876, 0.03253796, 0.03904555, 0.03470716,
       0.02603037, 0.04338395, 0.01735358, 0.00650759, 0.02819957,
       0.00867679, 0.0021692 , 0.02169197, 0.01735358, 0.02819957,
       0.02169197, 0.04555315, 0.03904555, 0.04555315, 0.07375271,
       0.05422993, 0.07375271, 0.07158351, 0.0867679 , 0.09110629,
       0.07809111, 0.15618221, 0.1626898 , 0.164859  , 0.19956616,
       0.15401302, 0.24295011, 0.22993492, 0.21908894, 0.36876

In [161]:
# The pipeline step named 'model' is the fitted MinMaxScaler; use it to map the
# scaled forecasts and the scaled validation targets back to original units.
sj_scaler = sj_preprocess_pipeline.named_steps['model']

true_predictions = sj_scaler.inverse_transform(test_predictions_np)
true_validation = sj_scaler.inverse_transform(val_sj_scaled)


In [162]:
from sklearn.metrics import mean_absolute_error

# Persist the trained network first, so that the metric below is the cell's last
# expression and is actually rendered (previously its value was computed and then
# discarded because model.save() followed it).
model.save('saved_model/my_model')

# Mean absolute error between validation targets and forecasts, in original units.
mean_absolute_error(true_validation[:, 0], true_predictions[:, 0])

INFO:tensorflow:Assets written to: saved_model/my_model\assets


In [163]:
# Last column of the validation targets after inverse scaling.
true_validation[:, -1]

array([  3.,   7.,   3.,   5.,  11.,   5.,   5.,   6.,   6.,   4.,   4.,
         8.,  14.,  12.,  16.,  10.,  16.,  18.,  15.,  23.,  17.,  33.,
        15.,  13.,  11.,  14.,  17.,  19.,  20.,  12.,  21.,   7.,  19.,
        10.,  13.,  10.,   8.,  21.,  11.,   9.,  14.,  14.,  15.,  18.,
        16.,  12.,  20.,   8.,   3.,  13.,   4.,   1.,  10.,   8.,  13.,
        10.,  21.,  18.,  21.,  34.,  25.,  34.,  33.,  40.,  42.,  36.,
        72.,  75.,  76.,  92.,  71., 112., 106., 101., 170., 135., 106.,
        68.,  48.,  48.,  26.,  33.,  29.,  17.,  12.,  13.,  17.,  15.,
        14.,  15.,  10.,   9.,   2.,   6.,   8.,   5.,   1.,   2.,   3.,
         4.,   3.,   1.,   3.,   5.])

In [164]:
# Last column of the forecasts after inverse scaling.
true_predictions[:, -1]


array([ 3.9795816,  4.8976507,  5.811854 ,  6.74961  ,  7.7055235,
        8.667755 ,  9.614573 , 10.5319195, 11.411544 , 12.234984 ,
       12.992292 , 13.681272 , 14.29514  , 14.839728 , 15.319161 ,
       15.711324 , 16.045607 , 16.309765 , 16.518902 , 16.68284  ,
       16.811472 , 16.900171 , 16.967627 , 16.997538 , 17.003263 ,
       16.990496 , 16.965178 , 16.928123 , 16.88836  , 16.844414 ,
       16.793129 , 16.746439 , 16.696688 , 16.652975 , 16.608677 ,
       16.569223 , 16.529146 , 16.49149  , 16.46259  , 16.436298 ,
       16.412859 , 16.395624 , 16.378712 , 16.364532 , 16.356768 ,
       16.349962 , 16.344685 , 16.341398 , 16.340937 , 16.340773 ,
       16.341932 , 16.344221 , 16.346912 , 16.34992  , 16.35312  ,
       16.356369 , 16.359539 , 16.362526 , 16.365253 , 16.367664 ,
       16.369724 , 16.371414 , 16.37273  , 16.373686 , 16.374298 ,
       16.374592 , 16.374596 , 16.374352 , 16.373894 , 16.373262 ,
       16.3725   , 16.371634 , 16.370697 , 16.369722 , 16.3687

In [165]:
# MAE between inverse-scaled validation targets and forecasts on the last column.
mean_absolute_error(true_validation[:, -1], true_predictions[:, -1])


16.115208719785397

In [166]:
# Number of forecast steps produced (one per validation row in the loop above).
len(true_predictions[:, -1])

104