In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
%matplotlib inline

In [2]:
# Population per state/department, keyed by the name of that region's data file.
# NOTE(review): the reference year and source of these figures are not recorded
# in this notebook - confirm before reusing them elsewhere.
population = {
    # Mexico
    "Chiapas_2016-2017.csv": 5217908,
    "Colima_2016-2017.csv": 711235,
    "Guerrero_2016-2017.csv": 3533251,
    "Hidalgo_2016-2017.csv": 2858359,
    "NuevoLeon_2016-2017.csv": 5119504,
    "Oaxaca_2016-2017.csv": 3967889,
    "QuintanaRoo_2016-2017.csv": 1501562,
    "Tabasco_2016-2017.csv": 2395272,
    "Veracruz_2016-2017.csv": 8112505,
    "Yucatan_2016-2017.csv": 2097175,

    # Colombia
    "casanare_2016-2017.csv": 356438,
    "cordoba_2016-2017.csv": 1709603,
    "cundinamarca_2016-2017.csv": 2680041,
    "huila_2016-2017.csv": 1154804,
    "meta_2016-2017.csv": 961292,
    "santander_2016-2017.csv": 2061095,
    "santander_norte_2016-2017.csv": 1355723,
    "tolima_2016-2017.csv": 1408274,
    "valle_cauca_2016-2017.csv": 4613377,

    # Brazil
    "Alagoas_2016-2017.csv": 3375823,
    "Bahia_2016-2017.csv": 15344447,
    "Ceara_2016-2017.csv": 9020460,
    "Goias_2016-2017.csv": 6778772,
    "Maranhao_2016-2017.csv": 7000229,
    "MatoGrosso_2016-2017.csv": 3344544,
    "MinasGerais_2016-2017.csv": 21119536,
    "Para_2016-2017.csv": 8366628,
    "RioDeJaneiro_2016-2017.csv": 16718956,
    "SaoPaulo_2016-2017.csv": 45094866,
}

In [3]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted columns.

    Every output row contains the observations of the ``n_in`` preceding
    steps, followed by the observations of the current step and of the
    ``n_out - 1`` steps after it.

    Parameters
    ----------
    data : list or array-like
        Observations in time order. A 2-D input is read as
        (steps, variables); a 1-D input is treated as a single variable.
    n_in : int, default 1
        Number of lagged steps (t-n_in ... t-1) to emit as columns.
    n_out : int, default 1
        Number of forward steps (t ... t+n_out-1) to emit as columns.
    dropnan : bool, default True
        Drop every row that contains a NaN (shifting creates them at the
        start and end of the series).

    Returns
    -------
    pandas.DataFrame
        Columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``, with ``j`` counting variables from 1.
    """
    # The notebook only imports pandas as ``pd``; the bare names DataFrame /
    # concat were never brought into scope, so go through the namespace.
    df = pd.DataFrame(data)
    # Take the variable count from the frame itself so lists, 1-D arrays and
    # 2-D inputs all produce a matching number of column names.
    n_vars = df.shape[1]

    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

In [4]:
def getXY(dataset, scale):
    """Turn a Searches/Cases table into windowed model inputs and targets.

    The ``Searches`` column is divided by 100 and the ``Cases`` column is
    converted to a rate per 100,000 of ``scale``. The two series are then
    framed into 4-step windows with ``series_to_supervised``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain ``Searches`` and ``Cases`` columns; any other column
        (for example a date) is ignored. The frame is not modified.
    scale : number
        Denominator for the per-100,000 case rate.
        NOTE(review): presumably the region's population - confirm at the call site.

    Returns
    -------
    (x, y) : tuple of numpy arrays
        ``x`` has shape (samples, 4, 2) holding the four lagged
        (Searches, Cases) pairs; ``y`` holds the scaled ``Cases`` value of
        the step that follows each window.
    """
    n_weeks = 4
    n_features = 2

    # Select the two feature columns into a fresh frame: the caller's data is
    # left untouched (so repeated calls do not rescale twice) and non-numeric
    # columns cannot break the float cast below.
    features = dataset[["Searches", "Cases"]].astype("float64")
    features["Searches"] = features["Searches"] / 100
    features["Cases"] = features["Cases"] * 100000 / scale

    values = features.values.astype("float32")

    reframed = series_to_supervised(values, n_weeks, 1)
    print("Reframed Shape: ", reframed.shape)
    values = reframed.values

    # Inputs: every lagged column. Target: the last column, i.e. Cases at
    # step t; Searches at step t is dropped.
    x, y = values[:, :-n_features], values[:, -1]

    x = x.reshape((x.shape[0], n_weeks, n_features))  # Reshape as 3-D
    return x, y

In [5]:
def saveModel(model, modelName):
    """Persist a model as ``<modelName>.json`` plus ``<modelName>.h5``.

    The value returned by ``model.to_json()`` is written to the JSON file,
    and ``model.save_weights`` is asked to write the HDF5 file.

    Parameters
    ----------
    model : object
        Anything exposing ``to_json()`` and ``save_weights(path)``.
    modelName : str
        Path prefix shared by both output files.
    """
    architecture_path = "{}.json".format(modelName)
    weights_path = "{}.h5".format(modelName)

    # Serialise the architecture first, then write it out in one go.
    architecture = model.to_json()
    with open(architecture_path, "w") as handle:
        handle.write(architecture)

    # Serialise weights to HDF5.
    model.save_weights(weights_path)

In [6]:
def createModel(modelName):
    """Build, compile and summarise the stacked-LSTM regression network.

    The network takes inputs of shape (4, 2), passes them through four LSTM
    layers (256, 128, 64 and 32 units), then a 256-unit dense layer, and
    ends in a single linear output unit.

    Parameters
    ----------
    modelName : str
        Not used while building the network; kept so existing callers that
        pass a name keep working.

    Returns
    -------
    The compiled model (its summary is printed as a side effect).

    NOTE(review): ``Sequential``, ``LSTM``, ``Dense`` and ``logcosh`` are not
    imported by the notebook's import cell - confirm where they come from.
    """
    model = Sequential()
    # All but the last LSTM return full sequences so the next LSTM layer
    # receives one vector per time step.
    model.add(LSTM(256, activation="relu", input_shape=(4, 2), return_sequences=True))
    model.add(LSTM(128, activation="relu", return_sequences=True))
    model.add(LSTM(64, activation="relu", return_sequences=True))
    model.add(LSTM(32, activation="relu", return_sequences=False))

    model.add(Dense(256, activation="relu"))
    model.add(Dense(1, activation='linear'))

    # NOTE(review): "rmse" does not appear among the metric identifiers listed
    # in the Keras documentation - confirm this compiles, or pass an explicit
    # root-mean-squared-error metric instead.
    model.compile(loss=[logcosh], optimizer="adam", metrics=["rmse"])
    model.summary()
    return model

In [7]:
# Number of leading rows used for training; the remaining rows are the test split.
# NOTE(review): with weekly rows this is presumably the first year - confirm.
TRAIN_WEEKS = 52

for country in ["Mexico", "Brazil", "Colombia"]:
    folder = "../../data/{}/processed_data".format(country)
    # os.listdir returns entries in arbitrary order and may include non-data
    # entries (e.g. checkpoint folders): keep CSV files only, in sorted order,
    # so every run processes the same files in the same sequence.
    files = sorted(name for name in os.listdir(folder) if name.endswith(".csv"))
    for file in files:
        dataset = pd.read_csv(os.path.join(folder, file))
        # The file name identifies the region; the keys of `population` use
        # the same file names.
        state = file

        # Independent copies, so later edits to a split never touch `dataset`.
        train = dataset.iloc[:TRAIN_WEEKS].copy()
        test = dataset.iloc[TRAIN_WEEKS:].copy()

          Date  Searches  Cases
0   02/01/2016       100      9
1   09/01/2016       100      0
2   16/01/2016       100      3
3   23/01/2016        55     11
4   30/01/2016        98     11
5   06/02/2016        89     10
6   13/02/2016        70      9
7   20/02/2016        38      3
8   27/02/2016        58     15
9   05/03/2016        67      3
10  12/03/2016        81      7
11  19/03/2016       100      6
12  26/03/2016        65     14
13  02/04/2016        45     10
14  09/04/2016        67     11
15  16/04/2016       100      1
16  23/04/2016        99      5
17  30/04/2016        73      3
18  07/05/2016       100      0
19  14/05/2016       100      1
20  21/05/2016       100      0
21  28/05/2016       100     10
22  04/06/2016       100      7
23  11/06/2016       100      0
24  18/06/2016       100     59
25  25/06/2016       100     43
26  02/07/2016        59     45
27  09/07/2016        44     55
28  16/07/2016       100     45
29  23/07/2016        75     46
30  30/0

          Date  Searches  Cases
0   03/01/2016        24    105
1   10/01/2016        31    194
2   17/01/2016        24      0
3   24/01/2016        47    573
4   31/01/2016        44    292
5   07/02/2016        44    283
6   14/02/2016        25    231
7   21/02/2016        26    214
8   28/02/2016        27    167
9   06/03/2016        15    198
10  13/03/2016        44    142
11  20/03/2016        18     82
12  27/03/2016        15    166
13  03/04/2016        19    156
14  10/04/2016        18    113
15  17/04/2016        21     77
16  24/04/2016        33     63
17  01/05/2016        16     69
18  08/05/2016        17     66
19  15/05/2016        14     84
20  22/05/2016         4     26
21  29/05/2016        10     36
22  05/06/2016        17     34
23  12/06/2016         4     37
24  19/06/2016         9     29
25  26/06/2016        16      2
26  03/07/2016         1     17
27  10/07/2016        21     14
28  17/07/2016        26     12
29  24/07/2016        10     11
30  31/0