In [82]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from keras import Sequential
from keras.models import Model
from keras.layers import LSTM, Dense, Concatenate, Flatten, Input
from keras.layers.merge import concatenate
from keras.optimizers import Adam
from keras.losses import logcosh
import tensorflow as tf
from keras.utils import plot_model
%matplotlib inline

np.random.seed(2018)

In [59]:
#Population values
population = {
    "Chiapas_2016-2017.csv": 5217908,
    "Colima_2016-2017.csv": 711235,
    "Guerrero_2016-2017.csv": 3533251,
    "Hidalgo_2016-2017.csv": 2858359,
    "NuevoLeon_2016-2017.csv": 5119504,
    "Oaxaca_2016-2017.csv": 3967889,
    "QuintanaRoo_2016-2017.csv": 1501562,
    "Tabasco_2016-2017.csv" : 2395272,
    "Veracruz_2016-2017.csv" : 8112505,
    "Yucatan_2016-2017.csv" : 2097175,
    
    "casanare_2016-2017.csv" : 356438,
    "cordoba_2016-2017.csv" : 1709603,
    "cundinamarca_2016-2017.csv" : 2680041,
    "huila_2016-2017.csv" : 1154804,
    "meta_2016-2017.csv" : 961292,
    "santander_2016-2017.csv" : 2061095,
    "santander_norte_2016-2017.csv" : 1355723,
    "tolima_2016-2017.csv" : 1408274,
    "valle_cauca_2016-2017.csv" : 4613377,
    
    "Alagoas_2016-2017.csv": 3375823,
    "Bahia_2016-2017.csv": 15344447,
    "Ceara_2016-2017.csv": 9020460,
    "Goias_2016-2017.csv": 6778772,
    "Maranhao_2016-2017.csv": 7000229,
    "MatoGrosso_2016-2017.csv": 3344544,
    "MinasGerais_2016-2017.csv": 21119536,
    "Para_2016-2017.csv": 8366628,
    "RioDeJaneiro_2016-2017.csv": 16718956,
    "SaoPaulo_2016-2017.csv": 45094866,
}

In [60]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [61]:
def getXY(dataset, scale):
    dataset[["Searches"]] /= 100
    dataset[["Cases"]] = dataset[["Cases"]].apply(lambda x: x*100000/scale, axis=1)
    
    dataset.drop(["Date"], axis=1, inplace=True)
    values = dataset.values.astype("float32")
    
    

    n_weeks = 4
    n_features = 2

    reframed = series_to_supervised(values, n_weeks, 1)
    values = reframed.values
    print("Reframed Shape: ", reframed.shape)
    totalFeatures = reframed.shape[1]
    n_obs = n_weeks * n_features

    x, y = values[:, :-2], values[:, -1] # Pick last week's cases as y and drop last week's 

    x = x.reshape((x.shape[0], n_weeks, n_features)) # Reshape as 3-D
    return x, y 

In [62]:
def saveModel(model, modelName):
    jsonName = "{}.json".format(modelName)
    h5Name = "{}.h5".format(modelName)

    model_json = model.to_json()
    with open(jsonName, "w") as json_file:
        json_file.write(model_json)
    #seralize weights to HDF5
    model.save_weights(h5Name)

In [1]:
def createModel(modelName):
    jsonName = "{}.json".format(modelName)
    h5Name = "{}.h5".format(modelName)
    
    
    b1_in = Input(shape=(4,2))
    b1_out = LSTM(1024, return_sequences=True)(b1_in)
    b1_out = LSTM(512, activation="relu", return_sequences=True)(b1_out)
    b1_out = LSTM(512, activation="relu", return_sequences=True)(b1_out)
    b1_out = LSTM(512, activation="relu", return_sequences=True)(b1_out)
    
    model1 = Model(b1_in, b1_out)
    
    b2_in = Input(shape=(4,2))
    b2_out = Dense(512, activation="relu")(b2_in)
    b2_out = Flatten(b2_out)
    
    model2 = Model(b2_in, b2_out)
    
    concatenated = concatenate([b1_out, b2_out])
    
    out = Dense(1, activation='linear', name='output_layer')(concatenated)
    
    model = Model([b1_in, b2_in], out)
    
    model.compile(loss=["mse"], optimizer="adam", metrics=["mse"])
    
    return model

In [96]:
for country in ["Mexico", "Brazil", "Colombia"]:
    folder = "../../data/{}/processed_data".format(country)
    files = os.listdir(folder)
    for file in files:
        
        with tf.Session() as sess:
            dataset = pd.read_csv("{}/{}".format(folder, file))
            state = file

            if(not os.path.isdir("{}/{}".format(country, file))):
                os.mkdir("{}/{}".format(country, file))

            train = dataset[:52]
            test = dataset[48:] # Keep 4 previous values to be able to predict all 52 weeks of next year
            x, y = getXY(train, population[file])

            model = createModel(file)
            model.fit(x, y,
                      epochs = 200,
                      batch_size=x.shape[0],
                      verbose=1, 
                      shuffle=False)

            test_x, test_y = getXY(test, population[file])
            predictions = model.predict(test_x)
            #Transform to 1-D
            test_y = test_y.reshape((len(test_y), 1))

            #Rescale
            inv_yPred = np.apply_along_axis(lambda x: x * population[file] / 100000, 1, predictions)
            inv_y = np.apply_along_axis(lambda x: x * population[file] / 100000, 1, test_y)

            test = test[4:] # drop the first 4 values used previously to be able to predict the full year

            test["Cases"] = inv_y
            test["predictions"] = inv_yPred
            test["error"] = test["Cases"] - test["predictions"]
            test.to_csv("{}/{}/Predictions.csv".format(country, file))
            test[["Cases", "predictions"]].plot(figsize=(10,10))
            plt.title(file)
            plt.xlabel("Week")
            plt.ylabel("Cases")
            plt.legend()
            fig = plt.gcf()
            fig.savefig("{}/{}/Obs-Pred.png".format(country, file))
            plt.close("all")

            saveModel(model, "{}/{}/Model".format(country, file))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Reframed Shape:  (48, 10)


KeyboardInterrupt: 

In [97]:
model = createModel("EY")
plot_model(model)

AttributeError: 'Tensor' object has no attribute 'lower'