In [1]:
from pandas import read_csv, DataFrame, concat
from keras import Sequential
from keras.layers import LSTM, Dense, Dropout, TimeDistributed
from keras.optimizers import Adam
from keras.constraints import nonneg
from matplotlib import pyplot
from numpy import concatenate
from math import sqrt
from sklearn.metrics import mean_squared_error
from keras.models import model_from_json
from os.path import isfile
import numpy as np 
import sys
import random
import os
from keras.losses import logcosh

Using TensorFlow backend.


In [2]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted columns.

    Parameters
    ----------
    data : list, 1-D or 2-D array-like, or DataFrame
        Observations ordered in time, one row per time step and one column
        per variable. Anything accepted by ``DataFrame(data)`` works.
    n_in : int
        Number of lagged steps (t-n_in ... t-1) placed first in each row.
    n_out : int
        Number of steps (t ... t+n_out-1) placed after the lagged ones.
    dropnan : bool
        When true, rows left incomplete by the shifting are removed.

    Returns
    -------
    DataFrame
        Columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``, with ``j`` starting at 1.
    """
    df = DataFrame(data)
    # Count the variables on the frame itself: reading data.shape[1] fails for
    # 1-D arrays, and assuming 1 for every list breaks on nested lists.
    n_vars = df.shape[1]
    cols, names = [], []

    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names.extend('var%d(t-%d)' % (j + 1, i) for j in range(n_vars))

    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names.extend('var%d(t)' % (j + 1) for j in range(n_vars))
        else:
            names.extend('var%d(t+%d)' % (j + 1, i) for j in range(n_vars))

    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names

    # drop rows with NaN values introduced by the shifts
    if dropnan:
        agg = agg.dropna()
    return agg

In [3]:
def getXY(filename, scale):
    """Load one CSV and turn it into windowed inputs and targets.

    Parameters
    ----------
    filename : str
        Path of a CSV whose first column is the index and which has
        "Searches" and "Cases" columns.
    scale : number
        Divisor used to convert "Cases" into a rate per 100,000
        (callers in this notebook pass the population for the file).

    Returns
    -------
    (x, y)
        ``x`` has shape (samples, 4, 2): the four preceding time steps of
        every variable. ``y`` is the last column of the current time step
        (the case rate, assuming "Cases" is the last data column - TODO confirm).
    """
    n_weeks = 4     # look-back window: time steps per sample
    n_features = 2  # variables per time step; the reshape below requires the
                    # CSV to have exactly this many data columns

    dataset = read_csv(filename, header=0, index_col=0)
    # Column-wise arithmetic replaces the former row-by-row apply().
    dataset["Searches"] = dataset["Searches"] / 100
    dataset["Cases"] = dataset["Cases"] * 100000 / scale

    values = dataset.values.astype("float32")

    reframed = series_to_supervised(values, n_weeks, 1)
    values = reframed.values
    print("Reframed Shape: ", reframed.shape)

    # Inputs are the lagged steps only: the trailing n_features columns hold
    # the current step, whose last column is used as the target.
    x, y = values[:, :-n_features], values[:, -1]

    x = x.reshape((x.shape[0], n_weeks, n_features))  # Reshape as 3-D
    return x, y

In [4]:
def saveModel(model, modelName):
    """Persist a model as two files sharing the prefix ``modelName``.

    ``<modelName>.json`` receives the string returned by ``model.to_json()``
    and ``<modelName>.h5`` is written by ``model.save_weights``.
    """
    architecturePath = "{0}.json".format(modelName)
    weightsPath = "{0}.h5".format(modelName)

    # Serialize the architecture first, then store it as text.
    architecture = model.to_json()
    with open(architecturePath, "w") as handle:
        handle.write(architecture)

    # Serialize the weights to HDF5.
    model.save_weights(weightsPath)


In [5]:
def saveModel(model, modelName):
    """Write ``<modelName>.json`` (architecture) and ``<modelName>.h5`` (weights).

    NOTE(review): the previous cell defines an identical ``saveModel``; this
    later definition is the one in effect once both cells have run.
    """
    jsonPath, weightsPath = (
        "{}.{}".format(modelName, extension) for extension in ("json", "h5")
    )

    # Architecture as JSON text.
    serialized = model.to_json()
    with open(jsonPath, "w") as out:
        out.write(serialized)

    # Weights as HDF5.
    model.save_weights(weightsPath)

In [6]:
def loadOrCreateModel(modelName):
    """Return a compiled model, restored from disk when possible.

    When both ``<modelName>.json`` and ``<modelName>.h5`` exist, the
    architecture is rebuilt from the JSON file and the weights are loaded from
    the HDF5 file. Otherwise a new stacked-LSTM network taking inputs of shape
    (4, 2) is built and its summary printed. Either way the model is compiled
    with the log-cosh loss, the "adam" optimizer and an "mse" metric.
    """
    architectureFile = "{0}.json".format(modelName)
    weightsFile = "{0}.h5".format(modelName)

    if not isfile(architectureFile) or not isfile(weightsFile):
        # At least one file is missing: build a fresh network.
        network = Sequential()
        network.add(LSTM(256, input_shape=(4, 2), return_sequences=True))
        network.add(LSTM(64, activation="relu", return_sequences=True))
        network.add(LSTM(32, activation="relu", return_sequences=False))

        network.add(Dense(128, activation="relu"))
        # Single linear output unit whose kernel is constrained to be non-negative.
        network.add(Dense(1, activation='linear', kernel_constraint=nonneg()))
        network.compile(loss=[logcosh], optimizer="adam", metrics=["mse"])
        network.summary()
        return network

    # Both files are present: restore architecture, then weights.
    with open(architectureFile, "r") as handle:
        architecture = handle.read()

    network = model_from_json(architecture)
    network.load_weights(weightsFile)
    network.compile(loss=[logcosh], optimizer="adam", metrics=["mse"])
    return network

In [7]:
# Population per data file, keyed by the CSV file name. The value is passed to
# getXY as the scale for its per-100,000 conversion and used again to convert
# predictions back to counts.
# NOTE(review): the source and reference year of these figures are not recorded here.
population = {
    "Alagoas.csv": 3408510,
    "Bahia.csv": 15126371,
    "Ceara.csv": 8842791,
    "Goias.csv": 6523222,
    "Maranhao.csv": 6850884,
    "MatoGrosso.csv": 3224357,
    "MinasGerais.csv": 20734097,
    "Para.csv": 8073924,
    "RioDeJaneiro.csv": 16461173,
    "SaoPaulo.csv": 44035304,
}

In [8]:
def ExportPredictions(model, folder, test_files, data_folder=None):
    """Write a "Predictions-<file>" CSV of real vs. predicted values per test file.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(x)``.
    folder : str
        Existing directory that receives the output files.
    test_files : iterable of str
        File names to evaluate; each must be a key of ``population``.
    data_folder : str, optional
        Directory holding the input CSVs. When omitted, the notebook-level
        global ``dataFolder`` is used, as before this parameter existed.

    Each output file has the header ``Value,Prediction,Error`` followed by one
    row per sample, where Error is prediction minus real value.
    """
    if data_folder is None:
        # Backward-compatible fallback for callers that rely on the global.
        data_folder = dataFolder

    for file in test_files:
        scale = population[file]
        x, y = getXY("{}/{}".format(data_folder, file), scale)

        # Predict before opening the output so a failure here does not leave
        # a header-only file behind.
        predictions = np.asarray(model.predict(x))

        # Undo the per-100,000 scaling applied in getXY. Flattening to 1-D
        # makes every element a scalar, so float() never receives an array.
        inv_yPred = (predictions * scale / 100000).reshape(-1)
        inv_y = (y * scale / 100000).reshape(-1)

        outputName = "Predictions-{}".format(file)
        with open("{}/{}".format(folder, outputName), "w") as outputFile:
            outputFile.write("Value,Prediction,Error\n")
            for real, predicted in zip(inv_y, inv_yPred):
                yReal = float(real)
                yPred = float(predicted)
                outputFile.write("{},{},{}\n".format(yReal, yPred, yPred-yReal))

In [9]:
# Run 10 independent experiments. Each one draws a random 7/3 train/test split
# of the state files, trains a model on the training files one after another,
# then plots and exports predictions for the held-out files.

# Kept as a module-level name: ExportPredictions falls back to this global.
dataFolder = "../../../data/Brazil/processed_data/stateFiles"

for i in range(10):
    experimentFolder = "Experiments/{}".format(i)
    # makedirs with exist_ok creates the parent folder when needed and lets
    # the cell be re-run without failing on an existing directory.
    os.makedirs(experimentFolder, exist_ok=True)

    # Splits the files into 7-Train 3-Test. Entries without a known
    # population are skipped, since population[file] would raise KeyError.
    files = [name for name in os.listdir(dataFolder) if name in population]
    train_files = random.sample(files, 7)
    test_files = [name for name in files if name not in train_files]

    # NOTE(review): this looks for "Model<i>.json"/".h5" in the working
    # directory, while saveModel below writes "Experiments/<i>/model-<i>.*",
    # so a model saved by this loop is never picked up here - confirm intent.
    model = loadOrCreateModel("Model{}".format(i))

    for file in train_files:
        x, y = getXY("{}/{}".format(dataFolder, file), population[file])
        # One batch per epoch (batch_size is the full sample count), in order.
        model.fit(x, y,
                  epochs=10,
                  batch_size=x.shape[0],
                  verbose=1,
                  shuffle=False)

    # Evaluate model
    for file in test_files:
        x, y = getXY("{}/{}".format(dataFolder, file), population[file])
        predictions = model.predict(x)
        y = y.reshape((len(y), 1))

        # Undo the per-100,000 scaling applied in getXY.
        inv_yPred = predictions * population[file] / 100000
        inv_y = y * population[file] / 100000

        rmse = sqrt(mean_squared_error(inv_y, inv_yPred))

        print('Test RMSE: %.3f' % rmse)
        print("Total", sum(inv_y))
        print("len", len(inv_y))
        pyplot.clf()
        pyplot.title("Cases {} RMSE: {:.2f}".format(file, rmse))
        pyplot.ylabel("Cases")
        pyplot.xlabel("Week #")
        pyplot.plot(inv_y, label="Cases")
        pyplot.plot(inv_yPred, label="Predictions")
        pyplot.legend()
        pngName = file.split(".")[0]
        pyplot.savefig('Experiments/{}/{}.png'.format(i, pngName))

    # Saving and exporting cover the whole experiment, so they run once per
    # experiment rather than once per test file.
    saveModel(model, "Experiments/{0}/model-{0}".format(i))
    ExportPredictions(model, experimentFolder, test_files)
    

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 4, 256)            265216    
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 64)             82176     
_________________________________________________________________
lstm_3 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dense_1 (Dense)              (None, 128)               4224      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 364,161
Trainable params: 364,161
Non-trainable params: 0
_________________________________________________________________
Reframed Shape:  (45, 10)
Epoch 1/10


InternalError: Blas GEMM launch failed : a.shape=(45, 256), b.shape=(256, 256), m=45, n=256, k=256
	 [[Node: lstm_1/while/MatMul_4 = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](lstm_1/while/Identity_2, lstm_1/while/MatMul_4/Enter)]]
	 [[Node: loss/mul/_159 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_4838_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'lstm_1/while/MatMul_4', defined at:
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\ipykernel\kernelapp.py", line 478, in start
    self.io_loop.start()
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\IPython\core\interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-9eda3b265e41>", line 8, in <module>
    model = loadOrCreateModel("Model{}".format(i))
  File "<ipython-input-6-9dea7c07e4d4>", line 18, in loadOrCreateModel
    model.add(LSTM(256, input_shape=(4, 2), return_sequences=True))
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\models.py", line 467, in add
    layer(x)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\layers\recurrent.py", line 488, in __call__
    return super(RNN, self).__call__(inputs, **kwargs)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\engine\topology.py", line 617, in __call__
    output = self.call(inputs, **kwargs)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\layers\recurrent.py", line 2032, in call
    initial_state=initial_state)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\layers\recurrent.py", line 595, in call
    input_length=timesteps)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\backend\tensorflow_backend.py", line 2764, in rnn
    swap_memory=True)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2623, in while_loop
    result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2456, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2406, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\backend\tensorflow_backend.py", line 2750, in _step
    tuple(constants))
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\layers\recurrent.py", line 586, in step
    return self.cell.call(inputs, states, **kwargs)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\layers\recurrent.py", line 1826, in call
    self.recurrent_kernel_i))
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\keras\backend\tensorflow_backend.py", line 1072, in dot
    out = tf.matmul(x, y)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1801, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1263, in _mat_mul
    transpose_b=transpose_b, name=name)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tensorflow\python\framework\ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "E:\Aplicaciones\Anaconda\envs\Py2\lib\site-packages\tensorflow\python\framework\ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(45, 256), b.shape=(256, 256), m=45, n=256, k=256
	 [[Node: lstm_1/while/MatMul_4 = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](lstm_1/while/Identity_2, lstm_1/while/MatMul_4/Enter)]]
	 [[Node: loss/mul/_159 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_4838_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
