In [1]:
import gzip
import shutil
import requests
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler 

In [2]:
url = 'https://data.brasil.io/dataset/covid19/caso_full.csv.gz'

In [3]:
def download_data():
    r = requests.get(url, allow_redirects=True)
    open('data.csv.gz', 'wb').write(r.content)

def unzip_data():
    with gzip.open('data.csv.gz', 'rb') as f_in:
        with open('data.csv', 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

In [4]:
download_data()
unzip_data()

In [4]:
data = pd.read_csv('data.csv')
data = data[(data['state'] == 'SP') & data['city'].isna()]

In [5]:
data.head(5)

Unnamed: 0,city,city_ibge_code,date,epidemiological_week,estimated_population,estimated_population_2019,is_last,is_repeated,last_available_confirmed,last_available_confirmed_per_100k_inhabitants,last_available_date,last_available_death_rate,last_available_deaths,order_for_place,place_type,state,new_confirmed,new_deaths
2019613,,35.0,2020-02-25,202009,46289333.0,45919049.0,False,False,1,0.00216,2020-02-25,0.0,0,1,state,SP,1,0
2019615,,35.0,2020-02-26,202009,46289333.0,45919049.0,False,False,1,0.00216,2020-02-26,0.0,0,2,state,SP,0,0
2019617,,35.0,2020-02-27,202009,46289333.0,45919049.0,False,False,1,0.00216,2020-02-27,0.0,0,3,state,SP,0,0
2019619,,35.0,2020-02-28,202009,46289333.0,45919049.0,False,False,2,0.00432,2020-02-28,0.0,0,4,state,SP,1,0
2019621,,35.0,2020-02-29,202009,46289333.0,45919049.0,False,False,2,0.00432,2020-02-29,0.0,0,5,state,SP,0,0


In [6]:
def split_sequence(sequence, passos):
    X, y = list(), list()
    for i in range(0,len(sequence)):
        fim = i + passos
        if(fim > len(sequence)-1):
            break
        seq_x = sequence[i:fim]
        seq_y = sequence[fim][0]
        X.append(seq_x)
        y.append(seq_y)
    return(np.array(X),np.array(y))

In [7]:
passos = 10
X, y = split_sequence(data[['last_available_confirmed','estimated_population']].to_numpy(),passos=passos)

In [8]:
X[0]

array([[1.0000000e+00, 4.6289333e+07],
       [1.0000000e+00, 4.6289333e+07],
       [1.0000000e+00, 4.6289333e+07],
       [2.0000000e+00, 4.6289333e+07],
       [2.0000000e+00, 4.6289333e+07],
       [2.0000000e+00, 4.6289333e+07],
       [2.0000000e+00, 4.6289333e+07],
       [2.0000000e+00, 4.6289333e+07],
       [3.0000000e+00, 4.6289333e+07],
       [6.0000000e+00, 4.6289333e+07]])

In [9]:
y[0]

10.0

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y)

In [11]:
X_train

array([[[ 1777368., 46289333.],
        [ 1779722., 46289333.],
        [ 1794019., 46289333.],
        ...,
        [ 1849334., 46289333.],
        [ 1851776., 46289333.],
        [ 1864977., 46289333.]],

       [[  765670., 46289333.],
        [  776135., 46289333.],
        [  784453., 46289333.],
        ...,
        [  814375., 46289333.],
        [  826331., 46289333.],
        [  837978., 46289333.]],

       [[ 3630251., 46289333.],
        [ 3648202., 46289333.],
        [ 3669196., 46289333.],
        ...,
        [ 3727348., 46289333.],
        [ 3743291., 46289333.],
        [ 3762758., 46289333.]],

       ...,

       [[ 2164066., 46289333.],
        [ 2179786., 46289333.],
        [ 2195130., 46289333.],
        ...,
        [ 2261360., 46289333.],
        [ 2280033., 46289333.],
        [ 2298061., 46289333.]],

       [[ 1184496., 46289333.],
        [ 1191290., 46289333.],
        [ 1200348., 46289333.],
        ...,
        [ 1224744., 46289333.],
        [ 1229267.

In [12]:
#mm = MinMaxScaler()

#X_train = mm.inverse_transform(X_train.reshape(-1,1)).reshape(X_train.shape)
#X_test = mm.inverse_transform(X_test.reshape(-1,1)).reshape(X_test.shape)

In [13]:
n_features = 2
X_train = X_train.reshape(X_train.shape[0],X_train.shape[1],n_features)

In [14]:
X_train

array([[[ 1777368., 46289333.],
        [ 1779722., 46289333.],
        [ 1794019., 46289333.],
        ...,
        [ 1849334., 46289333.],
        [ 1851776., 46289333.],
        [ 1864977., 46289333.]],

       [[  765670., 46289333.],
        [  776135., 46289333.],
        [  784453., 46289333.],
        ...,
        [  814375., 46289333.],
        [  826331., 46289333.],
        [  837978., 46289333.]],

       [[ 3630251., 46289333.],
        [ 3648202., 46289333.],
        [ 3669196., 46289333.],
        ...,
        [ 3727348., 46289333.],
        [ 3743291., 46289333.],
        [ 3762758., 46289333.]],

       ...,

       [[ 2164066., 46289333.],
        [ 2179786., 46289333.],
        [ 2195130., 46289333.],
        ...,
        [ 2261360., 46289333.],
        [ 2280033., 46289333.],
        [ 2298061., 46289333.]],

       [[ 1184496., 46289333.],
        [ 1191290., 46289333.],
        [ 1200348., 46289333.],
        ...,
        [ 1224744., 46289333.],
        [ 1229267.

In [15]:
model = keras.Sequential([
            keras.layers.LSTM(100,activation=tf.nn.relu,input_shape=(passos,n_features),return_sequences=True),
            keras.layers.LSTM(50,activation=tf.nn.relu,return_sequences=True),
            keras.layers.LSTM(50,activation=tf.nn.relu),
            keras.layers.Dense(1,activation='linear')
        ])
model.compile(optimizer='adam',loss='mse',metrics=['mse'])

In [16]:
model.fit(X_train,y_train,epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fdc49f9ab70>

In [17]:
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1],n_features)

In [18]:
model.predict(X_test)

array([[1577085.2],
       [1546697. ],
       [1565123. ],
       [1577037.2],
       [1556566.9],
       [1555541.5],
       [1553423.5],
       [1570405. ],
       [1575050.2],
       [1558270.8],
       [2859789. ],
       [1545205.8],
       [2857456. ],
       [1558005.9],
       [1558350.2],
       [1530918. ],
       [1577018.8],
       [1562656.8],
       [1559032. ],
       [1577081.2],
       [2857897.8],
       [1576318.8],
       [1544462. ],
       [1569157.9],
       [1575742.8],
       [1550646.1],
       [1558956.2],
       [2857562. ],
       [1560894.1],
       [1576843.2],
       [1572825.5],
       [1543511.5],
       [1558108. ],
       [1559767. ],
       [1560796.5],
       [1566226.4],
       [1544275.5],
       [1544015.4],
       [1577094.8],
       [2855920. ],
       [1548734.8],
       [1577086.8],
       [1566485.8],
       [1562761.8],
       [2860378.5],
       [1563525. ],
       [1574397. ],
       [1564606. ],
       [1548384.9],
       [1563064. ],
