# Research notebook: 
## The quest for better machine learning models to forecast COVID-19-related infections: A case study in the state of Pará-Brazil
## authors: Renato Hidaka Torres; Wilson Rogério Soares; Orlando Ohashi; Gustavo Pessin

# Create dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns;
import numpy as np

data = pd.read_csv("../data/dataset_new.csv")

def create_dataset():
    """Add day-over-day increment columns to the global ``data`` frame and save it.

    Reads the cumulative ``'confirmados'`` and ``'mortes'`` columns, stores the
    difference between each row and the previous one in ``'novos casos'`` and
    ``'novas mortes'``, and overwrites ``../data/dataset_new.csv``.
    """
    confirmed = data['confirmados'].values
    deaths = data['mortes'].values

    # The first row has no predecessor, so its increment is fixed at 0.
    case_steps = [later - earlier for earlier, later in zip(confirmed, confirmed[1:])]
    death_steps = [later - earlier for earlier, later in zip(deaths, deaths[1:])]

    data['novos casos'] = [0] + case_steps
    data['novas mortes'] = [0] + death_steps
    data.to_csv('../data/dataset_new.csv', index = False)


# Create window

In [None]:
def create_window_size(N):
    """Pair each row of the global ``data`` with the target N rows later.

    Returns a tuple ``(X, y)``: ``X`` is a copy of every row of ``data`` except
    the last ``N``; ``y`` is the ``'confirmados'`` column with its first ``N``
    entries dropped.
    """
    feature_rows = data[:-N].values
    shifted_target = data['confirmados'][N:].values
    return np.array(feature_rows), shifted_target

# Grid Search CNN

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Convolution1D
from preprocessing import create_dataset as pre
from keras.layers import Flatten
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasRegressor
import numpy as np
from sklearn import metrics

def create_model(L, F):
    """Return a zero-argument builder for a compiled 1-D CNN regressor.

    Args:
        L: total number of convolutional layers (values below 1 still yield one).
        F: number of filters in every convolutional layer.
    """
    def cnn():
        # First layer fixes the input shape; the remaining L-1 layers repeat it.
        stack = [Convolution1D(F, 2, input_shape=(8,1), activation='relu')]
        stack.extend(Convolution1D(F, 2, activation='relu') for _ in range(1, L))
        stack.append(Flatten())
        stack.append(Dense(1))

        net = Sequential(stack)
        net.compile(loss='mse', optimizer='adam', metrics=['mse'])
        return net
    return cnn

# Grid search for the CNN: for every window, try each depth (L) and filter
# count (N) and report errors on the 10 most recent samples.
for w in range(1, 11):
    X, y = pre.create_window_size(w)

    # Append a trailing axis so every sample is 2-D, as the Conv1D input needs.
    X = np.expand_dims(X, axis=2)
    print(X.shape)

    # The last 10 samples are held out for the printed metrics.
    X_train = X[:len(X)-10]
    X_val = X[len(X)-10:]
    y_train = y[:len(y)-10]
    y_val = y[len(y)-10:]

    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.33)

    print('window=',w)
    for L in range(1,7):
        for N in range(10, 70, 10):
            model = KerasRegressor(build_fn=create_model(L,N), epochs=100, batch_size=15, verbose=0)
            model.fit(X_train, y_train, validation_data=(X_test, y_test))

            # Keep the individual runs so the spread is measured across them;
            # previously the std was taken from the already-averaged vector.
            runs = np.array([model.predict(X_val) for _ in range(100)])
            y_std = np.std(runs, axis=0)
            y_pred = [int(x) for x in np.mean(runs, axis=0)]

            print('L=', L, 'N=', N)

            # Metrics are reported on the raw (un-normalised) case counts.
            print('Mean Absolute Error:', metrics.mean_absolute_error(y_val, y_pred))
            print('Mean Squared Error:', metrics.mean_squared_error(y_val, y_pred))
            print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_val, y_pred)))
            print('R2:', metrics.r2_score(y_val, y_pred))
            # Public API path; the value is explained variance, so label it as such.
            print('Explained Variance Score:', metrics.explained_variance_score(y_val, y_pred))
            print()

# Grid Search Multilayer Perceptron

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from preprocessing import create_dataset as pre
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasRegressor
import numpy as np
from sklearn import metrics


def create_model(L, F):
    """Return a zero-argument builder for a compiled stack of Dense layers.

    Args:
        L: total number of hidden Dense layers (values below 1 still yield one).
        F: number of units in every hidden layer.
    """
    def mlp():
        hidden = [Dense(F)] + [Dense(F) for _ in range(1, L)]

        net = Sequential(hidden + [Dense(1)])
        net.compile(loss='mse', optimizer='adam', metrics=['mse'])
        return net
    return mlp


# Grid search for the MLP: for every window, try each depth (L) and layer
# width (N) and report min-max-normalised errors on the 10 most recent samples.
for w in range(1, 11):
    X, y = pre.create_window_size(w)

    # The last 10 samples are held out for the printed metrics.
    X_train = X[:len(X)-10]
    X_val =  X[len(X)-10:]
    y_train = y[:len(y)-10]
    y_val =  y[len(y)-10:]

    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.33)

    # Normalisation bounds come from the hold-out targets and do not depend on
    # the model, so they are computed once per window.
    maior = max(y_val)
    menor = min(y_val)
    y_val_norm = (np.asarray(y_val) - menor) / (maior - menor)

    print('window=',w)
    for L in range(1,7):
        for N in range(10, 70, 10):
            model = KerasRegressor(build_fn=create_model(L,N), epochs=100, batch_size=15, verbose=0)
            model.fit(X_train, y_train, validation_data=(X_test, y_test))

            # Keep the individual runs so the spread is measured across them;
            # previously the std was taken from the already-averaged vector.
            runs = np.array([model.predict(X_val) for _ in range(100)])
            y_std = np.std(runs, axis=0)
            y_pred = [int(x) for x in np.mean(runs, axis=0)]

            # Use dedicated names: the old code overwrote y_test (the validation
            # split reused by every later fit) and then scored raw targets
            # against normalised predictions.
            y_pred_norm = (np.asarray(y_pred) - menor) / (maior - menor)

            print('L=', L, 'N=', N)

            print('Mean Absolute Error:', metrics.mean_absolute_error(y_val_norm, y_pred_norm))
            print('Mean Squared Error:', metrics.mean_squared_error(y_val_norm, y_pred_norm))
            print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_val_norm, y_pred_norm)))
            print('R2:', metrics.r2_score(y_val_norm, y_pred_norm))
            # Public API path; the value is explained variance, so label it as such.
            print('Explained Variance Score:', metrics.explained_variance_score(y_val_norm, y_pred_norm))
            print()

# Grid Search Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from preprocessing import create_dataset as pre
from sklearn.model_selection import GridSearchCV

# Exhaustive hyper-parameter search for Gradient Boosting, one search per window.
for w in range(1, 11):
    X, y = pre.create_window_size(w)

    # The last 10 samples are set aside and not used by the search.
    X_train, X_test = X[:len(X)-10], X[len(X)-10:]
    y_train, y_test = y[:len(y)-10], y[len(y)-10:]

    regressor = GradientBoostingRegressor()

    param_grid = {
        'n_estimators': [75,100,125,150,175,200],
        'min_samples_split': range(5,45,5),
        'max_depth': range(2,10,2),
        'learning_rate': [0.1, 0.2, 0.3, 0.4, 0.5],
    }

    gs = GridSearchCV(
        estimator=regressor,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=10,
        n_jobs=1,
    )
    gs = gs.fit(X_train, y_train)

    # Best (negated) MSE and the parameter combination that achieved it.
    print("window", w)
    print(gs.best_score_)
    print(gs.best_params_)
    print()

# Grid Search Random Forest

In [None]:

from sklearn.ensemble import RandomForestRegressor
from preprocessing import create_dataset as pre
from sklearn.model_selection import GridSearchCV

# Exhaustive hyper-parameter search for Random Forest, one search per window.
for w in range(1, 11):
    X, y = pre.create_window_size(w)

    # The last 10 samples are set aside and not used by the search.
    X_train, X_test = X[:len(X)-10], X[len(X)-10:]
    y_train, y_test = y[:len(y)-10], y[len(y)-10:]

    regressor = RandomForestRegressor()

    param_grid = {
        'n_estimators': [75,100,125,150,175,200],
        'min_samples_split': range(5,45,5),
        'max_depth': range(2,10,2),
        'criterion': ['mse', 'mae'],
        'max_features': ['sqrt', 'log2'],
    }

    gs = GridSearchCV(
        estimator=regressor,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=10,
        n_jobs=1,
    )
    gs = gs.fit(X_train, y_train)

    # Best (negated) MSE and the parameter combination that achieved it.
    print("window", w)
    print(gs.best_score_)
    print(gs.best_params_)
    print()

# Grid Search SVM

In [None]:
from sklearn.svm import LinearSVR
from preprocessing import create_dataset as pre
from sklearn.model_selection import GridSearchCV


# Search the LinearSVR regularisation strength C, one search per window.
for w in range(1,11):
    X, y = pre.create_window_size(w)

    # The last 10 samples are set aside and not used by the search.
    X_train, X_test = X[:len(X)-10], X[len(X)-10:]
    y_train, y_test = y[:len(y)-10], y[len(y)-10:]

    regressor = LinearSVR()

    # Candidate values of C, one decade apart.
    param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
    param_grid = [{'C': param_range}]

    gs = GridSearchCV(
        estimator=regressor,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=10,
        n_jobs=1,
    )
    gs = gs.fit(X_train, y_train)

    # Best (negated) MSE and the C that achieved it.
    print("window", w)
    print(gs.best_score_)
    print(gs.best_params_)
    print()

# Train Test CNN

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Convolution1D
from preprocessing import create_dataset as pre
from keras.layers import Flatten
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasRegressor
import numpy as np
from sklearn import metrics


def create_model(L = 1, N=60):
    """Return a zero-argument builder for the final CNN regressor.

    Args:
        L: total number of convolutional layers (values below 1 still yield one).
        N: number of filters in every convolutional layer.
    """
    def cnn():
        # Original remark on this layer: "120 or 60" (presumably alternative
        # filter counts that were tried).
        stack = [Convolution1D(N, 3, input_shape=(8,1), activation='relu')]
        # Follow-up layers use a kernel of 2 and no activation argument.
        stack.extend(Convolution1D(N,2) for _ in range(1, L))
        stack.append(Flatten())
        stack.append(Dense(1))

        net = Sequential(stack)
        net.compile(loss='mse', optimizer='adam', metrics=['mse'])
        return net
    return cnn


def train_test(L, N, window):
    """Train the CNN for one window and print hold-out predictions and metrics.

    Args:
        L: number of convolutional layers, forwarded to ``create_model``.
        N: number of filters per layer, forwarded to ``create_model``.
        window: value forwarded to ``pre.create_window_size``.

    The last 10 samples are held out. Their integer-truncated predictions are
    printed, then error metrics are printed after min-max scaling both targets
    and predictions by the range of the held-out targets.
    """
    X, y = pre.create_window_size(window)
    # Append a trailing axis so every sample is 2-D, as the Conv1D input needs.
    X = np.expand_dims(X, axis=2)
    X_train = X[:len(X)-10]
    X_val =  X[len(X)-10:]
    y_train = y[:len(y)-10]
    y_val =  y[len(y)-10:]

    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.33)

    model = KerasRegressor(build_fn=create_model(L, N), epochs=150, batch_size=15, verbose=0)
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=150, batch_size=15, verbose=0)

    y_pred = [int(x) for x in model.predict(X_val)]

    print("CNN"+str(window),"=", y_pred)

    # Min-max scale with the bounds of the true hold-out values.
    maior = max(y_val)
    menor = min(y_val)
    y_val = (np.asarray(y_val) - menor) / (maior - menor)
    y_pred = (np.asarray(y_pred) - menor) / (maior - menor)

    print('Mean Absolute Error:', metrics.mean_absolute_error(y_val, y_pred))
    print('Mean Squared Error:', metrics.mean_squared_error(y_val, y_pred))
    print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_val, y_pred)))
    print('R2:', metrics.r2_score(y_val, y_pred))
    # Public API path; the value is explained variance, so label it as such.
    print('Explained Variance Score:', metrics.explained_variance_score(y_val, y_pred))


# Per-window settings: position i holds the layer count / width used for
# window i + 1 (presumably the winners of the grid search).
layers = [6, 4, 3, 4, 5, 5, 6, 6, 4, 5]
neuron = [40, 50, 60, 60, 60, 20, 60, 60, 10, 60]

for w, (L, N) in enumerate(zip(layers, neuron), start=1):
    train_test(L, N, window=w)



# Train Test Multilayer Perceptron

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from preprocessing import create_dataset as pre
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
import numpy as np
from sklearn import metrics


def create_model(L=1, N=60):
    """Return a zero-argument builder for the final Dense-only regressor.

    Args:
        L: number of hidden Dense layers (0 gives just the output layer).
        N: number of units in every hidden layer.
    """
    def mlp():
        hidden = [Dense(N) for _ in range(L)]

        net = Sequential(hidden + [Dense(1)])
        net.compile(loss='mse', optimizer='adam', metrics=['mse'])
        return net
    return mlp


def train_test(L, N, window):
    """Train the MLP for one window and print hold-out predictions and metrics.

    Args:
        L: number of hidden layers, forwarded to ``create_model``.
        N: number of units per hidden layer, forwarded to ``create_model``.
        window: value forwarded to ``pre.create_window_size``.

    The last 10 samples are held out. Prints the 10-fold cross-validation
    scores on the training split, the integer-truncated hold-out predictions,
    the standard deviation of the absolute errors, and error metrics on the
    raw values.
    """
    X, y = pre.create_window_size(window)

    X_train = X[:len(X) - 10]
    X_val = X[len(X) - 10:]
    y_train = y[:len(y) - 10]
    y_val = y[len(y) - 10:]

    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.33)

    # Insert a length-1 middle axis: (samples, features) -> (samples, 1, features).
    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
    X_val = np.reshape(X_val, (X_val.shape[0], 1, X_val.shape[1]))

    model = KerasRegressor(build_fn=create_model(L, N), epochs=500, batch_size=32, verbose=0)
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=500, batch_size=32, verbose=0)
    scores = cross_val_score(model, X_train, y_train, cv=10)

    print("MLP", scores)

    y_pred = model.predict(X_val)
    # Spread of the absolute errors, measured before truncating to integers.
    y_std = np.std([abs(yt - yp) for yt, yp in zip(y_val, y_pred)])

    y_pred = [int(x) for x in y_pred]

    print('Window=', window)
    print("MLP"+str(window),"=", y_pred)
    print(y_std)

    print('Mean Absolute Error:', metrics.mean_absolute_error(y_val, y_pred))
    print('Mean Squared Error:', metrics.mean_squared_error(y_val, y_pred))
    print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_val, y_pred)))
    print('R2:', metrics.r2_score(y_val, y_pred))
    # Public API path; the value is explained variance, so label it as such.
    print('Explained Variance Score:', metrics.explained_variance_score(y_val, y_pred))

    
# Reuses the per-window `layers` / `neuron` lists defined in an earlier cell.
for w, L, N in zip(range(1, 11), layers, neuron):
    train_test(L, N, window=w)

# Train Test Gradient Boosting

In [None]:
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from preprocessing import create_dataset as pre


def train_test(window = 1):
    """Fit Gradient Boosting for one window and print hold-out metrics.

    Args:
        window: value forwarded to ``pre.create_window_size``.

    The last 10 samples are held out. Prints the 10-fold cross-validation
    scores on the training part and error metrics of the integer-truncated
    hold-out predictions.
    """
    X, y = pre.create_window_size(window)

    X_train = X[:len(X) - 10]
    X_val = X[len(X) - 10:]
    y_train = y[:len(y) - 10]
    y_val = y[len(y) - 10:]

    regressor = GradientBoostingRegressor()#Here: insert hyperparameters selected on Grid Search
    regressor.fit(X_train, y_train)
    scores = cross_val_score(regressor, X_train, y_train, cv=10)

    print("CV", scores)

    y_pred = [int(x) for x in regressor.predict(X_val)]

    print('Window=', window)
    print('Mean Absolute Error:', metrics.mean_absolute_error(y_val, y_pred))
    print('Mean Squared Error:', metrics.mean_squared_error(y_val, y_pred))
    print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_val, y_pred)))
    print('R2:', metrics.r2_score(y_val, y_pred))
    # Public API path; the value is explained variance, so label it as such.
    print('Explained Variance Score:', metrics.explained_variance_score(y_val, y_pred))
    print()


# Evaluate Gradient Boosting on every window from 1 to 10.
for window in range(1, 11):
    train_test(window=window)




# Train Test Random Forest

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from preprocessing import create_dataset as pre


def train_test(window = 1):
    """Fit Random Forest for one window and print hold-out metrics.

    Args:
        window: value forwarded to ``pre.create_window_size``.

    The last 10 samples are held out. Prints the 10-fold cross-validation
    scores on the training part and error metrics of the integer-truncated
    hold-out predictions.
    """
    X, y = pre.create_window_size(window)

    X_train = X[:len(X) - 10]
    X_val = X[len(X) - 10:]
    y_train = y[:len(y) - 10]
    y_val = y[len(y) - 10:]

    regressor = RandomForestRegressor()#Here: insert hyperparameters selected on Grid Search
    regressor.fit(X_train, y_train)
    scores = cross_val_score(regressor, X_train, y_train, cv=10)

    print("CV", scores)

    y_pred = [int(x) for x in regressor.predict(X_val)]

    print('Window=', window)
    print('Mean Absolute Error:', metrics.mean_absolute_error(y_val, y_pred))
    print('Mean Squared Error:', metrics.mean_squared_error(y_val, y_pred))
    print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_val, y_pred)))
    print('R2:', metrics.r2_score(y_val, y_pred))
    # Public API path; the value is explained variance, so label it as such.
    print('Explained Variance Score:', metrics.explained_variance_score(y_val, y_pred))
    print()


# Evaluate Random Forest on every window from 1 to 10.
for window in range(1, 11):
    train_test(window=window)




# Train Test SVM

In [None]:
import numpy as np
from sklearn.svm import LinearSVR
from sklearn import metrics
from preprocessing import create_dataset as pre
from sklearn.model_selection import cross_val_score


def train_test(window=1, c=1):
    """Fit a LinearSVR for one window and print hold-out predictions and metrics.

    Args:
        window: value forwarded to ``pre.create_window_size``.
        c: regularisation parameter ``C`` of ``LinearSVR``.

    The last 10 samples are held out. Prints the 10-fold cross-validation
    scores on the training part, the integer-truncated hold-out predictions,
    and error metrics after min-max scaling both targets and predictions by
    the range of the held-out targets.
    """
    X, y = pre.create_window_size(window)

    X_train = X[:len(X)-10]
    X_test = X[len(X)-10:]
    y_train = y[:len(y)-10]
    y_test = y[len(y)-10:]

    regressor = LinearSVR(C=c)
    regressor.fit(X_train, y_train)

    scores = cross_val_score(regressor, X_train, y_train, cv=10)
    # Tag matches the other scikit-learn cells; "MLP" was a copy-paste leftover.
    print("CV", scores)

    y_pred = [int(x) for x in regressor.predict(X_test)]

    print("SVM"+str(window),"=", y_pred)

    # Min-max scale with the bounds of the true hold-out values.
    maior = max(y_test)
    menor = min(y_test)
    y_test = (np.asarray(y_test) - menor) / (maior - menor)
    y_pred = (np.asarray(y_pred) - menor) / (maior - menor)

    print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
    print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    print('R2:', metrics.r2_score(y_test, y_pred))
    # Public API path; the value is explained variance, so label it as such.
    print('Explained Variance Score:', metrics.explained_variance_score(y_test, y_pred))

    
# Per-window C values: position i is used for window i + 1
# (presumably the winners of the grid search).
C = [0.001, 1, 1, 1, 0.1, 0.1, 0.1, 0.001, 0.1, 0.1]
for window, c in enumerate(C, start=1):
    train_test(window=window, c=c)

# Train Test Auto Regression

In [None]:
import pandas as pd
from statsmodels.tsa.ar_model import AutoReg

# Fit an AR(1) model on the confirmed-case series without its last 10 rows.
data = pd.read_csv("../data/dataset_new.csv")
df = pd.DataFrame({'y': data['confirmados'][:len(data)-10]})

model = AutoReg(df['y'].values, lags=1)
model_fit = model.fit()

# Predictions from the first observation onwards, one value per line.
print(len(data))
yhat = model_fit.predict(start=0)
print("\n".join(str(value) for value in yhat))
print(len(yhat))

# NOTE(review): 55..64 are presumably the 10 steps right after the training
# slice; confirm they line up with len(df) if the dataset changes.
yhat = model_fit.predict(start=55, end=64)
print(list(yhat))

# Train Test Prophet

In [None]:
import pandas as pd
from fbprophet import Prophet

data = pd.read_csv("../data/dataset_new.csv")

# Prophet is fitted on a frame with a datetime column "ds" and a value column
# "y"; the series excludes the last 10 rows, which are kept for comparison.
df = pd.DataFrame()
df['y'] = data['confirmados'][:len(data)-10]
# NOTE(review): the CSV's own date column is not visible from this cell, so
# "ds" is rebuilt assuming one row per consecutive day with training ending on
# 2020-05-11 (the day before the first hold-out date, 12/05/2020, used in this
# notebook) — TODO confirm, or replace with the dataset's real dates.
df['ds'] = pd.date_range(end='2020-05-11', periods=len(df), freq='D')

model = Prophet(seasonality_mode='multiplicative')
model.add_seasonality(name="yearly", period=365.25, fourier_order=10)
# The original call passed `soma`, a name that is never defined in this notebook.
model.fit(df);
future = model.make_future_dataframe(periods=10, freq = 'd')
forecast = model.predict(future)
# A bare expression in the middle of a cell shows nothing, so print it.
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
model.plot(forecast,uncertainty=True);

# Prediction window of the observed test data

In [None]:
import matplotlib.pyplot as plt

# Reference series for the 10 plotted days (drawn below with the label 'True').
y_test =  [9059.0, 10344.0, 11479.0, 12626.0, 13464.0, 14201.0, 15467.0, 17177.0, 18929.0, 20537.0]

# Linear Regression outputs; the numeric suffix is presumably the window size
# (LR10 is the one drawn under the title 'Window 10').
LR1 = [8666, 9588, 10913, 12234, 13531, 14579, 15508, 16757, 18499, 20299]
LR2 = [8494, 9085, 9936, 11268, 12755, 14189, 15445, 16435, 17638, 19385]
LR3 = [8471, 9048, 9522, 10130, 11449, 13000, 14589, 15973, 17039, 18316]
LR4 = [9098, 9710, 10298, 11308, 12676, 14308, 15975, 17532, 18864, 19921]
LR5 = [8782, 9758, 10663, 11040, 11963, 13484, 15232, 17184, 18897, 20489]
LR6 = [8429, 9554, 10516, 10918, 11758, 12686, 13615, 15206, 17001, 18847]
LR7 = [9556, 9146, 10559, 11573, 11816, 12876, 13735, 14094, 15541, 17488]
LR8 = [9267, 9844, 10116, 11415, 12367, 12696, 13691, 14522, 15459, 17344]
LR9 = [8616, 9912, 11103, 10943, 12254, 13142, 13686, 14460, 14886, 15406]
LR10 = [8600, 9013, 10735, 11721, 11604, 13212, 13832, 13660, 14955, 15132]


# Linear SVM outputs, one list per window (suffix presumably the window size);
# the lists match the "SVM<window> = [...]" lines printed by the SVM cell.
SVM1 = [8851, 9914, 11304, 12579, 13848, 14807, 15664, 17009, 18850, 20756]
SVM2 = [8776, 9486, 10514, 11948, 13419, 14852, 16043, 16981, 18308, 20200]
SVM3 = [8777, 9137, 9725, 10674, 12053, 13723, 15265, 16718, 17774, 18918]
SVM4 = [8756, 9353, 9795, 10412, 11494, 13025, 14733, 16355, 17790, 18831]
SVM5 = [10037, 10958, 11762, 12250, 13002, 14398, 16309, 18461, 20472, 22270]
SVM6 = [8774, 9685, 10617, 11390, 11852, 12551, 13804, 15556, 17613, 19546]
SVM7 = [10084, 10325, 11399, 12503, 13424, 13939, 14780, 16141, 18060, 20421]
SVM8 = [10155, 9889, 11104, 12405, 13537, 13951, 15147, 16607, 18628, 21196]
SVM9 = [9703, 11103, 11915, 12180, 13359, 14683, 15991, 16399, 17254, 18962]
SVM10 = [9936, 10873, 12673, 12932, 13649, 15032, 16409, 17455, 18220, 19347]

# Presumably Random Forest outputs per window; none of these lists is drawn
# by the plot at the end of this cell.
RF1 = [7464.1, 7464.1, 7464.1, 7438.5, 7438.5, 7035.1, 7438.5, 7464.1, 7464.1, 7464.1]
RF2 = [6864.6, 6864.6, 6864.6, 6864.6, 6864.6, 6864.6, 6723.3, 6723.3, 6864.6, 6864.6]
RF3 = [6755.9, 8069.0, 7996.9, 7996.9, 7996.9, 7924.8, 7924.8, 7536.2, 7536.2, 7996.9]
RF4 = [5991.2, 5888.3, 5991.2, 5991.2, 5991.2, 5991.2, 5991.2, 5991.2, 5991.2, 5991.2]
RF5 = [7769.8, 7769.8, 7130.5, 7769.8, 7769.8, 7769.8, 7769.8, 7769.8, 7769.8, 7769.8]
RF6 = [7348.2, 7329.3, 7329.3, 5930.7, 7329.3, 7329.3, 7329.3, 7329.3, 7329.3, 7329.3]
RF7 = [7545.6, 7545.6, 7487.2, 7487.2, 7545.6, 7487.2, 7487.2, 7487.2, 7487.2, 7487.2]
RF8 = [6045.1, 5083.9, 5824.3, 5824.3, 5824.3, 5280.2, 5824.3, 5824.3, 5824.3, 5824.3]
RF9 = [6686.0, 6628.7, 6615.9, 6615.9, 6821.2, 6821.2, 6686.0, 6821.2, 6821.2, 6821.2]
RF10 = [7810.2, 7065.1, 7396.4, 7396.4, 7551.4, 7810.2, 7810.2, 6687.5, 7810.2, 7810.2]

# Presumably Gradient Boosting outputs per window; none of these lists is
# drawn by the plot at the end of this cell.
GB1 = [7871.3881846296335, 7871.3881846296335, 7871.3881846296335, 7710.409492846894, 7710.409492846894, 7707.083925167276, 7710.409492846894, 7871.3881846296335, 7871.3881846296335, 7871.3881846296335]
GB2 = [7837.998960945312, 7837.998960945312, 7837.998960945312, 7837.998960945312, 7837.998960945312, 7837.998960945312, 7837.998960945312, 7837.998960945312, 7837.998960945312, 7837.998960945312]
GB3 = [6680.092299232183, 6626.087673621603, 6626.087673621603, 6626.087673621603, 6626.087673621603, 6626.087673621603, 6626.087673621603, 6626.087673621603, 6626.087673621603, 6626.087673621603]
GB4 = [8064.369990171665, 7327.439788652379, 8064.369990171665, 8011.8279573819245, 8011.8279573819245, 8011.8279573819245, 8011.8279573819245, 8011.8279573819245, 7907.179868792137, 8011.8279573819245]
GB5 = [7498.880402259262, 7498.880402259262, 7451.377593613323, 7498.880402259262, 7498.880402259262, 7498.880402259262, 7498.880402259262, 7498.880402259262, 7498.880402259262, 7498.880402259262]
GB6 = [7442.358979848454, 7737.7596058309555, 7737.7596058309555, 7787.695732973893, 7737.7596058309555, 7737.7596058309555, 7737.7596058309555, 7737.7596058309555, 7737.7596058309555, 7737.7596058309555]
GB7 = [7355.683403081797, 8023.93002916676, 7726.296442082717, 7726.296442082717, 7036.19592708027, 7726.296442082717, 7726.296442082717, 7726.296442082717, 7726.296442082717, 7726.296442082717]
GB8 = [7701.865588902501, 7585.568905332606, 7592.50608774121, 7626.499871798507, 7626.499871798507, 7349.139470345359, 7626.499871798507, 7626.499871798507, 7626.499871798507, 7626.499871798507]
GB9 = [7412.081136901952, 7237.123392831925, 7728.376972157348, 7902.562347150866, 8068.733425200793, 8068.733425200793, 7158.961894077294, 8068.733425200793, 8068.733425200793, 8068.733425200793]
GB10 = [7330.046472502023, 7238.018765427782, 7330.046472502023, 7320.199684299847, 7330.046472502023, 7347.804086449134, 7347.804086449134, 7183.218527487421, 7347.804086449134, 7347.804086449134]

# Convolutional Neural Network outputs, one list per window (suffix presumably
# the window size); CNN10 is the one drawn under the title 'Window 10'.
CNN1 = [8902, 9987, 11446, 12736, 13993, 14885, 16284, 17685, 19529, 21010]
CNN2 = [8207, 8971, 9932, 11216, 12439, 13595, 14427, 16236, 17474, 19108]
CNN3 = [8612, 9127, 9985, 11047, 12487, 13953, 15306, 16369, 18300, 19616]
CNN4 = [8897, 9251, 9906, 10853, 12051, 13620, 15023, 16384, 17295, 19395]
CNN5 = [8783, 9391, 9774, 10391, 11316, 12500, 14015, 15424, 16799, 17773]
CNN6 = [9344, 10301, 11002, 11386, 12376, 13636, 15312, 17576, 19460, 21326]
CNN7 = [8538, 9612, 10524, 11215, 11615, 12457, 13615, 15139, 17120, 18862]
CNN8 = [9172, 9024, 10409, 11492, 12296, 12606, 13756, 15181, 17039, 19475]
CNN9 = [9206, 10197, 11151, 12511, 13740, 14752, 15626, 16764, 18457, 20609]
CNN10 = [8613, 9236, 10487, 10778, 12001, 13170, 14118, 14767, 15773, 17218]

# Multilayer Perceptron outputs, one list per window (suffix presumably the
# window size); MLP10 is the one drawn under the title 'Window 10'.
MLP1 = [9014, 10081, 11514, 12833, 14136, 15118, 16158, 17536, 19411, 21241]
MLP2 = [8534, 9340, 10441, 11857, 13254, 14586, 15662, 16740, 18039, 19877]
MLP3 = [8701, 9210, 10026, 11161, 12656, 14223, 15688, 16932, 18150, 19475]
MLP4 = [9158, 9476, 10323, 11395, 12778, 14620, 16191, 17774, 18847, 20392]
MLP5 = [9089, 9937, 10314, 11243, 12466, 13927, 15875, 17595, 19316, 20543]
MLP6 = [9086, 10155, 10995, 11313, 12423, 13768, 15418, 17691, 19584, 21522]
MLP7 = [8531, 10046, 11263, 12187, 12299, 13646, 15130, 17111, 19650, 21477]
MLP8 = [10042, 9419, 10709, 11994, 13052, 13267, 14512, 15899, 17845, 20287]
MLP9 = [9152, 10426, 10889, 11701, 13005, 14279, 15069, 16057, 17554, 19499]
MLP10 = [9806, 10709, 12343, 12640, 13540, 15019, 16563, 17572, 18580, 20188]

# Series drawn below with the label 'AutoRegression'.
AR = [8005.5756863892475, 8788.678820126106, 9570.346974529433, 10419.343897213972, 11341.468932841171, 12343.020951006043, 13430.841372677867, 14612.36090271137, 15895.650287649703, 17289.475445536602]

# Series drawn below with the label 'Prophet'.
PROPHET = [8271.167728112437, 8644.964341092684, 8920.769274456597, 9514.963157993063, 9711.110557018099, 10259.978544495567, 11114.728071108544, 10475.055172062208, 11181.421580707369, 11908.313992589217]

# x-axis labels: ten consecutive days, 12/05/2020 through 21/05/2020.
X = [f'{day}/05/2020' for day in range(12, 22)]

# (values, line style, colour, legend label), drawn in this order.
curves = [
    (y_test, 'solid', "black", 'True'),
    (LR10, 'dashed', "blue", 'Linear Regression'),
    (SVM10, 'dashed', "red", 'Linear SVM'),
    (CNN10, 'dashed', "purple", 'Convolutional Neural Network'),
    (MLP10, 'dashed', "green", 'Multilayer Perceptron'),
    (AR, 'dashed', "cyan", 'AutoRegression'),
    (PROPHET, 'dashed', "orange", 'Prophet'),
]
for values, dash, colour, caption in curves:
    plt.plot(X, values, linewidth=4, linestyle=dash, color=colour, label=caption)

plt.title('Window 10', fontsize=18)
plt.ylabel("Confirmed cases", fontsize=18)
plt.xlabel("Days", fontsize=18)

plt.legend(fontsize=18)
plt.show()

# Radar chart by windows

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from math import pi

categories = ['MAE', 'MAPE', 'MSE', 'RMSE', 'R2']

N = len(categories)

# One evenly spaced angle per category; the first angle is repeated at the end
# so that each plotted polygon closes on itself.
angles = [n / float(N) * 2 * pi for n in range(N)]
angles.append(angles[0])

# Polar axes with the first category at the top, proceeding clockwise.
ax = plt.subplot(111, polar=True)
ax.set_theta_offset(pi / 2)
ax.set_theta_direction(-1)

# One spoke per category, labelled with its name.
plt.xticks(angles[:-1], categories, color='grey', size=16)

# Radial grid: tick labels are the tick values themselves.
ax.set_rlabel_position(0)
radial_ticks = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1]
plt.yticks(radial_ticks, [str(tick) for tick in radial_ticks], color="grey", size=10)
plt.ylim(0.4, 1)

# Replace these values for each window. Every list has one entry per angle,
# i.e. the five category scores followed by the first score again.
LR =  [0.9679299529534762, 0.8856733526515179, 0.9986587650986587, 0.9633771259819591, 0.9858124297310489, 0.9679299529534762]
SVM =  [0.9781059418017076, 0.9296331014123942, 0.9993191935079422, 0.9739077311822494, 0.9927984352808236, 0.9781059418017076]
CNN =  [0.9528158215717024, 0.9132517997237986, 0.9981613909543264, 0.957120995281215, 0.9805512106741805, 0.9528158215717024]
MLP =  [0.9618836034152292, 0.9129720522743806, 0.9979186658015982, 0.954378358223297, 0.9779836662739194, 0.9618836034152292]

plt.title('Window 1', fontsize=20)
outlines = (
    (LR, "blue", "Linear Regression"),
    (SVM, "red", "Linear SVM"),
    (CNN, "purple", "CNN"),
    (MLP, "green", "MLP"),
)
for scores, colour, caption in outlines:
    ax.plot(angles, scores, linewidth=4, linestyle='solid', color=colour, label=caption)

# The legend call is disabled; uncomment to show it:
#ax.legend(loc=1, bbox_to_anchor=(1.2, 1.05))
plt.show()

# Dataset correlation 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns;

data = pd.read_csv("../data/dataset_new.csv")

# Swap the long column names for single letters so the heatmap axes stay readable.
short_names = {
    'confirmados': 'A',
    'descartados': 'B',
    'mortes': 'C',
    'recuperados': 'D',
    'Confirmados/100k hab': 'E',
    'mortes/confirmados': 'F',
    'novos casos': 'G',
    'novas mortes': 'H',
}
data = data.rename(columns=short_names)

# Pairwise correlation between columns, drawn as a heatmap.
corr = data.corr()
sns.set(font_scale=1.6)
sns.heatmap(corr)
plt.title("Correlation heatmap for the dataset")
plt.show()