In [None]:
import os
from os import path
import sys
sys.path.append('..')

import numpy as np
import pandas as pd

import collections

import utils

import logging as log
log.basicConfig(level=log.DEBUG)

In [2]:
DATA_FOLDER = path.join('..', 'data')
RESULTS_FOLDER = path.join('..', 'results')

STATES = ['AC', 'AL', 'AM', 'AP', 'BA', 'CE',
         'DF', 'ES', 'GO', 'MA', 'MT', 'MS',
         'MG', 'PA', 'PB', 'PR', 'PE', 'PI',
         'RJ', 'RN', 'RO', 'RS', 'RR', 'SC',
         'SE', 'SP', 'TO']

MODELS = ['linear-regression', 'exponential-holt']

STRATEGIES = [
    'aggregated',
    'windowed'
]

METRICS = ['MAE', 'RMSE', 'MSLE']

# Load State data

In [3]:
df_state = {}
for state in STATES:
    df_state[state] = pd.read_csv(path.join(DATA_FOLDER, state + '.csv'), index_col='data')

df_state['CE'].head(14)

Unnamed: 0_level_0,cases,cured,deaths,internadosDomiciliar,internadosEnfermaria,internadosUTI,percentual_cured,percentual_deaths,percentual_internados,percentual_internadosDomiciliar,percentual_internadosEnfermaria,percentual_internadosUTI,refuses,suspects,total_internados_DB
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
14/03/2020,27,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,59,22,0
15/03/2020,33,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,59,22,0
16/03/2020,46,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,60,22,0
17/03/2020,77,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,95,211,0
18/03/2020,96,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,89,493,0
19/03/2020,143,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,118,766,0
20/03/2020,215,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,118,766,0
20/03/2020,215,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,118,766,0
21/03/2020,266,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,118,766,0
22/03/2020,328,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,118,766,0


# Load results

List the results we have

In [4]:
state = 'CE'
os.listdir(path.join(RESULTS_FOLDER, state))

['result-CE-aggregated-exponential-holt.csv',
 'result-CE-windowed-exponential-holt.csv',
 'result-CE-aggregated-linear-regression.csv',
 'result-CE-windowed-linear-regression.csv']

In [5]:
results = collections.defaultdict(dict)
state = 'SP'
# for state in STATES:
for strategy in STRATEGIES:
    for model in MODELS:
        filename = f'result-{state}-{strategy}-{model}.csv'
        results[strategy][model] = pd.read_csv(path.join(RESULTS_FOLDER, 
                                                                  state, 
                                                                  filename), 
                                                        index_col='data')

        
results['aggregated']['linear-regression'].iloc[5:15]

Unnamed: 0_level_0,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,yhat_model_1_to_14,yhat_model_1_to_15,yhat_model_1_to_16,...,yhat_model_1_to_48,yhat_model_1_to_49,yhat_model_1_to_50,yhat_model_1_to_51,yhat_model_1_to_52,yhat_model_1_to_53,yhat_model_1_to_54,yhat_model_1_to_55,yhat_model_1_to_56,yhat_model_1_to_57
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
02/03/2020,,,,,,,,,,,...,,,,,,,,,,
03/03/2020,,,,,,,,,,,...,,,,,,,,,,
04/03/2020,2.642857,,,,,,,,,,...,,,,,,,,,,
05/03/2020,2.857143,3.190476,,,,,,,,,...,,,,,,,,,,
06/03/2020,3.071429,3.452381,5.022222,,,,,,,,...,,,,,,,,,,
07/03/2020,3.285714,3.714286,5.488889,7.963636,,,,,,,...,,,,,,,,,,
08/03/2020,3.5,3.97619,5.955556,8.727273,11.1,,,,,,...,,,,,,,,,,
09/03/2020,3.714286,4.238095,6.422222,9.490909,12.127273,14.331002,,,,,...,,,,,,,,,,
10/03/2020,3.928571,4.5,6.888889,10.254545,13.154545,15.586247,16.582418,,,,...,,,,,,,,,,
11/03/2020,4.142857,4.761905,7.355556,11.018182,14.181818,16.841492,17.934066,19.118681,,,...,,,,,,,,,,


# Caculate metrics

In [6]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_log_error as msle

In [7]:
def calculate_metrics(y_true, y_hat):
    m1 = np.sqrt(mse(y_true, y_hat))
    m2 = mae(y_true, y_hat)
    m3 = msle(y_true, y_hat)
    
    return m1, m2, m3

In [8]:
def generate_results(strategy='aggregated'):
    # Empty lists
    list_states = []
    list_models = []
    list_metric_name = []
    dict_metric_result = collections.defaultdict(list)

    for state in STATES:
#         print(state)
        for model in MODELS:
            filename = f'result-{state}-{strategy}-{model}.csv'
    #         print(filename)
            results[strategy][model] = pd.read_csv(path.join(RESULTS_FOLDER, 
                                                                      state, 
                                                                      filename), 
                                                            index_col='data')

            for i in results[strategy][model].columns:  
                # Get dayout
                dayout = df_state[state]['cases'].index[-1]
                # Get y_true and y_hat
                y_hat = results[strategy][model][i].dropna()
                y_true = df_state[state]['cases']
                y_true = y_true.loc[y_hat.index.unique()].dropna()
                y_true, y_hat = utils.check_inputs(y_true, y_hat)
                
#                 if state == 'SP':
#                     print(i)
#                     print(dayout)
#                     print(y_true)

                if y_true.index[-1] == dayout:
                    break

                m_mae, m_rmse, m_msle = calculate_metrics(y_true, y_hat)

                dict_metric_result[i] += [m_mae, m_rmse, m_msle]

            list_states += [state for x in range(len(METRICS))]
            list_models += [model for x in range(len(METRICS))]
            list_metric_name += METRICS

    # Create metrics DataFrame
    df_metrics_labels = pd.DataFrame(list(zip(list_states, list_models, list_metric_name)), columns=['state', 'model', 'metric'])
    df_results_days = pd.DataFrame.from_dict(dict_metric_result, orient='index').transpose()
    df_metrics = pd.DataFrame()
    df_metrics = pd.concat([df_metrics_labels, df_results_days], axis=1)
    
    
    return df_metrics

In [9]:
def create_dataframes(strategy='aggregated'):
    id_columns = []
    
    STATES = ['AC', 'AL', 'AM', 'AP', 'BA', 'CE',
         'DF', 'ES', 'GO', 'MA', 'MT', 'MS',
         'MG', 'PA', 'PB', 'PR', 'PE', 'PI',
         'RJ', 'RN', 'RO', 'RS', 'RR', 'SC',
         'SE', 'SP', 'TO']

    for state in STATES:
        filename = f'result-{state}-{strategy}-{model}.csv'
        results[strategy][model] = pd.read_csv(path.join(RESULTS_FOLDER, 
                                                                  state, 
                                                                  filename), 
                                                        index_col='data')

        id_columns += results[strategy][model].columns.tolist()
    
    id_columns_unique = pd.Series(id_columns).unique()
#     print(id_columns)

    u = pd.DataFrame(columns=['state', 'model', 'metric'] + id_columns_unique.tolist())
    
    return u

In [10]:
def calculate_metrics2(y_true, y_hat, metric):
    if metric == 'RMSE':
        m = np.sqrt(mse(y_true, y_hat))
    elif metric == 'MAE':
        m = mae(y_true, y_hat)
    elif metric == 'MSLE':
        m = msle(y_true, y_hat)
    
    return m

In [11]:
def generate_results2(strategy='aggregated'):
    # Empty lists
    list_states = []
    list_models = []
    list_metric_name = []
    dict_metric_result = collections.defaultdict(list)
    
    df_out = create_dataframes(strategy)

    for state in STATES:
        print(state)
        for model in MODELS:
            filename = f'result-{state}-{strategy}-{model}.csv'
            print(filename)
            results[strategy][model] = pd.read_csv(path.join(RESULTS_FOLDER, 
                                                                      state, 
                                                                      filename), 
                                                            index_col='data')
            for metric in METRICS:
                df_buffer = create_dataframes(strategy)
                for i in results[strategy][model].columns:  
                    # Get dayout
                    dayout = df_state[state]['cases'].index[-1]
                    # Get y_true and y_hat
                    y_hat = results[strategy][model][i].dropna()
                    y_true = df_state[state]['cases']
                    y_true = y_true.loc[y_hat.index.unique()].dropna()
                    y_true, y_hat = utils.check_inputs(y_true, y_hat)

    #                 if state == 'SP':
    #                     print(i)
    #                     print(dayout)
    #                     print(y_true)

                    if y_true.index[-1] == dayout:
                        print('==========')
                        print(strategy)
                        print(model)
                        print(i)
                        print(y_hat)
                        print('==========')
                        break

                    m = calculate_metrics2(y_true, y_hat, metric)
                    
                    df_buffer['state'] = [state]
                    df_buffer['model'] = [model]
                    df_buffer['metric'] = [metric]
                    df_buffer[i] = [m]
                  
#                 print(df_buffer)
                df_out = df_out.append(df_buffer, ignore_index = True)
    
    
    return df_out

In [12]:
STATES = ['AL', 'SP']

df_out = generate_results2()
df_out

AL
result-AL-aggregated-linear-regression.csv
aggregated
linear-regression
yhat_model_1_to_31
data
08/04/2020    27.622177
09/04/2020    28.637903
10/04/2020    29.653629
11/04/2020    30.669355
12/04/2020    31.685081
13/04/2020    32.700806
14/04/2020    33.716532
15/04/2020    34.732258
16/04/2020    35.747984
17/04/2020    36.763710
18/04/2020    37.779435
19/04/2020    38.795161
20/04/2020    39.810887
21/04/2020    40.826613
22/04/2020    41.842339
Name: yhat_model_1_to_31, dtype: float64
aggregated
linear-regression
yhat_model_1_to_31
data
08/04/2020    27.622177
09/04/2020    28.637903
10/04/2020    29.653629
11/04/2020    30.669355
12/04/2020    31.685081
13/04/2020    32.700806
14/04/2020    33.716532
15/04/2020    34.732258
16/04/2020    35.747984
17/04/2020    36.763710
18/04/2020    37.779435
19/04/2020    38.795161
20/04/2020    39.810887
21/04/2020    40.826613
22/04/2020    41.842339
Name: yhat_model_1_to_31, dtype: float64
aggregated
linear-regression
yhat_model_1_to_3

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,...,yhat_model_1_to_48,yhat_model_1_to_49,yhat_model_1_to_50,yhat_model_1_to_51,yhat_model_1_to_52,yhat_model_1_to_53,yhat_model_1_to_54,yhat_model_1_to_55,yhat_model_1_to_56,yhat_model_1_to_57
0,AL,linear-regression,MAE,6.133333,7.2,8.333333,9.466667,10.6,9.876923,9.247253,...,,,,,,,,,,
1,AL,linear-regression,RMSE,7.840068,8.861903,9.889388,10.819735,11.676187,10.896157,10.208646,...,,,,,,,,,,
2,AL,linear-regression,MSLE,1.928073,2.249926,2.587814,2.925702,3.263589,1.493169,0.865749,...,,,,,,,,,,
3,AL,exponential-holt,MAE,8.06081,9.103615,9.477386,9.299553,10.813496,13.067283,15.656278,...,,,,,,,,,,
4,AL,exponential-holt,RMSE,8.4275,9.559352,9.972044,9.753888,11.337185,14.408855,17.823974,...,,,,,,,,,,
5,AL,exponential-holt,MSLE,0.185333,0.196329,0.17461,0.139695,0.165371,0.187799,0.21287,...,,,,,,,,,,
6,SP,linear-regression,MAE,60.390476,78.37619,101.111111,126.024242,162.242424,206.015618,256.022711,...,,,,,,,,,,
7,SP,linear-regression,RMSE,92.310405,116.68844,151.332341,186.352687,240.634284,300.567556,359.883818,...,,,,,,,,,,
8,SP,linear-regression,MSLE,4.700124,5.118452,4.447152,3.793212,3.673025,3.776006,4.204368,...,,,,,,,,,,
9,SP,exponential-holt,MAE,3720.203667,4218.317901,4717.191672,5236.513839,5762.670639,6263.352998,6837.227707,...,,,,,,,,,,


In [159]:
df_out.loc[df_out.model == 'exponential-holt']

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,...,yhat_model_1_to_47,yhat_model_1_to_48,yhat_model_1_to_49,yhat_model_1_to_50,yhat_model_1_to_51,yhat_model_1_to_52,yhat_model_1_to_53,yhat_model_1_to_54,yhat_model_1_to_55,yhat_model_1_to_56
3,AL,exponential-holt,MAE,8.06081,9.103615,9.477386,9.299553,10.813496,13.067283,15.656278,...,,,,,,,,,,
4,AL,exponential-holt,RMSE,8.4275,9.559352,9.972044,9.753888,11.337185,14.408855,17.823974,...,,,,,,,,,,
5,AL,exponential-holt,MSLE,0.185333,0.196329,0.17461,0.139695,0.165371,0.187799,0.21287,...,,,,,,,,,,
9,SP,exponential-holt,MAE,3720.203667,4218.317901,4717.191672,5236.513839,5762.670639,6263.352998,6837.227707,...,,,,,,,,,,
10,SP,exponential-holt,RMSE,4202.211146,4726.75853,5217.572098,5700.760546,6160.752903,6617.662899,7197.907954,...,,,,,,,,,,
11,SP,exponential-holt,MSLE,28.818807,29.386081,29.571722,29.712447,30.635059,30.965186,31.12901,...,,,,,,,,,,


In [50]:
state = 'SP'
strategy = 'aggregated'
model = 'exponential-holt'
label = 'yhat_model_1_to_20'

filename = f'result-{state}-{strategy}-{model}.csv'
print(filename)
results[strategy][model] = pd.read_csv(path.join(RESULTS_FOLDER, 
                                                          state, 
                                                          filename), 
                                                index_col='data')

# results['aggregated']['exponential-holt']['yhat_model_1_to_20']
dayout = df_state[state]['cases'].index[-1]
dayout

result-SP-aggregated-exponential-holt.csv


'22/04/2020'

In [51]:
y_hat = results[strategy][model][label].dropna()
y_hat

data
18/03/2020     167.632446
19/03/2020     202.810388
20/03/2020     245.370479
21/03/2020     296.861874
22/03/2020     359.158822
23/03/2020     434.528887
24/03/2020     525.715482
25/03/2020     636.037732
26/03/2020     769.511287
27/03/2020     930.994485
28/03/2020    1126.365196
29/03/2020    1362.734768
30/03/2020    1648.706880
31/03/2020    1994.690707
Name: yhat_model_1_to_20, dtype: float64

In [25]:
y_true = df_state[state]['cases']
y_true = y_true.loc[y_hat.index.unique()].dropna()
y_true

data
18/03/2020     240
19/03/2020     286
20/03/2020     396
21/03/2020     459
22/03/2020     631
23/03/2020     745
24/03/2020     810
25/03/2020     862
26/03/2020    1052
27/03/2020    1223
28/03/2020    1406
29/03/2020    1451
30/03/2020    1517
31/03/2020    2339
Name: cases, dtype: int64

In [28]:
y_true.index[-1]

'31/03/2020'

In [18]:
y_true, y_hat = utils.check_inputs(y_true, y_hat)
y_true

data
18/03/2020     240
19/03/2020     286
20/03/2020     396
21/03/2020     459
22/03/2020     631
23/03/2020     745
24/03/2020     810
25/03/2020     862
26/03/2020    1052
27/03/2020    1223
28/03/2020    1406
29/03/2020    1451
30/03/2020    1517
31/03/2020    2339
Name: cases, dtype: int64

In [19]:
calculate_metrics2(y_true, y_hat, 'RMSE')

231.49378748285793

In [166]:
results['aggregated']['linear-regression']['yhat_model_1_to_20']

data
26/02/2020           NaN
27/02/2020           NaN
28/02/2020           NaN
29/02/2020           NaN
01/03/2020           NaN
02/03/2020           NaN
03/03/2020           NaN
04/03/2020           NaN
05/03/2020           NaN
06/03/2020           NaN
07/03/2020           NaN
08/03/2020           NaN
09/03/2020           NaN
10/03/2020           NaN
11/03/2020           NaN
12/03/2020           NaN
13/03/2020           NaN
14/03/2020           NaN
15/03/2020           NaN
16/03/2020           NaN
17/03/2020     95.631579
18/03/2020    101.447368
19/03/2020    107.263158
20/03/2020    113.078947
21/03/2020    118.894737
22/03/2020    124.710526
23/03/2020    130.526316
24/03/2020    136.342105
25/03/2020    142.157895
26/03/2020    147.973684
27/03/2020    153.789474
28/03/2020    159.605263
29/03/2020    165.421053
30/03/2020    171.236842
31/03/2020    177.052632
01/04/2020           NaN
02/04/2020           NaN
03/04/2020           NaN
04/04/2020           NaN
05/04/2020          

In [136]:
results['aggregated']['exponential-holt'][['yhat_model_1_to_41', 'yhat_model_1_to_42']]

Unnamed: 0_level_0,yhat_model_1_to_41,yhat_model_1_to_43
data,Unnamed: 1_level_1,Unnamed: 2_level_1
26/02/2020,,
27/02/2020,,
28/02/2020,,
29/02/2020,,
01/03/2020,,
02/03/2020,,
03/03/2020,,
04/03/2020,,
05/03/2020,,
06/03/2020,,


In [135]:
df_state['SP']['cases'].iloc[41:]

data
07/04/2020     5682
08/04/2020     6708
09/04/2020     7480
10/04/2020     8216
11/04/2020     8419
12/04/2020     8755
13/04/2020     8895
14/04/2020     9371
15/04/2020    11043
16/04/2020    11568
17/04/2020    12841
18/04/2020    13894
19/04/2020    14267
20/04/2020    14580
21/04/2020    15385
22/04/2020    15914
Name: cases, dtype: int64

In [114]:
u = create_dataframes()
u

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,...,yhat_model_1_to_47,yhat_model_1_to_48,yhat_model_1_to_49,yhat_model_1_to_50,yhat_model_1_to_51,yhat_model_1_to_52,yhat_model_1_to_53,yhat_model_1_to_54,yhat_model_1_to_55,yhat_model_1_to_56


In [92]:
u = u.append(df_out)
u

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,...,yhat_model_1_to_25,yhat_model_1_to_26,yhat_model_1_to_27,yhat_model_1_to_28,yhat_model_1_to_29,yhat_model_1_to_30,yhat_model_1_to_31,yhat_model_1_to_32,yhat_model_1_to_33,yhat_model_1_to_34
0,AC,exponential-holt,MSLE,1.176458,1.154767,1.085585,0.973744,0.98438,0.957396,0.91515,...,,,,,,,,,,
0,AC,exponential-holt,MSLE,1.176458,1.154767,1.085585,0.973744,0.98438,0.957396,0.91515,...,,,,,,,,,,


In [31]:
id_columns = []
strategy = 'aggregated'

for state in STATES:
    filename = f'result-{state}-{strategy}-{model}.csv'
    results[strategy][model] = pd.read_csv(path.join(RESULTS_FOLDER, 
                                                              state, 
                                                              filename), 
                                                    index_col='data')
    
    id_columns += results[strategy][model].columns.tolist()
    
u = pd.DataFrame(columns=['state', 'model', 'metric'] + id_columns)
z = pd.DataFrame(columns=['state', 'model', 'metric'] + id_columns)

z['state'] = ['CE']
z['model'] = ['uhaue']

In [33]:
u = u.append(z, ignore_index = True)
u

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,...,yhat_model_1_to_25,yhat_model_1_to_26,yhat_model_1_to_27,yhat_model_1_to_28,yhat_model_1_to_29,yhat_model_1_to_30,yhat_model_1_to_31,yhat_model_1_to_32,yhat_model_1_to_33,yhat_model_1_to_34
0,CE,uhaue,,,,,,,,,...,,,,,,,,,,
1,CE,uhaue,,,,,,,,,...,,,,,,,,,,


In [30]:

z['state'] = ['CE']
z['model'] = 'uhaue'
z

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,...,yhat_model_1_to_25,yhat_model_1_to_26,yhat_model_1_to_27,yhat_model_1_to_28,yhat_model_1_to_29,yhat_model_1_to_30,yhat_model_1_to_31,yhat_model_1_to_32,yhat_model_1_to_33,yhat_model_1_to_34
0,CE,uhaue,,,,,,,,,...,,,,,,,,,,


In [208]:
id_columns = []

for state in STATES:
    filename = f'result-{state}-{strategy}-{model}.csv'
    results[strategy][model] = pd.read_csv(path.join(RESULTS_FOLDER, 
                                                              state, 
                                                              filename), 
                                                    index_col='data')
    
    id_columns += results[strategy][model].columns.tolist()

id_columns_unique = pd.Series(id_columns).unique()
max_size = len(STATES) * len(MODELS) * len(METRICS)
dict_metric_result = collections.defaultdict(list)

for id in id_columns_unique:
    dict_metric_result[id] = np.zeros((max_size))
#     dict_metric_result[id][:] = np.nan

In [209]:
# Empty lists
list_states = []
list_models = []
list_metric_name = []
# dict_metric_result = collections.defaultdict(list)

strategy = 'aggregated'



for state in STATES:
    id = 0
#         print(state)
    for model in MODELS:
        filename = f'result-{state}-{strategy}-{model}.csv'
#         print(filename)
        results[strategy][model] = pd.read_csv(path.join(RESULTS_FOLDER, 
                                                                  state, 
                                                                  filename), 
                                                        index_col='data')

        for i in results[strategy][model].columns:
#             print(id)

            # Get dayout
            dayout = df_state[state]['cases'].index[-1]
            # Get y_true and y_hat
            y_hat = results[strategy][model][i].dropna()
            y_true = df_state[state]['cases']
            y_true = y_true.loc[y_hat.index.unique()].dropna()
            y_true, y_hat = utils.check_inputs(y_true, y_hat)

            if y_true.index[-1] == dayout:
                break

            m = calculate_metrics(y_true, y_hat)
            dict_metric_result[i][id:id+3] = m
                
            
        id += 1            

        list_states += [state for x in range(len(METRICS))]
        list_models += [model for x in range(len(METRICS))]
        list_metric_name += METRICS
    
    
# for i in id_columns_unique:
#     dict_metric_result[i][dict_metric_result[i] == 0] = np.nan
#     dict_metric_result[id][:] = np.nan

In [210]:
print(i)
dict_metric_result.keys()

yhat_model_1_to_20


dict_keys(['yhat_model_1_to_7', 'yhat_model_1_to_8', 'yhat_model_1_to_9', 'yhat_model_1_to_10', 'yhat_model_1_to_11', 'yhat_model_1_to_12', 'yhat_model_1_to_13', 'yhat_model_1_to_14', 'yhat_model_1_to_15', 'yhat_model_1_to_16', 'yhat_model_1_to_17', 'yhat_model_1_to_18', 'yhat_model_1_to_19', 'yhat_model_1_to_20', 'yhat_model_1_to_21', 'yhat_model_1_to_22', 'yhat_model_1_to_23', 'yhat_model_1_to_24', 'yhat_model_1_to_25', 'yhat_model_1_to_26', 'yhat_model_1_to_27', 'yhat_model_1_to_28', 'yhat_model_1_to_29', 'yhat_model_1_to_30', 'yhat_model_1_to_31', 'yhat_model_1_to_32', 'yhat_model_1_to_33', 'yhat_model_1_to_34', 'yhat_model_1_to_35', 'yhat_model_1_to_36', 'yhat_model_1_to_37', 'yhat_model_1_to_38', 'yhat_model_1_to_39', 'yhat_model_1_to_40', 'yhat_model_1_to_41', 'yhat_model_1_to_42', 'yhat_model_1_to_43', 'yhat_model_1_to_44', 'yhat_model_1_to_45', 'yhat_model_1_to_46', 'yhat_model_1_to_47', 'yhat_model_1_to_48', 'yhat_model_1_to_49', 'yhat_model_1_to_50', 'yhat_model_1_to_51', 'y

In [213]:
# dict_metric_result['yhat_model_1_to_41'][dict_metric_result['yhat_model_1_to_41'] == 0] = np.nan
dict_metric_result['yhat_model_1_to_8']

array([3.11556057, 2.31568542, 1.7008671 , 0.01213688, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [212]:
dict_metric_result[i][dict_metric_result[i] == 0] = np.nan
dict_metric_result[i]

array([9.49006888e+02, 2.31493787e+02, 2.12806738e+02, 1.28756782e-01,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
                  nan,            nan,            nan,            nan,
      

In [141]:
dict_metric_result[i][id:id+3]

array([0., 0.])

In [132]:
dict_metric_result['yhat_model_1_to_41']

[7168.124207201054,
 6681.534668989548,
 0.9932207484192102,
 21036.03803599028,
 15086.405502117987,
 0.6473978241469766]

In [117]:
# Create metrics DataFrame
df_metrics_labels = pd.DataFrame(list(zip(list_states, list_models, list_metric_name)), columns=['state', 'model', 'metric'])
df_results_days = pd.DataFrame.from_dict(dict_metric_result, orient='index').transpose()
df_metrics = pd.DataFrame()
sdf_metrics = pd.concat([df_metrics_labels, df_results_days], axis=1)

In [119]:
state = 'SP'
df_metrics.loc[df_metrics.state == state]

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,...,yhat_model_1_to_32,yhat_model_1_to_33,yhat_model_1_to_34,yhat_model_1_to_35,yhat_model_1_to_36,yhat_model_1_to_37,yhat_model_1_to_38,yhat_model_1_to_39,yhat_model_1_to_40,yhat_model_1_to_41
150,SP,linear-regression,MAE,92.310405,116.68844,151.332341,186.352687,240.634284,300.567556,359.883818,...,,,,,,,,,,
151,SP,linear-regression,RMSE,60.390476,78.37619,101.111111,126.024242,162.242424,206.015618,256.022711,...,,,,,,,,,,
152,SP,linear-regression,MSLE,4.700124,5.118452,4.447152,3.793212,3.673025,3.776006,4.204368,...,,,,,,,,,,
153,SP,exponential-holt,MAE,96.987029,122.073071,156.443183,186.999685,234.873836,286.997271,344.316312,...,,,,,,,,,,
154,SP,exponential-holt,RMSE,66.31922,85.29005,108.400743,130.692156,163.090307,202.593891,253.094717,...,,,,,,,,,,
155,SP,exponential-holt,MSLE,6.787286,6.790828,4.863147,3.127656,2.462456,2.20943,2.513015,...,,,,,,,,,,


In [225]:
df_metrics = generate_results(strategy='aggregated')
df_metrics.head(20)

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,...,yhat_model_1_to_32,yhat_model_1_to_33,yhat_model_1_to_34,yhat_model_1_to_35,yhat_model_1_to_36,yhat_model_1_to_37,yhat_model_1_to_38,yhat_model_1_to_39,yhat_model_1_to_40,yhat_model_1_to_41
0,AC,linear-regression,MAE,6.061252,6.727641,5.474591,4.247599,5.916909,8.824073,9.825322,...,537.038253,2256.863707,5066.858404,5411.259054,5684.118817,5995.156462,6316.796521,6593.584799,6864.828491,7168.124207
1,AC,linear-regression,RMSE,5.142857,5.671429,4.376667,3.597576,4.750303,7.825874,9.117216,...,467.330963,2054.753253,4604.374077,4951.217927,5223.692492,5505.925225,5785.694029,6047.759244,6342.142902,6681.534669
2,AC,linear-regression,MSLE,0.016516,0.019184,0.012799,0.006441,0.009819,0.021202,0.025726,...,0.570126,0.770747,1.965761,1.815079,1.608628,1.425194,1.265021,1.134744,1.050345,0.993221
3,AC,exponential-holt,MAE,493530.565129,472875.913024,427387.925609,366567.667559,319606.876322,278431.925268,231676.086834,...,17301.560997,4789.097355,4730.950835,7456.987648,11368.197371,15065.711505,18465.593827,20905.351845,21374.456401,21036.038036
4,AC,exponential-holt,RMSE,222374.230482,215775.769707,197686.743561,172033.297542,152182.612115,134539.200453,113709.690026,...,11227.880354,3330.294171,2985.400326,4862.677309,7695.300755,10483.242514,13118.161375,15013.658728,15342.664991,15086.405502
5,AC,exponential-holt,MSLE,40.610744,40.339773,39.495438,38.144716,36.853897,35.501055,33.753971,...,4.72185,0.370541,0.157265,0.254669,0.419411,0.56451,0.678955,0.737898,0.70708,0.647398
6,AL,linear-regression,MAE,7.840068,8.861903,9.889388,10.819735,11.676187,10.896157,10.208646,...,549.019429,4744.495363,,,,,,,,
7,AL,linear-regression,RMSE,6.133333,7.2,8.333333,9.466667,10.6,9.876923,9.247253,...,442.56564,4204.849398,,,,,,,,
8,AL,linear-regression,MSLE,1.928073,2.249926,2.587814,2.925702,3.263589,1.493169,0.865749,...,1.434252,1.942057,,,,,,,,
9,AL,exponential-holt,MAE,8.115241,9.172942,10.236489,11.19949,12.059377,9.559921,6.56203,...,558.807496,4987.954819,,,,,,,,


In [49]:
df_metrics.loc[df_metrics['state'] == 'SP']

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_1_to_8,yhat_model_1_to_9,yhat_model_1_to_10,yhat_model_1_to_11,yhat_model_1_to_12,yhat_model_1_to_13,...,yhat_model_1_to_32,yhat_model_1_to_33,yhat_model_1_to_34,yhat_model_1_to_35,yhat_model_1_to_36,yhat_model_1_to_37,yhat_model_1_to_38,yhat_model_1_to_39,yhat_model_1_to_40,yhat_model_1_to_41
150,SP,linear-regression,MAE,92.310405,116.68844,151.332341,186.352687,240.634284,300.567556,359.883818,...,,,,,,,,,,
151,SP,linear-regression,RMSE,60.390476,78.37619,101.111111,126.024242,162.242424,206.015618,256.022711,...,,,,,,,,,,
152,SP,linear-regression,MSLE,4.700124,5.118452,4.447152,3.793212,3.673025,3.776006,4.204368,...,,,,,,,,,,
153,SP,exponential-holt,MAE,96.987029,122.073071,156.443183,186.999685,234.873836,286.997271,344.316312,...,,,,,,,,,,
154,SP,exponential-holt,RMSE,66.31922,85.29005,108.400743,130.692156,163.090307,202.593891,253.094717,...,,,,,,,,,,
155,SP,exponential-holt,MSLE,6.787286,6.790828,4.863147,3.127656,2.462456,2.20943,2.513015,...,,,,,,,,,,


In [50]:
df_state['SP'].cases.index[0]

'26/02/2020'

In [44]:
df_state['SP'].cases.index[-1]

'22/04/2020'

In [19]:
df_metrics = generate_results(strategy='windowed')
df_metrics.head()

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_2_to_8,yhat_model_3_to_9,yhat_model_4_to_10,yhat_model_5_to_11,yhat_model_6_to_12,yhat_model_7_to_13,...,yhat_model_26_to_32,yhat_model_27_to_33,yhat_model_28_to_34,yhat_model_29_to_35,yhat_model_30_to_36,yhat_model_31_to_37,yhat_model_32_to_38,yhat_model_33_to_39,yhat_model_34_to_40,yhat_model_35_to_41
0,AC,linear-regression,MAE,6.061252,7.196891,7.156083,4.282618,7.131419,11.688066,13.012919,...,199.859786,976.558927,3702.522747,3197.542403,2345.691603,1486.266765,900.209979,830.814891,876.401241,2010.450975
1,AC,linear-regression,RMSE,5.142857,6.1,6.019048,3.695238,6.019048,10.390476,11.533333,...,125.97619,812.480952,3269.180952,2801.304762,1982.057143,1134.538095,671.504762,717.685714,718.457143,1610.804762
2,AC,linear-regression,MSLE,0.016516,0.021357,0.019402,0.006636,0.015282,0.036865,0.04411,...,0.032337,0.06984,0.566884,0.282283,0.098851,0.025444,0.009749,0.012298,0.008266,0.028956
3,AC,exponential-holt,MAE,493530.565129,258.290566,141.733815,20.844213,2972.203661,652.68425,6.425504,...,626.138938,2999.673883,3829.813386,6921.909762,4208.166003,5106.388019,2606.573844,1342.294492,447953.216533,39645.780316
4,AC,exponential-holt,RMSE,222374.230482,178.709235,101.472142,19.675516,1818.500989,454.581939,4.674861,...,474.743841,2052.217733,3566.719566,4505.836704,2699.995614,3404.495419,2385.221721,1258.384119,272901.943213,28045.776771


In [20]:
df_metrics

Unnamed: 0,state,model,metric,yhat_model_1_to_7,yhat_model_2_to_8,yhat_model_3_to_9,yhat_model_4_to_10,yhat_model_5_to_11,yhat_model_6_to_12,yhat_model_7_to_13,...,yhat_model_26_to_32,yhat_model_27_to_33,yhat_model_28_to_34,yhat_model_29_to_35,yhat_model_30_to_36,yhat_model_31_to_37,yhat_model_32_to_38,yhat_model_33_to_39,yhat_model_34_to_40,yhat_model_35_to_41
0,AC,linear-regression,MAE,6.061252,7.196891,7.156083,4.282618,7.131419,11.688066,13.012919,...,199.859786,976.558927,3702.522747,3197.542403,2345.691603,1486.266765,900.209979,830.814891,876.401241,2010.450975
1,AC,linear-regression,RMSE,5.142857,6.100000,6.019048,3.695238,6.019048,10.390476,11.533333,...,125.976190,812.480952,3269.180952,2801.304762,1982.057143,1134.538095,671.504762,717.685714,718.457143,1610.804762
2,AC,linear-regression,MSLE,0.016516,0.021357,0.019402,0.006636,0.015282,0.036865,0.044110,...,0.032337,0.069840,0.566884,0.282283,0.098851,0.025444,0.009749,0.012298,0.008266,0.028956
3,AC,exponential-holt,MAE,493530.565129,258.290566,141.733815,20.844213,2972.203661,652.684250,6.425504,...,626.138938,2999.673883,3829.813386,6921.909762,4208.166003,5106.388019,2606.573844,1342.294492,447953.216533,39645.780316
4,AC,exponential-holt,RMSE,222374.230482,178.709235,101.472142,19.675516,1818.500989,454.581939,4.674861,...,474.743841,2052.217733,3566.719566,4505.836704,2699.995614,3404.495419,2385.221721,1258.384119,272901.943213,28045.776771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,TO,linear-regression,RMSE,4.180952,3.971429,2.742857,1.823810,3.314286,4.352381,3.342857,...,,,,,,,,,,
158,TO,linear-regression,MSLE,0.080527,0.068148,0.038089,0.020519,0.042438,0.067781,0.032546,...,,,,,,,,,,
159,TO,exponential-holt,MAE,1.817653,24071.418888,3.237238,799946.357325,225.585298,10.109047,9.631564,...,,,,,,,,,,
160,TO,exponential-holt,RMSE,1.435962,11817.742429,2.705786,348351.647335,152.071631,8.996073,8.669944,...,,,,,,,,,,
