In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import fbprophet
import pickle
import scipy.optimize as optim


%matplotlib inline

plt.style.use('bmh')


max_date = '2020-05-01'
min_samples = 30


data_proc_file = '../../Data/Processed/covid19_data_modeling.parquet'
model_score_file = '../../Data/Modeling/model_scores.parquet'
model_file = '../../Data/Modeling/trained_models.jbl'

time_col = 'date'
grain_col = 'countrycode'
target_col = 'cases'
countrycode_list = ['US','CN','BR','IT','FR','UK']

ntest = 15

pd.plotting.register_matplotlib_converters()

ERROR:fbprophet:Importing plotly failed. Interactive plots will not work.


# Carga dos Dados 

In [2]:
df = pd.read_parquet(data_proc_file)

df = df[df[time_col] < max_date]

print('shape:', df.shape)
print('columns:', df.columns)

shape: (10751, 5)
columns: Index(['cases', 'countrycode', 'date', 'deaths', 'recovered'], dtype='object')


# Treino / Teste 

In [3]:
def split_last_n_by_grain(df, ntest, time_column_name, grain_column_names):
    """Group df by grain and split on last n rows for each group."""
    df_grouped = (df.sort_values(time_column_name) # Sort by ascending time
                  .groupby(grain_column_names, group_keys=False))
    df_head = df_grouped.apply(lambda dfg: dfg.iloc[:-ntest])
    df_tail = df_grouped.apply(lambda dfg: dfg.iloc[-ntest:])
    return df_head, df_tail


df_train, df_test = split_last_n_by_grain(df, ntest, time_col, grain_col)

print('Train:', df_train.shape)
print('Test :', df_test.shape)

Train: (7976, 5)
Test : (2775, 5)


# Construção do Pipeline 

In [4]:
def mape(y_true, y_pred):
    return ((y_pred - y_true).abs()/y_true).mean() * 100
    
trained_models = {}
df_model_result=pd.DataFrame()
result_list = []
for countrycode in countrycode_list:
    print('Processing ', countrycode, end='')
    Xtrn = df_train[df_train[grain_col] == countrycode][[time_col, target_col]].copy()
    Xtst = df_test[df_test[grain_col] == countrycode].groupby(time_col)[target_col].sum()
    Xrefit = df[df[grain_col] == countrycode][[time_col, target_col]].copy()
    Xtrn.columns = ['ds', 'y']
    
    if Xtrn.shape[0] < min_samples:
        print('=> less than', min_samples, '. Ignored.')
        continue
    
    model = fbprophet.Prophet(growth='linear').fit(Xtrn)
    
    # Evaluate Model
    n_periods = Xtst.shape[0]
    forecast_data = model.make_future_dataframe(
        periods=n_periods,
        include_history=True
        )
    df_forecast = model.predict(forecast_data).set_index('ds')
    forecast     = df_forecast.yhat[-n_periods:].rename('forecast')
    forecast_low  =  df_forecast.yhat_lower[-n_periods:].rename('forecast_lo')
    forecast_up   =  df_forecast.yhat_upper[-n_periods:].rename('forecast_up')
    
    # Score model
    score = mape(Xtst, forecast)
    print(' mape %.2f %%'%score)
    
    # Refit model
    Xrefit.columns = ['ds','y']
    trained_models[countrycode] = fbprophet.Prophet(growth='linear').fit(Xrefit)
    
    #result list
    result_list.append({'countrycode':countrycode,
                        'model_name': 'prophet',
                        'date_begin': Xrefit.ds.min(),
                        'date_end'  : Xrefit.ds.max(),
                        'score': score})
    
df_results = pd.DataFrame().from_dict(result_list)

Processing  US

INFO:numexpr.utils:NumExpr defaulting to 8 threads.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


 mape 0.93 %


INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Processing  CN

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


 mape 0.56 %
Processing  BR

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


 mape 21.18 %


INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Processing  IT

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


 mape 5.58 %


INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Processing  FR

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


 mape 23.06 %
Processing  UK

ValueError: Dataframe has less than 2 non-NaN rows.

# Avaliar os dados de Teste 

In [None]:
df_results.to_parquet(model_score_file)
with open(model_file, 'wb') as fid:
    pickle.dump(trained_models, fid)

df_results.head()