# NeuralProphet Nuovi Positivi, Terapia Intensiva, Deceduti

In questo notebook sono stati utilizzati gli stessi dati del notebook https://colab.research.google.com/github/marco-mazzoli/progetto-tesi/blob/master/covid_multi_output.ipynb, ossia quelli regionali dell'Emilia-Romagna.
Sono in particolare stati usati i dati di ricoverati in terapia intensiva, deceduti e nuovi positivi fino al 6 Dicembre 2021.

Il modello NeuralProphet è stato utilizzato in modalità univariata e autoregressiva, con lag di 28 giorni, per fare forecasting a 1, 2, 7 e 14 giorni.

L'ultimo 20% dei dati è stato escluso dalla fase di ricerca degli iper-parametri. Una volta stabiliti gli iper-parametri, è stato usato l'intero dataset per fare previsione, mantenendo il primo 80% per il train e il restante 20% per il test.

Gli iper-parametri su cui è stata fatta la ricerca sono learning-rate e numero di layer hidden della rete neurale.

Prima della fase di train i dati vengono scalati per poi essere riportati alla scala originale una volta effettuata la previsione.

Questo modello crea automaticamente i lag temporali.

In [1]:
# Install the NeuralProphet fork from GitHub, but only when the notebook is
# running inside Google Colab.
if 'google.colab' in str(get_ipython()):
    !pip install git+https://github.com/marco-mazzoli/neural_prophet.git

Collecting git+https://github.com/marco-mazzoli/neural_prophet.git
  Cloning https://github.com/marco-mazzoli/neural_prophet.git to /tmp/pip-req-build-2jswfods
  Running command git clone -q https://github.com/marco-mazzoli/neural_prophet.git /tmp/pip-req-build-2jswfods
  Running command git submodule update --init --recursive -q
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting holidays>=0.11.3.1
  Downloading holidays-0.13-py3-none-any.whl (172 kB)
[K     |████████████████████████████████| 172 kB 13.2 MB/s 
Collecting torch-lr-finder>=0.2.1
  Downloading torch_lr_finder-0.2.1-py3-none-any.whl (11 kB)
Collecting dataclasses>=0.6
  Downloading dataclasses-0.6-py3-none-any.whl (14 kB)
Building wheels for collected packages: neuralprophet
  Building wheel for neuralprophet (PEP 517) ... [?25l[?25hdone
  Created wheel for neuralprophet: filename=neuralprophet-0.3.0

In [2]:
import os

# When running inside Google Colab, fetch the project's helper module(s) from
# the master branch of the repository so that `from util import ...` works.
if 'google.colab' in str(get_ipython()):
    files = ['util.py']

    for file in files:
        # Delete any copy already present in the working directory, then
        # download the file again.
        os.system('rm ./' + file)
        os.system(
            'wget -nv https://raw.githubusercontent.com/marco-mazzoli/progetto-tesi/master/' + file)

import pandas as pd
from numpy.random import seed
from neuralprophet import NeuralProphet
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import logging
import plotly.graph_objects as go
from util import select_relevant_rows, select_attributes, save_config, load_config


In [3]:
# True: load a stored hyper-parameter configuration for each run.
# False: run the search over define_np_configs() and save the best one.
use_existing_config = True
# NOTE: the forecasting cell re-binds this name in `for column_to_predict in
# columns`, so the value assigned here is not the one used for the forecasts.
column_to_predict = 'terapia_intensiva'
# Target series to forecast; each entry is modelled on its own.
columns = ['deceduti']
# Fraction of the rows used for training; the remainder is used for testing.
split_percent = 0.80
# Passed to select_relevant_rows together with `attribute_focus`
# (presumably: keep the rows whose `attribute_focus` column equals
# `region_focus` -- confirm against util.py).
region_focus = 'Emilia-Romagna'
attribute_focus = 'denominazione_regione'
# Forecast horizons (number of daily steps predicted at once).
n_futures = [1, 2, 7, 14]


In [4]:
# Location of the regional COVID-19 CSV: a local checkout and the upstream
# raw file on GitHub. Only the remote path is read below.
local_region_path = r'../COVID-19/dati-regioni/dpc-covid19-ita-regioni.csv'
remote_region_path = r'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv'

regions_frame = pd.read_csv(remote_region_path)

# Restrict the data to the region of interest (helper defined in util.py).
region_focus_data = select_relevant_rows(
    regions_frame,
    attribute_focus,
    region_focus
)

# Keep the timestamp plus the three series used by the notebook.
df = select_attributes(region_focus_data, [
    'data',
    'terapia_intensiva',
    'nuovi_positivi',
    'deceduti',
])

df = pd.DataFrame(df)

# Drop the time-of-day component: format as 'YYYY-MM-DD' and parse again.
df['data'] = pd.to_datetime(df['data'])
df['data'] = pd.to_datetime(df['data'].dt.strftime(r'%Y-%m-%d'))

# Keep only the rows dated before 8 February 2022 and replace missing values
# with 0.
df = df[df['data'] < pd.to_datetime('2022-02-8')]
df = df.fillna(0)

# Use the (renamed) date column as index; reassignment instead of
# `inplace=True` keeps the cell a plain chain of transformations.
df = df.rename(columns={'data': 'date'}).set_index('date')

# revert cumulative data
# `diff()` leaves NaN in the first row because there is no previous value.
# The earlier `fillna(0)` ran before that NaN existed, so it is filled here to
# keep the series free of missing values, consistently with the rest of the
# cell.
df['deceduti'] = df['deceduti'].diff().fillna(0)

df = df.astype('float')

# numpy seed
# NOTE(review): only NumPy's global generator is seeded here; no other
# library is seeded in this notebook, so model training may still vary
# between runs -- confirm.
seed(1)

In [5]:
def define_np_configs():
    """Return the hyper-parameter grid explored during the search.

    Returns:
        A list of dicts, each with the keys 'learning_rate' and 'n_hidden'.
        Every learning rate is combined with every hidden-layer count; the
        learning rate varies slowest.
    """
    learning_rates = (0.001, 0.01, 0.05, 0.1)
    hidden_layer_counts = (0, 1, 2)

    return [
        {'learning_rate': rate, 'n_hidden': depth}
        for rate in learning_rates
        for depth in hidden_layer_counts
    ]

In [6]:
def split_series(series, n_past, n_future, arima=False):
    """Cut a frame into consecutive (past, future) windows.

    Args:
        series: two-dimensional pandas object; its ``.values`` are sliced on
            the first axis and its ``.index`` supplies the labels.
        n_past: number of rows in each past window.
        n_future: number of rows in each future window.
        arima: when equal to ``True`` every past window starts at row 0
            (expanding window) instead of at the current offset.

    Returns:
        Four numpy arrays: past values, future values, past index labels and
        future index labels. One entry per window, in order of increasing
        offset.
    """
    labels = np.array(series.index).reshape(series.values.shape[0], 1)
    values = series.values
    n_rows = len(values)

    past_values, future_values = [], []
    past_labels, future_labels = [], []

    for offset in range(n_rows):
        cut = offset + n_past
        stop = cut + n_future
        # Windows that would run past the end of the data are not produced.
        if stop > n_rows:
            break

        begin = 0 if arima == True else offset

        past_values.append(values[begin:cut, :])
        future_values.append(values[cut:stop, :])
        past_labels.append(labels[begin:cut, :])
        future_labels.append(labels[cut:stop, :])

    return (
        np.array(past_values),
        np.array(future_values),
        np.array(past_labels),
        np.array(future_labels),
    )


def plot_last_results(
    results, column_to_predict):
    """Plot prediction and ground truth of the last entry in ``results``.

    Args:
        results: sequence whose last element holds, at position ``[1][-1]``,
            an object indexable by 'pred' and 'y_test'; both values must
            expose an ``index`` used for the x axis.
        column_to_predict: text used as figure title and y-axis title.
    """
    last_window = results[-1][1][-1]
    predicted = last_window['pred']
    observed = results[-1][1][-1]['y_test']

    traces = [
        go.Scatter(
            x=predicted.index, y=predicted, mode='lines', name='Prediction'),
        go.Scatter(
            x=observed.index, y=observed, mode='lines', name='Ground Truth'),
    ]
    figure_layout = go.Layout(
        title=column_to_predict,
        xaxis={'title': 'Date'},
        yaxis={'title': column_to_predict},
        autosize=False,
        width=600,
        height=400)

    go.Figure(data=traces, layout=figure_layout).show()

def plot_mape(results, column_to_predict):
    """Plot the MAPE stored in every entry of ``results``.

    Args:
        results: iterable of ``(x_value, values)`` pairs; ``values[1]`` is
            drawn on the y axis against ``x_value``.
        column_to_predict: text appended to 'mape: ' in the figure title.
    """
    x_values = [entry[0] for entry in results]
    y_values = [entry[1][1] for entry in results]

    figure_layout = go.Layout(
        title='mape: ' + column_to_predict,
        xaxis={'title': 'Date'},
        yaxis={'title': 'mape'},
        autosize=False,
        width=600,
        height=400)
    line = go.Scatter(
        x=x_values, y=y_values, mode='lines', name='Mape')

    go.Figure(data=[line], layout=figure_layout).show()

In [7]:
# Result containers filled by the loop below: one table row and one summary
# line per (target column, forecast horizon) pair.
table_result = pd.DataFrame(columns=['mae', 'mape'])
summaries = []

# Stop these NeuralProphet loggers from passing their records on to the
# handlers of ancestor loggers.
for logger_name in ('NP.df_utils', 'NP.config', 'NP.forecaster'):
    logging.getLogger(logger_name).propagate = False

# Number of lagged observations given to the model. The same value sizes the
# ground-truth windows built with split_series, so both use this constant.
n_lags = 28


def build_model(config, n_future):
    """Create an untrained NeuralProphet model for one configuration.

    Args:
        config: mapping with the keys 'learning_rate' and 'n_hidden'.
        n_future: number of steps the model forecasts at once.

    Returns:
        A NeuralProphet instance with ``n_lags`` lags, 200 epochs, weekly,
        daily and yearly seasonality switched off and 'minmax' normalisation.
    """
    return NeuralProphet(
        n_lags=n_lags,
        epochs=200,
        n_forecasts=n_future,
        weekly_seasonality=False,
        changepoints_range=1,
        daily_seasonality=False,
        yearly_seasonality=False,
        learning_rate=config['learning_rate'],
        num_hidden_layers=config['n_hidden'],
        normalize='minmax'
    )


# Number of leading rows that take part in the hyper-parameter search.
split = int(split_percent * len(df))

for column_to_predict in columns:
    for n_future in n_futures:
        # Two-column frame ('ds' timestamp, 'y' value) for the current target.
        current = df[[column_to_predict]].reset_index().copy()
        current.columns = ['ds', 'y']

        # Only the first `split` rows are visible to the search.
        holdout = current[:split]

        # NOTE: the spelling 'nerualprophet' is part of the file name that is
        # downloaded and saved below, so it must stay as it is.
        config_path = region_focus + '_' + 'uni_nerualprophet_config' + '_' + column_to_predict
        config_path = config_path + '_' + str(n_future)

        if use_existing_config:
            # Download the stored configuration when it is not available
            # locally, load it, then remove the file.
            if not os.path.isfile(config_path):
                os.system('wget -nv https://raw.githubusercontent.com/marco-mazzoli/progetto-tesi/master/configs/' + config_path)
            config = load_config(config_path)
            os.system('rm ' + config_path)
        else:
            # Grid search: train one model per candidate and record its test MAE.
            search_results = []
            for candidate in define_np_configs():
                m = build_model(candidate, n_future)

                df_train, df_test = m.split_df(df=holdout, freq="D", valid_p=1-split_percent)

                metrics_train = m.fit(df=df_train, validation_df=df_test,freq="D", progress_print=False, progress_bar=False)
                metrics_test = m.test(df=df_test)

                search_results.append((metrics_test['MAE'][0], candidate))

            # Compare on the MAE only: sorting the (mae, dict) tuples would
            # fall back to comparing the dicts on a tie and raise TypeError.
            _, config = min(search_results, key=lambda item: item[0])
            save_config(config_path, config)

        # Train the selected configuration on the full series, using the same
        # train/test proportion.
        m = build_model(config, n_future)

        df_train, df_test = m.split_df(df=current, freq="D", valid_p=1-split_percent)

        metrics_train = m.fit(df=df_train, validation_df=df_test,freq="D", progress_print=False, progress_bar=False)
        metrics_test = m.test(df=df_test)

        # Predictions for the test part, indexed by forecast window.
        prep_df = m._prepare_dataframe_to_predict(df_test)
        dates, predicted, _ = m._predict_raw(prep_df)

        # Ground-truth windows (and their dates) matching the predictions.
        df_to_split = df_test.set_index('ds', drop=True)
        _, y_test, _, y_test_indexes = split_series(df_to_split, n_lags, n_future)

        # Maps the first date of each window to (mae, mape, window frame).
        results = {}

        for i in range(len(y_test)):
            window_frame = pd.DataFrame(
                {'y_test': y_test[i].reshape(-1),
                 'pred': predicted[i].reshape(-1),
                 'dates': y_test_indexes[i].reshape(-1)})
            window_frame.set_index('dates', inplace=True)

            mae = mean_absolute_error(window_frame['y_test'], window_frame['pred'])

            # MAPE is undefined where the ground truth is 0, so those points
            # are excluded before computing it.
            truth_values = window_frame['y_test'].values
            pred_values = window_frame['pred'].values
            nonzero = np.nonzero(truth_values)
            filtered_test = truth_values[nonzero]
            filtered_pred = pred_values[nonzero]

            # NOTE(review): a window whose ground truth is entirely 0 is
            # recorded with MAPE 0 and still enters the average below.
            mape = 0
            if len(filtered_test) != 0:
                mape = mean_absolute_percentage_error(filtered_test, filtered_pred)

            results[window_frame.index[0]] = (mae, mape, window_frame)

        results = list(results.items())

        avg_mae = np.mean(np.array([entry[1][0] for entry in results]))
        avg_mape = np.mean(np.array([entry[1][1] for entry in results]))

        summary = '|' + column_to_predict + '| seq len ' + str(n_future) + '| mae: ' + str(avg_mae) + '| mape: ' + str(avg_mape)

        print(summary)
        summaries.append(summary)

        # Longer horizons are plotted; short ones are printed as raw values.
        if n_future > 2:
            plot_last_results(results, column_to_predict)
        else:
            print(('Pred: ', results[0][1][-1]['pred'].values))
            print(('Test: ', results[0][1][-1]['y_test'].values))

        plot_mape(results, column_to_predict)

        values_to_add = {'mae': avg_mae, 'mape':avg_mape}
        row_to_add = pd.Series(values_to_add, name = column_to_predict + ' ' + str(n_future))

        # DataFrame.append was removed in pandas 2.0; assigning by label adds
        # the row and works on older and newer pandas versions alike.
        table_result.loc[row_to_add.name] = row_to_add


|deceduti| seq len 1| mae: 3.8443252796674297| mape: 0.3918825281471233
('Pred: ', array([4.3333993], dtype=float32))
('Test: ', array([2.]))


|deceduti| seq len 2| mae: 3.92938096120077| mape: 0.5305110336490929
('Pred: ', array([5.9092307, 5.9702873], dtype=float32))
('Test: ', array([2., 5.]))


|deceduti| seq len 7| mae: 7.885197224393329| mape: 0.9779880271329816


|deceduti| seq len 14| mae: 4.348169633965471| mape: 0.6447842813100543


In [8]:
# Print the table of average MAE and MAPE, one row per target column and
# forecast horizon.
print(table_result)

                  mae      mape
deceduti 1   3.844325  0.391883
deceduti 2   3.929381  0.530511
deceduti 7   7.885197  0.977988
deceduti 14  4.348170  0.644784
