# Informer Nuovi Positivi, Terapia Intensiva, Deceduti

In questo notebook sono stati utilizzati gli stessi dati del notebook https://colab.research.google.com/github/marco-mazzoli/progetto-tesi/blob/master/covid_multi_output.ipynb, ossia quelli regionali dell'Emilia-Romagna.
Sono in particolare stati usati i dati di ricoverati in terapia intensiva, deceduti e nuovi positivi.

Il modello Informer è stato utilizzato in modalità univariata con lag di 28 giorni per fare forecasting a 1, 2, 7 e 14 giorni.

L'ultimo 20% dei dati è stato escluso dalla fase di ricerca degli iper-parametri. Una volta stabiliti, è stato usato l'intero dataset per fare previsione, mantenendo il primo 80% per il train e il restante 20% per il test.

Prima della fase di train i dati vengono scalati per poi essere riportati alla scala originale una volta effettuata la previsione.


In [34]:
!git clone https://github.com/zhouhaoyi/Informer2020.git

fatal: destination path 'Informer2020' already exists and is not an empty directory.


In [35]:
import sys
if not 'Informer2020' in sys.path:
    sys.path += ['Informer2020']
from utils.tools import dotdict
from exp.exp_informer import Exp_Informer
import torch
import pandas as pd
import os
import numpy as np
from utils.metrics import MAE, MAPE
import plotly.graph_objects as go
from pandas.io.parsers import read_csv
import pickle


In [36]:
class HiddenPrints:
    """Context manager that silences ``print`` output inside its ``with`` body.

    On entry ``sys.stdout`` is swapped for a writable handle on ``os.devnull``;
    on exit that handle is closed and the previous stream is put back.
    """

    def __enter__(self):
        sink = open(os.devnull, 'w')
        # Remember the live stream and install the sink in one step.
        self._original_stdout, sys.stdout = sys.stdout, sink

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Whatever is currently installed as stdout is what gets closed.
        muted, sys.stdout = sys.stdout, self._original_stdout
        muted.close()
        
def select_relevant_rows(frame, row, filter):
    """Return the rows of ``frame`` whose ``row`` column equals ``filter``.

    Note: despite its name, ``row`` is used as a column label.
    """
    matches = frame[row] == filter
    return frame[matches]

def select_attributes(frame, attributes):
    """Return ``frame`` indexed by ``attributes`` (e.g. a list of column labels)."""
    projection = frame[attributes]
    return projection

def save_config(path, config):
    """Pickle ``config`` to the file at ``path``, overwriting it if present.

    The file is opened in a ``with`` block so the handle is flushed and
    closed deterministically instead of being left to the garbage collector.
    """
    with open(path, 'wb') as handle:
        pickle.dump(config, handle)

def load_config(path):
    """Return the object unpickled from the file at ``path``.

    The file is opened in a ``with`` block so the handle is always closed,
    including when unpickling fails.

    NOTE(security): ``pickle.load`` can execute arbitrary code; only call
    this on files from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

In [37]:
# --- Experiment settings -------------------------------------------------
# When True the training cell loads a previously pickled configuration
# instead of running the hyper-parameter search.
use_existing_config = True
# Target series; one univariate model is trained per column and horizon.
columns = ['nuovi_positivi', 'terapia_intensiva', 'deceduti']
# Fraction of rows (from the start of the series) kept in the hold-out CSV.
split_percent = 0.80
# Forecast horizons, in days.
n_futures = [1, 2, 7, 14]
# Region to keep and the column that carries the region name.
region_focus = 'Emilia-Romagna'
attribute_focus = 'denominazione_regione'
# Local copy of the regional CSV (not used in this cell) and its remote source.
local_region_path = r'../COVID-19/dati-regioni/dpc-covid19-ita-regioni.csv'
remote_region_path = r'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv'

# Download the regional dataset (requires network access).
regions_frame = pd.read_csv(remote_region_path)

# Keep only the rows of the selected region.
region_focus_data = select_relevant_rows(
    regions_frame,
    attribute_focus,
    region_focus
)

# Keep the date plus the attributes of interest.
df = select_attributes(region_focus_data, [
    'data',
    'ricoverati_con_sintomi',
    'terapia_intensiva',
    'totale_ospedalizzati',
    'variazione_totale_positivi',
    'nuovi_positivi',
    'deceduti',
    'tamponi',
    'ingressi_terapia_intensiva'
])

# Timestamps are parsed as '%Y-%m-%dT%H:%M:%S', then reduced to the date
# only by formatting as '%Y-%m-%d' and parsing again.

df = pd.DataFrame(df)
df['data'] = pd.to_datetime(df['data'], format=r'%Y-%m-%dT%H:%M:%S')
df['data'] = df['data'].dt.strftime(r'%Y-%m-%d')
df['data'] = pd.to_datetime(df['data'])
# Missing values are replaced with 0 before differencing.
df = df.fillna(0)
# Rename the date column from 'data' to 'date'.
df.rename(columns={'data': 'date'}, inplace=True)

# Revert cumulative data: replace the running totals with day-over-day
# differences. diff() leaves NaN in the first row, which dropna() removes.
df['deceduti'] = df['deceduti'].diff()
df['tamponi'] = df['tamponi'].diff()
df.dropna(inplace=True)

# Optional cut-off date, currently disabled:
# df = df[df['date'] < pd.to_datetime('2022-01-31')]

# Row index separating the first `split_percent` of the series from the rest.
split = int(split_percent*len(df))

# Hold-out frame: only the first `split` rows.
df_holdout = df[:split]

# Write both frames as CSV files in the working directory.
df.to_csv('./covid.csv', index=False)
df_holdout.to_csv('./covid_holdout.csv', index=False)


In [38]:
def define_informer_configs():
    """Enumerate the hyper-parameter grid as a list of configuration dicts.

    Returns:
        list[dict]: one dict per combination, with keys ``layer_encoder``,
        ``layer_decoder``, ``multi_head_number``, ``multi_head_output``,
        ``input_seq``, ``objective`` and ``decoder_start_tok``. Combinations
        are produced in nested-loop order: the first key varies slowest and
        ``decoder_start_tok`` varies fastest.
    """
    # Local import: the notebook's import cell does not provide itertools.
    from itertools import product

    # Search space, listed in the same order as the dict keys so that
    # zip(grid.keys(), combination) pairs each value with its key.
    grid = {
        'layer_encoder': [4, 3, 2],
        'layer_decoder': [2],
        'multi_head_number': [8, 16],
        'multi_head_output': [512],
        'input_seq': [28, 56, 112],
        'objective': ['mse'],
        'decoder_start_tok': [28, 56, 112],
    }

    # product() varies the right-most factor fastest, exactly like the
    # inner-most loop of an equivalent nested for-loop.
    return [dict(zip(grid, combination)) for combination in product(*grid.values())]

In [39]:
def prepare_args(config, path, column_to_predict, n_future):
    """Assemble the argument bundle for one univariate Informer experiment.

    Args:
        config: mapping that provides 'input_seq', 'multi_head_number',
            'layer_encoder', 'layer_decoder' and 'objective'. The keys
            'multi_head_output' and 'decoder_start_tok' are not read here:
            the model dimension is fixed at 512 and the start-token length
            is tied to ``n_future``.
        path: CSV file with the series (stored as ``data_path``).
        column_to_predict: name of the target column.
        n_future: forecast horizon; used for both ``pred_len`` and
            ``label_len``.

    Returns:
        The populated ``dotdict``.
    """
    horizon = n_future
    frequency = 'd'  # daily data
    cuda_ready = bool(torch.cuda.is_available())

    args = dotdict()

    # --- experiment / data source ---
    args.model = 'informer'         # options: informer, informerstack, informerlight(TBD)
    args.data = 'custom'
    args.root_path = ''             # data_path is used as given
    args.data_path = path
    args.features = 'S'             # S: univariate predicts univariate (others: M, MS)
    args.target = column_to_predict
    # Only the last character of the frequency string is kept
    # (options: s, t, h, d, b, w, m; finer ones such as 15min or 3h exist).
    args.freq = frequency[-1:]
    args.checkpoints = './informer_checkpoints'

    # --- window lengths ---
    # Decoder input is concat[start token series (label_len), zero padding (pred_len)].
    args.seq_len = config['input_seq']   # encoder input length
    args.label_len = horizon             # config['decoder_start_tok'] is intentionally unused
    args.pred_len = horizon

    # --- architecture ---
    args.enc_in = 1                      # one input series
    args.dec_in = 1
    args.c_out = 1                       # one output series
    args.factor = 5                      # probsparse attention factor
    args.d_model = 512
    args.n_heads = config['multi_head_number']
    args.e_layers = config['layer_encoder']
    args.d_layers = config['layer_decoder']
    args.d_ff = 2048                     # feed-forward width
    args.dropout = 0.05
    args.attn = 'prob'                   # options: prob, full
    args.embed = 'timeF'                 # options: timeF, fixed, learned
    args.activation = 'gelu'
    args.distil = True                   # distilling in the encoder
    args.output_attention = False
    args.mix = True
    args.padding = 0

    # --- optimisation ---
    args.batch_size = 32
    args.learning_rate = 0.0001
    args.loss = config['objective']
    args.lradj = 'type1'
    args.use_amp = False                 # automatic mixed precision off

    # --- run control ---
    args.num_workers = 0
    args.itr = 1
    args.train_epochs = 6
    args.patience = 3
    args.des = 'exp'
    args.scale = True                    # scale inputs before training
    args.inverse = True                  # report results on the original scale

    # --- hardware ---
    args.use_gpu = cuda_ready
    args.gpu = 0
    args.use_multi_gpu = False
    args.devices = '0,1,2,3'

    args.do_predict = True

    return args

In [40]:
# Local imports: used to fetch stored configurations without a shell.
import urllib.parse
import urllib.request

# Remote folder holding the previously selected (pickled) configurations.
_CONFIG_BASE_URL = 'https://raw.githubusercontent.com/marco-mazzoli/progetto-tesi/master/configs/'
# Naming scheme of an experiment; results are read back from ./results/<setting>/.
_SETTING_TEMPLATE = '{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_at{}_fc{}_eb{}_dt{}_mx{}_{}_{}'


def _format_setting(args):
    """Return the experiment identifier derived from ``args``."""
    return _SETTING_TEMPLATE.format(
        args.model, args.data, args.features, args.seq_len, args.label_len,
        args.pred_len, args.d_model, args.n_heads, args.e_layers,
        args.d_layers, args.d_ff, args.attn, args.factor, args.embed,
        args.distil, args.mix, args.des, args.itr)


def _load_run_arrays(setting):
    """Return ``(preds, trues)`` read from the result files of ``setting``."""
    preds = np.load('./results/'+setting+'/pred.npy')
    trues = np.load('./results/'+setting+'/true.npy')
    return preds, trues


summaries = []
Exp = Exp_Informer

for column in columns:
    for n_future in n_futures:
        config_path = region_focus + '_' + 'uni_informer_config' + '_' + column
        config_path = config_path + '_' + str(n_future)

        results = []

        if use_existing_config is True:
            if not os.path.isfile(config_path):
                # Download without a shell so that names containing spaces or
                # quotes are handled; an HTTP failure raises here.
                urllib.request.urlretrieve(
                    _CONFIG_BASE_URL + urllib.parse.quote(config_path),
                    config_path)
            # NOTE(security): this unpickles a downloaded file; only safe
            # while the remote repository is trusted.
            config = load_config(config_path)
            # The configuration file is discarded once it has been loaded.
            os.remove(config_path)
        else:
            # Hyper-parameter search on the hold-out CSV: train every
            # candidate and keep the one with the lowest MAE.
            configs = define_informer_configs()
            for config in configs:
                args = prepare_args(config, 'covid_holdout.csv', column, n_future)
                setting = _format_setting(args)

                train_error = None
                with HiddenPrints():
                    exp = Exp(args)
                    try:
                        exp.train(setting)
                    except Exception as err:
                        # Best effort: a candidate that cannot be trained is
                        # skipped, but reported once stdout is restored.
                        train_error = err
                    else:
                        exp.test(setting)

                # Free cached GPU memory even when the candidate failed.
                torch.cuda.empty_cache()

                if train_error is not None:
                    print('skipping config ' + str(config) + ': ' + repr(train_error))
                    continue

                preds, trues = _load_run_arrays(setting)

                mae = (MAE(trues, preds))

                results.append((mae, config))

            if not results:
                raise RuntimeError(
                    'no configuration could be trained for ' + column
                    + ' with horizon ' + str(n_future))

            results.sort(key=lambda tup: tup[0])

            config = results[0][-1]
            save_config(config_path, config)

        # Final run with the selected configuration on the full CSV.
        args = prepare_args(config, 'covid.csv', column, n_future)
        setting = _format_setting(args)

        with HiddenPrints():
            exp = Exp(args)
            exp.train(setting)
            exp.test(setting)

        torch.cuda.empty_cache()

        preds, trues = _load_run_arrays(setting)

        # One MAPE value per entry along the first axis of the result arrays.
        mapes = [MAPE(true[:, 0], pred[:, 0]) for true, pred in zip(trues, preds)]

        # The 'seq len' label below reports the forecast horizon (n_future).
        summary = '|' + column + '| seq len ' + str(n_future) + '| mae: ' + str(MAE(trues,preds)) + '| mape: ' + str(MAPE(trues,preds)) + '| config: ' + str(config)

        print(summary)

        mapes_trace = go.Scatter(y=mapes, mode='lines', name='Mape')
        layout = go.Layout(
            title='mape: ' + column, xaxis={'title': 'step'},
            yaxis={'title': 'mape'}, autosize=False,
            width=600, height=400)
        fig = go.Figure(
            data=[mapes_trace], layout=layout)
        fig.show()

        summaries.append(summary)

        # Horizons of 1 or 2 points are not plotted as a curve.
        if n_future > 2:
            prediction_trace = go.Scatter(y=preds[-1,:,0], mode='lines', name='Prediction')
            truth_trace = go.Scatter(y=trues[-1,:,0], mode='lines', name='Ground Truth')
            layout = go.Layout(
                title=column, xaxis={'title': 'Date'},
                yaxis={'title': column}, autosize=False, width=600, height=400)
            fig = go.Figure(
                data=[prediction_trace, truth_trace], layout=layout)
            fig.show()


print(summaries)


|nuovi_positivi| seq len 1| mae: 2561.2346| mape: 1.0208218| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 16, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 28}


|nuovi_positivi| seq len 2| mae: 2648.6597| mape: 1.0601823| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 16, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 28}


|nuovi_positivi| seq len 7| mae: 3027.6152| mape: 1.3214903| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 8, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 28}


|nuovi_positivi| seq len 14| mae: 3652.236| mape: 4.1378317| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 8, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 56}


|terapia_intensiva| seq len 1| mae: 11.324346| mape: 0.24051958| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 16, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 112}


|terapia_intensiva| seq len 2| mae: 9.693016| mape: 0.17222382| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 16, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 56}


|terapia_intensiva| seq len 7| mae: 11.302136| mape: 0.15165594| config: {'layer_encoder': 2, 'layer_decoder': 2, 'multi_head_number': 8, 'multi_head_output': 512, 'input_seq': 56, 'objective': 'mse', 'decoder_start_tok': 56}


|terapia_intensiva| seq len 14| mae: 27.838223| mape: 0.27938026| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 8, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 112}


|deceduti| seq len 1| mae: 4.5255365| mape: 1.0147161| config: {'layer_encoder': 2, 'layer_decoder': 2, 'multi_head_number': 8, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 28}


|deceduti| seq len 2| mae: 3.86515| mape: 0.50734633| config: {'layer_encoder': 3, 'layer_decoder': 2, 'multi_head_number': 16, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 112}


|deceduti| seq len 7| mae: 3.7784321| mape: 0.35595056| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 8, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 56}


|deceduti| seq len 14| mae: 6.3630023| mape: 0.48377424| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 8, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 112}


["|nuovi_positivi| seq len 1| mae: 2561.2346| mape: 1.0208218| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 16, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 28}", "|nuovi_positivi| seq len 2| mae: 2648.6597| mape: 1.0601823| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 16, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 28}", "|nuovi_positivi| seq len 7| mae: 3027.6152| mape: 1.3214903| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 8, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 28}", "|nuovi_positivi| seq len 14| mae: 3652.236| mape: 4.1378317| config: {'layer_encoder': 4, 'layer_decoder': 2, 'multi_head_number': 8, 'multi_head_output': 512, 'input_seq': 28, 'objective': 'mse', 'decoder_start_tok': 56}", "|terapia_intensiva| seq len 1| mae: 11.324346| mape: 0.24051958| config: {'layer_encoder': 4, 'l