In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# Plot appearance.
# The style sheet is applied FIRST: plt.style.use() overwrites every rcParam the
# sheet defines (fivethirtyeight ships its own axes.labelsize, for instance), so
# the explicit overrides below must come after it in order to take effect.
#plt.style.use('ggplot')
plt.style.use('fivethirtyeight')
matplotlib.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'text.color': 'k',
    'figure.figsize': (30,12),
})
#import seaborn as sns
import warnings
# Hide DeprecationWarning messages so they do not clutter the cell outputs.
warnings.filterwarnings("ignore", category=DeprecationWarning)
import datetime 
# Time series manipulation and forecasting with darts:
# https://pythonrepo.com/repo/unit8co-darts-python-machine-learning
# Install with: pip install 'u8darts[pmdarima]'
from darts import TimeSeries
from darts.models import (ARIMA,AutoARIMA)
from darts.metrics import mape
import logging
# Disable all log records of severity CRITICAL and below, for every logger.
logging.disable(logging.CRITICAL)


In [None]:
# ICU registers, downloaded from https://covid.saude.rs.gov.br/
fname='CSV/transparencia_dados_covid.csv'
cols_data=["DATA INCLUSAO REGISTRO"]
df1 = pd.read_csv(fname, decimal=".",sep="\t",parse_dates=cols_data)

# Keep only the rows of the region of interest. The explicit .copy() makes df0
# an independent frame, so adding/dropping columns below neither touches df1
# nor triggers pandas' SettingWithCopyWarning.
col_reg='REGIAO COVID'
mycol_reg='PORTO ALEGRE - R10'
mask=df1[col_reg]==mycol_reg
df0=df1[mask].copy()

# The non-numeric columns are listed BEFORE 'dia' is created, so the new
# grouping key is not dropped together with them.
notNumeric=list(df0.select_dtypes(exclude=np.number))

# Calendar day of each register (time of day reset to midnight), so that all
# registers of the same day share one key. .dt.normalize() replaces
# astype('datetime64[D]'), which recent pandas (2.x) no longer accepts, and the
# column is selected as a Series rather than as a one-column DataFrame.
df0['dia']=pd.to_datetime(df0[cols_data[0]]).dt.normalize()

# Identifier and coordinate columns are numeric, but summing them per day
# would be meaningless, so they are removed as well.
notKeep=['IBGE', 'LATITUDE_MUNICIPIO', 'LONGITUDE_MUNICIPIO',
       'CODIGO MACRORREGIAO SAUDE', 'CODIGO REGIAO COVID',
       'CNES ESTABELECIMENTO SAUDE', 'LATITUDE_ESTABELECIMENTO',
       'LONGITUDE_ESTABELECIMENTO']
df0=df0.drop(columns=notNumeric).drop(columns=notKeep)

# Daily totals of every remaining numeric column, indexed by 'dia'.
df=df0.groupby('dia').sum()


In [None]:
# ICU capacity and ICU demand per day, taken from the daily totals in `df`.
cols_cap=['NUMERO LEITOS UTI ADULTO (SUS  PRIVADO)','NUMERO LEITOS UTI ADULTO (SUS)']
cols_uti=['NUMERO SUSPEITOS+CONFIRMADOS COVID-SRAG EM LEITO UTI ADULTO (SUS)']
# Row-wise sum of each column group. Both Series carry df's index, so the
# resulting frame is indexed exactly like df.
ts=pd.DataFrame({
    'cap': df[cols_cap].sum(axis=1),
    'uti': df[cols_uti].sum(axis=1),
})


In [None]:
# Confirmed-case registers, downloaded from https://ti.saude.rs.gov.br/covid19/
fname='CSV/20210521.csv'
cols_data=['DATA_CONFIRMACAO']
# NOTE(review): no dayfirst/date_format is passed here; confirm that the dates
# in this file are parsed with the intended day/month order.
df1 = pd.read_csv(fname, decimal=".",sep=";",parse_dates=cols_data)

# Keep only the rows of the region of interest. The explicit .copy() makes df0
# an independent frame, so the columns added below do not trigger pandas'
# SettingWithCopyWarning.
col_reg='REGIAO_COVID'
mycol_reg='PORTO ALEGRE - R10'
mask=df1[col_reg]==mycol_reg
df0=df1[mask].copy()

# The non-numeric columns are listed BEFORE 'Count' and 'dia' are created, so
# neither of the two new columns is dropped together with them.
notNumeric=list(df0.select_dtypes(exclude=np.number))

# One unit per register: summing 'Count' per day yields the number of cases
# confirmed on that day.
df0['Count']=1

# Calendar day of each confirmation (time of day reset to midnight), selected
# as a Series rather than as a one-column DataFrame, so that every register of
# a given day falls into the same group.
df0['dia']=pd.to_datetime(df0[cols_data[0]]).dt.normalize()
df0=df0.drop(columns=notNumeric)

# Daily totals, indexed by 'dia'.
df=df0.groupby('dia').sum()


In [None]:
# The ICU registers begin after the first confirmed cases (ts.index[0]>df.index[0]),
# so only the case counts from the first ICU day onwards are taken.
ts_first=ts.index[0]
cases_since_icu=df.loc[ts_first:,'Count']
# Column assignment aligns on the index: rows of ts with no matching day in
# the case counts receive NaN.
ts['cases']=cases_since_icu

In [None]:
# Window length (in rows of ts) and under-notification factor.
WIN=7
SUBNOT=2
# Shared rolling settings: min_periods=1 lets the first rows use whatever data
# is available instead of producing NaN until the window is full.
trailing=dict(window=WIN,min_periods=1)

# Smoothed ICU demand and smoothed daily cases.
ts['uti-mean']=ts['uti'].rolling(**trailing).mean()
ts['cases-mean']=ts['cases'].rolling(**trailing).mean()

# Corrected case load: the smoothed cases are accumulated over the last WIN
# rows and scaled by SUBNOT (SUBNOT=2 assumes only half of the cases are detected).
cases_accum=ts['cases-mean'].rolling(**trailing).sum()
ts['cases-accum-corrected']=SUBNOT*cases_accum


In [None]:
# History of ratios: smoothed ICU demand divided by the corrected accumulated cases.
ts['Ratio']=ts['uti-mean'].div(ts['cases-accum-corrected'])
ts['Ratio'].plot(title='Ratio UTI demand')

In [None]:
# Single-column frame with the ratio history (same index as ts), used as the
# input of the darts TimeSeries constructor.
y=ts[['Ratio']].copy()
series = TimeSeries.from_dataframe(y)
# Optional train/test split, currently unused:
#train,test=series.split_after(pd.Timestamp('20210101'))

In [None]:
# Candidate models: an automatically selected ARIMA and a fixed ARIMA(1,0,0).
# darts' ARIMA takes the orders as separate arguments (p, d, q); a tuple passed
# as the first positional argument is read as `p` alone and leaves `d` at its
# default, so the orders are given explicitly by keyword.
models = [AutoARIMA(),ARIMA(p=1,d=0,q=0)]
# Backtest every model on the history: forecasts with a horizon of 3 steps,
# starting at 75% of the series (start=.75).
backtests = [ model.historical_forecasts(series,
                            start=.75,
                            forecast_horizon=3)
             for model in models]


In [None]:
# Projection horizon, in time steps of `series`.
lproj=150
# Fit each model on the complete history before projecting. The projection
# must start right after the last observation, and it should not depend on
# whatever fitted state (if any) historical_forecasts() left in the model.
projec = []
for model in models:
    model.fit(series)
    projec.append(model.predict(lproj))

In [None]:
# Plot the historical ratio, then each model's backtest and projection.
series.plot(label='Ratio historico')
for m, backtest, projection in zip(models, backtests, projec):
    # darts metrics take (actual_series, pred_series): the observed history is
    # the reference and the backtest is the prediction being scored.
    err = mape(series, backtest)
    backtest.plot(lw=3, label='{}, MAPE={:.2f}%'.format(m, err))
    projection.plot(label=str(m)+' projetado', lw=3)
# The title does not depend on the model, so it is set once after the loop.
plt.title('Backtest e projecao com modelos calibrados')
plt.legend()

In [None]:
# Fit a fresh AutoARIMA model on the complete ratio history.
a_aarima = AutoARIMA()
a_aarima.fit(series)

# Display the calibration summary of the underlying fitted model.
a_aarima.model.summary() # cell output: summary of the fit

In [None]:
# Fit a fixed ARIMA(1,0,0) on the complete ratio history.
# The orders are passed by keyword: darts' ARIMA takes (p, d, q) as separate
# arguments, so a tuple given as the first positional argument would be read as
# `p` alone and leave `d` at its default.
my_arima = ARIMA(p=1,d=0,q=0)
my_arima.fit(series)

# Display the summary of the underlying fitted model.
my_arima.model.summary()

In [None]:
# Display the `params` attribute of the model underlying my_arima.
my_arima.model.params