In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import pymc3 as pm
import arviz as az

# Switch matplotlib to seaborn's default theme for every figure drawn below.
sns.set()

In [None]:
def parse_scb_prel(YTD=True):
    """Load SCB's deaths-per-day table and spread undated deaths over the days.

    Reads 'scb-dead_per_day.csv' from the working directory (semicolon
    separated, ',' as thousands separator). The last row of the file is taken
    to hold the deaths whose day of death is unknown; every row before it is
    treated as one calendar day of a 366-day year.

    Parameters
    ----------
    YTD : bool, default True
        If True, keep only the days whose '2020' count is non-zero and scale
        the undated deaths of all other columns by the kept fraction of the
        year (rows kept / 366). The '2020' undated deaths are left unscaled.
        If False, keep every day; the index then spans 2020-01-01..2020-12-31,
        so the file must contain exactly 366 day rows.

    Returns
    -------
    pandas.DataFrame
        float64 columns '2015', '2016', '2017', '2018', '2019', '2020' and
        '2015-2019', indexed by consecutive dates starting at 2020-01-01.
        Each value is the day's reported count plus that day's proportional
        share of the column's undated deaths.
    """
    count_cols = ['2015', '2016', '2017', '2018', '2019', '2020', '2015-2019']
    # Every count column except the (possibly partial) current year.
    complete_cols = [col for col in count_cols if col != '2020']

    raw = pd.read_csv('scb-dead_per_day.csv', sep=';',
                      usecols=[1, 2, 3, 4, 5, 6, 7, 10, 11], thousands=',')

    # Only the count columns feed the result. Select them by name so the
    # undated-deaths row below stays numeric instead of inheriting object
    # dtype from the text columns of the raw frame.
    counts = raw[count_cols].fillna(0).astype(int)

    # Last row = deaths with unknown day; astype returns an independent float
    # Series, so scaling it below never writes back into `counts`.
    unknown_death_day = counts.iloc[-1].astype(float)

    # Trim the same row positionally, consistent with the iloc[-1] above.
    daily = counts.iloc[:-1]

    if YTD:
        daily = daily.loc[daily['2020'] != 0]

    known_death_day = daily.sum()

    # Fraction of the 366-day year covered by the rows that were kept.
    year_fraction = len(daily) / 366

    unknown_death_day[complete_cols] = unknown_death_day[complete_cols] * year_fraction

    # Each day's share of its column's dated deaths.
    dead_pct_per_day = daily / known_death_day

    # Distribute the undated deaths over the days in proportion to that share.
    all_deaths = daily + unknown_death_day * dead_pct_per_day

    if YTD:
        index = pd.date_range('2020-01-01', periods=len(all_deaths))
    else:
        index = pd.date_range('2020-01-01', '2020-12-31')

    all_deaths.index = index

    return all_deaths
    

In [None]:
# Cast to float first so the mean, difference and running total below are
# computed on numeric dtypes, whatever dtype the parser handed back.
df_scb = parse_scb_prel().astype(float)

# Baseline: per-day mean of the '2015'..'2018' columns (inclusive label slice).
df_scb['2015-2018'] = df_scb.loc[:, '2015':'2018'].mean(axis=1)

# Daily excess of 2020 over that baseline, and its running total.
df_scb['excess'] = df_scb['2020'] - df_scb['2015-2018']
df_scb['cum_excess'] = df_scb['excess'].cumsum()
df_scb

In [None]:
# Weekly totals of the daily SCB table. The running-total column is dropped
# after resampling, since its weekly sum is not used.
df_scb_weekly = df_scb.resample('W').sum().drop('cum_excess', axis=1)
df_scb_weekly

In [None]:
# First two columns of the 'Antal per dag region' sheet; the first one becomes
# the index.
df_conf_per_dag = pd.read_excel(
    'fhm_excel.xlsx',
    sheet_name='Antal per dag region',
    usecols=[0, 1],
    index_col=0,
)

In [None]:
# Inspect the per-day table read from the 'Antal per dag region' sheet.
df_conf_per_dag

In [None]:
# Whole 'Antal avlidna per dag' sheet, indexed by its first column.
df_dead_per_dag = pd.read_excel(
    'fhm_excel.xlsx',
    sheet_name='Antal avlidna per dag',
    index_col=0,
)

In [None]:
# First-column value of the sheet's last row. By its name this is the number
# of deaths with no recorded date (TODO confirm against the workbook).
# Must run before the following cell, which strips that last row.
dead_no_date = df_dead_per_dag.iloc[-1,0]
dead_no_date

In [None]:
# Strip the trailing non-date row only while it is still there, so that
# re-running this cell can never remove a real day from the table.
last_label = df_dead_per_dag.index[-1]
if pd.isna(pd.to_datetime(last_label, errors='coerce')):
    df_dead_per_dag = df_dead_per_dag.iloc[:-1]

# Remaining labels must all be dates; anything unparsable still raises here.
df_dead_per_dag.index = pd.to_datetime(df_dead_per_dag.index)
df_dead_per_dag

In [None]:
# Whole 'Antal intensivvårdade per dag' sheet, indexed by its first column.
df_icu_per_dag = pd.read_excel(
    'fhm_excel.xlsx',
    sheet_name='Antal intensivvårdade per dag',
    index_col=0,
)
df_icu_per_dag

In [None]:
# Put the three daily tables side by side, aligned on their index
# (column order: cases, intensive care, deaths).
daily_frames = [df_conf_per_dag, df_icu_per_dag, df_dead_per_dag]
df = pd.concat(daily_frames, axis=1)
df

In [None]:
# Quick look: every column of the combined daily table on one set of axes.
df.plot()

In [None]:
# Aggregate the combined daily table to weekly totals ('W' resampling rule).
df_weekly = df.resample('W').sum()
df_weekly

In [None]:
# Weekly overview figure: SCB all-cause deaths (2020 and the 2015-2018 mean),
# the three FHM COVID series, and cumulative excess deaths on a second y-axis.
fig, ax = plt.subplots(figsize=(18, 12))

styles = ['bo--', 'go--', 'ro--']

# Everything is cut one week before the last date in the daily FHM table
# (presumably because the newest week is still incomplete — confirm).
last_date = df.index[-1]
last_good_date = last_date - pd.DateOffset(weeks=1)

title = 'SWEDEN weekly : [ COVID:Positives, ICUs, Deaths ] & All Cause Deaths. YTD {}\nDataSource : FHM, SCB'.format(
    last_good_date.date())

# All-cause deaths from SCB, truncated at the cut-off.
scb_until_cutoff = df_scb_weekly.loc[:last_good_date]
ax.plot(scb_until_cutoff['2020'], 'x--', color='crimson')
ax.plot(scb_until_cutoff['2015-2018'], 'x--', color='orange', alpha=1.0)

# FHM weekly series, one line style per column, drawn on the same axes.
fhm_until_cutoff = df_weekly.loc[:last_good_date]
fhm_until_cutoff.plot(style=styles, title=title, ax=ax,
                      label=['Covid-cases', 'ICUs', 'Covid-Deaths'])

# Legend entries follow the order in which the five lines were drawn.
legend_entries = ['All Cause Deaths', '2015-2018 all cause deaths avg',
                  'Covid Positives', 'Covid ICUs', 'Covid-Deaths']
ax.legend(legend_entries, loc='upper left')
ax.set_ylabel('Nr of events [no pop adj.]')

# Secondary axis: running total of the weekly excess deaths.
ax2 = ax.twinx()
cumulative_excess = scb_until_cutoff['excess'].cumsum()
ax2.plot(cumulative_excess, '--', color='k', label='cumulative excess deaths')
ax2.set_ylabel('cumulative excess deaths [non-pop.adj.]')
ax2.legend(loc='upper right')

fig.savefig('FHM_weekly_combined.jpg', format='jpg')

In [None]:
# Pairwise correlations between the weekly FHM series.
df_weekly.corr()

In [None]:
# Weekly case counts on the x-axis against weekly deaths (crimson) and weekly
# intensive-care counts (orange).
for outcome_column, point_colour in (('Antal_avlidna', 'crimson'),
                                     ('Antal_intensivvårdade', 'orange')):
    plt.scatter(df_weekly['Totalt_antal_fall'], df_weekly[outcome_column], color=point_colour)

In [None]:
# Column-wise maxima of the weekly FHM series.
print (df_weekly.max())
# NOTE(review): in current pandas Series.argmax returns the integer position
# of the maximum, not its week label; idxmax gives the label — confirm intent.
df_weekly['Totalt_antal_fall'].argmax()

In [None]:
# Weekly case counts above 5000; weeks at or below that value show as NaN.
df_weekly['Totalt_antal_fall'].where(df_weekly['Totalt_antal_fall'] > 5000)

In [None]:
# Column totals over all weeks of the SCB weekly table.
df_scb_weekly.sum()

In [None]:
# Column totals over all weeks of the FHM weekly table.
df_weekly.sum()

In [None]:
# Inspect the FHM weekly table before it is merged with the SCB series.
df_weekly

In [None]:
# Inspect the SCB weekly table before its '2020' column is merged in below.
df_scb_weekly

In [None]:
# Weekly FHM series plus SCB's 2020 all-cause deaths, aligned on the week index.
df_2020_weekly = df_weekly.assign(all_cause_deaths=df_scb_weekly['2020'])

# Positional rename: relies on df_weekly holding exactly cases, intensive care
# and deaths, in that order.
df_2020_weekly.columns = ['covid_pos', 'covid_ICUs', 'covid_deaths', 'all_cause_deaths']

# Discard the two most recent weeks (presumably still incomplete — confirm).
df_2020_weekly = df_2020_weekly.iloc[:-2]
df_2020_weekly

In [None]:

# Lags, in weeks, applied to the outcome series.
icu_shift = 2
deaths_shift = 3

# A positive shift moves every value that many rows later, so row t ends up
# holding the value observed at t - lag and the first `lag` rows become NaN.
# NOTE(review): if the aim is to pair cases at week t with outcomes that occur
# later, a negative shift may be what is wanted — confirm the direction.
weeks_to_shift = {
    'covid_ICUs': icu_shift,
    'covid_deaths': deaths_shift,
    'all_cause_deaths': deaths_shift,
}

df_2020_weekly_shifted = df_2020_weekly.copy()
for column_name, lag in weeks_to_shift.items():
    df_2020_weekly_shifted[column_name] = df_2020_weekly_shifted[column_name].shift(lag)

df_2020_weekly_shifted

In [None]:
# Pairwise correlations between the unshifted weekly series.
df_2020_weekly.corr()

In [None]:
# Same correlation table after shifting the outcome columns; compare with the
# unshifted table in the previous cell.
df_2020_weekly_shifted.corr()

In [None]:
# Weekly positives against weekly all-cause deaths, using the unshifted series.
plt.scatter(df_2020_weekly['covid_pos'],df_2020_weekly['all_cause_deaths'],color='crimson')
# Alternative view (disabled): positives against COVID deaths.
#plt.scatter(df_2020_weekly['covid_pos'],df_2020_weekly['covid_deaths'],color='orange')

In [None]:
def standardize(series):
    """Return `series` as z-scores: mean removed, divided by its `.std()`."""
    centered = series - series.mean()
    spread = series.std()
    return centered / spread

# Outcome column to regress on the (unshifted) weekly positives.
y_param = 'all_cause_deaths'

# Predictor and outcome are both z-scored, taken from the shifted frame.
x = standardize(df_2020_weekly_shifted['covid_pos'])
y = standardize(df_2020_weekly_shifted[y_param])

# Simple linear regression on the standardized series:
#   obs ~ Normal(alpha + beta * x, sigma)
with pm.Model() as model:

    # Standard-normal priors on intercept and slope, Exponential(1) on the noise.
    alpha = pm.Normal('alpha', mu=0, sd=1)
    beta = pm.Normal('beta', mu=0, sd=1)
    sigma = pm.Exponential('sigma', lam=1)

    # Stored as a deterministic so the fitted line is available in the trace.
    reg = pm.Deterministic('reg', alpha + beta * x.values)

    obs = pm.Normal('obs', mu=reg, sd=sigma, observed=y)

    trace = pm.sample(draws=500, tune=500)
    
    

In [None]:
with model:
    # Text summary of the posterior, with intervals at hdi_prob=0.89.
    print (az.summary(trace,hdi_prob=0.89))

In [None]:
with model:
    # Trace plot restricted to the three scalar parameters (omits 'reg').
    az.plot_trace(trace,var_names=['alpha','beta','sigma'])

In [None]:
with model:

    # Range of the standardized predictor and an evenly spaced grid over it
    # (the grid is only needed by the optional overlay further down).
    X_min = x.min()
    X_max = x.max()
    X = np.linspace(X_min,X_max,100)

    # Posterior mean and 5.5 / 94.5 percentiles (a central 89% interval)
    # for the intercept and the slope.
    alpha_mean = trace['alpha'].mean()
    alpha_CI = np.percentile(trace['alpha'],[5.5,94.5])

    beta_mean = trace['beta'].mean()
    beta_CI = np.percentile(trace['beta'],[5.5, 94.5])

    # Posterior draws as a DataFrame; also summarised in a later cell.
    result = pm.trace_to_dataframe(trace)

    # Random subset of draws, used only by the optional overlay below.
    rows = np.random.choice(result.index,replace=True,size=1000)

    # Simulated observations from the fitted model.
    ppc = pm.sample_posterior_predictive(trace,samples=2000,model=model)

    plt.figure(figsize=(18,12))

    title = (
        'SWEDEN Covid-Positives predictor for ' + y_param + '\n'
        + r'$\alpha$ : {:.2f} $\alpha$ CI : [{:.2f}  {:.2f}] '.format(
            alpha_mean,alpha_CI[0],alpha_CI[1])
        + r' $\beta$ : {:.2f} $\beta$ CI : [{:.2f}  {:.2f}]'.format(
            beta_mean,beta_CI[0],beta_CI[1])
        + '\nDataSource: FHM,SCB'
    )

    plt.title(title)

    ax = plt.gca()

    # 89% band of the regression line itself.
    az.plot_hdi(x,trace['reg'],hdi_prob=0.89,ax=ax)

    ax.scatter(x,y,color='crimson')

    # Optional overlay of individual posterior lines (disabled).
    #ax.plot(X,trace['alpha'][rows] + X[:,np.newaxis] * trace['beta'][rows],color='c',alpha=0.05)

    # 89% band of the posterior predictive. Uses plot_hdi like the band above;
    # plot_hpd is only a deprecated alias for the same function.
    az.plot_hdi(x,ppc['obs'],hdi_prob=0.89,ax=ax)

    # Regression line at the posterior means computed above.
    ax.plot(x,alpha_mean + x * beta_mean,color='k',ls='dashed')


    ax.set_ylabel( y_param + ' [standardized]')
    ax.set_xlabel('covid positives [standardized]')

    plt.savefig('FHM_combined_regression_{}.jpg'.format(y_param),format='jpg')

In [None]:
# Summary statistics of the posterior draws exported with pm.trace_to_dataframe.
result.describe()