In [None]:
# uses first sheet of scb excel

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc3 as pm
import arviz as az
import scipy.stats as sps
import requests

sns.set()

In [None]:
def parse_scb_prel(YTD=True):
    """Load SCB's preliminary deaths-per-day table and allocate undated deaths to days.

    Reads ``scb-dead_per_day.csv`` (``;`` separated, ``,`` as thousands mark) from the
    current working directory. Every row except the last holds the deaths for one
    calendar day; the last row holds deaths whose day of death is unknown.

    The undated deaths of each column are spread over the days in proportion to each
    day's share of that column's dated deaths. For the historical columns the undated
    total is first scaled by the fraction of the year that is kept, so a year-to-date
    view only receives a proportional part; the 2020 undated deaths are allocated in
    full to the kept days.

    Parameters
    ----------
    YTD : bool, default True
        If True, keep only the days for which 2020 has a non-zero count.

    Returns
    -------
    pandas.DataFrame
        Float columns '2015' ... '2020' and '2015-2019', indexed by consecutive
        calendar days starting at 2020-01-01.
    """
    year_cols = ['2015','2016','2017','2018','2019','2020','2015-2019']
    hist_cols = [col for col in year_cols if col != '2020']

    # Only the seven count columns feed the result, so the month/day label columns
    # are not read at all.
    counts = pd.read_csv('scb-dead_per_day.csv',sep=';',usecols=year_cols,thousands=',')
    counts = counts[year_cols].astype(int)

    # Split off the trailing "unknown day of death" row by position rather than by a
    # hard-coded row label, and keep it as an independent float Series so that
    # scaling it below never writes back into the table.
    unknown_death_day = counts.iloc[-1].astype(float)
    counts = counts.iloc[:-1]

    # Number of calendar-day rows in the file (366 for the leap-year layout).
    days_in_year = len(counts)

    if YTD:
        counts = counts.loc[counts['2020'] != 0]

    known_death_day = counts.sum()

    year_fraction = len(counts) / days_in_year

    # Historical columns only: 2020 keeps its full undated total.
    unknown_death_day[hist_cols] = unknown_death_day[hist_cols] * year_fraction

    dead_pct_per_day = counts / known_death_day

    all_deaths = counts + unknown_death_day * dead_pct_per_day

    # Rows are consecutive days from Jan 1st in both the YTD and the full-year case.
    all_deaths.index = pd.date_range('2020-01-01',periods=len(all_deaths))

    return all_deaths
    
    

In [None]:
# days with k homicides in UK
# day_counts[k] is the number of days on which exactly k homicides were recorded
day_counts = (259, 387, 261, 131, 40, 13, 3)

zeros, ones, twos, threes, fours, fives, six = (
    np.full(n_days, k) for k, n_days in enumerate(day_counts)
)

# one entry per day, ordered by homicide count
data = np.concatenate((zeros, ones, twos, threes, fours, fives, six))


In [None]:
data_mean = data.mean()
data_std = data.std()
print (data_mean)
print (data_std)

In [None]:
scb = parse_scb_prel()
scb

In [None]:
scb.loc[:'2020-06-30'].sum()

In [None]:
 # first half of the year: 2020 deaths minus the 2015-2019 column, summed through June 30
scb.loc[:'2020-06-30'].sum().loc['2020'] - scb.loc[:'2020-06-30'].sum().loc['2015-2019']

In [None]:
last_data_day = scb.index[-1]
last_data_day

In [None]:
day_excess = scb['2020'][:last_data_day] - scb['2015-2019'][:last_data_day]
idx = day_excess.index.date
day_excess.plot(x=idx,figsize=(18,12),style='ro--',
               title='Sweden Daily Excess Deaths YTD {}\nDataSource:SCB.se'.format(last_data_day.date()))
plt.ylabel('Excess Deaths per day')

plt.savefig('scb_excess_deaths_per_day.jpg',format='jpg')

In [None]:
cumulative_until_last_day = scb[:last_data_day].sum()
cumulative_until_last_day['2020'] / cumulative_until_last_day['2015-2019']

In [None]:
scb_weekly = scb.resample('W-TUE').sum()
print(scb_weekly.loc[:last_data_day].sum())
(scb_weekly.loc[:last_data_day,'2020'] - scb_weekly.loc[:last_data_day,'2015-2019']).sum()

In [None]:
scb_weekly

In [None]:
scb_weekly.loc[:'2020-07-31'].sum()

In [None]:
scb_weekly['excess'] = scb_weekly['2020'] - scb_weekly['2015-2019']
scb_weekly['cumexcess'] = scb_weekly['excess'].cumsum()
scb_weekly

In [None]:
scb_monthly = scb.resample('M').sum()
scb_monthly['delta'] = scb_monthly['2020'] - scb_monthly['2015-2019']
scb_monthly['cumdelta'] = (scb_monthly['delta'].cumsum())
# last of feb not correct for delta - leap year
scb_monthly

In [None]:
scb_monthly.loc[:'2020-07-31'].sum()

In [None]:
scb_monthly.loc[:'2020-07-31'].sum()['2020'] / 10.327589

In [None]:
data = scb_weekly.iloc[:,0:5]
#data = data.iloc[:9]
data.mean(axis=1)
data

In [None]:
data_min = data.min(axis=1)
data_max = data.max(axis=1)

print (data.mean().mean())

In [None]:
# Sampler settings: draws kept per chain, tuning steps per chain, number of chains.
walk = 1000
tune = 500
chains = 2


# Per row of `data`: min / mean / max of the posterior draws of the Poisson rate.
res_array = np.zeros((len(data),3))
# Per row of `data`: every posterior draw, all chains concatenated.
dist_array = np.zeros((len(data),walk*chains))

for i in range(len(data)):
    
    # A fresh, independent model per row: an Exponential prior whose mean equals the
    # row mean, and a Poisson likelihood over that row's yearly values.
    model1 = pm.Model()
    
    with model1:
        
        print ('\n++++++++++++++++++++ processing day ++++++++++++++++++++', i)
        print (data.iloc[i,:])
        print (data.iloc[i,:].mean())

        lambda_ = pm.Exponential('lambda',1/ data.iloc[i,:].mean())
        lkh  = pm.Poisson('lkh',lambda_,observed=data.iloc[i,:])
        
        # Pass `chains` explicitly: dist_array is sized walk*chains, and the sampler's
        # default chain count depends on the machine, which would make the
        # assignment below fail with a shape mismatch.
        trace1 = pm.sample(walk,tune=tune,chains=chains)
        post = trace1['lambda'][:]   

        res_array[i,0] = post.min()
        res_array[i,1] = post.mean()
        res_array[i,2] = post.max()

        dist_array[i,:] = post
                     
    

In [None]:
dist_array

In [None]:
N = 1000

lambdas_per_week = np.zeros((len(dist_array),N))
samples_per_week = np.zeros((len(dist_array),N))

for week in range(len(dist_array)):
    
    lambdas_per_week[week] = np.random.choice(dist_array[week],replace=True,size=N)
    pois_dist = pm.Poisson.dist(lambdas_per_week[week])
    samples_per_week[week] = pois_dist.random(size=N)

In [None]:
print (lambdas_per_week[:5])
samples_per_week[:5]

In [None]:
ci_samples = np.percentile(samples_per_week,[5.5,94.5],axis=1)
ci_samples

plt.fill_between(scb_weekly.index,ci_samples[0],ci_samples[1])

In [None]:
res_array

In [None]:
errors_low = np.abs(res_array[:,1] - res_array[:,0])
errors_low = errors_low[:-1]
errors_high = np.abs(res_array[:,1] - res_array[:,2]) 
errors_high = errors_high[:-1]
print (errors_low)
print (errors_high)

errors = np.array((errors_low,errors_high))
errors

In [None]:
excess = scb_weekly.loc[:last_data_day,'2020'] - scb_weekly.loc[:last_data_day,'2015-2019']
print (excess)
current_excess = excess.cumsum()
print (current_excess)
excess.index = excess.index.date
excess.plot.bar(x=excess.index,title='Sweden Weekly Excess deaths YTD {}\nDataSource: SCB.se'.format(last_data_day.date()),
                y=excess,
                figsize=(18,12),
                color='crimson')
plt.ylabel('Number of Excess Deaths')
plt.savefig('swe_excess_deaths.jpg',format='jpg',dpi=400)

In [None]:
# Weekly deaths (left axis) against the 2015-2019 baseline, with cumulative excess
# deaths on a secondary (right) axis.
plt.figure(figsize=(18,12))
# .iloc[-1] takes the last value by position; plain [-1] on a date-indexed Series
# relies on the deprecated positional fallback.
plt.title('Sweden weekly & excess deaths 2020 Y2D '\
          '{} compared to average 2015-2019 Current nr Excess Deaths: {}\n$Data Source: SCB.se$'.format(
    excess.index[-1],int(current_excess.iloc[-1])))
ax = plt.gca()
ax2 = plt.twinx()

# The error bars span the min..max of the posterior draws held in res_array
# (see `errors`); the last week is left out of every baseline series.
ax.errorbar(scb_weekly.index[:len(data) - 1],res_array[:-1,1],yerr=errors,
             fmt='o',capsize=5,color='lime',alpha=0.6,label='Posterior sample means with CI')

ax.plot(scb_weekly.index[:len(data) -1],scb_weekly['2015-2019'][:-1],'o--',
        markerfacecolor='none',markeredgecolor='navy',markersize=12,
        label='2015-2019 mean')

ax.plot(scb_weekly[:last_data_day].index,scb_weekly.loc[:last_data_day,'2020'],'o--',
         color='crimson',label='2020 data')

ax.fill_between(scb_weekly.index[:len(data) -1],y1=data_min[:-1],y2=data_max[:-1],color='grey',
                 alpha=0.2,label='2015-2019 range')

ax.fill_between(scb_weekly.index[:len(data) -1],y1=ci_samples[0][:-1],y2=ci_samples[1][:-1],color='c',
                 alpha=0.2,label='Poisson Sample 89% CI')



# The 'o--' format string already selects a dashed line; repeating it through `ls`
# only defined the line style twice.
ax2.plot(excess.index,excess.cumsum(),'o--',color='orange',label='Cumulative Excess Deaths')
ax.set_ylabel('number of weekly deaths')
ax2.set_ylabel('cumulative Excess deaths')
ax.legend(loc='upper left')
ax2.legend(loc='upper right')
plt.savefig('scb_poisson_weekly.jpg',format='jpg',dpi=400)

In [None]:
# Monthly deaths, 2020 against the 2015-2019 column, for months up to August 2020.
month_abbr = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

monthly_to_plot = scb_monthly.loc[:pd.Timestamp('2020-08-31'),['2020','2015-2019']]

monthly_to_plot.plot.bar(color=['r','orange'],
                         title='Sweden deaths per month\nDataSource: SCB.se',figsize=(18,12))
plt.ylabel('Number of deaths')

# Derive the tick labels from the months actually plotted: a fixed list of eight
# labels raises in plt.xticks whenever the data covers fewer than eight months.
plt.xticks(range(len(monthly_to_plot)),[month_abbr[m - 1] for m in monthly_to_plot.index.month])
plt.savefig('scb_monthly_deaths.jpg',format='jpg')

In [None]:
### population 2014-2019
url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101A/BefolkningR1860'
data = {
  "query": [
    {
      "code": "Alder",
      "selection": {
        "filter": "vs:ÅlderTotA",
        "values": []
      }
    },
    {
      "code": "Kon",
      "selection": {
        "filter": "item",
        "values": [
          "1",
          "2"
        ]
      }
    },
    {
      "code": "Tid",
      "selection": {
        "filter": "item",
        "values": [
          "2014",  
          "2015",
          "2016",
          "2017",
          "2018",
          "2019"
        ]
      }
    }
  ],
  "response": {
    "format": "json"
  }
}

In [None]:
# POST the table query to the SCB API. Bound the wait and surface HTTP errors here,
# so a failed request does not show up later as a JSON decoding problem.
r = requests.post(url,json=data,timeout=30)
r.raise_for_status()
r.status_code

In [None]:
json = r.json()

In [None]:
# One (key[0], key[1], first value) tuple per record of the API response.
data_list = [
    (record['key'][0], record['key'][1], record['values'][0])
    for record in json['data']
]

In [None]:
population = pd.DataFrame(data_list,columns=['gender','year','population'])
population = population.set_index('year')
population['population'] = population['population'].astype(int)

In [None]:
population = population.groupby(population.index).sum()
population['growth'] = population['population'] / population['population'].shift()


population.loc['2020','population'] = population.loc['2019','population']  
population['population'] = population['population'].astype(int)
population

In [None]:
pop = population['population']
pop = pop.shift()
pop

In [None]:
pop.drop('2014',inplace=True)
pop

In [None]:
weekly_deaths = scb_weekly.loc[:,'2015':'2020'].copy()
weekly_deaths['2015-2018'] = weekly_deaths.loc[:,'2015':'2018'].mean(axis=1)

weekly_deaths_no_truncate = weekly_deaths.copy()
weekly_deaths = weekly_deaths[:-1]
weekly_deaths

In [None]:
weekly_deaths_no_truncate

In [None]:
weekly_deaths_per_m = weekly_deaths / (pop / 1e6)
weekly_deaths_per_m['2015-2018'] = weekly_deaths_per_m.loc[:,'2015' : '2018'].mean(axis=1)
weekly_deaths_per_m['excess'] = weekly_deaths_per_m['2020'] - weekly_deaths_per_m['2015-2018']
weekly_deaths_per_m['cum_excess'] = weekly_deaths_per_m['excess'].cumsum()
weekly_deaths_per_m['abs_excess'] = weekly_deaths_per_m['cum_excess'] * (pop['2020'] / 1e6)
weekly_deaths_per_m['min'] = weekly_deaths_per_m.loc[:,'2015':'2018'].min(axis=1)
weekly_deaths_per_m['max'] = weekly_deaths_per_m.loc[:,'2015' :'2018'].max(axis=1)
weekly_deaths_per_m

In [None]:
data = weekly_deaths_per_m.loc[:,'2015' : '2020'].copy()
data_orig = data.copy()

data.drop('2015-2018',axis=1,inplace=True)

data


In [None]:
data = data.unstack().reset_index()
data.columns=['year','week','dead_per_m']


In [None]:
data['week']

In [None]:
unique_weeks = data['week'].dt.date.unique()
week_idx_map = dict(zip(unique_weeks,range(len(unique_weeks))))

unique_years = data['year'].unique()
year_idx_map = dict(zip(unique_years,range(len(unique_years))))
year_idx_map

In [None]:
data['week_idx'] = data['week'].apply(lambda x : week_idx_map[pd.to_datetime(x).date()])
data['year_idx'] = data['year'].apply(lambda x : year_idx_map[x])
data.head(50)

In [None]:
# Partially pooled weekly Poisson model: one rate per week, all weeks sharing a
# common scale `lambda_bar`.
model2 = pm.Model()

week_idx = data['week_idx'].values

with model2:

    # Hyper-prior on the common scale; its prior mean equals the overall data mean.
    lambda_bar = pm.Exponential('lambda_bar',1 / data['dead_per_m'].mean())
    # pm.Exponential is parameterised by a rate, so weekly rates with mean
    # `lambda_bar` need rate 1 / lambda_bar (same construction as in model5).
    # Passing lambda_bar itself made it act as an inverse scale while its prior was
    # set on the data scale.
    lambda_ = pm.Exponential('lambda_',1 / lambda_bar,shape=len(week_idx_map))
    # Each observation is matched to the rate of its week.
    lkh = pm.Poisson('lkh',lambda_[week_idx],observed=data['dead_per_m'])
    
    trace2 = pm.sample(500,tune=500)
    
    result2 = pm.trace_to_dataframe(trace2)
    summary2 = az.summary(trace2)
    az.plot_trace(trace2)

In [None]:
print (summary2)

In [None]:
result2.describe()

In [None]:
weekly_means = result2.describe().loc['mean','lambda___0' :]

weekly_means

In [None]:
CI = result2.loc[:,'lambda___0':].quantile([0.055,0.945])
CI

In [None]:
abs_error = np.abs(weekly_means - CI)
abs_error

In [None]:
plt.figure(figsize=(18,12))

ax = plt.gca()

ax2 = plt.twinx()

ax.plot(weekly_deaths_per_m.index,weekly_deaths_per_m['2020'],'o--',
         color='crimson',label='weekly deaths per million')


ax.plot(weekly_deaths_per_m.index,weekly_deaths_per_m['2015-2018'],'o--',
         markerfacecolor='none',markeredgecolor='navy',label='average 2015-2018')

ax.fill_between(weekly_deaths_per_m.index,weekly_deaths_per_m['min'],weekly_deaths_per_m['max'],
                color='grey',alpha=0.3,label='2015-2018 range')

ax.errorbar(weekly_deaths_per_m.index,weekly_means,yerr=abs_error.values,
            fmt='o--',capsize=5,mec='g',mfc='none',color='g',label='Poisson expectation 89% CI')


ax.legend(loc='upper left')

ax2.plot(weekly_deaths_per_m.index,weekly_deaths_per_m['cum_excess'],'o--',color='orange',
        label='cumulative excess deaths per M')

plt.title('SWEDEN Weekly All Cause deaths & excess deaths, per million, cmp Baseline 2015-2018')
ax.set_ylabel('deaths per million')
ax2.set_ylabel('cumulative excess deaths per M')
ax2.legend(loc='upper right')

plt.savefig('scb_poisson_per_m.jpg',format='jpg')

In [None]:
post_samples2 = pm.sample_posterior_predictive(trace2,100,model2)
post_samples2

In [None]:
data_ppc2 = az.from_pymc3(trace=trace2,model=model2,posterior_predictive=post_samples2)
data_ppc2

In [None]:
az.plot_ppc(data_ppc2,figsize=(18,12))

In [None]:
az.plot_forest(trace2,model_names=[model2],figsize=(18,12),combined=True,colors='r',hdi_prob=0.89)


In [None]:
pop

In [None]:
data['pop'] = data['year'].apply(lambda x : pop[x])
data

In [None]:
# Scientific, dynamic model, not a GLM

# Delta D = alpha * Pop^beta - gamma

# alpha = deaths per individual
# beta = Elasticity (diminishing returns)
# gamma = death loss (e.g. better health care)

# D ~ Poisson(lambda)
# As implemented below (note log(P), not P):
# lambda = exp(a[year_idx,week_idx]) * log(P)^b[year_idx,week_idx] / g
# a ~ Normal(1,1)      one per (year, week)
# b ~ Exponential(1)   one per (year, week)
# g ~ Exponential(1)   shared

model3 = pm.Model()

year_idx = data['year_idx'].values

with model3:
    
    a = pm.Normal('a',mu=1,sigma=1,shape=(len(year_idx_map),len(week_idx_map)))
    b = pm.Exponential('b',1,shape=(len(year_idx_map),len(week_idx_map)))
    g = pm.Exponential('g',1)
    
    lambda_ = np.exp(a[year_idx,week_idx]) * np.log(data['pop'].values) ** b[year_idx,week_idx] / g
    
    # The likelihood is fitted on the integer part of the per-million rates.
    lkh = pm.Poisson('lkh',lambda_,observed=data['dead_per_m'].values.astype(int))
    
    # No step method is built here: the Metropolis step that used to be created was
    # never passed to pm.sample, so it had no effect on the sampling.
    trace3 = pm.sample(500,tune=500,target_accept=0.95)
    
    result3 = pm.trace_to_dataframe(trace3)
    summary3 = az.summary(trace3)
    
    _= az.plot_trace(trace3)
    

In [None]:
def alpha_cols(col):
    """Return True when the column's name contains the substring 'a_'."""
    return 'a_' in col.name

def beta_cols(col):
    """Return True when the column's name contains the substring 'b_'."""
    return 'b_' in col.name

def gamma_col(col):
    """Return True only for the column named exactly 'g'."""
    return bool(col.name == 'g')

# DataFrame.apply calls each predicate once per column, giving one boolean column
# mask per parameter group.
alpha_mask, beta_mask, gamma_mask = (
    result3.apply(picker) for picker in (alpha_cols, beta_cols, gamma_col)
)

# Split the posterior draws into one frame per parameter group.
alphas = result3.loc[:, alpha_mask]
betas = result3.loc[:, beta_mask]
gamma = result3.loc[:, gamma_mask]

gamma

In [None]:
summary3

In [None]:
plt.figure(figsize=(18,12))

ax = plt.gca()
ax.plot(range(len(alphas.mean())),alphas.mean(),'o--')

ax.plot(range(len(betas.mean())),betas.mean(),'o--')

In [None]:
alphas

In [None]:
lambdas = np.exp(alphas.values) * np.log(10e6) ** betas.values / gamma.values
plt.plot(range(len(lambdas)),lambdas)

lambdas.shape

In [None]:
samples = pm.Poisson.dist(lambdas.mean(axis=0)).random(size=1000)
samples.shape

In [None]:
CI = np.percentile(samples,[5.5,94.5],axis=0)
weekly_mins = data.loc[data['year_idx'].isin([0,1,2,3])].groupby('week_idx').min()['dead_per_m']
weekly_max = data.loc[data['year_idx'].isin([0,1,2,3])].groupby('week_idx').max()['dead_per_m']

weekly_max

In [None]:
cols = data['year'].str.cat(data['week_idx'].astype(str),sep='_')

weekly_means = data.loc[data['year_idx'].isin([0,1,2,3])].groupby('week_idx').mean()['dead_per_m']
weekly_means

In [None]:
# Repeat the per-week baseline once for every year in `data`, so it lines up row for
# row with the year-major long frame. The count comes from year_idx_map instead of a
# hard-coded 6.
repeated_weekly_means = np.tile(weekly_means.to_numpy(), len(year_idx_map))
# Alias kept because a later cell refers to this name.
repeat_weekly_means = repeated_weekly_means


data['dead_per_m'] - repeated_weekly_means

In [None]:
plt.figure(figsize=(18,12))
idx = pd.date_range(start='2015-01-01',end='2020-09-15',freq='W-WED')
plt.plot(data['dead_per_m'] - repeated_weekly_means,'o--')
idx

In [None]:
last_week = data.iloc[-1,1].date()
month_name = pd.to_datetime(data.iloc[-1,1]).month_name()
day = last_week.day

In [None]:
CI[1]

In [None]:
last_week = data.iloc[-1,1].date()

plt.figure(figsize=(18,12))

plt.title('SWEDEN weekly deaths per million 2015 - 2020, period Jan 1st - {} {}'.format(month_name,day))

plt.plot(range(len(samples.mean(axis=0))),samples.mean(axis=0),'o--',label='Poisson expectation',
         mec='k',mfc='none')

plt.fill_between(range(len(samples.T)),CI[0],CI[1],color='m',alpha=0.3,label='Poisson 89% CI')

plt.plot(range(len(data)),data['dead_per_m'],'o--',mec='red',mfc='none',label='actuals')


for i in range(len(year_idx_map)):
    
    label = 'start of year' if i == 0 else ''
    label2 = '2015-2018 mean' if i == 0 else ''
    label3 = '2015-2018 range' if i == 0 else ''
    
    plt.axvline(i * len(week_idx_map),ls='--',color='orange',label=label)
    
    plt.plot(range(i*len(week_idx_map),i*len(week_idx_map)+ len(week_idx_map)),weekly_means,'x--',
             color='orange',label=label2)
    
    plt.fill_between(range(i*len(week_idx_map),i*len(week_idx_map) + len(week_idx_map)),
    weekly_mins,weekly_max,alpha=0.3,color='c',label=label3)
    
plt.legend(loc='upper left')

    
plt.ylabel('weekly deaths per million')
plt.xlabel('week nr jan 2015 - {}'.format(last_week))

plt.savefig('scb_weekly_deaths_per_m_2015_2020.jpg',format='jpg',dpi=400)


In [None]:
under_mean = ((data['dead_per_m'] - repeat_weekly_means) < 0).astype(int) 
under_mean = under_mean.loc[under_mean==1]
under_mean.loc[125:200].plot(x=under_mean.index,style='o--',figsize=(18,12))

In [None]:
post_samples3 = pm.sample_posterior_predictive(trace3,100,model3)
data_ppc3 = az.from_pymc3(trace=trace3,model=model3,posterior_predictive=post_samples3)
az.plot_ppc(data_ppc3,figsize=(18,12))
az.plot_forest(trace3,model_names=[model3],figsize=(18,12),combined=True,colors='r',hdi_prob=0.89)


In [None]:
weekly_deaths_per_m

In [None]:
excess_2019 = weekly_deaths_per_m['2019'] - weekly_deaths_per_m['2015-2018']
excess_2020 = weekly_deaths_per_m['2020'] - weekly_deaths_per_m['2015-2018']

In [None]:
xticks = ['Jan','Feb','Mar','April','May','Jun','Jul','Aug']

plt.figure(figsize=(18,12))

plt.subplot(211)

plt.title('SWEDEN weekly all cause deaths per million 2019 & 2020 vs 2015-2018 average : Jan 1:st - {}'.format(last_week))
plt.plot(weekly_deaths_per_m['2015-2018'],'o--',color='k',label='2015-2018 mean')
plt.plot(weekly_deaths_per_m['2019'],'o--',color='g',label='2019')
plt.plot(weekly_deaths_per_m['2020'],'o--',color='r',label='2020')
plt.ylabel('deaths per million')
plt.legend(loc='upper left')

plt.subplot(212)

plt.title('SWEDEN weekly all cause cumulative excess deaths per million 2019 & 2020 vs 2015-2018 average : Jan 1:st - {}'.format(last_week))

plt.plot(excess_2019.cumsum(),'o--',color='g',label='2019')
plt.plot(excess_2020.cumsum(),'o--',color='r',label='2020')

plt.ylabel('deaths per million')
plt.legend(loc='upper left')
plt.savefig('excess_deaths_2019_2020_cmp_15-18.jpg',format='jpg')

In [None]:
data.head()

In [None]:
# ONLY condition on years

# Scientific, dynamic model, not a GLM

# Delta D = alpha * Pop^beta - gamma

# alpha = deaths per individual
# beta = Elasticity (diminishing returns)
# gamma = death loss (e.g. better health care)

# D ~ Poisson(lambda)
# As implemented below (note log(P), not P, and both a and b indexed by year):
# lambda = exp(a[year_idx]) * log(P)^b[year_idx] / g
# a ~ Normal(1,1)      one per year
# b ~ Exponential(1)   one per year
# g ~ Exponential(1)   shared


model4 = pm.Model()

year_idx = data['year_idx'].values

with model4:
    
    a = pm.Normal('a',mu=1,sigma=1,shape=len(year_idx_map))
    b = pm.Exponential('b',1,shape=len(year_idx_map))
    g = pm.Exponential('g',1)
    
    lambda_ = np.exp(a[year_idx]) * np.log(data['pop'].values) ** b[year_idx] / g
    
    # The likelihood is fitted on the integer part of the per-million rates.
    lkh = pm.Poisson('lkh',lambda_,observed=data['dead_per_m'].values.astype(int))
    
    # No step method is built here: the Metropolis step that used to be created was
    # never passed to pm.sample, so it had no effect on the sampling.
    trace4 = pm.sample(500,tune=500,target_accept=0.95)
    
    result4 = pm.trace_to_dataframe(trace4)
    summary4 = az.summary(trace4)
    
    _= az.plot_trace(trace4)

In [None]:
summary4


In [None]:

alphas = result4.iloc[:,0:6]
betas = result4.iloc[:,6:12]
gamma = result4['g']
betas

In [None]:
lambdas = pd.DataFrame(np.exp(alphas.values) * np.log(pop.T.values) ** betas.values) 
lambdas.columns=['2015','2016','2017','2018','2019','2020']
lambdas = lambdas.div(gamma,axis=0)
lambdas

In [None]:
lambdas.mean()

In [None]:
lambdas.mean().plot(kind='bar',
                    figsize=(18,12),color='red',
                    title='SWEDEN average weekly deaths per million, 2015-2020 YTD {}'.format(last_week))

plt.ylabel('average weekly deaths per million')

In [None]:
yearly_week_means = data.groupby('year').mean()
yearly_week_means

In [None]:
CI = lambdas.quantile([0.055,0.945])
CI

In [None]:
samples = pm.Poisson.dist(lambdas.mean()).random(size=1000)
samples

In [None]:
sample_CI = np.percentile(samples,[5.5,94.5],axis=0)
sample_CI

In [None]:
az.plot_hdi([0,1,2,3,4,5],samples,hdi_prob=0.89)

In [None]:
plt.figure(figsize=(18,12))
plt.title('SWEDEN average weekly deaths per million, 2015-2020, YTD {}'.format(last_week))

ax = plt.gca()

az.plot_hdi([0,1,2,3,4,5],samples,hdi_prob=0.89,ax=ax,fill_kwargs={'alpha' : 0.5,'label' : 'Samples HDI'})

plt.plot(lambdas.mean(),'o--',color='orange',label='Lambda means')
plt.fill_between(lambdas.mean().index,CI.loc[0.055],CI.loc[0.945],color='c',alpha=0.3,label='Lambda CI 89%')
plt.fill_between(lambdas.mean().index,lambdas.min(),lambdas.max(),color='grey',alpha=0.3,label='actuals range')

plt.plot(lambdas.mean().index,samples.mean(axis=0),'o--',color='lime',label='Poisson sample means')
plt.fill_between(lambdas.mean().index,sample_CI[0],sample_CI[1],color='g',alpha=0.3,label='sample CI 89%')

plt.plot(yearly_week_means['dead_per_m'],'o--',color='r',label='actuals')
plt.ylabel('average weekly deaths per million')
plt.legend(loc='upper left')

In [None]:
post_samples4 = pm.sample_posterior_predictive(trace4,1000,model4)
post_samples4['lkh'].shape

In [None]:
np.percentile(post_samples4['lkh'],[5.5,94.5])

In [None]:
data_ppc4 = az.from_pymc3(trace=trace4,model=model4,posterior_predictive=post_samples4)
data_ppc4

In [None]:
az.plot_ppc(data_ppc4,figsize=(18,12),)

In [None]:
az.plot_forest(trace4,model_names=[model4],figsize=(18,12),combined=True,colors='r',hdi_prob=0.89)


In [None]:
scb_daily = parse_scb_prel(YTD=False)
scb_daily.drop('2015-2019',axis=1,inplace=True)
scb_daily = scb_daily.astype(float)
scb_daily.info()


In [None]:
# Show the Feb 29 row and its predecessor before they are modified.
print (scb_daily.loc['2020-02-29'])
print (scb_daily.loc['2020-02-28'])

# Add half of the Feb 29 row to Feb 28 and half to Mar 1, for every column, so the
# column totals are unchanged once the Feb 29 row is dropped.
# NOTE: not idempotent - re-running this cell adds the halves again.
scb_daily.loc['2020-02-28'] += 0.5 * scb_daily.loc['2020-02-29']
scb_daily.loc['2020-03-01'] += 0.5 * scb_daily.loc['2020-02-29']

# Display the affected rows after the adjustment.
scb_daily.loc['2020-02-27' : '2020-03-02']

In [None]:
scb_daily.drop(pd.to_datetime('2020-02-29'),inplace=True)
scb_daily['2015-2018'] = scb_daily.loc[:,'2015' : '2018'].mean(axis=1)
scb_daily

In [None]:
scb_daily.loc['2020-02-27' : '2020-03-03']

In [None]:
scb_daily.info()


In [None]:
pop['2015-2018'] = pop['2015' : '2018'].mean()
pop

In [None]:
scb_daily_per_m = scb_daily / (pop / 1e6)
scb_daily_per_m

In [None]:
scb_daily.plot(y=['2015-2018','2020'])

In [None]:
scb_daily_per_m.plot(y=['2015-2018','2020'])

In [None]:
scb_monthly_per_m = scb_daily_per_m.resample('M').sum()
scb_monthly_per_m

In [None]:
scb_monthly_per_m.plot(y=['2015-2018','2020'])

In [None]:
scb_transaction = scb_monthly_per_m.unstack().reset_index()
scb_transaction.columns=['year','month','dead_per_m']
scb_transaction = scb_transaction.loc[scb_transaction['year'] != '2015-2018']
scb_transaction

In [None]:
scb_transaction['monthNr'] = scb_transaction['month'].dt.month
scb_transaction['year_month'] = scb_transaction['year'].astype(
    str).str.cat(scb_transaction['monthNr'].astype(str),'_')

In [None]:
scb_transaction

In [None]:
scb_transaction.set_index('year_month',inplace=True)


In [None]:
scb_transaction = (scb_transaction.loc[scb_transaction['dead_per_m'] > 0 ]).copy()
scb_transaction

In [None]:
monthly_means=scb_transaction.loc[scb_transaction.year.isin(
    ['2015','2016','2017','2018'])].groupby('monthNr')['dead_per_m'].mean()

monthly_mins=scb_transaction.loc[scb_transaction.year.isin(
    ['2015','2016','2017','2018'])].groupby('monthNr')['dead_per_m'].min()

monthly_maxs=scb_transaction.loc[scb_transaction.year.isin(
    ['2015','2016','2017','2018'])].groupby('monthNr')['dead_per_m'].max()

scb_transaction['monthly_mean'] = scb_transaction['monthNr'].apply(lambda x : monthly_means.loc[x])
scb_transaction['monthly_min'] = scb_transaction['monthNr'].apply(lambda x : monthly_mins.loc[x])
scb_transaction['monthly_max'] = scb_transaction['monthNr'].apply(lambda x : monthly_maxs.loc[x])

scb_transaction

In [None]:
scb_transaction.groupby('year').sum()

In [None]:
plt.figure(figsize=(18,12))
plt.title('SWEDEN monthly all cause deaths per million, 2015-2020 YTD {}'.format(scb_transaction.iloc[-2,1].date()))

plt.plot(scb_transaction['dead_per_m'][:-1],'o--',color='r',label='monthly deaths per million')

plt.plot(scb_transaction['monthly_mean'][:-1],'o--',color='orange',label='2015-2018 monthly average')

plt.fill_between(scb_transaction.index[:-1],scb_transaction['monthly_min'][:-1],
                scb_transaction['monthly_max'][:-1],color='c',alpha=0.3,label='2015-2018 monthly range')


_= plt.xticks(rotation=90)

for month in range(len(scb_transaction)):
    if month % 12 == 0:
        plt.axvline(month,color='grey',ls='dashed')

plt.legend(loc='upper left')
plt.ylabel('monthly deaths per million')
plt.xlabel('Year_Month')

plt.savefig('scb_deaths_per_m_monthly_2015_2020.jpg',format='jpg',dpi=400)

In [None]:
#### do one with a credible interval for 2020 ###
#### uses real data until last_data_day, then a PYMC distribution based on historical data for the
#### remaining days of the year to provide a cumulative sum of full year 2020 deaths

df_hist = parse_scb_prel(YTD=False)
df_hist.drop('2015-2019',axis=1,inplace=True)

In [None]:
df_hist

In [None]:
# fill 2020 dates with no data yet with mean of 2015-2019,
# creating a 2020 with real data to last_date, and rest of the year with mean of 2015-2019

mask = df_hist['2020'] == 0

df_hist.loc[mask,'2020'] = df_hist.loc[mask,'2015' : '2019'].mean(axis=1)

In [None]:
df_hist

In [None]:
df_hist.sum()

In [None]:
mort_hist = df_hist.sum().div(pop)
mort_hist.drop('2015-2018',inplace=True)
    

In [None]:
mort_hist

In [None]:
mort_hist.plot(kind='bar')

In [None]:
df_hist

In [None]:
df_hist = df_hist / (pop / 1e6 )
df_hist.drop('2015-2018',axis=1,inplace=True)

In [None]:
df_hist

In [None]:
df_hist_transaction = df_hist.unstack().reset_index()

In [None]:
# Every year column carries a Feb 29 row because the table is laid out on the 2020
# (leap-year) calendar. Keep that row only for real leap years. The rows are found
# from the data instead of hard-coded row labels, so the cell can be re-run safely.
is_feb29 = df_hist_transaction['level_1'] == '2020-02-29'
is_leap_year = pd.to_datetime(df_hist_transaction['level_0'],format='%Y').dt.is_leap_year
df_hist_transaction.drop(df_hist_transaction.index[is_feb29 & ~is_leap_year],inplace=True)

In [None]:
df_hist_transaction
date_hist = pd.date_range('2015-01-01','2020-12-31')
df_hist_transaction.index = date_hist
df_hist_transaction.drop(['level_0'],inplace=True,axis=1)

In [None]:
df_hist_transaction['day_idx'] = df_hist_transaction.level_1.dt.dayofyear
df_hist_transaction.drop('level_1',axis=1,inplace=True)
df_hist_transaction.rename(columns={0:'dead'},inplace=True)


In [None]:
df_hist_transaction

In [None]:
len(df_hist_transaction['day_idx'].unique())

In [None]:
# Partially pooled daily Poisson model: one rate per day of the year, all days
# sharing a common mean `lambda_bar`.
model5 = pm.Model()

day_idx = df_hist_transaction['day_idx'].values
observed_deaths = df_hist_transaction['dead']
n_day_slots = len(df_hist_transaction['day_idx'].unique())

with model5:

    # Hyper-prior whose mean equals the overall mean of the observations.
    lambda_bar = pm.Exponential('lambda_bar',1 / observed_deaths.mean())

    # One rate per distinct day index, drawn around the shared mean.
    lambda_ = pm.Exponential('lambda_',1 / lambda_bar,shape=n_day_slots)

    # day_idx is 1-based (day of year), hence the -1 when indexing the rates.
    lkh = pm.Poisson('lkh',lambda_[day_idx-1],observed=observed_deaths)

    trace5 = pm.sample(500,tune=500)

    summary5 = az.summary(trace5)
    

In [None]:
with model5:
    print (summary5)

In [None]:
with model5:
    az.plot_forest(trace5,var_names=['lambda_bar','lambda_'])

In [None]:
daily_hist_means = pd.DataFrame(trace5['lambda_']).T
dates = pd.date_range('2020-01-01','2020-12-31')
daily_hist_means.index=dates
daily_hist_means

In [None]:
ci_hist = np.percentile(daily_hist_means,[5.5,94.5],axis=1)
print (ci_hist[:,0])
means_hist = daily_hist_means.mean(axis=1)
means_hist

In [None]:
plt.plot(daily_hist_means.iloc[0,:])

In [None]:
ax = means_hist.plot(figsize=(18,12))
df_hist.plot(ax=ax,y='2020')

In [None]:
df_hist[df_hist['2020'] < 15]

In [None]:
last_data_day - pd.DateOffset(days=7)

In [None]:
means_hist[:last_data_day] = df_hist.loc[:last_data_day,'2020'] 

In [None]:
ax = means_hist.plot(figsize=(18,12),style='--')
df_hist.plot(ax=ax,y='2020',style='--')
ax.fill_between(df_hist.index,ci_hist[0,:],ci_hist[1,:],color='m',alpha=0.3)

In [None]:
means_hist.sum()

In [None]:
# 1-based day of year of the last day with observed 2020 data. Used as a 0-based
# column index into ci_hist, it therefore points at the following day.
last_data_day_idx = last_data_day.dayofyear

# The forecast starts the day AFTER the last observed day. Starting the slice at
# last_data_day itself replaced that day's observed value with the interval bound.
first_forecast_day = last_data_day + pd.Timedelta(days=1)

best_case = means_hist.copy()
worst_case = means_hist.copy()

# Remaining days of the year: lower / upper bound of the per-day 89% interval.
best_case[first_forecast_day:] = ci_hist[0,last_data_day_idx:]
worst_case[first_forecast_day:] = ci_hist[1,last_data_day_idx:]

print (best_case.sum())
print (means_hist.sum())
worst_case.sum()

In [None]:
#### NOTE : NOT AGE ADJ DATA!!! ####

err_hist_low = np.abs(best_case.sum() - means_hist.sum())
err_hist_high = np.abs(worst_case.sum() - means_hist.sum())

avg_2015_2018 = (mort_hist['2015' : '2018']).mean() * 1e6

title = 'SWEDEN deaths per million 2015-2020 (forecasted)\nnon-age adj data\nDataSource:scb.se'
ax = (mort_hist * 1e6).plot(kind='bar',color='orange',figsize=(18,12),title=title)

err = np.array([[0,0],[0,0],[0,0],[0,0],[0,0],[err_hist_low,err_hist_high]])
ax.errorbar(mort_hist.index,(mort_hist*1e6),err.T,fmt='none',ecolor='k',capsize=5)

ax.axhline(avg_2015_2018,color='r',ls='dashed')

_ = ax.set_yticks(range(0,10000,1000))
#ax.set_ylim([8000,10000])

In [None]:
print((mort_hist['2015' : '2018'] * 1e6).mean())
(mort_hist * 1e6) - (mort_hist['2015' : '2018'] * 1e6).mean()