In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm
from pymc.Matplot import plot as pmplot

# Switch matplotlib to seaborn's default theme for every figure drawn below.
sns.set()

In [None]:
# Toy data in "transaction" format: every row is one complete record and the
# logical key is (country, age_grp), e.g.
#
#   A,64,dead,pop
#   A,79,dead,pop
#   A,89,dead,pop
#   A,90,dead,pop
#   B...
#
# country_idx / age_idx are the 1-based integer codes that the PyMC models
# further down use to index their parameter vectors.
df = pd.DataFrame({
    'country': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C'],
    'age_grp': ['64', '79', '89', '90', '64', '79', '89', '90', '64', '79', '89', '90'],
    'dead': [1, 2, 3, 8,
             4, 3, 2, 2,
             2, 2, 10, 200],
    'pop': [100, 200, 300, 400,
            400, 300, 200, 100,
            200, 200, 1000, 10000],
    'country_idx': [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3],
    'age_idx': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
})

# Scale both counts by the same factor: the rates are unchanged, only the
# sample size seen by the binomial likelihoods grows.
df[['pop', 'dead']] *= 100

# Observed death rate of each (country, age group) cell.
df['rate'] = df['dead'].div(df['pop'])

df

In [None]:
# Move the (country, age_grp) key into a hierarchical row index.
# Every key occurs exactly once in df, so the sum keeps each value as is.
grouped = df.groupby(['country', 'age_grp']).agg('sum')
grouped

In [None]:
# Death rate per (country, age group) cell; this adds a column to `grouped` in place.
grouped['fraction_dead'] = grouped['dead'].div(grouped['pop'])
grouped

In [None]:
# Total population per country (sum over the age groups of index level 0).
tot_pop = grouped['pop'].groupby(level=0).sum()
tot_pop

In [None]:
# Share of each age group within its own country. The (country, age_grp)
# series is divided by the per-country totals; pandas matches the two on the
# shared 'country' index level.
ratio_age_of_pop = grouped['pop'].div(tot_pop)
ratio_age_of_pop

In [None]:
# Stacked bar per country showing how its population splits across age groups.
# `stacked` is a boolean flag; the original passed the string 'True', which
# only worked because any non-empty string is truthy.
ratio_age_of_pop.unstack().plot(kind='bar', stacked=True, figsize=(18, 12),
                                title='population age group ratio')

In [None]:
# Crude (not age-adjusted) death rate per country: total deaths / total population.
country_totals = grouped.groupby(level=0).sum()
total_fractions = country_totals['dead'] / country_totals['pop']
total_fractions

In [None]:
# Country B's population per age group is used as the standard population
# for the age adjustment below.
std_pop = grouped.xs('B', level='country')['pop']
std_pop


In [None]:
# Deaths each country would have if it had the standard population's age
# structure: apply its own age-specific rates to std_pop, then sum over ages.
expected_deaths_long = std_pop * grouped['fraction_dead']
expected_deaths_wide = expected_deaths_long.unstack()
std_dead = expected_deaths_wide.sum(axis=1)
std_dead

In [None]:
# Age-adjusted death rate: standardised deaths over the standard population size.
std_rate = std_dead.div(std_pop.sum())
std_rate

In [None]:
# Side-by-side table of crude vs age-adjusted rate, one row per country.
rate_cmp = pd.concat([total_fractions, std_rate], axis=1,
                     keys=['total_rate', 'age_adj_rate'])
rate_cmp

In [None]:
# Grouped bars: crude vs age-adjusted rate for each country.
rate_cmp.plot.bar(figsize=(18, 12))

In [None]:
# pymc 1: no conditioning on age. Each country gets its own alpha, which
# determines one overall death probability for that country.
# The result is meant to be compared with rate_cmp.

country_idx = df['country_idx']

# PyMC 2's Normal is parameterised by precision (tau); 1 / 10 ** 2 is sd = 10.
alpha = pm.Normal('alpha', mu=0, tau=1 / 10 ** 2, size=3)  # 3 countries

@pm.deterministic
def logit(country_idx=country_idx-1,alpha=alpha):
    # country_idx is shifted to 0-based so it can index alpha directly.
    # Despite the node name this is the inverse logit: log-odds -> probability.
    exp_eta = np.exp(alpha[country_idx])
    return exp_eta / (1 + exp_eta)


# One binomial observation per (country, age group) row of df.
obs = pm.Binomial('obs', n=df['pop'], p=logit,
                  value=df['dead'], observed=True)

model = pm.Model([alpha, logit, obs])

# Fit the MAP estimate first, then sample.
_map = pm.MAP(model)
_map.fit()

mcmc = pm.MCMC(model)
sample = mcmc.sample(iter=50000, burn=20000, thin=3)



In [None]:
def logit_pure(x):
    """Map log-odds ``x`` to a probability in [0, 1].

    Despite its name this is the *inverse* logit (the logistic sigmoid),
    ``exp(x) / (1 + exp(x))``.

    Parameters
    ----------
    x : float, numpy array or pandas Series
        Value(s) on the log-odds scale.

    Returns
    -------
    Same kind of object as ``x``, holding the corresponding probabilities.
    """
    # exp(x) / (1 + exp(x)) overflows to inf / inf = NaN once x exceeds ~709.
    # log(1 + exp(-x)) is evaluated stably by logaddexp, so the result
    # saturates cleanly at 0 and 1 instead.
    return np.exp(-np.logaddexp(0, -x))

def logistic_pure(p):
    """Return the log-odds ``log(p / (1 - p))`` of probability ``p``.

    Despite its name this is the logit function, i.e. the inverse of
    ``logit_pure``.
    """
    odds = p / (1 - p)
    return np.log(odds)

In [None]:
# Posterior samples of the per-country intercepts from model 1.
# `mcmc` must still be the sampler fitted in the pymc 1 cell.
alpha_trace = mcmc.trace('alpha')
alpha_A1 = alpha_trace[:, 0]
alpha_B1 = alpha_trace[:, 1]
alpha_C1 = alpha_trace[:, 2]

# PyMC diagnostic plot for each intercept.
pmplot(alpha_A1, 'alpha_A1')
pmplot(alpha_B1, 'alpha_B1')
pmplot(alpha_C1, 'alpha_C1')

# One column per parameter, one row per retained sample.
result = pd.DataFrame({
    'alpha_A1': alpha_A1,
    'alpha_B1': alpha_B1,
    'alpha_C1': alpha_C1,
})

result.describe()

In [None]:
# Convert every posterior sample from the log-odds scale to a death probability.
result_p = pd.DataFrame({
    country: logit_pure(result['alpha_%s1' % country])
    for country in ('A', 'B', 'C')
})

result_p.describe()

In [None]:
# Add model 1's posterior-mean probability per country to the comparison table.
rate_cmp['pymc_1'] = result_p.describe().T['mean']
rate_cmp

In [None]:
# pymc 2: condition on age through a common slope.
# Each country gets its own alpha, and all countries share one beta that
# multiplies the numeric age index.
# Author's note: not very good results... they make no sense....!!!

country_idx = df['country_idx']
x = df['age_idx']

# PyMC 2's Normal takes a precision (tau): sd = 10 for alpha, sd = 5 for beta.
alpha = pm.Normal('alpha', mu=0, tau=1 / 10 ** 2, size=3)  # 3 countries
beta = pm.Normal('beta', mu=0, tau=1 / 5 ** 2)

@pm.deterministic
def logit(country_idx=country_idx-1,alpha=alpha,beta=beta,x=x):
    # Linear predictor on the log-odds scale, then inverse logit.
    # country_idx is shifted to 0-based; x keeps its 1-based age index.
    exp_eta = np.exp(alpha[country_idx] + beta * x)
    return exp_eta / (1 + exp_eta)


obs = pm.Binomial('obs', n=df['pop'], p=logit,
                  value=df['dead'], observed=True)

model = pm.Model([alpha, logit, obs, beta])

# Fit the MAP estimate first, then sample.
_map = pm.MAP(model)
_map.fit()

mcmc = pm.MCMC(model)
sample = mcmc.sample(iter=50000, burn=20000, thin=3)

In [None]:
# Posterior samples from model 2 (per-country alpha, shared age slope beta).
alpha_trace = mcmc.trace('alpha')
alpha_A2 = alpha_trace[:, 0]
alpha_B2 = alpha_trace[:, 1]
alpha_C2 = alpha_trace[:, 2]

pmplot(alpha_A2, 'alpha_A2')
pmplot(alpha_B2, 'alpha_B2')
pmplot(alpha_C2, 'alpha_C2')

# NOTE: from here on `beta` is the sample array, no longer the PyMC node.
beta = mcmc.trace('beta')[:]

pmplot(beta, 'beta')

# One column per parameter, one row per retained sample.
result2 = pd.DataFrame({
    'alpha_A2': alpha_A2,
    'alpha_B2': alpha_B2,
    'alpha_C2': alpha_C2,
    'beta': beta,
})

result2.describe()

In [None]:
# Model 2's linear predictor is alpha[country] + beta * age_idx, so the death
# probability differs per age group. The original summary evaluated
# alpha + beta, which is only the youngest group (age_idx == 1), and then
# compared it with whole-country rates.
#
# Here each country's model-implied crude rate is computed per posterior
# sample: the age-specific probabilities are averaged with that country's own
# population shares as weights. That is directly comparable with
# rate_cmp['total_rate'] and rate_cmp['pymc_1'].
result2_p = pd.DataFrame()

for country in ['A', 'B', 'C']:
    country_rows = df[df['country'] == country]
    age_shares = country_rows['pop'] / country_rows['pop'].sum()
    alpha_draws = result2['alpha_' + country + '2']
    result2_p[country] = sum(
        share * logit_pure(alpha_draws + result2['beta'] * age_x)
        for share, age_x in zip(age_shares, country_rows['age_idx'])
    )

result2_p.describe()

In [None]:
# Add model 2's posterior-mean probability per country to the comparison table.
rate_cmp['pymc_2'] = result2_p.describe().T['mean']
rate_cmp

In [None]:
# pymc 3: a separate alpha per country plus a separate beta per age group
# (additive effects on the log-odds scale).

country_idx = df['country_idx']
age_idx = df['age_idx']

# PyMC 2's Normal takes a precision (tau); 1 / 2 ** 2 corresponds to sd = 2.
alpha = pm.Normal('alpha', mu=0, tau=1 / 2 ** 2, size=3)  # 3 countries
beta = pm.Normal('beta', mu=0, tau=1 / 2 ** 2, size=4)  # 4 age groups

@pm.deterministic
def logit(country_idx=country_idx-1,age_idx=age_idx-1,alpha=alpha,beta=beta):
    # Both index vectors are shifted to 0-based so they can index alpha / beta.
    # Inverse logit of the summed country and age effects.
    exp_eta = np.exp(alpha[country_idx] + beta[age_idx])
    return exp_eta / (1 + exp_eta)


obs = pm.Binomial('obs', n=df['pop'], p=logit,
                  value=df['dead'], observed=True)

model = pm.Model([alpha, logit, obs, beta])

# MAP initialisation is switched off for this model:
#_map = pm.MAP(model)
#_map.fit()

mcmc = pm.MCMC(model)
sample = mcmc.sample(iter=50000, burn=10000, thin=2)

In [None]:
# Posterior samples from model 3: three country effects, four age effects.
alpha_trace = mcmc.trace('alpha')
alpha_A3 = alpha_trace[:, 0]
alpha_B3 = alpha_trace[:, 1]
alpha_C3 = alpha_trace[:, 2]

pmplot(alpha_A3, 'alpha_A3')
pmplot(alpha_B3, 'alpha_B3')
pmplot(alpha_C3, 'alpha_C3')

# beta_k is the effect of the age group whose 0-based index is k.
beta_trace = mcmc.trace('beta')
beta_0 = beta_trace[:, 0]
beta_1 = beta_trace[:, 1]
beta_2 = beta_trace[:, 2]
beta_3 = beta_trace[:, 3]

pmplot(beta_0, 'beta_0')
pmplot(beta_1, 'beta_1')
pmplot(beta_2, 'beta_2')
pmplot(beta_3, 'beta_3')

# One column per parameter, one row per retained sample.
result3 = pd.DataFrame({
    'alpha_A3': alpha_A3,
    'alpha_B3': alpha_B3,
    'alpha_C3': alpha_C3,
    'beta_0': beta_0,
    'beta_1': beta_1,
    'beta_2': beta_2,
    'beta_3': beta_3,
})

result3.describe()

In [None]:
# Posterior death probability for every (country, age group) cell: the
# country effect plus the age effect, mapped back from log-odds to probability.
# Columns are named '<country>_<age_grp>' and ordered A_64 ... C_90.
result3_p = pd.DataFrame({
    country + '_' + age_grp: logit_pure(result3['alpha_%s3' % country] + result3[beta_col])
    for country in ('A', 'B', 'C')
    for age_grp, beta_col in (('64', 'beta_0'), ('79', 'beta_1'),
                              ('89', 'beta_2'), ('90', 'beta_3'))
})

result3_p.describe()

In [None]:
# Posterior means per cell, with the '<country>_<age_grp>' label split back
# into its two key columns.
cell_means = result3_p.describe().loc['mean'].reset_index()
pymc_3 = cell_means['index'].str.split('_', expand=True)
pymc_3.columns = ['country', 'age_grp']
result3_summary = pd.concat([cell_means, pymc_3], axis=1)
result3_summary

In [None]:
# Re-index the summary by (country, age_grp) so it lines up with `grouped`.
multi_idx = pd.MultiIndex.from_arrays(
    [result3_summary['country'], result3_summary['age_grp']]
)
result3_summary.index = multi_idx
result3_summary

In [None]:
# Observed death rate per (country, age group), shown for comparison with the
# posterior means in result3_summary.
grouped['fraction_dead']

In [None]:
# 3 x 4 grid (countries x age groups): histogram of the posterior death
# probability of each cell, with the observed rate as a dashed orange line.
# The panels follow the column order of result3_p (A_64 ... C_90); one loop
# replaces twelve copy-pasted panel definitions.
plt.figure(figsize=(18, 12))

for panel, cell_label in enumerate(result3_p.columns, start=1):
    # Column labels have the form '<country>_<age_grp>'.
    cell_country, cell_age_grp = cell_label.split('_')
    plt.subplot(3, 4, panel)
    plt.title(cell_label)
    plt.hist(result3_p[cell_label], density=True)
    plt.axvline(grouped.loc[(cell_country, cell_age_grp), 'fraction_dead'],
                color='orange', ls='dashed')

In [None]:
# Long table of posterior-mean death rates: one row per (country, age group).
means_p = result3_p.describe().loc['mean']

# Split the '<country>_<age_grp>' labels into two key columns.
country_age_death_rate = pd.Series(means_p.index).str.split('_', expand=True)
country_age_death_rate.columns = ['country', 'age_grp']

# Give the means the same positional index as the key table so the column
# assignment below lines up row by row.
means_p.index = country_age_death_rate.index
country_age_death_rate['rate'] = means_p
country_age_death_rate

In [None]:
# Reshape to wide form: rows are age groups, columns are countries.
# NOTE(review): this rebinds the same name, so the cell cannot be run twice
# without first re-running the cell above.
country_age_death_rate = pd.pivot(country_age_death_rate,
                                  index='age_grp', columns='country', values='rate')
country_age_death_rate

In [None]:
# Standard population (country B's population per age group), shown again for reference.
std_pop

In [None]:
# Model-based expected deaths in the standard population: rows are countries,
# columns are age groups. The multiplication is aligned on the age-group
# labels; the original used std_pop.values, which silently depends on both
# objects listing the age groups in the same order.
pymc_deaths_per_age_grp = country_age_death_rate.T.mul(std_pop, axis='columns')
pymc_deaths_per_age_grp

In [None]:
# Same table from the observed rates: standard population x observed
# age-specific rate, reshaped to countries (rows) x age groups (columns).
analytic_deaths_long = std_pop * grouped['fraction_dead']
analytic_deaths_per_age_grp = analytic_deaths_long.unstack()
analytic_deaths_per_age_grp

In [None]:
# Cell-wise ratio of model-based to analytic deaths (pymc / analytic, despite
# the variable name); 1.0 means the model reproduces the observed rate.
ratio_analytic_pymc = pymc_deaths_per_age_grp.div(analytic_deaths_per_age_grp)
ratio_analytic_pymc

In [None]:
# Average pymc / analytic ratio per country, taken across the age groups.
ratio_analytic_pymc.mean(axis='columns')

In [None]:
# Model-based age-standardised deaths per country (sum over the age groups).
pymc_dead = pymc_deaths_per_age_grp.sum(axis='columns')
pymc_dead

In [None]:
# Analytic age-standardised deaths per country, shown next to pymc_dead above.
std_dead

In [None]:
# Analytic vs model-based standardised deaths, one row per country.
cmp_pymc_analytic_deaths = pd.concat([std_dead, pymc_dead], axis=1,
                                     keys=['analytic', 'pymc'])
cmp_pymc_analytic_deaths

In [None]:

# Load the stratified population/death table used by the model below, which
# reads its 'year_idx', 'age_idx', 'pop' and 'dead' columns.
# NOTE(review): unpickling can execute arbitrary code - only load this file
# from a trusted source.
scb_all_data = pd.read_pickle('stratified_population_death_age_control.pkl')
scb_all_data

In [None]:
# Additive year + age-group model for the real data:
#   logit(p) = alpha[year] + beta[age group],  dead ~ Binomial(pop, p)
year_idx = scb_all_data['year_idx']
age_idx = scb_all_data['age_idx']

# year_idx is a 1-based code (it is used as alpha[year_idx - 1] below), so the
# largest code is the number of alphas needed. The original used
# len(year_idx), the number of data rows; whenever that exceeds the number of
# years, the extra alpha elements are tied to no data and are just sampled
# from the prior.
n_years = int(year_idx.max())

# PyMC 2's Normal takes a precision (tau); 1 / 10 ** 2 corresponds to sd = 10.
alpha = pm.Normal('alpha', 0, 1 / 10 ** 2, size=n_years)  # one per year (20 years)

beta = pm.Normal('beta', 0, 1 / 10 ** 2, size=4)  # one per age group


@pm.deterministic
def logit_age(age_idx=age_idx-1,year_idx=year_idx-1,alpha=alpha,beta=beta):
    # Both index vectors are shifted to 0-based so they can index alpha / beta.
    # Inverse logit of the summed year and age effects.
    exp_eta = np.exp(alpha[year_idx] + beta[age_idx])
    return exp_eta / (1 + exp_eta)


lkh_age = pm.Binomial('lkh_age', n=scb_all_data['pop'], p=logit_age, observed=True,
                      value=scb_all_data['dead'])

model = pm.Model([alpha, beta, logit_age, lkh_age])

# Fit the MAP estimate first, then sample.
_map = pm.MAP(model)
_map.fit()

mcmc = pm.MCMC(model)
sample = mcmc.sample(50000, 20000, 2)

In [None]:
# Collect the posterior samples into one table with columns
# alpha_2001 ... alpha_2020 followed by beta_64, beta_79, beta_89, beta_90.
# Loops replace the original 24 copy-pasted trace slices.
years = range(2001, 2021)
age_labels = ['64', '79', '89', '90']

alpha_trace = mcmc.trace('alpha')
beta_trace = mcmc.trace('beta')

trace_columns = {}
for pos, year in enumerate(years):
    trace_columns['alpha_%d' % year] = alpha_trace[:, pos]
for pos, age_label in enumerate(age_labels):
    trace_columns['beta_' + age_label] = beta_trace[:, pos]

# Diagnostic plots: every age effect, plus a hand-picked subset of the years.
for age_label in age_labels:
    pmplot(trace_columns['beta_' + age_label], 'beta_' + age_label)

for year in (2001, 2002, 2012, 2015, 2019, 2020):
    pmplot(trace_columns['alpha_%d' % year], 'alpha_%d' % year)

# One column per parameter, one row per retained sample.
result = pd.DataFrame(trace_columns)

result.describe()


In [None]:
# Posterior death probability for every (year, age group) cell: the year
# effect plus the age effect, mapped back from log-odds to probability.
# Columns are named '<year>_<age>' and ordered 2001_64, 2001_79, ... 2020_90,
# exactly as in the original 80 copy-pasted assignments.
result_p = pd.DataFrame({
    '%d_%s' % (year, age): logit_pure(result['alpha_%d' % year] + result['beta_%s' % age])
    for year in range(2001, 2021)
    for age in ('64', '79', '89', '90')
})

result_p.describe()


In [None]:
# Posterior-mean death probability per cell, reshaped to years (rows) x age
# groups (columns).
p_means = result_p.describe().loc['mean'].reset_index()

# Split '<year>_<age>' at the first underscore. `n` must be passed by keyword:
# pandas 2 made it keyword-only, so the positional form raises a TypeError.
p_means[['year', 'age_grp']] = p_means['index'].str.split('_', n=1, expand=True)

p_means = pd.pivot(p_means, index='age_grp', columns='year', values='mean').T

# Label the oldest group '90+' (assigned rather than renamed in place).
p_means = p_means.rename(columns={'90': '90+'})


In [None]:
# Load the pre-computed death rates; a later cell multiplies this table by
# strata_abs_pop to obtain the analytic deaths per stratum.
# NOTE(review): unpickling can execute arbitrary code - only load this file
# from a trusted source.
computed_death_rate_per_capita = pd.read_pickle('computed_death_rate_per_capita.pkl')
computed_death_rate_per_capita

In [None]:
# Replace the string year labels with integer years 2001..2020. This is a
# positional relabel and relies on the rows already being in year order.
p_means.index = pd.RangeIndex(start=2001, stop=2021)
p_means

In [None]:
# Load the population table that later cells multiply by the death rates.
# NOTE(review): presumably absolute population per year and age stratum -
# confirm against the pickle. Unpickling can execute arbitrary code; only load
# this file from a trusted source.
strata_abs_pop = pd.read_pickle('strata_abs_pop.pkl')
strata_abs_pop

In [None]:
# Model-based deaths per stratum: population x posterior-mean death
# probability, aligned on year (rows) and age group (columns).
pymc_strata_tot = strata_abs_pop.mul(p_means)
# The 'p_' prefix marks the PyMC-based columns (assigned by position).
pymc_strata_tot.columns = ['p_64', 'p_79', 'p_89', 'p_90+']
pymc_strata_tot

In [None]:
# Analytic deaths per stratum: pre-computed death rate x population.
analytic_strata_tot = computed_death_rate_per_capita.mul(strata_abs_pop)
# The 'a_' prefix marks the analytic columns (assigned by position).
analytic_strata_tot.columns = ['a_64', 'a_79', 'a_89', 'a_90+']
analytic_strata_tot

In [None]:
# Model-based total deaths per year (sum over the age strata).
pymc_tot = pymc_strata_tot.sum(axis='columns')
pymc_tot

In [None]:
# Analytic total deaths per year (sum over the age strata).
analytic_tot = analytic_strata_tot.sum(axis='columns')
analytic_tot

In [None]:
# Yearly totals from both methods side by side.
combined_totals = pd.concat([analytic_tot, pymc_tot], axis=1,
                            keys=['analytic_tot', 'pymc_tot'])
combined_totals

In [None]:
# Grouped bars: analytic vs model-based total deaths for each year.
combined_totals.plot.bar(figsize=(18, 12))

In [None]:
# Per-stratum deaths from both methods, interleaved so that every analytic
# column sits directly next to its model-based counterpart.
combined_strata_tot = pd.concat([analytic_strata_tot, pymc_strata_tot], axis=1)
combined_strata_tot = combined_strata_tot.loc[:, ['a_64', 'p_64',
                                                  'a_79', 'p_79',
                                                  'a_89', 'p_89',
                                                  'a_90+', 'p_90+']]
combined_strata_tot

In [None]:
# Convert the stratum death counts back to per-capita rates. The division
# uses the raw population array, so it is positional: the four columns must
# be in the same age-group order as strata_abs_pop.
analytic_deaths_per_capita = combined_strata_tot.loc[
    :, ['a_64', 'a_79', 'a_89', 'a_90+']].div(strata_abs_pop.values)
pymc_deaths_per_capita = combined_strata_tot.loc[
    :, ['p_64', 'p_79', 'p_89', 'p_90+']].div(strata_abs_pop.values)

# Interleave analytic / model-based columns per age group.
deaths_per_capita = pd.concat([analytic_deaths_per_capita, pymc_deaths_per_capita], axis=1)
deaths_per_capita = deaths_per_capita.loc[:, ['a_64', 'p_64', 'a_79', 'p_79',
                                              'a_89', 'p_89', 'a_90+', 'p_90+']]
deaths_per_capita

In [None]:
# Two semi-transparent stacked bar charts drawn on the same axes: analytic
# deaths first, then the model-based deaths on top. Where the two agree the
# bars coincide.
# NOTE(review): the second colour list is the first one reversed - presumably
# so that overlapping segments stay distinguishable; confirm.
ax = combined_strata_tot[['a_64', 'a_79', 'a_89', 'a_90+']].plot.bar(
    color=['b', 'orange', 'g', 'r'], stacked=True, figsize=(18, 12), alpha=0.7)
combined_strata_tot[['p_64', 'p_79', 'p_89', 'p_90+']].plot.bar(
    ax=ax, color=['r', 'g', 'orange', 'b'], stacked=True, alpha=0.7)

In [None]:
# 2 x 2 grid, one panel per age group: analytic vs model-based absolute
# deaths per year. One loop replaces four copy-pasted panels, and the title
# typo "Infrence" is corrected.
width = 0.2
xticks = range(2001, 2021)

plt.figure(figsize=(18, 12))

for panel, age_label in enumerate(['64', '79', '89', '90+'], start=1):
    plt.subplot(2, 2, panel)
    plt.title('SWEDEN absolute deaths age group %s, analytic vs Bayesian Inference' % age_label)
    # The two series are shifted by +/- width around each year so they sit side by side.
    plt.bar(analytic_strata_tot.index + width, analytic_strata_tot['a_' + age_label],
            color='b', width=width, label='analytic')
    plt.bar(analytic_strata_tot.index - width, pymc_strata_tot['p_' + age_label],
            color='r', width=width, label='Bayesian inference')
    plt.legend(loc='upper left')
    plt.xticks(xticks, rotation=90)

In [None]:
# 2 x 2 grid, one panel per age group: analytic vs model-based deaths per
# capita per year. One loop replaces four copy-pasted panels, and the title
# typo "Infrence" is corrected.
width = 0.2
xticks = range(2001, 2021)

plt.figure(figsize=(18, 12))

for panel, age_label in enumerate(['64', '79', '89', '90+'], start=1):
    plt.subplot(2, 2, panel)
    plt.title('SWEDEN deaths per capita age group %s, analytic vs Bayesian Inference' % age_label)
    # The two series are shifted by +/- width around each year so they sit side by side.
    plt.bar(analytic_deaths_per_capita.index + width, analytic_deaths_per_capita['a_' + age_label],
            color='b', width=width, label='analytic')
    plt.bar(analytic_deaths_per_capita.index - width, pymc_deaths_per_capita['p_' + age_label],
            color='r', width=width, label='Bayesian inference')
    plt.legend(loc='upper left')
    plt.xticks(xticks, rotation=90)