In [None]:
#### MUST RUN SIMPSON_POPULATION_DEATHS_PER_AGE_GROUP TO INCORPORATE NEW DATA FOR 2020


##### LESSONS LEARNED :
# 1) DONT SUM AVERAGES : DEATH RATES MUST BE CONVERTED TO ABSOLUTE NUMBERS BEFORE SUMMING AGE CATEGORIES,
#    AND FIRST THEN CONVERT BACK TO NEW RATES !!! EXAMPLE: 100 CATEGORIES OF AGE EACH WITH RATE OF 2% SUMS TO 200%...!

### ASSUMPTIONS : 2020 NR DEATHS SECOND HALF OF YEAR WILL BE SAME AS 1 ST HALF (DOUBTFUL)

#### POPULATION DATA AND AGE STRUCTURE FOR 2020 SAME AS FOR 2019

##### Further down, the 2020 death counts are scaled up (doubled), since at design time we only have data for Jan-June.
##### When data for additional months arrive, the scaling must be changed accordingly

# standardization algorithm from : https://www.healthknowledge.org.uk/e-learning/epidemiology/specialists/standardisation


import requests
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# apply seaborn's default styling to the matplotlib figures drawn below
sns.set()

In [None]:
# SCB endpoint of the death-rate table ("Dodstal")
url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101I/Dodstal'

# Query payload: restrict the "Kon" dimension to item "4" and request JSON.
# NOTE(review): "Kon" is presumably sex and "4" the combined total - confirm
# against the table metadata.
kon_selection = {"filter": "item", "values": ["4"]}
data = {
    "query": [{"code": "Kon", "selection": kon_selection}],
    "response": {"format": "json"},
}

In [None]:
# POST the query to SCB. Fail fast on a non-2xx answer instead of trying to
# decode an error page as JSON, and never wait forever on a stalled connection.
r = requests.post(url, json=data, timeout=60)
r.raise_for_status()

# the name `json` is kept because the parsing cell reads it
json = r.json()

In [None]:
# Flatten the JSON records into (age_grp, year, dead_per_1k) tuples.
# Positions 0 and 2 of each record's 'key' list are used, plus the first
# entry of its 'values' list.
data_list = [
    (record['key'][0], record['key'][2], record['values'][0])
    for record in json['data']
]

In [None]:
# death rate df from SCB data, deaths per 1000
# one row per parsed record; column order matches the tuples in data_list

death_rate = pd.DataFrame(data_list,columns=['age_grp','year','dead_per_1k'])
death_rate

In [None]:
# Cast the parsed values to numeric dtypes in one step.
death_rate = death_rate.astype({'year': int, 'dead_per_1k': float})

# Categories are listed youngest to oldest so that sorting and pivoting
# follow age instead of string order.
death_rate_age_order = [
    '0', '1-4', '5-9', '10-14', '15-19', '20-24', '25-29',
    '30-34', '35-39', '40-44', '45-49', '50-54', '55-59',
    '60-64', '65-69', '70-74', '75-79', '80-84',
    '85-89', '90+',
]
death_rate['age_grp'] = pd.Categorical(death_rate['age_grp'], death_rate_age_order)


In [None]:
# plain-string row labels (youngest to oldest) assigned to the pivoted death-rate table
death_rate_idx = ['0','1-4','5-9','10-14','15-19','20-24','25-29','30-34','35-39',
                '40-44','45-49','50-54','55-59','60-64','65-69','70-74','75-79',
                '80-84','85-89','90+']

In [None]:
# Reshape to wide form: one row per age group, one column per year.
death_rate = death_rate.pivot(index='year', columns='age_grp', values='dead_per_1k').T

# Swap the categorical index for the plain string labels.
death_rate.index = death_rate_idx
death_rate

In [None]:
# Per age group: the highest rate over all years, and the position of the
# year column in which that maximum occurs.
print(death_rate.max(axis=1),death_rate.apply(lambda row: np.argmax(row),axis=1))

# One line per year across the age groups, on a log scale.
ax = death_rate.plot(figsize=(18,12),style='o--',title='Sweden deaths per 1K per age group, 2000-2019')
ax.set_xlabel('age group')
ax.set_ylabel('deaths per 1000 [log scale]')
ax.set_yscale('log')
ax.set_xticks(list(range(len(death_rate))))
ax.set_xticklabels(death_rate.index.values, rotation=90);

In [None]:
# fetch population data

# SCB endpoint of the population table ("BefolkningR1860")
url2 = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101A/BefolkningR1860'

# 5-year age bands: "-4", "5-9", ..., "95-99", then the open-ended "100+"
age_bands = ['-4'] + ['{}-{}'.format(low, low + 4) for low in range(5, 100, 5)] + ['100+']

# every year from 2000 through 2019, as strings
query_years = [str(year) for year in range(2000, 2020)]

data2 = {
    "query": [
        {"code": "Alder", "selection": {"filter": "agg:Ålder5år", "values": age_bands}},
        {"code": "Kon", "selection": {"filter": "item", "values": ["1", "2"]}},
        {"code": "Tid", "selection": {"filter": "item", "values": query_years}},
    ],
    "response": {"format": "json"},
}


In [None]:
# POST the population query. Show the status, then stop on a non-2xx answer
# rather than parsing an error body; the timeout guards against a hung request.
r2 = requests.post(url2, json=data2, timeout=60)
print(r2.status_code)
r2.raise_for_status()
json2 = r2.json()

In [None]:
# Flatten the population records into (age_grp, gender, year, pop) tuples:
# the three 'key' entries in order, followed by the first 'values' entry.
data_list2 = [
    (entry['key'][0], entry['key'][1], entry['key'][2], entry['values'][0])
    for entry in json2['data']
]


In [None]:
# Population table in long form; year and head count are cast to integers.
population = pd.DataFrame(
    data_list2, columns=['age_grp', 'gender', 'year', 'pop']
).astype({'year': int, 'pop': int})



In [None]:
# Age bands in ascending order; as categories they sort by age, not alphabetically.
population_age_order = [
    '-4', '5-9', '10-14', '15-19', '20-24', '25-29',
    '30-34', '35-39', '40-44', '45-49', '50-54',
    '55-59', '60-64', '65-69', '70-74', '75-79',
    '80-84', '85-89', '90-94', '95-99', '100+',
]
population['age_grp'] = pd.Categorical(population['age_grp'], categories=population_age_order)

population

In [None]:
# Sum male + female per (age group, year).
# Only the 'pop' column is aggregated. 'gender' holds string codes, and on
# pandas versions where groupby.sum() no longer skips non-numeric columns it
# would be concatenated and end up as a second, duplicated set of year columns
# after unstacking.
population = population.groupby(['age_grp', 'year'])['pop'].sum().unstack()

# Plain string row labels. The youngest band (queried as '-4') is already
# labelled '1-4' here; the births are subtracted from it in a later cell.
population_idx = ['1-4','5-9','10-14','15-19','20-24','25-29','30-34','35-39',
                '40-44','45-49','50-54','55-59','60-64','65-69','70-74','75-79',
                '80-84','85-89','90-94','95-99','100+']

population.index = population_idx
population

In [None]:
# in order to compare with the death rates, where 0-year-olds are a separate group and the highest category is 90+,
# the population age groups must be restructured

In [None]:
# new index that conforms with the death-rate table (oldest group is 90+)

population_idx = ['1-4','5-9','10-14','15-19','20-24','25-29','30-34','35-39',
                '40-44','45-49','50-54','55-59','60-64','65-69','70-74','75-79',
                '80-84','85-89','90+']

# merge the 90-94, 95-99 and 100+ groups into 90+
oldest_groups = ['90-94', '95-99', '100+']
pop_90_plus = population.loc[oldest_groups].sum()
population = population.drop(oldest_groups)

# DataFrame.append no longer exists in pandas 2.x; concatenating a one-row
# frame appends the merged group in the same way.
population = pd.concat([population, pop_90_plus.to_frame().T], ignore_index=True)
population.index = population_idx


In [None]:
# births during each year - used further down as the number of 0-year-olds

# SCB endpoint of the births table ("FoddaK")
url3 = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101H/FoddaK'

# every year from 2000 through 2019, as strings
birth_years = [str(year) for year in range(2000, 2020)]

data3 = {
    "query": [
        {"code": "Region", "selection": {"filter": "vs:RegionRiket99", "values": []}},
        {"code": "AlderModer", "selection": {"filter": "vs:ÅlderTotA", "values": []}},
        {"code": "Kon", "selection": {"filter": "item", "values": ["1", "2"]}},
        {"code": "Tid", "selection": {"filter": "item", "values": birth_years}},
    ],
    "response": {"format": "json"},
}

In [None]:
# POST the births query. Show the status, then stop on a non-2xx answer
# rather than parsing an error body; the timeout guards against a hung request.
r3 = requests.post(url3, json=data3, timeout=60)
print(r3.status_code)
r3.raise_for_status()
json3 = r3.json()

In [None]:
# Flatten the births records into (gender, year, count) tuples:
# the first two 'key' entries followed by the first 'values' entry.
data_list3 = [
    (item['key'][0], item['key'][1], item['values'][0])
    for item in json3['data']
]


In [None]:
# Births in long form. The count column is named '0' because it later becomes
# the row for the 0-year-olds; year and count are cast to integers.
births = pd.DataFrame(
    data_list3, columns=['gender', 'year', '0']
).astype({'year': int, '0': int})

births

In [None]:
# add boys + girls: total births per year, laid out as one row labelled '0'.
# Only the count column is summed. The string 'gender' codes must stay out of
# the aggregation, otherwise pandas versions that sum non-numeric columns would
# add a spurious 'gender' row.
births = births.groupby('year')[['0']].sum().T
births.index.name = 'age_grp'
births

In [None]:
# Put the '0' row (births) on top of the age table, then remove those
# newborns from the youngest band so that it really covers ages 1-4.
# NOTE: re-running this cell stacks another '0' row on top.
population = pd.concat([births, population], axis=0)
population.loc['1-4'] = population.loc['1-4'].sub(population.loc['0'])
population


In [None]:
# check for correct totals: summing over the age-group rows gives one total per year column

population.sum()

In [None]:
# Year-over-year growth factor per age group: each year divided by the
# previous one (years as rows after transposing).
pop_by_year = population.T
yby_age_increase = pop_by_year.div(pop_by_year.shift())

# growth factors from 2018 to 2019
age_increase_2019 = yby_age_increase.loc[2019]
age_increase_2019

In [None]:
### ASSUMPTION: each age group grows from 2019 to 2020 by the same factor
### as it did from 2018 to 2019
pop_2020 = population[2019].mul(age_increase_2019)

# store the projection as whole persons (fractions are truncated)
population[2020] = pop_2020.astype(int)
population.sum()

In [None]:
# Average population per year: the mean of each year and the year before it.
# 2000 has no predecessor in the data, so that column is removed.
years_as_rows = population.T
medel_pop = ((years_as_rows + years_as_rows.shift()) / 2).T.drop(2000, axis=1)

#### From here on `population` means the two-year AVERAGE population!
population = medel_pop
####

# keep the death-rate table on the same set of years
death_rate = death_rate.drop(2000, axis=1)

In [None]:
# yearly totals of the averaged population
population.sum()

In [None]:
####
#### MUST RUN SIMPSON_POPULATION_DEATHS_PER_AGE_GROUP TO INCORPORATE NEW DATA FOR 2020
####
# SECURITY: unpickling can execute arbitrary code - only load a file that was
# produced locally by the notebook named above.
# NOTE(review): the structure of the pickled object is not visible here; it is
# presumably a Series with a two-level index, since it is unstacked - confirm.
deaths_2020_ytd = pd.read_pickle('deaths_2020_ytd.pkl')
deaths_2020_ytd = deaths_2020_ytd.unstack()

In [None]:
# discard the row labelled 2019, then transpose what remains
# NOTE: not re-runnable - a second run fails because label 2019 is already gone
deaths_2020_ytd = deaths_2020_ytd.drop(2019).T
print(deaths_2020_ytd)
print (deaths_2020_ytd.sum())

In [None]:
###### PARAMETER ######
### how many months of 2020 we have data for - can be changed to days, e.g. year_fraction_factor = 1 / (days/365)
###
months = 6
###
#######################

# print deaths inc FORECASTED DEATHS FOR 2020

# scale the year-to-date counts up to a full year (factor 2.0 for 6 months)
year_fraction_factor = 1 / (months/12)
# NOTE: in-place scaling - running this cell twice applies the factor twice;
# re-run the two cells that load deaths_2020_ytd before re-running this one
deaths_2020_ytd *= year_fraction_factor
print (deaths_2020_ytd)
print (deaths_2020_ytd.sum())

In [None]:
# CALCULATE ABSOLUTE YEARLY DEATHS FROM POPULATION x DEATH RATE
# death_rate is per 1000, hence the division by 1000.
# NOTE(review): errors='ignore' returns the unconverted frame when the int cast
# fails; population has a 2020 column that death_rate presumably lacks, which
# would make that column NaN and silently skip the cast - confirm.
absolute_deaths = (death_rate / 1000 * population).astype(int,errors='ignore')
print (absolute_deaths.sum())


In [None]:
# Collapse the age groups into four strata: up to 64, 65-79, 80-89 and 90+.
# Label slices with .loc include both end points.
deaths_by_stratum = {
    '64': absolute_deaths.loc[:'60-64'].sum(),
    '79': absolute_deaths.loc['65-69':'75-79'].sum(),
    '89': absolute_deaths.loc['80-84':'85-89'].sum(),
    '90+': absolute_deaths.loc['90+'],
}
strata_abs_deaths = pd.concat(deaths_by_stratum, axis=1)

# The 2020 row takes the scaled-up year-to-date counts, assigned by position.
strata_abs_deaths.loc[2020, :] = deaths_2020_ytd.T.values
strata_abs_deaths = strata_abs_deaths.astype(int)
print(strata_abs_deaths.sum(axis=1))
strata_abs_deaths

In [None]:
# Collapse the population into the same four strata as the deaths:
# up to 64, 65-79, 80-89 and 90+ (label slices include both end points).
pop_by_stratum = {
    '64': population.loc[:'60-64'].sum(),
    '79': population.loc['65-69':'75-79'].sum(),
    '89': population.loc['80-84':'85-89'].sum(),
    '90+': population.loc['90+'],
}
strata_abs_pop = pd.concat(pop_by_stratum, axis=1)
strata_abs_pop

In [None]:
# total population per year across the four strata
strata_abs_pop.sum(axis=1)

In [None]:
# compute stratified per capita death rates
# element-wise deaths / population, aligned on year (rows) and stratum (columns)

computed_death_rate_per_capita = strata_abs_deaths / strata_abs_pop
computed_death_rate_per_capita 


In [None]:
# death rate year-by-year change: each year's rate divided by the previous
# year's rate (the first row has no predecessor and becomes NaN)

stratified_yearly_death_rate_per_capita_YBY_change = computed_death_rate_per_capita / computed_death_rate_per_capita.shift()
stratified_yearly_death_rate_per_capita_YBY_change

In [None]:
# calculate avg 2015-2019 death rate per stratum (label slice, both years included)

stratified_avg_death_rate_2015_2019 = computed_death_rate_per_capita.loc[2015:2019].mean()
stratified_avg_death_rate_2015_2019

In [None]:
# yearly death rates as a factor of the 2015-2019 average (1.0 = equal to the average)

stratified_death_rate_vs_avg_2015_2019 = computed_death_rate_per_capita / stratified_avg_death_rate_2015_2019

stratified_death_rate_vs_avg_2015_2019

In [None]:
# Line chart of each stratum's death rate relative to its 2015-2019 average.
title = (
    'SWEDEN deaths per capita compared to average 2015-2019, age groups -64,65-79,80-89,90+\n'
    '2020 forecast : ASSUMING full year 2020 deaths will be 2x deaths 2020 Jan-Jun\nDataSource:SCB.se'
)

ax = stratified_death_rate_vs_avg_2015_2019.plot(figsize=(18,12), style='o--', title=title)
ax.set_xticks(range(2001,2021))  # one tick per year
ax.set_ylabel('factor')
plt.savefig('death_rate_trend.jpg',format='jpg')

In [None]:
# Grouped bars: per-capita death rate of each stratum per year, log scale.
title = (
    'Sweden Deaths per Capita per Age Group : 2001-2020\n\n'
    'ASSUMPTIONS: \n1) Number of deaths 2:d half 2020 will be equal to 1:st half\n'
    '2) population 2020 grows, per age group, as between 2018-2019\n\n'
    'DataSource : SCB.se'
)

ax = computed_death_rate_per_capita.plot(kind='bar', figsize=(18,12), title=title)
ax.set_yscale('log')
ax.set_ylabel('deaths per capita [log scale]')
ax.legend(['0-64','65-79','80-89','90+'])
plt.savefig('age_adj_death_ratio_1.jpg',format='jpg')


In [None]:
# Stacked bars: absolute deaths per stratum and year, rebuilt as rate x population.
title = (
    'Sweden Deaths per Age Group, non-adj (abs numbers) : 2001-2020\n\n'
    'ASSUMPTIONS: \n1) Number of deaths 2:d half 2020 will be equal to 1:st half\n'
    '2) population 2020 grows, per age group, as between 2018-2019\n\n'
    'DataSource : SCB.se'
)
print(computed_death_rate_per_capita)

total_computed_deaths = computed_death_rate_per_capita.mul(strata_abs_pop)
ax = total_computed_deaths.plot(kind='bar', stacked=True, figsize=(18,12), title=title)
ax.legend(['0-64','65-79','80-89','90+'])
ax.set_ylabel('number of deaths')
plt.savefig('age_adj_abs_numbers_age_grp_1.jpg',format='jpg',dpi=600)

# yearly totals over all strata, shown as the cell output
total_computed_deaths.sum(axis=1)


In [None]:
# deaths per million inhabitants within each stratum, truncated to integers
dead_per_M_strata = (total_computed_deaths / strata_abs_pop * 1e6).astype(int)
dead_per_M_strata

In [None]:
# reference population for the age adjustment: the 2001 row of the strata table
std_pop = strata_abs_pop.loc[2001]
std_pop

In [None]:
# display the per-capita rates that are applied to the reference population next
computed_death_rate_per_capita

In [None]:
# each year's stratum rates applied to the fixed 2001 reference population
# (std_pop is matched to the columns by stratum label)
age_adj_deaths = std_pop * computed_death_rate_per_capita
age_adj_deaths

In [None]:
# Stacked bars: age-adjusted deaths per stratum and year.
title = (
    'Sweden Age Adjusted Deaths per Age Group, non-pop-adjusted (abs numbers) : 2001-2020\n\n'
    'ASSUMPTIONS: \n1) Number of deaths 2:d half 2020 will be equal to 1:st half\n'
    '2) population 2020 grows, per age group, as between 2018-2019\n\n'
    'DataSource : SCB.se'
)
ax = age_adj_deaths.plot(kind='bar', stacked=True, figsize=(18,12), title=title)
ax.legend(['0-64','65-79','80-89','90+'])
ax.set_ylabel('number of deaths [age adjusted]')
ax.set_ylim([0,100000])  # fixed y-range
plt.savefig('age_adj_no_pop_adjusted_3.jpg',format='jpg',dpi=600)


In [None]:
# size of the reference population
print (std_pop.sum())
# age-adjusted deaths over all strata, per million of the reference population
age_adj_deaths_per_M = age_adj_deaths.sum(axis=1) / (std_pop.sum() / 1e6)
age_adj_deaths_per_M

In [None]:
# Bars: age-adjusted deaths per million of the reference population, per year.
title = (
    'Sweden Age Adjusted + Population Adjusted Deaths All Age Groups : 2001-2020\n\n'
    'ASSUMPTIONS: \n1) Number of deaths 2:d half 2020 will be equal to 1:st half\n'
    '2) population 2020 grows, per age group, as between 2018-2019\n\n'
    'DataSource : SCB.se'
)

ax = age_adj_deaths_per_M.plot(kind='bar', figsize=(18,12), title=title, color='crimson')
ax.set_ylabel('age adjusted deaths per million')
plt.savefig('age_adj_pop_adj_4.jpg',format='jpg')


In [None]:
# quick look at the first rows of the strata population before reshaping it
strata_abs_pop.head()

In [None]:
def create_age_idx(a):
    """Map a stratum label to its 1-based index.

    '64' -> 1, '79' -> 2, '89' -> 3, '90+' -> 4; any other value yields None.
    """
    for label, idx in (('64', 1), ('79', 2), ('89', 3), ('90+', 4)):
        if a == label:
            return idx
    return None

# Long-format table for the model: one row per (age stratum, year).
all_data = (
    strata_abs_pop.unstack()
    .reset_index()
    .rename(columns={'level_0': 'age_grp', 0: 'pop'})
)

deaths_long = (
    strata_abs_deaths.unstack()
    .reset_index()
    .rename(columns={'level_0': 'age_grp', 0: 'dead'})
)

# The deaths column is attached via the default integer index of both frames.
all_data = pd.concat([all_data, deaths_long['dead']], axis=1)

# 1-based year index (2001 -> 1) and 1-based stratum index for the model
all_data['year_idx'] = all_data['year'] - 2000
all_data['age_idx'] = all_data['age_grp'].apply(create_age_idx)
all_data['pop'] = all_data['pop'].astype(int)
all_data.tail(40)

In [None]:
#PYMC
import pymc as pm
from pymc.Matplot import plot as pmplot


### condition on year by assigning a unique alpha to each year
### condition on age_grp by assigning a unique beta to each age group

# model:
# dead ~ Binomial(population, p)
# p = inverse_logit(alpha[year_idx] + beta[age_idx])
# alpha[year_idx] ~ Normal(mean 0, sd 5)
# beta[age_idx]   ~ Normal(mean 0, sd 5)
# NOTE(review): the third argument of pm.Normal is taken to be the precision
# tau = 1 / sd**2 (PyMC 2 convention) - confirm for the installed version.


year_idx = all_data['year_idx']
age_idx = all_data['age_idx']

# One alpha per distinct year and one beta per distinct stratum. Sizing alpha
# by the number of data rows would add parameters that are never indexed by
# the model and are only sampled from the prior.
alpha = pm.Normal('alpha', 0, 1 / 5 ** 2, size=year_idx.nunique())  # 20 years

beta = pm.Normal('beta', 0, 1 / 5 ** 2, size=age_idx.nunique())  # 4 strata


@pm.deterministic
def logit_age(age_idx=age_idx-1,year_idx=year_idx-1,alpha=alpha,beta=beta):
    # Death probability per data row: exp(eta) / (1 + exp(eta)) with
    # eta = alpha[year] + beta[stratum]. The default arguments shift the
    # 1-based indices to 0-based positions.
    eta = alpha[year_idx] + beta[age_idx]
    odds = np.exp(eta)
    return odds / (1 + odds)


# likelihood: the observed deaths per row, Binomial with n = population and p = logit_age
lkh_age = pm.Binomial('lkh_age',n=all_data['pop'],p=logit_age,observed=True,
                      value=all_data['dead'])

model = pm.Model([alpha,beta,logit_age,lkh_age])

# MAP fit before sampling - presumably to start the sampler near the mode
_map = pm.MAP(model)
_map.fit()

mcmc = pm.MCMC(model)
# NOTE(review): the positional arguments are presumably iter=500000,
# burn=200000, thin=3 - confirm against the installed PyMC version.
# No random seed is set, so the traces differ between runs.
sample = mcmc.sample(500000,200000,3)

In [None]:
def logit_pure(x):
    """Return exp(x) / (1 + exp(x)) for a scalar, array or Series.

    Despite its name this is the inverse logit (sigmoid): it maps a real
    value to a probability in [0, 1].

    Evaluated as exp(-log(1 + exp(-x))) via np.logaddexp, so large |x| gives
    1.0 or 0.0 instead of overflowing to inf / inf = NaN.
    """
    return np.exp(-np.logaddexp(0, -x))

def logistic_pure(p):
    """Return the log-odds log(p / (1 - p)) of a probability p.

    Despite its name this is the logit function.
    """
    odds = p / (1 - p)
    return np.log(odds)

In [None]:
#

# Collect the posterior samples column by column: alpha column i belongs to
# year 2001 + i, beta column j to the j-th stratum.
alpha_trace = mcmc.trace('alpha')
beta_trace = mcmc.trace('beta')

traces = {}
for position, year in enumerate(range(2001, 2021)):
    traces['alpha_{}'.format(year)] = alpha_trace[:, position]
for position, stratum in enumerate(('64', '79', '89', '90')):
    traces['beta_{}'.format(stratum)] = beta_trace[:, position]

# Diagnostic plots for every beta and for a selection of alphas.
plotted = ['beta_64', 'beta_79', 'beta_89', 'beta_90',
           'alpha_2001', 'alpha_2002', 'alpha_2012',
           'alpha_2015', 'alpha_2019', 'alpha_2020']
for trace_name in plotted:
    pmplot(traces[trace_name], trace_name)

# One column per parameter: alphas in year order, then the betas.
result = pd.DataFrame(traces)

result.describe()


In [None]:
# Posterior death probability for every (year, stratum) pair: apply
# logit_pure to alpha_<year> + beta_<stratum>, sample by sample.
# Columns are named '<year>_<stratum>', year-major, strata in the order 64, 79, 89, 90.
result_p = pd.DataFrame()

for year in range(2001, 2021):
    for stratum in ('64', '79', '89', '90'):
        column = '{}_{}'.format(year, stratum)
        result_p[column] = logit_pure(
            result['alpha_{}'.format(year)] + result['beta_{}'.format(stratum)])

result_p.describe()


In [None]:
# Posterior mean of each '<year>_<stratum>' probability, as a two-column frame
# ('index' holds the column name, 'mean' the value).
p_means = result_p.describe().loc['mean'].reset_index()

# Split '<year>_<stratum>' at the first underscore. `n` is passed by keyword
# because recent pandas versions reject it as a positional argument.
p_means[['year','age_grp']] = p_means['index'].str.split('_', n=1, expand=True)

# Years as rows, strata as columns.
p_means = pd.pivot(p_means, index='age_grp', columns='year', values='mean').T

# Use the same label for the oldest stratum as the strata tables do.
p_means = p_means.rename(columns={'90': '90+'})

In [None]:
# replace the string year labels with integers so the frame aligns with strata_abs_pop;
# relies on the rows already being ordered 2001..2020
p_means.index = range(2001,2021)

In [None]:
# model-based deaths: population x posterior mean probability, per year and stratum
pymc_abs_deaths_cond_year_age = strata_abs_pop * p_means

# total over the strata for each year
pymc_abs_sum = pymc_abs_deaths_cond_year_age.sum(axis=1)
pymc_abs_sum

In [None]:
# directly computed deaths, summed over the strata for each year
computed_abs_sum = total_computed_deaths.sum(axis=1)
computed_abs_sum

In [None]:
# Model-based and directly computed yearly totals side by side.
combined_abs_sums = pd.concat(
    {'pymc_abs_dead': pymc_abs_sum, 'analytic_abs_dead': computed_abs_sum},
    axis=1,
)
combined_abs_sums

In [None]:
# Bars per year: model-based vs. directly computed absolute deaths.
ax = combined_abs_sums.plot(
    kind='bar',
    figsize=(18,12),
    title='SWEDEN All Cause Deaths conditioned on year + age_grp, absolute',
)
ax.set_ylabel('number of deaths')

In [None]:
# total population per year over all strata
yearly_pop = strata_abs_pop.sum(axis=1)

# both death totals per million inhabitants; division is row-wise by that year's population
pop_adj_deaths = combined_abs_sums.div(yearly_pop / 1e6,axis=0)
pop_adj_deaths.columns = ['pymc_dead_per_M','analytic_dead_per_M']
pop_adj_deaths

In [None]:
# Bars per year: model-based vs. directly computed deaths per million inhabitants.
ax = pop_adj_deaths.plot(
    kind='bar',
    figsize=(18,12),
    title='SWEDEN All Cause Deaths, conditioned on year+age_grp, per Million pop',
)

ax.set_ylabel('deaths per million')