In [None]:
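## Keep the last line of the CSV exports: the code expects one extra row after the 366 daily rows (parse_scb_prel reads it as the 'unknown day of death' row).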

# # uses table taken from scb excel sheet 2
# df = pd.read_csv('scb_dead_per_age.csv',sep=';',header=None,usecols=range(19),index_col=0)

### ASSUME population for 2020 increases by age group as it did from 2018 to 2019

### using average of two consecutive years for population

#### !!!!! ANOMALY in SCB data: historical death data for (at least ) 2019 from statistikdatabasen 
# http://www.statistikdatabasen.scb.se/pxweb/sv/ssd/START__BE__BE0101__BE0101G/ManadFoddDod/table/tableViewLayout1/
# does not concur with death data 2019 in the preliminary data! 

### This is DUE to "okänd dödsdag" (unknown day of death): the preliminary daily data has many more deaths
### with unknown day than the monthly data has deaths with unknown month ("okänd månad").
####

# ALL DATA BASED ON SCB PREL DATA IGNORES THOSE WITH UNKNOWN DAY OF DEATH !!!


##### LESSONS LEARNED :
# 1) DONT SUM AVERAGES : DEATH RATES MUST BE CONVERTED TO ABSOLUTE NUMBERS BEFORE SUMMING AGE CATEGORIES,
#    AND FIRST THEN CONVERT BACK TO NEW RATES !!! EXAMPLE: 100 CATEGORIES OF AGE EACH WITH RATE OF 2% SUMS TO 200%...!

### ASSUMPTIONS : 2020 NR DEATHS WILL GROW LINEARLY: FULL_YEAR_DEATHS = CURRENT_DEATHS / ((NR_DATA_DAYS / 366) * EPSILON)   (2020 is a leap year)

#### POPULATION PER AGE GROUP FOR 2020 IS EXTRAPOLATED FROM 2019 USING THE 2018 -> 2019 GROWTH PER AGE GROUP

# standardization algorithm from : https://www.healthknowledge.org.uk/e-learning/epidemiology/specialists/standardisation


import requests
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

sns.set()

In [None]:
def parse_scb_prel(YTD=True):
    """Load SCB's preliminary daily death counts from 'scb-dead_per_day.csv'.

    The file is expected to hold one row per calendar day followed by one
    final row with the deaths whose day of death is unknown. Those
    unknown-day deaths are spread over the known days in proportion to each
    day's share of the column total, so every column still sums to
    known + unknown deaths.

    Parameters
    ----------
    YTD : bool, default True
        If True, keep only the days that have a non-zero 2020 count.

    Returns
    -------
    pandas.DataFrame
        Indexed by consecutive dates starting at 2020-01-01, with the columns
        '2015' ... '2020' and '2015-2019' (adjusted daily deaths) plus 'Månad'.
    """
    year_cols = ['2015','2016','2017','2018','2019','2020','2015-2019']

    month_numbers = {
        'januari': 1, 'februari': 2, 'mars': 3, 'april': 4,
        'maj': 5, 'juni': 6, 'juli': 7, 'augusti': 8,
        'september': 9, 'oktober': 10, 'november': 11, 'december': 12,
    }

    def monthname2number(m):
        """Map a Swedish month name to 1..12; anything else gives None."""
        return month_numbers.get(m)

    df = pd.read_csv('scb-dead_per_day.csv',sep=';',usecols=[1,2,3,4,5,6,7,10,11],thousands=',')

    df['month'] = df['Månad'].apply(monthname2number)
    df[['Månad','month']] = df[['Månad','month']].replace(np.nan,0)
    df['month'] = df['month'].astype(int)

    df = df.replace(np.nan,0)
    df[year_cols] = df[year_cols].astype(int)

    # The last row holds the deaths with unknown day. Take it by position and
    # remove it by position, so a file with a different number of rows cannot
    # leave it among the daily rows (the old code dropped the fixed label 366).
    unknown_death_day = df[year_cols].iloc[-1]
    df = df.iloc[:-1]

    if YTD:
        # NOTE(review): a real day with zero reported 2020 deaths would be
        # dropped here as well and shift the date index below.
        df = df.loc[df['2020'] != 0]

    df.index = pd.date_range('2020-01-01',periods=len(df))

    # Share of each day in the known total, per column.
    known_death_day = df[year_cols].sum()
    dead_pct_per_day = df[year_cols] / known_death_day

    # Known deaths plus each day's proportional share of the unknown-day deaths.
    all_deaths = df[year_cols] + unknown_death_day * dead_pct_per_day

    return pd.concat([all_deaths,df['Månad']],axis=1)

In [None]:
# Daily deaths with the unknown-day deaths redistributed; YTD=True (the default)
# keeps only the days that have 2020 data.
daily_deaths = parse_scb_prel()

### year_fraction is used to predict full year 2020 deaths ###
### goes from 1/366 to 1 ###
# 366 because 2020 is a leap year

year_fraction = len(daily_deaths) / 366

In [None]:
# SCB API endpoint of the death-rate table ("Dodstal").
url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101I/Dodstal'

# Query body: restrict the "Kon" variable to the single item "4" and ask for JSON.
# NOTE(review): "4" is presumably the both-sexes total - confirm against the table metadata.
data = {
    "query": [
        {"code": "Kon", "selection": {"filter": "item", "values": ["4"]}},
    ],
    "response": {"format": "json"},
}

In [None]:
# POST the death-rate query. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() turns an HTTP error into an exception here instead of
# a confusing JSON decoding error on the next line.
r = requests.post(url,json=data,timeout=30)
r.raise_for_status()

# NOTE: the name `json` is read by the parsing cell below, so it is kept as is.
json = r.json()

In [None]:
#parse death data from json to df
# Each record is flattened to (key[0], key[2], values[0]); the next cell names these
# fields age_grp, year and dead_per_1k.

data_list = []

nr_records = len(json['data'])

for record in json['data']:
    record_key = record['key']
    data_list.append((record_key[0], record_key[2], record['values'][0]))

In [None]:
# death rate df from SCB data, deaths per 1000
# Long format: one row per parsed (age group, year) record.

death_rate = pd.DataFrame(data_list,columns=['age_grp','year','dead_per_1k'])
death_rate

In [None]:
# Cast the parsed values to numeric types.
death_rate['year'] = death_rate['year'].astype(int)
death_rate['dead_per_1k'] = death_rate['dead_per_1k'].astype(float)

# make age_grp categorical for correct sorting
# (the category list fixes the order; labels that are not in the list become NaN)
death_rate['age_grp'] = pd.Categorical(death_rate['age_grp'],['0','1-4','5-9','10-14','15-19','20-24','25-29',
                                             '30-34','35-39','40-44','45-49','50-54','55-59',
                                             '60-64','65-69','70-74','75-79','80-84',
                                             '85-89','90+'])


In [None]:
# Plain-string row labels for the pivoted death-rate table, in the same order as the
# categories defined for death_rate['age_grp'].
death_rate_idx = ['0','1-4','5-9','10-14','15-19','20-24','25-29','30-34','35-39',
                '40-44','45-49','50-54','55-59','60-64','65-69','70-74','75-79',
                '80-84','85-89','90+']

In [None]:
#pivot for easier handling
# After the transpose the rows are age groups and the columns are years.
death_rate = pd.pivot(death_rate,index='year',columns='age_grp',values='dead_per_1k')
death_rate = death_rate.T

# Replace the categorical row index with plain string labels; this relies on the pivot
# emitting the age groups in category order.
death_rate.index = death_rate_idx
death_rate

In [None]:
# Darwin awards to young males.....

# max deathrate and corresponding column index
# (np.argmax yields the position of the maximum within each row, not the year label)

print(death_rate.max(axis=1),death_rate.apply(lambda row: np.argmax(row),axis=1))

# One line per year column, age groups along the x axis, log-scaled y axis.
death_rate.plot(figsize=(18,12),style='o--',title='Sweden deaths per 1K per age group, 2000-2019')
plt.xlabel('age group')
plt.ylabel('deaths per 1000 [log scale]')
plt.yscale('log')
# one tick per age-group row, labelled with the row index
_= plt.xticks(range(len(death_rate)),death_rate.index.values,rotation=90)

In [None]:
# fetch population data

# SCB API endpoint of the population table, queried per 5-year age class, sex and year.
url2 = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101A/BefolkningR1860'
data2 = {
    "query": [
        {
            "code": "Alder",
            "selection": {
                "filter": "agg:Ålder5år",
                "values": [
                    "-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34",
                    "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69",
                    "70-74", "75-79", "80-84", "85-89", "90-94", "95-99", "100+",
                ],
            },
        },
        {"code": "Kon", "selection": {"filter": "item", "values": ["1", "2"]}},
        {
            "code": "Tid",
            "selection": {
                "filter": "item",
                # the years 2000 ... 2019 as strings
                "values": [str(year) for year in range(2000, 2020)],
            },
        },
    ],
    "response": {"format": "json"},
}


In [None]:
# POST the population query; time out instead of hanging, and stop on an HTTP error
# rather than failing later while decoding the body.
r2 = requests.post(url2,json=data2,timeout=30)
print (r2.status_code)
r2.raise_for_status()
json2 = r2.json()

In [None]:
#parse population data
# Each record is flattened to (key[0], key[1], key[2], values[0]); the next cell names
# these fields age_grp, gender, year and pop.

data_list2 = []

nr_records2 = len(json2['data'])

for record2 in json2['data']:
    record_key2 = record2['key']
    data_list2.append((record_key2[0], record_key2[1], record_key2[2], record2['values'][0]))


In [None]:
# put the parsed population records into a DataFrame and cast year / pop to int

population = pd.DataFrame(data_list2,columns=['age_grp','gender','year','pop'])
population['year'] = population['year'].astype(int)
population['pop'] = population['pop'].astype(int)



In [None]:
# make age_grp categorical for correct sort order
# (the list mirrors the age classes requested from the API; labels not in it become NaN)
population['age_grp'] = pd.Categorical(population['age_grp'],['-4','5-9','10-14','15-19','20-24','25-29',
                                               '30-34','35-39','40-44','45-49','50-54',
                                               '55-59','60-64','65-69','70-74','75-79',
                                               '80-84','85-89','90-94','95-99','100+'])

population

In [None]:
# sum male + female by grouping
# Only the numeric 'pop' column is summed. Summing the whole frame also "sums" the string
# column 'gender' on pandas >= 2.0 (string concatenation), which leaves two sets of year
# columns after unstacking. The result has age groups as rows and years as columns.
population = population.groupby(['age_grp','year'])['pop'].sum().unstack()

# Plain-string row labels. The first class ('-4') is labelled '1-4' here although it
# still contains the 0-year-olds at this point.
population_idx = ['1-4','5-9','10-14','15-19','20-24','25-29','30-34','35-39',
                '40-44','45-49','50-54','55-59','60-64','65-69','70-74','75-79',
                '80-84','85-89','90-94','95-99','100+']

population.index = population_idx
population

In [None]:
# sanity check: total population per year (sum over the age-group rows)
population.sum()


In [None]:
# In order to compare with the death rates, where the 0-year-olds are a separate group and the highest
# category is 90+, the population age groups must be restructured.

In [None]:
# new index to conform with the death-rate age groups

population_idx = ['1-4','5-9','10-14','15-19','20-24','25-29','30-34','35-39',
                '40-44','45-49','50-54','55-59','60-64','65-69','70-74','75-79',
                '80-84','85-89','90+']

# merge the 90-94, 95-99 and 100+ groups to 90+

oldest_groups = ['90-94','95-99','100+']
pop_90_plus = population.loc[oldest_groups].sum()
population = population.drop(oldest_groups)
# DataFrame.append was removed in pandas 2.0; concat of a one-row frame does the same.
population = pd.concat([population,pop_90_plus.to_frame().T],ignore_index=True)
population.index = population_idx
# totals per year must be unchanged by the merge
population.sum()


In [None]:
# births during the year - used to find the number of 0-year-olds

url3 = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101H/FoddaK'

# Query body: region and mother's-age value sets with no explicit values, both sexes,
# years 2000 ... 2019.
data3 = {
    "query": [
        {"code": "Region", "selection": {"filter": "vs:RegionRiket99", "values": []}},
        {"code": "AlderModer", "selection": {"filter": "vs:ÅlderTotA", "values": []}},
        {"code": "Kon", "selection": {"filter": "item", "values": ["1", "2"]}},
        {
            "code": "Tid",
            "selection": {
                "filter": "item",
                "values": [str(year) for year in range(2000, 2020)],
            },
        },
    ],
    "response": {"format": "json"},
}

In [None]:
# POST the births query; time out instead of hanging, and stop on an HTTP error
# rather than failing later while decoding the body.
r3 = requests.post(url3,json=data3,timeout=30)
print (r3.status_code)
r3.raise_for_status()
json3 = r3.json()

In [None]:
#parse births data
# Each record is flattened to (key[0], key[1], values[0]); the next cell names these
# fields gender, year and '0'.

data_list3 = []

nr_records3 = len(json3['data'])

for record3 in json3['data']:
    record_key3 = record3['key']
    data_list3.append((record_key3[0], record_key3[1], record3['values'][0]))


In [None]:
# put the parsed births records into a DataFrame; the count column is named '0'
# because it is used as the 0-year-old age group further down

births = pd.DataFrame(data_list3,columns=['gender','year','0'])
births['year'] = births['year'].astype(int)
births['0'] = births['0'].astype(int)

births

In [None]:
# add boys + girls
# Only the numeric '0' column is summed: on pandas >= 2.0 summing the whole frame would
# also concatenate the 'gender' strings and add a non-numeric 'gender' row after the
# transpose. Result: a single row '0' with one column per year.

births = births.groupby('year')[['0']].sum().T
births.index.name='age_grp'
births

In [None]:
# add zero years old to top of population, subtract 0-year-olds from 1-4
# (the row labelled '1-4' so far holds the whole '-4' class; the births of each year are
#  used as the count of 0-year-olds)
population = pd.concat([births,population],axis=0)
population.loc['1-4',:] = population.loc['1-4',:] - population.loc['0',:]
population


In [None]:
# check for correct totals: moving the 0-year-olds into their own row must leave the
# per-year sums unchanged

population.sum()

In [None]:
#yby_pop_increase = population.sum() / (population.sum()).shift()
#yby_pop_increase
# Year-over-year growth factor per age group: rows are years, columns are age groups.
# The first year has no predecessor, so its row is NaN.
yby_age_increase = population.T / population.T.shift()
yby_age_increase


In [None]:
# Largest growth factor per year, and np.argmax applied to each year's row to locate it.
max_age_increase = yby_age_increase.max(axis=1)
max_age_increase_idx =yby_age_increase.apply(np.argmax,axis=1)
print (max_age_increase)
max_age_increase_idx

In [None]:
# growth factor per age group from 2018 to 2019
age_increase_2019 = yby_age_increase.loc[2019]
age_increase_2019

In [None]:
### ASSUME population for 2020 increases by age group as it did from 2018 to 2019
print (population.sum())
pop_2020 = population[2019] * age_increase_2019
# astype(int) truncates the extrapolated counts toward zero
population[2020] = pop_2020.astype(int)
#population
population.sum()

In [None]:
# medel_pop = mean population: each year's value is averaged with the previous year's.
medel_pop = (population.T + population.T.shift()) / 2 # the mean of two consecutive years
medel_pop = medel_pop.T
# the first year has no predecessor, so its mean is NaN
medel_pop = medel_pop.drop(2000,axis=1)

# use the official population average of two consecutive years; drop column 2000 in death rate also

#### CHANGING POPULATION to official AVERAGE population! 
# From here on `population` holds the averaged values, not the raw counts.
population = medel_pop
####

death_rate = death_rate.drop(2000,axis=1)

In [None]:
# total (averaged) population per year
population.sum()

In [None]:
# persist the averaged population table to population.pkl
population.to_pickle('population.pkl')
population

In [None]:
# display the death-rate table (deaths per 1000; column 2000 has been dropped)
death_rate

In [None]:
# per-capita death rate: the SCB figures are deaths per 1000
mortality = (death_rate/ 1000)
mortality.to_pickle('mortality.pkl')
mortality

In [None]:
# Share of each age group in the yearly total, drawn as one stacked bar per year.
(population / population.sum()).T.plot(kind='bar',stacked=True,figsize=(18,12),
                                       title='SWEDEN population age group fraction')

plt.savefig('sweden_age_grp_fraction.jpg',format='jpg')

In [None]:
# table taken from scb excel sheet 2
# to get 2020 deaths

age_df = pd.read_csv('scb_dead_per_age.csv',sep=';',header=None,usecols=range(19),index_col=0)

# Column names follow <year>_<M|W>_<age stratum>; the '_tot' columns are the yearly totals.
# Later cells treat 'W' as female and 64/79/89/90+ as the four age strata.
cols = ['2019_tot','2019_M_64','2019_M_79','2019_M_89','2019_M_90+',
       '2019_W_64','2019_W_79','2019_W_89','2019_W_90+',
       '2020_tot','2020_M_64','2020_M_79','2020_M_89','2020_M_90+',
       '2020_W_64','2020_W_79','2020_W_89','2020_W_90+']

# 367 labels: the 366 days of 2020 plus 2021-01-01.
# NOTE(review): the extra last row is presumably the 'unknown day of death' line of the
# CSV, which thereby gets the date 2021-01-01 - confirm against the file.
index = pd.date_range('2020-01-01','2021-01-01')
age_df.index = index
age_df.columns = cols
age_df

In [None]:
# Weekly sums of the 2020 columns, then men and women added up per age stratum.
age_labels = ['64','79','89','90+']
weekly_2020 = age_df.resample('W').sum()
weekly_2020 = weekly_2020[['2020_M_' + lbl for lbl in age_labels] +
                          ['2020_W_' + lbl for lbl in age_labels]]

weekly_2020_both = pd.DataFrame()

for lbl in age_labels:
    weekly_2020_both[lbl] = weekly_2020['2020_M_' + lbl] + weekly_2020['2020_W_' + lbl]

# keep only the weeks in which every age stratum has a non-zero count ...
every_stratum_nonzero = (weekly_2020_both.T != 0).all()
weekly_2020_both = weekly_2020_both[every_stratum_nonzero]
# ... and drop the last two of the remaining rows
weekly_2020_both = weekly_2020_both.iloc[:-2]
weekly_2020_both

In [None]:
# date label of the last weekly row that was kept; used in the plot title below
last_week = weekly_2020_both.index[-1].date()
last_week

In [None]:
# Absolute weekly deaths per age stratum, one line per stratum.
weekly_2020_both.plot(title='SWEDEN weekly abs deaths per age group YTD {}'.format(last_week),style='o--',
                     figsize=(18,12))

plt.ylabel('absolute weekly deaths')

plt.savefig('sweden_weekly_abs_deaths_age_grp.jpg',format='jpg')

In [None]:
# month-end sums of every column of age_df (includes the row dated 2021-01-01)
monthly = age_df.resample('M').sum()
monthly

In [None]:

# column totals over all months
monthly.sum()



In [None]:
##### EVERYTHING BELOW IS SCALED BY EPSILON ! #####

In [None]:



#monthly.drop(['2019_tot','2020_tot'],axis=1,inplace=True)
# Total deaths per sex / age-stratum column (the '_tot' columns are left out).
cols = ['2019_M_64','2020_M_64','2019_M_79','2020_M_79','2019_M_89','2020_M_89','2019_M_90+','2020_M_90+',
        '2019_W_64','2020_W_64','2019_W_79','2020_W_79','2019_W_89','2020_W_89','2019_W_90+','2020_W_90+']

monthly_sums_per_age = monthly.sum()[cols].to_frame('dead').reset_index(drop=False)

# Decode the column names '<year>_<M|W>_<age stratum>' into separate fields.
column_label = monthly_sums_per_age['index']
monthly_sums_per_age['female'] = column_label.str.contains('W')
# last three characters are '_64', '_79', '_89' or '90+'; regex=False keeps '+' literal
# on every pandas version
monthly_sums_per_age['age'] = (column_label.str[-3:]
                               .str.replace('_','',regex=False)
                               .str.replace('+','',regex=False)
                               .astype(int))
monthly_sums_per_age['year'] = column_label.str[:4].astype(int)
monthly_sums_per_age['gender'] = monthly_sums_per_age['female'].map({True : 'F', False : 'M'})

# Add men and women: one total per (year, age stratum). The previous version also
# assigned a 3-column frame as the index just before this step; that had no effect on
# the grouping and raises on current pandas, so it is gone.
monthly_sums_per_age = monthly_sums_per_age.groupby(['year','age'])['dead'].sum()
deaths_2020_ytd = monthly_sums_per_age.unstack()
# keep 2020 only; rows = age stratum, single column 2020
deaths_2020_ytd = deaths_2020_ytd.drop(2019).T


### forecast full year 2020 deaths ###
### to account for the rising deaths from September onwards, reduce epsilon from its default of 1 ###

#### EPSILON #####

epsilon = 0.96


# linear extrapolation of the year-to-date deaths to a full year, scaled up by 1/epsilon
deaths_2020_full_year = deaths_2020_ytd * ( 1 / (year_fraction * epsilon))

print (monthly_sums_per_age)
print (deaths_2020_full_year)
print (deaths_2020_full_year.sum())

In [None]:
# persist the epsilon used for the forecast to epsilon.pkl
pd.Series(epsilon).to_pickle('epsilon.pkl')

In [None]:
# display the deaths per (year, age stratum)
monthly_sums_per_age

In [None]:
# CALCULATE ABSOLUTE YEARLY DEATHS FROM POPULATION x DEATH RATE - 2020 forecasted 
# death_rate is per 1000, hence the division. With errors='ignore' the frame is returned
# unchanged (i.e. not truncated to int) if the cast fails, e.g. because a cell is NaN.
absolute_deaths = (death_rate / 1000 * population).astype(int,errors='ignore')
print (absolute_deaths.sum())


In [None]:
# stratify absolute deaths into 4 age groups
# ('64' = up to 60-64, '79' = 65-79, '89' = 80-89, '90+'); .loc label slices include both ends

abs_deaths_64 = absolute_deaths.loc[:'60-64'].sum()
abs_deaths_79 = absolute_deaths.loc['65-69' : '75-79'].sum()
abs_deaths_89 = absolute_deaths.loc['80-84': '85-89'].sum()
abs_deaths_90 = absolute_deaths.loc['90+']

# rows = years, columns = the four strata
strata_abs_deaths = pd.concat([abs_deaths_64,abs_deaths_79,abs_deaths_89,abs_deaths_90],axis=1)
strata_abs_deaths.columns = ['64','79','89','90+']

# the 2020 row is set from the forecast based on the year-to-date counts
strata_abs_deaths.loc[2020,:] = deaths_2020_full_year.T.values

strata_abs_deaths = strata_abs_deaths.astype(int)
print (strata_abs_deaths.sum(axis=1))
strata_abs_deaths.to_pickle('strata_abs_deaths.pkl')
strata_abs_deaths

In [None]:
# Absolute deaths per stratum over the years, one tick per year 2001-2020.
strata_abs_deaths.plot(figsize=(18,12),style='o--',title='SWEDEN absolute deaths per age group')
_= plt.xticks(range(2001,2021))
plt.savefig('sweden_absolute_deaths_age_grp.jpg',format='jpg')

In [None]:
# stratify population to 4 age groups
# (same strata and the same inclusive label slices as used for the deaths)

abs_pop_64 = population.loc[:'60-64'].sum()
abs_pop_79 = population.loc['65-69' : '75-79'].sum()
abs_pop_89 = population.loc['80-84' : '85-89'].sum()
abs_pop_90 = population.loc['90+']

# rows = years, columns = the four strata
strata_abs_pop = pd.concat([abs_pop_64,abs_pop_79,abs_pop_89,abs_pop_90],axis=1)
strata_abs_pop.columns = ['64','79','89','90+']
strata_abs_pop.to_pickle('strata_abs_pop.pkl')
strata_abs_pop

In [None]:
# total population per year across the four strata
strata_abs_pop.sum(axis=1)

In [None]:
# compute stratified per capita death rates
# (element-wise deaths / population, aligned on year and stratum)

computed_death_rate_per_capita = strata_abs_deaths / strata_abs_pop
computed_death_rate_per_capita 


In [None]:
### ABSOLUTE SHARK
# Per-capita death rate per stratum over the years, one line per stratum.

computed_death_rate_per_capita.plot(title='SWEDEN Mortality age groups -64,65-79,80-89,90+\n'\
'2020 forecast : ASSUMING full year 2020 deaths follow linearly Y2D\nDataSource:SCB.se',style='o--',
                                   figsize=(18,12))
_=plt.xticks(range(2001,2021))

plt.ylabel('SWEDEN Mortality per age group')


plt.savefig('Mortality_absolute_shark.jpg',format='jpg')

In [None]:
# One panel per age stratum on a 2x2 grid with a shared x axis.
fig,axes = plt.subplots(2,2,sharex=True,figsize=(18,12))

for panel, stratum in zip(axes.flat, ('64','79','89','90+')):
    computed_death_rate_per_capita.plot(y=stratum,ax=panel,style='o--')

# yearly ticks, with rotated labels on the bottom row
year_ticks = range(2001,2021)
axes[0,0].set_xticks(year_ticks)
for bottom_panel in axes[1]:
    bottom_panel.set_xticklabels(year_ticks,rotation=90)

plt.suptitle('SWEDEN mortality per age group')

plt.savefig('sweden_mortality_age_grp_subplots.jpg',format='jpg')


In [None]:
# each year's rate relative to the mean rate of 2015-2019, per stratum
computed_death_rate_per_capita / computed_death_rate_per_capita.loc[2015:2019].mean()

In [None]:
# share of each stratum in the yearly population total
strata_abs_pop.div(strata_abs_pop.sum(axis=1),axis=0)

In [None]:
# the same population shares as one stacked bar per year
(strata_abs_pop.div(strata_abs_pop.sum(axis=1),axis=0)).plot(kind='bar',title='SWEDEN age group ratio of population',
                                                             stacked=True,figsize=(18,12))

In [None]:
# death rate YBY change
# (ratio of each year's rate to the previous year's; the first year is NaN)

stratified_yearly_death_rate_per_capita_YBY_change = computed_death_rate_per_capita / computed_death_rate_per_capita.shift()
stratified_yearly_death_rate_per_capita_YBY_change

In [None]:
# calculate avg 2015-2019 death rate
# (mean over the years 2015..2019 inclusive, per stratum)

stratified_avg_death_rate_2015_2019 = computed_death_rate_per_capita.loc[2015:2019].mean()
stratified_avg_death_rate_2015_2019

In [None]:
# weekly deaths per million inhabitants of each stratum, using the 2020 stratum population
weekly_2020_per_m = weekly_2020_both / (strata_abs_pop.loc[2020,:] /1e6)
weekly_2020_per_m

In [None]:
# Four stacked panels, one per age stratum; the last weekly row is left out of each line.
plt.figure(figsize=(18,12))
plt.suptitle ('SWEDEN 2020 YTD : weekly deaths per million, per age group\n')

weekly_panels = (('64','weekly deaths per million, age group -64'),
                 ('79','weekly deaths per million, age group 65-79'),
                 ('89','weekly deaths per million, age group 80-89'),
                 ('90+','weekly deaths per million, age group 90+'))

for panel_nr, (stratum, panel_title) in enumerate(weekly_panels, start=1):
    plt.subplot(4,1,panel_nr)
    plt.title(panel_title)
    plt.plot(weekly_2020_per_m[stratum][:-1],'o--')

plt.tight_layout()

plt.savefig('sweden_weekly_deaths_per_m_per_age_grp.jpg',format='jpg')

In [None]:
# week-over-week change factor per stratum (first week is NaN)
weekly_factors = weekly_2020_per_m / weekly_2020_per_m.shift()
weekly_factors

In [None]:
# drop the last weekly row (note: running this cell again drops one more row)
weekly_factors = weekly_factors.iloc[:-1]


In [None]:
# One panel per age stratum, sharing both axes.
fig,axes = plt.subplots(4,1,sharex=True,sharey=True,figsize=(18,12))

factor_panels = (('64','weekly change factor age group -64'),
                 ('79','weekly change factor age group 65-79'),
                 ('89','weekly change factor age group 80-89'),
                 ('90+','weekly change factor age group 90+'))

for panel, (stratum, panel_title) in zip(axes, factor_panels):
    weekly_factors[stratum].plot(ax=panel,style='o--',title=panel_title)

In [None]:
# yearly death rates as a factor of the 2015-2019 average, per stratum (1.0 = average)

stratified_death_rate_vs_avg_2015_2019 = computed_death_rate_per_capita / stratified_avg_death_rate_2015_2019

stratified_death_rate_vs_avg_2015_2019

In [None]:
### RELATIVE PENQUIN
# Death rate relative to the 2015-2019 average, one line per stratum.

# NOTE: `title` is a notebook-global name; a later combined-figure cell reads it again.
title = 'SWEDEN deaths per capita compared to average 2015-2019, age groups -64,65-79,80-89,90+\n'\
'2020 forecast : ASSUMING full year 2020 deaths follow linearly Y2D\nDataSource:SCB.se'

stratified_death_rate_vs_avg_2015_2019.plot(figsize=(18,12),
                                            style='o--',title=title)

# reference line at 1.0 = the 2015-2019 average
plt.axhline(1,color='lightgrey',label='average 2015-2019')

plt.xticks(range(2001,2021))
plt.ylabel('relative change')
plt.legend(loc='upper right')
plt.savefig('mortality_relative_penquin.jpg',format='jpg')

In [None]:
# display the stratified per-capita death rates
computed_death_rate_per_capita

In [None]:
# total deaths per year (all strata added) for the baseline years 2015-2019
baseline_deaths = strata_abs_deaths.loc[2015 : 2019]
baseline_deaths = baseline_deaths.sum(axis=1)
baseline_deaths

In [None]:
# total population per year for the baseline years 2015-2019
baseline_pop = population.sum()
baseline_pop = baseline_pop.loc[2015 : 2019]

In [None]:
# crude (not age-adjusted) mortality per baseline year: deaths / population
baseline_mortality = baseline_deaths.div(baseline_pop,axis=0)
baseline_mortality

In [None]:
# mean crude mortality over the baseline years
baseline_mortality_mean = baseline_mortality.mean()
baseline_mortality_mean

In [None]:
# crude mortality 2020: forecast deaths over all strata / total 2020 population
all_mortality_2020 = (strata_abs_deaths.loc[2020]).sum() / population[2020].sum()
all_mortality_2020

In [None]:
# absolute difference between the 2020 crude mortality and the baseline mean
all_mortality_2020 - baseline_mortality_mean

In [None]:
# Mean per-capita death rate per stratum over the baseline years 2015-2019 (inclusive).
# The year index holds integers (it is addressed as .loc[2020, :] and .loc[2015:2019]
# in other cells), so the slice bounds are integers as well rather than strings.
mortality_mean = computed_death_rate_per_capita.loc[2015 : 2019,:].mean(axis=0)
mortality_mean

In [None]:
# Baseline mean and 2020 forecast side by side, one row per stratum. The column that
# concat labels 0 (the mean series) is renamed to '2015-2019'.
combined_shark = pd.concat([mortality_mean,computed_death_rate_per_capita.loc[2020,:]],axis=1)
combined_shark.rename(columns={0 : '2015-2019'},inplace=True)
combined_shark

In [None]:
# Grouped bars per stratum: baseline mean vs 2020 forecast, on a log-scaled y axis.
combined_shark.plot(kind='bar',title='SWEDEN Mortality age groups -64,65-79,80-89,90+, 2020 forecast cmp avg. 2015-2019\n'\
'2020 forecast : ASSUMING full year 2020 deaths follow linearly Y2D\nDataSource:SCB.se',
                   figsize=(18,12))
plt.ylabel('mortality [log scale]')
plt.xlabel('age grp')
plt.yscale('log')

plt.savefig('mortality_absolute_cmp.jpg',format='jpg')

In [None]:
# absolute change per stratum: 2020 forecast rate minus the baseline mean rate
absolute_shark = computed_death_rate_per_capita.loc[2020,:] - mortality_mean
absolute_shark

In [None]:
# display the rates relative to the 2015-2019 average
stratified_death_rate_vs_avg_2015_2019

In [None]:
# relative level per stratum in 2020: forecast rate / baseline mean rate
relative_penquin = stratified_death_rate_vs_avg_2015_2019.loc[2020,:]
relative_penquin

In [None]:
# Combined figure: absolute rates in the upper panel, rates relative to the 2015-2019
# average in the lower panel, sharing the x axis.
fig,axes = plt.subplots(2,1,sharex=True)

computed_death_rate_per_capita.plot(ax=axes[0],title='SWEDEN Mortality age groups -64,65-79,80-89,90+\n'\
'2020 forecast : ASSUMING full year 2020 deaths follow linearly Y2D\nDataSource:SCB.se',style='o--',
                                   figsize=(18,12))

axes[0].set_ylabel('mortality')
#axes[0].set_yscale('log')
axes[0].legend(loc='upper left')

# NOTE: reads the notebook-global `title`; when the cells run top to bottom this is the
# string assigned in the 'RELATIVE PENQUIN' cell.
stratified_death_rate_vs_avg_2015_2019.plot(figsize=(18,12),ax=axes[1],
                                            style='o--',title=title)

axes[1].set_ylabel('mortality relative to avg. 2015-2019')
axes[1].axhline(1,color='lightgrey',label='average 2015-2019')
axes[1].legend(loc='upper right')
_=plt.xticks(range(2001,2021))


In [None]:
# Bar chart of the absolute change per stratum.
# The bar colours are fixed per position; they are not derived from the sign of the values.
colors = ['g','r','g','r']
absolute_shark.plot(kind='bar',title='SWEDEN change in mortality 2020 cmp average 2015-2019, per age grp\n' +\
                   'ASSUMING full year deaths 2020 grow proportionally YTD\nDataSource:SCB.se',
                    figsize=(18,12),color=colors)
plt.ylabel('change in mortality')
plt.xlabel('age grp')

plt.savefig('mortality_abs_change.jpg',format='jpg')

In [None]:
# Bar chart of the 2020 rate relative to the baseline per stratum (1.0 = baseline).
# The bar colours are fixed per position; they are not derived from the values.
colors = ['g','r','g','r']
relative_penquin.plot(kind='bar',figsize=(18,12),
                      color=colors,
                     title='SWEDEN relative change in mortality 2020 vs average 2015-2019, per age group')
plt.ylabel('relative change')
plt.xlabel('age grp')
plt.savefig('mortality_relative_change.jpg',format='jpg')

In [None]:
# Grouped bars per year of the per-capita death rate per stratum, log-scaled y axis.
# NOTE: this reassigns the notebook-global `title`.
title = 'Sweden Deaths per Capita per Age Group : 2001-2020\n\n' +\
'ASSUMPTIONS: \n1) 2020 forecast : linear growth for full year assumed. \n' +\
'2) population 2020 grows, per age group, as between 2018-2019\n\n' +\
r'DataSource : SCB.se'

(computed_death_rate_per_capita).plot(kind='bar',
                                    figsize=(18,12),
                                      title=title)
plt.yscale('log')
plt.ylabel('deaths per capita [log scale]')
plt.legend(['0-64','65-79','80-89','90+'])
plt.savefig('age_adj_death_ratio_1.jpg',format='jpg')


In [None]:
# y-axis limits used for the deaths-per-million bar chart further down
per_M_ylim = [0,14000]

In [None]:
# Absolute deaths per stratum, recomputed as rate x population, one stacked bar per year.
title = 'Sweden Deaths per Age Group, non-adj (abs numbers) : 2001-2020\n\n' +\
'ASSUMPTIONS: \n1) Linear growth for full year assumed\n' +\
'2) population 2020 grows, per age group, as between 2018-2019\n\n' +\
r'DataSource : SCB.se'
print (computed_death_rate_per_capita)
total_computed_deaths = computed_death_rate_per_capita * strata_abs_pop
total_computed_deaths.plot(kind='bar',
                           stacked=True,figsize=(18,12),title=title)
plt.legend(['0-64','65-79','80-89','90+'])
plt.ylabel('number of deaths')
# (the next expression is not the last line of the cell, so its value is not displayed)
total_computed_deaths.sum(axis=1)
plt.savefig('age_adj_abs_numbers_age_grp_1.jpg',format='jpg',dpi=600)
# total deaths per year - displayed as the cell output
total_computed_deaths.sum(axis=1)


In [None]:
# Total deaths per year over all strata, and their mean over 2010-2018 (inclusive).
# The year index holds integers, so the slice bounds are integers rather than strings.
tot_abs_deaths = total_computed_deaths.sum(axis=1)
mean_2010_2018 = tot_abs_deaths.loc[2010:2018].mean()
tot_abs_deaths

In [None]:
# deaths per million inhabitants of each stratum, truncated to int
dead_per_M_strata = (total_computed_deaths / strata_abs_pop * 1e6).astype(int)
dead_per_M_strata

In [None]:
# standard population for the age adjustment: the 2010 population per stratum
std_pop = strata_abs_pop.loc[2010]
std_pop

In [None]:
# persist the stratified per-capita death rates
computed_death_rate_per_capita.to_pickle('computed_death_rate_per_capita.pkl')
computed_death_rate_per_capita

In [None]:
# Age-adjusted deaths: each year's stratum rate applied to the standard (2010) stratum
# population; the Series is aligned with the frame's stratum columns.
age_adj_deaths = std_pop * computed_death_rate_per_capita
age_adj_deaths

In [None]:
# age-adjusted deaths per year, all strata added
age_adj_deaths.sum(axis=1)

In [None]:
# Age-adjusted deaths per stratum, one stacked bar per year.
title = 'Sweden Age Adjusted (std pop : 2010) Deaths per Age Group, non-pop-adjusted (abs numbers) : 2001-2020\n\n' +\
'ASSUMPTIONS: \n1) Linear growth for full year assumed\n' +\
'2) population 2020 grows, per age group, as between 2018-2019\n\n' +\
r'DataSource : SCB.se'
age_adj_deaths.plot(kind='bar',stacked=True,figsize=(18,12),title=title)
plt.legend(['0-64','65-79','80-89','90+'])
plt.ylabel('number of deaths [age adjusted]')
#plt.ylim([0,100000])
plt.savefig('age_adj_no_pop_adjusted_3.jpg',format='jpg',dpi=600)


In [None]:
# age-adjusted deaths per million of the standard population
print (std_pop.sum())
age_adj_deaths_per_M = age_adj_deaths.sum(axis=1) / (std_pop.sum() / 1e6)
age_adj_deaths_per_M

In [None]:
# Age-adjusted deaths per million, one bar per year, with the fixed y-axis limits per_M_ylim.
title = 'Sweden Age Adjusted (std pop : 2010) + Population Adjusted Deaths All Age Groups : 2001-2020\n\n' +\
'ASSUMPTIONS: \n1) Linear growth for full year assumed\n' +\
'2) population 2020 grows, per age group, as between 2018-2019\n\n' +\
r'DataSource : SCB.se'

age_adj_deaths_per_M.plot(kind='bar',figsize=(18,12),title=title,color='crimson')
plt.ylabel('age adjusted deaths per million')
plt.ylim(per_M_ylim)
plt.savefig('age_adj_pop_adj_4.jpg',format='jpg')


In [None]:
# age-adjusted mortality per year: age-adjusted deaths / standard population
age_adj_mortality = age_adj_deaths.sum(axis=1) / std_pop.sum()
age_adj_mortality

In [None]:
# Mean age-adjusted mortality over 2015-2018 (inclusive). The year index holds integers,
# so the slice bounds are integers rather than strings.
age_adj_2015_2018_mean = age_adj_mortality.loc[2015 : 2018].mean()
age_adj_2015_2018_mean

In [None]:
# Age-adjusted mortality per year with the 2015-2018 mean as a dashed reference line.
# The backslash of the mathtext '\epsilon' is escaped: '\e' is not a valid escape
# sequence and triggers a warning on recent Python versions. The rendered title is
# unchanged.
age_adj_mortality.plot(kind='bar',figsize=(18,12),color='crimson',label='mortality',
                      title='Sweden Age Adjusted (std pop : 2010) Mortality : 2001-2020\n\n' +\
'ASSUMPTIONS: \n1) Linear growth for full year assumed [$\\epsilon$ : {:.2f}]\n'.format(epsilon) +\
'2) population 2020 grows, per age group, as between 2018-2019\n\n' +\
r'DataSource : SCB.se')
plt.axhline(age_adj_2015_2018_mean,color='orange',ls='dashed',label='2015-2018 average')
plt.legend(loc='upper right')
plt.ylabel('mortality')
plt.savefig('mortality_2001_2020.jpg',format='jpg')

In [None]:
# mean age-adjusted mortality over 2015-2018 (inclusive)
age_adj_mortality_mean = (age_adj_mortality.loc[2015 : 2018]).mean()
age_adj_mortality_mean 

In [None]:
# age-adjusted mortality for 2020 (based on the forecast deaths)
age_adj_mortality_2020 = age_adj_mortality[2020]
age_adj_mortality_2020

In [None]:
# the two values to compare: baseline mean vs 2020 forecast
age_adj_mortality_means = pd.Series({'2015-2018-mean' : age_adj_mortality_mean,
                                        '2020-forecast' : age_adj_mortality_2020})
age_adj_mortality_means

In [None]:
# Two bars: baseline mean vs 2020 forecast of the age-adjusted mortality.
age_adj_mortality_means.plot(kind='bar',figsize=(18,12),
                             title='Sweden age & pop. adjusted mortality, 2020 forecast vs avg 2015-2018',
                            color='r')
plt.ylabel('mortality')
plt.savefig('age_and_pop_adjusted_mortality_2020_vs_2015-2018.jpg',format='jpg')

In [None]:
# first rows of the stratified population table
strata_abs_pop.head()

In [None]:
def create_age_idx(a):
    """Return the 1-based position of an age-stratum label.

    '64' -> 1, '79' -> 2, '89' -> 3, '90+' -> 4; any other value gives None.
    """
    for position, label in enumerate(('64', '79', '89', '90+'), start=1):
        if a == label:
            return position
    return None

# Long table for the model: one row per (age stratum, year) with population and deaths.
# unstack() turns the year x stratum frame into a Series keyed by (stratum, year);
# reset_index() exposes the stratum as 'level_0' and the values as column 0.
# NOTE(review): the 'year' column used below presumably comes from the index name of
# strata_abs_pop - confirm.
all_data = strata_abs_pop.unstack().reset_index()
all_data.rename(columns={'level_0' : 'age_grp',0 : 'pop' },inplace=True)

temp = strata_abs_deaths.unstack().reset_index()

temp.rename(columns={'level_0' : 'age_grp', 0 : 'dead'},inplace=True)
# The deaths are attached by row position; this relies on both unstacked tables having
# the same row order.
all_data = pd.concat([all_data,temp['dead']],axis=1)
# 1-based indices used to address the model parameters (year 2001 -> 1)
all_data['year_idx'] = all_data['year'] - 2000
all_data['age_idx'] = all_data['age_grp'].apply(create_age_idx)
all_data['pop'] = all_data['pop'].astype(int)
all_data.to_pickle('stratified_population_death_age_control.pkl')
all_data.tail(40)

In [None]:
#PYMC
import pymc3 as pm
import arviz as az

#from pymc.Matplot import plot as pmplot


### condition on year by assigning a unique alpha to each year
### condition on age_grp by assigning a unique beta to each age group

# model:
# dead ~ Binomial(population, p)
# p = inverse_logit(alpha[year_idx] + beta[age_idx])
# alpha[year_idx] ~ Normal(0, 1.5)
# beta[age_idx] ~ Normal(0, 1.5)

model = pm.Model()

with model:
    year_idx = all_data['year_idx']
    age_idx = all_data['age_idx']

    # One alpha per year. year_idx is 1-based (year - 2000), so its largest value is the
    # number of alphas needed. len(year_idx) is the number of data rows (years x age
    # strata) and created alphas that no observation ever referenced.
    n_years = int(year_idx.max())
    alpha = pm.Normal('alpha',0, sigma=1.5,shape=n_years)

    # one beta per age stratum
    beta = pm.Normal('beta',0,sigma=1.5,shape=4)

    # inverse logit of the linear predictor; the -1 converts the 1-based indices
    linear_predictor = alpha[year_idx-1] + beta[age_idx-1]
    logit_age = np.exp(linear_predictor) / (1 + np.exp(linear_predictor))

    lkh_age = pm.Binomial('lkh_age',n=all_data['pop'],p=logit_age,observed=all_data['dead'])

    step1 = pm.Metropolis([alpha,beta])

    trace = pm.sample(50000,step=step1,tune=30000)
    _= az.plot_trace(trace)
    print (az.summary(trace,round_to=2))
    

In [None]:
def logit_pure(x):
    """Map a real number (or array) to a probability: exp(x) / (1 + exp(x)).

    Despite its name this is the inverse logit (logistic sigmoid). The
    expression is arranged so that exp() is only ever taken of non-positive
    numbers; the direct form overflows to inf / inf = nan for large x.
    """
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x))
    return np.exp(np.minimum(x, 0)) / (1 + np.exp(-np.abs(x)))

def logistic_pure(p):
    """Return log(p / (1 - p)), i.e. the log-odds of probability ``p``.

    Note that, despite the name, this is the logit transform.
    """
    odds = p / (1 - p)
    return np.log(odds)

In [None]:
# Pull the posterior draws out of the trace once. Column k of the alpha draws
# is labelled with year 2001 + k; the four beta columns are labelled with the
# age groups 64 / 79 / 89 / 90.
alpha_draws = trace['alpha']
beta_draws = trace['beta']

(alpha_2001, alpha_2002, alpha_2003, alpha_2004, alpha_2005,
 alpha_2006, alpha_2007, alpha_2008, alpha_2009, alpha_2010,
 alpha_2011, alpha_2012, alpha_2013, alpha_2014, alpha_2015,
 alpha_2016, alpha_2017, alpha_2018, alpha_2019, alpha_2020) = (
    alpha_draws[:, k] for k in range(20))

beta_64, beta_79, beta_89, beta_90 = (beta_draws[:, k] for k in range(4))
'''
pmplot(beta_64,'beta_64')
pmplot(beta_79,'beta_79')
pmplot(beta_89,'beta_89')
pmplot(beta_90,'beta_90')

pmplot(alpha_2001,'alpha_2001')
pmplot(alpha_2002,'alpha_2002')
pmplot(alpha_2012,'alpha_2012')
pmplot(alpha_2015,'alpha_2015')
pmplot(alpha_2019,'alpha_2019')
pmplot(alpha_2020,'alpha_2020')
'''

# One column per parameter, one row per posterior draw:
# alpha_2001 .. alpha_2020 first, then the four beta columns.
result = pd.DataFrame(
    {'alpha_{}'.format(2001 + k): alpha_draws[:, k] for k in range(20)})
for k, grp in enumerate(('64', '79', '89', '90')):
    result['beta_{}'.format(grp)] = beta_draws[:, k]


result.describe()


In [None]:
# Per-draw probability for every (year, age group) cell. Each column is named
# '<year>_<age group>' and holds logit_pure(alpha_<year> + beta_<age group>),
# with years as the outer loop so the column order is
# 2001_64, 2001_79, 2001_89, 2001_90, 2002_64, ...
result_p = pd.DataFrame({
    '{}_{}'.format(year, grp): logit_pure(
        result['alpha_{}'.format(year)] + result['beta_{}'.format(grp)])
    for year in range(2001, 2021)
    for grp in ('64', '79', '89', '90')
})

result_p.describe()


In [None]:
# Posterior mean of every '<year>_<age group>' column, as a two-column frame
# ('index' holds the column label, 'mean' the value).
p_means = result_p.mean().rename('mean').reset_index()
# Split each label once on '_' into year and age group. `n` is given by
# keyword because newer pandas releases no longer accept it positionally.
p_means[['year','age_grp']] = p_means['index'].str.split('_',n=1,expand=True)
# Year x age-group table of mean probabilities.
p_means = pd.pivot(p_means,index='year',columns='age_grp',values='mean')
p_means.rename(columns={'90' : '90+'},inplace=True)

In [None]:
p_means.index = range(2001,2021)

In [None]:
strata_abs_pop.to_pickle('strata_abs_pop.pkl')

In [None]:
pymc_abs_deaths_cond_year_age = strata_abs_pop * p_means

pymc_abs_sum = pymc_abs_deaths_cond_year_age.sum(axis=1)
pymc_abs_deaths_cond_year_age

In [None]:
computed_abs_sum = total_computed_deaths.sum(axis=1)
total_computed_deaths

In [None]:
combined_abs_sums = pd.concat([pymc_abs_sum,computed_abs_sum],axis=1)
combined_abs_sums.columns=['pymc_abs_dead','analytic_abs_dead']

combined_abs_sums

In [None]:
title = 'SWEDEN All Cause Deaths conditioned on year + age group,ABS values, two different methods\n'\
'ASSUMING LINEAR GROWTH FOR FULL YEAR 2020\nDataSource : SCB.se'

combined_abs_sums.plot(kind='bar',
                       figsize=(18,12),
                       title=title)
plt.ylabel('number of deaths')
plt.savefig('pymc_cond_year_age_abs.jpg',format='jpg')

In [None]:
strata_abs_pop.sum(axis=1)

In [None]:
# Row-wise total of strata_abs_pop (presumably total population per year --
# the rows are relabelled with years elsewhere in the notebook).
yearly_pop = strata_abs_pop.sum(axis=1)

# Divide every row of the two death totals by that row's population in
# millions, giving deaths per million.
pop_adj_deaths = combined_abs_sums.div(yearly_pop / 1e6,axis=0)
pop_adj_deaths.columns = ['pymc_dead_per_M','analytic_dead_per_M']
# Ratio of the PyMC estimate to the analytic one.
pop_adj_deaths['factor'] = pop_adj_deaths['pymc_dead_per_M'] / pop_adj_deaths['analytic_dead_per_M']
pop_adj_deaths

In [None]:
title = 'SWEDEN All Cause Deaths conditioned on year + age group,per Million, two different methods\n'\
'ASSUMING LINEAR GROWTH FOR FULL YEAR 2020\n'+\
'DataSource : SCB.se'
pop_adj_deaths.plot(kind='bar',figsize=(18,12),
                    title=title)

plt.ylabel('deaths per million')
plt.ylim(per_M_ylim)
plt.savefig('pymc_cond_year_age_per_M.jpg',format='jpg')

In [None]:
# Deaths per million by age group: every row is divided by the matching
# entry of yearly_pop (in millions), aligned on the row labels.
pymc_deaths_per_M_per_age_grp = pymc_abs_deaths_cond_year_age.div(yearly_pop / 1e6, axis=0)
pymc_deaths_per_M_per_age_grp

In [None]:
pymc_deaths_per_M_per_age_grp.sum(axis=1)

In [None]:
title = 'SWEDEN All Cause Deaths conditioned on year + age group, per million, per age group\n'\
'ASSUMING LINEAR GROWTH FOR FULL YEAR 2020\n'+\
'DataSource : SCB.se'
pymc_deaths_per_M_per_age_grp.plot(kind='bar',title=title,
                                   stacked=True,figsize=(18,12))
plt.ylabel('deaths per million')
plt.ylim(per_M_ylim)
plt.savefig('pymc_cond_age_year_per_M_stratified.jpg',format='jpg')

In [None]:
### include median age of death as proxy for health
# NOTE(review): the endpoint name (LivslangdEttariga) suggests life-table data;
# confirm that the value returned is really a median age at death.

# SCB statistics API endpoint queried with a POST request in the next cell.
url4 = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101I/LivslangdEttariga'
# Query body: selects item "0" of the "Alder" variable and the "BE0101A$"
# contents code, and asks for a JSON response. No other variable is filtered.
data4 = {
  "query": [
    {
      "code": "Alder",
      "selection": {
        "filter": "item",
        "values": [
          "0"
        ]
      }
    },
    {
      "code": "ContentsCode",
      "selection": {
        "filter": "item",
        "values": [
          "BE0101A$"
        ]
      }
    }
  ],
  "response": {
    "format": "json"
  }
}

In [None]:
# requests applies no timeout by default, so a stalled connection would block
# the kernel indefinitely; bound the wait explicitly.
r4 = requests.post(url4,json=data4,timeout=30)
# Surface an HTTP error here instead of as a confusing failure in r4.json().
r4.raise_for_status()
r4.status_code

In [None]:
json4 = r4.json()
json4['data'][0]

In [None]:
# For each returned record keep key[0] (stored as gender), key[2] (stored as
# year) and the first value (stored as median).
data_list4 = [(rec['key'][0], rec['key'][2], rec['values'][0])
              for rec in json4['data']]

median_age_of_death = pd.DataFrame(
    data_list4, columns=['gender','year','median']).set_index('year')
# Values are converted to floats before any arithmetic.
median_age_of_death['median'] = median_age_of_death['median'].astype(float)

median_age_of_death

In [None]:
gender_groups = median_age_of_death.groupby('gender')

# Gender code '1' is plotted as male, '2' as female (see the legend below).
male = gender_groups.get_group('1').copy()
# Convert the year labels that came back from the API to integers instead of
# overwriting them with a fixed 1960..2019 range. The fixed range raises a
# length mismatch as soon as the API returns a different number of years.
male.index = male.index.astype(int)

ax = male.plot(style='o--',figsize=(18,12))

female = gender_groups.get_group('2').copy()
female.index = female.index.astype(int)
female.plot(ax=ax,style='o--')

ax.legend(['male','female'])
plt.title('SWEDEN Median Age of Death')
plt.ylabel('median age death')

In [None]:
all_data

In [None]:
all_data_no_2020 = all_data.loc[all_data['year'] != 2020]

# use male median death as health proxy
all_data_no_2020 = all_data_no_2020.join(male,on='year')
# z-score of the median column. Mean and std are computed once for the whole
# column (the former per-row lambda recomputed both for every single row).
all_data_no_2020['std_median'] = all_data_no_2020['median'].pipe(
    lambda col : (col - col.mean()) / col.std())

# Observed death rate per row.
all_data_no_2020['mortality'] = all_data_no_2020['dead'] / all_data_no_2020['pop']

all_data_no_2020['health_idx'] = all_data_no_2020['median'].astype(int) - 76 # index 1..5
# z-score of the year index, same approach as above.
all_data_no_2020['std_year'] = all_data_no_2020['year_idx'].pipe(
    lambda col : (col - col.mean()) / col.std())

all_data_no_2020

In [None]:
all_data_no_2020.loc[all_data_no_2020['year'].isin([2008,2009,2010,2011])]

In [None]:
# The frame also carries string columns (age_grp, gender). Select the numeric
# ones explicitly: pandas >= 2.0 no longer drops non-numeric columns silently
# in corr() and raises instead.
all_data_no_2020.select_dtypes(include='number').corr()

In [None]:
# Same year + age-group model as before, fitted without the 2020 rows:
# dead ~ Binomial(pop, inverse_logit(alpha[year] + beta[age group]))
model2 = pm.Model()
with model2:

    # 19 year effects and 4 age-group effects.
    alpha = pm.Normal('alpha', 0, sigma=10,shape=19)
    beta = pm.Normal('beta',0, sigma=10,shape=4)

    age_idx = all_data_no_2020['age_idx']
    year_idx = all_data_no_2020['year_idx']

    # Index columns are 1-based; shift to 0-based integer arrays for indexing.
    lgit = pm.math.invlogit(alpha[year_idx.values - 1] + beta[age_idx.values - 1])

    likelihood = pm.Binomial('likelihood',n=all_data_no_2020['pop'],p=lgit,observed=all_data_no_2020['dead'])

    # No step method is passed, so pm.sample chooses its default sampler.
    # (A Metropolis step used to be constructed here but was never handed to
    # pm.sample; that dead assignment has been removed.)
    trace2 = pm.sample(500,tune=300)

    _= az.plot_trace(trace2)
    print(az.summary(trace2,round_to=2))
    
    

In [None]:
alpha_post = trace2['alpha'][:]
beta_post = trace2['beta'][:]



result = pd.DataFrame(alpha_post)

In [None]:
# Build the combined frame straight from alpha_post / beta_post so the cell
# gives the same result when re-run. Relabelling `result` in place and then
# appending to it failed with a length mismatch on the second execution.
result = pd.concat([pd.DataFrame(alpha_post, columns=range(2001,2020)),
                    pd.DataFrame(beta_post)],axis=1)


In [None]:
result.describe()

In [None]:
# 19 year columns (alpha_2001 .. alpha_2019) followed by the four age-group columns.
result.columns = (['alpha_{}'.format(year) for year in range(2001, 2020)]
                  + ['beta_64', 'beta_79', 'beta_89', 'beta_90'])
result.describe()

In [None]:
# Per-draw probability for every (year, age group) cell of the 2001-2019
# model. Each column is named '<year>_<age group>' and holds
# logit_pure(alpha_<year> + beta_<age group>), with years as the outer loop so
# the column order is 2001_64, 2001_79, 2001_89, 2001_90, 2002_64, ...
result_p = pd.DataFrame({
    '{}_{}'.format(year, grp): logit_pure(
        result['alpha_{}'.format(year)] + result['beta_{}'.format(grp)])
    for year in range(2001, 2020)
    for grp in ('64', '79', '89', '90')
})



In [None]:
result_p.describe()

In [None]:
plt.figure(figsize=(18,12))
plt.title('Mortality')
result_p.describe().loc['mean'].plot(kind='bar',color=['b','orange','g','r'])
_= plt.xticks(rotation=90)

In [None]:
abs_pop_no_2020 = strata_abs_pop.loc[2001:2019]
abs_pop_no_2020

In [None]:
# Posterior mean of every '<year>_<age group>' column, as a two-column frame
# ('index' holds the column label, 'mean' the value).
p_means = result_p.mean().rename('mean').reset_index()
# Split each label once on '_' into year and age group. `n` is given by
# keyword because newer pandas releases no longer accept it positionally.
p_means[['year','age_grp']] = p_means['index'].str.split('_',n=1,expand=True)
# Year x age-group table of mean probabilities.
p_means = pd.pivot(p_means,index='year',columns='age_grp',values='mean')
p_means.rename(columns={'90' : '90+'},inplace=True)
# Replace the string year labels with integers 2001..2019.
p_means.index = range(2001,2020)
p_means

In [None]:
abs_d = p_means * abs_pop_no_2020
abs_d

In [None]:
abs_d.sum(axis=1)

In [None]:
abs_d.sum(axis=1).plot(kind='bar',figsize=(18,12),color='r')

In [None]:
# Trend model: one intercept per age group plus a single slope on the
# standardised year.
# dead ~ Binomial(pop, p),  p = exp(eta) / (1 + exp(eta)),
# eta = alpha[age group] + beta * std_year
model3 = pm.Model()
with model3:
    
    alpha = pm.Normal('alpha', 0, sigma=10,shape=4)
    beta = pm.Normal('beta',0, sigma=10)

    std_year = all_data_no_2020['std_year']
    age_idx = all_data_no_2020['age_idx']

    # age_idx is shifted by one before being used as a position into alpha
    lgit = np.exp(alpha[age_idx-1]   + beta * std_year) / (
            1 + np.exp(alpha[age_idx-1]  + beta * std_year ))

    likelihood = pm.Binomial('likelihood',n=all_data_no_2020['pop'],p=lgit,observed=all_data_no_2020['dead'])
    step1 = pm.Metropolis([alpha,beta])
    # step1 is handed over positionally as the second argument (presumably
    # pm.sample's `step` parameter -- verify against the installed PyMC3).
    trace3 = pm.sample(5000,step1,tune=1000)
    az.plot_trace(trace3)
    print(az.summary(trace3,round_to=2))
    
    
    

In [None]:
alpha_post = trace3['alpha'][:]
beta_post = trace3['beta'][:]



In [None]:
result=pd.DataFrame({'64' : alpha_post[:,0],'79' : alpha_post[:,1],
                    '89' : alpha_post[:,2],'90' : alpha_post[:,3],
                    'beta' : beta_post})

In [None]:
result.describe()


In [None]:
# Per-draw probability per age group from the trend-model posterior.
# NOTE(review): model3's linear predictor is alpha[age] + beta * std_year, so
# adding `beta` unscaled evaluates the model at std_year == 1. Confirm that
# this is the year intended.
result_p = pd.DataFrame()
for grp in ('64', '79', '89', '90'):
    result_p[grp] = logit_pure(result[grp] + result['beta'])

result_p.describe()

In [None]:
### 
df_for_CI = parse_scb_prel(YTD=False)
df_for_CI

In [None]:
# skip last 7 days of prel data

# Rows whose 2020 value is exactly 0; the first such index label is taken as
# the point where the reported 2020 data ends (presumably days not yet
# reported are 0 -- verify against parse_scb_prel).
# .index[0] raises IndexError when no row has a 0.
last_day_mask = df_for_CI['2020'] == 0
last_day = df_for_CI[last_day_mask].index[0]
# Step back a further 7 days from that point.
last_day = last_day - pd.DateOffset(days=7)
last_day

In [None]:
# From last_day onward, overwrite the 2020 column with the row-wise mean of
# the columns in the label slice '2015':'2018'.
df_for_CI.loc[last_day:,'2020'] = df_for_CI.loc[last_day:,'2015':'2018'].mean(axis=1)

In [None]:
df_for_CI

In [None]:
df_for_CI.drop(['2015-2019','Månad'],axis=1,inplace=True)

In [None]:
df_for_CI

In [None]:
strata_abs_pop.sum(axis=1)

In [None]:
age_adj_pops = strata_abs_pop.sum(axis=1)
age_adj_pops = age_adj_pops[[2015,2016,2017,2018,2019,2020]]
age_adj_pops

In [None]:
# Scale to "per million of population". age_adj_pops.values is a bare array,
# so the division is matched to the columns by position, not by label. This
# assumes the columns are exactly the years selected into age_adj_pops, in the
# same order -- TODO confirm.
# Not idempotent: running this cell twice divides twice.
df_for_CI = df_for_CI / (age_adj_pops.values / 1e6)
df_for_CI

In [None]:
df_for_CI_trans = df_for_CI.unstack().reset_index()


In [None]:
df_for_CI_trans

In [None]:
df_for_CI_trans.loc[df_for_CI_trans['level_1'] == '2020-02-29']

In [None]:
# Removes five rows by integer index label.
# NOTE(review): if the frame is six year columns (2015..2020) of 366 rows each,
# stacked in that order, the '02-29' rows sit at 59, 425, 791, 1157, 1523 and
# 1889. This list would then keep only the 2016 row and also remove the one
# for 2020, which is a leap year. Confirm that dropping 1889 is intended.
# Re-running the cell raises KeyError because the labels are already gone.
df_for_CI_trans.drop([59,791,1157,1523,1889],inplace=True)

In [None]:
df_for_CI_trans['day_idx'] = df_for_CI_trans['level_1'].dt.dayofyear
df_for_CI_trans.set_index('level_1',inplace=True)

In [None]:
df_for_CI_trans.index.name='date'
df_for_CI_trans.rename(columns={'level_0' : 'year',0 : 'dead'},inplace=True)
df_for_CI_trans

In [None]:
df_for_CI_trans['dead'] = df_for_CI_trans['dead'].astype(float)
df_for_CI_trans

In [None]:
year_groups = df_for_CI_trans.groupby('year').sum()
year_groups

In [None]:
import pymc3 as pm
import arviz as az 

# Hierarchical Poisson model with one rate per day of the year.
model5 = pm.Model()

# Day-of-year per observation; used below as a position after subtracting 1.
day_idx = df_for_CI_trans['day_idx'].values

with model5:

    # Hyper-prior: exponential with rate 1 / (mean of the observed 'dead' column).
    lambda_bar = pm.Exponential('lambda_bar',1 / df_for_CI_trans['dead'].mean())
    
    # One rate per distinct day index, each with an exponential prior of rate 1 / lambda_bar.
    lambda_ = pm.Exponential('lambda_',1 / lambda_bar,shape=len(df_for_CI_trans['day_idx'].unique()))
    
    # NOTE(review): 'dead' was divided by population (per million) and cast to
    # float in earlier cells, so the observations are not integer counts.
    # Confirm that a Poisson likelihood is appropriate for them.
    lkh = pm.Poisson('lkh',lambda_[day_idx-1],observed=df_for_CI_trans['dead'])
    
    trace5 = pm.sample(500,tune=500)
    
    # Summary table with 89% HDIs.
    summary5 = az.summary(trace5,hdi_prob=0.89)

In [None]:
with model5:
    print (summary5)

In [None]:
with model5:
    az.plot_forest(trace5,var_names='lambda_',hdi_prob=0.89)

In [None]:
daily_samples = pd.DataFrame(trace5['lambda_']).T
dates = pd.date_range('2020-01-01','2020-12-31')
daily_samples.index=dates
daily_samples

In [None]:
daily_CIs = np.percentile(daily_samples,[5.5,94.5],axis=1)
daily_means = daily_samples.mean(axis=1)
daily_means

In [None]:
daily_err_low = np.abs(daily_means - daily_CIs[0,:])
daily_err_high = np.abs(daily_means - daily_CIs[1,:])

daily_err_high

In [None]:
df_for_CI

In [None]:
df_predict = df_for_CI.copy()
df_predict

In [None]:
worst_case = daily_means + daily_err_high
best_case = daily_means - daily_err_low

df_predict['worst_case'] = worst_case
df_predict['best_case'] = best_case

df_predict

In [None]:
df_predict[['2020','worst_case','best_case']].plot()

In [None]:
df_predict_cmp = df_predict.copy()
df_predict_cmp

In [None]:
# From last_day onward the two scenario columns take the 'best_case' /
# 'worst_case' values.
df_predict_cmp.loc[last_day:,'2020_best'] = df_predict_cmp.loc[last_day:,'best_case']
df_predict_cmp.loc[last_day:,'2020_worst'] = df_predict_cmp.loc[last_day:,'worst_case']

# Up to last_day both scenarios equal the '2020' column. Label slices include
# both end points, so the last_day row is written by both pairs of statements
# and ends up holding the '2020' value.
df_predict_cmp.loc[:last_day,'2020_best'] = df_predict_cmp.loc[:last_day,'2020']
df_predict_cmp.loc[:last_day,'2020_worst'] = df_predict_cmp.loc[:last_day,'2020']


df_predict_cmp

In [None]:
df_predict_cmp.drop(['worst_case','best_case'],axis=1,inplace=True)

In [None]:
df_predict_cmp

In [None]:
df_predict_cmp.plot(y=['2020_best','2020_worst','2020'],figsize=(18,12))

In [None]:
df_predict_cmp.cumsum().plot(figsize=(18,12))

In [None]:
df_predict_cmp['2015-2018'] = df_predict_cmp[['2015','2016','2017','2018']].mean(axis=1)
df_predict_cmp

In [None]:

df_predict_cmp[['2015-2018','2020_best','2020_worst','2020']].cumsum().plot(figsize=(18,12))

In [None]:
low = df_predict_cmp['2020_best'].cumsum().values.astype(float)
high = df_predict_cmp['2020_worst'].cumsum().values.astype(float)
print (low[-1])
print (high[-1])

In [None]:
# Reduce last_day to a plain date for the plot labels. Going through
# pd.Timestamp accepts both the original Timestamp and an already converted
# date, so re-running the cell no longer fails (datetime.date has no .date()).
last_day = pd.Timestamp(last_day).date()


In [None]:

ax = df_predict_cmp[['2020','2015-2018']].cumsum().plot(figsize=(18,12))

ax.fill_between(x=df_predict_cmp.index,color='m',alpha=0.2,
                y1=high,
                y2=low,label='89% CI for remaining days of 2020')

ax.axvline(last_day,color='orange',ls='dashed',label='prediction starts {}'.format(last_day))

ax.legend(loc='upper left')
ax.set_ylabel('cumulative deaths per million')
ax.set_yticks(range(0,10000,1000))

plt.title('SWEDEN cumulative deaths per million : 2020 Forecasted vs avg. 2015-2018\nData until {}\nDataSource : scb.se'.format(last_day))
plt.savefig('stratified_cumulative_deaths.jpg',format='jpg')

In [None]:
yearly_totals = df_predict_cmp.sum()['2015' : '2020']
yearly_totals

In [None]:
# Error-bar array for six bars (row 0: lower, row 1: upper). Only the last
# bar (2020) gets a non-zero extent: the distance from the 2020 total to the
# final cumulative low / high value.
errs = np.zeros((2, 6))
errs[0, -1] = np.abs(low[-1] - yearly_totals['2020'])
errs[1, -1] = np.abs(high[-1] - yearly_totals['2020'])

In [None]:
title = 'SWEDEN Predicted Full Year 2020 deaths per million cmp 2015-2019\nDataSource : scb.se'

ax = yearly_totals.plot(kind='bar',figsize=(18,12),color='r',yerr=errs,capsize=5,title=title)
ax.set_ylabel('deaths per million')
_= ax.set_yticks(range(0,10000,1000))
plt.savefig('stratified_deaths_2015-2020-predicted.jpg',format='jpg')

In [None]:
df_predict_cmp

In [None]:
df_predict_cmp.plot(y='2020')

In [None]:
yearly_totals

In [None]:
age_adj_mortality * 1e6

In [None]:
df_predict_cmp.sum()

In [None]:
yearly_totals / 1e6


In [None]:
yearly_mortality = yearly_totals / 1e6
mortality_errs = errs / 1e6
mortality_errs

In [None]:
print (yearly_mortality)
print ('mortality low', yearly_mortality.loc['2020'] - mortality_errs[0,-1])
print ('mortality high',yearly_mortality.loc['2020'] + mortality_errs[1,-1])

In [None]:
title = 'SWEDEN Predicted Full Year 2020 mortality cmp 2015-2019\nDataSource : scb.se'

ax = yearly_mortality.plot(kind='bar',figsize=(18,12),color='r',title=title,yerr=mortality_errs,capsize=5)
ax.set_ylabel('mortality')
_= ax.set_yticks(np.arange(0,0.011,0.001))
plt.savefig('stratified_mortality_2015-2020-predicted.jpg',format='jpg')