In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import requests

sns.set()

In [None]:
dead_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101I/DodaHandelseK'

# Request body posted to dead_url: one region code ("00"), every single-year
# age class "0".."99" plus the open-ended "100+", and both "Kon" codes,
# with the reply requested as JSON.
dead_query = {
  "query": [
    {
      "code": "Region",
      "selection": {
        "filter": "vs:RegionRiket99",
        "values": ["00"]
      }
    },
    {
      "code": "Alder",
      "selection": {
        "filter": "vs:Ålder1årA",
        # Generated rather than typed out: "0", "1", ..., "99", "100+".
        "values": [str(age) for age in range(100)] + ["100+"]
      }
    },
    {
      "code": "Kon",
      "selection": {
        "filter": "item",
        "values": ["1", "2"]
      }
    }
  ],
  "response": {
    "format": "json"
  }
}

pop_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101A/FolkmangdNov'

# Request body posted to pop_url: one region code ("00"), every single-year
# age class "0".."99" plus the open-ended "100+", and both "Kon" codes,
# with the reply requested as JSON.
pop_query = {
  "query": [
    {
      "code": "Region",
      "selection": {
        "filter": "vs:RegionRiket99",
        "values": ["00"]
      }
    },
    {
      "code": "Alder",
      "selection": {
        "filter": "vs:Ålder1årA",
        # Generated rather than typed out: "0", "1", ..., "99", "100+".
        "values": [str(age) for age in range(100)] + ["100+"]
      }
    },
    {
      "code": "Kon",
      "selection": {
        "filter": "item",
        "values": ["1", "2"]
      }
    }
  ],
  "response": {
    "format": "json"
  }
}

In [None]:
def get_data(url,query):
    """Post `query` to `url` and return the counts summed per (year, age).

    The JSON reply is expected to hold a ``data`` list whose records have a
    ``key`` of four fields, unpacked here as (area, age, gender, year), and a
    ``values`` list whose first element is the count.

    Parameters
    ----------
    url : str
        Endpoint the query is posted to.
    query : dict
        Request body, sent as JSON.

    Returns
    -------
    pandas.DataFrame
        A single integer column ``count`` indexed by a (``year``, ``age``)
        MultiIndex. Ages are ints (a trailing ``+`` as in "100+" is
        stripped); year values are kept as delivered in the key. Counts are
        summed over the remaining key fields (area, gender).

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    """
    r = requests.post(url,json=query)

    if r.status_code != 200 :
        print (r.status_code)
    # Stop on HTTP errors instead of trying to parse an error body as data.
    r.raise_for_status()

    records = r.json()['data']
    keys = pd.DataFrame([rec['key'] for rec in records],
                        columns=['area','age','gender','year'])

    data = pd.DataFrame({
        'year': keys['year'],
        'age': keys['age'].str.replace('+','',regex=False).astype(int),
        'count': [rec['values'][0] for rec in records],
    })
    data['count'] = data['count'].astype(int)

    # Aggregate 'count' only. Summing the whole frame would also "sum" the
    # string gender codes, which newer pandas concatenates into an extra
    # column instead of silently dropping it.
    return data.groupby(['year','age'])[['count']].sum()
    



In [None]:
dead_df = get_data(dead_url,dead_query)
dead_df

In [None]:
dead_df.loc['2020'].head(40)

In [None]:
pop_df = get_data(pop_url,pop_query)
pop_df

In [None]:
dead_df.to_excel('official_deaths.xlsx')
pop_df.to_excel('official_pop.xlsx')

In [None]:
# Yearly totals of the 'count' column only. Selecting it explicitly keeps this
# cell re-runnable after later cells attach bin columns to dead_df / pop_df,
# which a bare .sum() would also try to aggregate.
total_dead = dead_df.groupby('year')[['count']].sum()
total_pop = pop_df.groupby('year')[['count']].sum()

# Deaths per inhabitant; years present in only one of the tables drop out.
mortality = (total_dead / total_pop).dropna()
mortality

In [None]:
title='SWEDEN Final Official Mortality 2002-2020\nDataSource : scb.se'
mortality.plot(kind='bar',color='crimson',figsize=(18,12),title=title)
plt.legend(['Mortality'])
plt.ylabel('Mortality')
plt.yticks(np.arange(0,0.012,0.001))
plt.savefig('SCB_FINAL_mortality.jpg',format='jpg')

In [None]:
def binner(df,binsize=10,max_age=100):
    """Assign each row of `df` to an age bin of width `binsize`.

    The ages are read from the second level of `df`'s index. Bins are
    right-closed intervals (-1, binsize-1], (binsize-1, 2*binsize-1], ...
    and each bin is labelled with its lowest age (0, binsize, 2*binsize, ...).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index has the age as its second level.
    binsize : int, default 10
        Width of each bin in years.
    max_age : int, default 100
        Highest age that must be covered by the bins.

    Returns
    -------
    pandas.Categorical
        One bin label per row of `df`, in row order.
    """
    ages = df.index.get_level_values(1)

    # Derive the labels from the edges so there is always exactly one label
    # per interval, also when binsize does not divide max_age evenly.
    edges = range(-1,max_age + binsize,binsize)
    labels = range(0,(len(edges) - 1) * binsize,binsize)

    return pd.cut(ages,edges,labels=labels)

In [None]:
pop_df['bin_10'] = binner(pop_df)
dead_df['bin_10'] = binner(dead_df)

dead_df

In [None]:
# Sum only 'count' per (year, 10-year bin). Selecting the column explicitly
# keeps this cell re-runnable after later cells add further bin columns to
# pop_df / dead_df, which a bare .sum() would also try to aggregate.
binned_pop = pop_df.groupby(['year','bin_10'])[['count']].sum()
binned_dead = dead_df.groupby(['year','bin_10'])[['count']].sum()
binned_dead

In [None]:
age_grp_mortality = (binned_dead / binned_pop).dropna()
age_grp_mortality

In [None]:
unstacked_age_grp_mortality = age_grp_mortality.unstack()
unstacked_age_grp_mortality = unstacked_age_grp_mortality.droplevel(0,axis=1)
unstacked_age_grp_mortality

In [None]:
age_grp_baseline = unstacked_age_grp_mortality.loc['2015' : '2018'].mean()
age_grp_baseline

In [None]:
title = 'SWEDEN Final Official Age Group Mortality [10 year bins], 2002-2020\nDataSource : scb.se'
axes = unstacked_age_grp_mortality.plot(subplots=True,figsize=(18,12),style='ro--',title=title)
plt.ylabel('mortality')

for i,ax in enumerate(axes):
    ax.legend(loc='upper left')
    ax.axhline(age_grp_baseline.iloc[i],ls='dashed',color='k')
    
plt.savefig('SCB_FINAL_age_grp_mort.jpg',format='jpg')


In [None]:
age_grp_growth_vs_baseline = unstacked_age_grp_mortality / age_grp_baseline
age_grp_growth_vs_baseline

In [None]:
title = 'SWEDEN age group mortality growth 2020 vs baseline 2015-2018\nDataSource : scb.se'
age_grp_growth_vs_baseline.loc['2020'].plot(kind='bar',figsize=(18,12),color='red',title=title)

plt.ylabel('growth factor')
plt.xlabel('age group')
plt.axhline(1,ls='dashed',color='k')

plt.savefig('SCB_FINAL_age_grp_growth.jpg',format='jpg')

In [None]:
binned_dead.loc['2020']

In [None]:
pop_df['bin_5'] = binner(pop_df,5)
dead_df['bin_5'] = binner(dead_df,5)
pop_df.tail()

In [None]:
# Sum only 'count' per (year, 5-year bin). Both frames also carry the
# categorical 'bin_10' column at this point; a bare .sum() would try to
# aggregate it too, which is not a meaningful (or, in newer pandas, a
# supported) operation.
binned_5_dead = dead_df.groupby(['year','bin_5'])[['count']].sum()
binned_5_pop = pop_df.groupby(['year','bin_5'])[['count']].sum()
binned_5_dead.loc['2020']

In [None]:

binned_5_mort = (binned_5_dead / binned_5_pop).dropna()
binned_5_unstacked = binned_5_mort.unstack().droplevel(0,axis=1)
binned_5_unstacked

In [None]:
binned_5_unstacked.plot(figsize=(18,12),style='o--',logy=True)
plt.legend(loc='upper left')

In [None]:
binned_5_baseline = binned_5_unstacked.loc['2015' : '2018'].mean()
binned_5_baseline

In [None]:
binned_5_growth = binned_5_unstacked.loc['2020'] / binned_5_baseline
binned_5_growth

In [None]:
binned_5_growth.plot(kind='bar',figsize=(18,12),color='r')
plt.yticks(np.arange(0,1.2,0.05))
plt.axhline(1,ls='dashed',color='k')
plt.xlabel('age group')
plt.ylabel('growth factor')

In [None]:
expected_deaths = binned_pop.unstack().droplevel(0,axis=1) * age_grp_baseline
expected_deaths


In [None]:
expectation = expected_deaths.sum(axis=1)
expectation

In [None]:
excess = (dead_df.groupby('year')['count'].sum() - expectation).dropna()
excess

In [None]:
pop_df['bin_1'] = pop_df.index.get_level_values(1)
dead_df['bin_1'] = dead_df.index.get_level_values(1)

dead_df

In [None]:
dead_df['scb_prel_bin'] = pd.cut(dead_df.index.get_level_values(1),[-1,64,79,89,100],labels=[64,79,89,100])
pop_df['scb_prel_bin'] = pd.cut(pop_df.index.get_level_values(1),[-1,64,79,89,100],labels=[64,79,89,100])


In [None]:
def binned_mort(bin):
    """Return deaths divided by population for every (year, `bin`) group.

    `bin` is the name of a column present in both `dead_df` and `pop_df`.
    Groups for which the ratio is NaN are removed from the result.
    """
    group_keys = ['year',bin]
    deaths = dead_df.groupby(group_keys)['count'].sum()
    population = pop_df.groupby(group_keys)['count'].sum()
    ratio = deaths / population
    return ratio.dropna()

binned_mort('bin_5').tail(40)

In [None]:
def baseline(start_year='2015',end_year='2018',bin='bin_10'):
    """Average mortality per bin over the years `start_year`..`end_year`.

    Takes the (year, bin) mortality from `binned_mort(bin)`, keeps the rows
    whose year label lies in the inclusive slice, and averages each bin
    across those years. The returned Series is named after `bin`.
    """
    mortality_by_bin = binned_mort(bin)
    window = mortality_by_bin.loc[start_year : end_year]
    per_bin_mean = window.unstack().mean()
    return per_bin_mean.rename(bin)


In [None]:
def expectation(base):
    """Multiply the per-bin rates in `base` by the population of each (year, bin).

    `base.name` is used as the name of the `pop_df` column to group by, so
    `base` must be named after that bin column.
    """
    group_keys = ['year',base.name]
    population = pop_df.groupby(group_keys)['count'].sum()
    expected = base * population
    return expected
    

def excess(base,start_year='2015',end_year='2018'):
    """Observed minus expected deaths for every (year, bin) group.

    Observed deaths come from `dead_df`, grouped by year and the column named
    `base.name`; expected deaths come from `expectation(base)`. Groups where
    the difference is NaN are dropped.

    `start_year` and `end_year` are accepted but not used in the body.
    """
    observed = dead_df.groupby(['year',base.name])['count'].sum()
    expected = expectation(base)
    difference = observed - expected
    return difference.dropna()


excess_15_18_bin_1 = excess(baseline('2015','2018','bin_1')).groupby('year').sum()
excess_15_18_bin_5 = excess(baseline('2015','2018','bin_5')).groupby('year').sum()
excess_15_18_bin_10 = excess(baseline('2015','2018','bin_10')).groupby('year').sum()
excess_15_18_bin_scb_prel = excess(baseline('2015','2018','scb_prel_bin')).groupby('year').sum()

print (excess_15_18_bin_1)
print ()
print (excess_15_18_bin_5)
print ()
print (excess_15_18_bin_10)
print ()
print (excess_15_18_bin_scb_prel)

In [None]:
title = 'SWEDEN Excess Deaths vs baseline 2015-2018, 4 different binnings\nDataSource : scb.se'
ax = excess_15_18_bin_1.loc['2010':].plot(figsize=(18,12),style='o--',title=title)
excess_15_18_bin_5.loc['2010':].plot(ax=ax,style='o--')
excess_15_18_bin_10.loc['2010':].plot(ax=ax,style='o--')
excess_15_18_bin_scb_prel.loc['2010':].plot(ax=ax,style='o--')

plt.ylabel('Excess Deaths')
plt.axhline(0,ls='dashed',color='k')
plt.yticks(range(-6500,7000,500))
plt.legend(['bin_1','bin_5','bin_10','-64,79,89,90+'])
plt.savefig('SCB_FINAL_excess_deaths_4_bins.jpg',format='jpg')

In [None]:
one_bin_mort = (dead_df.groupby(['year','bin_1'])['count'].sum() / pop_df.groupby(['year','bin_1'])['count'].sum()).dropna()
one_bin_growth = one_bin_mort / baseline(bin='bin_1')
one_bin_growth.loc['2020'].plot(kind='bar',figsize=(18,12),color='r')
plt.axhline(1,ls='dashed',color='k')
plt.yticks(np.arange(0,1.7,0.05))
one_bin_growth.loc['2020']

In [None]:
title='SWEDEN 2020 deaths per age group\nDataSource : scb.se'

# Aggregate 'count' only: dead_df also carries the categorical bin columns
# added in earlier cells, which a bare .sum() would try to sum as well.
# Computed once and reused for both bar series.
deaths_by_age = dead_df.groupby(['year','bin_1'])[['count']].sum()

ax = deaths_by_age.loc['2020'].plot(kind='bar',figsize=(18,12),label='deaths',
                                    color='r',title=title,alpha=0.7)

# Mean deaths per age over 2015-2018, drawn on the same axes for comparison.
deaths_by_age.loc['2015' : '2018'].groupby(
    'bin_1').mean().plot(ax=ax,kind='bar',color='g',alpha=0.5)

plt.legend(['deaths 2020','deaths 2015-2018'],loc='upper left')
plt.ylabel('nr of deaths')
plt.xlabel('age group')

plt.savefig('SCB_FINAL_2020_deaths_per_age_grp.jpg',format='jpg')

In [None]:
# Mean deaths per age over 2015-2018; only 'count' is aggregated so the
# categorical bin columns on dead_df are left out of the sum.
dead_df.groupby(['year','bin_1'])[['count']].sum().loc['2015' : '2018'].groupby('bin_1').mean()


In [None]:
# Deaths per age in 2020 (first 40 ages); only 'count' is aggregated so the
# categorical bin columns on dead_df are left out of the sum.
dead_df.groupby(['year','bin_1'])[['count']].sum().loc['2020'].head(40)

In [None]:
growth_old = one_bin_growth.loc['2020'].loc[60:].sort_values()
growth_old

In [None]:
growth_old.plot(style='o',kind='bar',figsize=(18,12))

In [None]:
pop_df

In [None]:
### PARAM ###
bin = 'bin_1'

def std_pop_f(bin='bin_1'):
    """Return the 2019 population per `bin` group.

    Sums `pop_df['count']` by year and the column named `bin`, then selects
    the rows of year '2019'.
    """
    population_by_group = pop_df.groupby(['year',bin])['count'].sum()
    reference_year = population_by_group.loc['2019']
    return reference_year

std_pop = std_pop_f(bin)
std_pop

In [None]:
def age_adj_mortality(bin='bin_1'):
    """Return deaths divided by population for every (year, `bin`) group.

    Both `dead_df` and `pop_df` are grouped by year and the column named
    `bin`; groups whose ratio is NaN are dropped.
    """
    group_keys = ['year',bin]
    deaths = dead_df.groupby(group_keys)['count'].sum()
    population = pop_df.groupby(group_keys)['count'].sum()
    return (deaths / population).dropna()

In [None]:
bin_mortality = age_adj_mortality(bin)
std_deaths = bin_mortality.unstack() * std_pop
std_deaths

In [None]:
age_adj_mortality = std_deaths.sum(axis=1) / std_pop.sum()
age_adj_mortality

In [None]:
title='SWEDEN Age Adjusted Mortality [std. year 2019]\nDataSource : scb.se'
age_adj_mortality.plot(kind='bar',figsize=(18,12),color='r',title=title)
plt.ylabel('Mortality')
_= plt.yticks(np.arange(0,0.012,0.0005))

plt.savefig('SCB_FINAL_age_adj_mort.jpg',format='jpg')

In [None]:
mort_1 = binned_mort('bin_1')
mort_5 = binned_mort('bin_5')
mort_10 = binned_mort('bin_10')
mort_prel = binned_mort('scb_prel_bin')

ax = mort_1.loc['2020'].plot(figsize=(18,12),style='o--')
mort_5.loc['2020'].plot(ax=ax)
mort_10.loc['2020'].plot(ax=ax,style='x--')
mort_prel.loc['2020'].plot(ax=ax,style='.--')
mort_10.loc['2020']

In [None]:
mort_1_baseline = mort_1.unstack().loc['2015' : '2018'].mean()
mort_5_baseline = mort_5.unstack().loc['2015' : '2018'].mean()
mort_1_baseline

In [None]:
ax = mort_1_baseline.plot(figsize=(18,12),style='ko--',label='baseline')
mort_1.loc['2019'].plot(ax=ax,style='go--',label='2019')
mort_1.loc['2020'].plot(ax=ax,style='ro--',label='2020')
plt.legend(loc='upper left')

In [None]:
mort_1_change = mort_1.loc['2020'] / mort_1.loc['2019']
mort_1_change

In [None]:
mort_1_change.plot(figsize=(18,12),style='o--')

In [None]:
mort_5_2020_vs_base = mort_5.loc['2020'] / mort_5_baseline
mort_5_2019_vs_base = mort_5.loc['2019'] / mort_5_baseline

In [None]:
title = 'SWEDEN age group mortality growth : 2019 & 2020 vs baseline 2015-2018\nDataSource : SCB.se'
ax = mort_5_2020_vs_base.plot(figsize=(18,12),style='ro--',label='2020',title=title)
mort_5_2019_vs_base.plot(style='go--',label='2019')
plt.axhline(1,ls='dashed',color='k',label='baseline')
plt.xlabel('age group')
plt.ylabel('growth factor')
plt.yticks(np.arange(0.65,1.12,0.025))
plt.xticks(range(0,105,5))
plt.legend(loc='upper right')
plt.savefig('SCB_FINAL_age_grp_mort_growth.jpg',format='jpg')

In [None]:
# Deaths per 5-year bin in 2020. 'count' is selected before aggregating so
# the other (categorical) bin columns on dead_df are never summed.
dead_df.groupby(['year','bin_5'])['count'].sum().loc['2020']

In [None]:
pop_5_flat = pop_df.groupby(['year','bin_5'])['count'].sum().unstack()
pop_5_flat

In [None]:
dead_5_flat = dead_df.groupby(['year','bin_5'])['count'].sum().unstack()
dead_5_flat


In [None]:
title = 'SWEDEN Expected vs Actual Deaths per age group, 2019 & 2020\nDataSource : scb.se'

expected_5 = pop_5_flat * mort_5_baseline

ax = expected_5.loc['2020'].plot(figsize=(18,12),style='r--',label='2020 expected deaths',title=title)
expected_5.loc['2019'].plot(ax=ax,style='g--',label='2019 expected deaths')

dead_5_flat.loc['2020'].plot(ax=ax,style='rx',label='2020 actual deaths')
dead_5_flat.loc['2019'].plot(ax=ax,style='gx',label='2019 actual deaths')

plt.xticks(range(len(expected_5.loc['2020'])),range(0,105,5))
plt.yticks(range(0,19000,1000))
plt.xlabel('age_grp')
plt.ylabel('number of deaths')
plt.legend(loc='upper left')
plt.savefig('SCB_FINAL_expected_vs_actual_deaths.jpg',format='jpg')

In [None]:

ax = expected_5.loc['2020',60:].plot(figsize=(18,12),style='r--',label='2020 expected deaths',title=title)
expected_5.loc['2019',60:].plot(ax=ax,style='g--',label='2019 expected deaths')

dead_5_flat.loc['2020',60:].plot(ax=ax,style='rx',label='2020 actual deaths')
dead_5_flat.loc['2019',60:].plot(ax=ax,style='gx',label='2019 actual deaths')

In [None]:
mort_5.loc['2020'].plot(figsize=(18,12))

In [None]:
title = 'SWEDEN deaths per age group 2019 & 2020\nDataSource : scb.se'
(dead_df.groupby(['year','bin_5'])['count'].sum()).loc['2019' : '2020'].unstack().T.plot(kind='bar',title=title,
                                                                                        figsize=(18,12))

plt.ylabel('nr deaths')
plt.xlabel('age group')

plt.savefig('SCB_FINAL_nr_deaths_age_grp_2019_2020.jpg',format='jpg')

In [None]:
title = 'SWEDEN population per age grp 2019 & 2020\nDataSource : scb.se'
(pop_df.groupby(['year','bin_5'])['count'].sum()).loc['2019' : '2020'].unstack().T.plot(kind='bar',title=title,
                                                                                        figsize=(18,12))


In [None]:
mort_5_cmp = mort_5.loc['2019' : '2020'].unstack().T
mort_5_cmp['baseline_2015_2018'] = mort_5_baseline

title = 'SWEDEN Age Grp Mortality 2019 & 2020 vs baseline 2015-2018\nDataSource : scb.se'

mort_5_cmp.plot(figsize=(18,12),kind='bar',color=['g','r','k'],title=title)
plt.ylabel('mortality')
plt.xlabel('age group')
plt.yticks(np.arange(0,0.5,0.05))
plt.savefig('SCB_FINAL_2020_age_grp_mort_cmp.jpg',format='jpg')

mort_5_cmp['over_base_20'] = mort_5_cmp['2020'] > mort_5_cmp['baseline_2015_2018']
mort_5_cmp['under_base_19'] = mort_5_cmp['2019'] < mort_5_cmp['baseline_2015_2018']
mort_5_cmp

In [None]:
mort_5_cmp.plot(figsize=(18,12),kind='bar',color=['g','r','k'],title=title)
plt.yscale('log')
plt.ylabel('mortality [log scale]')
plt.xlabel('age group')
plt.savefig('SCB_FINAL_2020_age_grp_mort_cmp_log.jpg',format='jpg')


In [None]:
mort_5_cmp

In [None]:
expected_5.sum(axis=1)

In [None]:
bin_5_dead_sums = binned_5_dead.unstack().sum(axis=1)
bin_5_dead_sums

In [None]:
totals_expected_actuals = pd.concat([expected_5.sum(axis=1),bin_5_dead_sums],axis=1)
totals_expected_actuals.columns=['Expected_Deaths','Actual_Deaths']
totals_expected_actuals['excess'] = totals_expected_actuals['Actual_Deaths'] - totals_expected_actuals['Expected_Deaths']
totals_expected_actuals.dropna(inplace=True)
totals_expected_actuals

In [None]:
title='SWEDEN Excess Deaths (calculated on age group mortality baseline 2015-2018)\nDataSource : scb.se'
totals_expected_actuals.plot(kind='bar',figsize=(18,12),y='excess',color='r',title=title)
plt.ylabel('Excess Deaths')
plt.xlabel('year')
plt.savefig('SCB_FINAL_excess_deaths_bar.jpg',format='jpg')

In [None]:
totals_expected_actuals.plot(y='Expected_Deaths')

In [None]:
import scipy.stats as sps

### linear regression for expected deaths 2002 - 2018 ###
# Pick the fit window by year label instead of by position: a positional
# [:-2] only means "through 2018" when the table happens to end in 2020.
fit_window = totals_expected_actuals.loc[:'2018']
x = fit_window.index.values.astype(int)
y = fit_window['Expected_Deaths']
print (y)

slope,intercept,_,_,_ = sps.linregress(x,
                                       y)

print (slope,intercept)

In [None]:
# Take the x axis from the table's own year index so its length always
# matches the plotted column, whatever range of years was downloaded.
X = totals_expected_actuals.index.values.astype(int)
plt.plot(X,totals_expected_actuals['Actual_Deaths'],'o--')
plt.plot(X,X*slope + intercept,'x--')

In [None]:
reg_expectation = X * slope + intercept

### excess deaths from linear regression ###
reg_excess = totals_expected_actuals['Actual_Deaths'] - reg_expectation
reg_excess

In [None]:
title = 'SWEDEN Excess Deaths calculated by linear regression model 2002-2018\nDataSource : scb.se'
reg_excess.plot(kind='bar',figsize=(18,12),color='r',title=title)
plt.ylabel('Excess Deaths')
plt.xlabel('year')
plt.savefig('SCB_FINAL_excess_bar_linreq.jpg',format='jpg')

In [None]:
mort_5_flat = mort_5.unstack()
mort_5_flat

In [None]:
start_year = 2015
end_year = 2018


In [None]:


def age_grp_slope_intercept(s):
    """Fit a straight line to `s` over start_year..end_year.

    `s` is indexed by year labels given as strings; `start_year` and
    `end_year` are the notebook-level integers. Returns the first two
    fields of the `linregress` result, i.e. (slope, intercept).
    """
    fit_years = np.arange(start_year,end_year + 1)
    fit_values = s.loc[str(start_year) : str(end_year)]
    fit = sps.linregress(fit_years,fit_values)
    return fit[:2]

slope_intercept = mort_5_flat.apply(age_grp_slope_intercept)
slope_intercept = slope_intercept.T
slope_intercept.columns = ['slope','intercept']
slope_intercept

In [None]:
# One row per year 2002..2020: the per-age-group regression line
# (slope * year + intercept) evaluated at that year.
X = np.arange(2002,2021)
age_reg_mort = pd.DataFrame(
    [year * slope_intercept['slope'] + slope_intercept['intercept'] for year in X]
)
# Year labels as strings, matching the other per-year tables.
age_reg_mort.index = X.astype(str)
age_reg_mort

In [None]:
age_reg_mort.plot(figsize=(18,12))


In [None]:
age_reg_mort_expectation = (pop_5_flat * age_reg_mort).sum(axis=1)
age_reg_mort_expectation

In [None]:
age_reg_excess = (dead_5_flat.sum(axis=1) - age_reg_mort_expectation).dropna()
age_reg_excess

In [None]:
age_reg_excess.plot(figsize=(18,12),kind='bar',color='r')

In [None]:
mortality


In [None]:
# Fit on one explicit window so x and y always have the same length.
# Slicing x by label (:'2018') but y by position ([:-2]) only agrees when
# the table ends exactly two years after 2018.
fit_window = mortality.loc[:'2018']
slope,intercept,_,_,_ = sps.linregress(fit_window.index.values.astype(int),
                                       fit_window['count'])

# Extrapolate the fitted line well beyond the observed years.
X = np.arange(2002,2200)

reg_mortality = X * slope + intercept
plt.plot(X,reg_mortality)