In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import requests
import scipy.stats as sps

sns.set()

In [None]:
# Endpoints of the two SCB (scb.se) tables queried below: one for deaths,
# one for population.
dead_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101I/DodaHandelseK'
pop_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101A/FolkmangdNov'


def _build_scb_query():
    """Return a fresh JSON query payload shared by both SCB tables.

    The payload selects region code "00", every one-year age code
    "0".."99" plus the open-ended "100+", and the two "Kon" codes "1" and
    "2" (presumably the two genders -- get_data labels that key position
    'gender').  A new dict is built on every call so the two queries never
    share mutable state.
    """
    # Generated instead of spelled out: "0", "1", ..., "99", "100+".
    age_codes = [str(age) for age in range(100)] + ["100+"]
    return {
        "query": [
            {
                "code": "Region",
                "selection": {"filter": "vs:RegionRiket99", "values": ["00"]},
            },
            {
                "code": "Alder",
                "selection": {"filter": "vs:Ålder1årA", "values": age_codes},
            },
            {
                "code": "Kon",
                "selection": {"filter": "item", "values": ["1", "2"]},
            },
        ],
        "response": {"format": "json"},
    }


# Both tables are queried with the same selection.
dead_query = _build_scb_query()
pop_query = _build_scb_query()

In [None]:
def get_data(url,query):
    """POST ``query`` to ``url`` and return counts summed per year and age.

    Parameters
    ----------
    url : str
        Table endpoint to post the query to.
    query : dict
        JSON-serialisable query payload.

    Returns
    -------
    pandas.DataFrame
        A single integer column ``count`` indexed by ``(year, age)``.
        ``year`` keeps the string form delivered by the API, ``age`` is an
        int (the open-ended code "100+" becomes 100).  Rows that differ
        only in area or gender are summed together.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.post(url, json=query, timeout=60)
    # Fail loudly here instead of printing the status code and then
    # crashing on a body that is not the expected JSON.
    r.raise_for_status()

    records = r.json()['data']

    # Each record carries key = [area, age, gender, year] and a one-element
    # 'values' list holding the count.
    keys = pd.DataFrame([rec['key'] for rec in records],
                        columns=['area','age','gender','year'])
    data = pd.DataFrame({
        'year': keys['year'],
        'age': keys['age'].str.replace('+','',regex=False).astype(int),
        'count': [int(rec['values'][0]) for rec in records],
    })

    # Select 'count' explicitly: the result then never depends on how the
    # installed pandas version treats non-numeric columns in groupby sums.
    return data.groupby(['year','age'])[['count']].sum()
    


In [None]:
# Download the death counts; get_data returns them summed per (year, age).
dead_df = get_data(dead_url,dead_query)
dead_df

In [None]:
# Download the population counts in the same (year, age) layout as dead_df.
pop_df = get_data(pop_url,pop_query)
pop_df

In [None]:
def bin_category(df,binsize=10):
    """Assign every row of ``df`` to an age bin of width ``binsize``.

    The ages are read from the second level of ``df``'s index.  Bins are
    right-inclusive intervals starting at age 0 (``(-1, binsize-1]``,
    ``(binsize-1, 2*binsize-1]``, ...) and always extend far enough to
    contain age 100.  Each bin is labelled with its lowest age.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index has the age as its second level.
    binsize : int, default 10
        Width of each age bin in years; must be at least 1.

    Returns
    -------
    pandas.Categorical
        One bin label per row of ``df``, in row order.

    Raises
    ------
    ValueError
        If ``binsize`` is smaller than 1.
    """
    if binsize < 1:
        raise ValueError('binsize must be a positive integer')

    edges = list(range(-1, 100 + binsize, binsize))
    # Derive the labels from the edges so there is exactly one label per
    # bin for every binsize, not only for those that divide 100 evenly.
    labels = [edge + 1 for edge in edges[:-1]]

    return pd.cut(df.index.get_level_values(1), edges, labels=labels)

In [None]:
def binit(df,binsize=10):
    """Sum ``df`` within every combination of year and age bin.

    The age bin of each row comes from ``bin_category``; ``binsize`` is
    forwarded to it unchanged.
    """
    age_bins = bin_category(df, binsize=binsize)
    per_year_and_bin = df.groupby(['year', age_bins])
    return per_year_and_bin.sum()
    


In [None]:
# Aggregate deaths and population into 5-year age bins ...
dead_bin_5 = binit(dead_df,binsize=5)
pop_bin_5 = binit(pop_df,binsize=5)

# ... and into 1-year bins (same granularity as the raw data, but with the
# binned index layout that binit produces).
dead_bin_1 = binit(dead_df,binsize=1)
pop_bin_1 = binit(pop_df,binsize=1)

In [None]:
def agg_bin_data(pop,dead):
    """Join population and death counts and add the mortality rate.

    ``pop`` and ``dead`` are placed side by side on their shared index;
    rows lacking a value in either table are discarded.  The returned
    frame has the columns ``pop``, ``dead`` and ``mortality`` (dead / pop)
    and its two index levels are named ``year`` and ``age_grp``.
    """
    combined = pd.concat([pop, dead], axis=1)
    combined = combined.dropna()
    combined.columns = ['pop','dead']
    combined['mortality'] = combined['dead'].div(combined['pop'])
    return combined.rename_axis(index=['year','age_grp'])

In [None]:
# Population, deaths and mortality per (year, age group) for both binnings.
bin_1_data = agg_bin_data(pop_bin_1,dead_bin_1)
bin_5_data = agg_bin_data(pop_bin_5,dead_bin_5)

print (bin_1_data.head())

# sanity check #
# Re-binning must not change the yearly totals.  Only the additive columns
# 'pop' and 'dead' are compared: a sum of per-bin mortality ratios depends on
# the bin width and would always be reported as unequal.
# 98124 is presumably the published total number of deaths in 2020 --
# NOTE(review): confirm against the source.
((bin_5_data.groupby('year')[['pop','dead']].sum() == bin_1_data.groupby('year')[['pop','dead']].sum()).all()) & \
(bin_1_data.groupby('year').sum().loc['2020','dead'] == 98124)

In [None]:
# Show the 1-year-bin table (columns: pop, dead, mortality).
bin_1_data

In [None]:
# Total population and total deaths per year, summed over all age groups.
yearly_stats = bin_1_data.groupby('year')[['pop','dead']].sum()
yearly_stats

In [None]:
# Baseline: mean yearly population and deaths over 2015-2018 (label slice,
# both end years included).
yearly_stats_base_15_18 = yearly_stats.loc['2015' : '2018'].mean()
yearly_stats_base_15_18

In [None]:
# Growth factor: each year's totals divided by the previous row's totals.
# The first row has no predecessor and is therefore NaN.
yearly_growth = yearly_stats / yearly_stats.shift()
yearly_growth

In [None]:
# One subplot per column (pop, dead) on a shared x axis.
axes = yearly_stats.plot(subplots=True,style='o--',sharex=True)

# Put one tick on every year and label it from the data itself, so the labels
# stay correct (and equal in number to the ticks) whatever range of years the
# API returned.
axes[1].set_xticks(range(len(yearly_stats)))
_= axes[1].set_xticklabels(yearly_stats.index,rotation=90)

In [None]:
# Plot the year-over-year growth factors of population and deaths.
yearly_growth.plot(style='o--')

In [None]:
# Per-age-group baselines: the mean of pop, dead and mortality over the years
# 2015-2018 and, as an alternative baseline, over 2015-2019.
bin_1_base_15_18 = bin_1_data.loc['2015' : '2018'].groupby(['age_grp']).mean()
bin_1_base_15_19 = bin_1_data.loc['2015' : '2019'].groupby(['age_grp']).mean()

bin_1_base_15_18

In [None]:
# Expected deaths = baseline mortality of an age group x the population of
# that age group in a given year, then summed over age groups per year.
# NOTE(review): the product relies on pandas aligning the age_grp-indexed
# baseline with the (year, age_grp) MultiIndex of bin_1_data -- confirm on the
# pandas version in use.
bin_1_base_15_18_expected = bin_1_base_15_18['mortality'] * bin_1_data['pop']
bin_1_base_15_18_expected_tot = bin_1_base_15_18_expected.groupby('year').sum()

# Same computation with the 2015-2019 baseline.
bin_1_base_15_19_expected = bin_1_base_15_19['mortality'] * bin_1_data['pop']
bin_1_base_15_19_expected_tot = bin_1_base_15_19_expected.groupby('year').sum()

bin_1_base_15_18_expected_tot

In [None]:
# Observed deaths per year, summed over all age groups.
bin_1_actuals = bin_1_data.groupby(['year'])['dead'].sum()
bin_1_actuals

In [None]:
# Excess deaths per year against the two age-specific-mortality baselines:
# the plain column holds observed minus expected deaths, the *_pct column
# holds observed divided by expected deaths.
bin_1_excess = pd.DataFrame()
for _baseline_name, _expected_tot in (
    ('age_mort_base_15_18', bin_1_base_15_18_expected_tot),
    ('age_mort_base_15_19', bin_1_base_15_19_expected_tot),
):
    bin_1_excess[_baseline_name] = bin_1_actuals - _expected_tot
    bin_1_excess[_baseline_name + '_pct'] = bin_1_actuals / _expected_tot

bin_1_excess

In [None]:
# Two stacked panels: absolute excess deaths on top, the ratio of observed to
# expected deaths below, each for both baselines.
fig,axes = plt.subplots(2)

bin_1_excess.plot(ax = axes[0],y=['age_mort_base_15_18','age_mort_base_15_19'],figsize=(18,12),style='o--')
axes[0].set_ylabel('excess deaths')

bin_1_excess.plot(ax=axes[1],y=['age_mort_base_15_18_pct','age_mort_base_15_19_pct'],style='o--')
axes[1].set_ylabel('excess growth factor')

plt.suptitle('SWE excess deaths by age specific mortality [1 year bins] two baselines\nDataSource : scb.se')

In [None]:
# Excess deaths per year against a flat baseline that ignores population
# changes: the sum over age groups of the baseline period's mean deaths.
# The plain column is observed minus baseline, the *_pct column is observed
# divided by baseline.
abs_excess = pd.DataFrame()
for _period, _baseline in (('15_18', bin_1_base_15_18), ('15_19', bin_1_base_15_19)):
    _baseline_deaths = _baseline['dead'].sum()
    abs_excess['abs_base_' + _period] = bin_1_actuals - _baseline_deaths
    abs_excess['abs_base_' + _period + '_pct'] = bin_1_actuals / _baseline_deaths

abs_excess

In [None]:
# scipy.stats is imported as `sps` in the notebook's import cell.

# First and last year (inclusive) of the period the linear trend is fitted on.
trend_start = 2015
trend_end = 2018

def find_slope_intercept(s):
    """Fit a straight line to ``s`` over ``trend_start``..``trend_end``.

    Parameters
    ----------
    s : pandas.Series
        Values indexed by year; the labels must be convertible to int
        (e.g. '2015').  The series is not modified.

    Returns
    -------
    tuple
        ``(slope, intercept)`` of the least-squares line ``value = slope *
        year + intercept`` through the observations whose year lies within
        the trend period.
    """
    # Work on converted copies; assigning to s.index would silently change
    # the caller's object.
    years = s.index.astype(int).to_numpy()
    values = s.to_numpy()

    # Regress on the years that are actually present, so x and y always have
    # the same length even if a year of the period is missing.
    in_period = (years >= trend_start) & (years <= trend_end)
    fit = sps.linregress(years[in_period], values[in_period])
    return fit.slope, fit.intercept
    
    
# Pivot mortality to one column per age group (rows are years), fit the
# trend line column by column, then transpose so each row is an age group
# with its slope and intercept.
bin_1_data_flat = bin_1_data['mortality'].unstack()
bin_1_trend = bin_1_data_flat.apply(find_slope_intercept).T
bin_1_trend.columns = ['slope','intercept']
bin_1_trend

In [None]:
# Evaluate every age group's fitted trend line for each year present in the
# data.  The years are taken from bin_1_data rather than hard-coded, so the
# table always lines up with the population table it is multiplied with.
X = np.sort(bin_1_data.index.get_level_values('year').unique().astype(int).to_numpy())
bin_1_trend_mortality = pd.DataFrame([year * bin_1_trend['slope'] + \
                                        bin_1_trend['intercept'] for year in X])

# Year labels are strings everywhere else in the notebook; match that.
bin_1_trend_mortality.index = X.astype(str)

bin_1_trend_mortality

In [None]:
# Expected deaths under the trend: population per (year, age group) times the
# trend mortality for the same cell, summed over age groups for each year.
bin_1_pop_flat = bin_1_data['pop'].unstack()
bin_1_trend_expectation = bin_1_pop_flat * bin_1_trend_mortality
bin_1_trend_expectation_tot = bin_1_trend_expectation.sum(axis=1)
bin_1_trend_expectation_tot

In [None]:
# Excess deaths against the trend expectation: observed minus expected ...
bin_1_trend_excess = pd.DataFrame(bin_1_data.groupby('year')['dead'].sum() - bin_1_trend_expectation_tot,
                                  columns = ['bin_1_15_18_trend'])

# ... and observed divided by expected.
bin_1_trend_excess['bin_1_15_18_trend_pct'] = bin_1_data.groupby('year')['dead'].sum() /\
bin_1_trend_expectation_tot
bin_1_trend_excess

In [None]:
# Put the three families of excess estimates side by side, one row per year.
all_excess = pd.concat([bin_1_excess,abs_excess,bin_1_trend_excess],axis=1)
all_excess

In [None]:
# Two stacked panels sharing the year axis: absolute excess deaths on top,
# the ratio of observed to expected deaths below, for all five expectations.
fig,axes = plt.subplots(2,sharex=True,figsize=(18,12))

all_excess.plot(ax = axes[0],
                y=['abs_base_15_18','abs_base_15_19',
                   'age_mort_base_15_18','age_mort_base_15_19',
                  'bin_1_15_18_trend'],
                style='o--')

axes[0].set_ylabel('excess deaths')


all_excess.plot(ax=axes[1],
                y=['abs_base_15_18_pct','abs_base_15_19_pct',
                   'age_mort_base_15_18_pct','age_mort_base_15_19_pct',
                  'bin_1_15_18_trend_pct'],
                style='o--')

axes[1].set_ylabel('relative vs expectation excess')

# One tick per year, labelled from the data itself so the number of labels
# always equals the number of ticks, whatever years the API returned.
axes[1].set_xticks(range(len(all_excess)))
axes[1].set_xticklabels(all_excess.index)

plt.suptitle('SWE excess deaths by 5 different expectations\nDataSource : scb.se')
plt.savefig('SCB_ALL_EXCESS.jpg',format='jpg')

In [None]:
# Show the 5-year-bin table (columns: pop, dead, mortality).
bin_5_data

In [None]:
def _age_mortality_by_year(dead_binned, pop_binned):
    """Return deaths divided by population as a table of years x age bins.

    Both arguments are frames as produced by ``binit``.  The age-bin level
    is pivoted into columns, the leftover outer column level is removed and
    years containing a missing value are dropped.
    """
    ratio = dead_binned / pop_binned
    return ratio.unstack().droplevel(0,axis=1).dropna()


# Binned death and population counts for the additional bin widths.
dead_bin_10 = binit(dead_df,binsize=10)
pop_bin_10 = binit(pop_df,binsize=10)

dead_bin_20 = binit(dead_df,binsize=20)
pop_bin_20 = binit(pop_df,binsize=20)

dead_bin_1 = binit(dead_df,binsize=1)
pop_bin_1 = binit (pop_df,binsize=1)

# Mortality per year and age bin for each bin width.
age_mort_bin_1 = _age_mortality_by_year(dead_bin_1, pop_bin_1)
age_mort_bin_5 = _age_mortality_by_year(dead_bin_5, pop_bin_5)
age_mort_bin_10 = _age_mortality_by_year(dead_bin_10, pop_bin_10)
age_mort_bin_20 = _age_mortality_by_year(dead_bin_20, pop_bin_20)

# Baseline mortality per age bin: the mean over the years 2015-2018.
baseline_age_mort_bin_1 = age_mort_bin_1.loc['2015' : '2018'].mean()
baseline_age_mort_bin_5 = age_mort_bin_5.loc['2015' : '2018'].mean()
baseline_age_mort_bin_10 = age_mort_bin_10.loc['2015' : '2018'].mean()
baseline_age_mort_bin_20 = age_mort_bin_20.loc['2015' : '2018'].mean()


In [None]:
# Compare the 2015-2018 baseline mortality per age bin for 5- and 10-year bins.
ax = baseline_age_mort_bin_5.plot(style='o--',figsize=(18,12),label='bin_5')
baseline_age_mort_bin_10.plot(ax=ax,style='o--',label='bin_10')
ax.legend(loc='upper left')

In [None]:
# Expected 2020 deaths per age bin = baseline mortality of the bin x the 2020
# population of the bin, computed for each of the four bin widths.
bin_1_expectation = baseline_age_mort_bin_1 * pop_bin_1.loc['2020','count']
bin_5_expectation = baseline_age_mort_bin_5 * pop_bin_5.loc['2020','count']
bin_10_expectation = baseline_age_mort_bin_10 * pop_bin_10.loc['2020','count']
bin_20_expectation = baseline_age_mort_bin_20 * pop_bin_20.loc['2020','count']

# Total expected 2020 deaths for each bin width.
print (bin_1_expectation.sum())
print (bin_5_expectation.sum())
print (bin_10_expectation.sum())
print (bin_20_expectation.sum())

In [None]:
### excess deaths based on different binnings ###
# Observed 2020 deaths minus the expected 2020 deaths, per bin width.
print (dead_bin_1.loc['2020','count'].sum() - bin_1_expectation.sum())
print (dead_bin_5.loc['2020','count'].sum() - bin_5_expectation.sum())
print (dead_bin_10.loc['2020','count'].sum() - bin_10_expectation.sum())
print (dead_bin_20.loc['2020','count'].sum() - bin_20_expectation.sum())

In [None]:
# Four coarse age groups with right-inclusive edges: up to 64, 65-79, 80-89
# and 90 or older.  The age is read from the second index level.
# NOTE(review): "scb_prel" presumably refers to the grouping used in SCB's
# preliminary statistics -- confirm.
scb_prel_pop_bin = pd.cut(pop_df.index.get_level_values(1),[-1,64,79,89,200],labels=['-64','65-79','80-89','90+'])
scb_prel_dead_bin = pd.cut(dead_df.index.get_level_values(1),[-1,64,79,89,200],labels=['-64','65-79','80-89','90+'])

In [None]:
# Work on copies so the raw tables stay untouched, and attach each row's
# coarse age group as a new column.
pop_df_scb_prel = pop_df.copy()
pop_df_scb_prel['scb_prel_bin'] = scb_prel_pop_bin

dead_df_scb_prel = dead_df.copy()
dead_df_scb_prel['scb_prel_bin'] = scb_prel_dead_bin
dead_df_scb_prel

In [None]:
# Sum deaths and population per year and coarse age group.
dead_bin_prel = dead_df_scb_prel.groupby(['year','scb_prel_bin']).sum()
pop_bin_prel = pop_df_scb_prel.groupby(['year','scb_prel_bin']).sum()

# Mortality per (year, coarse age group); rows with a missing value in either
# table are dropped.
mort_prel = (dead_bin_prel / pop_bin_prel).dropna()
mort_prel

In [None]:
# Baseline years 2015-2018 as a table: rows are years, columns are the
# coarse age groups.
mort_prel_15_18 = mort_prel.loc['2015' : '2018'].unstack().droplevel(0,axis=1)
mort_prel_15_18

In [None]:
# Baseline mortality per coarse age group: the mean over the baseline years.
mort_prel_baseline  = mort_prel_15_18.mean()
mort_prel_baseline

In [None]:
# Reshape population and deaths to tables of years x coarse age groups.
pop_prel_unstacked = pop_bin_prel.unstack().droplevel(0,axis=1)
dead_prel_unstacked = dead_bin_prel.unstack().droplevel(0,axis=1)

In [None]:

# Expected deaths per year = baseline mortality x population, summed over the
# coarse age groups; observed deaths per year are summed the same way.
prel_expect = (mort_prel_baseline * pop_prel_unstacked).sum(axis=1)
prel_observed = dead_prel_unstacked.sum(axis=1)
prel_observed

In [None]:
### excess deaths based on scb prel binning with 4 age groups ###
# Observed minus expected deaths per year; years without a value are dropped.
(prel_observed - prel_expect).dropna()

In [None]:
# 2020 excess deaths (observed minus expected) for each of the five age
# binnings, in the same order as the index labels given below.
l = [dead_bin_1.loc['2020','count'].sum() - bin_1_expectation.sum(),
     dead_bin_5.loc['2020','count'].sum() - bin_5_expectation.sum(),
     dead_bin_10.loc['2020','count'].sum() - bin_10_expectation.sum(),
     dead_bin_20.loc['2020','count'].sum() - bin_20_expectation.sum(),
    prel_observed.loc['2020'] - prel_expect.loc['2020']]

# One row per binning, single column 'excess'.
twenty_twenty_excess = pd.DataFrame(l,columns=['excess'],index=['bin_1','bin_5','bin_10','bin_20','bin_scb_prel'])
twenty_twenty_excess

In [None]:
# Bar chart of the 2020 excess deaths per binning, also saved as a JPEG in
# the current working directory.
title = 'SWEDEN 2020 Excess Deaths using 5 different age binnings, baseline 2015-2018\nDataSource : scb.se'
twenty_twenty_excess.plot(kind='bar',figsize=(18,12),color='red',title=title)
plt.ylabel('Excess Deaths')
plt.xlabel('binning')
plt.savefig('scb_all_excess_different_bins.jpg',format='jpg')