In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import requests

sns.set()

In [None]:
# Request definition for SCB's JSON API: population table (FolkmangdNov). #
# The response format must be 'json' here; the query SCB hands out defaults to 'px'. #

pop_url = 'http://api.scb.se/OV0104/v1/doris/en/ssd/START/BE/BE0101/BE0101A/FolkmangdNov'

# one-year age classes "0", "1", ..., "99" followed by the open-ended "100+" class #
_pop_age_values = [str(age) for age in range(100)] + ["100+"]

pop_query = {
    "query": [
        {"code": "Region",
         "selection": {"filter": "vs:RegionRiket99", "values": ["00"]}},
        {"code": "Alder",
         "selection": {"filter": "vs:Ålder1årA", "values": _pop_age_values}},
        {"code": "Kon",
         "selection": {"filter": "item", "values": ["1", "2"]}},
    ],
    "response": {"format": "json"},
}

In [None]:
# Request definition for SCB's JSON API: deaths table (DodaFodelsearK). #
# The response format must be 'json' here; the query SCB hands out defaults to 'px'. #

dead_url = 'http://api.scb.se/OV0104/v1/doris/en/ssd/START/BE/BE0101/BE0101I/DodaFodelsearK'

# one-year age classes "0".."99" plus "100+", and the years "2002".."2019" #
_dead_age_values = [str(age) for age in range(100)] + ["100+"]
_dead_year_values = [str(year) for year in range(2002, 2020)]

dead_query = {
    "query": [
        {"code": "Region",
         "selection": {"filter": "vs:RegionRiket99", "values": ["00"]}},
        {"code": "Alder",
         "selection": {"filter": "vs:Ålder1årA", "values": _dead_age_values}},
        {"code": "Kon",
         "selection": {"filter": "item", "values": ["1", "2"]}},
        {"code": "Tid",
         "selection": {"filter": "item", "values": _dead_year_values}},
    ],
    "response": {"format": "json"},
}

In [None]:
### function to fetch SCB population or death data, returns 3 DataFrames w. different age binnings ###

def fetch_scb_data(url,query,param='pop',timeout=30):
    """POST `query` to an SCB table endpoint and aggregate the answer three ways.

    Parameters
    ----------
    url : str
        Table endpoint the query is posted to.
    query : dict
        Query body, sent as JSON.
    param : str, default 'pop'
        Name of the value column in the returned frames (e.g. 'pop' or 'dead').
    timeout : float, default 30
        Seconds `requests` waits for the server before giving up.

    Returns
    -------
    age_df : DataFrame
        Indexed by (year, num_age); columns [param, 'scb_prel_age_bin']; genders summed.
    age_scb_prel_df : DataFrame
        Indexed by (year, scb_prel_age_bin); column [param].
    df : DataFrame
        Indexed by year; column [param].

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the response contains no data records.
    """

    # call the API; without a timeout a stalled server would block the notebook forever #
    r = requests.post(url,json=query,timeout=timeout)
    print ('HTTP status code fetching {}: '.format(param),r.status_code)

    # fail here with the real HTTP error rather than later with an obscure JSON/KeyError #
    r.raise_for_status()

    # extract the data section from the response (skip metadata) #
    json_data = r.json()['data']
    if not json_data:
        raise ValueError('SCB returned no data records for {}'.format(param))

    # each record holds a 'key' list (area, age, gender, year) and a 'values' list #
    print ('record layout for {}: '.format(param),json_data[0])

    raw_df = pd.DataFrame(json_data)
    raw_df[['area','age','gender','year']] = raw_df['key'].to_list() # split keys to separate columns #

    raw_df[param] = raw_df['values'].apply(
        lambda x : x[0]).astype(int) # extract size from the values list #

    raw_df['num_age'] = raw_df['age'].apply(
        lambda x : x.replace('+','')).astype(int) # '100+' -> 100 #

    # combine the two genders; only the numeric value column is summed, so the string #
    # columns are not concatenated into junk on pandas versions that sum object columns #
    age_df = raw_df.groupby(['year','num_age'])[[param]].sum()

    # bin the data to the same age bins that are used in SCB's prel. death data #
    age_df['scb_prel_age_bin'] = pd.cut(age_df.index.get_level_values(1),[-1,64,79,89,200],
                                        labels=['-64','65-79','80-89','90+'])

    # per year, collapse the 1 year age bins into the 4 age bins of the prel death data; #
    # observed=False keeps all four bins in the result even if one happens to be empty #
    age_scb_prel_df = age_df.groupby([age_df.index.get_level_values(0),
                                      'scb_prel_age_bin'],observed=False)[[param]].sum()

    # total yearly values #
    df = age_scb_prel_df.groupby('year').sum()

    return age_df,age_scb_prel_df,df
    

In [None]:
### get the population & death data from SCB ###
# each call returns three frames: 1 year age bins, the 4 prel. age bins, and yearly totals #
pop_age_df,pop_age_scb_prel_df,pop_df = fetch_scb_data(pop_url,pop_query,param='pop')
dead_age_df,dead_age_scb_prel_df,dead_df = fetch_scb_data(dead_url,dead_query,param='dead')


In [None]:
### combine the pop & death data to 3 separate, complete df's with: ###
### 1) 1 year age bins; 2) 4 age bins as with scb prel death data, 3) no age bins ###
# DataFrame.join is a left join by default: every population row is kept, and rows #
# without a matching death record get NaN in 'dead' #

one_year_binned = pd.DataFrame(pop_age_df['pop']).join(dead_age_df['dead'])
scb_prel_binned = pd.DataFrame(pop_age_scb_prel_df['pop']).join(dead_age_scb_prel_df['dead'])
no_bins = pd.DataFrame(pop_df['pop']).join(dead_df['dead'])


In [None]:
### now we need to fetch the 2020 prel death data which do not yet reside in the scb database ###
# pd.ExcelFile is given the URL directly, so running this cell needs network access #

scb_prel_url = 'https://www.scb.se/hitta-statistik/statistik-efter-amne/befolkning/'\
'befolkningens-sammansattning/befolkningsstatistik/pong/tabell-och-diagram/preliminar-statistik-over-doda/'

scb_prel_file = pd.ExcelFile(scb_prel_url)

In [None]:
# list the sheet names of the downloaded workbook #
# 'Tabell 1' contains the daily, non age-binned data #

scb_prel_file.sheet_names

In [None]:
# parse the prel daily death data #
# skiprows=6 skips the first six rows of the sheet; usecols=range(10) keeps the first ten columns #

scb_prel_daily = scb_prel_file.parse(sheet_name='Tabell 1',skiprows=6,usecols=range(10))

scb_prel_daily


In [None]:
# get the deaths that occurred on unknown days: last row, every column but the first #
unknown_death_day = scb_prel_daily.iloc[-1,1:]

# remove the unknown deaths from the df and drop the DagMånad column #
# NOTE: scb_prel_daily is rebound here, so re-running this cell strips one more row and column #

scb_prel_daily = scb_prel_daily.iloc[:-1,1:]
scb_prel_daily

In [None]:
# let's allocate the unknown deaths proportionally to the days of year: #
# each day gets the share of a column's unknown deaths that equals its share of that column's total #
# NOTE: the table is updated in place (+=), so re-running this cell adds the unknown deaths again #

daily_proportion_of_total_deaths = scb_prel_daily / scb_prel_daily.sum()
deaths_to_add = daily_proportion_of_total_deaths * unknown_death_day
scb_prel_daily += deaths_to_add
scb_prel_daily

In [None]:
# column sums of the adjusted daily table: one total per column #
total_deaths = scb_prel_daily.sum()
total_deaths

In [None]:
### let's add the prel 2020 daily death total to the no_bins df ###
### notice that the 2020 total death toll calculated from daily deaths is not identical to ###
### the total calculated from weekly sums below, because week 53 stretches to Jan 3rd ###
### the difference is ~800 more deaths for the weekly calculation ###

no_bins.loc['2020','dead'] = total_deaths['2020']
no_bins

In [None]:
### get the age group binned prel death data ###
# read 'Tabell 2', keep the 2020 rows in reversed order, and leave out the two label columns #
_tabell_2 = scb_prel_file.parse('Tabell 2',skiprows=7,usecols=[0,1,4,5,6,7,9,10,11,12])
_rows_2020 = _tabell_2['År'] == 2020

scb_prel_age = _tabell_2.loc[_rows_2020].iloc[::-1].drop(columns=['År','DagMånad'])

scb_prel_age


In [None]:
### function to combine gender specific deaths per age group to age grp total ###

class AgeError(ValueError):
    """Raised when a column label contains none of the known age groups."""


def combine_ages(col):
    """Map a gender-specific column label to the label of its age group.

    Parameters
    ----------
    col : str
        Column label; it is searched for the substrings '0-64', '65-79',
        '80-89' and '90+' (in that order).

    Returns
    -------
    str
        '-64', '65-79', '80-89' or '90+'.

    Raises
    ------
    AgeError
        If `col` contains none of the four substrings. (The exception used to
        be *returned*, which silently turned it into a group key.)
    """

    # (substring looked for, label returned) — same order as the original checks #
    age_groups = (('0-64','-64'),('65-79','65-79'),('80-89','80-89'),('90+','90+'))

    for marker,label in age_groups:
        if marker in col:
            return label

    raise AgeError('no such age group')
    

In [None]:
### combine the two genders ###
# groupby(..., axis=1) is deprecated in recent pandas; transposing, grouping the former #
# column labels through combine_ages and transposing back yields the same table #
scb_prel_age = scb_prel_age.T.groupby(combine_ages).sum().T
scb_prel_age

In [None]:
# the first row is taken as the "unknown date" row — presumably it ends up first because #
# the table was reversed when it was read; verify against the sheet #
unknown_2020_age_date = scb_prel_age.iloc[0] # get deaths with unknown date #
scb_prel_age = scb_prel_age.iloc[1:] # drop unknown dates from df #


In [None]:
### add unknown deaths proportionally to the days ###
# NOTE: updates scb_prel_age in place (+=); re-running the cell adds the unknown deaths again #
prop_deaths_per_day = scb_prel_age / scb_prel_age.sum()
add_ons = prop_deaths_per_day * unknown_2020_age_date
scb_prel_age += add_ons
# grand total over all days and all age groups #
scb_prel_age.sum().sum()

In [None]:
### set datetime index ###
# one label per calendar day of 2020; the assignment raises if the row count differs #
scb_prel_age.index = pd.date_range('2020-01-01','2020-12-31')
scb_prel_age

In [None]:
### totals per age group (column sums over all days) ###
scb_age_2020_total = scb_prel_age.sum()
scb_age_2020_total

In [None]:
### grand total for 2020 (sum of the age group totals) ###
scb_age_2020_total.sum()

In [None]:
### now, let's add the 2020 prel death data to the age binned death dataframe from above ###

# first we need a multi-index for 2020 for the 4 age groups: ('2020', <age group>) per column #

idx = pd.MultiIndex.from_arrays([ ['2020'] * len (scb_prel_age.columns), scb_prel_age.columns ])

# re-label the totals with that index and turn them into a one-column ('dead') frame #
# NOTE: scb_age_2020_total changes from Series to DataFrame here; the cell is meant to run once #
scb_age_2020_total.index = idx
scb_age_2020_total.name='dead'
scb_age_2020_total = pd.DataFrame(scb_age_2020_total)
scb_age_2020_total

In [None]:
# inspect the age binned frame as it stands at this point #
scb_prel_binned

In [None]:
### and now let's add 2020 to the scb_prel_binned_df ###
# NOTE(review): `.at` is documented for single-value access, but here it is given a partial #
# MultiIndex key ('2020') and a whole DataFrame as value — confirm that the four age group #
# totals are still assigned correctly on the installed pandas version #

scb_prel_binned.at['2020','dead'] = scb_age_2020_total
scb_prel_binned

In [None]:
### yearly totals calculated on the age binned data (every column summed per year) ###

scb_prel_binned.groupby('year').sum()

In [None]:
### now, let's add age group based mortality: deaths divided by population, row by row ###
scb_prel_binned['mortality'] = scb_prel_binned['dead'] / scb_prel_binned['pop']
scb_prel_binned

In [None]:
### let's define 2019 as our standard population, for age adjusted overall mortality ###
### and then add 'standardized deaths', that is, absolute deaths based on ACTUAL yearly mortality ###
### acting on the standard population. I.e. we are computing the absolute number of deaths given actual ###
### mortality, but as if the population age group sizes had been equal to those of the std year (2019) ###

# the 'pop' values of the 2019 rows #
std_pop = scb_prel_binned.loc['2019','pop']

# presumably pandas matches std_pop to every year's rows on the age group level — verify #
scb_prel_binned['std_deaths'] = scb_prel_binned['mortality'] * std_pop
scb_prel_binned

In [None]:
### age adjusted mortality: per year, the age group std_deaths are summed ###
### and the sum is divided by the total std population ###

_std_deaths_per_year = scb_prel_binned.groupby('year')['std_deaths'].sum()
age_adj_mortality = _std_deaths_per_year / std_pop.sum()
age_adj_mortality

In [None]:
# bar chart of age adjusted mortality per year, saved as a jpg #
title = 'SWEDEN Age Adj. Mortality (std.pop : 2019) 2002-2020\nDataSource : scb.se'
# all bars blue, the last one orange (presumably 2020 — holds only while 2020 is the last year) #
colors = ['b'] * len (age_adj_mortality)
colors[-1] = 'orange'

age_adj_mortality.plot(kind='bar',figsize=(18,12),color=colors,title=title)
plt.ylabel('age adjusted mortality')
_= plt.yticks(np.arange(0,0.015,0.0005))

plt.savefig('SCB_excess_age_adj_mortality.jpg',format='jpg')

In [None]:
### let's compute non-age-adjusted mortality: yearly deaths divided by yearly population ###
no_bins['mortality'] = no_bins['dead'] / no_bins['pop']
no_bins

In [None]:
# bar chart of crude mortality (deaths / capita) per year, saved as a jpg #
title = 'SWEDEN Mortality (deaths / capita) 2002-2020\nDataSource : scb.se'
# one colour per bar of the frame that is actually plotted (no_bins, not age_adj_mortality); #
# the last bar is highlighted #
colors = ['b'] * len (no_bins)
colors[-1] = 'orange'

no_bins.plot(y='mortality',kind='bar',figsize=(18,12),color=colors,title=title)
plt.ylabel('mortality')
_= plt.yticks(np.arange(0,0.011,0.0005))
plt.savefig('SCB_excess_mortality.jpg',format='jpg')

In [None]:
### age adj Mortality baselines ###
# per age group: mean of the yearly mortality over the baseline years #
# (.loc label slices include both end years) #

age_adj_grp_mort_base_15_18 = scb_prel_binned.loc['2015':'2018','mortality'].groupby('scb_prel_age_bin').mean()
age_adj_grp_mort_base_15_19 = scb_prel_binned.loc['2015' :'2019','mortality'].groupby('scb_prel_age_bin').mean()

print ('base_15_18_mortality :',age_adj_grp_mort_base_15_18)
print ()
print ('base_15_19_mortality :',age_adj_grp_mort_base_15_19)

In [None]:
### compute expected age grp deaths, assuming mortality per age group equals baseline mortality ###
# presumably pandas matches the per-group baseline to every year's rows on the age group level — verify #
scb_prel_binned['exp_dead_base_15_18'] = scb_prel_binned['pop'] * age_adj_grp_mort_base_15_18
scb_prel_binned['exp_dead_base_15_19'] = scb_prel_binned['pop'] * age_adj_grp_mort_base_15_19

In [None]:
### compute excess age grp deaths as the difference: actual age grp deaths - expected age grp deaths ###
# positive values mean more deaths than the baseline predicts #
scb_prel_binned['excess_15_18'] = scb_prel_binned['dead'] - scb_prel_binned['exp_dead_base_15_18']
scb_prel_binned['excess_15_19'] = scb_prel_binned['dead'] - scb_prel_binned['exp_dead_base_15_19'] 

scb_prel_binned

In [None]:
### compute yearly sums of total excess deaths based on age groups ###
# one row per year, one column per baseline #
age_grp_based_excess_deaths = scb_prel_binned.groupby('year')[['excess_15_18','excess_15_19']].sum()
age_grp_based_excess_deaths

In [None]:
### non-age-adjusted baselines: mean yearly mortality over the baseline years ###
_mortality_by_year = no_bins['mortality']

mort_15_18_base = _mortality_by_year.loc['2015':'2018'].mean()
mort_15_19_base = _mortality_by_year.loc['2015':'2019'].mean()

print ('non-age-adj-mort_base_15_18 :',mort_15_18_base)
print ()
print ('non-age_adj-mort_base_15_19 :',mort_15_19_base)

In [None]:
### compute expected deaths given these baselines: yearly population times baseline mortality ###
no_bins['exp_dead_base_15_18'] = no_bins['pop'] * mort_15_18_base
no_bins['exp_dead_base_15_19'] = no_bins['pop'] * mort_15_19_base

no_bins

In [None]:
### compute excess deaths given the two expectations: actual deaths - expected deaths ###

no_bins['excess_15_18'] = no_bins['dead'] - no_bins['exp_dead_base_15_18']
no_bins['excess_15_19'] = no_bins['dead'] - no_bins['exp_dead_base_15_19']

no_bins

In [None]:
### finally, let's compute the excess for absolute deaths, with the same baseline years ###
# baseline = mean yearly death count over the baseline years (both end years included) #
abs_base_15_18 = no_bins.loc['2015' : '2018','dead'].mean()
abs_base_15_19 = no_bins.loc['2015' : '2019','dead'].mean()

print ('abs_base_15_18 :',abs_base_15_18)
print ('abs_base_15_19 :',abs_base_15_19)

In [None]:
# absolute excess per year: yearly deaths minus the baseline mean, one column per baseline #
abs_excess = pd.DataFrame({'15_18_base' : no_bins['dead'] - abs_base_15_18,
                          '15_19_base' : no_bins['dead'] - abs_base_15_19})
abs_excess


In [None]:
### summary of excess deaths, computed in different ways, with two different baselines (15-18,15-19) ###
# three methods (absolute, population adjusted, age adjusted) x two baselines, first for 2020 #
# and then for 2019; .astype(int) drops the decimals (truncation toward zero) #

print ('\n2020 excess deaths :\n'.upper())

print ('Absolute excess deaths cmp baseline 15-18 :',abs_excess.loc['2020','15_18_base'].astype(int))
print ('\nAbsolute excess deaths cmp baseline 15-19 :',abs_excess.loc['2020','15_19_base'].astype(int))
print ('\nPopulation adjusted excess deaths cmp baseline 15-18 :',no_bins.loc['2020','excess_15_18'].astype(int))
print ('\nPopulation adjusted excess deaths cmp baseline 15-19 :',no_bins.loc['2020','excess_15_19'].astype(int))

print ('\nAge adjusted excess deaths cmp baseline 15-18 :',
       age_grp_based_excess_deaths.loc['2020','excess_15_18'].astype(int))

print ('\nAge adjusted excess deaths cmp baseline 15-19 :',
       age_grp_based_excess_deaths.loc['2020','excess_15_19'].astype(int))

# same figures for 2019 #
print ('\n2019 death deficit :\n'.upper())

print ('Absolute death deficit cmp baseline 15-18 :',abs_excess.loc['2019','15_18_base'].astype(int))
print ('\nAbsolute death deficit cmp baseline 15-19 :',abs_excess.loc['2019','15_19_base'].astype(int))
print ('\nPopulation adjusted death deficit cmp baseline 15-18 :',no_bins.loc['2019','excess_15_18'].astype(int))
print ('\nPopulation adjusted death deficit cmp baseline 15-19 :',no_bins.loc['2019','excess_15_19'].astype(int))

print ('\nAge adjusted death deficit cmp baseline 15-18 :',
       age_grp_based_excess_deaths.loc['2019','excess_15_18'].astype(int))

print ('\nAge adjusted death deficit cmp baseline 15-19 :',
       age_grp_based_excess_deaths.loc['2019','excess_15_19'].astype(int))




In [None]:
# absolute deaths (left axis, red) and population (right axis, blue) per year in one figure #
title = 'SWEDEN population & absolute deaths 2002 - 2020\nDataSource : scb.se'
ax = no_bins['dead'].plot(style='ro--',figsize=(18,12),label='abs deaths',title=title)
ax.set_ylim([80000,100000])

# second y axis sharing the x axis of the current axes #
ax2 = plt.twinx()

no_bins['pop'].plot(ax=ax2,style='bo--',label='population')

# each axis carries its own legend, placed so the two do not overlap #
ax.legend(loc='upper left')
ax2.legend(loc='upper center')

ax.set_ylabel('absolute deaths')
ax2.set_ylabel('population')

In [None]:
# line chart of crude mortality (deaths / capita) per year #
title = 'SWEDEN mortality (deaths/capita) 2002 - 2020\nDataSource : scb.se'
no_bins.plot(y='mortality',figsize=(18,12),style='o--',title=title)
# the plotted quantity is mortality, not excess deaths, so the axis is labelled accordingly #
plt.ylabel('mortality (deaths / capita)')



In [None]:
# line chart of age adjusted mortality per year #
title = 'SWEDEN age adjusted mortality 2002-2020 (std.pop : 2019)\nDataSource : scb.se'
age_adj_mortality.plot(figsize=(18,12),style='ro--',title=title)
plt.ylabel('age adj. mortality')

In [None]:
# line chart of absolute excess deaths per year, one line per baseline #
title = 'SWEDEN absolute excess deaths 2002-2020, two different baselines\nDataSource : scb.se'
abs_excess.plot(figsize=(18,12),style='o--',title=title)
plt.ylabel('absolute excess deaths')

In [None]:
# line chart of population adjusted excess deaths per year, one line per baseline #
title = 'SWEDEN population adjusted excess deaths 2002-2020\nDataSource : scb.se'
no_bins.plot(y=['excess_15_18','excess_15_19'],style='o--',figsize=(18,12),title=title)
plt.ylabel('population adjusted excess deaths')
# dashed zero line: points above it are excess deaths, points below a deficit #
plt.axhline(0,ls='dashed',color='k')

In [None]:
# line chart of the age group based excess deaths per year, one line per baseline #
# NOTE(review): these figures are built from each year's own 'pop' column times baseline #
# mortality, so the "(std pop 2019)" in the title may not apply — confirm #
title = 'SWEDEN age adjusted excess deaths 2002-2020 (std pop 2019), two different baselines\nDataSource : scb.se'
age_grp_based_excess_deaths.plot(figsize=(18,12),style='o--',title=title)
plt.ylabel('age adj. excess deaths')

In [None]:
# pivot the innermost index level into columns: one row per year, one column per age group #
age_mortality_flat = scb_prel_binned['mortality'].unstack()
age_mortality_flat

In [None]:
#### sanity check: pop * mortality summed over the 2020 age groups should give back the 2020 deaths ####
(scb_prel_binned.loc['2020','pop'] * scb_prel_binned.loc['2020','mortality']).sum()

In [None]:
# 2 x 2 grid: one mortality trend panel per age group, each with both baselines drawn in #
fig,axes = plt.subplots(2,2,figsize=(18,12))

x_labels = list(age_mortality_flat.index)

plt.suptitle('SWEDEN Mortality Trend 2002 - 2020 per Age Group\nDataSource : SCB.se')

# (row, column) of the panel used for the i:th age group, filled row by row #
panel_positions = [(0,0),(0,1),(1,0),(1,1)]

for i,grp in enumerate(age_mortality_flat.columns):

    panel = axes[panel_positions[i]]

    panel.plot(age_mortality_flat[grp],'o--')
    panel.set_title('Age Group {}'.format(grp))
    panel.set_xticklabels(x_labels,rotation=90)
    panel.set_ylabel('mortality')

    # horizontal reference lines at the two baseline mortalities of this age group #
    for baseline,line_color,line_label in ((age_adj_grp_mort_base_15_18,'orange','baseline_15_18'),
                                           (age_adj_grp_mort_base_15_19,'red','baseline_15_19')):
        panel.axhline(baseline.iloc[i],ls='dashed',color=line_color,label=line_label)

    panel.legend(['mortality','baseline_15_18','baseline_15_19'],loc='upper right')

plt.tight_layout()
plt.savefig('Age_grp_mortality_trends.jpg',format='jpg')

In [None]:
# 2020 excess deaths: one row per calculation method, one column per baseline period #
excess_vals = [
    abs_excess.loc['2020'].values,
    no_bins.loc['2020',['excess_15_18','excess_15_19']].values,
    age_grp_based_excess_deaths.loc['2020',['excess_15_18','excess_15_19']].values,
]

excess = pd.DataFrame(
    excess_vals,
    index=['abs_deaths','pop_adj_deaths','age_adj_deaths'],
    columns=['excess_rel_baseline_2015_2018','excess_rel_baseline_2015_2019'],
).astype(int)
excess

In [None]:
# grouped bar chart: the three calculation methods on the x axis, one bar per baseline; saved as a jpg #
title = 'SWEDEN "Excess Deaths" 2020 by three different methods. Two different baselines\nDataSource : SCB.se'
excess.plot(kind='bar',figsize=(18,12),title=title)
plt.ylabel('Excess deaths')
plt.xlabel('excess calculation method')

plt.savefig('SCB_excess_excess_deaths.jpg',format='jpg')

In [None]:
#### let's do one for seasonal deaths as well ####
### SECTION: seasonal analysis, based on the daily table ###
scb_prel_daily

In [None]:
### find out the last reported day of 2021, and define last_good_day ###
# the number of rows of the '2021' column that are not 0 is taken as the number of reported days #
# (assumes unreported days are stored as 0 and no reported day has 0 deaths — TODO confirm) #

days_of_2021 = len(scb_prel_daily['2021'].loc[scb_prel_daily['2021'] != 0 ])
last_data_day = pd.date_range('2021-01-01',periods=days_of_2021)[-1]
# step back two weeks from the last reported day #
last_good_day = last_data_day - pd.DateOffset(weeks=2)
last_good_day

In [None]:
#### leap_day_idx : 60th day of the year (position 59) in the prel dataframe ####
leap_day_idx = 59
# columns with a non-zero value on that row are treated as leap years; #
# [:-2] then leaves out the last two of those columns #
leap_year_mask = scb_prel_daily.iloc[leap_day_idx] != 0
leap_years = scb_prel_daily.columns[leap_year_mask][:-2].values
leap_years


In [None]:
#### serialize the dataframe by years: all year columns (the last two columns are left out) ####
#### are appended one after the other into a single 'dead' column ####

serialized_daily_15_21 = []

for c in scb_prel_daily.columns[:-2]:
    year_values = scb_prel_daily[c]

    if c not in leap_years:
        # drop the leap day row for non-leap years; drop() returns a new Series, so neither #
        # scb_prel_daily nor a cached column is modified and the cell can be re-run safely #
        year_values = year_values.drop(leap_day_idx)

    serialized_daily_15_21.extend(year_values)

serialized_daily_15_21 = pd.DataFrame(serialized_daily_15_21)
serialized_daily_15_21.columns = ['dead']

In [None]:
#### scb_prel_daily has 366 rows for every year, even for years without a leap day, ####
#### so the calendar for 2015-2021 is assembled one year at a time ####

def build_multi_year_idx(start_year,end_year):
    """Return a list with every calendar day from Jan 1 of start_year to Dec 31 of end_year.

    The days of start_year are always included; each later year up to and
    including end_year is appended in order.
    """

    def _days_of(year):
        # all days of one calendar year as a list of Timestamps #
        return list(pd.date_range('{}-01-01'.format(year),'{}-12-31'.format(year)))

    all_days = _days_of(start_year)

    for later_year in range(start_year + 1,end_year + 1):
        all_days += _days_of(later_year)

    return all_days

serialized_idx = pd.DatetimeIndex(build_multi_year_idx(2015,2021))
serialized_idx

In [None]:
### assume population 2021 initially is same as 2020 ###
# NOTE(review): if the population table already contains a 2021 row, this overwrites it with the 2020 value — confirm that is intended #
pop_df.at['2021','pop'] = pop_df.loc['2020','pop']


In [None]:
#### add explicit year, month and day columns for convenience ####
#### compute daily mortality ####

# attach the calendar; the assignment raises if the lengths differ #
serialized_daily_15_21.index = serialized_idx

### skip the last two weeks of data ###
serialized_daily_stable = serialized_daily_15_21.loc[:last_good_day].copy()
# year is stored as a string so it can be used as a label into pop_df #
serialized_daily_stable['year'] = serialized_daily_stable.index.year.astype(str)

serialized_daily_stable['month'] = serialized_daily_stable.index.month
serialized_daily_stable['day'] = serialized_daily_stable.index.day

# each day's deaths divided by the population of that day's year #
serialized_daily_stable['daily_mortality'] = serialized_daily_stable.apply(
    lambda row : row.dead / pop_df.loc[row.year,'pop'],axis=1)

serialized_daily_stable

In [None]:
#### a season runs from the first day of month `season_start` in one year ####
#### to the last day of month `season_end` in the next year (here: Oct 1st - Sep 30th) ####

season_start = 10 # month number
season_end = 9

#### this drops the first season of the first year (2015) from the data ####

def seasonal(df,start_year,next_year):
    """Return the rows of `df` that make up the season start_year/next_year.

    `df` needs a 'month' and a 'year' column. The result holds the rows of
    start_year with month >= season_start, followed by the rows of next_year
    with month <= season_end.
    """
    in_start_year = (df['year'] == start_year) & (df['month'] >= season_start)
    in_next_year = (df['year'] == next_year) & (df['month'] <= season_end)

    return pd.concat([df[in_start_year],df[in_next_year]])



In [None]:
#### collect the different seasons into a single df ####
years = scb_prel_daily.columns.values

# the last two columns are left out; consecutive pairs of the rest define the seasons #
season_years = years[:-2]

season_frames = []

for start_year,next_year in zip(season_years[:-1],season_years[1:]):
    season_df = seasonal(serialized_daily_stable,start_year,next_year)
    season_df['season'] = start_year + '/' + next_year
    season_frames.append(season_df)

# a single concat at the end instead of growing a frame (from an empty one) inside the loop #
all_seasons_df = pd.concat(season_frames) if season_frames else pd.DataFrame()

all_seasons_df

In [None]:
# spot check: the rows of a single season #
all_seasons_df.loc[all_seasons_df['season'] == '2016/2017']

In [None]:
# bar chart of mortality per season, saved as a jpg #
title = 'SWEDEN seasonal mortality 2015/2016 - 2019/2020\nDataSource : scb.se'
# a season's mortality is the sum of its daily mortalities #
seasonal_mortality = all_seasons_df.groupby('season')['daily_mortality'].sum()
# [:-1] leaves the last season out of the chart (presumably because it is incomplete) #
seasonal_mortality[:-1].plot(figsize=(18,12),kind='bar',title=title)
plt.yticks(np.arange(0,0.01,0.0005))
plt.ylabel('mortality')
plt.savefig('seasonal_deaths.jpg',format='jpg')


In [None]:
# bar chart of deaths per calendar month; the last month of the resampled series is left out #
title = 'SWEDEN monthly absolute deaths 2015-2020\nDataSource : scb.se'
monthly_dead = serialized_daily_stable.resample('M')['dead'].sum()[:-1]
monthly_dead.plot(kind='bar',figsize=(18,12),style='o--',title=title)

# label every bar with the date of its index entry #
x_labels = [month_stamp.date() for month_stamp in monthly_dead.index]
plt.xticks(range(len(monthly_dead)),x_labels)

# dashed vertical line at every 12th bar, starting at bar 11 #
for year_no in range(0,6):
    plt.axvline(11 + year_no * 12,color='orange',ls='dashed')

plt.ylabel('number of deaths per month')
monthly_dead

In [None]:
# bar chart of mortality per calendar month; the last month of the resampled series is left out #
title = 'SWEDEN monthly mortality 2015 - 2020\nDataSource : scb.se'
monthly_mortality = serialized_daily_stable.resample('M')['daily_mortality'].sum()[:-1]
# the title defined above is now actually passed to the plot #
monthly_mortality.plot(kind='bar',figsize=(18,12),title=title)

# tick labels are derived from this cell's own series instead of a variable left over from another cell #
month_labels = [month_stamp.date() for month_stamp in monthly_mortality.index]
plt.xticks(range(len(monthly_mortality)),month_labels)
plt.ylabel('monthly mortality')

# dashed vertical line at every 12th bar, starting at bar 11 #
for year_no in range(0,6):
    plt.axvline(11 + year_no * 12,color='orange',ls='dashed')

monthly_mortality

In [None]:
#### summaries for Prof. Levitt #### 
# yearly sums of the age binned columns, written to Excel together with the full age binned frame #
# (both files are written to the current working directory) #

summary_age_adj_flat = scb_prel_binned.reset_index().groupby('year')[['pop','dead','std_deaths',
                                               'exp_dead_base_15_18','exp_dead_base_15_19',
                                               'excess_15_18','excess_15_19']].sum()

summary_age_adj_flat.to_excel('scb_yearly_age_adjusted_deaths_summary.xlsx')
scb_prel_binned.to_excel('scb_age_binned_data_2002_2020.xlsx')

In [None]:
### compare mortality vs. age adjusted mortality (std. pop: 2019) ###

In [None]:
# crude mortality and age adjusted mortality per year drawn on the same axes #
ax = no_bins.plot(y='mortality',figsize=(18,12),style='o--')
age_adj_mortality.plot(ax=ax,style='o--')

# one tick per row of no_bins, labelled with the row's index value #
x_labels = list(no_bins.index)
ax.set_xticks(range(len(x_labels)))
_= ax.set_xticklabels(labels=x_labels,rotation=90)
ax.legend(['mortality','age adj. mortality'])
