# Python / pandas example of age-adjusted mortality using data from SCB (Statistics Sweden)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import requests

# Switch on seaborn's default styling for the matplotlib plots drawn below.
sns.set()

In [None]:
# Endpoint of SCB's API for the official population table "FolkmangdNov".
# When this notebook was written the table had last been updated on
# 2020-12-08 and already included 2020.
pop_url = (
    'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101A/FolkmangdNov'
)

In [None]:
# Age codes requested from SCB: the single-year classes "0" .. "99" followed
# by the open-ended class "100+". Generating the codes replaces a 101-line
# hand-typed list in which a typo or a missing age would be hard to spot.
pop_age_values = [str(age) for age in range(100)] + ["100+"]

# Query body for the population table: region code "00", every single-year
# age class, and both codes ("1", "2") of the "Kon" variable. The response is
# requested as JSON.
pop_json_data = {
  "query": [
    {
      "code": "Region",
      "selection": {
        "filter": "vs:RegionRiket99",
        "values": ["00"],
      },
    },
    {
      "code": "Alder",
      "selection": {
        "filter": "vs:Ålder1årA",
        "values": pop_age_values,
      },
    },
    {
      "code": "Kon",
      "selection": {
        "filter": "item",
        "values": ["1", "2"],
      },
    },
  ],
  "response": {
    "format": "json",
  },
}

In [None]:
# POST the population query to SCB. requests applies no timeout by default,
# so an unresponsive server would block this cell forever; the explicit
# timeout (seconds) turns that into an exception instead.
pop_r = requests.post(pop_url, json=pop_json_data, timeout=60)
# HTTP status code of the response, shown as the cell output.
pop_r.status_code

In [None]:
# Decode the JSON body of the population response.
pop_json = pop_r.json()
pop_records = pop_json['data']
# Number of records returned.
nr_recs = len(pop_records)
# Show the first record to reveal the key / values layout.
pop_records[0]

In [None]:
# Flatten every record into [age, gender, year, population]; key[0]
# (presumably the region code) is not needed. The records are iterated
# directly rather than through range(nr_recs): nr_recs is a shared global that
# the deaths cell further down overwrites, so an index loop would use the
# wrong length if this cell were re-run afterwards.
pop_list = [
    [rec['key'][1], rec['key'][2], rec['key'][3], rec['values'][0]]
    for rec in pop_json['data']
]
  


In [None]:
# Tabulate the flattened population records.
pop_df = pd.DataFrame(pop_list, columns=['age', 'gender', 'year', 'pop'])

# Population counts are converted to integers.
pop_df['pop'] = pop_df['pop'].astype(int)

# The open-ended top class is coded "100+": drop the plus sign so that the
# age column can be converted to integers (that class is then stored as 100).
age_without_plus = pop_df['age'].str.replace('+', '', regex=False)
pop_df['age'] = age_without_plus.astype(int)

pop_df

In [None]:
# Add up the per-gender rows so that one population figure remains per
# (year, age). Only 'pop' is selected on purpose: 'gender' is a string column,
# and from pandas 2.0 on sum() no longer drops such columns but concatenates
# the strings, which breaks the divisions and joins done on this frame later.
pop_df = pop_df.groupby(['year', 'age'])[['pop']].sum()
pop_df

In [None]:
# Collapse the age level: total population per year.
yearly_tot_pop = pop_df.groupby(level='year').sum()
yearly_tot_pop

In [None]:
# Ratio of each year's total population to the total of the preceding year.
# The first year has no predecessor, so its ratio is NaN.
previous_year_pop = yearly_tot_pop.shift(1)
pop_growth = yearly_tot_pop.div(previous_year_pop)
pop_growth

In [None]:
# Get SCB's official death counts for the years up to and including 2019

In [None]:
# deaths
# Endpoint of SCB's API for the deaths table "DodaHandelseK".
dead_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101I/DodaHandelseK'

# Age codes requested from SCB: the single-year classes "0" .. "99" followed
# by the open-ended class "100+". Generating the codes replaces a 101-line
# hand-typed list in which a typo or a missing age would be hard to spot.
dead_age_values = [str(age) for age in range(100)] + ["100+"]

# Query body for the deaths table: region code "00", every single-year age
# class, and both codes ("1", "2") of the "Kon" variable. The response is
# requested as JSON.
dead_json_data = {
  "query": [
    {
      "code": "Region",
      "selection": {
        "filter": "vs:RegionRiket99",
        "values": ["00"],
      },
    },
    {
      "code": "Alder",
      "selection": {
        "filter": "vs:Ålder1årA",
        "values": dead_age_values,
      },
    },
    {
      "code": "Kon",
      "selection": {
        "filter": "item",
        "values": ["1", "2"],
      },
    },
  ],
  "response": {
    "format": "json",
  },
}

In [None]:
# POST the deaths query to SCB. requests applies no timeout by default, so an
# unresponsive server would block this cell forever; the explicit timeout
# (seconds) turns that into an exception instead.
r_dead = requests.post(dead_url, json=dead_json_data, timeout=60)
# HTTP status code of the response, shown as the cell output.
r_dead.status_code

In [None]:
# Decode the JSON body of the deaths response and print the first record to
# reveal the key / values layout.
dead_json = r_dead.json()
data = dead_json['data']
nr_recs = len(data)
print(data[0])

# Flatten every record into [age, gender, year, deaths]; key[0] (presumably
# the region code) is not needed. The records are iterated directly instead
# of being indexed through range(nr_recs).
dead_list = [
    [rec['key'][1], rec['key'][2], rec['key'][3], rec['values'][0]]
    for rec in data
]
  

In [None]:
# Tabulate the flattened death records.
dead_df = pd.DataFrame(dead_list, columns=['age', 'gender', 'year', 'dead'])

# Death counts are converted to integers.
dead_df['dead'] = dead_df['dead'].astype(int)

# The open-ended top class is coded "100+": drop the plus sign so that the
# age column can be converted to integers (that class is then stored as 100).
dead_df['age'] = dead_df['age'].str.replace('+', '', regex=False).astype(int)

# Add up the per-gender rows so that one death count remains per (year, age).
# Only 'dead' is selected on purpose: 'gender' is a string column, and from
# pandas 2.0 on sum() no longer drops such columns but concatenates the
# strings, which would later collide with / corrupt the population frame.
dead_df = dead_df.groupby(['year', 'age'])[['dead']].sum()
dead_df



In [None]:
# Attach the death counts to the population rows (left join on the shared
# (year, age) index). Population rows without a matching deaths row come out
# as NaN and are set to 0 here.
pop_data = pop_df.join(dead_df, how='left').replace(np.nan, 0)
pop_data

In [None]:
# SCB's preliminary 2020 deaths are only published for four age groups, so
# the single-year ages are binned to match. The bins are right-inclusive:
# 0-64, 65-79, 80-89 and 90 or older; each group is labelled by the upper
# edge of its bin (64, 79, 89, 200).
ages = pop_data.index.get_level_values(1)
pop_data['age_grp'] = pd.cut(
    ages,
    bins=[-1, 64, 79, 89, 200],
    labels=[64, 79, 89, 200],
)
pop_data

In [None]:
# Sum the columns of pop_data within each (year, age group) combination.
pop_data_4_grps = pop_data.groupby(by=['year', 'age_grp']).sum()
pop_data_4_grps

In [None]:
# Open SCB's Excel workbook with the preliminary 2020 death statistics
# straight from the web, and list the sheets it contains.
scb_excel_url = 'https://www.scb.se/hitta-statistik/statistik-efter-amne/befolkning/befolkningens-sammansattning/befolkningsstatistik/pong/tabell-och-diagram/preliminar-statistik-over-doda/'
scb_excel = pd.ExcelFile(scb_excel_url)
scb_excel.sheet_names



In [None]:
# Read sheet 'Tabell 7': skip its first 8 rows and keep only the eight
# columns at positions 11-18 (later split into 4 male + 4 female columns).
scb_prel_dead = scb_excel.parse(
    'Tabell 7',
    skiprows=8,
    usecols=range(11, 19),
)
# Keep an untouched copy, because scb_prel_dead itself is trimmed later on.
scb_prel_dead_orig = scb_prel_dead.copy()

scb_prel_dead

In [None]:
# The row at position 53 is handled separately from the regular rows 0-52.
# NOTE(review): presumably it holds the deaths that could not be assigned to
# a period - confirm against the spreadsheet layout.
unknowns = scb_prel_dead.iloc[53]
# Relabel by age group: the first four values are male, the last four female.
unknowns.index = [64, 79, 89, 200] * 2

unknown_male = unknowns.iloc[:4]
unknown_female = unknowns.iloc[4:]

unknown_male.name = '2020'
unknown_female.name = '2020'

print (unknown_male,unknown_female)


In [None]:
# Keep only the rows labelled 0 through 52 (label-based, so 52 is included);
# this drops the separately handled row 53.
scb_prel_dead = scb_prel_dead.loc[:52, :]
scb_prel_dead

In [None]:
# Split the eight columns into a male block (first four) and a female block
# (last four), then label the columns of each block with the age-group codes.
scb_prel_dead_male = scb_prel_dead.iloc[:, :4]
scb_prel_dead_female = scb_prel_dead.iloc[:, 4:]
scb_prel_dead_male.columns = [64, 79, 89, 200]
scb_prel_dead_female.columns = [64, 79, 89, 200]


In [None]:
def allocate_unknowns(df,unknown):
  """Spread the `unknown` counts over the rows of `df`, column by column.

  Every cell receives a part of its column's unknown count that is
  proportional to the cell's share of the column total.

  Args:
    df: DataFrame of counts, one column per group.
    unknown: per-column counts to distribute; matched to the columns of `df`
      by label.

  Returns:
    A new DataFrame shaped like `df` whose column sums equal
    `df.sum() + unknown`. A column whose total is 0 comes out as NaN,
    because its shares are 0 / 0.
  """
  column_totals = df.sum()
  share_of_column = df.div(column_totals)
  extra = share_of_column.mul(unknown)
  return df.add(extra)

In [None]:
# Display the male unknown counts per age group.
unknown_male

In [None]:
# Distribute each gender's unknown counts over that gender's regular rows.
corrected_male_dead = allocate_unknowns(scb_prel_dead_male, unknown_male)
corrected_female_dead = allocate_unknowns(scb_prel_dead_female, unknown_female)

# Both genders together, per row and age group.
corrected_dead = corrected_male_dead.add(corrected_female_dead)
corrected_dead

In [None]:
# Sanity check: the grand total of the untouched sheet (printed) next to the
# grand total after the unknowns were allocated (cell output).
raw_grand_total = scb_prel_dead_orig.sum(axis=1).sum()
print (raw_grand_total)
corrected_dead.sum(axis=1).sum()

In [None]:
# Male plus female counts per row and age group, WITHOUT the unknown counts.
# NOTE(review): this uncorrected sum - not corrected_dead - is what feeds the
# 2020 totals below, so the unknown counts are left out of the 2020 figures.
# Confirm that this is intended.
scb_prel_dead_combined = scb_prel_dead_male.add(scb_prel_dead_female)
scb_prel_dead_combined

In [None]:
# Column-wise sum over all rows: 2020 deaths to date per age group.
scb_2020_age_grp_ytd = scb_prel_dead_combined.sum(axis=0)
scb_2020_age_grp_ytd

In [None]:
# Re-index the 2020 totals by (year, age group) pairs - ('2020', 64),
# ('2020', 79), ('2020', 89), ('2020', 200) - the same kind of labels that
# pop_data_4_grps uses for its rows.
multi_idx = pd.MultiIndex.from_product([['2020'], [64, 79, 89, 200]])
scb_2020_age_grp_ytd.index = multi_idx
scb_2020_age_grp_ytd

In [None]:
# Write the preliminary 2020 totals into the 'dead' column of the 2020 rows.
# NOTE(review): relies on pandas matching the (year, age group) labels of the
# assigned Series to the target rows - verify that all four 2020 rows end up
# with the expected values.
pop_data_4_grps.loc['2020','dead'] = scb_2020_age_grp_ytd
pop_data_4_grps

In [None]:
# Mortality per (year, age group): deaths divided by population.
deaths = pop_data_4_grps['dead']
population = pop_data_4_grps['pop']
pop_data_4_grps['mortality'] = deaths.div(population)
pop_data_4_grps

In [None]:
# The 2015 population per age group serves as the standard population for
# the age adjustment.
std_year = '2015'
std_pop = pop_data_4_grps.loc[std_year, 'pop']
std_pop

In [None]:
# Deaths each (year, age group) would have had with the standard population:
# that group's mortality times the standard population of the same age group.
# NOTE(review): relies on pandas aligning the two Series on the age-group
# labels - confirm that the result has one value per (year, age group).
mortality = pop_data_4_grps['mortality']
age_adj_dead = mortality.mul(std_pop)
age_adj_dead

In [None]:
# Store the age-adjusted death counts as a column next to the raw figures.
pop_data_4_grps['age_adj_dead'] = age_adj_dead
pop_data_4_grps

In [None]:
# Total age-adjusted deaths per year (summed over the age groups).
age_adj_years = pop_data_4_grps['age_adj_dead'].groupby(level='year').sum()
age_adj_years

In [None]:
# Age-adjusted mortality per year: age-adjusted deaths divided by the size
# of the standard population.
std_pop_total = std_pop.sum()
age_adj_mort = age_adj_years.div(std_pop_total)
age_adj_mort

In [None]:
# Bar chart of the age-adjusted mortality per year. Every bar is crimson
# except the last one (2020, year to date), which is orange.
n_years = len(age_adj_mort)
colors = ['crimson'] * (n_years - 1) + ['orange']

title = 'Age adj. mortality 2020 YTD, others full years. [based on 4 age grps, std year 2015]'

age_adj_mort.plot(kind='bar', title=title, color=colors, figsize=(18, 12))
plt.ylabel('mortality')
# y-axis ticks every 0.0005, from 0 up to (but excluding) 0.012.
_ = plt.yticks(np.arange(0, 0.012, 0.0005))

In [None]:
# Year-over-year population ratio per age group: years as rows, age groups
# as columns, each value divided by the value of the preceding row.
param = 'pop'
pop_by_year_and_grp = pop_data_4_grps[param].unstack()
age_grp_y2y_growth = pop_by_year_and_grp / pop_by_year_and_grp.shift()
age_grp_y2y_growth

In [None]:
# Plot the population ratios; the dashed line at 1 marks "no change".
age_grp_y2y_growth.plot(style='o--', figsize=(18, 12))
plt.axhline(y=1, color='k', ls='dashed')
# One x tick per row, labelled with the row's year.
year_labels = age_grp_y2y_growth.index
_ = plt.xticks(range(len(year_labels)), year_labels)

In [None]:
# Year-over-year mortality ratio per age group: years as rows, age groups as
# columns, each value divided by the value of the preceding row.
param = 'mortality'
mort_by_year_and_grp = pop_data_4_grps[param].unstack()
age_grp_y2y_mort_growth = mort_by_year_and_grp / mort_by_year_and_grp.shift()
age_grp_y2y_mort_growth

In [None]:
# Plot the mortality ratios for every year except the last row.
growth_excl_last = age_grp_y2y_mort_growth.iloc[:-1]
growth_excl_last.plot(style='o--', figsize=(18, 12))
# One x tick per plotted row, labelled with the row's year.
_ = plt.xticks(range(len(growth_excl_last)), growth_excl_last.index)
# The dashed line at 1 marks "no change".
plt.axhline(y=1, color='k', ls='dashed')

In [None]:
# Baseline: mean mortality per age group over the years 2015 through 2019.
mort_2015_2019 = pop_data_4_grps.loc['2015':'2019', 'mortality']
base_15_19_mort = mort_2015_2019.groupby(['age_grp']).mean()
# Replace the index with plain integer age-group labels.
base_15_19_mort.index = [64, 79, 89, 200]
base_15_19_mort

In [None]:
# Mortality per age group over the years, with each group's 2015-2019
# baseline drawn as a thin dashed horizontal line.
ax = pop_data_4_grps.unstack()['mortality'].plot(style='o--', figsize=(18, 12))
for baseline in base_15_19_mort:
    plt.axhline(baseline, color='k', ls='dashed', lw=1)

# Log scale on y, since the age groups differ greatly in mortality level.
plt.yscale('log')
# One x tick per year; the labels are taken from the ratio table's index.
tick_labels = age_grp_y2y_mort_growth.index
_ = plt.xticks(range(len(tick_labels)), tick_labels)
