In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import requests

# Switch matplotlib over to seaborn's default styling for every figure drawn below.
sns.set()

In [None]:
# Population of Sweden used as the 2020 denominator:
# SCB's November figure, as updated in December 2020.
pop_2020 = 10_378_483


In [None]:
# Open the workbook of preliminary deaths once; the handle is reused when a sheet is parsed later.
scb_prel = pd.ExcelFile('scb_prel_deaths.xlsx')
# Show the available sheet names so the right one can be picked.
scb_prel.sheet_names

In [None]:
# Read sheet 'Tabell 1', skipping the first 6 rows and keeping only the column at position 6.
# NOTE(review): presumably position 6 is the 2020 column of the preliminary table - confirm against the workbook.
deaths_2020 = scb_prel.parse('Tabell 1',skiprows=6,usecols=[6])
deaths_2020

In [None]:
# Collapse the parsed column into a single total for 2020, stored as a one-element
# Series named 'dead' and labelled '2020'.
#
# The total is extracted as a plain scalar before the Series is built: passing the
# Series returned by DataFrame.sum() to pd.Series together with index=['2020'] makes
# pandas re-align on labels, which silently yields NaN unless the parsed column happens
# to be labelled exactly '2020'.
# np.ravel also accepts a scalar, so re-running this cell (when deaths_2020 is already
# the one-element Series) keeps working.
deaths_2020 = pd.Series([np.ravel(deaths_2020.sum())[0]],name='dead',index=['2020'])
deaths_2020

In [None]:
# SCB statistics-database endpoint for table BefUtvKon1749.
dead_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101G/BefUtvKon1749'

# Query body for the POST request: one item filter per table dimension, JSON response.
# NOTE(review): "1+2" presumably selects both sexes combined and "0000001F" the deaths
# series - confirm against the table metadata.
dead_json_post = {
    "query": [
        {"code": "Kon", "selection": {"filter": "item", "values": ["1+2"]}},
        {"code": "ContentsCode", "selection": {"filter": "item", "values": ["0000001F"]}},
    ],
    "response": {"format": "json"},
}

In [None]:
# POST the deaths query to the SCB API.
# An explicit timeout is passed because requests otherwise waits indefinitely on an
# unresponsive server, which would stall a "Run All" of the notebook.
r_dead = requests.post(dead_url,json=dead_json_post,timeout=30)
# Fail here with a clear HTTPError on a 4xx/5xx answer instead of a confusing
# JSON/KeyError in a later cell.
r_dead.raise_for_status()
r_dead.status_code

In [None]:
# Decode the JSON body of the response.
dead_json = r_dead.json()
# Peek at the first record under 'data' to see its layout before building a DataFrame from it.
dead_json['data'][0]

In [None]:
# One row per record returned by the API.
dead_df = pd.DataFrame.from_dict(dead_json['data'])

# Every 'key' entry is a two-item list that is split into a gender and a year column;
# the death count is the first item of the matching 'values' list.
dead_df[['gender','year']] = dead_df['key'].to_list()
dead_df['dead'] = [vals[0] for vals in dead_df['values']]

# Keep only the count (as integers), indexed by year.
dead_df = (
    dead_df
    .drop(columns=['key','values','gender'])
    .astype({'dead': int})
    .set_index('year')
)

# Add (or overwrite) the 2020 row with the total taken from the preliminary workbook.
dead_df.loc['2020','dead'] = deaths_2020.values

dead_df


In [None]:
# SCB statistics-database endpoint for table BefUtvKon1749.
pop_url = 'http://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101G/BefUtvKon1749'

# Query body for the POST request: one item filter per table dimension, JSON response.
# NOTE(review): "1+2" presumably selects both sexes combined and "000000LV" the
# population series - confirm against the table metadata.
pop_json_post = {
    "query": [
        {"code": "Kon", "selection": {"filter": "item", "values": ["1+2"]}},
        {"code": "ContentsCode", "selection": {"filter": "item", "values": ["000000LV"]}},
    ],
    "response": {"format": "json"},
}

In [None]:
# POST the population query to the SCB API.
# An explicit timeout is passed because requests otherwise waits indefinitely on an
# unresponsive server, which would stall a "Run All" of the notebook.
r_pop = requests.post(pop_url,json=pop_json_post,timeout=30)
# Fail here with a clear HTTPError on a 4xx/5xx answer instead of a confusing
# JSON/KeyError in a later cell.
r_pop.raise_for_status()
r_pop.status_code

In [None]:
# Decode the JSON body of the response.
pop_json = r_pop.json()
# Peek at the first record under 'data' to see its layout before building a DataFrame from it.
pop_json['data'][0]

In [None]:
# One row per record returned by the API.
pop_df = pd.DataFrame.from_dict(pop_json['data'])

# Every 'key' entry is a two-item list that is split into a gender and a year column;
# the population count is the first item of the matching 'values' list.
pop_df[['gender','year']] = pop_df['key'].to_list()
pop_df['pop'] = [vals[0] for vals in pop_df['values']]

# Keep only the count (as integers), indexed by year.
pop_df = (
    pop_df
    .drop(columns=['key','values','gender'])
    .astype({'pop': int})
    .set_index('year')
)

# Add (or overwrite) the 2020 row with the manually entered population figure.
pop_df.loc['2020','pop'] = pop_2020
pop_df

In [None]:
# Deaths divided by population, row by row; pandas aligns the two columns on their 'year' index.
mortality = dead_df['dead'] / pop_df['pop']
mortality

In [None]:
def smooth(x,window_len=10,window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with ``window_len - 1`` samples obtained
    by point-reflecting it about its first and last value (``2*x[0] - x[k]`` and
    ``2*x[-1] - x[-1-k]`` for ``k = 1 .. window_len-1``).  This keeps the level
    and slope continuous across the boundary so that edge transients of the
    convolution are minimised; a straight line is returned unchanged when
    ``window_len`` is odd.

    Parameters
    ----------
    x : array-like
        The input signal (one-dimensional; a pandas Series or DataFrame column
        is accepted and converted to a float array).
    window_len : int
        Number of samples in the smoothing window.
    window : str
        One of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'.
        'flat' produces a moving average.

    Returns
    -------
    numpy.ndarray
        The smoothed signal, same length as ``x``.  If ``window_len < 3`` the
        input is returned unsmoothed (as a float array).

    Raises
    ------
    ValueError
        If ``x`` is not one-dimensional, is shorter than ``window_len``, or
        ``window`` is not one of the supported names.

    Example
    -------
    t = np.arange(-2, 2, 0.1)
    x = np.sin(t) + np.random.randn(len(t)) * 0.1
    y = smooth(x)

    See also
    --------
    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    scipy.signal.lfilter

    TODO: the window parameter could be the window itself if an array instead of a string
    """
    x = np.array(x,dtype=float) # convert from dataframe column to array

    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    # A window of one or two samples cannot smooth anything (and np.hanning(2)
    # sums to zero), so hand the signal back untouched.
    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Point-reflected padding, window_len-1 samples on each side.  The slices
    # start at the neighbour of the end point (x[1] / x[-2]) so that the end
    # point itself is neither skipped nor duplicated.
    head = 2*x[0] - x[window_len-1:0:-1]
    tail = 2*x[-1] - x[-2:-window_len-1:-1]
    s = np.concatenate((head, x, tail))

    if window == 'flat': # moving average
        w = np.ones(window_len,'d')
    else:
        # Name was validated above, so a plain attribute lookup replaces eval().
        w = getattr(np, window)(window_len)

    y = np.convolve(w/w.sum(),s,mode='same')
    # Drop the padded samples again so the output lines up with the input.
    return y[window_len-1:-window_len+1]


In [None]:
# Positional offset into `mortality`; only the rows from this position onwards are smoothed.
start_at = 200

# Width of the smoothing window.
# Author's observation: the shorter the window, the more excess 2020 shows.
window_len = 20

# Baseline: the Hanning-smoothed tail of the mortality series.
smoothed = smooth(
    mortality.iloc[start_at:],
    window_len=window_len,
    window='hanning',
)
smoothed

In [None]:
# Display the population table again for reference before it is used to scale the excess.
pop_df

In [None]:
# Absolute excess deaths per year: (observed mortality - smoothed baseline) * population.
# The population is restricted to the same years as the smoothed tail. Multiplying by the
# full pop_df['pop'] would align on the union of both indexes and bring back every year
# before start_at as NaN, padding the result and the plot made from it.
abs_excess = (mortality[start_at:] - smoothed) * pop_df['pop'].reindex(mortality.index[start_at:])
abs_excess

In [None]:
# Draw the absolute excess per year, label the y axis and store the figure as a JPEG.
excess_ax = abs_excess.plot(
    style='o--',
    figsize=(18,12),
    title='SWEDEN absolute excess deaths based on Curve fitting',
)
excess_ax.set_ylabel('number of excess deaths')
excess_ax.figure.savefig('swe_curve_fit_excess.jpg',format='jpg')

In [None]:
# Observed mortality against the smoothed baseline, from position start_at onwards.
fig, ax = plt.subplots(figsize=(18,12))

recent_years = mortality.index[start_at:]

# Three-line title: model description, purpose, data source.
title1 = 'SWEDEN non-age-adj. mortality {}-2020 Y2D  - Curve Fitting Model (Hanning Window)'.format(
    recent_years[0])
title2 = '\nPURPOSE : to illustrate the pitfalls with "Excess Deaths"'
title4 = '\nDataSource : scb.se'
ax.set_title(title1 + title2 + title4)

ax.plot(recent_years,mortality[start_at:],'o--',label='2020 data YTD')
ax.plot(
    recent_years,
    smoothed,
    label='Baseline by Smoothed Hanning Window, win_len:{}'.format(window_len),
)

ax.set_ylabel('mortality')
ax.set_xlabel('year')
ax.legend(loc='upper left')

# Hide every second tick label so the year axis stays readable, then turn the rest upright.
for tick_label in ax.xaxis.get_ticklabels()[::2]:
    tick_label.set_visible(False)

plt.setp(ax.get_xticklabels(),rotation=90)

fig.savefig('swe_excess_pitfall_smoothed_{}.jpg'.format(window_len),format='jpg')

In [None]:
# Ratio of observed mortality to the smoothed baseline (1.0 means exactly on the baseline).
delta = mortality[start_at:] / smoothed
delta.plot(figsize=(18,12))