In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pymc as pm
import scipy.stats as sps

# Switch matplotlib over to seaborn's default styling for every figure below.
sns.set()


In [None]:
# Read the SCB deaths-per-day export: semicolon-separated, ISO-8859-1 encoded,
# keeping only the first eight columns. Print the column sums, then show the frame.
df = pd.read_csv(
    'scb-dead_per_day.csv',
    sep=';',
    encoding='ISO-8859-1',
    usecols=range(8),
)
print(df.sum())
df

In [None]:
# Re-index the rows by calendar day of 2020; the range has 366 entries, so the
# frame must have exactly that many rows for set_index to succeed.
dates = pd.date_range('2020-01-01', '2020-12-31')
df.set_index(dates, inplace=True)

# Mean and standard deviation of the '2015-2019' (average) column across days.
# `mean` and `std` are reused by the daily-deaths plot further down.
mean = df['2015-2019'].mean()
std = df['2015-2019'].std()
print(mean, std)
print(df.sum())

# Spread between the largest and smallest yearly total among 2015..2019.
# The previous expression subtracted the sum of the *average* column from the
# max of only the 2015 and 2019 totals, which is not a max-min spread.
yearly_totals = df.loc[:, '2015':'2019'].sum()
print(yearly_totals.max() - yearly_totals.min())


In [None]:
# random variability 954 vs diff max-min 842 vs 2020 y2d excess 2300
# (figures recorded by the author from an earlier run)

# Yearly totals for 2015..2019 and their mean / standard deviation.
yearly_totals = df.loc[:, '2015':'2019'].sum()
yearly_mean = yearly_totals.mean()
print(yearly_mean)
yearly_std = yearly_totals.std()
print(yearly_std)

# Draw Poisson samples with the observed yearly mean as the rate.
# NumPy's generator replaces pm.rpoisson, which only exists in the legacy
# PyMC2 API; the fixed seed makes the cell reproducible on re-run.
poisson_rng = np.random.default_rng(0)
samples = poisson_rng.poisson(yearly_mean, size=100000)

# 89% interval of the simulated yearly totals and its width.
ci = np.percentile(samples, [5.5, 94.5])
print(ci)
print(ci[1] - ci[0])
samples_mean = samples.mean()
samples_std = samples.std()

print(samples_mean)
print(samples_std)

# Simulated totals vs. the five observed yearly totals.
plt.hist(samples, density=True)
plt.hist(yearly_totals, density=True)

# Overlay the exact Poisson pmf and a normal pdf fitted to the observed totals.
x = np.arange(86000, 92000)
y = sps.poisson.pmf(x, mu=yearly_mean)
plt.plot(x, y)

y = sps.norm.pdf(x, yearly_mean, yearly_std)
plt.plot(x, y)


In [None]:
# Mean and standard deviation of the daily 2015-2019 average column.
daily_mean = df['2015-2019'].mean()
daily_std = df['2015-2019'].std()

print(daily_mean, daily_std)
plt.hist(df['2015-2019'], density=True, alpha=0.7)

# Poisson samples with the daily mean as the rate, for comparison with the
# observed distribution. NumPy's seeded generator replaces pm.rpoisson, which
# only exists in the legacy PyMC2 API.
daily_samples = np.random.default_rng(0).poisson(daily_mean, 10000)
plt.hist(daily_samples, density=True, alpha=0.7)

In [None]:
# Plot every column as a daily series, then overlay the 2015-2019 mean and
# horizontal bands at +/- 1..6 standard deviations around it.
df.plot(figsize=(18,12))
plt.title('Daily Deaths Sweden ')
plt.axhline(mean, ls='dashed', color='crimson', label='mean 2015-2019')

# Only the first band carries a label, so the legend gets a single std entry.
plt.axhline(mean + std, ls='dashed', color='orange', label='standard dev 2015-2019')
plt.axhline(mean - std, ls='dashed', color='orange')
for n_std in range(2, 7):
    plt.axhline(mean + n_std * std, ls='dashed', color='orange')
    plt.axhline(mean - n_std * std, ls='dashed', color='orange')

plt.ylabel('Nr of deaths per day')

plt.legend(loc='upper right')


In [None]:
# 3x2 grid with shared axes: one panel per year, each overlaid with the
# 2015-2019 average as a dashed line. axes.flat walks the grid row by row,
# which matches the chronological order of the years.
fig, axes = plt.subplots(3, 2, sharex=True, sharey=True, figsize=(18,12))

for ax, year in zip(axes.flat, ['2015', '2016', '2017', '2018', '2019', '2020']):
    ax.plot(df[year])
    ax.plot(df['2015-2019'], ls='dashed', color='crimson')
    ax.set_title(year)


In [None]:
# Running total of every column over the year, drawn on a single figure.
cum_ax = df.cumsum().plot(figsize=(18,12))
cum_ax.set_title('Cumulative deaths Sweden')
cum_ax.set_ylabel('total number of deaths')

In [None]:
# 3x2 grid with shared axes: cumulative deaths per year, each overlaid with
# the cumulative 2015-2019 average as a dashed line. axes.flat walks the grid
# row by row, which matches the chronological order of the years.
fig, axes = plt.subplots(3, 2, sharex=True, sharey=True, figsize=(18,12))

for ax, year in zip(axes.flat, ['2015', '2016', '2017', '2018', '2019', '2020']):
    ax.plot(df[year].cumsum())
    ax.plot(df['2015-2019'].cumsum(), ls='dashed', color='crimson')
    ax.set_title(year)


In [None]:
# Daily excess: 2020 deaths minus the 2015-2019 average for the same row.
df['excess_deaths'] = df['2020'] - df['2015-2019']

# The first row whose 2020 value is 0 is treated as the first day without
# data, so the last usable day is the one before it. If no such row exists
# (presumably a fully reported year), fall back to the final index entry
# instead of raising IndexError.
zero_rows = df[df['2020'] == 0]
if len(zero_rows):
    last_day = zero_rows.index[0] - pd.DateOffset(days=1)
else:
    last_day = df.index[-1]

print(last_day)

# Restrict to the usable days and accumulate the daily excess.
df_until_last_day = df.loc[:last_day].copy()
df_until_last_day['cum_excess'] = df_until_last_day['excess_deaths'].cumsum()

# Cumulative baseline vs. cumulative 2020, with absolute and relative gap.
cumsums = df_until_last_day[['2015-2019', '2020']].cumsum()
cumsums['delta'] = cumsums['2020'] - cumsums['2015-2019']
# NOTE: despite the name, this is the plain ratio 2020 / baseline, not a percentage.
cumsums['delta_pct'] = cumsums['2020'] / cumsums['2015-2019']

cumsums.tail(20)

In [None]:
# Replace the datetime index with plain date objects, then draw the daily
# excess as a bar chart and save it under a file name stamped with last_day.
df_until_last_day.index = df_until_last_day.index.date
daily_excess = df_until_last_day['excess_deaths']

plt.figure(figsize=(18,12))
plt.title('Sweden Excess Deaths \nData Source: SCB.se')
plt.bar(daily_excess.index, daily_excess, color='crimson')
plt.xticks(rotation=90)
plt.ylabel('number of excess deaths')

daily_jpg = 'SCB_excess_daily_{}.jpg'.format(last_day.date())
plt.savefig(daily_jpg, format='jpg')

In [None]:
# Bar chart of the running excess-deaths total; the last 60 values are also
# printed before the figure is saved under a file name stamped with last_day.
cum_excess = df_until_last_day['cum_excess']

plt.figure(figsize=(18,12))
plt.title('Sweden Excess Deaths Cumulative \nData Source: SCB.se')
plt.bar(cum_excess.index, cum_excess, color='crimson')
plt.xticks(rotation=90)
plt.ylabel('cumulative number of excess deaths')
print(cum_excess.tail(60))

cumsum_jpg = 'SCB_excess_daily_cumsum_{}.jpg'.format(last_day.date())
plt.savefig(cumsum_jpg, format='jpg')

In [None]:
# Restore a datetime index so the frame can be sliced by date and resampled.
df_until_last_day.index = pd.to_datetime(df_until_last_day.index)

# Skip the last 7 days since they are likely to be updated: the cut-off is
# the final index entry minus one week, reduced to a plain date.
last_week = (df_until_last_day.index[-1] - pd.DateOffset(weeks=1)).date()

until_last_week = df_until_last_day[:last_week]
until_last_week.tail(40)  # evaluated but not rendered (not the cell's last expression)

# Weekly sums of the daily excess.
weekly = until_last_week['excess_deaths'].resample('W').sum()
weekly

In [None]:
# Weekly excess deaths as a pandas bar chart. The index is first replaced by
# plain date objects (presumably to keep the time part out of the tick labels).
weekly.index = weekly.index.date
weekly.plot.bar(
    figsize=(18,12),
    color='crimson',
    title='Sweden Excess Deaths 2020 by week\n Data Source: SCB.se',
)
plt.ylabel('nr of excess deaths')

weekly_jpg = 'SCB_excess_weekly_{}.jpg'.format(last_day.date())
plt.savefig(weekly_jpg, format='jpg')


In [None]:
# Running total of the weekly excess, computed once and reused below.
weekly_cum = weekly.cumsum()

# Take the final value by position. Plain [-1] on a Series with a non-integer
# index relies on deprecated positional fallback in recent pandas.
latest = weekly_cum.iloc[-1]

weekly_cum.plot.bar(
    figsize=(18,12),
    color='crimson',
    title='Sweden cumulative excess deaths for year 2020, currently: {} \n Data Source: SCB.se'.format(latest),
)
print(weekly_cum)
plt.ylabel('total number of excess deaths')
plt.savefig('SCB_excess_weekly_cumsum_{}.jpg'.format(last_day.date()), format='jpg')


In [None]:
# Render the current state of the main frame.
df

In [None]:
# 2019 flu season: per-row difference between the 2019 column and the
# 2015-2019 average, stored as a new column; the frame is then displayed.
df['excess_2019'] = df['2019'].sub(df['2015-2019'])
df

In [None]:
# Weekly sums of every column, plus running totals of both excess columns.
df_weekly = df.resample('W').sum()
for cum_col, excess_col in (('2020_cum', 'excess_deaths'), ('2019_cum', 'excess_2019')):
    df_weekly[cum_col] = df_weekly[excess_col].cumsum()

# Move the week labels back by one year.
df_weekly.index = df_weekly.index - pd.DateOffset(years=1)

df_weekly

In [None]:
# Weekly 2019 excess deaths as a bar chart.
plt.figure(figsize=(18,12))
weekly_excess_2019 = df_weekly['excess_2019']
plt.bar(weekly_excess_2019.index, weekly_excess_2019, color='crimson')
plt.title('Excess deaths 2019')

In [None]:
# Running total of the weekly 2019 excess deaths as a bar chart.
plt.figure(figsize=(18,12))
weekly_cum_2019 = df_weekly['2019_cum']
plt.bar(weekly_cum_2019.index, weekly_cum_2019, color='crimson')
plt.title('2019 Excess Deaths Cumulative')

In [None]:
# Render the current state of the main frame.
df

In [None]:
# Total of the 2019 excess column from the 2020-10-01 row onward (printed),
# followed by the totals of both excess columns up to and including 2020-05-15.
excess_2019_from_october = df.loc['2020-10-01':, 'excess_2019'].sum()
print(excess_2019_from_october)

df.loc[:'2020-05-15', ['excess_deaths', 'excess_2019']].sum()

In [None]:
start_day = '2020-05-22'  # set 7 days back so the SCB figures are stable
start_period = pd.Timestamp(start_day)
season_start_2019 = start_period - pd.DateOffset(years=1)
day_before_start = start_period - pd.DateOffset(days=1)

# Second half of the season: the 2019 column from the start row onward,
# relabelled with real 2019 dates running to year end.
df_19_2h = df.loc[start_period:, '2019']
df_19_2h.index = pd.date_range(season_start_2019, '2019-12-31')

# Weekday of the season's first day; used later to anchor the weekly bins.
dayname = df_19_2h.index[0].day_name()

# First half of 2020: the 2020 column up to the day before the start row.
df_20_1h = df.loc['2020-01-01':day_before_start, '2020']
print(df_20_1h)

# Join both pieces into one continuous 2019/2020 season and show its total.
df_19_20 = pd.concat([df_19_2h, df_20_1h])
df_19_20.sum()

In [None]:
# Build the baseline (2015-2019 average) on the same season axis as df_19_20:
# rows from the start row onward get 2019 dates, rows before it keep their own.
df_baseline_1h = df.loc[start_period:, '2015-2019']
df_baseline_1h.index = pd.date_range(
    start_period - pd.DateOffset(years=1),
    '2019-12-31',
)

baseline_cutoff = start_period - pd.DateOffset(days=1)
df_baseline_2h = df.loc[:baseline_cutoff, '2015-2019']

df_baseline = pd.concat([df_baseline_1h, df_baseline_2h])

# Total season difference between observed deaths and the baseline.
(df_19_20 - df_baseline).sum()

In [None]:
# Daily deaths for the 2019/2020 season against the baseline, saved as JPEG.
# Note: last_day is rebound here to the final date of the season series.
last_day = df_19_20.index[-1].date()
season_title = 'Sweden Daily Deaths : season 2019/2020 vs baseline [Last day: {}]'.format(last_day)

plt.figure(figsize=(18,12))
plt.title(season_title)
plt.plot(df_19_20, 'x--', color='crimson', label='2019/2020')
plt.plot(df_baseline, color='orange', label='Average 2015-2019')
plt.ylabel('# daily deaths')
plt.legend(loc='upper left')
plt.savefig('SCB_season_19_20.jpg', format='jpg')

In [None]:
# Cumulative deaths for the 2019/2020 season against the baseline. The total
# season difference appears in both the title and the y-axis label.
season_diff = df_19_20.sum() - df_baseline.sum()
print(season_diff)

cumulative_title = (
    'Sweden Cumulative Deaths : season 2019/2020 vs baseline [Last day: {}] season diff: {}\nData Source: SCB.se'
    .format(last_day, season_diff)
)

plt.figure(figsize=(18,12))
plt.title(cumulative_title)
plt.plot(df_19_20.cumsum(), color='crimson', label='season 2019/2020')
plt.plot(df_baseline.cumsum(), color='orange', label='Average 2015-2019')
plt.legend(loc='upper left')
plt.ylabel('number of deaths, season diff:{}'.format(season_diff))
plt.savefig('SCB_season_19_20_cumulative.jpg', format='jpg')

In [None]:
# Build a weekly frequency anchored on the weekday just before the season's
# first day. Weekly bins then end on that weekday, so each bin starts on the
# same weekday as the series itself.
print('startday in 2019 calendar:', dayname)
previous_day = df_19_20.index[0] - pd.DateOffset(days=1)
print('previous day:', previous_day)
day_code = 'W-' + previous_day.day_name()[:3].upper()
print(day_code)

# `convention` only applies to PeriodIndex data and is deprecated in recent
# pandas. These series are datetime-indexed, so it is omitted; results are
# unchanged.
df_weekly_19_20 = df_19_20.resample(day_code).sum()
df_weekly_baseline = df_baseline.resample(day_code).sum()
df_weekly_19_20.tail(40)

In [None]:
# Flu season window: 2019-12-01 through 2020-03-31, observed vs. baseline.
flu_baseline = df_baseline.loc['2019-12-01':'2020-03-31']
flu_19_20 = df_19_20.loc['2019-12-01':'2020-03-31']
flu_baseline_total = flu_baseline.sum()
flu_season_total = flu_19_20.sum()
print(flu_baseline_total)
print(flu_season_total)

flu = flu_season_total - flu_baseline_total
print(flu)

# Corona window: 2020-04-01 through the day before start_period.
end_period = start_period - pd.DateOffset(days=1)
corona_window = slice('2020-04-01', end_period)
corona_baseline = df_baseline.loc[corona_window].sum()
corona_19_20 = df_19_20.loc[corona_window].sum()

corona = corona_19_20 - corona_baseline
print(corona)

In [None]:
# Note: the last (incomplete) week is cut off, so the reported last day is the
# label of the second-to-last weekly bin and both series drop their final row.
last_day = df_weekly_19_20.index[-2].date()
season_label = 'season 19/20 : non-flu deficit: {} Corona Excess: {} Delta: {}'.format(
    flu, corona, corona+flu)

plt.figure(figsize=(18,12))
plt.title('Sweden Deaths per week season 19/20 vs baseline [Last day: {}]\nData Source: SCB.se'.format(last_day))
plt.ylabel('Number of deaths')
plt.plot(df_weekly_19_20.iloc[:-1], 'x--', color='crimson', label=season_label)
plt.plot(df_weekly_baseline.iloc[:-1], color='orange', label='average 2015-2019')
plt.legend(loc='upper left')
plt.savefig('SCB_season_19_20_weekly.jpg', format='jpg')

In [None]:
# Monthly totals for the season and the baseline, printed together with their
# difference.
df_monthly_19_20 = df_19_20.resample('M').sum()
df_monthly_baseline = df_baseline.resample('M').sum()
for monthly_table in (
    df_monthly_19_20,
    df_monthly_baseline,
    df_monthly_19_20 - df_monthly_baseline,
):
    print(monthly_table)

# Plot without the first and last monthly bins.
plt.figure(figsize=(18,12))
plt.plot(df_monthly_19_20.iloc[1:-1], 'x--', color='crimson')
plt.plot(df_monthly_baseline.iloc[1:-1], color='orange')