In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm


# Apply seaborn's default styling to the matplotlib figures created below.
sns.set()

def strip(x):
    """Return ``x`` with every tab character removed.

    Passed to ``read_csv`` as the converter for column 0 of ``world_pop.csv``.
    """
    return ''.join(x.split('\t'))

# NOTE(review): read_pickle can execute code embedded in the file; only load
# pickles that come from a trusted source.
global_df = pd.read_pickle('global_data.pkl')
country_dick = pd.read_pickle('country_data.pkl')

# Population table: column 0 (with tabs removed by `strip`) becomes the index.
country_populations = pd.read_csv(
    'world_pop.csv',
    sep=';',
    header=None,
    index_col=0,
    names=['population'],
    thousands=',',
    converters={0: strip},
)

# Death-rate table built from CSV columns 1 and 2, indexed by country and
# sorted by that index. Presumably yearly deaths per 1000 inhabitants -- TODO confirm.
country_death_rate = pd.read_csv(
    'deaths_per_year.csv',
    sep=';',
    header=None,
    names=['country', 'deaths_per_thousand'],
    index_col=0,
    usecols=[1, 2],
).sort_index()

# Hard-coded global reference figures.
world_pop = 7.8e9
global_deaths_per_year = 56e6
global_deaths_per_day = global_deaths_per_year / 365

# Outer join keeps countries that appear in only one of the two tables.
population_data = country_populations.join(country_death_rate, how='outer')



In [None]:
# Only the last expression of a cell is rendered, so this tail() is evaluated
# but its result is not shown.
country_dick['Sweden'].tail()
# Sweden's death rate scaled by 10000 and divided over 365 days (the 10000 is
# presumably Sweden's population in thousands -- TODO confirm).
country_death_rate.loc['Sweden'].mul(10000).div(365)

In [None]:
def plot_country(country_name):
    """Return the per-day share of deaths attributed to Corona for one country.

    Despite the name, nothing is drawn here. The function reads the
    module-level ``country_dick`` and ``population_data`` tables, keeps only
    the days with more than one confirmed case per million inhabitants and at
    least one new death, and divides the new Corona deaths by the sum of the
    baseline daily deaths and the new Corona deaths.

    Parameters
    ----------
    country_name : key into ``country_dick`` and row label of ``population_data``.

    Returns
    -------
    pandas.Series named ``country_name``, or ``None`` when the population is
    not above one million (a NaN population also ends up here).

    Raises
    ------
    KeyError
        If the country or one of the expected columns is missing.
    """
    cases = country_dick[country_name]
    population = population_data.loc[country_name, 'population']
    baseline_rate_per_day = population_data.loc[country_name, 'deaths_per_thousand'] / 1000 / 365
    baseline_deaths_per_day = baseline_rate_per_day * population

    # Skip lilliput-sized countries.
    if not (population > 1e6):
        return None

    # Days with more than one confirmed case per million inhabitants ...
    cases = cases[(1e6 * cases['confirmed'] / population) > 1]
    # ... that also reported at least one new death.
    cases = cases[cases['dead_inc'] >= 1]

    corona_deaths_per_day = cases['dead_inc']
    total_deaths_per_day = baseline_deaths_per_day + corona_deaths_per_day
    return pd.Series(corona_deaths_per_day / total_deaths_per_day, name=country_name)

# Collect one death-ratio Series per country and combine them with a single
# concat; growing a DataFrame inside the loop re-copies all data on every pass.
ratio_series = []
for c in country_dick.keys():
    try:
        s = plot_country(c)
    except KeyError:
        # plot_country raises KeyError when the country (or an expected
        # column) is missing from the lookup tables.
        print ('cant find {} in population data'.format(c))
        continue
    if s is not None:  # None: country skipped by the population threshold
        ratio_series.append(s)

death_ratio_df = pd.concat(ratio_series, axis=1) if ratio_series else pd.DataFrame()

# Normalise the index to plain dates in chronological order, so that the
# "last four columns" selected below really are the four most recent days.
death_ratio_df.index = pd.DatetimeIndex(death_ratio_df.index)
death_ratio_df = death_ratio_df.sort_index()
death_ratio_df.index = death_ratio_df.index.date

# Drop countries without a single valid ratio, then go to country x date layout.
mask = ~death_ratio_df.isnull().all()
df = death_ratio_df.loc[:, mask].T

# Median ratio over the last four days per country, in percent, worst first.
last_days_status = (
    df.iloc[:, -4:]
    .median(axis=1)
    .sort_values(ascending=False)
    .dropna()
    * 100
)

plt.figure(figsize=(18,12))
plt.title('Corona - By country ratio daily deaths caused by Corona relative to all Daily deaths [worst case]')
plt.ylabel('Corona deaths of all deaths %')
plt.axhline(last_days_status.median() , color='orange',ls='dashed',label='Median')
plt.legend (loc='upper left')

# NOTE(review): this file is written before the bars are drawn, so it only
# contains the median line. Kept as-is in case something consumes the file.
plt.savefig('corona_corona_death_ratio.jpg',format='jpg')
# Micro-states such as San Marino never reach this point: plot_country skips
# countries whose population is not above one million.
last_days_status.plot(kind='bar',color='crimson')
plt.savefig('corona_death_percentage.jpg',format='jpg')
last_days_status.describe()



In [None]:
some_countries = ['China','Korea, South','Italy','Spain','US','France','Sweden']
colors = ['crimson','sienna','lime','orange','navy','b','yellow']

# One dashed, x-marked line per selected country; ratios are shown in percent.
plt.figure(figsize=(18,12))
for c, line_color in zip(some_countries, colors):
    plt.plot(df.loc[c] * 100, 'x--', label=c, color=line_color)

plt.title('Corona - death ratio timeline')
plt.ylabel('ratio assumed Corona caused daily deaths relative all daily deaths in %')
plt.legend(loc='upper left')
plt.savefig('corona_death_ratio.jpg',format='jpg')

In [None]:
# Same timeline as the linear plot, drawn on a logarithmic y-axis.
plt.figure(figsize=(18,12))
for c, line_color in zip(some_countries, colors):
    plt.plot(df.loc[c] * 100, 'x--', label=c, color=line_color)

plt.title('Corona - death ratio timeline [log scale]')
plt.ylabel('ratio assumed Corona caused daily deaths relative all daily deaths in % [log scale]')
plt.legend(loc='upper left')
plt.yscale('log')
plt.savefig('corona_death_ratio_log.jpg',format='jpg')

In [None]:
swe = country_dick['Sweden']
# Baseline deaths per day: the death rate scaled by 10000 and spread over 365
# days (10000 is presumably Sweden's population in thousands -- TODO confirm).
swe_baseline_per_day = country_death_rate.loc['Sweden','deaths_per_thousand'] * 10000 / 365
# New Corona deaths as a percentage of baseline-plus-Corona deaths, per day.
swe_excess_deaths = 100 * swe['dead_inc'] / (swe['dead_inc'] + swe_baseline_per_day)
swe_excess_deaths

In [None]:
# Line plot of the Swedish percentage series, labelled through the Axes object.
ax = swe_excess_deaths.plot(figsize=(18,12),color='crimson')
ax.set_title('Sweden Excess Deaths')
ax.set_ylabel('%')

In [None]:
#### Before running, edit the CSV file to remove the comment lines at its top
#### and bottom, and rename the column dagmånad to dag_månad.
####
scb_dead = pd.read_csv('scb-dead_per_day.csv',sep=';')[['2020','2015-2019']]
# One row per calendar day of 2020 is expected; a different row count makes
# this index assignment fail.
scb_dead.index = pd.DatetimeIndex(pd.date_range('2020-01-01','2020-12-31'))

# Date of the last row whose 2020 count is non-zero.
last_day = scb_dead[scb_dead['2020'] != 0].index[-1].date()
print (last_day)
scb_dead[:last_day]

In [None]:


# Baseline deaths per day for Sweden: the death rate scaled by 10000 and spread
# over 365 days (10000 is presumably the population in thousands -- TODO confirm).
baseline_per_day = country_death_rate.loc['Sweden','deaths_per_thousand'] * 10000 / 365

# Constant baseline for every day of 2020; the length follows the date range
# instead of a hard-coded 366.
expected_daily_deaths = pd.Series(baseline_per_day,
                                  index=pd.date_range('2020-01-01', '2020-12-31'))

# .iloc[-1] selects the last row by position. Plain [-1] on a label-indexed
# Series is deprecated positional fallback in recent pandas.
expected_eoy = expected_daily_deaths.cumsum().loc[:last_day].iloc[-1]

# Baseline plus reported Corona deaths; days without a Corona figure fall back
# to the baseline alone.
joint_deaths = (expected_daily_deaths + swe['dead_inc']).fillna(baseline_per_day)
joint_eoy = joint_deaths.cumsum().loc[:last_day].iloc[-1]

print (joint_deaths)
print (expected_eoy)
print (joint_eoy)
print (joint_eoy - expected_eoy)

# Cumulative SCB figures up to the last day that has 2020 data.
scb_2020_cumulative = scb_dead['2020'].cumsum().loc[:last_day]
scb_mean_cumulative = scb_dead['2015-2019'].cumsum().loc[:last_day]

last_scb_actual = scb_2020_cumulative.iloc[-1]
print (last_scb_actual)

last_scb_mean = scb_mean_cumulative.iloc[-1]
print (last_scb_mean)

print (last_scb_actual - last_scb_mean)

plt.figure(figsize=(18,12))
plt.title('Cumulative deaths Sweden per day - average 2015-2019 vs 2020 actuals until {} \n Data Source: SCB.se'.format(last_day))
plt.ylabel('Total Number of Deaths (cumulative)')
plt.plot(scb_2020_cumulative,ls='dashed',color='crimson',label='scb 2020 total deaths')
plt.plot(scb_mean_cumulative,ls='dashed',color='orange',label='scb 2015-2019 mean deaths')
plt.legend(loc='upper left')
plt.savefig('scb-daily-deaths.jpg',format='jpg')

In [None]:
# Re-read the SCB table from disk, keep the same two columns and re-attach the
# daily 2020 index, then plot the cumulative sums of both columns.
scb_dead = pd.read_csv('scb-dead_per_day.csv', sep=';')[['2020', '2015-2019']]
scb_dead.index = pd.DatetimeIndex(pd.date_range('2020-01-01', '2020-12-31'))
plt.plot(scb_dead.cumsum())

In [None]:
# Excess deaths per day: the 2020 column minus the 2015-2019 column, restricted
# to the rows up to and including last_day.
scb_dead = scb_dead.loc[:last_day].assign(
    excess_deaths=lambda frame: frame['2020'] - frame['2015-2019']
)

print (scb_dead['excess_deaths'].mean())
print (scb_dead['excess_deaths'].sum())

print (scb_dead.sum())
# Ratio of total 2020 deaths to the total of the 2015-2019 column over the same days.
excess_average_mortality = scb_dead['2020'].sum() / scb_dead['2015-2019'].sum()
print (excess_average_mortality)
print (scb_dead.mean())

daily_average_15_19 = scb_dead['2015-2019'].mean()
print (daily_average_15_19)
scb_dead

In [None]:
# Build both legend labels up front so the plotting calls stay short.
excess_per_day = scb_dead['excess_deaths']
mean_excess_per_day = excess_per_day.mean()
bar_label = 'excess deaths per day (accumulated excess deaths YTD: {})'.format(excess_per_day.sum())
mean_label = 'average excess deaths per day: {:.0f} daily average 2015-2019: {:.0f}'.format(
    mean_excess_per_day, daily_average_15_19)

plt.figure(figsize=(18,12))
plt.title('Excess deaths day-by-day Sweden 2020 compared to daily average 2015-2019 \n Data Source SCB.se')

# Daily bars plus a dashed horizontal line at the mean excess.
plt.bar(scb_dead.index, excess_per_day, color='orange', label=bar_label)
plt.axhline(mean_excess_per_day, color='crimson', ls='dashed', label=mean_label)

plt.ylabel('excess deaths')
plt.legend(loc='upper left')
plt.savefig('scb_daily_excess_deaths.jpg',format='jpg')

In [None]:
# Poisson model of the 2015-2019 daily death counts with an Exponential prior on its rate.
# NOTE(review): MAP / MCMC / trace / observed=True,value=... look like the legacy
# PyMC 2.x API -- confirm that the installed `pymc` version still provides them.

# Prior on the rate, parameterised with 1 / (mean of the 2015-2019 column).
lambda_ = pm.Exponential('lambda_',1/scb_dead['2015-2019'].mean())

# Likelihood: the 2015-2019 column is supplied as the observed values.
lkh = pm.Poisson('lkh',lambda_,observed=True,value=scb_dead['2015-2019'])

model = pm.Model([lambda_,lkh])

# Fit a MAP estimate before sampling (presumably to give the sampler a good
# starting point -- TODO confirm).
map_ = pm.MAP(model)
map_.fit()

mcmc = pm.MCMC(model)

# Positional arguments are presumably (iterations, burn-in, thinning) -- verify
# against the pymc documentation for the installed version.
sample = mcmc.sample(50000,10000,2)

# All recorded values of lambda_ from the trace.
lambda__post = mcmc.trace('lambda_')[:]

# One-column frame of those values; later cells draw rows from it.
result = pd.DataFrame({'lambda__post' : lambda__post})



In [None]:
# Print summary statistics of the lambda_ values, then render the first rows
# as the cell output.
posterior_summary = result.describe()
print (posterior_summary)
result.head()

In [None]:
nr_days = len(scb_dead)
nr_samples_per_day = 10000

# One row per day, one column per simulated death count.
draws = np.zeros((nr_days, nr_samples_per_day))
posterior_lambda = result['lambda__post']

for day in range(nr_days):
    # Resample lambda values with replacement, then draw one Poisson count per
    # sampled lambda.
    rows = np.random.choice(result.index, replace=True, size=nr_samples_per_day)
    draws[day] = pm.rpoisson(posterior_lambda.iloc[rows])

# Label the rows with the calendar dates from 2020-01-01 up to last_day.
idx = pd.date_range('2020-01-01', last_day)
samples = pd.DataFrame(draws, index=idx.date)

# Per-day 5.5th and 94.5th percentiles, i.e. the bounds of an 89% interval.
CI_89 = np.percentile(samples, [5.5, 94.5], axis=1)


In [None]:
# Per-day mean of the simulated counts and its absolute distance to the lower
# and upper interval bounds (row 0: lower, row 1: upper).
expected_mean = samples.mean(axis=1)
interval_distance = np.abs(expected_mean.values - CI_89)

plt.figure(figsize=(18,12))
plt.title('Expected vs Actual Deaths Y2D Sweden')
plt.ylabel('Number of deaths')

plt.errorbar(samples.index, expected_mean, yerr=interval_distance,
             fmt='o', capsize=5, label='Expected deaths, 89% CI')

plt.plot(scb_dead['2020'], 'o', label='actual deaths 2020')
plt.xticks(rotation=90)
plt.legend(loc='upper left')
plt.savefig('scb_expected_vs_actual_deaths.jpg',format='jpg')