In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

import pymc3 as pm
import arviz as az

sns.set()


In [None]:
# Metadata table from the workbook's 'Description' sheet: the first 21 rows
# are skipped, spreadsheet columns 1 and 8 are kept, and rows with any
# missing value are dropped.
meta_df = pd.read_excel(
    'mortality_org.xlsx',
    sheet_name='Description',
    skiprows=21,
    usecols=[1, 8],
).dropna()
meta_df

In [None]:
# Country codes to analyse; each code is used as a sheet name when the
# workbook is read.
nordic_abbs = ['SWE','FIN','NOR','DNK','ISL']

# NOTE: later cells refer to individual countries explicitly (swe, fin, nor, ...);
# those references must be generalised before another country list is used.
#high_hit_abbs = ['SWE','BEL','ESP','ITA','GBRTENW','USA']

abbs = nordic_abbs

In [None]:
# Read every requested country sheet (first two rows skipped) and stack them
# into one long frame. The workbook is opened once and the sheets are
# concatenated in a single call; growing `df` with pd.concat inside the loop
# re-copied all previously loaded rows on every iteration.
with pd.ExcelFile('mortality_org.xlsx') as workbook:
    country_frames = [workbook.parse(sheet_name=a, skiprows=2) for a in abbs]

# pd.concat raises on an empty list, so keep the previous "empty frame" result
# when no country codes were requested.
df = pd.concat(country_frames, axis=0) if country_frames else pd.DataFrame()

In [None]:
# The column 'Total.1' holds the mortality rate: the weekly death count in
# 'Total' divided by the population and multiplied by 52 (i.e. annualised).

df

In [None]:
# 2020 rows with Sex == 'b' (presumably both sexes combined - confirm against
# the data source), with the rate columns ('<band>.1') given descriptive names.
rate_names_2020 = {
    '0-14.1': 'age_grp_0-14',
    '15-64.1': 'age_grp_15-64',
    '65-74.1': 'age_grp_65-74',
    '75-84.1': 'age_grp_75-84',
    '85+.1': 'age_grp_85+',
    'Total.1': 'all_age_grp',
}
is_2020_b = (df['Sex'] == 'b') & (df['Year'] == 2020)
df_2020 = df[is_2020_b].rename(columns=rate_names_2020)
df_2020

In [None]:
# Every Sex == 'b' row across all years, with the rate columns ('<band>.1')
# given descriptive names.
rate_names_timeline = {
    '0-14.1': 'age_grp_0-14',
    '15-64.1': 'age_grp_15-64',
    '65-74.1': 'age_grp_65-74',
    '75-84.1': 'age_grp_75-84',
    '85+.1': 'age_grp_85+',
    'Total.1': 'all_age_grp',
}
df_timeline = df[df['Sex'] == 'b'].rename(columns=rate_names_timeline)
df_timeline

In [None]:
def year_week_2_date(y, w, weekday=7):
    """Return the date of one day of ISO week `w` in ISO week-year `y`.

    The pair was previously parsed with ``'%Y %W %w'``. ``%W`` counts weeks
    from the first Monday of the calendar year, which is not ISO 8601
    numbering: in years whose 1 January falls on Tuesday-Thursday every
    resulting date was one week late (e.g. 2020 week 1 became 2020-01-12
    instead of 2020-01-05). The ISO directives ``%G %V %u`` are used instead.
    NOTE(review): this assumes the Year/Week columns follow ISO 8601 week
    numbering - confirm against the data source's documentation.

    Parameters
    ----------
    y : int-like
        ISO week-numbering year.
    w : int-like
        ISO week number (1-53).
    weekday : int, default 7
        ISO weekday to return, 1 = Monday ... 7 = Sunday. The default keeps the
        previous convention of labelling a week by its closing Sunday. Pass 4
        (Thursday) for a date that always falls inside calendar year `y`.

    Returns
    -------
    pandas.Timestamp
    """
    # int() also normalises float inputs such as 2020.0, which would otherwise
    # render as '2020.0' and fail to match the format.
    iso_label = '{} {} {}'.format(int(y), int(w), int(weekday))
    return pd.to_datetime(iso_label, format='%G %V %u')


# Re-index by date: each row's (Year, Week) pair is converted to a timestamp
# with year_week_2_date, so the frame can later be sliced and resampled by time.
df_timeline.index = df_timeline.apply(lambda row : year_week_2_date(row.Year,row.Week),axis=1)
df_timeline

In [None]:
# Yearly means of the weekly rows for each Nordic country, from 2000 onwards.
country_grp = df_timeline.groupby('Country')
swe, fin, den, nor, isl = [
    country_grp.get_group(code).resample('Y').mean().loc['2000-01-01' :]
    for code in ('SWE', 'FIN', 'DNK', 'NOR', 'ISL')
]

swe

In [None]:
### note that graph goes beyond 2020, and that the value presented is an avg. of weekly mortality ###

title = 'Nordic countries : trend overall mortality rate -2020 YTD\nDataSource : mortality.org'

# (legend label, yearly frame, line style) in drawing order; the first entry
# creates the axes, the remaining ones draw onto it.
trend_lines = [
    ('SWE', swe, 'yo--'),
    ('FIN', fin, 'bo--'),
    ('DNK', den, 'ro--'),
    ('NOR', nor, 'ko--'),
    ('ISL', isl, 'go--'),
]

first_label, first_frame, first_style = trend_lines[0]
ax = first_frame['all_age_grp'].plot(style=first_style,figsize=(18,12),label=first_label,title=title)
for line_label, yearly_frame, line_style in trend_lines[1:]:
    yearly_frame['all_age_grp'].plot(style=line_style,ax=ax,label=line_label)

ax.set_ylabel('mortality')
ax.set_ylim([0,0.012])

plt.legend(loc='upper left')
plt.savefig('nordic_mortality_trend.jpg',format='jpg')

In [None]:
# Per-country slices of the 2020 rows; later per-country cells reuse these names.
swe_2020, fin_2020, den_2020, nor_2020, isl_2020 = [
    df_2020.loc[df_2020['Country'] == code]
    for code in ('SWE', 'FIN', 'DNK', 'NOR', 'ISL')
]

# Sweden creates the axes; the other countries are drawn onto it.
ax = swe_2020.plot(
    x='Week', y='all_age_grp', style=['o--'], label='SWE', figsize=(18,12),
    title='2020 Weekly Overall Death Rate Nordic Countries\nDataSource : mortality.org',
)

other_countries = [
    (fin_2020, 'FIN', ['o--']),
    (den_2020, 'DNK', ['o--']),
    (nor_2020, 'NOR', ['o--']),
    (isl_2020, 'ISL', ['yo--']),
]
for country_frame, country_label, line_style in other_countries:
    country_frame.plot(x='Week',y='all_age_grp',style=line_style,label=country_label,ax=ax)

plt.ylabel('Weekly Death Rate')
plt.savefig('mortality_org_overall_death_rate_nor_2020dics.jpg',format='jpg')

In [None]:
swe_2020

In [None]:
swe_2020['age_grp_85+'] 

In [None]:
# Sweden 2020: one line per age-band rate column, by week.
swe_rate_cols = ['age_grp_0-14','age_grp_15-64','age_grp_65-74','age_grp_75-84','age_grp_85+']
ax = swe_2020.plot(
    x='Week',
    y=swe_rate_cols,
    style=['o--'] * len(swe_rate_cols),
    figsize=(18,12),
    title='SWE 2020 Weekly Death Rate Nordic Countries\nDataSource : mortality.org',
)

plt.ylabel('Weekly Death Rate')

In [None]:
# Finland 2020: one line per age-band rate column, by week.
fin_rate_cols = ['age_grp_0-14','age_grp_15-64','age_grp_65-74','age_grp_75-84','age_grp_85+']
ax = fin_2020.plot(
    x='Week',
    y=fin_rate_cols,
    style=['o--'] * len(fin_rate_cols),
    figsize=(18,12),
    title='FIN 2020 Weekly Mortality Nordic Countries\nDataSource : mortality.org',
)

In [None]:
# Denmark 2020: one line per age-band rate column, by week.
den_rate_cols = ['age_grp_0-14','age_grp_15-64','age_grp_65-74','age_grp_75-84','age_grp_85+']
ax = den_2020.plot(
    x='Week',
    y=den_rate_cols,
    style=['o--'] * len(den_rate_cols),
    figsize=(18,12),
    title='DNK 2020 Weekly Mortality Nordic Countries\nDataSource : mortality.org',
)

In [None]:
# Norway 2020: one line per age-band rate column, by week.
nor_rate_cols = ['age_grp_0-14','age_grp_15-64','age_grp_65-74','age_grp_75-84','age_grp_85+']
ax = nor_2020.plot(
    x='Week',
    y=nor_rate_cols,
    style=['o--'] * len(nor_rate_cols),
    figsize=(18,12),
    title='NOR 2020 Weekly Mortality Nordic Countries\nDataSource : mortality.org',
)

In [None]:
# Iceland 2020: one line per age-band rate column, by week.
isl_rate_cols = ['age_grp_0-14','age_grp_15-64','age_grp_65-74','age_grp_75-84','age_grp_85+']
ax = isl_2020.plot(
    x='Week',
    y=isl_rate_cols,
    style=['o--'] * len(isl_rate_cols),
    figsize=(18,12),
    title='ISL 2020 Weekly Mortality Nordic Countries\nDataSource : mortality.org',
)

In [None]:
# Age band 0-14: weekly 2020 rate, one line per country. Sweden creates the axes.
band_col = 'age_grp_0-14'
ax = swe_2020.plot(
    x='Week', y=band_col, style='o--', figsize=(18,12), label='SWE 0-14',
    title='2020 Weekly Death Rate per age group Nordic Countries\nDataSource : mortality.org',
)

for country_frame, code, line_style in (
    (fin_2020, 'FIN', 'o--'),
    (den_2020, 'DNK', 'o--'),
    (nor_2020, 'NOR', 'o--'),
    (isl_2020, 'ISL', 'yo--'),
):
    country_frame.plot(x='Week',y=band_col,style=line_style,ax=ax,label=code + ' 0-14')

plt.ylabel('Weekly Death Rate')
plt.savefig ('mortality_org_0_14.jpg',format='jpg')

In [None]:
# Age band 15-64: weekly 2020 rate, one line per country. Sweden creates the axes.
band_col = 'age_grp_15-64'
ax = swe_2020.plot(
    x='Week', y=band_col, style='o--', figsize=(18,12), label='SWE 15-64',
    title='2020 Weekly Death Rate per age group Nordic Countries\nDataSource : mortality.org',
)

for country_frame, code, line_style in (
    (fin_2020, 'FIN', 'o--'),
    (den_2020, 'DNK', 'o--'),
    (nor_2020, 'NOR', 'o--'),
    (isl_2020, 'ISL', 'yo--'),
):
    country_frame.plot(x='Week',y=band_col,style=line_style,ax=ax,label=code + ' 15-64')

plt.ylabel('Weekly Death Rate')

plt.savefig('mortality_org_15_64.jpg',format='jpg')

In [None]:
# Age band 65-74: weekly 2020 rate, one line per country. Sweden creates the axes.
band_col = 'age_grp_65-74'
ax = swe_2020.plot(
    x='Week', y=band_col, style='o--', figsize=(18,12), label='SWE 65-74',
    title='2020 Weekly Death Rate per age group Nordic Countries\nDataSource : mortality.org',
)

for country_frame, code, line_style in (
    (fin_2020, 'FIN', 'o--'),
    (den_2020, 'DNK', 'o--'),
    (nor_2020, 'NOR', 'o--'),
    (isl_2020, 'ISL', 'yo--'),
):
    country_frame.plot(x='Week',y=band_col,style=line_style,ax=ax,label=code + ' 65-74')

plt.ylabel('Weekly Death Rate')

plt.savefig('mortality_org_65_74.jpg',format='jpg')

In [None]:
# Age band 75-84: weekly 2020 rate, one line per country. Sweden creates the axes.
band_col = 'age_grp_75-84'
ax = swe_2020.plot(
    x='Week', y=band_col, style='o--', figsize=(18,12), label='SWE 75-84',
    title='2020 Weekly Death Rate per age group Nordic Countries\nDataSource : mortality.org',
)

for country_frame, code, line_style in (
    (fin_2020, 'FIN', 'o--'),
    (den_2020, 'DNK', 'o--'),
    (nor_2020, 'NOR', 'o--'),
    (isl_2020, 'ISL', 'yo--'),
):
    country_frame.plot(x='Week',y=band_col,style=line_style,ax=ax,label=code + ' 75-84')

plt.ylabel('Weekly Death Rate')

plt.savefig('mortality_org_75_84.jpg',format='jpg')

In [None]:
# Age band 85+: weekly 2020 rate, one line per country. Sweden creates the axes.
band_col = 'age_grp_85+'
ax = swe_2020.plot(
    x='Week', y=band_col, style='o--', figsize=(18,12), label='SWE 85+',
    title='2020 Weekly Death Rate per age group Nordic Countries\nDataSource : mortality.org',
)

for country_frame, code, line_style in (
    (fin_2020, 'FIN', 'o--'),
    (den_2020, 'DNK', 'o--'),
    (nor_2020, 'NOR', 'o--'),
    (isl_2020, 'ISL', 'yo--'),
):
    country_frame.plot(x='Week',y=band_col,style=line_style,ax=ax,label=code + ' 85+')

plt.ylabel('Weekly Death Rate')

plt.savefig('mortality_org_85+.jpg',format='jpg')

In [None]:
swe_2020['all_age_grp']

In [None]:
# Annualised crude rate for Sweden: deaths so far divided by the population
# scaled to the fraction of the year covered (rows in swe_2020 / 52).
# NOTE(review): 10.23e6 is a hard-coded population figure, while another cell
# in this notebook uses 10327589 - confirm which one is intended.
swe_2020['Total'].sum() / (10.23e6 * len(swe_2020)/52)



In [None]:
swe_2020

In [None]:
# Weekly death counts expressed as an annualised rate (count / population * 52),
# using the same hard-coded population figure of 10.23e6.
swe_2020['Total'] / 10.23e6 *52

In [None]:
def age_grp_mean(col):
    """Return the mean of `col` (delegates to ``col.mean()``)."""
    return col.mean()

# Mean 2020 rate for every age band of Sweden. A notebook only echoes the last
# bare expression of a cell, so five separate calls displayed the 85+ value
# alone and discarded the other four; applying the helper column-wise shows
# all five means in one Series.
swe_2020[['age_grp_0-14','age_grp_15-64','age_grp_65-74',
          'age_grp_75-84','age_grp_85+']].apply(age_grp_mean)

In [None]:
df_2020

In [None]:
fig,axes = plt.subplots(5,1,figsize=(18,12),sharex=True)

# One violin panel per age band, top to bottom, each showing the distribution
# of the 2020 weekly rates per country.
for panel, band in zip(axes, ['0-14', '15-64', '65-74', '75-84', '85+']):
    title = 'Nordic Countries 2020 death rate distribution, age group ' + band
    sns.violinplot(x='Country',y='age_grp_' + band,data=df_2020,ax=panel)
    panel.set_title(title)
    panel.set_ylabel('death rate')

plt.tight_layout()

plt.savefig('nor_2020dic_death_rate_distr.jpg',format='jpg')

In [None]:
# Distribution of the 2020 weekly all-age rate per country. The cell relies on
# pyplot's current-figure state: the figure is created first, then the title,
# the violins and the y label are applied to its current axes.
plt.figure(figsize=(18,12))
plt.title('Nordic countries 2020 weekly death rate distribution, all age groups')
sns.violinplot(x='Country',y='all_age_grp',data=df_2020)
plt.ylabel('death rate distribution')

plt.savefig('nor_2020dic_death_distr_all_ages.jpg',format='jpg')

In [None]:

# Copy of the country code plus the two youngest age-band rate columns.
# NOTE(review): transaction_df is not used by any other cell visible here -
# confirm whether this scratch cell can be removed.
transaction_df = df_2020[['Country','age_grp_0-14','age_grp_15-64']].copy()
#transaction_df['age_grp_0-14'].apply()

In [None]:
# NOTE(review): 'tips' is a seaborn example dataset unrelated to the mortality
# data, and `tips` is not referenced by any other cell visible here - this
# looks like leftover scratch work; confirm whether it can be removed.
tips = sns.load_dataset('tips')
tips

In [None]:
# Keep observations dated 2007-01-01 or later.
# * A boolean mask replaces the string slice .loc['2007-01-01':]. The index is
#   not sorted (the countries are stacked one after another, so dates repeat),
#   and label slicing on a non-monotonic DatetimeIndex with a key that is not
#   in the index is deprecated in pandas 1.x and raises KeyError in pandas 2.
# * .copy() gives an independent frame, so the column assignments in later
#   cells neither trigger SettingWithCopyWarning nor write into a view.
df_timeline = df_timeline.loc[df_timeline.index >= '2007-01-01'].copy()
df_timeline

In [None]:
# Map each country code to its position in order of first appearance.
unique_countries = df_timeline['Country'].unique()
country_idx_map = {code: position for position, code in enumerate(unique_countries)}
country_idx_map

In [None]:
# Map each distinct index timestamp to its rank in sorted order
# (np.unique returns the distinct values sorted).
unique_dates = np.unique(df_timeline.index)
date_idx_map = {stamp: rank for rank, stamp in enumerate(pd.to_datetime(unique_dates))}


In [None]:
# Integer country code for every row, looked up in country_idx_map.
df_timeline['country_idx'] = df_timeline['Country'].map(country_idx_map)
df_timeline

In [None]:
# Position of every row's timestamp among the distinct dates (reset_index
# exposes the unnamed date index as the column 'index').
week_idx = df_timeline.reset_index()['index'].map(date_idx_map)

In [None]:
# Attach the per-row week position. `.values` strips week_idx's own index, so
# the assignment is positional rather than aligned on the date index.
df_timeline['week_idx'] = week_idx.values
df_timeline

In [None]:
df_timeline.loc[df_timeline['Country'] == 'NOR']

In [None]:
# Calendar date of the most recent row in the timeline; used in figure titles.
last_date = df_timeline.index.max().date()
last_date

In [None]:
# Quick look at Sweden's all-age rate against the week position.
swe_rows = df_timeline.loc[df_timeline['Country'] == 'SWE']
plt.scatter(swe_rows['week_idx'], swe_rows['all_age_grp'])

In [None]:

# Week positions as a NumPy array, one entry per row of df_timeline.
x = df_timeline['week_idx'].values
x

In [None]:
def standardize(series):
    """Return `series` centred on its mean and divided by its ``.std()``.

    The spread is whatever the input's own ``.std()`` returns, so the degrees
    of freedom follow the input type (pandas objects default to ddof=1, NumPy
    arrays to ddof=0).
    """
    centred = series - series.mean()
    spread = series.std()
    return centred / spread

model = pm.Model()
scale_outcome = 1000


def regression(outcome='all_age_grp', random_seed=None):
    """Fit a per-country linear trend of `outcome` over the week index.

    Model (one intercept and one slope per country, shared noise):

        mu  = alpha[country] + standardize(week_idx) * beta[country]
        obs ~ Normal(mu, sigma)

    with Normal(0, 1) priors on alpha and beta, a Uniform(0, 5) prior on sigma,
    and the observed column multiplied by `scale_outcome`.

    Reads the notebook globals `df_timeline` (columns 'week_idx',
    'country_idx' and `outcome`), `unique_countries` and `scale_outcome`.

    Changes from the previous version:
    * The predictor is the local `week_idx` taken from `df_timeline`. It used
      to be the module-level `x`, which is built from the same column in
      another cell, so the function silently depended on that cell having run.
    * A fresh `pm.Model` is created on every call and rebound to the global
      `model`. Re-using one model object made a second call fail, because
      the variables were registered on it again; cells that follow with
      `with model:` keep working and see the model of the latest fit.

    Parameters
    ----------
    outcome : str, default 'all_age_grp'
        Name of the `df_timeline` column to model.
    random_seed : int or None, default None
        Forwarded to `pm.sample`; pass an integer for reproducible draws.

    Returns
    -------
    The trace returned by `pm.sample(2000, tune=2000)`.
    """
    global model

    week_idx = df_timeline['week_idx'].values
    country_idx = df_timeline['country_idx'].values
    n_countries = len(unique_countries)

    model = pm.Model()
    with model:

        alpha = pm.Normal('alpha',0,1,shape=n_countries)
        beta = pm.Normal('beta',0, 1,shape=n_countries)

        sigma = pm.Uniform('sigma',0,5)

        # Week index standardised over all rows (all countries together).
        mu = pm.Deterministic('mu',alpha[country_idx] + standardize(week_idx) * beta[country_idx])

        obs = pm.Normal('obs',mu,sigma,observed=df_timeline[outcome] * scale_outcome)

        trace = pm.sample(2000,tune=2000,random_seed=random_seed)

    return trace

In [None]:
df_timeline.head()

In [None]:
# Fit the per-country trend model to the 85+ rate column.
outcome = 'age_grp_85+'

trace=regression(outcome)

In [None]:
# Posterior summary table with 89% HDIs; the '~' prefix excludes the per-row
# deterministic `mu` from the table.
with model:
    print (az.summary(trace,var_names=['~mu'],hdi_prob=0.89))

In [None]:
# Posterior plots of the per-country slopes `beta`, annotated with the 89% HDI.
with model:
    az.plot_posterior(trace,var_names=['beta'],hdi_prob=0.89)

In [None]:
def plot_reg(outcome):
    """Plot, per country, the weekly observations, the fitted trend and its 89% HDI.

    Draws a 2x3 grid (SWE, FIN, DNK / NOR, ISL, unused) with, in each panel,
    the observed `outcome` values as a grey scatter, the posterior-mean trend
    line as a dashed black line, the 89% HDI band of `mu`, and a vertical
    dashed line at the mean week index. The figure is saved as
    'nordic_mortality_bayesian_trends_<outcome>.jpg'.

    Reads the notebook globals `trace` (with 'alpha', 'beta' and 'mu'),
    `df_timeline`, `country_idx_map`, `scale_outcome` and `last_date`, and
    calls `standardize`.

    Changes from the previous copy-pasted, one-stanza-per-country version:
    * Sweden's trend line used `trace['alpha'][0,0]` (a single posterior draw)
      as its intercept; every panel now uses the posterior mean
      `trace['alpha'][:, i].mean()`.
    * The trend line is evaluated on the week index standardised over all rows
      as a NumPy array, i.e. the same predictor the model was fitted on. It
      used to be re-standardised per country with the pandas (ddof=1) spread,
      which does not match `mu` exactly.
    * Each country's position in `alpha` / `beta` comes from
      `country_idx_map` instead of hard-coded 0..4.
    * `plt.legend` acts on a single (current) axes only, so the HDI labels
      could not appear on every panel; each panel now gets its own legend.

    Parameters
    ----------
    outcome : str
        Name of the `df_timeline` column that was modelled; used for the
        scatter, the figure title and the output file name.
    """
    # (country code, subplot row, subplot column, HDI band colour)
    panels = [
        ('SWE', 0, 0, 'yellow'),
        ('FIN', 0, 1, 'lightblue'),
        ('DNK', 0, 2, 'orange'),
        ('NOR', 1, 0, 'lime'),
        ('ISL', 1, 1, 'cyan'),
    ]

    # After reset_index the row labels are 0..n-1, matching the columns of trace['mu'].
    numeric_timeline = df_timeline.reset_index()
    all_weeks = numeric_timeline['week_idx'].values
    all_weeks_std = standardize(all_weeks)
    observed = numeric_timeline[outcome].values

    fig,axes = plt.subplots(2,3,sharex=True,sharey=True,figsize=(18,12))

    plt.suptitle('Nordic countries : mortality trends for [{}]'.format(outcome) +\
                 ' based on weekly death rates Jan 2007 - {}'.format(last_date))

    for country, row, col, band_colour in panels:
        panel = axes[row, col]
        param_idx = country_idx_map[country]
        country_rows = np.flatnonzero((numeric_timeline['Country'] == country).values)
        country_weeks = all_weeks[country_rows]

        # Posterior means, still on the scaled (x scale_outcome) outcome.
        alpha_mean = trace['alpha'][:, param_idx].mean()
        beta_mean = trace['beta'][:, param_idx].mean()

        panel.plot(country_weeks,
                   (alpha_mean + all_weeks_std[country_rows] * beta_mean) / scale_outcome,
                   '--',color='k',lw=1)

        panel.scatter(country_weeks,observed[country_rows],color='lightgrey',alpha=0.3)

        az.plot_hdi(country_weeks,trace['mu'][:, country_rows] / scale_outcome,
                    hdi_prob=0.89,ax=panel,
                    fill_kwargs={'color' : band_colour,'alpha' : 1.0,'label' : country})

        panel.axvline(all_weeks.mean(),ls='dashed',color='orange')

        panel.legend(loc='upper center')

        # alpha and beta are reported on the original (unscaled) outcome.
        panel.set_title((country + r' $\alpha$ : {:.4f}   $\beta$ : {:.6f}').format(
            alpha_mean / scale_outcome, beta_mean / scale_outcome))

    axes[1,0].set_xlabel('week number')
    axes[1,1].set_xlabel('week_number')
    axes[1,2].set_xlabel('week_number')

    # The sixth panel is unused.
    axes[1,2].axis('off')

    plt.savefig('nordic_mortality_bayesian_trends_{}.jpg'.format(outcome),format='jpg')



In [None]:
# The reported alphas are not the fitted values at week 0: the x values are
# standardised in the calculation, so alpha is the level at the mean week
# (at about week 350).
plot_reg(outcome)

In [None]:
df_2020

In [None]:
# 2020 rate columns (per age band and overall) together with the country code.
rate_view_cols = [
    'Country',
    'age_grp_0-14', 'age_grp_15-64', 'age_grp_65-74',
    'age_grp_75-84', 'age_grp_85+',
    'all_age_grp',
]
age_grps_2020 = df_2020[rate_view_cols].copy()

age_grps_2020

In [None]:
country_age_grps = age_grps_2020.groupby('Country')


def _mean_and_spread(country_frame):
    """Return (column means, |5.5% / 94.5% quantile - mean|) for the numeric columns.

    `numeric_only=True` is passed explicitly because the group frames may
    still carry the string 'Country' column: older pandas dropped it silently
    (later with a FutureWarning), pandas 2 raises TypeError instead.
    """
    means = country_frame.mean(numeric_only=True)
    quantiles = country_frame.quantile([0.055,0.945], numeric_only=True)
    return means, np.abs(quantiles - means)


# NOTE: from here on the *_2020 names hold only the rate columns of
# age_grps_2020 (no 'Week' / 'Total'), not the full df_2020 slices.
swe_2020 = country_age_grps.get_group('SWE')
fin_2020 = country_age_grps.get_group('FIN')
nor_2020 = country_age_grps.get_group('NOR')
den_2020 = country_age_grps.get_group('DNK')
isl_2020 = country_age_grps.get_group('ISL')

# Per country: mean weekly rate per column, and the distance of the 5.5% and
# 94.5% quantiles of the weekly rates from that mean.
swe_means, swe_2020_ci = _mean_and_spread(swe_2020)
fin_means, fin_2020_ci = _mean_and_spread(fin_2020)
nor_means, nor_2020_ci = _mean_and_spread(nor_2020)
den_means, den_2020_ci = _mean_and_spread(den_2020)
isl_means, isl_2020_ci = _mean_and_spread(isl_2020)

print (swe_means)
print (swe_2020_ci.T)

swe_2020.describe(percentiles=[0.055,0.945])


In [None]:
# Side-by-side table of the 2020 mean rates: one column per country, one row
# per rate column.
all_means = pd.concat([swe_means,fin_means,nor_means,den_means,isl_means],axis=1)
all_means.columns=['SWE','FIN','NOR','DNK','ISL']
all_means

In [None]:
# Stack the per-country quantile distances into one array, in the same country
# order as the columns of all_means; it is passed as `yerr` to the bar chart.
all_cis = np.array([swe_2020_ci.values,fin_2020_ci.values,nor_2020_ci.values,
                    den_2020_ci.values,isl_2020_ci.values])
all_cis.shape

In [None]:
all_means

In [None]:
all_means.mean(axis=1)

In [None]:
all_means.sub(all_means.mean(axis=1),axis=0)

In [None]:
# Grouped bar chart of the mean 2020 rates (one group per rate column, one bar
# per country) on a log scale; the error bars are taken from all_cis.
all_means.plot(kind='bar',
               figsize=(18,12),yerr=all_cis,error_kw={'capsize' : 5,'ecolor':'k','label' : '89%'},
               title='Nordic Countries 2020 age grp mortality from weekly death rates Jan - {}\nDataSource : mortality.org'.format(last_date))

plt.ylabel('mortality')
plt.yscale('log')
plt.savefig('nordic_mortality_bar_chart.jpg',format='jpg')

In [None]:
all_means.loc['age_grp_85+'].plot(kind='bar')

In [None]:
all_means.loc['age_grp_0-14'].plot(kind='bar')

In [None]:
df_2020.loc[df_2020['Country'] == 'SWE']

In [None]:
# Per-country frames of the absolute weekly death counts per age band.
death_count_cols = ['0-14','15-64','65-74','75-84','85+']
swe_2020_abs, fin_2020_abs, den_2020_abs, nor_2020_abs, isl_2020_abs = [
    df_2020.loc[df_2020['Country'] == code][death_count_cols]
    for code in ('SWE', 'FIN', 'DNK', 'NOR', 'ISL')
]

swe_2020_abs

In [None]:
swe_2020_abs.sum()

In [None]:
# Annualised crude mortality for Sweden from the summed age-band deaths:
# total deaths / population / (weeks observed / 52).
# NOTE(review): the population is hard-coded (10327589), while other cells in
# this notebook use 10.23e6 - confirm the intended figure and its source.
mortality_pop = true_pop = 10327589

swe_mortality_2020 = swe_2020_abs.sum().sum() / true_pop / (len(swe_2020_abs) / 52)
swe_mortality_2020

In [None]:
df_2020

In [None]:
# Back out the age-band populations from the data itself. The rate columns are
# deaths / population * 52, hence population = deaths / rate * 52. `.values`
# makes the division positional, because the two column sets are named
# differently. Rows where any band is undefined are dropped.
by_country = df_2020.set_index('Country')
weekly_deaths = by_country[['0-14','15-64','65-74','75-84','85+']]
weekly_rates = by_country[['age_grp_0-14','age_grp_15-64','age_grp_65-74','age_grp_75-84','age_grp_85+']]

nordic_age_grp_pop = (weekly_deaths / weekly_rates.values * 52).dropna()
nordic_age_grp_pop

In [None]:
# Convert the derived populations to whole numbers. They come out of a float
# division, so a value can land just below the true integer (e.g.
# 123456.9999999); round to the nearest integer before casting, because
# astype(int) alone truncates and would turn such a value into 123456, leaving
# the rows of one country different by 1.
nordic_age_grp_pop = nordic_age_grp_pop.round().astype(int)
nordic_age_grp_pop

In [None]:
# Reduce to one population row per country by keeping the first row for each
# country code. drop_duplicates() compared row values only, so any week whose
# derived population differed slightly (floating-point noise) left several
# rows per country; .loc['<code>'] would then return a DataFrame instead of a
# Series and break the per-country divisions in later cells.
# NOTE(review): assumes the population is meant to be constant within 2020.
nordic_age_grp_pop = nordic_age_grp_pop[~nordic_age_grp_pop.index.duplicated(keep='first')]
nordic_age_grp_pop

In [None]:
nordic_age_grp_pop.sum(axis=1)

In [None]:
# Weekly death counts turned into annualised rates per age band:
# count / age-band population * 52.
(swe_2020_mortality, fin_2020_mortality, den_2020_mortality,
 nor_2020_mortality, isl_2020_mortality) = [
    weekly_counts / nordic_age_grp_pop.loc[code] * 52
    for weekly_counts, code in (
        (swe_2020_abs, 'SWE'),
        (fin_2020_abs, 'FIN'),
        (den_2020_abs, 'DNK'),
        (nor_2020_abs, 'NOR'),
        (isl_2020_abs, 'ISL'),
    )
]

swe_2020_mortality

In [None]:
# Mean annualised rate per age band, printed country by country.
for code, country_rates in (
    ('SWE', swe_2020_mortality),
    ('FIN', fin_2020_mortality),
    ('DNK', den_2020_mortality),
    ('NOR', nor_2020_mortality),
    ('ISL', isl_2020_mortality),
):
    print (code,country_rates.mean())

In [None]:
# Table of mean annualised rates: one row per age band, one column per country.
mortality_avg = pd.DataFrame({
    code: country_rates.mean()
    for code, country_rates in [
        ('SWE', swe_2020_mortality),
        ('FIN', fin_2020_mortality),
        ('DNK', den_2020_mortality),
        ('NOR', nor_2020_mortality),
        ('ISL', isl_2020_mortality),
    ]
})

mortality_avg

In [None]:
# Mean annualised rate per age band, one bar per country, on a log scale.
mortality_avg.plot(kind='bar',figsize=(18,12))
plt.yscale('log')

In [None]:
df_2020.head()

In [None]:
def trans(x):
    """Identity function: return `x` unchanged."""
    return x

# Absolute weekly death counts per age band, indexed by (Country, Week).
# NOTE(review): `trans` is the identity, so the groupby/transform step passes
# the selected columns through unchanged.
nordic_abs = df_2020.set_index(['Country','Week']).groupby(
    'Country')[['0-14','15-64','65-74','75-84','85+']].transform(trans)

nordic_abs

In [None]:
nordic_abs.sum(axis=1).groupby('Country').sum()

In [None]:
# Denmark's derived age-band populations are used as the standard population
# for the age adjustment in the following cells.
std_pop = nordic_age_grp_pop.loc['DNK']
std_pop

In [None]:
# https://www.healthknowledge.org.uk/e-learning/epidemiology/specialists/standardisation

# Apply each country's mean age-band rates to the standard population: the
# result is the number of deaths per age band that country would have with the
# standard population's age structure.
std_abs_deaths = mortality_avg.mul(std_pop,axis=0)
std_abs_deaths

In [None]:
# Standardised deaths summed over the age bands, per country.
total_age_adj_deaths = std_abs_deaths.sum()
total_age_adj_deaths

In [None]:
# Age-adjusted rate per country: standardised deaths divided by the total size
# of the standard population.
age_adj_mortality = total_age_adj_deaths / (std_pop.sum())
age_adj_mortality

In [None]:
# Bar chart of the age-adjusted rates, highest first. The title reports
# len(swe_2020) as the last week covered.
title = 'Nordic Countries Age Adjusted [stdPop Denmark] Mortality 2020 YTD week 1 - {} \nDataSource:mortality.org\
'.format(len(swe_2020))
age_adj_mortality.sort_values(ascending=False).plot(kind='bar',figsize=(18,12),color='crimson',title=title)
plt.ylabel('mortality')
plt.savefig('nordic_age_adj_mortality.jpg',format='jpg')

In [None]:
all_means.loc['all_age_grp'].sort_values(ascending=False).plot(kind='bar',figsize=(18,12))

In [None]:
### new ###

# Open the workbook as an ExcelFile so that its sheet names can be inspected.
xl = pd.ExcelFile('mortality_org.xlsx')


In [None]:
xl.sheet_names

In [None]:
# Load every sheet except the first one (presumably the 'Description' sheet -
# confirm) and stack them into one long frame. The already opened workbook
# `xl` is parsed directly and the sheets are concatenated in a single call;
# growing the frame with pd.concat inside the loop re-copied all previously
# loaded rows on every iteration and reopened the file for each sheet.
abbs = xl.sheet_names[1:]

all_country_frames = [xl.parse(sheet_name=a, skiprows=2) for a in abbs]

# pd.concat raises on an empty list, so keep the previous "empty frame" result.
df_all_countries = (pd.concat(all_country_frames, axis=0)
                    if all_country_frames else pd.DataFrame())

In [None]:
df_all_countries

In [None]:
# 2020 rows with Sex == 'b' for every country loaded from the workbook.
df_all_countries_2020 = df_all_countries.loc[(df_all_countries['Year'] == 2020) & (df_all_countries['Sex'] == 'b')]
df_all_countries_2020

In [None]:
# Highest 2020 week number reported by any country.
latest_week = df_all_countries_2020.groupby('Country')['Week'].max().max()
latest_week

In [None]:
# Give the rate columns ('<band>.1') descriptive names, as was done for the
# Nordic frames.
df_all_countries_2020 = df_all_countries_2020.rename(columns={'0-14.1': 'age_grp_0-14','15-64.1' : 'age_grp_15-64',
                      '65-74.1' : 'age_grp_65-74', '75-84.1' : 'age_grp_75-84',
                      '85+.1' : 'age_grp_85+','Total.1' : 'all_age_grp'})

df_all_countries_2020

In [None]:
# Last 2020 week available for each country.
country_max_week = df_all_countries_2020.groupby('Country')['Week'].max()
country_max_week

In [None]:
# Countries whose data reach to within two weeks of the latest reported week.
recent_countries = country_max_week.loc[country_max_week >= latest_week - 2].index.values
recent_countries

In [None]:
'ITA' in recent_countries

In [None]:
# Index by country code so that rows can be selected with .loc[<codes>].
df_all_countries_2020 = df_all_countries_2020.set_index('Country')


In [None]:
# Keep only the countries with recent data.
df_all_countries_2020 = df_all_countries_2020.loc[recent_countries]



#!!!!#
#df_all_countries_2020 = df_all_countries_2020.loc[df_all_countries_2020['Week'] <=26]
#!!!!#

# Recompute the latest week over the retained countries.
latest_week = df_all_countries_2020.groupby('Country')['Week'].max().max()
print ('latest_week',latest_week)

df_all_countries_2020

In [None]:
# Back out the age-band populations for every retained country. The rate
# columns are deaths / population * 52, hence population = deaths / rate * 52.
# `.values` makes the division positional, because the two column sets are
# named differently.
all_age_grp_pop = df_all_countries_2020[['0-14','15-64','65-74','75-84','85+']] /\
df_all_countries_2020[['age_grp_0-14','age_grp_15-64','age_grp_65-74','age_grp_75-84','age_grp_85+']].values

all_age_grp_pop *= 52

# Rows where any band is undefined are dropped (this call was duplicated).
all_age_grp_pop = all_age_grp_pop.dropna()

# The populations come out of a float division: round to the nearest integer
# before casting, because astype(int) alone truncates (123456.9999 -> 123456).
all_age_grp_pop = all_age_grp_pop.round().astype(int)

# Keep one row per country. drop_duplicates() compared row values only, so any
# residual float noise left several rows per country, which breaks the
# index-aligned per-country arithmetic in later cells.
# NOTE(review): assumes the population is meant to be constant within 2020.
all_age_grp_pop = all_age_grp_pop[~all_age_grp_pop.index.duplicated(keep='first')]
all_age_grp_pop

In [None]:
all_age_grp_pop.sum(axis=1)

In [None]:
# Weekly absolute deaths per age band; summed over the bands and then over
# each country's rows to give year-to-date deaths per country.
all_age_grp_abs_deaths = df_all_countries_2020[['0-14','15-64','65-74','75-84','85+']]
deaths_YTD = all_age_grp_abs_deaths.sum(axis=1).groupby('Country').sum()
deaths_YTD

In [None]:
# Mean 2020 rate per age band and country. The columns are renamed to the bare
# age-band labels so that they align with all_age_grp_pop in later arithmetic.
all_countries_mortality_age_grp = \
df_all_countries_2020[['age_grp_0-14','age_grp_15-64','age_grp_65-74','age_grp_75-84','age_grp_85+']].groupby('Country').mean()

all_countries_mortality_age_grp.columns = all_age_grp_pop.columns
all_countries_mortality_age_grp

In [None]:
(all_age_grp_pop * all_countries_mortality_age_grp).sum(axis=1).astype(int)

In [None]:
# Year-to-date deaths per country and age band.
all_abs_deaths = all_age_grp_abs_deaths.groupby('Country').sum()
all_abs_deaths

In [None]:
all_abs_deaths.sum(axis=1)

In [None]:
# Annualised year-to-date rate per age band:
#     deaths so far / (population * fraction of the year observed).
# The deaths are summed over each country's own weeks, so the exposure must
# use that country's own number of weeks. The previous single factor
# (latest_week - 1) / 52 was applied to every country, although the retained
# countries end up to two weeks apart, and it was one week short for the
# countries that report through latest_week.
# NOTE(review): if the "- 1" was meant to discount an incomplete final week,
# that week's deaths must be excluded from all_abs_deaths as well.
weeks_observed = df_all_countries_2020.groupby('Country')['Week'].nunique()
all_YTD_mortality = all_abs_deaths / all_age_grp_pop.mul(weeks_observed / 52, axis=0)
all_YTD_mortality

In [None]:
# Denmark's derived age-band populations are used as the standard population
# for the all-country age adjustment.
all_std_pop = all_age_grp_pop.loc['DNK']
all_std_pop

In [None]:
# Apply each country's mean age-band rates to the standard population
# (the Series aligns with the frame's age-band columns).
all_std_abs_age_grp_deaths = all_std_pop * all_countries_mortality_age_grp
all_std_abs_age_grp_deaths

In [None]:
all_std_abs_age_grp_deaths.sum(axis=1)

In [None]:
all_std_pop.sum()

In [None]:
# Age-adjusted rate per country: standardised deaths summed over the age bands,
# divided by the total size of the standard population.
all_std_mortality = all_std_abs_age_grp_deaths.sum(axis=1) / all_std_pop.sum()
all_std_mortality

In [None]:
# Bar chart of the age-adjusted rates for the retained countries, highest first.
title = "Selected countries 2020 YTD age adjusted [stdPop : DNK] Mortality\nDataSource : mortality.org"
all_std_mortality.sort_values(ascending=False).plot(kind='bar',color='crimson',
                                                    figsize=(18,12),title=title)
plt.ylabel('mortality')
plt.savefig('age_adj_mortality_selected_countries.jpg',format='jpg')

In [None]:
# Crude (not age-adjusted) annualised rate per country:
#     deaths so far / (total population * fraction of the year observed).
# The deaths are summed over each country's own weeks, so the exposure uses
# that country's own number of weeks. The previous single factor
# (latest_week - 1) / 52 was applied to every country, although the retained
# countries end up to two weeks apart, and it was one week short for the
# countries that report through latest_week.
# NOTE(review): if the "- 1" was meant to discount an incomplete final week,
# that week's deaths must be excluded from all_abs_deaths as well.
weeks_observed = df_all_countries_2020.groupby('Country')['Week'].nunique()
all_non_age_adj_mortality = all_abs_deaths.sum(axis=1) / (all_age_grp_pop.sum(axis=1) * (weeks_observed / 52))
all_non_age_adj_mortality

In [None]:
all_non_age_adj_mortality.sort_values(ascending=False).plot(kind='bar',figsize=(18,12))

In [None]:
latest_week

In [None]:
all_age_grp_pop

In [None]:
# Share of each country's derived population in each age band (each row is
# divided by its own total).
all_age_pct = all_age_grp_pop.div(all_age_grp_pop.sum(axis=1),axis=0)
all_age_pct

In [None]:
# Stacked population shares per age band for the Nordic countries, with y ticks
# every 0.1.
all_age_pct.loc[['SWE','FIN','DNK','NOR','ISL']].plot(kind='bar',stacked=True,figsize=(18,12))
_= plt.yticks(np.arange(0,1.1,0.1))

In [None]:
# The same stacked population shares for every retained country.
all_age_pct.plot(kind='bar',stacked=True,figsize=(18,12))
_= plt.yticks(np.arange(0,1.1,0.1))

In [None]:
# Share of each country's standardised deaths that falls in each age band
# (each row divided by its own total), plotted as stacked bars and displayed.
std_deaths_per_country = all_std_abs_age_grp_deaths.sum(axis=1)
all_std_abs_age_grp_deaths.div(std_deaths_per_country,axis=0).plot(
    kind='bar',stacked=True,figsize=(18,12))

all_deaths_age_pct = all_std_abs_age_grp_deaths.div(std_deaths_per_country,axis=0)
all_deaths_age_pct

In [None]:
all_abs_deaths

In [None]:
all_age_grp_pop

In [None]:
# Share of each country's actual year-to-date deaths per age band (each row
# divided by its own total), plotted as stacked bars and displayed.
all_abs_deaths_pct = all_abs_deaths.div(all_abs_deaths.sum(axis=1),axis=0)

title = 'Selected Countries : pct deaths per age grp of all deaths'
all_abs_deaths_pct.plot(kind='bar',stacked=True,figsize=(18,12),title=title)
all_abs_deaths_pct

In [None]:
# Sweden's 2020 rows taken from the date-indexed timeline frame.
SWE_2020 = df_timeline.loc[(df_timeline['Country'] == 'SWE') & (df_timeline['Year'] == 2020)]
SWE_2020


In [None]:
# Total derived population per Nordic country (sum over the age bands).
nordic_2020_pop = nordic_age_grp_pop.sum(axis=1)
nordic_2020_pop

In [None]:
# Total 2020 deaths per Nordic country (sum over weeks and age bands).
nordic_2020_abs = nordic_abs.groupby('Country').sum().sum(axis=1)
nordic_2020_abs

In [None]:
# https://www.statista.com/statistics/580174/death-rate-in-iceland/ #

# Deaths recorded in 2020 divided by the total population, per country.
# NOTE(review): unlike the other rates in this notebook, this ratio is not
# scaled by weeks observed / 52, so for a partial year it is a year-to-date
# fraction rather than an annual rate - confirm that this is intended.
title = 'Nordic All Cause Mortality 2020\nDataSource : Mortality.org'
nordic_2020_mort = nordic_2020_abs / nordic_2020_pop
nordic_2020_mort.plot(kind='bar',figsize=(18,12),color='r',title=title)
plt.ylabel('mortality')
plt.yticks(np.arange(0,0.012,0.0005))
plt.savefig('mortality_org_nordic_mortality_2020.jpg',format='jpg')

In [None]:
# Mean of Sweden's weekly all-age rates for 2020, shown for comparison; the
# author notes that this weekly mean gives too low a value.
df_timeline.loc[( df_timeline['Country'] == 'SWE' ) & (df_timeline['Year'] == 2020)]['all_age_grp'].mean()