In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc3 as pm
import arviz as az

sns.set()

In [None]:
# Load the pickled per-country data; it is iterated with .items() further down,
# so it behaves as a mapping keyed by country name.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# from a trusted source.
dick = pd.read_pickle('country_data.pkl')
# Spot-check a single country's entry.
dick['Sweden']

In [None]:
# Population lookup table: headerless, ';'-separated, ',' as thousands separator.
# Column 0 becomes the index (presumably the country name — it is later joined
# against df['country']); the value column is named 'pop'.
pop = pd.read_csv(
    'world_pop.csv',
    sep=';',
    thousands=',',
    header=None,
    index_col=0,
    names=['pop'],
)
pop


In [None]:
# Stack the per-country tables into one long frame, tagging every row with the
# country it came from. The frames are collected first and concatenated once:
# growing a DataFrame with pd.concat inside the loop copies all previous rows
# on every iteration, and seeding it with an empty DataFrame relies on pandas'
# deprecated handling of empty entries when inferring result dtypes.
country_frames = [pd.DataFrame(v).assign(country=k) for k, v in dick.items()]
df = pd.concat(country_frames)

# These two ratio columns are not used by the analysis below.
df = df.drop(columns=['conf_over_dead','dead_conf_ratio'])

In [None]:
# Keep only rows with more than 1000 confirmed cases and more than 100 deaths.
enough_confirmed = df['confirmed'].gt(1000)
enough_deceased = df['deceased'].gt(100)
mask = enough_confirmed & enough_deceased
df = df.loc[mask]

df

In [None]:
# Attach each row's population by matching df['country'] against the index of
# `pop`. This is an inner join, so rows whose country has no entry in `pop`
# are dropped.
df = pd.merge(df, pop, left_on='country', right_index=True)
df

In [None]:
# Countries in order of first appearance; each one's position in this list is
# the integer index used for the per-country model parameters.
unique_countries = df['country'].unique().tolist()

country_idx_dict = {name: position for position, name in enumerate(unique_countries)}

country_idx_dict

In [None]:
# Integer country index for every row (looked up in country_idx_dict).
df['country_idx'] = [country_idx_dict[name] for name in df['country']]

# Replace anything that is not >= 0 with 0 in the count columns.
for count_col in ('dead_inc', 'confirmed', 'inc', 'deceased'):
    df[count_col] = df[count_col].apply(lambda value: value if value >= 0 else 0)

# Deaths and confirmed cases per million inhabitants.
pop_in_millions = df['pop'] / 1e6
df['dead_per_M'] = df['deceased'] / pop_in_millions
df['conf_per_M'] = df['confirmed'] / pop_in_millions


df


In [None]:
# Inspect the rows belonging to Sweden.
df.loc[df['country'] == 'Sweden']

In [None]:
# Prior sanity check: histogram of 1000 draws from Beta(alpha=5, beta=1000).
m_prior_dist = pm.Beta.dist(alpha=5, beta=1000)
M_prior = m_prior_dist.random(size=1000)
plt.hist(M_prior)

In [None]:
# Prior sanity check: histogram of 1000 draws from Lognormal(1, 0.9).
f_prior_dist = pm.Lognormal.dist(1, 0.9)
F_prior = f_prior_dist.random(size=1000)
plt.hist(F_prior)

In [None]:
# Hierarchical model: for every row, the expected deaths per million are
#     confirmed_per_million * M[country] * F[country]
# with one M and one F per country drawn from shared hyperpriors.
# M is the quantity reported further down as the estimated mortality rate;
# F is a positive per-country multiplier (presumably the ratio of true
# infections to confirmed cases — TODO confirm).
#
# Unlike the earlier version of this cell, `pop` is NOT rebound here: it was
# unused in the model, and overwriting the population DataFrame with an array
# broke re-running the merge cell. The unused Metropolis step object was also
# dropped — pm.sample() is called without `step`, so PyMC3 picks the sampler.
RANDOM_SEED = 42  # fixed so that sampling is reproducible across kernel restarts

model = pm.Model()

x = df['conf_per_M'].values
y = df['dead_per_M'].values

country_idx = df['country_idx'].values
n_countries = len(unique_countries)

with model:

    # Hyperpriors for the per-country Lognormal multiplier F.
    F_mu = pm.Uniform('F_mu',lower=1.1,upper=5)
    F_sigma = pm.Uniform('F_sigma',lower=0.1,upper=0.9)

    # Hyperpriors for the per-country Beta-distributed rate M.
    M_alpha = pm.Uniform('M_alpha',lower=1,upper=10)
    M_beta = pm.Uniform ('M_beta',lower=500,upper=1000)

    F = pm.Lognormal('F',mu=F_mu, sd=F_sigma,shape=n_countries)

    M = pm.Beta('M',alpha=M_alpha,beta=M_beta,shape=n_countries)

    # An additional per-country interaction coefficient (HalfNormal or
    # Exponential prior) multiplying this product was tried and is disabled.
    gen_dead = x * M[country_idx] * F[country_idx]

    # NOTE(review): y is deaths per million (a float ratio), while Poisson is a
    # count likelihood — confirm this is the intended observation model.
    obs = pm.Poisson('obs', gen_dead,observed=y)

    trace = pm.sample(5000,tune=5000,chains=2,cores=2,target_accept=0.995,
                      random_seed=RANDOM_SEED)

    # Flat per-sample table (columns such as 'M__0', 'F__0', ...) and an ArviZ
    # summary with 89% HDI plus the median as an extra statistic.
    result = pm.trace_to_dataframe(trace)
    summary = az.summary(trace,hdi_prob=0.89,stat_funcs=[np.median])


In [None]:
# Show the ArviZ summary table computed in the sampling cell.
summary

In [None]:
# Draw the ArviZ trace plot for the sampled trace, inside the model context.
with model:
    az.plot_trace(trace)

In [None]:
# Draw 200 posterior-predictive samples from the fitted model.
ppc = pm.sample_posterior_predictive(trace,samples=200,model=model)

In [None]:
# Shape of the simulated values for the observed variable 'obs'.
ppc['obs'].shape

In [None]:
# Bundle the trace and the posterior-predictive draws into an ArviZ data
# object, then plot the posterior-predictive check.
data_ppc = az.from_pymc3(trace,model=model,posterior_predictive=ppc)
az.plot_ppc(data_ppc)

In [None]:
# Average of the per-country posterior means of M, taken over every country in
# the model. The columns are derived from len(unique_countries) instead of a
# hard-coded 'M__0':'M__101' label slice, which silently left countries out
# whenever the number of countries differed from 102.
m_cols = ['M__{}'.format(i) for i in range(len(unique_countries))]
mean = result[m_cols].mean().mean()
print (mean)

In [None]:
# Posterior mean of M for the country with index 88.
result['M__88'].mean()

In [None]:
# Bar chart of the posterior mean of M for every country, with the overall
# average (`mean`, computed in an earlier cell) as a dashed red reference line.
# The plotted columns are derived from len(unique_countries) so that the number
# of bars always matches the tick labels set below (the former hard-coded
# 'M__0':'M__101' slice could disagree with the country list).
m_cols = ['M__{}'.format(i) for i in range(len(unique_countries))]
result[m_cols].mean().plot(figsize=(18,12),kind='bar',
                           title='Estimated COVID-mortality rate [Infection Mortality Rate]')

_= plt.xticks(range(len(unique_countries)),unique_countries,rotation=90)

plt.ylabel('mortality rate')

plt.axhline(mean,color='r',ls='dashed')
plt.savefig('COVID_estimated_IFR_countries.jpg',format='jpg')

In [None]:
# Number of countries in the model. The per-country columns of `result` are
# numbered from 0, so the last F__/M__ column index is this value minus 1.
len(unique_countries)

In [None]:
# Split the flat sample table into one frame per parameter and label the
# columns with country names. The column lists are built from
# len(unique_countries) rather than hard-coded 'F__106' / 'M__106' end labels,
# so nothing has to be edited by hand when the set of countries changes.
n_countries = len(unique_countries)
f_cols = ['F__{}'.format(i) for i in range(n_countries)]
m_cols = ['M__{}'.format(i) for i in range(n_countries)]

# .copy() so that renaming the columns never touches `result` itself.
result_F = result[f_cols].copy()
result_M = result[m_cols].copy()
result_F.columns = unique_countries
result_M.columns = unique_countries
result_M.describe()

In [None]:
def plot(df,country_name):
    """Plot the sample distribution of one country's estimated IFR and save it.

    Draws a density histogram of ``df[country_name]`` with the 5.5th/94.5th
    percentiles (89% interval), the mean and the median marked, overlays the
    empirical cumulative distribution on a secondary y-axis, and writes the
    figure to ``IFR_<country_name>.jpg``.

    Parameters
    ----------
    df : pandas.DataFrame
        Samples, one column per country.
    country_name : str
        Name of the column in ``df`` to plot.

    Returns
    -------
    None
    """
    samples = df[country_name]

    # Interval of THIS country's samples. (Previously the percentiles were
    # taken over the whole frame, i.e. over all countries pooled together.)
    ci_low, ci_high = np.percentile(samples,[5.5,94.5])

    sample_mean = samples.mean()
    sample_median = samples.median()

    ax = samples.plot(kind='hist',title='COVID Estimated IFR {}'.format(country_name.upper()),
                      density=True,bins=np.arange(0,0.03,0.0005),color='yellow',alpha=0.7,
                      figsize=(18,12))

    ax.axvline(ci_low,color='orange',ls='dashed',label='CI-89%')
    ax.axvline(ci_high,color='orange',ls='dashed')

    ax.axvline(sample_mean,color='g',label='mean: {:.4f}'.format(sample_mean))
    ax.axvline(sample_median,color='r',label='median: {:.4f}'.format(sample_median))

    ax.set_xlabel('Estimated IFR')
    ax.set_ylabel('Probability Density')

    ax.legend(loc='lower right')

    # Empirical CDF: the i-th smallest of n samples sits at cumulative
    # probability i/n. It is computed from the `df` argument, not from the
    # global result_M, and no longer as a cumulative share of the value sum
    # (which is not a probability).
    sorted_samples = samples.sort_values()
    n_samples = len(sorted_samples)
    cumulative_prob = np.arange(1, n_samples + 1) / n_samples

    ax2 = ax.twinx()
    ax2.plot(sorted_samples,cumulative_prob,ls='dashed',color='k')
    ax2.set_ylabel('cumulative probability')

    plt.savefig('IFR_{}.jpg'.format(country_name),format='jpg')

In [None]:
# 89% highest-density interval of M for every country, one row per country.
m_hdi_bounds = az.hdi(trace['M'],hdi_prob=0.89)
hdi = pd.DataFrame(m_hdi_bounds,index=unique_countries,columns=['5.5','94.5'])
hdi.loc['Sweden']

In [None]:
# Split Sweden's M samples into 10 equal-width intervals and count the samples
# falling into each one.
sweden_samples = result_M['Sweden']
bins = pd.cut(sweden_samples, bins=10)
bins.value_counts()

In [None]:
# Countries for which an individual IFR figure is drawn and saved.
countries = [
    'Sweden', 'United Kingdom', 'US', 'Argentina', 'Peru', 'Australia',
    'Belgium', 'Denmark', 'Finland', 'Norway', 'Germany', 'Austria',
    'Italy', 'Spain',
]

# One fresh figure per country so the plots do not draw over each other.
for country in countries:
    plt.figure()
    plot(result_M, country)

In [None]:
# Show the ArviZ summary table again for reference next to the plots.
summary

In [None]:
# Joint posterior samples of M against F for the country with index 88.
# trace['M'] and trace['F'] hold one row per posterior draw and one column per
# country, so the country has to be selected on the second axis; indexing with
# [88] alone picked draw number 88 across all countries instead.
plt.plot(trace['M'][:, 88],trace['F'][:, 88],'o')