In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm
from pymc.Matplot import plot as pmplot


sns.set()


In [None]:
# Load the per-country data; entries are looked up by country name below.
# NOTE(review): read_pickle can execute arbitrary code - only load trusted files.
dick = pd.read_pickle('country_data.pkl')

### 2020-04-14
# Germany reports a negative dead_inc for 2020-04-11, which makes the Poisson
# observation in the model fail, so that single value is patched to 0 here.

ger = dick['Germany']
ger.at['2020-04-11','dead_inc'] = 0
dick['Germany'] = ger  # store the patched data back under the same key
ger

In [None]:


def strip(x):
    """Return ``x`` with every tab character removed."""
    pieces = x.split('\t')
    return ''.join(pieces)

# Population table, looked up by country name in process_country().
# The file is ';'-separated and has no header row; tabs are removed from the
# first column (via strip) and ',' is treated as the thousands separator.
country_populations = pd.read_csv(
    'world_pop.csv',
    sep=';',
    header=None,
    index_col=0,
    names=['population'],
    thousands=',',
    converters={0: strip},
)

def process_country(country_name):
    """Look up one country's data and add per-million columns to it.

    Reads the module-level ``dick`` (country data) and ``country_populations``
    (population table). The columns ``conf_per_M`` and ``dead_per_M`` are
    assigned on the object returned by ``dick[country_name]``, and that same
    object is returned.
    """
    frame = dick[country_name]
    # Population expressed in millions, shared by both derived columns.
    millions = country_populations.loc[country_name, 'population'] / 1e6

    for source, target in (('confirmed', 'conf_per_M'), ('deceased', 'dead_per_M')):
        frame[target] = frame[source] / millions

    return frame



In [None]:
# Reference samples (100000 each) that inference() overlays in red, labelled
# 'r prior' / 'f prior', on the posterior histograms.
# NOTE(review): 1 / 0.05 ** 2 is presumably a precision (tau = 1 / sigma^2) - confirm.
r_prior = pm.rlognormal(0.27, 1 / 0.05 ** 2, 100000)
f_prior = pm.rbeta(5, 200, 100000)

In [None]:
### ASSUMPTIONS ###
### 1) The daily increase in deaths (D) is a function of the number of confirmed cases:
###    D = mortality rate (f) x total confirmed (C) x growth rate of confirmed (r)
### 2) Without the growth term this would be D = f * C
### 3) The model below uses D = C * r * f

def inference(df,C,D,country_name):
    """Fit the daily-deaths model for one region and return posterior samples.

    Model: ``obs ~ Poisson(C * r * f)`` with observed value ``D``, where
    ``r ~ Lognormal(r_mu, 1 / r_sigma ** 2)`` and ``f ~ Beta(f_alpha, f_beta)``;
    the four hyper-parameters have Uniform priors.

    Parameters
    ----------
    df : not used inside the function; kept so existing callers keep working.
    C : confirmed counts, multiplied element-wise into the Poisson rate.
    D : observed values passed to the Poisson node.
    country_name : label used in the printed output only.

    Returns
    -------
    pandas.DataFrame with one row per stored MCMC sample and the columns
    ``r_post``, ``f_post`` and ``prediction_post``. ``prediction_post`` is
    the trace of the first element of ``prediction`` only.

    Side effects
    ------------
    Prints a summary, draws trace plots through ``pmplot`` and opens two
    histogram figures. The histograms read the module-level ``r_prior`` and
    ``f_prior`` samples.
    """

    # Hyper-priors for the Beta prior on f.
    f_alpha = pm.Uniform('f_alpha',1,100)
    f_beta = pm.Uniform ('f_beta',50,500)

    # Hyper-priors for the Lognormal prior on r. A lower bound of 0.2 for r_mu
    # (Uniform('r_mu',0.2,0.6)) gave high autocorrelation for Italy.
    r_mu = pm.Uniform('r_mu',0.05,0.6)
    r_sigma = pm.Uniform('r_sigma',0,0.2)

    # Fixed-parameter alternative that was tried: pm.Lognormal('r',0.27,1/0.05**2)
    r = pm.Lognormal('r',r_mu, 1 / r_sigma ** 2)
    f = pm.Beta('f',f_alpha,f_beta)

    @pm.deterministic()
    def prediction(C=C,r=r,f=f):
        return C * r * f

    obs = pm.Poisson('obs',prediction, observed=True,value=D)

    model = pm.Model([f,r,prediction,obs])
    try:
        # Best-effort MAP fit before sampling (presumably to give MCMC a good
        # starting point); a failure here must not stop the run.
        map_ = pm.MAP(model)
        map_.fit()
    except Exception as exc:
        # Not a bare except: Ctrl-C / SystemExit must still abort the cell
        # instead of falling through into the long MCMC run below.
        print ('cant fit {}: {!r}'.format(country_name, exc))

    mcmc = pm.MCMC(model)

    # Positional arguments: iterations, burn-in, thinning.
    mcmc.sample(200000,60000,4)

    r_mu_post = mcmc.trace('r_mu')[:]
    r_sigma_post = mcmc.trace('r_sigma')[:]
    r_post = mcmc.trace('r')[:]
    f_post = mcmc.trace('f')[:]
    f_alpha_post = mcmc.trace('f_alpha')[:]
    f_beta_post = mcmc.trace('f_beta')[:]

    # Diagnostic plots for every traced parameter.
    pmplot(r_mu_post,'r_mu_post')
    pmplot(r_sigma_post,'r_sigma_post')
    pmplot(f_alpha_post,'f_alpha_post')
    pmplot(f_beta_post,'f_beta_post')
    pmplot(r_post,'r_post')
    pmplot(f_post,'f_post')

    # Only column 0 of the prediction trace is kept.
    prediction_post = mcmc.trace('prediction')[:,0]

    result = pd.DataFrame({'r_post':r_post,
                          'f_post':f_post,
                          'prediction_post':prediction_post})

    print ()
    print ('++++++++++++++ {} ++++++++++++++++'.format(country_name))
    print (result.describe())
    print()

    # Reference prior samples (module-level r_prior / f_prior) vs. posterior.
    plt.figure(figsize=(18,12))
    plt.hist(r_prior,color='r',label='r prior',alpha=0.5,histtype='stepfilled',density=True)
    plt.hist(result.r_post,color='g',label='r posterior',alpha=0.7,histtype='stepfilled',density=True)
    plt.legend(loc='upper left')

    plt.figure(figsize=(18,12))
    plt.hist(f_prior,color='r',label='f prior',alpha=0.5,histtype='stepfilled',density=True)
    plt.hist(result.f_post,color='g',label='f posterior',alpha=0.7,histtype='stepfilled',density=True)

    plt.legend(loc='upper left')

    return result



In [None]:

def create_posterior_samples(result, confirmed=None):
    """Draw Poisson samples of daily deaths and confirmed counts per day.

    Parameters
    ----------
    result : DataFrame with ``r_post`` and ``f_post`` columns, as returned by
        ``inference``. Each row is treated as one joint posterior sample.
    confirmed : optional sequence of confirmed counts, one entry per day.
        When omitted, the module-level ``C`` is used, which is what the
        function did before this parameter existed.

    Returns
    -------
    (death_sample_df, conf_sample_df) : two DataFrames with one row per day
    (index named ``day``) and one column per sample (columns named
    ``sample_nr``). Deaths use the rate ``C * r * f``, confirmed ``C * r``.
    """
    N = 10000

    if confirmed is None:
        confirmed = C  # fall back to the notebook-level series
    # A plain array gives purely positional access, whatever index the Series
    # carries (integer keys on a label index are ambiguous/deprecated).
    confirmed = np.asarray(confirmed)

    # Resample whole rows so every (r, f) pair comes from the same posterior
    # sample. Drawing the two columns independently would discard their
    # correlation, and only the product r * f enters the death rate.
    draw_idx = np.random.randint(0, len(result), size=N)
    f_samples = np.asarray(result.f_post)[draw_idx]
    r_samples = np.asarray(result.r_post)[draw_idx]

    death_samples = np.array([pm.rpoisson(c * r_samples * f_samples,N) for c in confirmed])
    conf_samples = np.array([pm.rpoisson(c * r_samples,N) for c in confirmed])

    death_sample_df = pd.DataFrame(death_samples,index=range(len(death_samples)))
    death_sample_df.index.name='day'
    death_sample_df.columns.name='sample_nr'

    conf_sample_df = pd.DataFrame(conf_samples,index=range(len(conf_samples)))
    conf_sample_df.index.name='day'
    conf_sample_df.columns.name='sample_nr'

    return death_sample_df,conf_sample_df

In [None]:

def plot(result,df,sample_df,country_name,param='dead_inc',title_patch='deaths per day'):
    """Violin plot of the per-day samples with the actual data on top.

    Parameters
    ----------
    result : DataFrame with ``r_post`` and ``f_post`` columns; their means
        are shown in the title.
    df : actual data; ``df[param]`` is drawn as a scatter and its last value
        is reported as "Last Day Actual".
    sample_df : samples with one row per day and one column per sample.
    country_name : used in the title and in the output file name.
    param : name of the column of ``df`` to compare against.
    title_patch : text inserted into the title and the y-axis label.

    Reads the module-level ``limit`` for the title and the x-axis label.
    Saves the figure as ``'<country_name>_violinplot_<param>.jpg'``.
    """

    # 89% interval of the last day's samples, and the mean of every day.
    samples_89 = np.percentile(sample_df.iloc[-1,:],[5.5,94.5])
    samples_daily_mean = sample_df.mean(axis=1)

    # Look the last actual value up by column name so it always refers to the
    # same column as the scatter below, instead of a hard-coded position.
    last_actual = df[param].iloc[-1]

    plt.figure(figsize=(18,12))
    title_postfix = r' posterior mean growth factor $\mu$: {:.2f} posterior mean mortality rate $\mu$: {:.4f}'.format(
    result.r_post.mean(),result.f_post.mean())

    title_postfix_2 = ' Last Day Sample 89% CI: {:.2f} , {:.2f} : Last Day Actual: {}'.format(
        int(samples_89[0]),int(samples_89[1]),last_actual)

    # Transposed so that every day becomes one violin.
    sns.violinplot(data=sample_df.T,inner='quartile',scale='count',palette=['orange'])

    plt.plot(range(len(df)),samples_daily_mean,color='crimson',ls='dashed',label='death samples daily mean ')

    plt.title('Bayesian Posterior Expected number of {} {}, after the day CONFIRMED hit {} \n'.format(
       title_patch, country_name,limit) + title_postfix + '\n' + title_postfix_2)

    plt.ylabel('Expected number of {}'.format(title_patch))
    plt.xlabel('Day after number of confirmed hit {}'.format(limit))
    plt.scatter(range(len(df)),df[param],color='crimson',label='actual data')
    plt.legend(loc='upper left')

    plt.savefig('{}_violinplot_{}.jpg'.format(country_name,param),format='jpg')

In [None]:
countries = ['India','Italy','Spain','Sweden','US',
             'Korea, South','Netherlands','Canada','Germany',
             'France','Belgium','Austria',
            'Denmark','Norway','Finland','Australia','Portugal']

#countries = ['United Kingdom','France','Austria','Turkey']
#countries = ['Netherlands','Belgium','France','Austria','United Kingdom']
#countries = ['Sweden']

# Only rows whose confirmed count is at least this value are modelled.
# plot() also reads this name as a global for its title and x-axis label.
limit = 1000

for country_name in countries:

    df = process_country(country_name)

    mask = df['confirmed'] >= limit
    df = df[mask]

    #df = df[-10:]

    if df.empty:
        # Nothing to fit; say so instead of failing inside the model.
        print ('no rows with confirmed >= {} for {}, skipping'.format(limit, country_name))
        continue

    print ()
    print ('++++++++++++++++++++++ {} +++++++++++++++++++++++'.format(country_name))
    print (df.tail())

    # D and C are module-level on purpose: create_posterior_samples() reads C.
    D = df['dead_inc']
    C = df['confirmed']

    result = inference(df,C,D,country_name)
    sample_df,conf_sample_df = create_posterior_samples(result)

    print ('sample_df()')
    print (sample_df.tail(40))
    print ('daily death mean samples')
    print (sample_df.mean(axis=1).tail(40))
    print ('Last day summary stats')
    # Inside a loop a bare expression is not displayed, so print explicitly.
    print (sample_df.iloc[-1].describe())

    print ('conf sample_df.tail(40)')
    print (conf_sample_df.tail(40))
    print ('conf sample daily mean')
    print (conf_sample_df.mean(axis=1))

    plot(result,df,sample_df,country_name)
    plot(result,df,conf_sample_df,country_name,'confirmed','confirmed')
    result.to_pickle('violin_daily_result_{}.pkl'.format(country_name.replace(' ','_')))

In [None]:
#plot(result,df,sample_df,country_name)


In [None]:
# Per-state US data; entries are looked up by state name in the next cell.
# NOTE(review): read_pickle can execute arbitrary code - only load trusted files.
US = pd.read_pickle('us_state_dict.pkl')

In [None]:
# Restrict the New York data to a fixed, ordered set of columns.
ny = US['New York'][[
    'confirmed',
    'deceased',
    'recovered',
    'inc',
    'factor',
    'dead_inc',
    'dead_factor',
]]
ny

In [None]:
print (ny)

country_name = 'New York'

D = ny['dead_inc']
C = ny['confirmed']  # also read as a global by create_posterior_samples()

# Same pipeline as for the countries: fit, store the result, sample, plot.
# NOTE(review): plot() puts the global `limit` into its titles, but ny is not
# filtered by that threshold here - confirm this is intended.
result = inference(ny, C, D, country_name)
result.to_pickle('violin_daily_result_{}.pkl'.format(country_name.replace(' ','_')))
sample_df,conf_sample_df = create_posterior_samples(result)
plot(result, ny, sample_df, country_name)
plot(result, ny, conf_sample_df, country_name, 'confirmed', 'confirmed')