In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm

# Apply seaborn's default styling to the matplotlib figures created below.
sns.set()

In [None]:
# Per-country data loaded from a local pickle; process_country() indexes it by country name.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
dick = pd.read_pickle('country_data.pkl')

def strip(x):
    """Return the string `x` with every tab character removed."""
    return ''.join(x.split('\t'))

# Population table read from a ';'-separated file without a header row.
# Column 0 is passed through strip() (tabs removed) and used as the index;
# ',' is treated as the thousands separator when parsing numbers.
country_populations = pd.read_csv(
    'world_pop.csv',
    sep=';',
    header=None,
    names=['population'],
    index_col=0,
    thousands=',',
    converters={0: strip},
)

def process_country(country_name):
    """Return the stored data for `country_name` with per-million columns added.

    Looks up the entry for `country_name` in the module-level `dick` and its
    population in `country_populations`, then adds two columns:

    * ``conf_per_M`` - ``confirmed`` divided by the population in millions
    * ``dead_per_M`` - ``deceased`` divided by the population in millions

    The columns are written onto the object stored in `dick` (no copy is made),
    and that same object is returned.

    Raises whatever the two lookups raise when `country_name` is missing from
    either `dick` or `country_populations`.
    """
    frame = dick[country_name]
    population_in_millions = country_populations.loc[country_name,'population'] / 1e6

    for source_col, target_col in (('confirmed','conf_per_M'),
                                   ('deceased','dead_per_M')):
        frame[target_col] = frame[source_col] / population_in_millions

    return frame



In [None]:
# Reference draws (100000 each) from a log-normal distribution for r and a beta
# distribution for f, with the parameters passed below.
# NOTE(review): inference() builds its model with Beta(5, 100) for f, whereas these
# draws use Beta(5, 200) — confirm which prior is intended.
r_prior = pm.rlognormal(0.27,1/0.05**2,100000)
f_prior = pm.rbeta(5,200,100000)

In [None]:
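### ASSUMPTIONS ###
### 1) The number of new deaths (D) is a function of the number of confirmed cases (C)
### 2) D = f * C, where f is the mortality rate
### 3) D = C * r * f + m, where r is the growth factor (the offset m is currently left out of the model)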

def _plot_prior_vs_posterior(prior_samples, posterior_samples, name, country_name):
    """Draw normalised prior and posterior histograms of one parameter on a new figure."""
    plt.figure(figsize=(18,12))
    plt.hist(prior_samples,color='r',label='{} prior'.format(name),
             alpha=0.5,histtype='stepfilled',density=True)
    plt.hist(posterior_samples,color='g',label='{} posterior'.format(name),
             alpha=0.7,histtype='stepfilled',density=True)
    plt.title('{}: prior vs posterior of {}'.format(country_name, name))
    plt.legend(loc='upper left')


def inference(df,C,D,country_name):
    """Fit the model ``D ~ Poisson(C * r * f)`` by MCMC and plot priors vs posteriors.

    Parameters
    ----------
    df : unused; kept so existing callers keep working.
    C : observed confirmed counts, used as the fixed input of the prediction
        (callers in this notebook pass a pandas Series).
    D : observed values for the Poisson likelihood, aligned with `C`
        (callers in this notebook pass the ``dead_inc`` column).
    country_name : label used in the printed summary and the figure titles.

    Returns
    -------
    pandas.DataFrame with one row per retained MCMC draw and the columns
    ``r_post``, ``f_post`` and ``prediction_post``; the latter is the trace of
    the prediction for the first observation only.

    Side effects: prints a summary of the traces and creates two matplotlib
    figures (one per parameter).
    """
    n_prior_draws = 100000

    # Prior hyper-parameters, defined once so that the model and the prior
    # histograms below are guaranteed to describe the same distributions.
    r_mu = 0.27
    r_tau = 1/0.05**2
    f_alpha = 5
    f_beta = 100

    r = pm.Lognormal('r',r_mu,r_tau)
    f = pm.Beta('f',f_alpha,f_beta)

    @pm.deterministic()
    def prediction(C=C,r=r,f=f):
        return C * r * f

    obs = pm.Poisson('obs',prediction, observed=True,value=D)

    model = pm.Model([f,r,prediction,obs])

    # The MAP fit is a best-effort way of giving the sampler a good starting
    # point; if it fails we report why and sample from the initial values.
    try:
        map_ = pm.MAP(model)
        map_.fit()
    except Exception as exc:
        print ('cant fit {}: {!r}'.format(country_name, exc))

    mcmc = pm.MCMC(model)
    mcmc.sample(iter=100000, burn=50000, thin=2)

    result = pd.DataFrame({'r_post':mcmc.trace('r')[:],
                           'f_post':mcmc.trace('f')[:],
                           'prediction_post':mcmc.trace('prediction')[:,0]})

    print ()
    print ('++++++++++++++ {} ++++++++++++++++'.format(country_name))
    print (result.describe())
    print()

    # Draw the prior samples from exactly the distributions used in the model.
    r_prior_samples = pm.rlognormal(r_mu,r_tau,n_prior_draws)
    f_prior_samples = pm.rbeta(f_alpha,f_beta,size=n_prior_draws)

    _plot_prior_vs_posterior(r_prior_samples, result.r_post, 'r', country_name)
    _plot_prior_vs_posterior(f_prior_samples, result.f_post, 'f', country_name)

    return result



In [None]:
def create_posterior_samples(result, C=None, N=10000):
    """Simulate posterior-predictive Poisson counts for every day in `C`.

    Parameters
    ----------
    result : DataFrame with the columns ``r_post`` and ``f_post`` (one row per
        posterior draw), as returned by inference().
    C : confirmed counts, one per day. When omitted, the module-level ``C`` is
        used, which is how existing callers invoke this function.
    N : number of posterior draws to simulate per day.

    Returns
    -------
    pandas.DataFrame of shape ``(len(C), N)``; the index is named ``day`` and
    the columns ``sample_nr``.

    Notes
    -----
    * ``r`` and ``f`` are taken from the *same* posterior row. The likelihood
      only depends on their product, so drawing them independently from their
      marginals would discard their correlation and distort the predictions.
    * `C` is converted to a plain array so days are addressed by position,
      whatever index a pandas Series passed as `C` carries.
    """
    if C is None:
        # Backward compatibility with callers that rely on the global `C`.
        C = globals()['C']

    confirmed = np.asarray(C, dtype=float)

    # One row index per simulated draw keeps each (r, f) pair together.
    draw_idx = np.random.randint(0, len(result), size=N)
    r_samples = np.asarray(result['r_post'])[draw_idx]
    f_samples = np.asarray(result['f_post'])[draw_idx]

    # rate[i, j] = C[i] * r_j * f_j  ->  one row per day, one column per draw.
    rate = np.outer(confirmed, r_samples * f_samples)
    samples = np.random.poisson(rate)

    sample_df = pd.DataFrame(samples,index=range(len(samples)))
    sample_df.index.name='day'
    sample_df.columns.name='sample_nr'
    return sample_df

In [None]:

def plot(df,sample_df,country_name):
    """Plot the simulated deaths per day as violins with the observed values on top.

    Parameters
    ----------
    df : frame whose ``dead_inc`` column holds the observed values; they are
        drawn as scatter points at x = 0 .. len(df) - 1.
    sample_df : simulated counts, one row per day (transposed before plotting
        so that each day becomes one violin).
    country_name : used in the title and in the output file name.

    Reads the module-level names ``result`` (for the posterior means shown in
    the title) and ``limit`` (for the title and x-axis label), so both must be
    defined before this function is called.

    Saves the figure as ``<country_name>_violinplot.jpg``.
    """
    fig, ax = plt.subplots(figsize=(18,12))

    growth_mean = result.r_post.mean()
    mortality_mean = result.f_post.mean()
    title_postfix = r'posterior growth factor $\mu$: {:.2f} posterior mortality rate $\mu$: {:.4f}'.format(
        growth_mean, mortality_mean)

    sns.violinplot(data=sample_df.T,inner='quartile',scale='count',ax=ax)

    headline = 'Bayesian Posterior Expected number of deaths per day {}, after the day CONFIRMED hit {} \n'.format(
        country_name,limit)
    ax.set_title(headline + title_postfix)

    ax.set_ylabel('Expected number of deaths per day')
    ax.set_xlabel('Day after number of confirmed hit {}'.format(limit))

    observed_days = range(len(df))
    ax.scatter(observed_days,df['dead_inc'],color='crimson',label='actual data')
    ax.legend(loc='upper left')

    fig.savefig('{}_violinplot.jpg'.format(country_name),format='jpg')

In [None]:
# NOTE(review): 'New York' is looked up in the country-level data here, but it is also
# analysed from the US state data further down — confirm it belongs in this list.
countries = ['India','Italy','Spain','Sweden','US',
             'Korea, South','Netherlands','Canada','Germany','New York']

# Alternative selection:
#countries = ['Finland','Norway','Denmark','Sweden','Iceland']

# Only rows whose 'confirmed' value has reached this threshold are analysed.
# Kept as a module-level name because plot() reads `limit` for its title and x-axis label.
limit = 1000

for country_name in countries:

    df = process_country(country_name)
    df = df[df['confirmed'] >= limit]

    if df.empty:
        # Nothing to fit: no row reaches the threshold.
        print ('no rows with confirmed >= {} for {}, skipping'.format(limit, country_name))
        continue

    print (df['dead_inc'])

    # D, C and result keep these exact module-level names on purpose:
    # create_posterior_samples() reads `C` and plot() reads `result`.
    D = df['dead_inc']
    C = df['confirmed']

    result = inference(df,C,D,country_name)
    sample_df = create_posterior_samples(result)
    plot(df,sample_df,country_name)

In [None]:
# US state data loaded from a local pickle; indexed by state name in the cells below.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
US = pd.read_pickle('us_state_dict.pkl')

In [None]:
# Select the New York entry and show it as the cell output.
ny = US['New York']
ny

In [None]:
# Same pipeline as the country loop, applied to the New York state data.
# `limit` is defined here so this cell does not depend on the country-loop cell having
# run, and the same threshold filter is applied so that the "after the day CONFIRMED
# hit <limit>" title and x-axis label written by plot() hold for this figure too.
limit = 1000
ny_df = ny[ny['confirmed'] >= limit]

print (ny_df['dead_inc'])

# D, C and result keep these exact module-level names on purpose:
# create_posterior_samples() reads `C` and plot() reads `result`.
D = ny_df['dead_inc']
C = ny_df['confirmed']

result = inference(ny_df,C,D,'New York')
sample_df = create_posterior_samples(result)
plot(ny_df,sample_df,'New York')