In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm
import requests
import scipy.stats as sps

from pymc.Matplot import plot as pmplot

# Switch on seaborn's default plot styling for every matplotlib figure drawn below.
sns.set()

In [None]:
# Date window (ISO formatted) requested from the Oxford stringency API;
# later cells reuse both values.
start_date = '2020-04-01'
end_date = '2020-07-24'

url = 'https://covidtrackerapi.bsg.ox.ac.uk/api/v2/stringency/date-range/{}/{}'.format(start_date,end_date)

r = requests.get(url,timeout=5.0)
# Stop here with the HTTP error in the message if the request failed, rather
# than later when the payload is decoded and indexed.
r.raise_for_status()
r.status_code

In [None]:
# One timestamp per calendar day from start_date through end_date.
keys = pd.date_range(start=start_date, end=end_date)
keys

In [None]:
# Decode the response body once, then pull out the two top-level entries
# that the next cell iterates over.
json = r.json()
data, countries = json['data'], json['countries']

In [None]:
# Flatten the nested payload (date -> country -> figures) into one tuple per
# (date, country). Combinations with any missing key are reported and skipped.
data_list = []

for day in keys:
    date = day.date().strftime('%Y-%m-%d')
    for c in countries:
        try:
            entry = data[date][c]
            row = (date, c, entry['confirmed'], entry['deaths'], entry['stringency'])
        except KeyError:
            print('cant find', c)
        else:
            data_list.append(row)


In [None]:
# Long-format table of the collected tuples, indexed by (country, date) with
# the date strings parsed into timestamps.
df = (
    pd.DataFrame(
        data_list,
        columns=['date', 'country', 'confirmed', 'dead', 'oxford_stringency'],
    )
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index(['country', 'date'])
)
df.loc['SWE']

In [None]:
# Mean stringency index per country (index level 0) across the whole window.
average_ox_idx = df.groupby(level=0)['oxford_stringency'].mean()
average_ox_idx

In [None]:
# Header-less, ';'-separated file: the first column becomes the index and the
# remaining column is read with ',' as thousands separator.
population = pd.read_csv(
    'world_pop.csv',
    sep=';',
    thousands=',',
    header=None,
    index_col=0,
)
population.columns = ['pop']
population = population.rename_axis('country')
population

In [None]:
# Header-less, ';'-separated file: column 2 becomes the index and only
# column 0 is kept, as a Series.
three_letter_abb = pd.read_csv(
    'three_letter_country_abb.csv',
    sep=';',
    header=None,
    index_col=2,
)[0]
three_letter_abb

In [None]:
# Turn the 'date' index level (position 1) back into a regular column, so that
# only the country level remains as the index.
df = df.reset_index(level='date')

In [None]:
# Spot-check the rows of one country after the index change.
df.loc['SWE']

In [None]:
# Index-on-index join: three_letter_abb is a Series named 0, so its values
# arrive as a new column labelled 0.
df = df.join(three_letter_abb)

In [None]:
# Give the column labelled 0 (added by the preceding join) a readable name.
df = df.rename(columns={0: 'country'})

In [None]:
# average_ox_idx carries the same name as df's 'oxford_stringency' column, so
# the suffixes split them into 'oxford_stringency_current' (per-row value)
# and 'oxford_stringency_mean' (per-country mean).
df = df.join(average_ox_idx,lsuffix='_current',rsuffix='_mean')

In [None]:
# Display the combined frame.
df

In [None]:
# Attach population figures by matching df's 'country' column against the
# index of the population table, then derive deaths per million inhabitants.
foo = df.merge(population, left_on='country', right_on=population.index)
foo = foo.assign(dead_per_M=foo['dead'] / (foo['pop'] / 1e6))
foo

In [None]:
# Reshape the lookup Series into a two-column frame: 'abb' (the former index)
# and 'country' (the former values).
three_letter_abb = (
    three_letter_abb
    .rename('country')
    .rename_axis('abb')
    .to_frame()
    .reset_index()
)
three_letter_abb

In [None]:
excluded = ['San Marino', 'Andorra']

# Rows for the final day of the window, without the excluded countries.
last_days = foo.loc[foo['date'] == end_date]
last_days = last_days.loc[~last_days['country'].isin(excluded)]

# Bring in the 'abb' column by matching on the 'country' column.
last_days = last_days.merge(three_letter_abb, on='country')

last_days['conf_per_M'] = last_days['confirmed'] / (last_days['pop'] / 1e6)

# Keep only rows with more than 40 deaths per million.
last_days = last_days.loc[last_days['dead_per_M'] > 40]
last_days

In [None]:
# The (up to) 50 rows with the lowest mean stringency, lowest first.
last_days.sort_values(by='oxford_stringency_mean').head(50)



In [None]:
# Predictor: the per-country mean stringency column of last_days.
x = last_days['oxford_stringency_mean']

# Name of the last_days column used as the regression outcome.
y_param = 'dead_per_M'

# Prior hyper-parameters, one set per outcome.
if y_param == 'dead_per_M':
    beta_mean = 0
    beta_std = 10
    alpha_low = 100
    alpha_high = 300
    obs_sigma_high = 500
else:
    beta_mean = 0
    beta_std = 1000
    alpha_low = -4000
    alpha_high = 4000
    obs_sigma_high = 10000

# Priors. The third argument of pm.Normal is a precision (1 / std**2); the
# float literal keeps that a true division under any division semantics.
alpha = pm.Uniform('alpha',alpha_low,alpha_high)
beta = pm.Normal('beta',beta_mean, 1.0 / beta_std ** 2)
obs_sigma = pm.Uniform('obs_sigma',0,obs_sigma_high)

@pm.deterministic
def linreq(x=x,alpha=alpha,beta=beta):
    """Linear predictor: alpha + x * beta, evaluated for every entry of x."""
    return alpha + x * beta

obs = pm.Normal('obs',mu = linreq,tau = 1.0 / obs_sigma ** 2,observed = True,value = last_days[y_param])

# obs_sigma is listed explicitly so that it is part of the fitted model and
# its trace is available afterwards via mcmc.trace('obs_sigma').
model = pm.Model([alpha,beta,obs_sigma,linreq,obs])

# Fit the MAP estimate first, before starting the sampler.
map_ = pm.MAP(model)
map_.fit()

mcmc = pm.MCMC(model)

sample = mcmc.sample(iter=50000,burn=20000,thin=3)

In [None]:
# Stored posterior draws, one array per parameter.
beta_post, alpha_post, obs_sigma_post = (
    mcmc.trace(name)[:] for name in ('beta', 'alpha', 'obs_sigma')
)

# Diagnostic plot for each parameter.
for draws, label in (
    (beta_post, 'beta_post'),
    (alpha_post, 'alpha_post'),
    (obs_sigma_post, 'obs_sigma_post'),
):
    pmplot(draws, label)

# Draws side by side, one column per parameter.
result = pd.DataFrame({
    'beta_post': beta_post,
    'alpha_post': alpha_post,
    'obs_sigma_post': obs_sigma_post,
})

result.describe()

In [None]:
# 5.5th and 94.5th percentiles of the slope and intercept draws
# (the central 89% interval).
ci_bounds = [5.5, 94.5]
beta_ci, alpha_ci = (
    np.percentile(result[column], ci_bounds)
    for column in ('beta_post', 'alpha_post')
)

In [None]:
# Integer grid 0..100 on which both reference lines are evaluated.
grid = np.arange(0, 101)

# Regression line at the posterior means of slope and intercept.
mean_req = grid * result['beta_post'].mean() + result['alpha_post'].mean()

# Ordinary least-squares line on the same data, for comparison.
slope, intercept = sps.linregress(
    last_days['oxford_stringency_mean'],
    last_days[y_param],
)[:2]

least_squares = grid * slope + intercept

In [None]:
# Evaluation grid: 1000 evenly spaced points from 0 to 100.
X = np.linspace(0,100,1000)

# Pick posterior draws at random (with replacement), as many as grid points.
# The same row labels are used for every parameter, so each slope stays
# paired with the intercept and noise scale from the same draw.
rows = np.random.choice(result.index,replace=True,size=len(X))
beta_samples = result['beta_post'][rows]
alpha_samples = result['alpha_post'][rows]
# pm.rnormal takes a precision, hence 1 / sigma**2.
tau_samples = 1.0 / result['obs_sigma_post'][rows].values ** 2

# lines[i, j] is sampled regression line j evaluated at X[i]; each COLUMN is
# therefore one complete line across the grid.
lines = np.array([X[i] * beta_samples + alpha_samples for i in range(len(X))])

# Simulated observations at every grid point: row i holds one draw per
# sampled line, centred on lines[i].
samples = np.array([pm.rnormal(lines[i], tau_samples, size=len(X)) for i in range(len(X))])

# np.percentile returns the bounds in the order requested, so the 5.5th
# percentile is the lower edge and the 94.5th the upper edge.
low,high = np.percentile(samples,[5.5,94.5],axis=1)




In [None]:
plt.figure(figsize=(18,12))

# Adjacent literals are joined before .format() runs, so one call fills all
# three placeholders.
title = (
    'Bayesian Linear Regression : COVID-19'
    '- Effect of Lockdowns on {} \n'
    'based on {} countries\n'
    'Assuming average lockdown index value April 1st-July 24th\n'
    'death/conf data as of {}'
).format(y_param, len(last_days), end_date)
plt.title(title)

# Observed countries.
plt.scatter(last_days['oxford_stringency_mean'],last_days[y_param],color='r')
plt.ylabel(y_param)
plt.xlabel('Oxford COVID-19 Government Response Tracker index\nHale, Thomas, Sam Webster, Anna Petherick, Toby Phillips, and Beatriz Kira (2020)')

# Posterior-mean line (black) and least-squares line (crimson).
plt.plot(range(0,101),mean_req,ls='dashed',color='k')
plt.plot(range(0,101),least_squares,ls='dashed',color='crimson')

# Band between the two percentile curves of the simulated observations;
# the fill is the same whichever of the two bounds is passed first.
plt.fill_between(X,high,low,color='c',alpha=0.2)

# Currently unused: every country is labelled below. To label only a subset,
# skip rows whose 'abb' value is not in this list.
# NOTE(review): 'HNT' does not look like a standard three-letter country
# code - confirm the intended entry.
countries_of_interest = ['SWE','GBR','BEL','ESP','ITA','PER','FRA',
                         'USA','DNK','FIN','NOR','BRA','CHL','QAT','BHR',
                        'HNT','GTM','NIC','JPN','ISL','ARG','NLD','DEU','ZAF','HND','IRQ']

# Label every point with its country abbreviation.
for x_pos, y_pos, abb in zip(last_days['oxford_stringency_mean'],
                             last_days[y_param],
                             last_days['abb']):
    plt.text(x_pos, y_pos, abb)

# A single sampled regression line carries the legend entry. Each sampled
# line is a COLUMN of `lines` (rows index the grid X), hence lines[:,0].
plt.plot(X,lines[:,0],
         color='orange',
         alpha=0.03,
         label=r'$\beta$: {:.2f} 89% CI: [ {:.2f} {:.2f} ] $\alpha$: {:.2f} CI: [ {:.2f} {:.2f} ]'.format(
         result.beta_post.mean(),beta_ci[0],beta_ci[1],result.alpha_post.mean(),alpha_ci[0],alpha_ci[1]))

# All sampled lines, drawn faintly so that their density shows the uncertainty.
_=plt.plot(X,lines,color='orange',alpha=0.03)

plt.legend(loc='upper left')

plt.savefig('lockdown_index_{}.jpg'.format(y_param),format='jpg')

In [None]:
# Full table, highest mean stringency first.
last_days.sort_values(by='oxford_stringency_mean', ascending=False)
