In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pymc as pm
import requests
import scipy.stats as sps

from pymc.Matplot import plot as pmplot

# Switch matplotlib over to seaborn's default theme for every figure created below.
sns.set()

In [None]:
# Date window requested from the Oxford COVID-19 Government Response Tracker API.
start_date = '2020-03-01'
end_date = '2020-08-01'

url = 'https://covidtrackerapi.bsg.ox.ac.uk/api/v2/stringency/date-range/{}/{}'.format(start_date,end_date)

r = requests.get(url,timeout=5.0)
# Stop here with an explicit HTTPError on a 4xx/5xx answer instead of failing
# later, and less clearly, when the body is decoded with r.json().
r.raise_for_status()
r.status_code

In [None]:
# Daily timestamps spanning the requested window; used to look up the API payload by date.
keys = pd.date_range(start=start_date, end=end_date)
keys

In [None]:
# Decode the response body once, then keep the two top-level entries used below.
json = r.json()
data, countries = json['data'], json['countries']

In [None]:
# Flatten the nested payload (date -> country -> metrics) into one tuple per
# (date, country). Combinations missing from the payload are reported and skipped.
data_list = []

for k in keys:
    date = k.strftime('%Y-%m-%d')
    for c in countries:
        try:
            entry = data[date][c]
            record = (date,c,entry['confirmed'],entry['deaths'],entry['stringency'])
        except KeyError:
            print ('cant find',c)
        else:
            data_list.append(record)


In [None]:
# Long-format table indexed by (country code, date); the date strings are parsed
# to timestamps before they become an index level.
df = (
    pd.DataFrame(data_list,columns=['date','country','confirmed','dead','oxford_stringency'])
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index(['country','date'])
)
df.loc['GBR']

In [None]:
# Mean stringency per first index level (the country code) over the whole window.
average_ox_idx = df.groupby(level=0)['oxford_stringency'].mean()
average_ox_idx

In [None]:
# Population table: first CSV column becomes the index (named 'country'),
# the single remaining column is labelled 'pop'.
population = (
    pd.read_csv('world_pop.csv',sep=';',thousands=',',header=None,index_col=0)
    .rename_axis('country')
)
population.columns = ['pop']
population

In [None]:
# Lookup Series: CSV column 2 (the three-letter code) is the index,
# CSV column 0 holds the values.
three_letter_abb = pd.read_csv(
    'three_letter_country_abb.csv',
    sep=';',
    header=None,
    index_col=2,
)[0]
three_letter_abb.loc['GBR']

In [None]:
# Move the date level out of the index so only the country code remains as index.
# Not idempotent: running this cell twice fails because the level is gone.
df = df.reset_index(level='date')

In [None]:
# Spot-check: show the rows whose index label is 'GBR'.
df.loc['GBR']

In [None]:
# Index-on-index left join: attaches the lookup Series (column label 0) to every row.
df = df.join(three_letter_abb, how='left')

In [None]:
# Give the joined lookup column (label 0) a readable name.
df = df.rename(columns={0 : 'country'})

In [None]:
# Attach the per-country mean stringency. Both sides carry an 'oxford_stringency'
# column, so the suffixes yield 'oxford_stringency_current' and 'oxford_stringency_mean'.
df = df.join(
    average_ox_idx,
    how='left',
    lsuffix='_current',
    rsuffix='_mean',
)

In [None]:
# Display the joined frame (pandas truncates long frames in the notebook output).
df

In [None]:
# At this point 'country' names both df's index (the three-letter code) and a
# column (the value joined from three_letter_abb). Recent pandas versions raise
# "'country' is both an index level and a column label" when asked to merge on
# such a key, so the index is dropped first. A column-keyed merge ignores the
# left index anyway; the code is re-attached later as the 'abb' column.
foo = df.reset_index(drop=True).merge(population,left_on='country',right_on=population.index)
# Cumulative deaths per million inhabitants.
foo['dead_per_M'] = foo['dead'] / (foo['pop'] / 1e6)
foo

In [None]:
# Turn the lookup Series into a two-column frame ('abb', 'country') so it can be
# merged on the country name. Not idempotent: expects the Series form as input.
three_letter_abb = (
    three_letter_abb
    .rename('country')
    .rename_axis('abb')
    .to_frame()
    .reset_index()
)


In [None]:
# Display the reshaped lookup table (columns 'abb' and 'country').
three_letter_abb

In [None]:
# Re-attach the three-letter code as column 'abb' by matching on the country name.
foo = foo.merge(three_letter_abb, on='country')
foo

In [None]:
# Rows belonging to the United Kingdom, selected by three-letter code.
is_gbr = foo['abb'] == 'GBR'
gbr = foo[is_gbr]
gbr

In [None]:
# Date found in the last GBR row. NOTE(review): this is the most recent date
# only if the rows are in chronological order — presumably true because
# data_list was filled date by date; verify after any re-sorting.
last_data_date = gbr['date'].iloc[-1]
last_data_date

In [None]:
# Cross-section of all countries on the last date for which GBR has a row.
last_days = foo.loc[foo['date'] == last_data_date]
drop = ['San Marino','Andorra']

mask = last_days['country'].isin(drop)
# .copy() detaches the filtered rows from `foo`, so the column assignments
# below write to an independent frame and do not raise SettingWithCopyWarning.
last_days = last_days[~mask].copy()

#last_days = last_days.merge(three_letter_abb,left_on='country',right_on='country')

# Cumulative confirmed cases per million inhabitants.
last_days['conf_per_M'] = last_days['confirmed'] / (last_days['pop'] / 1e6)

### MASK ###
# Keep only countries with more than 10 deaths per million.
mask2 = last_days['dead_per_M'] > 10
############

last_days = last_days[mask2].copy()
last_days['dead_per_M_log'] = np.log10(last_days['dead_per_M'])

last_days

In [None]:
# The 50 countries with the lowest mean stringency.
last_days.sort_values(by='oxford_stringency_mean').head(50)



In [None]:
# (rows, columns) of the filtered cross-section; rows = number of countries in the regression.
last_days.shape

In [None]:
# Bayesian linear regression (PyMC 2 API) of y_param on the mean stringency index:
#     y ~ Normal(alpha + beta * x, sd = obs_sigma)
x = last_days['oxford_stringency_mean']

y_param = 'dead_per_M'

# Prior hyper-parameters depend on the scale of the chosen response variable.
if y_param == 'dead_per_M':
    beta_mean = 0
    beta_std = 1
    alpha_low = 0
    alpha_high = 500
    obs_sigma_high = 200

else:
    beta_mean = 0
    beta_std = 1000
    alpha_low = -4000
    alpha_high = 4000
    obs_sigma_high = 10000

alpha = pm.Uniform('alpha',alpha_low,alpha_high)
#beta = pm.Uniform('beta',beta_low,beta_high)
# The third argument is a precision (1 / std^2). The float literal keeps the
# division from truncating to 0 under integer division when beta_std > 1.
beta = pm.Normal('beta',beta_mean, 1.0 / beta_std **2)
obs_sigma = pm.Uniform('obs_sigma',0,obs_sigma_high)

@pm.deterministic
def linreq(x=x,alpha=alpha,beta=beta):
    """Linear predictor alpha + beta * x."""
    return alpha + x * beta

obs = pm.Normal('obs',mu = linreq,tau = 1 / obs_sigma ** 2,observed = True,value = last_days[y_param])

# obs_sigma is listed explicitly: its trace is read back after sampling, so it
# must be a registered member of the model rather than only a parent of `obs`.
model = pm.Model([alpha,beta,obs_sigma,linreq,obs])

# Fit the maximum a posteriori estimate first so the chain starts from it.
map_ = pm.MAP(model)
map_.fit()

mcmc = pm.MCMC(model)

# Positional arguments are (iterations, burn-in, thinning).
sample = mcmc.sample(50000,20000,3)

In [None]:
# Retained draws of each sampled parameter.
beta_post, alpha_post, obs_sigma_post = (
    mcmc.trace(node_name)[:] for node_name in ('beta', 'alpha', 'obs_sigma')
)

# One PyMC diagnostic plot per parameter.
for draws, plot_name in (
    (beta_post, 'beta_post'),
    (alpha_post, 'alpha_post'),
    (obs_sigma_post, 'obs_sigma_post'),
):
    pmplot(draws, plot_name)

# Posterior draws side by side, one column per parameter.
result = pd.DataFrame({
    'beta_post': beta_post,
    'alpha_post': alpha_post,
    'obs_sigma_post': obs_sigma_post,
})

result.describe()

In [None]:
# 89% central interval (5.5th to 94.5th percentile) of slope and intercept.
interval_percentiles = [5.5,94.5]
beta_ci = np.percentile(result['beta_post'], interval_percentiles)
alpha_ci = np.percentile(result['alpha_post'], interval_percentiles)

In [None]:
# Evaluate both fitted lines on the integer stringency grid 0..100.
stringency_grid = np.arange(0, 101)

# Line through the posterior means of slope and intercept.
mean_req = stringency_grid * result['beta_post'].mean() + result['alpha_post'].mean()

# Ordinary least-squares fit on the same data, for comparison.
slope,intercept,_,_,_ = sps.linregress(last_days['oxford_stringency_mean'],last_days[y_param])

least_squares = stringency_grid * slope + intercept

In [None]:
# Posterior-predictive band on a fine stringency grid.
X = np.linspace(0,100,1000)
# Resample posterior draws with replacement, as many as there are grid points.
rows = np.random.choice(result.index,replace=True,size=len(X))
beta_samples = result['beta_post'][rows]
alpha_samples = result['alpha_post'][rows]

# Plain arrays of the resampled draws, looked up once instead of per grid point.
beta_draws = beta_samples.values
alpha_draws = alpha_samples.values
tau_draws = 1 / result['obs_sigma_post'][rows].values ** 2   # precision = 1 / sd^2

# lines[i, j] is the regression line of draw j evaluated at X[i].
lines = np.outer(X,beta_draws) + alpha_draws

# For every grid point, simulate one observation per resampled draw.
samples = np.array([pm.rnormal(x_val * beta_draws + alpha_draws,tau_draws,size=len(X))
                    for x_val in X])

# Percentiles are requested as (upper, lower) so each name holds the bound it
# describes; the two names were previously bound to the opposite percentiles.
high,low = np.percentile(samples,[94.5,5.5],axis=1)




In [None]:
# Scatter of countries with the posterior-mean line, the least-squares line,
# the posterior-predictive band and a sample of posterior regression lines.
plt.figure(figsize=(18,12))
plt.title('Bayesian Linear Regression : COVID-19'\
          '- Effect of Lockdowns on {} \n'.format(y_param)+\
          'based on {} countries\n'.format(len(last_days))+\
          'Assuming average lockdown index value {} - {}\n'.format(start_date,end_date)+\
         'death/conf data as of {}'.format(end_date))

plt.scatter(last_days['oxford_stringency_mean'],last_days[y_param],color='r')
plt.ylabel(y_param)
plt.xlabel('Oxford COVID-19 Government Response Tracker index\nHale, Thomas, Sam Webster, Anna Petherick, Toby Phillips, and Beatriz Kira (2020)')

plt.plot(range(0,101),mean_req,ls='dashed',color='k')
plt.plot(range(0,101),least_squares,ls='dashed',color='crimson')

plt.fill_between(X,high,low,color='c',alpha=0.2)

countries_of_interest = ['SWE','GBR','BEL','ESP','ITA','PER','FRA',
                         'USA','DNK','FIN','NOR','BRA','CHL','QAT','BHR',
                        'HNT','GTM','NIC','JPN','ISL','ARG','NLD','DEU','ZAF','HND','IRQ']

# Label every point with its three-letter country code.
for x_pos,y_pos,abb in zip(last_days['oxford_stringency_mean'],last_days[y_param],last_days['abb']):
    #if abb in countries_of_interest:
    plt.text(x_pos,y_pos,abb)

# `lines` has one row per X value and one column per posterior draw, so a single
# regression line is a column. Plotting row 0 against X would draw the
# values of all draws at X[0] instead of a line. This one line carries the legend entry.
plt.plot(X,lines[:,0],
         color='orange',
         alpha=0.03,
         label=r'$\beta$: {:.2f} 89% CI: [ {:.2f} {:.2f} ] $\alpha$: {:.2f} CI: [ {:.2f} {:.2f} ]'.format(
         result.beta_post.mean(),beta_ci[0],beta_ci[1],result.alpha_post.mean(),alpha_ci[0],alpha_ci[1]))

# All sampled regression lines, one per column of `lines`.
_=plt.plot(X,lines,color='orange',alpha=0.03)

plt.legend(loc='upper left')

plt.savefig('lockdown_index_{}.jpg'.format(y_param),format='jpg',dpi=400)

In [None]:
# Discard every row that has a missing value in any column.
foo = foo.dropna()

In [None]:
# Per-country groups of the cleaned table.
# NOTE: this rebinds `countries`, which earlier held the country list from the
# API payload; re-running the data_list cell after this one would iterate over
# the GroupBy object instead of that list.
countries = foo.groupby('country')
# Countries shown in the time-series figures (14 names) ...
some_countries = ['Sweden','Belgium','UK','South Africa','Germany','US',
                  'Denmark','Austria','Italy','France','Argentina','Brazil','Chile','Peru']

# ... and one matplotlib colour per entry of some_countries, matched by position.
colors = ['black','grey','lightcoral','mistyrose','tan','gold','crimson','yellow',
         'deepskyblue','royalblue','orangered','lime','aqua','darkmagenta']



In [None]:
from datetime import datetime

# Weekly aggregates per country, keyed by country name; reused by later cells.
weeklies = dict()

fig,ax = plt.subplots(2,1,sharex=True,figsize=(18,12))

# Loop-invariant: pick the weekly resampling rule anchored on the weekday of
# the last data date (weekday() counts Monday as 0, matching the list order).
resample_period = ['W-MON','W-TUE','W-WED','W-THU','W-FRI','W-SAT','W-SUN']

#today = datetime.today().weekday()
last_data_day = last_data_date.weekday()

#yesterday = lambda i : resample_period[i % 7 -1] if i != 0 else resample_period[6]
period = resample_period[last_data_day]

for i,c in enumerate(some_countries):
    # set_index returns a new frame, so the group held by `countries` is not modified.
    country = countries.get_group(c).set_index('date')
    # Daily new deaths = first difference of the cumulative count.
    country['dead_inc'] = country['dead'].diff()
    country['dead_inc_per_M'] = country['dead_inc'] / (country['pop'] / 1e6)

    # String aggregator names replace the numpy callables, which pandas deprecates.
    weekly = country.resample(period).agg({'dead_inc_per_M' : 'sum', 'dead_per_M' : 'sum',
                                           'oxford_stringency_current' : 'mean'})

    weeklies[c] = weekly

    ax[1].plot(weekly.index,weekly['dead_inc_per_M'],'o--',label=c,color=colors[i])
    ax[0].plot(weekly.index,weekly['oxford_stringency_current'],'o--',label=c,color=colors[i])

citation = '\nHale, Thomas, Sam Webster, Anna Petherick, Toby Phillips, and Beatriz Kira (2020).\nOxford COVID-19 Government Response Tracker, Blavatnik School of Government.\nData use policy: Creative Commons Attribution CC BY standard.'
ax[0].legend(loc='upper left')
ax[0].set_title('Oxford Covid Government Response Tracker Index\n' + citation)
ax[0].set_ylabel('OXCGRT')
#ax[0].set_yscale('log')
ax[1].legend(loc='upper left')
ax[1].set_title('Weekly deaths per M')
ax[1].set_ylabel('Weekly deaths per M')
#ax[1].set_yscale('log')
plt.savefig('oxcgrt_country_plot.jpg',format='jpg',dpi=400)

In [None]:
# Sampler settings read by inference(): total iterations, burn-in iterations
# and thinning interval, in the order they are passed to mcmc.sample().
steps = 100000
burn = 40000
thin = 3

# Number of draws left after burn-in and thinning.
dist_size = (steps - burn) // thin

def inference(x,y):
    """Fit a Bayesian linear regression of ``y`` on ``x`` and return the posterior draws.

    Model (PyMC 2 API): ``y ~ Normal(alpha + beta * x, sd=obs_sigma)`` with
    ``alpha ~ Uniform(-500, 500)``, ``beta ~ Normal(0, sd=10)`` and
    ``obs_sigma ~ Uniform(0, 1000)``.

    The sampler length is taken from the module-level ``steps``, ``burn`` and
    ``thin`` variables. As a side effect, one PyMC diagnostic plot is drawn
    for each of the three parameters.

    Parameters
    ----------
    x : predictor values.
    y : observed response values, aligned with ``x``.

    Returns
    -------
    pandas.DataFrame
        Columns ``beta_post``, ``alpha_post`` and ``obs_sigma_post``, one row
        per retained draw.
    """

    beta_mean = 0
    beta_std = 10
    alpha_low = -500
    alpha_high = 500
    obs_sigma_high = 1000

    alpha = pm.Uniform('alpha',alpha_low,alpha_high)
    # The third argument is a precision (1 / std^2). The float literal keeps the
    # division from truncating to 0 under integer division.
    beta = pm.Normal('beta',beta_mean, 1.0 / beta_std **2)
    obs_sigma = pm.Uniform('obs_sigma',0,obs_sigma_high)

    @pm.deterministic
    def linreq(x=x,alpha=alpha,beta=beta):
        """Linear predictor alpha + beta * x."""
        return alpha + x * beta

    obs = pm.Normal('obs',mu = linreq,tau = 1 / obs_sigma ** 2,observed = True,value = y)

    # obs_sigma is listed explicitly: its trace is read back below, so it must
    # be a registered member of the model rather than only a parent of `obs`.
    model = pm.Model([alpha,beta,obs_sigma,linreq,obs])

    # Fit the maximum a posteriori estimate first so the chain starts from it.
    map_ = pm.MAP(model)
    map_.fit()

    mcmc = pm.MCMC(model)

    sample = mcmc.sample(steps,burn,thin)

    beta_post = mcmc.trace('beta')[:]
    alpha_post = mcmc.trace('alpha')[:]
    obs_sigma_post = mcmc.trace('obs_sigma')[:]

    pmplot(beta_post,'beta_post')
    pmplot(alpha_post,'alpha_post')
    pmplot(obs_sigma_post,'obs_sigma_post')

    result = pd.DataFrame({'beta_post' : beta_post,
                          'alpha_post' : alpha_post,
                          'obs_sigma_post' : obs_sigma_post})

    return result


In [None]:
import scipy.stats as sps

# One panel per country: weekly deaths per million and weekly mean stringency.
fig,axes = plt.subplots(7,2,sharex=True,figsize=(18,12))

for i,k in enumerate(weeklies.keys()):

    # Tag each weekly frame with its country so the frames can be stacked below.
    weeklies[k]['country'] = k

    # NOTE: (i % 7, i % 2) visits each of the 14 panels exactly once for
    # i = 0..13; with more than 14 countries panels would be drawn over.
    panel = axes[i % 7,i % 2]

    panel.plot(weeklies[k].index,weeklies[k]['dead_inc_per_M'],'o--',label='weekly deaths per M',
               color='r')
    # This series is the stringency index; it previously carried the label of the deaths series.
    panel.plot(weeklies[k].index,weeklies[k]['oxford_stringency_current'],'o--',label='OxCGRT',
               color='orange')
    panel.tick_params('x',rotation=90)
    panel.set_title('{}'.format(k))
    panel.set_ylabel('deaths per M\nOxCGRT')

# Stack all weekly frames with a single concat instead of growing a frame
# inside the loop; an empty `weeklies` still yields an empty frame.
weeklies_df = pd.concat(list(weeklies.values())) if weeklies else pd.DataFrame()

plt.savefig('oxcgrt_14.jpg',format='jpg',dpi=400)

In [None]:
# Posterior draws per country: weekly deaths per million regressed on the
# weekly mean stringency index.
results = {
    name: inference(weekly['oxford_stringency_current'], weekly['dead_inc_per_M'])
    for name, weekly in weeklies.items()
}



In [None]:


# Posterior summaries for two countries: the first is printed as plain text,
# the second is shown as the cell's output.
print (results['Argentina'].describe())
results['Sweden'].describe()


In [None]:
# Per-country regression panels: observed weekly data, posterior-mean line,
# sampled posterior lines, least-squares line and posterior-predictive band.
fig,axes = plt.subplots(7,2,figsize=(18,12))

nr_samples = 1000

for i,k in enumerate(weeklies.keys()):

    # NOTE: (i % 7, i % 2) visits each of the 14 panels exactly once for i = 0..13.
    panel = axes[i % 7,i % 2]
    posterior = results[k]
    stringency = weeklies[k]['oxford_stringency_current']
    weekly_deaths = weeklies[k]['dead_inc_per_M']

    panel.scatter(stringency,weekly_deaths,color='r')

    slope,intercept,_,_,_ = sps.linregress(stringency,weekly_deaths)

    X = np.linspace(stringency.min(),stringency.max(),nr_samples)

    #X = np.linspace(0,100,nr_samples) # for computing samples, this must concur with nr of sample rows

    alpha_mean = posterior.alpha_post.mean()
    beta_mean = posterior.beta_post.mean()

    panel.plot(X,X*beta_mean + alpha_mean,'--',color='k')

    # Resample from ALL posterior draws. The row positions used to be drawn from
    # range(len(weeklies[k])), i.e. the number of weeks, which restricted the
    # plot to the first few draws of the chain.
    rows = np.random.choice(len(posterior),replace=True,size=nr_samples)
    draws = posterior.iloc[rows]
    beta_draws = draws['beta_post'].values
    alpha_draws = draws['alpha_post'].values
    tau_draws = 1 / draws['obs_sigma_post'].values ** 2   # precision = 1 / sd^2

    # lines[i, j] is the regression line of draw j evaluated at X[i].
    lines = np.outer(X,beta_draws) + alpha_draws

    # For every grid point, simulate one observation per resampled draw.
    samples = np.array([pm.rnormal(x_val * beta_draws + alpha_draws,tau_draws,size=len(X))
                        for x_val in X])

    # Requested as (upper, lower) so each name holds the bound it describes.
    high,low = np.percentile(samples,[94.5,5.5],axis=1)

    panel.plot(X,lines,color='orange',alpha=0.01)

    least_squares = X * slope + intercept

    panel.plot(X,least_squares,'--',color='r')

    panel.fill_between(X,high,low,color='c',alpha=0.2)


    panel.set_title(k)
    panel.set_ylabel('weekly\ndead_per_M')
    panel.set_xlabel('OxCGRT')
    #panel.set_ylim([0,150])

plt.tight_layout()
plt.savefig('OxCGRT_regression_14.jpg',format='jpg',dpi=400)

In [None]:
# Weekly rows of three individual countries, selected by the 'country' tag.
swe_weekly, bel_weekly, arg_weekly = (
    weeklies_df.loc[weeklies_df['country'] == country_name]
    for country_name in ('Sweden', 'Belgium', 'Argentina')
)

In [None]:
# Display the weekly rows for Sweden.
swe_weekly

In [None]:

# Argentina: weekly mean stringency and weekly deaths per million on one set of axes.
arg_weekly.plot(y=['oxford_stringency_current','dead_inc_per_M'],style='o--')

In [None]:
# Build one table of all countries with a daily-increment column 'inc'.
# The per-country frames are collected in a list and concatenated once,
# instead of re-copying a growing frame on every iteration.
country_frames = []

for c in countries.groups:
    country = countries.get_group(c)
    # Daily new deaths = first difference of the cumulative count.
    inc = country['dead'].diff()
    inc.name='inc'
    country = pd.concat([country,inc],axis=1)

    # Treat infinities as missing, then drop incomplete rows (this also drops
    # each country's first day, whose increment is undefined).
    country = country.replace(np.inf,np.nan)
    country = country.dropna()
    country_frames.append(country)

# pd.concat rejects an empty list, so fall back to an empty frame.
all_countries = pd.concat(country_frames,axis=0) if country_frames else pd.DataFrame()
    


In [None]:
# Preview the first 50 rows of the combined table.
all_countries.head(50)


In [None]:
# Countries used by the hierarchical model below. A wider list used to be
# assigned first and immediately overwritten (and it listed 'UK' twice); it is
# kept here for reference only. The posterior cells further down are written
# for SWE/GBR/BEL specifically.
#   ['Sweden','UK','Spain','Italy','Belgium','France','US','Germany']
select_countries = ['Sweden','UK','Belgium']

mask = all_countries['country'].isin(select_countries)

# Independent copy, so columns can be added later without touching all_countries.
selected_countries = (all_countries[mask]).copy()
selected_countries

In [None]:
# Assign each distinct abbreviation a 1-based integer code, in order of first appearance.
unique_country_abbs = selected_countries['abb'].unique()
print (unique_country_abbs)
country_idx = {abb: code for code, abb in enumerate(unique_country_abbs, start=1)}
country_idx

In [None]:
# Assign each distinct date a 1-based integer code, in order of first appearance.
unique_days = selected_countries['date'].unique()
day_idx = {day: code for code, day in enumerate(pd.to_datetime(unique_days), start=1)}


In [None]:
def create_country_index(abb):
    """Look up the integer code of a country abbreviation in ``country_idx``."""
    code = country_idx[abb]
    return code

def create_day_index(day):
    """Look up the integer code of a date in ``day_idx``."""
    code = day_idx[day]
    return code

# Integer-coded helper columns for the model, plus daily deaths per million.
population_in_millions = selected_countries['pop'] / 1e6

selected_countries['country_idx'] = selected_countries['abb'].map(create_country_index)
selected_countries['day_idx'] = selected_countries['date'].map(create_day_index)
selected_countries['ox_idx'] = selected_countries['oxford_stringency_current'].astype(int)
selected_countries['inc_per_M'] = selected_countries['inc'] / population_in_millions
selected_countries

In [None]:
# Pairwise correlations of the numeric columns. The frame also holds text and
# date columns, which recent pandas versions no longer drop silently in
# corr(), so the numeric (and boolean) columns are selected explicitly.
selected_countries.select_dtypes(include=['number','bool']).corr()

In [None]:
def logit_pure(x):
    """Logistic sigmoid ``exp(x) / (1 + exp(x))``, evaluated without overflow.

    Note that, despite the name, this maps an unbounded value (log-odds) to a
    probability in [0, 1]; it is the inverse of ``log(p / (1 - p))``.

    Parameters
    ----------
    x : scalar, numpy array or pandas Series.

    Returns
    -------
    Same shape as ``x``, with values in [0, 1].
    """
    # Identity: exp(x) / (1 + exp(x)) == (1 + tanh(x / 2)) / 2. tanh saturates
    # at +/-1, whereas the direct formula gives inf / inf = NaN for large x.
    half_x = np.multiply(x, 0.5)
    return 0.5 * (1.0 + np.tanh(half_x))

def logistic_pure(p):
    """Return the log-odds ``log(p / (1 - p))`` of a probability ``p``.

    Note that, despite the name, this is the function usually called the logit.
    """
    odds = p / (1 - p)
    return np.log(odds)

In [None]:
# Model inputs taken from the selected rows.
# NOTE: `country_idx` and `day_idx` are rebound here from the lookup dicts to
# per-row Series. create_country_index() / create_day_index() read those
# globals, so re-running the cell that calls them after this one will fail.
country_idx = selected_countries['country_idx']
day_idx = selected_countries['day_idx']
ox_idx = selected_countries['ox_idx']

# NOTE(review): this `x` is reassigned at the top of the model cell below
# before it is used there — looks like a leftover; confirm.
x = selected_countries['oxford_stringency_mean']

# Ten random draws each; presumably Normal(mean 0, precision 1) samples meant
# for the prior check in the commented-out line below — verify.
r_alpha = pm.rnormal(0,1/1**2,10)
r_beta = pm.rnormal(0,1/1**2,10)

#[ logit_pure(r_alpha + r_beta * x) for x in selected_countries['oxford_stringency_current']]

In [None]:
#PYMC
import pymc as pm
from pymc.Matplot import plot as pmplot


# Binomial model of daily deaths: one intercept per country, one slope per day,
# applied to the log of the current stringency index.
x = np.log(selected_countries['oxford_stringency_current'])

n_countries = len(unique_country_abbs)
n_days = len(unique_days)

alpha = pm.Normal('alpha',0, 1 / 1 ** 2,size=n_countries)

beta = pm.Normal('beta',0, 1/ 1 ** 2,size=n_days)

@pm.deterministic
def logit(country_idx=country_idx-1,x=x,day_idx=day_idx-1,alpha=alpha,beta=beta):
    """Per-row death probability: sigmoid of alpha[country] + beta[day] * x."""
    # The index arguments were shifted to 0-based in the signature defaults.
    linear_predictor = alpha[country_idx] + beta[day_idx]*x
    return logit_pure(linear_predictor)


lkh = pm.Binomial('lkh',n=selected_countries['pop'],p=logit,observed=True,
                      value=selected_countries['inc'])

model = pm.Model([alpha,beta,logit,lkh])

_map = pm.MAP(model)
_map.fit()

mcmc = pm.MCMC(model)
sample = mcmc.sample(50000,20000,3)

In [None]:
# Replace the model nodes bound to `alpha` / `beta` with their retained draws.
alpha, beta = mcmc.trace('alpha')[:], mcmc.trace('beta')[:]

pmplot(alpha,'alpha')
#pmplot(beta,'beta')

# One column per country intercept, labelled with the country abbreviation.
result_alpha = pd.DataFrame(data=alpha, columns=unique_country_abbs)
# One column per day slope; the column axis is named 'day nr'.
result_beta = pd.DataFrame(data=beta).rename_axis('day nr', axis='columns')

result_alpha.describe()

In [None]:
# Summary statistics of the per-day slope draws (one column per day).
result_beta.describe()

In [None]:
def _posterior_probability(abb):
    """Sigmoid of (country intercept + day slope) for every draw and every day.

    Returns a DataFrame with one row per posterior draw and one column per day.
    The shape follows result_alpha / result_beta instead of the hard-coded
    (10000, 146), so it stays correct when the sampler settings or the date
    range change.
    """
    intercept_draws = result_alpha[abb].values[:, np.newaxis]   # column vector, one row per draw
    return pd.DataFrame(logit_pure(intercept_draws + result_beta.values))

# NOTE(review): the model multiplies beta by log(stringency), whereas here the
# slope is added to the intercept without that factor — confirm this is intended.
swe_p = _posterior_probability('SWE')
gbr_p = _posterior_probability('GBR')
bel_p = _posterior_probability('BEL')

swe_p.describe()

In [None]:
# Posterior-mean value per day for the three countries, drawn on shared axes
# (yellow: swe_p, blue: gbr_p, red: bel_p).
ax = swe_p.describe().loc['mean'].plot(color='yellow',figsize=(18,12))
gbr_p.describe().loc['mean'].plot(ax=ax,color='b')
bel_p.describe().loc['mean'].plot(ax=ax,color='r')