In [None]:
import re
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy.stats as sps
import seaborn as sns

sns.set()

In [None]:
# Page whose HTML tables are parsed into DataFrames below.
url = 'https://www.worldometers.info/coronavirus/#countries'

# Browser-like request headers.
# NOTE(review): presumably needed because the site rejects the default
# requests User-Agent -- confirm.
header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

# A timeout keeps the cell from hanging forever; raise_for_status stops the
# notebook here instead of parsing an HTTP error page as if it were data.
r = requests.get(url, headers=header, timeout=30)
r.raise_for_status()

html_source = r.text

# Upper-case the full text of every `<...>` match before parsing.
# NOTE(review): the reason for this normalisation is not stated in the notebook.
html_source = re.sub(r'<.*?>', lambda g: g.group(0).upper(), html_source)

# Wrap the markup in StringIO: passing literal HTML to read_html is deprecated
# in recent pandas versions, while file-like input works in all of them.
dfs = pd.read_html(StringIO(html_source),encoding='utf-8')

In [None]:
# Work with the first table that read_html returned.
df = dfs[0]
df

In [None]:
# Build the per-country table in one chain:
#   1. keep rows whose '#' column is not missing,
#   2. index by country name,
#   3. rename two index labels ('UK', 'Czechia') to the spellings used below.
covid_df = (
    df.loc[df['#'].notna()]
    .rename(columns={'Country,Other' : 'country'})
    .set_index('country')
    .rename(index={'UK' : 'United Kingdom','Czechia' : 'Czech Republic'})
)

# Column labels may contain non-breaking spaces; swap them for plain spaces so
# they can be typed as ordinary strings later on.
covid_df.columns = covid_df.columns.str.replace(u'\xa0', u' ')

covid_df

In [None]:
# Inspect positions 40-99 of the table after sorting by its index (country).
covid_df.sort_index().iloc[40:100]

In [None]:
# Load the density spreadsheet and index it by country name.
PWD_df = (
    pd.read_excel('pwd_country.xlsx')
    .rename(columns={'COUNTRY_NAME' : 'country'})
    .set_index('country')
)

# Overwrite the remaining column labels positionally (six columns expected).
PWD_df.columns = ['COUNTRY_CODE','PWD_2010','PWD_2020','PWD_2030','PWD_2040','PWD_2050']

PWD_df

In [None]:
# Left-join the covid columns onto the density table (matching on the country
# index), then remove the 'Europe' row.
comb_df = PWD_df.join(covid_df).drop('Europe')
comb_df

In [None]:
# List the column labels available after the join.
comb_df.columns

In [None]:
# Peek at the tests-per-million column used in the regressions below.
comb_df['Tests/ 1M pop']

In [None]:
def _fit_line(frame, x_col, y_col):
    """Return ``(slope, intercept)`` of the least-squares line of ``y_col`` on ``x_col``.

    Rows where either column is missing are dropped before fitting: ``comb_df``
    comes from a left join, so unmatched countries carry NaN, and a single NaN
    makes ``linregress`` return NaN for both slope and intercept.
    """
    pairs = frame[[x_col, y_col]].dropna()
    fit = sps.linregress(pairs[x_col], pairs[y_col])
    return fit.slope, fit.intercept


# One fit per outcome, always against the 2020 population-weighted density.
slope_test,intercept_test = _fit_line(comb_df,'PWD_2020','Tests/ 1M pop')
slope_cases,intercept_cases = _fit_line(comb_df,'PWD_2020','Tot Cases/1M pop')
slope_dead,intercept_dead = _fit_line(comb_df,'PWD_2020','Deaths/1M pop')

In [None]:
def scatter(ax,param='Deaths/1M pop',color='r',slope=slope_test,intercept=intercept_test):
    """Draw one density-vs-outcome panel on ``ax``.

    Plots ``comb_df[param]`` against ``comb_df['PWD_2020']`` as round markers
    in ``color``, overlays the dashed line ``slope * x + intercept`` and writes
    each row's ``COUNTRY_CODE`` next to its point.

    NOTE(review): the defaults pair the deaths column with the *tests* fit
    (``slope_test`` / ``intercept_test``); the calls visible in this notebook
    always pass all arguments explicitly.
    """
    density = comb_df['PWD_2020']
    outcome = comb_df[param]
    marker_style = color + 'o'

    comb_df.plot(ax=ax, x='PWD_2020', y=param, figsize=(18,12), style=marker_style)

    # Regression line evaluated at the observed densities.
    fitted = density.values * slope + intercept
    ax.plot(density, fitted, ls='dashed', color='orange')

    for x_pos, y_pos, code in zip(density, outcome, comb_df['COUNTRY_CODE']):
        ax.annotate(xy=(x_pos, y_pos), text=code)
    
fig,axes = plt.subplots(3,figsize=(18,12))

# (column, marker colour, fitted slope, fitted intercept) for each panel, top to bottom.
panels = [
    ('Tests/ 1M pop', 'b', slope_test, intercept_test),
    ('Tot Cases/1M pop', 'g', slope_cases, intercept_cases),
    ('Deaths/1M pop', 'r', slope_dead, intercept_dead),
]

for panel_ax, (column, marker_color, fit_slope, fit_intercept) in zip(axes, panels):
    scatter(panel_ax, column, marker_color, fit_slope, fit_intercept)

title = 'Population Weighted Density --> Covid Outcomes\nDataSource : EU Joint Research Centre,Worldometers.info'

plt.suptitle(title)
plt.savefig('covid_population_weighted_density.jpg',format='jpg')

In [None]:
#### assumed values ####

# Fraction of the population infected.
# Source: https://www.theguardian.com/world/2021/apr/15/rapid-covid-testing-in-england-may-be-scaled-back-over-false-positives
base_rate = 0.001

# Test characteristics.
# Source: https://en.wikipedia.org/wiki/COVID-19_testing
pcr_specificity = 0.99
pcr_sensitivity = 0.95

# Split a hypothetical cohort into infected and not infected.
pop = 100000
infected = pop * base_rate
not_infected = pop - infected

# Infected people the test is expected to flag.
true_positives = pcr_sensitivity * infected
true_positives

In [None]:
# Not-infected people the test is expected to flag anyway:
# the (1 - specificity) share of the not-infected group.
false_positives = (1-pcr_specificity) * not_infected
false_positives

In [None]:
# Share of all positive results that come from infected people.
p_infected_given_pos_test = true_positives / (true_positives + false_positives)
p_infected_given_pos_test

In [None]:
### by Bayes Rule ###
#
# P(A|B) = P(B|A) * P(A) / P(B)
#
# P(infected | positive) = P(positive | infected) * P(infected) / P(positive)
#
# P(infected | positive) = True_Positives / (True_Positives + False_Positives)
#
# P(infected | positive) = sensitivity * base_rate
#                          / (base_rate * sensitivity + (1-base_rate) * (1 - specificity))

# Numerator: probability of being infected AND testing positive.
p_infected_and_positive = pcr_sensitivity * base_rate

# Denominator: overall probability of a positive result (true + false positives).
p_positive = base_rate * pcr_sensitivity + (1 - base_rate) * (1 - pcr_specificity)

P_infected_given_positive = p_infected_and_positive / p_positive

P_infected_given_positive

In [None]:
# Returns the posterior, e.g. p(infected | positive tests) when fed the test
# sensitivity as likelihood and the infection probability as prior.

def bayes_rule(likelihood,prior,evidence):
    """Apply Bayes' rule: ``likelihood * prior / evidence``."""
    joint = likelihood * prior
    return joint / evidence

#initial test #
# Prior is the population base rate; the evidence term is the total
# probability of a positive result.
#
# Alternative example values from
# https://towardsdatascience.com/bayes-rule-with-a-simple-and-practical-example-2bce3d0f4ad0
#   pcr_sensitivity = 0.97
#   pcr_specificity = 0.95
#   base_rate = 0.005
posterior = bayes_rule(
    pcr_sensitivity,
    base_rate,
    base_rate * pcr_sensitivity + (1 - base_rate) * (1 - pcr_specificity),
)

# update based on initial test #
# Each further positive test uses the previous posterior as its prior.
additional_tests = 3
posterior_history = [posterior]

for t in range(additional_tests):
    evidence = posterior * pcr_sensitivity + (1 - posterior) * (1 - pcr_specificity)
    posterior = bayes_rule(pcr_sensitivity, posterior, evidence)
    posterior_history.append(posterior)

# Index 0 is the first test, index k the k-th additional positive test.
all_posteriors = np.array(posterior_history)

print (all_posteriors)

In [None]:
# Derive the x-axis from the data itself. `additional_tests` is reassigned by
# a later cell, so sizing the axis from it makes this cell fail with a length
# mismatch when it is re-run after that cell.
test_numbers = np.arange(1, len(all_posteriors) + 1)

plt.figure(figsize=(18,12))
plt.plot(test_numbers,all_posteriors,'o--')
plt.xlabel('test nr')
plt.ylabel('probability infected')
_= plt.xticks(range(1, len(all_posteriors) + 1))

# Every test in this scenario is positive, so one label/colour per point.
# The loop variable is named `label` so the `txt` list is not rebound.
txt = ['Pos Test'] * len(all_posteriors)
colors = ['red'] * len(all_posteriors)

for x,y,label,c in zip(test_numbers,all_posteriors,txt,colors):
    plt.annotate(xy=(x,y),text=label,color=c)

# The plotted quantity is the probability of being infected (see ylabel).
plt.title('probability infected given n positive tests, assuming incidence {:.3f}, sensitivity {:.2f} specificity {:.2f}'.format(
base_rate,pcr_sensitivity,pcr_specificity))
# Echo the assumed incidence as the cell's output.
base_rate

In [None]:

def bayes_rule_2(base_rate,sensitivity,specificity,p_pos = True):
    """Posterior probability after a single test result.

    Parameters
    ----------
    base_rate : prior probability of being infected.
    sensitivity : probability that an infected person tests positive.
    specificity : probability that a non-infected person tests negative.
    p_pos : if truthy, return P(infected | positive test);
        otherwise return P(not infected | negative test).
    """
    not_infected_share = 1 - base_rate

    if p_pos:
        hits = base_rate * sensitivity
        false_alarms = not_infected_share * (1 - specificity)
        return hits / (hits + false_alarms)

    correct_negatives = not_infected_share * specificity
    misses = base_rate * (1 - sensitivity)
    return correct_negatives / (correct_negatives + misses)

bayes_rule_2(0.001,0.95,0.99,p_pos=True)

In [None]:
# pos,neg #
# Scenario: one positive test followed by `additional_tests` negative tests.

sensitivity = 0.95
specificity = 0.99

additional_tests = 4
result = np.zeros(additional_tests + 1)

# probability for positive given positive test #
# Uses the notebook-wide `base_rate` (not a second hard-coded copy) so the
# value matches what the plot title in the next cell reports.
result[0] = bayes_rule_2(base_rate,sensitivity,specificity,p_pos=True)

# probability for being negative given negative test AND prior results #
# Each negative test takes the current probability of infection as its prior;
# bayes_rule_2(..., p_pos=False) returns P(not infected), so the next prior is
# its complement.
p_infected = result[0]
for i in range(1, additional_tests + 1):
    result[i] = bayes_rule_2(p_infected,sensitivity,specificity,p_pos=False)
    p_infected = 1 - result[i]

# Report every computed step (the final negative test included).
summary = ['p_positive|positive {:.3f}'.format(result[0])]
summary += ['p_negative|negative,previous {:.3f}'.format(p) for p in result[1:]]
print (' '.join(summary))

# Convert the negative-test entries to P(infected) so the whole array is on
# the same scale for plotting.
result[1:] = 1 - result[1:] 
result

In [None]:
test_numbers = range(1,len(result) + 1)

plt.figure(figsize=(18,12))
plt.plot(test_numbers,result,'o--')
plt.xlabel('test nr')
plt.ylabel('probability infected')

# First test is positive, all later ones negative. Sizing the lists from
# `result` keeps every point annotated if the number of tests changes.
n_negative = max(len(result) - 1, 0)
txt = ['Pos Test'] + ['Neg Test'] * n_negative
colors = ['red'] + ['green'] * n_negative

# The loop variable is named `label` so the `txt` list is not rebound.
for x,y,label,c in zip(test_numbers,result,txt,colors):
    plt.annotate(xy=(x,y),text=label,color=c)
    
_= plt.xticks(test_numbers)


plt.title('Probability of being infected after first receiving a positive test, then subsequent negatives'+\
         '\nbase rate : {:.3f} sensitivity : {:.3f} specificity : {:.3f}'.format(
base_rate,sensitivity,specificity))

In [None]:
# Probability of infection after one positive and then one negative test:
# the prior is P(infected | positive); bayes_rule_2(..., p_pos=False) gives
# P(not infected | negative), so take the complement.
p_inf_given_pos_neg = 1-bayes_rule_2(p_infected_given_pos_test,sensitivity,specificity,p_pos=False)
p_inf_given_pos_neg

In [None]:
# next section #

In [None]:
# Bar chart of tests per million for every row of the joined table, saved as JPEG.
title = 'Europe : Covid Tests per 1M population\nDataSource : Worldometers.info'
comb_df.plot.bar(y='Tests/ 1M pop', figsize=(18,12), title=title)
plt.savefig('covid_pop_weighted_density_test_per_m.jpg', format='jpg')

In [None]:
# Show the worldwide table again before plotting it.
covid_df

In [None]:
fig,axes = plt.subplots(4,figsize=(18,12))

# Rank all rows once, highest tests-per-million first.
tests_ranked = covid_df['Tests/ 1M pop'].sort_values(ascending=False)

# Positional cut points that split the ranking into four consecutive chunks,
# one per subplot; the last chunk runs to the end.
n_rows = len(covid_df)
quarter = n_rows // 4
cut_points = [0, quarter, quarter + quarter, quarter + 2 * n_rows // 4, n_rows]

for panel_ax, start, stop in zip(axes, cut_points[:-1], cut_points[1:]):
    tests_ranked[start:stop].plot(kind='bar', ax=panel_ax)

plt.suptitle('Covid Tests per Million pop\nDataSource : worldometers.info')
plt.tight_layout()

plt.savefig('covid_pop_tests_per_m_worldwide.jpg',format='jpg')

In [None]:
# Line plot of the ranked tests-per-million values; one tick per row, with the
# tick labels blanked out.
n_countries = len(covid_df)
tests_desc = covid_df['Tests/ 1M pop'].sort_values(ascending=False)
ax = tests_desc.plot(figsize=(18,12))

_ = ax.set_xticks(range(n_countries))
_ = ax.set_xticklabels([''] * n_countries)

In [None]:
# Same descending ranking plotted against row position, on a log y-axis.
plt.plot(range(len(covid_df)),covid_df['Tests/ 1M pop'].sort_values(ascending=False))
plt.yscale('log')


In [None]:
# NOTE(review): scratch arithmetic; the notebook does not say what the factors
# (6, 365, 2, 17) stand for -- name them or remove this cell.
6 * 365 * 2 * 17