In [44]:
import datetime as dt
from typing import Optional
from urllib.error import HTTPError

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests as re

# Bayes' Theorem applied to likelihood of current individual COVID infection

## I = infected, V = vaccinated
### P(I|V) = P(V|I) P(I) / P(V)
### P(I|¬V) = P(¬V|I) P(I) / P(¬V)

In [45]:
def infection_likelihood(
    incident_rate: float,
    vaccination_rate: float,
    vaccine_efficacy: float,
    identification_rate: Optional[float] = None,
    decimals: Optional[int] = 3,
) -> dict:
    """Return probabilities of current infection in vaccinated and unvaccinated individuals.

    Given Bayes' Theorem that P(A|B) = P(B|A) P(A) / P(B):

    For vaccinated individuals (V), the probability of current COVID infection (I) is

        P(I|V) = P(V|I) P(I) / P(V)

    and for unvaccinated individuals (¬V) it is

        P(I|¬V) = P(¬V|I) P(I) / P(¬V)

    where

        P(V)    is the current local vaccination rate
        P(¬V)   = 1 - P(V)
        P(I)    is the current local infection rate
        D       = P(V) (1 - vaccine_efficacy) + P(¬V)
        P(V|I)  = P(V) (1 - vaccine_efficacy) / D
        P(¬V|I) = P(¬V) / D = 1 - P(V|I)

    Substituting, P(V) and P(¬V) cancel, which is the form actually evaluated:

        P(I|V)  = (1 - vaccine_efficacy) P(I) / D
        P(I|¬V) = P(I) / D

    Assumptions include:
        - Equal risk of virus exposure to vaccinated and unvaccinated individuals

    Args:
        incident_rate (float): Local rate of active infection (per 100,000 pop)
        vaccination_rate (float): Local vaccination rate (0.0-1.0)
        vaccine_efficacy (float): Proportion of potential infections blocked by vaccine (0.0-1.0)
        identification_rate (Optional[float]): Proportion of true infection count represented
            in data (> 0). If None, incident_rate is assumed to be accurate.
        decimals (Optional[int]): Number of decimal places the two risk values are rounded to.
            Pass None to get unrounded values (useful for small incident rates, which
            round to 0.0 at the default of 3).
    Returns:
        risk (dict): Current risk of infection with keys
            'vaccinated'   - P(I|V)
            'unvaccinated' - P(I|¬V)
            'p_vi'         - P(V|I), unrounded
            'p_nv_i'       - P(¬V|I), unrounded
    Raises:
        ValueError: If an argument is outside its documented range, or if
            vaccination_rate and vaccine_efficacy are both 1 (infection is then
            impossible under the model and the conditionals are undefined).
    """
    if incident_rate < 0:
        raise ValueError('incident_rate must be non-negative')
    if not 0.0 <= vaccination_rate <= 1.0:
        raise ValueError('vaccination_rate must be between 0.0 and 1.0')
    if not 0.0 <= vaccine_efficacy <= 1.0:
        raise ValueError('vaccine_efficacy must be between 0.0 and 1.0')
    # Explicit None check: a rate of 0 is invalid input, not "no adjustment".
    if identification_rate is not None and not identification_rate > 0:
        raise ValueError('identification_rate must be greater than 0')

    # Scale from per 100,000 to 0.0 - 1.0 range
    p_i = incident_rate / 1e5  # P(I)
    if identification_rate is not None:
        p_i /= identification_rate  # Adjust infection rate to account for missed diagnoses

    p_v = vaccination_rate  # P(V)
    p_nv = 1 - vaccination_rate  # P(¬V)
    breakthrough = 1 - vaccine_efficacy  # share of exposures the vaccine does not block

    # D: relative weight of all infections (vaccinated breakthroughs + unvaccinated)
    denominator = p_v * breakthrough + p_nv
    if denominator == 0:
        raise ValueError(
            'vaccination_rate and vaccine_efficacy cannot both be 1: '
            'infection probabilities are undefined'
        )

    p_vi = (p_v * breakthrough) / denominator  # P(V|I)
    p_nv_i = p_nv / denominator  # P(¬V|I)

    # Cancelled form of Bayes' rule; avoids dividing by P(V) or P(¬V), which
    # would fail when the vaccination rate is exactly 0 or 1.
    risk_vaccinated = breakthrough * p_i / denominator
    risk_unvaccinated = p_i / denominator

    if decimals is not None:
        risk_vaccinated = np.round(risk_vaccinated, decimals)
        risk_unvaccinated = np.round(risk_unvaccinated, decimals)

    return {
        'vaccinated': risk_vaccinated,
        'unvaccinated': risk_unvaccinated,
        'p_vi': p_vi,
        'p_nv_i': p_nv_i,
    }

In [46]:
# Sanity check with toy numbers: with a vaccine efficacy of 0 the vaccinated and
# unvaccinated risks should be identical (ratio of 1.0).
tn_pop = 1000
daily_infections = 100
tn_infections = 1 * daily_infections  # TODO: Need a better heuristic

infection_rate = tn_infections / tn_pop  # fraction of the population infected (0.0-1.0)
# infection_likelihood() expects its first argument per 100,000 population,
# so convert the raw counts instead of passing the 0.0-1.0 fraction.
incident_rate = tn_infections * 1e5 / tn_pop
vaccination_rate = 0.9
vaccine_efficacy = 0

risk = infection_likelihood(incident_rate, vaccination_rate, vaccine_efficacy)
print(risk)
print(risk['vaccinated'] / risk['unvaccinated'])

{'vaccinated': 0.1, 'unvaccinated': 0.1, 'p_vi': 0.9, 'p_nv_i': 0.09999999999999998}
1.0


In [61]:
# Download the CSSE daily report CSVs for the 15 days before today (oldest first;
# today itself is not requested) and stack them into a single frame.
# NOTE(review): results depend on the run date, and the upstream repository may
# no longer publish new daily reports — confirm before relying on recent dates.
today = dt.date.today()
frames = []
missing_dates = []

for offset in range(15, 0, -1):
    date = (today - dt.timedelta(days=offset)).strftime('%m-%d-%Y')
    url = (
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master'
        '/csse_covid_19_data/csse_covid_19_daily_reports/{}.csv'.format(date)
    )
    try:
        frames.append(pd.read_csv(url))
    except HTTPError as err:
        # A report that has not been published (404) should not abort the whole
        # download; any other HTTP failure is still surfaced.
        if err.code != 404:
            raise
        missing_dates.append(date)

if missing_dates:
    print('No daily report found for: {}'.format(', '.join(missing_dates)))
if not frames:
    raise RuntimeError('None of the requested daily reports could be downloaded')

# reset_index() (without drop) keeps each row's position within its source file
# as an `index` column and gives the combined frame a unique 0..n-1 index.
df = pd.concat(frames).reset_index()

# url = (
#     'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master'
#     '/csse_covid_19_data/csse_covid_19_daily_reports/08-20-2021.csv'
# )
# # response = re.get(url)
# # response.content
# df = pd.read_csv(url)

In [79]:
# Narrow the combined daily reports down to the columns selected below;
# all rows are kept.
subset_columns = [
    'FIPS',
    'Admin2',
    'Province_State',
    'Country_Region',
    'Last_Update',
    'Incident_Rate',
]
df_subset = df.loc[:, subset_columns]