In [116]:
import os
from typing import Optional
import requests as re
import datetime as dt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Bayes' Theorem applied to likelihood of current individual COVID infection

## I = infected, V = vaccinated
### P(I|V) = P(V|I) P(I) / P(V)
### P(I|¬V) = P(¬V|I) P(I) / P(¬V)

In [99]:
def predict_risk(
    infectious_rate: float,
    vaccination_rate: float,
    vaccine_efficacy: float,
    identification_rate: Optional[float]=None
):
    """Return probabilities of current infection in vaccinated and unvaccinated individuals.

    Given Bayes' Theorem, P(A|B) = P(B|A) P(A) / P(B):

    For vaccinated individuals (V), the probability of current COVID infection (I) is

        P(I|V) = P(V|I) P(I) / P(V)

    where

        P(V) is the current local vaccination rate
        P(I) is the current local infection rate
        P(V|I) is the share of infections that occur in vaccinated people:
            P(V|I) = P(V)(1 - vaccine_efficacy)
                     / (P(V)(1 - vaccine_efficacy) + P(¬V))

    For unvaccinated individuals (¬V), the probability of current COVID infection is

        P(I|¬V) = P(¬V|I) P(I) / P(¬V)

    where P(¬V|I) = 1 - P(V|I).

    Substituting P(V|I) and P(¬V|I) cancels P(V) and P(¬V), so both risks are
    computed directly from the shared denominator. This keeps the result defined
    when the vaccination rate is exactly 0 or 1.

    Assumptions include:
        - Equal risk of virus exposure to vaccinated and unvaccinated individuals

    Args:
        infectious_rate (float): Local rate of active infection (0.0-1.0)
        vaccination_rate (float): Local vaccination rate (0.0-1.0)
        vaccine_efficacy (float): Proportion of potential infections blocked by vaccine (0.0-1.0)
        identification_rate (Optional[float]): Proportion of true infection count represented in data.
            If None (or 0), infectious_rate is assumed to be accurate and is used unchanged.

    Returns:
        risk (dict): Keys are
            'vaccinated'   - P(I|V), rounded to 3 decimals
            'unvaccinated' - P(I|¬V), rounded to 3 decimals
            'p_vi'         - P(V|I), unrounded
            'p_nv_i'       - P(¬V|I), unrounded

    Raises:
        ZeroDivisionError: With plain Python floats, when vaccination_rate == 1 and
            vaccine_efficacy == 1 (the model then admits no infections at all, so
            the conditional probabilities are undefined).
    """
    if identification_rate:
        # Scale the reported rate up to account for missed diagnoses.
        infectious_rate /= identification_rate

    p_v = vaccination_rate        # P(V)
    p_nv = 1 - vaccination_rate   # P(¬V)
    p_i = infectious_rate         # P(I)

    # Relative weight of infections among the vaccinated vs. the whole population.
    breakthrough_weight = p_v * (1 - vaccine_efficacy)
    total_weight = breakthrough_weight + p_nv

    p_vi = breakthrough_weight / total_weight   # P(V|I)
    p_nv_i = p_nv / total_weight                # P(¬V|I)

    # P(V|I) P(I) / P(V) and P(¬V|I) P(I) / P(¬V) with P(V) / P(¬V) cancelled,
    # so neither expression divides by a vaccination share that may be zero.
    risk = {
        'vaccinated': np.round((1 - vaccine_efficacy) * p_i / total_weight, 3),
        'unvaccinated': np.round(p_i / total_weight, 3),
        'p_vi': p_vi,
        'p_nv_i': p_nv_i,
    }

    return risk

In [115]:
# Point estimate of infection risk from a single day's reported infections.
# NOTE(review): `tn_` presumably refers to Tennessee — confirm the population figure.
tn_pop = 4.9e6
daily_infections = 37639
tn_infections = 1 * daily_infections # Need a better heuristic

infection_rate = tn_infections/tn_pop
print(infection_rate)
vaccination_rate = 0.4
vaccine_efficacy = 0.65

# The risk function defined in this notebook is `predict_risk`; the previous name
# `infection_likelihood` is not defined and fails on a fresh kernel.
risk = predict_risk(infection_rate, vaccination_rate, vaccine_efficacy)
print(risk)
# This ratio uses the values rounded to 3 decimals, so it is coarse; the
# unrounded ratio is 1 - vaccine_efficacy.
print(risk['vaccinated'] / risk['unvaccinated'])

0.007681428571428572
{'vaccinated': 0.004, 'unvaccinated': 0.01, 'p_vi': 0.18918918918918917, 'p_nv_i': 0.8108108108108107}
0.4


In [101]:
# Download the JHU CSSE daily report for each of the previous 15 days
# (oldest first, ending yesterday) and stack them into a single frame.
today = dt.date.today()
frames = []

for offset in reversed(range(1, 16)):
    report_day = today - dt.timedelta(days=offset)
    date = report_day.strftime('%m-%d-%Y')  # reports are named MM-DD-YYYY.csv
    url = (
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master'
        '/csse_covid_19_data/csse_covid_19_daily_reports/{}.csv'.format(date)
    )
    daily_report = pd.read_csv(url)
    frames.append(daily_report)

# reset_index() without drop=True keeps each file's original row number
# as an extra 'index' column in the combined frame.
stacked = pd.concat(frames)
df = stacked.reset_index()

# url = (
#     'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master'
#     '/csse_covid_19_data/csse_covid_19_daily_reports/08-20-2021.csv'
# )
# # response = re.get(url)
# # response.content
# df = pd.read_csv(url)

In [102]:
# Keep only the location, timestamp and incidence columns of the combined reports.
keep_cols = ['FIPS', 'Admin2', 'Province_State', 'Country_Region', 'Last_Update', 'Incident_Rate']
df_subset = df.loc[:, keep_cols]

In [103]:
# Summarise column dtypes and non-null counts of the selected columns.
df_subset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59805 entries, 0 to 59804
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   FIPS            48990 non-null  float64
 1   Admin2          49065 non-null  object 
 2   Province_State  57195 non-null  object 
 3   Country_Region  59805 non-null  object 
 4   Last_Update     59805 non-null  object 
 5   Incident_Rate   58470 non-null  float64
dtypes: float64(2), object(4)
memory usage: 2.7+ MB


In [104]:
# Distinct Country_Region values present in the selected columns.
set(df_subset['Country_Region'])

{'Afghanistan',
 'Albania',
 'Algeria',
 'Andorra',
 'Angola',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei',
 'Bulgaria',
 'Burkina Faso',
 'Burma',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Comoros',
 'Congo (Brazzaville)',
 'Congo (Kinshasa)',
 'Costa Rica',
 "Cote d'Ivoire",
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czechia',
 'Denmark',
 'Diamond Princess',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Eswatini',
 'Ethiopia',
 'Fiji',
 'Finland',
 'France',
 'Gabon',
 'Gambia',
 'Georgia',
 'Germany',
 'Ghana',
 'Greece',
 'Grenada',
 'Guatemala',
 'Guinea',
 'Guinea-Bissau',
 'Guyana',

In [105]:
# Rows for one US county, selected by country, state and county name.
# NOTE(review): the recorded output for this filter has no rows — confirm that
# an 'Adams' entry exists under 'Arkansas' in the data.
is_us = df.Country_Region == 'US'
is_arkansas = df.Province_State == 'Arkansas'
is_adams = df.Admin2 == 'Adams'
subset = df.loc[is_us & is_arkansas & is_adams]

In [106]:
# Show the filtered rows ordered by county name (returns a new frame; `subset` is unchanged).
subset.sort_values('Admin2')

Unnamed: 0,index,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio


In [132]:
# Columns to load from the US vaccination file; all other columns are skipped.
usecols = [
    'Date',
    'Country_Region',
    'Province_State',
    'People_Fully_Vaccinated',
    'People_Partially_Vaccinated'
]
us_path = '../data/raw/vaccinations/vaccinations_us.csv'
# Index by state name explicitly. A positional index_col=0 picks whichever of the
# selected columns comes first in the CSV, so it silently changes if the file's
# column order does.
vacc_us = pd.read_csv(us_path, index_col='Province_State', usecols=usecols)

In [133]:
# Display the US vaccination table as the cell output.
vacc_us

Unnamed: 0_level_0,Country_Region,Date,People_Fully_Vaccinated,People_Partially_Vaccinated
Province_State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,US,2021-08-22,1777585,563197.0
Alaska,US,2021-08-22,343000,48562.0
American Samoa,US,2021-08-22,25242,5076.0
Arizona,US,2021-08-22,3459329,633251.0
Arkansas,US,2021-08-22,1193299,367501.0
...,...,...,...,...
Virginia,US,2021-08-22,4798319,688448.0
Washington,US,2021-08-22,4512230,548029.0
West Virginia,US,2021-08-22,788532,130626.0
Wisconsin,US,2021-08-22,3087640,270561.0


In [138]:
# Column list carried over from the US load. It is NOT passed to read_csv below,
# so every column of the global file is loaded.
# NOTE(review): the displayed global frame uses differently-cased column names
# (e.g. 'People_fully_vaccinated'), so this list would likely need adjusting
# before it could be applied — confirm against the file.
usecols = [
    'Date', 'Country_Region', 'Province_State',
    'People_Fully_Vaccinated', 'People_Partially_Vaccinated',
]
global_path = '../data/raw/vaccinations/vaccinations_global.csv'
# The first CSV column becomes the row index.
vacc_global = pd.read_csv(
    global_path,
    index_col=0,
)

In [139]:
# Display the global vaccination table as the cell output.
vacc_global

Unnamed: 0,Country_Region,Date,Doses_admin,People_partially_vaccinated,People_fully_vaccinated,Report_Date_String,UID,Province_State
0,Afghanistan,2021-02-22,0,0.000000e+00,0.000000e+00,2021-02-22,4.0,
1,Afghanistan,2021-02-23,0,0.000000e+00,0.000000e+00,2021-02-23,4.0,
2,Afghanistan,2021-02-24,0,0.000000e+00,0.000000e+00,2021-02-24,4.0,
3,Afghanistan,2021-02-25,0,0.000000e+00,0.000000e+00,2021-02-25,4.0,
4,Afghanistan,2021-02-26,0,0.000000e+00,0.000000e+00,2021-02-26,4.0,
...,...,...,...,...,...,...,...,...
57839,Venezuela,2021-08-20,4678086,2.900000e+06,1.100000e+06,2021-08-21,862.0,
57840,Vietnam,2021-08-20,16306199,1.466983e+07,1.636372e+06,2021-08-21,704.0,
57841,World,2021-08-20,4893124049,2.477610e+09,1.872840e+09,2021-08-21,,
57842,Zambia,2021-08-20,543529,3.086040e+05,2.278750e+05,2021-08-21,894.0,
