In [1]:
import numpy as np
import pandas as pd

In [3]:
# Filesystem roots on the shared storage volume.
# outdir: presumably where results are written (not used in the cells shown here).
outdir = "/export/storage_covidvaccine/Result"
# datadir: root of the Raw/ and Intermediate/ CSV inputs read by the cells below.
datadir = "/export/storage_covidvaccine/Data"

In [4]:
# Load tract-level inputs and build `tracts`: one row per tract that has both a
# nearest-pharmacy distance record and an HPI score.

# Nearest-pharmacy distances (file produced by read_tract_dist.py).
tract_nearest_df = pd.read_csv(
    f"{datadir}/Intermediate/tract_nearest_dist.csv", dtype={'tract': str}
)

# HPI scores: only the identifier and the raw score are needed.
tract_hpi_df = (
    pd.read_csv(f"{datadir}/Raw/hpi2score.csv", dtype={'geoid': str}, usecols=['geoid', 'value'])
    .sort_values(by='value')
    .rename(columns={'geoid': 'tract', 'value': 'hpi'})
)
# qcut(labels=False) returns zero-based bin indices, so + 1 yields quartiles 1-4
# (1 = lowest HPI scores).
tract_hpi_df['hpi_quartile'] = pd.qcut(tract_hpi_df['hpi'], 4, labels=False) + 1

# The truncation below is only valid if every geoid has 11 characters, so check
# it explicitly instead of relying on a manual inspection.
assert (tract_hpi_df['tract'].str.len() == 11).all(), \
    "expected every HPI geoid to have 11 characters"
# Drop the first character so the ids match the 10-character tract ids used by
# the other tract files (presumably a leading zero -- TODO confirm).
tract_hpi_df['tract'] = tract_hpi_df['tract'].str[1:]

# Merge HPI onto the nearest-pharmacy distances; the outer join with an
# indicator lets the match quality be printed before filtering.
tracts = tract_nearest_df.merge(tract_hpi_df, on='tract', how='outer', indicator=True)
print("Distance to HPI merge:\n", tracts['_merge'].value_counts())

# Keep tracts present in the distance file that also have an HPI score.
# Building the result with a single mask and a non-inplace drop avoids mutating
# a slice (SettingWithCopyWarning).
in_distance_file = tracts['_merge'] != 'right_only'
has_hpi = tracts['hpi'].notnull()
tracts = tracts.loc[in_distance_file & has_hpi].drop(columns=['_merge'])

Distance to HPI merge:
 both          7789
left_only      268
right_only       1
Name: _merge, dtype: int64


In [5]:
# Tract demographics: load, normalise column names, and keep only the columns
# that are not re-derived elsewhere.
tract_demog = pd.read_csv(f"{datadir}/Raw/notreallyraw/TRACT_merged.csv", dtype={'tract': str})
tract_demog.columns = tract_demog.columns.str.lower()
tract_demog = (
    tract_demog
    .rename(columns={'population': 'tr_pop'})
    .drop(columns=[
        'state_id', 'county_id', 'tract_id', 'hpi', 'hpiquartile',
        'dshare', 'rshare', 'dvotes', 'rvotes', 'sum_votes',
        'latitude', 'longitude', 'land_area', 'health_none', 'race_white',
    ])  # TODO: re-construct these things
)

# Tract ids are expected to be 10 characters starting with '6'; the merge on
# 'tract' depends on this format, so verify it rather than eyeballing counts.
tract_ids = tract_demog['tract']
assert (tract_ids.str.len() == 10).all(), "expected every tract id to have 10 characters"
assert tract_ids.str.startswith('6').all(), "expected every tract id to start with '6'"

# Impute missing health-insurance values with the column mean.
for health_col in ['health_employer', 'health_medicare', 'health_medicaid', 'health_other']:
    tract_demog[health_col] = tract_demog[health_col].fillna(tract_demog[health_col].mean())


In [6]:
# Attach tract demographics to `tracts`.

# Re-running this cell would merge the demographics a second time, producing
# suffixed duplicate columns; fail early with a clear message instead.
if 'tr_pop' in tracts.columns:
    raise RuntimeError(
        "`tracts` already contains demographics; re-run the preceding cells before merging again"
    )

# Set zero populations to 1 to avoid divide by zero.
tract_demog.loc[tract_demog['tr_pop'] == 0, 'tr_pop'] = 1

tracts = tracts.merge(tract_demog, on='tract', how='outer', indicator=True)
print("Merge to demographics:\n", tracts['_merge'].value_counts())  # recorded run: perfect match

# Keep rows that came from `tracts` (drop demographics-only tracts) and that
# have a positive population.
# NOTE(review): zero populations were set to 1 above, so this filter does not
# remove zero-population tracts; it only removes rows whose tr_pop is missing
# (tracts absent from the demographics file) or negative. Confirm whether
# zero-population tracts were meant to be dropped instead.
from_tracts = tracts['_merge'] != 'right_only'
positive_pop = tracts['tr_pop'] > 0
# Single mask + non-inplace drop avoids mutating a slice (SettingWithCopyWarning).
tracts = tracts.loc[from_tracts & positive_pop].drop(columns=['_merge'])

Merge to demographics:
 both          8057
left_only        0
right_only       0
Name: _merge, dtype: int64


In [None]:
# Mechanism "Random-FCFS": First-come, first-served, with a random order for where people fall in line.
# Mechanism "Sequential": Everyone tries their first choice and ties are broken randomly, then everyone tries their second choice and ties are broken randomly, etc.
# Narratively, people sign on and try to schedule an appointment; if they fail, then by the time they get to try again, everyone else will have tried once.