In [31]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import pathlib


In [32]:
# Read the .env file in the current working directory into the process
# environment. Later cells fetch their API keys with os.getenv(); presumably
# those keys are defined in this file -- confirm against your local .env.
env_file = pathlib.Path('.') / '.env'
load_dotenv(dotenv_path=env_file)


True

In [17]:

# --------------------
# census_pull.py
# --------------------
# Fetches 2021 ACS 5-Year data for Georgia counties:
# --> poverty & population (B17001)
# --> education levels (B15003)
# --> housing (rent) cost burden (B25070)
# Derives three raw per-county rates: poverty, no-high-school-diploma, and
# rent cost burden. The SEV / Resiliency Score are NOT computed in this cell;
# this cell only produces the raw rates they are built from.
# Writes the rates to data/socioeconomic_full.csv


# 1) Load API key
API_KEY = os.getenv("CENSUS_API_KEY")
if not API_KEY:
    raise RuntimeError("Please enter your CENSUS_API_KEY environment variable")

# 2) Define endpoint & variables
BASE_URL = 'https://api.census.gov/data/2021/acs/acs5'
# Poverty: B17001_002E = population below poverty level,
#          B17001_001E = population for whom poverty status is determined
# Education: B15003_001E = total pop 25+,
#            B15003_002E..B15003_016E = pop without a HS diploma
#            (016E is "12th grade, no diploma"; 017E is the first diploma category)
# Housing cost burden (renters only, gross rent as % of household income):
#            B25070_007E..B25070_010E = the 30-34.9%, 35-39.9%, 40-49.9% and 50%+
#            brackets, i.e. households paying 30% or more of income on rent,
#            B25070_001E = all renter households in the table (including those
#            where the ratio was not computed)
edu_fields = [f'B15003_{i:03d}E' for i in range(2, 17)]  # 002E to 016E
rent_burden_fields = [f'B25070_{i:03d}E' for i in range(7, 11)]  # 007E to 010E
VARS = [
    'NAME',
    'B17001_002E', 'B17001_001E',
    'B15003_001E', *edu_fields,
    'B25070_001E', *rent_burden_fields,
]
params = {
    'get': ','.join(VARS),
    'for': 'county:*',
    'in': 'state:13',  # Georgia
    'key': API_KEY
}

# 3) Request data
# A timeout keeps the notebook from hanging forever if the API stalls.
response = requests.get(BASE_URL, params=params, timeout=30)
response.raise_for_status()
records = response.json()

# 4) Build DataFrame (first record is the header row, the rest are counties)
columns = records[0]
rows = records[1:]
data_frame = pd.DataFrame(rows, columns=columns)

# 5) Convert relevant columns to numeric (the API returns every value as a string)
num_cols = ['B17001_002E', 'B17001_001E', 'B15003_001E', 'B25070_001E'] + edu_fields + rent_burden_fields
for col in num_cols:
    data_frame[col] = pd.to_numeric(data_frame[col], errors='coerce')

# 6) Compute derived metrics
data_frame['poverty_rate'] = data_frame['B17001_002E'] / data_frame['B17001_001E']
data_frame['education_no_hs_rate'] = data_frame[edu_fields].sum(axis=1) / data_frame['B15003_001E']
data_frame['housing_cost_burden'] = data_frame[rent_burden_fields].sum(axis=1) / data_frame['B25070_001E']

# 7) Select & rename columns ('county' keeps its name: it is the 3-digit county FIPS)
output = data_frame[['NAME', 'state', 'county', 'poverty_rate', 'education_no_hs_rate', 'housing_cost_burden']].copy()
output = output.rename(columns={
    'NAME': 'County Name',
    'state': 'State',
    'poverty_rate': 'Poverty Rate',
    'education_no_hs_rate': 'No_HS_Education',
    'housing_cost_burden': "Housing_Cost_Burden"
})

# 8) Save to CSV
os.makedirs('data', exist_ok=True)
out_path = 'data/socioeconomic_full.csv'
output.to_csv(out_path, index=False)
print(f"Saved enriched socioeconomic data to {out_path}")



Saved enriched socioeconomic data to data/socioeconomic_full.csv


In [18]:
# --------------------
# socioeconomic_sev.py
# --------------------
# Purpose:
#   Turn the three raw socioeconomic rates into a single Socioeconomic
#   Vulnerability Score (SEV) per county, plus its complement
#   Resilience_Socio = 1 - SEV.
#   Each indicator is min-max scaled to [0, 1] across counties first:
#     - Poverty Rate
#     - No_HS_Education
#     - Housing_Cost_Burden
#   SEV is the row-wise mean of the three scaled indicators.
#
# Input:
#   data/socioeconomic_full.csv
# Output:
#   data/socioeconomic_sev.csv


def _min_max(values):
    """Rescale a numeric Series to [0, 1] using its own min and max.

    Returns the scalar 0.0 when the maximum is not strictly greater than the
    minimum (e.g. every value is identical), which avoids dividing by zero.
    """
    lo, hi = values.min(), values.max()
    if not hi > lo:
        return 0.0
    return (values - lo) / (hi - lo)


# 1) Load enriched socioeconomic data, failing early if the upstream cell has not run
input_path = 'data/socioeconomic_full.csv'
if not os.path.exists(input_path):
    raise FileNotFoundError(f"Missing input file: {input_path}")

df = pd.read_csv(input_path)

# 2) Add one norm_<metric> column per indicator
metrics = ['Poverty Rate', 'No_HS_Education', 'Housing_Cost_Burden']
norm_cols = [f'norm_{metric}' for metric in metrics]
for metric, norm_col in zip(metrics, norm_cols):
    df[norm_col] = _min_max(df[metric])

# 3) SEV = average of the normalized indicators for each county
df['SEV'] = df[norm_cols].mean(axis=1)

# 4) Resiliency Score is the complement of vulnerability
df['Resilience_Socio'] = 1 - df['SEV']

# 5) Persist the augmented table
os.makedirs('data', exist_ok=True)
output_path = 'data/socioeconomic_sev.csv'
df.to_csv(output_path, index=False)
print(f"Saved socioeconomic SEV data to {output_path}")

Saved socioeconomic SEV data to data/socioeconomic_sev.csv


In [21]:
# --------------------
# usda_loader.py
# --------------------
# Purpose:
#   Compute a county-level Food Insecurity Score (FIS) and its complement,
#   Resilience_Food = 1 - FIS.
#   FIS is the fraction of a county's census tracts that the USDA Food Access
#   Research Atlas flags in the LILATracts_1And10 column (LILA = Low Income,
#   Low Access).
# Input:
#   - data/socioeconomic_full.csv --> counties with poverty, education, and housing metrics.
#   - data/2019 Food Access Research Atlas Data/Food Access Research Atlas.csv --> tract-level LILA flags.
# Output:
#   data/food_access_score.csv

# Load the county table as strings and left-pad the county FIPS to 3 digits
census = pd.read_csv('data/socioeconomic_full.csv', dtype=str)
census['county'] = census['county'].str.zfill(3)

atlas = pd.read_csv('data/2019 Food Access Research Atlas Data/Food Access Research Atlas.csv', dtype=str, low_memory=False)
# A full tract GEOID is 11 digits (2 state + 3 county + 6 tract). Pad first so
# the slices below stay aligned even if the CSV export dropped leading zeros.
atlas['CensusTract'] = atlas['CensusTract'].str.zfill(11)
# Extract state and county FIPS from the tract GEOID
atlas['State'] = atlas['CensusTract'].str[:2]
atlas['county'] = atlas['CensusTract'].str[2:5]
# Georgia only; .copy() so the column assignment below writes to an independent
# frame instead of a filtered slice (avoids SettingWithCopyWarning).
atlas = atlas[atlas['State'] == '13'].copy()
# Cast the LILA flag to a number; blank/unparseable flags become NaN and are
# skipped by the mean below instead of crashing the cast.
atlas['LILATracts_1And10'] = pd.to_numeric(atlas['LILATracts_1And10'], errors='coerce')

# Fraction of each county's tracts that carry the LILA flag
county_flag = (
    atlas.groupby(['State', 'county'])
         .agg(frac_lila_tracts=('LILATracts_1And10', 'mean'))
         .reset_index()
)

# Left-merge onto the county table so every county from socioeconomic_full.csv is kept
merged = census.merge(county_flag, on=['State', 'county'], how='left')

# The LILA tract fraction is the Food Insecurity Score
merged = merged.rename(columns={'frac_lila_tracts': 'FIS'})
merged['Resilience_Food'] = 1 - merged['FIS']

merged.to_csv('data/food_access_score.csv', index=False)
print("✅ food_access_score.csv written")

✅ food_access_score.csv written


In [33]:
# --------------------
# Purpose:
#   Query the Census ACS API to fetch the **raw uninsured count**
#   among residents under 65 for every county in Georgia.
#   Restricted to the population under 65 because Medicare is available
#   from 65 onward.
#
#   ACS table B27010 ("Types of Health Insurance Coverage by Age") is split
#   into age bands, so the under-65 figures are built by summing three bands:
#     band totals:            002E (under 19), 018E (19-34), 034E (35-64)
#     no-coverage per band:   017E (under 19), 033E (19-34), 050E (35-64)
#   B27010_001E is the all-ages total and B27010_017E alone covers only
#   under-19s, so neither is an under-65 figure by itself.
#
# Output:
#   data/healthcare_uninsured_counts.csv

# 1) Load your Census API key from the environment
API_KEY = os.getenv('HEALTHCARE_API_KEY')
if not API_KEY:
    raise RuntimeError("Please set the HEALTHCARE_API_KEY environment variable.")

# 2) Define the ACS endpoint and variables
BASE_URL = 'https://api.census.gov/data/2021/acs/acs5'
VARS = {
    'total_under65': ['B27010_002E', 'B27010_018E', 'B27010_034E'],
    'uninsured_under65': ['B27010_017E', 'B27010_033E', 'B27010_050E'],
}
requested_vars = VARS['total_under65'] + VARS['uninsured_under65']
params = {
    'get': ','.join(requested_vars + ['NAME']),
    'for': 'county:*',
    'in': 'state:13',   # 13 = Georgia
    'key': API_KEY
}

# 3) Send request (timeout so a stalled API call cannot hang the notebook)
resp = requests.get(BASE_URL, params=params, timeout=30)
resp.raise_for_status()
data = resp.json()

# 4) Parse into DataFrame (first record is the header row)
columns = data[0]
rows = data[1:]
df = pd.DataFrame(rows, columns=columns)

# 5) Convert to integers and add up the three under-65 age bands.
#    astype(int) is kept on purpose: a missing value should fail loudly here
#    rather than silently shrink a county's count.
df[requested_vars] = df[requested_vars].astype(int)
df['total_under65'] = df[VARS['total_under65']].sum(axis=1)
df['uninsured_count'] = df[VARS['uninsured_under65']].sum(axis=1)
df['county_name'] = df['NAME']

# 6) Select, reorder & rename columns for the output file
out = df[['state', 'county', 'county_name', 'uninsured_count', 'total_under65']]
out = out.rename(columns={
    'state': 'StateFIPS',
    'county': 'CountyFIPS',
    'county_name': 'County Name',
    'uninsured_count': 'Uninsured Population Under 65',
    'total_under65': 'Total Population Under 65'
})

# 7) Save to CSV
os.makedirs('data', exist_ok=True)
output_path = 'data/healthcare_uninsured_counts.csv'
out.to_csv(output_path, index=False)
print(f"Saved to {output_path}")

Saved to data/healthcare_uninsured_counts.csv


In [34]:
# --------------------
# healthcare_score.py
# --------------------
#Purpose:
#    - Load county-level uninsured counts under 65 (healthcare_uninsured_counts.csv)
#    - Compute uninsured_rate = Uninsured population / Total population under 65
#    - Min-max normalize uninsured_rate into [0,1] as Normalized_Uninsured
#    - Derive Resilience_Health = 1 - Normalized_Uninsured
#    - Output augmented CSV with new columns:
#      uninsured_rate, Normalized_Uninsured, Resilience_Health
#Inputs:
#    - data/healthcare_uninsured_counts.csv
#Outputs:
#    - data/healthcare_resilience.csv

# 1) Paths
INPUT_CSV  = 'data/healthcare_uninsured_counts.csv'
OUTPUT_CSV = 'data/healthcare_resilience.csv'

# 2) Load data. FIPS codes are identifiers, not numbers: read them as strings
#    so leading zeros (e.g. county '001') survive the round trip.
df = pd.read_csv(INPUT_CSV, dtype={'StateFIPS': str, 'CountyFIPS': str})

# 3) Compute uninsured rate. A zero denominator is masked to NaN so it cannot
#    produce inf, which would otherwise become the max and distort the scaling.
population = df['Total Population Under 65']
df['uninsured_rate'] = df['Uninsured Population Under 65'] / population.where(population > 0)

# 4) Normalize uninsured_rate via min-max
minimum_val = df['uninsured_rate'].min()
maximum_val = df['uninsured_rate'].max()
if maximum_val > minimum_val:
    df['Normalized_Uninsured'] = (df['uninsured_rate'] - minimum_val) / (maximum_val - minimum_val)
else:
    # No spread between counties: use 0.0, matching the fallback used for the
    # socioeconomic indicators, instead of leaking the unscaled raw rate.
    df['Normalized_Uninsured'] = 0.0

# 5) Derive resilience
df['Resilience_Health'] = 1 - df['Normalized_Uninsured']

# 6) Save results
os.makedirs('data', exist_ok=True)
df.to_csv(OUTPUT_CSV, index=False)
print(f"✅ Saved healthcare resilience data to {OUTPUT_CSV}")

✅ Saved healthcare resilience data to data/healthcare_resilience.csv


In [None]:
# Community Resilience Index (CRI)
#
# Combines the three per-county resilience components into one score using
# equal weights (1/3 each):
#   - Resilience_Socio   (from socioeconomic_sev.csv)
#   - Resilience_Food    (from food_access_score.csv)
#   - Resilience_Health  (from healthcare_resilience.csv)
#
# The weights are kept equal for now; they can be adjusted later if case
# studies/research show that one component matters more than the others.

data_directory = pathlib.Path('data')
socioeconomic_sev = data_directory / 'socioeconomic_sev.csv'
healthcare_sev = data_directory / 'healthcare_resilience.csv'
food_access_score = data_directory / 'food_access_score.csv'
OUTPUT_CSV = data_directory / 'community_resilience_index.csv'

fips_keys = ['state', 'county']

# Load every input as strings and bring the FIPS column names to a common
# lowercase 'state' / 'county' spelling so the frames can be joined.
socio_df = pd.read_csv(socioeconomic_sev, dtype=str).rename(columns={'State': 'state'})
food_df = pd.read_csv(food_access_score, dtype=str).rename(columns={'State': 'state'})
health_df = pd.read_csv(healthcare_sev, dtype=str).rename(
    columns={'StateFIPS': 'state', 'CountyFIPS': 'county'}
)

# Left-pad FIPS codes to their fixed widths (2-digit state, 3-digit county)
for frame in (socio_df, food_df, health_df):
    frame['state'] = frame['state'].str.zfill(2)
    frame['county'] = frame['county'].str.zfill(3)

# Left-join the food and health components onto the socioeconomic table,
# one component at a time, keyed on (state, county).
food_part = food_df[fips_keys + ['Resilience_Food']]
health_part = health_df[fips_keys + ['Resilience_Health']]
merged_file = socio_df.merge(food_part, on=fips_keys, how='left')
merged_file = merged_file.merge(health_part, on=fips_keys, how='left')

# Everything was read as text, so cast the three components back to numbers
resilience_cols = ['Resilience_Socio', 'Resilience_Food', 'Resilience_Health']
merged_file[resilience_cols] = merged_file[resilience_cols].apply(pd.to_numeric, errors='coerce')

# Weighted sum of the components with equal weights
w1 = w2 = w3 = 1/3
socio_term = w1 * merged_file['Resilience_Socio']
food_term = w2 * merged_file['Resilience_Food']
health_term = w3 * merged_file['Resilience_Health']
merged_file['CRI'] = socio_term + food_term + health_term

merged_file['state_name'] = 'Georgia'

# Keep only the CRI-relevant columns and give them descriptive names
final_names = {
    'state': 'StateFIPS',
    'state_name': 'State Name',
    'county': 'CountyFIPS',
    'County Name': 'County Name',
    'Resilience_Socio': 'Socioeconomic Resilience',
    'Resilience_Food': 'Food Resilience',
    'Resilience_Health': 'Healthcare Resilience',
    'CRI': 'Community Resilience Index (CRI)',
}
output = merged_file[list(final_names)].rename(columns=final_names)

# Write the final table
data_directory.mkdir(parents=True, exist_ok=True)
output.to_csv(OUTPUT_CSV, index=False)
print(f"Saved final CRI to {OUTPUT_CSV}")

NameError: name 'Path' is not defined