## B2. Alignment Analysis – Need vs AI Implementation

**Description**  
This section evaluates the alignment between healthcare need and AI implementation levels across hospitals. Tertiles are computed for both variables to assess patterns of alignment. 

**Purpose**  
To examine whether AI implementation corresponds with areas of greatest need. 

**Method Summary**  
- Rank-based tertiles were created for HPSA and MUA scores; standard (value-based) tertiles were created for ADI and SVI scores.  
- AI implementation scores were already categorized into three categories (Low, Medium, High).  
- Cross-tabulations were generated and visualized using heatmaps.  


### 1 Load necessary libraries, functions, and pre-processed data 

In [None]:

# load necessary libraries 
import geopandas as gpd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# Names of the AI exposure scores: the five base scores, the LLM readiness
# score, and then the "_imputed" counterpart of each base score (the LLM
# readiness score has no imputed counterpart in this list).
_base_ai_scores = (
    "ai_base_score",
    "ai_base_breadth_score",
    "ai_base_dev_score",
    "ai_base_eval_score_2023",
    "ai_base_eval_score_2024",
)
ai_exposures = [
    *_base_ai_scores,
    "llm_readiness_score",
    *(f"{score}_imputed" for score in _base_ai_scores),
]

In [None]:
# Read the pre-processed AHA master file. low_memory=False makes pandas infer
# each column's dtype from the whole file instead of chunk by chunk.
AHA_master = pd.read_csv("../../data/AHA_master_external_data.csv", low_memory=False)

# Keep only the rows with a non-missing `id_it`. An explicit copy is taken
# because later cells add columns to AHA_IT; without it those assignments
# target a slice of AHA_master and trigger SettingWithCopyWarning.
AHA_IT = AHA_master[AHA_master.id_it.notna()].copy()

### 2 Data engineering 

In [None]:
# Import all functions but only use what you need
import sys
sys.path.append('../')
from calculate_scores import (
    create_union_aipred_row, 
    calculate_base_ai_implementation_row_imputed
)

In [None]:
# The import cell above binds only individual functions from calculate_scores,
# so the module name itself is not in scope (and apply_ai_scores_to_dataframe
# is not imported at all). Import the module here so the qualified calls below
# resolve; this relies on the sys.path.append('../') done in the import cell.
import calculate_scores

# Row-wise union AI-prediction indicator, then the AI score columns.
AHA_IT['aipred_it_union'] = AHA_IT.apply(calculate_scores.create_union_aipred_row, axis=1)
AHA_IT = calculate_scores.apply_ai_scores_to_dataframe(AHA_IT)

In [None]:
# State/territory postal code -> census division name.
# Written as division -> member states and inverted, so each division's
# membership can be read at a glance. 'Territories' is a catch-all bucket
# that has no entry in division_to_region below.
state_to_division = {
    state: division
    for division, states in {
        'New England': ('ME', 'NH', 'VT', 'MA', 'RI', 'CT'),
        'Mid Atlantic': ('NY', 'NJ', 'PA'),
        'South Atlantic': ('DE', 'MD', 'DC', 'VA', 'WV', 'NC', 'SC', 'GA', 'FL'),
        'East North Central': ('OH', 'IN', 'IL', 'MI', 'WI'),
        'East South Central': ('KY', 'TN', 'AL', 'MS'),
        'West North Central': ('MN', 'IA', 'MO', 'ND', 'SD', 'NE', 'KS'),
        'West South Central': ('AR', 'LA', 'OK', 'TX'),
        'Mountain': ('MT', 'ID', 'WY', 'CO', 'NM', 'AZ', 'UT', 'NV'),
        'Pacific': ('WA', 'OR', 'CA', 'AK', 'HI'),
        'Territories': ('PR', 'GU', 'VI', 'AS', 'MP'),
    }.items()
    for state in states
}

# Census division name -> census region name.
division_to_region = {
    division: region
    for region, divisions in {
        'Northeast': ('New England', 'Mid Atlantic'),
        'Midwest': ('East North Central', 'West North Central'),
        'South': ('South Atlantic', 'East South Central', 'West South Central'),
        'West': ('Mountain', 'Pacific'),
    }.items()
    for division in divisions
}
# Add census division and region columns to the dataframe.
# States missing from state_to_division get NaN for both columns, and
# 'Territories' has no region, so territory rows get a NaN region.
AHA_IT['division'] = AHA_IT['mstate_it'].map(state_to_division)
AHA_IT['region'] = AHA_IT['division'].map(division_to_region)

# Drop the territories. The mask must be built from AHA_IT itself (the
# original referenced an undefined name, AHA_IT2). Rows with a NaN division
# compare unequal to 'Territories' and are therefore kept. The explicit copy
# lets later cells add columns to AHA_IT_US without SettingWithCopyWarning.
AHA_IT_US = AHA_IT[AHA_IT['division'] != 'Territories'].copy()

In [None]:
# Translate the imputed base AI score (0, 1, 2) into a descriptive model-type
# label; the position of each label in the list is the score it stands for.
# Scores that are not 0, 1 or 2 map to NaN.
AHA_IT_US['model_type'] = AHA_IT_US['ai_base_score_imputed'].map(
    dict(enumerate(['No Models', 'Non-AI Predictive Models', 'AI Predictive Models']))
)

### 3 Alignment analysis 

In [None]:
def create_rank_based_tertiles(df, column_name, labels=['Low Need', 'Medium Need', 'High Need']):
    """Split ``df[column_name]`` into three equal-sized groups by rank.

    Values are ranked with ``method='first'`` (ties get distinct ranks in
    order of appearance) before being cut, so heavily tied scores can still
    be divided into three bins.

    Args:
        df: DataFrame holding the score column.
        column_name: Name of the column to bin.
        labels: Three labels, ordered from lowest to highest tertile.

    Returns:
        A categorical Series of tertile labels aligned with ``df``.
    """
    ranks = df[column_name].rank(method='first')
    return pd.qcut(ranks, q=3, labels=labels)

def create_standard_tertiles(df, column_name, labels=['Low Need', 'Medium Need', 'High Need']):
    """Split ``df[column_name]`` into tertiles using its raw values.

    Unlike the rank-based variant, the quantile cut points are computed
    directly on the column values.

    Args:
        df: DataFrame holding the score column.
        column_name: Name of the column to bin.
        labels: Three labels, ordered from lowest to highest tertile.

    Returns:
        A categorical Series of tertile labels aligned with ``df``.
    """
    scores = df[column_name]
    return pd.qcut(scores, q=3, labels=labels)

def create_designation_binary(df, column_name):
    """Flag rows whose ``df[column_name]`` value is strictly positive.

    Args:
        df: DataFrame holding the score column.
        column_name: Name of the score column.

    Returns:
        An integer Series with 1 where the score is > 0 and 0 otherwise.
        Missing scores compare as not greater than zero and so yield 0.
    """
    is_positive = df[column_name].gt(0)
    return is_positive.astype(int)
    
def rr_from_joint_table(tbl, ai_label="AI Predictive Models", hi_label=None, lo_label=None):
    """Compute the high-vs-low risk ratio of AI adoption from a need x model table.

    Within each of the two compared rows, the AI share is the ``ai_label``
    cell divided by the row total, so the table may hold counts or joint
    percentages (the scale cancels out).

    Args:
        tbl: DataFrame with need categories as rows and model types as columns.
        ai_label: Column whose share is compared between the two rows.
        hi_label: Row label of the high-need group. If None, the first row
            label containing "high" (case-insensitive) is used, falling back
            to the last row.
        lo_label: Row label of the low-need group. If None, the first row
            label containing "low" (case-insensitive) is used, falling back
            to the first row.

    Returns:
        Tuple ``(pH, pL, RR, rel, hi_label, lo_label)``: the AI share in the
        high and low rows, their ratio ``pH / pL`` (NaN when ``pL`` is 0 or
        NaN), the relative difference ``100 * (RR - 1)`` (NaN when RR is not
        finite), and the row labels actually used.
    """
    row_labels = list(tbl.index)
    # Resolve each label independently, so a caller who supplies only one of
    # them keeps that choice; only the missing one is auto-detected.
    if hi_label is None:
        hi_label = next((lab for lab in row_labels if "high" in str(lab).lower()), row_labels[-1])
    if lo_label is None:
        lo_label = next((lab for lab in row_labels if "low" in str(lab).lower()), row_labels[0])

    def _ai_share(row_label):
        # Share of the row's total that falls in the AI column.
        ai_cell = float(tbl.loc[row_label, ai_label])
        row_total = float(tbl.loc[row_label, :].sum())
        return ai_cell / row_total if row_total > 0 else np.nan

    pH = _ai_share(hi_label)
    pL = _ai_share(lo_label)
    RR = np.nan if (pL == 0 or np.isnan(pL)) else pH / pL
    rel = 100 * (RR - 1) if np.isfinite(RR) else np.nan
    return pH, pL, RR, rel, hi_label, lo_label

In [None]:
# Create AI implementation categories.
# MODEL_LABELS and AI_LABEL are referenced by the cross-tabulation cells below
# but are not defined anywhere else in the visible notebook, so they are
# defined here from the same labels used for the categorical.
# NOTE(review): if these constants are also defined in a cell not shown here,
# confirm the values agree.
MODEL_LABELS = ['No Models', 'Non-AI Predictive Models', 'AI Predictive Models']
AI_LABEL = MODEL_LABELS[-1]

# The imputed base score (0, 1, 2) indexes into MODEL_LABELS; any other score
# maps to NaN. The ordered categorical keeps the labels in that score order.
AHA_IT_US['model_type'] = pd.Categorical(
    AHA_IT_US['ai_base_score_imputed'].map(dict(enumerate(MODEL_LABELS))),
    categories=MODEL_LABELS,
    ordered=True
)

In [None]:
# HPSA/MUA measures are binned with RANK-BASED tertiles.
# Maps each new tertile column to the score column it is derived from.
hpsa_mua_measures = dict(
    primary_hpss_tertile='mean_primary_hpss',
    mental_hpss_tertile='mean_mental_hpss',
    dental_hpss_tertile='mean_dental_hpss',
    mua_score_tertile='mean_mua_score',
    mua_elder_tertile='mean_mua_elders_score',
    mua_infant_tertile='mean_mua_infant_score',
)
for tertile_col in hpsa_mua_measures:
    score_col = hpsa_mua_measures[tertile_col]
    AHA_IT_US[tertile_col] = create_rank_based_tertiles(AHA_IT_US, score_col)

# Socioeconomic measures (ADI, SVI) are binned with STANDARD tertiles,
# i.e. quantile cut points taken on the raw values rather than on ranks.
socio_measures = dict(
    adi_tertile='national_adi_median',
    svi_tertile='svi_themes_median',
)

for tertile_col in socio_measures:
    score_col = socio_measures[tertile_col]
    AHA_IT_US[tertile_col] = create_standard_tertiles(AHA_IT_US, score_col)
    
# Binary designation flags: 1 when the corresponding score is > 0, else 0.
# Maps each new flag column to the score column it is derived from.
designation_measures = dict(
    primary_hpsa_desig="mean_primary_hpss",
    mental_hpsa_desig="mean_mental_hpss",
    dental_hpsa_desig="mean_dental_hpss",
    mua_overall_desig="mean_mua_score",
    mua_elder_desig="mean_mua_elders_score",
    mua_infant_desig="mean_mua_infant_score",
)

for desig_col in designation_measures:
    score_col = designation_measures[desig_col]
    AHA_IT_US[desig_col] = create_designation_binary(AHA_IT_US, score_col)
    

In [None]:
# Cross-tabulate each socioeconomic need tertile against model type.
# Display name -> tertile column in AHA_IT_US.
socioeconomic_measures = dict([
    ('Area Deprivation Index', 'adi_tertile'),
    ('Social Vulnerability Index', 'svi_tertile'),
])

socioeconomic_tables, socioeconomic_stats = {}, {}

for name in socioeconomic_measures:
    column = socioeconomic_measures[name]

    # normalize=True gives each cell as a fraction of all tabulated hospitals;
    # scale to percentages.
    cross_tab = 100 * pd.crosstab(AHA_IT_US[column], AHA_IT_US['model_type'], normalize=True)

    # Row order: High Need on top, Low Need at the bottom.
    cross_tab = cross_tab.reindex(['High Need', 'Medium Need', 'Low Need'])

    # Model types absent from the cross-tab get an explicit zero column so
    # every table has the same columns, in MODEL_LABELS order.
    for col in MODEL_LABELS:
        if col in cross_tab.columns:
            continue
        cross_tab[col] = 0
    cross_tab = cross_tab[MODEL_LABELS]

    socioeconomic_tables[name] = cross_tab

    # rr_from_joint_table returns (pH, pL, RR, rel, hi_label, lo_label);
    # zip stops after the four statistics that are stored.
    socioeconomic_stats[name] = dict(
        zip(('pH', 'pL', 'RR', 'rel'), rr_from_joint_table(cross_tab, ai_label=AI_LABEL))
    )

In [None]:
# Create designation tables: for each HRSA designation, the joint percentage
# of hospitals by designation status x model type, plus the AI-adoption risk
# ratio between designated and non-designated hospitals.
provider_names = ["Primary HPSA", "Mental HPSA", "Dental HPSA", "MUA Overall", "MUA Elder", "MUA Infant"]
# Display name -> binary designation flag column in AHA_IT_US.
flag_col = {
    "Primary HPSA": "primary_hpsa_desig",
    "Mental HPSA": "mental_hpsa_desig",
    "Dental HPSA": "dental_hpsa_desig",
    "MUA Overall": "mua_overall_desig",
    "MUA Elder": "mua_elder_desig",
    "MUA Infant": "mua_infant_desig",
}

designation_tables = {}
designation_stats = {}

# Restrict to hospitals with a known model type. The original filtered on a
# "_mt3" column that is never created in this notebook; "model_type" is the
# column that holds the MODEL_LABELS values. The subset does not depend on the
# designation, so it is built once, outside the loop.
# NOTE(review): confirm "_mt3" was meant to be the three-level model_type.
d = AHA_IT_US[AHA_IT_US["model_type"].notna()].copy()

for name in provider_names:
    desig_mask = d[flag_col[name]] == 1

    # Count by designation status and model type
    cnt_desig = d.loc[desig_mask, "model_type"].value_counts().reindex(MODEL_LABELS, fill_value=0)
    cnt_not_desig = d.loc[~desig_mask, "model_type"].value_counts().reindex(MODEL_LABELS, fill_value=0)

    # Convert to percentages of all hospitals in d (joint percentages, so the
    # two rows together sum to 100)
    pct_desig = (cnt_desig / len(d)) * 100.0
    pct_not_desig = (cnt_not_desig / len(d)) * 100.0

    # Create table
    table = pd.DataFrame([pct_desig.values, pct_not_desig.values],
                        index=["Designated", "Not Designated"], columns=MODEL_LABELS)

    designation_tables[name] = table

    # Calculate statistics: share of AI adopters within each designation group
    n_desig = int(desig_mask.sum())
    n_not_desig = int((~desig_mask).sum())
    ai_desig = int((d.loc[desig_mask, "model_type"] == AI_LABEL).sum())
    ai_not_desig = int((d.loc[~desig_mask, "model_type"] == AI_LABEL).sum())

    p_desig = ai_desig / n_desig if n_desig > 0 else np.nan
    p_not_desig = ai_not_desig / n_not_desig if n_not_desig > 0 else np.nan
    # NaN > 0 is False, so an empty non-designated group also yields NaN here.
    RR = p_desig / p_not_desig if p_not_desig > 0 else np.nan
    rel_diff = 100 * (RR - 1) if np.isfinite(RR) else np.nan

    designation_stats[name] = {
        'n_desig': n_desig, 'n_not_desig': n_not_desig,
        'p_desig': p_desig, 'p_not_desig': p_not_desig,
        'RR': RR, 'rel_diff': rel_diff
    }

In [None]:
# Print each socioeconomic cross-tab followed by its high-vs-low summary.
print("Socioeconomic Measures (Tertiles)", "=" * 60, sep="\n")
for name in socioeconomic_tables:
    table = socioeconomic_tables[name]
    stats = socioeconomic_stats[name]
    # pH / pL are stored as proportions; show them as percentages.
    high_pct = 100 * stats['pH']
    low_pct = 100 * stats['pL']
    print(f"\n{name}:")
    print(table.round(1))
    print(f"High vs Low AI: {high_pct:.1f}% vs {low_pct:.1f}%")
    print(f"RR = {stats['RR']:.2f} ({stats['rel']:+.0f}% relative)")

In [None]:
# Print each designation table followed by group sizes, AI shares and the risk ratio.
print("\nHRSA Designations (Binary)", "=" * 60, sep="\n")
for name in designation_tables:
    table = designation_tables[name]
    stats = designation_stats[name]
    # p_desig / p_not_desig are stored as proportions; show them as percentages.
    desig_pct = 100 * stats['p_desig']
    not_desig_pct = 100 * stats['p_not_desig']
    print(f"\n{name}:")
    print(table.round(1))
    print(f"Designated: {stats['n_desig']} hospitals ({desig_pct:.1f}% AI)")
    print(f"Not Designated: {stats['n_not_desig']} hospitals ({not_desig_pct:.1f}% AI)")
    print(f"Risk Ratio: {stats['RR']:.2f} ({stats['rel_diff']:+.0f}% relative)")