## B1. Descriptive statistics - Table One 

**Description**  
This section generates summary statistics (Table One) comparing hospital and community characteristics across regions and AI implementation categories.

**Purpose**  
To identify patterns and potential disparities in hospital and community features across different regions and levels of AI implementation. This provides context for interpreting downstream analyses and highlights structural or contextual differences that may influence AI adoption.

**Note**  
- AHA data is subscription-based and not publicly shareable. Although this notebook includes code for hospital-level visualizations, such visualizations are not presented in publications or shared notebooks. All reported results are aggregated at the state or census division level.




### B1_0 Load necessary libraries, functions, and pre-processed data 

In [93]:

# load necessary libraries 
import geopandas as gpd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [94]:
# Column names of the four AI score variables.
ai_exposures = [
    "ai_base_score",
    "ai_base_breadth_score",
    "ai_base_dev_score",
    "ai_base_eval_score",
]

In [95]:
# Load the pre-processed master table. low_memory=False makes pandas parse the
# file in a single pass rather than inferring dtypes chunk by chunk.
AHA_master = pd.read_csv(
    "./data/AHA_master_external_data.csv",
    low_memory=False,
)
# Keep only the rows that carry a non-missing `id_it` value.
AHA_IT = AHA_master.loc[AHA_master["id_it"].notna()]

In [None]:
# Census division -> postal codes of its member states/territories.
# Dict and list order are deliberate: `state_to_division` ends up with the
# states in exactly this sequence.
_division_members = {
    'New England': ['ME', 'NH', 'VT', 'MA', 'RI', 'CT'],
    'Mid Atlantic': ['NY', 'NJ', 'PA'],
    'South Atlantic': ['DE', 'MD', 'DC', 'VA', 'WV', 'NC', 'SC', 'GA', 'FL'],
    'East North Central': ['OH', 'IN', 'IL', 'MI', 'WI'],
    'East South Central': ['KY', 'TN', 'AL', 'MS'],
    'West North Central': ['MN', 'IA', 'MO', 'ND', 'SD', 'NE', 'KS'],
    'West South Central': ['AR', 'LA', 'OK', 'TX'],
    'Mountain': ['MT', 'ID', 'WY', 'CO', 'NM', 'AZ', 'UT', 'NV'],
    'Pacific': ['WA', 'OR', 'CA', 'AK', 'HI'],
    # Not a census division; grouped under one label.
    'Territories': ['PR', 'GU', 'VI', 'AS', 'MP'],
}

# Postal code -> division label.
state_to_division = {
    state: division
    for division, states in _division_members.items()
    for state in states
}

# Census region -> its divisions. 'Territories' has no region on purpose.
_region_members = {
    'Northeast': ['New England', 'Mid Atlantic'],
    'Midwest': ['East North Central', 'West North Central'],
    'South': ['South Atlantic', 'East South Central', 'West South Central'],
    'West': ['Mountain', 'Pacific'],
}

# Division label -> region label.
division_to_region = {
    division: region
    for region, divisions in _region_members.items()
    for division in divisions
}
# `AHA_master2` was used here without being defined in the visible part of
# this notebook, which raises NameError on a fresh kernel. Define it
# explicitly; the copy keeps the new columns off the `AHA_master` slice.
# NOTE(review): assumes AHA_master2 is meant to be the `AHA_IT` subset (it is
# keyed on `mstate_it` and feeds `AHA_IT_US`) — confirm against the original
# notebook history.
AHA_master2 = AHA_IT.copy()

# Add census division and region columns. States missing from
# `state_to_division` get NaN; 'Territories' has no region entry, so its
# region is NaN as well.
AHA_master2['division'] = AHA_master2['mstate_it'].map(state_to_division)
AHA_master2['region'] = AHA_master2['division'].map(division_to_region)


In [None]:
# Drop hospitals whose division is 'Territories'. Rows with a missing
# division are kept, because NaN != 'Territories' evaluates to True.
# The explicit .copy() makes AHA_IT_US an independent frame: later cells add
# many columns to it, which would otherwise trigger SettingWithCopyWarning.
AHA_IT_US = AHA_master2[AHA_master2['division'] != 'Territories'].copy()
AHA_IT_US.shape

In [None]:
# Survey-code -> display-label lookups for the AI questionnaire items.
# Codes absent from a lookup come out of Series.map as NaN.
ai_base = {1: "AI/ML model", 2: "non-AI/ML predictive model", 3: "Do not know/Neither", 4: "Do not know/Neither"}
binary = {1: "Yes", 0: "No/No Response", np.nan: "No/No Response"}
# Previously named `eval`, which replaced Python's builtin eval() for the rest
# of the session; renamed so the builtin stays usable.
eval_map = {
    1: "All models",
    2: "Most models",
    3: "Some models",
    4: "Few models",
    5: "Do not know/None/No response",
    6: "Do not know/None/No response",
    np.nan: "Do not know/None/No response",
}
# EHR vendor codes 1-29: a handful of vendors are reported by name, every
# other code in that range is pooled into "Other".
ehr = {code: "Other" for code in range(1, 30)}
ehr.update({
    5: "Epic",
    2: "Cerner",
    9: "Meditech",
    17: "CPSI/Evident",
    1: "Allscripts",
    22: "MEDHOST",
    25: "Athena",
    29: "Altera Digital Health",
})
ehr[np.nan] = "No response"

# (new column, source column, lookup) — one mapped column per survey item.
recode_plan = [
    ('aipred_it_mapped', 'aipred_it', ai_base),
    ('usecase_traj', 'aitraj_it', binary),
    ('usecase_followup', 'airfol_it', binary),
    ('usecase_monitor', 'aimhea_it', binary),
    ('usecase_recommend', 'airect_it', binary),
    ('usecase_bill', 'aibill_it', binary),
    ('usecase_schedule', 'aische_it', binary),
    ('usecase_other_process_op', 'aipoth_it', binary),
    ('usecase_other_clinical', 'aicloth_it', binary),
    ('dev_EHRdev', 'mldev_it', binary),
    ('dev_thirdparty', 'mlthd_it', binary),
    ('dev_self', 'mlsed_it', binary),
    ('dev_public', 'mlpubd_it', binary),
    ('eval_accuracy', 'mlaccu_it', eval_map),
    ('eval_bias', 'mlbias_it', eval_map),
    ('ehr_provider', 'piemr_it', ehr),
]
# Create a new column with mapped responses for every item in the plan
for new_col, source_col, lookup in recode_plan:
    AHA_IT_US[new_col] = AHA_IT_US[source_col].map(lookup)


In [101]:

from tableone import TableOne

# Variables for the AI Table One, grouped by how they are summarized.

# AI scores (everything in `ai_variables` that is not categorical)
ai_continuous_variables = [
    'ai_base_score',
    'ai_base_breadth_score',
    'ai_base_dev_score',
    'ai_base_eval_score',
]

# AI characteristics (recoded survey items)
ai_categorical_variables = [
    'aipred_it_mapped',
    'usecase_traj',
    'usecase_followup',
    'usecase_monitor',
    'usecase_recommend',
    'usecase_bill',
    'usecase_schedule',
    'usecase_other_process_op',
    'usecase_other_clinical',
    'dev_EHRdev',
    'dev_thirdparty',
    'dev_self',
    'dev_public',
    'eval_accuracy',
    'eval_bias',
]

# Full list to include in the table: scores first, then characteristics.
ai_variables = ai_continuous_variables + ai_categorical_variables



In [None]:
# Display order of the category levels for each AI variable in Table One.
_yes_no_levels = ('Yes', "No/No Response")
_eval_levels = ('All models', 'Most models', 'Some models', 'Few models', 'Do not know/None/No response')

order_map = {'aipred_it_mapped': ["AI/ML model", "non-AI/ML predictive model", "Do not know/Neither"]}
# Every yes/no item gets its own fresh list so entries never alias each other.
order_map.update({
    name: list(_yes_no_levels)
    for name in (
        'usecase_traj',
        'usecase_followup',
        'usecase_monitor',
        'usecase_recommend',
        'usecase_bill',
        'usecase_schedule',
        'usecase_other_process_op',
        'usecase_other_clinical',
        'dev_EHRdev',
        'dev_thirdparty',
        'dev_self',
        'dev_public',
    )
})
order_map.update({name: list(_eval_levels) for name in ('eval_accuracy', 'eval_bias')})

# Define the order of divisions you want
ordered_divisions = ['New England', 'Mid Atlantic', 'South Atlantic', 'East North Central', 
                     'East South Central', 'West North Central', 'West South Central', 
                     'Mountain', 'Pacific', 'Territories']

# `ordered_divisions` used to be defined but never handed to TableOne, so the
# group columns came out in alphabetical order. TableOne reorders the groupby
# levels when the groupby variable has an entry in `order`; only divisions
# actually present in the data are listed, so no empty group is requested.
present_divisions = set(AHA_IT_US['division'].dropna())
division_order = [d for d in ordered_divisions if d in present_divisions]

# Create table with ordered divisions (AI variables, stratified by division)
table_one = TableOne(AHA_IT_US, 
                    columns=ai_variables, 
                    categorical=ai_categorical_variables,
                    groupby='division',
                    order={**order_map, 'division': division_order},
                    pval=True,
                    pval_threshold=0.05,
                    pval_digits=4,
                    missing=True)
table_one

In [None]:
# Continue with CBSA type and other variables
binary_map = {1: 'Yes', 0: 'No/No Response', np.nan: 'No/No Response'}
# Variant for items coded 1 / 2. The label for code 2 used to read
# 'No/No response' (lower-case "r"), which would have produced a separate
# category from the 'No/No Response' used everywhere else (including the
# order maps); the casing is now consistent.
other_binary_map = {1: 'Yes', 2: 'No/No Response', np.nan: 'No/No Response'}
AHA_IT_US['rural_urban_category'] = AHA_IT_US['cbsatype_as']

# System membership: take `mhsmemb_as`, set it to 1 where that flag is missing
# but a system id (`sysid_as`) is present, and treat any remaining missing
# value as 0 before labelling.
flag_missing_but_has_system = AHA_IT_US['sysid_as'].notna() & AHA_IT_US['mhsmemb_as'].isna()
AHA_IT_US['system_member'] = (
    AHA_IT_US['mhsmemb_as']
    .mask(flag_missing_but_has_system, 1)
    .fillna(0)
    .map(binary_map)
)

delivery_system_map = {
    1: 'Centralized Health System',
    2: 'Centralized Physician/Insurance Health System',
    3: 'Moderately Centralized Health System',
    4: 'Decentralized Health System',
    5: 'Independent Hospital System',
}
AHA_IT_US['delivery_system'] = AHA_IT_US['cluster_as'].map(delivery_system_map)

# Items stored as 1 / 2 are first recoded 2 -> 0 so that binary_map applies.
AHA_IT_US['community_hospital'] = AHA_IT_US['chc_as'].replace(2, 0).map(binary_map)
AHA_IT_US['subsidary_hospital'] = AHA_IT_US['subs_as'].map(binary_map)
# '.' entries are recoded to 0 before the integer cast.
# NOTE(review): astype(int) raises if `frtln_as` contains missing values.
AHA_IT_US['frontline_hospital'] = AHA_IT_US['frtln_as'].replace('.', 0).astype(int).map(binary_map)
AHA_IT_US['joint_commission_accreditation'] = AHA_IT_US['mapp1_as'].replace(2, 0).map(binary_map)
AHA_IT_US['aha_member'] = AHA_IT_US['ahambr_as'].map(binary_map)
AHA_IT_US['center_improvement_quality'] = AHA_IT_US['mapp22_as'].replace(2, 0).map(binary_map)

# teaching hospitals
# "major" = mapp8_as is 1; "minor" = mapp5_as or mapp3_as is 1 but mapp8_as
# is not; "teaching" = any of the three flags is 1.
is_major_teaching = AHA_IT_US['mapp8_as'] == 1
has_other_teaching_flag = (AHA_IT_US['mapp5_as'] == 1) | (AHA_IT_US['mapp3_as'] == 1)
AHA_IT_US['teaching_hospital'] = (has_other_teaching_flag | is_major_teaching).astype(int).map(binary_map)
AHA_IT_US['major_teaching_hospital'] = is_major_teaching.astype(int).map(binary_map)
AHA_IT_US['minor_teaching_hospital'] = (has_other_teaching_flag & ~is_major_teaching).astype(int).map(binary_map)

# hospital ownership type
# 0/1 indicator per ownership group, based on the `cntrl_as` code. The
# existing column-name spellings are kept so that other code referencing these
# columns keeps working.
AHA_IT_US['nonfederal_governement'] = AHA_IT_US['cntrl_as'].isin([12, 13, 14, 15, 16]).astype(int)
AHA_IT_US['non_profit_nongovernment'] = AHA_IT_US['cntrl_as'].isin([21, 23]).astype(int)
AHA_IT_US['for_profit'] = AHA_IT_US['cntrl_as'].isin([31, 32, 33]).astype(int)
AHA_IT_US['federal_governement'] = AHA_IT_US['cntrl_as'].isin([40, 44, 45, 46, 47, 48]).astype(int)
# Create a categorical column for hospital ownership types
def create_ownership_category(row):
    """Return the ownership label for one hospital record.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by ``'cntrl_as'`` (e.g. a DataFrame row).

    Returns
    -------
    str
        ``'nonfederal_government'``, ``'non_profit_nongovernment'``,
        ``'for_profit'`` or ``'federal_government'`` according to the
        ``cntrl_as`` code, and ``'other'`` for any code not listed
        (including missing values).
    """
    code_groups = (
        ('nonfederal_government', (12, 13, 14, 15, 16)),
        ('non_profit_nongovernment', (21, 23)),
        ('for_profit', (31, 32, 33)),
        ('federal_government', (40, 44, 45, 46, 47, 48)),
    )
    control_code = row['cntrl_as']
    for label, codes in code_groups:
        if control_code in codes:
            return label
    return 'other'

# Create the categorical column
AHA_IT_US['ownership_type'] = AHA_IT_US.apply(create_ownership_category, axis=1)
AHA_IT_US['for_profit_hospital'] = (AHA_IT_US['ownership_type'] == 'for_profit').astype(int)

# other hospital characteristics
# Only an explicit code of 1 counts as "Yes"; anything else (including
# missing) becomes "No/No Response".
AHA_IT_US['critical_access'] = (AHA_IT_US['mapp18_as'] == 1).astype(int).map(binary_map)
AHA_IT_US['rural_referral'] = (AHA_IT_US['mapp19_as'] == 1).astype(int).map(binary_map)

# medicare medicaid percentage
# A total of 0 is treated as missing: dividing by it would give inf, and a
# single inf makes the group means in Table One infinite.
total_inpatient_days = AHA_IT_US['ipdtot_as'].replace(0, np.nan)
AHA_IT_US['medicare_ipd_percentage'] = AHA_IT_US['mcripd_as'] / total_inpatient_days * 100
AHA_IT_US['medicaid_ipd_percentage'] = AHA_IT_US['mcdipd_as'] / total_inpatient_days * 100

# bed size
bedsize_map = {
    1: '6-24 beds',
    2: '25-49 beds',
    3: '50-99 beds',
    4: '100-199 beds',
    5: '200-299 beds',
    6: '300-399 beds',
    7: '400-499 beds',
    8: '500 or more beds',
}
AHA_IT_US['bedsize'] = AHA_IT_US['bsc_as'].map(bedsize_map)

In [105]:
# Display order of the category levels for each hospital / community variable.
_feature_yes_no = ('Yes', 'No/No Response')

order_map = {'rural_urban_category': ['Metro', 'Micro', 'Rural']}
# Every yes/no variable gets its own fresh list.
order_map.update({
    name: list(_feature_yes_no)
    for name in (
        'system_member',
        'community_hospital',
        'subsidary_hospital',
        'frontline_hospital',
        'joint_commission_accreditation',
        'aha_member',
        'center_improvement_quality',
        'teaching_hospital',
        'major_teaching_hospital',
        'minor_teaching_hospital',
    )
})
order_map['ownership_type'] = [
    'federal_government',
    'nonfederal_government',
    'non_profit_nongovernment',
    'for_profit',
]
order_map.update({name: list(_feature_yes_no) for name in ('critical_access', 'rural_referral')})
order_map['delivery_system'] = [
    'Centralized Health System',
    'Centralized Physician/Insurance Health System',
    'Moderately Centralized Health System',
    'Decentralized Health System',
    'Independent Hospital System',
]
order_map['bedsize'] = [
    '6-24 beds',
    '25-49 beds',
    '50-99 beds',
    '100-199 beds',
    '200-299 beds',
    '300-399 beds',
    '400-499 beds',
    '500 or more beds',
]


In [109]:

# Hospital / community variables for Table One, assembled from four pieces so
# that each name is written only once.

# Hospital characteristics — categorical
_hospital_categorical = [
    'system_member',
    'community_hospital',
    'ownership_type',
    'delivery_system',
    'teaching_hospital',
    'major_teaching_hospital',
    'minor_teaching_hospital',
    'bedsize',
    'joint_commission_accreditation',
    'critical_access',
    'rural_referral',
    'subsidary_hospital',
    'frontline_hospital',
    'center_improvement_quality',
]

# Hospital characteristics — continuous
_hospital_continuous = [
    'medicare_ipd_percentage',
    'medicaid_ipd_percentage',
    'core_index',
    'friction_index',
]

# Geographic characteristics — categorical
_geographic_categorical = ['rural_urban_category']

# Geographic characteristics — continuous
_geographic_continuous = [
    'national_adi_median',
    'state_adi_median',
    'svi_themes_median',
    'svi_theme1_median',
    'svi_theme2_median',
    'svi_theme3_median',
    'svi_theme4_median',
    'Device_Percent',
    'Broadband_Percent',
    'Internet_Percent',
    'mean_primary_hpss',
    'mean_dental_hpss',
    'mean_mental_hpss',
    'mean_mua_score',
    'mean_mua_elders_score',
    'mean_mua_infant_score',
]

# All variables, hospital block first and geographic block second.
feature_variables = (
    _hospital_categorical
    + _hospital_continuous
    + _geographic_categorical
    + _geographic_continuous
)
feature_categorical_variables = _hospital_categorical + _geographic_categorical
feature_variables_continuous_variables = _hospital_continuous + _geographic_continuous


In [110]:

# Range cleaning: SVI medians outside [0, 1] and national ADI medians outside
# [0, 100] are replaced with NaN; values already missing stay missing.
svi = [
    'svi_themes_median',
    'svi_theme1_median',
    'svi_theme2_median',
    'svi_theme3_median',
    'svi_theme4_median',
]

for col in svi:
    # Series.between is inclusive on both ends, so 0 and 1 are kept.
    AHA_IT_US[col] = AHA_IT_US[col].where(AHA_IT_US[col].between(0, 1))

AHA_IT_US['national_adi_median'] = AHA_IT_US['national_adi_median'].where(
    AHA_IT_US['national_adi_median'].between(0, 100)
)


In [None]:
# Create table with ordered divisions (hospital / community variables,
# stratified by division).
# The division order was never handed to TableOne before, so the groups were
# sorted alphabetically. TableOne reorders the groupby levels when the groupby
# variable has an entry in `order`; only divisions present in the data are
# listed, so no empty group is requested.
divisions_in_data = set(AHA_IT_US['division'].dropna())
table_two_order = {
    **order_map,
    'division': [d for d in ordered_divisions if d in divisions_in_data],
}
table_two = TableOne(AHA_IT_US, 
                    columns=feature_variables, 
                    categorical=feature_categorical_variables,
                    groupby='division',
                    order=table_two_order,
                    pval=True,
                    pval_threshold=0.05,
                    pval_digits=4,
                    missing=True)
table_two

In [112]:
# Display order of category levels for the combined AI + hospital/community
# tables. Each variable gets its own fresh list.
_binary_levels = ('Yes', "No/No Response")
_model_share_levels = ('All models', 'Most models', 'Some models', 'Few models', 'Do not know/None/No response')


def _levels_for(names, levels):
    """Map every name in `names` to a new list copy of `levels`."""
    return {name: list(levels) for name in names}


order_map = {'aipred_it_mapped': ["AI/ML model", "non-AI/ML predictive model", "Do not know/Neither"]}
# AI use cases and model developers
order_map.update(_levels_for(
    (
        'usecase_traj',
        'usecase_followup',
        'usecase_monitor',
        'usecase_recommend',
        'usecase_bill',
        'usecase_schedule',
        'usecase_other_process_op',
        'usecase_other_clinical',
        'dev_EHRdev',
        'dev_thirdparty',
        'dev_self',
        'dev_public',
    ),
    _binary_levels,
))
# Model evaluation items
order_map.update(_levels_for(('eval_accuracy', 'eval_bias'), _model_share_levels))
# Hospital / community variables
order_map['rural_urban_category'] = ['Metro', 'Micro', 'Rural']
order_map.update(_levels_for(
    (
        'system_member',
        'community_hospital',
        'subsidary_hospital',
        'frontline_hospital',
        'joint_commission_accreditation',
        'aha_member',
        'center_improvement_quality',
        'teaching_hospital',
        'major_teaching_hospital',
        'minor_teaching_hospital',
    ),
    _binary_levels,
))
order_map['ownership_type'] = [
    'federal_government',
    'nonfederal_government',
    'non_profit_nongovernment',
    'for_profit',
]
order_map.update(_levels_for(('critical_access', 'rural_referral'), _binary_levels))
order_map['delivery_system'] = [
    'Centralized Health System',
    'Centralized Physician/Insurance Health System',
    'Moderately Centralized Health System',
    'Decentralized Health System',
    'Independent Hospital System',
]
order_map['bedsize'] = [
    '6-24 beds',
    '25-49 beds',
    '50-99 beds',
    '100-199 beds',
    '200-299 beds',
    '300-399 beds',
    '400-499 beds',
    '500 or more beds',
]

# Combined variable lists: AI variables first, hospital/community second.
total_variables = [*ai_variables, *feature_variables]
categorical_variables = [*ai_categorical_variables, *feature_categorical_variables]

In [None]:
# Combined AI + hospital/community Table One, stratified by state
# (`mstate_it`), with p-values and missing-value counts.
state_table_options = dict(
    columns=total_variables,
    categorical=categorical_variables,
    groupby='mstate_it',
    order=order_map,
    pval=True,
    pval_threshold=0.05,
    pval_digits=4,
    missing=True,
)
table_four = TableOne(AHA_IT_US, **state_table_options)
table_four

In [None]:
# Combined AI + hospital/community Table One, stratified by `ai_base_score`.
# The stratifying variable is removed from the summarized columns: it was
# previously also listed in `total_variables`, which produced a row where the
# score was summarized within groups defined by that same score (constant
# within each group, so the row and its p-value carry no information).
stratify_by = 'ai_base_score'
table_five = TableOne(AHA_IT_US, 
                    columns=[var for var in total_variables if var != stratify_by], 
                    categorical=categorical_variables,
                    groupby=stratify_by,
                    order=order_map,
                    pval=True,
                    missing=True)
table_five