## C1. Visualization – Hospital Distribution by AI Implementation Level

**Description**  
This section visualizes the geographic distribution of hospitals in the U.S. based on their level of AI implementation, using aggregated data.

**Purpose**  
To explore patterns in hospital-level AI adoption across geographic regions (state or census division), aiding interpretation of implementation disparities.

**Note**   
- AHA data is subscription-based and not publicly shareable. Although this notebook includes code for hospital-level visualizations, such visualizations are not presented in publications or shared notebooks. All reported results are aggregated at the state or census division level.



### 1 Load necessary libraries, functions, and preprocessed data

In [None]:
# Standard libraries
import os
import pandas as pd
import numpy as np

# Visualization libraries
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.lines import Line2D
from matplotlib.patches import Patch, Circle
import seaborn as sns

# Geospatial libraries
import geopandas as gpd
from shapely.geometry import box
from pyproj import Transformer
import contextily as ctx

# Spatial analysis libraries
from esda.moran import Moran
from libpysal.weights import Queen, KNN

# Interactive mapping
import folium
from folium import plugins
from folium.plugins import HeatMap

In [None]:
# Import functions
import sys
sys.path.append('../')
from calculate_scores import create_union_aipred_row, apply_ai_scores_to_dataframe

# Read the preprocessed AHA master table. low_memory=False is passed so pandas
# parses the file in one pass (presumably to avoid mixed-dtype columns — confirm).
AHA_master = pd.read_csv(
    '../../data/AHA_master_external_data.csv',
    low_memory=False,
)

# Build the `aipred_it_union` column row by row with the project helper; this
# is done before the remaining scores are applied below.
AHA_master['aipred_it_union'] = AHA_master.apply(
    create_union_aipred_row, axis='columns'
)

# Every other AI score comes from the shared project helper.
AHA_master2 = apply_ai_scores_to_dataframe(AHA_master)

# Keep only the rows that carry a non-null `id_it`.
has_it_id = AHA_master2['id_it'].notna()
AHA_IT = AHA_master2.loc[has_it_id]

In [None]:
# Load the US state boundary shapefile.
# SHAPE_RESTORE_SHX=YES is a GDAL shapefile-driver option that lets the driver
# rebuild a missing/broken .shx index when the .shp is opened.
state_shapefile = '../../temp_shp/cb_2018_us_state_500k.shp'
os.environ.update(SHAPE_RESTORE_SHX='YES')
states = gpd.read_file(state_shapefile)

### 2 Data engineering

In [None]:
# Drop hospitals that have no latitude or no longitude.
coordinate_columns = ['lat_as', 'long_as']
AHA_IT = AHA_IT.dropna(subset=coordinate_columns)

# Keep only plausible coordinates: neither component equal to 0, latitude in
# [-90, 90] and longitude in [-180, 180] (both bounds inclusive).
latitude = AHA_IT['lat_as']
longitude = AHA_IT['long_as']
valid_coords = (
    latitude.ne(0)
    & longitude.ne(0)
    & latitude.between(-90, 90)
    & longitude.between(-180, 180)
)
AHA_IT = AHA_IT.loc[valid_coords]

# Turn the cleaned table into a point GeoDataFrame in EPSG:4326
# (x = longitude, y = latitude).
hospital_points = gpd.points_from_xy(x=AHA_IT['long_as'], y=AHA_IT['lat_as'])
hospitals = gpd.GeoDataFrame(
    AHA_IT,
    geometry=hospital_points,
    crs="EPSG:4326",
)


### 3 Hospital-level AI implementation visualization (scatter plot on US map)

In [None]:
from matplotlib.patches import Patch

# Palette for the categorical map of `aipred_it`.
colors = {
    'ml': '#2A4D9C',           # Dark blue for Machine Learning
    'non_ml': '#4D9CFF',       # Lighter blue for Other Non-ML Models
    'neither': '#808080',      # Medium gray for Neither/Do not know
    'missing': '#CCCCCC',      # Light gray for missing values
    'outline': '#ffffff',      # White outline for points
    'background': '#ffffff',   # Pure white background
    'state_lines': '#aaaaaa'   # Medium gray for state lines
}
# Create a base map centered on the US
# NOTE(review): `m` is not used by the matplotlib figure built in this cell;
# it is kept in case a later cell relies on it — confirm before removing.
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# Set up the figure; the right margin is left free of the map axes
fig, ax = plt.subplots(1, 1, figsize=(10, 6), facecolor='white')
fig.subplots_adjust(left=0.01, right=0.78, top=0.9, bottom=0.05)

# Set pure white background
ax.set_facecolor(colors['background'])

# Only the five territories are dropped here; AK and HI remain in the layer
# but fall outside the axis limits set further down.
continental_us = states[~states['STUSPS'].isin(['PR', 'GU', 'VI', 'AS', 'MP'])]

# Draw state boundaries with more prominent lines
continental_us.boundary.plot(ax=ax, linewidth=0.5, color=colors['state_lines'])
# Fill states with very light color
continental_us.plot(ax=ax, color='white', alpha=0.2, edgecolor=colors['state_lines'], linewidth=0.5)
print("Successfully plotted state boundaries")

# Plot hospital points - categorical colors based on aipred_it.
# All points are drawn with ONE scatter call (one artist) instead of one call
# per row; per-point face/edge colours are passed as RGBA arrays with the
# category alpha baked in, so row order and appearance are unchanged.
# Style table indexed by category code: (face colour, edge colour, alpha).
category_styles = [
    (colors['missing'], '#bbbbbb', 0.3),   # 0: missing value (lower alpha)
    (colors['ml'], '#1a3366', 0.8),        # 1: aipred_it == 1, Machine Learning
    (colors['non_ml'], '#3366cc', 0.8),    # 2: aipred_it == 2, Other Non-ML Predictive Models
    (colors['neither'], '#555555', 0.8),   # 3: anything else (Neither / Do not know)
]
face_lut = np.array([mcolors.to_rgba(face, alpha) for face, _, alpha in category_styles])
edge_lut = np.array([mcolors.to_rgba(edge, alpha) for _, edge, alpha in category_styles])

plotted_hospitals = hospitals[hospitals.geometry.notna()]
if not plotted_hospitals.empty:
    ai_type = plotted_hospitals['aipred_it']
    is_missing = ai_type.isna().to_numpy()
    is_ml = ai_type.eq(1).fillna(False).to_numpy(dtype=bool)
    is_non_ml = ai_type.eq(2).fillna(False).to_numpy(dtype=bool)
    # First matching condition wins, mirroring the if/elif precedence.
    style_idx = np.select([is_missing, is_ml, is_non_ml], [0, 1, 2], default=3)

    ax.scatter(
        plotted_hospitals.geometry.x.to_numpy(),
        plotted_hospitals.geometry.y.to_numpy(),
        s=25,  # Slightly larger for better visibility
        c=face_lut[style_idx],
        edgecolors=edge_lut[style_idx],
        linewidths=0.3,
        zorder=3  # Above the state boundaries
    )

# Set US map boundaries (approximate)
ax.set_xlim([-125, -66])  # US longitude range
ax.set_ylim([24, 50])     # US latitude range

# Add title with Helvetica typography
ax.set_title('Predictive Analytics Type Across US Hospitals', 
           fontweight='bold', pad=10, fontname='Helvetica')

# Legend entries mirror the four point categories above
legend_elements = [
    Patch(facecolor=colors['ml'], edgecolor='#1a3366', label='Machine Learning', alpha=0.8),
    Patch(facecolor=colors['non_ml'], edgecolor='#3366cc', label='Other Non-ML Models', alpha=0.8),
    Patch(facecolor=colors['neither'], edgecolor='#555555', label='Neither/Do not know', alpha=0.8),
    Patch(facecolor=colors['missing'], edgecolor='#bbbbbb', label='Missing', alpha=0.3)
]

# Position the legend at the lower right
leg = ax.legend(handles=legend_elements, loc='lower right', 
              title='Predictive Analytics Type',
              title_fontsize=11,
              frameon=True, 
              framealpha=1.0, 
              edgecolor='lightgray', 
              prop={'family': 'Helvetica', 'size': 9})


# Show the figure
plt.show()

In [None]:
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.colors as mcolors
from matplotlib.cm import ScalarMappable

# Palette for the breadth-score map: purples for scores, two distinct grays
# for zero and missing values, white background.
colors = {
    'high': '#3f007d',       # Deep purple for high values
    'medium': '#8c6bb1',     # Medium purple
    'low': '#c4b3d6',        # Light lavender
    'zero': '#777777',       # Darker gray for zero values
    'missing': '#e5e5e5',    # Very light gray for missing values
    'outline': '#ffffff',    # White outline for points
    'background': '#ffffff', # Pure white background
    'state_lines': '#aaaaaa' # Medium gray for state lines
}


# Gradient colormap running zero-gray -> light -> medium -> deep purple
breadth_cmap = LinearSegmentedColormap.from_list(
    'breadth_purples_professional', 
    [colors['zero'], colors['low'], colors['medium'], colors['high']]
)


# Set up the figure; the right margin is reserved for the colorbar axes
fig, ax = plt.subplots(1, 1, figsize=(10, 6), facecolor='white')
fig.subplots_adjust(left=0.01, right=0.78, top=0.9, bottom=0.05)  # More space for labels

# Set pure white background
ax.set_facecolor(colors['background'])

# Only the five territories are dropped here; AK and HI remain in the layer
# but fall outside the axis limits set further down.
continental_us = states[~states['STUSPS'].isin(['PR', 'GU', 'VI', 'AS', 'MP'])]

# Draw state boundaries with more prominent lines
continental_us.boundary.plot(ax=ax, linewidth=0.5, color=colors['state_lines'])
# Fill states with very light color
continental_us.plot(ax=ax, color='white', alpha=0.2, edgecolor=colors['state_lines'], linewidth=0.5)
print("Successfully plotted state boundaries")


# Determine color scale using non-null values only for the range
valid_values = hospitals['ai_base_breadth_score'].dropna()
vmin = valid_values.min() if not valid_values.empty else 0
vmax = valid_values.max() if not valid_values.empty else 1
norm = mcolors.Normalize(vmin=vmin, vmax=vmax)

# Draw all hospitals with ONE scatter call (one artist) instead of one call
# per row. Per-point RGBA face/edge arrays carry the 0.9 alpha, so row order
# and appearance match the per-row version.
point_alpha = 0.9  # Slightly higher alpha on white
plotted_hospitals = hospitals[hospitals.geometry.notna()]
if not plotted_hospitals.empty:
    scores = plotted_hospitals['ai_base_breadth_score']
    is_missing = scores.isna().to_numpy()
    is_zero = scores.eq(0).fillna(False).to_numpy(dtype=bool)

    # Missing scores get a placeholder (vmin) so the colormap lookup is
    # well-defined; those rows are overwritten with the missing color below.
    score_values = scores.fillna(vmin).to_numpy(dtype=float)
    face_rgba = np.array(breadth_cmap(norm(score_values)), dtype=float)
    face_rgba[:, 3] = point_alpha
    face_rgba[is_zero] = mcolors.to_rgba(colors['zero'], point_alpha)
    face_rgba[is_missing] = mcolors.to_rgba(colors['missing'], point_alpha)

    # Standard dark edge everywhere, lighter edge for missing values
    edge_rgba = np.tile(mcolors.to_rgba('#555555', point_alpha), (len(plotted_hospitals), 1))
    edge_rgba[is_missing] = mcolors.to_rgba('#bbbbbb', point_alpha)

    ax.scatter(
        plotted_hospitals.geometry.x.to_numpy(),
        plotted_hospitals.geometry.y.to_numpy(),
        s=25,  # Slightly larger for better visibility
        c=face_rgba,
        edgecolors=edge_rgba,
        linewidths=0.3,
        zorder=3  # Above the state boundaries
    )

# Set US map boundaries (approximate)
ax.set_xlim([-125, -66])  # US longitude range
ax.set_ylim([24, 50])     # US latitude range

# Add title with Helvetica typography
ax.set_title('Model Implementation Breadth Across US Hospitals', 
           fontweight='bold', pad=10, fontname='Helvetica')

# Colorbar in its own axes to the right of the map
cax = fig.add_axes([0.82, 0.2, 0.02, 0.6])  # Position [left, bottom, width, height]
sm = ScalarMappable(cmap=breadth_cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, cax=cax)
cbar.set_label('Implementation\nBreadth Level', labelpad=10, fontname='Helvetica')


# Legend carries only the "Missing" swatch; the zero-gray is the low end of
# the colormap shown in the colorbar.
legend_elements = [
    Patch(facecolor=colors['missing'], edgecolor='#bbbbbb', label='Missing', alpha=0.9)
]
leg = ax.legend(handles=legend_elements, loc='lower right', frameon=True, 
              framealpha=1.0, edgecolor='lightgray', prop={'family': 'Helvetica'})


# Show the figure
plt.show()

### 4 Overall AI implementation visualization using heatmap

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib.colors as mcolors
from matplotlib.lines import Line2D
import geopandas as gpd
from matplotlib.patches import Patch, Circle 
from pyproj import Transformer  

from shapely.geometry import box


# Census divisions (plus a "Territories" bucket) and the postal codes that
# belong to each, listed in division order 1-9.
_division_members = {
    'New England': ['ME', 'NH', 'VT', 'MA', 'RI', 'CT'],
    'Mid Atlantic': ['NY', 'NJ', 'PA'],
    'South Atlantic': ['DE', 'MD', 'DC', 'VA', 'WV', 'NC', 'SC', 'GA', 'FL'],
    'East North Central': ['OH', 'IN', 'IL', 'MI', 'WI'],
    'East South Central': ['KY', 'TN', 'AL', 'MS'],
    'West North Central': ['MN', 'IA', 'MO', 'ND', 'SD', 'NE', 'KS'],
    'West South Central': ['AR', 'LA', 'OK', 'TX'],
    'Mountain': ['MT', 'ID', 'WY', 'CO', 'NM', 'AZ', 'UT', 'NV'],
    'Pacific': ['WA', 'OR', 'CA', 'AK', 'HI'],
    'Territories': ['PR', 'GU', 'VI', 'AS', 'MP'],
}

# Invert the table into a flat postal-code -> division name lookup.
state_to_division = {
    state: division
    for division, members in _division_members.items()
    for state in members
}

# Tag each hospital with its census division via its `mstate_it` code;
# codes absent from the lookup become NaN.
hospitals['division'] = hospitals['mstate_it'].map(state_to_division)


# Canvas sized for a US-wide map
fig, ax = plt.subplots(figsize=(11, 7), facecolor='white')

# Project hospitals and state polygons to the same CRS (EPSG:3857) so the
# KDE and the boundaries share one coordinate system.
web_mercator_epsg = 3857
hospitals_mercator = hospitals.to_crs(epsg=web_mercator_epsg)
states_mercator = states.to_crs(epsg=web_mercator_epsg)

# Contiguous states only: drop the territories plus Alaska and Hawaii.
non_contiguous = ['PR', 'GU', 'VI', 'AS', 'MP', 'AK', 'HI']
is_non_contiguous = states_mercator['STUSPS'].isin(non_contiguous)
continental_us = states_mercator[~is_non_contiguous]

# Keep the hospitals whose point lies within one of those state polygons.
continental_hospitals = gpd.sjoin(
    hospitals_mercator,
    continental_us,
    how="inner",
    predicate="within",
)

# Expose projected coordinates as plain columns for seaborn.
hospital_points = continental_hospitals.geometry
continental_hospitals['x'] = hospital_points.x
continental_hospitals['y'] = hospital_points.y

# Custom gray -> light blue -> dark blue colormap with 256 steps
gray_to_blue_stops = ['#808080', '#bdd7e7', '#6baed6', '#084594']
blue_gray_cmap = mcolors.LinearSegmentedColormap.from_list(
    'gray_to_blue',
    gray_to_blue_stops,
    N=256,
)

# Rows lacking a score or a coordinate are excluded from the heatmap input.
valid_data = continental_hospitals.dropna(subset=['ai_base_score', 'x', 'y'])

# Layer 1 (zorder=1): state polygons filled white at 30% opacity as a backdrop
continental_us.plot(ax=ax, color='white', alpha=0.3, edgecolor=None, zorder=1)

# Layer 2 (zorder=2): filled 2-D KDE of hospital locations, weighted by each
# hospital's `ai_base_score`, drawn with the gray-to-blue colormap.
# bw_adjust scales seaborn's default bandwidth (0.6 -> narrower kernel);
# thresh sets the lowest density level that is drawn.
kde = sns.kdeplot(
    data=valid_data,
    x='x',
    y='y',
    weights='ai_base_score',
    cmap=blue_gray_cmap,
    fill=True,
    alpha=0.7,
    levels=8,
    ax=ax,
    thresh=0.05,
    bw_adjust=0.6,
    zorder=2
)

# Layer 3 (zorder=3): state boundaries, plotted AFTER the heatmap so they
# appear on top; darker color and thicker lines than the backdrop
continental_us.boundary.plot(
    ax=ax, 
    linewidth=1.0,  # Thicker lines
    color='#555555',  # Darker gray for more prominence
    zorder=3  # Ensure on top of heatmap
)

# Layer 4 (zorder=4): outer outline, obtained by dissolving all remaining
# states into one geometry and taking its boundary
us_outline = continental_us.dissolve().boundary
us_outline.plot(
    ax=ax,
    linewidth=1.5,  # Thicker for country outline
    color='#333333',  # Even darker for country outline
    zorder=4  # Top layer
)

# Fit the view to the state layer's bounding box (minx, miny, maxx, maxy),
# padded by 5% of its width/height on every side
bounds = continental_us.total_bounds
buffer_x = (bounds[2] - bounds[0]) * 0.05
buffer_y = (bounds[3] - bounds[1]) * 0.05
ax.set_xlim(bounds[0] - buffer_x, bounds[2] + buffer_x)
ax.set_ylim(bounds[1] - buffer_y, bounds[3] + buffer_y)

# Title, and hide the axes frame/ticks
ax.set_title('Geographic Density of AI/ML Implementation Level in US Hospitals', 
             fontweight='medium', pad=20, fontname='Helvetica')
ax.set_axis_off()

# Coordinate transformer from EPSG:4326 to EPSG:3857 (the CRS of this map);
# always_xy=True means inputs/outputs are in (x, y) = (longitude, latitude) order.
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
