## C3_1. Hotspot National Analysis

**Description**  
This section identifies statistically significant hotspots and coldspots of hospital AI adoption across the United States. Using Getis-Ord Gi* and Local Moran's I statistics, this analysis pinpoints specific geographic locations where AI adoption is significantly higher (hotspots) or lower (coldspots) than would be expected by chance.

**Purpose**  
To identify hotspots and coldspots of hospital AI adoption.

**Disclaimer**  
- This codebase was partially cleaned and annotated using OpenAI’s ChatGPT-4o. Please review and validate before using for critical purposes.  
- AHA data is subscription-based and not publicly shareable. All reported results are aggregated at the state or census division level.
- All publicly available data should also be independently downloaded from the source.

**Notebook Workflow**  

0. Load necessary libraries, functions, and pre-processed data
1. Prepare the data for hotspot analysis
2. Load the hotspot function
3. Run the hotspot analysis
4. Get hotspot statistics and visualize hotspots

### C3_0 load necessary libraries, functions and preprocessed data 

#### C3_0_1 load libraries 

In [25]:
# Import necessary libraries
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
import warnings
from scipy import stats
from scipy.spatial import distance_matrix
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.patches as mpatches
from matplotlib.gridspec import GridSpec
from sklearn.neighbors import NearestNeighbors
import os



#### C3_0_2 load custom functions 

In [26]:
def calculate_base_ai_implementation_row(row):
    """
    Map a hospital's predictive-model answer ('aipred_it') to a base AI score.

    Args:
        row: A pandas Series (or any mapping) representing a single hospital row

    Returns:
        None when 'aipred_it' is missing; otherwise 2 for machine learning
        (code 1), 1 for other non-ML predictive models (code 2), and 0 for
        every other code
    """
    answer = row['aipred_it']

    # A missing answer cannot be scored
    if pd.isna(answer):
        return None

    if answer == 1:  # Machine Learning
        return 2
    if answer == 2:  # Other Non-Machine Learning Predictive Models
        return 1
    return 0  # Neither (3) or Do not know (4)

def calculate_ai_implementation_breadth_row(row):
    """
    Calculate AI implementation breadth score for a single row (hospital).

    The score starts from the base AI implementation score and adds 0.25 times
    the value of each use-case column. Missing use-case values (None or NaN)
    contribute nothing, so a single unanswered item no longer turns the whole
    score into NaN.

    Args:
        row: A pandas Series representing a single hospital row

    Returns:
        None when the base score is missing, 0 when the base score is 0,
        otherwise the base score plus the weighted use-case values
    """
    # Start with base score
    base_score = calculate_base_ai_implementation_row(row)
    if base_score is None:
        return None
    if base_score == 0:
        return 0

    # Implementation Breadth Score - count use cases
    use_case_cols = ['aitraj_it', 'airfol_it', 'aimhea_it', 'airect_it',
                     'aibill_it', 'aische_it', 'aipoth_it', 'aicloth_it']

    breadth_score = base_score
    for col in use_case_cols:
        value = row[col]
        # pd.isna covers both None and the NaN that pandas uses for blanks;
        # an `is None` test alone lets NaN through and poisons the sum.
        if pd.isna(value):
            continue
        breadth_score += value * 0.25  # 0.25 points per use case
    return breadth_score

def calculate_ai_development_row(row):
    """
    Score who developed a hospital's models, on top of the base AI score.

    Args:
        row: A pandas Series representing a single hospital row

    Returns:
        None when the base score is missing, 0 when the base score is 0,
        otherwise the base score plus each available developer-source value
        multiplied by its weight
    """
    base_score = calculate_base_ai_implementation_row(row)
    if base_score is None:
        return None
    if base_score == 0:
        return 0

    # (column, weight) pairs, applied in this order
    source_weights = (
        ('mlsed_it', 2),     # Self-developed
        ('mldev_it', 1),     # EHR developer
        ('mlthd_it', 1),     # Third-party
        ('mlpubd_it', 0.5),  # Public domain
    )

    dev_score = base_score
    for column, weight in source_weights:
        # Columns that are absent or null simply add nothing
        if column in row and pd.notna(row[column]):
            dev_score += row[column] * weight
    return dev_score

def _evaluation_coverage_points(code):
    """Convert an evaluation-coverage answer code into points (0 to 1)."""
    if code is None:
        return 0
    if code == 1:  # All models
        return 1
    if code == 2:  # Most models
        return 0.75
    if code == 3:  # Some models
        return 0.5
    if code == 4:  # Few models
        return 0.25
    # None (5) or Do not know (6): no points
    return 0


def calculate_ai_evaluation_row(row):
    """
    Score how widely a hospital evaluates its models, on top of the base score.

    Args:
        row: A pandas Series representing a single hospital row

    Returns:
        None when the base score is missing, 0 when the base score is 0,
        otherwise the base score plus the points earned for model accuracy
        ('mlaccu_it') and model bias ('mlbias_it') evaluation
    """
    base_score = calculate_base_ai_implementation_row(row)
    if base_score is None:
        return None
    if base_score == 0:
        return 0

    eval_score = base_score
    eval_score += _evaluation_coverage_points(row['mlaccu_it'])  # model accuracy (MLACCU)
    eval_score += _evaluation_coverage_points(row['mlbias_it'])  # model bias (MLBIAS)
    return eval_score

def calculate_all_ai_scores_row(row):
    """
    Run every AI/ML scoring function on one hospital row.

    Args:
        row: A pandas Series representing a single hospital row

    Returns:
        dict: Score column name -> calculated score (base, breadth,
        development and evaluation, in that order)
    """
    scorers = (
        ('ai_base_score', calculate_base_ai_implementation_row),
        ('ai_base_breadth_score', calculate_ai_implementation_breadth_row),
        ('ai_base_dev_score', calculate_ai_development_row),
        ('ai_base_eval_score', calculate_ai_evaluation_row),
    )
    return {score_name: scorer(row) for score_name, scorer in scorers}

def apply_ai_scores_to_dataframe(df):
    """
    Add the four AI score columns to a hospital dataframe, one row at a time.

    The dataframe is modified in place and also returned.

    Args:
        df: A pandas DataFrame with hospital data

    Returns:
        pandas.DataFrame: The same DataFrame with the AI score columns filled in
    """
    score_columns = (
        'ai_base_score',
        'ai_base_breadth_score',
        'ai_base_dev_score',
        'ai_base_eval_score',
    )

    # Create the score columns up front, filled with NaN
    for column in score_columns:
        df[column] = float('nan')

    # Score each hospital and write its results back by index label
    for label, hospital in df.iterrows():
        computed = calculate_all_ai_scores_row(hospital)
        for column, value in computed.items():
            df.at[label, column] = value

    return df


#### C3_0_3 load processed dataframe

In [None]:
# Load the pre-processed AHA master file
AHA_master = pd.read_csv('./data/AHA_master_external_data.csv', low_memory=False)

# Keep only rows that have an IT-survey id. The explicit .copy() matters:
# apply_ai_scores_to_dataframe adds columns and writes values in place, and
# doing that on a filtered slice of AHA_master can trigger pandas'
# SettingWithCopyWarning.
AHA_IT = AHA_master[AHA_master['id_it'].notna()].copy()

# Attach the four AI score columns
AHA_master2 = apply_ai_scores_to_dataframe(AHA_IT)

In [28]:
import os

# Set before reading so that a missing .shx index is rebuilt rather than raising
# NOTE(review): assumed to be the GDAL SHAPE_RESTORE_SHX option - confirm
os.environ['SHAPE_RESTORE_SHX'] = 'YES'

state_boundary_path = '../../../data/map_data/state_boundary.shp'
states = gpd.read_file(state_boundary_path)

#### C3_1 prepare dataframe for analysis 

In [None]:
# Remove rows with missing coordinates
AHA_master2 = AHA_master2.dropna(subset=['latitude_address', 'longitude_address'])

# Filter out invalid coordinates.
# The mask is built from AHA_master2 itself so that it shares the index of the
# frame it filters. Building it from the unfiltered AHA_master yields a boolean
# Series with a different index, which pandas has to re-align.
valid_coords = (
    (AHA_master2['latitude_address'] != 0) &
    (AHA_master2['longitude_address'] != 0) &
    (AHA_master2['latitude_address'] >= -90) &
    (AHA_master2['latitude_address'] <= 90) &
    (AHA_master2['longitude_address'] >= -180) &
    (AHA_master2['longitude_address'] <= 180)
)
AHA_master2 = AHA_master2[valid_coords].copy()

# Create GeoDataFrame of hospital points in longitude/latitude (EPSG:4326)
hospitals = gpd.GeoDataFrame(
    AHA_master2,
    geometry=gpd.points_from_xy(AHA_master2.longitude_address, AHA_master2.latitude_address),
    crs="EPSG:4326"
)


In [30]:

# Two subsets of the hospital points:
#   valid_hospitals     - coordinates present AND 'aipred_it' answered
#   valid_geo_hospitals - coordinates present
coordinate_columns = ['longitude_address', 'latitude_address']
valid_hospitals = hospitals.dropna(subset=coordinate_columns + ['aipred_it'])
valid_geo_hospitals = hospitals.dropna(subset=coordinate_columns)

# GeoDataFrame of hospitals with an AI answer (geographic lon/lat coordinates)
hospitals_gdf = gpd.GeoDataFrame(
    valid_hospitals,
    geometry=gpd.points_from_xy(
        valid_hospitals['longitude_address'],
        valid_hospitals['latitude_address'],
    ),
    crs="EPSG:4326",
)

# GeoDataFrame of all hospitals with coordinates (geographic lon/lat coordinates)
geo_hospitals_gdf = gpd.GeoDataFrame(
    valid_geo_hospitals,
    geometry=gpd.points_from_xy(
        valid_geo_hospitals['longitude_address'],
        valid_geo_hospitals['latitude_address'],
    ),
    crs="EPSG:4326",
)


In [31]:
# Re-project both point sets from lon/lat to EPSG:3857 (planar x/y coordinates)
# so that neighbour searches work on a flat 2D plane.
# NOTE(review): EPSG:3857 (Web Mercator) stretches distances as latitude
# increases; confirm it is accurate enough for the k-nearest-neighbour step.
hospitals_gdf_projected, geo_hospitals_gdf_projected = (
    frame.to_crs(epsg=3857) for frame in (hospitals_gdf, geo_hospitals_gdf)
)


In [32]:

# Add census division column to both projected dataframes
# NOTE(review): state_to_division is not defined in the visible part of this
# notebook - it must already exist in the session before this cell runs.
for _frame in (hospitals_gdf_projected, geo_hospitals_gdf_projected):
    _frame['division'] = _frame['mstate_it'].map(state_to_division)

# Census division names (no per-division loop follows in the visible cells)
divisions = [
    'New England',
    'Mid Atlantic',
    'South Atlantic',
    'East North Central',
    'East South Central',
    'West North Central',
    'West South Central',
    'Mountain',
    'Pacific',
]


#### C3_2 load hotspot function

In [49]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy import stats

def calculate_gi_star(gdf, value_column, k=5):
    """
    Calculate Getis-Ord Gi* statistic for hotspot analysis

    Each location's neighbourhood is its k+1 nearest points as returned by a
    k-nearest-neighbours query on the point coordinates (the query point itself
    is normally one of them), with binary weights.

    Parameters:
    gdf: GeoDataFrame with point geometries (gdf.geometry.x / .y are used)
    value_column: Column name for analysis; rows where it is missing are dropped
    k: Number of nearest neighbors

    Returns:
    A copy of gdf restricted to rows with a value, with added columns
    'gi_star' (sum of neighbourhood values), 'z_score', 'p_value'
    (two-tailed) and 'hotspot_type' ('Hotspot'/'Coldspot' at 99%, 95%, 90%,
    or 'Not Significant').
    When there are k or fewer usable rows the statistic cannot be computed:
    the same columns are still returned, with NaN statistics and every row
    labelled 'Not Significant', so downstream code can rely on the columns.
    """
    # Coerce to float so that None and NaN are both treated as missing
    values = np.asarray(gdf[value_column], dtype=float)
    valid_mask = ~np.isnan(values)

    # Work on a copy so the caller's frame is never modified
    result_gdf = gdf.loc[valid_mask].copy()
    values = values[valid_mask]
    n = len(values)

    if n <= k:
        # Too few points for a k-neighbour neighbourhood
        result_gdf['gi_star'] = np.nan
        result_gdf['z_score'] = np.nan
        result_gdf['p_value'] = np.nan
        result_gdf['hotspot_type'] = 'Not Significant'
        return result_gdf

    # k-nearest neighbours spatial weights (k+1 so the point itself is included for Gi*)
    coords = np.column_stack((result_gdf.geometry.x, result_gdf.geometry.y))
    nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm='auto').fit(coords)
    _, indices = nbrs.kneighbors(coords)

    # Global statistics
    # NOTE(review): the sample variance (ddof=1) is kept from the original
    # implementation; confirm whether the population variance was intended.
    x_bar = np.mean(values)
    s_squared = np.var(values, ddof=1)

    # With binary weights, the weighted sum for a location is just the sum of
    # the values at its k+1 neighbours, so no n x n weight matrix is needed.
    gi_star = values[indices].sum(axis=1)
    wi_sum = indices.shape[1]  # every row has exactly k+1 unit weights

    # Expected value and variance under the null hypothesis
    expected_gi = wi_sum * x_bar
    variance_gi = (wi_sum * s_squared * (n - wi_sum)) / (n - 1)

    if variance_gi > 0:
        # Standardized z-score and two-tailed p-value
        z_scores = (gi_star - expected_gi) / np.sqrt(variance_gi)
        p_values = 2 * (1 - stats.norm.cdf(np.abs(z_scores)))
    else:
        # No variation (or the neighbourhood is the whole sample)
        z_scores = np.zeros(n)
        p_values = np.ones(n)

    result_gdf['gi_star'] = gi_star
    result_gdf['z_score'] = z_scores
    result_gdf['p_value'] = p_values

    # Classify hotspots and coldspots. Levels are applied from the weakest to
    # the strongest so each point ends up with the highest level it reaches.
    hotspot_type = np.full(n, 'Not Significant', dtype=object)
    is_hot = z_scores > 0
    is_cold = z_scores < 0
    for level, alpha in (('90%', 0.1), ('95%', 0.05), ('99%', 0.01)):
        significant = p_values <= alpha
        hotspot_type[is_hot & significant] = f'Hotspot ({level})'
        hotspot_type[is_cold & significant] = f'Coldspot ({level})'
    result_gdf['hotspot_type'] = hotspot_type

    return result_gdf


### C3_3 run hotspot 

In [None]:
# 1. Conduct hotspot analysis for the entire US, once per AI score (k = 5 neighbours)
print("Performing hotspot analysis for the entire US...")
(
    base_hotspot_results,
    breadth_hotspot_results,
    dev_hotspot_results,
    eval_hotspot_results,
) = (
    calculate_gi_star(hospitals_gdf_projected, score_column, k=5)
    for score_column in (
        'ai_base_score',
        'ai_base_breadth_score',
        'ai_base_dev_score',
        'ai_base_eval_score',
    )
)


In [52]:
# Load the HSA boundary file and put it in the same CRS as the hospital points
hsa_boundary_path = '../data/HsaBdry_AK_HI_unmodified.geojson'
hsa_gdf = gpd.read_file(hsa_boundary_path).to_crs(hospitals_gdf_projected.crs)

In [53]:
# HSA-level status: each HSA takes the most frequent hotspot type among its hospitals
def _most_common_hotspot_type_by_hsa(hotspot_results):
    """Return one row per 'hsacode_as' with its most frequent 'hotspot_type'."""
    most_common = hotspot_results.groupby('hsacode_as')['hotspot_type'].agg(
        lambda labels: labels.value_counts().idxmax()
    )
    return most_common.reset_index(name='hsa_hotspot_type')


base_hsa_status = _most_common_hotspot_type_by_hsa(base_hotspot_results)
breadth_hsa_status = _most_common_hotspot_type_by_hsa(breadth_hotspot_results)
dev_hsa_status = _most_common_hotspot_type_by_hsa(dev_hotspot_results)
eval_hsa_status = _most_common_hotspot_type_by_hsa(eval_hotspot_results)


In [54]:
# Left-join each HSA-level status table onto the HSA polygons
# (HSAs without a status keep a missing 'hsa_hotspot_type')
_hsa_join = dict(left_on='HSA93', right_on='hsacode_as', how='left')

base_hsa_gdf = hsa_gdf.merge(base_hsa_status, **_hsa_join)
breadth_hsa_gdf = hsa_gdf.merge(breadth_hsa_status, **_hsa_join)
dev_hsa_gdf = hsa_gdf.merge(dev_hsa_status, **_hsa_join)
eval_hsa_gdf = hsa_gdf.merge(eval_hsa_status, **_hsa_join)

### C3_4 get hotspot stats and visualize hotspots

In [None]:
# Count and percentage of hospitals per hotspot type (base score)
print("ai base score hotspots stats")
base_df_counts = (
    base_hotspot_results['hotspot_type']
    .value_counts()
    .rename_axis('Hotspot Type')
    .reset_index(name='Count')
)
base_df_counts['Percentage'] = (
    base_df_counts['Count'].div(base_df_counts['Count'].sum()).mul(100).round(2)
)
base_df_counts.sort_values('Hotspot Type', ascending=True)


In [None]:
# Count and percentage of HSAs per hotspot type (base score)
print("ai base score hsa hotspots stats")
base_hsa_df_counts = (
    base_hsa_gdf['hsa_hotspot_type']
    .value_counts()
    .rename_axis('Hotspot Type')
    .reset_index(name='Count')
)
base_hsa_df_counts['Percentage'] = (
    base_hsa_df_counts['Count'].div(base_hsa_df_counts['Count'].sum()).mul(100).round(2)
)
base_hsa_df_counts.sort_values('Hotspot Type', ascending=True)

In [None]:
# Count and percentage of hospitals per hotspot type (breadth score)
print("ai base breadth score hotspots stats")
breadth_df_counts = (
    breadth_hotspot_results['hotspot_type']
    .value_counts()
    .rename_axis('Hotspot Type')
    .reset_index(name='Count')
)
breadth_df_counts['Percentage'] = (
    breadth_df_counts['Count'].div(breadth_df_counts['Count'].sum()).mul(100).round(2)
)
breadth_df_counts.sort_values('Hotspot Type', ascending=True)



In [None]:
# Count and percentage of HSAs per hotspot type (breadth score)
print("ai base breadth score hsa hotspots stats")
breadth_hsa_df_counts = (
    breadth_hsa_gdf['hsa_hotspot_type']
    .value_counts()
    .rename_axis('Hotspot Type')
    .reset_index(name='Count')
)
breadth_hsa_df_counts['Percentage'] = (
    breadth_hsa_df_counts['Count'].div(breadth_hsa_df_counts['Count'].sum()).mul(100).round(2)
)
breadth_hsa_df_counts.sort_values('Hotspot Type', ascending=True)


In [None]:
# Count and percentage of hospitals per hotspot type (development score)
print("ai base dev score hotspots stats")
dev_df_counts = (
    dev_hotspot_results['hotspot_type']
    .value_counts()
    .rename_axis('Hotspot Type')
    .reset_index(name='Count')
)
dev_df_counts['Percentage'] = (
    dev_df_counts['Count'].div(dev_df_counts['Count'].sum()).mul(100).round(2)
)
dev_df_counts.sort_values('Hotspot Type', ascending=True)



In [None]:
# Count and percentage of HSAs per hotspot type (development score)
print ("ai base dev score hsa hotspots stats")
dev_hsa_df_counts = pd.DataFrame(dev_hsa_gdf.hsa_hotspot_type.value_counts()).reset_index()
dev_hsa_df_counts.columns = ['Hotspot Type', 'Count']
# Percentages must come from this table's own counts; the breadth table was
# referenced here by mistake, which reported the breadth percentages instead.
dev_hsa_df_counts['Percentage'] = (dev_hsa_df_counts['Count'] / dev_hsa_df_counts['Count'].sum() * 100).round(2)
dev_hsa_df_counts.sort_values('Hotspot Type', ascending=True)


In [None]:
# Count and percentage of hospitals per hotspot type (evaluation score)
print("ai base eval score hotspots stats")
eval_df_counts = (
    eval_hotspot_results['hotspot_type']
    .value_counts()
    .rename_axis('Hotspot Type')
    .reset_index(name='Count')
)
eval_df_counts['Percentage'] = (
    eval_df_counts['Count'].div(eval_df_counts['Count'].sum()).mul(100).round(2)
)
eval_df_counts.sort_values('Hotspot Type', ascending=True)


In [None]:
# Count and percentage of HSAs per hotspot type (evaluation score)
print("ai base eval score hsa hotspots stats")
eval_hsa_df_counts = (
    eval_hsa_gdf['hsa_hotspot_type']
    .value_counts()
    .rename_axis('Hotspot Type')
    .reset_index(name='Count')
)
eval_hsa_df_counts['Percentage'] = (
    eval_hsa_df_counts['Count'].div(eval_hsa_df_counts['Count'].sum()).mul(100).round(2)
)
eval_hsa_df_counts.sort_values('Hotspot Type', ascending=True)


### C3_4 visualize hotspot  

### C3_4_1 ai_base_score hotspot visualization 

In [None]:
# Hotspot Visualization (ai_base_score, hospital points)
# Reads base_hotspot_results: the original referred to `hotspot_results`,
# a name that is never assigned in the visible cells.

# Define color scheme (this order is also the legend order)
hotspot_colors = {
    'Hotspot (99%)': '#001f4d',      # Dark blue
    'Hotspot (95%)': '#0050b3',      # Medium blue
    'Hotspot (90%)': '#4d94ff',      # Light blue
    'Not Significant': '#d9d9d9',    # Light gray
    'Coldspot (90%)': '#bfbfbf',     # Medium gray
    'Coldspot (95%)': '#737373',     # Dark gray
    'Coldspot (99%)': '#333333'      # Very dark gray
}

# Set opacity levels
opacity = {
    'Hotspot (99%)': 0.9,
    'Hotspot (95%)': 0.8,
    'Hotspot (90%)': 0.7,
    'Not Significant': 0.1,
    'Coldspot (90%)': 0.7,
    'Coldspot (95%)': 0.8,
    'Coldspot (99%)': 0.9
}

# Create visualization
fig, ax = plt.subplots(figsize=(12, 8))

# Plot non-significant points first (background)
non_sig = base_hotspot_results[base_hotspot_results['hotspot_type'] == 'Not Significant']
if len(non_sig) > 0:
    non_sig.plot(
        ax=ax,
        color=hotspot_colors['Not Significant'],
        markersize=15,
        alpha=opacity['Not Significant']
    )

# Plot significant hotspots and coldspots on top
significant_types = ['Coldspot (99%)', 'Coldspot (95%)', 'Coldspot (90%)',
                    'Hotspot (90%)', 'Hotspot (95%)', 'Hotspot (99%)']

for hotspot_type in significant_types:
    subset = base_hotspot_results[base_hotspot_results['hotspot_type'] == hotspot_type]
    if len(subset) > 0:
        subset.plot(
            ax=ax,
            color=hotspot_colors[hotspot_type],
            markersize=25,
            alpha=opacity[hotspot_type]
        )

# Add basemap if available. This stays best-effort (the map is still drawn
# without tiles), but the reason for skipping is reported instead of hidden.
try:
    import contextily as ctx
    ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
except Exception as basemap_error:
    print(f"Basemap skipped: {basemap_error}")

# Create legend: one marker per category, in the order of hotspot_colors
legend_elements = [
    plt.Line2D([0], [0], marker='o', color='w',
               markerfacecolor=color,
               label=label, markersize=10)
    for label, color in hotspot_colors.items()
]

ax.legend(handles=legend_elements, loc='upper right')
ax.set_axis_off()
ax.set_title('Hotspots and Coldspots of AI Implementation', fontsize=16)

plt.tight_layout()
plt.show()

# Summary statistics
print("Hotspot Analysis Summary:")
for hotspot_type in hotspot_colors.keys():
    count = len(base_hotspot_results[base_hotspot_results['hotspot_type'] == hotspot_type])
    print(f"{hotspot_type}: {count} locations")

In [None]:
# Reproject everything to EPSG:3857 for basemap compatibility
base_hsa_gdf = base_hsa_gdf.to_crs(epsg=3857)

# Define colors
custom_colors = {
    'Hotspot (90%)': '#4d94ff',
    'Hotspot (95%)': '#0050b3',
    'Hotspot (99%)': '#001f4d',
    'Coldspot (90%)': '#a6a6a6',
    'Coldspot (95%)': '#595959',
    'Coldspot (99%)': '#262626',
    'Not Significant': '#f2f2f2'  # very light gray
}

# Fill missing values as 'Not Significant'
base_hsa_gdf['hsa_hotspot_type'] = base_hsa_gdf['hsa_hotspot_type'].fillna('Not Significant')

# Plot
fig, ax = plt.subplots(figsize=(13, 11))

# Polygon layers do not produce legend entries from `label=`, so a matching
# Patch handle is collected for every category that is actually drawn.
legend_handles = []
for category, color in custom_colors.items():
    subset = base_hsa_gdf[base_hsa_gdf['hsa_hotspot_type'] == category]
    if subset.empty:
        continue  # nothing to draw for this category
    subset.plot(ax=ax, color=color, edgecolor='black', linewidth=0.2)
    legend_handles.append(
        mpatches.Patch(facecolor=color, edgecolor='black', linewidth=0.2, label=category)
    )

# Add basemap (disabled)
#ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)

ax.set_title('Hotspots and Coldspots of ML Implementation by Hospital Service Area', fontsize=15)
ax.set_axis_off()
ax.legend(handles=legend_handles, title='HSA Classification', loc='upper right', frameon=True)
plt.tight_layout()

# Save before showing, and make sure the output folder exists first
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/base_hsa_hotspot_map.pdf',
            bbox_inches='tight',
            pad_inches=0.1,
            facecolor='white',
            edgecolor='none',
            format='pdf')

plt.show()


### C3_4_2 secondary scores hotspot visualization 

In [104]:

# Palette for the secondary scores: purples for hotspots, grays for coldspots
secondary_hotspot_colors = {
    'Hotspot (99%)': '#4a1486',      # Very dark purple
    'Hotspot (95%)': '#807dba',      # Medium purple
    'Hotspot (90%)': '#bcbddc',      # Light purple
    'Not Significant': '#f0f0f0',    # Very light gray
    'Coldspot (90%)': '#bdbdbd',     # Light gray
    'Coldspot (95%)': '#636363',     # Medium gray
    'Coldspot (99%)': '#252525'      # Dark gray
}

# Continuous color map for z-scores: the same colors, running from the
# strongest coldspot to the strongest hotspot (the palette in reverse order)
secondary_hotspot_cmap = LinearSegmentedColormap.from_list(
    'eval_hotspot_cmap',
    list(reversed(list(secondary_hotspot_colors.values())))
)

# Marker opacity: stronger significance is more opaque
opacity = {
    'Hotspot (99%)': 0.9,
    'Hotspot (95%)': 0.8,
    'Hotspot (90%)': 0.7,
    'Not Significant': 0.1,  # Very low opacity for non-significant points
    'Coldspot (90%)': 0.7,
    'Coldspot (95%)': 0.8,
    'Coldspot (99%)': 0.9
}


In [None]:


# Breadth-score hotspots (hospital points)
# Create a single figure
fig, ax = plt.subplots(figsize=(12, 8))

# MAIN PLOT - HOTSPOTS
# First plot non-significant points with very low opacity
non_sig = breadth_hotspot_results[breadth_hotspot_results['hotspot_type'] == 'Not Significant']
if len(non_sig) > 0:
    non_sig.plot(
        ax=ax,
        color=secondary_hotspot_colors['Not Significant'],
        markersize=15,
        alpha=opacity['Not Significant']
    )

# Then the significant categories. Empty categories are skipped, as in the
# base-score map, so an empty GeoDataFrame is never handed to .plot().
for hotspot_type in ['Coldspot (90%)', 'Coldspot (95%)', 'Coldspot (99%)',
                     'Hotspot (90%)', 'Hotspot (95%)', 'Hotspot (99%)']:
    subset = breadth_hotspot_results[breadth_hotspot_results['hotspot_type'] == hotspot_type]
    if len(subset) > 0:
        subset.plot(
            ax=ax,
            color=secondary_hotspot_colors[hotspot_type],
            markersize=25,
            alpha=opacity[hotspot_type]
        )

# Create legend for hotspot plot: one marker per category in palette order.
# 'Hotspot (99%)' is shown as 'Hotspot (99% confidence)', etc.
legend_elements = [
    plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=color,
               label=label.replace('%)', '% confidence)'), markersize=10,
               # Higher opacity in legend for visibility of 'Not Significant'
               alpha=0.5 if label == 'Not Significant' else opacity[label])
    for label, color in secondary_hotspot_colors.items()
]

ax.legend(handles=legend_elements, loc='upper right', frameon=True)
ax.set_title('Hotspots and Coldspots of ML Implementation in US Hospitals', fontsize=16)
plt.tight_layout()
plt.show()


In [None]:
# Reproject everything to EPSG:3857 for basemap compatibility
breadth_hsa_gdf = breadth_hsa_gdf.to_crs(epsg=3857)

# Fill missing values as 'Not Significant'
breadth_hsa_gdf['hsa_hotspot_type'] = breadth_hsa_gdf['hsa_hotspot_type'].fillna('Not Significant')

# Plot
fig, ax = plt.subplots(figsize=(13, 11))

# Polygon layers do not produce legend entries from `label=`, so a matching
# Patch handle is collected for every category that is actually drawn.
legend_handles = []
for category, color in secondary_hotspot_colors.items():
    subset = breadth_hsa_gdf[breadth_hsa_gdf['hsa_hotspot_type'] == category]
    if subset.empty:
        continue  # nothing to draw for this category
    subset.plot(ax=ax, color=color, edgecolor='black', linewidth=0.2)
    legend_handles.append(
        mpatches.Patch(facecolor=color, edgecolor='black', linewidth=0.2, label=category)
    )

ax.set_title('Hotspots and Coldspots of ML Implementation by Hospital Service Area', fontsize=15)
ax.set_axis_off()
ax.legend(handles=legend_handles, title='HSA Classification', loc='upper right', frameon=True)
plt.tight_layout()
plt.show()

