## C3. Hotspot National Analysis

**Description**  
This section identifies statistically significant hotspots and coldspots of hospital AI adoption across the United States. Using Getis-Ord Gi* and Local Moran's I statistics, this analysis pinpoints specific geographic locations where AI adoption is significantly higher (hotspots) or lower (coldspots) than would be expected by chance.

**Purpose**  
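To identify hotspots and coldspots of hospital AI adoption, both for individual hospitals and aggregated to Hospital Service Areas (HSAs).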

### 1. load necessary libraries, functions and preprocessed data 

In [25]:
# Import necessary libraries
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
import warnings
from scipy import stats
from scipy.spatial import distance_matrix
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.patches as mpatches
from matplotlib.gridspec import GridSpec
from sklearn.neighbors import NearestNeighbors
import os



In [None]:
# Read the merged AHA master table.
AHA_master = pd.read_csv('./data/AHA_master_external_data.csv', low_memory=False)

# Keep only the rows that carry a (non-null) id_it value.
has_it_id = AHA_master['id_it'].notna()
AHA_IT = AHA_master[has_it_id]

# NOTE(review): apply_ai_scores_to_dataframe is not defined in this part of the
# notebook; it has to be in scope already when this cell runs.
AHA_master2 = apply_ai_scores_to_dataframe(AHA_IT)

In [28]:
import os
# SHAPE_RESTORE_SHX is a GDAL shapefile option; setting it to YES presumably lets the
# read below succeed when the .shx index file is missing -- TODO confirm.
# It has to be set before the shapefile is opened.
os.environ['SHAPE_RESTORE_SHX'] = 'YES'
# State boundary layer read from a shapefile.
states = gpd.read_file('../../../data/map_data/state_boundary.shp')

#### 2. Data engineering 

In [None]:
# Filter out invalid coordinates.
# The mask is computed on AHA_master2 itself (the frame being filtered) so that the
# boolean index always lines up with its rows. Building it from AHA_master, which has
# more rows, relies on pandas re-aligning the mask by index and selects the wrong rows
# if the index was changed in between.
# A coordinate is kept when it is non-zero and inside the valid latitude/longitude
# range; missing values fail the range test and are dropped as well.
valid_coords = (
    (AHA_master2['lat_as'] != 0)
    & (AHA_master2['long_as'] != 0)
    & AHA_master2['lat_as'].between(-90, 90)
    & AHA_master2['long_as'].between(-180, 180)
)
# .copy() gives an independent frame, so later column assignments do not act on a slice.
AHA_master2 = AHA_master2[valid_coords].copy()


# Create GeoDataFrame (x = longitude, y = latitude, WGS84)
hospitals = gpd.GeoDataFrame(
    AHA_master2,
    geometry=gpd.points_from_xy(AHA_master2.long_as, AHA_master2.lat_as),
    crs="EPSG:4326"
)


In [None]:

# Keep only hospitals that have both coordinates and a non-missing 'aipred_it' value.
required_fields = ['long_as', 'lat_as', 'aipred_it']
valid_hospitals = hospitals.dropna(subset=required_fields)

# Rebuild the point geometry for the retained rows (x = longitude, y = latitude).
hospital_points = gpd.points_from_xy(valid_hospitals.long_as, valid_hospitals.lat_as)
geo_hospitals_gdf = gpd.GeoDataFrame(
    valid_hospitals,
    geometry=hospital_points,
    crs="EPSG:4326",  # geographic coordinate system (latitude / longitude)
)

# Reproject to EPSG:3857, a projected (flat, 2D) coordinate system.
geo_hospitals_gdf_projected = geo_hospitals_gdf.to_crs(epsg=3857)


In [None]:

# Tag every hospital with a census division looked up from its state code.
# NOTE(review): state_to_division is not defined in this part of the notebook.
state_codes = geo_hospitals_gdf_projected['mstate_it']
geo_hospitals_gdf_projected['division'] = state_codes.map(state_to_division)

# Census division names (defined here for later use; nothing in this cell loops over them).
divisions = [
    'New England',
    'Mid Atlantic',
    'South Atlantic',
    'East North Central',
    'East South Central',
    'West North Central',
    'West South Central',
    'Mountain',
    'Pacific',
]


#### 3 hotspot function 

In [None]:

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy import stats
from statsmodels.stats.multitest import multipletests

# ========= Storey q-value helpers =========
def storey_qvalue_python(pvals, lambdas=None):
    """
    Simple Storey q-value fallback in pure Python.

    Parameters
    - pvals: array-like of p-values.
    - lambdas: tuning values used to estimate pi0 (the proportion of true nulls).
      Defaults to np.arange(0.05, 0.95, 0.05). Values >= 1 are ignored.

    Returns
    - numpy array of q-values in the same order as ``pvals``, clipped to [0, 1].
      An empty input returns an empty array.

    Notes
    - pi0 is the smallest of the per-lambda estimates mean(p > lambda) / (1 - lambda),
      capped at 1.
    - If that estimate is not positive (no p-value exceeds one of the lambdas, which
      is common for small or strongly skewed inputs), pi0 falls back to 1. Without the
      fallback every q-value would be multiplied by 0 and all tests would be reported
      as significant; with pi0 = 1 the result equals the Benjamini-Hochberg adjustment.
    """
    p = np.asarray(pvals, dtype=float)
    m = p.size
    if m == 0:
        return p

    if lambdas is None:
        lambdas = np.arange(0.05, 0.95, 0.05)
    lambdas = np.atleast_1d(np.asarray(lambdas, dtype=float))
    lambdas = lambdas[lambdas < 1.0]  # 1 - lambda must stay positive

    if lambdas.size:
        tail_share = np.array([(p > lam).mean() for lam in lambdas])
        pi0 = min(1.0, float(np.min(tail_share / (1.0 - lambdas))))
    else:
        pi0 = 1.0
    if not pi0 > 0:
        # degenerate estimate: use the conservative value instead of zeroing all q-values
        pi0 = 1.0

    # step-up adjustment on the sorted p-values, then enforce monotonicity from the top
    order = np.argsort(p)
    q_sorted = pi0 * m * p[order] / np.arange(1, m + 1)
    q_sorted = np.minimum.accumulate(q_sorted[::-1])[::-1]

    # scatter back to the input order
    q = np.empty_like(q_sorted)
    q[order] = q_sorted
    return np.clip(q, 0, 1)


# ========= Classification =========
def _classify_levels(sign, p_like):
    """
    Map sign and p-like values to hotspot classes at 99, 95, 90 percent levels.
    p_like can be unadjusted p, Bonferroni p, BH-FDR p, or Storey q.
    """
    lab = np.array(["Not Significant"] * len(p_like), dtype=object)
    lab[(sign > 0) & (p_like <= 0.01)] = "Hotspot (99%)"
    lab[(sign < 0) & (p_like <= 0.01)] = "Coldspot (99%)"
    lab[(sign > 0) & (p_like > 0.01) & (p_like <= 0.05)] = "Hotspot (95%)"
    lab[(sign < 0) & (p_like > 0.01) & (p_like <= 0.05)] = "Coldspot (95%)"
    lab[(sign > 0) & (p_like > 0.05) & (p_like <= 0.1)] = "Hotspot (90%)"
    lab[(sign < 0) & (p_like > 0.05) & (p_like <= 0.1)] = "Coldspot (90%)"
    return lab

# ========= Core Gi* function =========
def calculate_gi_star_all(gdf, value_column, k=6):
    """
    Compute local Getis-Ord Gi* with k-NN (including self) and multiple testing corrections.

    Inputs
    - gdf: GeoDataFrame with Point geometry (prefer EPSG:5070). Neighbours are searched
      on the raw x/y coordinates, so the CRS should be a projected one.
    - value_column: column to analyze (numeric)
    - k: number of nearest neighbors (self included via k+1)

    Outputs
    Returns a copy of gdf (rows with non-null value) with these columns:
    - gi_star: raw Gi* sum over neighbors
    - z_score: standardized Gi* Z
    - p_unadj: unadjusted two-sided p
    - p_bonf: Bonferroni adjusted p
    - p_bh: BH-FDR adjusted p
    - q_storey: Storey q-value
    - hotspot_unadj: class from p_unadj
    - hotspot_bonf: class from p_bonf
    - hotspot_bh: class from p_bh
    - hotspot_storey: class from q_storey

    If there are not more than k rows with a value, an error line is printed and the
    input gdf is returned unchanged (without the columns above).

    Notes
    - Weights are binary and every row has exactly k+1 neighbours, so the expected
      value and the variance of the neighbour sum are the same for all rows.
    - The two-sided p-value uses the normal survival function; 1 - cdf would round to
      exactly 0 for large |Z|.
    """
    # coords and values
    coords = np.vstack((gdf.geometry.x, gdf.geometry.y)).T
    values = gdf[value_column].to_numpy(dtype=float)
    valid = ~np.isnan(values)
    coords, values = coords[valid], values[valid]
    out = gdf.loc[valid].copy()
    n = len(values)
    if n <= k:
        print(f"Error: Not enough observations ({n}) for k={k}")
        return gdf

    # kNN including self (k+1)
    nn = NearestNeighbors(n_neighbors=k+1)
    nn.fit(coords)
    _, indices = nn.kneighbors(coords)  # shape (n, k+1)

    # global stats (Ord & Getis, 1995)
    x_bar = values.mean()
    # population variance S**2; clamp the tiny negative values that rounding can
    # produce for (near-)constant data
    s_squared = max((np.sum(values**2) / n) - x_bar**2, 0.0)

    # binary weights: sum of w_ij and of w_ij**2 both equal the neighbour count
    wi_sum = float(indices.shape[1])
    wi2_sum = wi_sum

    # Gi*, Z, p -- computed for all rows at once
    Gs = values[indices].sum(axis=1)   # observed neighbour sums
    EGi = x_bar * wi_sum               # expected sum
    varGi = s_squared * ((n * wi2_sum - wi_sum**2) / (n - 1)) if n > 1 else 0.0
    if varGi > 0:
        Zs = (Gs - EGi) / np.sqrt(varGi)
        Ps = 2.0 * stats.norm.sf(np.abs(Zs))
    else:
        # no variation (or undefined variance): nothing can be significant
        Zs = np.zeros(n)
        Ps = np.ones(n)

    # multiple testing corrections
    m = n
    p_unadj = Ps
    p_bonf = np.clip(p_unadj * m, 0, 1)
    _, p_bh, _, _ = multipletests(p_unadj, alpha=0.05, method='fdr_bh')
    q_storey = storey_qvalue_python(p_unadj)

    # labels
    sign = np.sign(Zs)
    out['gi_star'] = Gs
    out['z_score'] = Zs
    out['p_unadj'] = p_unadj
    out['p_bonf'] = p_bonf
    out['p_bh'] = p_bh
    out['q_storey'] = q_storey
    out['hotspot_unadj'] = _classify_levels(sign, p_unadj)
    out['hotspot_bonf'] = _classify_levels(sign, p_bonf)
    out['hotspot_bh'] = _classify_levels(sign, p_bh)
    out['hotspot_storey'] = _classify_levels(sign, q_storey)

    # column annotations for reference
    out.attrs["columns_doc"] = {
        "gi_star": "raw Gi* statistic (Ord & Getis, 1995)",
        "z_score": "standardized Z-score for Gi*",
        "p_unadj": "unadjusted two-sided p-value",
        "p_bonf": "Bonferroni-adjusted p-value",
        "p_bh": "Benjamini-Hochberg FDR-adjusted p-value",
        "q_storey": "Storey q-value (FDR with pi0 estimated)",
        "hotspot_unadj": "hotspot class from unadjusted p",
        "hotspot_bonf": "hotspot class from Bonferroni p",
        "hotspot_bh": "hotspot class from BH-FDR p",
        "hotspot_storey": "hotspot class from Storey q-value"
    }

    # quick summary: number of rows whose label contains the given key
    def _cnt(col, key): return int(out[col].str.contains(key).sum())
    print(f"Total locations: {n}")
    print(f"Unadj  Hot:{_cnt('hotspot_unadj','Hotspot')}  Cold:{_cnt('hotspot_unadj','Coldspot')}")
    print(f"BH-FDR Hot:{_cnt('hotspot_bh','Hotspot')}  Cold:{_cnt('hotspot_bh','Coldspot')}")
    print(f"Storey Hot:{_cnt('hotspot_storey','Hotspot')}  Cold:{_cnt('hotspot_storey','Coldspot')}")
    print(f"Bonf  Hot:{_cnt('hotspot_bonf','Hotspot')}  Cold:{_cnt('hotspot_bonf','Coldspot')}")

    return out


#### 4 Run hotspot 

In [None]:
# 1. Conduct hotspot analysis for the entire US
print("Performing hotspot analysis for the entire US...")


def _national_hotspots(score_column):
    """Run the Gi* analysis on all projected hospitals for one score column (k=6)."""
    return calculate_gi_star_all(geo_hospitals_gdf_projected, score_column, k=6)


# One result frame per score, computed in the same order as before.
base_hotspot_results = _national_hotspots('ai_base_score_imputed')
breadth_hotspot_results = _national_hotspots('ai_base_breadth_score_imputed')
dev_hotspot_results = _national_hotspots('ai_base_dev_score_imputed')
eval2023_hotspot_results = _national_hotspots('ai_base_eval_score_2023_imputed')
eval2024_hotspot_results = _national_hotspots('ai_base_eval_score_2024_imputed')
llm_hotspot_results = _national_hotspots('llm_readiness_score')


In [None]:
# Read the HSA boundary layer and put it in the same CRS as the projected
# hospital points.
hsa_boundary_path = '../data/HsaBdry_AK_HI_unmodified.geojson'
hsa_gdf = gpd.read_file(hsa_boundary_path).to_crs(geo_hospitals_gdf_projected.crs)

In [None]:
def _hsa_majority_status(hotspot_results):
    """
    Collapse hospital-level hotspot labels to one label per HSA.

    For every 'hsacode_as' group and each of the three label columns, the most frequent
    label among the group's hospitals is kept (on a tie, the label that value_counts()
    lists first). The label columns are renamed to hsa_unadj_type / hsa_bh_type /
    hsa_storey_type and 'hsacode_as' is returned as a regular column.
    """
    label_cols = ["hotspot_unadj", "hotspot_bh", "hotspot_storey"]
    return (
        hotspot_results
        .groupby("hsacode_as")[label_cols]
        .agg(lambda labels: labels.value_counts().idxmax())
        .reset_index()
        .rename(columns={
            "hotspot_unadj": "hsa_unadj_type",
            "hotspot_bh": "hsa_bh_type",
            "hotspot_storey": "hsa_storey_type",
        })
    )


# The same aggregation is applied to every score; only the input frame differs.
base_hsa_status = _hsa_majority_status(base_hotspot_results)
breadth_hsa_status = _hsa_majority_status(breadth_hotspot_results)
dev_hsa_status = _hsa_majority_status(dev_hotspot_results)
eval2023_hsa_status = _hsa_majority_status(eval2023_hotspot_results)
eval2024_hsa_status = _hsa_majority_status(eval2024_hotspot_results)
llm_hsa_status = _hsa_majority_status(llm_hotspot_results)



In [None]:
# Attach each HSA-level status table to the HSA polygons (HSA93 <-> hsacode_as).
# The join is a left join on the polygons, so every polygon is kept; polygons without a
# matching status row get missing values in the status columns.
hsa_join = dict(left_on='HSA93', right_on='hsacode_as', how='left')

base_hsa_gdf = hsa_gdf.merge(base_hsa_status, **hsa_join)
breadth_hsa_gdf = hsa_gdf.merge(breadth_hsa_status, **hsa_join)
dev_hsa_gdf = hsa_gdf.merge(dev_hsa_status, **hsa_join)
eval2023_hsa_gdf = hsa_gdf.merge(eval2023_hsa_status, **hsa_join)
eval2024_hsa_gdf = hsa_gdf.merge(eval2024_hsa_status, **hsa_join)
llm_hsa_gdf = hsa_gdf.merge(llm_hsa_status, **hsa_join)

#### 5 Hotspot stat 

In [None]:
import pandas as pd
import re

def summarize_hotspots(df, id_col="id_it",
                       value_cols=("hotspot_unadj","hotspot_bh","hotspot_storey"),
                       row_order=("Unadjusted","Storey","BH")):
    """
    Count hospitals per hotspot class for each multiple-testing method.

    Parameters
    - df: frame with one row per hospital, an id column and the label columns.
    - id_col: identifier column; rows with a missing id are not counted.
    - value_cols: label columns to summarize (hotspot_unadj / hotspot_bh / hotspot_storey).
    - row_order: method names, in the order the rows should appear.

    Returns
    A DataFrame with a 'method' column followed by integer counts for
    Hotspot_90/95/99, Coldspot_90/95/99 and NotSig. A method listed in row_order that
    has no data (for example because its column was left out of value_cols) gets a row
    of zeros rather than NaN, so the counts always stay integers.
    """
    method_map = {
        "hotspot_unadj": "Unadjusted",
        "hotspot_bh": "BH",
        "hotspot_storey": "Storey"
    }
    count_cols = ["Hotspot_90","Hotspot_95","Hotspot_99",
                  "Coldspot_90","Coldspot_95","Coldspot_99",
                  "NotSig"]

    def to_label(cat):
        # "Hotspot (95%)" -> "Hotspot_95"; missing or anything else -> "NotSig"
        if pd.isna(cat):
            return "NotSig"
        text = str(cat).lower()
        for needle, status in (("hotspot", "Hotspot"), ("coldspot", "Coldspot")):
            if needle in text:
                found = re.search(r"(90|95|99)", text)
                return f"{status}_{found.group(1) if found else None}"
        return "NotSig"

    # reshape to one row per (hospital, method)
    long = df.melt(id_vars=id_col, value_vars=list(value_cols),
                   var_name="method", value_name="category")
    long["method"] = long["method"].map(method_map)
    long["label"] = [to_label(cat) for cat in long["category"]]

    # counts per method x label
    counts = pd.pivot_table(long, index="method", columns="label", values=id_col,
                            aggfunc="count", fill_value=0)

    # fixed rows and columns; anything absent is a zero count. Labels outside
    # count_cols (a class without a recognised level) are dropped here.
    counts = (counts.reindex(index=list(row_order), columns=count_cols, fill_value=0)
                    .astype(int)
                    .rename_axis(index="method"))

    return counts.reset_index()


import pandas as pd
import re

def summarize_hsa_hotspots(hsa_df,
                           id_col="HSA93",
                           value_cols=("hsa_unadj_type","hsa_bh_type","hsa_storey_type"),
                           row_order=("Unadjusted","Storey","BH")):
    """
    Count HSAs per hotspot class for each multiple-testing method.

    Parameters
    - hsa_df: frame with one row per HSA, an id column and the HSA-level label columns.
    - id_col: identifier column; rows with a missing id are not counted.
    - value_cols: label columns to summarize.
    - row_order: method names, in the order the rows should appear.

    Returns
    A DataFrame with a 'method' column followed by integer counts for
    Hotspot_90/95/99, Coldspot_90/95/99 and NotSig. Missing labels count as NotSig.
    A method listed in row_order that has no data gets a row of zeros; previously such
    a row was NaN and the integer cast failed.
    """
    # method map for HSA-level columns
    method_map = {
        "hsa_unadj_type": "Unadjusted",
        "hsa_bh_type": "BH",
        "hsa_storey_type": "Storey",
        # fallbacks for hospital-level names if passed accidentally
        "hotspot_unadj": "Unadjusted",
        "hotspot_bh": "BH",
        "hotspot_storey": "Storey",
    }
    count_cols = ["Hotspot_90","Hotspot_95","Hotspot_99",
                  "Coldspot_90","Coldspot_95","Coldspot_99",
                  "NotSig"]

    def to_label(cat):
        # labels look like "Hotspot (90%)", "Coldspot (95%)", "Not Significant"
        if pd.isna(cat):
            return "NotSig"
        text = str(cat).strip().lower()
        if text.startswith("not"):
            return "NotSig"
        found = re.search(r"(90|95|99)", text)
        level = found.group(1) if found else None
        if "hotspot" in text:
            return f"Hotspot_{level}"
        if "coldspot" in text:
            return f"Coldspot_{level}"
        return "NotSig"

    # reshape to one row per (HSA, method)
    long = hsa_df.melt(id_vars=id_col, value_vars=list(value_cols),
                       var_name="method", value_name="category")
    long["method"] = long["method"].map(method_map)
    long["label"] = [to_label(cat) for cat in long["category"]]

    # pivot to wide counts
    counts = pd.pivot_table(long, index="method", columns="label", values=id_col,
                            aggfunc="count", fill_value=0)

    # fixed rows and columns; anything absent is a zero count, so the int cast is safe
    counts = (counts.reindex(index=list(row_order), columns=count_cols, fill_value=0)
                    .astype(int)
                    .rename_axis(index="method"))

    return counts.reset_index()



In [None]:
# Hospital-level counts of hotspot / coldspot classes per correction method,
# for the base score results.
summarize_hotspots(base_hotspot_results)


In [None]:
# HSA-level counts of hotspot / coldspot classes per correction method,
# for the base score results.
hsa_type_columns = ("hsa_unadj_type", "hsa_bh_type", "hsa_storey_type")
summarize_hsa_hotspots(base_hsa_gdf, id_col="HSA93", value_cols=hsa_type_columns)

#### 6 Hotspot visualization 

In [None]:
# Hotspot Visualization (hospital points, base score)

# Frame and label column to map. calculate_gi_star_all writes one label column per
# correction method (hotspot_unadj / hotspot_bonf / hotspot_bh / hotspot_storey); there
# is no 'hotspot_type' column. The BH-FDR labels are mapped here -- change hotspot_col
# to map another method.
hotspot_results = base_hotspot_results
hotspot_col = 'hotspot_bh'

# Define color scheme
hotspot_colors = {
    'Hotspot (99%)': '#001f4d',      # Dark blue
    'Hotspot (95%)': '#0050b3',      # Medium blue
    'Hotspot (90%)': '#4d94ff',      # Light blue
    'Not Significant': '#d9d9d9',    # Light gray
    'Coldspot (90%)': '#bfbfbf',     # Medium gray
    'Coldspot (95%)': '#737373',     # Dark gray
    'Coldspot (99%)': '#333333'      # Very dark gray
}

# Set opacity levels
opacity = {
    'Hotspot (99%)': 0.9,
    'Hotspot (95%)': 0.8,
    'Hotspot (90%)': 0.7,
    'Not Significant': 0.1,
    'Coldspot (90%)': 0.7,
    'Coldspot (95%)': 0.8,
    'Coldspot (99%)': 0.9
}

# Create visualization
fig, ax = plt.subplots(figsize=(12, 8))

# Plot non-significant points first (background)
non_sig = hotspot_results[hotspot_results[hotspot_col] == 'Not Significant']
if len(non_sig) > 0:
    non_sig.plot(
        ax=ax,
        color=hotspot_colors['Not Significant'],
        markersize=15,
        alpha=opacity['Not Significant']
    )

# Plot significant hotspots and coldspots on top; empty classes are skipped
significant_types = ['Coldspot (99%)', 'Coldspot (95%)', 'Coldspot (90%)',
                     'Hotspot (90%)', 'Hotspot (95%)', 'Hotspot (99%)']

for hotspot_type in significant_types:
    subset = hotspot_results[hotspot_results[hotspot_col] == hotspot_type]
    if len(subset) > 0:
        subset.plot(
            ax=ax,
            color=hotspot_colors[hotspot_type],
            markersize=25,
            alpha=opacity[hotspot_type]
        )

# Add basemap if available. This step is best-effort: a failure is reported as a
# warning instead of being swallowed silently, and the figure is still produced.
try:
    import contextily as ctx
    ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
except Exception as exc:
    warnings.warn(f"Basemap could not be added: {exc}")

# Create legend: one marker per class, in the order of hotspot_colors
legend_elements = [
    plt.Line2D([0], [0], marker='o', color='w',
               markerfacecolor=color,
               label=hotspot_type, markersize=10)
    for hotspot_type, color in hotspot_colors.items()
]

ax.legend(handles=legend_elements, loc='upper right')
ax.set_axis_off()
ax.set_title('Hotspots and Coldspots of AI Implementation', fontsize=16)

plt.tight_layout()
plt.show()

# Summary statistics
print("Hotspot Analysis Summary:")
type_counts = hotspot_results[hotspot_col].value_counts()
for hotspot_type in hotspot_colors:
    count = type_counts.get(hotspot_type, 0)
    print(f"{hotspot_type}: {count} locations")

In [None]:
# Reproject everything to EPSG:3857 for basemap compatibility
base_hsa_gdf = base_hsa_gdf.to_crs(epsg=3857)

# Define colors
custom_colors = {
    'Hotspot (90%)': '#4d94ff',
    'Hotspot (95%)': '#0050b3',
    'Hotspot (99%)': '#001f4d',
    'Coldspot (90%)': '#a6a6a6',
    'Coldspot (95%)': '#595959',
    'Coldspot (99%)': '#262626',
    'Not Significant': '#f2f2f2'  # very light gray
}

# Class to draw for each HSA. The merged frame carries hsa_unadj_type / hsa_bh_type /
# hsa_storey_type; 'hsa_hotspot_type' is derived here from the BH-FDR class (swap the
# source column to map another method). HSAs without a class are 'Not Significant'.
base_hsa_gdf['hsa_hotspot_type'] = base_hsa_gdf['hsa_bh_type'].fillna('Not Significant')

# Plot
fig, ax = plt.subplots(figsize=(13, 11))

# Legend entries are built explicitly so the legend lists every class and does not
# depend on what the polygon layers register with the axes.
legend_handles = []
for category, color in custom_colors.items():
    subset = base_hsa_gdf[base_hsa_gdf['hsa_hotspot_type'] == category]
    if not subset.empty:
        subset.plot(ax=ax, color=color, edgecolor='black', linewidth=0.2)
    legend_handles.append(
        mpatches.Patch(facecolor=color, edgecolor='black', linewidth=0.2, label=category)
    )

# Add basemap
#ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)

ax.set_title('Hotspots and Coldspots of ML Implementation by Hospital Service Area', fontsize=15)
ax.set_axis_off()
ax.legend(handles=legend_handles, title='HSA Classification', loc='upper right', frameon=True)
plt.tight_layout()

# Save the figure before showing it; create the output folder if it is missing
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/base_hsa_hotspot_map.pdf',
            bbox_inches='tight',
            pad_inches=0.1,
            facecolor='white',
            edgecolor='none',
            format='pdf')

plt.show()


### 6.2 secondary measures hotspot visualization 

In [104]:

# Palette for the secondary measures: purples for hotspots, grays for coldspots.
secondary_hotspot_colors = {
    'Hotspot (99%)': '#4a1486',      # Very dark purple
    'Hotspot (95%)': '#807dba',      # Medium purple
    'Hotspot (90%)': '#bcbddc',      # Light purple
    'Not Significant': '#f0f0f0',    # Very light gray
    'Coldspot (90%)': '#bdbdbd',     # Light gray
    'Coldspot (95%)': '#636363',     # Medium gray
    'Coldspot (99%)': '#252525',     # Dark gray
}

# Continuous colormap running through the same colors, from the strongest coldspot
# to the strongest hotspot.
_cold_to_hot = [
    'Coldspot (99%)', 'Coldspot (95%)', 'Coldspot (90%)',
    'Not Significant',
    'Hotspot (90%)', 'Hotspot (95%)', 'Hotspot (99%)',
]
secondary_hotspot_cmap = LinearSegmentedColormap.from_list(
    'eval_hotspot_cmap',
    [secondary_hotspot_colors[key] for key in _cold_to_hot],
)

# Marker opacity per class, listed in the key order of secondary_hotspot_colors;
# non-significant points are almost transparent.
opacity = dict(zip(secondary_hotspot_colors, (0.9, 0.8, 0.7, 0.1, 0.7, 0.8, 0.9)))


In [None]:


# Create a single figure
fig, ax = plt.subplots(figsize=(12, 8))

# Label column to map. The results frame has one label column per correction method
# (hotspot_unadj / hotspot_bonf / hotspot_bh / hotspot_storey) and no 'hotspot_type'
# column; the BH-FDR labels are mapped here -- change breadth_col to map another method.
breadth_col = 'hotspot_bh'
breadth_labels = breadth_hotspot_results[breadth_col]

# MAIN PLOT - HOTSPOTS
# First plot non-significant points with very low opacity
non_sig = breadth_hotspot_results[breadth_labels == 'Not Significant']
if len(non_sig) > 0:
    non_sig.plot(
        ax=ax,
        color=secondary_hotspot_colors['Not Significant'],
        markersize=15,
        alpha=opacity['Not Significant']
    )

# Then the significant classes on top; empty classes are skipped
for hotspot_type in ['Coldspot (90%)', 'Coldspot (95%)', 'Coldspot (99%)',
                     'Hotspot (90%)', 'Hotspot (95%)', 'Hotspot (99%)']:
    subset = breadth_hotspot_results[breadth_labels == hotspot_type]
    if len(subset) > 0:
        subset.plot(
            ax=ax,
            color=secondary_hotspot_colors[hotspot_type],
            markersize=25,
            alpha=opacity[hotspot_type]
        )

# Create legend for hotspot plot: one marker per class, in palette order
legend_elements = []
for hotspot_type, color in secondary_hotspot_colors.items():
    if hotspot_type == 'Not Significant':
        # Higher opacity in legend for visibility
        label, alpha = 'Not Significant', 0.5
    else:
        # 'Hotspot (99%)' -> 'Hotspot (99% confidence)'
        label = hotspot_type.replace('%)', '% confidence)')
        alpha = opacity[hotspot_type]
    legend_elements.append(
        plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=color,
                   label=label, markersize=10, alpha=alpha)
    )

ax.legend(handles=legend_elements, loc='upper right', frameon=True)
ax.set_title('Hotspots and Coldspots of ML Implementation in US Hospitals', fontsize=16)
plt.tight_layout()
plt.show()


In [None]:
# Reproject everything to EPSG:3857 for basemap compatibility
breadth_hsa_gdf = breadth_hsa_gdf.to_crs(epsg=3857)

# Class to draw for each HSA. The merged frame carries hsa_unadj_type / hsa_bh_type /
# hsa_storey_type; 'hsa_hotspot_type' is derived here from the BH-FDR class (swap the
# source column to map another method). HSAs without a class are 'Not Significant'.
breadth_hsa_gdf['hsa_hotspot_type'] = breadth_hsa_gdf['hsa_bh_type'].fillna('Not Significant')

# Plot
fig, ax = plt.subplots(figsize=(13, 11))

# Legend entries are built explicitly so the legend lists every class and does not
# depend on what the polygon layers register with the axes.
legend_handles = []
for category, color in secondary_hotspot_colors.items():
    subset = breadth_hsa_gdf[breadth_hsa_gdf['hsa_hotspot_type'] == category]
    if not subset.empty:
        subset.plot(ax=ax, color=color, edgecolor='black', linewidth=0.2)
    legend_handles.append(
        mpatches.Patch(facecolor=color, edgecolor='black', linewidth=0.2, label=category)
    )


ax.set_title('Hotspots and Coldspots of ML Implementation by Hospital Service Area', fontsize=15)
ax.set_axis_off()
ax.legend(handles=legend_handles, title='HSA Classification', loc='upper right', frameon=True)
plt.tight_layout()
plt.show()

