In [67]:
"""
This notebook takes the clustering result from the previous notebook, and visualize the clusters,
alongside performing spatial hotspot analysis.
"""

import folium
import geopandas as gpd
import matplotlib.colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from branca.element import Element
from folium.plugins import MarkerCluster
from pysal.explore import esda
from pysal.lib import weights
from shapely.geometry import MultiPoint, Polygon
from sklearn.cluster import DBSCAN


def plot_recent_crimes(
    m: folium.Map, recent_crime: pd.DataFrame, color_map_types: dict
) -> folium.Map:
    """
    Plots recent crime incidents on the map as circles, colored by crime type.

    Parameters:
    -----------
    m: folium.Map
        The Folium map object to add the layers to.
    recent_crime: pd.DataFrame
        DataFrame containing recent crime data with a 'crime_type' column.
    color_map_types: dict
        A dictionary mapping crime types to hex colors.

    Returns:
    --------
    folium.Map
        The updated Folium map object with the recent crime layers.
    """
    # Create layers for recent crimes (as circles)
    crime_agg_view = MarkerCluster(name="Recent Crimes (Aggregated)").add_to(m)
    crime_det_view = folium.FeatureGroup(name="Recent Crimes (Detailed)").add_to(m)

    # Add recent crime markers
    for _, row in recent_crime.iterrows():
        popup_text = row["crime_type"].replace("crime_", "")
        marker_args = {
            "location": [row["lat"], row["lon"]],
            "radius": 5,
            "color": color_map_types[row["crime_type"]],
            "fill": True,
            "fill_color": color_map_types[row["crime_type"]],
            "fill_opacity": 0.7,
            "popup": popup_text,
        }
        folium.CircleMarker(**marker_args).add_to(crime_agg_view)
        folium.CircleMarker(**marker_args).add_to(crime_det_view)

    return m


def plot_cluster_outlines(
    m: folium.Map,
    df_clustered: pd.DataFrame,
    color_map_clusters: dict,
    alpha_labels: dict,
    dbscan_threshold: int,
) -> folium.Map:
    """
    Plots intelligent cluster outlines and representative icons on the map with DBSCAN.

    Parameters:
    -----------
    m: folium.Map
        The Folium map object to add the layers to.
    df_clustered: pd.DataFrame
        DataFrame with clustered crime data, including 'cluster_label' and summary stats.
    color_map_clusters: dict
        A dictionary mapping cluster labels to hex colors.
    alpha_labels: dict
        A dictionary mapping numeric cluster labels to alphabetical labels.
    dbscan_threshold: int
        An integer representing how many feet apart crime has to be, to be clustered together.
        Larger values mean capturing broader patterns, while smaller values mean capturing more
        local patterns.

    Returns:
    --------
    folium.Map
        The updated Folium map object with the cluster outline layers.
    """
    # Create new layers for the cluster outlines and icons
    cluster_outlines = folium.FeatureGroup(name="Cluster Outlines").add_to(m)
    cluster_icons = folium.FeatureGroup(name="Cluster Icons").add_to(m)

    # Create subclusters with the clustered data. This is so I can visualize the clusters of crime by
    # outlying distinct regions where crimes cluster together, instead of plotting each individuala
    # crime which would be a lot harder to view and cause lag on the map when rendering

    # Convert df_clustered to a GeoDataFrame and project it for distance calculations
    clustered_gdf = gpd.GeoDataFrame(
        df_clustered,
        geometry=gpd.points_from_xy(df_clustered.lon, df_clustered.lat),
        crs="EPSG:4326",  # Assume standard lat/lon
    ).to_crs(
        "EPSG:2272"
    )  # Project to a system that uses feet for accurate calculations

    # Group by the primary cluster label to process each cluster
    for primary_cluster_label, group in clustered_gdf.groupby("cluster_label"):

        # Extract coordinates for DBSCAN
        coords = np.array(list(zip(group.geometry.x, group.geometry.y)))

        # Need at least 3 points to form a hull
        if len(coords) < 3:
            continue

        # Run DBSCAN to find spatial sub-clusters
        db = DBSCAN(eps=DISTANCE_THRESHOLD, min_samples=3).fit(coords)
        group = group.copy()
        group["sub_cluster"] = db.labels_

        # Now, create an outline for each spatial sub-cluster
        for sub_cluster_label, sub_group in group.groupby("sub_cluster"):
            # Skip noise points from the sub-clustering and groups too small to form a shape
            if sub_cluster_label == -1 or len(sub_group) < 3:
                continue

            # Create a single MultiPoint object from all points in the sub-cluster
            multi_point = MultiPoint(sub_group["geometry"].tolist())

            # Calculate the convex hull (the outline)
            hull = multi_point.convex_hull

            # Calculate the center for the icon (still in projected CRS)
            centroid_proj = hull.centroid

            # Find the most common crime type in this sub-group
            sub_group_crime_cols = [
                col for col in sub_group.columns if col.startswith("crime_")
            ]
            # Get the full column name of the most frequent crime
            dominant_crime_col = sub_group[sub_group_crime_cols].sum().idxmax()
            most_common_crime = dominant_crime_col.replace("crime_", "")

            # Label the crime if it is extremely dominant or not
            dominant_crime_count = sub_group[dominant_crime_col].sum()
            dominant_crime_pct = dominant_crime_count / len(sub_group)
            dominance_indicator = "✓" if dominant_crime_pct >= 0.5 else "⚠️"

            # Calculate average median income, poverty rate, and population density
            avg_median_income = sub_group["income_median"].mean()
            avg_poverty_rate = sub_group["poverty_rate"].mean()
            avg_pop_density = sub_group["pop_density_sq_km"].mean()

            # Get color and info for this cluster (using the primary label for color)
            icon_color_hex = color_map_clusters.get(primary_cluster_label, "#000000")

            # Define HTML for detailed popup text with the statistics
            popup_text = f"""
            <b>Cluster {alpha_labels[primary_cluster_label]} (Sub-Group)</b><br>
            Count: {len(sub_group)}<br>
            <hr style='margin: 2px;'>
            <b>Dominant Crime:</b> {most_common_crime} ({dominant_crime_pct:.0%}) {dominance_indicator}<br>
            <b>Avg. Median Income:</b> ${avg_median_income:,.0f}<br>
            <b>Avg. Poverty Rate:</b> {avg_poverty_rate:.1%}<br>
            <b>Avg. Pop. Density:</b> {avg_pop_density:,.0f}/km²
            """

            # Convert hull back to lat/lon for plotting on Folium map
            hull_gdf = gpd.GeoDataFrame([1], geometry=[hull], crs="EPSG:2272").to_crs(
                "EPSG:4326"
            )

            # Add the outline to its layer
            folium.GeoJson(
                hull_gdf.geometry.to_json(),
                style_function=lambda x, color=icon_color_hex: {
                    "fillColor": color,
                    "color": color,
                    "weight": 2,
                    "fillOpacity": 0.2,
                },
            ).add_to(cluster_outlines)

            # Convert centroid to lat/lon for the marker
            centroid_gdf = gpd.GeoDataFrame(
                [1], geometry=[centroid_proj], crs="EPSG:2272"
            ).to_crs("EPSG:4326")
            centroid_latlon = [
                centroid_gdf.geometry.y.iloc[0],
                centroid_gdf.geometry.x.iloc[0],
            ]

            # Define the HTML for the cluster outline icon
            icon_html = f'<div style="text-align: center; color: {icon_color_hex};"><i class="fa fa-tag fa-2x"></i></div>'

            # Add the representative icon to each layer
            folium.Marker(
                location=centroid_latlon,
                icon=folium.DivIcon(
                    html=icon_html, icon_size=(24, 24), icon_anchor=(12, 12)
                ),
                popup=popup_text,
            ).add_to(cluster_icons)

    return m


def plot_hotspot_analysis(
    m: folium.Map, df_clustered: pd.DataFrame, philly_gdf: gpd.GeoDataFrame
) -> folium.Map:
    """
    Performs a hotspot analysis on the given data and adds it as a choropleth layer.

    Parameters:
    -----------
    m: folium.Map
        The Folium map object to add the layers to.
    df_clustered: pd.DataFrame
        DataFrame with clustered crime data.
    philly_gdf: gpd.GeoDataFrame
        GeoDataFrame containing the Philadelphia boundary.

    Returns:
    --------
    folium.Map
        The updated Folium map object with the hotspot layer.
    """
    # Convert df_clustered to a GeoDataFrame and project it for distance calculations
    clustered_gdf = gpd.GeoDataFrame(
        df_clustered,
        geometry=gpd.points_from_xy(df_clustered.lon, df_clustered.lat),
        crs="EPSG:4326",
    ).to_crs("EPSG:2272")

    # Create grid based on the entire Philadelphia boundary for full coverage
    philly_gdf_proj = philly_gdf.to_crs("EPSG:2272")
    xmin, ymin, xmax, ymax = philly_gdf_proj.total_bounds
    cell_size = 2500  # Grid cell size in feet; can be set higher/lower
    grid_cells = []
    x = xmin
    while x < xmax:
        y = ymin
        while y < ymax:
            grid_cells.append(
                Polygon(
                    [
                        (x, y),
                        (x + cell_size, y),
                        (x + cell_size, y + cell_size),
                        (x, y + cell_size),
                    ]
                )
            )
            y += cell_size
        x += cell_size
    hotspot_grid = gpd.GeoDataFrame(grid_cells, columns=["geometry"], crs="EPSG:2272")

    # Count points from df_clustered in each grid cell
    joined = gpd.sjoin(clustered_gdf, hotspot_grid, how="inner", predicate="within")
    crime_counts = joined.groupby("index_right").size().rename("n_crimes")
    hotspot_grid = hotspot_grid.merge(
        crime_counts, left_index=True, right_index=True, how="left"
    )
    hotspot_grid["n_crimes"].fillna(0, inplace=True)
    # Create a separate grid for the analysis containing only cells with crime
    analysis_grid = hotspot_grid[hotspot_grid["n_crimes"] > 0].copy()

    # Calculate the Gi* statistic (z-scores) only on cells with data
    w = weights.Queen.from_dataframe(analysis_grid)
    g_local = esda.G_Local(analysis_grid["n_crimes"].values, w)
    analysis_grid["z_score"] = g_local.Zs

    # Merge the z-scores back into the full grid for complete visualization
    hotspot_grid = hotspot_grid.merge(
        analysis_grid[["z_score"]], left_index=True, right_index=True, how="left"
    )
    # Fill cells with no z-score (0 crimes or islands) with a neutral value of 0
    hotspot_grid["z_score"].fillna(0, inplace=True)

    # Trim the grid to the Philadelphia boundary
    hotspot_grid_trimmed = gpd.overlay(
        hotspot_grid, philly_gdf_proj, how="intersection"
    )

    # Reset the index to create a column that can be used as a key
    hotspot_grid_for_plot = hotspot_grid_trimmed.reset_index()

    # Select only the necessary columns to prevent JSON serialization errors
    hotspot_data_for_viz = hotspot_grid_for_plot[["index", "z_score", "geometry"]]

    # Create a Choropleth layer for the hotspots; The reversed red/blue colormap is used, so 
    # hotspots (identified with darker red) are shown with a higher z score, and coldspots 
    # (identified with blue) are shown with a negative/near zero z score.
    chlorpleth = folium.Choropleth(
        geo_data=hotspot_data_for_viz.to_crs("EPSG:4326"),
        name="Hotspots",
        data=hotspot_data_for_viz,
        columns=["index", "z_score"],
        key_on="feature.id",
        fill_color="RdBu_r",
        fill_opacity=0.6,
        line_opacity=0.2,
        legend_name="Hotspot Intensity",
        highlight=True,
    ).add_to(m)

    # Add CSS for formatting the chlorpleth legend, so it stands out better with the background of 
    # the map
    legend_css = """
    <style>
        .legend {
            background-color: rgba(255, 255, 255, 0.7);
            padding: 6px 12px;
            border-radius: 6px;
            font-weight: 600;
            font-size: 13px;
            line-height: 1.4;
            box-shadow: 0 0 4px rgba(0,0,0,0.4);
            border: 1px solid rgba(0,0,0,0.2);
        }

        .legend text, .legend-title {
            text-shadow:
                0 0 2px #fff,
                -1px -1px 0 #fff,
                1px -1px 0 #fff,
                -1px 1px 0 #fff,
                1px 1px 0 #fff;
        }
    </style>
    """
    m.get_root().html.add_child(Element(legend_css))

    return m



BOUNDARY = "https://raw.githubusercontent.com/blackmad/neighborhoods/master/philadelphia.geojson"
DISTANCE_THRESHOLD = 1000

In [68]:
# Load the experimental data generated from previous notebooks
df_clustered = pd.read_pickle(
    "experimental_data/labeled_merged_data_2022-07-21_to_2025-07-20.pkl"
)
recent_crime = pd.read_pickle("experimental_data/merged_data_2025-07-18.pkl")

Now that the data has been clustered, I want to make a map to visualize all the hard work and 
analysis done. This will involve three parts:
1. Plotting the clusters from the previous notebook, where I performed UMAP/HDBSCAN clustering.
2. Overlaying recent crimes
3. Performing some hotspot analysis on the clustered data (without cluster labels).

Effectively, I want each to act as a separate layer, and combined, will provide a comprehensive 
analysis of the crime distribution in Philadelphia (at least, up to the date where this data was
collected and clustered). Of course, plotting recent crimes will help let anyone view the spatial
distribution of clusters. Plotting the clusters of crimes will help highlight any particular 
patterns that emerged. But, these patterns may not be statistically significant, so the hotspot
analysis aims to highlight chunks of Philadelphia have statistically significant concentrations of 
crime.

Each of these three layers complement each other. For example, one could see an area as a proper 
hotspot, and then perhaps see two clusters of crime in that hotspot. Finally, recent crimes can help
highlight the most recent observations and confirm the pattern is still active.

The first step is setting up the base map for these three layers. This will involve setting up the
legend labels (for the extra layers) and color maps, alongside initializing an empty map with a boundary of Philadephia.

In [69]:
# Extract crime type from the OHE'd columns
crime_type_cols = [col for col in recent_crime.columns if col.startswith("crime_")]
for col in crime_type_cols:
    recent_crime[col] = pd.to_numeric(recent_crime[col], errors="coerce").fillna(0)
recent_crime["crime_type"] = recent_crime[crime_type_cols].idxmax(axis=1)

# Define a color map for each crime type
unique_types = recent_crime["crime_type"].unique()
cmap_types = plt.get_cmap("tab20", len(unique_types))
color_map_types = {
    crime: matplotlib.colors.rgb2hex(cmap_types(i))
    for i, crime in enumerate(unique_types)
}

# Create a dynamic color map for the clustered crimes
unique_clusters = sorted(df_clustered["cluster_label"].unique())
cmap_clusters = plt.get_cmap("jet", len(unique_clusters))
color_map_clusters = {
    cluster: matplotlib.colors.rgb2hex(cmap_clusters(i))
    for i, cluster in enumerate(unique_clusters)
}

# Create a mapping for alphabetical cluster labels
cluster_nums = sorted(unique_clusters)
alpha_labels = {num: chr(65 + i) for i, num in enumerate(cluster_nums)}
# Apply the new labels to the dataframe
df_clustered["cluster_alpha_label"] = df_clustered["cluster_label"].map(alpha_labels)

# Load Philadelphia boundary GeoJSON
philly_gdf = gpd.read_file(BOUNDARY)
min_lon, min_lat, max_lon, max_lat = philly_gdf.total_bounds
map_bounds = [[min_lat, min_lon], [max_lat, max_lon]]

# Create Folium map of crime, centered at mean lat/lon
m_crime = folium.Map(
    location=[recent_crime["lat"].mean(), recent_crime["lon"].mean()],
    zoom_start=12,
    max_bounds=map_bounds,
    min_zoom=12,
)
# Add the Philadelphia boundary outline to the map
folium.GeoJson(
    philly_gdf[["geometry"]],
    style_function=lambda x: {"color": "black", "weight": 2, "fillOpacity": 0.0},
    name="Philadelphia Boundary",
).add_to(m_crime)

m_crime

Now I have a base map with nothing else on it. So, I'll start adding data!

First, I'll plot the recent crimes without anything else. I do this with two layers. The first layer
plots the raw crime observations, color coded by the crime type. I also include a second layer to 
help aggregate the number of crimes in broad areas, making it easier to highlight areas with higher
crime density.

In [70]:
# Add recent crime, cluster outline, and hotspot layers to the map
m_crime = plot_recent_crimes(m_crime, recent_crime, color_map_types)

# Add a control for controlling the layers
folium.LayerControl().add_to(m_crime)

# Setting up HTML for crime type legend
legend_html_start = """
     <div style="position: fixed; 
     bottom: 50px; left: 50px; width: 250px; height: 400px; 
     border:2px solid grey; z-index:9998; font-size:14px;
     background-color:white; padding: 10px;">
     <b>Crime Type Legend</b><br>
     <div style="height: 90%; overflow-y: auto;">
     """
legend_items = ""
for crime_type, color in color_map_types.items():
    clean_name = crime_type.replace("crime_", "")
    legend_items += f'&nbsp; <i class="fa fa-circle" style="color:{color}"></i> &nbsp; {clean_name}<br>'
legend_html_end = "</div></div>"
full_legend_html = legend_html_start + legend_items + legend_html_end
m_crime.get_root().html.add_child(folium.Element(full_legend_html))

# Save as html
# m_crime.save("layered_crime_map.html")

m_crime

Now, I want to add a layer with the clustered crime. This will be based off the clustered labels. 
Note that I could plot every single crime in the clusters, but this would be extremely excessive, as
there would be too many points on the map to be meaningful. It would also decrease the performance 
of the map considerably, so I instead use DBSCAN to effectively create subclusters in each cluster. 
This allows me to make some estimated outlines for areas where lots of crime are clustered densely
together.

In [71]:
# NOTE: Have to refresh the map from being empty for code to function correctly
# Create Folium map of crime, centered at mean lat/lon
m_crime = folium.Map(
    location=[recent_crime["lat"].mean(), recent_crime["lon"].mean()],
    zoom_start=12,
    max_bounds=map_bounds,
    min_zoom=12,
)
# Add the Philadelphia boundary outline to the map
folium.GeoJson(
    philly_gdf[["geometry"]],
    style_function=lambda x: {"color": "black", "weight": 2, "fillOpacity": 0.0},
    name="Philadelphia Boundary",
).add_to(m_crime)

# Add recent crime and cluster outlines to map
m_crime = plot_recent_crimes(m_crime, recent_crime, color_map_types)
m_crime = plot_cluster_outlines(
    m_crime, df_clustered, color_map_clusters, alpha_labels, DISTANCE_THRESHOLD
)

# Add a control for controlling the layers
folium.LayerControl().add_to(m_crime)

# Setting up HTML for crime type legend
legend_html_start = """
     <div style="position: fixed; 
     bottom: 50px; left: 50px; width: 250px; height: 400px; 
     border:2px solid grey; z-index:9998; font-size:14px;
     background-color:white; padding: 10px;">
     <b>Crime Type Legend</b><br>
     <div style="height: 90%; overflow-y: auto;">
     """
legend_items = ""
for crime_type, color in color_map_types.items():
    clean_name = crime_type.replace("crime_", "")
    legend_items += f'&nbsp; <i class="fa fa-circle" style="color:{color}"></i> &nbsp; {clean_name}<br>'
legend_html_end = "</div></div>"
full_legend_html = legend_html_start + legend_items + legend_html_end
m_crime.get_root().html.add_child(folium.Element(full_legend_html))

# Setting up HTML for cluster legend
legend_cluster_html_start = """
     <div style="position: fixed; 
     bottom: 50px; right: 50px; width: 150px; height: 225px; 
     border:2px solid grey; z-index:9999; font-size:14px;
     background-color:white; padding: 10px;">
     <b>Cluster Legend</b><br>
     <div style="height: 90%; overflow-y: auto;">
     """
legend_cluster_items = ""
for cluster_label, color in color_map_clusters.items():
    label_text = (
        f"Cluster {alpha_labels[cluster_label]}" if cluster_label != -1 else "Noise"
    )
    icon_shape = "tag" if cluster_label != -1 else "times"
    legend_cluster_items += f'&nbsp; <i class="fa fa-{icon_shape}" style="color:{color}"></i> &nbsp; {label_text}<br>'
legend_cluster_html_end = "</div></div>"
full_legend_cluster_html = (
    legend_cluster_html_start + legend_cluster_items + legend_cluster_html_end
)
m_crime.get_root().html.add_child(folium.Element(full_legend_cluster_html))

# Save as html
# m_crime.save("layered_crime_map.html")

m_crime

Great! It is now clear that there are some distinct geographical clusters with our data. 
From here, one could see how much of the recent crimes overlap these clusters based on the 
historical data, to see if any crimes would fall into each cluster.

But wait, what does each cluster represent? All that can be viewed initially is that crimes are in 
some cluster. But, the function to add the cluster label allows one to hoverover each cluster icon
to view some basic summary statistics. I have added the most common crime type, median income 
(based on census data), average poverty rate, and average population density, as a starting point.
However, more in depth analysis is required for more accurate insights.

Note that this is a very surface level approach, just to have a basic automated way to label each 
cluster to provide more information. I do plan to have the option to download individual cluster 
data in the final deployed map.


Anyways, the final layer I want to add is now the hotspot layer. This aims to help show statistical
significance based on the 3 years of data that I have accumulated, to confirm whether or not those
crimes are worth paying attention to. Note that this notebook does the analysis based solely on the 
data from the clusters, but the actual production map will use the full set of data.

This is done first by dividing the city of Philadelphia into a grid of squares*. Then, I add up the
number of crimes found in each square. From here, I define neighbors, which are the squares adjacent 
to a chosen square. 

Then, I calculate something called the Gi* statistic. This is done with the 
`pysal` library. This test compares the crime count in a given square and its neighbors to the 
average crime count across the entire map.

The result is a "z-score" for each square. A high, positive z-score means the area is a 
statistically significant hotspot (more crime than expected by random chance). A high, negative 
z-score indicates a coldspot (less crime than expected). The final map layer colors each square 
based on this z-score. In short, this is similar to a one sample t-test, but with many samples (one
at a time) with spatial logic.

*I chose a moderate size (2500 feet) as a balance between granularity with the analysis, but one 
could make the size smaller for more local analysis, or even larger for more broad analysis. One 
could also use shapes like heaxgons which are known to be better, but I stuck with squares for 
siplicity.

In [72]:
# NOTE: Have to refresh the map from being empty for code to function correctly
# Create Folium map of crime, centered at mean lat/lon
m_crime = folium.Map(
    location=[recent_crime["lat"].mean(), recent_crime["lon"].mean()],
    zoom_start=12,
    max_bounds=map_bounds,
    min_zoom=12,
)
# Add the Philadelphia boundary outline to the map
folium.GeoJson(
    philly_gdf[["geometry"]],
    style_function=lambda x: {"color": "black", "weight": 2, "fillOpacity": 0.0},
    name="Philadelphia Boundary",
).add_to(m_crime)

# Add recent crime, cluster outline, and hotspot layers to the map
m_crime = plot_recent_crimes(m_crime, recent_crime, color_map_types)
m_crime = plot_cluster_outlines(
    m_crime, df_clustered, color_map_clusters, alpha_labels, DISTANCE_THRESHOLD
)
m_crime = plot_hotspot_analysis(m_crime, df_clustered, philly_gdf)

# Add a control for controlling the layers
folium.LayerControl().add_to(m_crime)

# Setting up HTML for crime type legend
legend_html_start = """
     <div style="position: fixed; 
     bottom: 50px; left: 50px; width: 250px; height: 400px; 
     border:2px solid grey; z-index:9998; font-size:14px;
     background-color:white; padding: 10px;">
     <b>Crime Type Legend</b><br>
     <div style="height: 90%; overflow-y: auto;">
     """
legend_items = ""
for crime_type, color in color_map_types.items():
    clean_name = crime_type.replace("crime_", "")
    legend_items += f'&nbsp; <i class="fa fa-circle" style="color:{color}"></i> &nbsp; {clean_name}<br>'
legend_html_end = "</div></div>"
full_legend_html = legend_html_start + legend_items + legend_html_end
m_crime.get_root().html.add_child(folium.Element(full_legend_html))

# Setting up HTML for cluster legend
legend_cluster_html_start = """
     <div style="position: fixed; 
     bottom: 50px; right: 50px; width: 150px; height: 225px; 
     border:2px solid grey; z-index:9999; font-size:14px;
     background-color:white; padding: 10px;">
     <b>Cluster Legend</b><br>
     <div style="height: 90%; overflow-y: auto;">
     """
legend_cluster_items = ""
for cluster_label, color in color_map_clusters.items():
    label_text = (
        f"Cluster {alpha_labels[cluster_label]}" if cluster_label != -1 else "Noise"
    )
    icon_shape = "tag" if cluster_label != -1 else "times"
    legend_cluster_items += f'&nbsp; <i class="fa fa-{icon_shape}" style="color:{color}"></i> &nbsp; {label_text}<br>'
legend_cluster_html_end = "</div></div>"
full_legend_cluster_html = (
    legend_cluster_html_start + legend_cluster_items + legend_cluster_html_end
)
m_crime.get_root().html.add_child(folium.Element(full_legend_cluster_html))

# Save as html
# m_crime.save("layered_crime_map.html")

m_crime

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  hotspot_grid["n_crimes"].fillna(0, inplace=True)
  w = weights.Queen.from_dataframe(analysis_grid)
 There are 9 disconnected components.
 There are 5 islands with ids: 24, 27, 43, 50, 246.
  W.__init__(self, neighbors, ids=ids, **kw)




The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  hotspot_grid["z_score"].fillna(0, inplace=True)


From here, it can be seen that while most of Philadephia does not appear to be a crime hotspot 
(which is a good thing), there are a couple notable areas that were highlighted as hotspots with
the analysis. In particular, the South region of West Philadelphia, the South region of Center City, 
and Northeast Philadephia are hotspots. Not only that, but these areas do contain a higher 
proportion of crime from the most recent crime. Finally, there are notable clusters of crime in 
these areas; Cluster I is in the hotspot in West Philadephia, parts of cluster H and B are in the
Center City hotspot, and parts of cluster G are in the hotspot. 

This suggests that there is a more 
underlying pattern to crime, but further cluster analysis with these clusters is needed for further
insights. I'll skip over such analysis here, especially since this map is a static example of this 
map in my final product, which would update daily and hotspots/clusters would likely change. But, 
for those interested, I plan to leave the proper tools in my final product. 

I also should have
functionality for performing hotspot analysis on subsets of crime (e.g. for particular crime types)
among other filters,
since this may highlight other hotspots that are muddled by considering all crimes. I won't include
that in this notebook, but hopefully this helps to understand the main logic of how this map was
created.

Finally, note that the map is quite chaotic now with all the layers, but you can toggle which layers 
to enable in the top right icon, allowing you to specifically choose which layers you can about. You
could also toggle them on and off to help view overlap.