In [12]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.models import ColorBar, HoverTool
from bokeh.transform import linear_cmap
from bokeh.palettes import Viridis256
# Build a borough x suspect-age-group heatmap in which each cell is coloured by
# the number of occurrences of the most common offense for that combination.

# Load the data from the CSV file
file_path = 'Susp_Age_Group.csv'  # Update the path to your file
data = pd.read_csv(file_path)

# Count the rows for every (borough, suspect age group, offense) combination.
grouped_data = (
    data.groupby(['BORO_NM', 'SUSP_AGE_GROUP', 'OFNS_DESC'])
    .size()
    .reset_index(name='count')
)

# Within each (borough, age group) pair, put the most frequent offense first.
sorted_data = grouped_data.sort_values(
    ['BORO_NM', 'SUSP_AGE_GROUP', 'count'],
    ascending=[True, True, False],
)

# Keep only that first row per pair, i.e. the most common offense.
unique_data = sorted_data.drop_duplicates(subset=['BORO_NM', 'SUSP_AGE_GROUP'])

# Pivot to an (age group x borough) table of counts; pairs that never occur
# in the data are filled with 0.
heatmap_data = unique_data.pivot_table(
    index='SUSP_AGE_GROUP', columns='BORO_NM', values='count', fill_value=0
)

# Axis categories for the heatmap
boroughs = list(heatmap_data.columns)
age_groups = list(heatmap_data.index)

# Cell coordinates for the heatmap. With meshgrid's default indexing both grids
# have shape (len(age_groups), len(boroughs)), so flattening walks the cells
# row by row (age group outer, borough inner) -- the same order as
# heatmap_data.values.flatten().
boroughs_grid, age_groups_grid = np.meshgrid(range(len(boroughs)), range(len(age_groups)))
counts = heatmap_data.values.flatten()

borough_labels = [boroughs[i] for i in boroughs_grid.flatten()]
age_group_labels = [age_groups[j] for j in age_groups_grid.flatten()]

# Most common offense keyed by (borough, age group). Looking each cell up by
# its own labels keeps the offense column aligned with the other columns; the
# previous borough-major loop produced the offenses in a different order than
# the row-major label/count columns, so tooltips showed the wrong offense.
top_offense = dict(zip(
    zip(unique_data['BORO_NM'], unique_data['SUSP_AGE_GROUP']),
    unique_data['OFNS_DESC'],
))
offense_labels = [
    top_offense.get(cell, 'None')  # 'None' marks pairs absent from the data
    for cell in zip(borough_labels, age_group_labels)
]

# Convert the data to a format suitable for Bokeh (one entry per cell)
source = ColumnDataSource(data=dict(
    BORO_NM=borough_labels,
    SUSP_AGE_GROUP=age_group_labels,
    count=counts,
    OFNS_DESC=offense_labels,
))

# Using a linear color mapper spanning the observed range of counts
mapper = linear_cmap(field_name='count', palette=Viridis256, low=counts.min(), high=counts.max())

# Setting up the Bokeh plot; the y axis is reversed so the first age group is on top
plot = figure(title="Heatmap of Offenses by Borough and Suspect Age Group",
              x_range=boroughs, y_range=list(reversed(age_groups)),
              x_axis_location="above", width=900, height=400,
              tools="hover,pan,box_zoom,reset,wheel_zoom",
              tooltips=[('Borough', '@BORO_NM'), ('Age Group', '@SUSP_AGE_GROUP'), ('Offense', '@OFNS_DESC'), ('Count', '@count')])

# Create Rectangles for the heatmap (one unit-sized rectangle per cell)
plot.rect(x="BORO_NM", y="SUSP_AGE_GROUP", width=1, height=1, source=source,
          fill_color=mapper, line_color=None)

# Add color bar
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0, 0))
plot.add_layout(color_bar, 'right')

# Show the plot
output_notebook()  # Direct Bokeh output to the notebook
show(plot)  # Render the plot

  data = pd.read_csv(file_path)
