In [11]:
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure, show
from bokeh.tile_providers import CARTODBPOSITRON
from bokeh.transform import factor_cmap



In [12]:
# Load the complaint records. low_memory=False makes pandas parse the file in a
# single pass instead of chunk by chunk. CMPLNT_NUM is cast to text so the
# complaint number is handled as an identifier rather than a number.
data = pd.read_csv('Susp_Age_Group.csv', low_memory=False).astype({'CMPLNT_NUM': str})

# Offenses kept for the rest of the notebook. Despite the name, only three
# entries are active; 'FELONY ASSAULT' and 'MISCELLANEOUS PENAL LAW' are
# currently left out.
top_5_offenses = [
    'HARRASSMENT 2',
    'ASSAULT 3 & RELATED OFFENSES',
    'PETIT LARCENY',
]

# Keep only the rows whose offense description is in the list above.
data = data.loc[data['OFNS_DESC'].isin(top_5_offenses)]
data

Unnamed: 0,CMPLNT_NUM,ADDR_PCT_CD,BORO_NM,CMPLNT_FR_DT,CMPLNT_FR_TM,CMPLNT_TO_DT,CMPLNT_TO_TM,LAW_CAT_CD,OFNS_DESC,PARKS_NM,...,SUSP_AGE_GROUP,SUSP_RACE,SUSP_SEX,VIC_AGE_GROUP,VIC_RACE,VIC_SEX,X_COORD_CD,Y_COORD_CD,Latitude,Longitude
1,261271493,49.0,BRONX,2023-01-02,17:10:00-06,,00:00:00-06,MISDEMEANOR,ASSAULT 3 & RELATED OFFENSES,(null),...,25-44,UNKNOWN,F,25-44,WHITE,M,1024094.0,252657.0,40.860071,-73.855956
3,261307748,45.0,BRONX,2023-01-02,23:08:00-06,2023-01-02,23:13:00-06,MISDEMEANOR,ASSAULT 3 & RELATED OFFENSES,(null),...,18-24,WHITE HISPANIC,F,45-64,WHITE HISPANIC,M,1029997.0,245737.0,40.841050,-73.834664
5,261883104,102.0,QUEENS,2023-01-02,23:15:00-06,2023-01-12,23:20:00-06,MISDEMEANOR,ASSAULT 3 & RELATED OFFENSES,(null),...,25-44,WHITE HISPANIC,M,25-44,UNKNOWN,F,1024064.0,193021.0,40.696386,-73.856420
6,261282024,115.0,QUEENS,2023-01-02,16:00:00-06,2023-01-02,16:10:00-06,VIOLATION,HARRASSMENT 2,(null),...,45-64,WHITE HISPANIC,M,25-44,WHITE HISPANIC,M,1022860.0,214688.0,40.755862,-73.860638
8,261307747,45.0,BRONX,2023-01-02,19:30:00-06,2023-01-02,19:37:00-06,VIOLATION,HARRASSMENT 2,(null),...,25-44,BLACK,M,25-44,WHITE HISPANIC,F,1026856.0,244632.0,40.838033,-73.846024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263639,279761610,23.0,MANHATTAN,2023-12-31,07:10:00-06,2023-12-31,07:15:00-06,MISDEMEANOR,ASSAULT 3 & RELATED OFFENSES,(null),...,45-64,UNKNOWN,M,25-44,BLACK,F,1000381.0,226899.0,40.789449,-73.941744
263643,279756029,71.0,BROOKLYN,2023-12-31,09:30:00-06,2023-12-31,09:35:00-06,VIOLATION,HARRASSMENT 2,(null),...,18-24,BLACK,M,UNKNOWN,UNKNOWN,F,1000129.0,180994.0,40.663451,-73.942763
263645,279767324,105.0,QUEENS,2023-12-31,18:50:00-06,2023-12-31,19:00:00-06,VIOLATION,HARRASSMENT 2,(null),...,45-64,BLACK,F,45-64,BLACK,M,1058377.0,200237.0,40.715972,-73.732598
263646,279766994,105.0,QUEENS,2023-12-31,21:20:00-06,2023-12-31,21:20:00-06,VIOLATION,HARRASSMENT 2,(null),...,18-24,BLACK,M,18-24,BLACK,M,1055498.0,197111.0,40.707416,-73.743018


In [16]:
# New York geographical bounds in degrees. They are assigned here, ahead of the
# filter that reads them, so this cell also works on a freshly restarted kernel
# (previously they were only assigned after their first use).
ny_north = 45.011
ny_south = 40.496
ny_east = -71.856
ny_west = -79.762

# Filter the dataset for rows with latitude and longitude within New York bounds.
# Both ends of each range are inclusive; rows with missing coordinates fail the
# comparison and are therefore excluded as well.
filtered_data = data[
    data['Latitude'].between(ny_south, ny_north) &
    data['Longitude'].between(ny_west, ny_east)]

print(f"Original dataset size: {len(data)}")
print(f"Filtered dataset size: {len(filtered_data)}")

# Proceed with your data analysis or visualization with `filtered_data`
# Function to convert lat/long to Web Mercator format
def lat_lon_to_mercator(lat, lon):
    """Convert latitude/longitude in degrees to Web Mercator coordinates.

    Only NumPy functions and plain arithmetic are used, so `lat` and `lon`
    may be scalars or array-like objects such as NumPy arrays.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.

    Returns:
        A tuple ``(x, y)`` in metres. Note the order: ``x`` is derived from
        ``lon`` and ``y`` from ``lat``, i.e. the reverse of the argument order.
    """
    earth_radius = 6378137  # Earth radius in meters
    metres_per_degree = earth_radius * np.pi / 180.0
    easting = lon * metres_per_degree
    # Angle fed to tan(): (90 + lat) degrees, halved and expressed in radians.
    half_angle = (90 + lat) * np.pi / 360.0
    northing = np.log(np.tan(half_angle)) * earth_radius
    return easting, northing

# Convert coordinates for the whole dataset. lat_lon_to_mercator only uses NumPy
# functions and arithmetic, so it can take the two columns directly instead of
# being called once per row through DataFrame.apply.
data['x'], data['y'] = lat_lon_to_mercator(data['Latitude'], data['Longitude'])

# Define bounds for New York State in latitude and longitude
ny_state_north, ny_state_south = 45.011, 40.496
ny_state_east, ny_state_west = -73.496, -79.762
# NOTE(review): ny_state_east (-73.496) differs from ny_east (-71.856) below —
# confirm which eastern limit is intended.

# Convert these bounds to Mercator coordinates.
# lat_lon_to_mercator returns (x, y): index 1 is the y of a latitude, index 0 is
# the x of a longitude. (The previous unpacking took the opposite element, which
# left every bound at roughly 0.)
north_y = lat_lon_to_mercator(ny_state_north, 0)[1]
south_y = lat_lon_to_mercator(ny_state_south, 0)[1]
east_x = lat_lon_to_mercator(0, ny_state_east)[0]
west_x = lat_lon_to_mercator(0, ny_state_west)[0]

# Define New York geographical bounds (read by the latitude/longitude filter)
ny_north = 45.011
ny_south = 40.496
ny_east = -71.856
ny_west = -79.762



Original dataset size: 115552
Filtered dataset size: 115552


In [17]:
# New York City latitude and longitude, used to centre the initial map view
nyc_lat, nyc_lon = 40.7128, -74.0060
# Convert NYC coordinates to Mercator
nyc_x, nyc_y = lat_lon_to_mercator(nyc_lat, nyc_lon)

# Project every record to Web Mercator. The helper works on whole columns, so
# a single vectorised call replaces the per-row DataFrame.apply.
data['x'], data['y'] = lat_lon_to_mercator(data['Latitude'], data['Longitude'])

# Creating a ColumnDataSource with only the columns the glyphs and tooltips use
source = ColumnDataSource(data={
    column: data[column]
    for column in ('x', 'y', 'BORO_NM', 'SUSP_AGE_GROUP', 'OFNS_DESC')
})

# Colour mapping keyed on the offense description. `offense_colors` was used
# below without being defined anywhere in the notebook; it is built here from
# the offense list so the cell runs on a fresh kernel. The palette has five
# entries so it still covers the list if the remaining offenses are re-enabled.
offense_colors = factor_cmap(
    'OFNS_DESC',
    palette=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd'],
    factors=top_5_offenses,
)

# Setting up the figure with focus on New York City (a 20 km x 20 km window)
p = figure(x_axis_type="mercator", y_axis_type="mercator", sizing_mode="stretch_width", height=600,
           x_range=(nyc_x - 10000, nyc_x + 10000), y_range=(nyc_y - 10000, nyc_y + 10000))
p.add_tile(CARTODBPOSITRON)

# Adding points to the map with a different color for each offense.
# scatter() draws circle markers by default and takes a screen-space `size`.
p.scatter(x='x', y='y', source=source, size=8, color=offense_colors, fill_alpha=0.7, legend_field='OFNS_DESC')

# Adding hover tool
hover = HoverTool(tooltips=[
    ("Borough", "@BORO_NM"),
    ("Suspect Age Group", "@SUSP_AGE_GROUP"),
    ("Offense Description", "@OFNS_DESC"),
])
p.add_tools(hover)

# Handling the legend
p.legend.location = "top_left"
# NOTE(review): with legend_field all points share one renderer, so clicking a
# legend entry presumably hides every point rather than one offense — confirm
# this is the intended interaction.
p.legend.click_policy = "hide"

# Show the map
#output_notebook()
show(p)