In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Location of the NYPD shooting-incident CSV export, relative to the notebook.
data_path = "NYPD_Shooting_Incident_Data__Historic_.csv"

# Read the raw file and convert OCCUR_DATE from text to datetime64 in a single
# chained expression, so `incidents` is bound exactly once.
incidents = pd.read_csv(data_path).assign(
    OCCUR_DATE=lambda frame: pd.to_datetime(frame['OCCUR_DATE'])
)

In [3]:
# Preview the first rows (head() defaults to five) to sanity-check the load
# and the OCCUR_DATE conversion.
incidents.head()

Unnamed: 0,INCIDENT_KEY,OCCUR_DATE,OCCUR_TIME,BORO,LOC_OF_OCCUR_DESC,PRECINCT,JURISDICTION_CODE,LOC_CLASSFCTN_DESC,LOCATION_DESC,STATISTICAL_MURDER_FLAG,...,PERP_SEX,PERP_RACE,VIC_AGE_GROUP,VIC_SEX,VIC_RACE,X_COORD_CD,Y_COORD_CD,Latitude,Longitude,Lon_Lat
0,244608249,2022-05-05,00:10:00,MANHATTAN,INSIDE,14,0.0,COMMERCIAL,VIDEO STORE,True,...,M,BLACK,25-44,M,BLACK,986050.0,214231.0,40.754692,-73.9935,POINT (-73.9935 40.754692)
1,247542571,2022-07-04,22:20:00,BRONX,OUTSIDE,48,0.0,STREET,(null),True,...,(null),(null),18-24,M,BLACK,1016802.0,250581.0,40.854402,-73.88233,POINT (-73.88233 40.854402)
2,84967535,2012-05-27,19:35:00,QUEENS,,103,0.0,,,False,...,,,18-24,M,BLACK,1048632.0,198262.0,40.710634,-73.767773,POINT (-73.76777349199995 40.71063412500007)
3,202853370,2019-09-24,21:00:00,BRONX,,42,0.0,,,False,...,M,UNKNOWN,25-44,M,BLACK,1014493.0,242565.0,40.832417,-73.890714,POINT (-73.89071440599997 40.832416753000075)
4,27078636,2007-02-25,21:00:00,BROOKLYN,,83,0.0,,,False,...,M,BLACK,25-44,M,BLACK,1009149.375,190104.703125,40.688443,-73.910219,POINT (-73.91021857399994 40.68844345900004)


In [5]:
# Derive hour-of-day (from OCCUR_TIME, parsed as HH:MM:SS) and calendar month
# (from OCCUR_DATE). Both are stored as new columns on `incidents` itself.
incidents['OCCUR_HOUR'] = pd.to_datetime(incidents['OCCUR_TIME'], format='%H:%M:%S').dt.hour
incidents['OCCUR_MONTH'] = incidents['OCCUR_DATE'].dt.month

# Count records per (month, hour) cell. fill_value=0 makes a month/hour pair
# with no records show up as an explicit zero; without it the cell is NaN and
# the heatmap renders it as a gap instead of the lowest colour.
# NOTE(review): this counts rows, not distinct INCIDENT_KEY values - confirm
# that is the intended meaning of "Incident Count".
pivot = pd.pivot_table(
    incidents,
    values='INCIDENT_KEY',
    index='OCCUR_MONTH',
    columns='OCCUR_HOUR',
    aggfunc='count',
    fill_value=0,
)

# Heatmap: months on the y axis, hours on the x axis, colour = record count.
fig = px.imshow(pivot, labels=dict(x="Hour of Day", y="Month", color="Incident Count"),
                x=pivot.columns, y=pivot.index, color_continuous_scale='Viridis')

fig.update_layout(title="Heatmap of Incidents by Hour of Day and Month", height=300)
fig.show()

In [8]:
# A missing race appears three ways in this export: a real NaN, the literal
# string "(null)", and the category "UNKNOWN". groupby() drops NaN keys by
# default, so those rows would vanish from the chart while "(null)" would get
# its own bar. Fold all three into "UNKNOWN" on a two-column copy (the
# `incidents` frame itself is left untouched) so every record is counted once.
race_pairs = (
    incidents[['PERP_RACE', 'VIC_RACE']]
    .fillna('UNKNOWN')
    .replace('(null)', 'UNKNOWN')
)

# One row per (perpetrator race, victim race) pair with its record count.
race_counts = race_pairs.groupby(['PERP_RACE', 'VIC_RACE']).size().reset_index(name='count')

# Stacked bars: one bar per perpetrator race, segmented by victim race.
fig = px.bar(race_counts, x='PERP_RACE', y='count', color='VIC_RACE', barmode='stack',
             labels={'PERP_RACE': 'Perpetrator Race', 'VIC_RACE': 'Victim Race',
                     'count': 'Incident Count'},
             title="Perpetrator and Victim Race Distribution")

fig.update_layout(xaxis_title="Perpetrator Race", yaxis_title="Incident Count")
fig.show()

In [10]:
# Tally records per LOC_OF_OCCUR_DESC value. value_counts() skips missing
# entries by default, so only rows that carry this field are represented.
location_desc = (
    incidents['LOC_OF_OCCUR_DESC']
    .value_counts()
    .rename_axis('Location Type')
    .reset_index(name='Incident Count')
)

# Pie chart: one slice per location type, sized by its record count.
fig = px.pie(
    location_desc,
    names='Location Type',
    values='Incident Count',
    title="Distribution of Incidents by Location Type",
)

fig.show()

In [12]:
# Only rows with both coordinates present can be placed on the map.
coordinate_columns = ['Latitude', 'Longitude']
df_filtered = incidents.dropna(subset=coordinate_columns)

# One marker per record, coloured by borough; hovering shows the borough plus
# the date, time and precinct of the record.
hover_columns = ["OCCUR_DATE", "OCCUR_TIME", "PRECINCT"]
fig = px.scatter_mapbox(
    df_filtered,
    lat="Latitude",
    lon="Longitude",
    color="BORO",
    hover_name="BORO",
    hover_data=hover_columns,
    title="NYC Incident Locations by Borough",
    color_discrete_sequence=px.colors.qualitative.Set1,
    zoom=10,
    height=600,
)

# Use OpenStreetMap tiles and trim the margins, keeping 40px on top for the title.
fig.update_layout(
    mapbox_style="open-street-map",
    margin=dict(r=0, t=40, l=0, b=0),
)

fig.show()