In [28]:
# Load the data
import pandas as pd

# Read the collision records from disk and report how many rows were loaded
data = pd.read_csv('11231.csv')
print(f'The data contains {len(data)} records.')

# Parse the crash dates into datetime values so min/max compare chronologically
data['CRASH DATE'] = pd.to_datetime(data['CRASH DATE'])

# Casualty columns, listed in the same order as the totals unpacked below
casualty_columns = (
    'NUMBER OF PERSONS INJURED',
    'NUMBER OF PERSONS KILLED',
    'NUMBER OF PEDESTRIANS INJURED',
    'NUMBER OF PEDESTRIANS KILLED',
    'NUMBER OF CYCLIST INJURED',
    'NUMBER OF CYCLIST KILLED',
    'NUMBER OF MOTORIST INJURED',
    'NUMBER OF MOTORIST KILLED',
)

# Sum each casualty column on its own (one .sum() call per column)
(
    total_persons_injured,
    total_persons_killed,
    total_pedestrians_injured,
    total_pedestrians_killed,
    total_cyclist_injured,
    total_cyclist_killed,
    total_motorist_injured,
    total_motorist_killed,
) = (data[column].sum() for column in casualty_columns)

# First and last crash date present in the file
crash_dates = data['CRASH DATE']
earliest_date = crash_dates.min()
most_recent_date = crash_dates.max()

# Report the casualty totals, one per line
print(
    f'Total number of persons injured: {total_persons_injured}',
    f'Total number of persons killed: {total_persons_killed}',
    f'Total number of pedestrians injured: {total_pedestrians_injured}',
    f'Total number of pedestrians killed: {total_pedestrians_killed}',
    f'Total number of cyclist injured: {total_cyclist_injured}',
    f'Total number of cyclist killed: {total_cyclist_killed}',
    f'Total number of motorist injured: {total_motorist_injured}',
    f'Total number of motorist killed: {total_motorist_killed}',
    sep='\n',
)

# Report the date range, one per line
print(
    f'Earliest date of the dataset: {earliest_date}',
    f'Most recent date of the dataset: {most_recent_date}',
    sep='\n',
)

The data contains 5832 records.
Total number of persons injured: 1102
Total number of persons killed: 3
Total number of pedestrians injured: 249
Total number of pedestrians killed: 2
Total number of cyclist injured: 188
Total number of cyclist killed: 1
Total number of motorist injured: 647
Total number of motorist killed: 0
Earliest date of the dataset: 2012-07-01 00:00:00
Most recent date of the dataset: 2023-08-01 00:00:00


In [29]:
import pandas as pd
import numpy as np
import folium

# Filter data to include only accidents where someone was killed
fatal_accidents = data[data['NUMBER OF PERSONS KILLED'] > 0]

# Rows without coordinates cannot be placed on the map, so drop them up front
fatal_located = fatal_accidents.dropna(subset=['LATITUDE', 'LONGITUDE'])

# Initialize map centered around New York City
m = folium.Map(location=[40.7128, -74.0060], zoom_start=12)

# Add one marker per fatal accident
for idx, row in fatal_located.iterrows():
    street = row['ON STREET NAME']
    # A missing street name would otherwise be rendered as "Accident at nan"
    if pd.notna(street):
        popup_text = f"Accident at {street}"
    else:
        popup_text = "Accident (street name not recorded)"
    folium.Marker([row['LATITUDE'], row['LONGITUDE']], popup=popup_text).add_to(m)

# Save the map to an HTML file
m.save('fatal_accidents.html')

# Show the map. This must be the last expression in the cell: Jupyter only
# renders the value of a cell's final expression, so a bare `m` placed before
# the save call is silently ignored.
m

In [30]:
import folium
from folium.plugins import HeatMap

# Filter data to include only accidents where someone was injured
injured_accidents = data[data['NUMBER OF PERSONS INJURED'] > 0]

# Remove entries with missing latitude or longitude values
injured_accidents = injured_accidents.dropna(subset=['LATITUDE', 'LONGITUDE'])

# Extract [latitude, longitude] pairs as a list of two-element lists,
# which is the point format passed to HeatMap below
lat_long_data = injured_accidents[['LATITUDE', 'LONGITUDE']].values.tolist()

# Initialize map centered around New York City
m = folium.Map(location=[40.7128, -74.0060], zoom_start=12)

# Add HeatMap to the map (each injury accident contributes one point)
HeatMap(lat_long_data).add_to(m)

# Save the map to an HTML file
m.save('injured_accidents_heatmap.html')

# Show the map. This must be the last expression in the cell: Jupyter only
# renders the value of a cell's final expression, so a bare `m` placed before
# the save call is silently ignored.
m

In [31]:
import folium
from shapely.geometry import Point
import geopandas as gpd

# Filter data to include only accidents where someone was injured
injured_accidents = data[data['NUMBER OF PERSONS INJURED'] > 0]

# Remove entries with missing latitude or longitude values
injured_accidents = injured_accidents.dropna(subset=['LATITUDE', 'LONGITUDE'])

# Group data by latitude and longitude and sum up the number of injuries,
# so every distinct coordinate pair becomes a single row
grouped_data = injured_accidents.groupby(['LATITUDE', 'LONGITUDE'])['NUMBER OF PERSONS INJURED'].sum().reset_index()

# Scale injuries relative to the worst location so the largest circle has radius 10
grouped_data['INJURIES_SCALED'] = (grouped_data['NUMBER OF PERSONS INJURED'] / grouped_data['NUMBER OF PERSONS INJURED'].max()) * 10

# Convert latitude and longitude to geometric points (x = longitude, y = latitude)
geometry = [Point(xy) for xy in zip(grouped_data['LONGITUDE'], grouped_data['LATITUDE'])]

# Create a GeoDataFrame. The plotting loop below only reads the LATITUDE,
# LONGITUDE and INJURIES_SCALED columns, not the geometry column.
geo_df = gpd.GeoDataFrame(grouped_data, geometry=geometry)

# Create a Folium map centered on New York City
m = folium.Map(location=[40.7128, -74.0060], zoom_start=12, tiles='CartoDB Positron')

# Add one circle per location, sized by its scaled injury count
for idx, row in geo_df.iterrows():
    folium.CircleMarker(location=(row['LATITUDE'], row['LONGITUDE']),
                        radius=row['INJURIES_SCALED'],
                        fill=True,
                        fill_opacity=0.7,
                        color='red',
                        fill_color='red').add_to(m)

# Save the map to an HTML file
m.save('injured_accidents_circles.html')

# Show the map. This must be the last expression in the cell: Jupyter only
# renders the value of a cell's final expression, so a bare `m` placed before
# the save call is silently ignored.
m

In [32]:
import folium
from shapely.geometry import Point
import geopandas as gpd

# Filter data to include only accidents where a cyclist was injured
injured_accidents = data[data['NUMBER OF CYCLIST INJURED'] > 0]

# Remove entries with missing latitude or longitude values
injured_accidents = injured_accidents.dropna(subset=['LATITUDE', 'LONGITUDE'])

# Group data by latitude and longitude and sum up the number of cyclist injuries,
# so every distinct coordinate pair becomes a single row
grouped_data = injured_accidents.groupby(['LATITUDE', 'LONGITUDE'])['NUMBER OF CYCLIST INJURED'].sum().reset_index()

# Scale injuries relative to the worst location so the largest circle has radius 10
grouped_data['INJURIES_SCALED'] = (grouped_data['NUMBER OF CYCLIST INJURED'] / grouped_data['NUMBER OF CYCLIST INJURED'].max()) * 10

# Convert latitude and longitude to geometric points (x = longitude, y = latitude)
geometry = [Point(xy) for xy in zip(grouped_data['LONGITUDE'], grouped_data['LATITUDE'])]

# Create a GeoDataFrame. The plotting loop below only reads the LATITUDE,
# LONGITUDE and INJURIES_SCALED columns, not the geometry column.
geo_df = gpd.GeoDataFrame(grouped_data, geometry=geometry)

# Create a Folium map centered on New York City
m = folium.Map(location=[40.7128, -74.0060], zoom_start=12, tiles='CartoDB Positron')

# Add one circle per location, sized by its scaled cyclist injury count
for idx, row in geo_df.iterrows():
    folium.CircleMarker(location=(row['LATITUDE'], row['LONGITUDE']),
                        radius=row['INJURIES_SCALED'],
                        fill=True,
                        fill_opacity=0.7,
                        color='red',
                        fill_color='red').add_to(m)

# Save the map to an HTML file
m.save('injured_accidents_cyclists.html')

# Show the map. This must be the last expression in the cell: Jupyter only
# renders the value of a cell's final expression, so a bare `m` placed before
# the save call is silently ignored.
m

In [33]:
import folium
from shapely.geometry import Point
import geopandas as gpd

# Marker color for each injury category that is plotted.
# Each key is inserted into the column name 'NUMBER OF <KEY> INJURED'.
# The MOTORIST category ('orange') is currently not plotted; add
# 'MOTORIST': 'orange' to this mapping to include it.
colors = {'CYCLIST': 'skyblue', 'PEDESTRIANS': 'red'}

# Define function for filtering and processing accidents by type
def process_accidents(injury_type, color, accidents=None, target_map=None):
    """Plot one circle per location where the given participant type was injured.

    Rows are filtered to those with a positive 'NUMBER OF <TYPE> INJURED'
    count and non-missing coordinates, injuries are summed per distinct
    (LATITUDE, LONGITUDE) pair, and each location is drawn as a
    ``folium.CircleMarker`` whose radius is the location's share of the
    largest per-location total, scaled so the largest circle has radius 10.

    Parameters
    ----------
    injury_type : str
        Participant category; it is upper-cased and inserted into the column
        name ``'NUMBER OF <TYPE> INJURED'`` (e.g. ``'CYCLIST'``).
    color : str
        Outline and fill color passed to every circle marker.
    accidents : pandas.DataFrame, optional
        Records to plot. Defaults to the notebook-level ``data`` frame.
    target_map : optional
        Object the markers are added to via ``marker.add_to(...)``.
        Defaults to the notebook-level map ``m``.

    Returns
    -------
    None
        The markers are added to the map as a side effect.

    Raises
    ------
    KeyError
        If the frame has no column matching the requested injury type.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working
    source = data if accidents is None else accidents
    destination = m if target_map is None else target_map

    # Build the column name once instead of repeating the f-string
    injured_col = f'NUMBER OF {injury_type.upper()} INJURED'

    # Keep only rows with at least one injury of this type and usable coordinates
    located = source[source[injured_col] > 0].dropna(subset=['LATITUDE', 'LONGITUDE'])

    # Sum injuries per distinct coordinate pair
    per_location = located.groupby(['LATITUDE', 'LONGITUDE'])[injured_col].sum().reset_index()
    if per_location.empty:
        # Nothing to draw for this category
        return

    # Scale relative to the worst location so the largest circle has radius 10
    per_location['INJURIES_SCALED'] = (per_location[injured_col] / per_location[injured_col].max()) * 10

    # Only the coordinates and the scaled radius are needed for plotting
    for lat, lon, radius in zip(per_location['LATITUDE'],
                                per_location['LONGITUDE'],
                                per_location['INJURIES_SCALED']):
        folium.CircleMarker(location=(lat, lon),
                            radius=radius,
                            fill=True,
                            fill_opacity=0.7,
                            color=color,
                            fill_color=color).add_to(destination)

# Create a Folium map centered on New York City
m = folium.Map(location=[40.7128, -74.0060], zoom_start=12, tiles='CartoDB Positron')

# Process each type of injury and add its circles to the map
for injury_type, color in colors.items():
    process_accidents(injury_type, color)

# Save the map to an HTML file
m.save('injured_accidents.html')

# Show the map. This must be the last expression in the cell: Jupyter only
# renders the value of a cell's final expression, so a bare `m` placed before
# the save call is silently ignored.
m