# Schools with the most nearby crashes in LA

This project identifies the schools in LA with the highest number of car crashes involving injuries or fatalities within 1/4 mile of the school.

Sources:
* Schools https://data-cdegis.opendata.arcgis.com/datasets/CDEGIS::california-schools-2022-23/explore
* Injuries/Deaths in the City of LA https://data.lacity.org/Public-Safety/Traffic-Collision-Data-from-2010-to-Present/d5tf-ez2w

Assumptions:
* location - Los Angeles
* year - 2021
* within the hours of 7AM-6PM
* include all types-- motorists, pedestrians, cyclists

Deliverable: 
* List of schools with the highest number of 2021 crashes with injuries or fatalities that occurred within 1/4 mile of the school

Any questions - @sunchugasheva

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import folium
import datetime

# Raise pandas' display limits so long and wide frames are shown in full.
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_columns', 1000)

In [None]:
# Run-date stamp formatted as YYYY_MM_DD.
today = f"{datetime.datetime.now():%Y_%m_%d}"
print(today)

# Analysis parameters: buffer radius around each school and the year analysed.
radius = 0.25  # miles
calc_year = 2021

## functions

In [None]:
def col_from_code(col, code):
    """Flag rows whose text contains any of the given codes.

    Parameters
    ----------
    col : pandas.Series
        String column to search; missing values are allowed.
    code : list of str
        Codes to look for. They are joined with ``|`` and matched as a
        regular expression, so any one of them appearing anywhere in the
        text counts as a hit.

    Returns
    -------
    pandas.Series
        Integer series aligned with ``col``: 1 where a code was found,
        0 otherwise (including rows with a missing value).
    """
    if not code:
        # An empty pattern would match every string; no codes means no hits.
        return pd.Series(0, index=col.index, dtype=int)

    # na=False maps missing values straight to False, which avoids an
    # object-dtype replace/fillna round trip and its downcasting warning.
    return col.str.contains('|'.join(code), na=False).astype(int)

def get_coords(line):
    """Parse a bracketed coordinate string into a list of floats.

    Parameters
    ----------
    line : str
        Text such as ``"(34.05, -118.24)"``: comma-separated numbers wrapped
        in one opening and one closing character.

    Returns
    -------
    list of float
        The numbers in the order they appear in ``line``.

    Raises
    ------
    ValueError
        If any comma-separated piece is not a valid number.
    """
    # Strip surrounding whitespace before dropping the enclosing brackets, and
    # split on a bare comma: float() ignores the padding around each piece, so
    # "(a,b)" and "(a,  b)" parse the same way as "(a, b)".
    inner = line.strip()[1:-1]
    return [float(part) for part in inner.split(',')]

## Get data

### crashes

In [None]:
# Load the raw LAPD collision export. low_memory=False turns off chunked
# parsing, which can otherwise infer mixed dtypes within a column.
crashes_raw = pd.read_csv('../hsla_injuries_deaths/LAPD_crashes.csv', low_memory=False)
display(crashes_raw.head())

In [None]:
# Raw LAPD column names -> names used throughout the rest of this notebook.
cols_dict = {
    'DR Number': 'CASE_ID',
    'Date Occurred': 'COLLISION_DATE',
    'Time Occurred': 'COLLISION_TIME',
    'Location': 'location',
    'MO Codes': 'mo_codes',
    'Premise Description': 'loc_description'
}
# Keep only the renamed columns. Selecting with an explicit list and taking a
# copy makes `crashes` an independent frame, so the column assignments below
# cannot trigger SettingWithCopyWarning.
crashes = crashes_raw.rename(columns=cols_dict)[list(cols_dict.values())].copy()

# Parse the date once and derive both the year and the plain date from it,
# rather than slicing the last four characters of the raw string.
collision_dt = pd.to_datetime(crashes['COLLISION_DATE'])
crashes['year'] = collision_dt.dt.year
crashes['COLLISION_DATE'] = collision_dt.dt.date

# MO codes used to flag a crash as involving an injury or a fatality.
# NOTE: the resulting columns are 0/1 flags per crash, not head counts.
new_cols_dict = {
    'NUMBER_INJURED': ['3024', '3025', '3026'],
    'NUMBER_KILLED': ['3027']
}
for key, value in new_cols_dict.items():
    crashes[key] = col_from_code(crashes['mo_codes'], value)

# Premise descriptions kept in the analysis.
locations = [
    'STREET', 'SIDEWALK', 'ALLEY', 'DRIVEWAY',
    'MTA BUS', 'BUS STOP'
]

has_casualty = (crashes['NUMBER_INJURED'] != 0) | (crashes['NUMBER_KILLED'] != 0)
at_kept_premise = crashes['loc_description'].isin(locations)
in_calc_year = crashes['year'] == calc_year
# COLLISION_TIME is compared as a number (700 .. 1800); both bounds inclusive.
in_time_window = crashes['COLLISION_TIME'].between(700, 1800)

crashes_count = crashes[
    has_casualty & at_kept_premise & in_calc_year & in_time_window
].copy().reset_index(drop=True)

# Parse each location string into two floats; the first value is stored as
# POINT_Y and the second as POINT_X.
crashes_count['location'] = crashes_count['location'].apply(get_coords)
crashes_count[['POINT_Y', 'POINT_X']] = crashes_count['location'].to_list()

In [None]:
# Preview the first rows of the filtered crashes.
crashes_count.head()

In [None]:
# Distinct premise descriptions that remain after filtering.
crashes_count.loc_description.unique()

In [None]:
# (rows, columns) of the raw table versus the renamed, column-trimmed table.
crashes_raw.shape, crashes.shape

In [None]:
# Attribute columns carried over to the crash geo layer.
crashes_cols = [
    'CASE_ID',
    'COLLISION_DATE', 'COLLISION_TIME',
    'NUMBER_KILLED', 'NUMBER_INJURED',
]

# Trim the filtered crashes to those attributes plus the two coordinates.
crashes_count = crashes_count.loc[
    :, [*crashes_cols, 'POINT_X', 'POINT_Y']
].copy()

In [None]:
# Build the point geometries (x = POINT_X, y = POINT_Y) with one vectorised
# call instead of constructing Point objects one by one in a Python loop.
geometry = gpd.points_from_xy(crashes_count.POINT_X, crashes_count.POINT_Y)
crashes_geo = gpd.GeoDataFrame(
    crashes_count[crashes_cols],
    crs="EPSG:4326",
    geometry=geometry
)

In [None]:
# Preview the crash layer with its point geometry.
crashes_geo.head()

### schools

In [None]:
# Read the schools layer. The context manager guarantees the file handle is
# closed as soon as geopandas has finished reading from it.
with open('California_Schools_2022-23.geojson') as schools_file:
    schools_geo_raw = gpd.read_file(schools_file)

In [None]:
# Show the coordinate reference system the schools layer was read with.
schools_geo_raw.crs

In [None]:
# Columns needed downstream: identifier, location/status attributes, geometry.
school_cols = [
    'OBJECTID',
    'CountyName', 'City',
    'Status', 'SchoolLevel',
    'geometry',
]
schools_geo = schools_geo_raw[school_cols].copy()

In [None]:
# Keep only schools whose county and city are both 'Los Angeles'.
in_la_county = schools_geo['CountyName'] == 'Los Angeles'
in_la_city = schools_geo['City'] == 'Los Angeles'
schools_geo = (
    schools_geo.loc[in_la_county & in_la_city]
    .copy()
    .reset_index(drop=True)
)

In [None]:
# Buffer distances are only meaningful in a projected CRS whose units are
# metres on the ground. Web Mercator (EPSG:3857) stretches distances by about
# 1/cos(latitude), roughly 20% at Los Angeles, so a quarter-mile buffer drawn
# there comes out closer to 0.21 mile. UTM zone 11N (EPSG:32611) covers
# Los Angeles with negligible distortion.
proj_calc = 'EPSG:32611'
proj = schools_geo.crs
schools_geo_calc = schools_geo.to_crs(proj_calc)
print('data proj:', proj, '\ncalculation proj: ', proj_calc)

# International mile in metres.
METERS_PER_MILE = 1609.344

# Buffer in the metric CRS, then convert the polygons back to the data CRS so
# they can be overlaid with the crash points.
schools_geo['buffered'] = schools_geo_calc.buffer(
    radius * METERS_PER_MILE,
).to_crs(proj)
schools_geo.set_geometry('buffered', inplace=True)

In [None]:
# Buffer radius converted from miles to metres (1 mile is about 1609.34 m).
radius*1609.34

In [None]:
# Preview the schools layer, now including the 'buffered' geometry column.
schools_geo.head()

In [None]:
# Display the buffer geometry of the first school as a visual sanity check.
schools_geo.loc[0, 'buffered']

## get overlap and get schools ranking

In [None]:
# Intersect the school buffers with the crash points. Each resulting row
# carries the attributes of one school together with one crash that falls
# inside its buffer. keep_geom_type=False keeps results whose geometry type
# differs from the first layer's (points versus buffer polygons).
gdf_school_accidents = gpd.overlay(
        schools_geo, 
        crashes_geo, 
        how='intersection',
        keep_geom_type=False
    )

In [None]:
# Preview the school/crash intersection rows.
gdf_school_accidents.head()

In [None]:
# Summary counts: intersection rows, rows repeating an already-seen CASE_ID,
# and the sizes of the two input layers.
n_near_school = len(gdf_school_accidents)
n_repeated_case = int(gdf_school_accidents.CASE_ID.duplicated().sum())
n_schools = len(schools_geo)
n_crashes = len(crashes_geo)

print(
    'crashes around schools:', n_near_school,
    '\nduplicated:', n_repeated_case,
    '\nschools:', n_schools,
    '\ncrashes:', n_crashes
)

In [None]:
# A crash that lies inside the buffers of several schools is "near" each of
# them, so it has to stay attached to every one of those schools.
# De-duplicating on CASE_ID alone would credit it only to whichever school
# happens to come first and undercount the others, which distorts the ranking.
# Only exact repeats of the same (school, crash) pair are removed.
gdf_school_accidents = gdf_school_accidents.drop_duplicates(
    subset = ['OBJECTID', 'CASE_ID'],
    keep = 'first'
).reset_index(drop=True)
print(
    'unique crashes around schools:', gdf_school_accidents.CASE_ID.nunique()
)

In [None]:
# Convert the overlay result to a plain pandas DataFrame and preview it.
top_schools_crashes = pd.DataFrame(gdf_school_accidents)
top_schools_crashes.head()

In [None]:
# Number of distinct schools that have at least one crash in their buffer.
top_schools_crashes.OBJECTID.nunique()

In [None]:
# Per-school totals: number of crash rows plus the summed killed/injured flags.
group_keys = ['OBJECTID', 'City', 'Status', 'SchoolLevel']
value_cols = ['CASE_ID', 'NUMBER_KILLED', 'NUMBER_INJURED']

per_school = top_schools_crashes[group_keys + value_cols].groupby(by=group_keys)
school_totals = per_school.agg({
    'CASE_ID': 'count',
    'NUMBER_KILLED': 'sum',
    'NUMBER_INJURED': 'sum'
})

# Rank by NUMBER_KILLED first, with NUMBER_INJURED as the tie-breaker.
# NOTE(review): the crash count (CASE_ID) is not part of the sort key; confirm
# this ordering matches the intended deliverable.
top_schools = school_totals.sort_values(
    by=['NUMBER_KILLED', 'NUMBER_INJURED'],
    ascending=False
).reset_index(drop=False)
display(top_schools.head())

# Re-attach each school's geometry; the merge joins on the shared OBJECTID column.
school_geometry = schools_geo[['OBJECTID', 'geometry']]
top_schools_geom = top_schools.merge(school_geometry, how='left')

top_schools_gdf = gpd.GeoDataFrame(
    top_schools_geom,
    geometry='geometry',
    crs="EPSG:4326"
)
top_schools_gdf.head(1)

## results

In [None]:
# # save geojson file
# top_schools_gdf.to_file(
#     f'LA_schools_radius{radius}miles_stats.geojson',
#     driver='GeoJSON'
# )
# top_schools.to_csv(f'LA_schools_radius{radius}miles_stats.csv', index=False)

In [None]:
# Fixed-size figure holding a map centred on [34.05, -118.24].
f = folium.Figure(width=1000, height=500)
m = folium.Map(location = [34.05, -118.24], zoom_start=12).add_to(f)

# Draw the ranked schools on the existing map in blue; the selected columns
# are the attributes passed along with each geometry.
map_1 = top_schools_gdf[[
    'OBJECTID',
    'CASE_ID', 'NUMBER_KILLED', 'NUMBER_INJURED',
    'geometry'
]].explore(
    m=m,
    color = 'blue',
    #popup = top_schools_gdf['CASE_ID']
)

# Add an alternative basemap (not shown by default) and a layer switcher.
folium.TileLayer(
    'CartoDB positron',
    show=False
).add_to(m) 
folium.LayerControl().add_to(m)

# Render the map as the cell output.
m