# Healthy Streets of Los Angeles: Injuries/Deaths vs. implemented Mobility Plan, 2022
This project compares the number of 2022 car crashes that resulted in deaths/injuries on streets with and without Mobility Plan safety improvements.

Sources:
* Injuries/Deaths in the City of LA https://tims.berkeley.edu/
* Implemented Mobility Plan shapefiles (see hsla_mobility_plan project)

Assumptions:
* Location: City of Los Angeles only
* Year: 2022
* Excluding freeways (STATE_ROUTE is Null)

Output:
* how many of the deaths/injuries on LA streets last year (2022) happened on streets missing Mobility Plan safety improvements

Any questions - @sunchugasheva

In [11]:
import pandas as pd
import datetime
import geopandas as gpd
from shapely.geometry import Polygon, LineString, Point
import folium

# raise pandas' display limits so long/wide frames are printed in full
for option, limit in (
    ('display.max_rows', 10000),
    ('display.max_columns', 1000),
):
    pd.set_option(option, limit)

## functions

In [12]:
def show_map(gdf_names):
    '''
    show a number of gdfs on one folium map, one color per gdf
    - gdf_names - dictionary with format: {'name': gdf}

    prints which color was assigned to which name and returns the folium.Map;
    empty gdfs are announced but not drawn
    '''
    colors = ['green', 'blue', 'red', 'orange', 'purple', 'yellow', 'magenta']
    f = folium.Figure(width=1000, height=500)
    m = folium.Map(location = [34.05, -118.24], zoom_start=10).add_to(f)

    for i, (name, gdf) in enumerate(gdf_names.items()):
        # wrap around the palette so more than len(colors) layers still work
        color = colors[i % len(colors)]
        print(f'{color}: {name}')
        if gdf.empty:
            # nothing to draw, and there is no first geometry to inspect
            continue
        # positional access: the index may not contain label 0, or may
        # contain it several times after a concat without index reset
        first_geometry = gdf['geometry'].iloc[0]
        if first_geometry.geom_type != 'Point':
            gdf.explore(
                m = m,
                name = name,
                color = color
            )
        else:
            # point layers are added as plain GeoJson; the printed color
            # is not passed to this layer
            folium.features.GeoJson(gdf).add_to(m)

    folium.TileLayer(
        'CartoDB positron',
        show=False
    ).add_to(m)
    folium.LayerControl().add_to(m)

    return m

In [13]:
def buffer(gdf, radius, proj='EPSG:4326', proj_calc='EPSG:3857'):
    '''
    turn a gdf of linestrings into a gdf of merged buffer polygons
    - gdf - GeoDataFrame, has column "geometry"; it is not modified
    - radius - buffer radius, in proj_calc units
    - proj - projection the resulting polygons are expressed in
    - proj_calc='EPSG:3857' - projection used for the buffer calculation

    returns a new GeoDataFrame with one row per polygon of the merged
    buffers and only a geometry column

    NOTE(review): EPSG:3857 units are metres on the Web Mercator plane,
    which are stretched away from the equator - confirm that radius is
    meant in these units and not in ground metres
    '''
    work = gdf.copy()
    # buffer in the calculation projection (cap_style=2 - flat line ends),
    # then bring the polygons back to proj
    polygons = work.to_crs(proj_calc).buffer(radius, cap_style=2)
    work['buffered'] = polygons.to_crs(proj)
    work.set_geometry('buffered', inplace=True)

    # dissolve all buffers into one geometry so overlapping ones merge ...
    merged = work.unary_union
    # ... then split it back into one row per separate polygon
    gdf_return = gpd.GeoDataFrame(geometry=[merged], crs=proj)
    gdf_return = gdf_return.explode(index_parts=False)
    gdf_return = gdf_return.reset_index(drop=True)

    return gdf_return

In [14]:
def get_tot_length_miles(
    gdf, column='geometry', proj='EPSG:4326', proj_calc='EPSG:3857'):
    '''
    return the total length of all geometries of gdf in miles,
    rounded to 2 decimals
    - gdf - GeoDataFrame; NOTE it is modified in place: column becomes the
      active geometry and a "length_mile" column is added/overwritten
    - column - column to be used for length calculation if not "geometry"
    - proj - projection of the original dataset (kept for interface
      compatibility, not used in the calculation)
    - proj_calc='EPSG:3857' - projection for calculations; the conversion
      to miles assumes its units are meters

    NOTE(review): lengths measured in EPSG:3857 are stretched away from
    the equator; pass a local projected CRS as proj_calc if ground-accurate
    mileage is needed
    '''
    meters_per_mile = 1609.34
    if column:
        gdf.set_geometry(column, inplace=True)
    # honor the requested calculation projection (was hard-coded before)
    gdf_m = gdf.to_crs(proj_calc)
    gdf['length_mile'] = gdf_m.length/meters_per_mile
    length = round(gdf['length_mile'].sum(), 2)

    return length

def add_stat(dict_stat, stats):
    '''
    append one record to a column-oriented dictionary of lists
    - dict_stat - dictionary {key: list}; modified in place and returned
    - stats - list of stats to unpack IN THE ORDER OF KEYS; values beyond
      the number of keys are ignored

    raises IndexError if stats has fewer values than dict_stat has keys;
    nothing is appended in that case, so the lists keep equal lengths
    '''
    # validate before touching anything: failing halfway through would
    # leave some columns one element longer than the others
    if len(stats) < len(dict_stat):
        raise IndexError(
            f'expected {len(dict_stat)} stats, got {len(stats)}'
        )
    for key, value in zip(dict_stat, stats):
        dict_stat[key].append(value)
    return dict_stat

def crashes_type(df, col):
    '''
    keep only the rows where col is not equal to 0
    - df - original dataframe (left untouched)
    - col - column that we are interested in

    returns a copy of the matching rows with a fresh 0..n-1 index
    '''
    is_nonzero = df[col].ne(0)
    selected = df.loc[is_nonzero].copy()
    return selected.reset_index(drop=True)

def intersect_gdf_crashes(name, gdf, crashes_gdf, radius, stat_gdfs):
    '''
    select the crashes that lie on the buffered streets of gdf
    - name - label stored with the result in stat_gdfs
    - gdf - GeoDataFrame of streets, buffered with buffer(gdf, radius)
    - crashes_gdf - GeoDataFrame of crash points
    - radius - buffer radius passed to buffer()
    - stat_gdfs - dictionary of lists; [name, radius, result] is appended
      to it with add_stat

    returns the rows of crashes_gdf covered by any buffer polygon (original
    index labels kept); an empty frame with the columns of crashes_gdf when
    nothing matches
    '''
    print('intersecting ', name)
    gdf_buffer = buffer(gdf, radius = radius)
    # collect the matches per polygon and concatenate once: concatenating
    # inside the loop copies the growing frame on every iteration
    matches = [
        crashes_gdf[crashes_gdf.covered_by(polygon)]
        for polygon in gdf_buffer['geometry']
    ]
    if matches:
        intersect_crashes = pd.concat(matches)
    else:
        # no polygons at all: keep the schema so later column lookups work
        intersect_crashes = crashes_gdf.iloc[0:0].copy()
    add_stat(stat_gdfs, [name, radius, intersect_crashes])

    return intersect_crashes

In [32]:
def _safe_ratio(numerator, denominator):
    '''
    return numerator/denominator rounded to 2 decimals,
    or NaN when the denominator is 0
    '''
    if denominator == 0:
        return float('nan')
    return round(numerator/denominator, 2)


def get_stats(
    crashes_gdf, impl_gdf, unimpl_gdf,
    radius, stat_gdfs, stats, result
):
    '''
    compare crashes on implemented vs unimplemented streets for one radius
    - crashes_gdf - GeoDataFrame of crash points with columns
      NUMBER_KILLED, NUMBER_INJURED, COUNT_SEVERE_INJ
    - impl_gdf - GeoDataFrame of implemented streets
    - unimpl_gdf - GeoDataFrame of unimplemented streets
    - radius - buffer radius used to match crashes to streets
    - stat_gdfs - dictionary with keys name, radius, gdf (in this order);
      every intermediate geodataframe is appended to it
    - stats - dictionary with keys stat, value, unit, radius; every
      intermediate number is appended to it
    - result - dictionary with keys parameter, radius, impl_to_total,
      unimpl_to_total, imp_permile, unimpl_permile; one record per
      parameter is appended to it

    counts are numbers of crash rows with a non-zero value in the column,
    not sums of the column; ratios with a zero denominator are stored as NaN

    returns result (the same dictionary that was passed in)
    '''

    print('radius for buffer is', radius)
    # get list of crashes for implemented/unimplemented streets
    impl_crashes = intersect_gdf_crashes(
        name = 'implemented_crashes',
        gdf = impl_gdf,
        crashes_gdf = crashes_gdf,
        radius = radius,
        stat_gdfs = stat_gdfs
    )
    unimpl_crashes = intersect_gdf_crashes(
        name = 'unimplemented_crashes',
        gdf = unimpl_gdf,
        crashes_gdf = crashes_gdf,
        radius = radius,
        stat_gdfs = stat_gdfs
    )

    # get lengths for implemented/unimplemented streets
    length_impl_miles = get_tot_length_miles(impl_gdf)
    print(length_impl_miles, 'miles implemented')
    add_stat(
        stats,
        ['length_implemented', length_impl_miles, 'mile', radius]
    )

    length_unimpl_miles = get_tot_length_miles(unimpl_gdf)
    print(length_unimpl_miles, 'miles unimplemented')
    add_stat(
        stats,
        ['length_unimplemented', length_unimpl_miles, 'mile', radius]
    )

    cols_calc = ['NUMBER_KILLED', 'NUMBER_INJURED', 'COUNT_SEVERE_INJ']

    for col in cols_calc:
        print('calculating stats for', col)
        result['parameter'].append(col)
        result['radius'].append(radius)
        impl = crashes_type(impl_crashes, col)
        unimpl = crashes_type(unimpl_crashes, col)
        total = crashes_type(crashes_gdf, col)
        # store in the dictionary that was passed in (not a global), with
        # values in the order of its keys: name, radius, gdf
        add_stat(stat_gdfs, ['impl_'+col, radius, impl])
        add_stat(stat_gdfs, ['unimpl_'+col, radius, unimpl])
        add_stat(stat_gdfs, ['total_'+col, radius, total])

        impl_count = impl.shape[0]
        unimpl_count = unimpl.shape[0]
        total_count = total.shape[0]
        add_stat(stats, ['impl_'+col, impl_count, 'crashes', radius])
        add_stat(stats, ['unimpl_'+col, unimpl_count, 'crashes', radius])
        add_stat(stats, ['total_'+col, total_count, 'crashes', radius])

        # shares of all crashes and crashes per mile of street
        impl_total = _safe_ratio(impl_count, total_count)
        unimpl_total = _safe_ratio(unimpl_count, total_count)
        impl_mile = _safe_ratio(impl_count, length_impl_miles)
        unimpl_mile = _safe_ratio(unimpl_count, length_unimpl_miles)

        result['impl_to_total'].append(impl_total)
        result['unimpl_to_total'].append(unimpl_total)
        result['imp_permile'].append(impl_mile)
        result['unimpl_permile'].append(unimpl_mile)

    return result

# data

In [16]:
# half of the assumed street width, used as buffer radius:
# our average street will be 40m wide
radius = 20

# time of this notebook run, printed for the record
today = datetime.datetime.now()
print(today)

2023-12-11 07:29:32.374934


## data preparation

### get crashes data

In [17]:
# load the raw crash records from the CSV next to the project folder
# (presumably the tims.berkeley.edu export listed under Sources - confirm)
crashes_raw = pd.read_csv('../Crashes.csv')
# peek at the first record to see which columns are available
display(crashes_raw.head(1))

Unnamed: 0,CASE_ID,ACCIDENT_YEAR,PROC_DATE,JURIS,COLLISION_DATE,COLLISION_TIME,OFFICER_ID,REPORTING_DISTRICT,DAY_OF_WEEK,CHP_SHIFT,POPULATION,CNTY_CITY_LOC,SPECIAL_COND,BEAT_TYPE,CHP_BEAT_TYPE,CITY_DIVISION_LAPD,CHP_BEAT_CLASS,BEAT_NUMBER,PRIMARY_RD,SECONDARY_RD,DISTANCE,DIRECTION,INTERSECTION,WEATHER_1,WEATHER_2,STATE_HWY_IND,CALTRANS_COUNTY,CALTRANS_DISTRICT,STATE_ROUTE,ROUTE_SUFFIX,POSTMILE_PREFIX,POSTMILE,LOCATION_TYPE,RAMP_INTERSECTION,SIDE_OF_HWY,TOW_AWAY,COLLISION_SEVERITY,NUMBER_KILLED,NUMBER_INJURED,PARTY_COUNT,PRIMARY_COLL_FACTOR,PCF_CODE_OF_VIOL,PCF_VIOL_CATEGORY,PCF_VIOLATION,PCF_VIOL_SUBSECTION,HIT_AND_RUN,TYPE_OF_COLLISION,MVIW,PED_ACTION,ROAD_SURFACE,ROAD_COND_1,ROAD_COND_2,LIGHTING,CONTROL_DEVICE,CHP_ROAD_TYPE,PEDESTRIAN_ACCIDENT,BICYCLE_ACCIDENT,MOTORCYCLE_ACCIDENT,TRUCK_ACCIDENT,NOT_PRIVATE_PROPERTY,ALCOHOL_INVOLVED,STWD_VEHTYPE_AT_FAULT,CHP_VEHTYPE_AT_FAULT,COUNT_SEVERE_INJ,COUNT_VISIBLE_INJ,COUNT_COMPLAINT_PAIN,COUNT_PED_KILLED,COUNT_PED_INJURED,COUNT_BICYCLIST_KILLED,COUNT_BICYCLIST_INJURED,COUNT_MC_KILLED,COUNT_MC_INJURED,PRIMARY_RAMP,SECONDARY_RAMP,LATITUDE,LONGITUDE,COUNTY,CITY,POINT_X,POINT_Y
0,5378005,2011,2012-12-27,1900,2011-09-25,1718,517840,2607,7,5,6,1950,0,0,0,,0,264T1,AVENUE S,55TH ST EAST,0.0,,Y,A,-,N,,,,,,,,,,Y,3,0,4,2,A,-,9,21801.0,A,N,A,C,A,A,H,-,A,A,0,,,,,Y,,A,1,0,1,3,0,0,0,0,0,0,-,-,,,LOS ANGELES,PALMDALE,-118.031586,34.5581


In [18]:
# columns we are interested in
crashes_col = [
    'CASE_ID', 'COUNTY', 'CITY',
    'ACCIDENT_YEAR',
    'POINT_X', 'POINT_Y',
    'NUMBER_KILLED', 'NUMBER_INJURED',
    'COUNT_SEVERE_INJ',
]

# row filters: City of LA only, not on a state route (freeways), year 2022
in_city_la = crashes_raw['CITY'].eq('LOS ANGELES')
off_state_route = crashes_raw['STATE_ROUTE'].isnull()
in_2022 = crashes_raw['ACCIDENT_YEAR'].eq(2022)

crashes = (
    crashes_raw
    .loc[in_city_la & off_state_route & in_2022, crashes_col]
    .copy()
    .reset_index(drop=True)
)
display(crashes.head(1))

Unnamed: 0,CASE_ID,COUNTY,CITY,ACCIDENT_YEAR,POINT_X,POINT_Y,NUMBER_KILLED,NUMBER_INJURED,COUNT_SEVERE_INJ
0,91827094,LOS ANGELES,LOS ANGELES,2022,,,0,4,0


In [19]:
casualty_cols = ['NUMBER_KILLED', 'NUMBER_INJURED', 'COUNT_SEVERE_INJ']

# records where all three counts are 0, i.e. crashes without any
# injuries/deaths - it looks like there are none
without_casualties = crashes[casualty_cols].eq(0).all(axis=1)
display(crashes[without_casualties])

# records where at least one of the counts is missing
with_missing_counts = crashes[casualty_cols].isnull().any(axis=1)
display(crashes[with_missing_counts])

Unnamed: 0,CASE_ID,COUNTY,CITY,ACCIDENT_YEAR,POINT_X,POINT_Y,NUMBER_KILLED,NUMBER_INJURED,COUNT_SEVERE_INJ


Unnamed: 0,CASE_ID,COUNTY,CITY,ACCIDENT_YEAR,POINT_X,POINT_Y,NUMBER_KILLED,NUMBER_INJURED,COUNT_SEVERE_INJ


In [20]:
# create geodataframe: one point per crash from POINT_X / POINT_Y,
# keeping only the id and the three casualty counts
# NOTE(review): rows without POINT_X / POINT_Y are not dropped here, so they
# presumably never match a street but still count towards the totals - confirm
crash_points = gpd.points_from_xy(crashes['POINT_X'], crashes['POINT_Y'])
crash_attributes = crashes[
    ['CASE_ID', 'NUMBER_KILLED', 'NUMBER_INJURED', 'COUNT_SEVERE_INJ']
]
crashes_gdf = gpd.GeoDataFrame(
    crash_attributes,
    geometry=crash_points,
    crs='EPSG:4326'
)

### get mobility_implemented data

In [21]:
files_implemented = [
    'unprotected_bike_lane_implemented_2023_11_29',
    'protected_bike_lane_implemented_2023_11_29',
    'NEN_bike_lane_implemented_2023_11_29',
    'class1_bike_lane_implemented_2023_12_06',
    'protected_bus_lane_implemented_2023_11_29'
]

# read every implemented layer, tag its rows with the layer name and keep
# only the columns all layers share
layers_implemented = []
for file in files_implemented:
    # the context manager closes the file handle once the layer is read
    with open(f'../hsla_mobility_plan/{file}.geojson') as geo_file:
        gdf = gpd.read_file(geo_file)
    # drop the trailing "_YYYY_MM_DD" (11 characters) from the file name
    gdf['name'] = file[:-11]
    display(gdf.head(1))
    layers_implemented.append(gdf[['name', 'length_m', 'geometry']])

# one concat for all layers; the index is not reset, so row labels repeat
# across layers
mobility_implemented = pd.concat(layers_implemented)

Unnamed: 0,OBJECTID,BICYCLE_N,length_m,geometry,name
0,1,2,450.88124,"LINESTRING (-118.35923 34.22239, -118.36181 34...",unprotected_bike_lane_implemented


Unnamed: 0,OBJECTID,BICYCLE_N,length_m,geometry,name
0,206,1,147.987411,"LINESTRING (-118.53610 34.23077, -118.53610 34...",protected_bike_lane_implemented


Unnamed: 0,OBJECTID,CF,CASE_NUM,SOURCE,ADOPTDATE,CPA_1,CPA_2,NEIGHBORHD_N,Shape__Length,length_m,geometry,name
0,14,15-0719,CPC-2013-910-GPA-SP-CA-MSC-M2,Hollywood Community Plan (6/19/12),2016-09-09 00:00:00+00:00,HWD,,2,36.727754,36.727754,"LINESTRING (-118.29289 34.09551, -118.29321 34...",NEN_bike_lane_implemented


Unnamed: 0,OBJECTID,length_m,geometry,name
0,11,4776.592991,"LINESTRING (-118.25434 34.10786, -118.25285 34...",class1_bike_lane_implemented


Unnamed: 0,OBJECTID,CF,CASE_NUM,SOURCE,ADOPTDATE,CPA_1,CPA_2,TRANSIT_N,Shape__Length,length_m,geometry,name
0,9,15-0719,CPC-2013-910-GPA-SP-CA-MSC,Downtown Street Standards,2015-08-11 00:00:00+00:00,CCY,,3,134.114935,134.114935,"LINESTRING (-118.24713 34.04347, -118.24617 34...",protected_bus_lane_implemented


### get mobility_unimplemented data

In [22]:
files_plan = [
    'Bicycle_Enhanced_Network_Paths',
    'Bicycle_Enhanced_Network',
    'Neighborhood_Enhanced_Network',
    'Transit_Enhanced_Network'
]

# read every planned network, tag its rows with the network name and keep
# only the columns all networks share
layers_plan = []
for file in files_plan:
    # the context manager closes the file handle once the layer is read
    with open(f'../hsla_mobility_plan/{file}.geojson') as geo_file:
        gdf = gpd.read_file(geo_file)
    gdf['name'] = file
    display(gdf.head(1))
    layers_plan.append(gdf[['name', 'Shape__Length', 'geometry']])

# one concat for all networks; the index is not reset, so row labels repeat
# across networks
mobility_plan = pd.concat(layers_plan)

Unnamed: 0,OBJECTID_12,OBJECTID_1,OBJECTID,bikewaytyp,exbikeway,BIKEWYNAME,MILEAGE,COUNDIST,NETWORK,ARTERIAL,Shape_Leng,Shape_Le_1,Shape__Length,geometry,name
0,1,1,1,Planned BP,0,Valley LA River Path,0.32488,4,GREEN,0,1715.368583,1715.368583,631.219551,"LINESTRING (-118.35051 34.14248, -118.34966 34...",Bicycle_Enhanced_Network_Paths


Unnamed: 0,OBJECTID,CF,CASE_NUM,SOURCE,ADOPTDATE,COMMENTS,CPA_1,CPA_2,BICYCLE_N,created_user,created_date,last_edited_user,last_edited_date,Shape__Length,geometry,name
0,1,15-0719,CPC-2013-910-GPA-SP-CA-MSC,,2015-08-11 00:00:00+00:00,Updated per Timmy. Middle out consistency edit.,SVY,,2,,1970-01-01 00:00:00+00:00,,1970-01-01 00:00:00+00:00,450.88124,"LINESTRING (-118.35923 34.22239, -118.36181 34...",Bicycle_Enhanced_Network


Unnamed: 0,OBJECTID,CF,CASE_NUM,SOURCE,ADOPTDATE,CPA_1,CPA_2,NEIGHBORHD_N,Shape__Length,geometry,name
0,1,15-0719,CPC-2013-910-GPA-SP-CA-MSC-M2,,2016-09-09 00:00:00+00:00,SLK,,2,139.333326,"LINESTRING (-118.25885 34.08532, -118.25835 34...",Neighborhood_Enhanced_Network


Unnamed: 0,OBJECTID,CF,CASE_NUM,SOURCE,ADOPTDATE,CPA_1,CPA_2,TRANSIT_N,Shape__Length,geometry,name
0,1,15-0719,CPC-2013-910-GPA-SP-CA-MSC,,2015-08-11 00:00:00+00:00,WCH,,1,428.828885,"LINESTRING (-118.37112 33.94526, -118.37497 33...",Transit_Enhanced_Network


In [23]:
 # intersect polygons of actual and planned paths
# buffer the implemented streets (radius 15), then keep only the parts of
# the planned network that fall outside of these buffers
implemented_buffer = buffer(mobility_implemented, radius=15)
mobility_unimplemented = gpd.overlay(
    df1=mobility_plan,
    df2=implemented_buffer,
    how='difference',
    # the result of the difference is allowed to hold other geometry types
    # than the input, e.g. MULTILINESTRING
    keep_geom_type=False,
)
display(mobility_unimplemented.head(5))

Unnamed: 0,name,Shape__Length,geometry
0,Bicycle_Enhanced_Network_Paths,631.219551,"LINESTRING (-118.35051 34.14248, -118.34966 34..."
1,Bicycle_Enhanced_Network_Paths,2632.455613,"LINESTRING (-118.22734 34.08124, -118.22520 34..."
2,Bicycle_Enhanced_Network_Paths,694.06276,"LINESTRING (-118.23389 34.09827, -118.23886 34..."
3,Bicycle_Enhanced_Network_Paths,3009.990132,"LINESTRING (-118.22567 34.07961, -118.22455 34..."
4,Bicycle_Enhanced_Network_Paths,4748.604236,"MULTILINESTRING ((-118.34502 34.14238, -118.34..."


In [24]:
# number of rows: whole plan vs. the part of it that is not implemented yet
len(mobility_plan), len(mobility_unimplemented)

(24936, 22095)

## calculation

The data we collect along the way is structured as follows:
* stats - a dictionary (keys below) that is converted into a dataframe of all the intermediate numbers used in the calculation. This is mostly a sanity check that the numbers make sense
* gdfs - a dictionary that stores all geodataframes produced along the way, so we can export the ones we need
* result - a dictionary (keys below) that is converted into a dataframe; effectively, these are the deliverables

A radius is used everywhere: depending on how wide we assume a street to be (2*radius) - which matters mostly at intersections - we get different numbers of crashes.

In [33]:
# column-oriented containers (one list per key) filled by get_stats
stats = {key: [] for key in ('stat', 'value', 'unit', 'radius')}
gdfs = {key: [] for key in ('name', 'radius', 'gdf')}
cols_result = [
    'parameter', 'radius',
    'impl_to_total', 'unimpl_to_total',
    'imp_permile', 'unimpl_permile',
]
result = {key: [] for key in cols_result}

# run the comparison once per radius; get_stats appends to all three
# dictionaries, so the records of both radii end up in the same containers
for radius in (15, 20):
    get_stats(
        crashes_gdf=crashes_gdf,
        impl_gdf=mobility_implemented,
        unimpl_gdf=mobility_unimplemented,
        radius=radius,
        stat_gdfs=gdfs,
        stats=stats,
        result=result,
    )

# deliverables first, then the intermediate numbers behind them
result_df = pd.DataFrame(result)
display(result_df)
stats_df = pd.DataFrame(stats)
display(stats_df)

radius for buffer is 15
intersecting  implemented_crashes
intersecting  unimplemented_crashes
537.13 miles implemented
1999.55 miles unimplemented
calculating stats for NUMBER_KILLED
calculating stats for NUMBER_INJURED
calculating stats for COUNT_SEVERE_INJ
radius for buffer is 20
intersecting  implemented_crashes
intersecting  unimplemented_crashes
537.13 miles implemented
1999.55 miles unimplemented
calculating stats for NUMBER_KILLED
calculating stats for NUMBER_INJURED
calculating stats for COUNT_SEVERE_INJ


Unnamed: 0,parameter,radius,impl_to_total,unimpl_to_total,imp_permile,unimpl_permile
0,NUMBER_KILLED,15,0.23,0.48,0.11,0.06
1,NUMBER_INJURED,15,0.17,0.56,1.97,1.74
2,COUNT_SEVERE_INJ,15,0.19,0.57,0.5,0.4
3,NUMBER_KILLED,20,0.23,0.48,0.12,0.06
4,NUMBER_INJURED,20,0.17,0.57,2.01,1.76
5,COUNT_SEVERE_INJ,20,0.19,0.58,0.51,0.41


Unnamed: 0,stat,value,unit,radius
0,length_implemented,537.13,mile,15
1,length_unimplemented,1999.55,mile,15
2,impl_NUMBER_KILLED,60.0,crashes,15
3,unimpl_NUMBER_KILLED,127.0,crashes,15
4,total_NUMBER_KILLED,264.0,crashes,15
5,impl_NUMBER_INJURED,1056.0,crashes,15
6,unimpl_NUMBER_INJURED,3471.0,crashes,15
7,total_NUMBER_INJURED,6189.0,crashes,15
8,impl_COUNT_SEVERE_INJ,267.0,crashes,15
9,unimpl_COUNT_SEVERE_INJ,806.0,crashes,15


In [None]:
# report, for every radius, how many crashes were matched to implemented and
# to unimplemented streets; the entries are looked up by name so the report
# does not depend on how many geodataframes are stored per radius
names_to_report = ('implemented_crashes', 'unimplemented_crashes')
idxs = [
    position
    for position, stored_name in enumerate(gdfs['name'])
    if stored_name in names_to_report
]
for i in idxs:
    print(f"for radius {gdfs['radius'][i]}:\n\
    {gdfs['gdf'][i].shape[0]} {gdfs['name'][i]} total happened in 2022")

for radius 15:
    1100 implemented_crashes total happened in 2022
for radius 15:
    3562 unimplemented_crashes total happened in 2022
for radius 20:
    1125 implemented_crashes total happened in 2022
for radius 20:
    3606 unimplemented_crashes total happened in 2022
