# Healthy Streets of Los Angeles Injuries/Deaths which could be prevented with implemented Mobility in plan 2022
This project finds a number of car accidents that could have been prevented if the Mobility plan was implemented earlier going back to 2015.


Sources:
* Injuries/Deaths in the City of LA https://data.lacity.org/Public-Safety/Traffic-Collision-Data-from-2010-to-Present/d5tf-ez2w
* Implepemented Mobility Plan shapefiles (see hsla_mobilty_plan project)

Assumptions:
* Location only City of Los Angeles
* years are 2015-2023
* Excluding freeways (STATE_ROUTE is Null)

Output:
* a number of deaths/injuries on LA streets that happened on streets missing mobility safety plan improvements at that time

Any questions - @sunchugasheva

In [2]:
import pandas as pd
import datetime
import geopandas as gpd
from shapely.geometry import Polygon, LineString, Point
import folium

pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_columns', 1000)

## functions

In [3]:
def col_from_code(col, code):
    new_col = col.str.contains('|'.join(code)).replace(
        {True: 1, False: 0}
    ).fillna(0).astype(int)
    
    return new_col

def get_coords(line):
    return list(map(float, line[1:-1].split(', ')))

In [178]:
# todo: change all show_map to show_maps
def show_map(gdf1, name1, color1, gdf2, name2, color2):
    
    print(
            f'{color1}: {name1}, {color2}: {name2}'
        )
    
    f = folium.Figure(width=1000, height=500)
    
    m = gdf1.explore(
        name = name1,
        color = color1
    ).add_to(f)

    map_2 = gdf2.explore(
        m=m,  # pass the map object
        name = name2,
        color = color2
    )

    folium.TileLayer(
        'CartoDB positron',
        show=False
    ).add_to(m) 

    folium.LayerControl().add_to(m)
    
    return m

def show_maps(gdf_names):
    '''
    show a number of gdfs with set colors
    - gdfs_colors - dictionary with format:{'name': gdf}
    ''' 
    colors = ['green', 'blue', 'red', 'orange', 'purple', 'yellow', 'magenta']
    f = folium.Figure(width=1000, height=500)
    m = folium.Map(location = [34.05, -118.24], zoom_start=50).add_to(f)
    i = 0
    
    for name in gdf_names.keys():
        color = colors[i]
        i += 1
        gdf = gdf_names[name]
        print(f'{color}: {name}')
        if gdf.loc[0, 'geometry'].geom_type!='Point':
            gdf.explore(
                m = m,
                name = name,
                color = color
            )
        else:
            #folium.Marker(gdf).add_to(m)
            folium.features.GeoJson(gdf).add_to(m)

    folium.TileLayer(
        'CartoDB positron',
        show=False
    ).add_to(m) 
    folium.LayerControl().add_to(m)
    
    return m

In [85]:
def buffer(gdf, radius, proj='EPSG:4326', proj_calc='EPSG:3857'):
    '''
    convert a gdf of linestrings into a gdf of polygons with radius
    - gdf - GeoDataFrame, has column "geometry"
    - radius - radius of bufferm meters
    - proj - projection of the original dataset
    - proj_calc='EPSG:3857' - projection for calculations
    '''  
    gdf = gdf.copy()
    gdf_calc = gdf.to_crs(proj_calc)
    #print('data proj:', proj, '\ncalculation proj: ', proj_calc)
    gdf['buffered'] = gdf_calc.buffer(radius, cap_style=2).to_crs(proj)

    gdf_return = gdf.copy()
    gdf_return.geometry = gdf_return.buffered
    gdf_return.set_geometry('geometry', inplace=True)
    gdf_return.geometry.crs = proj
    
    return gdf_return

In [6]:
def overlap_plan_actual(
        name,
        gdf_plan,
        gdf_actual,
        conditions,
        radius = 10,
        year = None,
        column_year = 'Year_',
        print_map = True,
        test_map = False
    ):
    '''
    - name 
    - gdf_plan - GeoDataFrame of Mobility plan
    - gdf_actual - GeoDataFrame of existing roads, has a column_year
    - conditions - filter specific types of bike/bus lanes
    - radius - radius in meters to widen the line of actual roads
    - year - filter data after construction year
    - column_year - column of construction year
    - print_map - show final map
    - test_map - show interim map
    '''
    print(f'{name} for year after {year}:')
    gdf_plan = gdf_plan.copy()
    gdf_actual = gdf_actual.copy()
    
    if year:
        gdf_actual = gdf_actual[gdf_actual[column_year]>year].copy()
    if conditions['plan']:
        if 'bike' in key:
            gdf_plan = gdf_plan[gdf_plan.BICYCLE_N.isin(
                conditions['plan']
            )].copy()
        if 'bus' in key:
            gdf_plan = gdf_plan[gdf_plan.TRANSIT_N.isin(
                conditions['plan']
            )].copy()
    if conditions['actual']:
            gdf_actual = gdf_actual[gdf_actual.Class.isin(
                conditions['actual']
            )].copy()
    
    print(f'radius = {radius} m')
    # widen linestring to polygon
    gdf_plan_buffer = buffer(
        gdf_plan, 
        radius = radius,
        proj = gdf_plan.geometry.crs
    )
    if test_map:
        display(
            show_map(
                gdf2 = gdf_plan_buffer[['geometry']],
                name2 = 'plan', 
                color2 = 'blue',
                gdf1 = gdf_actual[['geometry']],
                name1 = 'actual', 
                color1 = 'green',
            )
        )
    
    # intersect polygons of actual and planned paths
    gdf_implemented = gpd.overlay(
        gdf_actual, 
        gdf_plan_buffer, 
        how='intersection',
        keep_geom_type=False
    )
    gdf_implemented = get_lengths(gdf_implemented)
    gdf_implemented = gdf_implemented[
        round(gdf_implemented.length_m/(radius*2), 2)>1.01
    ].reset_index(drop=True)
    
    # map of buffered planned (green) and implemented (red) paths
    if print_map:
        display(
            show_map(
                gdf2 = gdf_implemented[['geometry']],
                name2 = 'implemented', 
                color2 = 'red',
                gdf1 = gdf_plan[['geometry']],
                name1 = 'plan', 
                color1 = 'green',
            )
        )
        
    print(
        'since', year, 'year:',
        round(get_lengths(gdf_implemented).length_m.sum()/1609.34, 2), 'miles implemented of ',
        round(get_lengths(gdf_plan).length_m.sum()/1609.34, 2), 'miles planned',
        # '\nimplemented lanes records:', gdf_implemented.shape[0],
        # '\nplanned lanes records:', gdf_plan.shape[0]        
    )
    
    percentage = round(
        get_lengths(gdf_implemented).length_m.sum()/
        get_lengths(gdf_plan).length_m.sum()*100, 
        2
    )

    return percentage, gdf_implemented

def get_lengths(gdf, column='geometry', proj='EPSG:4326', proj_calc='EPSG:3857'):
    '''
    return a gdf with a column of linestring length in proj_calc units (m)
    - gdf - GeoDataFrame
    - column - column to be used for length calculation if not "geometry"
    - proj - projection of the original dataset
    - proj_calc='EPSG:3857' - projection for calculations
    '''
    if column:
        gdf.set_geometry(column, inplace=True)
    # convert projection to proj_calc, if default - the units will be in meters
    gdf_m = gdf.to_crs(proj_calc)
    gdf['length_m'] = gdf_m.length
    
    return gdf

In [171]:
def intersect_gdf_crashes(name, gdf, crashes_gdf, radius):
    
    print('radius for buffer is', radius)
    print('intersecting ', name)
    gdf_buffer = buffer(gdf, radius = radius)
    intersect_crashes = gpd.GeoDataFrame()
    for idx, row in gdf_buffer.iterrows():
        intersection = crashes_gdf[
            crashes_gdf.covered_by(row['geometry'])
        ].copy()
        intersection['install_date'] = row.Install_Da
        intersect_crashes = pd.concat([
                intersect_crashes,
                intersection
            ])
        
    return intersect_crashes

# data

In [8]:
today = datetime.datetime.now()
print(today)
year = 2015 # we compare only mobility plan improvements, so after 2015
radius = 20 # our average street will be 40m wide

2024-01-23 00:40:02.853371


## get crashes data

In [9]:
crashes_raw = pd.read_csv('LAPD_crashes.csv')
display(crashes_raw.head(1))

Unnamed: 0,DR Number,Date Reported,Date Occurred,Time Occurred,Area ID,Area Name,Reporting District,Crime Code,Crime Code Description,MO Codes,Victim Age,Victim Sex,Victim Descent,Premise Code,Premise Description,Address,Cross Street,Location
0,190319651,08/24/2019,08/24/2019,450,3,Southwest,356,997,TRAFFIC COLLISION,3036 3004 3026 3101 4003,22.0,M,H,101.0,STREET,JEFFERSON BL,NORMANDIE AV,"(34.0255, -118.3002)"


In [150]:
cols_dict = {
    'DR Number': 'case_id',
    'Date Occurred': 'collision_date',
    'Address': 'primary_rd',
    'Cross Street': 'secondary_rd',
    'Location': 'location',
    'MO Codes': 'mo_codes',
    'Premise Description': 'loc_description'
}
crashes = crashes_raw.copy().rename(columns = cols_dict)
crashes = crashes[cols_dict.values()]
crashes['year'] = crashes.collision_date.str[-4:].astype(int)
crashes.collision_date = pd.to_datetime(crashes.collision_date).dt.date

new_cols_dict = {
    'injury': ['3024', '3025', '3026'],
    'death': ['3027'],
    'veh_ped': ['3003',  '3501'],
    'veh_bike': ['3008'],
    'veh_veh': ['3004']
}
for key, value in new_cols_dict.items():
    crashes[key] = col_from_code(crashes.mo_codes, value)

In [151]:
locations = [
    'STREET', 'SIDEWALK', 'ALLEY', 'DRIVEWAY',
    'MTA BUS', 'BUS STOP'
]

crashes_count = crashes[
    ((crashes.injury!=0)|(crashes.death!=0))&
    ((crashes.veh_ped!=0)|(crashes.veh_bike!=0)|(crashes.veh_veh!=0))&
    (crashes.loc_description.isin(locations))&
    (crashes.year>2015)
].copy().reset_index(drop=True)

crashes_count.location = crashes_count.location.apply(lambda x: get_coords(x))
crashes_count[['loc_y', 'loc_x']] = crashes_count.location.to_list()

In [152]:
print(crashes.shape[0], crashes_count.shape[0])
display(crashes_count.head())

602328 119099


Unnamed: 0,case_id,collision_date,primary_rd,secondary_rd,location,mo_codes,loc_description,year,injury,death,veh_ped,veh_bike,veh_veh,loc_y,loc_x
0,190319651,2019-08-24,JEFFERSON BL,NORMANDIE AV,"[34.0255, -118.3002]",3036 3004 3026 3101 4003,STREET,2019,1,0,0,0,1,34.0255,-118.3002
1,190319695,2019-08-30,MARTIN LUTHER KING JR,ARLINGTON AV,"[34.0108, -118.3182]",0605 4025 3037 3004 3025 3101,STREET,2019,1,0,0,0,1,34.0108,-118.3182
2,190411883,2019-07-06,MAIN,JOHNSTON,"[34.066, -118.2102]",3101 3401 3701 3003 3025 3029,STREET,2019,1,0,1,0,0,34.066,-118.2102
3,190514552,2019-08-23,I ST,KING AV,"[33.7807, -118.2756]",0605 3037 3003 3026 3029 3101,STREET,2019,1,0,1,0,0,33.7807,-118.2756
4,190319702,2019-08-31,WESTERN AV,36TH ST,"[34.0228, -118.3089]",3036 4025 3004 3026 3101,STREET,2019,1,0,0,0,1,34.0228,-118.3089


In [153]:
#create geodataframe
crashes_gdf = gpd.GeoDataFrame(
    crashes_count[[
        'case_id', 'collision_date', 'year',
        'injury', 'death', 'veh_ped', 'veh_bike', 'veh_veh'
    ]],
    geometry = gpd.points_from_xy(
        x = crashes_count.loc_x,
        y = crashes_count.loc_y,
        crs = 'EPSG:4326')
)

## get mobility data with dates of implementation

We can't use previously generated data because the goal was to compare overlap of implemented vs planned against original planned paths, so it was more correct to connect actual data into one buffered shape. Here we need actual data because it has dates when the path was built, so it means we need vice-versa:
1) merge planned data to a buffered shapes
2) overlap it with actual data
3) buffer the result again
4) overlap with the crashes dataset

### Protected/unprotected Bike lanes:

In [14]:
bike_plan_file = open('../hsla_mobility_plan/Bicycle_Enhanced_Network.geojson')
bike_plan_geo_raw = gpd.read_file(bike_plan_file)
bike_actual_file = open('../hsla_mobility_plan/bike_actual.geojson')
bike_actual_geo_raw = gpd.read_file(bike_actual_file)

In [15]:
bike_plan_geo = bike_plan_geo_raw.copy()
bike_actual_geo = bike_actual_geo_raw.copy()
print(
    'Mobility plan records:',
    bike_plan_geo.shape[0],
    '\nActual bike lanes records:',
    bike_actual_geo.shape[0],
    '\nActual lanes without construction date:',
    bike_actual_geo[bike_actual_geo.Install_Da.isnull()].shape[0]
)
display(bike_plan_geo.head(1))
display(bike_actual_geo.tail(1))

Mobility plan records: 10324 
Actual bike lanes records: 7187 
Actual lanes without construction date: 46


Unnamed: 0,OBJECTID,CF,CASE_NUM,SOURCE,ADOPTDATE,COMMENTS,CPA_1,CPA_2,BICYCLE_N,created_user,created_date,last_edited_user,last_edited_date,Shape__Length,geometry
0,1,15-0719,CPC-2013-910-GPA-SP-CA-MSC,,2015-08-11 00:00:00+00:00,Updated per Timmy. Middle out consistency edit.,SVY,,2,,1970-01-01 00:00:00+00:00,,1970-01-01 00:00:00+00:00,450.88124,"LINESTRING (-118.35923 34.22239, -118.36181 34..."


Unnamed: 0,OBJECTID_12,SECT_ID,STREET_DES,OTHER,STR_FROM,STR_TO,ST_TYPE,WIDTH_,CL_Miles,Lane_Miles,CD,Install_Da,Network,Bikeway,Region,One_Dir_BW,Retire,Class,Limits,ASSETID,STR_NAME,FY,Project_Type,RetireType,InputDate,CHECKED,flag,Upgraded_From,Update_Date,Comments,Tag,Level_Protection,Level_ProtectionNotes,Level_Protetion2,Project_Name,Year_,MP2035_Network,INTERU_FLAG,Shape__Length,geometry
7186,7178,2112100,Modified Avenue I,,STANFORD AV,AVALON BL,,41.0,0.125936,0.125936,9,2023-10-06 00:00:00+00:00,,Protected Bike Lane,,,,4,Avalon Blvd to Central Ave,65750.0,GAGE AVE,FY23/24,NEW,,2023-10-24 00:00:00+00:00,,0.0,,2023-10-24 00:00:00+00:00,,,0,,,Avalon/MLK/Gage Corridors MAT Program,2023.0,,,664.941891,"LINESTRING (-118.26520 33.98184, -118.26301 33..."


In [16]:
bike_conditions = {
    'protected bike lane': {'plan': [1], 'actual': [4]},
    'unprotected bike lane': {'plan': [2, 3], 'actual': [2]}
}
results = []
result_gdfs = []

for key in bike_conditions.keys():
    percentage, result_gdf = overlap_plan_actual(
        name = key,
        conditions = bike_conditions[key],
        gdf_actual = bike_actual_geo[[
                        'Class',
                        'Year_',
                        'Install_Da',
                        'geometry'
                    ]],
        gdf_plan = bike_plan_geo[[
                        'BICYCLE_N',
                        'geometry'
                   ]],
        print_map = False,
        test_map = False,
        year = year,
        column_year = 'Year_'
    )
    results.append([key, year, percentage])
    result_gdfs.append([key, year, result_gdf])
    
    print(
        percentage,
        f'% of {key} implemented after {year}',
        '\n'
    )
        
    print('_______')    

protected bike lane for year after 2015:
radius = 10 m
since 2015 year: 29.28 miles implemented of  310.03 miles planned
9.44 % of protected bike lane implemented after 2015 

_______
unprotected bike lane for year after 2015:
radius = 10 m
since 2015 year: 45.06 miles implemented of  790.77 miles planned
5.7 % of unprotected bike lane implemented after 2015 

_______


### Bike Paths vs class 1

In [17]:
bike_plan_paths = open('../hsla_mobility_plan/Bicycle_Enhanced_Network_Paths.geojson')
bike_paths_geo_raw = gpd.read_file(bike_plan_paths)
bike_path_geo = bike_paths_geo_raw.copy()
print(bike_path_geo.shape[0])
display(bike_path_geo.head())

146


Unnamed: 0,OBJECTID_12,OBJECTID_1,OBJECTID,bikewaytyp,exbikeway,BIKEWYNAME,MILEAGE,COUNDIST,NETWORK,ARTERIAL,Shape_Leng,Shape_Le_1,Shape__Length,geometry
0,1,1,1,Planned BP,0,Valley LA River Path,0.32488,4,GREEN,0,1715.368583,1715.368583,631.219551,"LINESTRING (-118.35051 34.14248, -118.34966 34..."
1,2,2,2,Planned BP,0,LA River Path,1.350428,1,GREEN,1,7130.258445,7130.258445,2632.455613,"LINESTRING (-118.22734 34.08124, -118.22520 34..."
2,3,3,3,Planned BP,0,Central LA River-Cypress Ave Connector,0.356904,1,GREEN,0,1884.455155,1884.455155,694.06276,"LINESTRING (-118.23389 34.09827, -118.23886 34..."
3,4,4,4,Planned BP,0,Arroyo Seco Connection,1.547634,1,GREEN,1,8171.506899,8171.506899,3009.990132,"LINESTRING (-118.22568 34.07960, -118.22455 34..."
4,5,5,5,Planned BP,0,Valley LA River Path,2.442823,4,GREEN,0,12898.104094,12898.104094,4748.604236,"MULTILINESTRING ((-118.34502 34.14238, -118.34..."


In [18]:
bike_class1_conditions = {
    'class1 bike lane': {'plan': None, 'actual': [1]},
}

for key in bike_class1_conditions.keys():
    percentage, result_gdf = overlap_plan_actual(
        name = key,
        gdf_plan = bike_path_geo[['OBJECTID', 'geometry']],
        gdf_actual = bike_actual_geo[[
                                    'Class',
                                    'Year_',
                                    'Install_Da',
                                    'geometry'
                    ]],
        conditions = {'plan': None, 'actual': [1]},
        radius = 120, # take bigger radius beacuse of the data quality
        print_map = False,
        test_map = False,
        year = year,
        column_year = 'Year_'
            )
    print(f'{percentage}% of bike path implemented after {year}')
    results.append([key, year, percentage])
    result_gdfs.append([key, year, result_gdf])
    print('_______')

class1 bike lane for year after 2015:
radius = 120 m
since 2015 year: 5.59 miles implemented of  211.17 miles planned
2.65% of bike path implemented after 2015
_______


### NEN vs class 3

In [19]:
bike_nen_file = open('../hsla_mobility_plan/Neighborhood_Enhanced_Network.geojson')
bike_nen_geo_raw = gpd.read_file(bike_nen_file)
bike_nen_geo = bike_nen_geo_raw[['OBJECTID', 'geometry']].copy()
print(bike_nen_geo.shape[0])
display(bike_nen_geo.head())

10004


Unnamed: 0,OBJECTID,geometry
0,1,"LINESTRING (-118.25885 34.08532, -118.25835 34..."
1,2,"LINESTRING (-118.37527 33.94525, -118.37765 33..."
2,3,"LINESTRING (-118.25972 34.08376, -118.25969 34..."
3,4,"LINESTRING (-118.63962 34.23757, -118.63968 34..."
4,5,"LINESTRING (-118.27829 34.09579, -118.27858 34..."


In [20]:
bike_nen_conditions = {
    'NEN bike lane': {'plan': None, 'actual': [3]},
}

for key in bike_nen_conditions.keys():
    percentage, result_gdf = overlap_plan_actual(
        name = key,
        conditions = bike_nen_conditions[key],
        gdf_actual = bike_actual_geo[[
                                    'Class',
                                    'Year_',
                                    'Install_Da',
                                    'geometry'
                    ]],
        gdf_plan = bike_nen_geo,
        print_map = False,
        year = year,
        column_year = 'Year_',
        test_map = False
    )
    results.append([key, year, percentage])
    result_gdfs.append([key, year, result_gdf])
    
    print(
        percentage,
        f'% of {key} implemented after {year}',
        '\n'
    )
    
    print('_______')   

NEN bike lane for year after 2015:
radius = 10 m
since 2015 year: 9.92 miles implemented of  988.08 miles planned
1.0 % of NEN bike lane implemented after 2015 

_______


### all mobility implemented

In [149]:
mobility_implemented_bike = gpd.GeoDataFrame()
for df in result_gdfs:
    gdf = df[2].copy()
    gdf['name'] = df[0].replace(' ', '_') + '_implemented'
    display(gdf.head(1))
    if mobility_implemented_bike.shape[0]==0:
        mobility_implemented_bike = gdf[['name', 'Install_Da', 'geometry']].copy()
    else:
        mobility_implemented_bike = pd.concat([
            mobility_implemented_bike,
            gdf[['name', 'Install_Da', 'geometry']]]
        )
mobility_implemented_bike.reset_index(inplace=True, drop=True)
mobility_implemented_bike.Install_Da = pd.to_datetime(
    mobility_implemented_bike.Install_Da
).dt.date
display(mobility_implemented_bike.head(1))

Unnamed: 0,Class,Year_,Install_Da,geometry,length_m,name
0,4,2023.0,2023-01-21 00:00:00+00:00,"LINESTRING (-118.25410 33.94492, -118.25411 33...",106.938474,protected_bike_lane_implemented


Unnamed: 0,Class,Year_,Install_Da,geometry,length_m,name
0,2,2019.0,2019-03-01 00:00:00+00:00,"LINESTRING (-118.23294 34.04017, -118.23283 34...",131.00263,unprotected_bike_lane_implemented


Unnamed: 0,Class,Year_,Install_Da,geometry,length_m,name
0,1,2016.0,2016-01-01 00:00:00+00:00,"LINESTRING (-118.39113 34.02801, -118.39431 34...",1318.723223,class1_bike_lane_implemented


Unnamed: 0,Class,Year_,Install_Da,geometry,length_m,name
0,3,2021.0,2021-10-06 00:00:00+00:00,"LINESTRING (-118.51942 34.02826, -118.51820 34...",227.220533,NEN_bike_lane_implemented


Unnamed: 0,name,Install_Da,geometry
0,protected_bike_lane_implemented,2023-01-21,"LINESTRING (-118.25410 33.94492, -118.25411 33..."


# get stats

In [169]:
for radius in [20, 40]:
    impl_crashes = intersect_gdf_crashes(
        name = 'implemented_crashes',
        gdf = mobility_implemented_bike,
        crashes_gdf = crashes_gdf,
        radius = radius
    )
    #display(impl_crashes.tail())
    impl_crashes.reset_index(inplace=True, drop=True)
    num_preventable = impl_crashes[
          impl_crashes.collision_date<impl_crashes.install_date
      ].shape[0]
    print(f'{num_preventable} preventable of {impl_crashes.shape[0]} crashes')

radius for buffer is 20
intersecting  implemented_crashes
3329 preventable of 4697 crashes
radius for buffer is 40
intersecting  implemented_crashes
3632 preventable of 5065 crashes


In [172]:
impl_crashes.head(2)

Unnamed: 0,case_id,collision_date,year,injury,death,veh_ped,veh_bike,veh_veh,geometry,install_date
0,211807054,2021-03-02,2021,1,0,1,0,0,POINT (-118.25410 33.94490),2023-01-21
1,201810463,2020-05-01,2020,1,0,0,0,1,POINT (-118.25410 33.94490),2023-01-21


In [177]:
m = show_maps({
    'crashes': impl_crashes[['geometry']],
    'implemented': mobility_implemented_bike[['geometry']]
})
m

green: crashes
blue: implemented
