# Healthy Streets of Los Angeles: Injuries/Deaths vs. Implemented Mobility Plan, 2022
This project compares the number of car crashes in 2022 that resulted in deaths/injuries on streets with and without Mobility Plan safety improvements.

Sources:
* Injuries/Deaths in the City of LA https://tims.berkeley.edu/
* Implemented Mobility Plan shapefiles (see hsla_mobility_plan project)

Assumptions:
* Location only City of Los Angeles
* year is 2022
* Excluding freeways (STATE_ROUTE is Null)

Output:
* deaths/injuries on LA streets in 2022 that happened on streets missing Mobility Plan safety improvements
* the analysis can be restricted to streets where the Mobility Plan was implemented after 2015 - this is controlled by the flag "after_2015"; set it to False to include all implemented streets

Any questions - @sunchugasheva

In [None]:
import pandas as pd
import datetime
import geopandas as gpd
from shapely.geometry import Polygon, LineString, Point
import folium

# raise the pandas display limits so display() shows wide/long tables in full
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_columns', 1000)

## functions

In [None]:
def show_map(gdf_names):
    '''
    show a number of gdfs on one folium map, each with its own color
    - gdf_names - dictionary with format: {'name': gdf}

    prints a "color: name" line for every layer that is drawn and
    returns the folium Map; empty gdfs are skipped
    '''
    colors = ['green', 'blue', 'red', 'orange', 'purple', 'yellow', 'magenta']
    f = folium.Figure(width=1000, height=500)
    # map center is hard-coded; NOTE(review): presumably downtown LA - confirm
    m = folium.Map(location = [34.05, -118.24], zoom_start=10).add_to(f)

    for i, (name, gdf) in enumerate(gdf_names.items()):
        # nothing to draw, and no first geometry to inspect below
        if gdf.empty:
            continue
        # reuse the palette from the start instead of failing on the 8th layer
        color = colors[i % len(colors)]
        print(f'{color}: {name}')
        # positional access: the index may not contain label 0, or may contain
        # it several times after frames were concatenated
        first_geometry = gdf['geometry'].iloc[0]
        if first_geometry.geom_type != 'Point':
            gdf.explore(
                m = m,
                name = name,
                color = color
            )
        else:
            # point layers are added as plain GeoJson (default styling)
            folium.features.GeoJson(gdf).add_to(m)

    # alternative base map, switched off until picked in the layer control
    folium.TileLayer(
        'CartoDB positron',
        show=False
    ).add_to(m)
    folium.LayerControl().add_to(m)

    return m

In [None]:
def buffer(gdf, radius, proj='EPSG:4326', proj_calc='EPSG:3857'):
    '''
    turn the geometries of a gdf into merged buffer polygons
    - gdf - GeoDataFrame, has column "geometry"; it is left unchanged
    - radius - buffer distance in proj_calc units (meters for the default);
      NOTE(review): EPSG:3857 stretches distances away from the equator,
      so the distance on the ground is likely smaller - confirm this is OK
    - proj - projection the returned polygons are expressed in
    - proj_calc='EPSG:3857' - projection for calculations

    returns a new GeoDataFrame that only has a geometry column, one row
    per connected buffered area
    '''
    # buffer in the calculation projection (cap_style=2: flat line ends),
    # then bring the polygons back to the data projection
    buffered = gdf.to_crs(proj_calc).buffer(radius, cap_style=2).to_crs(proj)

    # merge all intersecting buffered polygons into one shape and split it
    # back into its separate parts
    merged = gpd.GeoDataFrame(geometry=[buffered.unary_union], crs=proj)
    return merged.explode(index_parts=False).reset_index(drop=True)

In [None]:
def get_tot_length_miles(
    gdf, column='geometry', proj='EPSG:4326', proj_calc='EPSG:3857'):
    '''
    return the total length of all geometries of a gdf in miles,
    rounded to 2 digits
    - gdf - GeoDataFrame; NOTE it is modified in place: "column" becomes the
      active geometry and a per-row column "length_mile" is added
    - column - column to be used for length calculation if not "geometry"
    - proj - projection of the original dataset (not used, the projection
      stored on gdf is what gets converted)
    - proj_calc='EPSG:3857' - projection for calculations, its units are
      assumed to be meters;
      NOTE(review): EPSG:3857 stretches lengths away from the equator, so
      the mileage is likely overstated - confirm this is acceptable
    '''
    if column:
        gdf.set_geometry(column, inplace=True)
    # measure in the requested calculation projection
    # (this used to be hard-coded to EPSG:3857, ignoring proj_calc)
    gdf_m = gdf.to_crs(proj_calc)
    # 1609.34 meters in a mile
    gdf['length_mile'] = gdf_m.length/1609.34

    return round(gdf.length_mile.sum(), 2)

def add_stat(dict_stat, stats):
    '''
    append one value to every list stored in dict_stat
    dict_stat - dictionary of lists
    stats - list of stats to unpack IN THE ORDER OF KEYS;
            values beyond the number of keys are ignored

    returns the same dict_stat (it is modified in place)
    raises IndexError if stats has fewer values than dict_stat has keys;
    in that case nothing is appended, so the lists keep equal lengths
    '''
    # check first: appending to some lists and then failing would leave
    # the dictionary ragged and unusable as a dataframe
    if len(stats) < len(dict_stat):
        raise IndexError(
            f'expected {len(dict_stat)} stats, got {len(stats)}'
        )
    for values, stat in zip(dict_stat.values(), stats):
        values.append(stat)
    return dict_stat

def crashes_type(df, col):
    '''
    keep only the rows where column col is not 0
    df - original dataframe, it is left unchanged
    col - column that we are interested in

    returns a new dataframe with a fresh 0..n-1 index
    '''
    nonzero = df[col].ne(0)
    selected = df.loc[nonzero].copy()
    return selected.reset_index(drop=True)

def intersect_gdf_crashes(name, gdf, crashes_gdf, radius, stat_gdfs):
    '''
    select the crashes that fall on the buffered geometries of gdf
    name - label stored together with the result in stat_gdfs
    gdf - GeoDataFrame of streets/paths to be buffered
    crashes_gdf - GeoDataFrame of crash points
    radius - buffer radius passed to buffer()
    stat_gdfs - dictionary of lists with keys in the order
                name, radius, gdf; the result is appended to it

    returns a GeoDataFrame with the matching rows of crashes_gdf
    (original index labels are kept)
    '''
    print('intersecting ', name)
    gdf_buffer = buffer(gdf, radius = radius)

    # collect the matches per buffer polygon and concatenate once at the end
    # instead of growing a dataframe inside the loop
    # NOTE(review): a crash covered by two separate polygons would be listed
    # twice (same as before) - confirm that cannot happen with merged buffers
    matches = [
        crashes_gdf[crashes_gdf.covered_by(polygon)]
        for polygon in gdf_buffer['geometry']
    ]
    if matches:
        intersect_crashes = pd.concat(matches)
    else:
        # no buffer polygons at all: return an empty frame that still has
        # the crash columns, so later column lookups keep working
        intersect_crashes = crashes_gdf.iloc[0:0].copy()

    add_stat(stat_gdfs, [name, radius, intersect_crashes])

    return intersect_crashes

In [None]:
def _ratio(numerator, denominator):
    '''
    return numerator/denominator rounded to 2 digits,
    or NaN when the denominator is 0
    '''
    if not denominator:
        return float('nan')
    return round(numerator/denominator, 2)


def get_stats(
    crashes_gdf, impl_gdf, unimpl_gdf,
    radius, stat_gdfs, stats, result
):
    '''
    compare crashes on implemented vs unimplemented streets for one radius
    - crashes_gdf - GeoDataFrame of crash points with columns
      NUMBER_KILLED, NUMBER_INJURED, COUNT_SEVERE_INJ
    - impl_gdf - GeoDataFrame of implemented streets
    - unimpl_gdf - GeoDataFrame of planned but not implemented streets
    - radius - buffer radius used to match crashes to streets
    - stat_gdfs - dictionary of lists with keys in the order
      name, radius, gdf; intermediate GeoDataFrames are stored here
    - stats - dictionary of lists with keys in the order
      stat, value, unit, radius; intermediate numbers are stored here
    - result - dictionary of lists with keys parameter, radius,
      impl_to_total, unimpl_to_total, imp_permile, unimpl_permile

    all three dictionaries are extended in place; result is also returned.
    ratios with a zero denominator are stored as NaN
    '''

    print('radius for buffer is', radius)
    # get list of crashes for implemented/unimplemented streets
    impl_crashes = intersect_gdf_crashes(
        name = 'implemented_crashes',
        gdf = impl_gdf,
        crashes_gdf = crashes_gdf,
        radius = radius,
        stat_gdfs = stat_gdfs
    )
    unimpl_crashes = intersect_gdf_crashes(
        name = 'unimplemented_crashes',
        gdf = unimpl_gdf,
        crashes_gdf = crashes_gdf,
        radius = radius,
        stat_gdfs = stat_gdfs
    )

    # get lengths for implemented/unimplemented streets
    length_impl_miles = get_tot_length_miles(impl_gdf)
    print(length_impl_miles, 'miles implemented')
    add_stat(
        stats,
        ['length_implemented', length_impl_miles, 'mile', radius]
    )

    length_unimpl_miles = get_tot_length_miles(unimpl_gdf)
    print(length_unimpl_miles, 'miles unimplemented')
    add_stat(
        stats,
        ['length_unimplemented', length_unimpl_miles, 'mile', radius]
    )

    cols_calc = ['NUMBER_KILLED', 'NUMBER_INJURED', 'COUNT_SEVERE_INJ']

    for col in cols_calc:
        print('calculating stats for', col)
        # crashes where the column is not 0, per group
        impl = crashes_type(impl_crashes, col)
        unimpl = crashes_type(unimpl_crashes, col)
        total = crashes_type(crashes_gdf, col)
        # store in the dictionary that was passed in (not a global one),
        # with values in the order of its keys: name, radius, gdf
        add_stat(stat_gdfs, ['impl_'+col, radius, impl])
        add_stat(stat_gdfs, ['unimpl_'+col, radius, unimpl])
        add_stat(stat_gdfs, ['total_'+col, radius, total])

        impl_count = impl.shape[0]
        unimpl_count = unimpl.shape[0]
        total_count = total.shape[0]
        add_stat(stats, ['impl_'+col, impl_count, 'crashes', radius])
        add_stat(stats, ['unimpl_'+col, unimpl_count, 'crashes', radius])
        add_stat(stats, ['total_'+col, total_count, 'crashes', radius])

        # compute everything before touching result, so its lists always
        # grow together
        impl_total = _ratio(impl_count, total_count)
        unimpl_total = _ratio(unimpl_count, total_count)
        impl_mile = _ratio(impl_count, length_impl_miles)
        unimpl_mile = _ratio(unimpl_count, length_unimpl_miles)

        result['parameter'].append(col)
        result['radius'].append(radius)
        result['impl_to_total'].append(impl_total)
        result['unimpl_to_total'].append(unimpl_total)
        result['imp_permile'].append(impl_mile)
        result['unimpl_permile'].append(unimpl_mile)

    return result

# data

In [None]:
# date stamp (YYYY_MM_DD) used in the names of the exported files
today = f"{datetime.datetime.now():%Y_%m_%d}"
print(today)

# our average street will be 40m wide
radius = 20
# True - use only the Mobility Plan files implemented after 2015
after_2015 = False
# suffix that selects the matching input files and marks the output files
name_2015 = '_since_2015' if after_2015 else ''

## data preparation

### get crashes data

In [None]:
# raw crash records; the file is expected next to the notebook
crashes_raw = pd.read_csv('Crashes.csv')
# peek at one row to see the available columns
display(crashes_raw.head(1))

In [None]:
# columns kept from the raw crash table
crashes_col = [
    'CASE_ID',
    'COUNTY',
    'CITY',
    'ACCIDENT_YEAR',
    'POINT_X',
    'POINT_Y',
    'NUMBER_KILLED',
    'NUMBER_INJURED',
    'COUNT_SEVERE_INJ',
]

# rows kept: City of LA, no state route recorded (not a highway), year 2022
keep_rows = (
    crashes_raw['CITY'].eq('LOS ANGELES')
    & crashes_raw['STATE_ROUTE'].isna()
    & crashes_raw['ACCIDENT_YEAR'].eq(2022)
)
crashes = crashes_raw.loc[keep_rows, crashes_col].copy()
crashes = crashes.reset_index(drop=True)
display(crashes.head(1))

In [None]:
casualty_cols = ['NUMBER_KILLED', 'NUMBER_INJURED', 'COUNT_SEVERE_INJ']

# records where nobody was killed or injured
# (it looks like there are no such records)
display(crashes[crashes[casualty_cols].eq(0).all(axis=1)])

# records where any of the casualty counts is missing
display(crashes[crashes[casualty_cols].isnull().any(axis=1)])

In [None]:
# turn the crash table into a geodataframe of points:
# POINT_X/POINT_Y are used as x/y coordinates in EPSG:4326
crash_points = gpd.points_from_xy(crashes['POINT_X'], crashes['POINT_Y'])
crashes_gdf = gpd.GeoDataFrame(
    crashes[['CASE_ID', 'NUMBER_KILLED', 'NUMBER_INJURED', 'COUNT_SEVERE_INJ']],
    geometry=crash_points,
    crs='EPSG:4326',
)

In [None]:
# number of crashes with at least one death / at least one injury
int((crashes.NUMBER_KILLED > 0).sum()), int((crashes.NUMBER_INJURED > 0).sum())

In [None]:
# total number of crashes kept after filtering
len(crashes)

### get mobility_implemented data

In [None]:
# implemented Mobility Plan layers; the trailing _YYYY_MM_DD is the date
# stamp of the export in the hsla_mobility_plan project
files_implemented = [
    f'unprotected_bike_lane_implemented{name_2015}_2023_11_29',
    f'protected_bike_lane_implemented{name_2015}_2023_11_29',
    f'NEN_bike_lane_implemented{name_2015}_2023_11_29',
    f'class1_bike_lane_implemented{name_2015}_2023_12_06',
    f'protected_bus_lane_implemented{name_2015}_2023_11_29'
]

implemented_parts = []
for file in files_implemented:
    # the context manager closes the file handle once it has been read
    with open(f'../hsla_mobility_plan/{file}.geojson') as geo_file:
        gdf = gpd.read_file(geo_file)
    # layer name = file name without the 11-character "_YYYY_MM_DD" suffix
    gdf['name'] = file[:-11]
    display(gdf.head(1))
    implemented_parts.append(gdf[['name', 'length_m', 'geometry']])

# stack all layers at once; index labels of the single files are kept,
# so they repeat across layers
mobility_implemented = pd.concat(implemented_parts)

### get mobility_unimplemented data

In [None]:
# planned Mobility Plan networks
files_plan = [
    'Bicycle_Enhanced_Network_Paths',
    'Bicycle_Enhanced_Network',
    'Neighborhood_Enhanced_Network',
    'Transit_Enhanced_Network'
]

plan_parts = []
for file in files_plan:
    # the context manager closes the file handle once it has been read
    with open(f'../hsla_mobility_plan/{file}.geojson') as geo_file:
        gdf = gpd.read_file(geo_file)
    gdf['name'] = file
    display(gdf.head(1))
    print(gdf.shape)
    plan_parts.append(gdf[['name', 'Shape__Length', 'geometry']])

# stack all networks at once; index labels of the single files are kept,
# so they repeat across networks
mobility_plan = pd.concat(plan_parts)

In [None]:
 # subtract the buffered implemented paths from the planned paths
# widen the implemented paths into merged polygons (radius 15)
implemented_buffer = buffer(mobility_implemented, radius = 15)
# keep the parts of the planned network that lie outside those polygons:
# what is planned but not implemented
mobility_unimplemented = gpd.overlay(
    mobility_plan, 
    implemented_buffer, 
    how='difference',
    # do not restrict the result to the geometry type of mobility_plan
    keep_geom_type=False
)
display(mobility_unimplemented.head())

In [None]:
# save both layers as GeoJSON one directory above the notebook;
# name_2015 marks files built from the "since 2015" inputs
mobility_implemented.to_file(
    f'../mobility_implemented{name_2015}.geojson',
    driver='GeoJSON'
)
mobility_unimplemented.to_file(
    f'../mobility_unimplemented{name_2015}.geojson',
    driver='GeoJSON'
)

In [None]:
# number of planned rows vs rows left after removing implemented parts
len(mobility_plan), len(mobility_unimplemented)

## calculation

so the structure of the data we're getting is:
* stats - a dictionary with the keys listed below that is converted into a dataframe of all the different numbers we use for the calculation. This is mostly to keep an eye on the numbers and check that they make sense
* gdfs - a dictionary where all geodataframes are stored along the way, so we can export the ones we need
* result - a dictionary with the keys listed below that is converted into a dataframe; effectively, these are the deliverables

we do have a radius everywhere - depending on how wide (2*radius) we count the street to be (this is mostly important for intersections), we'll get different numbers of crashes

In [None]:
# containers filled by get_stats: one empty list per column
stats = {key: [] for key in ('stat', 'value', 'unit', 'radius')}
gdfs = {key: [] for key in ('name', 'radius', 'gdf')}
cols_result = [
    'parameter',
    'radius',
    'impl_to_total',
    'unimpl_to_total',
    'imp_permile',
    'unimpl_permile',
]
result = {key: [] for key in cols_result}

# run the comparison for each street half-width; every call appends its
# rows to the containers above
for radius in [15, 20]:
    get_stats(
        crashes_gdf=crashes_gdf,
        impl_gdf=mobility_implemented,
        unimpl_gdf=mobility_unimplemented,
        radius=radius,
        stat_gdfs=gdfs,
        stats=stats,
        result=result,
    )

# deliverable table, followed by the intermediate numbers
result_df = pd.DataFrame(result)
display(result_df)
stats_df = pd.DataFrame(stats)
display(stats_df)

In [None]:
# write the deliverable table and the intermediate numbers as CSV next to
# the notebook; file names carry the name_2015 suffix and today's date
result_df.to_csv(
    f'2022_crashes_mp_unimplemented{name_2015}_{today}.csv',
    index=False
)
stats_df.to_csv(
    f'stats_2022_crashes_mp_unimplemented{name_2015}_{today}.csv',
    index=False
)

In [None]:
# report, per radius, how many crashes were matched to implemented and to
# unimplemented streets; the entries are picked by name rather than by
# position, so the report stays correct when the list of radii or the
# analysed columns change
crash_layers = ('implemented_crashes', 'unimplemented_crashes')
for layer_name, layer_radius, layer_gdf in zip(
    gdfs['name'], gdfs['radius'], gdfs['gdf']
):
    if layer_name not in crash_layers:
        continue
    print(
        f"for radius {layer_radius}:\n"
        f"    {layer_gdf.shape[0]} {layer_name} total happened in 2022"
    )

# troubleshoot

In [None]:
# first stored geodataframe; with the run above this is the
# 'implemented_crashes' entry of the first radius
test = gdfs['gdf'][0]

In [None]:
# look at the first rows of the picked geodataframe
test.head()

In [None]:
# look at the first rows of the raw crash table
crashes_raw.head()

In [None]:
# attach the collision date from the raw table to the picked crashes;
# left join on CASE_ID keeps every row of test
test = test.merge(
    crashes_raw[['CASE_ID', 'COLLISION_DATE']],
    on='CASE_ID',
    how = 'left'
)

In [None]:
# test.to_file('impl_crashes_2022.geojson', driver='GeoJSON')