# Healthy Streets of Los Angeles: Injuries/Deaths That Could Have Been Prevented with an Implemented Mobility Plan (2022)
This project estimates the number of car crashes that could have been prevented had the Mobility Plan been implemented earlier, going back to 2015.


Sources:
* Injuries/Deaths in the City of LA https://data.lacity.org/Public-Safety/Traffic-Collision-Data-from-2010-to-Present/d5tf-ez2w
* Implemented Mobility Plan shapefiles (see hsla_mobilty_plan project)

Assumptions:
* Location: City of Los Angeles only
* years are 2015-2023
* Excluding freeways (STATE_ROUTE is Null)

Output:
* the number of deaths/injuries on LA streets that occurred on streets lacking Mobility Plan safety improvements at the time

Any questions - @sunchugasheva

In [None]:
import pandas as pd
import datetime
import geopandas as gpd
from shapely.geometry import Polygon, LineString, Point
import folium

# Raise pandas' display limits so long/wide frames are not truncated when shown.
pd.options.display.max_rows = 10000
pd.options.display.max_columns = 1000

## functions

In [None]:
def col_from_code(col, code):
    '''
    flag the rows of a string column that contain any of the given codes
    - col - pandas Series of strings; missing values count as "no match"
    - code - list of code strings; they are joined with "|" and matched as
      a regular expression, so a row matches if it contains any of them

    returns an integer Series (1 = match, 0 = no match) aligned with col
    '''
    if not code:
        # An empty pattern would match every non-null row; with no codes to
        # look for, nothing should be flagged.
        return pd.Series(0, index=col.index, name=col.name, dtype=int)

    # na=False maps missing values straight to False, so the boolean result
    # can be cast directly without a replace/fillna pass on an object column.
    matches = col.str.contains('|'.join(code), na=False)

    return matches.astype(int)

def get_coords(line):
    '''
    parse a coordinate string such as "(34.05, -118.24)" into a list of floats
    - line - string of comma-separated numbers wrapped in one opening and
      one closing character

    returns the numbers as floats, in the order they appear in the string
    '''
    # Split on the bare comma: float() ignores surrounding whitespace, so
    # "a, b", "a,b" and padded input all parse the same way.
    return [float(part) for part in line.strip()[1:-1].split(',')]

In [None]:
def show_map(gdf_names):
    '''
    show a number of gdfs on one folium map, each layer with its own color
    - gdf_names - dictionary with format: {'name': gdf}

    colors are assigned in dictionary order and reused cyclically when there
    are more layers than colors; empty gdfs are skipped
    returns the folium Map
    '''
    colors = ['green', 'blue', 'red', 'orange', 'purple', 'yellow', 'magenta']
    f = folium.Figure(width=1000, height=500)
    m = folium.Map(location=[34.05, -118.24], zoom_start=10).add_to(f)

    for i, (name, gdf) in enumerate(gdf_names.items()):
        if gdf.empty:
            # nothing to draw, and there is no first geometry to inspect
            print(f'skipped (empty): {name}')
            continue

        # wrap around instead of running off the end of the color list
        color = colors[i % len(colors)]
        print(f'{color}: {name}')

        # positional access: the index does not need to contain the label 0
        first_geom = gdf['geometry'].iloc[0]
        if first_geom.geom_type != 'Point':
            gdf.explore(
                m=m,
                name=name,
                color=color
            )
        else:
            # points go in as a plain GeoJson layer with default markers
            # (color is not applied); the name labels it in the layer control
            folium.features.GeoJson(gdf, name=name).add_to(m)

    folium.TileLayer(
        'CartoDB positron',
        show=False
    ).add_to(m)
    folium.LayerControl().add_to(m)

    return m

In [None]:
def buffer(gdf, radius, proj='EPSG:4326', proj_calc='EPSG:3857'):
    '''
    convert a gdf of linestrings into a gdf of merged buffer polygons
    - gdf - GeoDataFrame, has column "geometry"; it is not modified
    - radius - radius of the buffer, in the units of proj_calc
    - proj - projection of the original dataset, also used for the result
    - proj_calc='EPSG:3857' - projection for calculations

    returns a GeoDataFrame in proj with one row per merged polygon
    '''
    # NOTE(review): EPSG:3857 (Web Mercator) distances are stretched away
    # from the equator, so radius is only approximately ground meters there -
    # confirm this is acceptable or pass a local projected CRS as proj_calc.
    gdf_calc = gdf.to_crs(proj_calc)
    # cap_style=2 is passed through unchanged (flat line ends per shapely docs)
    buffered = gdf_calc.buffer(radius, cap_style=2).to_crs(proj)

    # merge all intersecting buffered polygons; union_all replaces the
    # deprecated unary_union on newer geopandas, so use it when it exists
    if hasattr(buffered, 'union_all'):
        merged = buffered.union_all()
    else:
        merged = buffered.unary_union

    # set the CRS at construction time so it is carried through explode()
    gdf_return = gpd.GeoDataFrame(
        geometry=[merged],
        crs=proj
    ).explode(
        index_parts=False
    ).reset_index(
        drop=True
    )

    return gdf_return

In [None]:
def get_tot_length_miles(
    gdf, column='geometry', proj='EPSG:4326', proj_calc='EPSG:3857'):
    '''
    return the total length of the gdf's geometries in miles (rounded to 2)
    - gdf - GeoDataFrame; modified in place: its active geometry is set to
      column and a "length_mile" column with per-row lengths is added
    - column - column to be used for length calculation if not "geometry"
    - proj - projection of the original dataset (not used in the
      calculation; the gdf's own CRS is what gets converted)
    - proj_calc='EPSG:3857' - projection for calculations
    '''
    if column:
        gdf.set_geometry(column, inplace=True)
    # convert to the calculation projection chosen by the caller
    # NOTE(review): the division below assumes proj_calc units are meters;
    # EPSG:3857 lengths are also stretched away from the equator - confirm.
    gdf_m = gdf.to_crs(proj_calc)
    # 1609.34 meters per mile
    gdf['length_mile'] = gdf_m.length / 1609.34
    length = round(gdf['length_mile'].sum(), 2)

    return length

def add_stat(dict_stat, stats):
    '''
    append one value to every list in dict_stat
    dict_stat - dictionary of lists, modified in place and returned
    stats - list of values to append, IN THE ORDER OF KEYS
            (extra values beyond the number of keys are ignored)
    '''
    for position, key in enumerate(dict_stat):
        dict_stat[key].append(stats[position])
    return dict_stat

def crashes_type(df, col):
    '''
    keep only the rows whose value in col is not 0
    df - original dataframe (left unchanged)
    col - column that we are interested in
    returns a new dataframe with a fresh 0..n-1 index
    '''
    is_nonzero = df[col] != 0
    selected = df.loc[is_nonzero].copy()
    return selected.reset_index(drop=True)

def intersect_gdf_crashes(name, gdf, crashes_gdf, radius, stat_gdfs):
    '''
    select the crashes that lie within the buffered streets of gdf
    - name - label for this selection; printed and stored in stat_gdfs
    - gdf - GeoDataFrame of streets, buffered with buffer()
    - crashes_gdf - GeoDataFrame of crashes to test against the buffers
    - radius - buffer radius passed to buffer()
    - stat_gdfs - dictionary of lists; [name, radius, result] is appended
      to it in key order through add_stat

    returns the matching rows of crashes_gdf (original index labels kept)
    '''
    print('intersecting ', name)
    gdf_buffer = buffer(gdf, radius=radius)
    # NOTE(review): covered_by compares raw coordinates, so crashes_gdf is
    # assumed to be in the CRS that buffer() returns - confirm.
    # Collect the matches per polygon and concatenate once, instead of
    # growing a frame with pd.concat on every iteration.
    matches = [
        crashes_gdf[crashes_gdf.covered_by(polygon)]
        for polygon in gdf_buffer['geometry']
    ]
    if matches:
        intersect_crashes = pd.concat(matches)
    else:
        # no buffer polygons: return an empty frame that still has the
        # crash columns, so later column lookups keep working
        intersect_crashes = crashes_gdf.iloc[0:0].copy()
    add_stat(stat_gdfs, [name, radius, intersect_crashes])

    return intersect_crashes

In [None]:
def get_stats(
    crashes_gdf, impl_gdf, unimpl_gdf,
    radius, stat_gdfs, stats, result
):
    '''
    compare crashes near implemented vs unimplemented streets
    - crashes_gdf - GeoDataFrame of crashes; must contain the columns
      NUMBER_KILLED, NUMBER_INJURED and COUNT_SEVERE_INJ
    - impl_gdf / unimpl_gdf - GeoDataFrames of implemented / unimplemented
      streets
    - radius - buffer radius used to match crashes to streets
    - stat_gdfs - dictionary of lists filled by intersect_gdf_crashes
    - stats - dictionary of lists; rows of [name, value, unit, radius] are
      appended in key order through add_stat
    - result - dictionary of lists with keys 'parameter', 'radius',
      'impl_to_total', 'unimpl_to_total', 'imp_permile', 'unimpl_permile';
      one entry per crash column is appended to each

    returns result
    '''
    print('radius for buffer is', radius)

    # crashes that fall inside the buffers of each street set
    impl_crashes = intersect_gdf_crashes(
        name='implemented_crashes', gdf=impl_gdf,
        crashes_gdf=crashes_gdf, radius=radius, stat_gdfs=stat_gdfs
    )
    unimpl_crashes = intersect_gdf_crashes(
        name='unimplemented_crashes', gdf=unimpl_gdf,
        crashes_gdf=crashes_gdf, radius=radius, stat_gdfs=stat_gdfs
    )

    # total street length of each set, in miles
    length_impl_miles = get_tot_length_miles(impl_gdf)
    print(length_impl_miles, 'miles implemented')
    add_stat(stats, ['length_implemented', length_impl_miles, 'mile', radius])

    length_unimpl_miles = get_tot_length_miles(unimpl_gdf)
    print(length_unimpl_miles, 'miles unimplemented')
    add_stat(stats, ['length_unimplemented', length_unimpl_miles, 'mile', radius])

    for col in ('NUMBER_KILLED', 'NUMBER_INJURED', 'COUNT_SEVERE_INJ'):
        print('calculating stats for', col)
        result['parameter'].append(col)
        result['radius'].append(radius)

        # crashes with a non-zero value in this column, per group
        subsets = [
            ('impl_' + col, crashes_type(impl_crashes, col)),
            ('unimpl_' + col, crashes_type(unimpl_crashes, col)),
            ('total_' + col, crashes_type(crashes_gdf, col)),
        ]

        # NOTE(review): gdfs is not a parameter of this function; it is
        # looked up as a global. Confirm whether stat_gdfs was intended.
        for label, subset in subsets:
            add_stat(gdfs, [label, subset, radius])

        impl_count, unimpl_count, total_count = (
            subset.shape[0] for _, subset in subsets
        )
        for label, subset in subsets:
            add_stat(stats, [label, subset.shape[0], 'crashes', radius])

        # shares of all crashes, and crash counts per street mile; all four
        # are computed before anything is appended to result
        ratios = {
            'impl_to_total': round(impl_count / total_count, 2),
            'unimpl_to_total': round(unimpl_count / total_count, 2),
            'imp_permile': round(impl_count / length_impl_miles, 2),
            'unimpl_permile': round(unimpl_count / length_unimpl_miles, 2),
        }
        for key, value in ratios.items():
            result[key].append(value)

    return result

# data

In [None]:
# buffer radius used around streets
radius = 20 # our average street will be 40m wide

# timestamp of this notebook run
today = datetime.datetime.now()
print(today)

## data preparation

### get crashes data

In [None]:
# Load the crash records (see Sources at the top) and preview the first row
# to check the column names that are used for renaming.
crashes_raw = pd.read_csv('LAPD_crashes.csv')
display(crashes_raw.head(n=1))

In [None]:
# raw column name -> short name used in the rest of the notebook
cols_dict = {
    'DR Number': 'case_id',
    'Date Occurred': 'collision_date',
    'Address': 'primary_rd',
    'Cross Street': 'secondary_rd',
    'Location': 'location',
    'MO Codes': 'mo_codes',
    'Premise Description': 'loc_description'
}
# Pick only the needed columns first (explicit list, in cols_dict order) and
# then rename: this avoids copying the whole raw table, and rename returns a
# new frame so the column assignments below do not touch crashes_raw.
crashes = crashes_raw[list(cols_dict)].rename(columns=cols_dict)
# assumes collision_date strings end with a 4-digit year - TODO confirm
crashes['year'] = crashes.collision_date.str[-4:].astype(int)

# flag column -> MO codes; a row gets 1 when mo_codes contains any of them
new_cols_dict = {
    'injury': ['3024', '3025', '3026'],
    'death': ['3027'],
    'veh_ped': ['3003',  '3501'],
    'veh_bike': ['3008'],
    'veh_veh': ['3004']
}
for flag_name, codes in new_cols_dict.items():
    crashes[flag_name] = col_from_code(crashes.mo_codes, codes)

In [None]:
# premise descriptions that are kept
locations = [
    'STREET', 'SIDEWALK', 'ALLEY', 'DRIVEWAY',
    'MTA BUS', 'BUS STOP'
]

# one named mask per condition; a crash must satisfy all of them
has_casualty = (crashes.injury != 0) | (crashes.death != 0)
involves_vehicle = (
    (crashes.veh_ped != 0) | (crashes.veh_bike != 0) | (crashes.veh_veh != 0)
)
at_kept_location = crashes.loc_description.isin(locations)
# NOTE(review): this keeps 2016 onwards, while the assumptions at the top
# say 2015-2023 - confirm whether 2015 should be included.
in_period = crashes.year > 2015

crashes_count = crashes[
    has_casualty & involves_vehicle & at_kept_location & in_period
].copy().reset_index(drop=True)

# turn the location strings into [first, second] float pairs and split them
# NOTE(review): loc_x takes the first number and loc_y the second; if the
# strings are (latitude, longitude) these names are swapped - confirm.
crashes_count.location = crashes_count.location.apply(get_coords)
crashes_count[['loc_x', 'loc_y']] = crashes_count.location.to_list()

In [None]:
# preview the first five filtered crashes
crashes_count.head(n=5)

In [None]:
# number of rows before and after filtering
len(crashes), len(crashes_count)

In [None]:
# create geodataframe: crash attributes plus one point per crash, in EPSG:4326
# NOTE(review): points_from_xy takes x then y; with EPSG:4326 in geopandas
# that normally means x = longitude, y = latitude - confirm loc_x / loc_y
# hold the values in that order.
crashes_gdf = gpd.GeoDataFrame(
    crashes_count[[
        'case_id', 'collision_date',
        'injury', 'death', 'veh_ped', 'veh_bike', 'veh_veh'
    ]],
    geometry=gpd.points_from_xy(
        x=crashes_count.loc_x,
        y=crashes_count.loc_y
    ),
    crs='EPSG:4326'
)

### get mobility data with dates of implementation