# Newport Pedestrian Safety Survey - 2014
This notebook visualizes the results of the 2014 pedestrian safety survey, which asked responents to rate 25 intersections in the City of Newport, RI either "Safe", "Unsafe" or "Don't Know".  The "Don't Know" category is ambiguous in that it covers the situiation where the respondent isn't familiar with the intersection in question, as well as the situation where the respondent has had both safe and unsafe experiences, and cannot classify as one or the other.  Because of this, the "Don't Know" responses were discarded from the safety scaling.

In [1]:
import geopandas as gpd
import json

import pandas as pd

from bokeh.plotting import figure, output_notebook, show
from bokeh.models import GeoJSONDataSource, ColorBar, HoverTool, Range1d
from bokeh.tile_providers import STAMEN_TONER, CARTODBPOSITRON
from bokeh.models.mappers import LinearColorMapper
from bokeh.palettes import Spectral6

In [2]:
# load bokeh
output_notebook()

In [3]:
gdf = gpd.read_file('./pedestrian-safety-2014/pedestrian-safety-2014.shp')
survey = pd.read_csv('Newport Pedestrian Safety Survey - May 2014 cleaned.csv')

In [4]:
# drop unused columns
slim_survey = survey.drop(['Collector ID', 'Start Date', 'End Date', 'Comment/Explanation'], axis=1)

# make the columns line up with intersection ids
column_names = [
    'Respondent ID',
                1,1,1,
                2,2,2,
                3,3,3,
                4,4,4,
                5,5,5,
                6,6,6,
                7,7,7,
                8,8,8,
                9,9,9,
                10,10,10,
                11,11,11,
                12,12,12,
                13,13,13,
                14,14,14,
                15,15,15,
                16,16,16,
                17,17,17,
                18,18,18,
                19,19,19,
                20,20,20,
                21,21,21,
                22,22,22,
                23,23,23,
                24,24,24,
                25,25,25
                ]
slim_survey.columns = column_names

# add geodataframe columns
gdf['Safe']=""
gdf['Unsafe']=""
gdf["I don't know."]=""
gdf["score"]=""

In [5]:
# build geodataframe of aggregate data, and calculate safety score
for i in range(0,25):
    counts = slim_survey[i+1].melt().value.value_counts(sort=False)
    gdf['score'].iloc[i] = counts['Safe']/(counts['Safe']+counts['Unsafe'])
    gdf['Safe'].iloc[i] = counts['Safe'].astype('float')
    gdf['Unsafe'].iloc[i] = counts['Unsafe'].astype('float')
    gdf["I don't know."].iloc[i] = counts["I don't know."].astype('float')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [6]:
# create the GeoJSONDataSource
geosource = GeoJSONDataSource(geojson=gdf.to_crs(epsg=3857).to_json())

# Process the data
The score given each crossing is calculated as the ratio of "Safe" responses to total "Safe" and "Unsafe" responses. The plot shows crossings with higher safety scores as green, and low safety scores as red.

In [7]:
# define tooltip information
hover = HoverTool(tooltips=[
    ("Intersection", "@id"),
    ("Safe", "@Safe"),
    ("Unsafe", "@Unsafe"),
    ("Score", "@score"),
])

# define color map for plot, swapped low-high for numbers closer to 1 to be green
cmap = LinearColorMapper(palette = Spectral6, low=1, high=0)

# set plotting ranges
minx, miny, maxx, maxy = gdf.to_crs(epsg=3857).total_bounds
mercator_x_extent = dict(start=minx-25, end=maxx+25, bounds=None)
mercator_y_extent = dict(start=miny-25, end=maxy+25, bounds=None)
x_range = Range1d(**mercator_x_extent)
y_range = Range1d(**mercator_y_extent)

# build plot
p = figure(tools=["wheel_zoom,box_zoom,pan", hover], x_range=x_range, y_range=y_range)
p.add_tile(CARTODBPOSITRON)
p.circle(x='x', y='y', size=12, alpha=0.9, source=geosource, fill_color={'field': 'score', 'transform':cmap}, line_color=None)
show(p)