# Bicycle Crashes
- Town of Chapel Hill, North Carolina 

In [324]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import seaborn as sns
import sys

# df = pd.read_csv('https://catalog.data.gov/dataset/bicycle-crashes.csv')
fname = 'https://www.chapelhillopendata.org/api/v2/catalog/datasets/bicycle-crash-data-chapel-hill-region/exports/csv'
df = pd.read_csv(fname, sep=';')
df

Unnamed: 0,geo_point_2d,geo_shape,ambulancer,bikeage_gr,bike_age,bike_dir,bike_injur,bike_pos,bike_race,bike_sex,...,rd_conditi,rd_config,rd_defects,rd_feature,rd_surface,rural_urba,speed_limi,traff_cntr,weather,workzone_i
0,"35.9100670923, -79.0745027481","{""type"": ""Point"", ""coordinates"": [-79.07450274...",Yes,70+,70+,With Traffic,K: Killed,Travel Lane,White,Male,...,Dry,"Two-Way, Not Divided",,No Special Feature,Smooth Asphalt,Urban,20 - 25 MPH,No Control Present,Clear,No
1,"36.0151171157, -79.0306027406","{""type"": ""Point"", ""coordinates"": [-79.03060274...",No,20-24,24,With Traffic,C: Possible Injury,Travel Lane,White,Male,...,Dry,"Two-Way, Not Divided",,"Driveway, Private",Coarse Asphalt,Rural,40 - 45 MPH,"Double Yellow Line, No Passing Zone",Clear,No
2,"35.9889631116, -78.9887827261","{""type"": ""Point"", ""coordinates"": [-78.98878272...",Yes,25-29,26,With Traffic,B: Evident Injury,Travel Lane,Black,Male,...,Dry,"Two-Way, Not Divided",,No Special Feature,Smooth Asphalt,Urban,40 - 45 MPH,"Double Yellow Line, No Passing Zone",Clear,No
3,"35.9227900944, -79.0888327525","{""type"": ""Point"", ""coordinates"": [-79.08883275...",Yes,30-39,33,With Traffic,B: Evident Injury,Travel Lane,White,Female,...,Dry,"Two-Way, Not Divided",,T-Intersection,Smooth Asphalt,Urban,30 - 35 MPH,Missing,Clear,No
4,"36.0100771166, -78.9900027276","{""type"": ""Point"", ""coordinates"": [-78.99000272...",Yes,40-49,41,With Traffic,B: Evident Injury,Travel Lane,White,Female,...,Dry,"Two-Way, Not Divided",,Four-Way Intersection,Smooth Asphalt,Rural,40 - 45 MPH,Stop Sign,Clear,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,"35.9382390969, -79.1048027585","{""type"": ""Point"", ""coordinates"": [-79.10480275...",Yes,20-24,21,With Traffic,C: Possible Injury,Travel Lane,White,Male,...,Dry,"Two-Way, Not Divided",,No Special Feature,Smooth Asphalt,Rural,40 - 45 MPH,No Control Present,Clear,No
161,"35.9376070989, -79.0565827433","{""type"": ""Point"", ""coordinates"": [-79.05658274...",Yes,60-69,65,With Traffic,A: Disabling Injury,Travel Lane,White,Male,...,Dry,"Two-Way, Not Divided",,No Special Feature,Smooth Asphalt,Urban,30 - 35 MPH,No Control Present,Clear,No
162,"35.9090970918, -79.0778687486","{""type"": ""Point"", ""coordinates"": [-79.07786874...",No,30-39,36,With Traffic,B: Evident Injury,Unknown,White,Male,...,Dry,"Two-Way, Not Divided",,T-Intersection,Smooth Asphalt,Urban,30 - 35 MPH,No Control Present,Clear,No
163,"35.9152570935, -79.0568027425","{""type"": ""Point"", ""coordinates"": [-79.05680274...",Yes,30-39,38,With Traffic,B: Evident Injury,Travel Lane,White,Male,...,Dry,"Two-Way, Divided, Unprotected Median",,Other,Smooth Asphalt,Urban,20 - 25 MPH,Stop Sign,Clear,No


In [325]:
df.columns

Index(['geo_point_2d', 'geo_shape', 'ambulancer', 'bikeage_gr', 'bike_age',
       'bike_dir', 'bike_injur', 'bike_pos', 'bike_race', 'bike_sex', 'city',
       'county', 'crashalcoh', 'crashday', 'crash_grp', 'crash_loc',
       'crash_mont', 'crash_time', 'crash_type', 'crash_year', 'crsh_sevri',
       'developmen', 'drvrage_gr', 'drvr_age', 'drvr_alc_d', 'drvr_estsp',
       'drvr_injur', 'drvr_race', 'drvr_sex', 'drvr_vehty', 'excsspdind',
       'hit_run', 'light_cond', 'locality', 'num_lanes', 'num_units',
       'rd_charact', 'rd_class', 'rd_conditi', 'rd_config', 'rd_defects',
       'rd_feature', 'rd_surface', 'rural_urba', 'speed_limi', 'traff_cntr',
       'weather', 'workzone_i'],
      dtype='object')

## Utility function to test if a dataframe contains any NaNs

In [326]:
def countDataFrameNulls(df):
    return df.isna().values.sum()

## Translate speed_limi into speed_limi_min and speed_limi_max

In [354]:
def translateSpeed(loc, x):
    ''' Translate a speed of the format "25 - 35 MPH" to (25, 35)'''


    m = re.findall(r'^(\d+)\s*-\s*(\d+)\s*MPH$', x)
    if m:
        m = (np.uint32(m[0][0]), np.uint32(m[0][1]))
    else:
        m = (0,0)

    print("translateSpeed({}, {}) = {}".format(loc, x, m))
    return m



In [359]:
#print(df.head())
df.index
df.dtypes

#lines = { (injur, sex) : for injur in df['bike_injur'] for sex in df['bike_sex']}
print(translateSpeed(0, '25 - 35 MPH'))

try:
    for i, x in enumerate(df['speed_limi']):
        df['speed_limi_min'], df['speed_limi_max'] = translateSpeed(i, x)

    
    bike_injur_sex = df.groupby(['bike_injur', 'bike_sex'])
    print(bike_injur_sex)
    
    bike_injur_sex_speed_min = bike_injur_sex['speed_limi_min'].apply(list).to_frame()
    bike_injur_sex_speed_max = bike_injur_sex['speed_limi_max'].apply(list).to_frame()
    bike_injur_sex_speed = pd.merge(
        bike_injur_sex_speed_min,
        bike_injur_sex_speed_max,
        how='outer',
        left_index=True,
        right_index=True)
    print(bike_injur_sex_speed)

    #print("MIN:\n{}\n\n".format(bike_injur_sex_speed_min))
    #print("MAX:\n{}\n\n".format(bike_injur_sex_speed_max))

    print("--- NullCount =", countDataFrameNulls(bike_injur_sex_speed))

    for i, n in zip(bike_injur_sex_speed.index, bike_injur_sex_speed.values):
        print(i, n)
        #plt.axvline(x=i, y = n)

    #print(type(bike_injur_sex_speed))
    #print(bike_injur_sex_speed.iloc(0))

    #sns.lineplot( x = bike_injur_sex_speed_min.index, y='speed_limi_min', data=bike_injur_sex_speed_min)
    #sns.lineplot( x = bike_injur_sex_speed_max.index, y='speed_limi_max', data=bike_injur_sex_speed_max)
except Exception as e:
        raise type(e)(str(e)).with_traceback(sys.exc_info()[2])

SyntaxError: invalid syntax (<ipython-input-359-69a3bfc5ac3f>, line 9)