## Imports

In [72]:

import pandas as pd
from sodapy import Socrata

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cityofchicago.org", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cityofchicago.org,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")





## Grab preliminary data through query

In [73]:
# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
# data website https://data.cityofchicago.org/resource/85ca-t3if.json

crash_data = client.get("85ca-t3if", 
                     #where='violation_date BETWEEN \'2019-01-01T00:00:00.000\' AND \'2019-12-13T00:00:00.000\'',
                     limit=10000,
                    )

# Convert to pandas DataFrame
all_df = pd.DataFrame.from_records(crash_data)

In [74]:
all_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 49 columns):
crash_record_id                  10000 non-null object
crash_date                       10000 non-null object
posted_speed_limit               10000 non-null object
traffic_control_device           10000 non-null object
device_condition                 10000 non-null object
weather_condition                10000 non-null object
lighting_condition               10000 non-null object
first_crash_type                 10000 non-null object
trafficway_type                  10000 non-null object
alignment                        10000 non-null object
roadway_surface_cond             10000 non-null object
road_defect                      10000 non-null object
report_type                      9802 non-null object
crash_type                       10000 non-null object
hit_and_run_i                    3577 non-null object
damage                           10000 non-null object
date_police_no

In [75]:
# What's in this data?
for col in ['traffic_control_device']:
    print(col, all_df[col].unique())

traffic_control_device ['NO CONTROLS' 'TRAFFIC SIGNAL' 'STOP SIGN/FLASHER' 'UNKNOWN' 'OTHER'
 'OTHER REG. SIGN' 'PEDESTRIAN CROSSING SIGN' 'RAILROAD CROSSING GATE'
 'FLASHING CONTROL SIGNAL' 'OTHER RAILROAD CROSSING' 'YIELD'
 'POLICE/FLAGMAN' 'RR CROSSING SIGN' 'NO PASSING']


In [150]:
# How many were at traffic signals
signal_df = all_df[all_df['traffic_control_device']=='TRAFFIC SIGNAL']
other_df = all_df[all_df['traffic_control_device']!='TRAFFIC SIGNAL']


# how many crashes are at signals?
print('Total crashes in study: {}'.format(len(all_df)))
print('Total crashes at signal: {}'.format(len(signal_df)))

print('Percent crashes at signal: {:.1f}%'.format(len(signal_df)/len(all_df)*100))



Total crashes in study: 10000
Total crashes at signal: 2920
Percent crashes at signal: 29.2%


In [164]:
# what kind of crashes occur at traffic light, and how does it compare to other crashes?

def crash_stats(all_df, df):
    # by percentages
    crash_types = df['first_crash_type'].unique()

    print_list = []
    print('{:30}{:15}{:20}'.format("Type of Crash", 'Total crashes','Percent of total'))
    for crash in crash_types:
        num_crashes = len(df[df['first_crash_type']==crash])
        print_list.append([crash, num_crashes])
    
    print_list = sorted(print_list, key=lambda x: x[1], reverse=True)
    
    for crash, n in print_list: 
        print('{:30}{:<15d}{:>5.1f}%'.format(crash, n, 100 * n / len(df)))

def print_header(message):
    print('--'*20)
    print(message)
    print('--'*20)
        
def print_crash_stats(all_df, signal_df, other_df):
    print('CRASH STATS')
    print('Total crashes in study:', len(all_df))
    print('Signal crashes: {:.1f}%'.format((100 * len(signal_df) / len(all_df))))

    print_header('CRASHES NOT AT SIGNAL')

    print('Total crashes:', len(other_df))
    print()
    crash_stats(all_df, other_df)
    print('\n\n')
    print_header('CRASHES AT SIGNAL')

    print('Total crashes:', len(signal_df))
    crash_stats(all_df, signal_df)

        

        
print_crash_stats(all_df, signal_df, other_df)


CRASH STATS
Total crashes in study: 10000
Signal crashes: 29.2%
----------------------------------------
CRASHES NOT AT SIGNAL
----------------------------------------
Total crashes: 7080

Type of Crash                 Total crashes  Percent of total    
PARKED MOTOR VEHICLE          2390            33.8%
REAR END                      1016            14.4%
SIDESWIPE SAME DIRECTION      924             13.1%
ANGLE                         787             11.1%
TURNING                       708             10.0%
FIXED OBJECT                  542              7.7%
PEDESTRIAN                    127              1.8%
REAR TO FRONT                 103              1.5%
SIDESWIPE OPPOSITE DIRECTION  100              1.4%
OTHER OBJECT                  94               1.3%
REAR TO SIDE                  79               1.1%
HEAD ON                       74               1.0%
PEDALCYCLIST                  69               1.0%
REAR TO REAR                  27               0.4%
OTHER NONCOLLISIO

In [171]:
cams_of_interest = ['2552', '2592', '2593', '2553', '3002', '3022', '3032', '3003', '3041', '3082', '3043', '3084', '3051', '3052']
cam_addresses = ['340 W UPPER WACKER DR', '10300 S HALSTED STREE',
       '11500 S HALSTED STREE', '200 N UPPER WACKER DR',
       '5232 N MILWAUKEE AVE', '300 S MICHIGAN AVE', '100 E ONTARIO ST',
       '628 N MICHIGAN AVE', '100 E JACKSON BLVD', '5616 W FOSTER AVE',
       '5200 N NORTHWEST HWY', '5232 N CENTRAL AVE', '800 W 115TH STREET',
       '800 W 103RD STREET']

cam_with_address =  [['2552', '11500 S HALSTED STREE', '115TH AND HALSTED'],
                     ['2592', '10300 S HALSTED STREE', 'HALSTED AND 103RD'],
                     ['2593', '800 W 103RD STREET', 'HALSTED AND 103RD'],
                     ['2553', '800 W 115TH STREET', '115TH AND HALSTED'],
                     ['3002', '5200 N NORTHWEST HWY', 'NORTHWEST HIGHWAY AND FOSTER'],
                     ['3022', '5232 N MILWAUKEE AVE', 'MILWAUKEE AND CENTRAL'],
                     ['3032', '5232 N CENTRAL AVE', 'CENTRAL AND MILWAUKEE'],
                     ['3003', '5616 W FOSTER AVE', 'FOSTER AND NORTHWEST HIGHWAY'],
                     ['3041', '300 S MICHIGAN AVE', 'MICHIGAN AND JACKSON'],
                     ['3082', '628 N MICHIGAN AVE', 'MICHIGAN AND ONTARIO'],
                     ['3043', '100 E JACKSON BLVD', 'MICHIGAN AND JACKSON'],
                     ['3084', '100 E ONTARIO ST', 'MICHIGAN AND ONTARIO'],
                     ['3051', '200 N UPPER WACKER DR', 'LAKE AND UPPER WACKER'],
                     ['3052', '340 W UPPER WACKER DR', 'LAKE AND UPPER WACKER']]





In [177]:
print(signal_df.columns)  
print(signal_df.street_name)

print(signal_df.latitude.isna().sum())
print(signal_df.location.isna().sum())


Index(['crash_record_id', 'crash_date', 'posted_speed_limit',
       'traffic_control_device', 'device_condition', 'weather_condition',
       'lighting_condition', 'first_crash_type', 'trafficway_type',
       'alignment', 'roadway_surface_cond', 'road_defect', 'report_type',
       'crash_type', 'hit_and_run_i', 'damage', 'date_police_notified',
       'prim_contributory_cause', 'sec_contributory_cause', 'street_no',
       'street_direction', 'street_name', 'beat_of_occurrence', 'num_units',
       'most_severe_injury', 'injuries_total', 'injuries_fatal',
       'injuries_incapacitating', 'injuries_non_incapacitating',
       'injuries_reported_not_evident', 'injuries_no_indication',
       'injuries_unknown', 'crash_hour', 'crash_day_of_week', 'crash_month',
       'latitude', 'longitude', 'location', 'crash_date_est_i',
       'intersection_related_i', 'statements_taken_i', 'private_property_i',
       'dooring_i', 'photos_taken_i', 'work_zone_i', 'work_zone_type',
       'workers

In [207]:
signal_df.location.head()

4     {'type': 'Point', 'coordinates': [-87.65873570...
11    {'type': 'Point', 'coordinates': [-87.62662144...
12    {'type': 'Point', 'coordinates': [-87.67572135...
14    {'type': 'Point', 'coordinates': [-87.62564751...
15    {'type': 'Point', 'coordinates': [-87.71572304...
Name: location, dtype: object

In [208]:
!pip install geopy

Collecting geopy
  Using cached geopy-2.0.0-py3-none-any.whl (111 kB)
Collecting geographiclib<2,>=1.49
  Using cached geographiclib-1.50-py3-none-any.whl (38 kB)
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-2.0.0
You should consider upgrading via the '/Users/aaronlee/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m


In [209]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="specify_your_app_name_here")
location = geolocator.geocode("175 5th Avenue NYC")
print(location.address)
# out: Flatiron Building, 175, 5th Avenue, Flatiron, New York, NYC, New York, ...

print((location.latitude, location.longitude))
# out: (40.7410861, -73.9896297241625)

print(location.raw)
# out: {'place_id': '9167009604', 'type': 'attraction', ...}


ConfigurationError: Using Nominatim with default or sample `user_agent` "specify_your_app_name_here" is strongly discouraged, as it violates Nominatim's ToS https://operations.osmfoundation.org/policies/nominatim/ and may possibly cause 403 and 429 HTTP errors. Please specify a custom `user_agent` with `Nominatim(user_agent="my-application")` or by overriding the default `user_agent`: `geopy.geocoders.options.default_user_agent = "my-application"`.