## Scraping Live Road Closures from Here.com

_Author: Rachel Koenig_

In [1]:
#Import libraries 
import json, requests
import time, datetime
import pandas as pd

# Display the value of every top-level expression in a cell (not only the last one),
# so several outputs can be shown from one cell without print statements
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

An API key for Here.com is required to run this notebook. Visit https://developer.here.com/ to create one. The notebook reads the `APP_ID` and `APP_CODE` values from `creds/here-creds.json`.

In [2]:
# Load the private Here.com API credentials from a local JSON file.
# The context manager closes the file handle as soon as the text has been read,
# even if reading raises.
with open('creds/here-creds.json') as json_file:
    json_str = json_file.read()

# Parsed credentials; the request URL reads json_data['APP_ID'] and json_data['APP_CODE'].
json_data = json.loads(json_str)

In [3]:
# Centre point of the search area (Los Angeles, CA region) and the size of the
# area Here.com searches around that point.
SEARCH_LATITUDE = 34.14892
SEARCH_LONGITUDE = -118.0632
SEARCH_RADIUS = 23664  # presumably metres -- TODO confirm against the Here.com Traffic API docs

# The three values are joined with %2C (a URL-encoded comma) because the string
# is interpolated as-is into the query string of the request URL.
proximity_coords = f"{SEARCH_LATITUDE}%2C{SEARCH_LONGITUDE}%2C{SEARCH_RADIUS}"

In [4]:
# Incidents endpoint of the Here.com Traffic API, restricted to critical incidents
# around the configured point and authenticated with the private app id / app code.
url = (
    "https://traffic.api.here.com/traffic/6.0/incidents.json"
    f"?prox={proximity_coords}"
    "&criticality=critical"
    f"&app_id={json_data['APP_ID']}"
    f"&app_code={json_data['APP_CODE']}"
)

In [5]:
# Request the critical traffic incidents for the configured proximity from Here.com.
# The timeout (seconds) stops the notebook from hanging indefinitely when the
# service does not answer; requests raises an exception instead.
res = requests.get(url, timeout=30)

# Read the clock once so the date and the time describe the same instant
# (two independent strftime() calls could fall on either side of midnight).
request_moment = time.localtime()
date_requested = time.strftime('%Y-%m-%d', request_moment)
time_requested = time.strftime('%H:%M', request_moment)
date_requested; time_requested  # show the date & time of the request

'2019-07-31'

'14:05'

In [6]:
# Check for a 200 status (request succeeded) before parsing the response body
res.status_code

200

In [7]:
 # Decode the JSON body of the response into Python objects
 new_json = res.json()

In [8]:
# Inspect the top-level keys of the response, then the keys nested under 'TRAFFICITEMS'
print(new_json.keys())
new_json['TRAFFICITEMS'].keys()

dict_keys(['TIMESTAMP', 'VERSION', 'TRAFFICITEMS'])


dict_keys(['TRAFFICITEM'])

In [9]:
# Pull the incident records out of the response.
# Despite its name, incident_dict is a list holding one dictionary per traffic item.
incident_dict = new_json['TRAFFICITEMS']['TRAFFICITEM']
incident_dict

[{'TRAFFICITEMID': 2028247178486333156,
  'ORIGINALTRAFFICITEMID': 970920204488413198,
  'TRAFFICITEMSTATUSSHORTDESC': 'ACTIVE',
  'TRAFFICITEMTYPEDESC': 'MISCELLANEOUS',
  'STARTTIME': '07/31/2019 20:19:46',
  'ENDTIME': '08/01/2019 01:18:40',
  'ENTRYTIME': '07/31/2019 20:19:46',
  'CRITICALITY': {'ID': '0', 'DESCRIPTION': 'critical'},
  'VERIFIED': True,
  'ABBREVIATION': {'SHORTDESC': 'MISC', 'DESCRIPTION': ''},
  'RDSTMCLOCATIONS': {'RDSTMC': [{'ORIGIN': {'EBUCOUNTRYCODE': '1',
      'TABLEID': 6,
      'LOCATIONID': '23635',
      'LOCATIONDESC': 'W Sierra Madre Ave/Silverfish',
      'RDSDIRECTION': '+'},
     'DIRECTION': '-',
     'ALERTC': {'TRAFFICCODE': 965,
      'DESCRIPTION': 'Closed due to serious fire.',
      'ALERTCDURATION': 'D',
      'ALERTCDIRECTION': 2,
      'URGENCY': 'U',
      'UPDATECLASS': 5,
      'PHRASECODE': 'F16.C1',
      'EXTENT': '1',
      'DURATION': 0}}]},
  'LOCATION': {'DEFINED': {'ORIGIN': {'ROADWAY': {'DESCRIPTION': [{'content': 'Pasadena',


In [10]:
# Turn the list of incident dictionaries into a DataFrame (one row per incident)
LA_incidents2 = pd.DataFrame(incident_dict)

In [11]:
# Check how many incidents were returned: shape is (rows, columns) and rows = # of incidents
LA_incidents2.shape

(44, 15)

In [12]:
# Check the column names of the incidents DataFrame
LA_incidents2.columns

Index(['ABBREVIATION', 'COMMENTS', 'CRITICALITY', 'ENDTIME', 'ENTRYTIME',
       'LOCATION', 'ORIGINALTRAFFICITEMID', 'RDSTMCLOCATIONS', 'STARTTIME',
       'TRAFFICITEMDESCRIPTION', 'TRAFFICITEMDETAIL', 'TRAFFICITEMID',
       'TRAFFICITEMSTATUSSHORTDESC', 'TRAFFICITEMTYPEDESC', 'VERIFIED'],
      dtype='object')

In [13]:
# Check one incident for closed road name. 
# LA_incidents2['LOCATION'][4]['DEFINED']['ORIGIN']['ROADWAY']['DESCRIPTION'][0]['content']

The names of the actual closed roads were in two different locations in the nested dictionaries, so we had to pull from each separately and then combine them into one column.

In [14]:
# Collect, for every incident, the closed-road name stored under
# LOCATION -> DEFINED -> ORIGIN -> ROADWAY -> DESCRIPTION[0] -> 'content'.
# Incidents that do not carry that path get the placeholder 'unknown'.
roads1 = []
for i in range(len(LA_incidents2)):
    try:
        word = LA_incidents2['LOCATION'][i]['DEFINED']['ORIGIN']['ROADWAY']['DESCRIPTION'][0]['content']
    except (KeyError, IndexError, TypeError):
        # A missing key, an empty list or a non-dictionary value means "no name here".
        # Any other error (typo, interrupted kernel, ...) is left to surface instead
        # of being silently turned into 'unknown'.
        word = 'unknown'
    roads1.append(word)
    

In [15]:
# Wrap the collected names in a DataFrame, then label its only column 'closed_road1'.
roads1 = pd.DataFrame(roads1)
roads1 = roads1.rename({0: 'closed_road1'}, axis='columns')
roads1.head()

Unnamed: 0,closed_road1
0,Pasadena
1,unknown
2,unknown
3,San Bernardino Fwy
4,Whittier Blvd


In [47]:
# Collect, for every incident, the closed-road name stored under
# LOCATION -> INTERSECTION -> ORIGIN -> STREET1 -> 'ADDRESS1'.
# Incidents that do not carry that path get the placeholder 'unknown'.
#
# The names are read straight from LA_incidents2 so that this cell only depends on
# data created above it and gives the same result on a fresh top-to-bottom run.
road2 = []
for i in range(len(LA_incidents2)):
    try:
        word = LA_incidents2['LOCATION'][i]['INTERSECTION']['ORIGIN']['STREET1']['ADDRESS1']
    except (KeyError, IndexError, TypeError):
        # A missing key or a non-dictionary value means "no name here"; any other
        # error is left to surface instead of being turned into 'unknown'.
        word = 'unknown'
    road2.append(word)

# Turn the list into a DataFrame and rename its only column.
roads2 = pd.DataFrame(road2).rename(columns={0: 'closed_road2'})
roads2.head()

Unnamed: 0,closed_road2
0,unknown
1,Garey St
2,Ramona Rd
3,unknown
4,unknown


In [48]:
# Concatenate the two road-name DataFrames side by side (same row = same incident).
road_names = pd.concat((roads1, roads2), axis=1)

# Blank out the 'unknown' placeholder as a whole cell value before joining.
# str.strip('unknown') is not suitable here: it treats its argument as a set of
# characters and removes any of u/n/k/o/w from both ends, so a road called 'Main'
# followed by the placeholder would be cut down to 'Mai'.
road_names = road_names.replace('unknown', '')

# Add the strings of both columns together, leaving one final single-column
# DataFrame of all the closed roads (empty string when neither source had a name).
all_roads = (road_names['closed_road1'] + road_names['closed_road2']).to_frame('Closed Roads')

all_roads.head()

0              Pasadena
1              Garey St
2             Ramona Rd
3    San Bernardino Fwy
4         Whittier Blvd
Name: Closed Roads, dtype: object

Unnamed: 0,Closed Roads
0,Pasadena
1,Garey St
2,Ramona Rd
3,San Bernardino Fwy
4,Whittier Blvd


In [16]:
# Check traffic description output 
# LA_incidents2['TRAFFICITEMDESCRIPTION'][43][0]['content']

In [17]:
# Collect, for every incident, the text of the first TRAFFICITEMDESCRIPTION entry,
# which describes where the closure starts and ends.
# Incidents without such an entry get the placeholder 'unknown'.
start_end = []
for i in range(len(LA_incidents2)):
    try:
        word = LA_incidents2['TRAFFICITEMDESCRIPTION'][i][0]['content']
    except (KeyError, IndexError, TypeError):
        # A missing key, an empty list or a non-list value means "no description";
        # any other error is left to surface instead of being turned into 'unknown'.
        word = 'unknown'
    start_end.append(word)
    

In [49]:
# Save the list as a DataFrame (a single column labelled 0)
start_end_df = pd.DataFrame(start_end)

# The closure details also include the closure type after " - Closed",
# so split once on that marker to give the type its own column.
split_columns = start_end_df[0].str.split(" - Closed", n=1, expand=True)

# When no description contains the marker the split returns only column 0;
# reindexing guarantees that both columns exist (the missing one is filled with NaN).
start_end_type = split_columns.reindex(columns=[0, 1]).rename(
    columns={0: 'Closure Details', 1: 'Closure Type'}
)

# Check output
start_end_type.head()

Unnamed: 0,Closure Details,Closure Type
0,Closed at W Sierra Madre Ave/Silverfish,due to serious fire.
1,Closed between E 1st St and E 3rd St/S Alameda St,.
2,Closed between I-10/San Bernardino Fwy and I-1...,due to roadwork.
3,At CA-39/Azusa Ave/Exit 36 - Entry ramp closed...,
4,Closed at E 6th St,due to roadwork.


In [50]:
# Expand the LOCATION column into its own DataFrame: one row per incident,
# one column per top-level key of the location dictionaries.
location_records = LA_incidents2['LOCATION'].tolist()
locations = pd.DataFrame(location_records)

# Preview the first rows
locations.head()

Unnamed: 0,DEFINED,GEOLOC,INTERSECTION,NAVTECH,POLITICALBOUNDARY
0,{'ORIGIN': {'ROADWAY': {'DESCRIPTION': [{'cont...,"{'ORIGIN': {'LATITUDE': 34.23911, 'LONGITUDE':...",,"{'EDGE': {'EDGEID': ['775815005', '775815008',...",
1,,"{'ORIGIN': {'LATITUDE': 34.04856, 'LONGITUDE':...","{'ORIGIN': {'ID': '', 'STREET1': {'ADDRESS1': ...","{'EDGE': {'EDGEID': ['23907850']}, 'VERSIONID'...","{'METROAREA': {'value': '', 'ID': 6}, 'COUNTY'..."
2,,"{'ORIGIN': {'LATITUDE': 34.06572, 'LONGITUDE':...","{'ORIGIN': {'ID': '', 'STREET1': {'ADDRESS1': ...","{'EDGE': {'EDGEID': ['955186417', '23910120']}...","{'METROAREA': {'value': '', 'ID': 6}, 'COUNTY'..."
3,{'ORIGIN': {'ROADWAY': {'DESCRIPTION': [{'cont...,"{'ORIGIN': {'LATITUDE': 34.07201, 'LONGITUDE':...",,"{'EDGE': {'EDGEID': ['24041211']}, 'VERSIONID'...",
4,{'ORIGIN': {'ROADWAY': {'DESCRIPTION': [{'cont...,"{'ORIGIN': {'LATITUDE': 34.036845, 'LONGITUDE'...",,"{'EDGE': {'EDGEID': ['764258587', '782857550',...",


In [51]:
# Expand the nested GEOLOC dictionaries into their own DataFrame
# (one column per top-level key, e.g. ORIGIN and TO).
geoloc_records = locations['GEOLOC'].tolist()
geo_coor = pd.DataFrame(geoloc_records)

# Preview the first rows
geo_coor.head()

Unnamed: 0,ORIGIN,TO
0,"{'LATITUDE': 34.23911, 'LONGITUDE': -117.85131}","[{'LATITUDE': 34.199964, 'LONGITUDE': -117.860..."
1,"{'LATITUDE': 34.04856, 'LONGITUDE': -118.23519}","[{'LATITUDE': 34.04718, 'LONGITUDE': -118.23516}]"
2,"{'LATITUDE': 34.06572, 'LONGITUDE': -118.15693}","[{'LATITUDE': 34.06708, 'LONGITUDE': -118.15434}]"
3,"{'LATITUDE': 34.07201, 'LONGITUDE': -117.90261}","[{'LATITUDE': 34.07201, 'LONGITUDE': -117.90261}]"
4,"{'LATITUDE': 34.036845, 'LONGITUDE': -118.221478}","[{'LATITUDE': 34.03853, 'LONGITUDE': -118.22796}]"


In [35]:
# Each ORIGIN value is a dictionary, so expanding it with pd.Series gives one
# column per key; the coordinate columns are then given friendlier names.
origin_expanded = geo_coor['ORIGIN'].apply(pd.Series)
origin = origin_expanded.rename(
    columns={'LATITUDE': 'Latitude', 'LONGITUDE': 'Longitude'}
)
origin.head()

Unnamed: 0,Latitude,Longitude
0,34.23911,-117.85131
1,34.04856,-118.23519
2,34.06572,-118.15693
3,34.07201,-117.90261
4,34.036845,-118.221478


In [43]:
# The three per-incident tables to combine: road names, closure details/type, coordinates.
dataframes = [
    all_roads,
    start_end_type,
    origin,
]

# Place the columns of all three tables side by side, matching rows on the index.
total_LA_closures = pd.concat(dataframes, axis='columns')

In [44]:
# Display the combined closures table
total_LA_closures

Unnamed: 0,Closed Roads,Closure Details,Closure Type,Latitude,Longitude
0,Pasadena,Closed at W Sierra Madre Ave/Silverfish,due to serious fire.,34.23911,-117.85131
1,Garey St,Closed between E 1st St and E 3rd St/S Alameda St,.,34.04856,-118.23519
2,Ramona Rd,Closed between I-10/San Bernardino Fwy and I-1...,due to roadwork.,34.06572,-118.15693
3,San Bernardino Fwy,At CA-39/Azusa Ave/Exit 36 - Entry ramp closed...,,34.07201,-117.90261
4,Whittier Blvd,Closed at E 6th St,due to roadwork.,34.036845,-118.221478
5,Lucas Ave,Closed at W 1st St/Beverly Blvd/Glendale Blvd,due to roadwork.,34.06205,-118.25969
6,Angeles Crest Hwy,Closed between Waterman Fire Rd and NF-3N26c,due to rockfalls. Mud slide.,34.25846,-118.1052
7,1st St,Closed between W 1st St/Beverly Blvd and Glend...,due to roadwork.,34.061341,-118.258692
8,2nd Pl,Closed between S Hope St and W 2nd St,due to roadwork.,34.054907,-118.251555
9,Kellogg Dr,At I-10/San Bernardino Fwy - Entry ramp closed...,,34.06433,-117.81208


In [1]:
# Save to a csv.  Commented out so as not to overwrite the saved file every time the notebook is run.
# total_LA_closures.to_csv('live_here_locations.csv')