## Import Libraries

In [1]:
import json
import os
from time import sleep

import pandas as pd
import requests
from bs4 import BeautifulSoup
from flask import Flask,render_template

**Check working directory**

In [2]:
# Show the kernel's current working directory; relative paths used later
# (such as the CSV written at the end) resolve against it.
%pwd

'/Users/elle/GA_DSI/projects/Project_5'

## Set up the Here.com API

**I define what I want the Here.com API to return:**
- I set up the URL with the proper path to retrieve incidents, along with the latitude and longitude of Los Angeles and the radius (in meters) around that coordinate within which to get incidents.
- I use requests to send a GET request to the URL and use BeautifulSoup to view the raw response body. Note that this endpoint returns JSON (not XML), which is parsed with `res.json()` in the next section.

In [3]:
# Proximity	{base-url}/{path}/{resource}.{format}?prox={proximity coordinates}
# The API key is read from the environment so the credential is never stored in
# the notebook. Set HERE_API_KEY before starting the kernel.
api_key = os.environ["HERE_API_KEY"]
# prox = latitude,longitude,radius in meters around Los Angeles.
url = "https://traffic.ls.hereapi.com/traffic/6.2/incidents.json?prox=34.05361,-118.2455,50000&apiKey=" + api_key
# The timeout keeps a stalled connection from hanging the notebook.
res = requests.get(url, timeout=30)
# Fail here on an HTTP error rather than with a KeyError in a later cell.
# The message deliberately omits the URL so the API key cannot leak into output.
if not res.ok:
    raise RuntimeError("HERE incidents request failed with HTTP status {}".format(res.status_code))
# The body is JSON; the soup is only a quick way to eyeball the raw payload.
# Later cells read the data through res.json().
soup = BeautifulSoup(res.content, 'lxml')
soup

<html><body><p>{"TIMESTAMP":"02/21/2020 21:34:56 GMT","VERSION":2.3,"TRAFFIC_ITEMS":{"TRAFFIC_ITEM":[{"TRAFFIC_ITEM_ID":1467688938403884516,"ORIGINAL_TRAFFIC_ITEM_ID":1467688938403884516,"TRAFFIC_ITEM_STATUS_SHORT_DESC":"ACTIVE","TRAFFIC_ITEM_TYPE_DESC":"CONSTRUCTION","START_TIME":"02/21/2020 15:01:00","END_TIME":"02/22/2020 00:01:00","ENTRY_TIME":"02/21/2020 15:12:48","CRITICALITY":{"ID":"2","DESCRIPTION":"minor"},"VERIFIED":true,"ABBREVIATION":{"SHORT_DESC":"CONST","DESCRIPTION":"construction"},"RDS-TMC_LOCATIONS":{"RDS-TMC":[{"ORIGIN":{"EBU_COUNTRY_CODE":"1","TABLE_ID":6,"LOCATION_ID":"06065","LOCATION_DESC":"Trancas Canyon Rd","RDS_DIRECTION":"+"},"DIRECTION":"-","ALERTC":{"TRAFFIC_CODE":52,"QUANTIFIERS":0,"DESCRIPTION":"Roadwork on the hard shoulder.","ALERTC_DURATION":"L","ALERTC_DIRECTION":1,"UPDATE_CLASS":11,"PHRASE_CODE":"E11.Z146","EXTENT":"1","DURATION":0},"LENGTH":2.53938,"PRIMARY_OFFSET":1.96097},{"ORIGIN":{"EBU_COUNTRY_CODE":"1","TABLE_ID":6,"LOCATION_ID":"06065","LOCATIO

## Create a list of dictionaries from the data

In [4]:
# Decode the JSON body and drill down to the incident records.
# Despite its name, inc_dict is a list with one dictionary per incident.
traffic_items = res.json()['TRAFFIC_ITEMS']
inc_dict = traffic_items['TRAFFIC_ITEM']
inc_dict

[{'TRAFFIC_ITEM_ID': 1467688938403884516,
  'ORIGINAL_TRAFFIC_ITEM_ID': 1467688938403884516,
  'TRAFFIC_ITEM_STATUS_SHORT_DESC': 'ACTIVE',
  'TRAFFIC_ITEM_TYPE_DESC': 'CONSTRUCTION',
  'START_TIME': '02/21/2020 15:01:00',
  'END_TIME': '02/22/2020 00:01:00',
  'ENTRY_TIME': '02/21/2020 15:12:48',
  'CRITICALITY': {'ID': '2', 'DESCRIPTION': 'minor'},
  'VERIFIED': True,
  'ABBREVIATION': {'SHORT_DESC': 'CONST', 'DESCRIPTION': 'construction'},
  'RDS-TMC_LOCATIONS': {'RDS-TMC': [{'ORIGIN': {'EBU_COUNTRY_CODE': '1',
      'TABLE_ID': 6,
      'LOCATION_ID': '06065',
      'LOCATION_DESC': 'Trancas Canyon Rd',
      'RDS_DIRECTION': '+'},
     'DIRECTION': '-',
     'ALERTC': {'TRAFFIC_CODE': 52,
      'QUANTIFIERS': 0,
      'DESCRIPTION': 'Roadwork on the hard shoulder.',
      'ALERTC_DURATION': 'L',
      'ALERTC_DIRECTION': 1,
      'UPDATE_CLASS': 11,
      'PHRASE_CODE': 'E11.Z146',
      'EXTENT': '1',
      'DURATION': 0},
     'LENGTH': 2.53938,
     'PRIMARY_OFFSET': 1.96097},
 

## Extract necessary data from the dictionaries

**Create a DataFrame of the dictionaries**

In [5]:
# One row per incident; each top-level key of the incident dicts becomes a column.
here_incidents = pd.DataFrame(data=inc_dict)

**Check how many incidents were found. The feed is live, so the count changes between runs: an earlier run returned 306, while the output below shows 315.**

In [6]:
# (rows, columns): one row per incident record in inc_dict.
here_incidents.shape

(315, 17)

**Inspect the columns to find which column has the geolocation info we are looking for.**

In [7]:
# List the column labels to find which one holds the coordinates.
here_incidents.columns

Index(['TRAFFIC_ITEM_ID', 'ORIGINAL_TRAFFIC_ITEM_ID',
       'TRAFFIC_ITEM_STATUS_SHORT_DESC', 'TRAFFIC_ITEM_TYPE_DESC',
       'START_TIME', 'END_TIME', 'ENTRY_TIME', 'CRITICALITY', 'VERIFIED',
       'ABBREVIATION', 'RDS-TMC_LOCATIONS', 'LOCATION', 'TRAFFIC_ITEM_DETAIL',
       'TRAFFIC_ITEM_DESCRIPTION', 'mid', 'PRODUCT', 'COMMENTS'],
      dtype='object')

**'LOCATION' looks like what we are looking for.  Turn 'LOCATION' into a DataFrame and then inspect:**

In [8]:
# Expand the per-incident LOCATION dicts into a frame: one row per incident,
# one column per top-level key found in those dicts.
locations = pd.DataFrame(here_incidents['LOCATION'].tolist())
locations.head()

Unnamed: 0,DEFINED,GEOLOC,NAVTECH,LENGTH,INTERSECTION,POLITICAL_BOUNDARY
0,{'ORIGIN': {'ROADWAY': {'DESCRIPTION': [{'valu...,"{'ORIGIN': {'LATITUDE': 34.04039, 'LONGITUDE':...","{'EDGE': {'EDGE_ID': ['1146410624', '114641062...",2.53938,,
1,{'ORIGIN': {'ROADWAY': {'DESCRIPTION': [{'valu...,"{'ORIGIN': {'LATITUDE': 34.05102, 'LONGITUDE':...","{'EDGE': {'EDGE_ID': ['1152678298', '115273328...",0.73867,,
2,{'ORIGIN': {'ROADWAY': {'DESCRIPTION': [{'valu...,"{'ORIGIN': {'LATITUDE': 34.05776, 'LONGITUDE':...","{'EDGE': {'EDGE_ID': ['764603554', '764603555'...",0.8483,,
3,,"{'ORIGIN': {'LATITUDE': 33.75693, 'LONGITUDE':...","{'EDGE': {'EDGE_ID': ['121238382', '1195443886...",0.12457,"{'ORIGIN': {'ID': '', 'STREET1': {'ADDRESS1': ...","{'METRO_AREA': {'value': '', 'ID': 6}, 'COUNTY..."
4,{'ORIGIN': {'ROADWAY': {'DESCRIPTION': [{'valu...,"{'ORIGIN': {'LATITUDE': 34.02935, 'LONGITUDE':...","{'EDGE': {'EDGE_ID': ['1183421426', '118342142...",0.4578,,


**Double-check that we still have every incident (the row count should match `here_incidents.shape` above):**

In [9]:
# The row count should equal the number of rows in here_incidents.
locations.shape

(315, 6)

**'GEOLOC' is a nested dictionary that looks like it has exactly what we are looking for - the Latitude and Longitude of the 'incident'.  We will turn that into a DataFrame and inspect:**

In [10]:
# Expand the per-incident GEOLOC dicts the same way: one row per incident,
# one column per top-level key found in those dicts.
geo_cords = pd.DataFrame(locations['GEOLOC'].tolist())
geo_cords.head()

Unnamed: 0,ORIGIN,TO,GEOMETRY
0,"{'LATITUDE': 34.04039, 'LONGITUDE': -118.88558}","[{'LATITUDE': 34.03112, 'LONGITUDE': -118.84363}]",
1,"{'LATITUDE': 34.05102, 'LONGITUDE': -118.24453}","[{'LATITUDE': 34.05944, 'LONGITUDE': -118.2368}]",
2,"{'LATITUDE': 34.05776, 'LONGITUDE': -118.44775}","[{'LATITUDE': 34.06151, 'LONGITUDE': -118.43369}]",
3,"{'LATITUDE': 33.75693, 'LONGITUDE': -118.12304}","[{'LATITUDE': 33.75516, 'LONGITUDE': -118.12345}]","{'SHAPES': {'SHP': [{'value': '33.75693,-118.1..."
4,"{'LATITUDE': 34.02935, 'LONGITUDE': -118.20087}","[{'LATITUDE': 34.02847, 'LONGITUDE': -118.20875}]",


**The 'ORIGIN' column holds dictionaries with the latitude and longitude as key:value pairs. Split them into their own columns so we are left with two columns — 'Latitude' and 'Longitude' — that we can then plot on a map:**

In [73]:
# Turn each ORIGIN dict into its own row of columns, then rename those columns
# to the title-case labels used for plotting.
origin_cords = (
    geo_cords['ORIGIN']
    .apply(pd.Series)
    .rename(columns={'LATITUDE': 'Latitude', 'LONGITUDE': 'Longitude'})
)
origin_cords.head()

Unnamed: 0,Latitude,Longitude
0,34.07228,-118.08451
1,34.03853,-118.22796
2,34.27184,-118.47214
3,34.04039,-118.88558
4,34.06066,-118.24938


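**One final check to make sure we still have every incident. (The output below shows 306 rows because it was saved from an earlier run than the cells above, which show 315; re-run the notebook from top to bottom so the counts agree.)**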

In [75]:
# The row count should equal the number of rows in geo_cords.
origin_cords.shape

(306, 2)

**Save Latitude and Longitude data in a .csv file:**

In [76]:
# Persist the coordinates as CSV, relative to the working directory.
# The DataFrame index is written too, as the first (unnamed) column.
origin_cords.to_csv(path_or_buf='here_incidents_FINAL.csv')

**DONE!**