### 1. Pull Traffic API

This notebook uses the traffic-images API provided by [data.gov.sg](https://api.data.gov.sg/v1/transport/traffic-images) to compile a set of traffic camera image URLs.

In [1]:
import requests
import pprint
import datetime
from config import config


#Endpoint queried for the traffic camera images at a given date_time
URL = "https://api.data.gov.sg/v1/transport/traffic-images"
#API key is read from the local config module rather than written in the notebook
API_KEY = config["C_KEY"]
#Shared pretty-printer used to inspect the nested json responses
pp = pprint.PrettyPrinter(indent=2)

In [2]:
def get_one_traffic_data(date_time, timeout=30):
    """Query the traffic-images API for a single point in time.

    Args:
        date_time: Value sent as the ``date_time`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The decoded json body of the response.

    Raises:
        requests.exceptions.RequestException: On connection errors or timeouts.
        ValueError: If the response body is not valid json.
    """
    #HTTP status is intentionally not checked: the json body is returned as-is
    #so the caller decides how to treat a payload without camera data.
    r = requests.get(
        URL,
        params={"date_time": date_time},
        headers={"api-key": API_KEY},
        timeout=timeout,
    )
    return r.json()

In [3]:
#Smoke test: fetch the camera data for one fixed moment and print the raw
#json response to confirm the API key and endpoint work.
test_date_time = "2017-05-07T09:54:00"
test_data = get_one_traffic_data(test_date_time)
pp.pprint(test_data)

{ 'api_info': {'status': 'healthy'},
  'items': [ { 'cameras': [ { 'camera_id': '1701',
                              'image': 'https://images.data.gov.sg/api/traffic-images/2017/05/31af8e44-a762-44ce-90fd-cd3f804b528a.jpg',
                              'image_metadata': { 'height': 480,
                                                  'md5': '1ae3ec73d11d3e80f1169f979434bc84',
                                                  'width': 640},
                              'location': { 'latitude': 1.323604823,
                                            'longitude': 103.8587802},
                              'timestamp': '2017-05-07T09:53:14+08:00'},
                            { 'camera_id': '1704',
                              'image': 'https://images.data.gov.sg/api/traffic-images/2017/05/42983cc9-8d6e-4665-bb8d-481b3c73f929.jpg',
                              'image_metadata': { 'height': 480,
                                                  'md5': '60975493c97a3fe31f07011e128c

### Data structure of json response

- items[0] (dict)
 - timestamp (str)
 - cameras (arr)
   - **camera_id** (str)
   - **image** (str url)
   - **image_metadata** (dict)
     - *height (int)*
     - *width (int)*
     - *md5 (str)*
   - **location** (dict)
     - *latitude (float64)*
     - *longitude (float64)*
   - **timestamp** (str)
- api_info (dict)
 - status (str)

In [4]:
#Display structure of a single camera record (first camera of the first item
#in the response fetched by the smoke test)
pp.pprint(test_data["items"][0]["cameras"][0])

{ 'camera_id': '1701',
  'image': 'https://images.data.gov.sg/api/traffic-images/2017/05/31af8e44-a762-44ce-90fd-cd3f804b528a.jpg',
  'image_metadata': { 'height': 480,
                      'md5': '1ae3ec73d11d3e80f1169f979434bc84',
                      'width': 640},
  'location': {'latitude': 1.323604823, 'longitude': 103.8587802},
  'timestamp': '2017-05-07T09:53:14+08:00'}


In [5]:
def convert_timestring(dt):
    """Format a datetime as the string sent to the API.

    Args:
        dt: A ``datetime.datetime``; it is formatted as-is and the fixed
            ``+08:00`` offset is appended without any timezone conversion.

    Returns:
        A string of the form ``YYYY-MM-DDTHH:MM:SS+08:00``.
    """
    #Format required for api query: YYYY-MM-DD[T]HH:MM:SS+08:00'
    #Explicit %H:%M:%S instead of %X, whose output depends on the active locale.
    return dt.strftime("%Y-%m-%dT%H:%M:%S+08:00")

def collect_traffic_data(start_dt, end_dt):
    """Collect de-duplicated camera records between two datetimes.

    The API is queried once per 15-minute step, starting at ``start_dt``
    (inclusive) and stopping before ``end_dt`` (exclusive). A camera record
    whose image md5 was already seen during this call is skipped.

    Args:
        start_dt: ``datetime.datetime`` of the first query.
        end_dt: ``datetime.datetime`` at which querying stops (not queried).

    Returns:
        A list of rows, each being
        ``[camera_id, image, height, width, md5, latitude, longitude, timestamp]``.

    Notes:
        A failed request is retried for the same timestamp until it succeeds.
        A response that lacks the expected structure is reported and skipped.
        Only ``Exception`` subclasses are handled, so ``KeyboardInterrupt``
        still stops the loop.
    """
    #container to store data collected
    data = []
    #md5 hashes already recorded; a set keeps the duplicate check O(1)
    seen_md5 = set()

    #Camera data is sampled once every 15 mins.
    interval = datetime.timedelta(minutes=15)
    steps_per_day = 24*4
    count = 0
    dt = start_dt
    while dt < end_dt:
        #Handle network errors: dt is not advanced, so the same timestamp is retried
        try:
            raw = get_one_traffic_data(convert_timestring(dt))
        except Exception:
            print("Encountered network exception... Retrying...")
            continue

        #Handle data return errors (e.g. traffic camera down)
        try:
            for cam in raw["items"][0]["cameras"]:
                md5 = cam["image_metadata"]["md5"]
                if md5 in seen_md5:
                    continue
                row = [cam["camera_id"],
                       cam["image"],
                       cam["image_metadata"]["height"],
                       cam["image_metadata"]["width"],
                       md5,
                       cam["location"]["latitude"],
                       cam["location"]["longitude"],
                       cam["timestamp"]]
                #only mark the md5 as seen once the full row was built
                seen_md5.add(md5)
                data.append(row)
        except Exception:
            print("Encountered data exception... Continuing...")

        dt += interval
        count += 1
        if count % steps_per_day == 0:
            print("Day {} done!".format(count/24/4))
    return data

In [17]:
#Collect 4 consecutive months of data, starting from 2016-12 (i.e. 2016-12 to 2017-03)
from dateutil.relativedelta import relativedelta

year = 2016
month = 12
full_data = []
num_months = 4

for _ in range(num_months):
    #Each run covers one calendar month: [first of month, first of next month)
    start_date = datetime.datetime(year,month,1,0,0,0,0)
    end_date = start_date+relativedelta(months=1)
    print("Running {}-{}...".format(year,month))
    d = collect_traffic_data(start_date, end_date)
    full_data.extend(d)
    print("{}-{}: done!".format(year,month))

    #end_date is already the first day of the next month, so it carries the
    #December -> January year rollover for us.
    year, month = end_date.year, end_date.month

Running 2016-12...
Day 1.0 done!
Day 2.0 done!
Day 3.0 done!
Day 4.0 done!
Day 5.0 done!
Day 6.0 done!
Day 7.0 done!
Day 8.0 done!
Day 9.0 done!
Day 10.0 done!
Day 11.0 done!
Day 12.0 done!
Day 13.0 done!
Day 14.0 done!
Day 15.0 done!
Day 16.0 done!
Day 17.0 done!
Day 18.0 done!
Day 19.0 done!
Day 20.0 done!
Day 21.0 done!
Day 22.0 done!
Day 23.0 done!
Day 24.0 done!
Day 25.0 done!
Day 26.0 done!
Day 27.0 done!
Day 28.0 done!
Day 29.0 done!
Day 30.0 done!
Day 31.0 done!
2016-12: done!
Running 2017-1...
Day 1.0 done!
Day 2.0 done!
Day 3.0 done!
Day 4.0 done!
Day 5.0 done!
Day 6.0 done!
Day 7.0 done!
Day 8.0 done!
Day 9.0 done!
Day 10.0 done!
Day 11.0 done!
Day 12.0 done!
Day 13.0 done!
Day 14.0 done!
Day 15.0 done!
Day 16.0 done!
Day 17.0 done!
Day 18.0 done!
Day 19.0 done!
Day 20.0 done!
Day 21.0 done!
Day 22.0 done!
Day 23.0 done!
Day 24.0 done!
Day 25.0 done!
Day 26.0 done!
Day 27.0 done!
Day 28.0 done!
Day 29.0 done!
Day 30.0 done!
Day 31.0 done!
2017-1: done!
Running 2017-2...
Day 

In [18]:
#Total number of camera records gathered across all collected months
len(full_data)

842760

In [None]:
#Save compiled records into pickle file
import pickle

#Open in binary *write* mode ("rb" would open for reading and make the dump fail);
#the with-block closes the file so the data is flushed to disk.
with open("camera_links.p", "wb") as f:
    pickle.dump(full_data, f)