### 2. Download Traffic Image

This notebook downloads the image files from the traffic camera image URLs compiled in the previous notebook.

In [1]:
import pickle
import pandas as pd

In [2]:
#load pickle file containing url link/details of each camera
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only load a camera_links.p file that comes from a trusted source.
# The `with` block guarantees the file handle is closed once loading finishes.
with open("camera_links.p", "rb") as pickle_file:
    data = pickle.load(pickle_file)
len(data)

2234840

In [3]:
#read data into dataframe
# One name per field of a record, in the order the fields are assigned to columns.
columns = [
    "cid", "url",
    "height", "width",
    "md5",
    "lat", "lon",
    "ts",
]
df = pd.DataFrame(data=data, columns=columns)

In [4]:
#list the dtype pandas inferred for each column
df.dtypes

cid        object
url        object
height      int64
width       int64
md5        object
lat       float64
lon       float64
ts         object
dtype: object

In [5]:
#convert datetime string to a python datetime object
# The time is spelled out as %H:%M:%S instead of %X: %X stands for "the
# locale's time representation", so it only matches HH:MM:SS under some locales.
# The "+08:00" offset is matched as literal text, so the resulting values are
# naive datetimes holding the wall-clock time exactly as written in `ts`.
df["time"] = pd.to_datetime(df["ts"], format="%Y-%m-%dT%H:%M:%S+08:00")
df.head()

Unnamed: 0,cid,url,height,width,md5,lat,lon,ts,time
0,1001,https://images.data.gov.sg/api/traffic-images/...,240,320,73aa26edd0939ac8f507f92d24ad76a2,1.295313,103.871146,2016-02-29T23:58:08+08:00,2016-02-29 23:58:08
1,1002,https://images.data.gov.sg/api/traffic-images/...,240,320,16e3c8b0dac5a7456a6847f09177bea8,1.319541,103.878563,2016-02-29T23:58:08+08:00,2016-02-29 23:58:08
2,1003,https://images.data.gov.sg/api/traffic-images/...,240,320,6145a2c08ec831a043af6249ce771d00,1.323957,103.872858,2016-02-29T23:58:08+08:00,2016-02-29 23:58:08
3,1004,https://images.data.gov.sg/api/traffic-images/...,240,320,0f4ff5ac54036eadce58976e0dbeff31,1.319536,103.875067,2016-02-29T23:58:08+08:00,2016-02-29 23:58:08
4,1005,https://images.data.gov.sg/api/traffic-images/...,240,320,da79f31a6915f0eaab1d08178634cf49,1.36352,103.905394,2016-02-29T23:58:08+08:00,2016-02-29 23:58:08


In [6]:
# De-duplicate on the md5 hash: rows sharing a hash are treated as the same
# image, and only the first occurrence of each hash is kept.
# The shape is printed on both sides of the drop to show how many rows went.
print("before: {}".format(df.shape))
df.drop_duplicates(subset=["md5"], keep="first", inplace=True)
print("after: {}".format(df.shape))

before: (2234840, 9)
after: (2234802, 9)


In [7]:
import shutil
import sys
import os
import requests

#Function to download images
def download_image(camera_id, timeout=30):
    """Download every image recorded for one camera into ``images/<camera_id>/``.

    Selects the rows of the module-level ``df`` whose ``cid`` equals
    ``camera_id`` and fetches each row's ``url``. Files are named
    ``<camera_id>-<i>.jpg``, where ``i`` is the zero-based position of the row
    among that camera's rows. A failed download is counted, reported and
    skipped, so one bad URL does not abort the whole run.

    Parameters
    ----------
    camera_id : str or int
        Camera identifier; converted to ``str`` before it is compared with
        ``df["cid"]``.
    timeout : float, optional
        Seconds to wait on the server before a request is abandoned and
        counted as an error. Defaults to 30.

    Returns
    -------
    None
        Progress and the final error count are printed.
    """
    camera_id = str(camera_id)
    urls = df.loc[df["cid"] == camera_id, "url"].tolist()
    total_images = len(urls)

    # Build the folder once and derive every file path from it, so the
    # directory that is created is always the one that is written to.
    folder_path = os.path.join(os.getcwd(), "images", camera_id)
    os.makedirs(folder_path, exist_ok=True)

    errors = 0
    print("Starting download of camera image: {}".format(camera_id))
    for i, url in enumerate(urls):
        filename = os.path.join(folder_path, "{}-{}.jpg".format(camera_id, i))
        try:
            response = requests.get(url, stream=True, timeout=timeout)
            try:
                # Fail before touching the disk on 4xx/5xx, so an HTTP error
                # page is never saved under a .jpg name.
                response.raise_for_status()
                with open(filename, 'wb') as out_file:
                    for chunk in response.iter_content(chunk_size=8192):
                        out_file.write(chunk)
            finally:
                # Release the connection explicitly; `del response` alone
                # does not guarantee that.
                response.close()
            print("Image {}/{} downloaded...".format(i+1,total_images))
        except Exception as exc:
            # Best effort: any failure skips this image. `Exception` (not a
            # bare except) keeps Ctrl-C / kernel interrupt working.
            errors += 1
            print("{} occurred. skipping to next image...".format(type(exc)))
    print("Download completed with {} errors!".format(errors))

In [None]:
#camera ids of interest -- presumably the cameras whose images get downloaded below (TODO confirm)
selected_cids = ["1502","1703","4704","4706"]

In [None]:
#download every image recorded for camera 1502 (saved under images/1502/)
download_image("1502")


Starting download of camera image: 1502
Image 1/30438 downloaded...
Image 2/30438 downloaded...
Image 3/30438 downloaded...
Image 4/30438 downloaded...
Image 5/30438 downloaded...
Image 6/30438 downloaded...
Image 7/30438 downloaded...
Image 8/30438 downloaded...
Image 9/30438 downloaded...
Image 10/30438 downloaded...
Image 11/30438 downloaded...
Image 12/30438 downloaded...
Image 13/30438 downloaded...
Image 14/30438 downloaded...
Image 15/30438 downloaded...
Image 16/30438 downloaded...
Image 17/30438 downloaded...
Image 18/30438 downloaded...
Image 19/30438 downloaded...
Image 20/30438 downloaded...
Image 21/30438 downloaded...
Image 22/30438 downloaded...
Image 23/30438 downloaded...
Image 24/30438 downloaded...
Image 25/30438 downloaded...
Image 26/30438 downloaded...
Image 27/30438 downloaded...
Image 28/30438 downloaded...
Image 29/30438 downloaded...
Image 30/30438 downloaded...
Image 31/30438 downloaded...
Image 32/30438 downloaded...
Image 33/30438 downloaded...
Image 34/304