### Road image dataset from OpenStreetCam
This notebook does the following:
- Gets geographic coordinates along roads in New York from the New York streets shapefile
- Uses these coordinates to look up the relevant track ids on OpenStreetCam
- Extracts and saves images from these track ids.

In [66]:
import numpy as np
import pandas as pd
import geopandas as gpd

try:
    from urllib import urlretrieve as urlretrieve
except ImportError:
    from urllib.request import urlretrieve as urlretrieve  

In [43]:
import json
import os
import zipfile

import requests

In [105]:
def extractOpenStreetCamImagesFromTrackID(trackID, imageDir):
    '''
    Download every photo of one OpenStreetCam track.

    The track details are requested from the OpenStreetCam `details`
    endpoint, and each photo listed under ``['osv']['photos']`` is saved to
    ``<imageDir>/<trackID>/<trackID>_<fileName>``.

    Parameters
    ----------
    trackID : str or int
        Track (sequence) id; sent as the ``id`` form field and used as the
        name of the sub-directory.
    imageDir : str
        Directory under which the per-track folder is created. It is
        created as well if it does not exist yet.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If the details request returns an HTTP error status.
    KeyError
        If the response JSON lacks the ``osv``/``photos`` keys or a photo
        entry lacks ``name``/``fileName``.
    '''
    oscbaseurl = 'http://openstreetcam.org/details'
    response = requests.post(oscbaseurl, data={'id': trackID})
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()
    photoData = response.json()['osv']['photos']

    trackDir = os.path.join(imageDir, str(trackID))
    # makedirs (not mkdir) so a missing imageDir does not abort the download.
    if not os.path.isdir(trackDir):
        os.makedirs(trackDir)

    baseurl = 'https://openstreetcam.org'
    for p in photoData:
        targetPath = os.path.join(trackDir, str(trackID) + '_' + p['fileName'])
        urlretrieve(baseurl + '/' + p['name'], targetPath)

In [106]:
def getNearbytracks(lat, lng):
    '''
    Return the ids of OpenStreetCam tracks near a coordinate.

    Parameters
    ----------
    lat, lng : float or str
        Latitude and longitude in decimal degrees. They are placed in the
        form data as given.

    Returns
    -------
    list
        The ``sequence_id`` of every entry under ``['osv']['sequences']`` in
        the response. The list is empty (or partial) when the response does
        not have that structure, which is treated as "no nearby tracks".
    '''
    url = 'http://openstreetcam.org/nearby-tracks'

    # form data to be sent to the API
    data = {'lat': lat, 'lng': lng, 'distance': '5.0',
            'myTracks': 'false', 'filterUserNames': 'false'}

    # send the POST request and decode the JSON body
    r = requests.post(url=url, data=data)
    extract = r.json()

    sequence_ids = []
    try:
        for sequence in extract['osv']['sequences']:
            sequence_ids.append(sequence['sequence_id'])
    except (KeyError, TypeError, IndexError):
        # Response without the expected keys/shape: keep whatever was
        # collected. Only lookup errors are swallowed, so unrelated failures
        # (e.g. KeyboardInterrupt) still propagate.
        pass

    return sequence_ids

In [107]:
def getGeoDataFrameFromShpFileZipUrl(url):
    '''
    Download a zipped shapefile and read it into a geopandas GeoDataFrame.

    The archive at `url` is downloaded to a temporary file and extracted
    into the directory named by the PUIDATA environment variable. The first
    ``.shp`` member listed in the archive is then read with geopandas.

    Parameters
    ----------
    url : str
        URL of a zip archive that contains a shapefile.

    Returns
    -------
    geopandas.GeoDataFrame

    Raises
    ------
    EnvironmentError
        If the PUIDATA environment variable is not set.
    ValueError
        If the archive contains no ``.shp`` member.
    '''
    dataDir = os.getenv('PUIDATA')
    if not dataDir:
        raise EnvironmentError('PUIDATA environment variable is not set')

    # Without a target filename urlretrieve stores the download in a
    # temporary file and returns its path.
    zipPath, _ = urlretrieve(url)
    try:
        with zipfile.ZipFile(zipPath) as archive:
            # Take the shapefile name from the archive itself rather than
            # diffing directory listings, so other shapefiles already in
            # PUIDATA (or a re-run) cannot cause the wrong file to be picked.
            shapeFiles = [name for name in archive.namelist()
                          if name.lower().endswith('.shp')]
            if not shapeFiles:
                raise ValueError('no .shp file found in archive from ' + url)
            archive.extractall(dataDir)
    finally:
        os.remove(zipPath)

    return gpd.GeoDataFrame.from_file(os.path.join(dataDir, shapeFiles[0]))

In [108]:
# Directory where downloaded shapefiles are extracted. An existing PUIDATA
# setting is respected; the path below is only a fallback default.
os.environ.setdefault("PUIDATA", '/home/urwa/PUI/PUIData')

### Get points on streets

In [70]:
# Shapefile export endpoint on data.cityofnewyork.us for the streets dataset
shpurl = ('https://data.cityofnewyork.us/api/geospatial/exjm-f27b'
          '?method=export&format=Shapefile')
streets_shp = getGeoDataFrameFromShpFileZipUrl(url=shpurl)

In [71]:
# Keep the geometries of streets with borocode "1" (presumably Manhattan,
# going by the `mh_` name) whose st_width lies strictly between 20 and 30.
in_borough = streets_shp["borocode"] == "1"
width_in_range = (streets_shp["st_width"] > 20) & (streets_shp["st_width"] < 30)
mh_streets = streets_shp.loc[in_borough & width_in_range, 'geometry']
print(mh_streets.shape)

(1707,)


### Get relevant track ids

In [82]:
%%time
t=0
trackList=[]
for street in mh_streets.get_values():
    if t % 100 == 1:
        print(t)
    t += 1
    for i in np.arange(0,len(street.xy[0]),5):
        lon = street.xy[0][i]
        lat = street.xy[1][i]
        trackList += getNearbytracks(lat, lon)
trackList = list(set(trackList))

1
101
201
301
401
501
601
701
801
901
1001
1101
1201
1301
1401
1501
1601
1701
CPU times: user 17.7 s, sys: 1.12 s, total: 18.8 s
Wall time: 21min 22s


In [109]:
# Number of distinct track ids in trackList
len(trackList)

152

### Collect Images from track ids

In [1]:
# Track ids excluded from the download (the notebook does not record why).
skippedTracks = ('525573', '1301739', '12583', '1294039')

for tid in trackList:
    if tid in skippedTracks:
        continue
    print(tid)
    extractOpenStreetCamImagesFromTrackID(tid, 'Images')