## Gather labeled and unlabeled data

## 1 Collect OSM ways & nodes

### 1.1 Retrieve ways & nodes in a given bounding box from OSM (test)

In [None]:
import pandas as pd
import overpy

# Bounding box in decimal degrees, in the order Overpass expects
# (south, west, north, east).
# Full study area, for reference: 49.729140, 9.857140, 49.822259, 10.008888
# Smaller excerpt actually used by this test cell:
bbox_minlat, bbox_minlon, bbox_maxlat, bbox_maxlon = 49.799140, 9.957140, 49.802259, 10.008888

bbox = f'{bbox_minlat},{bbox_minlon},{bbox_maxlat},{bbox_maxlon}'

# Select the highways that carry a "surface" tag, then "(._;>;)" adds the nodes
# those ways reference. A separate node["surface"] statement in front of this
# would only fill the default set that the way statement overwrites again, so
# it is left out; the returned ways and nodes are the same.
overpass_query = f'way["surface"]["highway"]({bbox});(._;>;);out;'

api = overpy.Overpass()
result = api.query(overpass_query)

### 1.2 Create data frame with nodes (incl. lat/lon), their way and the surface of their way

In [None]:
# One row per (way, node) pair, restricted to ways that carry a "surface" tag.
nodes_surface = [
    [way_node.id, way_node.lat, way_node.lon, osm_way.id, osm_way.tags["surface"]]
    for osm_way in result.ways
    if "surface" in osm_way.tags
    for way_node in osm_way.nodes
]

df_nodes_surface = pd.DataFrame(
    nodes_surface,
    columns=['nodeId', 'nodeLat', 'nodeLon', 'wayId', 'waySurface'],
)
df_nodes_surface

## 2 Download images from Mapillary close to the collected nodes

### 2.1 Configuration & helper functions for the Mapillary connection

Code adapted from mapillary_tools https://github.com/mapillary/mapillary_tools/blob/v0.5.0/bin/download_images.py 

In [None]:
import argparse
import json
import os
import shutil
import time
import urllib
import urllib.parse
import urllib.request
from pathlib import Path

In [None]:
# Root directory for all downloaded images; the download cells append
# 'labeled/' or 'unlabeled/' to it.
BASE_DIR = './ressources/mapillary_raw/'
# See https://www.mapillary.com/developer/api-documentation/

# Image search endpoint; the URL-encoded query parameters are appended directly.
MAPILLARY_API_IM_SEARCH_URL = 'https://a.mapillary.com/v3/images?'
# Prefix for image downloads; '<image key>/thumb-<size>.jpg' is appended to it.
MAPILLARY_API_IM_RETRIEVE_URL = 'https://d1cuyjsrcm0gby.cloudfront.net/'
# Sent as the 'client_id' query parameter of every search request.
CLIENT_ID = 'TG1sUUxGQlBiYWx2V05NM0pQNUVMQTo2NTU3NTBiNTk1NzM1Y2U2'
# Upper bound on search results per node (sent as the 'per_page' parameter).
nrOfImageDownloadsPerNode=100
# Search radius around a node (sent as the 'radius' parameter).
# NOTE(review): presumably metres - confirm against the API documentation.
maxDistance=5

In [None]:
'''
Helpers to download images using the Mapillary image search API.

Searches for images within a fixed radius around a point (lon, lat) and
downloads the images that were found.
'''

def create_dirs(base_path):
    '''
    Make sure base_path exists as a directory.

    Missing parent directories are created as well; a directory that already
    exists is left untouched.
    '''
    directory = Path(base_path)
    directory.mkdir(exist_ok=True, parents=True)


def query_search_api(lon, lat, max_results):
    '''
    Ask the image search API for images close to a point.

    The search radius comes from the module-level maxDistance and the client
    id from CLIENT_ID. Panoramas are excluded (pano=false).

    Args:
        lon: Longitude of the point; sent first in the "closeto" parameter.
        lat: Latitude of the point; sent second in the "closeto" parameter.
        max_results: Maximum number of results (sent as "per_page").

    Returns:
        The list stored under 'features' in the JSON response.

    Raises:
        urllib.error.URLError: If the request fails (not caught here).
    '''
    # str() is applied explicitly so that any coordinate type is rendered
    # exactly as str() renders it.
    params = urllib.parse.urlencode({
        'client_id': CLIENT_ID,
        'closeto': ','.join((str(lon), str(lat))),
        'radius': str(maxDistance),
        'per_page': str(max_results),
        'pano': 'false',
    })
    url = MAPILLARY_API_IM_SEARCH_URL + params

    print(url)

    # The context manager closes the HTTP response once the body was read.
    with urllib.request.urlopen(url) as response:
        features = json.loads(response.read())['features']

    print("Result: {0} images in area.".format(len(features)))
    return features


def download_images(query, path, size=640):
    '''
    Download the images of a search result into a directory.

    Images whose target file already exists are skipped, so a run can be
    repeated without fetching the same image twice.

    Args:
        query: Iterable of image features as returned by query_search_api.
            Each one needs im['properties']['key'] and
            im['geometry']['coordinates'].
        path: Directory to save into. It must already exist; a trailing
            path separator is optional.
        size: Thumbnail size requested from the server as
            "thumb-<size>.jpg". Defaults to 640; the script this was adapted
            from lists 320, 640, 1024 and 2048 as available sizes.

    Returns:
        A list of [filename, "coord,coord,..."] entries, one for every image
        downloaded by this call. Skipped images are not listed.

    Raises:
        urllib.error.URLError: If a download fails (not caught here).
    '''
    im_size = "thumb-{0}.jpg".format(size)
    im_list = []

    for im in query:
        # The image key names both the remote resource and the local file.
        key = im['properties']['key']
        filename = key + ".jpg"
        target = os.path.join(path, filename)

        if os.path.isfile(target):
            print("file "+filename+" already exists" )
            continue

        url = MAPILLARY_API_IM_RETRIEVE_URL + key + '/' + im_size
        try:
            urllib.request.urlretrieve(url, target)
        except BaseException:
            # A failed or interrupted transfer (including Ctrl-C) can leave a
            # truncated file behind, which the next run would mistake for a
            # finished download. Remove it, then let the error propagate.
            if os.path.isfile(target):
                os.remove(target)
            raise

        # Log filename and GPS location
        coords = ",".join(map(str, im['geometry']['coordinates']))
        im_list.append([filename, coords])

        print("Successfully downloaded: {0}".format(filename))

    return im_list


def download_images_nearby(node, dirColumn, baseDir):
    '''
    Download the images found around one node into its own subdirectory.

    Args:
        node: Row-like object providing 'nodeLon', 'nodeLat' and the entry
            named by dirColumn.
        dirColumn: Name of the entry whose value becomes the subdirectory.
        baseDir: Directory prefix (including its trailing slash) under which
            the subdirectory is created.
    '''
    subdir_name = str(node[dirColumn])
    target_dir = "".join((baseDir, subdir_name, "/"))
    create_dirs(target_dir)

    nearby_images = query_search_api(
        node['nodeLon'],
        node['nodeLat'],
        nrOfImageDownloadsPerNode,
    )
    download_images(nearby_images, target_dir)

### 2.2 Download nearby images for each node and store them in the subdirectory for the respective way surface

In [None]:
# The surface of a node's way serves as the label: images are stored under
# labeled/<waySurface>/.
labeled_dir = BASE_DIR + 'labeled/'
df_nodes_surface.apply(
    download_images_nearby,
    axis=1,
    dirColumn='waySurface',
    baseDir=labeled_dir,
)


## 3 Download images for unlabeled ways

### 3.1 Retrieve ways & nodes in a given bounding box from OSM

In [None]:
import pandas as pd
import overpy

# Bounding box in decimal degrees, in the order Overpass expects
# (south, west, north, east).
bbox_minlat, bbox_minlon, bbox_maxlat, bbox_maxlon = 49.729140, 9.857140, 49.822259, 10.008888

bbox = f'{bbox_minlat},{bbox_minlon},{bbox_maxlat},{bbox_maxlon}'

# Select all highways in the box, then "(._;>;)" adds the nodes those ways
# reference. A separate "node(bbox);" statement in front of this would fetch
# every node in the area only to have the way statement overwrite the default
# set again, so it is left out; the returned ways and nodes are the same.
overpass_query = f'way["highway"]({bbox});(._;>;);out;'

api = overpy.Overpass()
result = api.query(overpass_query)

### 3.2 Filter for ways where no surface is specified yet

In [None]:
# One row per (way, node) pair, restricted to ways without a "surface" tag.
nodes_surface = [
    [way_node.id, way_node.lat, way_node.lon, osm_way.id]
    for osm_way in result.ways
    if "surface" not in osm_way.tags
    for way_node in osm_way.nodes
]

df_nodes_wo_surface = pd.DataFrame(
    nodes_surface,
    columns=['nodeId', 'nodeLat', 'nodeLon', 'wayId'],
)
df_nodes_wo_surface

### 3.3 Download images near the nodes of each way and store them in unlabeled/\[wayId]/\[imageId]

In [None]:
# No label is known for these ways, so images are grouped by way instead:
# unlabeled/<wayId>/.
unlabeled_dir = BASE_DIR + 'unlabeled/'
df_nodes_wo_surface.apply(
    download_images_nearby,
    axis=1,
    dirColumn='wayId',
    baseDir=unlabeled_dir,
)