# Table of Contents

- [Imports](#Imports-and-Configuration)
- [Photo Downloads](#Photo-Downloads)
- [Save Results](#Save-Results)

# Imports and Configuration

Import necessary packages and configure APIs

In [1]:
import pandas as pd
import numpy as np
import folium
import geocoder
import xml
import urllib
from PIL import Image
import matplotlib.pyplot as plt
import flickr_api as flickr
import googlemaps
import time
import requests
from googlemaps import convert
flickr.set_keys(api_key = '4ea697590240af7b4ced24c4e46b3d41', api_secret = '855a29a3e8cd2677')

In [1]:
with open('../api_key_maps.txt') as f:
    api_key = f.readline()
    f.close

In [3]:
# source https://github.com/alexis-mignon/python-flickr-api/wiki/Flickr-API-Keys-and-Authentication
a = flickr.auth.AuthHandler() # creates a new AuthHandler object
perms = "write" # set the required permissions
url = a.get_authorization_url(perms)
print(url)

https://www.flickr.com/services/oauth/authorize?oauth_token=72157708599786124-9e19adedc0ba741b&perms=write


In [4]:
a.set_verifier('1141080de8028213') # copy your oauth_verifier tag here!
flickr.set_auth_handler(a)
a.save('.auth.txt')
flickr.set_auth_handler(".auth.txt")

In [5]:
# get the route df with the stopover city names
route_df = pd.read_csv('../datasets/route.csv')
route_df.drop(columns=['Unnamed: 0'], inplace=True)
route_df.head()

Unnamed: 0,Cities
0,New York
1,Nashville
2,Memphis
3,Dallas


# Photo Downloads

In [6]:
# List with city names
cities = []
for i in range(route_df.shape[0]):
    cities.append(route_df.iloc[i]['Cities'])

# List with city center coordinates
def get_latlong(address):
    g = geocoder.arcgis(address)
    return tuple(g.latlng)

# get the coordinates of each city
cities_coord = [get_latlong(city) for city in cities]

# Dict with bounding boxes
# This will be used to make sure we are out of the cities once we start looking for images along the road
bounding_box = {a: {} for a in cities}
for index, city in enumerate(cities):
    g = geocoder.arcgis(cities[index])
    bounding_box[city]['max_lat'] = g.bbox['northeast'][0]
    bounding_box[city]['min_lat'] = g.bbox['southwest'][0]
    bounding_box[city]['max_lng'] = g.bbox['northeast'][1]
    bounding_box[city]['min_lng'] = g.bbox['southwest'][1]

In stopover cities, dowload photos with accuracy of 11 which means photos around the city.
Along the route, download photos within a 10 km radius.
So : 
- get route from 1 city to another
- search radius of 10 km around every step end locations
- stop when reaching stopover 

In [7]:
# download photos for every city
# initialize dataframe with columns for each feature we want to save from our images
file_number = 1
photo_info_df = pd.DataFrame(columns = ['filename', 'city', 'url', 'latitude', 'longitude'])

# Download photos
for i in range(len(cities)):
    print('downloading photos in ', cities[i])
    
    # initializing page number and urls for each city. urls list helps us keep track of the number of images we have
    page_number = 1 
    urls = []
    # loop through cities and get 3000 images for each
    # if the API takes too long and cannot fetch enough images we exit the loop
    # by setting max page number to 50, this would mean the API couldnt get images by the 50th page
    while ((len(urls) < 3000) and (page_number < 50)):
        # in this case we also check the views to get somewhat popular images 
        photo_search = flickr.Photo.search(api_key = '4ea697590240af7b4ced24c4e46b3d41',  
                                           per_page = 500,
                                           lat = cities_coord[i][0],
                                           lon = cities_coord[i][1],
                                           page = page_number,
                                           accuracy = 11,
                                           content_type = 1,
                                           min_taken_date = 2016,
                                           media = 'photos', 
                                           extras = ['views']) 
        for photo in photo_search:
            # only add pictures that have at least 20 views 
            if (photo.views >= 20):
                try:
                    url = photo.get('url_c')
                    urls.append(url)
                    path = '../photos_to_classify/' + str(file_number) + '.jpg'
                    urllib.request.urlretrieve(url, path)
                    image = Image.open(path)
                    image = image.resize((256, 256), Image.ANTIALIAS)
                    image.save(path)
                    # save information relevant to future use in a dataframe
                    photo_info_df.loc[file_number-1] = [path, cities[i], url,
                                                        photo.getLocation().latitude, 
                                                        photo.getLocation().longitude]
                    file_number += 1
                except:
                    pass                       
        page_number += 1

downloading photos in  New York
downloading photos in  Nashville
downloading photos in  Memphis
downloading photos in  Dallas


In [15]:
# save how many photos were downloaded for each city
photo_info_df.shape

(12052, 5)

In [16]:
# download photos along the way
client = googlemaps.client.Client(key = api_key)

# loop through each start city
for index in range(len(cities)-1): 
    # get the current origin and destination
    current_start = cities[index]
    current_end = cities[index+1]
    print('downloading pictures in the ', current_start, 'to', current_end, 'route')
    # generate the current route
    route = googlemaps.directions.directions(client = client,
                                             origin = current_start,
                                             destination = current_end, 
                                             mode = 'driving')
    
    # loop through each step
    for step in route[0]['legs'][0]['steps'][0:-1]: 
        end_lat = step['end_location']['lat']
        end_lng = step['end_location']['lng']
        # make sure were out of the stopover cities, so if coordinates are outside of bounding boxes
        start_minlat = bounding_box[current_start]['min_lat']
        start_maxlat = bounding_box[current_start]['max_lat']
        start_minlng = bounding_box[current_start]['min_lng']
        start_maxlng = bounding_box[current_start]['max_lng']
        
        end_minlat = bounding_box[current_end]['min_lat']
        end_maxlat = bounding_box[current_end]['max_lat']
        end_minlng = bounding_box[current_end]['min_lng']
        end_maxlng = bounding_box[current_end]['max_lng']
        
        # if we are outside of the cities, then look for images
        if ((start_minlat < end_lat < start_maxlat) & (start_minlng < end_lng < start_maxlng) == False):
            if ((end_minlat < end_lat < end_maxlat) & (end_minlng < end_lng < end_maxlng)== False):
                urls = []
                page_number = 1
                
                # looking for images within a 10 km radius, 300 for each end of step
                while ((len(urls) < 300) and (page_number < 50)):
                    photo_search = flickr.Photo.search(api_key = '4ea697590240af7b4ced24c4e46b3d41', 
                                                       lat = end_lat, 
                                                       lon = end_lng, 
                                                       per_page = 250,
                                                       radius = 10,
                                                       page = page_number,
                                                       min_taken_date = 2016,
                                                       content_type = 1,
                                                       media = 'photos', 
                                                       extras = ['views']) 
                    for photo in photo_search:
                        
                        # only add pictures if they have more than 30 views
                        if (photo.views > 30):
                            try:
                                url = photo.get('url_c')
                                urls.append(url)
                                path = '../photos_to_classify/' + str(file_number) + '.jpg'
                                urllib.request.urlretrieve(url, path)
                                image = Image.open(path)
                                image = image.resize((256, 256), Image.ANTIALIAS)
                                image.save(path)
                                current_route = current_start + ' ' + current_end
                                # save relevant information about the images
                                photo_info_df.loc[file_number-1] = [path, current_route, url,
                                                                    photo.getLocation().latitude, 
                                                                    photo.getLocation().longitude]
                                file_number += 1
                            except:
                                pass
                    page_number += 1

downloading pictures in the  New York to Nashville route
downloading pictures in the  Nashville to Memphis route
downloading pictures in the  Memphis to Dallas route


# Save Results

In [19]:
photo_info_df.to_csv('../datasets/all_pics.csv')