In [81]:
import pandas as pd
import selenium
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import multiprocessing
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from multiprocessing import Pool
import numpy as np
import time

from selenium.common.exceptions import TimeoutException, ElementNotInteractableException, ElementClickInterceptedException

from pathlib import Path
from PIL import Image

import cv2
import os.path
import random

In [2]:
# Make the ``../lib`` directory (relative to the current working directory)
# importable from this notebook.
import sys
from pathlib import Path

lib_path = str(Path("../lib/").absolute())
# Only append once: re-running this cell must not keep growing sys.path.
if lib_path not in sys.path:
    sys.path.append(lib_path)

In [2]:
# Template image used by needs_repair(); loaded with flag 0 (greyscale).
# NOTE(review): presumably the grey placeholder shown where a map tile failed
# to render - confirm against the actual image.
grey_square = cv2.imread('grey_square.png', 0)
# cv2.imread reports failure by returning None rather than raising, so fail
# here with a clear message instead of later inside cv2.matchTemplate.
if grey_square is None:
    raise FileNotFoundError("could not read template image 'grey_square.png'")

def needs_repair(interid, zoom=16):
    """Decide whether the screenshot for an intersection must be (re)taken.

    Args:
        interid: Identifier used in the screenshot file name.
        zoom: Zoom level used in the screenshot file name (default 16).

    Returns:
        True when ``screenshots/<interid>*<zoom>.png`` does not exist, cannot
        be decoded, or contains a region matching the module-level
        ``grey_square`` template (minimum TM_SQDIFF_NORMED score below 1e-5);
        False otherwise.
    """
    # The '*' is a literal character in the file name (take_shot writes the
    # same pattern); it is not a glob.
    image_path = "screenshots/%s*%d.png" % (interid, zoom)
    if not os.path.exists(image_path):
        return True

    screenshot = cv2.imread(image_path, 0)
    if screenshot is None:
        # cv2.imread returns None for truncated/corrupt files; such a
        # screenshot has to be retaken rather than crash matchTemplate.
        return True

    # With TM_SQDIFF_NORMED lower scores mean a closer match, so a minimum
    # near zero means the template occurs somewhere in the screenshot.
    scores = cv2.matchTemplate(screenshot, grey_square, cv2.TM_SQDIFF_NORMED)
    return bool(np.min(scores) < 1E-5)

In [13]:
# Start an interactive Firefox session (bound to the notebook-level `driver`)
# and open the OpenStreetMap landing page for manual exploration.
profile = FirefoxProfile()
driver = webdriver.Firefox(profile)
# NOTE(review): Selenium's implicitly_wait() takes seconds, so this allows
# element lookups to block for up to 3000 s - confirm milliseconds were not
# intended.
driver.implicitly_wait(3000)
driver.get("https://www.openstreetmap.org/")

In [22]:
# Exploratory lookup: list every element on the current page whose `name`
# attribute is 'commit' (requires the `driver` session created above).
driver.find_elements_by_name('commit')

[<selenium.webdriver.firefox.webelement.FirefoxWebElement (session="0666f1b3-28fb-a141-b0d7-94e6677011db", element="b71456be-6533-2d41-ae2e-fae0b122ba23")>,
 <selenium.webdriver.firefox.webelement.FirefoxWebElement (session="0666f1b3-28fb-a141-b0d7-94e6677011db", element="0647332d-2368-bb4e-9cd7-372c8f21f3c6")>,
 <selenium.webdriver.firefox.webelement.FirefoxWebElement (session="0666f1b3-28fb-a141-b0d7-94e6677011db", element="e5a6cc8e-ae52-574a-a352-1fe436edcc3d")>,
 <selenium.webdriver.firefox.webelement.FirefoxWebElement (session="0666f1b3-28fb-a141-b0d7-94e6677011db", element="4a44405d-dd10-a64e-9663-49afb5c11d5a")>]

In [25]:

#driver.find_element_by_name('commit').click()

In [86]:
def take_shot(driver, inter_id, zoom, lat, lon, delay):
    """Search for a coordinate in the open browser and save a screenshot.

    Args:
        driver: Selenium WebDriver with the OpenStreetMap page already loaded.
        inter_id: Identifier used in the log line and the output file name.
        zoom: Zoom level; used only for the logged URL and the file name -
            the browser is not navigated to that URL.
        lat: Latitude typed into the search box.
        lon: Longitude typed into the search box.
        delay: Upper bound (seconds, integer) for the random pause before the
            screenshot; the pause is drawn from ``[delay // 2, delay]``.

    Side effects:
        Prints a progress line and writes
        ``screenshots/<inter_id>*<zoom>.png``.
    """
    url = "https://www.openstreetmap.org/#map=%d/%f/%f" % (zoom, lat, lon)
    print('Shooting', inter_id, 'at', url)

    # The page exposes several elements with id 'query'; the second one is used.
    query_box = driver.find_elements_by_id('query')[1]
    # The driver is reused across shots, so wipe any text left over from the
    # previous search; otherwise the new coordinates are appended to it.
    query_box.clear()
    query_box.send_keys('%f %f' % (lat, lon))
    query_box.send_keys(Keys.RETURN)

    # '.geolink > span' is a compound CSS selector, not a single class name,
    # so use the CSS locator explicitly instead of the class-name locator.
    driver.find_element_by_css_selector('.geolink > span').click()

    # Randomised pause before capturing.
    # NOTE(review): presumably gives the map time to render - confirm.
    time.sleep(random.randint(delay // 2, delay))
    driver.save_screenshot("screenshots/%s*%d.png" % (inter_id, zoom))
    
def close_init_popups(driver):
    """Click every 'close-wrap' element on the current page, best effort.

    Elements that cannot be interacted with, or whose click is intercepted
    by another element, are skipped silently.
    """
    skippable = (ElementNotInteractableException, ElementClickInterceptedException)
    for close_button in driver.find_elements_by_class_name('close-wrap'):
        try:
            close_button.click()
        except skippable:
            # Hidden or covered popup: nothing to dismiss, move on.
            continue
    
def run_scraper(coords, delay=10):
    """Screenshot every intersection in ``coords`` that still needs one.

    Opens its own Firefox session, dismisses the initial popups, then calls
    take_shot() for each entry for which needs_repair() returns True. The
    browser is always closed, even when a shot fails.

    Args:
        coords: Iterable of ``(interid, lat, lon, zoom)`` entries.
        delay: Forwarded to take_shot() as the maximum pause in seconds.
    """
    profile = FirefoxProfile()
    driver = webdriver.Firefox(profile)
    try:
        # NOTE(review): implicitly_wait() takes seconds, so element lookups
        # may block for up to 3000 s - confirm this is intended.
        driver.implicitly_wait(3000)
        driver.get("https://www.openstreetmap.org/#map=16/42.353570/-71.104630")
        close_init_popups(driver)

        # The map itself is not re-zoomed per entry; zoom only affects the
        # screenshot file name and the logged URL.
        for interid, lat, lon, zoom in coords:
            # A single attempt per entry: retrying in a loop would be more
            # thorough, but the server sometimes struggles.
            if needs_repair(interid, zoom):
                take_shot(driver, interid, zoom, lat, lon, delay)
    finally:
        # Without this, any exception above would leave a Firefox process
        # running.
        driver.quit()
    

In [4]:
def scrape_pool(all_coords, num_processes=2, delay=10):
    """Run run_scraper() over ``all_coords`` in parallel worker processes.

    The coordinates are split into ``num_processes`` chunks with
    ``np.array_split`` and each non-empty chunk is handed to one
    run_scraper() task. Blocks until every task has finished.

    Args:
        all_coords: Array of ``(interid, lat, lon, zoom)`` rows.
        num_processes: Number of worker processes / chunks.
        delay: Forwarded to run_scraper().
    """
    def _report_failure(exc):
        # apply_async results are never fetched, so without this callback a
        # crashed worker would go completely unnoticed.
        print('Scraper worker failed:', repr(exc))

    chunks = np.array_split(all_coords, num_processes)
    pool = Pool(num_processes)
    try:
        for chunk in chunks:
            # Fewer rows than processes yields empty chunks; do not start a
            # browser just to do nothing.
            if len(chunk) == 0:
                continue
            pool.apply_async(run_scraper, args=(chunk, delay),
                             error_callback=_report_failure)
    finally:
        pool.close()
        pool.join()

In [5]:
# Load the raw train/test tables from the working directory.
orig_train, orig_test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [6]:
# Build one row per distinct intersection across train and test.
# IntersectionId is combined with City to form the key `UniqueIntersection`,
# and every row is assigned zoom level 16.
relevant_columns = ['IntersectionId', 'Latitude', 'Longitude', 'City']
full_df = (
    pd.concat([orig_train[relevant_columns], orig_test[relevant_columns]])
    .assign(UniqueIntersection=lambda frame: frame['IntersectionId'].astype(str) + frame['City'])
    .drop_duplicates('UniqueIntersection')
    .assign(zoom=16)
)

In [87]:
# Single-intersection run: scrape only the first row keyed '30Boston'.
is_30_boston = full_df['UniqueIntersection'] == '30Boston'
scrape_columns = ['UniqueIntersection', 'Latitude', 'Longitude', 'zoom']
run_scraper(full_df.loc[is_30_boston, scrape_columns].iloc[:1].values)

Shooting 30Boston at https://www.openstreetmap.org/#map=16/42.382010/-71.102030


In [29]:
# Inspect the row(s) of full_df whose UniqueIntersection key is '967Boston'.
full_df[full_df['UniqueIntersection'] == '967Boston']

Unnamed: 0,IntersectionId,Latitude,Longitude,City,UniqueIntersection,zoom
296510,967,42.35357,-71.10463,Boston,967Boston,16


In [83]:
# Manual smoke test of take_shot() against a fresh Firefox session.
# This rebinds the notebook-level `profile` and `driver`; the session opened
# here is never quit in this cell.
profile = FirefoxProfile()
driver = webdriver.Firefox(profile)
# Short implicit wait (1 second) so failed lookups surface quickly here.
driver.implicitly_wait(1)
driver.get("https://www.openstreetmap.org/#map=16/42.353570/-71.104630")
close_init_popups(driver)
# NOTE(review): the label '1350Chicago' is paired with the coordinates listed
# for '967Boston' in full_df, and this call writes
# screenshots/1350Chicago*16.png - verify that file is not later mistaken for
# a valid Chicago screenshot by needs_repair().
take_shot(driver, '1350Chicago', 16, 42.35357, -71.10463, 10)

Shooting 1350Chicago at https://www.openstreetmap.org/#map=16/42.353570/-71.104630


In [89]:
# Full run: shuffle all intersections (unseeded, so the order differs between
# runs) and scrape them with three parallel browser sessions.
shuffled_coords = (
    full_df[['UniqueIntersection', 'Latitude', 'Longitude', 'zoom']]
    .sample(frac=1)
    .values
)
scrape_pool(shuffled_coords, num_processes=3, delay=10)

Shooting 182Philadelphia at https://www.openstreetmap.org/#map=16/39.929810/-75.163280
Shooting 507Philadelphia at https://www.openstreetmap.org/#map=16/39.955060/-75.210050
Shooting 776Philadelphia at https://www.openstreetmap.org/#map=16/40.006760/-75.131600
Shooting 1072Boston at https://www.openstreetmap.org/#map=16/42.338140/-71.035500
Shooting 181Philadelphia at https://www.openstreetmap.org/#map=16/39.997510/-75.121260
Shooting 1483Chicago at https://www.openstreetmap.org/#map=16/41.726300/-87.560820
Shooting 347Boston at https://www.openstreetmap.org/#map=16/42.397950/-71.121430
Shooting 2778Chicago at https://www.openstreetmap.org/#map=16/41.768190/-87.693110
Shooting 1726Philadelphia at https://www.openstreetmap.org/#map=16/40.016620/-75.147190
Shooting 1201Philadelphia at https://www.openstreetmap.org/#map=16/40.022970/-75.136930
Shooting 1329Philadelphia at https://www.openstreetmap.org/#map=16/40.060400/-74.990440
Shooting 294Chicago at https://www.openstreetmap.org/#map=1

Shooting 1893Chicago at https://www.openstreetmap.org/#map=16/41.932220/-87.683980
Shooting 264Chicago at https://www.openstreetmap.org/#map=16/41.781030/-87.584370
Shooting 248Chicago at https://www.openstreetmap.org/#map=16/41.663250/-87.641420
Shooting 1397Chicago at https://www.openstreetmap.org/#map=16/41.884240/-87.656740
Shooting 1843Philadelphia at https://www.openstreetmap.org/#map=16/40.034390/-75.075270
Shooting 654Philadelphia at https://www.openstreetmap.org/#map=16/40.030600/-75.187570
Shooting 182Chicago at https://www.openstreetmap.org/#map=16/41.999130/-87.888670
Shooting 215Philadelphia at https://www.openstreetmap.org/#map=16/40.030580/-75.187680
Shooting 189Chicago at https://www.openstreetmap.org/#map=16/41.768560/-87.663900
Shooting 1199Boston at https://www.openstreetmap.org/#map=16/42.348090/-71.088130
Shooting 340Atlanta at https://www.openstreetmap.org/#map=16/33.828720/-84.407490
Shooting 899Boston at https://www.openstreetmap.org/#map=16/42.358770/-71.051560

Shooting 1415Philadelphia at https://www.openstreetmap.org/#map=16/40.022710/-75.133520
Shooting 1376Philadelphia at https://www.openstreetmap.org/#map=16/40.033380/-75.066530
Shooting 1174Philadelphia at https://www.openstreetmap.org/#map=16/40.033800/-75.074040
Shooting 1154Boston at https://www.openstreetmap.org/#map=16/42.354530/-71.053350
Shooting 897Boston at https://www.openstreetmap.org/#map=16/42.355240/-71.054030
Shooting 1734Philadelphia at https://www.openstreetmap.org/#map=16/40.036850/-75.062630
Shooting 898Boston at https://www.openstreetmap.org/#map=16/42.330100/-71.039810
Shooting 1348Philadelphia at https://www.openstreetmap.org/#map=16/40.058030/-75.052730
Shooting 466Atlanta at https://www.openstreetmap.org/#map=16/33.824130/-84.352160
Shooting 984Boston at https://www.openstreetmap.org/#map=16/42.314730/-71.060020
Shooting 1704Chicago at https://www.openstreetmap.org/#map=16/41.975160/-87.762760
Shooting 1828Philadelphia at https://www.openstreetmap.org/#map=16/40.