## Article web page processing

In [2]:
import html2text
import json
import requests
import numpy as np
from bs4 import BeautifulSoup

In [3]:
def text_from_html(url: str): #data_folder_html_file_id):
    """
    Given url of the webpage returns the readable text from the page
    """
    #road_trip = open("../data/" + str(data_folder_html_file_id) + ".html", "r").read()
    road_trip = requests.get(url).text
    soup = BeautifulSoup(road_trip)
    links = []
    for link in soup.findAll('a'):
        http_link = link.get('href')
        if "nationalgeographic" in http_link:
            continue
        if not http_link.startswith("http"):
            continue
        if len(http_link) > 5:
            links.append(http_link)
    
    text_maker = html2text.HTML2Text()
    text_maker.ignore_links = True
    text_maker.ignore_images = True
    road_trip_processed = text_maker.handle(road_trip)
    
    start = "ShareTweetEmail"
    index = road_trip_processed.find(start)
    road_trip_processed = road_trip_processed[index + len(start) + 1:]
    
    end = road_trip_processed.find("ShareTweetEmail")
    road_trip_processed = road_trip_processed[:end]
    road_trip_processed = road_trip_processed.replace("\n\n", ". ")
    for i in ["\n", "[", "]", "*", "#", "_", "\\'", '\\']:
        road_trip_processed = road_trip_processed.replace(i, " ")
    road_trip_processed = road_trip_processed.replace('>', ", ")
    for i in range(4): # 4 seems enough
        road_trip_processed = road_trip_processed.replace("  ", " ")
    road_trip_processed = road_trip_processed.replace("..", ".")
               
    return road_trip_processed.strip(), links

def get_text_and_places() -> dict:
    with open('/home/geoner/data/articles.json') as json_file:
        data = json.load(json_file)
    for i in data:
        text, _ = text_from_html(data[i]["article_url"])
        data[i]["text"] = text      
    return data

In [4]:
%time data = get_text_and_places()

CPU times: user 934 ms, sys: 33.6 ms, total: 968 ms
Wall time: 8.76 s


## Extractor

In [5]:
import spacy

In [6]:
nlp = spacy.load('en_core_web_trf')
def get_ne(txt: str) -> list:
    ne_types = ["GPE", "FAC", "LOC", "ORG"]
    ne = nlp(txt).ents
    target_ne = [[str(ne[i]).strip(), ne[i]] for i in range(len(ne)) if ne[i].label_ in ne_types]
    for i in range(len(target_ne)):
        if target_ne[i][0].startswith("the"):
            target_ne[i][0] = target_ne[i][0][4:]
        if target_ne[i][0].endswith(','):
            target_ne[i][0] = target_ne[i][0][:-1]
        target_ne[i][0] = target_ne[i][0].strip()
        if target_ne[i][0].endswith('\'s'):
            target_ne[i][0] = target_ne[i][0][:-2]
        if target_ne[i][0].endswith("’s"):
            target_ne[i][0] = target_ne[i][0][:-2]    
        target_ne[i][0] = target_ne[i][0].strip()
    places = list(set(i[0] for i in target_ne)) # removing duplicates as well
    places_and_poi_processed = []
    # case: route 66 museum -> oklahoma route 66 museum
    for i in places:
        if i.startswith("National Geographic"):
            continue
        if i.startswith("Peace of Mind"):
            continue
        if i.endswith(" and"):
            continue
        places_and_poi_processed.append(i)
    places_and_poi = [i for i in target_ne if i[0] in places_and_poi_processed]
    places_and_poi.sort(key=lambda x: x[1].start_char)
    ne_index = {}
    for i in range(len(places_and_poi)):
        if ne_index.get(places_and_poi[i][0]):
            ne_index[places_and_poi[i][0]].append(i)
        else:
            ne_index[places_and_poi[i][0]] = [i]
    return places_and_poi_processed, places_and_poi, ne_index

## Gazetteer

In [7]:
import difflib
def str_similarity_percent(a: str, b: str) -> float:
    return round(difflib.SequenceMatcher(lambda x: x == " ", a, b).ratio(), 3)

In [8]:
# 20'000 credits daily limit per application (identified by the parameter 'username'),
# the hourly limit is 1000 credits. A credit is a web service request hit for most services.
# An exception is thrown when the limit is exceeded. 
geonames = {
        "exact_match":"http://secure.geonames.org/searchJSON?maxRows=40&username=kwh44&name_equals=",
    "general_match": "http://secure.geonames.org/searchJSON?maxRows=30&username=kwh44&name="
}
# https://www.gisgraphy.com/documentation/user-guide.php#fulltextservice
gisgraphy = {
    "exact_match": "https://services.gisgraphy.com/fulltext/fulltextsearch?&format=json&allwordsrequired=true&q=",
    "general_match": "https://services.gisgraphy.com/fulltext/fulltextsearch?&format=json&q="
}
resource_name = ""
dbpedia = {
    "resource_name_match": "https://lookup.dbpedia.org/api/search?&typeName=place&format=JSON_FULL&maxResults=3&query=",
    "resource_rdf_get": f"https://dbpedia.org/data/{resource_name}.json"
}

## Existence Detection

In [9]:
import reverse_geocoder as rg
import unidecode

In [10]:
def get_state_by_lat_lng(lat, lng):
    return rg.search((lat,lng))[0]

In [11]:
class Place:
    
    def __init__(self, place_name, country, state, place_type, lat, long, link=None, population=0):
        self.place_name = place_name
        self.country = country
        self.state = state
        self.place_type = place_type
        self.lat = float(lat)
        self.long = float(long)
        self.link = link
        self.population = population
        
    def __repr__(self):
        return f"Place name: {self.place_name}; State: {self.state}, \
Country: {self.country}, Place Type: {self.place_type}, Lat: {self.lat}, Long: {self.long}"
    
    def __eq__(self, other):
        return self.place_name == other.place_name and self.country == other.country and \
        self.state == other.state

In [38]:
# can be made faster by parallelizing
def existence_check(extracted_places: list) -> tuple:
    geonames_search = dict()
    gisgraphy_search = dict()
    dbpedia_search = dict()
    for i in extracted_places:
        print(i)
        # try dbpedia, maybe there is a wikipedia page for the place extracted
        #print("DBpedia search")
        # dbpedia resource search
        dbpedia_lookup = requests.get(dbpedia["resource_name_match"] + i).json()
        for result in dbpedia_lookup["docs"]:
            result_name = result["label"][0]["value"]
            redirect_result_name = []
            if result.get("redirectlabel"):
                redirect_result_name = [k["value"] for k in result["redirectlabel"]]
            relevance = max(str_similarity_percent(unidecode.unidecode(r), i) for r in redirect_result_name + [result_name])
            print("Result relevance is ", relevance)
            if relevance >= 0.65:
                print("Wikipedia page found")
                # do resourse rdf json get
                rdf_json_url = result["resource"][0]["value"].replace("resource", "data") + ".json"
                print(rdf_json_url)
                try:
                    resource_rdf = requests.get(rdf_json_url, timeout=5).json()
                except:
                    print("dbpedia timeout")
                    continue
                if dbpedia_search.get(i):
                    dbpedia_search[i].append([result["resource"][0]["value"], resource_rdf])
                else:
                    dbpedia_search[i] = [[result["resource"][0]["value"], resource_rdf]]
        exact_match = False
        # geonames exact place match search
        geoname_exact_lookup = requests.get(geonames["exact_match"] + i).json()
        # print(geoname_exact_lookup)
        results_num = geoname_exact_lookup["totalResultsCount"]
        if results_num >= 1:
            geonames_search[i] = geoname_exact_lookup
            print("Geonames exact search found match")
            exact_match = True
        # gisgraphy exact place match search
        gisgraphy_exact_lookup = requests.get(gisgraphy["exact_match"] + i).json()
        # print(gisgraphy_exact_lookup)
        results_num = gisgraphy_exact_lookup["response"]["numFound"]
        if results_num >= 1:
            # gisgaphy is not that good with exact match, might find lots of places that constain the query string,
            # but those places would add more noise for region localization module
            if results_num < 80:
                gisgraphy_search[i] = gisgraphy_exact_lookup
                print("Gisgraphy exact search found match")
                exact_match = True
        if exact_match:
            continue
        else:
            print("exact place name search failed")        
        # exact place name search failed
        # geonames partial place match search
        geoname_general_lookup = requests.get(geonames["general_match"] + i).json()
        results_num = geoname_general_lookup["totalResultsCount"]
        if results_num >= 1:
            geonames_search[i] = geoname_general_lookup
            print("Geonames partial place match found")
            continue
        # gisgraphy partial place match search
        gisgraphy_general_lookup = requests.get(gisgraphy["general_match"] + i).json()
        results_num = gisgraphy_general_lookup["response"]["numFound"]
        if results_num >= 1:
            if results_num < 80:
                gisgraphy_search[i] = gisgraphy_general_lookup
                print("Geonames partial place match found")
                continue
    return geonames_search, gisgraphy_search, dbpedia_search

In [13]:
def unify_lookup_results(lookup_response):
    geonames_results, gisgraphy_results, dbpedia_rdf = lookup_response
    unified_lookup_results = []
    for key, value in geonames_results.items():
        results = value['geonames']
        for res in results:
            if str_similarity_percent(key, res["toponymName"]) < 0.65:
                continue
            # skip results that don't have required information
            try:
                lat = res["lat"]
                long = res["lng"]
                place_name = key # res["toponymName"]
                country = res["countryCode"]
                place_type = res["fcodeName"]
                population = res.get("population", 0)
                state = res["adminName1"]
                unified_lookup_results.append(Place(place_name, country, state, place_type, lat, long, population=population))
            except:
                continue
    for key, value in gisgraphy_results.items():
        num = len(value["response"]["docs"])
        for res in value["response"]["docs"]:
            if str_similarity_percent(key, res['name']) < 0.75:
                continue
            try:
                lat = res["lat"]
                long = res["lng"]
                place_name = key
                country = res["country_code"]
                place_type = res["placetype"]
                population = res.get("population", 0)
                state = get_state_by_lat_lng(lat, long)["admin1"]
                unified_lookup_results.append(Place(place_name, country, state, place_type, lat, long, population=population))
            except:
                continue
    for key, value in dbpedia_rdf.items():
        for result in value:
            try:
                rdf = result[1][result[0]]
                long = rdf["http://www.w3.org/2003/01/geo/wgs84_pos#long"][0]["value"]
                lat = rdf["http://www.w3.org/2003/01/geo/wgs84_pos#lat"][0]["value"]
                place_type = "Attraction"
                place_name = key
                country_admin1 = get_state_by_lat_lng(lat, long)
                state = country_admin1["admin1"]
                country = country_admin1["cc"]
                unified_lookup_results.append(Place(place_name, country, state, place_type, lat, long))
            except Exception as e:
                continue
    return unified_lookup_results

## Region localization and noise removal

In [14]:
from geopy import distance

def ll_dist(a, b):
    return distance.geodesic(a, b).km

In [15]:
def leave_one_country(geo_points):
    places = geo_points
    countries = dict()
    for i in places:
        if countries.get(i.country):
            countries[i.country] +=1
        else:
            countries[i.country] = 1
    travel_country = max(countries, key=lambda x: countries[x])
    travel_country_places = list(filter(lambda i: i.country == travel_country, places))
    place_name_geocoding = dict()
    for place in travel_country_places:
        if place_name_geocoding.get(place.place_name):
            place_name_geocoding[place.place_name].append(place)
        else:
            place_name_geocoding[place.place_name] = [place]
    states = [i.state for i in travel_country_places if i.place_type == "first-order administrative division"]
    return place_name_geocoding, states

In [150]:
def remove_duplicates_and_leave_states(place_name_geocoding, states):
    for i in place_name_geocoding:
        possible_geolocation_num = len(place_name_geocoding[i]) 
        if possible_geolocation_num <= 1:
            continue
        filtered_geolocations = []
        # state names are treated as first-order administrative division
        for j in place_name_geocoding[i]:
            if j.place_type == "first-order administrative division":
                filtered_geolocations.append(j)
                break
        if filtered_geolocations:
            place_name_geocoding[i] = filtered_geolocations
            continue
        filtered_geolocations = []    
        # remove duplicate geolocations that come from different
        # gazetteers and differ slightly in coordinates
        for j in place_name_geocoding[i]:
#             if j.state not in states:
#                 continue
            duplicate = False
            for q in place_name_geocoding[i]:
                if j.state == q.state and abs(j.lat - q.lat) < 1.0 and abs(j.long - q.long) < 1.0:
                    duplicate = True
            if not duplicate or j not in filtered_geolocations:
                filtered_geolocations.append(j)
        place_name_geocoding[i] = filtered_geolocations
        # remove geolocations that are located on state borders,
        # that point to the same place, but in on gazetteer it on opposing sides of the border
        filtered_geolocations = []    
        for j in place_name_geocoding[i]:
            duplicate = False
            for q in filtered_geolocations:
                if j != q and abs(j.lat - q.lat) < 1.0 and abs(j.long - q.long) < 1.0:
                    duplicate = True
            if not duplicate:
                filtered_geolocations.append(j)
        place_name_geocoding[i] = filtered_geolocations
    return place_name_geocoding

In [17]:
def located_based_on_neighbours_v2(place_name_geocoding, ne_index, places_and_poi, order=0, gap=1):
    for i in place_name_geocoding:    
        # for places that have two or more possible geolocations
        # look at left and right target token in the text to 
        # decide which geolocation to choose
        neighbours = []
        if len(place_name_geocoding[i]) > 1:
            try:
                first_mention = ne_index[i][order]
            except:
                pass
            try:
                print(i)
                right_neighbour = places_and_poi[first_mention + gap]
                right_neighbour_name = right_neighbour[0]
                print("Right neighbour ", right_neighbour_name)
                if len(place_name_geocoding[right_neighbour_name]) == 1:
                    neighbours.append(place_name_geocoding[right_neighbour_name][0])
            except:
                pass
            try:
                left_neighbour = places_and_poi[first_mention - gap]
                left_neighbour_name = left_neighbour[0]
                print("left neighbour ", left_neighbour_name)
                if len(place_name_geocoding[left_neighbour_name]) == 1:
                    neighbours.append(place_name_geocoding[left_neighbour_name][0])
            except:
                pass
        if neighbours:
            filtered_geolocations = []
            closest_neighbour = None
            closest_dist = None
            closest_possible_geolocation = None
            for n in range(len(neighbours)):
                cord1 = (neighbours[n].lat, neighbours[n].long)
                for b in place_name_geocoding[i]:
                    cord2 = (b.lat, b.long)
                    d = ll_dist(cord1, cord2)
                    if closest_dist is None or closest_dist > d:
                        closest_dist = d
                        closest_neighbour = neighbours[n]
                        closest_possible_geolocation = b
            print("Closest neighbour ", closest_neighbour.place_name)
            print("Closest distance ", closest_dist)
            if closest_dist <= 100:
                # in km
                place_name_geocoding[i] = [closest_possible_geolocation]
    return place_name_geocoding

In [18]:
def located_based_on_neighbours(place_name_geocoding, ne_index, places_and_poi, order=0, gap=1):
    for i in place_name_geocoding:    
        # for places that have two or more possible geolocations
        # look at left and right target token in the text to 
        # decide which geolocation to choose
        neighbours_state = []
        if len(place_name_geocoding[i]) > 1:
            try:
                first_mention = ne_index[i][order]
            except:
                continue
            try:
                right_neighbour = places_and_poi[first_mention + gap]
                dist = abs(right_neighbour[1].start_char - places_and_poi[first_mention][1].start_char)
                if dist > 300:
                    raise ValueError("neighbour too far away")
                right_neighbour_name = right_neighbour[0]
                if len(place_name_geocoding[right_neighbour_name]) == 1:
                    neighbours_state.append([place_name_geocoding[right_neighbour_name][0].state, dist])
            except:
                pass
            try:
                left_neighbour = places_and_poi[first_mention - gap]
                dist = abs(left_neighbour[1].start_char - places_and_poi[first_mention][1].start_char)
                if dist > 300:
                    raise ValueError("neighbour too far away")
                left_neighbour_name = left_neighbour[0]
                if len(place_name_geocoding[left_neighbour_name]) == 1:
                    neighbours_state.append([place_name_geocoding[left_neighbour_name][0].state, dist])
            except:
                pass
        if neighbours_state:
            filtered_geolocations = []
            closest_neighbour_with_exact_geolocation = min(neighbours_state, key=lambda x: x[1])
            filtered_state = closest_neighbour_with_exact_geolocation[0]
            for j in place_name_geocoding[i]:
                if j.state == filtered_state:
                    filtered_geolocations.append(j)
                    break
            if filtered_geolocations:
                place_name_geocoding[i] = filtered_geolocations
            else:
                # try assign target place to more distant neighbour region
                other_neighbour = max(neighbours_state, key=lambda x: x[1])
                filtered_state = other_neighbour[0]
                for j in place_name_geocoding[i]:
                    if j.state == filtered_state:
                        filtered_geolocations.append(j)
                        break
                if filtered_geolocations:
                    place_name_geocoding[i] = filtered_geolocations
    return place_name_geocoding

In [19]:
def locate_based_on_proximity_to_located(place_name_geocoding, min_group=2, max_dist = 150):
    for i in place_name_geocoding:
        neig_dist_ith = dict()
        if len(place_name_geocoding[i]) == 1:
            continue
        for j in place_name_geocoding:
            if len(place_name_geocoding[j]) == 1:
                index = 0
                for a in place_name_geocoding[i]:

                    b = place_name_geocoding[j][0]

                    neig_dist = ll_dist((a.lat, a.long), (b.lat, b.long))

                    if neig_dist_ith.get(index):
                        neig_dist_ith[index].append(neig_dist)
                    else:
                        neig_dist_ith[index] = [neig_dist]
                    index += 1
                        
        possible_geolocations = []
        for key, value in neig_dist_ith.items():
            counter = 0
            for v in value:
                if v <= max_dist:
                    counter += 1
            if counter >= min_group:
                possible_geolocations.append([place_name_geocoding[i][key], counter])
        if possible_geolocations:
            geolocation = max(possible_geolocations, key=lambda x: x[1])
            #print(type(geolocation), geolocation)
            place_name_geocoding[i] = [geolocation[0]]
            
    return place_name_geocoding

In [21]:
def locate_based_on_population(place_name_geocoding):
    for i in place_name_geocoding:
        if len(place_name_geocoding[i]) > 1:
            if any(j.population > 0 for j in place_name_geocoding[i]):
                most_popular = max(place_name_geocoding[i], key=lambda x: x.population)
                place_name_geocoding[i] = [most_popular]
    return place_name_geocoding

In [22]:
def remove_geolocations_not_in_states_mentioned(place_name_geocoding, states):
    for i in place_name_geocoding:
        filtered_geolocations = []
        if len(place_name_geocoding[i]) > 1:
            for j in place_name_geocoding[i]:
                if j.state in states:
                    filtered_geolocations.append(j)
        if filtered_geolocations:
            place_name_geocoding[i] = filtered_geolocations
    return place_name_geocoding

In [23]:
def locate_in_group(place_name_geocoding, min_group_num=3):
    states = dict()
    for i in place_name_geocoding:
        if len(place_name_geocoding[i]) == 1:
            if states.get(place_name_geocoding[i][0].state):
                states[place_name_geocoding[i][0].state] +=1
            else:
                states[place_name_geocoding[i][0].state] = 1
    # choose top states
    states = sorted(list(states.items()), key=lambda x: x[1])

    top_states = [j[0] for j in states if j[1] >= 3]
    for i in place_name_geocoding:
        filtered_geolocations = []
        if len(place_name_geocoding[i]) > 1:
            for j in place_name_geocoding[i]:
                if j.state in top_states:
                     filtered_geolocations.append(j)
            if filtered_geolocations:
                place_name_geocoding[i] = filtered_geolocations
    return place_name_geocoding

In [24]:
def remove_empty_geo_point(place_name_geocoding):
    remove_keys = []
    for i in place_name_geocoding:
        if len(place_name_geocoding[i]) == 0:
            remove_keys.append(i)
    for i in remove_keys:
        del place_name_geocoding[i]
    return place_name_geocoding

In [25]:
def remove_mere_state_names(place_name_geocoding):
    for i in place_name_geocoding:
        state_name = False
        for j in place_name_geocoding[i]:
            if j.place_type == "first-order administrative division":
                state_name = True
        if state_name:
            place_name_geocoding[i] = []
    return place_name_geocoding

In [154]:
def disambiguate(places_geocoded, ne_index, places_and_poi):
    x, y = leave_one_country(places_geocoded)
    x = remove_duplicates_and_leave_states(x, y)
    x = located_based_on_neighbours(x, ne_index, places_and_poi)
    x = locate_in_group(x)
    x = located_based_on_neighbours(x, ne_index, places_and_poi)
    x = locate_based_on_population(x)
    x = locate_in_group(x)
    x = locate_based_on_proximity_to_located(x, max_dist=300)
    x = remove_mere_state_names(x)
    x = remove_empty_geo_point(x)
    print("States mentioned ", y)
    return x

## Visualization

In [27]:
import altair as alt
from vega_datasets import data as vg_data
import pandas as pd
import geopandas as gpd
import plotly.graph_objects as go

In [28]:
def visualize_points(place_name_geocoding):
    name, lat, long = [], [], []
    for i in place_name_geocoding:
        if len(place_name_geocoding[i]) == 1:
            name.append(i)
            lat.append(place_name_geocoding[i][0].lat)
            long.append(place_name_geocoding[i][0].long)
                
    df = pd.DataFrame(list(zip(name, lat, long)), columns =['Name', 'Lat', "Long"])
#     states = alt.topo_feature(vg_data.us_10m.url, feature='states')
#     gdf = gpd.read_file('https://raw.githubusercontent.com/python-visualization/folium/master/tests/us-states.json', driver='GeoJSON')
#     gdf = gdf[gdf.id=='CA']
    
#     background = alt.Chart(states).mark_geoshape(
#         fill='lightgray',
#         stroke='white'
#     ).project('albersUsa').properties(
#         width=900,
#         height=1000
#     )
    
#     base = alt.Chart(gdf).mark_geoshape(
#     stroke='black', 
#     fill='lightgray').properties(
#         width=700,
#         height=1000
#     )

#     points = alt.Chart(df).mark_circle().encode(
#         longitude='Long:Q',
#         latitude='Lat:Q',
#         color=alt.Color('Name:N', legend=alt.Legend(symbolLimit=0)),
#         size=alt.value(50),
#         tooltip='Name'
#     )

#     return background + points
    fig = go.Figure(data=go.Scattergeo(
        lon = df['Long'],
        lat = df['Lat'],
        text = df['Name'],
        mode = 'markers'
        ))

    fig.update_layout(
            title = 'Geocoded mentioned places',
            geo_scope='usa',
            height=800,
            margin=dict(l=0, r=0, t=50, b=0),
        )
    return fig.show()

### Test page

In [29]:
def find_places(story_id: int):
    page_url = data[str(story_id)]["article_url"]
    text, links = text_from_html(page_url)
    places_and_poi_processed, places_and_poi, ne_index = get_ne(text)
    lookup_results = existence_check(places_and_poi_processed)
    places_geocoded = unify_lookup_results(lookup_results)
    place_name_geocoding = disambiguate(places_geocoded, ne_index, places_and_poi)
    return ne_index, places_and_poi, places_geocoded, place_name_geocoding

In [39]:
p1_ne_index, p1_places_and_poi, p1_places_geocoded, p1_place_name_geocoding = find_places(1)

Blueberry Hill
Result relevance is  0.683
Wikipedia page found
http://dbpedia.org/data/Blueberry_Hill_(restaurant).json
Result relevance is  0.32
Result relevance is  0.357
Geonames exact search found match
Will Rogers State Historic Park
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Will_Rogers_State_Historic_Park.json
Result relevance is  0.519
Result relevance is  0.346
Geonames exact search found match
Gisgraphy exact search found match
Oatman
Result relevance is  0.75
Wikipedia page found
http://dbpedia.org/data/Oatman,_Arizona.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/Durlin_Hotel.json
Result relevance is  0.375
Geonames exact search found match
Michigan
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Michigan.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/Detroit.json
Result relevance is  0.727
Wikipedia page found
http://dbpedia.org/data/Flint,_Michigan.json
Geonames e

Palisades Park
Result relevance is  0.875
Wikipedia page found
http://dbpedia.org/data/Palisades_Park,_New_Jersey.json
Result relevance is  0.903
Wikipedia page found
http://dbpedia.org/data/Palisades_Interstate_Parkway.json
Result relevance is  0.718
Wikipedia page found
http://dbpedia.org/data/Palisades_Interstate_Park_Commission.json
Geonames exact search found match
St. Louis
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/St._Louis.json
Result relevance is  0.621
Result relevance is  0.692
Wikipedia page found
http://dbpedia.org/data/St._Louis_County,_Missouri.json
Geonames exact search found match
Stroud
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Stroud.json
Result relevance is  0.571
Result relevance is  0.571
Geonames exact search found match
San Bernardino
Result relevance is  0.8
Wikipedia page found
http://dbpedia.org/data/San_Bernardino_County,_California.json
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/d

Gisgraphy exact search found match
Hwy 53
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/Interstate_530.json
Result relevance is  0.111
Result relevance is  0.222
exact place name search failed
Ozarks
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Ozarks.json
Result relevance is  0.5
Result relevance is  0.462
Geonames exact search found match
Shirley
Result relevance is  0.778
Wikipedia page found
http://dbpedia.org/data/Shirley,_Massachusetts.json
Result relevance is  0.636
Result relevance is  0.609
Geonames exact search found match
Oklahoma City
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Oklahoma_City.json
Result relevance is  0.609
Result relevance is  0.609
Geonames exact search found match
Costa Rica
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Costa_Rica.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/San_José,_Costa_Rica.json
Result relevance is  0.6

In [98]:
p1_places_geocoded

[Place name: Blueberry Hill; State: Ontario, Country: CA, Place Type: hill, Lat: 45.73339, Long: -79.98296,
 Place name: Blueberry Hill; State: Mississippi, Country: US, Place Type: populated place, Lat: 31.40267, Long: -90.44426,
 Place name: Blueberry Hill; State: Central Province, Country: PG, Place Type: hill, Lat: -9.61472, Long: 147.40038,
 Place name: Blueberry Hill; State: Maine, Country: US, Place Type: mountain, Lat: 44.53618, Long: -69.917,
 Place name: Blueberry Hill; State: Ontario, Country: CA, Place Type: hill, Lat: 46.39818, Long: -81.28286,
 Place name: Blueberry Hill; State: Alberta, Country: CA, Place Type: hill, Lat: 55.88323, Long: -119.40282,
 Place name: Blueberry Hill; State: Alberta, Country: CA, Place Type: hill, Lat: 51.5334, Long: -114.68532,
 Place name: Blueberry Hill; State: British Columbia, Country: CA, Place Type: hill, Lat: 53.79989, Long: -123.16973,
 Place name: Blueberry Hill; State: Alaska, Country: US, Place Type: mountain, Lat: 60.89722, Long: -

In [151]:
p1_place_name_geocoding = disambiguate(p1_places_geocoded, p1_ne_index, p1_places_and_poi)

[Place name: Needles; State: California, Country: US, Place Type: island, Lat: 37.82909, Long: -122.47831, Place name: Needles; State: California, Country: US, Place Type: populated place, Lat: 34.84806, Long: -114.61413, Place name: Needles; State: Idaho, Country: US, Place Type: mountain, Lat: 44.73462, Long: -115.8279, Place name: Needles; State: Oregon, Country: US, Place Type: island, Lat: 42.429, Long: -124.45122, Place name: Needles; State: Missouri, Country: US, Place Type: , Lat: 39.78641, Long: -93.13743, Place name: Needles; State: California, Country: US, Place Type: Attraction, Lat: 34.84805679321289, Long: -114.6141662597656]
[Place name: Needles; State: California, Country: US, Place Type: island, Lat: 37.82909, Long: -122.47831, Place name: Needles; State: California, Country: US, Place Type: populated place, Lat: 34.84806, Long: -114.61413, Place name: Needles; State: Idaho, Country: US, Place Type: mountain, Lat: 44.73462, Long: -115.8279, Place name: Needles; State: 

In [152]:
[i for i in p1_place_name_geocoding if len(p1_place_name_geocoding[i]) > 1]

["Devil's Elbow"]

In [165]:
"L.A." in [i[0] for i in p1_places_and_poi]

True

In [167]:
p1_places_geocoded

[Place name: Blueberry Hill; State: Ontario, Country: CA, Place Type: hill, Lat: 45.73339, Long: -79.98296,
 Place name: Blueberry Hill; State: Mississippi, Country: US, Place Type: populated place, Lat: 31.40267, Long: -90.44426,
 Place name: Blueberry Hill; State: Central Province, Country: PG, Place Type: hill, Lat: -9.61472, Long: 147.40038,
 Place name: Blueberry Hill; State: Maine, Country: US, Place Type: mountain, Lat: 44.53618, Long: -69.917,
 Place name: Blueberry Hill; State: Ontario, Country: CA, Place Type: hill, Lat: 46.39818, Long: -81.28286,
 Place name: Blueberry Hill; State: Alberta, Country: CA, Place Type: hill, Lat: 55.88323, Long: -119.40282,
 Place name: Blueberry Hill; State: Alberta, Country: CA, Place Type: hill, Lat: 51.5334, Long: -114.68532,
 Place name: Blueberry Hill; State: British Columbia, Country: CA, Place Type: hill, Lat: 53.79989, Long: -123.16973,
 Place name: Blueberry Hill; State: Alaska, Country: US, Place Type: mountain, Lat: 60.89722, Long: -

In [176]:
p1_place_name_geocoding["Palisades Park"][0]

Place name: Palisades Park; State: California, Country: US, Place Type: park, Lat: 32.80227, Long: -117.25948

In [303]:
list(p1_place_name_geocoding.keys())

['Blueberry Hill',
 'Will Rogers State Historic Park',
 'Oatman',
 'Meteor Crater',
 'Chicago',
 'Lincoln',
 'Big Piney River',
 'America',
 'Tacoma',
 'Palo Duro Canyon State Park',
 'Amarillo',
 'Los Angeles',
 'Santa Monica',
 'Kingman',
 'Lebanon',
 'El Morro National Monument',
 'Acoma',
 'Palisades Park',
 'St. Louis',
 'Stroud',
 'San Bernardino',
 'Rialto',
 'Pacific',
 'Needles',
 'Springfield',
 'Seligman',
 'Colorado River',
 'Route 66',
 'Palo Duro Canyon',
 'Loop',
 'Tucumcari',
 'Chain of Rocks Bridge',
 'Route 66 Museum',
 'Hackberry General Store',
 'Ozarks',
 'Shirley',
 'Oklahoma City',
 'Clinton',
 'Grants',
 'Funks Grove',
 "Devil's Elbow",
 'Tulsa',
 'Jack Smith Park',
 'Woody Guthrie Center',
 'Cozy Dog Drive In',
 'Delmar Loop',
 'Lone Star State',
 'Cadillac Ranch',
 'Oklahoma Route 66 Museum',
 'Munger Moss Motel',
 'Blue Swallow Motel',
 'Wigwam Motel',
 'Rock Cafe',
 'Oatman Hotel',
 'Meramec Caverns',
 'El Garces',
 'Amtrak',
 "Cain's Ballroom"]

In [178]:
ll_dist((34.0230223,-118.5116491), ( 32.80227, -117.25948))

178.5953553636868

In [41]:
visualize_points(p1_place_name_geocoding)

In [42]:
p2_ne_index, p2_places_and_poi, p2_places_geocoded, p2_place_name_geocoding = find_places(2)

West Coast
Result relevance is  0.636
Result relevance is  0.87
Wikipedia page found
http://dbpedia.org/data/West_Coast,_New_Zealand.json
Result relevance is  0.606
Geonames exact search found match
Hawaii
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Hawaii.json
Result relevance is  0.545
Result relevance is  0.571
Geonames exact search found match
Santa Ynez
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Santa_Ynez,_California.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/Santa_Ynez_Mountains.json
Result relevance is  0.645
Geonames exact search found match
Pea Soup Andersen
Result relevance is  0.412
Result relevance is  0.316
Result relevance is  0.294
Gisgraphy exact search found match
Santa Barbara
Result relevance is  0.867
Wikipedia page found
http://dbpedia.org/data/Santa_Barbara,_California.json
Result relevance is  0.788
Wikipedia page found
http://dbpedia.org/data/Santa_Barbara_County,_California.

Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Hearst_Castle.json
dbpedia timeout
Result relevance is  0.462
Result relevance is  0.6
Geonames exact search found match
Gisgraphy exact search found match
Guadalupe
Result relevance is  0.6
Result relevance is  0.621
Result relevance is  0.947
Wikipedia page found
http://dbpedia.org/data/Guadeloupe.json
dbpedia timeout
Geonames exact search found match
Crystal Cove Beach Cottages
Result relevance is  0.375
Result relevance is  0.256
Result relevance is  0.385
exact place name search failed
Huntington Pier
Result relevance is  0.833
Wikipedia page found
http://dbpedia.org/data/Huntington_Beach_Pier.json
dbpedia timeout
Result relevance is  0.359
Result relevance is  0.387
Gisgraphy exact search found match
Trout Creek Trail
Result relevance is  0.294
Result relevance is  0.312
Result relevance is  0.308
Geonames exact search found match
Gisgraphy exact search found match
OstrichLand USA
Result relevance is  0.5
Resul

In [None]:
p2_place_name_geocoding = disambiguate(p2_places_geocoded, p2_ne_index, p2_places_and_poi)

In [197]:
[i for i in p2_place_name_geocoding if len(p2_place_name_geocoding[i]) > 1]

[]

In [265]:
p2_place_name_geocoding["Pea Soup Andersen"]

[Place name: Pea Soup Andersen; State: California, Country: US, Place Type: Restaurant, Lat: 34.613735000000005, Long: -120.192777]

In [304]:
list(p2_place_name_geocoding.keys())

['Santa Ynez',
 'Santa Barbara',
 'Southern California',
 'Orange County',
 'Long Beach',
 'Crystal Cove State Park',
 'Pismo',
 'Los Trancos Creek',
 'Huntington Beach',
 'Newport Beach',
 'Pismo Beach',
 'Long Beach Convention and Entertainment Center',
 'Pacific',
 'Harbor House Inn',
 'Ripley',
 'Avila Beach',
 'Mendenhall',
 'El Paseo',
 'Arroyo Grande',
 'Hearst Castle',
 'Trout Creek Trail',
 'Lompoc',
 'State Street',
 'Buellton',
 'Aquarium of the Pacific',
 'Vandenberg Air Force Base',
 'Pea Soup Andersen',
 'Edna Valley',
 'Dunes Center',
 'Huntington Pier',
 'La Purisima Mission',
 'Guadalupe-Nipomo Dunes',
 'SoCal',
 'International Surfing Museum']

In [44]:
visualize_points(p2_place_name_geocoding)

In [45]:
p3_ne_index, p3_places_and_poi, p3_places_geocoded, p3_place_name_geocoding = find_places(3)

Crescent Rock
Result relevance is  0.812
Wikipedia page found
http://dbpedia.org/data/Crescent_Beach,_Surrey.json
Result relevance is  0.262
Result relevance is  0.326
Geonames exact search found match
Piedmont
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Piedmont.json
Result relevance is  0.64
Result relevance is  0.696
Wikipedia page found
http://dbpedia.org/data/Turin.json
Geonames exact search found match
Virginia
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Virginia.json
Result relevance is  0.8
Wikipedia page found
http://dbpedia.org/data/West_Virginia.json
Result relevance is  0.64
Geonames exact search found match
Baymont Inn & Suites
Result relevance is  0.4
Result relevance is  0.368
Result relevance is  0.314
exact place name search failed
Geonames partial place match found
Cumberland Pkwy
Result relevance is  0.538
Result relevance is  0.3
Result relevance is  0.353
exact place name search failed
Big Meadows Lodge
Result rel

exact place name search failed
Watermill Restaurant
Result relevance is  0.387
Result relevance is  0.35
Result relevance is  0.378
Gisgraphy exact search found match
George Washington Memorial Parkway
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/George_Washington_Memorial_Parkway.json
Result relevance is  0.333
Result relevance is  0.303
Geonames exact search found match
Tuscarora-Overall Run Trail
Result relevance is  0.293
Result relevance is  0.3
Result relevance is  0.286
Gisgraphy exact search found match
Cave City
Result relevance is  0.556
Result relevance is  0.5
Result relevance is  0.818
Wikipedia page found
http://dbpedia.org/data/Cave_City,_Kentucky.json
Geonames exact search found match
Skyline Drive
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Skyline_Drive.json
Result relevance is  0.686
Wikipedia page found
http://dbpedia.org/data/Equinox_Mountain.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/d

exact place name search failed
Geonames partial place match found
Mammoth Cave Hotel
Result relevance is  0.419
Result relevance is  0.261
Result relevance is  0.387
Gisgraphy exact search found match
Shenandoah
Result relevance is  0.741
Wikipedia page found
http://dbpedia.org/data/Shenandoah_County,_Virginia.json
Result relevance is  0.833
Wikipedia page found
http://dbpedia.org/data/Shenandoah,_Pennsylvania.json
Result relevance is  0.833
Wikipedia page found
http://dbpedia.org/data/Shenandoah,_Iowa.json
Geonames exact search found match
Mammoth Cave National Park
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Mammoth_Cave_National_Park.json
Result relevance is  0.085
Result relevance is  0.327
Geonames exact search found match
Gisgraphy exact search found match
Newfound Gap
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Newfound_Gap.json
Result relevance is  0.417
Result relevance is  0.353
Geonames exact search found match
Gisgraphy ex

In [46]:
[i for i in p3_place_name_geocoding if len(p3_place_name_geocoding[i]) > 1]

['Gateway Airport', 'Buckhorn Inn', 'Indian Creek Falls']

In [273]:
p3_places_geocoded

[Place name: Crescent Rock; State: Washington, Country: US, Place Type: rock, Lat: 48.16939, Long: -123.72582,
 Place name: Crescent Rock; State: Virginia, Country: US, Place Type: mountain, Lat: 38.56123, Long: -78.38362,
 Place name: Piedmont; State: Oklahoma, Country: US, Place Type: populated place, Lat: 35.642, Long: -97.74643,
 Place name: Piedmont; State: Munster, Country: IE, Place Type: locality, Lat: 51.89, Long: -8.42972,
 Place name: Piedmont; State: Missouri, Country: US, Place Type: populated place, Lat: 37.15449, Long: -90.69567,
 Place name: Piedmont; State: Ohio, Country: US, Place Type: populated place, Lat: 40.1884, Long: -81.19649,
 Place name: Piedmont; State: Washington, Country: US, Place Type: populated place, Lat: 48.0937, Long: -123.79159,
 Place name: Piedmont; State: Virginia, Country: US, Place Type: populated place, Lat: 37.61098, Long: -78.92169,
 Place name: Piedmont; State: Arizona, Country: US, Place Type: populated place, Lat: 34.23891, Long: -112.864

In [None]:
"" in [i[0] for i in p3_places_and_poi]

In [300]:
p3_place_name_geocoding["Frozen Niagara"][0]

Place name: Frozen Niagara; State: Kentucky, Country: US, Place Type: Street, Lat: 37.1491677161007, Long: -86.06427187623612

In [280]:
ll_dist((35.8277934,-83.5843169), (35.65454, -83.44127))

23.172513465749688

In [305]:
list(p3_place_name_geocoding.keys())

['Crescent Rock',
 'Piedmont',
 'Baymont Inn & Suites',
 'Big Meadows Lodge',
 'Charlies Bunion',
 'Clingmans Dome',
 'Gateway Airport',
 'Great Smoky Mountains National Park',
 'Mammoth Cave',
 'Gatlinburg',
 'Calvary Rocks',
 'Hawksbill Mountain',
 'LeConte Lodge',
 'Green River',
 'Smoky Mountains',
 'Rapidan Camp',
 'George Washington Memorial Parkway',
 'Cave City',
 'Skyline Drive',
 'Shenandoah Valley',
 'Shenandoah River',
 'Great Smoky Mountains',
 'Cades Cove',
 'Mount Le Conte',
 'Blue Ridge Parkway',
 'Grotto Falls',
 'Chimney Tops',
 'Skyland',
 'Deep Creek Trail',
 'Catoosa Wildlife Management Area',
 'Union',
 'Compton Peak',
 'Arlington',
 'Shenandoah National Park',
 'Skyland Resort',
 'Ronald Reagan Washington National Airport',
 'Cataloochee',
 'Shenandoah',
 'Mammoth Cave National Park',
 'Newfound Gap',
 'Manassas National Battlefield Park',
 'Cedar Sink',
 'Buckhorn Inn',
 'Trillium Gap Trail',
 'Frozen Niagara',
 'I-64W',
 'Pollock Dining Room',
 'Indian Creek Fa

In [47]:
visualize_points(p3_place_name_geocoding)

In [48]:
p4_ne_index, p4_places_and_poi, p4_places_geocoded, p4_place_name_geocoding = find_places(4)

Kings Canyon Scenic Byway
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/California_State_Route_180.json
Result relevance is  0.286
Result relevance is  0.3
Geonames exact search found match
Wawona
Result relevance is  0.857
Wikipedia page found
http://dbpedia.org/data/Wawona,_California.json
Result relevance is  0.429
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/Wawona_Hotel.json
Geonames exact search found match
Yosemite
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Yosemite_National_Park.json
Result relevance is  0.593
Result relevance is  0.824
Wikipedia page found
http://dbpedia.org/data/Yosemite_Village,_California.json
Geonames exact search found match
California 198
Result relevance is  0.606
Result relevance is  0.7
Wikipedia page found
http://dbpedia.org/data/California_State_Route_198.json
Result relevance is  0.963
Wikipedia page found
http://dbpedia.org/data/California_State_Route_19.json
exact place

Geonames exact search found match
Gisgraphy exact search found match
Tioga Road
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/California_State_Route_120.json
Result relevance is  0.455
Result relevance is  0.312
exact place name search failed
Death Valley
Result relevance is  0.889
Wikipedia page found
http://dbpedia.org/data/Death_Valley_National_Park.json
Result relevance is  0.727
Wikipedia page found
http://dbpedia.org/data/Death_Valley_Junction,_California.json
Result relevance is  0.857
Wikipedia page found
http://dbpedia.org/data/Interstate_40_in_North_Carolina.json
Geonames exact search found match
Lower Yosemite Fall
Result relevance is  0.333
Result relevance is  0.35
Result relevance is  0.293
Geonames exact search found match
Gisgraphy exact search found match
Wolverton Meadow
Result relevance is  0.296
Result relevance is  0.432
Result relevance is  0.348
Geonames exact search found match
Gisgraphy exact search found match
Badger Pass
Result relevan

Result relevance is  0.686
Wikipedia page found
http://dbpedia.org/data/Mammoth_Cave_National_Park.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/Crystal_Cave_(Sequoia_National_Park).json
Geonames exact search found match
Gisgraphy exact search found match
Lone Pine
Result relevance is  0.818
Wikipedia page found
http://dbpedia.org/data/Lone_Pine,_California.json
Result relevance is  0.581
Result relevance is  0.889
Wikipedia page found
http://dbpedia.org/data/Long_Pine,_Nebraska.json
Geonames exact search found match
Mist Trail
Result relevance is  0.364
Result relevance is  0.421
Result relevance is  0.348
Geonames exact search found match
Gisgraphy exact search found match
Nevada
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Nevada.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/Reno,_Nevada.json
Result relevance is  0.6
Geonames exact search found match
Red Cathedral
Result relevance is  0.556
Result

In [49]:
[i for i in p4_place_name_geocoding if len(p4_place_name_geocoding[i]) > 1]

['Mirror Lake',
 'Ash Mountain',
 'Swinging Bridge',
 'Grants Grove',
 'Underground Tour',
 'North Grove Loop']

In [306]:
list(p4_place_name_geocoding.keys())

['Kings Canyon Scenic Byway',
 'Wawona',
 'Yosemite',
 'Furnace Creek',
 'Kings River',
 'Scotty’s Castle',
 'Glacier Point',
 'Vernal',
 'Kings Canyon National Park',
 'Sequoia Park',
 'Sierra',
 'Taft Point',
 'Stovepipe Wells',
 'General Grant Tree',
 'Yosemite National Park',
 'Furnace Creek Ranch',
 'Grizzly Giant',
 'Death Valley National Park',
 'Badwater',
 'Death Valley',
 'Lower Yosemite Fall',
 'Badger Pass',
 'Las Vegas',
 'Ubehebe Crater',
 'Dante’s View',
 'Half Dome',
 'Hume Lake',
 'Manzanar National Historic Site',
 'Alabama Hills',
 'Mariposa Grove',
 'Lodgepole',
 'Olmsted Point',
 'Wildrose Peak',
 'Vernal Fall',
 'Furnace Creek Inn',
 'Mirror Lake',
 'Ash Mountain',
 'Sentinel Dome',
 'General Sherman Tree',
 'Merced River',
 'Yosemite Valley',
 'El Capitan',
 'Coarsegold',
 'Crystal Cave',
 'Lone Pine',
 'Mist Trail',
 'Sequoia',
 'Zabriskie Point',
 'Yosemite Falls',
 'Tunnel Log',
 'Death Valley Junction',
 'Mono Lake',
 'Mount Whitney',
 'Nevada Falls',
 'Moro 

In [381]:
"Congress Trail" in [i[0] for i in p4_places_and_poi]

False

In [377]:
p4_places_geocoded

[Place name: Kings Canyon Scenic Byway; State: California, Country: US, Place Type: road, Lat: 36.82265, Long: -118.83522,
 Place name: Wawona; State: California, Country: US, Place Type: populated place, Lat: 37.53688, Long: -119.65627,
 Place name: Wawona; State: North Sulawesi, Country: ID, Place Type: populated place, Lat: 1.3103, Long: 124.5586,
 Place name: Yosemite; State: Kentucky, Country: US, Place Type: populated place, Lat: 37.34674, Long: -84.82467,
 Place name: Yosemite; State: California, Country: US, Place Type: populated place, Lat: 37.7452, Long: -119.59822,
 Place name: Yosemite; State: Limpopo, Country: ZA, Place Type: farm, Lat: -24.03753, Long: 30.15813,
 Place name: Yosemite; State: Beijing, Country: CN, Place Type: hotel, Lat: 40.07356, Long: 116.52565,
 Place name: Furnace Creek; State: California, Country: US, Place Type: populated place, Lat: 36.4478, Long: -116.8529,
 Place name: Furnace Creek; State: Missouri, Country: US, Place Type: stream, Lat: 37.81172,

In [394]:
p4_place_name_geocoding["Crescent Meadow Road"]

[Place name: Crescent Meadow Road; State: California, Country: US, Place Type: Street, Lat: 36.55500907756713, Long: -118.76987004859132]

In [373]:
ll_dist((37.4115211,-119.213161), (37.26217, -119.70098))

46.29810217208078

In [50]:
visualize_points(p4_place_name_geocoding)

In [51]:
p5_ne_index, p5_places_and_poi, p5_places_geocoded, p5_place_name_geocoding = find_places(5)

Westcliffe
Result relevance is  0.833
Wikipedia page found
http://dbpedia.org/data/Westcliffe,_Colorado.json
Result relevance is  0.947
Wikipedia page found
http://dbpedia.org/data/Westcliff-on-Sea.json
Result relevance is  0.645
Geonames exact search found match
Zapata Ranch
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Zapata_Ranch,_Texas.json
Result relevance is  0.273
Result relevance is  0.64
Geonames exact search found match
Gisgraphy exact search found match
Paonia
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Paonia,_Colorado.json
Result relevance is  0.522
Result relevance is  0.343
Geonames exact search found match
Southwest Colorado
Result relevance is  0.733
Wikipedia page found
http://dbpedia.org/data/Colorado_River.json
Result relevance is  0.412
Result relevance is  0.368
exact place name search failed
Geonames partial place match found
Valley View Hot Springs
Result relevance is  0.333
Result relevance is  0.27
Result rele

Result relevance is  0.345
Geonames exact search found match
Gisgraphy exact search found match
Colorado
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Colorado.json
Result relevance is  0.762
Wikipedia page found
http://dbpedia.org/data/Colorado_Springs,_Colorado.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/Boulder,_Colorado.json
Geonames exact search found match
Smokey Jack Observatory
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Westcliffe,_Colorado.json
Result relevance is  0.35
Result relevance is  0.316
exact place name search failed
Geonames partial place match found
Alys’
Result relevance is  0.4
Result relevance is  0.375
Result relevance is  0.375
Geonames exact search found match
Paonia Reservoir
Result relevance is  0.432
Result relevance is  0.452
Result relevance is  0.312
Geonames exact search found match
Gisgraphy exact search found match
The Crestone Peaks
Result relevance is  0.839
Wikiped

In [52]:
[i for i in p5_place_name_geocoding if len(p5_place_name_geocoding[i]) > 1]

['North Fork Valley', 'Lizard', 'Wild West', 'Bluff Park', 'Willow Lake']

In [53]:
visualize_points(p5_place_name_geocoding)

In [54]:
p6_ne_index, p6_places_and_poi, p6_places_geocoded, p6_place_name_geocoding = find_places(6)

National Register of Historic Places
Result relevance is  0.699
Wikipedia page found
http://dbpedia.org/data/Cazenovia,_New_York.json
Result relevance is  0.615
Result relevance is  0.637
exact place name search failed
Geonames partial place match found
Craighead
Result relevance is  0.72
Wikipedia page found
http://dbpedia.org/data/Craighead_County,_Arkansas.json
Result relevance is  0.383
Result relevance is  0.562
Geonames exact search found match
Spring Creek Campground
Result relevance is  0.318
Result relevance is  0.273
Result relevance is  0.3
Geonames exact search found match
Gisgraphy exact search found match
Volcano Room
Result relevance is  0.267
Result relevance is  0.296
Result relevance is  0.348
exact place name search failed
Earth
Result relevance is  0.833
Wikipedia page found
http://dbpedia.org/data/Moon.json
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Earth.json
Result relevance is  0.455
Geonames exact search found match
Chihuahuan Desert


In [55]:
[i for i in p6_place_name_geocoding if len(p6_place_name_geocoding[i]) > 1]

['Spring Creek Campground', 'Red River Campground']

In [56]:
visualize_points(p6_place_name_geocoding)

In [57]:
p7_ne_index, p7_places_and_poi, p7_places_geocoded, p7_place_name_geocoding = find_places(7)

Portland
Result relevance is  0.842
Wikipedia page found
http://dbpedia.org/data/Portland,_Oregon.json
Result relevance is  0.857
Wikipedia page found
http://dbpedia.org/data/Poland.json
dbpedia timeout
Result relevance is  0.842
Wikipedia page found
http://dbpedia.org/data/Portland,_Maine.json
Geonames exact search found match
Napa Valley
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Napa_Valley_AVA.json
Result relevance is  0.5
Result relevance is  0.579
Geonames exact search found match
Oyster Trail
Result relevance is  0.87
Wikipedia page found
http://dbpedia.org/data/U.S._Route_18.json
Result relevance is  0.348
Result relevance is  0.286
Gisgraphy exact search found match
Damariscotta
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Damariscotta,_Maine.json
Result relevance is  0.706
Wikipedia page found
http://dbpedia.org/data/Damariscotta-Newcastle,_Maine.json
Result relevance is  0.533
Geonames exact search found match
Main Street
R

Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Kennebec_River.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/Sandy_River_(Kennebec_River_tributary).json
Result relevance is  0.651
Wikipedia page found
http://dbpedia.org/data/Little_River_(Kennebec_River_tributary).json
Geonames exact search found match
Gisgraphy exact search found match
Nonesuch Oysters
Result relevance is  0.357
Result relevance is  0.303
Result relevance is  0.414
exact place name search failed
Cape Elizabeth
Result relevance is  0.714
Wikipedia page found
http://dbpedia.org/data/Port_Elizabeth.json
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Cape_Elizabeth,_Maine.json
Result relevance is  0.824
Wikipedia page found
http://dbpedia.org/data/Cape_Elizabeth_Lights.json
Geonames exact search found match
Fort Andross Mill
Result relevance is  0.333
Result relevance is  0.387
Result relevance is  0.308
exact place name search failed
Harbor Fish 

In [58]:
[i for i in p7_place_name_geocoding if len(p7_place_name_geocoding[i]) > 1]

[]

In [59]:
visualize_points(p7_place_name_geocoding)

In [60]:
p8_ne_index, p8_places_and_poi, p8_places_geocoded, p8_place_name_geocoding = find_places(8)

NY 208
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/New_York_State_Route_208.json
Result relevance is  0.833
Wikipedia page found
http://dbpedia.org/data/New_York_State_Route_286.json
Result relevance is  0.462
exact place name search failed
Edward Hopper House Art Center
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Edward_Hopper_Birthplace_and_Boyhood_Home.json
Result relevance is  0.298
Result relevance is  0.318
exact place name search failed
Old Rhinebeck Aerodrome
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Old_Rhinebeck_Aerodrome.json
Result relevance is  0.333
Result relevance is  0.3
exact place name search failed
Geonames partial place match found
Shawangunks Ridge
Result relevance is  0.97
Wikipedia page found
http://dbpedia.org/data/Shawangunk_Ridge.json
Result relevance is  0.323
Result relevance is  0.323
exact place name search failed
Hudson River National Historic Landmark District
Result relevan

Result relevance is  0.684
Wikipedia page found
http://dbpedia.org/data/Greenwich.json
Geonames exact search found match
Home of Franklin D. Roosevelt National Historic Site
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Home_of_Franklin_D._Roosevelt_National_Historic_Site.json
Result relevance is  0.272
Result relevance is  0.316
Geonames exact search found match
Gisgraphy exact search found match
NY 207
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/New_York_State_Route_207.json
Result relevance is  0.667
Wikipedia page found
http://dbpedia.org/data/New_York_State_Route_286.json
Result relevance is  0.5
Gisgraphy exact search found match
Eleanor Roosevelt National Historic Site
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Eleanor_Roosevelt_National_Historic_Site.json
Result relevance is  0.261
Result relevance is  0.328
Geonames exact search found match
Gisgraphy exact search found match
Greenwich Village
Result r

NY 17
Result relevance is  0.545
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/New_York_State_Route_17.json
Result relevance is  0.909
Wikipedia page found
http://dbpedia.org/data/New_York_State_Route_427.json
exact place name search failed
Staatsburgh State Historic Site
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Staatsburgh_State_Historic_Site.json
Result relevance is  0.444
Result relevance is  0.133
Gisgraphy exact search found match
Tappan Zee Bridge
Result relevance is  0.895
Wikipedia page found
http://dbpedia.org/data/Tappan_Zee_Bridge_(1955–2017).json
Result relevance is  0.231
Result relevance is  0.327
Geonames exact search found match
Gisgraphy exact search found match
Clermont State Historic Site
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Clermont_State_Historic_Site.json
Result relevance is  0.78
Wikipedia page found
http://dbpedia.org/data/T._C._Steele_State_Historic_Site.json
Result relevance 

In [61]:
[i for i in p8_place_name_geocoding if len(p8_place_name_geocoding[i]) > 1]

['America', 'Churchill Downs']

In [62]:
visualize_points(p8_place_name_geocoding)

In [63]:
p9_ne_index, p9_places_and_poi, p9_places_geocoded, p9_place_name_geocoding = find_places(9)

Chicago River
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Chicago_River.json
Result relevance is  0.438
Result relevance is  0.452
Geonames exact search found match
Ragstock
Result relevance is  0.875
Wikipedia page found
http://dbpedia.org/data/Radstock.json
Result relevance is  0.609
Result relevance is  0.609
Gisgraphy exact search found match
Lincoln Park
Result relevance is  0.857
Wikipedia page found
http://dbpedia.org/data/Lincoln_Park,_Michigan.json
Result relevance is  0.727
Wikipedia page found
http://dbpedia.org/data/Lincoln_Park,_Chicago.json
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Lincoln_Park.json
Geonames exact search found match
Neo Futurists
Result relevance is  0.368
Result relevance is  0.333
Result relevance is  0.417
exact place name search failed
Xinhua/Redux
Result relevance is  0.25
Result relevance is  0.37
Result relevance is  0.357
exact place name search failed
Museum of Science and Industry
Result rele

In [64]:
[i for i in p9_place_name_geocoding if len(p9_place_name_geocoding[i]) > 1]

['Second Chance Thrift']

In [65]:
visualize_points(p9_place_name_geocoding)

In [66]:
p10_ne_index, p10_places_and_poi, p10_places_geocoded, p10_place_name_geocoding = find_places(10)

Google
Result relevance is  0.75
Wikipedia page found
http://dbpedia.org/data/Topeka,_Kansas.json
Result relevance is  0.632
Result relevance is  0.632
Geonames exact search found match
Gisgraphy exact search found match
Santa Barbara
Result relevance is  0.867
Wikipedia page found
http://dbpedia.org/data/Santa_Barbara,_California.json
Result relevance is  0.788
Wikipedia page found
http://dbpedia.org/data/Santa_Barbara_County,_California.json
Result relevance is  0.553
Geonames exact search found match
San Francisco
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/San_Francisco.json
Result relevance is  0.765
Wikipedia page found
http://dbpedia.org/data/San_Francisco_Bay_Area.json
Result relevance is  0.867
Wikipedia page found
http://dbpedia.org/data/San_Francisco_Bay.json
Geonames exact search found match
Computer History Museum
Result relevance is  1.0
Wikipedia page found
http://dbpedia.org/data/Computer_History_Museum.json
Result relevance is  0.35
Result rel

In [67]:
[i for i in p10_place_name_geocoding if len(p10_place_name_geocoding[i]) > 1]

['Clock Tower']

In [68]:
visualize_points(p10_place_name_geocoding)