In [41]:
import os
import pandas as pd
import re
import requests

data_dir = "/home/stavros/DATA/InfoKiosk"

In [57]:
museums = pd.read_csv(os.path.join(data_dir, "museums_hand.csv"))
monuments = pd.read_csv(os.path.join(data_dir, "monuments_hand.csv"))
natural = pd.read_csv(os.path.join(data_dir, "natural_beauty_hand.csv"))

Find coordinates

In [58]:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}

def search_name(name):
    sname = "+".join(name.split(" "))
    return "https://www.google.gr/maps/search/{}".format(sname)


def find_link(text, target="@36"):
    idx = text.find(target)
    start = idx
    while text[start] != '"':
        start -= 1
    end = idx
    while text[end] != '"':
        end += 1
    return text[start + 1:end]


def get_link(name):
    sname = search_name(name + " Rhodes")
    response = requests.get(sname, headers=headers)
    link = find_link(response.text)
    if link:
        return link
    print("Failed to find link for {}.".format(sname))
    
    sname = search_name(name)
    response = requests.get(sname, headers=headers)
    link = find_link(response.text)
    if link:
        return link
    print("Failed to find link for {}.".format(sname))
    
    return None


def coords_from_link(link):
    if link is None:
        return None
    
    coords_str = [p for p in link.split("/") if p and p[0] == "@"]
    if not coords_str:
        raise ValueError("Failed to find coordinates in {}.")
    elif len(coords_str) > 1:
        raise ValueError("Found multiple coordinates in {}.".format(link))
    
    coords_ls = coords_str[0].split(",")
    assert coords_ls[0][0] == "@"
    c1 = float(coords_ls[0][1:])
    c2 = float(coords_ls[1])
    return (c1, c2)


def find_coordinates(data_df):
    data_df["google_maps"] = data_df["name"].map(get_link)
    coordinates = data_df["google_maps"].map(coords_from_link)
    
    data_df["latitude"] = coordinates.map(lambda c: c[0] if c is not None else None)
    data_df["longitude"] = coordinates.map(lambda c: c[1] if c is not None else None)
    return data_df

In [59]:
monuments = find_coordinates(monuments)
museums = find_coordinates(museums)
natural = find_coordinates(natural)

Failed to find link for https://www.google.gr/maps/search/Ancient+Kamiros+Rhodes.
Failed to find link for https://www.google.gr/maps/search/Ancient+Kamiros.
Failed to find link for https://www.google.gr/maps/search/The+Vroulia+Rhodes.
Failed to find link for https://www.google.gr/maps/search/The+Vroulia.
Failed to find link for https://www.google.gr/maps/search/The+Palace+of+the+Grand+Master+(Byzantine+Museum)+Rhodes.
Failed to find link for https://www.google.gr/maps/search/The+Museum+of+Modern+Greek+Art+Rhodes.
Failed to find link for https://www.google.gr/maps/search/The+Museum+of+Modern+Greek+Art.
Failed to find link for https://www.google.gr/maps/search/The+Aquarium+of+Rhodes+or+Hydrobiological+Station+of+Rhodes+Rhodes.
Failed to find link for https://www.google.gr/maps/search/The+Aquarium+of+Rhodes+or+Hydrobiological+Station+of+Rhodes.
Failed to find link for https://www.google.gr/maps/search/Hostel+of+St.+Catherine+(Hagia+Aikaterini)+Rhodes.
Failed to find link for https://www.g

In [95]:
monuments.to_csv(os.path.join(data_dir, "monuments.csv"), index=False)
museums.to_csv(os.path.join(data_dir, "museums.csv"), index=False)
natural.to_csv(os.path.join(data_dir, "natural.csv"), index=False)

In [110]:
def directions_link(name):
    sname = "+".join(name.split(" "))
    return "https://www.google.gr/maps/dir/Stavros+Melathron+hotel/{}/am=t".format(sname)


def find_target(text, target="χλμ."):
    idx = text.find(target)
    start = idx
    while text[start] != '"':
        start -= 1
    end = idx
    while text[end] != '"':
        end += 1
    return text[start + 1:end]


def find_distance_and_time(name):
    link = directions_link(name)
    response = requests.get(link, headers=headers)
    if response.status_code != 200:
        print("Failed to find distance for {}.".format(name))
        return None, None, None
    
    try:
        distance_string = find_target(response.text, target="χλμ.")
        time_string = find_target(response.text, target="λεπτ.")

        distance_match = re.match("(\d+),(\d+) χλμ.", distance_string)
        distance = float(".".join([distance_match.group(1), distance_match.group(2)]))

        time_match = re.match("((\d+) ώρ. )?(\d+) λεπτ.", time_string)
        hours = 0 if time_match.group(2) is None else int(time_match.group(2))
        minutes = int(time_match.group(3))
    except Exception as e:
        print(name, e)
        return link, None, None
    
    return link, distance, 60 * hours + minutes


def find_directions(data_df):
    directions = data_df["name"].map(find_distance_and_time)
    
    data_df["directions_url"] = directions.map(lambda c: c[0] if c is not None else None)
    data_df["road_distance"] = directions.map(lambda c: c[1] if c is not None else None)
    data_df["road_time"] = directions.map(lambda c: c[2] if c is not None else None)
    
    return data_df

In [97]:
monuments = pd.read_csv(os.path.join(data_dir, "monuments.csv"))
museums = pd.read_csv(os.path.join(data_dir, "museums.csv"))
natural = pd.read_csv(os.path.join(data_dir, "natural.csv"))

In [111]:
monuments = find_directions(monuments)
museums = find_directions(museums)
natural = find_directions(natural)

The Vroulia 'NoneType' object has no attribute 'group'
Hostel of St. Catherine (Hagia Aikaterini) 'NoneType' object has no attribute 'group'
Preserved Traditional Settlement Koskinou 'NoneType' object has no attribute 'group'


In [113]:
monuments.to_csv(os.path.join(data_dir, "monuments.csv"), index=False)
museums.to_csv(os.path.join(data_dir, "museums.csv"), index=False)
natural.to_csv(os.path.join(data_dir, "natural.csv"), index=False)