In [45]:
import ast

import numpy as np
import pandas as pd

In [46]:
import requests

class GeoCodeClient:
    """Minimal HTTP client for a Nominatim-style reverse-geocoding service.

    Parameters
    ----------
    url : str
        Base URL of the service. A trailing slash is optional.
    timeout : float
        Seconds passed to ``requests.get`` as ``timeout`` so that a stalled
        connection cannot block a long batch run forever.
    user_agent : str
        Value sent as the ``User-Agent`` header on every request. The public
        Nominatim instance asks clients to identify themselves instead of
        using an HTTP library's stock agent string.
    """

    def __init__(self, url="https://nominatim.openstreetmap.org/",
                 timeout=10, user_agent="strava-segments-geocoder"):
        # Normalise the base so "host" and "host/" both become "host/<endpoint>".
        self.url = url.rstrip("/") + "/{}"
        self.timeout = timeout
        self.headers = {"User-Agent": user_agent}

    def reverse_geocode(self, lat=0, lon=0):
        """Look up the address details for a coordinate pair.

        Parameters
        ----------
        lat, lon : float
            Coordinates forwarded as the ``lat`` / ``lon`` query parameters.

        Returns
        -------
        dict
            The ``address`` object of the JSON response, or an empty dict when
            the response carries no ``address`` key (e.g. the service replied
            with an error payload because nothing is mapped at that point).

        Raises
        ------
        ConnectionError
            If the HTTP response status is not OK.
        """
        params = {'format': 'json',
                  'lat': lat,
                  'addressdetails': 1,
                  'lon': lon}
        resp = requests.get(self.url.format("reverse"),
                            params=params,
                            headers=self.headers,
                            timeout=self.timeout)
        if not resp.ok:
            raise ConnectionError("Error connecting to openstreetmap code {}".format(resp.status_code))
        body = resp.json()
        # A 200 response is not guaranteed to contain an address; fall back to
        # an empty mapping rather than failing with a bare KeyError.
        return body.get("address", {})
    
class GeoLocation:
    """Plain value holder pairing a place name with its country code."""

    def __init__(self, country_code=None, name=None):
        # Both attributes are stored exactly as given; no validation is done.
        self.name = name
        self.country_code = country_code

    def __repr__(self):
        """Render as a list-like string: name first, then country code."""
        template = "['{}', '{}']"
        fields = (self.name, self.country_code)
        return template.format(*fields)
    
class OpenStreetMapManager:
    """Resolve coordinates to a ``[country_code, place_name]`` pair.

    Wraps a ``GeoCodeClient`` and reduces the address it returns to the two
    fields this notebook stores per segment.
    """

    # Address keys probed for the place name, in order of preference.
    _PLACE_KEYS = ("village", "town", "city")

    def __init__(self, base_url="https://nominatim.openstreetmap.org/"):
        self.url = base_url
        self.client = GeoCodeClient(self.url)

    def find_location_byname(self, lat=None, lon=None):
        """Reverse-geocode a point and pick its place name and country code.

        Parameters
        ----------
        lat, lon : float
            Coordinates of the point; both are required.

        Returns
        -------
        list
            ``[country_code, name]``. ``name`` is the first of ``village``,
            ``town`` or ``city`` present in the address, or ``"Undefined"``
            (after printing the address) when none is. ``country_code`` is
            ``None`` when the address has no such key.

        Raises
        ------
        ValueError
            If ``lat`` or ``lon`` is ``None``.
        """
        if lat is None or lon is None:
            raise ValueError("Both lat and lon are mandatory")
        address = self.client.reverse_geocode(lat, lon)
        for key in self._PLACE_KEYS:
            if key in address:
                name = address[key]
                break
        else:
            # No recognised place key: log the raw address so it can be inspected.
            print("Undefined: {}".format(address))
            name = "Undefined"
        # Use .get so one address without a country does not abort a long
        # row-by-row run with a KeyError; the row simply gets no country code.
        country_code = address.get("country_code")
        return [country_code, name]

In [47]:
# Load the segments CSV that the rest of the notebook enriches.
segments = pd.read_csv(
    filepath_or_buffer="../dataset/strava-segments-italy-FINAL.csv"
)

In [48]:
# The *_latlng columns hold Python-literal text that evaluates to a pair of
# numbers (presumably "[lat, lng]" — confirm against the CSV). Parse it with
# ast.literal_eval rather than eval: literal_eval accepts only literals, so a
# crafted cell in the CSV cannot run arbitrary code in the kernel.
segments[["s_lat", "s_lng"]] = segments["start_latlng"].apply(
    lambda x: pd.Series(np.array(ast.literal_eval(x)))
)
segments[["e_lat", "e_lng"]] = segments["end_latlng"].apply(
    lambda x: pd.Series(np.array(ast.literal_eval(x)))
)

In [10]:
import time

# Pause applied before every lookup. The public Nominatim service allows at
# most one request per second, so wait a full second between calls.
GEOCODE_DELAY_SECONDS = 1.0

manager = OpenStreetMapManager()


def extract_location(row):
    """Reverse-geocode one row's start point, throttled for the public API.

    Parameters
    ----------
    row : mapping
        Must provide ``"s_lat"`` and ``"s_lng"``.

    Returns
    -------
    list
        Whatever ``manager.find_location_byname`` returns for that point.
    """
    time.sleep(GEOCODE_DELAY_SECONDS)
    return manager.find_location_byname(row["s_lat"], row["s_lng"])

In [None]:
# extract_location returns a two-element list per row. result_type="expand"
# makes apply spread that list over two columns; without it, current pandas
# returns a single Series of lists, which cannot be assigned to two columns.
segments[["country_code", "loc_name"]] = segments[["s_lat", "s_lng"]].apply(
    extract_location, axis=1, result_type="expand"
)

In [14]:
# Persist the geocoded frame, row index included.
segments.to_csv(
    path_or_buf="../dataset/strava-segments-italy-FINAL-GEOCODED.csv",
    index=True,
)

Next, we keep only the segments located in Italy, i.e. we drop every row whose `country_code` is not "it".

In [49]:
# The geocoded file was written with index=True, so its first column is the
# saved row index. Read it back as the index; otherwise it shows up as a stray
# "Unnamed: 0" data column and is duplicated again by the next to_csv call.
segments = pd.read_csv(
    "../dataset/strava-segments-italy-FINAL-GEOCODED.csv",
    index_col=0,
)

In [51]:
# Boolean-mask selection: keep the rows whose country code is exactly "it".
segments_italy = segments.loc[segments["country_code"] == "it"]

In [54]:
# Write the Italy-only subset, row index included.
segments_italy.to_csv(
    path_or_buf="../dataset/strava-segments-italy_only-FINAL-GEOCODED.csv",
    index=True,
)