In [1]:
import requests
import pandas as pd
import h3
import json

In [2]:
import sys, os

sys.path.append(os.path.abspath('..'))
%load_ext autoreload
%autoreload 2
from modules.config import *

sys.path.append(os.path.abspath(os.path.join("..", "utils")))
from overpass_meta import get_meta_data

# Per-category Overpass metadata. Later cells index it by category name and
# read each entry's "filename" and "query" values.
overpass_meta = get_meta_data()

In [3]:
# Overpass QL: select the relation named "Leipzig" with admin_level 6, then
# recurse down (">") to its members and output them in skeleton form.
query = """
[out:json];    
relation["name"="Leipzig"]["admin_level"="6"];
out body;
>;
out skel qt;
"""

response = requests.get(
    "https://overpass-api.de/api/interpreter",
    params={"data": query},
    # Without a timeout requests waits indefinitely on a stalled connection.
    timeout=300,
)
# Surface HTTP errors (e.g. rate limiting) directly instead of letting them
# show up as an opaque JSON decoding failure on the next line.
response.raise_for_status()
# NOTE(review): no later cell visible in this notebook reads `data` — confirm
# whether this request is still needed.
data = response.json()


In [4]:
def get_file_path(category):
    """Return the path of the local JSON file that belongs to ``category``.

    The file name comes from ``overpass_meta[category]["filename"]`` and is
    joined onto ``REPO_DATA_DIR_PATH``.
    """
    filename = overpass_meta[category]["filename"]
    return os.path.join(REPO_DATA_DIR_PATH, filename)

def poi_file_exists(category):
    """Tell whether the JSON file for ``category`` is present on disk."""
    path = get_file_path(category)
    return os.path.isfile(path)


def read_poi_file(category):
    """Parse and return the JSON content of the file for ``category``."""
    with open(get_file_path(category)) as poi_file:
        return json.load(poi_file)


def save_poi_file(category, data):
    """Write ``data`` as JSON to the file for ``category``, replacing its content."""
    path = get_file_path(category)
    with open(path, "w") as poi_file:
        json.dump(data, poi_file)


def fetch_poi_data(category, timeout=300):
    """Download the POI data for ``category`` from the Overpass API and cache it.

    Sends the query stored in ``overpass_meta[category]["query"]`` to the
    Overpass interpreter, decodes the JSON reply, stores it with
    ``save_poi_file`` and returns it.

    Args:
        category: Key into ``overpass_meta``.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.

    Returns:
        The decoded JSON payload.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        ValueError: If the reply is not valid JSON or has no ``"elements"``
            entry.
    """
    response = requests.get(
        "https://overpass-api.de/api/interpreter",
        params={"data": overpass_meta[category]["query"]},
        timeout=timeout,
    )
    # Check the status before anything is written: a cached error reply would
    # otherwise be picked up by get_poi_data on every later run.
    response.raise_for_status()
    data = response.json()
    # The notebook builds its dataframes from data["elements"], so refuse to
    # cache a payload that lacks it.
    if not isinstance(data, dict) or "elements" not in data:
        raise ValueError(
            f"Overpass response for category {category!r} has no 'elements' entry"
        )
    save_poi_file(category, data)
    return data


def get_poi_data(category):
    """Return the POI data for ``category``, preferring the local file.

    When the category's file exists it is read from disk; otherwise the data
    is obtained through ``fetch_poi_data``.
    """
    loader = read_poi_file if poi_file_exists(category) else fetch_poi_data
    return loader(category)


In [5]:
# Sustenance POIs: one row per returned element, labelled with its category
# and with the "amenity" tag lifted into a column of its own.
sustenance_data = get_poi_data("sustenance")
sustenance_df = pd.DataFrame(sustenance_data["elements"]).assign(category="sustenance")
sustenance_df["amenity"] = sustenance_df["tags"].map(lambda tags: tags["amenity"])
sustenance_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity
0,node,30162037,51.344661,12.272481,"{'addr:city': 'Leipzig', 'addr:country': 'DE',...",sustenance,restaurant
1,node,49531558,51.316443,12.382568,"{'amenity': 'cafe', 'name': 'eis fichte MCL', ...",sustenance,cafe


In [6]:
# Public-transport POIs: one row per returned element, labelled with its
# category (no "amenity" column is derived here).
public_transport_data = get_poi_data("public_transport")
public_transport_df = pd.DataFrame(public_transport_data["elements"]).assign(
    category="public_transport"
)
public_transport_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category
0,node,26023348,51.333368,12.321735,"{'name': 'Leipzig-Lindenau', 'operator': 'DB S...",public_transport
1,node,376142577,51.346552,12.383086,"{'name': 'Leipzig Hauptbahnhof', 'name:fr': 'G...",public_transport


In [7]:
# Education POIs: one row per returned element, labelled with its category
# and with the "amenity" tag lifted into a column of its own.
education_data = get_poi_data("education")
education_df = pd.DataFrame(education_data["elements"]).assign(category="education")
education_df["amenity"] = education_df["tags"].map(lambda tags: tags["amenity"])
education_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity
0,node,264496513,51.337711,12.373673,"{'addr:city': 'Leipzig', 'addr:country': 'DE',...",education,university
1,node,266977484,51.375002,12.411598,"{'addr:city': 'Leipzig', 'addr:postcode': '043...",education,library


In [8]:
# Arts-and-culture POIs: one row per returned element, labelled with its
# category and with the "amenity" tag lifted into a column of its own.
arts_and_culture_data = get_poi_data("arts_and_culture")
arts_and_culture_df = pd.DataFrame(arts_and_culture_data["elements"]).assign(
    category="arts_and_culture"
)
arts_and_culture_df["amenity"] = arts_and_culture_df["tags"].map(
    lambda tags: tags["amenity"]
)
arts_and_culture_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity
0,node,26022810,51.337641,12.333334,"{'amenity': 'theatre', 'name': 'Theater der Ju...",arts_and_culture,theatre
1,node,26022926,51.336158,12.339108,"{'addr:city': 'Leipzig', 'addr:postcode': '041...",arts_and_culture,theatre


In [9]:
# Sports POIs: one row per returned element, labelled with its category
# (no "amenity" column is derived here).
sports_data = get_poi_data("sports")
sports_df = pd.DataFrame(sports_data["elements"]).assign(category="sports")
sports_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category
0,node,260629059,51.339999,12.377531,"{'addr:city': 'Leipzig', 'addr:country': 'DE',...",sports
1,node,264978890,51.34414,12.394389,"{'created_by': 'Potlatch 0.9c', 'leisure': 'sp...",sports


In [10]:
# Stack the per-category frames into a single POI table. ignore_index=True
# gives the result a fresh 0..n-1 index; without it every source frame keeps
# its own 0-based labels and the combined index contains duplicates.
poi_df = pd.concat(
    [sustenance_df, public_transport_df, education_df, arts_and_culture_df, sports_df],
    ignore_index=True,
)
# Drop the columns that are not used by the rest of the notebook.
poi_df = poi_df.drop(columns=["type", "id", "tags"])
poi_df.head(2)

Unnamed: 0,lat,lon,category,amenity
0,51.344661,12.272481,sustenance,restaurant
1,51.316443,12.382568,sustenance,cafe


In [12]:
# Persist the combined POI table as a pickle at POIS_PATH.
poi_df.to_pickle(POIS_PATH)

In [None]:
# Non-null values per column for each category. The POI table built above is
# named `poi_df`; `pois` is not defined in this notebook.
poi_df.groupby("category").count()

## Compute the number of POIs per hexagon

In [None]:
# Load the trips table; later cells use its "start_hex" and "end_hex" columns.
# NOTE(review): this reads TRIPS_PATH as CSV, but the last cell of this
# notebook writes a pickle to the same TRIPS_PATH — confirm which format is
# intended, otherwise a second run would try to parse that pickle as CSV.
trips_df = pd.read_csv(TRIPS_PATH)

In [None]:
def convert_to_hex(latitude, longitude):
    """Return the H3 index of the given coordinate at ``H3_RESOLUTION``."""
    hex_id = h3.geo_to_h3(lat=latitude, lng=longitude, resolution=H3_RESOLUTION)
    return hex_id

In [None]:
# Compute the hexagon id for each point of interest. Walking the two
# coordinate columns in lockstep avoids constructing a Series for every row,
# which is what a row-wise DataFrame.apply does.
poi_df["hex"] = [
    convert_to_hex(lat, lon) for lat, lon in zip(poi_df["lat"], poi_df["lon"])
]
poi_df.head(2)

In [None]:
# Count the points of interest per (hexagon, category) pair that occurs in
# the POI dataframe; the count ends up in the column "number of poi".
all_hexagons_with_poi = (
    poi_df.groupby(["hex", "category"])
    .size()
    .reset_index(name="number of poi")
)
all_hexagons_with_poi.head(2)

In [None]:
# One row per distinct hexagon in which at least one trip started or ended.
hexagons_df = pd.DataFrame(
    {"hex": pd.concat([trips_df["start_hex"], trips_df["end_hex"]]).unique()}
)
hexagons_df.head(2)

In [None]:
# Create a column 'hex_and_neighbors' that holds, for every row, a list made
# of the hexagon from column 'hex' and the cells of its k=1 ring (its
# immediate neighbors). Only the 'hex' column is needed, so map over that
# column instead of applying a function to whole rows.
hexagons_df["hex_and_neighbors"] = hexagons_df["hex"].map(
    lambda hexagon: list(h3.k_ring(hexagon, 1))
)
hexagons_df.head(2)

In [None]:
def calculate_poi(hex_and_neighbors, category, poi_counts=None):
    """Sum the points of interest of one category over a group of hexagons.

    Args:
        hex_and_neighbors: List-like of hexagon ids to include.
        category: Category name matched against the "category" column.
        poi_counts: Table with the columns "hex", "category" and
            "number of poi". When omitted, the notebook-level
            ``all_hexagons_with_poi`` frame is used.

    Returns:
        The sum of "number of poi" over all rows whose hexagon is in
        ``hex_and_neighbors`` and whose category equals ``category``
        (0 when no row matches).
    """
    if poi_counts is None:
        # Fall back to the table built earlier in the notebook so existing
        # two-argument calls keep working.
        poi_counts = all_hexagons_with_poi
    in_hexagons = poi_counts["hex"].isin(hex_and_neighbors)
    in_category = poi_counts["category"] == category
    return poi_counts.loc[in_hexagons & in_category, "number of poi"].sum()

In [None]:
# For every hexagon, total the POIs of each category found in the hexagon and
# its neighbors; the result goes into one "<category>_poi" column per category.
categories = ["sustenance", "public_transport", "education", "arts_and_culture", "sports"]

for category in categories:
    hexagons_df[f"{category}_poi"] = hexagons_df["hex_and_neighbors"].map(
        lambda neighborhood: calculate_poi(neighborhood, category)
    )

hexagons_df.head(2)

In [None]:
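# NOTE(review): everything in this cell is disabled exploratory code. It refers to
# names that are not defined in the visible part of this notebook (data_df,
# data_geodf, flexzone_loose, shapely), so it cannot be re-enabled as-is —
# consider deleting the cell.
# original_data_df = pd.read_csv(ORIGINAL_DATA_MERGED_PATH)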

# import folium

# fmap = folium.Map(location=[data_df.lat.mean(), data_df.lng.mean()], zoom_start=12)
# folium.features.Choropleth(
# 	geo_data=flexzone_loose.geometry.map(lambda polygon: shapely.ops.transform(lambda x, y: (y, x), polygon)).to_json(),
# ).add_to(fmap)

# for index, row in data_geodf[:1000].iterrows():
# 	folium.CircleMarker(
# 		location=[row.lat, row.lng],
# 		radius=1,
# 		color="red",
# 		fill=True,
# 		fill_color="red",
# 		fill_opacity=0.5,
# 	).add_to(fmap)
	

# fmap

In [None]:
# Join the per-hexagon POI counts onto each trip through its start hexagon,
# drop the helper columns that came along with the join, and add the
# '_start' suffix to the poi columns.
trips_df = (
    trips_df.merge(hexagons_df, left_on="start_hex", right_on="hex")
    .drop(columns=["hex", "hex_and_neighbors"])
    .rename(
        columns={
            "sustenance_poi": "sustenance_poi_start",
            "public_transport_poi": "public_transport_poi_start",
            "education_poi": "education_poi_start",
            "arts_and_culture_poi": "arts_and_culture_poi_start",
            "sports_poi": "sports_poi_start",
        }
    )
)
trips_df.head(2)

In [None]:
# Join the per-hexagon POI counts onto each trip through its end hexagon,
# drop the helper columns that came along with the join, and add the
# '_end' suffix to the poi columns.
trips_df = (
    trips_df.merge(hexagons_df, left_on="end_hex", right_on="hex")
    .drop(columns=["hex", "hex_and_neighbors"])
    .rename(
        columns={
            "sustenance_poi": "sustenance_poi_end",
            "public_transport_poi": "public_transport_poi_end",
            "education_poi": "education_poi_end",
            "arts_and_culture_poi": "arts_and_culture_poi_end",
            "sports_poi": "sports_poi_end",
        }
    )
)
trips_df.head(2)

In [None]:
# Store the trips table, now carrying the POI count columns, as a pickle at
# TRIPS_PATH.
# NOTE(review): an earlier cell loads trips_df from this same TRIPS_PATH with
# pd.read_csv, so this write replaces that file with a pickle — confirm the
# intended format and path.
trips_df.to_pickle(TRIPS_PATH)