In [1]:
import requests
import pandas as pd
import h3
import json

In [2]:
import sys, os

sys.path.append(os.path.abspath('..'))
%load_ext autoreload
%autoreload 2
from modules.config import *

# Put ../utils on the import path so that overpass_meta can be imported as a top-level module.
sys.path.append(os.path.abspath(os.path.join("..", "utils")))
from overpass_meta import get_meta_data

# Per-category metadata; the helper functions in this notebook read each
# category's "filename" and "query" entries from it.
overpass_meta = get_meta_data()

In [3]:
def get_file_path(category):
    """Return the path of the cached POI file for ``category``.

    The file name is the category's ``"filename"`` entry in ``overpass_meta``,
    joined onto ``REPO_DATA_DIR_PATH``.
    """
    file_name = overpass_meta[category]["filename"]
    return os.path.join(REPO_DATA_DIR_PATH, file_name)

def poi_file_exists(category):
    """Return True if the cached POI file for ``category`` is an existing regular file."""
    cached_path = get_file_path(category)
    return os.path.isfile(cached_path)


def read_poi_file(category):
    """Load the cached POI data for ``category`` from disk.

    The file location is ``get_file_path(category)``.

    Returns
    -------
    The object decoded by ``json.load``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # JSON text is UTF-8 by specification, so do not depend on the platform's
    # default text encoding when decoding the file.
    with open(get_file_path(category), encoding="utf-8") as f:
        return json.load(f)


def save_poi_file(category, data):
    """Serialize ``data`` as JSON into the cache file of ``category``.

    An existing file at ``get_file_path(category)`` is overwritten.
    """
    target_path = get_file_path(category)
    with open(target_path, "w") as cache_file:
        json.dump(data, cache_file)


def fetch_poi_data(category, timeout=180):
    """Download the POI data for ``category`` from the Overpass API and cache it.

    Parameters
    ----------
    category : str
        Key into ``overpass_meta``; the category's ``"query"`` entry is sent
        as the ``data`` request parameter.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Without a timeout a stalled connection
        would block the notebook indefinitely. The default is deliberately
        generous; adjust it if the queries need more or less time.

    Returns
    -------
    The decoded JSON response, which is also written to disk via
    ``save_poi_file``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    response = requests.get(
        "http://overpass-api.de/api/interpreter",
        params={"data": overpass_meta[category]["query"]},
        timeout=timeout,
    )
    # Surface HTTP errors explicitly instead of attempting to parse (and
    # potentially cache) an error response.
    response.raise_for_status()
    data = response.json()
    save_poi_file(category, data)
    return data

def get_poi_data(category):
    """Return the POI data for ``category``, preferring the on-disk cache.

    When no cache file exists yet, the data is fetched (and thereby cached)
    through ``fetch_poi_data``.
    """
    if not poi_file_exists(category):
        return fetch_poi_data(category)
    return read_poi_file(category)


In [4]:
# Sustenance POIs: one row per response element, labelled with its category
# and with the "amenity" tag pulled out of the tags dict into its own column.
sustenance_data = get_poi_data("sustenance")
sustenance_df = pd.DataFrame(sustenance_data["elements"]).assign(
    category="sustenance"
)
sustenance_df["amenity"] = sustenance_df["tags"].map(
    lambda poi_tags: poi_tags["amenity"]
)
sustenance_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity
0,node,30162037,51.344661,12.272481,"{'addr:city': 'Leipzig', 'addr:country': 'DE',...",sustenance,restaurant
1,node,49531558,51.316443,12.382568,"{'amenity': 'cafe', 'name': 'eis fichte MCL', ...",sustenance,cafe


In [5]:
# Public-transport POIs: one row per response element, labelled with its category.
public_transport_data = get_poi_data("public_transport")
public_transport_df = pd.DataFrame(public_transport_data["elements"]).assign(
    category="public_transport"
)
public_transport_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category
0,node,12277458,51.38011,12.406596,"{'bus': 'yes', 'highway': 'bus_stop', 'name': ...",public_transport
1,node,12351651,51.376061,12.3842,"{'bus': 'yes', 'local_ref': '0612', 'name': 'D...",public_transport


In [6]:
# Education POIs: one row per response element, labelled with its category
# and with the "amenity" tag pulled out of the tags dict into its own column.
education_data = get_poi_data("education")
education_df = pd.DataFrame(education_data["elements"]).assign(
    category="education"
)
education_df["amenity"] = education_df["tags"].map(
    lambda poi_tags: poi_tags["amenity"]
)
education_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity
0,node,264496513,51.337711,12.373673,"{'addr:city': 'Leipzig', 'addr:country': 'DE',...",education,university
1,node,266977484,51.375002,12.411598,"{'addr:city': 'Leipzig', 'addr:postcode': '043...",education,library


In [7]:
# Arts-and-culture POIs: one row per response element, labelled with its
# category and with the "amenity" tag pulled out into its own column.
arts_and_culture_data = get_poi_data("arts_and_culture")
arts_and_culture_df = pd.DataFrame(arts_and_culture_data["elements"]).assign(
    category="arts_and_culture"
)
arts_and_culture_df["amenity"] = arts_and_culture_df["tags"].map(
    lambda poi_tags: poi_tags["amenity"]
)
arts_and_culture_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity
0,node,26022810,51.337641,12.333334,"{'amenity': 'theatre', 'name': 'Theater der Ju...",arts_and_culture,theatre
1,node,26022926,51.336158,12.339108,"{'addr:city': 'Leipzig', 'addr:postcode': '041...",arts_and_culture,theatre


In [8]:
# Sports POIs: one row per response element, labelled with its category.
sports_data = get_poi_data("sports")
sports_df = pd.DataFrame(sports_data["elements"]).assign(category="sports")
sports_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category
0,node,260629059,51.339999,12.377531,"{'addr:city': 'Leipzig', 'addr:country': 'DE',...",sports
1,node,264978890,51.34414,12.394389,"{'created_by': 'Potlatch 0.9c', 'leisure': 'sp...",sports


In [9]:
# Stack the per-category frames into one long POI table.
# ignore_index=True gives every row a unique 0..n-1 label; without it each
# category's own 0-based labels would repeat in the combined frame.
poi_df = pd.concat(
    [sustenance_df, public_transport_df, education_df, arts_and_culture_df, sports_df],
    ignore_index=True,
)
# Drop the element type, id and raw tags columns; lat, lon, category and
# amenity remain. A list (not a set) keeps the argument ordered.
poi_df = poi_df.drop(columns=["type", "id", "tags"])
poi_df.head(2)

Unnamed: 0,lat,lon,category,amenity
0,51.344661,12.272481,sustenance,restaurant
1,51.316443,12.382568,sustenance,cafe


In [10]:
# Report how many POIs were loaded per category, one line each.
print(
    f"The number of pois in category sustenance: {len(sustenance_df)}",
    f"The number of pois in category public transport: {len(public_transport_df)}",
    f"The number of pois in category education: {len(education_df)}",
    f"The number of pois in category arts and culture: {len(arts_and_culture_df)}",
    f"The number of pois in category sports: {len(sports_df)}",
    sep="\n",
)

The number of pois in category sustenance: 1455
The number of pois in category public transport: 1701
The number of pois in category education: 167
The number of pois in category arts and culture: 51
The number of pois in category sports: 127


In [11]:
# Persist the combined POI table in Parquet format.
# NOTE(review): the file is written with to_parquet but is named "poi_data.pkl",
# so it must be read back with pd.read_parquet, not pd.read_pickle. Consider
# renaming it once every consumer of this file is known.
poi_df.to_parquet(os.path.join(PROCESSED_DATA_DIR_PATH, "poi_data.pkl"))

## Compute the number of POIs per hexagon

In [12]:
def convert_to_hex(latitude, longitude, resolution):
    """Return the H3 hexagon id for a latitude/longitude pair at ``resolution``.

    Thin wrapper around ``h3.geo_to_h3``.
    """
    hex_id = h3.geo_to_h3(lat=latitude, lng=longitude, resolution=resolution)
    return hex_id

In [13]:
# For each configured H3 resolution: tag every POI with its hexagon id, then
# count the POIs per (hexagon, category). The per-resolution count tables are
# collected in dfs.
dfs = []
for resolution in CALC_H3_RESOLUTIONS:
    hex_name = f"hex_{resolution}"
    poi_df[hex_name] = poi_df.apply(
        lambda poi: convert_to_hex(poi["lat"], poi["lon"], resolution),
        axis=1,
    )

    all_hexagons_with_poi = (
        poi_df.groupby([hex_name, "category"])
        .size()
        .reset_index(name="pois")
        .rename(columns={hex_name: "hex"})
        .assign(h3_res=resolution)
    )
    dfs.append(all_hexagons_with_poi)
    print(all_hexagons_with_poi.head(2))
    print(len(all_hexagons_with_poi))

               hex          category  pois  h3_res
0  871f1a10dffffff  public_transport     6       7
1  871f1a12dffffff  public_transport    11       7
185
               hex          category  pois  h3_res
0  881f1a10d1fffff  public_transport     3       8
1  881f1a10d9fffff  public_transport     2       8
613
               hex          category  pois  h3_res
0  891f1a10d13ffff  public_transport     1       9
1  891f1a10d8fffff  public_transport     2       9
1392


In [14]:
# Stack the per-resolution count tables into one long table with a fresh 0..n-1 index.
all_hexagons_with_poi = pd.concat(objs=dfs, axis=0, ignore_index=True)
all_hexagons_with_poi

Unnamed: 0,hex,category,pois,h3_res
0,871f1a10dffffff,public_transport,6,7
1,871f1a12dffffff,public_transport,11,7
2,871f1a12dffffff,sustenance,6,7
3,871f1a144ffffff,public_transport,3,7
4,871f1a144ffffff,sustenance,3,7
...,...,...,...,...
2185,891f1abb68fffff,public_transport,1,9
2186,891f1abb6a7ffff,sustenance,1,9
2187,891f1abb6abffff,public_transport,1,9
2188,891f1abb6abffff,sports,1,9


In [16]:
# One row per distinct (hexagon, resolution) pair that contains at least one POI.
# The 'hex_and_neighbors' column holds, for each row, the list of hexagons
# returned by h3.k_ring(hex, 1): the hexagon from column 'hex' and its 6 neighbors.
hexagons_df = (
    all_hexagons_with_poi.loc[:, ["hex", "h3_res"]].drop_duplicates().copy()
)
hexagons_df["hex_and_neighbors"] = hexagons_df["hex"].map(
    lambda hex_id: list(h3.k_ring(hex_id, 1))
)
hexagons_df

AttributeError: 'DataFrame' object has no attribute 'unique'

In [None]:
# Number of distinct hexagon ids across all resolutions.
len(hexagons_df["hex"].unique())

1270

In [None]:
def calculate_poi(hex_and_neighbors, category, poi_counts=None):
    """Sum the points of interest of one category over a set of hexagons.

    Parameters
    ----------
    hex_and_neighbors : list-like
        Hexagon ids to aggregate over.
    category : str
        Category whose POIs are counted.
    poi_counts : pandas.DataFrame, optional
        Table with "hex", "category" and "pois" columns. Defaults to the
        notebook-level ``all_hexagons_with_poi`` so existing two-argument
        calls keep working; pass a frame explicitly to avoid relying on
        notebook state.

    Returns
    -------
    The sum of the "pois" column over all rows whose hexagon is in
    ``hex_and_neighbors`` and whose category equals ``category`` (0 when no
    row matches).
    """
    if poi_counts is None:
        poi_counts = all_hexagons_with_poi
    in_neighborhood = poi_counts["hex"].isin(hex_and_neighbors)
    in_category = poi_counts["category"] == category
    return poi_counts.loc[in_neighborhood & in_category, "pois"].sum()

In [None]:
# For every hexagon, add one "<category>_poi" column holding the number of
# POIs of that category within the hexagon and its neighbors.
categories = [
    "sustenance",
    "public_transport",
    "education",
    "arts_and_culture",
    "sports",
]

for category in categories:
    hexagons_df[f"{category}_poi"] = hexagons_df["hex_and_neighbors"].map(
        lambda neighborhood: calculate_poi(neighborhood, category)
    )

hexagons_df.head(2)

Unnamed: 0,hex,h3_res,hex_and_neighbors,sustenance_poi,public_transport_poi,education_poi,arts_and_culture_poi,sports_poi
0,871f1a10dffffff,7,"[871f1a108ffffff, 871f1a172ffffff, 871f1a10cff...",1,11,0,0,0
1,871f1a12dffffff,7,"[871f1a129ffffff, 871f1a12dffffff, 871f1a8daff...",21,73,7,0,4


In [None]:
# Write the per-hexagon POI totals in Parquet format to HEXAGON_WITH_POIS_PATH.
hexagons_df.to_parquet(HEXAGON_WITH_POIS_PATH)