# Getting POI Data

In [2]:
#Importing the important libraries
import pandas as pd
import geopandas as gp
# OSMNX for getting OpenStreetMap Data
import osmnx as ox
# Folium for plotting maps
import folium

In [3]:
# Fetch every feature in Chicago that carries an amenity tag
# https://wiki.openstreetmap.org/wiki/Key:amenity
# NOTE(review): newer OSMnx releases expose this call as features_from_place —
# confirm against the installed version before upgrading
tags = dict(amenity=True)
poi = ox.geometries_from_place(tags=tags, query=dict(city="Chicago"))

# Not every public transport stop is tagged as an amenity (shown further below),
# so the public transport features are fetched as a second dataset
# https://wiki.openstreetmap.org/wiki/Key:public_transport
tags = dict(public_transport=True)
transport = ox.geometries_from_place(tags=tags, query=dict(city="Chicago"))

### Amenity Data

In [4]:
# Public transport features are kept in their own dataframe (transport),
# so rows carrying a public_transport tag are removed here to avoid duplicates
poi = poi.loc[poi["public_transport"].isna()]

In [5]:
# Keep only the columns needed later and index the rows by their OSM id
# NOTE(review): OSM ids are presumably only unique per element type, so dropping
# element_type may leave duplicate index values — verify
poiDF = (
    poi[["amenity", "geometry", "name"]]
    .reset_index()
    .set_index("osmid")
    .drop(columns="element_type")
    .copy()
)
poiDF.tail(3)

Unnamed: 0_level_0,amenity,geometry,name
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
15907961,parking,"MULTIPOLYGON (((-87.88232 41.98844, -87.88258 ...",Contractor Lot J
15910908,shelter,"POLYGON ((-87.62616 41.87921, -87.62615 41.878...",
15925258,theatre,"POLYGON ((-87.62925 41.88838, -87.62908 41.888...",House of Blues


In [6]:
# Occurrences per amenity type, most frequent first
print(
    poiDF.groupby("amenity")["geometry"]
    .count()
    .sort_values(ascending=False)
    .to_string()
)

amenity
parking                    6529
restaurant                 2199
place_of_worship           1881
fast_food                  1263
school                     1227
bar                         855
bicycle_parking             835
bicycle_rental              815
cafe                        745
bench                       736
bank                        448
fuel                        418
parking_entrance            266
post_box                    229
toilets                     222
shelter                     219
fountain                    209
social_facility             193
pharmacy                    181
drinking_water              178
loading_dock                160
clinic                      140
waste_basket                125
library                     109
fire_station                103
post_office                 100
pub                         100
ice_cream                   100
atm                          96
vending_machine              92
studio                       92


In [7]:
# Coarser, hand-made categories that simplify later analyses.
# Convention: the FIRST entry of each list is the category label returned by
# simpleCategory; every entry (label included) is matched against the amenity tag.
# Transport related tags are deliberately kept apart, because the differences
# between them could matter.

# --- Parking & transport related POIs ---
car_parking = ["parking", "parking_entrance", "parking_space"]
bicycle_parking = ["bicycle_parking"]
bicycle_rental = ["bicycle_rental"]
bicycle_repair_station = ["bicycle_repair_station"]
taxi = ["taxi"]
car_sharing = ["car_sharing"]
car_rental = ["car_rental"]
# NOTE(review): loading_dock is presumably a goods bay for road vehicles rather
# than a boat facility — confirm that it belongs in this group
water_transport = ["water_transport", "loading_dock", "boat_rental", "boat_storage"]
fuel = ["fuel"]

# --- Other possibly important POIs ---
gastronomy = [
    "gastronomy",
    "fast_food",
    "cafe",
    "restaurant",
    "pub",
    "bar",
    "ice_cream",
    "food_court",
    "fast_food;ice_cream",
    "biergarten",
]
place_of_worship = ["place_of_worship"]
finance = [
    "finance",
    "bank",
    "atm",
    "bureau_de_change",
    "money_transfer",
    "payment_terminal",
    "payment_centre",
]
sports = ["sports", "pilates", "training"]
health = [
    "health",
    "pharmacy",
    "hospital",
    "dentist",
    "clinic",
    "nursing_home",
    "doctors",
]
education = [
    "education",
    "music_school",
    "school",
    "university",
    "college",
    "language_school",
    "library",
    "prep_school",
    "flight_school",
    "research_institute",
]
nightlife = ["nightlife", "stripclub", "nightclub"]
culture = [
    "culture",
    "theatre",
    "cinema",
    "events_venue",
    "planetarium",
    "music_venue",
    "arts_centre",
    "exhibition_centre",
]
children = ["children", "childcare", "kindergarten"]
police = ["police"]

# Order matters: simpleCategory returns the label of the first matching group
category_list = [
    car_parking, bicycle_parking, bicycle_rental, bicycle_repair_station,
    taxi, car_sharing, car_rental, water_transport, fuel,
    gastronomy, place_of_worship, finance, sports, health,
    education, nightlife, culture, children, police,
]

In [8]:
# Method to apply our categorization to a row of the dataframe
def simpleCategory(row):
    """Return the simplified category label for one POI row.

    The groups in ``category_list`` are scanned in order; the first group
    that contains ``row["amenity"]`` wins and its first element is returned
    as the label. Rows matching no group are labelled ``"other"``.
    """
    amenity = row["amenity"]
    return next(
        (group[0] for group in category_list if amenity in group),
        "other",
    )

In [9]:
# Label every POI row with its simplified category
poiDF["category"] = poiDF.apply(simpleCategory, axis="columns")

In [10]:
# Occurrences per simplified category, most frequent first
print(
    poiDF.groupby("category")["geometry"]
    .count()
    .sort_values(ascending=False)
    .to_string()
)

category
parking                   6841
gastronomy                5281
other                     2952
place_of_worship          1881
education                 1437
bicycle_parking            835
bicycle_rental             815
finance                    590
health                     486
fuel                       418
culture                    177
water_transport            166
children                    52
police                      38
nightlife                   24
car_rental                  24
bicycle_repair_station      19
taxi                        10
car_sharing                  2
sports                       2


In [11]:
# Method that returns the Latitude and longitude
def getLatAndLong(row):
    """Return ``[latitude, longitude]`` for the geometry stored in ``row``.

    Point geometries contribute their own ``y``/``x`` coordinates. Any other
    geometry type (polygons, multipolygons, ...) is represented by the
    coordinates of its centroid.
    """
    shape = row.geometry
    location = shape if shape.geom_type == "Point" else shape.centroid
    return [location.y, location.x]

In [12]:
# Split the [latitude, longitude] pair returned per row into two new columns
poiDF[["latitude", "longitude"]] = poiDF.apply(
    getLatAndLong, axis="columns", result_type="expand"
)

In [13]:
poiDF.tail(3)

Unnamed: 0_level_0,amenity,geometry,name,category,latitude,longitude
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
15907961,parking,"MULTIPOLYGON (((-87.88232 41.98844, -87.88258 ...",Contractor Lot J,parking,41.988025,-87.881296
15910908,shelter,"POLYGON ((-87.62616 41.87921, -87.62615 41.878...",,other,41.87954,-87.626137
15925258,theatre,"POLYGON ((-87.62925 41.88838, -87.62908 41.888...",House of Blues,culture,41.888259,-87.629116


### Public Transport Data

In [14]:
# Keep only the columns needed later and index the rows by their OSM id
# NOTE(review): OSM ids are presumably only unique per element type, so dropping
# element_type may leave duplicate index values — verify
transportDF = (
    transport[["public_transport", "wheelchair", "geometry", "name", "amenity"]]
    .reset_index()
    .set_index("osmid")
    .drop(columns="element_type")
)

In [15]:
transportDF.head(3)

Unnamed: 0_level_0,public_transport,wheelchair,geometry,name,amenity
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
20217109,station,,POINT (-87.62252 41.88915),Michigan Avenue Bridge Dock,ferry_terminal
20217237,stop_position,,POINT (-87.63177 41.88782),River North Water Taxi Terminal,ferry_terminal
20217442,stop_position,,POINT (-87.63774 41.87906),Willis Tower/Union Station Water Taxi Terminal,ferry_terminal


In [16]:
# Occurrences per "public_transport" tag value, most frequent first
print(
    transportDF.groupby("public_transport")["geometry"]
    .count()
    .sort_values(ascending=False)
    .to_string()
)

public_transport
platform          10662
stop_position      5520
station             261
no                   19
service_center        3


In [17]:
# https://wiki.openstreetmap.org/wiki/Key:public_transport
# Keep only platforms, stop positions and stations; every other tag value is dropped
transportDF = transportDF.loc[
    transportDF["public_transport"].isin(["platform", "stop_position", "station"])
].copy()

In [18]:
# Split the [latitude, longitude] pair returned per row into two new columns
transportDF[["latitude", "longitude"]] = transportDF.apply(
    getLatAndLong, axis="columns", result_type="expand"
)

In [19]:
transportDF.head(3)

Unnamed: 0_level_0,public_transport,wheelchair,geometry,name,amenity,latitude,longitude
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
20217109,station,,POINT (-87.62252 41.88915),Michigan Avenue Bridge Dock,ferry_terminal,41.889146,-87.622517
20217237,stop_position,,POINT (-87.63177 41.88782),River North Water Taxi Terminal,ferry_terminal,41.887824,-87.631768
20217442,stop_position,,POINT (-87.63774 41.87906),Willis Tower/Union Station Water Taxi Terminal,ferry_terminal,41.879062,-87.63774


### Merging the two dataframes

In [20]:
# Every public transport row gets the same category label, matching poiDF's "category" column
transportDF = transportDF.assign(category="public_transport")
transportDF.head(3)

Unnamed: 0_level_0,public_transport,wheelchair,geometry,name,amenity,latitude,longitude,category
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20217109,station,,POINT (-87.62252 41.88915),Michigan Avenue Bridge Dock,ferry_terminal,41.889146,-87.622517,public_transport
20217237,stop_position,,POINT (-87.63177 41.88782),River North Water Taxi Terminal,ferry_terminal,41.887824,-87.631768,public_transport
20217442,stop_position,,POINT (-87.63774 41.87906),Willis Tower/Union Station Water Taxi Terminal,ferry_terminal,41.879062,-87.63774,public_transport


In [21]:
# Stack the amenity POIs on top of the public transport stops; this is a
# row-wise outer concatenation (the pandas defaults), so columns present in
# only one of the frames are filled with NaN
df = pd.concat([poiDF, transportDF])
df

Unnamed: 0_level_0,amenity,geometry,name,category,latitude,longitude,public_transport,wheelchair
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
258490031,parking,POINT (-87.64385 41.95123),,parking,41.951227,-87.643847,,
269449042,parking_entrance,POINT (-87.61506 41.85869),,parking,41.858689,-87.615058,,
269450074,parking_entrance,POINT (-87.58421 41.79174),,parking,41.791742,-87.584208,,
269688947,post_box,POINT (-87.77220 42.00407),,other,42.004073,-87.772198,,
270863340,parking_entrance,POINT (-87.61564 41.86421),,parking,41.864208,-87.615638,,
...,...,...,...,...,...,...,...,...
13621412,,"POLYGON ((-87.66859 41.91735, -87.66895 41.917...",,public_transport,41.917203,-87.668448,platform,
13621413,,"POLYGON ((-87.66793 41.91671, -87.66797 41.916...",,public_transport,41.917215,-87.668182,platform,
13621414,,"POLYGON ((-87.66801 41.91728, -87.66800 41.917...",,public_transport,41.917277,-87.668066,platform,
15910906,,"POLYGON ((-87.62624 41.87979, -87.62623 41.879...",,public_transport,41.879543,-87.626201,platform,no


In [22]:
# Occurrences per category in the merged frame, most frequent first
print(
    df.groupby("category")["geometry"]
    .count()
    .sort_values(ascending=False)
    .to_string()
)

category
public_transport          16443
parking                    6841
gastronomy                 5281
other                      2952
place_of_worship           1881
education                  1437
bicycle_parking             835
bicycle_rental              815
finance                     590
health                      486
fuel                        418
culture                     177
water_transport             166
children                     52
police                       38
nightlife                    24
car_rental                   24
bicycle_repair_station       19
taxi                         10
car_sharing                   2
sports                        2


### Saving dataframe

In [23]:
# Persist the merged amenity + public transport frame
# NOTE(review): assumes the data/poi/ directory already exists — confirm
df.to_pickle("data/poi/poi.pickle")

## Hexagons

In [24]:
# Importing libraries for spatial discretization
import h3
from shapely import Polygon 

In [25]:
# Reload the frame written in the "Saving dataframe" step above.
# Unpickling can execute arbitrary code, so only load pickle files you produced yourself.
df = pd.read_pickle("data/poi/poi.pickle")

In [26]:
df

Unnamed: 0_level_0,amenity,geometry,name,category,latitude,longitude,public_transport,wheelchair
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
258490031,parking,POINT (-87.64385 41.95123),,parking,41.951227,-87.643847,,
269449042,parking_entrance,POINT (-87.61506 41.85869),,parking,41.858689,-87.615058,,
269450074,parking_entrance,POINT (-87.58421 41.79174),,parking,41.791742,-87.584208,,
269688947,post_box,POINT (-87.77220 42.00407),,other,42.004073,-87.772198,,
270863340,parking_entrance,POINT (-87.61564 41.86421),,parking,41.864208,-87.615638,,
...,...,...,...,...,...,...,...,...
13621412,,"POLYGON ((-87.66859 41.91735, -87.66895 41.917...",,public_transport,41.917203,-87.668448,platform,
13621413,,"POLYGON ((-87.66793 41.91671, -87.66797 41.916...",,public_transport,41.917215,-87.668182,platform,
13621414,,"POLYGON ((-87.66801 41.91728, -87.66800 41.917...",,public_transport,41.917277,-87.668066,platform,
15910906,,"POLYGON ((-87.62624 41.87979, -87.62623 41.879...",,public_transport,41.879543,-87.626201,platform,no


In [27]:
def convert_to_hex_id(row, res):
    """Return the H3 cell id containing the row's coordinates.

    Args:
        row: Mapping-like row providing "latitude" and "longitude".
        res: H3 resolution passed straight to ``h3.geo_to_h3``.

    Returns:
        The id returned by ``h3.geo_to_h3``, or ``None`` when that id is "0"
        (presumably h3's marker for an invalid cell — confirm).

    NOTE(review): h3-py 4.x renames ``geo_to_h3`` — confirm the installed
    version before upgrading.
    """
    cell = h3.geo_to_h3(row["latitude"], row["longitude"], res)
    if cell == "0":
        return None
    return cell

def convert_hex_to_polygon(hex):
    """Build the boundary polygon of an H3 cell.

    Args:
        hex: H3 cell id; a falsy value (``None``, empty string) yields ``None``.

    Returns:
        A ``Polygon`` built from ``h3.h3_to_geo_boundary(hex, geo_json=True)``,
        or ``None`` when no cell id was given.
    """
    if not hex:
        return None
    # geo_json=True presumably yields GeoJSON-style (lng, lat) vertices,
    # matching the x/y order of the geometry column — confirm for the installed h3
    return Polygon(h3.h3_to_geo_boundary(hex, geo_json=True))

In [28]:
# Create one hex id column per H3 resolution (hex_7, hex_8, hex_9)
for resolution in (7, 8, 9):
    df[f"hex_{resolution}"] = df.apply(convert_to_hex_id, res=resolution, axis=1)

In [29]:
# Create one hexagon polygon column per H3 resolution (pol_7, pol_8, pol_9)
for resolution in (7, 8, 9):
    df[f"pol_{resolution}"] = df[f"hex_{resolution}"].apply(convert_hex_to_polygon)

In [30]:
df

Unnamed: 0_level_0,amenity,geometry,name,category,latitude,longitude,public_transport,wheelchair,hex_7,hex_8,hex_9,pol_7,pol_8,pol_9
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
258490031,parking,POINT (-87.64385 41.95123),,parking,41.951227,-87.643847,,,872664c14ffffff,882664c145fffff,892664c144fffff,"POLYGON ((-87.63414405293 41.96445315471353, -...",POLYGON ((-87.64084295487153 41.95798939959579...,POLYGON ((-87.64331225830723 41.95215193946119...
269449042,parking_entrance,POINT (-87.61506 41.85869),,parking,41.858689,-87.615058,,,872664c1bffffff,882664c1b1fffff,892664c1b03ffff,"POLYGON ((-87.61820944356228 41.8710984903598,...",POLYGON ((-87.61347038938916 41.86360034164272...,POLYGON ((-87.61470441038506 41.86068519734808...
269450074,parking_entrance,POINT (-87.58421 41.79174),,parking,41.791742,-87.584208,,,872664cc5ffffff,882664cc59fffff,892664cc583ffff,POLYGON ((-87.5937067068291 41.798228672497444...,POLYGON ((-87.58229602405846 41.79718226760902...,POLYGON ((-87.58353033145704 41.79426977332311...
269688947,post_box,POINT (-87.77220 42.00407),,other,42.004073,-87.772198,,,872664d9effffff,882664d9e1fffff,892664d9e0fffff,"POLYGON ((-87.7773181551225 42.01982017726515,...",POLYGON ((-87.77253980099742 42.01231761635508...,POLYGON ((-87.77499453418736 42.00647434498914...
270863340,parking_entrance,POINT (-87.61564 41.86421),,parking,41.864208,-87.615638,,,872664c1bffffff,882664c1bdfffff,892664c1bc7ffff,"POLYGON ((-87.61820944356228 41.8710984903598,...","POLYGON ((-87.61820944356228 41.8710984903598,...","POLYGON ((-87.6164567332772 41.86589170573153,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13621412,,"POLYGON ((-87.66859 41.91735, -87.66895 41.917...",,public_transport,41.917203,-87.668448,platform,,872664c12ffffff,882664cac9fffff,892664cac93ffff,"POLYGON ((-87.6723729663648 41.93963771855481,...","POLYGON ((-87.6695545771894 41.9181739690806, ...","POLYGON ((-87.6695545771894 41.9181739690806, ..."
13621413,,"POLYGON ((-87.66793 41.91671, -87.66797 41.916...",,public_transport,41.917215,-87.668182,platform,,872664c12ffffff,882664cac9fffff,892664cac93ffff,"POLYGON ((-87.6723729663648 41.93963771855481,...","POLYGON ((-87.6695545771894 41.9181739690806, ...","POLYGON ((-87.6695545771894 41.9181739690806, ..."
13621414,,"POLYGON ((-87.66801 41.91728, -87.66800 41.917...",,public_transport,41.917277,-87.668066,platform,,872664c12ffffff,882664cac9fffff,892664cac93ffff,"POLYGON ((-87.6723729663648 41.93963771855481,...","POLYGON ((-87.6695545771894 41.9181739690806, ...","POLYGON ((-87.6695545771894 41.9181739690806, ..."
15910906,,"POLYGON ((-87.62624 41.87979, -87.62623 41.879...",,public_transport,41.879543,-87.626201,platform,no,872664c1affffff,882664c1a9fffff,892664c1a87ffff,POLYGON ((-87.63912440648137 41.88713767856642...,POLYGON ((-87.62769200190127 41.88609596409943...,POLYGON ((-87.62593769473047 41.88088848015391...


This is the final POI DataFrame. Let's look at the columns.


| column | explanation |
|---|---|
| osmid | OSM ID of the POI |
| amenity | "amenity=* is the top-level tag describing useful and important facilities for visitors and residents, such as toilets, telephones, banks, pharmacies, prisons and schools" - [OSM Wiki](https://wiki.openstreetmap.org/wiki/Key:amenity)|
| geometry | Shape and position of the POI. |
| name | Some POIs are named, for example "Hilbert Pub" |
| category | Self-defined categories that simplify later analysis |
| latitude | Latitude |
| longitude | Longitude |
| public_transport | "The public_transport key denotes stop positions and platforms of public transport" - [OSM Wiki](https://wiki.openstreetmap.org/wiki/Key:public_transport)|
| wheelchair | "This tag may be used to mark places or ways that are suitable to be used with a wheelchair and a person with a disability who uses another mobility device (like a walker)" - [OSM Wiki](https://wiki.openstreetmap.org/wiki/Key:wheelchair)|
| hex_7 | HexagonID for resolution 7 |
| hex_8 | HexagonID for resolution 8 |
| hex_9 | HexagonID for resolution 9 |
| pol_7 | Geometry of the hexagon for resolution 7 |
| pol_8 | Geometry of the hexagon for resolution 8 |
| pol_9 | Geometry of the hexagon for resolution 9 |

In [31]:
# Store the final frame, which now also holds the hex_* and pol_* columns
# NOTE(review): assumes the data/poi/ directory already exists — confirm
df.to_pickle("data/poi/poi_prepared.pickle")