## 01c POI Data - Preprocessing

In [1]:
#Imports
import osmnx as ox
import folium
import geopandas as gp
import pandas as pd

In [82]:
# POIs in OSM are called amenities
amenity_pois = ox.features_from_place({'city': 'Chicago'}, {'amenity': True})

# Next to amenities is an extra section for public transport, this could also be interesting for us
# https://wiki.openstreetmap.org/wiki/Map_features#Addresses
transport_pois = ox.features_from_place({'city': 'Chicago'}, {'public_transport': True})

In [83]:
# Pick fields of interest, drop unnecessary column "element_type" and reset index
amenity_pois = amenity_pois[["amenity", "name", "geometry", "public_transport"]].reset_index().set_index("osmid").drop("element_type", axis = 1)

In [88]:
# Set CRS - necessary for getting accurate long and lat of Chicago 
amenity_gdf = gp.GeoDataFrame(amenity_pois)
amenity_gdf = amenity_gdf.to_crs(3435)
print(amenity_gdf.crs)
amenity_gdf.head(3)

EPSG:3435


Unnamed: 0_level_0,amenity,name,geometry,public_transport
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20217109,ferry_terminal,Shoreline Sightseeing,POINT (1177794.409 1902995.829),station
20217442,ferry_terminal,Union Station/Willis Tower - Shoreline Water T...,POINT (1173679.176 1899287.730),station
258490031,parking,,POINT (1171804.787 1925571.513),


In [89]:
print(f"Unique amenities in Chicago: {len(amenity_gdf['amenity'].unique())}")

amenity_counts = amenity_gdf['amenity'].value_counts()
print("Amenities in Chicago and their count:")
for amenity, count in amenity_counts.items():
    print(f"{amenity}: {count}")

Unique amenities in Chicago: 126
Amenities in Chicago and their count:
parking: 7025
parking_space: 4970
restaurant: 2236
place_of_worship: 1861
fast_food: 1330
school: 1185
bicycle_parking: 1116
bench: 960
bicycle_rental: 888
bar: 844
cafe: 768
bank: 457
fuel: 421
parking_entrance: 409
shelter: 279
post_box: 275
loading_dock: 248
toilets: 235
fountain: 213
social_facility: 211
drinking_water: 190
pharmacy: 177
clinic: 170
waste_basket: 167
vending_machine: 115
ice_cream: 115
library: 107
atm: 105
fire_station: 103
dentist: 101
post_office: 97
pub: 97
studio: 94
theatre: 87
car_wash: 70
community_centre: 61
charging_station: 59
university: 52
childcare: 46
ferry_terminal: 45
doctors: 45
hospital: 40
arts_centre: 38
police: 38
veterinary: 37
events_venue: 36
social_centre: 35
money_transfer: 31
recycling: 30
college: 28
kindergarten: 26
cinema: 24
car_rental: 23
bus_station: 22
nightclub: 20
post_depot: 20
bicycle_repair_station: 20
public_bookcase: 18
bureau_de_change: 18
animal_shelte

In [90]:
# The amenities which have a count of < 3 are very specifc, we drop those ------------?-------------
items = amenity_gdf['amenity'].value_counts().to_dict().items()

# Filtering only those rows where duplicate entries occur more than n
n = 2
amenity_gdf = amenity_gdf[amenity_gdf['amenity'].isin([key for key, val in items if val > n])]

In [101]:
# Get latitude and longitude for each row
# amenity_gdf['lat'] = amenity_gdf["geometry"].apply(lambda p: p.y)
# amenity_gdf['lon'] = amenity_gdf["geometry"].apply(lambda p: p.x)
def get_lat_lon(p):
    if p.geom_type == 'Point':
        return (p.x, p.y)
    else:
        centroid = p.centroid
        return (centroid.x, centroid.y)
        
amenity_gdf[['lon', 'lat']] = amenity_gdf['geometry'].apply(lambda p: get_lat_lon(p))
amenity_gdf.head(3)

ValueError: Columns must be same length as key