In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import geopandas as gpd

import json
from pathlib import Path

from geowrangler.datasets import geofabrik

import sys

sys.path.append("../../../")  # include parent directory
from src.vector_utils import *



# Extract OSM POIs

Download the available Points of Interest (POIs) from OpenStreetMap for 2022 or any earlier year.

### Set-up directories and parameters

In [3]:
# Project folders, written relative to this notebook's location.
DATA_DIR = Path("../../../data/")
SRC_DIR = Path("../../../src/")

# Numbered sub-folders of the data directory.
ADMIN_FPATH = DATA_DIR.joinpath("01-admin-bounds")
RAW_FPATH = DATA_DIR.joinpath("02-raw")
PROCESSED_FPATH = DATA_DIR.joinpath("03-processed")
OUTPUT_FPATH = DATA_DIR.joinpath("04-output")
GIS_FPATH = DATA_DIR.joinpath("05-gis")

In [4]:
# Parameters

# POI types of OSM
# tags taken from https://wiki.openstreetmap.org/wiki/Map_features
poi_file = SRC_DIR / "osm_poi_categories.json"

# Pin the encoding: JSON text is UTF-8, whereas open() would otherwise fall back
# to the platform's locale encoding.
with open(poi_file, "r", encoding="utf-8") as f:
    data = json.load(f)

# Flatten the per-category POI type collections into a single list,
# keeping the order in which they appear in the file.
POI_TYPES_NEEDED = [poi_type for category in data for poi_type in data[category]]

# change this parameter to download for other years
OSM_YEAR = "2022"
OSM_COUNTRY = "philippines"

### Load Administrative Boundaries for the 12 cities

In [5]:
# Read the administrative boundaries file and preview its first two rows.
admin_bounds_fpath = ADMIN_FPATH / "renamed_target_admin_bounds.gpkg"
admin_bounds_gdf = gpd.read_file(admin_bounds_fpath)
admin_bounds_gdf.head(2)

Unnamed: 0,region_name,region_code,province_name,province_code,city_name,city_code,barangay_name,barangay_psgc_code,geometry
0,Region I,PH010000000,Pangasinan,PH015500000,Dagupan City,PH015518000,Lomboy,PH015518016,"POLYGON ((120.32742 16.05423, 120.32719 16.053..."
1,Region I,PH010000000,Pangasinan,PH015500000,Dagupan City,PH015518000,Tapuac,PH015518031,"POLYGON ((120.33380 16.03974, 120.33389 16.039..."


## Download OSM 2022

In [6]:
# Keep the OSM download cache in the raw-data folder defined in the set-up cell
# (RAW_FPATH) instead of repeating that path as a separate string literal.
osm = geofabrik.OsmDataManager(cache_dir=str(RAW_FPATH))

In [7]:
# Load the POIs for the configured country and year (cache allowed), then preview two rows.
osm_pois_2022 = osm.load_pois(
    region=OSM_COUNTRY,
    year=OSM_YEAR,
    use_cache=True,
)
osm_pois_2022.head(2)

2023-06-07 10:24:26.643 | INFO     | geowrangler.datasets.geofabrik:download_osm_region_data:128 - OSM Data: Cached data available for philippines at ../../../data/02-raw/osm/philippines-220101-free.shp.zip? True
2023-06-07 10:24:26.645 | DEBUG    | geowrangler.datasets.geofabrik:load_pois:222 - OSM POIs for philippines and year 2022 being loaded from ../../../data/02-raw/osm/philippines-220101-free.shp.zip


Unnamed: 0,osm_id,code,fclass,name,geometry
0,21717820,2907,camera_surveillance,,POINT (121.02120 14.57608)
1,21717872,2722,museum,Ayala Museum,POINT (121.02324 14.55358)


In [8]:
# Distinct values of the `fclass` column in the loaded POIs.
osm_pois_2022.loc[:, "fclass"].unique()

array(['camera_surveillance', 'museum', 'motel', 'bicycle_shop',
       'beverages', 'post_office', 'fire_station', 'school', 'college',
       'market_place', 'bank', 'supermarket', 'garden_centre',
       'telephone', 'restaurant', 'hotel', 'cafe', 'fast_food',
       'pharmacy', 'doctors', 'public_building', 'hospital', 'bakery',
       'sports_centre', 'atm', 'attraction', 'viewpoint', 'park',
       'university', 'police', 'graveyard', 'tower', 'toilet',
       'arts_centre', 'doityourself', 'cinema', 'pitch', 'water_tower',
       'swimming_pool', 'monument', 'lighthouse', 'kindergarten',
       'convenience', 'town_hall', 'florist', 'pub', 'mall', 'stadium',
       'furniture_shop', 'bar', 'car_dealership', 'computer_shop',
       'shoe_shop', 'embassy', 'library', 'bookshop', 'sports_shop',
       'clothes', 'department_store', 'community_centre',
       'mobile_phone_shop', 'comms_tower', 'memorial', 'car_rental',
       'tourist_info', 'guesthouse', 'playground', 'theatre', '

## Generate POI features

Use the `add_osm_poi_features` util from the `src` folder. This came from the [povertymapping util](https://github.com/thinkingmachines/unicef-ai4d-poverty-mapping/blob/main/povertymapping/osm.py). 

POI features calculated for each administrative boundary and POI type:
1. POI count
2. Distance to nearest POI

In [9]:
# Generate the OSM POI features for the admin boundaries, restricted to the POI
# types listed in POI_TYPES_NEEDED. The call reuses the `osm` data manager created
# above and allows cached data (`use_cache=True`).
# NOTE(review): the positional argument order (boundaries, country, year, manager)
# is dictated by the helper's signature, which is not visible here -- confirm
# before reordering or switching to keyword arguments.
# NOTE(review): the 10000.0 values in the `_nearest` output columns look like a
# maximum-distance cap applied by the helper -- confirm against its source.
osm_poi_feats_gdf = add_osm_poi_features(
    admin_bounds_gdf,
    OSM_COUNTRY,
    OSM_YEAR,
    osm,
    use_cache=True,
    poi_types=POI_TYPES_NEEDED,
)
osm_poi_feats_gdf.head(2)

2023-06-07 10:24:32.328 | DEBUG    | geowrangler.datasets.geofabrik:load_pois:203 - OSM POIs for philippines and year 2022 found in cache.


Unnamed: 0,region_name,region_code,province_name,province_code,city_name,city_code,barangay_name,barangay_psgc_code,geometry,osm_year,...,osm_poi_toilet_count,osm_poi_toilet_nearest,osm_poi_recycling_count,osm_poi_recycling_nearest,osm_poi_waste_basket_count,osm_poi_waste_basket_nearest,osm_poi_wastewater_plant_count,osm_poi_wastewater_plant_nearest,osm_poi_waste_transfer_station_count,osm_poi_waste_transfer_station_nearest
0,Region I,PH010000000,Pangasinan,PH015500000,Dagupan City,PH015518000,Lomboy,PH015518016,"POLYGON ((120.32742 16.05423, 120.32719 16.053...",2022,...,0.0,5993.655956,0.0,2486.499185,0.0,10000.0,0.0,10000.0,0.0,10000.0
1,Region I,PH010000000,Pangasinan,PH015500000,Dagupan City,PH015518000,Tapuac,PH015518031,"POLYGON ((120.33380 16.03974, 120.33389 16.039...",2022,...,0.0,3736.27654,0.0,79.395847,0.0,10000.0,0.0,10000.0,0.0,10000.0


In [10]:
# Save generated POI features
osm_poi_feats_gdf = osm_poi_feats_gdf.sort_values(by="poi_count", ascending=False)
osm_poi_feats_gdf.to_file(OUTPUT_FPATH / "osm_poi_features_2022.gpkg", driver="GPKG")

  pd.Int64Index,


In [11]:
# Work on a copy of the feature table with the administrative code columns left out.
remove_pcode = ["region_code", "province_code", "city_code", "barangay_psgc_code"]
check_osm_poi_feats = osm_poi_feats_gdf.copy()
is_pcode_col = check_osm_poi_feats.columns.isin(remove_pcode)
check_osm_poi_feats = check_osm_poi_feats.loc[:, ~is_pcode_col]

In [12]:
def get_count_cols(df):
    """Return the names of the columns of `df` that end with "_count".

    The match is a literal suffix check, so a column that merely contains
    "_count" somewhere in the middle of its name is not selected. Column
    labels that are not strings are skipped.

    Args:
        df: Object exposing a `columns` iterable of column labels
            (e.g. a pandas DataFrame or GeoDataFrame).

    Returns:
        list: Matching column names, in their original column order.
    """
    return [
        col for col in df.columns if isinstance(col, str) and col.endswith("_count")
    ]


def get_nearest_cols(df):
    """Return the names of the columns of `df` that end with "_nearest".

    The match is a literal suffix check, so a column that merely contains
    "_nearest" somewhere in the middle of its name is not selected. Column
    labels that are not strings are skipped.

    Args:
        df: Object exposing a `columns` iterable of column labels
            (e.g. a pandas DataFrame or GeoDataFrame).

    Returns:
        list: Matching column names, in their original column order.
    """
    return [
        col for col in df.columns if isinstance(col, str) and col.endswith("_nearest")
    ]

In [13]:
# Name and geometry columns kept in front of each feature table.
admin_cols = ["region_name", "province_name", "city_name", "barangay_name", "geometry"]

# Split the feature columns by kind: "_count" columns and "_nearest" columns.
count_cols = get_count_cols(check_osm_poi_feats)
nearest_cols = get_nearest_cols(check_osm_poi_feats)

# One table per feature kind, each led by the admin columns.
poi_counts = check_osm_poi_feats.loc[:, admin_cols + count_cols]
poi_nearest = check_osm_poi_feats.loc[:, admin_cols + nearest_cols]

Check POI counts aggregated for each city. 

Davao City, Iloilo City, and Muntinlupa City have high POI counts, while Palayan City and Navotas City have very low POI counts (both are less than a hundred).

In [14]:
# Total POI counts per city. `numeric_only=True` states explicitly that only the
# numeric columns are summed; relying on pandas to drop the name and geometry
# columns implicitly is deprecated and emits a warning.
city_stats_counts = poi_counts.groupby(["city_name"]).sum(numeric_only=True)
# Display only: the sorted frame is not assigned, so `city_stats_counts` itself
# keeps the groupby order.
city_stats_counts.sort_values(by="poi_count", ascending=False)

  city_stats_counts = poi_counts.groupby(["city_name"]).sum()


Unnamed: 0_level_0,poi_count,osm_poi_clinic_count,osm_poi_dentist_count,osm_poi_doctors_count,osm_poi_hospital_count,osm_poi_optician_count,osm_poi_pharmacy_count,osm_poi_atm_count,osm_poi_bank_count,osm_poi_college_count,...,osm_poi_water_mill_count,osm_poi_water_tower_count,osm_poi_water_works_count,osm_poi_water_well_count,osm_poi_sanitary_dump_station_count,osm_poi_toilet_count,osm_poi_recycling_count,osm_poi_waste_basket_count,osm_poi_wastewater_plant_count,osm_poi_waste_transfer_station_count
city_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Davao City,3424.0,0.0,13.0,40.0,4.0,2.0,130.0,70.0,216.0,30.0,...,0.0,6.0,1.0,2.0,0.0,32.0,4.0,4.0,0.0,0.0
Iloilo City,1661.0,0.0,2.0,8.0,3.0,3.0,71.0,32.0,103.0,13.0,...,0.0,1.0,0.0,2.0,0.0,15.0,0.0,0.0,0.0,0.0
City of Muntinlupa,1014.0,0.0,12.0,8.0,3.0,7.0,41.0,18.0,110.0,3.0,...,0.0,35.0,0.0,0.0,0.0,17.0,0.0,0.0,0.0,0.0
Cagayan de Oro City,987.0,0.0,10.0,3.0,3.0,0.0,54.0,16.0,69.0,9.0,...,0.0,8.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
City of Mandaluyong,968.0,0.0,15.0,5.0,0.0,1.0,41.0,35.0,98.0,5.0,...,0.0,2.0,0.0,0.0,0.0,4.0,5.0,0.0,0.0,0.0
Tacloban City,781.0,0.0,5.0,5.0,2.0,3.0,29.0,12.0,33.0,6.0,...,0.0,0.0,0.0,29.0,0.0,1.0,1.0,0.0,0.0,0.0
Dagupan City,751.0,0.0,12.0,4.0,1.0,5.0,49.0,3.0,48.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
Zamboanga City,537.0,0.0,3.0,2.0,1.0,0.0,47.0,57.0,29.0,4.0,...,0.0,5.0,2.0,5.0,0.0,9.0,0.0,0.0,1.0,0.0
Mandaue City,531.0,0.0,1.0,6.0,1.0,0.0,36.0,6.0,54.0,6.0,...,0.0,1.0,0.0,4.0,0.0,7.0,1.0,0.0,0.0,0.0
Legazpi City,453.0,0.0,5.0,2.0,0.0,2.0,14.0,21.0,28.0,2.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [15]:
# Export the city-level aggregates. The file name follows OSM_YEAR so that a run
# for another year is neither labelled as 2022 nor written over the 2022 output.
city_stats_counts.to_csv(OUTPUT_FPATH / f"osm_features_{OSM_YEAR}_cityagg.csv")