In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import geopandas as gpd
import pandas as pd

import json
from pathlib import Path

from geowrangler.datasets import geofabrik

import sys

sys.path.append("../../../")  # include parent directory
from src.vector_utils import *



# Extract multiple years of OSM Data

This notebook extracts POI, water, and waterway features for each available year of OSM data for the Philippines.

### Input

- Administrative Boundaries
- Raw OSM files from Geofabrik (years 2014-2022)

### Output for each OSM year

- `CSV` file of features for each barangay
  - POI features
  - Water features
  - Waterway features

## Set-up parameters and directories

In [3]:
# Project directories, all relative to the location of this notebook
SRC_DIR = Path("../../../src/")
DATA_DIR = Path("../../../data/")

# Numbered sub-folders of the data directory
ADMIN_FPATH = DATA_DIR.joinpath("01-admin-bounds")
RAW_FPATH = DATA_DIR.joinpath("02-raw")
PROCESSED_FPATH = DATA_DIR.joinpath("03-processed")
OUTPUT_FPATH = DATA_DIR.joinpath("04-output", "osm")
GIS_FPATH = DATA_DIR.joinpath("05-gis")

In [4]:
# Parameters

# OSM POI types to extract, stored per category in a JSON file
# (tags taken from https://wiki.openstreetmap.org/wiki/Map_features)
poi_file = SRC_DIR / "osm_poi_categories.json"

with poi_file.open("r") as f:
    data = json.load(f)

# Flatten the per-category entries into one list of POI types
POI_TYPES_NEEDED = [
    poi_type for category_tags in data.values() for poi_type in category_tags
]

# Single-year setting; change it to work with another year
OSM_YEAR = "2022"
OSM_COUNTRY = "philippines"

# Every OSM year handled by this notebook, newest first (2022 down to 2014)
OSM_YEARS = [str(year) for year in range(2022, 2013, -1)]

## Load Admin Bounds

In [5]:
# Read the target administrative boundaries and preview the first two rows
admin_bounds_fpath = ADMIN_FPATH / "target_admin_bounds.shp"
admin_bounds_gdf = gpd.read_file(admin_bounds_fpath)
admin_bounds_gdf.head(2)

Unnamed: 0,ADM1_EN,ADM1_PCODE,ADM2_EN,ADM2_PCODE,ADM3_EN,ADM3_PCODE,ADM4_EN,ADM4_PCODE,geometry
0,Region I,PH010000000,Pangasinan,PH015500000,Dagupan City,PH015518000,Lomboy,PH015518016,"POLYGON ((120.32742 16.05423, 120.32719 16.053..."
1,Region I,PH010000000,Pangasinan,PH015500000,Dagupan City,PH015518000,Tapuac,PH015518031,"POLYGON ((120.33380 16.03974, 120.33389 16.039..."


## Extract features

In [6]:
# Cache downloaded OSM files in the raw-data directory. The path is derived
# from RAW_FPATH instead of repeating the literal; the trailing slash is kept
# so the string passed is exactly "../../../data/02-raw/" as before.
osm = geofabrik.OsmDataManager(cache_dir=f"{RAW_FPATH.as_posix()}/")

In [7]:
# Columns removed from every feature table before it is written to CSV.
# ADM4_PCODE is not listed, so it stays in the saved files.
_OSM_FEATS_DROP_COLS = [
    "ADM1_EN",
    "ADM1_PCODE",
    "ADM2_EN",
    "ADM2_PCODE",
    "ADM3_EN",
    "ADM3_PCODE",
    "ADM4_EN",
    "geometry",
]


def _save_osm_feats_csv(feats_gdf, out_fpath):
    """Drop the admin-name/geometry columns and write the table to `out_fpath`."""
    feats_df = pd.DataFrame(feats_gdf.drop(columns=_OSM_FEATS_DROP_COLS))
    feats_df.to_csv(out_fpath, index=False)


def extract_all_osm_feat(osm_year):
    """Extract and save the OSM POI, water, and waterway features for one year.

    Each feature set is computed against `admin_bounds_gdf` for `OSM_COUNTRY`
    and written to its own CSV file under `OUTPUT_FPATH`:

    - ``osm_poi_features_{osm_year}.csv``
    - ``osm_features_water_{osm_year}.csv``
    - ``osm_features_waterways_{osm_year}.csv``

    Parameters
    ----------
    osm_year : str
        OSM year to process; it is forwarded to the feature helpers and used
        in the output file names.

    Returns
    -------
    None
        The results are only written to disk.
    """
    # Create the output folder up front so a missing directory fails fast
    # instead of after the slow extraction step.
    OUTPUT_FPATH.mkdir(parents=True, exist_ok=True)

    # extract and save POI features
    osm_poi_feats_gdf = add_osm_poi_features(
        admin_bounds_gdf,
        OSM_COUNTRY,
        osm_year,
        osm,
        use_cache=True,
        poi_types=POI_TYPES_NEEDED,
    )
    _save_osm_feats_csv(
        osm_poi_feats_gdf, OUTPUT_FPATH / f"osm_poi_features_{osm_year}.csv"
    )

    # extract and save water features (same country constant as the POI call)
    osm_water_feats_gdf = add_osm_water_features(
        admin_bounds_gdf, OSM_COUNTRY, osm_year
    )
    _save_osm_feats_csv(
        osm_water_feats_gdf, OUTPUT_FPATH / f"osm_features_water_{osm_year}.csv"
    )

    # extract and save waterway features
    osm_waterways_feats_gdf = add_osm_water_features(
        admin_bounds_gdf,
        OSM_COUNTRY,
        osm_year,
        waterways=True,
    )
    _save_osm_feats_csv(
        osm_waterways_feats_gdf,
        OUTPUT_FPATH / f"osm_features_waterways_{osm_year}.csv",
    )

In [8]:
%%time
# Run the full extraction once per year listed in OSM_YEARS
for osm_year in OSM_YEARS:
    extract_all_osm_feat(osm_year)

2023-12-13 22:46:44.226 | INFO     | geowrangler.datasets.geofabrik:download_osm_region_data:128 - OSM Data: Cached data available for philippines at ../../../data/02-raw/osm/philippines-220101-free.shp.zip? True
2023-12-13 22:46:44.227 | DEBUG    | geowrangler.datasets.geofabrik:load_pois:222 - OSM POIs for philippines and year 2022 being loaded from ../../../data/02-raw/osm/philippines-220101-free.shp.zip
2023-12-13 22:46:57.498 | DEBUG    | src.vector_utils:add_osm_water_features:184 - OSM Water for philippines and year 2022 being loaded from ../../../data/02-raw/osm/philippines-220101-free.shp.zip
2023-12-13 22:47:09.462 | DEBUG    | src.vector_utils:add_osm_water_features:184 - OSM Water for philippines and year 2022 being loaded from ../../../data/02-raw/osm/philippines-220101-free.shp.zip
2023-12-13 22:47:19.052 | INFO     | geowrangler.datasets.geofabrik:download_osm_region_data:128 - OSM Data: Cached data available for philippines at ../../../data/02-raw/osm/philippines-210101

CPU times: user 5min 4s, sys: 1.57 s, total: 5min 6s
Wall time: 5min 20s
