In [14]:
import geopandas as gpd
import pandas as pd
import geowrangler.vector_zonal_stats as vzs

from pathlib import Path
import os

# Extract infrastructure features from building footprints

### Input 

- Administrative Boundaries
- Cropped Google Open Buildings files

### Output

- Barangay-level dataframe with building features

## Set-up paramaters and directories

In [15]:
OUTPUT_DIR = Path("../../../data/04-output/")
RAW_DIR = Path("../../../data/02-raw/")

ADMIN_BOUNDS = Path("../../../data/01-admin-bounds/target_admin_bounds.shp")
GOOGLE_BLDGS_DIR = RAW_DIR / "google_open_buildings_v3"

## Load Admin Bounds

In [16]:
admin_gdf = gpd.read_file(ADMIN_BOUNDS)
admin_gdf.head(2)

Unnamed: 0,ADM1_EN,ADM1_PCODE,ADM2_EN,ADM2_PCODE,ADM3_EN,ADM3_PCODE,ADM4_EN,ADM4_PCODE,geometry
0,Region I,PH010000000,Pangasinan,PH015500000,Dagupan City,PH015518000,Lomboy,PH015518016,"POLYGON ((120.32742 16.05423, 120.32719 16.053..."
1,Region I,PH010000000,Pangasinan,PH015500000,Dagupan City,PH015518000,Tapuac,PH015518031,"POLYGON ((120.33380 16.03974, 120.33389 16.039..."


In [17]:
## add brgy area
admin_gdf = admin_gdf.to_crs("epsg:3123")
admin_gdf["brgy_area_m2"] = admin_gdf.area

## Load Cropped Google Bldgs 

In [18]:
google_bldg_files = os.listdir(GOOGLE_BLDGS_DIR)
google_bldg_files

['google_lacuna_cities_bldgs_muntinlupa.geojson',
 'google_lacuna_cities_bldgs_dagupan.geojson',
 'google_lacuna_cities_bldgs_davao.geojson',
 'google_lacuna_cities_bldgs_cdo.geojson',
 'google_lacuna_cities_bldgs_palayan.geojson',
 'google_lacuna_cities_bldgs_iloilo.geojson',
 'google_lacuna_cities_bldgs_mandaue.geojson',
 'google_lacuna_cities_bldgs_zamboanga.geojson',
 'google_lacuna_cities_bldgs_navotas.geojson',
 'google_lacuna_cities_bldgs_legazpi.geojson',
 'google_lacuna_cities_bldgs_tacloban.geojson',
 'google_lacuna_cities_bldgs_mandaluyong.geojson']

In [19]:
google_gdfs = []
for bldg_file in google_bldg_files:
    gdf = gpd.read_file(GOOGLE_BLDGS_DIR / bldg_file, driver="GeoJSON")
    google_gdfs.append(gdf)

google_gdf = gpd.GeoDataFrame(pd.concat(google_gdfs, ignore_index=True))

In [20]:
google_gdf.head(2)

Unnamed: 0,city_name,barangay_psgc_code,area_in_meters,confidence,geometry
0,City of Muntinlupa,PH137603005,29.1533,0.6901,"POLYGON ((121.02198 14.36817, 121.02192 14.368..."
1,City of Muntinlupa,PH137603005,97.5825,0.7559,"POLYGON ((121.05054 14.39108, 121.05053 14.391..."


### Add binnings for different building sizes at a building level

In [21]:
# less than 100
is_less_100sqm = google_gdf["area_in_meters"] < 100
# 100 - 200 sqm
is_100_200sqm = (google_gdf["area_in_meters"] >= 100) & (
    google_gdf["area_in_meters"] <= 200
)
# greater than 200 sqm
is_gt_200sqm = google_gdf["area_in_meters"] > 200

google_gdf.loc[is_less_100sqm, "is_less_100sqm"] = 1
google_gdf.loc[~is_less_100sqm, "is_less_100sqm"] = 0

google_gdf.loc[is_100_200sqm, "is_100_200sqm"] = 1
google_gdf.loc[~is_100_200sqm, "is_100_200sqm"] = 0

google_gdf.loc[is_gt_200sqm, "is_gt_200sqm"] = 1
google_gdf.loc[~is_gt_200sqm, "is_gt_200sqm"] = 0

## Extract buildings features per barangay

In [22]:
admin_gdf = admin_gdf.to_crs("epsg:4326")
google_gdf = google_gdf.set_crs("epsg:4326")

In [23]:
%%time
aligned_google = vzs.create_zonal_stats(
    admin_gdf,
    google_gdf,
    aggregations=[
        {"func": "count", "output": "google_bldgs_count"},
        {
            "column": "area_in_meters",
            "func": ["sum", "mean"],
            "output": ["google_bldgs_area_total", "google_bldgs_area_mean"],
        },
        {
            "column": "is_less_100sqm",
            "func": "sum",
            "output": "google_bldgs_count_lt100_sqm",
        },
        {
            "column": "is_100_200sqm",
            "func": "sum",
            "output": "google_bldgs_count_100_200_sqm",
        },
        {
            "column": "is_gt_200sqm",
            "func": "sum",
            "output": "google_bldgs_count_gt_200_sqm",
        },
    ],
)

CPU times: user 4.98 s, sys: 622 ms, total: 5.6 s
Wall time: 5.59 s


In [25]:
# convert counts to integers
count_cols = [
    "google_bldgs_count_lt100_sqm",
    "google_bldgs_count_100_200_sqm",
    "google_bldgs_count_gt_200_sqm",
]
aligned_google[count_cols] = aligned_google[count_cols].astype(int)

In [28]:
# add building density (number of buildings per m2)
aligned_google["google_bldgs_density"] = (
    aligned_google["google_bldgs_count"] / aligned_google["brgy_area_m2"]
)

# add total builtup area in percent
aligned_google["google_bldgs_pct_built_up_area"] = 100 * (
    aligned_google["google_bldgs_area_total"] / aligned_google["brgy_area_m2"]
)

## Add descriptor columns and organize table

In [29]:
aligned_google["date"] = "2023-01-01"
aligned_google["freq"] = "Y"

In [30]:
aligned_google.columns

Index(['ADM1_EN', 'ADM1_PCODE', 'ADM2_EN', 'ADM2_PCODE', 'ADM3_EN',
       'ADM3_PCODE', 'ADM4_EN', 'ADM4_PCODE', 'geometry', 'brgy_area_m2',
       'google_bldgs_count', 'google_bldgs_area_total',
       'google_bldgs_area_mean', 'google_bldgs_count_lt100_sqm',
       'google_bldgs_count_100_200_sqm', 'google_bldgs_count_gt_200_sqm',
       'google_bldgs_density', 'google_bldgs_pct_built_up_area', 'date',
       'freq'],
      dtype='object')

In [31]:
aligned_google = aligned_google[
    [
        "ADM4_PCODE",
        "date",
        "freq",
        "google_bldgs_count",
        "google_bldgs_area_total",
        "google_bldgs_area_mean",
        "google_bldgs_count_lt100_sqm",
        "google_bldgs_count_100_200_sqm",
        "google_bldgs_count_gt_200_sqm",
        "google_bldgs_density",
        "google_bldgs_pct_built_up_area",
    ]
]
aligned_google.head(3)

Unnamed: 0,ADM4_PCODE,date,freq,google_bldgs_count,google_bldgs_area_total,google_bldgs_area_mean,google_bldgs_count_lt100_sqm,google_bldgs_count_100_200_sqm,google_bldgs_count_gt_200_sqm,google_bldgs_density,google_bldgs_pct_built_up_area
0,PH015518016,2023-01-01,Y,469,18878.581,40.252838,442,20,7,0.00046,1.850054
1,PH015518031,2023-01-01,Y,2209,234899.6797,106.337564,1535,449,225,0.002118,22.526707
2,PH015518022,2023-01-01,Y,6530,479063.8388,73.363528,5245,889,396,0.002004,14.701164


In [32]:
google_bldg_extract_df = pd.DataFrame(aligned_google)
google_bldg_extract_df.to_csv(OUTPUT_DIR / "google_bldgs_v3_features.csv")