In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

from shapely.geometry import box
from geowrangler import vector_zonal_stats as vzs
import os



In [None]:
RAW_DIR = "../../../data/tm/1-raw/"
PROCESSED_DIR = "../../../data/tm/2-processed/"

# Create BQ Table of buildings per tile
This notebook adds a `tile_id` and `building_id` column to construct the building table used as starting point in annotations

## 1. Read tiles

In [2]:
tile_bounds_fn = f"{PROCESSED_DIR}ctt-tiles/tile_bounds_revised_raw.geojson"
tile_gdf = gpd.read_file(tile_bounds_fn)
tile_gdf.head()

Unnamed: 0,tile_id,city,n_footprints_estimate,geometry
0,T00001,Dagupan,50-100,"POLYGON ((120.35530 16.07866, 120.35754 16.078..."
1,T00002,Dagupan,50-100,"POLYGON ((120.33560 16.06329, 120.33784 16.063..."
2,T00003,Dagupan,50-100,"POLYGON ((120.33290 16.04601, 120.33516 16.046..."
3,T00004,Dagupan,100-200,"POLYGON ((120.36347 16.04528, 120.36586 16.045..."
4,T00005,Dagupan,50-100,"POLYGON ((120.32903 16.04524, 120.33128 16.045..."


## 2. Merge city files and add building id

In [3]:
bldg_bounds_fdir = f"{PROCESSED_DIR}ms-open-buildings/within_city/"
data_gdf_list = []
fn_list = [fn for fn in os.listdir(bldg_bounds_fdir) if "geojson" in fn]
for fn in fn_list:
    bldg_gdf = gpd.read_file(bldg_bounds_fdir + fn)
    bldg_gdf.columns = bldg_gdf.columns.str.lower()
    region = bldg_gdf["adm3_en"].values[0]
    print(f"Processing {region}...", end="")
    # join with tiles
    filtered_gdf = gpd.sjoin(bldg_gdf, tile_gdf, how="inner", predicate="intersects")
    filtered_gdf = filtered_gdf.reset_index(drop=True)
    # add bldg id2
    filtered_gdf["bldg_id"] = (
        filtered_gdf["tile_id"]
        + "-"
        + (filtered_gdf.index + 1).astype(str).str.zfill(8)
    )
    if len(filtered_gdf):
        data_gdf_list.append(filtered_gdf)
    print("Done!")

Processing Cagayan de Oro City...Done!
Processing Dagupan City...Done!
Processing Davao City...Done!
Processing Iloilo City...Done!
Processing Legazpi City...Done!
Processing City of Mandaluyong...Done!
Processing Mandaue City...Done!
Processing City of Muntinlupa...Done!
Processing City of Navotas...Done!
Processing Palayan City...Done!
Processing Tacloban City...Done!
Processing Zamboanga City...Done!


In [4]:
data_gdf = pd.concat(data_gdf_list)
data_gdf = data_gdf[["tile_id", "bldg_id", "geometry"]]
data_gdf.head()

Unnamed: 0,tile_id,bldg_id,geometry
0,T00095,T00095-00000001,"POLYGON ((124.60154 8.49409, 124.60161 8.49409..."
1,T00095,T00095-00000002,"POLYGON ((124.60186 8.49409, 124.60198 8.49409..."
2,T00095,T00095-00000003,"POLYGON ((124.60210 8.49420, 124.60210 8.49406..."
3,T00095,T00095-00000004,"POLYGON ((124.60248 8.49414, 124.60247 8.49423..."
4,T00095,T00095-00000005,"POLYGON ((124.60179 8.49424, 124.60179 8.49418..."


In [5]:
data_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 14541 entries, 0 to 1519
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   tile_id   14541 non-null  object  
 1   bldg_id   14541 non-null  object  
 2   geometry  14541 non-null  geometry
dtypes: geometry(1), object(2)
memory usage: 454.4+ KB


## 3. Save

In [6]:
data_gdf.to_file(
    f"{PROCESSED_DIR}ctt-tiles/tile_bldgs_ms.geojson", index=False, driver="GeoJSON"
)

In [None]:
data_gdf.to_csv(f"{PROCESSED_DIR}ctt-tiles/tile_bldgs_ms.csv", index=False)