In [8]:
import numpy as np
import pandas as pd
import geopandas as gpd

from shapely.geometry import box
from geowrangler import vector_zonal_stats as vzs
import os
import sys
import pyperclip

sys.path.append("../../..")  # include parent directory
from src.settings import (
    DATA_DIR
)

In [12]:
tile_bounds_fn = os.path.join(DATA_DIR, 'building_footprints/tile_bounds_revised.geojson')
tile_gdf = gpd.read_file(tile_bounds_fn)
tile_gdf['maxy'] = tile_gdf.geometry.bounds['maxy']
tile_gdf = tile_gdf.sort_values('maxy', ascending=False).reset_index(drop=True)
tile_gdf['tile_id'] = 'T'+(tile_gdf.index+1).astype(str).str.zfill(5)
tile_gdf['n_footprints_estimate'] = tile_gdf['estimated no of footprints']
tile_gdf = tile_gdf[['tile_id','city','n_footprints_estimate','geometry']].set_geometry('geometry')
tile_gdf.head()

Unnamed: 0,tile_id,city,n_footprints_estimate,geometry
0,T00001,Dagupan,50-100,"POLYGON ((120.35530 16.07866, 120.35754 16.078..."
1,T00002,Dagupan,50-100,"POLYGON ((120.33560 16.06329, 120.33784 16.063..."
2,T00003,Dagupan,50-100,"POLYGON ((120.33290 16.04601, 120.33516 16.046..."
3,T00004,Dagupan,100-200,"POLYGON ((120.36347 16.04528, 120.36586 16.045..."
4,T00005,Dagupan,50-100,"POLYGON ((120.32903 16.04524, 120.33128 16.045..."


In [13]:
bldg_bounds_fdir = os.path.join(DATA_DIR, 'building_footprints/buildings-google-gpkg/')
data_gdf_list = []
for fn in os.listdir(bldg_bounds_fdir):
    region = fn.split('_')[-1].split('.')[0]
    print(f'Processing {region}...',end='')
    bldg_gdf = gpd.read_file(bldg_bounds_fdir + fn)
    bldg_gdf.columns = bldg_gdf.columns.str.lower()
    filtered_gdf = gpd.sjoin(bldg_gdf, tile_gdf, how='inner', predicate='intersects')
    filtered_gdf = filtered_gdf.reset_index(drop=True)
    filtered_gdf['bldg_id'] = filtered_gdf['tile_id'] + '-' +  (filtered_gdf.index+1).astype(str).str.zfill(8)
    if len(filtered_gdf):
        data_gdf_list.append(filtered_gdf)
    print('Done!')


Processing mandaluyong...Done!
Processing zamboanga...Done!
Processing davao...Done!
Processing muntinlupa...Done!
Processing tacloban...Done!
Processing navotas...Done!
Processing palayan...Done!
Processing mandaue...Done!
Processing iloilo...Done!
Processing legazpi...Done!
Processing dagupan...Done!
Processing cdo...Done!


In [4]:
data_gdf = pd.concat(data_gdf_list)
data_gdf

Unnamed: 0,adm3_en,adm4_pcode,area_in_meters,confidence,geometry,index_right,tile_id,city,n_footprints_estimate,bldg_id
0,Cagayan de Oro City,PH104305004,25.3322,0.7395,"MULTIPOLYGON (((124.62846 8.45173, 124.62848 8...",98,T00099,Cagayan de Oro,50-100,T00099-00000001
1,Cagayan de Oro City,PH104305004,7.7660,0.7122,"MULTIPOLYGON (((124.62839 8.45102, 124.62838 8...",98,T00099,Cagayan de Oro,50-100,T00099-00000002
2,Cagayan de Oro City,PH104305004,22.9834,0.8170,"MULTIPOLYGON (((124.62741 8.45166, 124.62737 8...",98,T00099,Cagayan de Oro,50-100,T00099-00000003
3,Cagayan de Oro City,PH104305004,141.2490,0.9108,"MULTIPOLYGON (((124.62698 8.45150, 124.62688 8...",98,T00099,Cagayan de Oro,50-100,T00099-00000004
4,Cagayan de Oro City,PH104305004,40.7960,0.7949,"MULTIPOLYGON (((124.62742 8.45071, 124.62738 8...",98,T00099,Cagayan de Oro,50-100,T00099-00000005
...,...,...,...,...,...,...,...,...,...,...
3913,Zamboanga City,PH097332062,142.5635,0.8724,"MULTIPOLYGON (((122.22979 7.20491, 122.22974 7...",107,T00108,Zamboanga,100-200,T00108-00003914
3914,Zamboanga City,PH097332062,64.6345,0.7898,"MULTIPOLYGON (((122.22829 7.20497, 122.22819 7...",107,T00108,Zamboanga,100-200,T00108-00003915
3915,Zamboanga City,PH097332062,20.1684,0.7264,"MULTIPOLYGON (((122.22840 7.20408, 122.22837 7...",107,T00108,Zamboanga,100-200,T00108-00003916
3916,Zamboanga City,PH097332062,101.4958,0.8098,"MULTIPOLYGON (((122.22803 7.20370, 122.22795 7...",107,T00108,Zamboanga,100-200,T00108-00003917


In [5]:
data_gdf = data_gdf[['tile_id','adm3_en','n_footprints_estimate','bldg_id','area_in_meters','confidence','geometry']]
data_gdf

Unnamed: 0,tile_id,adm3_en,n_footprints_estimate,bldg_id,area_in_meters,confidence,geometry
0,T00099,Cagayan de Oro City,50-100,T00099-00000001,25.3322,0.7395,"MULTIPOLYGON (((124.62846 8.45173, 124.62848 8..."
1,T00099,Cagayan de Oro City,50-100,T00099-00000002,7.7660,0.7122,"MULTIPOLYGON (((124.62839 8.45102, 124.62838 8..."
2,T00099,Cagayan de Oro City,50-100,T00099-00000003,22.9834,0.8170,"MULTIPOLYGON (((124.62741 8.45166, 124.62737 8..."
3,T00099,Cagayan de Oro City,50-100,T00099-00000004,141.2490,0.9108,"MULTIPOLYGON (((124.62698 8.45150, 124.62688 8..."
4,T00099,Cagayan de Oro City,50-100,T00099-00000005,40.7960,0.7949,"MULTIPOLYGON (((124.62742 8.45071, 124.62738 8..."
...,...,...,...,...,...,...,...
3913,T00108,Zamboanga City,100-200,T00108-00003914,142.5635,0.8724,"MULTIPOLYGON (((122.22979 7.20491, 122.22974 7..."
3914,T00108,Zamboanga City,100-200,T00108-00003915,64.6345,0.7898,"MULTIPOLYGON (((122.22829 7.20497, 122.22819 7..."
3915,T00108,Zamboanga City,100-200,T00108-00003916,20.1684,0.7264,"MULTIPOLYGON (((122.22840 7.20408, 122.22837 7..."
3916,T00108,Zamboanga City,100-200,T00108-00003917,101.4958,0.8098,"MULTIPOLYGON (((122.22803 7.20370, 122.22795 7..."


In [6]:
data_gdf.columns = ['tile_id', 'adm3_en', 'n_footprints_estimate' , 'bldg_id', 'bldg_area_in_meters','bldg_confidence', 'geometry']
data_gdf

Unnamed: 0,tile_id,adm3_en,n_footprints_estimate,bldg_id,bldg_area_in_meters,bldg_confidence,geometry
0,T00099,Cagayan de Oro City,50-100,T00099-00000001,25.3322,0.7395,"MULTIPOLYGON (((124.62846 8.45173, 124.62848 8..."
1,T00099,Cagayan de Oro City,50-100,T00099-00000002,7.7660,0.7122,"MULTIPOLYGON (((124.62839 8.45102, 124.62838 8..."
2,T00099,Cagayan de Oro City,50-100,T00099-00000003,22.9834,0.8170,"MULTIPOLYGON (((124.62741 8.45166, 124.62737 8..."
3,T00099,Cagayan de Oro City,50-100,T00099-00000004,141.2490,0.9108,"MULTIPOLYGON (((124.62698 8.45150, 124.62688 8..."
4,T00099,Cagayan de Oro City,50-100,T00099-00000005,40.7960,0.7949,"MULTIPOLYGON (((124.62742 8.45071, 124.62738 8..."
...,...,...,...,...,...,...,...
3913,T00108,Zamboanga City,100-200,T00108-00003914,142.5635,0.8724,"MULTIPOLYGON (((122.22979 7.20491, 122.22974 7..."
3914,T00108,Zamboanga City,100-200,T00108-00003915,64.6345,0.7898,"MULTIPOLYGON (((122.22829 7.20497, 122.22819 7..."
3915,T00108,Zamboanga City,100-200,T00108-00003916,20.1684,0.7264,"MULTIPOLYGON (((122.22840 7.20408, 122.22837 7..."
3916,T00108,Zamboanga City,100-200,T00108-00003917,101.4958,0.8098,"MULTIPOLYGON (((122.22803 7.20370, 122.22795 7..."


In [7]:
tile_data_gdf = data_gdf[['tile_id','adm3_en','n_footprints_estimate']].drop_duplicates().sort_values('tile_id').copy()
tile_data_gdf = tile_data_gdf.merge(tile_gdf[['tile_id','geometry']], on=['tile_id'])
tile_data_gdf.columns = ['tile_id','city_name','n_footprints_estimate','geometry']
tile_data_gdf

Unnamed: 0,tile_id,city_name,n_footprints_estimate,geometry
0,T00001,Dagupan City,50-100,"POLYGON ((120.35530 16.07866, 120.35754 16.078..."
1,T00002,Dagupan City,50-100,"POLYGON ((120.33560 16.06329, 120.33784 16.063..."
2,T00003,Dagupan City,50-100,"POLYGON ((120.33290 16.04601, 120.33516 16.046..."
3,T00004,Dagupan City,100-200,"POLYGON ((120.36347 16.04528, 120.36586 16.045..."
4,T00005,Dagupan City,50-100,"POLYGON ((120.32903 16.04524, 120.33128 16.045..."
...,...,...,...,...
116,T00118,Zamboanga City,100-200,"POLYGON ((122.01679 6.92710, 122.01904 6.92710..."
117,T00119,Zamboanga City,100-200,"POLYGON ((122.08647 6.92517, 122.08871 6.92517..."
118,T00120,Zamboanga City,100-200,"POLYGON ((122.14914 6.91893, 122.15139 6.91893..."
119,T00121,Zamboanga City,100-200,"POLYGON ((122.13012 6.91162, 122.13236 6.91162..."


In [8]:
tile_data_gdf.to_csv(r'C:\Users\DafroseBajaros\Documents\#lacuna-fund-climate-health\Building Footprints\notebook-outputs\tile_bounds.csv',index=False)

In [9]:
#data_gdf.columns = ['tile_id', 'adm3_en', 'n_footprints_estimate' , 'bldg_id', 'bldg_area_in_meters','bldg_confidence', 'geometry']
data_gdf.head()

Unnamed: 0,tile_id,adm3_en,n_footprints_estimate,bldg_id,bldg_area_in_meters,bldg_confidence,geometry
0,T00099,Cagayan de Oro City,50-100,T00099-00000001,25.3322,0.7395,"MULTIPOLYGON (((124.62846 8.45173, 124.62848 8..."
1,T00099,Cagayan de Oro City,50-100,T00099-00000002,7.766,0.7122,"MULTIPOLYGON (((124.62839 8.45102, 124.62838 8..."
2,T00099,Cagayan de Oro City,50-100,T00099-00000003,22.9834,0.817,"MULTIPOLYGON (((124.62741 8.45166, 124.62737 8..."
3,T00099,Cagayan de Oro City,50-100,T00099-00000004,141.249,0.9108,"MULTIPOLYGON (((124.62698 8.45150, 124.62688 8..."
4,T00099,Cagayan de Oro City,50-100,T00099-00000005,40.796,0.7949,"MULTIPOLYGON (((124.62742 8.45071, 124.62738 8..."


In [10]:
data_bldg_gdf = data_gdf[['tile_id', 'bldg_id', 'bldg_area_in_meters','bldg_confidence', 'geometry']]
data_bldg_gdf.head()

Unnamed: 0,tile_id,bldg_id,bldg_area_in_meters,bldg_confidence,geometry
0,T00099,T00099-00000001,25.3322,0.7395,"MULTIPOLYGON (((124.62846 8.45173, 124.62848 8..."
1,T00099,T00099-00000002,7.766,0.7122,"MULTIPOLYGON (((124.62839 8.45102, 124.62838 8..."
2,T00099,T00099-00000003,22.9834,0.817,"MULTIPOLYGON (((124.62741 8.45166, 124.62737 8..."
3,T00099,T00099-00000004,141.249,0.9108,"MULTIPOLYGON (((124.62698 8.45150, 124.62688 8..."
4,T00099,T00099-00000005,40.796,0.7949,"MULTIPOLYGON (((124.62742 8.45071, 124.62738 8..."


In [11]:
data_bldg_gdf.groupby('tile_id').size().sort_values(ascending=False)

tile_id
T00025    806
T00120    678
T00030    647
T00034    637
T00036    633
         ... 
T00016     82
T00103     70
T00014     68
T00021     54
T00018     26
Length: 121, dtype: int64

In [17]:
grouped_data = data_bldg_gdf.groupby('tile_id').size().reset_index(name='count')
sorted_data = grouped_data.sort_values(by='tile_id')
sorted_data_string = sorted_data.to_string(index=False)
pyperclip.copy(sorted_data_string)

print("Sorted data copied to clipboard:")
print(sorted_data_string)

Sorted data copied to clipboard:
tile_id  count
 T00001    204
 T00002    137
 T00003    206
 T00004    338
 T00005    526
 T00006    176
 T00007    282
 T00008    319
 T00009    318
 T00010    314
 T00011    216
 T00012    142
 T00013    240
 T00014     68
 T00015    189
 T00016     82
 T00017    119
 T00018     26
 T00019    122
 T00020    210
 T00021     54
 T00022    139
 T00023    338
 T00024    499
 T00025    806
 T00026    566
 T00027    572
 T00029    388
 T00030    647
 T00031    530
 T00032    122
 T00033    125
 T00034    637
 T00035    335
 T00036    633
 T00037    260
 T00038    392
 T00039    304
 T00040    390
 T00041    344
 T00042    593
 T00043    353
 T00044    264
 T00045    298
 T00046     97
 T00047    421
 T00048    307
 T00049    480
 T00050    392
 T00051    342
 T00052    127
 T00053    243
 T00054    416
 T00055     99
 T00056    336
 T00057    427
 T00058    259
 T00059    369
 T00060    154
 T00061    205
 T00062    236
 T00063    464
 T00064    447
 T00065

In [12]:
data_bldg_gdf.to_csv(r'C:\Users\DafroseBajaros\Documents\#lacuna-fund-climate-health\Building Footprints\notebook-outputs\tile_bldgs.csv',index=False)

In [13]:
data_bldg_gdf.to_file(r'C:\Users\DafroseBajaros\Documents\#lacuna-fund-climate-health\Building Footprints\notebook-outputs\tile_bldgs.geojson',index=False, driver='GeoJSON')

In [14]:
#data_gdf.to_file('../../../data/tm/2-processed/bldgs_bq_check.geojson', driver='GeoJSON')