In [28]:
import numpy as np
import pandas as pd
import geopandas as gpd

from shapely.geometry import box
from geowrangler import vector_zonal_stats as vzs
import os
import sys
import pyperclip

sys.path.append("../..")  # include parent directory
from src.settings import (
    DATA_DIR
)

In [24]:
tile_bounds_fn = os.path.join(DATA_DIR, 'building_footprints/tile_bounds_revised.geojson')
tile_gdf = gpd.read_file(tile_bounds_fn)
tile_gdf['maxy'] = tile_gdf.geometry.bounds['maxy']
tile_gdf = tile_gdf.sort_values('maxy', ascending=False).reset_index(drop=True)
tile_gdf['tile_id'] = 'T'+(tile_gdf.index+1).astype(str).str.zfill(5)
tile_gdf['n_footprints_estimate'] = tile_gdf['estimated no of footprints']
tile_gdf = tile_gdf[['tile_id','city','n_footprints_estimate','geometry']].set_geometry('geometry')
tile_gdf.head()

Unnamed: 0,tile_id,city,n_footprints_estimate,geometry
0,T00001,Dagupan,50-100,"POLYGON ((120.35530 16.07866, 120.35754 16.078..."
1,T00002,Dagupan,50-100,"POLYGON ((120.33560 16.06329, 120.33784 16.063..."
2,T00003,Dagupan,50-100,"POLYGON ((120.33290 16.04601, 120.33516 16.046..."
3,T00004,Dagupan,100-200,"POLYGON ((120.36347 16.04528, 120.36586 16.045..."
4,T00005,Dagupan,50-100,"POLYGON ((120.32903 16.04524, 120.33128 16.045..."


In [26]:
bldg_bounds_fdir = os.path.join(DATA_DIR, 'building_footprints/buildings-ms-gpkg/')
data_gdf_list = []
for fn in os.listdir(bldg_bounds_fdir):
    region = fn.split('_')[-1].split('.')[0]
    print(f'Processing {region}...',end='')
    bldg_gdf = gpd.read_file(bldg_bounds_fdir + fn)
    bldg_gdf.columns = bldg_gdf.columns.str.lower()
    filtered_gdf = gpd.sjoin(bldg_gdf, tile_gdf, how='inner', predicate='intersects')
    filtered_gdf = filtered_gdf.reset_index(drop=True)
    filtered_gdf['bldg_id'] = filtered_gdf['tile_id'] + '-' +  (filtered_gdf.index+1).astype(str).str.zfill(8)
    if len(filtered_gdf):
        data_gdf_list.append(filtered_gdf)
    print('Done!')


Processing mandaluyong...Done!
Processing zamboanga...Done!
Processing davao...Done!
Processing muntinlupa...Done!
Processing tacloban...Done!
Processing navotas...Done!
Processing palayan...Done!
Processing mandaue...Done!
Processing iloilo...Done!
Processing legazpi...Done!
Processing dagupan...Done!
Processing cdo...Done!


In [4]:
data_gdf = pd.concat(data_gdf_list)
data_gdf

Unnamed: 0,adm3_en,adm4_pcode,area,geometry,index_right,tile_id,city,n_footprints_estimate,bldg_id
0,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60161 8.49415, 124.60161 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000001
1,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60180 8.49406, 124.60180 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000002
2,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60216 8.49407, 124.60210 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000003
3,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60240 8.49423, 124.60247 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000004
4,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60184 8.49418, 124.60179 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000005
...,...,...,...,...,...,...,...,...,...
1528,Zamboanga City,PH097332062,0.125512,"MULTIPOLYGON (((121.90187 7.09232, 121.90192 7...",112,T00113,Zamboanga,50-100,T00113-00001529
1529,Zamboanga City,PH097332062,0.125512,"MULTIPOLYGON (((121.90215 7.09224, 121.90216 7...",112,T00113,Zamboanga,50-100,T00113-00001530
1530,Zamboanga City,PH097332062,0.125512,"MULTIPOLYGON (((121.90163 7.09237, 121.90163 7...",112,T00113,Zamboanga,50-100,T00113-00001531
1531,Zamboanga City,PH097332062,0.125512,"MULTIPOLYGON (((121.90172 7.09230, 121.90173 7...",112,T00113,Zamboanga,50-100,T00113-00001532


In [5]:
data_gdf = pd.concat(data_gdf_list)
data_gdf

Unnamed: 0,adm3_en,adm4_pcode,area,geometry,index_right,tile_id,city,n_footprints_estimate,bldg_id
0,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60161 8.49415, 124.60161 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000001
1,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60180 8.49406, 124.60180 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000002
2,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60216 8.49407, 124.60210 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000003
3,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60240 8.49423, 124.60247 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000004
4,Cagayan de Oro City,PH104305004,0.125512,"MULTIPOLYGON (((124.60184 8.49418, 124.60179 8...",94,T00095,Cagayan de Oro,50-100,T00095-00000005
...,...,...,...,...,...,...,...,...,...
1528,Zamboanga City,PH097332062,0.125512,"MULTIPOLYGON (((121.90187 7.09232, 121.90192 7...",112,T00113,Zamboanga,50-100,T00113-00001529
1529,Zamboanga City,PH097332062,0.125512,"MULTIPOLYGON (((121.90215 7.09224, 121.90216 7...",112,T00113,Zamboanga,50-100,T00113-00001530
1530,Zamboanga City,PH097332062,0.125512,"MULTIPOLYGON (((121.90163 7.09237, 121.90163 7...",112,T00113,Zamboanga,50-100,T00113-00001531
1531,Zamboanga City,PH097332062,0.125512,"MULTIPOLYGON (((121.90172 7.09230, 121.90173 7...",112,T00113,Zamboanga,50-100,T00113-00001532


In [6]:
data_gdf = data_gdf[['tile_id','adm3_en','n_footprints_estimate','bldg_id','geometry']]
data_gdf

Unnamed: 0,tile_id,adm3_en,n_footprints_estimate,bldg_id,geometry
0,T00095,Cagayan de Oro City,50-100,T00095-00000001,"MULTIPOLYGON (((124.60161 8.49415, 124.60161 8..."
1,T00095,Cagayan de Oro City,50-100,T00095-00000002,"MULTIPOLYGON (((124.60180 8.49406, 124.60180 8..."
2,T00095,Cagayan de Oro City,50-100,T00095-00000003,"MULTIPOLYGON (((124.60216 8.49407, 124.60210 8..."
3,T00095,Cagayan de Oro City,50-100,T00095-00000004,"MULTIPOLYGON (((124.60240 8.49423, 124.60247 8..."
4,T00095,Cagayan de Oro City,50-100,T00095-00000005,"MULTIPOLYGON (((124.60184 8.49418, 124.60179 8..."
...,...,...,...,...,...
1528,T00113,Zamboanga City,50-100,T00113-00001529,"MULTIPOLYGON (((121.90187 7.09232, 121.90192 7..."
1529,T00113,Zamboanga City,50-100,T00113-00001530,"MULTIPOLYGON (((121.90215 7.09224, 121.90216 7..."
1530,T00113,Zamboanga City,50-100,T00113-00001531,"MULTIPOLYGON (((121.90163 7.09237, 121.90163 7..."
1531,T00113,Zamboanga City,50-100,T00113-00001532,"MULTIPOLYGON (((121.90172 7.09230, 121.90173 7..."


In [7]:
data_gdf.columns = ['tile_id', 'adm3_en', 'n_footprints_estimate' , 'bldg_id', 'geometry']
data_gdf

Unnamed: 0,tile_id,adm3_en,n_footprints_estimate,bldg_id,geometry
0,T00095,Cagayan de Oro City,50-100,T00095-00000001,"MULTIPOLYGON (((124.60161 8.49415, 124.60161 8..."
1,T00095,Cagayan de Oro City,50-100,T00095-00000002,"MULTIPOLYGON (((124.60180 8.49406, 124.60180 8..."
2,T00095,Cagayan de Oro City,50-100,T00095-00000003,"MULTIPOLYGON (((124.60216 8.49407, 124.60210 8..."
3,T00095,Cagayan de Oro City,50-100,T00095-00000004,"MULTIPOLYGON (((124.60240 8.49423, 124.60247 8..."
4,T00095,Cagayan de Oro City,50-100,T00095-00000005,"MULTIPOLYGON (((124.60184 8.49418, 124.60179 8..."
...,...,...,...,...,...
1528,T00113,Zamboanga City,50-100,T00113-00001529,"MULTIPOLYGON (((121.90187 7.09232, 121.90192 7..."
1529,T00113,Zamboanga City,50-100,T00113-00001530,"MULTIPOLYGON (((121.90215 7.09224, 121.90216 7..."
1530,T00113,Zamboanga City,50-100,T00113-00001531,"MULTIPOLYGON (((121.90163 7.09237, 121.90163 7..."
1531,T00113,Zamboanga City,50-100,T00113-00001532,"MULTIPOLYGON (((121.90172 7.09230, 121.90173 7..."


In [8]:
tile_data_gdf = data_gdf[['tile_id','adm3_en','n_footprints_estimate']].drop_duplicates().sort_values('tile_id').copy()
tile_data_gdf = tile_data_gdf.merge(tile_gdf[['tile_id','geometry']], on=['tile_id'])
tile_data_gdf.columns = ['tile_id','city_name','n_footprints_estimate','geometry']
tile_data_gdf

Unnamed: 0,tile_id,city_name,n_footprints_estimate,geometry
0,T00001,Dagupan City,50-100,"POLYGON ((120.35530 16.07866, 120.35754 16.078..."
1,T00002,Dagupan City,50-100,"POLYGON ((120.33560 16.06329, 120.33784 16.063..."
2,T00003,Dagupan City,50-100,"POLYGON ((120.33290 16.04601, 120.33516 16.046..."
3,T00004,Dagupan City,100-200,"POLYGON ((120.36347 16.04528, 120.36586 16.045..."
4,T00005,Dagupan City,50-100,"POLYGON ((120.32903 16.04524, 120.33128 16.045..."
...,...,...,...,...
117,T00118,Zamboanga City,100-200,"POLYGON ((122.01679 6.92710, 122.01904 6.92710..."
118,T00119,Zamboanga City,100-200,"POLYGON ((122.08647 6.92517, 122.08871 6.92517..."
119,T00120,Zamboanga City,100-200,"POLYGON ((122.14914 6.91893, 122.15139 6.91893..."
120,T00121,Zamboanga City,100-200,"POLYGON ((122.13012 6.91162, 122.13236 6.91162..."


In [9]:
tile_data_gdf.to_csv(r'C:\Users\DafroseBajaros\Documents\#lacuna-fund-climate-health\Building Footprints\notebook-outputs-ms\tile_bounds.csv',index=False)

In [10]:
#data_gdf.columns = ['tile_id', 'adm3_en', 'n_footprints_estimate' , 'bldg_id', 'bldg_area_in_meters','bldg_confidence', 'geometry']
data_gdf.head()

Unnamed: 0,tile_id,adm3_en,n_footprints_estimate,bldg_id,geometry
0,T00095,Cagayan de Oro City,50-100,T00095-00000001,"MULTIPOLYGON (((124.60161 8.49415, 124.60161 8..."
1,T00095,Cagayan de Oro City,50-100,T00095-00000002,"MULTIPOLYGON (((124.60180 8.49406, 124.60180 8..."
2,T00095,Cagayan de Oro City,50-100,T00095-00000003,"MULTIPOLYGON (((124.60216 8.49407, 124.60210 8..."
3,T00095,Cagayan de Oro City,50-100,T00095-00000004,"MULTIPOLYGON (((124.60240 8.49423, 124.60247 8..."
4,T00095,Cagayan de Oro City,50-100,T00095-00000005,"MULTIPOLYGON (((124.60184 8.49418, 124.60179 8..."


In [11]:
data_bldg_gdf = data_gdf[['tile_id', 'bldg_id', 'geometry']]
data_bldg_gdf.head()

Unnamed: 0,tile_id,bldg_id,geometry
0,T00095,T00095-00000001,"MULTIPOLYGON (((124.60161 8.49415, 124.60161 8..."
1,T00095,T00095-00000002,"MULTIPOLYGON (((124.60180 8.49406, 124.60180 8..."
2,T00095,T00095-00000003,"MULTIPOLYGON (((124.60216 8.49407, 124.60210 8..."
3,T00095,T00095-00000004,"MULTIPOLYGON (((124.60240 8.49423, 124.60247 8..."
4,T00095,T00095-00000005,"MULTIPOLYGON (((124.60184 8.49418, 124.60179 8..."


In [12]:
data_bldg_gdf.groupby('tile_id').size().sort_values(ascending=False)

tile_id
T00093    250
T00118    240
T00064    237
T00111    226
T00114    214
         ... 
T00036     48
T00032     44
T00018     21
T00063     12
T00028      1
Length: 122, dtype: int64

In [16]:
grouped_data = data_bldg_gdf.groupby('tile_id').size().reset_index(name='count')
sorted_data = grouped_data.sort_values(by='tile_id')
sorted_data_string = sorted_data.to_string(index=False)
pyperclip.copy(sorted_data_string)

print("Sorted data copied to clipboard:")
print(sorted_data_string)

Sorted data copied to clipboard:
tile_id  count
 T00001     94
 T00002     58
 T00003     90
 T00004    130
 T00005    179
 T00006     80
 T00007     82
 T00008    156
 T00009    124
 T00010     91
 T00011     75
 T00012    101
 T00013    115
 T00014     53
 T00015     72
 T00016     73
 T00017     74
 T00018     21
 T00019     63
 T00020    109
 T00021     49
 T00022     85
 T00023     75
 T00024    176
 T00025    113
 T00026    141
 T00027    152
 T00028      1
 T00029     59
 T00030    136
 T00031    107
 T00032     44
 T00033     72
 T00034    196
 T00035    123
 T00036     48
 T00037    109
 T00038    131
 T00039    128
 T00040     81
 T00041    101
 T00042    147
 T00043     60
 T00044     89
 T00045    108
 T00046     79
 T00047    195
 T00048    112
 T00049    108
 T00050    126
 T00051    118
 T00052     53
 T00053    121
 T00054    192
 T00055     60
 T00056    167
 T00057    162
 T00058     92
 T00059    199
 T00060     94
 T00061    113
 T00062     98
 T00063     12
 T00064

In [15]:
grouped_data = data_bldg_gdf.groupby('tile_id').size().reset_index(name='count')
sorted_data = grouped_data.sort_values(by='tile_id')
print(sorted_data.to_string())

    tile_id  count
0    T00001     94
1    T00002     58
2    T00003     90
3    T00004    130
4    T00005    179
5    T00006     80
6    T00007     82
7    T00008    156
8    T00009    124
9    T00010     91
10   T00011     75
11   T00012    101
12   T00013    115
13   T00014     53
14   T00015     72
15   T00016     73
16   T00017     74
17   T00018     21
18   T00019     63
19   T00020    109
20   T00021     49
21   T00022     85
22   T00023     75
23   T00024    176
24   T00025    113
25   T00026    141
26   T00027    152
27   T00028      1
28   T00029     59
29   T00030    136
30   T00031    107
31   T00032     44
32   T00033     72
33   T00034    196
34   T00035    123
35   T00036     48
36   T00037    109
37   T00038    131
38   T00039    128
39   T00040     81
40   T00041    101
41   T00042    147
42   T00043     60
43   T00044     89
44   T00045    108
45   T00046     79
46   T00047    195
47   T00048    112
48   T00049    108
49   T00050    126
50   T00051    118
51   T00052 

In [17]:
data_bldg_gdf.to_csv(r'C:\Users\DafroseBajaros\Documents\#lacuna-fund-climate-health\Building Footprints\notebook-outputs-ms\tile_bldgs.csv',index=False)

In [19]:
data_bldg_gdf

Unnamed: 0,tile_id,bldg_id,geometry
0,T00095,T00095-00000001,"MULTIPOLYGON (((124.60161 8.49415, 124.60161 8..."
1,T00095,T00095-00000002,"MULTIPOLYGON (((124.60180 8.49406, 124.60180 8..."
2,T00095,T00095-00000003,"MULTIPOLYGON (((124.60216 8.49407, 124.60210 8..."
3,T00095,T00095-00000004,"MULTIPOLYGON (((124.60240 8.49423, 124.60247 8..."
4,T00095,T00095-00000005,"MULTIPOLYGON (((124.60184 8.49418, 124.60179 8..."
...,...,...,...
1528,T00113,T00113-00001529,"MULTIPOLYGON (((121.90187 7.09232, 121.90192 7..."
1529,T00113,T00113-00001530,"MULTIPOLYGON (((121.90215 7.09224, 121.90216 7..."
1530,T00113,T00113-00001531,"MULTIPOLYGON (((121.90163 7.09237, 121.90163 7..."
1531,T00113,T00113-00001532,"MULTIPOLYGON (((121.90172 7.09230, 121.90173 7..."


In [18]:
data_bldg_gdf.to_file(r'C:\Users\DafroseBajaros\Documents\#lacuna-fund-climate-health\Building Footprints\notebook-outputs-ms\tile_bldgs.geojson',index=False, driver='GeoJSON')

In [65]:
#data_gdf.to_file('../../../data/tm/2-processed/bldgs_bq_check.geojson', driver='GeoJSON')

  pd.Int64Index,
