# Google Building Footprint Data

The data is gotten from `https://sites.research.google/open-buildings/#download`

In [1]:
%load_ext jupyter_black
import pandas as pd
import geopandas as gpd
import os
from pathlib import Path
import requests

In [2]:
input_dir = (
    Path(os.getenv("STORM_DATA_DIR"))
    / "analysis/02_new_model_input/02_housing_damage/input/Google Footprint Data/"
)
shp_input_dir = (
    Path(os.getenv("STORM_DATA_DIR"))
    / "analysis/02_new_model_input/02_housing_damage/input/"
)
output_dir = (
    Path(os.getenv("STORM_DATA_DIR"))
    / "analysis/02_new_model_input/02_housing_damage/output/"
)

In [3]:
adm3_shp = gpd.read_file(
    shp_input_dir / "phl_adminboundaries_candidate_adm3.zip"
)
grid = gpd.read_file(output_dir / "phl_0.1_degree_grid_land_overlap.gpkg")
adm3_grid_intersection = gpd.read_file(
    shp_input_dir / "phl_adm3_grid_intersection.gpkg"
)

The files that cover the Philippines are below and are selected by viewing on the map.

### Downloading data

In [4]:
file_pattern = ["33d", "33b", "339", "325", "32f", "323", "331", "347"]
polygons_url_link = "https://storage.googleapis.com/open-buildings-data/v2/polygons_s2_level_4_gzip/"
points_url_link = "https://storage.googleapis.com/open-buildings-data/v2/points_s2_level_4_gzip/"
file_list = [patt + "_buildings.csv.gz" for patt in file_pattern]

Using the points data as we are using centroids.

In [None]:
for file in file_list:
    r = requests.get(points_url_link + file, allow_redirects=True)
    open(input_dir / file, "wb").write(r.content)

### Merging all files into one

In [25]:
google_df = pd.DataFrame()
for file in file_list:
    zone_file = pd.read_csv(input_dir / file, compression="gzip")
    google_df = pd.concat([google_df, zone_file])
google_df

Unnamed: 0,latitude,longitude,area_in_meters,confidence,full_plus_code
0,9.429870,118.531309,26.5833,0.6818,6PXWCGHJ+WGVX
1,9.440374,118.482974,17.9793,0.7408,6PXWCFRM+45XR
2,9.363887,118.464563,37.3005,0.8230,6PXW9F77+HR4C
3,9.495301,118.569990,73.2031,0.7485,6PXWFHW9+4XH4
4,9.381147,118.503310,62.4306,0.8135,6PXW9GJ3+F83X
...,...,...,...,...,...
5370,20.449684,121.971273,50.4747,0.6766,7QG3CXXC+VGC8
5371,19.511708,121.913273,68.3271,0.7195,7QF3GW67+M8J8
5372,20.451546,121.972214,7.8440,0.6061,7QG3FX2C+JV8X
5373,20.619810,121.707082,7.3060,0.6186,7QG3JP94+WRG8


In [11]:
google_df.to_csv(input_dir / "google_footprint_data.csv", index=False)
# google_df = pd.read_csv(input_dir / "google_footprint_data.csv")

In [6]:
google_df[
    (google_df["latitude"].between(17.35, 17.45))
    & (google_df["longitude"].between(122.05, 122.15))
]

Unnamed: 0,latitude,longitude,area_in_meters,confidence,full_plus_code
11692258,17.382679,122.138921,10.7012,0.6136,7Q9494MQ+3HF3


In [7]:
# converting point df to geodataframe
ggl_gdf = gpd.GeoDataFrame(
    google_df,
    geometry=gpd.points_from_xy(google_df.longitude, google_df.latitude)
)

In [8]:
ggl_gdf.set_crs(adm3_shp.crs, inplace=True)

Unnamed: 0,latitude,longitude,area_in_meters,confidence,full_plus_code,geometry
0,9.429870,118.531309,26.5833,0.6818,6PXWCGHJ+WGVX,POINT (118.53131 9.42987)
1,9.440374,118.482974,17.9793,0.7408,6PXWCFRM+45XR,POINT (118.48297 9.44037)
2,9.363887,118.464563,37.3005,0.8230,6PXW9F77+HR4C,POINT (118.46456 9.36389)
3,9.495301,118.569990,73.2031,0.7485,6PXWFHW9+4XH4,POINT (118.56999 9.49530)
4,9.381147,118.503310,62.4306,0.8135,6PXW9GJ3+F83X,POINT (118.50331 9.38115)
...,...,...,...,...,...,...
35173505,20.449684,121.971273,50.4747,0.6766,7QG3CXXC+VGC8,POINT (121.97127 20.44968)
35173506,19.511708,121.913273,68.3271,0.7195,7QF3GW67+M8J8,POINT (121.91327 19.51171)
35173507,20.451546,121.972214,7.8440,0.6061,7QG3FX2C+JV8X,POINT (121.97221 20.45155)
35173508,20.619810,121.707082,7.3060,0.6186,7QG3JP94+WRG8,POINT (121.70708 20.61981)


In [9]:
# ggl_gdf.to_file(input_dir / "google_footprint_data.gpkg", driver="GPKG")
# ggl_gdf = gpd.read_file(input_dir / "google_footprint_data.gpkg")

## Counting number of buildings by region

In [9]:
del google_df

In [10]:
# ensuring all buildings are within municipality bounds
ggl_gdf_within = gpd.sjoin(ggl_gdf, adm3_shp, how="inner", predicate="within")

In [11]:
del ggl_gdf

In [12]:
ggl_gdf_within.drop(
    ggl_gdf_within.columns.difference(["latitude", "longitude", "geometry"]),
    axis=1,
    inplace=True,
)

In [13]:
ggl_gdf_within[
    (ggl_gdf_within["latitude"].between(17.35, 17.45))
    & (ggl_gdf_within["longitude"].between(122.05, 122.15))
]

Unnamed: 0,latitude,longitude,geometry
11692258,17.382679,122.138921,POINT (122.13892 17.38268)


### By Municipality

In [14]:
phl_ggl_bld_municip_count = adm3_shp.merge(
    adm3_shp.sjoin(ggl_gdf_within, how="left")
    .groupby("ADM3_PCODE")
    .count()
    .geometry.rename("numbuildings")
    .reset_index()
)

In [15]:
phl_ggl_bld_municip_count.columns

Index(['Shape_Leng', 'Shape_Area', 'ADM3_EN', 'ADM3_PCODE', 'ADM3_REF',
       'ADM3ALT1EN', 'ADM3ALT2EN', 'ADM2_EN', 'ADM2_PCODE', 'ADM1_EN',
       'ADM1_PCODE', 'ADM0_EN', 'ADM0_PCODE', 'date', 'validOn', 'validTo',
       'geometry', 'numbuildings'],
      dtype='object')

In [16]:
phl_ggl_bld_municip_count[["ADM3_PCODE", "ADM3_EN", "numbuildings"]].to_csv(
    input_dir / "phl_google_bld_municip_count.csv", index=False
)

### By Grid

In [17]:
phl_ggl_bld_grid_count = grid.merge(
    grid.sjoin(ggl_gdf_within, how="left")
    .groupby("Centroid")
    .count()
    .geometry.rename("numbuildings")
    .reset_index()
)

In [18]:
phl_ggl_bld_grid_count.columns

Index(['id', 'Longitude', 'Latitude', 'Centroid', 'geometry', 'numbuildings'], dtype='object')

In [19]:
phl_ggl_bld_grid_count[phl_ggl_bld_grid_count["Centroid"] == "122.1E_17.4N"]

Unnamed: 0,id,Longitude,Latitude,Centroid,geometry,numbuildings
1565,13064,122.1,17.4,122.1E_17.4N,"POLYGON ((122.05000 17.45000, 122.15000 17.450...",1


In [20]:
phl_ggl_bld_grid_count[["id", "Centroid", "numbuildings"]].to_csv(
    input_dir / "phl_google_bld_grid_count.csv", index=False
)

### By Municipality and Grid Intersection

In [27]:
adm3_grid_intersection.sjoin(ggl_gdf_within, how="left")

Unnamed: 0,Shape_Leng,Shape_Area,ADM3_EN,ADM3_PCODE,ADM3_REF,ADM3ALT1EN,ADM3ALT2EN,ADM2_EN,ADM2_PCODE,ADM1_EN,...,validOn,validTo,id,Longitude,Latitude,Centroid,geometry,index_right,latitude,longitude
0,1.601219,0.063496,Aborlan,PH175301000,,,,Palawan,PH175300000,Region IV-B,...,2020-05-29,,6795.0,118.3,9.7,118.3E_9.7N,"POLYGON ((118.34195 9.65002, 118.34195 9.65005...",,,
1,1.601219,0.063496,Aborlan,PH175301000,,,,Palawan,PH175300000,Region IV-B,...,2020-05-29,,6796.0,118.3,9.6,118.3E_9.6N,"MULTIPOLYGON (((118.28925 9.55002, 118.28930 9...",24508.0,9.645160,118.339115
1,1.601219,0.063496,Aborlan,PH175301000,,,,Palawan,PH175300000,Region IV-B,...,2020-05-29,,6796.0,118.3,9.6,118.3E_9.6N,"MULTIPOLYGON (((118.28925 9.55002, 118.28930 9...",34084.0,9.611918,118.344101
1,1.601219,0.063496,Aborlan,PH175301000,,,,Palawan,PH175300000,Region IV-B,...,2020-05-29,,6796.0,118.3,9.6,118.3E_9.6N,"MULTIPOLYGON (((118.28925 9.55002, 118.28930 9...",23440.0,9.611980,118.344171
1,1.601219,0.063496,Aborlan,PH175301000,,,,Palawan,PH175300000,Region IV-B,...,2020-05-29,,6796.0,118.3,9.6,118.3E_9.6N,"MULTIPOLYGON (((118.28925 9.55002, 118.28930 9...",17316.0,9.612101,118.343180
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9886,4.719512,0.122560,Zamboanga City,PH097332000,,,,Zamboanga del Sur,PH097300000,Region IX,...,2020-05-29,,13669.0,122.4,7.0,122.4E_7.0N,"MULTIPOLYGON (((122.35853 6.98396, 122.35854 6...",26149950.0,6.979623,122.354159
9886,4.719512,0.122560,Zamboanga City,PH097332000,,,,Zamboanga del Sur,PH097300000,Region IX,...,2020-05-29,,13669.0,122.4,7.0,122.4E_7.0N,"MULTIPOLYGON (((122.35853 6.98396, 122.35854 6...",26289006.0,6.979695,122.354282
9886,4.719512,0.122560,Zamboanga City,PH097332000,,,,Zamboanga del Sur,PH097300000,Region IX,...,2020-05-29,,13669.0,122.4,7.0,122.4E_7.0N,"MULTIPOLYGON (((122.35853 6.98396, 122.35854 6...",23965135.0,6.979707,122.354487
9886,4.719512,0.122560,Zamboanga City,PH097332000,,,,Zamboanga del Sur,PH097300000,Region IX,...,2020-05-29,,13669.0,122.4,7.0,122.4E_7.0N,"MULTIPOLYGON (((122.35853 6.98396, 122.35854 6...",24160013.0,6.979715,122.354192


In [28]:
phl_ggl_bld_intersection_count = adm3_grid_intersection.merge(
    adm3_grid_intersection.sjoin(ggl_gdf_within, how="left")
    .groupby(["ADM3_PCODE", "Centroid"])
    .count()
    .index_right.rename("numbuildings")
    .reset_index()
)

In [29]:
phl_ggl_bld_intersection_count.columns

Index(['Shape_Leng', 'Shape_Area', 'ADM3_EN', 'ADM3_PCODE', 'ADM3_REF',
       'ADM3ALT1EN', 'ADM3ALT2EN', 'ADM2_EN', 'ADM2_PCODE', 'ADM1_EN',
       'ADM1_PCODE', 'ADM0_EN', 'ADM0_PCODE', 'date', 'validOn', 'validTo',
       'id', 'Longitude', 'Latitude', 'Centroid', 'geometry', 'numbuildings'],
      dtype='object')

In [30]:
phl_ggl_bld_intersection_count[
    phl_ggl_bld_intersection_count["Centroid"] == "122.1E_17.4N"
]

Unnamed: 0,Shape_Leng,Shape_Area,ADM3_EN,ADM3_PCODE,ADM3_REF,ADM3ALT1EN,ADM3ALT2EN,ADM2_EN,ADM2_PCODE,ADM1_EN,...,ADM0_PCODE,date,validOn,validTo,id,Longitude,Latitude,Centroid,geometry,numbuildings
4289,1.313291,0.026166,Cabagan,PH023106000,,,,Isabela,PH023100000,Region II,...,PH,2016-06-30,2020-05-29,,13064.0,122.1,17.4,122.1E_17.4N,"POLYGON ((122.09036 17.42153, 122.08660 17.413...",0
4290,1.082705,0.039482,Maconacon,PH023117000,,,,Isabela,PH023100000,Region II,...,PH,2016-06-30,2020-05-29,,13064.0,122.1,17.4,122.1E_17.4N,"POLYGON ((122.09197 17.35533, 122.09094 17.359...",1
4291,0.898695,0.038314,San Pablo,PH023133000,,,,Isabela,PH023100000,Region II,...,PH,2016-06-30,2020-05-29,,13064.0,122.1,17.4,122.1E_17.4N,"POLYGON ((122.08605 17.44835, 122.09036 17.421...",0
4292,0.973154,0.034042,Tumauini,PH023137000,,,,Isabela,PH023100000,Region II,...,PH,2016-06-30,2020-05-29,,13064.0,122.1,17.4,122.1E_17.4N,"POLYGON ((122.09197 17.35533, 122.09221 17.350...",0


In [31]:
phl_ggl_bld_intersection_count[
    ["ADM3_PCODE", "ADM3_EN", "id", "Centroid", "numbuildings"]
].to_csv(input_dir / "phl_google_bld_intersection_count.csv", index=False)