# Google Building Footprint Data

The data was downloaded from `https://sites.research.google/open-buildings/#download`

In [1]:
%load_ext jupyter_black
import pandas as pd
import geopandas as gpd
import os
from pathlib import Path
import requests

In [2]:
# Root of the housing-damage analysis tree. STORM_DATA_DIR must be set in the
# environment; indexing os.environ makes a missing variable fail with a
# KeyError that names it, instead of an opaque TypeError from Path(None).
# NOTE: despite its name, base_url is a filesystem path, not a URL.
base_url = (
    Path(os.environ["STORM_DATA_DIR"])
    / "analysis/02_new_model_input/02_housing_damage/"
)
# Downloaded Google tiles live here; the count CSVs produced below are also
# written here.
input_dir = base_url / "input/Google Footprint Data/"
# Admin-boundary and boundary/grid intersection layers are read from here.
shp_input_dir = base_url / "input/"
# The 0.1 degree grid layer is read from here.
output_dir = base_url / "output/"

In [3]:
# Polygon layers that the building centroids are joined against below.
adm3_path = shp_input_dir / "phl_adminboundaries_candidate_adm3.zip"
grid_path = output_dir / "phl_0.1_degree_grid_land_overlap.gpkg"
intersection_path = shp_input_dir / "phl_adm3_grid_intersection.gpkg"

# ADM3 (municipality) boundaries.
adm3_shp = gpd.read_file(adm3_path)
# 0.1 degree grid cells.
grid = gpd.read_file(grid_path)
# ADM3 x grid-cell intersection polygons.
adm3_grid_intersection = gpd.read_file(intersection_path)

The tiles that cover the Philippines are listed below; they were selected by viewing the map.

### Downloading data

In [4]:
# Tile prefixes of the Open Buildings files that cover the Philippines.
file_pattern = ["33d", "33b", "339", "325", "32f", "323", "331", "347"]
# Base URLs of the v2 polygon and point datasets (only the points are
# downloaded below).
polygons_url_link = "https://storage.googleapis.com/open-buildings-data/v2/polygons_s2_level_4_gzip/"
points_url_link = "https://storage.googleapis.com/open-buildings-data/v2/points_s2_level_4_gzip/"
# One gzipped CSV per tile, e.g. "33d_buildings.csv.gz".
file_list = [f"{tile}_buildings.csv.gz" for tile in file_pattern]

We download the points data (rather than the polygons) because only the building centroids are used.

In [None]:
# Download every points tile into input_dir.
for file in file_list:
    # stream=True avoids holding a whole tile in memory; the timeout bounds
    # each connect/read so a stalled connection cannot hang the notebook.
    with requests.get(
        points_url_link + file, allow_redirects=True, stream=True, timeout=60
    ) as response:
        # Fail loudly on HTTP errors instead of saving an error page as a
        # ".csv.gz" file that only breaks later in pd.read_csv.
        response.raise_for_status()
        # The context manager guarantees the file is flushed and closed.
        with open(input_dir / file, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                out_file.write(chunk)

### Merging all files into one

In [5]:
# Read every downloaded tile and stack them with a single concat call.
# Concatenating inside the loop re-copies all previously read rows on every
# iteration; building the list first copies each tile once.
# Row labels are kept as read, so they restart at 0 for each tile and the
# resulting index is not unique.
google_df = pd.concat(
    [pd.read_csv(input_dir / file, compression="gzip") for file in file_list]
)
google_df

Unnamed: 0,latitude,longitude,area_in_meters,confidence,full_plus_code
0,9.429870,118.531309,26.5833,0.6818,6PXWCGHJ+WGVX
1,9.440374,118.482974,17.9793,0.7408,6PXWCFRM+45XR
2,9.363887,118.464563,37.3005,0.8230,6PXW9F77+HR4C
3,9.495301,118.569990,73.2031,0.7485,6PXWFHW9+4XH4
4,9.381147,118.503310,62.4306,0.8135,6PXW9GJ3+F83X
...,...,...,...,...,...
5370,20.449684,121.971273,50.4747,0.6766,7QG3CXXC+VGC8
5371,19.511708,121.913273,68.3271,0.7195,7QF3GW67+M8J8
5372,20.451546,121.972214,7.8440,0.6061,7QG3FX2C+JV8X
5373,20.619810,121.707082,7.3060,0.6186,7QG3JP94+WRG8


In [11]:
# Persist the merged point table (row labels are not written).
# On later runs the merge can be skipped by reloading this file instead:
#     google_df = pd.read_csv(input_dir / "google_footprint_data.csv")
google_df.to_csv(
    path_or_buf=input_dir / "google_footprint_data.csv",
    index=False,
)

In [6]:
# Spot-check: buildings inside the 0.1 degree box spanning
# 17.35-17.45 N, 122.05-122.15 E (bounds inclusive).
lat_in_box = google_df["latitude"].between(17.35, 17.45)
lon_in_box = google_df["longitude"].between(122.05, 122.15)
google_df.loc[lat_in_box & lon_in_box]

Unnamed: 0,latitude,longitude,area_in_meters,confidence,full_plus_code
1844083,17.382679,122.138921,10.7012,0.6136,7Q9494MQ+3HF3


In [7]:
# converting point df to geodataframe
ggl_gdf = gpd.GeoDataFrame(
    google_df,
    geometry=gpd.points_from_xy(google_df.longitude, google_df.latitude),
)

In [8]:
# The points were built from longitude/latitude columns, so label them as
# WGS84 (EPSG:4326) explicitly rather than borrowing whatever CRS the admin
# layer happens to use: if adm3_shp were in another CRS, copying its CRS onto
# lon/lat coordinates would silently mislabel every point.
ggl_gdf.set_crs("EPSG:4326", inplace=True)
# Reproject only when the admin layer is in a different CRS, so the spatial
# joins below compare like with like.
if ggl_gdf.crs != adm3_shp.crs:
    ggl_gdf = ggl_gdf.to_crs(adm3_shp.crs)
ggl_gdf

Unnamed: 0,latitude,longitude,area_in_meters,confidence,full_plus_code,geometry
0,9.429870,118.531309,26.5833,0.6818,6PXWCGHJ+WGVX,POINT (118.53131 9.42987)
1,9.440374,118.482974,17.9793,0.7408,6PXWCFRM+45XR,POINT (118.48297 9.44037)
2,9.363887,118.464563,37.3005,0.8230,6PXW9F77+HR4C,POINT (118.46456 9.36389)
3,9.495301,118.569990,73.2031,0.7485,6PXWFHW9+4XH4,POINT (118.56999 9.49530)
4,9.381147,118.503310,62.4306,0.8135,6PXW9GJ3+F83X,POINT (118.50331 9.38115)
...,...,...,...,...,...,...
5370,20.449684,121.971273,50.4747,0.6766,7QG3CXXC+VGC8,POINT (121.97127 20.44968)
5371,19.511708,121.913273,68.3271,0.7195,7QF3GW67+M8J8,POINT (121.91327 19.51171)
5372,20.451546,121.972214,7.8440,0.6061,7QG3FX2C+JV8X,POINT (121.97221 20.45155)
5373,20.619810,121.707082,7.3060,0.6186,7QG3JP94+WRG8,POINT (121.70708 20.61981)


In [9]:
# ggl_gdf.to_file(input_dir / "google_footprint_data.gpkg", driver="GPKG")
# ggl_gdf = gpd.read_file(input_dir / "google_footprint_data.gpkg")

## Counting number of buildings by region

In [10]:
# The plain DataFrame name is no longer needed; only ggl_gdf is used from here on.
del google_df

In [11]:
# ensuring all buildings are within municipality bounds
# Inner spatial join with predicate "within": keeps only the building points
# that lie within an ADM3 polygon, so points outside every municipality drop out.
ggl_gdf_within = ggl_gdf.sjoin(adm3_shp, how="inner", predicate="within")

In [12]:
# The unfiltered point layer is no longer needed; ggl_gdf_within replaces it.
del ggl_gdf

In [13]:
# Keep only the coordinates and geometry; everything else (the attributes
# brought in by the join above) is dropped.
keep_cols = ["latitude", "longitude", "geometry"]
ggl_gdf_within = ggl_gdf_within.drop(
    columns=ggl_gdf_within.columns.difference(keep_cols)
)
# Sequential 1..n identifier, one per building; it is the column counted in
# the aggregations below. Built from the row count directly rather than via
# reset_index(), which would copy the whole frame just to number its rows.
ggl_gdf_within["random_id"] = range(1, len(ggl_gdf_within) + 1)

In [14]:
# Repeat the spot-check on the filtered layer: buildings inside the box
# 17.35-17.45 N, 122.05-122.15 E (bounds inclusive).
lat_in_box = ggl_gdf_within["latitude"].between(17.35, 17.45)
lon_in_box = ggl_gdf_within["longitude"].between(122.05, 122.15)
ggl_gdf_within.loc[lat_in_box & lon_in_box]

Unnamed: 0,latitude,longitude,geometry,random_id
1844083,17.382679,122.138921,POINT (122.13892 17.38268),24102107


In [16]:
# (rows, columns): the row count is the number of buildings kept after the
# "within" join and is the total the aggregated counts below should sum to.
ggl_gdf_within.shape

(33466954, 4)

### By Municipality

In [51]:
# Number of buildings per municipality. The left join keeps municipalities
# that contain no buildings; counting the non-null random_id values gives
# them a count of 0.
phl_ggl_bld_municip_count = (
    adm3_shp.sjoin(ggl_gdf_within, how="left")
    .groupby("ADM3_PCODE")["random_id"]
    .count()
    .rename("numbuildings")
    .reset_index()
)

In [52]:
# Sanity check: the per-municipality counts should add up to the number of
# rows in ggl_gdf_within.
phl_ggl_bld_municip_count["numbuildings"].sum()

33466954

In [53]:
# Write the per-municipality counts (ADM3_PCODE, numbuildings) without row labels.
phl_ggl_bld_municip_count.to_csv(
    path_or_buf=input_dir / "phl_google_bld_municip_count.csv",
    index=False,
)

In [56]:
# Already saved to CSV above; not used again in this notebook.
del phl_ggl_bld_municip_count

### By Grid

In [45]:
# Number of buildings per grid cell. The left join keeps cells that contain
# no buildings; counting the non-null random_id values gives them a count of 0.
phl_ggl_bld_grid_count = (
    grid.sjoin(ggl_gdf_within, how="left")
    .groupby(["id", "Centroid"])["random_id"]
    .count()
    .rename("numbuildings")
    .reset_index()
)

In [47]:
# Sanity check: the per-cell counts should add up to the number of rows in
# ggl_gdf_within.
phl_ggl_bld_grid_count["numbuildings"].sum()

33466954

In [49]:
# Spot-check the count for the grid cell labelled "122.1E_17.4N".
is_target_cell = phl_ggl_bld_grid_count["Centroid"].eq("122.1E_17.4N")
phl_ggl_bld_grid_count.loc[is_target_cell]

Unnamed: 0,id,Centroid,numbuildings
1565,13064,122.1E_17.4N,1


In [50]:
# Write the per-cell counts (id, Centroid, numbuildings) without row labels.
phl_ggl_bld_grid_count.to_csv(
    path_or_buf=input_dir / "phl_google_bld_grid_count.csv",
    index=False,
)

In [55]:
# Already saved to CSV above; not used again in this notebook.
del phl_ggl_bld_grid_count

### By Municipality and Grid Intersection

In [59]:
# Neither layer is used below; only adm3_grid_intersection is still needed.
del adm3_shp, grid

In [17]:
# Number of buildings per (municipality, grid cell) intersection polygon.
# The left join keeps polygons that contain no buildings; counting the
# non-null random_id values gives them a count of 0.
phl_ggl_bld_intersection_count = (
    adm3_grid_intersection.sjoin(ggl_gdf_within, how="left")
    .groupby(["ADM3_PCODE", "id", "Centroid"])["random_id"]
    .count()
    .rename("numbuildings")
    .reset_index()
)

In [18]:
# Sanity check: the per-intersection counts should add up to the number of
# rows in ggl_gdf_within.
phl_ggl_bld_intersection_count["numbuildings"].sum()

33466954

In [19]:
# Spot-check how the buildings in the grid cell labelled "122.1E_17.4N" are
# split across the municipalities that overlap it.
is_target_cell = phl_ggl_bld_intersection_count["Centroid"].eq("122.1E_17.4N")
phl_ggl_bld_intersection_count.loc[is_target_cell]

Unnamed: 0,ADM3_PCODE,id,Centroid,numbuildings
850,PH023106000,13064.0,122.1E_17.4N,0
956,PH023117000,13064.0,122.1E_17.4N,1
1069,PH023133000,13064.0,122.1E_17.4N,0
1091,PH023137000,13064.0,122.1E_17.4N,0


In [20]:
# Write the per-intersection counts without row labels; `columns` fixes which
# fields are written and in what order.
phl_ggl_bld_intersection_count.to_csv(
    path_or_buf=input_dir / "phl_google_bld_intersection_count.csv",
    columns=["ADM3_PCODE", "id", "Centroid", "numbuildings"],
    index=False,
)