# Google Building Footprint Data

The data is gotten from `https://sites.research.google/open-buildings/#download`

In [16]:
%load_ext jupyter_black
import pandas as pd
import geopandas as gpd
import os
from pathlib import Path
import requests

The jupyter_black extension is already loaded. To reload it, use:
  %reload_ext jupyter_black


In [15]:
input_dir = (
    Path(os.getenv("STORM_DATA_DIR"))
    / "analysis/02_new_model_input/02_housing_damage/input/Google Footprint Data/"
)
shp_input_dir = (
    Path(os.getenv("STORM_DATA_DIR"))
    / "analysis/02_new_model_input/02_housing_damage/input/"
)
output_dir = (
    Path(os.getenv("STORM_DATA_DIR"))
    / "analysis/02_new_model_input/02_housing_damage/output/"
)

In [21]:
adm3_shp = gpd.read_file(
    shp_input_dir / "phl_adminboundaries_candidate_adm3.zip"
)
grid = gpd.read_file(output_dir / "phl_0.1_degree_grid_land_overlap.gpkg")
adm3_grid_intersection = gpd.read_file(
    shp_input_dir / "phl_adm3_grid_intersection.gpkg"
)

The files that cover the Philippines are below and are selected by viewing on the map.

### Downloading data

In [4]:
file_pattern = ["33d", "33b", "339", "325", "32f", "323", "331", "347"]
polygons_url_link = "https://storage.googleapis.com/open-buildings-data/v2/polygons_s2_level_4_gzip/"
points_url_link = "https://storage.googleapis.com/open-buildings-data/v2/points_s2_level_4_gzip/"
file_list = [patt + "_buildings.csv.gz" for patt in file_pattern]

Using the points data as we are using centroids.

In [22]:
for file in file_list:
    r = requests.get(points_url_link + file, allow_redirects=True)
    open(input_dir / file, "wb").write(r.content)

### Merging all files into one

In [10]:
google_df = pd.DataFrame()
for file in file_list:
    zone_file = pd.read_csv(input_dir / file, compression="gzip")
    google_df = pd.concat([google_df, zone_file])
google_df

Unnamed: 0,latitude,longitude,area_in_meters,confidence,full_plus_code
0,9.429870,118.531309,26.5833,0.6818,6PXWCGHJ+WGVX
1,9.440374,118.482974,17.9793,0.7408,6PXWCFRM+45XR
2,9.363887,118.464563,37.3005,0.8230,6PXW9F77+HR4C
3,9.495301,118.569990,73.2031,0.7485,6PXWFHW9+4XH4
4,9.381147,118.503310,62.4306,0.8135,6PXW9GJ3+F83X
...,...,...,...,...,...
5370,20.449684,121.971273,50.4747,0.6766,7QG3CXXC+VGC8
5371,19.511708,121.913273,68.3271,0.7195,7QF3GW67+M8J8
5372,20.451546,121.972214,7.8440,0.6061,7QG3FX2C+JV8X
5373,20.619810,121.707082,7.3060,0.6186,7QG3JP94+WRG8


In [11]:
google_df.to_csv(input_dir / "google_footprint_data.csv", index=False)

In [20]:
# converting point df to geodataframe
ggl_gdf = gpd.GeoDataFrame(
    google_df,
    geometry=gpd.points_from_xy(google_df.longitude, google_df.latitude),
)

In [35]:
ggl_gdf.set_crs(adm3_shp.crs, inplace=True)

Unnamed: 0,latitude,longitude,area_in_meters,confidence,full_plus_code,geometry
0,9.429870,118.531309,26.5833,0.6818,6PXWCGHJ+WGVX,POINT (118.53131 9.42987)
1,9.440374,118.482974,17.9793,0.7408,6PXWCFRM+45XR,POINT (118.48297 9.44037)
2,9.363887,118.464563,37.3005,0.8230,6PXW9F77+HR4C,POINT (118.46456 9.36389)
3,9.495301,118.569990,73.2031,0.7485,6PXWFHW9+4XH4,POINT (118.56999 9.49530)
4,9.381147,118.503310,62.4306,0.8135,6PXW9GJ3+F83X,POINT (118.50331 9.38115)
...,...,...,...,...,...,...
5370,20.449684,121.971273,50.4747,0.6766,7QG3CXXC+VGC8,POINT (121.97127 20.44968)
5371,19.511708,121.913273,68.3271,0.7195,7QF3GW67+M8J8,POINT (121.91327 19.51171)
5372,20.451546,121.972214,7.8440,0.6061,7QG3FX2C+JV8X,POINT (121.97221 20.45155)
5373,20.619810,121.707082,7.3060,0.6186,7QG3JP94+WRG8,POINT (121.70708 20.61981)


## Counting number of buildings by region

### By Municipality

In [27]:
phl_ggl_bld_municip_count = adm3_shp.merge(
    adm3_shp.sjoin(ggl_gdf, how="left")
    .groupby("ADM3_PCODE")
    .count()
    .geometry.rename("numbuildings")
    .reset_index()
)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: None

  return geopandas.sjoin(left_df=self, right_df=df, *args, **kwargs)


In [28]:
phl_ggl_bld_municip_count.columns

Index(['Shape_Leng', 'Shape_Area', 'ADM3_EN', 'ADM3_PCODE', 'ADM3_REF',
       'ADM3ALT1EN', 'ADM3ALT2EN', 'ADM2_EN', 'ADM2_PCODE', 'ADM1_EN',
       'ADM1_PCODE', 'ADM0_EN', 'ADM0_PCODE', 'date', 'validOn', 'validTo',
       'geometry', 'numbuildings'],
      dtype='object')

In [30]:
phl_ggl_bld_municip_count[["ADM3_PCODE", "ADM3_EN", "numbuildings"]].to_csv(
    input_dir / "phl_google_bld_municip_count.csv", index=False
)

### By Grid

In [36]:
phl_ggl_bld_grid_count = grid.merge(
    grid.sjoin(ggl_gdf, how="left")
    .groupby("id")
    .count()
    .geometry.rename("numbuildings")
    .reset_index()
)

In [37]:
phl_ggl_bld_grid_count.columns

Index(['id', 'Longitude', 'Latitude', 'Centroid', 'geometry', 'numbuildings'], dtype='object')

In [38]:
phl_ggl_bld_grid_count[["id", "Centroid", "numbuildings"]].to_csv(
    input_dir / "phl_google_bld_grid_count.csv", index=False
)

### By Municipality and Grid Intersection

In [39]:
phl_ggl_bld_intersection_count = adm3_grid_intersection.merge(
    adm3_grid_intersection.sjoin(ggl_gdf, how="left")
    .groupby(["ADM3_PCODE", "Centroid"])
    .count()
    .geometry.rename("numbuildings")
    .reset_index()
)

In [40]:
phl_ggl_bld_intersection_count.columns

Index(['Shape_Leng', 'Shape_Area', 'ADM3_EN', 'ADM3_PCODE', 'ADM3_REF',
       'ADM3ALT1EN', 'ADM3ALT2EN', 'ADM2_EN', 'ADM2_PCODE', 'ADM1_EN',
       'ADM1_PCODE', 'ADM0_EN', 'ADM0_PCODE', 'date', 'validOn', 'validTo',
       'id', 'Longitude', 'Latitude', 'Centroid', 'geometry', 'numbuildings'],
      dtype='object')

In [41]:
phl_ggl_bld_intersection_count[
    ["ADM3_PCODE", "ADM3_EN", "id", "Centroid", "numbuildings"]
].to_csv(input_dir / "phl_google_bld_intersection_count.csv", index=False)