# Adding Housing Construction Materials

Following a discussion of how the model performs without the construction-materials features, we are testing adding them back, converting the municipality-level values to grid values using weights computed from Google Building Footprint data. We will then look at the performance of the model after adding these features.

In [1]:
%load_ext jupyter_black
import pandas as pd
import geopandas as gpd
from pathlib import Path
import os

In [16]:
# Resolve every input/output location relative to the data root named by the
# STORM_DATA_DIR environment variable. Indexing os.environ (rather than calling
# os.getenv) makes a missing variable fail with KeyError: 'STORM_DATA_DIR'
# instead of an opaque TypeError raised from Path(None).
base_dir = Path(os.environ["STORM_DATA_DIR"]) / "analysis/02_new_model_input/"

# NOTE(review): "05_vulnerablility" is kept exactly as written; presumably it
# matches the folder name on disk -- confirm before correcting the spelling.
input_dir = base_dir / "05_vulnerablility/input/"
output_dir = base_dir / "05_vulnerablility/output/"

# Locations under the housing-damage step: its Google Footprint input folder
# and its output folder.
weights_dir = base_dir / "02_housing_damage/input/Google Footprint Data/"
hu_dir = base_dir / "02_housing_damage/output/"

In [17]:
# Load the three input tables used in this notebook.

# Municipality-to-grid weights (the intro describes them as computed from
# Google Building Footprint data).
mun_to_grid_weights = pd.read_csv(
    weights_dir.joinpath("ggl_mun_to_grid_weights.csv")
)

# Construction materials of the outer walls and roof by city/municipality,
# taken from the "by category" sheet of the workbook.
construction_materials_df = pd.read_excel(
    io=input_dir.joinpath(
        "180814_construction-materials-of-the-outer-walls-and-roof_by-city_municipality.xlsx"
    ),
    sheet_name="by category",
)

# Housing units by grid, going by the file name -- TODO confirm the schema.
hu_bygrid = pd.read_csv(hu_dir.joinpath("transformed_housingunits_bygrid.csv"))

In [18]:
# Show the workbook's column names to see which identifier and roof/wall
# material-category fields are available for the merge below.
construction_materials_df.columns

Index(['Region', 'Region Code', 'Province', 'Province Code',
       'Municipality_City', 'Municipality_City Code', 'Housing Units',
       'Strong Roof/Strong Wall', 'Strong Roof/Light Wall',
       'Strong Roof/Salvage Wall', 'Light Roof/Strong Wall',
       'Light Roof/Light Wall', 'Light Roof/Salvage Wall',
       'Salvaged Roof/Strong Wall', 'Salvaged Roof/Light Wall',
       'Salvaged Roof/Salvage Wall'],
      dtype='object')

In [19]:
# Join the municipality-level material table onto the weights table.
# how="right" keeps every row of the weights table, so weight rows whose
# ADM3_PCODE has no match in "Municipality_City Code" end up with NaN material
# values. Presumably each weights row is one municipality/grid-cell pair --
# TODO confirm.
merged_df = construction_materials_df.merge(
    right=mun_to_grid_weights.loc[:, ["ADM3_PCODE", "id", "Centroid", "weight"]],
    left_on="Municipality_City Code",
    right_on="ADM3_PCODE",
    how="right",
)

In [20]:
# There are two ways of converting the municipality values to grid values:
# 1. Convert the municipality values to grid values using the weights, then compute the % from the grid housing units (HUs).
# 2. Compute the % values by municipality, then convert those percentages to grid values using the weights.

In [21]:
# Method 1
# Drop the descriptive municipality columns before applying the weights.
# The result is rebound rather than dropped with inplace=True, and
# errors="ignore" is passed, so the cell can be re-run safely: a second in-place
# drop would raise KeyError because the columns are already gone.
merged_df = merged_df.drop(
    columns=[
        "Region",
        "Region Code",
        "Province",
        "Province Code",
        "Municipality_City",
        "Housing Units",
    ],
    errors="ignore",
)

In [25]:
# Scale every column whose name contains "Roof" (the roof/wall material
# categories) by that row's weight. axis=0 aligns the weight Series with the
# row index. The result is only displayed here, not assigned.
merged_df.filter(like="Roof").mul(merged_df["weight"], axis=0)

Unnamed: 0,Strong Roof/Strong Wall,Strong Roof/Light Wall,Strong Roof/Salvage Wall,Light Roof/Strong Wall,Light Roof/Light Wall,Light Roof/Salvage Wall,Salvaged Roof/Strong Wall,Salvaged Roof/Light Wall,Salvaged Roof/Salvage Wall
0,211.244980,126.746988,0.000000,0.844980,8.449799,0.000000,0.000000,0.000000,0.000000
1,38.755020,23.253012,0.000000,0.155020,1.550201,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,5605.209657,466.571812,5.554426,3.967447,27.772132,0.000000,0.793489,0.000000,1.586979
...,...,...,...,...,...,...,...,...,...
9882,5.809962,3.248221,0.000000,0.293010,4.445375,0.025115,0.000000,0.000000,0.000000
9883,98.769360,55.219757,0.000000,4.981164,75.571369,0.426957,0.000000,0.000000,0.000000
9884,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9885,32.232558,9.970402,0.304440,1.978858,25.002114,0.723044,0.342495,0.266385,0.723044
