Skip to content

Commit

Permalink
Update data to May 2023 (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
sid-kap authored Jul 21, 2024
1 parent ef8114f commit 253cb5d
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 7 deletions.
2 changes: 1 addition & 1 deletion lib/projections.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const months = {
}

// eslint-disable-next-line @typescript-eslint/no-inferrable-types
const latestMonth: number = 3
const latestMonth: number = 5
const glueWord = latestMonth == 2 ? "and" : "through"
const observedMonths =
latestMonth == 1
Expand Down
4 changes: 2 additions & 2 deletions python/housing_data/build_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def get_numerical_columns(
CANADA_POPULATION_DIR = Path("data", "canada-population")

# Last year and month for which monthly BPS data is available (and is cloned to housing-data-data).
LATEST_MONTH = (2024, 3)
LAST_YEAR_ANNUAL_DATA_RELEASED = False
LATEST_MONTH = (2024, 5)
LAST_YEAR_ANNUAL_DATA_RELEASED = True


def write_to_json_directory(df: pd.DataFrame, path: Path) -> None:
Expand Down
19 changes: 15 additions & 4 deletions python/housing_data/california_hcd_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def load_california_hcd_data(
df = df[df["UNIT_CAT_DESC"] != "Mobile Home Unit"].copy()

df["units"] = df[BUILDING_PERMIT_COLUMNS].sum(axis="columns", numeric_only=True)

df = df[
(df["units"] > 0)
# Exclude rows with a certificate of occupancy, because it's very unlikely
Expand All @@ -45,7 +46,7 @@ def load_california_hcd_data(
# permit anyway.
# NB: I only looked at LA data to validate this assumption. The data looks
# _way_ more accurate when we drop these rows.
& df["CO_ISSUE_DT1"].isnull()
& (df["CO_ISSUE_DT1"].isnull() | ((df["BP_ISSUE_DT1"] == df["CO_ISSUE_DT1"])))
].copy()

df["building_type"] = np.select(
Expand Down Expand Up @@ -121,13 +122,18 @@ def _aggregate_to_geography(
assert (wide_df[["JURS_NAME", "year"]].value_counts() == 1).all()
wide_df = wide_df.drop(columns=["CNTY_NAME"])
if level == "place":
old_rows = len(wide_df)
old_wide_df = wide_df
# Add place_or_county_code
wide_df = wide_df.merge(
_load_fips_crosswalk(data_path), left_on="JURS_NAME", right_on="name"
).drop(columns=["name", "county_code"])
new_rows = len(wide_df)
assert old_rows == new_rows, f"{old_rows=} != {new_rows=}"
if len(old_wide_df) != len(wide_df):
dropped_cities = set(old_wide_df["JURS_NAME"]) - set(wide_df["JURS_NAME"])
added_cities = set(wide_df["JURS_NAME"]) - set(old_wide_df["JURS_NAME"])
raise ValueError(
f"wide_df had {len(old_wide_df)} rows before merge and {len(wide_df)} rows after merge. "
f"{dropped_cities=} {added_cities=}"
)
elif level == "county":
# Add county_code
old_rows = len(wide_df)
Expand Down Expand Up @@ -167,6 +173,11 @@ def _load_fips_crosswalk(data_path: Path) -> pd.DataFrame:
"Carmel-by-the-Sea": "CARMEL",
"La Cañada Flintridge": "LA CANADA FLINTRIDGE",
"Angels": "ANGELS CAMP",
# The crosswalk has a city called "Amador City city".
# I think the BPS data cleaning code messes this city up and shortens it to just "Amador".
# This is wrong/we should probably fix it like we fixed Jersey City, but for now
    # let's just change it to "AMADOR" to fix the "rows dropped in merge" error
"Amador City": "AMADOR",
}
)
.str.upper()
Expand Down

0 comments on commit 253cb5d

Please sign in to comment.