Skip to content

Commit

Permalink
Fix Washington, DC places and counties view
Browse files Browse the repository at this point in the history
  • Loading branch information
sid-kap committed Dec 30, 2023
1 parent 761f13e commit c5f1342
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 5 deletions.
10 changes: 9 additions & 1 deletion python/housing_data/build_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,15 @@ def get_state_abbrs(state_codes: pd.Series) -> pd.Series:
:param state_codes: pd.Series of int
:return: pd.Series of state abbrs as str
"""
return state_codes.astype(str).str.zfill(2).map(us.states.mapping("fips", "abbr"))
# See https://en.wikipedia.org/wiki/Federal_Information_Processing_Standard_state_code#FIPS_state_codes for the list of FIPS state codes.
fips_to_abbr = us.states.mapping("fips", "abbr") | {
# DC is not in us.states.STATES_AND_TERRITORIES for some reason
"11": "DC",
# BPS uses alternate FIPS codes for Puerto Rico and the Virgin Islands; the reason is unclear
"43": "PR",
"52": "VI",
}
return state_codes.astype(str).str.zfill(2).map(fips_to_abbr)


def load_bps_all_years_plus_monthly(
Expand Down
7 changes: 5 additions & 2 deletions python/housing_data/build_places.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,10 @@ def load_places(
places_df["path_2"] = name.str.replace("/", "-").str.replace(" ", "_")
places_df = places_df.drop(columns=["place_name", "place_type"])

# Not sure why I have to do this
places_df = places_df[places_df["path_1"].notnull() & places_df["path_2"].notnull()]
not_null_rows = places_df["path_1"].notnull() & places_df["path_2"].notnull()
assert (
not_null_rows.all()
), f"Found rows where path_1 or path_2 is null: {places_df[['path_1', 'path_2']]}"
places_df = places_df[not_null_rows]

return raw_places_df, places_df
8 changes: 6 additions & 2 deletions python/housing_data/county_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def get_county_populations_2000s(data_path: Path, data_repo_path: Path) -> pd.Da
f"https://www2.census.gov/programs-surveys/popest/tables/2000-2010/"
f"intercensal/county/co-est00int-01-{state.fips}.csv",
)
for state in us.STATES_AND_TERRITORIES
for state in us.STATES_AND_TERRITORIES + [us.states.DC]
if state.fips not in ["60", "66", "69", "72", "78"] # exclude territories
]

Expand Down Expand Up @@ -83,7 +83,11 @@ def get_county_populations_2000s(data_path: Path, data_repo_path: Path) -> pd.Da
engine="python", # for skipfooter
)
df["state_code"] = state_code
df["County Name"] = df["County Name"].str.lstrip(".")

# In these CSV files, the total row looks like "Connecticut",
# while the rows for each county look like ".Fairfield County".
df = df[df["County Name"].str.startswith(".")]
df["County Name"] = df["County Name"].str.removeprefix(".")

dfs.append(df)

Expand Down

0 comments on commit c5f1342

Please sign in to comment.