Skip to content

Commit

Permalink
Fix openssl build error, upgrade to python3.11 (#83)
Browse files Browse the repository at this point in the history
  • Loading branch information
sid-kap committed Sep 24, 2023
1 parent 4e66623 commit 8709357
Show file tree
Hide file tree
Showing 8 changed files with 1,714 additions and 1,212 deletions.
15 changes: 13 additions & 2 deletions build_data_vercel.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
#! /usr/bin/env bash

- # Python 3.9, last time I checked
- yum install -y python3
+ # Fixes this error: https://urllib3.readthedocs.io/en/latest/v2-migration-guide.html#ssl-module-is-compiled-with-openssl-1-0-2-k-fips
+ # TODO: remove when Vercel upgrades to Amazon Linux 2023
+ yum remove openssl openssl-devel
+ yum update openssl11 openssl11-devel
+ yum install -y openssl11 openssl11-devel

curl https://pyenv.run | bash

export PATH="$HOME/.pyenv/bin:$PATH"
eval "$(pyenv init -)"

pyenv install 3.11.5
pyenv global 3.11.5

pip3 install --user poetry
cd python || exit
Expand Down
2 changes: 1 addition & 1 deletion python/housing_data/california_hcd_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def load_california_hcd_data(
# BPS doesn't include mobile homes, so we shouldn't include them here either
df = df[df["UNIT_CAT_DESC"] != "Mobile Home Unit"].copy()

- df["units"] = df[BUILDING_PERMIT_COLUMNS].sum(axis="columns")
+ df["units"] = df[BUILDING_PERMIT_COLUMNS].sum(axis="columns", numeric_only=True)
df = df[
(df["units"] > 0)
# Exclude rows with a certificate of occupancy, because it's very unlikely
Expand Down
4 changes: 2 additions & 2 deletions python/housing_data/canada_bper.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def aggregate_to_metros(df: pd.DataFrame) -> pd.DataFrame:
df = (
df.drop(columns=["place_name", "province_abbr", "province"])
.groupby(["metro", "year", "metro_province_abbr"], as_index=False)
- .sum()
+ .sum(numeric_only=True)
)
add_per_capita_columns(df, [DataSource.CANADA])

Expand All @@ -226,7 +226,7 @@ def aggregate_to_states(df: pd.DataFrame) -> pd.DataFrame:
df = (
df.drop(columns=["path_1", "path_2"])
.groupby(["province", "year"], as_index=False)
- .sum()
+ .sum(numeric_only=True)
)
add_per_capita_columns(df, [DataSource.CANADA])

Expand Down
2 changes: 1 addition & 1 deletion python/housing_data/county_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def get_county_populations_1980s(data_path: Optional[Path]) -> pd.DataFrame:
df = (
df.dropna(subset=["year"])
.groupby(["year", "combined_fips"])
- .sum()
+ .sum(numeric_only=True)
.sum(axis=1)
.rename("population")
.reset_index()
Expand Down
2 changes: 1 addition & 1 deletion python/housing_data/place_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def get_place_populations_1990s(data_path: Optional[Path]) -> pd.DataFrame:
["place", "state_abbr", "state_fips", "place_fips", "place_or_county_code"],
dropna=False,
)
- .sum()
+ .sum(numeric_only=True)
.reset_index()
)

Expand Down
6 changes: 3 additions & 3 deletions python/housing_data/state_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def get_state_populations_1990s(data_path: Optional[Path]) -> pd.DataFrame:
]
)
.groupby(["year", "state"])
- .sum()
+ .sum(numeric_only=True)
.reset_index()
)

Expand Down Expand Up @@ -263,13 +263,13 @@ def get_state_population_estimates(data_path: Optional[Path]) -> pd.DataFrame:
divisions_df = (
states_df.assign(state=states_df["state"].map(STATE_TO_DIVISION))
.groupby(["state", "year"])
- .sum()
+ .sum(numeric_only=True)
.reset_index()
)
regions_df = (
states_df.assign(state=states_df["state"].map(STATE_TO_REGION))
.groupby(["state", "year"])
- .sum()
+ .sum(numeric_only=True)
.reset_index()
)

Expand Down
2,877 changes: 1,686 additions & 1,191 deletions python/poetry.lock

Large diffs are not rendered by default.

18 changes: 7 additions & 11 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,14 @@ description = ""
authors = ["Sidharth Kapur <sidharthkapur1@gmail.com>"]

[tool.poetry.dependencies]
- # Python 3.9 on vercel because CentOS/yum doesn't have Python 3.10.
- # But locally I'm using 3.10 because pandas only released a universal2 (Apple Silicon-compatible)
- # wheel for Python 3.10 (and also only for 1.3.5).
- python = ">=3.9,<3.11"
- pandas = "^1.4.4"
- requests = "^2.28.1"
- typing-extensions = "^4.3.0"
- pyarrow = "^9.0.0"
- tqdm = "^4.64.1"
- us = "^2.0.2"
+ python = ">=3.11,<3.12"
+ pandas = "^2.1.1"
+ requests = "^2.31.0"
+ pyarrow = "^13.0.0"
+ tqdm = "^4.66.0"
+ us = "^3.1.1"
  xlrd = "^2.0.1"
- openpyxl = "^3.0.10"
+ openpyxl = "^3.1.2"

[tool.poetry.dev-dependencies]
pytest = "^7.1.3"
Expand Down

1 comment on commit 8709357

@vercel
Copy link

@vercel vercel bot commented on 8709357 Sep 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.