In [None]:
import collections
import json
import pathlib
import typing

import pandas as pd
import matplotlib.pyplot as plt
import shapely
from shapely import plotting
from shapely import geometry

from bbs import realestate


In [None]:
# Raw inputs: the state lookup, the county boundaries (GeoJSON) and the
# listing snapshots. All paths are relative to the working directory.
with open("bbs/realestate/resources/states.json") as handle:
    state_lookup = json.load(handle)
# One row per entry of states.json: the key lands in "state", the value in "fips".
states = pd.DataFrame([{"state": name, "fips": code} for name, code in state_lookup.items()])

with open("bbs/realestate/resources/counties.geojson") as handle:
    counties = json.load(handle)

with open("real_estate_snapshots.json") as handle:
    listings = pd.DataFrame(json.load(handle))

In [None]:
# Attach the "fips" value of each listing's state. The right join keeps every
# row of `states`; a state without listings yields a row whose listing columns
# are NaN.
listings_with_fips = listings.merge(states, how="right", on="state")
# NaN > 0 evaluates to False, so this filter also removes rows that have no
# bedroom count at all.
listings = listings_with_fips.loc[listings_with_fips["bedrooms"] > 0]

In [None]:
# Index the county polygons by their STATEFP property so a point lookup only
# has to test the counties filed under one key. Each entry is a
# (shape, AFFGEOID) pair.
shapes = collections.defaultdict(list)
for feature in counties["features"]:
    properties = feature["properties"]
    polygon = geometry.shape(feature["geometry"])
    shapes[properties["STATEFP"]].append((polygon, properties["AFFGEOID"]))

def county_code(series: pd.Series) -> typing.Optional[str]:
    """Return the AFFGEOID of the county polygon that contains a listing.

    Args:
        series: One listing row; its ``longitude``, ``latitude`` and ``fips``
            fields are read.

    Returns:
        The AFFGEOID paired with the first polygon filed under the row's
        ``fips`` key in ``shapes`` that contains the point, or ``None`` when
        no polygon does (including when ``shapes`` has no entry for the key).
    """
    point = shapely.Point(series.longitude, series.latitude)
    # `shapes` is a defaultdict: indexing it with an unknown key would insert
    # an empty list as a side effect, so look the key up with .get instead.
    for county, affgeoid in shapes.get(series.fips, ()):
        if county.contains(point):
            return affgeoid
    return None

# Financing assumptions behind the estimated payment.
# NOTE(review): presumably 95% of the list price is financed (5% down) at a
# 7.5% annual rate over 30 years of monthly payments — confirm against
# realestate.mortgage_payment's parameter meanings.
FINANCED_SHARE = 0.95
ANNUAL_RATE = 0.075
TERM_MONTHS = 30 * 12

# `listings` is a filtered slice of the merged frame, so assigning columns to
# it in place triggers pandas' SettingWithCopyWarning. `.assign` returns a new
# frame with both columns added and avoids the ambiguity.
listings = listings.assign(
    affgeoid=listings.apply(county_code, axis=1),
    mortgage=listings["list_price"].apply(
        lambda price: realestate.mortgage_payment(price * FINANCED_SHARE, ANNUAL_RATE / 12, TERM_MONTHS)
    ),
)


In [None]:
# Display the listings frame, now carrying the "affgeoid" and "mortgage"
# columns, as this cell's output.
listings

In [None]:
# Keep only the columns the affordability join needs and collapse exact
# repeats of a (property_id, mortgage, affgeoid) combination.
# NOTE(review): a property_id that occurs with more than one mortgage value
# keeps one row per value — confirm that is what downstream cells expect.
listings = listings.loc[:, ["property_id", "mortgage", "affgeoid"]].drop_duplicates()

In [None]:
# Load the census table through the project helper. The cells below join it
# on "affgeoid" and read its "income_bracket" and "household_percent" columns.
census = realestate.get_census_data()

In [None]:
# Affordability assumptions.
# NOTE(review): income_bracket is divided by 12, so it is presumably an annual
# income figure — confirm against the census helper.
MAX_INCOME_SHARE = 0.3  # fraction of (income_bracket / 12) allowed for the mortgage
MIN_LISTINGS_PER_COUNTY = 5  # counties with fewer listings are left out

listing_keys = ["affgeoid", "property_id"]

# Pair every listing with each census row of its county (inner join).
df = pd.merge(listings, census, on="affgeoid")
df["acceptable_mortgage"] = (df["income_bracket"] / 12) * MAX_INCOME_SHARE
df["affordable"] = df["mortgage"] <= df["acceptable_mortgage"]

# Per listing: household_percent summed over the census rows that can afford it.
# NOTE(review): if a property_id occurs with several mortgage values in
# `listings`, its census rows are summed once per value — confirm the
# upstream de-duplication makes that impossible.
affordable = df[df["affordable"]]
affordable = affordable.groupby(listing_keys)["household_percent"].sum().reset_index()

# Listings that no census row can afford are absent from `affordable`; seed
# every listing with 0.0 so they still count toward their county's mean.
# `.assign` builds a new frame, which avoids the SettingWithCopyWarning that an
# in-place column assignment on this derived slice raises.
default = df[listing_keys].drop_duplicates().assign(household_percent=0.0)
default = pd.concat([default, affordable])
df = default.groupby(listing_keys)["household_percent"].sum().reset_index()

# Drop sparsely covered counties, then average the per-listing values by county.
df = df.groupby("affgeoid").filter(lambda group: len(group) >= MIN_LISTINGS_PER_COUNTY)
df = df.groupby("affgeoid")["household_percent"].mean().reset_index()
geo_affordability = df.sort_values("household_percent", ascending=False)

In [None]:
# Rebuild the geometry lookups used for plotting. Both names are rebound here:
# `counties` becomes an AFFGEOID -> shape mapping and `states` a list of the
# shapes found in states.geojson.
with open("bbs/realestate/resources/counties.geojson") as handle:
    county_features = json.load(handle)["features"]
counties = dict(
    (feature["properties"]["AFFGEOID"], geometry.shape(feature["geometry"]))
    for feature in county_features
)

with open("bbs/realestate/resources/states.geojson") as handle:
    state_features = json.load(handle)["features"]
states = list(map(geometry.shape, (feature["geometry"] for feature in state_features)))

In [None]:
# County map: green above the median household_percent, red below, with
# opacity growing with the distance from the median; counties without a value
# are drawn in grey and the shapes in `states` are outlined on top.
fig, ax = plt.subplots(figsize=(40, 20))
# Longitude/latitude window of the plot.
ax.set_xlim(-125, -65)
ax.set_ylim(25, 50)

# `minimum` and `maximum` are not used in this cell; they are kept so that any
# other cell reading them keeps working.
minimum = geo_affordability.household_percent.min()
p1 = geo_affordability.household_percent.quantile(0.01)
maximum = geo_affordability.household_percent.max()
p99 = geo_affordability.household_percent.quantile(0.99)
median = geo_affordability.household_percent.median()
missing = set(counties.keys()) - set(geo_affordability.affgeoid)

# Scale opacity by the larger of the 1st/99th percentile distances from the
# median, so outliers saturate at alpha=1 instead of washing out the rest.
# When every value equals the median the spread is 0 and the division below
# would produce NaN (an invalid alpha); fall back to 1.0 so alpha becomes 0.
deviser = max(abs(p1 - median), abs(p99 - median)) or 1.0

for affgeoid, percent in geo_affordability.values:
    county = counties[affgeoid]
    alpha = min(abs(percent - median) / deviser, 1)
    color = "red" if percent < median else "green"
    # A fully transparent county (alpha == 0) gets a visible outline instead.
    plotting.plot_polygon(county, ax=ax, add_points=False, linewidth=0 if alpha else 1, facecolor=color, alpha=alpha)
for affgeoid in missing:
    county = counties[affgeoid]
    plotting.plot_polygon(county, ax=ax, add_points=False, linewidth=1, facecolor="grey", alpha=0.25)
for state in states:
    plotting.plot_polygon(state, ax=ax, add_points=False, linewidth=2, facecolor="none", edgecolor="black")