In [None]:
import pathlib
import time

import pandas as pd
import census
import us

import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

%config InlineBackend.figure_format = 'svg'

from tqdm.auto import tqdm

# Read the Census API key from a local text file.
# Surrounding whitespace is stripped: text files normally end with a newline,
# and without the strip that newline would be passed to the client as part of
# the key.
api_key = pathlib.Path('api_key_secret.txt').read_text().strip()

# API client shared by the download cell further down (it calls c.acs1.get).
c = census.Census(api_key)

In [None]:
# https://data.census.gov/cedsci/table?q=gross%20rent%20san%20francisco&g=0100000US&tid=ACSDP1Y2021.DP04

# Year window for the download; both ends are included (the download cell
# iterates over range(start_year, end_year + 1)).
start_year, end_year = 2010, 2021

# FIPS code of the state whose counties are queried.
search_state = us.states.CA.fips

# Variable code requested from the API -> label stored in the 'Statistic'
# column and reused for column names and plot titles.
stats_of_interest = dict(
    B25064_001E='Median Rent',
    B25077_001E='Median Value',
)

In [None]:
# Download every (statistic, year) combination for all counties of the
# selected state and stack the results into one long-format frame with the
# columns Name, Value, State, County, Statistic, Stat ID and Year.
search_years = list(range(start_year, end_year + 1))
census_frames = []    # one DataFrame per request that returned rows
failed_requests = []  # (stat_id, year, exception) per request that yielded nothing

with tqdm(total=len(stats_of_interest) * len(search_years)) as reporter:
    for stat_id, stat_name in stats_of_interest.items():
        for year in search_years:
            try:
                records = c.acs1.get(
                    ('NAME', stat_id),
                    {'for': 'county:*', 'in': 'state:{}'.format(search_state)},
                    year=year,
                )
                if not records:
                    raise LookupError('no rows returned')
                stat_frame = pd.DataFrame.from_records(records)
            except Exception as exc:
                # Best effort: a single unavailable year must not abort the
                # whole download, but remember what was skipped so the gap is
                # visible instead of silently missing from the plots.
                failed_requests.append((stat_id, year, exc))
            else:
                stat_frame = stat_frame.rename(columns={
                    stat_id: 'Value',
                    'NAME': 'Name',
                    'state': 'State',
                    'county': 'County',
                })
                # Guarantee a numeric column for plotting and inflation
                # adjustment. NOTE(review): the Census API is documented to
                # report unavailable estimates as large negative placeholder
                # codes (e.g. -666666666); medians of rent and home value are
                # never legitimately negative, so those become NaN.
                numeric_values = pd.to_numeric(stat_frame['Value'], errors='coerce')
                stat_frame['Value'] = numeric_values.where(numeric_values >= 0)
                stat_frame['Statistic'] = stat_name
                stat_frame['Stat ID'] = stat_id
                stat_frame['Year'] = year
                census_frames.append(stat_frame)
            finally:
                # Pause between requests and advance the progress bar whether
                # or not the request succeeded.
                time.sleep(0.1)
                reporter.update()

if failed_requests:
    print('Skipped {} of {} requests:'.format(
        len(failed_requests), len(stats_of_interest) * len(search_years)))
    for failed_stat_id, failed_year, failed_exc in failed_requests:
        print('  {} {}: {!r}'.format(failed_stat_id, failed_year, failed_exc))

if not census_frames:
    # pd.concat([]) would raise an opaque "No objects to concatenate".
    raise ValueError(
        'No census data could be downloaded; check the API key, the network '
        'connection and the requested years.'
    )

census_data = pd.concat(census_frames, ignore_index=True)
print(len(census_data))
census_data.head()

In [None]:
# Spot-check: display every downloaded row for San Francisco County.
is_san_francisco = census_data['Name'].eq('San Francisco County, California')
census_data[is_san_francisco]

In [None]:
# Counties compared in the line charts; the price-to-rent plot further down
# reuses this list.
plot_counties = [
    'San Francisco County, California',
    # 'Alameda County, California', 'Santa Clara County, California',
    'San Diego County, California',
    'Los Angeles County, California',
    'Riverside County, California',
    'Lake County, California',
    'Sonoma County, California',
]

# One figure per statistic, one line per selected county.
in_plot_counties = census_data['Name'].isin(plot_counties)
for stat_name in census_data['Statistic'].unique():
    stat_rows = census_data.loc[in_plot_counties & (census_data['Statistic'] == stat_name)]
    fig, ax = plt.subplots()
    sns.lineplot(data=stat_rows, x='Year', y='Value', hue='Name', ax=ax)
    ax.set_title(stat_name)

In [None]:
# Third-party `cpi` package; the cells below use cpi.inflate and
# cpi.LATEST_YEAR to express dollar amounts in a common year.
import cpi
# NOTE(review): presumably refreshes the package's local CPI dataset (and may
# need network access) — confirm against the cpi documentation.
cpi.update()

In [None]:
# Reshape the long frame into one row per county and year with one column per
# statistic, then add inflation-adjusted copies of each statistic and the
# price-to-rent ratio derived from them.
key_columns = ['Name', 'State', 'County', 'Year']
stat_labels = list(census_data['Statistic'].drop_duplicates())
dollars_suffix = ' ({} dollars)'.format(cpi.LATEST_YEAR)

census_wide = census_data[key_columns].drop_duplicates()
for label in stat_labels:
    # Rows of this statistic only, with 'Value' renamed to the statistic label.
    stat_column = (
        census_data.loc[census_data['Statistic'] == label]
        .drop(columns=['Statistic', 'Stat ID'])
        .rename(columns={'Value': label})
    )
    census_wide = census_wide.merge(stat_column, on=key_columns, how='left')

for label in stat_labels:
    # Row-wise conversion from the row's own year to cpi.LATEST_YEAR dollars.
    census_wide[label + dollars_suffix] = census_wide.apply(
        lambda row: cpi.inflate(row[label], row['Year'], to=cpi.LATEST_YEAR),
        axis=1,
    )

# Median home value divided by twelve months of median rent.
annual_rent = census_wide['Median Rent' + dollars_suffix] * 12.0
census_wide['Price-to-Rent Ratio' + dollars_suffix] = (
    census_wide['Median Value' + dollars_suffix] / annual_rent
)

print(len(census_wide))
census_wide.loc[census_wide['Name'] == 'San Francisco County, California'].sort_values('Year')

In [None]:
# Price-to-rent ratio over time, one line per county in plot_counties.
ratio_column = 'Price-to-Rent Ratio ({} dollars)'.format(cpi.LATEST_YEAR)
selected_counties_wide = census_wide.loc[census_wide['Name'].isin(plot_counties)]

fig, ax = plt.subplots()
sns.lineplot(data=selected_counties_wide, x='Year', y=ratio_column, hue='Name', ax=ax)
ax.set_title(ratio_column)
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

In [None]:
# Inflation-adjusted median rent (left axis) and median home value (right
# axis) for a single county, drawn on twin y-axes because the two series
# differ by orders of magnitude.
plot_county = 'San Francisco County, California'
df = census_wide.loc[census_wide['Name'] == plot_county]

rent_column = 'Median Rent ({} dollars)'.format(cpi.LATEST_YEAR)
value_column = 'Median Value ({} dollars)'.format(cpi.LATEST_YEAR)
palette = sns.color_palette()

fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
sns.lineplot(data=df, x='Year', y=rent_column, ax=ax1, color=palette[0], label='Rent')
sns.lineplot(data=df, x='Year', y=value_column, ax=ax2, color=palette[1], label='Value')

# Title the figure like the other plots so it identifies the county on its own.
ax1.set_title(plot_county)
# The 'whitegrid' style would draw a second set of horizontal grid lines for
# the right-hand axis that do not line up with the left-hand ones.
ax2.grid(False)

# Each axis keeps its own legend; both are anchored outside the plot area on
# the right, one near the top and one near the bottom.
ax1.legend(bbox_to_anchor=(1.1, 1), loc="upper left")
ax2.legend(bbox_to_anchor=(1.1, 0.1), loc="upper left")