In [35]:
import pathlib
import time

import census
import us
import cpi
import datetime
import pandas as pd
# cpi.update()  # on first run

import plotly.express as px
import plotly.subplots
import plotly.graph_objects as go

import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

from tqdm.auto import tqdm

# Load the Census API key from a local file instead of hardcoding it in the notebook.
api_key = pathlib.Path('api_key_secret.txt').read_text().strip()
if not api_key:
    # Fail here with a clear message rather than later on the first API request.
    raise ValueError('api_key_secret.txt is empty; expected a Census API key')
# Confirm that a key was loaded WITHOUT echoing it: cell outputs are saved with the
# notebook, so printing the key itself would leak the secret to anyone who sees the file.
print('api_key loaded ({} characters)'.format(len(api_key)))

# Census API client used by the download cell below.
c = census.Census(api_key)

api_key: "[REDACTED]"


In [2]:
# Reference table on data.census.gov:
# https://data.census.gov/cedsci/table?q=gross%20rent%20san%20francisco&g=0100000US&tid=ACSDP1Y2021.DP04

# Census variable ID -> readable label. The download cell requests each ID and
# stores the label in the 'Statistic' column.
stats_of_interest = dict([
    ('B25064_001E', 'Median Rent'),
    ('B25077_001E', 'Median Value'),
])

# Inclusive range of survey years to request.
start_year = 2010  # Earliest year data available
end_year = datetime.date.today().year  # the current year; the download cell tolerates failed requests

In [11]:
# Download every statistic in `stats_of_interest` for every county and every year in
# [start_year, end_year], then stack the results into one long-format DataFrame
# (`census_data`) with columns Name, Value, State, County, Statistic, Stat ID, Year.
search_years = list(range(start_year, end_year+1))
census_frames = []  # one DataFrame per successful (statistic, year) request
# The context manager closes the progress bar even if the loop is interrupted.
with tqdm(total=len(stats_of_interest)*len(search_years)) as reporter:
    for stat_id, stat_name in stats_of_interest.items():
        for year in search_years:
            try:
                df = pd.DataFrame.from_records(c.acs1.get(('NAME', stat_id), {'for': 'county:*', 'in': 'state:*'}, year=year))
            except Exception as e:
                # Best effort: some years have no data (the saved output shows HTTP 404
                # responses for 2020 and 2024). Report the failure and keep going.
                print("Caught exception for stat_name, year, stat_id:", stat_name, year, stat_id)
                print(e)
            else:
                df = df.rename(columns={stat_id: 'Value', 'NAME': 'Name', 'state': 'State', 'county': 'County'})
                df['Statistic'] = stat_name
                df['Stat ID'] = stat_id
                df['Year'] = year
                census_frames.append(df)
            finally:
                # Runs on success and on failure: pause briefly between requests and
                # advance the progress bar exactly once per request.
                time.sleep(0.1)
                reporter.update()

if not census_frames:
    # pd.concat([]) would raise an opaque "No objects to concatenate"; explain the real cause.
    raise RuntimeError('Every Census request failed; check the API key and network connection.')
census_data = pd.concat(census_frames, ignore_index=True)
# NOTE(review): 'Value' is used as returned by the API; confirm whether missing-data
# sentinel values need to be filtered out before computing ratios.
print(len(census_data))
census_data.head()

  0%|          | 0/30 [00:00<?, ?it/s]

Caught exception for stat_name, year, stat_id: Median Rent 2020 B25064_001E
<!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body></html>
Caught exception for stat_name, year, stat_id: Median Rent 2024 B25064_001E
<!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body>

Unnamed: 0,Name,Value,State,County,Statistic,Stat ID,Year
0,"Baldwin County, Alabama",832.0,1,3,Median Rent,B25064_001E,2010
1,"Calhoun County, Alabama",607.0,1,15,Median Rent,B25064_001E,2010
2,"Cullman County, Alabama",585.0,1,43,Median Rent,B25064_001E,2010
3,"DeKalb County, Alabama",502.0,1,49,Median Rent,B25064_001E,2010
4,"Elmore County, Alabama",847.0,1,51,Median Rent,B25064_001E,2010


In [12]:
# Sanity check: keep only the first row seen for each state code to see which states are present.
census_data.drop_duplicates(subset=['State'])

Unnamed: 0,Name,Value,State,County,Statistic,Stat ID,Year
0,"Baldwin County, Alabama",832.0,1,3,Median Rent,B25064_001E,2010
21,"Anchorage Municipality, Alaska",995.0,2,20,Median Rent,B25064_001E,2010
24,"Apache County, Arizona",554.0,4,1,Median Rent,B25064_001E,2010
34,"Benton County, Arkansas",699.0,5,7,Median Rent,B25064_001E,2010
45,"Alameda County, California",1198.0,6,1,Median Rent,B25064_001E,2010
85,"Adams County, Colorado",898.0,8,1,Median Rent,B25064_001E,2010
96,"Fairfield County, Connecticut",1233.0,9,1,Median Rent,B25064_001E,2010
104,"Kent County, Delaware",959.0,10,1,Median Rent,B25064_001E,2010
107,"District of Columbia, District of Columbia",1198.0,11,1,Median Rent,B25064_001E,2010
108,"Alachua County, Florida",883.0,12,1,Median Rent,B25064_001E,2010


In [23]:
# Reshape the long-format `census_data` into one row per (county, year) with one column
# per statistic, add inflation-adjusted copies of each statistic expressed in
# cpi.LATEST_YEAR dollars, and derive the price-to-rent ratio.
key_columns = ['Name', 'State', 'County', 'Year']
stat_names = list(census_data['Statistic'].drop_duplicates())
dollars_suffix = ' ({} dollars)'.format(cpi.LATEST_YEAR)

# Start from every (county, year) pair seen and left-join each statistic on as its own column.
census_wide = census_data[key_columns].drop_duplicates()
for stat_name in stat_names:
    stat_values = (
        census_data.loc[census_data['Statistic'] == stat_name]
        .drop(columns=['Statistic', 'Stat ID'])
        .rename(columns={'Value': stat_name})
    )
    census_wide = census_wide.merge(stat_values, on=key_columns, how='left')

# cpi is slow, so compute the inflation factor once per year (it does not depend on the
# statistic) and apply it to all rows with a vectorized multiply.
inflation_factors = {
    int(year): cpi.inflate(1, int(year), to=cpi.LATEST_YEAR)
    for year in census_wide['Year'].drop_duplicates()
}
year_factor = census_wide['Year'].map(inflation_factors)
for stat_name in stat_names:
    adjusted_column = stat_name + dollars_suffix
    census_wide[adjusted_column] = census_wide[stat_name] * year_factor
    # A null here means a county/year is missing this statistic (e.g. one request failed
    # while the other succeeded); stop rather than silently plotting gaps.
    assert not census_wide[adjusted_column].isnull().any(), 'missing values in ' + adjusted_column

# Home value divided by a year of rent (monthly rent * 12).
census_wide['Price-to-Rent Ratio' + dollars_suffix] = census_wide['Median Value' + dollars_suffix] / (census_wide['Median Rent' + dollars_suffix] * 12.0)
print(len(census_wide))
census_wide.loc[census_wide['Name'] == 'San Francisco County, California'].sort_values('Year')

10840


Unnamed: 0,Name,State,County,Year,Median Rent,Median Value,Median Rent (2023 dollars),Median Value (2023 dollars),Price-to-Rent Ratio (2023 dollars)
69,"San Francisco County, California",6,75,2010,1385.0,768000.0,1935.33895,1073169.90131,46.209386
887,"San Francisco County, California",6,75,2011,1407.0,719800.0,1905.919889,975039.897928,42.632078
1709,"San Francisco County, California",6,75,2012,1512.0,727600.0,2006.626584,965622.687004,40.101411
2534,"San Francisco County, California",6,75,2013,1491.0,778000.0,1950.19116,1017604.776847,43.483121
3362,"San Francisco County, California",6,75,2014,1587.0,846800.0,2042.621629,1089913.040687,44.465448
4190,"San Francisco County, California",6,75,2015,1659.0,941400.0,2132.761017,1210235.817684,47.287523
5020,"San Francisco County, California",6,75,2016,1784.0,1024000.0,2264.885474,1300023.949301,47.832586
6543,"San Francisco County, California",6,75,2017,1836.0,1104100.0,2282.281625,1372476.657148,50.113471
6688,"San Francisco County, California",6,75,2018,1880.0,1195700.0,2281.257631,1450904.122147,53.000887
7526,"San Francisco County, California",6,75,2019,1959.0,1217500.0,2334.81273,1451064.062396,51.79088


In [27]:
# Interactive line chart of the price-to-rent ratio over time, one line per county.
# NOTE(review): `plot_counties` is not defined in the cells shown here; it is presumably
# a collection of county names matching the 'Name' column, set in another cell. Confirm.
ratio_column = 'Price-to-Rent Ratio ({} dollars)'.format(cpi.LATEST_YEAR)
selected_rows = census_wide.loc[census_wide['Name'].isin(plot_counties)]

fig = px.line(
    selected_rows,
    x='Year',
    y=ratio_column,
    color='Name',
    title=ratio_column,
)
fig.update_layout(width=1000, height=500)
fig.show()

In [37]:
# For each selected county, draw inflation-adjusted median rent and median home value
# against year on one figure. The two series differ by orders of magnitude, so rent uses
# the primary (left) y axis and value uses the secondary (right) y axis.
# Each entry: (column-name template, legend label, line colour, use secondary y axis?)
series_specs = [
    ('Median Rent ({} dollars)', 'Rent', 'blue', False),
    ('Median Value ({} dollars)', 'Value', 'red', True),
]

for plot_county in plot_counties:
    df = census_wide.loc[census_wide['Name'] == plot_county].copy()

    fig = plotly.subplots.make_subplots(specs=[[{"secondary_y": True}]])
    for column_template, legend_label, line_color, on_secondary in series_specs:
        column_name = column_template.format(cpi.LATEST_YEAR)
        trace = go.Scatter(
            x=df['Year'],
            y=df[column_name],
            mode='lines',
            name=legend_label,
            line=dict(color=line_color),
        )
        fig.add_trace(trace, secondary_y=on_secondary)
        # Label each y axis with the column it displays.
        fig.update_yaxes(title_text=column_name, secondary_y=on_secondary)
    fig.update_xaxes(title_text='Year')
    fig.update_layout(width=1000, height=500, title=plot_county)
    fig.show()
