# Residential Real Estate

---

A look at residential real estate, according to the Federal Reserve and the Federal Housing Authority.

In [1]:
import pandas as pd
import altair as alt
import numpy as np
import re
from os import environ

# Resolve the FRED API key: an optional key file on the author's machine
# provides the fallback, and the FRED_API_KEY environment variable (set in
# CI) takes precedence whenever it is present.
try:
    # for local execution
    # The context manager closes the file handle even if read() fails.
    with open("/Users/kyledunn/fredApiKey.txt", "r") as keyFile:
        apiKeyFromFile = keyFile.read().strip()
except FileNotFoundError:
    # No local key file; rely on the environment variable alone.
    apiKeyFromFile = None
# for CI
apiKey = environ.get("FRED_API_KEY", apiKeyFromFile)

from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
    
def getSeries(series="", apiKey=apiKey, description=None):
    """Download one FRED series and return it as a date-indexed DataFrame.

    Parameters
    ----------
    series : str
        FRED series identifier, substituted into the request URL.
    apiKey : str
        FRED API key, substituted into the request URL.
    description : str, optional
        Name given to the value column; defaults to ``series``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` (parsed to datetimes) with the value column plus
        the ``rt_start`` and ``rt_end`` columns from the file. Values of "."
        are read as NaN.
    """
    fetchCommand = "https://api.stlouisfed.org/fred/series/observations?series_id={s}&realtime_end=9999-12-31&api_key={k}&file_type=txt" 

    # Buffer the whole response in memory, then release the connection.
    with urlopen(fetchCommand.format(s=series, k=apiKey)) as resp:
        payload = BytesIO(resp.read())

    if description is None:
        description = series

    # The response is handled as a zip archive; both the archive and the
    # member being read are closed once the table has been parsed.
    with ZipFile(payload) as zipfile:
        filesInZip = zipfile.namelist()

        # NOTE(review): the second archive member is taken to be the
        # observations table -- confirm against the FRED txt download layout.
        with zipfile.open(filesInZip[1]) as data:
            # Tab-separated; the file's own header row is skipped and replaced
            # with fixed column names.
            df = pd.read_csv(data, sep="\t", header=None, skiprows=1,
                             names=["date", description, "rt_start", "rt_end"], na_values=".")

    df['date'] = pd.to_datetime(df.date)

    return df.set_index("date")

# House price index series "USSTHPI" as downloaded from FRED.
df = getSeries("USSTHPI")

# Quarterly mean of the index. Only the numeric value column is averaged:
# rt_start / rt_end are read as strings, and newer pandas raises a TypeError
# when asked for their mean instead of silently dropping them.
df2 = df[["USSTHPI"]].resample('1Q').mean()

In [2]:
# Fit a straight line to the index up to the end of 2000, using the row
# position (0, 1, 2, ...) as the independent variable.
subset = df2.loc[df2.index <= '2000-12-31']

y = subset.USSTHPI.values
x = list(range(len(subset.index.values)))

# z[0] is the slope per row, z[1] the intercept.
z = np.polyfit(x, y, 1)

# Evaluate intercept + slope * position over every row of df2, so the line
# extends past the fitting window.
trend = z[1] * 1 + np.arange(df2.shape[0]) * z[0]

### How do current prices compare with the pre-low-interest rate era? (i.e. before 2001)

In [3]:
# Fitted trend values aligned to the same dates as the observed index.
fitFrame = pd.DataFrame(index=df2.index, data=trend, columns=['fit']).reset_index()

# Blue layer: the linear fit.
fitLayer = alt.Chart(fitFrame).mark_line(color='blue').encode(
    x=alt.X('date:T', axis=alt.Axis(title='')),
    y=alt.Y('fit:Q', axis=alt.Axis(title=''))
)

# Black layer: the observed quarterly index.
indexLayer = alt.Chart(df2.reset_index()).mark_line(color='black').encode(
    x=alt.X('date:T', axis=alt.Axis(title='')),
    y=alt.Y('USSTHPI:Q', axis=alt.Axis(title='Price Index [1980-Q1 = 100]'))
)

(fitLayer + indexLayer).properties(
    title='US FHA House Price Index',
    width=750,
    height=400,
    background='white'
)

In [4]:
# Quarterly mean of the "CSUSHPINSA" series. Only the numeric value column is
# averaged: rt_start / rt_end are read as strings, and newer pandas raises a
# TypeError when asked for their mean instead of silently dropping them.
dfcs = getSeries("CSUSHPINSA")[["CSUSHPINSA"]].resample('1Q').mean()

# Fit a straight line to the index up to the end of 2000, using the row
# position as the independent variable.
subset2 = dfcs[dfcs.index <= '2000-12-31']

y2 = subset2.CSUSHPINSA.values
x2 = [n for n, v in enumerate(subset2.index.values)]

# z2[0] is the slope per row, z2[1] the intercept.
z2 = np.polyfit(x2, y2, 1)

# Evaluate intercept + slope * position over every row of dfcs.
trend2 = np.add(np.multiply(z2[1], 1), np.multiply(range(dfcs.shape[0]), z2[0]))

In [5]:
# Fitted trend values aligned to the same dates as the observed index.
fitFrame2 = pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()

# Blue layer: the linear fit.
fitLayer2 = alt.Chart(fitFrame2).mark_line(color='blue').encode(
    x=alt.X('date:T', axis=alt.Axis(title='')),
    y=alt.Y('fit:Q', axis=alt.Axis(title=''))
)

# Black layer: the observed quarterly index.
indexLayer2 = alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
    x=alt.X('date:T', axis=alt.Axis(title='')),
    y=alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
)

(fitLayer2 + indexLayer2).properties(
    title='S&P/Case-Shiller U.S. National Home Price Index',
    width=750,
    height=400,
    background='white'
)

## Some possible scenarios of where home prices go from here...

In [6]:
# Quarterly dates on which the projected lines are drawn.
extrapolatedDates = pd.date_range(start='1/1/2020', end='1/1/2040', freq='Q')

# The fitted line with its intercept raised by 44 index points, evaluated for
# 20 positions beyond the end of the observed data.
y40 = (z2[1] + 44) * 1 + np.arange(dfcs.shape[0] + 20) * z2[0]

# Last 80 values of the shifted line, plotted against the projection dates.
shiftedFitFrame = pd.DataFrame(index=extrapolatedDates, data=y40[-20*4:], columns=['fit2']).reset_index()
# The unshifted fit over the observed date range.
historicFitFrame = pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()
# A flat line at the maximum observed index value, one point per year.
peakFrame = pd.DataFrame(index=pd.to_datetime(range(2020, 2040), format="%Y"), data=dfcs.max()['CSUSHPINSA'], columns=['p']).reset_index()

shiftedFitLayer = alt.Chart(shiftedFitFrame).mark_line(color='blue').encode(
    x=alt.X('index:T', axis=alt.Axis(title='')),
    y=alt.Y('fit2:Q', axis=alt.Axis(title=''))
)
historicFitLayer = alt.Chart(historicFitFrame).mark_line(color='blue').encode(
    x=alt.X('date:T', axis=alt.Axis(title='')),
    y=alt.Y('fit:Q', axis=alt.Axis(title=''))
)
observedLayer = alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
    x=alt.X('date:T', axis=alt.Axis(title='')),
    y=alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
)
peakLayer = alt.Chart(peakFrame).mark_line(color='black', strokeDash=[5]).encode(
    x=alt.X('index:T', axis=alt.Axis(title='')),
    y=alt.Y('p:Q', axis=alt.Axis(title=''))
)

(shiftedFitLayer + historicFitLayer + observedLayer + peakLayer).properties(
    title='S&P/Case-Shiller U.S. National Home Price Index (stagnation projection)',
    width=750,
    height=400,
    background='white'
)

In [7]:
# Quarterly dates on which the projected lines are drawn.
extrapolatedDates = pd.date_range(start='1/1/2020', end='1/1/2030', freq='Q')

# Projected path: start at the maximum observed index value and decay as
# base * (step + 1) ** -0.05, one value per projection date.
base = dfcs.max()['CSUSHPINSA']
decline = [base*(v + 1)**-.05 for v in range(extrapolatedDates.shape[0])]

# The fitted line with its intercept raised by 15 index points, evaluated for
# 20 positions beyond the end of the observed data.
y40 = np.add(np.multiply(z2[1] + 15, 1), np.multiply(range(dfcs.shape[0] + 20), z2[0]))

(
    # Last 40 values of the shifted line, plotted against the projection dates.
    alt.Chart(pd.DataFrame(index=extrapolatedDates, data=y40[-10*4:], columns=['fit2']).reset_index()).mark_line(color='blue').encode(
        alt.X('index:T', axis=alt.Axis(title='')),
        alt.Y('fit2:Q', axis=alt.Axis(title=''))
    ) +
    # The unshifted fit over the observed date range.
    alt.Chart(pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()).mark_line(color='blue').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('fit:Q', axis=alt.Axis(title=''))
    ) +
    # The observed quarterly index.
    alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
        alt.X('date:T', axis=alt.Axis(title='')),
        alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
    ) +
    # Dashed projected path, with the final 8 quarters left off. The x axis
    # title is blanked like every other layer so no "index" label appears.
    alt.Chart(pd.DataFrame(index=extrapolatedDates[:-8], data=decline[:-8], columns=['p']).reset_index()).mark_line(color='black', strokeDash=[5]).encode(
        alt.X('index:T', axis=alt.Axis(title='')),
        alt.Y('p:Q', axis=alt.Axis(title=''))
    )
).properties(
    title='S&P/Case-Shiller U.S. National Home Price Index (correction projection)',
    width=750,
    height=400,
    background='white'
)

In [8]:
# Quarterly dates on which the projected lines are drawn.
extrapolatedDates = pd.date_range(start='1/1/2020', end='1/1/2030', freq='Q')

# Projected path: start at the maximum observed index value and add 1.1% of
# that value per step. The name `decline` is kept even though the path rises.
base = dfcs.max()['CSUSHPINSA']
decline = []
for v in range(extrapolatedDates.shape[0]):
    decline.append(base+(base * .011 * v))

# The fitted line with its intercept raised by 15 index points, evaluated for
# 20 positions beyond the end of the observed data.
y40 = (z2[1] + 15) * 1 + np.arange(dfcs.shape[0] + 20) * z2[0]

# Last 40 values of the shifted line, plotted against the projection dates.
shiftedFitFrame = pd.DataFrame(index=extrapolatedDates, data=y40[-10*4:], columns=['fit2']).reset_index()
# The unshifted fit over the observed date range.
historicFitFrame = pd.DataFrame(index=dfcs.index, data=trend2, columns=['fit']).reset_index()
# Projected path with the final 8 quarters left off.
projectedFrame = pd.DataFrame(index=extrapolatedDates[:-8], data=decline[:-8], columns=['p']).reset_index()

shiftedFitLayer = alt.Chart(shiftedFitFrame).mark_line(color='blue').encode(
    x=alt.X('index:T', axis=alt.Axis(title='')),
    y=alt.Y('fit2:Q', axis=alt.Axis(title=''))
)
historicFitLayer = alt.Chart(historicFitFrame).mark_line(color='blue').encode(
    x=alt.X('date:T', axis=alt.Axis(title='')),
    y=alt.Y('fit:Q', axis=alt.Axis(title=''))
)
observedLayer = alt.Chart(dfcs.reset_index()).mark_line(color='black').encode(
    x=alt.X('date:T', axis=alt.Axis(title='')),
    y=alt.Y('CSUSHPINSA:Q', axis=alt.Axis(title='Price Index [2000-Jan = 100]'))
)
projectedLayer = alt.Chart(projectedFrame).mark_line(color='black', strokeDash=[5]).encode(
    x=alt.X('index:T', axis=alt.Axis(title='')),
    y=alt.Y('p:Q', axis=alt.Axis(title=''))
)

(shiftedFitLayer + historicFitLayer + observedLayer + projectedLayer).properties(
    title='S&P/Case-Shiller U.S. National Home Price Index (continued growth projection)',
    width=750,
    height=400,
    background='white'
)

## How have prices changed over the past century when correcting for inflation?

In [9]:
# Load the spreadsheet behind Shiller's long-run housing chart, skipping the
# first three rows of the sheet.
df100yr = pd.read_excel("http://www.econ.yale.edu/~shiller/data/Fig2-1.xls", skiprows=3)

# Build one header string per column by joining that column's first three
# rows with spaces, after removing the text "nan" (how missing cells print).
# NOTE(review): replace("nan", "") also removes "nan" inside a real word;
# the two headers used below ("Date", "Home Price Index") are unaffected.
names = [" ".join(map(lambda v: str(v).replace("nan", ""), df100yr[:3][c])).strip() for c in df100yr.columns]

# Any header equal to "Date" that is not in the first column gets its column
# position appended, so repeated "Date" headers become distinct names.
unames = [nm + str(n) if (n != 0 and nm == "Date") else nm for n, nm in enumerate(names)]

df100yr.columns = unames
# Drop the three rows that were consumed as header text.
df100yr = df100yr.iloc[3:]

df100yr2 = df100yr[["Date", "Home Price Index"]].copy()
# Truncate each date value to its integer year and parse that as a datetime.
df100yr2["Date"] = pd.to_datetime(df100yr2["Date"].apply(int).apply(str), format="%Y")
df100yr2["Home Price Index"] = df100yr2["Home Price Index"].apply(float)
# One row per year: the mean index over all rows sharing that year.
df100yr_agg = df100yr2.groupby("Date").agg(np.mean).reset_index()


# start from 2015
# date	CSUSHPINSA
# Take the last value of each year from dfcs and keep the final six years.
# NOTE(review): "start from 2015" only holds while dfcs ends in 2020, and the
# two-name column assignment below assumes dfcs has a single value column.
df100yr_agg_rest = dfcs.resample('1Y').last().reset_index().tail(6)
df100yr_agg_rest.columns = ["Date", "Home Price Index"]

# Append those rows to the spreadsheet series.
# NOTE(review): the two sources are concatenated as-is, with no rescaling
# visible here -- confirm the index levels are comparable.
df100_yr_ready = pd.concat([df100yr_agg, df100yr_agg_rest])

# FRED series "JHDUSRGDPBR", with its value column named "value".
df_recessions = getSeries("JHDUSRGDPBR", description="value")

c = (
    # Grey bars for the JHDUSRGDPBR values, on a right-hand axis without labels.
    alt.Chart(df_recessions.reset_index()[["date", "value"]]).mark_bar(color="#D3D3D3").encode(
        alt.X("date:T", axis=alt.Axis(title='')),
        alt.Y("value", axis=alt.Axis(title='', orient="right", labels=False))
    ) +\
    # Dark red line for the combined home price index, on the left-hand axis.
    alt.Chart(df100_yr_ready).mark_line(color='darkred').encode(
        alt.X("Date:T", axis=alt.Axis(title='')),
        alt.Y("Home Price Index", axis=alt.Axis(orient="left"))
    )
).properties(
    title="Robert Shiller \"Irrational Exuberance\" Housing Data (inflation adjusted)",
    width=750,
    height=400,
    background='white'
).resolve_scale(y='independent')  # each layer gets its own y scale

# Write the chart to a PNG file, then render it in the notebook.
c.save('residential-realestate.png')
c.display()

In [10]:
# FRED series "POPTOTUSA647NWDB"; its value column carries the series id as
# its name and the index is named "date".
df_pop60 = getSeries("POPTOTUSA647NWDB")

# Keep rows dated after 1969-01-01, rename the value column, and turn the
# date index into a "Date" column.
df_pop70 = (
    df_pop60.loc[df_pop60.index > "1969-01-01"]
    .rename(columns={"POPTOTUSA647NWDB": "U.S. Population Millions"})
    .reset_index()
    .rename(columns={"date": "Date"})
)

# Scale the value column by 1e6 to match the "Millions" column name.
df_pop70["U.S. Population Millions"] = df_pop70["U.S. Population Millions"] / 1e6

# Keep only the date and population columns.
df_pop70 = df_pop70.iloc[:, :2]

In [11]:
# Average the population per date, then per calendar year.
populationByDate = df_pop70.groupby("Date").mean()
df100yr_pop = populationByDate.resample("1Y").mean().reset_index()

# Annual mean of the combined home price index, binned the same way so the
# two frames share their "Date" values.
df100yr_hpi = (
    df100_yr_ready
    .set_index("Date")
    .resample("1Y")
    .mean()
    .reset_index()
)

# Inner join on "Date": only years present in both frames are kept.
df100yr_pop = df100yr_pop.merge(df100yr_hpi[["Date", "Home Price Index"]], on="Date")

## How have prices changed when also correcting for population growth?

In [12]:
# Population in the first row, used as the reference level.
basePop = df100yr_pop["U.S. Population Millions"].values[0]

# Divide the price index by population growth relative to that first row.
df100yr2_agg = df100yr_pop.copy()
populationRatio = df100yr_pop["U.S. Population Millions"] / basePop
df100yr2_agg["Demand-Adj"] = df100yr_pop["Home Price Index"] / populationRatio

# Rows outside the 1960-12-31 .. 1970-12-31 window are plotted.
isBeforeWindow = df100yr2_agg["Date"] < pd.to_datetime("1960-12-31")
isAfterWindow = df100yr2_agg["Date"] > pd.to_datetime("1970-12-31")
skipbad = isBeforeWindow | isAfterWindow

# The two dates either side of the window, overdrawn in white dashes.
windowEdges = [pd.to_datetime("1959-12-31"), pd.to_datetime("1971-12-31")]
coverbad = df100yr2_agg["Date"].isin(windowEdges)

adjustedLayer = alt.Chart(df100yr2_agg[skipbad]).mark_line().encode(
    x=alt.X("Date", axis=alt.Axis(title='')),
    y=alt.Y("Demand-Adj")
)
coverLayer = alt.Chart(df100yr2_agg[coverbad]).mark_line(color="white", strokeDash=[5]).encode(
    x=alt.X("Date", axis=alt.Axis(title='')),
    y=alt.Y("Demand-Adj")
)

(adjustedLayer + coverLayer).properties(
    title="U.S. Historical Housing Price Index (inflation, population adjusted)",
    width=750,
    height=400,
    background='white'
)

In [13]:
# Census new-home sales workbook: skip the first nine rows, use no header
# row, and drop the last four rows.
df_new = pd.read_excel("https://www.census.gov/construction/nrs/xls/sold_cust.xls", skiprows=9, header=None).iloc[:-4]

# Latest new residential sales release: skip eight rows, keep the first 14
# rows and all but the last column, then apply fixed column names.
df_new_latest = pd.read_excel("https://www.census.gov/construction/nrs/xls/newressales.xls", skiprows=8).iloc[:14, :-1]
df_new_latest.columns = ['Month', 'US-Sold', 'NE-Sold', 'MW-Sold', 'S-Sold', 'W-Sold',
                         'US-Available', 'NE-Available', 'MW-Available', 'S-Available', 'W-Available',
                         'Months-Supply', 'MedianPrice', 'AveragePrice']

# Drop erroneous label rows
df_new_latest = df_new_latest.dropna(how='all')
df_new_latest = df_new_latest[df_new_latest.Month != 2019]


def _monthLabel(v, yearSuffix):
    """Return the first word of ``v`` reduced to ASCII letters, plus ``yearSuffix``.

    The character class is spelled ``A-Za-z`` because the range ``A-z`` also
    covers the punctuation between "Z" and "a" ([ \\ ] ^ _ `), which would
    then survive the clean-up.
    """
    return re.sub('[^A-Za-z]+', '', str(v).split(' ')[0]) + yearSuffix


# The first three remaining rows are labelled as 2018 months and the rest as
# 2019 months.
# NOTE(review): the years are hard-coded, so this only matches a release with
# that layout -- confirm against the current workbook.
df_new_latest['Month'] = pd.concat([df_new_latest['Month'].iloc[:3].map(lambda v: _monthLabel(v, "-2018")),
                                    df_new_latest['Month'].iloc[3:].map(lambda v: _monthLabel(v, "-2019"))])

# Labels that do not parse as "<full month name>-<year>" become NaT ...
df_new_latest['dt'] = pd.to_datetime(df_new_latest['Month'], format='%B-%Y', errors='coerce')

# ... and are removed here, together with any row holding another missing value.
df_new_latest = df_new_latest.dropna()

# https://www.census.gov/housing/hvs/data/histtabs.html

# Housing vacancy survey table: skip three rows, keep sheet rows 3..157, and
# drop rows that are entirely empty.
df_o = pd.read_excel("http://www.census.gov/housing/hvs/data/histtab19.xlsx", skiprows=3).iloc[3:158]

df_o = df_o.dropna(how='all')

In [14]:
# One label per row of df_o, in groups of five: a blank for the first row of
# each group followed by four quarter labels, with the year counting up from
# 1994.
# NOTE(review): assumes df_o holds a whole number of five-row groups;
# otherwise the column assignment below fails on a length mismatch.
quarterTemplates = ("{:d}-Q1", "{:d}-Q2", "{:d}-Q3", "{:d}-Q4")

vals = []
for i in range(0, df_o.shape[0], 5):
    year = 1994 + i // 5
    vals.append(None)
    vals.extend(template.format(year) for template in quarterTemplates)

# Parse each label to a timestamp; the blank entries stay missing.
df_o['dt'] = [pd.to_datetime(v) for v in vals]

## How has home ownership changed by age group since the mid-90s?

In [15]:
# Display order for the column names that become the 'variable' field.
sortOrder = ['Under 35 years', '35 to 44 years', '45 to 54 years', 
             '55 to 64 years', '65 years and over', 'U.S.']

# Drop rows with any missing value (which includes rows whose 'dt' is blank),
# discard the first column, and index by quarter.
yoy = df_o.dropna().iloc[:, 1:].set_index('dt')
# Change versus four rows earlier, i.e. the same quarter one year before.
# pct_change returns a fraction, so scale by 100 to match the "[%]" axis
# labels used for this frame.
yoy = yoy.pct_change(4) * 100

# One bar-chart row per column of yoy.
alt.Chart(yoy.reset_index().melt(id_vars='dt')).mark_bar().encode(
    alt.X('dt:T'),
    alt.Y('value:Q', axis=alt.Axis(title='YoY Growth [%]')),
    alt.Color('variable:N', title='Age Group', sort=sortOrder),
    alt.Row('variable:N', sort=sortOrder)
).properties(
    width=650,
    height=450
)

In [16]:
# Running total of the year-over-year changes, reshaped to one row per
# (date, column) pair for plotting.
cumulativeAll = yoy.cumsum().reset_index().melt(id_vars='dt')

alt.Chart(cumulativeAll).mark_line().encode(
    x=alt.X('dt:T', axis=alt.Axis(title='')),
    y=alt.Y('value:Q', axis=alt.Axis(title='Cummulative Growth [%]')),
    color=alt.Color('variable:N', title='Group', sort=sortOrder)
).properties(
    title='Home Ownership Cummulative Growth [Since 1994]',
    width=700,
    height=450
)

In [17]:
# Same running total, restarted from the rows dated after 01-01-2009.
recentRows = yoy.loc[yoy.index > '01-01-2009']
cumulativeRecent = recentRows.cumsum().reset_index().melt(id_vars='dt')

alt.Chart(cumulativeRecent).mark_line().encode(
    x=alt.X('dt:T', axis=alt.Axis(title='')),
    y=alt.Y('value:Q', axis=alt.Axis(title='Cummulative Growth [%]')),
    color=alt.Color('variable:N', title='Group', sort=sortOrder)
).properties(
    title='Home Ownership Cummulative Growth [Since 2009]',
    width=700,
    height=450
)