# RHNA projections

We have three draft options for RHNA production, but they only give total unit numbers by jurisdiction. This notebook converts them to PUMA-level amounts of single-family and multifamily construction.

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import pandas as pd
import numpy as np
import geopandas as gp
import matplotlib.pyplot as plt
import itertools

## Single/multifamily split by jurisdiction

Figure out the single/multi-family split of new construction by jurisdiction, using Building Permits Survey info.

In [None]:
# Load the 2010-2018 annual permit files. Each file has a two-row header, so the
# column names are rebuilt by joining the first-row and second-row labels.
bps = []
for year in range(2010, 2019):
    permitPath = f'../data/permits/we{year}a.txt'
    topRow = pd.read_csv(permitPath, header=None, nrows=1).iloc[0]
    bottomRow = pd.read_csv(permitPath, header=None, nrows=1, skiprows=1).iloc[0]

    # Any first-row label containing 'unit' is copied onto the positions on either
    # side of it, so the neighbouring columns carry the same prefix.
    spreadRow = topRow.copy()
    for pos, label in enumerate(topRow):
        if isinstance(label, str) and 'unit' in label:
            spreadRow.loc[pos - 1] = label
            spreadRow.loc[pos + 1] = label

    columnNames = [
        f'{upper} {lower}'.strip()
        for upper, lower in zip(spreadRow.fillna(''), bottomRow.fillna(''))
    ]

    # The data rows start after the two header rows.
    yearData = pd.read_csv(permitPath, header=None, skiprows=2)
    yearData.columns = columnNames
    bps.append(yearData)

bps = pd.concat(bps, ignore_index=True)

In [None]:
# Inspect the column names produced by joining the two header rows.
bps.columns

In [None]:
# Keep California rows (state code 6) in the six study counties: Imperial (25),
# Los Angeles (37), Orange (59), Riverside (65), San Bernardino (71), Ventura (111).
# .copy() gives an independent frame so later column assignments do not write into a
# slice of the concatenated data (same pattern as the county-level filter on cobps).
bps = bps[(bps['State Code'] == 6) & bps['County Code'].isin([25, 37, 59, 65, 71, 111])].copy()

In [None]:
# Drop the ' town' suffix from place names (the RHNA city names are stripped the same
# way before the merge). The pattern is a plain substring, so regex=False makes that
# explicit, and assign() rebinds bps to a new frame instead of writing into a
# filtered slice.
bps = bps.assign(**{'Place Name': bps['Place Name'].str.replace(' town', '', regex=False)})

# Total permitted units per place over all loaded years, for both the plain and the
# "rep" unit columns of each structure-size class.
unitColumns = [
    '1-unit Units',
    '2-units Units',
    '3-4 units Units',
    '5+ units Units',
    '1-unit rep Units',
    '2-units rep Units',
    '3-4 units rep Units',
    '5+ units rep Units',
]
# The string name 'sum' selects the built-in groupby sum; passing np.sum is deprecated.
production = bps.groupby('Place Name').agg({column: 'sum' for column in unitColumns})

In [None]:
# Multifamily = every structure with two or more units; single-family = 1-unit.
multiUnitColumns = ['2-units Units', '3-4 units Units', '5+ units Units']
mfUnits = production[multiUnitColumns[0]]
for column in multiUnitColumns[1:]:
    mfUnits = mfUnits + production[column]

production['mf_production'] = mfUnits
production['sf_production'] = production['1-unit Units']
production['total_production'] = production['mf_production'] + production['sf_production']

In [None]:
# List the place names present in the permit data (these are the merge keys used later).
production.index.values

### Compare reported and imputed numbers

In [None]:
# For each size class, difference between the plain and the "rep" unit totals.
sizeClasses = ['1-unit', '2-units', '3-4 units', '5+ units']
diffColumns = []
for sizeClass in sizeClasses:
    diffColumn = f'{sizeClass} diff'
    production[diffColumn] = production[f'{sizeClass} Units'].sub(production[f'{sizeClass} rep Units'])
    diffColumns.append(diffColumn)

production[diffColumns].describe()

### Compute proportion multifamily of recent construction

In [None]:
# Share of recent permitted units that were multifamily (NaN where nothing was permitted).
production['proportionMultifamily'] = production['mf_production'].div(production['total_production'])

In [None]:
# Places whose multifamily share could not be computed.
production.loc[production['proportionMultifamily'].isnull(), 'proportionMultifamily']

### Deal with cities that haven't produced any housing

Laguna Woods is a bit of a surprise, but of course Vernon and City of Industry aren't. Fill these in with the values from the existing housing stock, from Census Reporter ACS numbers.

Calipatria is not in the building permits database.

In [None]:
# Hand-entered multifamily shares for places with no usable permit history.
existingStockShares = {
    'Laguna Woods': 0.66,
    'Vernon': 0.43,
    'Industry': 0.05, # actually census reporter reports 4 percent, but 12 percent in mobile homes, so recalculated with remaining units
    'Calipatria': 0.27,  # not in the permits data, so this adds a new row
}
for placeName, share in existingStockShares.items():
    production.loc[placeName, 'proportionMultifamily'] = share

In [None]:
# Every place must now have a multifamily share.
assert production['proportionMultifamily'].notnull().all()

In [None]:
# Align permit-survey place names with the spellings used in the RHNA worksheet.
placeNameFixes = {
    'La Canada Flintridge': 'La Cañada Flintridge',
    'San Buenaventura': 'San Buenaventura (Ventura)'
}
production = production.rename(index=placeNameFixes)

## Read SCAG RHNA options

Thanks Paavo!

In [None]:
# latest version retrieved from here: https://twitter.com/calwatch/status/1192321136354197504
# Only the 'All Options' sheet is read.
rhna = pd.read_excel('../data/All Options Worksheet 11-06-19.xlsx', sheet_name='All Options')

In [None]:
# Normalise two capitalised suffixes so every name in ' city' ends in lower-case 'city'.
suffixCaseFixes = {
    'Eastvale City': 'Eastvale city',
    'Jurupa Valley City': 'Jurupa Valley city'
}
rhna[' city'] = rhna[' city'].replace(suffixCaseFixes)

# 'city' (no leading space) is the bare place name used to join with the permit data;
# ' city' keeps the full name and is joined against the places layer later.
bareName = rhna[' city']
for suffix in (' city', ' town'):
    bareName = bareName.str.replace(suffix, '')
rhna['city'] = bareName

# Short column names for the four allocation options.
optionColumns = {
    'Staff Option': 'opt_x',
    'Option M': 'opt_m',
    'Option H': 'opt_h',
    'M Modified': 'opt_m_mod'
}
rhna = rhna.rename(columns=optionColumns)

In [None]:
# Attach permit-based production figures to each RHNA jurisdiction by bare place name;
# validate='1:1' raises if a name matches more than one row on either side.
rhna = pd.merge(rhna, production, how='left', left_on='city', right_index=True, validate='1:1')

In [None]:
# RHNA jurisdictions that did not pick up a multifamily share from the permit data.
rhna.loc[rhna['proportionMultifamily'].isnull(), 'city']

In [None]:
# read places: https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2019&layergroup=Places

In [None]:
# Load the 2019 TIGER places layer for California.
places = gp.read_file('../data/places/tl_2019_06_place.shp')


In [None]:
# how many ways can we mess up ñ?
# The name arrives mis-decoded from the shapefile; restore it so it matches the RHNA sheet.
places['NAMELSAD'] = places['NAMELSAD'].replace(
    'La CaÃ±ada Flintridge city', 'La Cañada Flintridge city'
)


In [None]:
# Inner join: keeps only places whose full name (NAMELSAD) appears in the RHNA sheet's
# ' city' column, and brings the RHNA and production columns onto the geometry.
places = places.merge(rhna, left_on='NAMELSAD', right_on=' city', how='inner') # note space before city - it's a diff column

In [None]:
placeHousingPath = '../data/place_hu/place_hudata_with_overlays_2019-11-06T212431.csv'

# Data frame: the first row supplies column names, the second (label) row is skipped.
placehu = pd.read_csv(placeHousingPath, skiprows=[1])

# Lookup table: the two header rows side by side, one line per column.
placehu_header = pd.read_csv(placeHousingPath, header=None, nrows=2).T
with pd.option_context('display.max_colwidth', 200, 'display.max_rows', 600):
    display(placehu_header)

In [None]:
# Existing stock by structure type, built from the DP04 unit-count columns
# (see placehu_header for what each code means).
existingColumnGroups = {
    'totalExistingSF': ['DP04_0007E', 'DP04_0008E'],
    'totalExistingMF': ['DP04_0009E', 'DP04_0010E', 'DP04_0011E', 'DP04_0012E', 'DP04_0013E'],
}
for totalName, sourceColumns in existingColumnGroups.items():
    runningTotal = placehu[sourceColumns[0]]
    for sourceColumn in sourceColumns[1:]:
        runningTotal = runningTotal + placehu[sourceColumn]
    placehu[totalName] = runningTotal

In [None]:
# Build the key used to join places to placehu.GEO_ID: a fixed prefix plus the place FIPS code.
places['GEO_ID'] = '1600000US06' + places.PLACEFP

In [None]:
# Bring the existing SF/MF stock onto each place (left join keeps every place).
existingStock = placehu[['GEO_ID', 'totalExistingSF', 'totalExistingMF']]
places = places.merge(existingStock, how='left', on='GEO_ID', validate='1:1')

# ignores boats, RVs, mobile homes, etc.
places['totalHousingUnits'] = places['totalExistingSF'] + places['totalExistingMF']

## Deal with unincorporated places

In [None]:
# National county layer, cut down to the six study counties in California,
# reprojected to EPSG:26943 and keyed by the three-digit county FIPS string.
counties = gp.read_file('../data/counties/tl_2019_us_county.shp')
inStudyArea = (counties.STATEFP == '06') & counties.COUNTYFP.isin(['025', '037', '059', '065', '071', '111'])
counties = counties[inStudyArea]
counties = counties.to_crs(epsg=26943)
counties = counties.set_index('COUNTYFP')

In [None]:
# so the strategy here is to read the county-level files, then subtract off the place-level files to get the unincorporated portion
# NOTE(review): the path below is the same we{year}a.txt pattern used for the place-level
# load earlier in this notebook — confirm these are the intended "county-level" inputs.
def _readPermitYear(year):
    """Read one year's permit file and return it with combined two-row column names."""
    path = f'../data/permits/we{year}a.txt'
    firstRow = pd.read_csv(path, header=None, nrows=1).iloc[0]
    secondRow = pd.read_csv(path, header=None, nrows=1, skiprows=1).iloc[0]

    # Copy each first-row label containing 'unit' onto the positions either side of it.
    prefixes = firstRow.copy()
    unitPositions = [
        i for i in range(len(firstRow))
        if isinstance(firstRow.loc[i], str) and 'unit' in firstRow.loc[i]
    ]
    for i in unitPositions:
        prefixes.loc[i - 1] = firstRow.loc[i]
        prefixes.loc[i + 1] = firstRow.loc[i]

    frame = pd.read_csv(path, header=None, skiprows=2)
    frame.columns = [f'{h1} {h2}'.strip() for h1, h2 in zip(prefixes.fillna(''), secondRow.fillna(''))]
    return frame

cobps = []
for year in range(2010, 2019):
    cobps.append(_readPermitYear(year))

cobps = pd.concat(cobps, ignore_index=True)

In [None]:
# Restrict to California (state code 6) and the six study counties.
inStudyCounties = (cobps['State Code'] == 6) & cobps['County Code'].isin([25, 37, 59, 65, 71, 111])
cobps = cobps.loc[inStudyCounties].copy()

In [None]:
# Total permitted units per county for each structure-size class.
# Uses the groupby .sum() method rather than passing Python's builtin sum to agg(),
# which pandas deprecates in favour of the method / string name.
countyUnitColumns = ['1-unit Units', '2-units Units', '3-4 units Units', '5+ units Units']
cotot = cobps.groupby('County Code')[countyUnitColumns].sum()

In [None]:
# Same SF/MF split as the place-level table: 1-unit is single-family, 2+ is multifamily.
cotot['sf_production'] = cotot['1-unit Units']
cotot['mf_production'] = (
    cotot['2-units Units']
    + cotot['3-4 units Units']
    + cotot['5+ units Units']
)
cotot['total_production'] = cotot['sf_production'] + cotot['mf_production']

In [None]:
# we need existing housing stock to create growth rates
countyHousingPath = '../data/county_housing/ACSDP5Y2017.DP04_data_with_overlays_2019-11-08T143350.csv'

# Data: second (label) row skipped. Index: code -> label lookup, one line per column.
county_housing = pd.read_csv(countyHousingPath, skiprows=[1])
county_housing_idx = pd.read_csv(countyHousingPath, nrows=1).T
#county_housing_idx

In [None]:
# County-level existing stock, using the same DP04 columns as the place-level table.
sfStockColumns = ['DP04_0007E', 'DP04_0008E']
mfStockColumns = ['DP04_0009E', 'DP04_0010E', 'DP04_0011E', 'DP04_0012E', 'DP04_0013E']

sfStock = county_housing[sfStockColumns[0]]
for code in sfStockColumns[1:]:
    sfStock = sfStock + county_housing[code]
county_housing['totalExistingSF'] = sfStock

mfStock = county_housing[mfStockColumns[0]]
for code in mfStockColumns[1:]:
    mfStock = mfStock + county_housing[code]
county_housing['totalExistingMF'] = mfStock

In [None]:
# Drop the first 11 characters of GEO_ID and parse the remainder as an integer; this is
# the key matched against cotot's integer 'County Code' index in the merge that follows.
# (presumably GEO_ID looks like '0500000US06037' — TODO confirm)
county_housing['countyfips'] = county_housing.GEO_ID.str.slice(11).astype('int')

In [None]:
# Attach existing SF/MF stock to the county permit totals, then re-key by county FIPS
# (merging on right_on discards cotot's original index).
countyStock = county_housing[['countyfips', 'totalExistingSF', 'totalExistingMF']]
cotot = cotot.merge(countyStock, how='left', left_index=True, right_on='countyfips', validate='1:1')
cotot = cotot.set_index('countyfips')

In [None]:
# Show county totals: permitted units by type plus existing stock.
cotot

In [None]:
# subtract off incorporated places
def findCounty (place):
    """Return the index label of the county that overlaps `place` the most.

    `place` is a row with a `geometry` attribute, expected in the same CRS as the
    module-level `counties` layer. Its geometry is intersected with every county;
    if no intersection area exceeds 100 (in squared CRS units), NaN is returned so
    the caller can drop the place.
    """
    overlapArea = counties.intersection(place.geometry).area
    if not np.any(overlapArea > 100):
        return np.nan
    return overlapArea.idxmax()

# Assign each place to a county; the lookup runs on a copy reprojected to the counties' CRS.
projectedPlaces = places.to_crs(epsg=26943)
places['county'] = projectedPlaces.apply(findCounty, axis=1)

# get rid of places outside SCAG region
places = places.dropna(subset=['county'])

In [None]:
# Spot-check the county assignment on a random sample of places.
places[['NAME', 'county']].sample(10)

In [None]:
# Integer county code, so it lines up with the integer-keyed county totals.
places['countyint'] = places['county'].astype('int64')

# Sum production and existing stock over the incorporated places in each county.
incorporatedColumns = ['sf_production', 'mf_production', 'total_production', 'totalExistingSF', 'totalExistingMF']
coinc = places[incorporatedColumns + ['countyint']].groupby('countyint').sum()

In [None]:
# Unincorporated remainder = county total minus the incorporated places in that county.
remainderColumns = ['sf_production', 'mf_production', 'total_production', 'totalExistingSF', 'totalExistingMF']
couninc = cotot[remainderColumns].sub(coinc)

In [None]:
# Multifamily share of recent production in each county's unincorporated area.
couninc['proportionMultifamily'] = couninc['mf_production'].div(couninc['total_production'])

In [None]:
# Show the unincorporated remainders per county.
couninc

In [None]:
# Sanity check: subtracting incorporated places must never leave a negative remainder.
# Note this covers every column, including proportionMultifamily; a NaN anywhere
# compares as False and will also trip the assertion.
assert np.all(couninc >= 0)

In [None]:
# create the geometry for unincorporated places
# Each county polygon minus all matched place polygons, in EPSG:26943.
# NOTE(review): the following cells index unincgeom by county FIPS, which assumes
# overlay(how='difference') keeps counties' COUNTYFP index — verify for the installed
# geopandas version.
unincgeom = gp.overlay(counties, places.to_crs(epsg=26943), how='difference')

In [None]:
# Visual check of the unincorporated remainder polygons.
unincgeom.plot()

In [None]:
# Convert index labels to int so they align with couninc's integer county index,
# then copy the per-county figures across by index alignment.
unincgeom.rename(index=int, inplace=True)
for sharedColumn in ['proportionMultifamily', 'totalExistingSF', 'totalExistingMF']:
    unincgeom[sharedColumn] = couninc[sharedColumn]

In [None]:
# Confirm each unincorporated area picked up a multifamily share.
unincgeom.proportionMultifamily

In [None]:
# Label each remainder polygon with the name used for it in the RHNA worksheet,
# keyed by integer county FIPS.
unincorporatedNames = {
    111: 'Unincorporated Ventura Co.',
    37: 'Unincorporated Los Angeles Co.',
    71: 'Unincorporated San Bernardino Co.',
    65: 'Unincorporated Riverside Co. (incl. March JPA)',
    59: 'Unincorporated Orange Co.',
    25: 'Unincorporated Imperial Co.',
}
for countyFips, label in unincorporatedNames.items():
    unincgeom.loc[countyFips, 'NAMELSAD'] = label

unincgeom['NAME'] = unincgeom['NAMELSAD']

In [None]:
# Pull the RHNA allocations for the unincorporated areas, matching on the label just assigned.
rhnaOptions = rhna[['city', ' city', 'opt_x', 'opt_h', 'opt_m', 'opt_m_mod']]
unincgeom = unincgeom.merge(
    rhnaOptions,
    how='left',
    left_on='NAMELSAD',
    right_on='city',
    validate='1:1',
)

In [None]:
# Inspect the unincorporated rows with their RHNA allocations attached.
unincgeom

In [None]:
# Stack incorporated places and unincorporated remainders into one layer.
unincorporatedColumns = [
    'NAMELSAD', 'NAME', 'city', ' city', 'geometry',
    'proportionMultifamily', 'totalExistingSF', 'totalExistingMF',
    'opt_x', 'opt_h', 'opt_m', 'opt_m_mod',
]
layers = [places.to_crs(epsg=26943), unincgeom[unincorporatedColumns]]
all_places = pd.concat(layers, ignore_index=True, sort=True)

# Set the CRS explicitly on the concatenated result.
all_places.crs = {'init': 'epsg:26943'}

In [None]:
# there are some duplicates in the Census dataset, but they are not in LA/Orange County
assert all_places['NAMELSAD'].is_unique

In [None]:
# RHNA jurisdictions that did not make it into the combined places layer.
rhna[' city'][~rhna[' city'].isin(all_places.NAMELSAD)]

In [None]:
# The unincorporated rows were appended last, so they show up at the tail.
all_places.tail(10)

In [None]:
# Should just have NaN which is blank row in excel sheet
notInLayer = ~rhna[' city'].isin(all_places['NAMELSAD'])
rhna.loc[notInLayer, ' city']

In [None]:
# Split each option's total allocation into multifamily and single-family units using
# the place's multifamily share, then express each as growth over the existing stock
# of that type.
optionColumns = ['opt_x', 'opt_h', 'opt_m', 'opt_m_mod']

typeShare = {
    'mf': all_places['proportionMultifamily'],
    'sf': 1 - all_places['proportionMultifamily'],
}
for structureType in ('mf', 'sf'):
    for option in optionColumns:
        all_places[f'{option}_{structureType}'] = all_places[option] * typeShare[structureType]

existingStockColumn = {'mf': 'totalExistingMF', 'sf': 'totalExistingSF'}
for structureType in ('mf', 'sf'):
    for option in optionColumns:
        allocated = all_places[f'{option}_{structureType}']
        all_places[f'{option}_{structureType}_growth'] = allocated / all_places[existingStockColumn[structureType]]

In [None]:
# Where a place has no existing multifamily stock and a zero multifamily share,
# the growth ratio is 0/0; define it as zero growth instead.
noMultifamily = (all_places['totalExistingMF'] == 0) & (all_places['proportionMultifamily'] == 0)
mfGrowthColumns = ['opt_x_mf_growth', 'opt_h_mf_growth', 'opt_m_mf_growth', 'opt_m_mod_mf_growth']
all_places.loc[noMultifamily, mfGrowthColumns] = 0

In [None]:
# dump for qgis mapping
# NOTE(review): the shapefile format limits field names to 10 characters, so long
# column names such as 'opt_m_mod_mf_growth' will be shortened on write — check the
# output fields before relying on them.
all_places.to_file('../maps/places_with_rhna.shp')

In [None]:
# Panel titles
optnames = {
    'x': 'Staff Option',
    'h': 'Option H', 
    'm': 'Option M',
    'm_mod': 'Coastal Option'
}

mfsfnames = {
    'mf': 'Multifamily',
    'sf': 'Single family'
}

# 2 x 4 grid: one row per structure type, one column per option.
f, ax = plt.subplots(2, 4, figsize=(36, 12))

panels = itertools.product(enumerate(['x', 'h', 'm', 'm_mod']), enumerate(['mf', 'sf']))
for (col, opt), (row, mf) in panels:
    panel = ax[row, col]
    densityColumn = f'opt_{opt}_{mf}_dens'

    # Log-scaled allocated units per unit area (area scaled by 1e6; +1 keeps log finite at zero).
    scaledDensity = all_places[f'opt_{opt}_{mf}'] / all_places.area * 1e6
    all_places[densityColumn] = np.log(scaledDensity + 1)

    all_places.plot(column=densityColumn, legend=True, ax=panel)
    panel.set_title(f'{optnames[opt]}, {mfsfnames[mf]}')
    panel.set_xticks([])
    panel.set_yticks([])

## Summarize to PUMA level

Push the place-level growth rates down to tracts, then summarize up to PUMAs, weighting by the housing in each tract. I was going to weight SF and MF separately, but that assumes, for example, that all new apartments will go where apartments are now. Instead I just use a single average weighted by total housing units in each tract, with tracts assigned to PUMAs using the MABLE crosswalk — this assumes infill development, but not a specific single-family/multifamily distribution.

In [None]:
# California tracts, restricted to the six study counties and reprojected to EPSG:26943.
tracts = gp.read_file('../data/tracts/tl_2019_06_tract.shp')
inStudyCounty = tracts.COUNTYFP.isin(['025', '037', '059', '065', '071', '111'])
tracts = tracts[inStudyCounty].to_crs(epsg=26943)
tracts.plot()

In [None]:
# Peek at the tract attributes.
tracts.head()

In [None]:
# Number of tracts in the study area; compare with len(tractPlaces) after the spatial join.
len(tracts)

In [None]:
# Same attributes as tracts, but with each polygon replaced by its centroid point.
centroidPoints = tracts.centroid
tractCentroids = tracts.copy()
tractCentroids['geometry'] = centroidPoints

In [None]:
# Assign each tract to the place (or unincorporated area) that contains its centroid.
# NOTE(review): newer geopandas releases rename the `op` keyword to `predicate` —
# confirm against the installed version before upgrading.
tractPlaces = gp.sjoin(tractCentroids, all_places.to_crs(epsg=26943), op='within')

In [None]:
# Number of tracts that matched a place; a shortfall against len(tracts) means unmatched centroids.
len(tractPlaces)

In [None]:
# Peek at the joined tract/place attributes.
tractPlaces.head()

In [None]:
tractHousingPath = '../data/tract_housing/ACS_17_5YR_DP04_with_ann.csv'

# Data: second (label) row skipped; GEO.id2 is kept as a string so it can be matched to tract GEOIDs.
tractHousing = pd.read_csv(tractHousingPath, skiprows=[1], dtype={'GEO.id2': 'str'})

# Index: code -> label lookup, one line per column.
tractHousingIdx = pd.read_csv(tractHousingPath, nrows=1).T
tractHousingIdx.head()

In [None]:
# Attach each tract's HC01_VC03 value as tractTotalHsg (used later as the averaging weight).
tractUnits = tractHousing[['GEO.id2', 'HC01_VC03']].rename(columns={'HC01_VC03': 'tractTotalHsg'})
tractPlaces = tractPlaces.merge(
    tractUnits,
    how='left',
    left_on='GEOID_left',
    right_on='GEO.id2',
    validate='1:1',
)

In [None]:
# Tract-to-PUMA crosswalk; the second row is skipped and the code columns are read as
# strings so they can be concatenated into a GEOID.
tractPumas = pd.read_csv('../data/mable_tracts_pumas.csv', skiprows=[1], dtype={'county': 'str', 'tract': 'str', 'puma12': 'str'})

In [None]:
# Build the tract GEOID by concatenating the county code with the tract code stripped
# of its decimal point. regex=False makes '.' an explicit literal: as a regex it would
# match every character, and the default for `regex` differs between pandas versions.
tractPumas['geoid'] = tractPumas.county.str.cat(tractPumas.tract.str.replace('.', '', regex=False))

In [None]:
# Attach the PUMA code to each tract; validate='1:1' raises if a tract GEOID is
# repeated on either side.
tractPlaces = tractPlaces.merge(tractPumas, left_on='GEOID_left', right_on='geoid', how='left', validate='1:1')

In [None]:
# Row count should be unchanged by the two left merges.
len(tractPlaces)

In [None]:
# List the available columns (the *_growth ones are aggregated in the next cell).
tractPlaces.columns

In [None]:
grpd = tractPlaces.groupby('puma12')

def _pumaWeightedGrowth(structureType):
    """Housing-weighted mean of every '<structureType>_growth' column, per PUMA.

    The structure-type tag is removed from the resulting column names so the
    multifamily and single-family tables share the same columns.
    """
    growthColumns = [c for c in tractPlaces.columns if c.endswith(f'{structureType}_growth')]
    averaged = {}
    for growthColumn in growthColumns:
        averaged[growthColumn] = grpd.apply(
            lambda tractsInPuma: np.average(tractsInPuma[growthColumn], weights=tractsInPuma.tractTotalHsg)
        )
    return pd.DataFrame(averaged).rename(columns=lambda name: name.replace(f'_{structureType}', ''))

pumaMfGrowth = _pumaWeightedGrowth('mf')
pumaSfGrowth = _pumaWeightedGrowth('sf')

# Stack both tables and flatten the (type, puma) index into '<puma>_<type>' labels.
pumaGrowth = pd.concat([pumaMfGrowth, pumaSfGrowth], keys=['mf', 'sf'], sort=True)
pumaGrowth.index = [f'{puma}_{mfsf}' for mfsf, puma in pumaGrowth.index.values]

In [None]:
# Peek at the PUMA-level growth table.
pumaGrowth.head()

In [None]:
# Write the PUMA-level growth rates; row labels are '<puma>_<mf|sf>'.
pumaGrowth.to_csv('../data/puma_growth.csv')

In [None]:
# Total units allocated under the modified option (multifamily plus single-family),
# as a check against the worksheet total.
all_places['opt_m_mod_mf'].sum() + all_places['opt_m_mod_sf'].sum()

In [None]:
# Unweighted mean of each option's growth rate across all PUMA/structure-type rows.
pumaGrowth.mean()