# Vacant property hedonic

How much is vacant property worth?

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import os
from glob import glob

import geopandas as gp
import libpysal.weights
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlalchemy as sq
import statsmodels.api as sm

In [None]:
# Database connection string. Set VACANT_HEDONIC_DB_URI to point the notebook at a
# different server; when it is unset the original local database is used.
DB_URI = os.environ.get('VACANT_HEDONIC_DB_URI', 'postgresql://matthewc@localhost/matthewc')

In [None]:
# Sales of parcels with land-use code 'VL101' in single-family / rural-residential zoning,
# joined to the assessor record and to the gp16 parcel layer. Zero-price sales,
# intra-family transfers and sales flagged 'NA' (non-arms-length) are excluded in SQL.
vacant_sales_sql = '''
SELECT "Main_ImportParcelID", "Main_SalesPriceAmount", "Main_RecordingDate",
    "Main_SalesPriceAmountStndCode", "Main_PropertyUseStndCode", "Main_IntraFamilyTransferFlag",
    a."Main_LotSizeSquareFeet", p.scag_zn_co, p.puma, ST_Area(p.geog) AS computed_area_sqm
FROM diss.ztrans t
    INNER JOIN diss.zasmt a ON (a."Main_ImportParcelID" = t."PropertyInfo_ImportParcelID")
    INNER JOIN diss.gp16 p ON (lower(p.county) = lower(a."Main_County") AND p.clean_apn = a.clean_apn)
    WHERE a."Building_PropertyLandUseStndCode" = 'VL101'  -- TODO should probably use lu16 here
        AND "Main_SalesPriceAmount" > 0
        AND "Main_IntraFamilyTransferFlag" IS NULL
        AND "Main_SalesPriceAmountStndCode" <> 'NA' -- non-arms-length transaction
        AND scag_zn_co IN ('1110', '1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118', '1119', -- single-family
        '1150'); -- rural residential
'''
data = pd.read_sql(vacant_sales_sql, DB_URI)

In [None]:
# Number of sale records returned by the query
data.shape[0]

In [None]:
# Prefer the recorded lot size (square feet, converted at 10.7639 sq ft per sq m);
# where it is missing, fall back to the area computed from the parcel geometry.
recorded_sqm = data['Main_LotSizeSquareFeet'] / 10.7639
data['lot_size_sqm'] = recorded_sqm.where(recorded_sqm.notna(), data['computed_area_sqm'])

In [None]:
# Parse the recording date (YYYY-MM-DD) into a datetime column
recording_dates = pd.to_datetime(data['Main_RecordingDate'], format='%Y-%m-%d')
data['Main_RecordingDate'] = recording_dates

In [None]:
# Keep only sales recorded in 2008 or later. Take an explicit copy: later cells add
# columns to `data`, and assigning into a filtered slice of the query result triggers
# pandas' SettingWithCopyWarning.
data = data[data.Main_RecordingDate.dt.year >= 2008].copy()

In [None]:
# Summary statistics of the sale price, rounded to whole units
data['Main_SalesPriceAmount'].describe().round(0)

# Merge PUMAs with few observations

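PUMAs with only a handful of sales are merged into a neighboring PUMA, so that their fixed-effect dummies do not inflate the condition number of the regression's design matrix.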

In [None]:
# Read the PUMA shapefile and dissolve it so there is one row per PUMA code,
# which becomes the index
puma_shapes = gp.read_file('/Volumes/Pheasant Ridge/IPUMS/pumas/socal_pumas_projected.shp')
pumas = puma_shapes.dissolve(by='PUMA')

In [None]:
# Number of sales observed in each PUMA
puma_vcs = data['puma'].value_counts()

In [None]:
# Queen weights between PUMAs, flattened to an adjacency list that is
# indexed by the focal PUMA (one row per focal/neighbor pair)
puma_contiguity = libpysal.weights.Queen(pumas.geometry, ids=pumas.index.to_list())
adjacency = puma_contiguity.to_adjlist()
queen_weights = adjacency.set_index('focal')

In [None]:
# PUMAs with fewer than five sales
is_sparse = puma_vcs < 5
pumas_to_merge = puma_vcs.loc[is_sparse].index

In [None]:
# Display the PUMAs that will be merged into a neighbor
pumas_to_merge

In [None]:
# Merge each sparse PUMA into the adjacent PUMA with the most sales.
merges = {}
for puma in pumas_to_merge:
    # Select with a list so the result is always a Series: a PUMA with exactly one
    # neighbor would otherwise come back as a bare string, and iterating over that
    # yields single characters rather than PUMA codes.
    candidates = queen_weights.loc[[puma], 'neighbor']
    # only neighbors that have sales of their own appear in puma_vcs
    with_sales = candidates[candidates.isin(puma_vcs.index)].tolist()
    merges[puma] = puma_vcs.loc[with_sales].idxmax()
# manually do this one, I guess it's isolated and ends up merged with another sub-5 PUMA
merges['03765'] = '03767'
merges

In [None]:
# PUMA code after merging; PUMAs that are not keys of `merges` keep their own code
data['merged_puma'] = data['puma'].replace(to_replace=merges)

In [None]:
# Drop sales priced below 1,000: prices that low were probably just recorded incorrectly
has_plausible_price = data['Main_SalesPriceAmount'].ge(1000)
data = data.loc[has_plausible_price].copy()

In [None]:
# Calendar year in which the sale was recorded
data['year'] = data['Main_RecordingDate'].dt.year

In [None]:
# now build the model: natural logs of sale price and of lot area
for raw_col, log_col in (('Main_SalesPriceAmount', 'logprice'), ('lot_size_sqm', 'logarea_sqm')):
    data[log_col] = np.log(data[raw_col])

In [None]:
# adding dummies for year means we are estimating the appreciation within the model, using the observed data
# dtype=float: pandas >= 2.0 emits boolean dummies by default, and a design matrix mixing
# float and bool columns converts to an object array, which statsmodels refuses to fit.
dummies = pd.get_dummies(data[['logarea_sqm', 'merged_puma', 'year']],
                         columns=['merged_puma', 'year'], dtype=float)
# PUMA 07104 and year 2017 are dropped, making them the reference categories
exog = sm.add_constant(dummies.drop(columns=['merged_puma_07104', 'year_2017']))
mod = sm.OLS(data.logprice, exog)
fit = mod.fit()
fit.summary()

In [None]:
# Standard deviation of the regression residuals
fit.resid.std()

In [None]:
# One row per estimated coefficient with its headline statistics
res = pd.DataFrame({
    'Coef': fit.params,
    'Std. Err.': fit.bse,
    't-value': fit.tvalues,
    'p-value': fit.pvalues
})

# leave the PUMA fixed effects out of the printed table
shown_rows = [label for label in res.index if 'puma' not in label]
res = res.loc[shown_rows]

# human-readable row labels: fixed names first, then the year-dummy prefix
fixed_labels = {'const': 'Constant', 'logarea_sqm': 'ln(lot area) (square meters)'}
res = res.rename(index=lambda label: fixed_labels.get(label, label).replace('year_', 'Transaction year: '))

def _sigstars (pval):
    if pval < 0.001:
        return '***'
    elif pval < 0.01:
        return '**'
    elif pval < 0.05:
        return '*'
    elif pval < 0.1:
        return '.'
    else:
        return ''

def sigstars (coefs, pvals):
    """Return the coefficients as strings, each suffixed with its significance marker.

    coefs: Series of coefficients (converted with astype('str')).
    pvals: Series of p-values, one per coefficient; each is mapped through _sigstars.
    """
    stars = pvals.apply(_sigstars)
    coef_text = coefs.astype('str')
    return coef_text.str.cat(stars)

# Coefficients become strings: the rounded estimate followed by its significance stars
res['Coef'] = sigstars(res['Coef'].round(2), res['p-value'])

# the remaining numeric columns are rounded to two decimals
numeric_cols = ['Std. Err.', 't-value', 'p-value']
res[numeric_cols] = res[numeric_cols].round(2)

# footer rows: only the Coef column is filled, the other columns stay NaN
res.loc['Transaction year: 2017', 'Coef'] = 'base'
res.loc['Sample size', 'Coef'] = f'{int(fit.nobs):,d}'
res.loc['R^2', 'Coef'] = fit.rsquared.round(2)
res.loc['Adj. R^2', 'Coef'] = fit.rsquared_adj.round(2)
res

In [None]:
# LaTeX source for the table, with missing cells rendered as empty strings
latex_table = res.fillna('').to_latex()
print(latex_table)

In [None]:
# Basemap layers for the fixed-effects map, all reprojected to EPSG:26911
land = gp.read_file('../../sorting/data/ne_10m_land.shp').to_crs(epsg=26911)

# every roads shapefile in the directory, stacked into a single frame
road_layers = []
for shp_path in glob('../../sorting/data/tl_roads/*.shp'):
    road_layers.append(gp.read_file(shp_path).to_crs(epsg=26911))
roads = pd.concat(road_layers, ignore_index=True)

# the six named counties within state FIPS '06'
counties = gp.read_file('../../sorting/data/counties/tl_2019_us_county.shp').to_crs(epsg=26911)
in_state = counties.STATEFP == '06'
in_region = counties.NAME.isin(['Los Angeles', 'Ventura', 'Orange', 'Riverside', 'San Bernardino', 'Imperial'])
counties = counties[in_state & in_region]

In [None]:
# Map of the estimated PUMA fixed effects.
#
# `merges` is keyed by bare five-digit PUMA codes, whereas the join below matches
# GEOID against a fixed-effect index that carries a '06' prefix. GEOID is therefore
# assumed to be '06' + PUMA code (the join could not match otherwise), so the merge
# mapping is prefixed too. Replacing with the bare codes matches nothing, and the
# merged PUMAs then fall out of the inner join and off the map.
prefixed_merges = {'06' + src: '06' + dst for src, dst in merges.items()}
pumas['merged_puma'] = pumas.GEOID.replace(prefixed_merges)

f, ax = plt.subplots(1, 1, figsize=(9, 5.5))

# the fixed effects are the coefficients whose name contains 'puma';
# the last five characters of each name are the PUMA code
fes = fit.params.loc[[i for i in fit.params.index if 'puma' in i]].rename('fe').reset_index()
fes['puma'] = '06' + fes['index'].str.slice(-5)
fes = fes.set_index('puma')

# add back base effect
fes.loc['0607104'] = 0

# validate='m:1': several PUMAs may share one merged fixed effect, never the reverse
pumas_fes = pumas.merge(fes, left_on='merged_puma', right_index=True, validate='m:1')

pumas_fes.to_crs(epsg=26911).plot(ax=ax, column='fe', cmap='Blues', scheme='quantiles', legend=True)
roads.plot(color='#888888', ax=ax, lw=0.5)
counties.plot(edgecolor='#000',  facecolor='none', ax=ax, lw=1)
ax.set_ylim(3.59e6, 3.95e6)
ax.set_xlim(2.74e5, 7.7e5)
# no axis ticks on the map
ax.set_xticks([])
ax.set_yticks([])

plt.savefig('../../dissertation/fig/construction/vacant_fes.png', dpi=300, bbox_inches='tight')


## Now estimate the value of all vacant single-family and rural-residential zoned land in gp16

In [None]:
# Parcels in gp16 with the same zoning codes and land-use code as the estimation sample,
# with the parcel area computed from its geometry.
# NOTE(review): Building_PropertyLandUseStndCode is unquoted here, so PostgreSQL folds it
# to lowercase, whereas the sales query reads the quoted mixed-case column from
# diss.zasmt. Confirm that diss.gp16 really has a lowercase column of this name.
vacant_parcels_sql = '''
    SELECT gid, puma, ST_Area(geog) AS area_sqm FROM diss.gp16 WHERE scag_zn_co IN ('1110', '1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118', '1119', -- single-family
        '1150') AND Building_PropertyLandUseStndCode = 'VL101'
'''
est_data = pd.read_sql(vacant_parcels_sql, DB_URI)

In [None]:
# Natural log of parcel area, matching the regressor used in the model
est_data['logarea_sqm'] = np.log(est_data['area_sqm'])

In [None]:
# Apply the same PUMA merges that were used for the estimation sample
est_data['merged_puma'] = est_data['puma'].replace(to_replace=merges)

In [None]:
# set all dummies to 0, 2017 is left out year, to account for inflation
# The column names come from the fitted design matrix rather than a hard-coded year
# range, so this stays in step with whatever years the estimation sample contains.
for col in exog.columns:
    if col.startswith('year_'):
        est_data[col] = 0

In [None]:
# Half the residual variance (sum of squared residuals / residual degrees of freedom / 2),
# added to the log prediction before exponentiating.
correction_factor = np.sum(fit.resid ** 2) / (fit.nobs - len(fit.params)) / 2

# Build the prediction matrix with exactly the columns the model was fitted on:
# - dtype=float keeps the dummies numeric (pandas >= 2.0 would otherwise emit bools);
# - the constant is set directly, because sm.add_constant skips adding one when some
#   other column already happens to be a non-zero constant;
# - reindex drops columns the model does not use and zero-fills model columns that are
#   absent here (e.g. a PUMA with no vacant parcel), where plain column selection
#   would raise KeyError.
design = pd.get_dummies(est_data, columns=['merged_puma'], dtype=float)
design['const'] = 1.0
design = design.reindex(columns=exog.columns, fill_value=0.0)
est_data['npv'] = np.exp(fit.predict(design) + correction_factor)

# A parcel in a PUMA that never appears in the estimation sample has every PUMA dummy at
# zero and would silently be priced with the base PUMA's effect; leave it missing instead.
modeled_pumas = set(data.merged_puma)
est_data.loc[~est_data.merged_puma.isin(modeled_pumas), 'npv'] = np.nan

In [None]:
# Display the correction added to the log predictions
correction_factor

In [None]:
# Stage the predictions in a scratch table (the next cell copies them into diss.gp16 and
# drops it). if_exists='replace' lets the cell be re-run after a failed attempt left the
# table behind; index=False omits the DataFrame index, which the update never reads.
est_data[['gid', 'npv']].to_sql('vacant_npv', DB_URI, schema='diss', index=False, if_exists='replace')

In [None]:
# Copy the staged predictions into diss.gp16.vacant_npv and drop the scratch table.
# engine.begin() runs the three statements in one transaction that is committed on
# success and rolled back on error. SQLAlchemy 2.x no longer autocommits on a plain
# connect() and no longer accepts raw SQL strings, hence the text() wrappers.
eng = sq.create_engine(DB_URI)
with eng.begin() as trans:
    trans.execute(sq.text('ALTER TABLE diss.gp16 ADD COLUMN vacant_npv DOUBLE PRECISION'))
    trans.execute(sq.text('UPDATE diss.gp16 SET vacant_npv = npv FROM diss.vacant_npv v WHERE gp16.gid = v.gid'))
    trans.execute(sq.text('DROP TABLE diss.vacant_npv'))