In [96]:
import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import contextily as ctx
from shapely.wkt import loads


In [118]:
dbi = gpd.read_file('./data/Building Permits.geojson', rows=10000)

In [4]:
parcels = pd.read_csv('./data/Blue Sky Code and Inputs/SF_Logistic_Data.csv')

In [5]:
allParcels = gpd.read_file('./data/Parcels   Active and Retired/parcels.shp')

In [6]:
sites = gpd.read_file('./data/site_inventory/xn--Bay_Area_Housing_Opportunity_Sites_Inventory__20072023_-it38a.shp')

In [7]:
tax = pd.read_excel('./data/tax_assessor/2019.8.20__SF_ASR_Secured_Roll_Data_2007-2008.xlsx')

In [51]:
tax8 = pd.read_excel('./data/tax_assessor/2019.8.20__SF_ASR_Secured_Roll_Data_2008-2009.xlsx')

In [8]:
use_codes = pd.read_excel('./data/tax_assessor/2019.8.20__SF_ASR_Secured_Roll_Data_2007-2008.xlsx', 
                          sheet_name='Class & Use Code')

In [9]:
neighborhood_codes = pd.read_excel('./data/tax_assessor/2019.8.20__SF_ASR_Secured_Roll_Data_2007-2008.xlsx', 
                                   sheet_name='Neigborhood Code')

### Training Set is RHNA 4

In [30]:
trainParcels = parcels[np.logical_and(parcels.year >= 2007, parcels.year <= 2015)]

In [31]:
trainY = trainParcels.groupby('MapBlkLot_Master')['Developed'].agg(lambda x: x.ne(0).sum())

In [32]:
trainY.sum()

308

In [33]:
round(trainY.mean(), 3) * 100

0.2

In [34]:
trainX = trainParcels[trainParcels.year == 2007]

No duplicative index.

In [35]:
nunique_lots = trainParcels[trainParcels.year == 2007].MapBlkLot_Master.nunique()
n_lots = trainParcels[trainParcels.year == 2007].shape[0]
assert nunique_lots == n_lots

In [36]:
trainX.MapBlkLot_Master.isin(trainY.index).mean()

1.0

In [37]:
trainDf = pd.merge(trainX.drop('Developed', axis=1), trainY, left_on='MapBlkLot_Master', right_index=True)

In [38]:
trainDf.Developed.value_counts()

0    152910
1       306
2         1
Name: Developed, dtype: int64

In [39]:
def clean_apn(apn):
    apn = ''.join(apn.split(' '))
    if len(apn) < 9:
        return apn
    block_length = 4
    #if apn[block_length].isalpha():
    #    return apn[:block_length] + apn[block_length+1:]
    return apn
 
    
tax['MapBlkLot_Master'] = tax.RP1PRCLID.apply(clean_apn)

### Developed parcels

In [41]:
built = trainDf.loc[trainDf.Developed > 0,]

In [43]:
built.MapBlkLot_Master.isin(tax.MapBlkLot_Master).mean().round(2)

0.45

In [46]:
built.MapBlkLot_Master.str.len().value_counts()

7    286
8     21
Name: MapBlkLot_Master, dtype: int64

In [47]:
built.MapBlkLot_Master

920413      0041103
922617     0129013A
923873      0184042
924274      0196030
925507      0279057
             ...   
1073065     8711025
1073066     8711019
1073070     8711033
1073150     8720117
1073152     8720018
Name: MapBlkLot_Master, Length: 307, dtype: object

In [153]:
dbi.columns

Index(['record_id', 'proposed_construction_type_description', 'issued_date',
       'existing_construction_type_description', 'zipcode',
       'neighborhoods_analysis_boundaries', 'fire_only_permit', 'proposed_use',
       'existing_use', 'description', 'tidf_compliance', 'revised_cost',
       'existing_construction_type', 'site_permit', 'plansets',
       'permit_creation_date', 'status_date',
       'first_construction_document_date', 'existing_units', 'filed_date',
       'street_name', 'block', 'estimated_cost', 'permit_expiration_date',
       'unit_suffix', 'proposed_construction_type', 'permit_type_definition',
       'status', 'completed_date', 'permit_number', 'street_number_suffix',
       'lot', 'number_of_existing_stories', 'street_suffix', 'proposed_units',
       'unit', 'permit_type', 'number_of_proposed_stories',
       'voluntary_soft_story_retrofit', 'supervisor_district', 'street_number',
       'structural_notification', 'geometry', 'units', 'apn',
       'na_exis

In [154]:
dbi['units'] = dbi.proposed_units.fillna(0).astype(int) - dbi.existing_units.fillna(0).astype(int)

In [155]:
dbi.status.value_counts()

complete       5448
expired        1356
cancelled      1155
issued         1142
filed           623
filing          103
withdrawn        76
approved         66
disapproved      23
reinstated        6
Name: status, dtype: int64

In [156]:
dbi.columns

Index(['record_id', 'proposed_construction_type_description', 'issued_date',
       'existing_construction_type_description', 'zipcode',
       'neighborhoods_analysis_boundaries', 'fire_only_permit', 'proposed_use',
       'existing_use', 'description', 'tidf_compliance', 'revised_cost',
       'existing_construction_type', 'site_permit', 'plansets',
       'permit_creation_date', 'status_date',
       'first_construction_document_date', 'existing_units', 'filed_date',
       'street_name', 'block', 'estimated_cost', 'permit_expiration_date',
       'unit_suffix', 'proposed_construction_type', 'permit_type_definition',
       'status', 'completed_date', 'permit_number', 'street_number_suffix',
       'lot', 'number_of_existing_stories', 'street_suffix', 'proposed_units',
       'unit', 'permit_type', 'number_of_proposed_stories',
       'voluntary_soft_story_retrofit', 'supervisor_district', 'street_number',
       'structural_notification', 'geometry', 'units', 'apn',
       'na_exis

In [157]:
date_cols = [c for c in dbi.columns if 'Date' in c]
dbi[date_cols] = dbi[date_cols].apply(pd.to_datetime)
dbi['apn'] = dbi['block'].astype(str) + dbi['lot'].astype(str)

In [158]:
dbi['na_existing_units'] = dbi['existing_units'].isna()
relevant_uses = [
    'apartments', '1 family dwelling', '2 family dwelling',
    'residential hotel', 'misc group residns.', 'artist live/work',
    'convalescent home', 'accessory cottage', 'nursing home non amb',
    'orphanage', 'r-3(dwg) nursing', 'nursing home gt 6'
]
rhna_permits = dbi[
    dbi['units'] > 0
    & dbi['proposed_use'].isin(relevant_uses)
    & dbi['permit_type'].isin([1, 2, 3, 8])
].copy()

rhna_permits.query('not (`permit_type` == 8 and na_existing_units)', inplace=True)
rhna_permits.query('not (`permit_type` == 3 and na_existing_units)', inplace=True)


# Add / rename columns to fit ABAG format
rhna_permits['permyear'] = rhna_permits['issued_date'].dt.year

# Address is split up into multiple columns. Must re-combine.
id_on_street = rhna_permits['street_number'].astype(str) + " " + rhna_permits['street_number_suffix'].fillna("")
street = rhna_permits['street_name'] + ' ' + rhna_permits['street_suffix']
rhna_permits['address'] = id_on_street + ' ' + street

rhna_permits = rhna_permits.rename(columns={'Location': 'geometry'})

In [159]:
rhna_permits = rhna_permits.loc[np.logical_and(rhna_permits.status_date.dt.year >= 2007,
                                               rhna_permits.status_date.dt.year <= 2015),]

In [160]:
rhna_permits.apn

213     4591D131
214     4591D131
384      5513012
385      5513011
556      1547004
561     4591D131
562     4591D131
1319     3995001
3463     7552023
5897    1129014A
9196    1460017A
Name: apn, dtype: object

In [161]:
rhna_permits.apn.isin(parcels.MapBlkLot_Master).mean()

0.9090909090909091

In [163]:
rhna_permits.units

213     18
214      8
384      2
385      1
556      1
561     10
562     11
1319     8
3463     1
5897     2
9196     3
Name: units, dtype: int64