# Identify permitted and potentially unpermitted attached and detached units in San Jose (2020)

In [1]:
import os
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Point
import numpy as np
import math

In [2]:
# File paths
# Root of the shared project directory; every other path is derived from it.
OAK_FP = '/oak/stanford/groups/deho/building_compliance/'

# Supplementary San Jose layers (zoning districts)
ZONING_FP = os.path.join(OAK_FP, 'san_jose_suppl', 'san_jose_Zoning_Districts.geojson')

# Permit-matching working directory and the residential parcels stored inside it
INPUT_FP = os.path.join(OAK_FP, 'outputs', 'Permit-Matching')
SJ_RES_PARCELS_FP = os.path.join(INPUT_FP, 'inputs', 'san_jose_parcels_res.geojson')

# Building footprints. BUILD_FP keeps a '{}' placeholder that is filled in
# with the inference year via BUILD_FP.format(...); OSM_FP is fixed to 2020.
BUILD_FP = os.path.join(OAK_FP, 'outputs', 'cbg-inference-{}', 'inference_building_processed')
OSM_FP = os.path.join(OAK_FP, 'outputs', 'cbg-inference-2020', 'osm_building_processed')

In [3]:
# Load data
# * Residential parcels (rows without an APN are dropped: the APN is the key
#   used to look parcels up later on)
sj_parcels_res = gpd.read_file(SJ_RES_PARCELS_FP)
sj_parcels_res = sj_parcels_res[sj_parcels_res['APN'].notna()]

# * Building permits
bldg_active = gpd.read_file(os.path.join(INPUT_FP, 'inputs', 'permits', 'bldg_active.geojson'))
bldg_recent = gpd.read_file(os.path.join(INPUT_FP, 'inputs', 'permits', 'bldg_recent.geojson'))
bldg_expired = gpd.read_file(os.path.join(INPUT_FP, 'inputs', 'permits', 'bldg_expired.geojson'))

# * Zoning
# Keep districts whose code contains R-1, R-2 or R-M, except the exact code 'R-MH'.
# na=False makes districts with a missing ZONING code evaluate to "not residential"
# instead of putting NaN into the mask (which boolean indexing rejects).
sj_zoning = gpd.read_file(ZONING_FP)
zoning_code = sj_zoning['ZONING']
is_r1 = zoning_code.str.contains('R-1', na=False)
is_r2 = zoning_code.str.contains('R-2', na=False)
is_rm = zoning_code.str.contains('R-M', na=False) & (zoning_code != 'R-MH')
sj_residential = sj_zoning[is_r1 | is_r2 | is_rm]

# * parcel+permit
# The CSVs store geometries as WKT text; parse them back into geometry objects.
sj_parcel_permit = pd.read_csv(os.path.join(INPUT_FP, 'outputs', 'parcel_permit_found.csv'))
sj_permit_noparcel = pd.read_csv(os.path.join(INPUT_FP, 'outputs', 'parcel_permit_notfound.csv'))
sj_parcel_permit['geometry_parcel'] = gpd.GeoSeries.from_wkt(sj_parcel_permit['geometry_parcel'])
sj_parcel_permit['geometry_permit'] = gpd.GeoSeries.from_wkt(sj_parcel_permit['geometry_permit'])
sj_permit_noparcel['geometry'] = gpd.GeoSeries.from_wkt(sj_permit_noparcel['geometry'])

Handle active permits differently than expired permits
- Expired permits barely report dwelling units and square footage
- Should we filter as finely as we do for active permits, or filter only on the work description (as we do for expired permits) so that we catch ALL the construction permits we can find? I worry that some of the buildings we infer will fall under non-SFR permits.

Keep this block of code in case we want to filter more specifically for possible dwelling units. For now, we work with the more relaxed conditions so that we have more candidate permits to compare against.

In [4]:
# Filter permits
# Keep only construction-type permits from each source and tag every row with
# the source it came from. The source frames are not modified (``assign``
# returns a copy), so this cell gives the same result when it is re-run.
CONSTRUCTION_WORKDESC = ['New Construction', 'Additions/Alterations']
permit_sources = {'active': bldg_active, 'recent': bldg_recent, 'expired': bldg_expired}

permits = pd.concat(
    [
        source[source['WORKDESC'].isin(CONSTRUCTION_WORKDESC)].assign(permit_status=status)
        for status, source in permit_sources.items()
    ],
    ignore_index=True,
)

# Label the coordinates as EPSG:4326 (no reprojection is performed).
permits = permits.set_crs("EPSG:4326", allow_override=True)

# Functions

In [5]:
def match_parcel(parcel_apn):
    """
    Run the whole matching pipeline for a single parcel.

    The parcel's building footprints (2020 and 2016 inferences, OSM) and its
    permits are gathered with ``parcel_level_data`` (using the module-level
    ``sj_parcels_res`` and ``sj_parcel_permit`` tables), merged into one set
    of buildings by ``process_OSM_data``, and finally compared against the
    permits by ``process_permit_data``.

    Returns whatever ``process_permit_data`` returns for this parcel.
    """
    # Parcel-level inputs
    buildings_2020, buildings_2016, buildings_osm, parcel_permits = parcel_level_data(
        parcel_apn, sj_parcels_res, sj_parcel_permit)

    # Reconcile the inferred footprints with OSM
    merged_buildings = process_OSM_data(buildings_2020, buildings_2016, buildings_osm)

    # Compare the resulting buildings against the parcel's permits
    return process_permit_data(parcel_apn, parcel_permits, merged_buildings)

In [6]:
def parcel_level_data(parcel_apn, sj_parcels_res, sj_parcel_permit):
    """
    Collect the building footprints and permits that belong to one parcel.

    Parameters
    ----------
    parcel_apn : APN of the parcel to look up.
    sj_parcels_res : parcel table with 'APN' and 'geometry' columns.
    sj_parcel_permit : permit table with an 'APN_parcel' column.

    Returns
    -------
    tuple of (inferred_buildings_2020_parcel, inferred_buildings_2016_parcel,
    osm_buildings_parcel, permits_parcel). Each element is None when nothing
    was found for the parcel.

    Raises
    ------
    IndexError if no parcel has the requested APN.
    """
    parcel_bounds = sj_parcels_res[sj_parcels_res['APN'] == parcel_apn]
    if parcel_bounds.empty:
        raise IndexError('No parcel found with APN {!r}'.format(parcel_apn))
    # If several rows share the APN, only the first geometry is used.
    parcel_geom = parcel_bounds['geometry'].values[0]

    # Note: gpd.clip is deliberately not used here because it truncates the
    # footprints at the parcel boundary. Whole footprints are read instead and
    # kept only when most of their area lies inside the parcel.
    def mask_buildings1(parcel_bounds, fp):
        """Read the footprints from `fp` that fall mostly inside `parcel_bounds`."""
        df_out = gpd.read_file(fp, mask=parcel_bounds)
        # Despite the column name this is not an intersection-over-union: it is
        # the share of each footprint's own area that lies inside the parcel.
        df_out['iou'] = df_out['geometry'].intersection(parcel_bounds).area/df_out['geometry'].area
        df_out = df_out[df_out['iou'] > 0.7]
        if df_out.empty:
            return None
        return df_out

    inferred_buildings_2020_parcel = mask_buildings1(parcel_geom, BUILD_FP.format('2020'))
    inferred_buildings_2016_parcel = mask_buildings1(parcel_geom, BUILD_FP.format('2016'))
    osm_buildings_parcel = mask_buildings1(parcel_geom, OSM_FP)

    permits_parcel = sj_parcel_permit[sj_parcel_permit['APN_parcel'] == parcel_apn]
    if permits_parcel.empty:
        # Same "nothing found" sentinel as the building frames above.
        permits_parcel = None

    return inferred_buildings_2020_parcel, inferred_buildings_2016_parcel, osm_buildings_parcel, permits_parcel

In [7]:
# Minimum area of the largest protruding piece for a main building to count as
# expanded. Compared against the 'area' value returned by
# compute_largest_protruding_poly (presumably square metres -- TODO confirm).
MIN_ATTACHED_SIZE = 40
MIN_ATTACHED_SIZE_2020_2016_DIFF = 40
MIN_ATTACHED_SIZE_OSM_2016_DIFF = 40

def process_OSM_data(inferred_buildings_2020_parcel, inferred_buildings_2016_parcel, osm_buildings_parcel):
    """
    Merge a parcel's 2020 inferred footprints with its OSM footprints.

    Each argument is a frame of footprints with 'area' and 'geometry' columns,
    or None when the source has nothing for the parcel. The 2016 inferences
    are only used as a baseline to detect an expansion of the main building.

    Returns a frame with exactly these columns:
      - main_building_flag: True for the parcel's main building
      - OSM_flag: True when the geometry comes from OSM
      - expansion_OSM_flag: True when the main building protrudes beyond its OSM footprint
      - expansion_2016_flag: True when the main building protrudes beyond the
        2016 main building (None when there is no 2016 baseline)
      - geometry: building geometry
    """
    gpd_cols = ['main_building_flag', 'OSM_flag', 'expansion_OSM_flag', 'expansion_2016_flag', 'geometry']

    # Drop OSM index_left column (on a new frame, so the caller's frame is untouched)
    if osm_buildings_parcel is not None and 'index_left' in osm_buildings_parcel.columns:
        osm_buildings_parcel = osm_buildings_parcel.drop('index_left', axis=1)

    # Identify main 2016 building (largest area) to check for expansions in cases below
    inference_main_build_2016 = None
    inference_main_build_geom_2016 = None
    if inferred_buildings_2016_parcel is not None:
        inferred_buildings_2016_parcel = inferred_buildings_2016_parcel.sort_values(
            'area', ascending=False)
        # Kept as a one-row frame (not a Series) so it can stand in for the
        # main-building frame in Case 3.
        inference_main_build_2016 = inferred_buildings_2016_parcel.iloc[[0]]
        inference_main_build_geom_2016 = inference_main_build_2016['geometry'].iloc[0]

    # Case 1: No inference nor OSM data -----------------
    if inferred_buildings_2020_parcel is None and osm_buildings_parcel is None:
        return gpd.GeoDataFrame(geometry=[], columns=gpd_cols)

    # Case 2: No inference but OSM data -----------------
    # We fully rely on OSM data
    if inferred_buildings_2020_parcel is None:
        parcel_buildings = osm_buildings_parcel.sort_values('area', ascending=False).copy()
        # The main building is the first row after sorting, i.e. the largest
        # footprint. This is positional: index labels survive the sort and the
        # label 0 need not belong to the largest building.
        is_largest = [position == 0 for position in range(len(parcel_buildings))]
        parcel_buildings['main_building_flag'] = is_largest
        parcel_buildings['OSM_flag'] = True
        parcel_buildings['expansion_OSM_flag'] = False
        parcel_buildings['expansion_2016_flag'] = None

        # Check for OSM expansion vs 2016
        if inference_main_build_geom_2016 is not None:
            parcel_buildings['expansion_2016_flag'] = False

            osm_main_build_geom = parcel_buildings['geometry'].iloc[0]
            union_main_build = osm_main_build_geom.union(inference_main_build_geom_2016)
            diff_OSM_16 = compute_largest_protruding_poly(
                union_main_build, inference_main_build_geom_2016)

            if diff_OSM_16['area'] > MIN_ATTACHED_SIZE_OSM_2016_DIFF:
                # Only the main building carries the expansion flag
                parcel_buildings['expansion_2016_flag'] = is_largest
        return parcel_buildings[gpd_cols]

    # Case 3: Inference and no OSM data -----------------
    if osm_buildings_parcel is None:

        # i. Identify main building in 2020 data as the largest polygon
        inferred_buildings_2020_parcel = inferred_buildings_2020_parcel.sort_values(
            'area', ascending=False)
        inference_main_build = inferred_buildings_2020_parcel.iloc[[0]]
        inference_main_build_geom = inference_main_build['geometry'].iloc[0]
        expansion_2016_flag = None

        parcel_buildings = inference_main_build.copy()

        # ii. Compare to 2016 footprint
        if inference_main_build_geom_2016 is not None:
            expansion_2016_flag = False
            union_main_build = inference_main_build_geom.union(inference_main_build_geom_2016)
            diff_20_16 = compute_largest_protruding_poly(union_main_build, inference_main_build_geom_2016)

            if diff_20_16['area'] > MIN_ATTACHED_SIZE_2020_2016_DIFF:
                # NOTE(review): when an expansion is detected the 2016 main
                # building is reported instead of the 2020 one -- confirm this
                # is the intended geometry.
                parcel_buildings = inference_main_build_2016.copy()
                expansion_2016_flag = True

        parcel_buildings['expansion_2016_flag'] = expansion_2016_flag
        parcel_buildings['expansion_OSM_flag'] = None
        parcel_buildings['main_building_flag'] = True
        parcel_buildings['OSM_flag'] = False
        parcel_buildings = parcel_buildings[gpd_cols]

        # iii. Add inferred small buildings (every footprint but the largest)
        inferred_small = inferred_buildings_2020_parcel.iloc[1:].copy()
        inferred_small['expansion_OSM_flag'] = False
        inferred_small['expansion_2016_flag'] = False
        inferred_small['OSM_flag'] = False
        inferred_small['main_building_flag'] = False
        parcel_buildings = pd.concat([parcel_buildings, inferred_small[gpd_cols]])

        return parcel_buildings[gpd_cols]

    # Case 4: Inference and OSM data -----------------
    parcel_buildings = gpd.GeoDataFrame(geometry=[])

    # i. Identify main building(s) in OSM and inferences
    osm_main_build = osm_buildings_parcel.sort_values('area', ascending=False).iloc[[0]]
    osm_main_build_geom = osm_main_build['geometry'].iloc[0]

    # Inferred footprints that touch the OSM main building jointly form the
    # inferred main building.
    inference_main_build = inferred_buildings_2020_parcel.sjoin(
        osm_main_build, how='left', predicate='intersects')
    inference_main_build = inference_main_build.loc[~inference_main_build['index_right'].isna()]
    inference_main_build_geom = inference_main_build.geometry.unary_union
    inference_main_build = inference_main_build.drop('index_right', axis=1)

    expansion_OSM_flag = False
    osm_flag = True
    # If there is no overlap between model inferences and OSM, we use OSM main build
    if len(inference_main_build) == 0:
        inference_main_build_geom = osm_main_build_geom

    # * Check for building expansion
    # ** From OSM
    union_main_build = inference_main_build_geom.union(osm_main_build_geom)
    diff_main_build = compute_largest_protruding_poly(union_main_build, osm_main_build_geom)

    parcel_main_build_geom = osm_main_build_geom
    if diff_main_build['area'] > MIN_ATTACHED_SIZE:
        # The inference sticks out of the OSM footprint: report the union
        # and stop treating the geometry as pure OSM.
        expansion_OSM_flag = True
        osm_flag = False
        parcel_main_build_geom = union_main_build

    # ** From 2016
    expansion_2016_flag = None
    if inference_main_build_geom_2016 is not None:
        expansion_2016_flag = False
        union_main_build = inference_main_build_geom.union(inference_main_build_geom_2016)
        diff_20_16 = compute_largest_protruding_poly(union_main_build, inference_main_build_geom_2016)

        if diff_20_16['area'] > MIN_ATTACHED_SIZE_2020_2016_DIFF:
            expansion_2016_flag = True

    # Generate main building gpd and append to parcel buildings
    parcel_main_build = gpd.GeoDataFrame(geometry=[parcel_main_build_geom], crs='EPSG:4326')
    parcel_main_build['main_building_flag'] = True
    parcel_main_build['expansion_OSM_flag'] = expansion_OSM_flag
    parcel_main_build['expansion_2016_flag'] = expansion_2016_flag
    parcel_main_build['OSM_flag'] = osm_flag
    parcel_buildings = pd.concat([parcel_buildings, parcel_main_build[gpd_cols]])

    # ii. Match small buildings
    # OSM: everything but the largest footprint. Inferences: everything that
    # does not touch the inferred main building.
    osm_buildings_parcel_small = osm_buildings_parcel.sort_values(
        'area', ascending=False).iloc[1:].copy()
    # 'GEOID_left' is the suffixed column name produced by the sjoin above.
    inferred_buildings_2020_parcel_small = inferred_buildings_2020_parcel.sjoin(
        inference_main_build[['GEOID_left', 'geometry']], how='left', predicate='intersects')
    inferred_buildings_2020_parcel_small = inferred_buildings_2020_parcel_small.loc[
        inferred_buildings_2020_parcel_small['index_right'].isna()]
    inferred_buildings_2020_parcel_small = inferred_buildings_2020_parcel_small.drop(
        'index_right', axis=1)

    parcel_small_build = match_small_buildings(
        inferred_buildings_2020_parcel_small, osm_buildings_parcel_small, gpd_cols)

    parcel_buildings = pd.concat([parcel_buildings, parcel_small_build[gpd_cols]])
    return parcel_buildings[gpd_cols]



In [8]:
def compute_largest_protruding_poly(union_build, base_build):
    """
    Return the largest single piece of `union_build` that lies outside `base_build`.

    We care about concentrated building expansions along a single wall and not
    about general changes in building footprint due to noisy inferences, so
    only the largest protruding polygon is kept.

    Parameters
    ----------
    union_build, base_build : geometries; coordinates are treated as EPSG:4326.

    Returns
    -------
    A pandas Series with a 'geometry' entry (the largest protruding piece) and
    an 'area' entry (its area after projecting to EPSG:26910). When nothing
    protrudes, 'geometry' is the empty difference and 'area' is 0.0.
    """
    protrusion = union_build.difference(base_build)
    if protrusion.is_empty:
        # Nothing sticks out of the base footprint. Answer with a zero area
        # instead of indexing into a frame that may have no rows.
        return pd.Series({'geometry': protrusion, 'area': 0.0})

    # Split a multi-part difference into its individual pieces
    diff_main_build = gpd.GeoSeries(protrusion, crs='EPSG:4326')
    diff_main_build = diff_main_build.explode(ignore_index=True, index_parts=False)

    diff_main_build = gpd.GeoDataFrame(geometry=diff_main_build, crs='EPSG:4326')
    # Areas are measured in a projected CRS rather than in degrees
    diff_main_build['area'] = diff_main_build.to_crs('EPSG:26910').area
    diff_main_build = diff_main_build.sort_values('area', ascending=False).iloc[0]
    return diff_main_build

In [9]:
def match_small_buildings(inferred, osm, gpd_cols):
    """
    Combine the small (non-main) buildings found by the model and by OSM.

    OSM footprints are preferred. Inferred footprints are only added when they
    do not intersect any OSM small building. Every returned row has
    main_building_flag and both expansion flags set to False, and OSM_flag
    tells which source the row came from.

    Parameters
    ----------
    inferred : inferred small buildings.
    osm : OSM small buildings.
    gpd_cols : columns of the returned frame, in order.
    """
    n_osm, n_inferred = len(osm), len(inferred)

    # Neither source has a small building
    if n_osm == 0 and n_inferred == 0:
        return gpd.GeoDataFrame(geometry=[], columns=gpd_cols)

    # Only one source has small buildings: pass it through, tagged with its origin
    if n_osm == 0 or n_inferred == 0:
        from_osm = n_osm > 0
        only_source = (osm if from_osm else inferred).copy()
        only_source['main_building_flag'] = False
        only_source['OSM_flag'] = from_osm
        only_source['expansion_OSM_flag'] = False
        only_source['expansion_2016_flag'] = False
        return only_source[gpd_cols]

    # Both sources have small buildings: start from OSM ...
    osm_small = osm.copy()
    osm_small['main_building_flag'] = False
    osm_small['OSM_flag'] = True
    osm_small['expansion_OSM_flag'] = False
    osm_small['expansion_2016_flag'] = False

    # ... and add the inferences that no OSM footprint intersects
    # (after a left join those rows have no index_right).
    unmatched = inferred.sjoin(osm, predicate='intersects', how='left')
    unmatched = unmatched.loc[unmatched['index_right'].isna()]
    unmatched = unmatched[[
        'GEOID_left', 'area_left', 'small_left', 'large_left', 'geometry']]
    unmatched = unmatched.rename(
        columns={col: col.replace('_left', '') for col in unmatched.columns})
    unmatched['main_building_flag'] = False
    unmatched['OSM_flag'] = False
    unmatched['expansion_2016_flag'] = False
    unmatched['expansion_OSM_flag'] = False

    combined = pd.concat([osm_small, unmatched[gpd_cols]])
    return combined[gpd_cols]

In [10]:
def process_permit_data(parcel_apn, permits_parcel, parcel_buildings):
    """
    Classify a parcel's candidate buildings as permitted or unpermitted.

    Only non-main buildings, and main buildings with a detected expansion
    (vs. OSM or vs. 2016), are considered. Parcels with more than 5 buildings
    are skipped as clearly not single-family residences.

    Permits are consumed in row order and each permit can claim at most one
    building:
      - 'Additions/Alterations' permits from 2015-2020 can only claim the
        expansion. Certainty is 3 when PERMITVALUE and SQUAREFOOT are both
        positive, 2 when only PERMITVALUE is positive, otherwise 1.
      - Any other 'New Construction' / 'Additions/Alterations' permit first
        claims a small building (certainty 3). If none is left and the permit
        is from 2015-2020 it claims the expansion, with certainty 3 when
        DWELLINGUNIT, PERMITVALUE and SQUAREFOOT are all positive, 2 when
        PERMITVALUE and SQUAREFOOT are positive, otherwise 1.
      - Permits with any other WORKDESC are printed and otherwise ignored.
    Buildings left unclaimed are labelled 'unpermitted'.

    Returns
    -------
    (buildings, unmatched_permits)
      - (None, None) when the parcel is skipped or has no candidate building.
      - (candidates labelled 'unpermitted', None) when the parcel has no permits.
      - otherwise the classified buildings and the permits that claimed nothing.
    """
    # Nothing to classify, or too many buildings to be a single-family parcel
    if parcel_buildings is None or len(parcel_buildings) > 5:
        return None, None

    def _candidate_masks(buildings):
        """Return boolean masks: (small buildings, main buildings with an expansion)."""
        is_small = buildings['main_building_flag'] == False
        # .eq(True) treats a missing flag (None) as "no expansion"
        has_expansion = buildings['expansion_OSM_flag'].eq(True) | \
            buildings['expansion_2016_flag'].eq(True)
        is_expanded_main = buildings['main_building_flag'].eq(True) & has_expansion
        return is_small, is_expanded_main

    is_small, is_expanded_main = _candidate_masks(parcel_buildings)
    # Copy so the columns added below never write into the caller's frame
    parcel_buildings = parcel_buildings[is_small | is_expanded_main].copy()
    if parcel_buildings.empty:
        # return nothing since we can't consider any of the buildings (at least not right now)
        return None, None

    if permits_parcel is None or len(permits_parcel) == 0:
        # return all the filtered parcel_buildings, with the caveat that we're highly overestimating
        parcel_buildings['verdict'] = 'unpermitted'
        parcel_buildings['apn'] = parcel_apn
        return parcel_buildings, None

    ## ------ MAIN DRIVER HERE ------
    output_df = gpd.GeoDataFrame(columns=['apn'] + list(parcel_buildings.columns) + ['verdict', 'permit_id', 'certainty'])
    permit_nomatch_df = pd.DataFrame(columns=permits_parcel.columns)

    is_small, is_expanded_main = _candidate_masks(parcel_buildings)
    small_building = parcel_buildings[is_small].reset_index(drop=True)
    expansion = parcel_buildings[is_expanded_main].reset_index(drop=True)

    if not expansion.empty:
        # A parcel has a single main building, hence at most one expansion
        assert len(expansion) == 1

    for _, row in permits_parcel.iterrows():
        workdesc = row['WORKDESC']
        if workdesc != 'New Construction' and workdesc != 'Additions/Alterations':
            # Not a construction permit: report it and move on.
            # NOTE(review): these permits are not added to permit_nomatch_df.
            print(row[['WORKDESC', 'SUBDESC', 'DWELLINGUNIT', 'PERMITVALUE', 'SQUAREFOOT', 'year']])
            continue

        in_window = row['year'] >= 2015 and row['year'] <= 2020
        has_value = row['PERMITVALUE'] > 0
        has_sqft = row['SQUAREFOOT'] > 0

        if workdesc == 'Additions/Alterations' and in_window:
            # Recent addition/alteration: can only explain the expansion
            if not expansion.empty:
                if has_value and has_sqft:
                    certainty = 3
                elif has_value:
                    certainty = 2
                else:
                    certainty = 1
                output_df.loc[len(output_df)] = [parcel_apn] + expansion.loc[0].tolist() + ['permitted', row['OBJECTID_left'], certainty]
                expansion = expansion.drop(0)
            else:
                permit_nomatch_df.loc[len(permit_nomatch_df)] = row.tolist()
        elif not small_building.empty:
            # NOTE(review): small-building matches always get certainty 3,
            # whatever the permit reports -- confirm this is intended.
            output_df.loc[len(output_df)] = [parcel_apn] + small_building.loc[0].tolist() + ['permitted', row['OBJECTID_left'], 3]
            small_building = small_building.drop(0).reset_index(drop=True)
        elif not expansion.empty and in_window:
            has_units = row['DWELLINGUNIT'] > 0
            if has_units and has_value and has_sqft:
                certainty = 3
            elif has_value and has_sqft:
                certainty = 2
            else:
                certainty = 1
            output_df.loc[len(output_df)] = [parcel_apn] + expansion.loc[0].tolist() + ['permitted', row['OBJECTID_left'], certainty]
            expansion = expansion.drop(0)
        else:
            permit_nomatch_df.loc[len(permit_nomatch_df)] = row.tolist()

    # rest is unpermitted
    if not small_building.empty:
        small_building['verdict'] = 'unpermitted'
        small_building['apn'] = parcel_apn
        output_df = pd.concat([output_df, small_building])

    if not expansion.empty:
        expansion['verdict'] = 'unpermitted'
        expansion['apn'] = parcel_apn
        output_df = pd.concat([output_df, expansion])

    return output_df, permit_nomatch_df

# Main

In [None]:
import warnings

# Silence every warning from here on, for the rest of the session.
warnings.simplefilter("ignore")

In [None]:
def _read_csv_or_new(csv_path, columns=None):
    """Read `csv_path` when it exists, otherwise start an empty table with `columns`."""
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)
    return pd.DataFrame(columns=columns)


# Pick up the results of a previous run when they are on disk; otherwise start empty.
shortlist = _read_csv_or_new(
    'shortlist.csv',
    columns=['apn', 'main_building_flag', 'OSM_flag',
             'expansion_OSM_flag', 'expansion_2016_flag',
             'geometry', 'verdict', 'permit_id', 'certainty'],
)
permit_nomatch = _read_csv_or_new('permit_nomatch.csv')

In [None]:
from tqdm import tqdm

In [None]:
# Loop over each parcel
# Results are accumulated into `shortlist` / `permit_nomatch` and written to
# disk every CHECKPOINT_EVERY parcels, plus once more after the last parcel so
# that the final partial batch is not lost.
CHECKPOINT_EVERY = 100
parcel_apns = sj_parcels_res['APN'].unique()
n_parcels = len(parcel_apns)
idx = 0
for idx, parcel_apn in enumerate(tqdm(parcel_apns), start=1):
    # To restrict the run to parcels that have a matched permit, skip the parcel when
    # sj_parcel_permit[sj_parcel_permit['APN_parcel'] == parcel_apn] is empty.
    shortlist_parcel, permit_nomatch_df = match_parcel(parcel_apn)
    if shortlist_parcel is not None and len(shortlist_parcel) > 0:
        shortlist = pd.concat([shortlist, shortlist_parcel])
    if permit_nomatch_df is not None and len(permit_nomatch_df) > 0:
        permit_nomatch = pd.concat([permit_nomatch, permit_nomatch_df])

    if idx % CHECKPOINT_EVERY == 0 or idx == n_parcels:
        shortlist.to_csv('shortlist.csv', index=False)
        permit_nomatch.to_csv('permit_nomatch.csv', index=False)

In [11]:
# Check
# Spot-check the first two pipeline stages on one hand-picked parcel.
parcel_apn = '48114037'
(
    inferred_buildings_2020_parcel,
    inferred_buildings_2016_parcel,
    osm_buildings_parcel,
    permits_parcel,
) = parcel_level_data(parcel_apn, sj_parcels_res, sj_parcel_permit)
parcel_buildings = process_OSM_data(
    inferred_buildings_2020_parcel,
    inferred_buildings_2016_parcel,
    osm_buildings_parcel,
)


  df_out['iou'] = df_out['geometry'].intersection(parcel_bounds).area/df_out['geometry'].area

  df_out['iou'] = df_out['geometry'].intersection(parcel_bounds).area/df_out['geometry'].area

  df_out['iou'] = df_out['geometry'].intersection(parcel_bounds).area/df_out['geometry'].area
  return merge(


In [2]:
# 3.5 Sort shortlists
# TODO
# Risk levels:
# - (highest risk) no permitted additions of any kind, and we detect a detached unit
# - no permitted additions of any kind, and we detect an attached unit
# - permitted detached ADU, and we detect more than one detached unit
# - permitted detached ADU, and we detect both attached and detached units
# - permitted attached ADU, and we detect both attached and detached units
# - permitted attached ADU, and we detect a detached unit (or vice versa)
# - permitted attached ADU and detached ADU, and we detect anything unpermitted