In [18]:
import pandas as pd
import geopandas as gpd
from matplotlib import pyplot as plt
import numpy as np
import contextily as ctx
import scipy.stats as stats

In [119]:
# Load the three raw inputs used throughout the notebook.
# The fee areas and the DBI permits carry a geometry column and are read with
# geopandas; the housing-production workbook is a plain table.
fees = gpd.read_file('../data/Neighborhood-Specific Impact Fee Areas.geojson')
dbi = gpd.read_feather('../data/dbi_permits.geofeather')
homes = pd.read_excel('../data/Housing Production 2005-2022.xlsx')

## Fees

In [120]:
# Display the raw impact-fee areas to inspect the `tier` and `fee` labels filtered on below.
fees

Unnamed: 0,area,tier,url,objectid,ordinance,fee,geometry
0,,Tier A,https://codelibrary.amlegal.com/codes/san_fran...,19,Planning Code Section 432,Central SoMa Community Services Facilities Fee...,"MULTIPOLYGON (((-122.40302 37.78072, -122.4032..."
1,Rincon Hill - Residential,,https://codelibrary.amlegal.com/codes/san_fran...,48,Planning Code Section 418.3(d),South of Market Area Community Stabilization Fee,"MULTIPOLYGON (((-122.38783 37.78733, -122.3878..."
2,,Tier 1 for 45 and 55 feet. Tier 2 for 65 feet.,https://codelibrary.amlegal.com/codes/san_fran...,7,Planning Code Section 423,Eastern Neighborhoods Infrastructure Impact Fe...,"MULTIPOLYGON (((-122.41268 37.77357, -122.4123..."
3,,Tier C,https://codelibrary.amlegal.com/codes/san_fran...,13,Planning Code Section 419.3,UMU District Affordable Housing Fee - Tier C,"MULTIPOLYGON (((-122.40799 37.76530, -122.4081..."
4,Transit Center - C-3-O(SD) Zoning District,,https://codelibrary.amlegal.com/codes/san_fran...,31,Planning Code Section 424.7,Transit Center Transportation and Street Impro...,"MULTIPOLYGON (((-122.39235 37.79257, -122.3921..."
5,,Tier 1,https://codelibrary.amlegal.com/codes/san_fran...,11,Planning Code Section 423,Eastern Neighborhoods Infrastructure Impact Fe...,"MULTIPOLYGON (((-122.41727 37.74832, -122.4173..."
6,,Tier B,https://codelibrary.amlegal.com/codes/san_fran...,15,Planning Code Section 432,Central SoMa Community Services Facilities Fee...,"MULTIPOLYGON (((-122.39241 37.78035, -122.3924..."
7,Market and Octavia - Residential and Commercial,,https://codelibrary.amlegal.com/codes/san_fran...,23,Planning Code Section 421,Market and Octavia Community Infrastructure Im...,"MULTIPOLYGON (((-122.42115 37.77616, -122.4211..."
8,Market and Octavia - Residential (Not RTO),,https://codelibrary.amlegal.com/codes/san_fran...,44,Planning Code Section 416,Market and Octavia Inclusionary Affordable Hou...,"MULTIPOLYGON (((-122.42209 37.78082, -122.4219..."
9,,Tier C,https://codelibrary.amlegal.com/codes/san_fran...,24,Planning Code Section 433,Central SoMa Infrastructure Impact Fee - Tier C,"MULTIPOLYGON (((-122.40025 37.77555, -122.4009..."


In [20]:
# Normalise the "Tier 1 if residential ..." label to plain 'Tier 1'.
# `na=False` makes rows with a missing tier evaluate to False directly (no need to
# compare the mask against True), and `regex=False` matches the text literally.
is_residential_tier1 = fees['tier'].str.contains('Tier 1 if residential', regex=False, na=False)
fees.loc[is_residential_tier1, 'tier'] = 'Tier 1'

# Keep only the Tier 1 / Tier 2 areas and merge each tier into a single geometry.
simplefees = fees[fees['tier'].isin(['Tier 1', 'Tier 2'])] # , 'Tier 3'
# After the dissolve, row 0 is Tier 1 and row 1 is Tier 2; later cells index
# simplefees by position and rely on that order.
simplefees = simplefees.dissolve('tier').reset_index()
# Reproject to EPSG:3310 (projected coordinates) for the distance work further down.
simplefees = simplefees.to_crs(3310)

In [21]:
# Check the dissolved result: one row per tier, geometry now in EPSG:3310 coordinates.
simplefees.head()

Unnamed: 0,tier,geometry,area,url,objectid,ordinance,fee
0,Tier 1,"MULTIPOLYGON (((-213124.608 -27091.523, -21312...",,https://codelibrary.amlegal.com/codes/san_fran...,11,Planning Code Section 423,Eastern Neighborhoods Infrastructure Impact Fe...
1,Tier 2,"MULTIPOLYGON (((-212958.707 -27084.191, -21295...",,https://codelibrary.amlegal.com/codes/san_fran...,10,Planning Code Section 423,Eastern Neighborhoods Infrastructure Impact Fe...


## Permits

In [22]:
# Row / column count of the housing-production table before it is merged with permits.
homes.shape

(5275, 69)

In [23]:
# Keep one row per permit number: with the newest records sorted first,
# drop_duplicates retains the most recently created row for each permit.
dbi = (
    dbi
    .sort_values('permit_creation_date', ascending=False)
    .drop_duplicates('permit_number')
)

# Attach permit geometry and creation date to the housing-production rows.
# NB (original author): 5 rows in homes don't match; the inner join drops them.
permit_locations = dbi[['geometry', 'permit_number', 'permit_creation_date']]
permits = pd.merge(permit_locations, homes,
                   how='inner',
                   left_on='permit_number', right_on='BPA')


In [24]:
# Rows of homes whose BlockLot never appears in the merged permits.
# Three of these mismatches could be recovered by merging homes.BlockLot against
# block + lot in dbi, but for the RDD it is not worth it because none of them
# are in the right location / time.
homes[~homes['BlockLot'].isin(permits['BlockLot'])]

Unnamed: 0,BPA,Keep/ Exclude,Address,BlockLot,MOHCD Project ID,PPTS Project ID,Form No.,Permit Type,PTS Existing Units,PTS Proposed Units,...,Deed Restriction Type (Inclusionary/ Density Bonus),Assistance Program,Analysis Neighborhood,Planning Dist.,Zoning Dist.,Sup. Dist.,Existing Use,Existing Inventory Bldg Type,Proposed Inventory Bldg Type,Proposed Use
0,1801 Wedemeyer St,Keep,1801 Wedemeyer St,1300001,,,3,Site Permit,,154.0,...,,,Inner Richmond,0 - Presidio,RH-1,1,,,20+,apartments
3666,202304044992,Keep,1427 QUESADA AV,4760019B,,,8,Full BP,1.0,2.0,...,,,Bayview Hunters Point,10 - South Bayshore,RH-1,10,1 FAMILY DWELLING,Single,2 to 4,2 FAMILY DWELLING
3716,9903234s,Keep,639 Shotwell St,3611053,,1999.805,3,Site Permit,2.0,3.0,...,,,Mission,8 - Mission,RH-3,9,2 family dwelling,2 to 4,2 to 4,apartments


In [25]:
# Keep permits created after 2013, then narrow to the fields used downstream.
created_after_2013 = permits['permit_creation_date'].dt.year > 2013
permits = permits[created_after_2013]
permits = permits[[
    'geometry',
    'permit_number',
    'Permit Issued Date',
    'Net_Units_Completed',
    'Date_Completed',
    'BlockLot',
]]


In [26]:
# Spot-check the trimmed permits table (point geometry plus completion fields).
permits.head()

Unnamed: 0,geometry,permit_number,Permit Issued Date,Net_Units_Completed,Date_Completed,BlockLot
0,POINT (-122.43497 37.75388),202301109975,2023-02-28,1.0,2023-04-19,2771059
1,POINT (-122.40793 37.74350),202212158449,2022-12-16,-1.0,2023-01-25,5556070
2,POINT (-122.50719 37.75825),202206307496,2023-03-21,1.0,2023-06-20,1893045
3,POINT (-122.40475 37.76047),202206216771,2022-06-21,-1.0,2022-07-11,4075029
4,POINT (-122.37454 37.73161),202206136205,2022-11-17,1.0,2023-06-07,4645020


## Geospatial Bluesky permits df

In [27]:
# Year used to select the bluesky rows in the next cells.
# NOTE: `year` is reassigned to 2010 in the upzoning plot cell further down, so
# re-running the cells below out of order would pick a different year.
year = 2014

In [28]:
# Load the bluesky GeoDataFrame.
# NOTE(review): this path is '../' rather than '../data/' like the other inputs — confirm it is intended.
bluesky = gpd.read_feather('../geobluesky.geofeather')

In [29]:
# Keep only the bluesky rows for the selected year.
bluesky = bluesky[bluesky['year'] == year]

In [30]:
# Left-join permits onto the bluesky geometries that contain them. Rows with no
# permit inside are kept, with NaN in the permit columns.
permits_in_bluesky_crs = permits.to_crs(bluesky.crs)
gdf = gpd.sjoin(
    bluesky,
    permits_in_bluesky_crs,
    predicate='contains',
    how='left',
)

In [31]:
# A row counts as developed when the spatial join matched a permit to it
# (index_right is NaN for unmatched rows of the left join).
# Bracket assignment always writes the column; attribute-style assignment
# (`gdf.Developed = ...`) only does so if the column already exists and otherwise
# just sets a Python attribute on the object.
gdf['Developed'] = gdf['index_right'].notna()

In [32]:
# Rows without a matched permit have NaN units after the left join; count them as zero.
net_units = gdf['Net_Units_Completed']
gdf['Net_Units_Completed'] = net_units.fillna(0)

In [33]:
# Remove join bookkeeping and identifier columns that are no longer needed.
gdf = gdf.drop(['mapblklot', 'index_right', 'permit_number', 'year'], axis='columns')

In [34]:
# Reproject to EPSG:3310, the CRS simplefees was converted to when it was built,
# so the intersection tests compare geometries in the same coordinates.
gdf = gdf.to_crs(epsg=3310)

# Row 0 / row 1 of simplefees hold the dissolved Tier 1 / Tier 2 areas.
tier1_area = simplefees.geometry.iloc[0]
tier2_area = simplefees.geometry.iloc[1]
gdf['tier1'] = gdf.geometry.intersects(tier1_area)
gdf['tier2'] = gdf.geometry.intersects(tier2_area)

In [35]:
# Store each geometry's centroid coordinates; (x, y) is later used as a de-duplication key.
centroids = gdf.geometry.centroid
gdf['x'] = centroids.x
gdf['y'] = centroids.y

In [36]:
# Bounding box of the Tier 1 / Tier 2 areas in simplefees' current CRS.
# NOTE: these four names are recomputed after simplefees is reprojected in the plotting section.
min_x, min_y, max_x, max_y = simplefees.total_bounds

## Take the subset of the data where there are fees

In [37]:
# Keep rows that touch exactly one of the two tiers (XOR) and label which one.
in_exactly_one_tier = gdf['tier1'] ^ gdf['tier2']
east_homes = gdf[in_exactly_one_tier].copy()

# Every remaining row is in tier1 or tier2 but not both, so the label is
# 'tier1' where the tier1 flag is set and 'tier2' everywhere else.
east_homes['tier'] = 'tier1'
east_homes.loc[~east_homes['tier1'], 'tier'] = 'tier2'

In [38]:
# Make sure both frames are in EPSG:3310 before measuring distances.
simplefees = simplefees.to_crs(3310)
east_homes = east_homes.to_crs(3310)

# Distance from every row to each tier's dissolved area.
tier1_area = simplefees.geometry.iloc[0]
tier2_area = simplefees.geometry.iloc[1]
east_homes['dist_to_tier1'] = east_homes.geometry.distance(tier1_area)
east_homes['dist_to_tier2'] = east_homes.geometry.distance(tier2_area)

# The running variable is the larger of the two distances (the distance to the
# tier the row is NOT in), signed negative for tier2 rows and positive for tier1 rows.
is_tier2 = east_homes['tier'] == 'tier2'
unsigned_dist = np.maximum(east_homes['dist_to_tier1'], east_homes['dist_to_tier2'])
east_homes['dist_to_boundary'] = unsigned_dist
east_homes['dist_to_boundary'] = east_homes['dist_to_boundary'] * ((-1) ** is_tier2)

In [39]:
# Keep only rows whose signed distance to the tier boundary is strictly within
# 200 CRS units on either side.
signed_dist = east_homes['dist_to_boundary']
boundary_homes = east_homes[(signed_dist > -200) & (signed_dist < 200)]

In [40]:
# Number of rows flagged as developed inside the boundary band.
boundary_homes.Developed.sum()

223

## Add tax data

In [94]:
# Load the tax-roll GeoDataFrame; it is filtered to a single roll year below.
tax = gpd.read_feather('../data/tax.geofeather')

In [95]:
# Put the boundary-band rows in the tax data's CRS ahead of the spatial join.
boundary_homes = boundary_homes.to_crs(crs=tax.crs)

In [96]:
# Use only the 2014 closed roll.
# NOTE(review): 2014 is hard-coded here rather than taken from `year` — confirm they are meant to agree.
tax = tax[tax['closed_roll_year'] == 2014]

In [97]:
# Attach tax records that fall inside each boundary-band geometry. The inner join
# drops rows with no tax record inside them.
# contains? worried about nans
full_df = gpd.sjoin(
    boundary_homes,
    tax,
    predicate='contains',
    how='inner',
)

In [98]:
# Flag rows whose tax block + lot, concatenated, is also one of the MapBlkLot_Master
# ids in boundary_homes, i.e. the match is supported by id as well as by geometry.
tax_block_lot = full_df['block'] + full_df['lot']
full_df['non_geo_match'] = tax_block_lot.isin(boundary_homes['MapBlkLot_Master'])

In [99]:
# Keep one row per centroid (x, y). Sorting id-confirmed matches first means that,
# where several tax records matched, an id-confirmed one is retained.
full_df = full_df.sort_values('non_geo_match', ascending=False)
full_df = full_df.drop_duplicates(subset=['x', 'y'])

In [100]:
# List the combined columns after the tax join.
full_df.columns

Index(['MapBlkLot_Master', 'Developed', 'Historic', 'Residential_Dummy',
       'Zillow_Price_Real', 'Const_FedReserve_Real', 'Envelope_1000',
       'Upzone_Ratio', 'zp_OfficeComm', 'zp_DensRestMulti',
       'zp_FormBasedMulti', 'zp_PDRInd', 'zp_Public', 'zp_Redev', 'zp_RH2',
       'zp_RH3_RM1', 'geometry', 'Permit Issued Date', 'Net_Units_Completed',
       'Date_Completed', 'BlockLot', 'tier1', 'tier2', 'x', 'y', 'tier',
       'dist_to_tier1', 'dist_to_tier2', 'dist_to_boundary', 'index_right',
       'property_class_code_definition', 'lot_code', 'property_area',
       'volume_number', 'percent_of_ownership', 'misc_exemption_value',
       'zoning_code', 'year_property_built', 'analysis_neighborhood',
       'number_of_units', 'use_definition', 'closed_roll_year', 'status_code',
       'number_of_bedrooms', 'assessor_neighborhood', 'number_of_stories',
       'assessor_neighborhood_district', 'exemption_code_definition', 'block',
       'current_sales_date', 'lot_area', 'number_

## OTC alteration permits

In [101]:
# OTC alteration permits created from 2009 through 2013.
creation_year = dbi['permit_creation_date'].dt.year
in_window = (2009 <= creation_year) & (creation_year < 2014)
is_otc_alteration = dbi['permit_type_definition'] == 'otc alterations permit'
recent_otc = dbi[in_window & is_otc_alteration]

In [102]:
# Drop the bookkeeping columns left over from the tax join and the de-duplication step.
full_df = full_df.drop(['index_right', 'non_geo_match'], axis='columns')

In [103]:
# Size of the analysis frame before the OTC permit counts are attached.
full_df.shape

(6946, 67)

In [116]:
# Check that every OTC permit has a geometry_str value, since the counts are keyed on it.
recent_otc.geometry_str.isna().sum()

0

In [104]:
# Count OTC alteration permits per distinct location and attach the counts to parcels.
#
# value_counts('geometry_str').reset_index() yields only the string key and the
# count — no geometry column — so wrapping it in a GeoDataFrame leaves nothing to
# reproject or join on. Counting stays on the string key, and the point geometry
# is then re-attached from recent_otc so the right-hand side is a valid GeoDataFrame.
otc_counts = (
    recent_otc
    .value_counts('geometry_str')
    .rename('count')  # explicit name so the column is 'count' on every pandas version
    .reset_index()
)
otc_points = (
    recent_otc[['geometry_str', 'geometry']]
    .drop_duplicates('geometry_str')
    .merge(otc_counts, on='geometry_str')
)
otc_points = otc_points[['geometry', 'count']].to_crs(full_df.crs)

# NOTE(review): with how='left', a parcel that contains several distinct permit
# points appears once per point — confirm whether counts should be summed per parcel.
full_df = gpd.sjoin(full_df,
                    otc_points,
                    predicate='contains',
                    how='left')

TypeError: '<' not supported between instances of 'Point' and 'Point'

In [None]:
# Give the joined count column a descriptive name.
full_df = full_df.rename(columns={'count': 'otc alterations permits'})

In [38]:
# Parcels with no matched OTC permit come out of the left join as NaN; record them as zero.
otc_permit_counts = full_df['otc alterations permits']
full_df['otc alterations permits'] = otc_permit_counts.fillna(0)

In [39]:
# Drop join bookkeeping and the two price-index columns before saving.
# 'non_geo_match' has already been removed by an earlier cell, so a strict drop
# raises KeyError when the notebook is run top to bottom; errors='ignore' makes
# this cell safe to run (and re-run) whichever of these columns are still present.
full_df = full_df.drop(
    columns=['index_right', 'non_geo_match', 'Zillow_Price_Real', 'Const_FedReserve_Real'],
    errors='ignore',
)

In [40]:
# Persist the cleaned RDD dataset (with geometry) as feather.
full_df.to_feather('../data/clean_fees_rdd.feather')

In [118]:
# Also write a CSV copy of the same frame.
full_df.to_csv('../data/clean_fees_rdd.csv')

In [42]:
# Preview the saved frame, including the new 'otc alterations permits' column.
full_df.head()

Unnamed: 0,MapBlkLot_Master,Developed,Historic,Residential_Dummy,Envelope_1000,Upzone_Ratio,zp_OfficeComm,zp_DensRestMulti,zp_FormBasedMulti,zp_PDRInd,...,lot,property_location,parcel_number,assessed_fixtures_value,supervisor_district,assessed_land_value,basement_area,assessed_improvement_value,imputed_land_value,otc alterations permits
1463796,3521009,False,0,0,5.04433,2.627255,0,1,0,0,...,9,0000 0360 11TH ST0000,3521009,2138,6.0,227412,0.0,295637,227412.0,0.0
1473936,3995007,False,0,1,16.214481,1.464987,0,0,1,0,...,7,0000 2092 03RD ST0000,3995007,0,10.0,309908,0.0,293548,309908.0,0.0
1473948,3995057,False,0,1,19.350072,0.919462,0,0,1,0,...,57,0000 0635 TENNESSEE ST0201,3995057,0,10.0,19700,0.0,134296,19700.0,0.0
1473947,3995035,False,0,1,4.20332,1.666667,0,0,0,1,...,35,0000 2080 3RD ST0001,3995035,0,10.0,452574,0.0,452574,452574.0,0.0
1473946,3994006,False,0,0,4.939534,1.666667,0,0,0,1,...,6,0000 0650VILLINOIS ST0000,3994006,0,10.0,835701,0.0,0,835701.0,0.0


In [43]:
# How many rows lack a year_property_built value.
full_df.year_property_built.isna().sum()

429

In [44]:
# Check: every row should have a centroid y coordinate.
full_df['y'].isna().sum()

0

## Confirm upzonings can be controlled for by 2010

In [None]:
# Wide table of Upzone_Ratio: one row per MapBlkLot_Master, one column per year.
# NOTE(review): `parcels` is not defined in any cell visible in this notebook —
# confirm where it comes from before relying on Restart & Run All.
upzone_by_year = parcels.pivot(index='MapBlkLot_Master', columns='year', values='Upzone_Ratio')

# Year-over-year change in the ratio; the first and last year columns are then dropped.
upzone_change = upzone_by_year.diff(axis=1)
upzonings = upzone_change.iloc[:, 1:-1]

In [None]:
# Attach parcel geometry to the per-year upzoning changes (upzonings is indexed by parcel id).
# NOTE(review): `allParcels` is not defined in any cell visible in this notebook — confirm its source.
parcel_shapes = allParcels[['mapblklot', 'geometry']].drop_duplicates()
plot_upzoning = pd.merge(parcel_shapes, upzonings,
                         left_on='mapblklot',
                         right_index=True)

In [None]:
# Reproject the fee areas to EPSG:3857 for plotting over a basemap and refresh the
# bounding box used as the axis limits.
# NOTE: this overwrites the EPSG:3310 version of simplefees used by earlier cells.
simplefees = simplefees.to_crs(epsg=3857)
min_x, min_y, max_x, max_y = simplefees.total_bounds

In [None]:
# Match the CRS of the reprojected fee areas / basemap.
plot_upzoning = plot_upzoning.to_crs(epsg=3857)

In [None]:
# Map the parcels with a positive change in Upzone_Ratio for the chosen year,
# zoomed to the extent of the fee areas.
year = 2010
upzoned = plot_upzoning[plot_upzoning[year] > 0]

fig, ax = plt.subplots(figsize=(15, 15))
upzoned[['geometry', year]].plot(ax=ax, column=year, legend=True)
ax.set_xlim(min_x, max_x)
ax.set_ylim(min_y, max_y)
ctx.add_basemap(ax, source=ctx.providers.CartoDB.PositronNoLabels, attribution=False)

In [None]:
# Flag which parcels intersect each fee tier (row 0 = Tier 1, row 1 = Tier 2 of simplefees).
tier1_area = simplefees.geometry.iloc[0]
tier2_area = simplefees.geometry.iloc[1]
plot_upzoning['tier1'] = plot_upzoning.geometry.intersects(tier1_area)
plot_upzoning['tier2'] = plot_upzoning.geometry.intersects(tier2_area)

In [None]:
# Parcels that intersect at least one of the two tiers.
in_any_tier = plot_upzoning['tier1'] | plot_upzoning['tier2']
eastern_upzonings = plot_upzoning[in_any_tier]

In [None]:
# Inspect the tier-area parcels and their year-over-year upzoning changes.
eastern_upzonings

In [None]:
# Keep only rows with no missing value in any column (spearmanr below needs complete pairs).
row_is_complete = eastern_upzonings.notna().all(axis=1)
drop_nan = eastern_upzonings[row_is_complete]

In [None]:
# Spearman rank correlation between the 2010 upzoning change and Tier 2 membership.
upzone_change_2010 = drop_nan[2010]
in_tier2 = drop_nan['tier2']
corr_coefficient, p_value = stats.spearmanr(upzone_change_2010, in_tier2)
corr_coefficient

In [None]:
# Same check for the 2011 upzoning change.
upzone_change_2011 = drop_nan[2011]
in_tier2 = drop_nan['tier2']
corr_coefficient, p_value = stats.spearmanr(upzone_change_2011, in_tier2)
corr_coefficient