In [2]:
import pandas as pd
import geopandas as gpd
from matplotlib import pyplot as plt
import numpy as np
import contextily as ctx
import scipy.stats as stats

In [3]:
# Raw inputs: DBI permit records, impact-fee area polygons, and the
# housing-production spreadsheet. The three reads are independent.
dbi = gpd.read_feather('../data/dbi_permits.geofeather')
fees = gpd.read_file('../data/Neighborhood-Specific Impact Fee Areas.geojson')
homes = pd.read_excel('../data/Housing Production 2005-2022.xlsx')

## Fees

In [4]:
# Fold the "Tier 1 if residential ..." variants into plain 'Tier 1'.
# na=False treats missing tier labels as non-matches directly (this replaces
# the `== True` comparison), and regex=False matches the text literally.
is_residential_tier1 = fees['tier'].str.contains('Tier 1 if residential', regex=False, na=False)
fees.loc[is_residential_tier1, 'tier'] = 'Tier 1'

# Keep Tier 1 and Tier 2 only ('Tier 3' is deliberately left out), merge each
# tier's polygons into a single geometry, and reproject to EPSG:3310.
# After reset_index() there is one row per tier with `tier` as a column.
simplefees = (
    fees[fees['tier'].isin(['Tier 1', 'Tier 2'])]
    .dissolve('tier')
    .reset_index()
    .to_crs(3310)
)

In [5]:
# Preview the dissolved fee areas (one row per tier).
simplefees.head()

Unnamed: 0,tier,geometry,area,url,objectid,ordinance,fee
0,Tier 1,"MULTIPOLYGON (((-213124.608 -27091.523, -21312...",,https://codelibrary.amlegal.com/codes/san_fran...,11,Planning Code Section 423,Eastern Neighborhoods Infrastructure Impact Fe...
1,Tier 2,"MULTIPOLYGON (((-212958.707 -27084.191, -21295...",,https://codelibrary.amlegal.com/codes/san_fran...,10,Planning Code Section 423,Eastern Neighborhoods Infrastructure Impact Fe...


## Permits

In [6]:
# (rows, columns) of the housing-production table before any merging.
homes.shape

(5275, 69)

In [7]:
# Sort newest-first so that drop_duplicates (which keeps the first occurrence)
# retains the most recently created record for each permit number.
dbi = (
    dbi.sort_values('permit_creation_date', ascending=False)
       .drop_duplicates('permit_number')
)

# Attach permit geometry and creation date to the housing-production rows.
# NB: 5 rows in homes don't match a DBI permit number; the inner join drops them.
permit_cols = ['geometry', 'permit_number', 'permit_creation_date']
permits = dbi[permit_cols].merge(
    homes,
    how='inner',
    left_on='permit_number',
    right_on='BPA',
)


In [8]:
# Rows of homes whose BlockLot never appears among the merged permits.
# Three of the mismatches could be recovered by merging on BlockLot in homes
# against block + lot in dbi, but for the RDD it is not worth it because none
# of them are in the right location / time.
blocklot_unmatched = ~homes['BlockLot'].isin(permits['BlockLot'])
homes[blocklot_unmatched]

Unnamed: 0,BPA,Keep/ Exclude,Address,BlockLot,MOHCD Project ID,PPTS Project ID,Form No.,Permit Type,PTS Existing Units,PTS Proposed Units,...,Deed Restriction Type (Inclusionary/ Density Bonus),Assistance Program,Analysis Neighborhood,Planning Dist.,Zoning Dist.,Sup. Dist.,Existing Use,Existing Inventory Bldg Type,Proposed Inventory Bldg Type,Proposed Use
0,1801 Wedemeyer St,Keep,1801 Wedemeyer St,1300001,,,3,Site Permit,,154.0,...,,,Inner Richmond,0 - Presidio,RH-1,1,,,20+,apartments
3666,202304044992,Keep,1427 QUESADA AV,4760019B,,,8,Full BP,1.0,2.0,...,,,Bayview Hunters Point,10 - South Bayshore,RH-1,10,1 FAMILY DWELLING,Single,2 to 4,2 FAMILY DWELLING
3716,9903234s,Keep,639 Shotwell St,3611053,,1999.805,3,Site Permit,2.0,3.0,...,,,Mission,8 - Mission,RH-3,9,2 family dwelling,2 to 4,2 to 4,apartments


In [9]:
# Keep permits created strictly after 2010 and only the columns needed later.
# Note: this drops permit_creation_date, so the cell cannot be re-run on its own output.
created_after_2010 = permits['permit_creation_date'].dt.year > 2010
kept_columns = ['geometry', 'permit_number', 'Permit Issued Date',
                'Net_Units_Completed', 'Date_Completed', 'BlockLot']
permits = permits.loc[created_after_2010, kept_columns]


In [10]:
# Preview the trimmed permits table.
permits.head()

Unnamed: 0,geometry,permit_number,Permit Issued Date,Net_Units_Completed,Date_Completed,BlockLot
0,POINT (-122.43497 37.75388),202301109975,2023-02-28,1.0,2023-04-19,2771059
1,POINT (-122.40793 37.74350),202212158449,2022-12-16,-1.0,2023-01-25,5556070
2,POINT (-122.50719 37.75825),202206307496,2023-03-21,1.0,2023-06-20,1893045
3,POINT (-122.40475 37.76047),202206216771,2022-06-21,-1.0,2022-07-11,4075029
4,POINT (-122.37454 37.73161),202206136205,2022-11-17,1.0,2023-06-07,4645020


## Geospatial Bluesky permits df

In [14]:
# Value of bluesky's `year` column to keep; must be set before bluesky is filtered.
year = 2010

In [12]:
# Load the Bluesky GeoDataFrame (it carries a `year` column that is filtered on next).
# NOTE(review): this path is relative to '..' rather than '../data' like the other inputs — confirm.
bluesky = gpd.read_feather('../geobluesky.geofeather')

In [13]:
# Restrict bluesky to the rows for the selected year.
is_selected_year = bluesky['year'] == year
bluesky = bluesky[is_selected_year]

In [15]:
# Left spatial join: every bluesky geometry is kept, and permit columns are
# attached wherever it contains a permit geometry (NaN where it contains none).
permits_in_bluesky_crs = permits.to_crs(bluesky.crs)
gdf = gpd.sjoin(bluesky, permits_in_bluesky_crs, how='left', predicate='contains')

In [16]:
# Flag rows that matched a permit in the spatial join (index_right is NaN otherwise).
# Use bracket assignment: attribute-style assignment only updates a column that
# already exists and otherwise silently sets a plain Python attribute instead.
gdf['Developed'] = gdf['index_right'].notna()

In [17]:
# Rows without a matching permit get zero completed units instead of NaN.
gdf = gdf.assign(Net_Units_Completed=gdf['Net_Units_Completed'].fillna(0))

In [18]:
# Remove identifier / join-bookkeeping columns that are not needed downstream.
unneeded_columns = ['mapblklot', 'index_right', 'permit_number', 'year']
gdf = gdf.drop(unneeded_columns, axis='columns')

In [19]:
# Reproject to EPSG:3310, the CRS simplefees was projected to when it was built.
gdf = gdf.to_crs(epsg=3310)

# Look each tier's geometry up by its label instead of by row position, so the
# flags stay correct even if the row order of simplefees ever changes.
tier_geometry = simplefees.set_index('tier').geometry
gdf['tier1'] = gdf.geometry.intersects(tier_geometry.loc['Tier 1'])
gdf['tier2'] = gdf.geometry.intersects(tier_geometry.loc['Tier 2'])

In [20]:
# Compute the centroids once and reuse them for both coordinate columns
# (the original recomputed the centroid for x and again for y).
centroids = gdf.geometry.centroid
gdf['x'] = centroids.x
gdf['y'] = centroids.y

In [21]:
# Bounding box of the Tier 1 + Tier 2 fee areas in the current CRS of simplefees.
fee_area_bounds = simplefees.total_bounds
min_x, min_y, max_x, max_y = fee_area_bounds

## Take the subset of the data where there are fees

In [22]:
# Keep parcels that intersect exactly one of the two tiers; parcels that
# intersect both (or neither) are excluded.
in_exactly_one_tier = gdf['tier1'] != gdf['tier2']
east_homes = gdf[in_exactly_one_tier].copy()

# Label each remaining parcel with the tier it belongs to.
east_homes['tier'] = np.where(east_homes['tier1'], 'tier1', 'tier2')

In [23]:
# Make sure both layers are in EPSG:3310 before measuring distances.
east_homes = east_homes.to_crs(3310)
simplefees = simplefees.to_crs(3310)

# Row 0 / row 1 of simplefees are used as the Tier 1 / Tier 2 geometries.
tier1_geom = simplefees.geometry.iat[0]
tier2_geom = simplefees.geometry.iat[1]
east_homes['dist_to_tier1'] = east_homes.geometry.distance(tier1_geom)
east_homes['dist_to_tier2'] = east_homes.geometry.distance(tier2_geom)

# The larger of the two distances is the distance to the *other* tier.
# Sign convention: positive for tier1 parcels, negative for tier2 parcels.
unsigned_distance = np.maximum(east_homes['dist_to_tier1'], east_homes['dist_to_tier2'])
east_homes['dist_to_boundary'] = unsigned_distance
side_sign = np.where(east_homes['tier'] == 'tier2', -1, 1)
east_homes['dist_to_boundary'] = east_homes['dist_to_boundary'] * side_sign

In [24]:
# Keep parcels whose signed distance is within 200 CRS units of zero, on either side.
near_boundary = east_homes['dist_to_boundary'].abs() < 200
boundary_homes = east_homes[near_boundary]

In [25]:
# Number of rows in the boundary band flagged as Developed.
boundary_homes.Developed.sum()

293

## Add tax data

In [27]:
# Load the tax GeoDataFrame (it carries a `closed_roll_year` column, filtered on below).
tax = gpd.read_feather('../data/tax.geofeather')

In [28]:
# Put the parcels in the tax layer's CRS so the spatial join compares like with like.
boundary_homes = boundary_homes.to_crs(tax.crs)

In [29]:
# Use only the 2009 closed-roll records.
is_2009_roll = tax['closed_roll_year'] == 2009
tax = tax[is_2009_roll]

In [30]:
# Inner spatial join: keep only parcels that contain at least one tax geometry.
# A parcel containing several tax geometries produces one row per match.
# NOTE(review): parcels with no contained tax geometry are dropped here — confirm this is intended.
full_df = gpd.sjoin(boundary_homes, tax, how='inner', predicate='contains') # contains? worried about nans

In [31]:
# True where the tax record's concatenated block + lot also appears as a
# MapBlkLot_Master value among the boundary parcels.
tax_blocklot = full_df['block'] + full_df['lot']
full_df['non_geo_match'] = tax_blocklot.isin(boundary_homes['MapBlkLot_Master'])

In [32]:
# One row per parcel centroid: sort block+lot matches first so that
# drop_duplicates (which keeps the first occurrence) prefers them.
matches_first = full_df.sort_values('non_geo_match', ascending=False)
full_df = matches_first.drop_duplicates(['x', 'y'])

## OTC alteration permits (2005-2009)

In [33]:
# OTC alterations permits created from 2005 through 2009 (inclusive).
creation_year = dbi['permit_creation_date'].dt.year
is_otc_alteration = dbi['permit_type_definition'] == 'otc alterations permit'
recent_otc = dbi[creation_year.between(2005, 2009) & is_otc_alteration]

In [34]:
# Drop the bookkeeping column left by the previous spatial join so the next
# sjoin can create its own index_right.
full_df = full_df.drop('index_right', axis='columns')

In [35]:
# (rows, columns) before the OTC permit counts are joined on.
full_df.shape

(6959, 69)

In [36]:
# Count the 2005-2009 OTC alteration permits per distinct permit geometry,
# then attach those counts to the parcel that contains each geometry.
# NOTE(review): a parcel containing several distinct permit geometries yields
# one output row per geometry — confirm whether counts should be summed per parcel.
otc_counts = gpd.GeoDataFrame(recent_otc.value_counts('geometry').reset_index())
full_df = gpd.sjoin(
    full_df,
    otc_counts.to_crs(full_df.crs),
    how='left',
    predicate='contains',
)

In [37]:
# Give the generic value_counts column a descriptive name.
full_df = full_df.rename(columns={'count': 'otc alterations permits'})

In [38]:
# Parcels with no matching OTC permit get a count of 0 instead of NaN.
otc_column = 'otc alterations permits'
full_df[otc_column] = full_df[otc_column].fillna(0)

In [39]:
# Remove join bookkeeping and the columns excluded from the exported dataset.
excluded_columns = ['index_right', 'non_geo_match', 'Zillow_Price_Real', 'Const_FedReserve_Real']
full_df = full_df.drop(excluded_columns, axis='columns')

In [40]:
# Persist the cleaned dataset as a feather file.
full_df.to_feather('../data/clean_fees_rdd.feather')

In [41]:
# Also export the same table as CSV.
full_df.to_csv('../data/clean_fees_rdd.csv')

In [42]:
# Preview the exported dataset.
full_df.head()

Unnamed: 0,MapBlkLot_Master,Developed,Historic,Residential_Dummy,Envelope_1000,Upzone_Ratio,zp_OfficeComm,zp_DensRestMulti,zp_FormBasedMulti,zp_PDRInd,...,lot,property_location,parcel_number,assessed_fixtures_value,supervisor_district,assessed_land_value,basement_area,assessed_improvement_value,imputed_land_value,otc alterations permits
1463796,3521009,False,0,0,5.04433,2.627255,0,1,0,0,...,9,0000 0360 11TH ST0000,3521009,2138,6.0,227412,0.0,295637,227412.0,0.0
1473936,3995007,False,0,1,16.214481,1.464987,0,0,1,0,...,7,0000 2092 03RD ST0000,3995007,0,10.0,309908,0.0,293548,309908.0,0.0
1473948,3995057,False,0,1,19.350072,0.919462,0,0,1,0,...,57,0000 0635 TENNESSEE ST0201,3995057,0,10.0,19700,0.0,134296,19700.0,0.0
1473947,3995035,False,0,1,4.20332,1.666667,0,0,0,1,...,35,0000 2080 3RD ST0001,3995035,0,10.0,452574,0.0,452574,452574.0,0.0
1473946,3994006,False,0,0,4.939534,1.666667,0,0,0,1,...,6,0000 0650VILLINOIS ST0000,3994006,0,10.0,835701,0.0,0,835701.0,0.0


In [43]:
# Number of rows with a missing year_property_built.
full_df.year_property_built.isna().sum()

429

In [44]:
# Sanity check: number of rows with a missing centroid y coordinate.
full_df['y'].isna().sum()

0

## Confirm upzonings can be controlled for by 2010

In [None]:
# NOTE(review): `parcels` is not defined in the cells shown here — confirm
# where it is loaded before running this section on a fresh kernel.
# Wide table: one row per MapBlkLot_Master, one column per year of Upzone_Ratio.
upzone_ratio_by_year = parcels.pivot(index='MapBlkLot_Master', columns='year', values='Upzone_Ratio')

# Change from each year column to the next, dropping the first column
# (no previous year to diff against) and the last column.
yearly_change = upzone_ratio_by_year.diff(axis=1)
upzonings = yearly_change.iloc[:, 1:-1]

In [None]:
# NOTE(review): `allParcels` is not defined in the cells shown here — confirm its source.
# Attach each parcel's geometry to its row of yearly Upzone_Ratio changes.
parcel_geometries = allParcels[['mapblklot', 'geometry']].drop_duplicates()
plot_upzoning = parcel_geometries.merge(
    upzonings,
    left_on='mapblklot',
    right_index=True,
)

In [None]:
# Reproject the fee areas to EPSG:3857 for plotting and take their bounding box
# for the map extent.
# NOTE: this rebinds simplefees, so any cell run after this one sees it in
# EPSG:3857 rather than EPSG:3310.
simplefees = simplefees.to_crs(3857)
min_x, min_y, max_x, max_y = simplefees.total_bounds

In [None]:
# Match the CRS that simplefees was reprojected to for plotting.
plot_upzoning = plot_upzoning.to_crs(3857)

In [None]:
# Map the parcels whose Upzone_Ratio change for `year` is positive, zoomed to
# the bounding box of the fee areas.
fig, ax = plt.subplots(figsize=(15, 15))
year = 2010

# Select rows and columns in one .loc call instead of chained indexing.
upzoned = plot_upzoning.loc[plot_upzoning[year] > 0, ['geometry', year]]
upzoned.plot(ax=ax, column=year, legend=True)

ax.set_xlim(min_x, max_x)
ax.set_ylim(min_y, max_y)
# A title lets the figure stand on its own when the notebook is skimmed.
ax.set_title(f'Parcels with a positive Upzone_Ratio change in {year}')
# Trailing semicolon suppresses the return-value repr in the cell output.
ctx.add_basemap(ax, source=ctx.providers.CartoDB.PositronNoLabels, attribution=False);

In [None]:
# Flag parcels intersecting each fee tier. The tier geometry is looked up by
# its label rather than by row position, so the flags do not depend on the
# row order of simplefees.
tier_geometry = simplefees.set_index('tier').geometry
plot_upzoning['tier1'] = plot_upzoning.geometry.intersects(tier_geometry.loc['Tier 1'])
plot_upzoning['tier2'] = plot_upzoning.geometry.intersects(tier_geometry.loc['Tier 2'])

In [None]:
# Parcels intersecting either fee tier (parcels touching both are included here).
in_any_tier = plot_upzoning['tier1'] | plot_upzoning['tier2']
eastern_upzonings = plot_upzoning[in_any_tier]

In [None]:
# Show only the first rows rather than dumping the whole frame into the output.
eastern_upzonings.head()

In [None]:
# Keep only rows with no missing value in any column.
drop_nan = eastern_upzonings.dropna()

In [None]:
# Spearman rank correlation between the 2010 Upzone_Ratio change and Tier 2 membership.
spearman_2010 = stats.spearmanr(drop_nan[2010], drop_nan['tier2'])
corr_coefficient, p_value = spearman_2010
corr_coefficient

In [None]:
# Same check for the 2011 Upzone_Ratio change.
spearman_2011 = stats.spearmanr(drop_nan[2011], drop_nan['tier2'])
corr_coefficient, p_value = spearman_2011
corr_coefficient