In [1]:
import geopandas as gpd
import pandas as pd
import psrcelmerpy

  shapely_geos_version, geos_capi_version_string


In [2]:
# Year passed (as text) to the Elmer equity-geography queries below; it also
# selects which Census tract vintage column is used for the equity merge.
equity_data_year = '2023'

In [3]:
# Load parcel geodata from the whitespace-delimited parcels_urbansim.txt file.
# The separator is a regular expression, so it is written as a raw string:
# '\s' inside a normal string literal is an invalid escape sequence.
df_parcel = pd.read_csv(
    r'R:\e2projects_two\SoundCast\Inputs\dev\landuse\2023\23_on_23_v3\parcels_urbansim.txt',
    sep=r'\s+',
)

In [4]:
# Row count of the raw parcel table
df_parcel.shape[0]

1329928

In [5]:
# Turn the parcel x/y coordinate columns into point geometries (EPSG:2285)
parcel_points = gpd.points_from_xy(df_parcel['xcoord_p'], df_parcel['ycoord_p'])
gdf_parcel_full = gpd.GeoDataFrame(df_parcel, geometry=parcel_points, crs="EPSG:2285")

In [6]:
# Keep only the parcel key and its geometry for the spatial joins that follow
parcel_key_cols = ['parcelid', 'geometry']
gdf_parcel = gdf_parcel_full[parcel_key_cols]

In [7]:
def load_layer(eg_conn, layer_name, col_list=None, crs='EPSG:2285'):
    """Read a geolayer through ``eg_conn`` and reproject it.

    Parameters
    ----------
    eg_conn : object exposing ``read_geolayer(layer_name)``
        Connection used to fetch the layer (an ``ElmerGeoConn`` in this notebook).
    layer_name : str
        Name of the layer passed to ``read_geolayer``.
    col_list : list of str, optional
        Columns to keep. When empty or None, every column is kept. Include
        'geometry' if the result is to be reprojected and spatially joined.
    crs : str, optional
        Target coordinate reference system; defaults to 'EPSG:2285', the CRS
        the parcel points are created in.

    Returns
    -------
    The layer (optionally column-subset) after ``to_crs(crs)``
    (presumably a GeoDataFrame; depends on what ``read_geolayer`` returns).
    """
    layer = eg_conn.read_geolayer(layer_name)
    if col_list:
        layer = layer[col_list]

    return layer.to_crs(crs)

In [8]:
# Attach Census block GEOIDs to each parcel point with a left spatial join
eg_conn = psrcelmerpy.ElmerGeoConn()
census_block_layers = {
    # 'block2010': 'geoid10',
    'block2020': 'geoid20',
}
for layer_name, geoid_field in census_block_layers.items():
    gdf = load_layer(eg_conn, layer_name, [geoid_field, 'geometry'])
    # index_right is the join bookkeeping column added by sjoin; it is not needed
    gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left").drop(columns=['index_right'])


In [9]:
# Derive block group (first 12 characters) and tract (first 11 characters)
# from the block GEOID
# gdf_parcel['Census2010BlockGroup'] = gdf_parcel['geoid10'].astype('str').apply(lambda x: x[0:12])
# gdf_parcel['Census2010Tract'] = gdf_parcel['geoid10'].astype('str').apply(lambda x: x[0:11])
# gdf_parcel['Census2010Block'] = gdf_parcel['geoid10'].copy()
# NOTE(review): astype('str') turns a missing GEOID into the text 'nan', so
# parcels without a block match presumably get 'nan' here - confirm intended.
geoid20_text = gdf_parcel['geoid20'].astype('str')
gdf_parcel['Census2020BlockGroup'] = geoid20_text.str[0:12]
gdf_parcel['Census2020Tract'] = geoid20_text.str[0:11]
gdf_parcel['Census2020Block'] = gdf_parcel['geoid20'].copy()

geoid_renames = {
    # 'geoid10': 'GEOID10',
    'geoid20': 'GEOID20',
}
gdf_parcel = gdf_parcel.rename(columns=geoid_renames)

In [10]:
# Tag each parcel with its regional geography class (stored as rg_proposed)
gdf = load_layer(eg_conn, 'regional_geographies', ['class_desc', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns={'class_desc': 'rg_proposed'})
    .drop(columns=['index_right'])
)

In [11]:
# Tag each parcel with the city whose boundary contains it (stored as CityName)
gdf = load_layer(eg_conn, 'cities', ['city_name', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns={'city_name': 'CityName'})
    .drop(columns=['index_right'])
)

In [12]:
# Tag each parcel with its county (stored as CountyName)
gdf = load_layer(eg_conn, 'county_background', ['county_nm', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns={'county_nm': 'CountyName'})
    .drop(columns=['index_right'])
)

In [13]:
# Row count after the joins so far; compare with the raw parcel count above
gdf_parcel.shape[0]

1329928

In [14]:
# Merge with FAZ
# gdf = load_layer(eg_conn, 'faz_2010', ['faz10','geometry'])
# gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
# gdf_parcel.rename(columns={'faz10': 'FAZID'}, inplace=True)
# gdf_parcel.drop(columns=['index_right'], inplace=True)

# TAZ: joined as taz_p, then duplicated into a second column named TAZ
gdf = load_layer(eg_conn, 'taz2010', ['taz', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns={'taz': 'taz_p'})
    .drop(columns=['index_right'])
)
gdf_parcel['TAZ'] = gdf_parcel['taz_p'].copy()

# District id and district name
district_renames = {'district': 'District', 'new_distri': 'district_name'}
gdf = load_layer(eg_conn, 'soundcast_taz_districts', ['district', 'new_distri', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns=district_renames)
    .drop(columns=['index_right'])
)

# Regional growth centers (name is missing for parcels outside every center)
gdf = load_layer(eg_conn, 'urban_centers', ['name', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns={'name': 'GrowthCenterName'})
    .drop(columns=['index_right'])
)

In [15]:
# census place
# gdf = load_layer(eg_conn, 'place2010', ['name10','geometry'])
# gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
# gdf_parcel.rename(columns={'name10': 'place_name_2010'}, inplace=True)
# gdf_parcel.drop(columns=['index_right'], inplace=True)

# 2020 Census place name for each parcel (stored as place_name_2020)
gdf = load_layer(eg_conn, 'place2020', ['name', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns={'name': 'place_name_2020'})
    .drop(columns=['index_right'])
)

In [16]:
# df_equity.head()

In [17]:
# We can use Elmer tables to get the equity geography data
e_conn = psrcelmerpy.ElmerConn()

# See this for reference http://aws-linux/mediawiki/index.php/Equity_Geographies_in_Elmer
# Each entry maps the prefix of an Elmer table function
# (census.<prefix>_equity_geographies) to the prefix used for its two output
# columns. Insertion order fixes the column order of df_equity.
equity_groups = {
    'racial': 'racial',
    'disability': 'disability',
    'elderly': 'elderly',
    'limited_english': 'english',
    'poverty': 'poverty',
    'youth': 'youth',
}

# Round-trip through int() so only a numeric year reaches the SQL text
equity_year_sql = str(int(equity_data_year))

df_equity = None
for elmer_prefix, col_prefix in equity_groups.items():
    df = e_conn.get_query(
        "select geoid, equity_geog_vs_50_percent, equity_geog_vs_reg_total "
        "from census." + elmer_prefix + "_equity_geographies(" + equity_year_sql + ", 'Tract')"
    )
    df = df.rename(columns={
        'equity_geog_vs_50_percent': col_prefix + '_geog_vs_50_percent',
        'equity_geog_vs_reg_total': col_prefix + '_geog_vs_reg_total',
    })
    # The first result seeds the table; later ones are merged on geoid
    # (default inner join)
    df_equity = df if df_equity is None else df_equity.merge(df, on='geoid')

In [18]:
# Merge to geodataframe
# Note, per ACS the geography should correspond with the latest data in ACS 3- or 5-year data (or the given year of 1-year ACS).
# This means we will use 2020 geography for 5-year ACS data from 2017-2022, which is listed as 2022 data in Elmer for the tract equity data 
# https://www.census.gov/programs-surveys/acs/geography-acs/geography-boundaries-by-year.2022.html

# Join to the geodataframe based on tract, choosing the tract vintage by year
if int(equity_data_year) >= 2020:
    gdf_col = 'Census2020Tract'
else:
    gdf_col = 'Census2010Tract'

# The 2010 tract column is only created when the 2010 block join is enabled;
# fail with an explicit message instead of an opaque merge error.
if gdf_col not in gdf_parcel.columns:
    raise KeyError(
        "Column '" + gdf_col + "' is missing from gdf_parcel; enable the matching "
        "Census block join before merging equity data for year " + str(equity_data_year)
    )

gdf_parcel = gdf_parcel.merge(df_equity, left_on=gdf_col, right_on='geoid', how='left')

In [19]:
# Rename a 'PARCEL_ID' column to 'ParcelID' (rename leaves the frame unchanged
# if no such column exists), then stamp every parcel with the base year.
gdf_parcel = gdf_parcel.rename(columns={'PARCEL_ID': 'ParcelID'})
gdf_parcel['BaseYear'] = 2023

In [20]:
# Parcels without a regional growth center match get an explicit label
rgc_name = gdf_parcel['GrowthCenterName']
gdf_parcel['GrowthCenterName'] = rgc_name.where(rgc_name.notna(), 'Not in RGC')

# Use the conventional ParcelID column name
gdf_parcel = gdf_parcel.rename(columns={'parcelid': 'ParcelID'})

In [21]:
# Any county name other than the four listed (including missing values) is
# relabelled as 'Outside Region'
region_counties = ['King', 'Kitsap', 'Pierce', 'Snohomish']
outside_region = ~gdf_parcel['CountyName'].isin(region_counties)
gdf_parcel.loc[outside_region, 'CountyName'] = 'Outside Region'

In [22]:
# Load HCT station data provided by Craig
gdf_stops = gpd.read_file(r'R:\e2projects_two\2023_base_year\network\transit_stops.shp')
gdf_stops = gdf_stops.rename(columns={'all_day': 'all_day_transit',
                                      'frequent': 'frequent_transit',
                                      'min_routes': 'min_transit'})

# The stops are spatially joined to the parcel points, which are in EPSG:2285,
# so reproject whenever the shapefile declares a CRS (same target as
# load_layer). A file with no declared CRS is left untouched because to_crs
# cannot transform it.
if gdf_stops.crs is not None:
    gdf_stops = gdf_stops.to_crs('EPSG:2285')

In [23]:
# FIXME: make sure the geography matches
# Preview the first five stop records
gdf_stops.head(5)

Unnamed: 0,stop_id,stop_name,frequent_transit,all_day_transit,min_transit,hct,geometry
0,ct_1,Marine Dr NE & 27th Ave NE,0.0,0.0,0.0,0.0,"POLYGON ((1308628.035 388068.676, 1308626.226 ..."
1,ct_7,Marine Dr NE & 23rd Ave NE,0.0,0.0,0.0,0.0,"POLYGON ((1307297.752 387728.165, 1307295.943 ..."
2,ct_12,Totem Beach Rd & Tulalip Bay Dr,0.0,0.0,0.0,0.0,"POLYGON ((1288816.444 390279.579, 1288814.635 ..."
3,ct_13,Totem Beach Rd & 70th St NW,0.0,0.0,0.0,0.0,"POLYGON ((1289288.480 389692.453, 1289286.671 ..."
4,ct_18,Marine Dr NE & 19th Ave NE,0.0,0.0,0.0,0.0,"POLYGON ((1306231.570 387167.052, 1306229.761 ..."


In [32]:
# One layer per transit flag: keep only stops where the flag equals 1, and
# only the flag column plus geometry.
is_all_day = gdf_stops['all_day_transit'] == 1
is_frequent = gdf_stops['frequent_transit'] == 1
is_hct = gdf_stops['hct'] == 1
is_min = gdf_stops['min_transit'] == 1

gdf_all_day = gdf_stops[is_all_day][['all_day_transit', 'geometry']]
gdf_frequent = gdf_stops[is_frequent][['frequent_transit', 'geometry']]
gdf_hct = gdf_stops[is_hct][['hct', 'geometry']]
gdf_min = gdf_stops[is_min][['min_transit', 'geometry']]

In [None]:


# Spatially join each transit-stop layer to the parcel data.
# Keys are the flag columns carried by each layer; order fixes column order.
transit_layers = {
    'all_day_transit': gdf_all_day,
    'frequent_transit': gdf_frequent,
    'hct': gdf_hct,
    'min_transit': gdf_min,
}

for flag_col, gdf_flag in transit_layers.items():
    gdf_parcel = gpd.sjoin(gdf_parcel, gdf_flag, how="left").drop(columns=['index_right'])
    # Assign the filled column back. Calling fillna(inplace=True) on a column
    # selection is a chained in-place update, which pandas deprecates and
    # which does not modify the frame under Copy-on-Write.
    gdf_parcel[flag_col] = gdf_parcel[flag_col].fillna(0)
    # A parcel matching several stop polygons produces identical rows once
    # index_right is dropped; keep one of each.
    gdf_parcel = gdf_parcel.drop_duplicates()

KeyError: 'gdf_min'

In [None]:
# Row count after the transit joins and de-duplication
gdf_parcel.shape[0]

In [None]:
# Drop the join helper columns that are no longer needed
gdf_parcel = gdf_parcel.drop(columns=['geoid', 'taz_p'])

# Set some fields as integers; missing values become -1 before the cast
zone_cols = ['TAZ', 'District']
equity_flag_cols = [
    'racial_geog_vs_50_percent', 'racial_geog_vs_reg_total',
    'disability_geog_vs_50_percent', 'disability_geog_vs_reg_total',
    'elderly_geog_vs_50_percent', 'elderly_geog_vs_reg_total',
    'english_geog_vs_50_percent', 'english_geog_vs_reg_total',
    'poverty_geog_vs_50_percent', 'poverty_geog_vs_reg_total',
    'youth_geog_vs_50_percent', 'youth_geog_vs_reg_total',
]
transit_flag_cols = ['all_day_transit', 'frequent_transit', 'hct', 'min_transit']
col_list = zone_cols + equity_flag_cols + transit_flag_cols

int_values = gdf_parcel[col_list].fillna(-1)
gdf_parcel[col_list] = int_values.astype('int32')

In [None]:
# Remove any fully identical rows that remain
gdf_parcel = gdf_parcel.drop_duplicates()

In [None]:
# Final row count; compare with the raw parcel count
gdf_parcel.shape[0]

1329928

In [None]:
# Write the parcel geography table to CSV (the frame index is written as the
# first column, since to_csv is called with its defaults)
output_csv_path = r'C:\Workspace\parcel_2023_geography.csv'
gdf_parcel.to_csv(output_csv_path)

In [None]:
# gdf_parcel = pd.read_csv(r'C:\Workspace\parcel_2023_geography.csv')

In [None]:
# gdf_parcel['hct'].value_counts()

0    1268116
1      61812
Name: hct, dtype: int64