In [53]:
import geopandas as gpd
import pandas as pd
import psrcelmerpy

In [54]:
equity_data_year = '2023'

In [55]:
# Load parcel geodata
df_parcel = pd.read_csv(r'R:\e2projects_two\SoundCast\Inputs\dev\landuse\2023\23_on_23_v3\parcels_urbansim.txt',
                         sep='\s+')
# df_parcel = pd.read_csv(r'R:\e2projects_two\SoundCast\Inputs\dev\landuse\2050\rtp_2026_2050\parcels_urbansim.txt',
#                          sep='\s+')

In [56]:
df_parcel.parcelid

0                1
1                2
2                3
3                4
4                5
            ...   
1329923    1329924
1329924    1329925
1329925    1329926
1329926    1329927
1329927    1329928
Name: parcelid, Length: 1329928, dtype: int64

In [57]:
gdf_parcel_full = gpd.GeoDataFrame(
    df_parcel, geometry=gpd.points_from_xy(df_parcel.xcoord_p, df_parcel.ycoord_p), crs="EPSG:2285"
)

In [58]:
gdf_parcel = gdf_parcel_full[['parcelid', 'geometry']]

In [59]:
def load_layer(eg_conn, layer_name, col_list=None):
    gdf = eg_conn.read_geolayer(layer_name)
    if col_list:
        gdf = gdf[col_list]
    gdf = gdf.to_crs('EPSG:2285')

    return gdf

In [60]:
# Merge with Census layers
eg_conn = psrcelmerpy.ElmerGeoConn()
for layer_name, geoid_field in {
    # 'block2010': 'geoid10',
    'block2020': 'geoid20'}.items():
    gdf = load_layer(eg_conn, layer_name, [geoid_field,'geometry'])
    gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
    gdf_parcel.drop(columns=['index_right'], inplace=True)


In [61]:
#  Get the block group and tract from the geoid
# gdf_parcel['Census2010BlockGroup'] = gdf_parcel['geoid10'].astype('str').apply(lambda x: x[0:12])
# gdf_parcel['Census2010Tract'] = gdf_parcel['geoid10'].astype('str').apply(lambda x: x[0:11])
# gdf_parcel['Census2010Block'] = gdf_parcel['geoid10'].copy()
gdf_parcel['Census2020BlockGroup'] = gdf_parcel['geoid20'].astype('str').apply(lambda x: x[0:12])
gdf_parcel['Census2020Tract'] = gdf_parcel['geoid20'].astype('str').apply(lambda x: x[0:11])
gdf_parcel['Census2020Block'] = gdf_parcel['geoid20'].copy()

gdf_parcel.rename(columns={
    # 'geoid10': 'GEOID10', 
    'geoid20': 'GEOID20'}, inplace=True)

In [62]:
# Merge with regional geography
gdf = load_layer(eg_conn, 'regional_geographies', ['class_desc','geometry'])
gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
gdf_parcel.rename(columns={'class_desc': 'rg_proposed'}, inplace=True)
gdf_parcel.drop(columns=['index_right'], inplace=True)

In [63]:
# Merge with city boundaries
gdf = load_layer(eg_conn, 'cities', ['city_name','geometry'])
gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
gdf_parcel.rename(columns={'city_name': 'CityName'}, inplace=True)
gdf_parcel.drop(columns=['index_right'], inplace=True)

In [64]:
# Merge with county boundaries
gdf = load_layer(eg_conn, 'county_background', ['county_nm','geometry'])
gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
gdf_parcel.rename(columns={'county_nm': 'CountyName'}, inplace=True)
gdf_parcel.drop(columns=['index_right'], inplace=True)

In [65]:
len(gdf_parcel)

1329928

In [66]:
# Merge with FAZ
# gdf = load_layer(eg_conn, 'faz_2010', ['faz10','geometry'])
# gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
# gdf_parcel.rename(columns={'faz10': 'FAZID'}, inplace=True)
# gdf_parcel.drop(columns=['index_right'], inplace=True)

# Merge with TAZ
gdf = load_layer(eg_conn, 'taz2010', ['taz','geometry'])
gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
gdf_parcel.rename(columns={'taz': 'taz_p'}, inplace=True)
gdf_parcel.drop(columns=['index_right'], inplace=True)
gdf_parcel['TAZ'] = gdf_parcel['taz_p'].copy()

# district
gdf = load_layer(eg_conn, 'soundcast_taz_districts', ['district','new_distri','geometry'])
gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
gdf_parcel.rename(columns={'district': 'District', 'new_distri': 'district_name'}, inplace=True)
gdf_parcel.drop(columns=['index_right'], inplace=True)

# regional growth centers
gdf = load_layer(eg_conn, 'urban_centers', ['name','geometry'])
gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
gdf_parcel.rename(columns={'name': 'GrowthCenterName'}, inplace=True)
gdf_parcel.drop(columns=['index_right'], inplace=True)

In [67]:
# regional growth centers
gdf = load_layer(eg_conn, 'micen', ['mic','geometry'])
gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
gdf_parcel.drop(columns=['index_right'], inplace=True)


In [68]:
# census place
# gdf = load_layer(eg_conn, 'place2010', ['name10','geometry'])
# gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
# gdf_parcel.rename(columns={'name10': 'place_name_2010'}, inplace=True)
# gdf_parcel.drop(columns=['index_right'], inplace=True)

gdf = load_layer(eg_conn, 'place2020', ['name','geometry'])
gdf_parcel = gpd.sjoin(gdf_parcel, gdf, how="left")
gdf_parcel.rename(columns={'name': 'place_name_2020'}, inplace=True)
gdf_parcel.drop(columns=['index_right'], inplace=True)

In [69]:
gdf_parcel.columns

Index(['parcelid', 'geometry', 'GEOID20', 'Census2020BlockGroup',
       'Census2020Tract', 'Census2020Block', 'rg_proposed', 'CityName',
       'CountyName', 'taz_p', 'TAZ', 'District', 'district_name',
       'GrowthCenterName', 'mic', 'place_name_2020'],
      dtype='object')

In [None]:
# Get new equity geographies
# FIXME: pull from Elmer when available
gdf_efa = gpd.read_file(r"C:\Users\Modeller\OneDrive - Puget Sound Regional Council\GIS - RTP_2026\equity_focus_areas\efa_3groupings_1SD\equity_focus_areas_2023_acs.gdb",
                        layer='overall')
gdf_parcel = gdf_parcel.merge(gdf_efa[["L0ElmerGeo_DBO_tract2020_nowater_geoid20",
         "equity_focus_areas_2023__efa_poc",
         "equity_focus_areas_2023__efa_pov200",
         "equity_focus_areas_2023__efa_lep",
         "equity_focus_areas_2023__efa_youth",
         "equity_focus_areas_2023__efa_older",
         "equity_focus_areas_2023__efa_dis",
         ]], left_on='Census2020Tract', right_on='L0ElmerGeo_DBO_tract2020_nowater_geoid20', how='left')

Unnamed: 0,L0ElmerGeo_DBO_tract2020_nowater_geoid20,equity_focus_areas_2023__efa_poc,equity_focus_areas_2023__efa_pov200,equity_focus_areas_2023__efa_lep,equity_focus_areas_2023__efa_youth,equity_focus_areas_2023__efa_older,equity_focus_areas_2023__efa_dis,equity_focus_areas_2023__efa_overall,Shape_Length,Shape_Area,geometry
0,53033000101,2.0,2.0,2.0,0.0,0.0,1.0,2.0,8662.102066,4.465063e+06,"MULTIPOLYGON (((1282039.651 271033.927, 128203..."
1,53033000102,0.0,1.0,0.0,0.0,2.0,1.0,2.0,18709.713553,1.591965e+07,"MULTIPOLYGON (((1283563.07 271006.33, 1283572...."
2,53033000201,1.0,1.0,1.0,0.0,0.0,2.0,2.0,18651.597930,1.944626e+07,"MULTIPOLYGON (((1277047.636 271197.295, 127704..."
3,53033000202,1.0,0.0,1.0,0.0,0.0,0.0,1.0,17285.856345,1.593036e+07,"MULTIPOLYGON (((1279038.171 271125.999, 127937..."
4,53033000300,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17298.155480,1.303428e+07,"MULTIPOLYGON (((1273717.013 271310.03, 1273714..."
...,...,...,...,...,...,...,...,...,...,...,...
914,53061053801,0.0,1.0,0.0,0.0,1.0,1.0,1.0,764870.152845,1.228220e+10,"MULTIPOLYGON (((1565308.342 358869.75, 1565308..."
915,53061053802,0.0,0.0,1.0,0.0,1.0,1.0,1.0,81069.964520,2.423798e+08,"MULTIPOLYGON (((1403591.018 314916.886, 140351..."
916,53061053803,0.0,1.0,0.0,0.0,1.0,1.0,1.0,288032.842773,2.957189e+09,"MULTIPOLYGON (((1460382.695 358064.657, 146048..."
917,53061940001,0.0,1.0,0.0,1.0,1.0,2.0,2.0,141411.191399,5.675567e+08,"MULTIPOLYGON (((1296243.431 412813.688, 129668..."


In [71]:
# •	0 = not EFA, where % share of tract-level equity population is below the regional average
# •	1 = EFA (above regional average), where the % share of tract-level equity population is between the regional average and 1 standard deviation above the regional average
# •	2 = EFA (higher share of equity focus population), where % share of tract-level equity population is 1 standard deviation above the regional average

# gdf_efa only includes tracts within equity geography. Fill NaN with 0
gdf_parcel['equity_focus_areas_2023__efa_dis'] = gdf_parcel['equity_focus_areas_2023__efa_dis'].fillna(0)

In [72]:
# gdf_parcel['equity_focus_areas_2023__efa_dis'].value_counts()

In [73]:
# We can use Elmer tables to get the equity geography data
e_conn = psrcelmerpy.ElmerConn()

# See this for reference http://aws-linux/mediawiki/index.php/Equity_Geographies_in_Elmer
df_equity = e_conn.get_query("select geoid, equity_geog_vs_50_percent, equity_geog_vs_reg_total from census.racial_equity_geographies("+equity_data_year+", 'Tract')")
df_equity.rename(columns={'equity_geog_vs_50_percent': 'racial_geog_vs_50_percent',
                          'equity_geog_vs_reg_total': 'racial_geog_vs_reg_total'},
                          inplace=True)

df = e_conn.get_query("select geoid, equity_geog_vs_50_percent, equity_geog_vs_reg_total from census.disability_equity_geographies("+equity_data_year+", 'Tract')")
df.rename(columns={'equity_geog_vs_50_percent': 'disability_geog_vs_50_percent',
                   'equity_geog_vs_reg_total': 'disability_geog_vs_reg_total'},
                   inplace=True)
df_equity = df_equity.merge(df, on='geoid')

df = e_conn.get_query("select geoid, equity_geog_vs_50_percent, equity_geog_vs_reg_total from census.elderly_equity_geographies("+equity_data_year+", 'Tract')")
df.rename(columns={'equity_geog_vs_50_percent': 'elderly_geog_vs_50_percent',
                   'equity_geog_vs_reg_total': 'elderly_geog_vs_reg_total'},
                   inplace=True)
df_equity = df_equity.merge(df, on='geoid')

df = e_conn.get_query("select geoid, equity_geog_vs_50_percent, equity_geog_vs_reg_total from census.limited_english_equity_geographies("+equity_data_year+", 'Tract')")
df.rename(columns={'equity_geog_vs_50_percent': 'english_geog_vs_50_percent',
                   'equity_geog_vs_reg_total': 'english_geog_vs_reg_total'},
                   inplace=True)
df_equity = df_equity.merge(df, on='geoid')

df = e_conn.get_query("select geoid, equity_geog_vs_50_percent, equity_geog_vs_reg_total from census.poverty_equity_geographies("+equity_data_year+", 'Tract')")
df.rename(columns={'equity_geog_vs_50_percent': 'poverty_geog_vs_50_percent',
                   'equity_geog_vs_reg_total': 'poverty_geog_vs_reg_total'},
                   inplace=True)
df_equity = df_equity.merge(df, on='geoid')

df = e_conn.get_query("select geoid, equity_geog_vs_50_percent, equity_geog_vs_reg_total from census.youth_equity_geographies("+equity_data_year+", 'Tract')")
df.rename(columns={'equity_geog_vs_50_percent': 'youth_geog_vs_50_percent',
                   'equity_geog_vs_reg_total': 'youth_geog_vs_reg_total'},
                   inplace=True)
df_equity = df_equity.merge(df, on='geoid')

In [74]:
# Merge to geodataframe
# Note, per ACS the geography should correspond with the latest data in ACS 3- or 5-year data (or the given year of 1-year ACS).
# This means we will use 2020 geography for 5-year ACS data from 2017-2022, which is listed as 2022 data in Elmer for the tract equity data 
# https://www.census.gov/programs-surveys/acs/geography-acs/geography-boundaries-by-year.2022.html

# join to geodataframe basedon tract
if int(equity_data_year) >= 2020:
    gdf_col = 'Census2020Tract'
else:
    gdf_col = 'Census2010Tract'

gdf_parcel = gdf_parcel.merge(df_equity, left_on=gdf_col, right_on='geoid', how='left')

In [75]:
# 
gdf_parcel.rename(columns={'PARCEL_ID': 'ParcelID'}, inplace=True)
gdf_parcel['BaseYear'] = 2023

In [76]:
# Specifically label parcels outside of RGCs
gdf_parcel['GrowthCenterName'] = gdf_parcel['GrowthCenterName'].fillna('Not in RGC')

# Rename parcel ID to match convention
gdf_parcel.rename(columns={'parcelid': 'ParcelID'}, inplace=True)

In [77]:
# Rename counties that are outside the region
gdf_parcel.loc[~gdf_parcel['CountyName'].isin(['King','Kitsap','Pierce','Snohomish']), 'CountyName'] = 'Outside Region'

In [78]:
# Load HCT station data provided by Craig
gdf_stops = gpd.read_file(r'R:\e2projects_two\2023_base_year\network\transit_stops.shp')
gdf_stops.rename(columns={'all_day': 'all_day_transit',
                          'frequent': 'frequent_transit',
                          'min_routes': 'min_transit'}, inplace=True)

In [79]:
gdf_stops.head()
# FIXME: make sure the geography matches

Unnamed: 0,stop_id,stop_name,frequent_transit,all_day_transit,min_transit,hct,geometry
0,ct_1,Marine Dr NE & 27th Ave NE,0.0,0.0,0.0,0.0,"POLYGON ((1308628.035 388068.676, 1308626.226 ..."
1,ct_7,Marine Dr NE & 23rd Ave NE,0.0,0.0,0.0,0.0,"POLYGON ((1307297.752 387728.165, 1307295.943 ..."
2,ct_12,Totem Beach Rd & Tulalip Bay Dr,0.0,0.0,0.0,0.0,"POLYGON ((1288816.444 390279.579, 1288814.635 ..."
3,ct_13,Totem Beach Rd & 70th St NW,0.0,0.0,0.0,0.0,"POLYGON ((1289288.48 389692.453, 1289286.671 3..."
4,ct_18,Marine Dr NE & 19th Ave NE,0.0,0.0,0.0,0.0,"POLYGON ((1306231.57 387167.052, 1306229.761 3..."


In [80]:
gdf_all_day = gdf_stops[gdf_stops['all_day_transit']==1][['all_day_transit','geometry']]
gdf_frequent = gdf_stops[gdf_stops['frequent_transit']==1][['frequent_transit','geometry']]
gdf_hct = gdf_stops[gdf_stops['hct']==1][['hct','geometry']]
gdf_min = gdf_stops[gdf_stops['min_transit']==1][['min_transit','geometry']]

In [81]:


# Merge spatially join each to parcel data
gdf_parcel = gpd.sjoin(gdf_parcel, gdf_all_day, how="left")
gdf_parcel.drop('index_right', axis=1, inplace=True)
gdf_parcel['all_day_transit'].fillna(0, inplace=True)
gdf_parcel.drop_duplicates(inplace=True)

gdf_parcel = gpd.sjoin(gdf_parcel, gdf_frequent, how="left")
gdf_parcel.drop('index_right', axis=1, inplace=True)
gdf_parcel['frequent_transit'].fillna(0, inplace=True)
gdf_parcel.drop_duplicates(inplace=True)

gdf_parcel = gpd.sjoin(gdf_parcel, gdf_hct, how="left")
gdf_parcel.drop('index_right', axis=1, inplace=True)
gdf_parcel['hct'].fillna(0, inplace=True)
gdf_parcel.drop_duplicates(inplace=True)

gdf_parcel = gpd.sjoin(gdf_parcel, gdf_min, how="left")
gdf_parcel.drop('index_right', axis=1, inplace=True)
gdf_parcel['min_transit'].fillna(0, inplace=True)
gdf_parcel.drop_duplicates(inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  gdf_parcel['all_day_transit'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  gdf_parcel['frequent_transit'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are

In [82]:
# gdf_parcel

In [83]:
# gdf_parcel.drop(['geoid','taz_p'], axis=1, inplace=True)

# # Set some fields as integers
# col_list = ['TAZ','District',
#        #      'racial_geog_vs_50_percent', 'racial_geog_vs_reg_total',
#        # 'disability_geog_vs_50_percent', 'disability_geog_vs_reg_total',
#        # 'elderly_geog_vs_50_percent', 'elderly_geog_vs_reg_total',
#        # 'english_geog_vs_50_percent', 'english_geog_vs_reg_total',
#        # 'poverty_geog_vs_50_percent', 'poverty_geog_vs_reg_total',
#        # 'youth_geog_vs_50_percent', 'youth_geog_vs_reg_total',
#        'all_day_transit','frequent_transit','hct','min_transit']
# gdf_parcel[col_list] = gdf_parcel[col_list].fillna(-1).astype('int32')

In [84]:
gdf_parcel.drop_duplicates(inplace=True)

In [85]:
len(gdf_parcel)

1329928

In [86]:
gdf_parcel.to_csv(r'C:\Workspace\parcel_2023_geography.csv')

In [87]:
gdf_parcel.columns

Index(['ParcelID', 'geometry', 'GEOID20', 'Census2020BlockGroup',
       'Census2020Tract', 'Census2020Block', 'rg_proposed', 'CityName',
       'CountyName', 'taz_p', 'TAZ', 'District', 'district_name',
       'GrowthCenterName', 'mic', 'place_name_2020',
       'L0ElmerGeo_DBO_tract2020_nowater_geoid20',
       'equity_focus_areas_2023__efa_dis', 'geoid',
       'racial_geog_vs_50_percent', 'racial_geog_vs_reg_total',
       'disability_geog_vs_50_percent', 'disability_geog_vs_reg_total',
       'elderly_geog_vs_50_percent', 'elderly_geog_vs_reg_total',
       'english_geog_vs_50_percent', 'english_geog_vs_reg_total',
       'poverty_geog_vs_50_percent', 'poverty_geog_vs_reg_total',
       'youth_geog_vs_50_percent', 'youth_geog_vs_reg_total', 'BaseYear',
       'all_day_transit', 'frequent_transit', 'hct', 'min_transit'],
      dtype='object')

In [88]:
# gdf_parcel.drop(['all_day_transit','BaseYear','all_day_transit','frequent_transit','hct','min_transit'], axis=1, inplace=True)
# gdf_parcel.drop(['taz_p'], axis=1, inplace=True)

In [89]:
# gdf_parcel.drop(['all_day_transit','BaseYear','all_day_transit','frequent_transit','hct','min_transit'], axis=1, inplace=True)

In [90]:
# gdf_parcel[['TAZ','District']] = gdf_parcel[['TAZ','District']].fillna(-1).astype('int32')

In [91]:
# Export as shapefile
# gdf_parcel.to_file(r'T:\60day-TEMP\Brice\parcels\2024\parcel_2024.shp', driver='ESRI Shapefile')
# gdf_parcel.to_file(r'T:\60day-TEMP\Brice\parcels\2050\parcel_2050.shp', driver='ESRI Shapefile')
# gdf_parcel = pd.read_csv(r'C:\Workspace\parcel_2023_geography.csv')

In [92]:
gdf_parcel.head()

Unnamed: 0,ParcelID,geometry,GEOID20,Census2020BlockGroup,Census2020Tract,Census2020Block,rg_proposed,CityName,CountyName,taz_p,...,english_geog_vs_reg_total,poverty_geog_vs_50_percent,poverty_geog_vs_reg_total,youth_geog_vs_50_percent,youth_geog_vs_reg_total,BaseYear,all_day_transit,frequent_transit,hct,min_transit
0,1,POINT (1292255.144 162728.617),530330292061006,530330292061,53033029206,530330292061006,Core,Kent,King,1019.0,...,1.0,0.0,1.0,0.0,0.0,2023,0.0,0.0,0.0,0.0
1,2,POINT (1291832.241 164041.743),530330292061018,530330292061,53033029206,530330292061018,Core,Kent,King,1018.0,...,1.0,0.0,1.0,0.0,0.0,2023,1.0,0.0,0.0,1.0
2,3,POINT (1291594.615 164048.67),530330292061018,530330292061,53033029206,530330292061018,Core,Kent,King,1018.0,...,1.0,0.0,1.0,0.0,0.0,2023,1.0,1.0,0.0,1.0
3,4,POINT (1291539.635 164050.179),530330292061018,530330292061,53033029206,530330292061018,Core,Kent,King,1018.0,...,1.0,0.0,1.0,0.0,0.0,2023,1.0,1.0,0.0,1.0
4,5,POINT (1291479.355 164042.397),530330292061018,530330292061,53033029206,530330292061018,Core,Kent,King,1018.0,...,1.0,0.0,1.0,0.0,0.0,2023,1.0,1.0,0.0,1.0
