In [84]:
import geopandas as gpd
import pandas as pd
import psrcelmerpy

In [85]:
# Existing geography lookup, loaded so its column names can be compared
# against the new output at the end of the notebook.
# NOTE: this is the same path the notebook later writes gdf_parcel to, so it
# must be read before the export cell runs.
df_cols = pd.read_csv(r'R:\e2projects_two\SoundCast\Inputs\db_inputs\parcel_2023_geography.csv')

# Load parcel geodata
# Alternative source (disabled), keyed by PARCEL_ID rather than parcelid:
# df_parcel = gpd.read_file(r'R:\e2projects_two\2023_base_year\all_streets\2023_parcels\inputs\parcels_2023.csv')
# df_parcel['PARCEL_ID'] = df_parcel['PARCEL_ID'].apply(lambda x: x.split('.')[0]).astype('int64')

# parcels_urbansim.txt is whitespace-delimited. The separator is a raw string so
# the regex token \s is not parsed as an (invalid) Python string escape.
df_parcel = pd.read_csv(r'R:\e2projects_two\SoundCast\Inputs\dev\landuse\2023\23_on_23_v2\parcels_urbansim.txt',
                        sep=r'\s+')

In [86]:
# Preview the first rows to confirm the whitespace-delimited file parsed into columns
df_parcel.head()

Unnamed: 0,aparks,empedu_p,empfoo_p,empgov_p,empind_p,empmed_p,empofc_p,empoth_p,empret_p,emprsc_p,...,ppricdyp,pprichrp,sfunits,sqft_p,stugrd_p,stuhgh_p,stuuni_p,taz_p,xcoord_p,ycoord_p
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,85802.59375,0,0,0,1019,1292255.0,162728.617255
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0.0,0,0,0,1018,1291832.0,164041.742835
2,0,0,0,0,0,0,2,0,0,0,...,0,0,0,6000.066365,0,0,0,1018,1291595.0,164048.669737
3,0,0,0,0,2,0,0,2,2,0,...,0,0,0,7200.084044,0,0,0,1018,1291540.0,164050.178628
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,6068.041674,0,0,0,1018,1291479.0,164042.397388


In [87]:
# Turn each parcel record into a point built from its xcoord_p / ycoord_p
# columns, tagged as EPSG:2285.
parcel_points = gpd.points_from_xy(df_parcel['xcoord_p'], df_parcel['ycoord_p'])
gdf_parcel_full = gpd.GeoDataFrame(df_parcel, geometry=parcel_points, crs="EPSG:2285")

In [88]:
# Keep only the parcel ID and point geometry; every other attribute in the
# lookup is attached below through spatial joins and tract-level merges.
gdf_parcel = gdf_parcel_full[['parcelid', 'geometry']]

# FIXME: use a small gdf for testing
# (uncomment to run the notebook on the first 1000 parcels only)
# gdf_parcel = gdf_parcel.iloc[0:1000]

In [89]:
def load_layer(eg_conn, layer_name, col_list=None):
    """Read a geographic layer and return it reprojected to EPSG:2285.

    Parameters
    ----------
    eg_conn
        Connection object exposing ``read_geolayer(layer_name)``.
    layer_name
        Name of the layer to read.
    col_list
        Optional list of columns to keep. When empty or ``None`` all columns
        are kept. The subset is taken before reprojecting, so it should
        include the geometry column.

    Returns
    -------
    The (optionally subset) layer after ``to_crs('EPSG:2285')``, the same CRS
    assigned to the parcel points.
    """
    layer = eg_conn.read_geolayer(layer_name)
    selected = layer[col_list] if col_list else layer
    return selected.to_crs('EPSG:2285')

In [90]:
# Attach the 2010 and 2020 Census block GEOIDs to each parcel point.
eg_conn = psrcelmerpy.ElmerGeoConn()
census_block_layers = [('block2010', 'geoid10'), ('block2020', 'geoid20')]
for layer_name, geoid_field in census_block_layers:
    block_layer = load_layer(eg_conn, layer_name, [geoid_field, 'geometry'])
    # Left join keeps parcels that fall outside every block polygon;
    # index_right is sjoin bookkeeping and is dropped straight away.
    gdf_parcel = (
        gpd.sjoin(gdf_parcel, block_layer, how="left")
        .drop(columns=['index_right'])
    )


In [91]:
# Get the block group and tract from the block GEOID: the first 12 characters
# are the block group and the first 11 are the tract.
# Parcels that matched no block have a missing GEOID. Casting NaN with
# astype('str') would produce the literal text 'nan', so the slices are masked
# back to missing wherever the source GEOID is missing.
has_geoid10 = gdf_parcel['geoid10'].notna()
has_geoid20 = gdf_parcel['geoid20'].notna()
geoid10_text = gdf_parcel['geoid10'].astype('str')
geoid20_text = gdf_parcel['geoid20'].astype('str')

# Column creation order is kept as-is because it determines the output column order.
gdf_parcel['Census2010BlockGroup'] = geoid10_text.str[:12].where(has_geoid10)
gdf_parcel['Census2010Tract'] = geoid10_text.str[:11].where(has_geoid10)
gdf_parcel['Census2020BlockGroup'] = geoid20_text.str[:12].where(has_geoid20)
gdf_parcel['Census2020Tract'] = geoid20_text.str[:11].where(has_geoid20)
gdf_parcel['Census2010Block'] = gdf_parcel['geoid10'].copy()
gdf_parcel['Census2020Block'] = gdf_parcel['geoid20'].copy()

gdf_parcel = gdf_parcel.rename(columns={'geoid10': 'GEOID10', 'geoid20': 'GEOID20'})

In [92]:
# gdf_parcel.head()
# eg_conn.read_geolayer(layer_name)

In [93]:
# Regional geography class for each parcel, stored as rg_proposed.
regional_geog_layer = load_layer(eg_conn, 'regional_geographies', ['class_desc', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, regional_geog_layer, how="left")
    .rename(columns={'class_desc': 'rg_proposed'})
    .drop(columns=['index_right'])
)

In [94]:
# City name for each parcel, stored as CityName.
city_layer = load_layer(eg_conn, 'cities', ['city_name', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, city_layer, how="left")
    .rename(columns={'city_name': 'CityName'})
    .drop(columns=['index_right'])
)

In [95]:
# County name for each parcel, stored as CountyName.
county_layer = load_layer(eg_conn, 'county_background', ['county_nm', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, county_layer, how="left")
    .rename(columns={'county_nm': 'CountyName'})
    .drop(columns=['index_right'])
)

In [96]:
# Row count after the joins so far. A left spatial join can emit more than one
# row for a parcel that matches several polygons, so compare this with
# len(gdf_parcel_full) to spot duplicated parcels.
len(gdf_parcel)

1329928

In [97]:
# FAZ: faz10 -> FAZID
faz_layer = load_layer(eg_conn, 'faz_2010', ['faz10', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, faz_layer, how="left")
    .rename(columns={'faz10': 'FAZID'})
    .drop(columns=['index_right'])
)

# TAZ: taz -> taz_p, with a duplicate of the same values under the name TAZ
taz_layer = load_layer(eg_conn, 'taz2010', ['taz', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, taz_layer, how="left")
    .rename(columns={'taz': 'taz_p'})
    .drop(columns=['index_right'])
)
gdf_parcel['TAZ'] = gdf_parcel['taz_p'].copy()

# District: district -> District, new_distri -> district_name
district_layer = load_layer(eg_conn, 'soundcast_taz_districts', ['district', 'new_distri', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, district_layer, how="left")
    .rename(columns={'district': 'District', 'new_distri': 'district_name'})
    .drop(columns=['index_right'])
)

# Regional growth centers: name -> GrowthCenterName
# (renamed right away so the later place2020 join can bring in its own 'name')
rgc_layer = load_layer(eg_conn, 'urban_centers', ['name', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, rgc_layer, how="left")
    .rename(columns={'name': 'GrowthCenterName'})
    .drop(columns=['index_right'])
)

In [98]:
# Census place names, one column per vintage.
# 2010 places: name10 -> place_name_2010
place2010_layer = load_layer(eg_conn, 'place2010', ['name10', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, place2010_layer, how="left")
    .rename(columns={'name10': 'place_name_2010'})
    .drop(columns=['index_right'])
)

# 2020 places: name -> place_name_2020
place2020_layer = load_layer(eg_conn, 'place2020', ['name', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, place2020_layer, how="left")
    .rename(columns={'name': 'place_name_2020'})
    .drop(columns=['index_right'])
)

In [99]:
# We can use Elmer tables to get the equity geography data
# Note that 2023 is not yet available
# FIXME: update to use 2023 census definitions when available
e_conn = psrcelmerpy.ElmerConn()
equity_data_year = '2022'
# The racial equity query that used to run here was dropped: the next cell
# resets df_equity and issues the identical query, so this one was a wasted
# database round trip.


In [100]:



# See this for reference http://aws-linux/mediawiki/index.php/Equity_Geographies_in_Elmer
equity_data_year = '2022'

# (Elmer table-function prefix, prefix for the output columns).
# Order matters: it sets the column order of df_equity.
equity_categories = [
    ('racial', 'racial'),
    ('disability', 'disability'),
    ('elderly', 'elderly'),
    ('limited_english', 'english'),
    ('poverty', 'poverty'),
    ('youth', 'youth'),
]

# Query each tract-level equity table, rename its two flag columns with the
# category prefix, and inner-merge everything on geoid into a single frame.
df_equity = None
for table_prefix, col_prefix in equity_categories:
    equity_query = (
        "select geoid, equity_geog_vs_50_percent, equity_geog_vs_reg_total "
        "from census.{}_equity_geographies({}, 'Tract')"
    ).format(table_prefix, equity_data_year)
    df_category = e_conn.get_query(equity_query).rename(columns={
        'equity_geog_vs_50_percent': col_prefix + '_geog_vs_50_percent',
        'equity_geog_vs_reg_total': col_prefix + '_geog_vs_reg_total',
    })
    if df_equity is None:
        df_equity = df_category
    else:
        df_equity = df_equity.merge(df_category, on='geoid')

In [101]:
# Attach the tract-level equity flags to the parcels.
# Per ACS, the geography should correspond with the latest year covered by the
# 3- or 5-year data (or the given year for 1-year ACS). The tract equity data
# listed as 2022 in Elmer is 5-year ACS data, so it uses 2020 geography.
# https://www.census.gov/programs-surveys/acs/geography-acs/geography-boundaries-by-year.2022.html

# Pick the tract vintage that matches the equity data year, then join on it.
uses_2020_geography = int(equity_data_year) >= 2020
gdf_col = 'Census2020Tract' if uses_2020_geography else 'Census2010Tract'

gdf_parcel = gdf_parcel.merge(df_equity, how='left', left_on=gdf_col, right_on='geoid')

In [102]:
# rename() ignores labels that are absent, so this only has an effect when the
# frame carries a 'PARCEL_ID' column (the disabled alternative parcel source);
# the 'parcelid' column is renamed in a later cell.
gdf_parcel = gdf_parcel.rename(columns={'PARCEL_ID': 'ParcelID'})

# Stamp every row with the base year of this lookup.
gdf_parcel['BaseYear'] = 2023



In [103]:
# Parcels that matched no regional growth center get an explicit label
# instead of a missing value.
rgc_name = gdf_parcel['GrowthCenterName']
gdf_parcel['GrowthCenterName'] = rgc_name.where(rgc_name.notna(), 'Not in RGC')

# Parcel ID column follows the ParcelID naming convention.
gdf_parcel = gdf_parcel.rename(columns={'parcelid': 'ParcelID'})

In [108]:
# Anything that is not one of the four listed counties (including parcels with
# no county match) is labelled 'Outside Region'.
region_counties = ['King', 'Kitsap', 'Pierce', 'Snohomish']
in_region = gdf_parcel['CountyName'].isin(region_counties)
gdf_parcel['CountyName'] = gdf_parcel['CountyName'].where(in_region, 'Outside Region')

In [120]:
# Zone IDs and equity flags are stored as int32; missing values become -1.
equity_groups = ['racial', 'disability', 'elderly', 'english', 'poverty', 'youth']
col_list = ['taz_p', 'TAZ', 'District'] + [
    '{}_geog_vs_{}'.format(group, basis)
    for group in equity_groups
    for basis in ('50_percent', 'reg_total')
]
int_values = gdf_parcel[col_list].fillna(-1).astype('int32')
gdf_parcel[col_list] = int_values

In [123]:
# Write the finished lookup. This overwrites the same file that was read into
# df_cols at the top of the notebook as the column reference.
# to_csv writes the index by default; presumably that is where the
# 'Unnamed: 0' column in the reference file comes from.
# NOTE(review): the geometry column is still part of the frame here - confirm
# it is wanted in the CSV.
gdf_parcel.to_csv(r'R:\e2projects_two\SoundCast\Inputs\db_inputs\parcel_2023_geography.csv')

In [112]:
# Parcel counts per regional geography class. value_counts() leaves out
# missing values by default, so parcels that matched no regional geography
# polygon are not included in these totals.
gdf_parcel['rg_proposed'].value_counts()

Metro          334898
HCT            286599
Core           240845
CitiesTowns    134834
UU              57468
Name: rg_proposed, dtype: int64

In [113]:
# Parcel counts per county, including the 'Outside Region' label applied above
gdf_parcel['CountyName'].value_counts()

King              629941
Pierce            309255
Snohomish         275449
Kitsap            115181
Outside Region       102
Name: CountyName, dtype: int64

In [107]:
# Columns present in the reference lookup (df_cols) but absent from the new
# frame. This relies on df_cols having been read before the export cell
# overwrote that file.
df_cols.columns[~df_cols.columns.isin(gdf_parcel.columns)]

Index(['Unnamed: 0'], dtype='object')