In [249]:
import geopandas as gpd
import pandas as pd
import psrcelmerpy

In [250]:
# Existing geography lookup; loaded so its column names can be compared
# against the table assembled in this notebook.
reference_csv = r'R:\e2projects_two\SoundCast\Inputs\db_inputs\parcel_2023_geography.csv'
df_cols = pd.read_csv(reference_csv)

# Parcel records; point geometry is built from their coordinate columns later.
parcel_csv = r'R:\e2projects_two\2023_base_year\all_streets\2023_parcels\inputs\parcels_2023.csv'
df_parcel = gpd.read_file(parcel_csv)

# PARCEL_ID arrives as text (presumably values like "333.0" -- confirm):
# keep what precedes the first "." and store it as int64.
parcel_id_text = df_parcel['PARCEL_ID'].str.split('.').str[0]
df_parcel['PARCEL_ID'] = parcel_id_text.astype('int64')

In [251]:
# Turn the X_COORD_SP / Y_COORD_SP columns into point geometry and attach it
# to the parcel table, declaring the coordinates as EPSG:2285.
parcel_points = gpd.points_from_xy(df_parcel['X_COORD_SP'], df_parcel['Y_COORD_SP'])
gdf_parcel_full = gpd.GeoDataFrame(df_parcel, geometry=parcel_points, crs="EPSG:2285")

In [271]:
# Only the parcel id and its point geometry are needed for the spatial joins.
parcel_join_cols = ['PARCEL_ID', 'geometry']
gdf_parcel = gdf_parcel_full[parcel_join_cols]

# FIXME: use a small gdf for testing
# gdf_parcel = gdf_parcel.iloc[0:1000]

In [253]:
def load_layer(eg_conn, layer_name, col_list=None, crs='EPSG:2285'):
    """Read a geographic layer and reproject it to a common CRS.

    Parameters
    ----------
    eg_conn : object
        Connection exposing ``read_geolayer(layer_name)``.
    layer_name : str
        Name of the layer to read.
    col_list : list of str, optional
        Columns to keep. The geometry column must be included, otherwise the
        subset is no longer a geo-enabled frame and reprojection fails.
        When omitted (or empty) every column is kept.
    crs : str, optional
        Target coordinate reference system; defaults to 'EPSG:2285', the CRS
        assigned to the parcel points in this notebook.

    Returns
    -------
    The layer, restricted to ``col_list`` if given, in ``crs``.
    """
    gdf = eg_conn.read_geolayer(layer_name)
    if col_list:
        gdf = gdf[col_list]

    return gdf.to_crs(crs)

In [273]:
# Merge with Census layers
# Tag every parcel point with the 2010 and 2020 census block that contains it.
eg_conn = psrcelmerpy.ElmerGeoConn()
census_block_layers = {
    'block2010': 'geoid10',
    'block2020': 'geoid20',
}
for layer_name, geoid_field in census_block_layers.items():
    gdf = load_layer(eg_conn, layer_name, [geoid_field, 'geometry'])
    joined = gpd.sjoin(gdf_parcel, gdf, how="left")
    # A left sjoin repeats the left index once per matching polygon, so a point
    # sitting on a shared block edge would yield several rows for one parcel.
    # Keep the first match so the table stays one row per parcel.
    joined = joined[~joined.index.duplicated(keep='first')]
    gdf_parcel = joined.drop(columns=['index_right'])


In [277]:
#  Get the block group and tract from the geoid
def _geoid_prefix(geoid, length):
    """Return the first `length` characters of each geoid as text.

    Missing geoids (parcels that matched no block) stay missing instead of
    becoming the literal text 'nan'.
    """
    return geoid.astype('str').str[:length].where(geoid.notna())


# The block geoid is sliced at 12 characters for the block group and at 11
# for the tract.
for census_year, geoid_col in (('2010', 'geoid10'), ('2020', 'geoid20')):
    block_geoid = gdf_parcel[geoid_col]
    gdf_parcel[f'Census{census_year}BlockGroup'] = _geoid_prefix(block_geoid, 12)
    gdf_parcel[f'Census{census_year}Tract'] = _geoid_prefix(block_geoid, 11)

# The full block id is kept under both the Census*Block and GEOID* names.
gdf_parcel['Census2010Block'] = gdf_parcel['geoid10'].copy()
gdf_parcel['Census2020Block'] = gdf_parcel['geoid20'].copy()

gdf_parcel = gdf_parcel.rename(columns={'geoid10': 'GEOID10', 'geoid20': 'GEOID20'})

In [None]:
# gdf_parcel.head()


geopandas.geodataframe.GeoDataFrame

In [278]:
# Merge with regional geography
# NOTE(review): the layer read here is 'urban_centers' -- confirm this is the
# intended source for rg_proposed.
gdf = load_layer(eg_conn, 'urban_centers', ['name', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns={'name': 'rg_proposed'})
    .drop(columns=['index_right'])
)

In [279]:
# Merge with city boundaries
# Adds the city and county name of the polygon each parcel point falls in.
gdf = load_layer(eg_conn, 'cities', ['city_name', 'cnty_name', 'geometry'])
city_column_names = {'city_name': 'CityName', 'cnty_name': 'CountyName'}
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns=city_column_names)
    .drop(columns=['index_right'])
)

In [280]:
# Column names of the existing geography lookup, shown as the target schema
df_cols.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'ParcelID', 'rg_proposed', 'CityName',
       'Census2010Block', 'Census2010BlockGroup', 'Census2010Tract',
       'minority_geog', 'poverty_geog', 'FAZID', 'taz_p', 'parcel_id_x',
       'District', 'district_name', 'CountyName', 'TAZ', 'Low Income',
       'PeopleofColor', 'BaseYear', 'GEOID10', 'place_name', 'parcel_id_y',
       'disability_geog_vs_50_percent', 'disability_geog_vs_reg_total',
       'elderly_geog_vs_50_percent', 'elderly_geog_vs_reg_total',
       'english_geog_vs_50_percent', 'english_geog_vs_reg_total',
       'poverty_geog_vs_50_percent', 'poverty_geog_vs_reg_total',
       'racial_geog_vs_50_percent', 'racial_geog_vs_reg_total',
       'youth_geog_vs_50_percent', 'youth_geog_vs_reg_total', 'parcel_id',
       'GrowthCenterName'],
      dtype='object')

In [281]:
def _tag_parcels(parcels, layer_name, rename_map):
    """Spatially join one ElmerGeo layer onto the parcel points.

    `rename_map` maps the layer's attribute columns to the names they get in
    the parcel table; its keys are also the columns requested from the layer.
    Returns a new frame with one row per input parcel.
    """
    layer = load_layer(eg_conn, layer_name, [*rename_map, 'geometry'])
    joined = gpd.sjoin(parcels, layer, how="left")
    # A point on a shared polygon edge matches more than one polygon and would
    # be repeated; keep the first match so each parcel appears once.
    joined = joined[~joined.index.duplicated(keep='first')]
    return joined.drop(columns=['index_right']).rename(columns=rename_map)


# (layer name, layer column -> parcel column, extra copies: new column -> source)
zone_layers = [
    # Merge with FAZ
    ('faz_2010', {'faz10': 'FAZID'}, {}),
    # Merge with TAZ; the zone id is stored under both taz_p and TAZ
    ('taz2010', {'taz': 'taz_p'}, {'TAZ': 'taz_p'}),
    # district
    ('soundcast_taz_districts', {'district': 'District', 'new_distri': 'district_name'}, {}),
    # regional growth centers
    ('urban_centers', {'name': 'GrowthCenterName'}, {}),
]

for layer_name, rename_map, extra_copies in zone_layers:
    gdf_parcel = _tag_parcels(gdf_parcel, layer_name, rename_map)
    for new_col, source_col in extra_copies.items():
        gdf_parcel[new_col] = gdf_parcel[source_col].copy()

In [282]:
# census place
# 2010 place names
gdf = load_layer(eg_conn, 'place2010', ['name10', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns={'name10': 'place_name_2010'})
    .drop(columns=['index_right'])
)

# 2020 place names
gdf = load_layer(eg_conn, 'place2020', ['name', 'geometry'])
gdf_parcel = (
    gpd.sjoin(gdf_parcel, gdf, how="left")
    .rename(columns={'name': 'place_name_2020'})
    .drop(columns=['index_right'])
)

In [283]:
# We can use Elmer tables to get the equity geography data
# Note that 2023 is not yet available
# FIXME: update to use 2023 census definitions when available
e_conn = psrcelmerpy.ElmerConn()
equity_data_year = '2022'
# The equity tables themselves are queried in the next cell, which builds
# df_equity from scratch; no query is issued here to avoid a duplicate
# database round trip.


In [284]:



# See this for reference http://aws-linux/mediawiki/index.php/Equity_Geographies_in_Elmer
equity_data_year = '2022'

# (name used in the Elmer function census.<name>_equity_geographies,
#  prefix given to the two indicator columns in the output)
equity_measures = [
    ('racial', 'racial'),
    ('disability', 'disability'),
    ('elderly', 'elderly'),
    ('limited_english', 'english'),
    ('poverty', 'poverty'),
    ('youth', 'youth'),
]

# Query each tract-level equity table, prefix its two indicator columns, and
# combine everything on geoid. merge() defaults to an inner join, so only
# geoids present in every table are kept.
df_equity = None
for elmer_name, col_prefix in equity_measures:
    df = e_conn.get_query(
        "select geoid, equity_geog_vs_50_percent, equity_geog_vs_reg_total "
        f"from census.{elmer_name}_equity_geographies({equity_data_year}, 'Tract')"
    )
    df = df.rename(columns={
        'equity_geog_vs_50_percent': f'{col_prefix}_geog_vs_50_percent',
        'equity_geog_vs_reg_total': f'{col_prefix}_geog_vs_reg_total',
    })
    df_equity = df if df_equity is None else df_equity.merge(df, on='geoid')

In [285]:
# Merge to geodataframe
# Note, per ACS the geography should correspond with the latest data in ACS 3- or 5-year data (or the given year of 1-year ACS).
# This means we will use 2020 geography for 5-year ACS data from 2017-2022, which is listed as 2022 data in Elmer for the tract equity data
# https://www.census.gov/programs-surveys/acs/geography-acs/geography-boundaries-by-year.2022.html

# Join to the geodataframe based on tract: 2020 tract ids for data years
# 2020 and later, 2010 tract ids otherwise.
gdf_col = 'Census2020Tract' if int(equity_data_year) >= 2020 else 'Census2010Tract'

gdf_parcel = gdf_parcel.merge(df_equity, how='left', left_on=gdf_col, right_on='geoid')

In [286]:
# 
# Use the ParcelID spelling of the reference lookup and stamp the base year.
gdf_parcel = gdf_parcel.rename(columns={'PARCEL_ID': 'ParcelID'}).assign(BaseYear=2023)



In [287]:
# Write the lookup without the DataFrame index: a written index comes back as
# an 'Unnamed: 0' column on the next read (the existing file already carries
# 'Unnamed: 0' and 'Unnamed: 0.1').
# NOTE: this overwrites the same file that was read into df_cols at the top of
# the notebook.
gdf_parcel.to_csv(r'R:\e2projects_two\SoundCast\Inputs\db_inputs\parcel_2023_geography.csv', index=False)

In [270]:
# Display the assembled parcel geography table for a visual check
gdf_parcel

Unnamed: 0,ParcelID,geometry,GEOID10,GEOID20,Census2010BlockGroup,Census2010Tract,Census2020BlockGroup,Census2020Tract,Census2010Block,Census2020Block,...,disability_geog_vs_reg_total,elderly_geog_vs_50_percent,elderly_geog_vs_reg_total,english_geog_vs_50_percent,english_geog_vs_reg_total,poverty_geog_vs_50_percent,poverty_geog_vs_reg_total,youth_geog_vs_50_percent,youth_geog_vs_reg_total,BaseYear
0,333,POINT (1274027.606 226669.996),530330086002002,530330086003001,530330086002,53033008600,530330086003,53033008600,530330086002002,530330086003001,...,0,0,0,0,0,0,1,0,0,2023
1,336,POINT (1274362.042 226751.698),530330086002000,530330086003001,530330086002,53033008600,530330086003,53033008600,530330086002000,530330086003001,...,0,0,0,0,0,0,1,0,0,2023
2,337,POINT (1274361.109 226719.725),530330086002000,530330086003001,530330086002,53033008600,530330086003,53033008600,530330086002000,530330086003001,...,0,0,0,0,0,0,1,0,0,2023
3,338,POINT (1274395.442 226654.362),530330086002000,530330086003001,530330086002,53033008600,530330086003,53033008600,530330086002000,530330086003001,...,0,0,0,0,0,0,1,0,0,2023
4,339,POINT (1274360.558 226655.226),530330086002000,530330086003001,530330086002,53033008600,530330086003,53033008600,530330086002000,530330086003001,...,0,0,0,0,0,0,1,0,0,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21224,1128686,POINT (1303888.932 361386.139),530610408002001,530610408002001,530610408002,53061040800,530610408002,53061040800,530610408002001,530610408002001,...,1,0,1,0,0,0,0,0,0,2023
21225,1128687,POINT (1303859.702 361418.412),530610408002001,530610408002001,530610408002,53061040800,530610408002,53061040800,530610408002001,530610408002001,...,1,0,1,0,0,0,0,0,0,2023
21226,1128688,POINT (1303919.775 361410.094),530610408002001,530610408002001,530610408002,53061040800,530610408002,53061040800,530610408002001,530610408002001,...,1,0,1,0,0,0,0,0,0,2023
21227,1128689,POINT (1303892.586 361492.328),530610408002001,530610408002001,530610408002,53061040800,530610408002,53061040800,530610408002001,530610408002001,...,1,0,1,0,0,0,0,0,0,2023


In [267]:
# Column names of the existing geography lookup, for comparison with gdf_parcel
df_cols.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'ParcelID', 'rg_proposed', 'CityName',
       'Census2010Block', 'Census2010BlockGroup', 'Census2010Tract',
       'minority_geog', 'poverty_geog', 'FAZID', 'taz_p', 'parcel_id_x',
       'District', 'district_name', 'CountyName', 'TAZ', 'Low Income',
       'PeopleofColor', 'BaseYear', 'GEOID10', 'place_name', 'parcel_id_y',
       'disability_geog_vs_50_percent', 'disability_geog_vs_reg_total',
       'elderly_geog_vs_50_percent', 'elderly_geog_vs_reg_total',
       'english_geog_vs_50_percent', 'english_geog_vs_reg_total',
       'poverty_geog_vs_50_percent', 'poverty_geog_vs_reg_total',
       'racial_geog_vs_50_percent', 'racial_geog_vs_reg_total',
       'youth_geog_vs_50_percent', 'youth_geog_vs_reg_total', 'parcel_id',
       'GrowthCenterName'],
      dtype='object')

In [268]:
# Columns of the existing lookup (df_cols) that are absent from the new gdf_parcel table
df_cols.columns[~df_cols.columns.isin(gdf_parcel.columns)]

Index(['Unnamed: 0', 'Unnamed: 0.1', 'minority_geog', 'poverty_geog',
       'parcel_id_x', 'Low Income', 'PeopleofColor', 'place_name',
       'parcel_id_y', 'parcel_id'],
      dtype='object')