In [1]:
import geopandas as gpd
import pandas as pd
import psrcelmerpy

# Create External Work Trip Distribution Table
This process uses LEHD Origin-Destination data to build a trip table between internal PSRC zones and external stations
External blocks are assigned a single likely external station where workers pass through on their way to work or home.
The data works in both directions, including people that live outisde the region and commute to the region for work
as well as those that live in the region and travel outside for work. 

## Create Block-Zone Lookup
A census block file was updated in ArcGIS to include a likely external station for blocks in adjacent counties. 
While workers may use other stations in many cases, this is a best guess for where people will enter/leave the region. 

In [2]:
# Load shapefile of 2020 census blocks in adjacent counties were separated into likely external zone locations.
block_ext_gdf = gpd.read_file(r'R:\e2projects_two\2023_base_year\externals\externals_blocks2020_v4.shp')
# Create a lookup file for GEOID and TAZ, starting with these external zones
block_taz_df = block_ext_gdf[['GEOID20','EXT_STA']].copy()
block_taz_df.rename(columns={'EXT_STA': 'taz'}, inplace=True)

# The block-zone lookup above only includes blocks outside the PSRC region. 
# We also need an internal lookup so all blocks within the region and in adjacent counties have a TAZ

# Load TAZ shapefile from ElmerGeo
eg_conn = psrcelmerpy.ElmerGeoConn()
gdf_taz = eg_conn.read_geolayer('taz2010')

# Load regional Census block data from Elmer 
gdf_blocks = eg_conn.read_geolayer('BLOCK2020')
# Convert block polygon to centroid to spatially join to TAZ polygon file
gdf_blocks_pt = gdf_blocks.copy()
gdf_blocks_pt['geometry'] = gdf_blocks_pt['geometry'].centroid
block_taz_gdf = gpd.sjoin(gdf_blocks_pt, gdf_taz)
block_taz_gdf['GEOID20'] = block_taz_gdf['geoid20'].astype('str')

# Append the external blocks to internal blocks; all blocks in adjacent counties and internal zones should have a TAZ
block_taz_df = block_taz_df.append(block_taz_gdf[['GEOID20','taz']])

block_taz_df['taz'] = block_taz_df['taz'].astype('int')




In [3]:
block_taz_df.head()

Unnamed: 0,GEOID20,taz
0,530079601001000,3740
1,530079601001001,3740
2,530079601001002,3740
3,530079601001003,3740
4,530079601001004,3740


In [4]:
# Check that the append didn't duplicate blocks
block_taz_df[block_taz_df['GEOID20'].duplicated()]

Unnamed: 0,GEOID20,taz


## Process LEHD Data
Loading v8 LODES data for latest year which should use 2020 Census geographies
Note that as of Oct 2024, the latest year available is 2021

In [5]:
# Load LEHD data
# Note that v8 uses 2020 Census geog
df_lehd = pd.read_csv(r'R:\e2projects_two\2023_base_year\LEHD\wa_od_main_JT00_2021.csv')
for col in ['h_geocode','w_geocode']:
    df_lehd[col] = df_lehd[col].astype('str')

In [6]:
# Merge the block-TAZ lookup onto the data to get TAZ for each block
# Both home and work GEOID fields need to be joined
df = df_lehd.merge(block_taz_df, left_on='h_geocode', right_on='GEOID20')
df.rename(columns={'taz': 'h_taz'}, inplace=True)
df.drop('GEOID20',inplace=True,axis=1)

df = df.merge(block_taz_df, left_on='w_geocode', right_on='GEOID20')
df.rename(columns={'taz': 'w_taz'}, inplace=True)
df.drop('GEOID20',inplace=True,axis=1)

In [7]:
df.head()

Unnamed: 0,w_geocode,h_geocode,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,createdate,h_taz,w_taz
0,530079603012019,530079603023006,1,0,0,1,0,1,0,1,0,0,20231016,3740,3740
1,530079603012019,530079603013004,1,0,1,0,0,1,0,1,0,0,20231016,3740,3740
2,530079603012019,530610527084000,1,0,1,0,0,1,0,1,0,0,20231016,2203,3740
3,530079603012019,530079603013019,1,0,1,0,0,1,0,1,0,0,20231016,3740,3740
4,530079603012019,530079603032007,1,0,1,0,0,1,0,1,0,0,20231016,3740,3740


In [8]:
# Check that all home and work GEOIDs are valid
print(len(df[df['h_taz']==0]))
print(len(df[df['h_taz'].isnull()]))

print(len(df[df['w_taz']==0]))
print(len(df[df['w_taz'].isnull()]))

0
0
0
0


### Calculate External-Internal Travel
Home is outside region, work is inside region

In [9]:
# Commute mode share assumptions of auto trips
# Update with latest survey data for commute tour mode share (auto trips only, should sum to 1)
sov_share = 0.80
hov2_share = 0.12
hov3_share = 0.08

hov3_occupancy = 3.5    # average assumed for other analyses

In [10]:
# For external_internal, we only want people that live outside of our region and work in the region
df_ei = df[(df['h_taz']>3700) & (df['w_taz']<=3700)].copy()
suffix = 'EI'


df_ei['SOV_Per_'+suffix] = df_ei['S000']*sov_share
df_ei['HOV2_Per_'+suffix] = df_ei['S000']*hov2_share
df_ei['HOV3_Per_'+suffix] = df_ei['S000']*hov3_share

# Convert from person trips to vehicle trips
# Assume SOV is 1 person trip and HOV2 is 0.5 vehicles trips
# HOV3 is 1/3.5 vehicle trips
df_ei['SOV_Veh_'+suffix] = df_ei['SOV_Per_'+suffix].copy()
df_ei['HOV2_Veh_'+suffix] = df_ei['HOV2_Per_'+suffix]/2    # by definition
df_ei['HOV3_Veh_'+suffix] = df_ei['HOV3_Per_'+suffix]/hov3_occupancy

df_ei.rename(columns={'S000': 'Total_EI',
                      'h_taz': 'External_Station',
                      'w_taz': 'PSRC_TAZ'}, 
             inplace=True)

df_ei = df_ei[['PSRC_TAZ','External_Station','Total_EI',
               'SOV_Per_EI','HOV2_Per_EI','HOV3_Per_EI',
               'SOV_Veh_EI','HOV2_Veh_EI','HOV3_Veh_EI']]

### Internal-External Travel

In [11]:
# For internal-external, we only want people that live outside of our region
df_ie = df[(df['w_taz']>3700) & (df['h_taz']<=3700)].copy()
suffix = 'IE'
df_ie['SOV_Per_'+suffix] = df_ie['S000']*sov_share
df_ie['HOV2_Per_'+suffix] = df_ie['S000']*hov2_share
df_ie['HOV3_Per_'+suffix] = df_ie['S000']*hov3_share

# Convert from person trips to vehicle trips
# Assume SOV is 1 person trip and HOV2 is 0.5 vehicles trips
# HOV3 is 1/3.5 vehicle trips
df_ie['SOV_Veh_'+suffix] = df_ie['SOV_Per_'+suffix].copy()
df_ie['HOV2_Veh_'+suffix] = df_ie['HOV2_Per_'+suffix]/2
df_ie['HOV3_Veh_'+suffix] = df_ie['HOV3_Per_'+suffix]/hov3_occupancy

df_ie.rename(columns={'S000': 'Total_IE', 
                      'w_taz': 'External_Station',
                      'h_taz': 'PSRC_TAZ'}, inplace=True)

df_ie = df_ie[['PSRC_TAZ','External_Station','Total_IE',
               'SOV_Per_IE','HOV2_Per_IE','HOV3_Per_IE',
               'SOV_Veh_IE','HOV2_Veh_IE','HOV3_Veh_IE']]

# Scale to Account for Average Travel
Not all workers commute every day, and some are fully at home

In [12]:
# Merge and Format the data

# Append the two datasets even though unique columns are used for each; fill na with zero
# This matches the format of an older version and works with the current code
df = df_ie.append(df_ei).fillna(0)
col_list = ['PSRC_TAZ', 'External_Station', 'Total_IE', 'Total_EI', 
        'SOV_Per_IE', 'SOV_Per_EI', 'HOV2_Per_IE', 'HOV2_Per_EI', 
        'HOV3_Per_IE', 'HOV3_Per_EI', 'SOV_Veh_IE', 'SOV_Veh_EI', 
        'HOV2_Veh_IE', 'HOV2_Veh_EI', 'HOV3_Veh_IE','HOV3_Veh_EI']
df = df[col_list]

travel_col_list = ['Total_IE', 'Total_EI', 
        'SOV_Per_IE', 'SOV_Per_EI', 'HOV2_Per_IE', 'HOV2_Per_EI', 
        'HOV3_Per_IE', 'HOV3_Per_EI', 'SOV_Veh_IE', 'SOV_Veh_EI', 
        'HOV2_Veh_IE', 'HOV2_Veh_EI', 'HOV3_Veh_IE','HOV3_Veh_EI']

# Source: daysim-formatted validation summary: validation-notebook/validation_scripts/telecommute.html
wfh_share = 0.1361    # worker type: WFH
telework_share = 0.1066    # worker type: telecommuter
commuter_share = (1-wfh_share-telework_share)    # worker type: commuter

# Average number of telecommuters making work tours
teleworkers_commute_rate = 0.3354    # (work tours> 0)
commuter_commuter_rate = 0.7052
# Likelihood of a commuter traveling to work on a given day

######## CALIBRATION ##########
# # The above resulted in less travel than expected at external stations
# # To adjust, we can assume a lower wfh_share since King County
# # is driving the high share and places outside of King had rates closer to 10% in 2023
# wfh_share = 0.08
# telework_share = 0.00
# # recompute commuter_share
# commuter_share = (1-wfh_share-telework_share)
################################

# Calculate reduction factor across all trips
reduction_factor = (telework_share*teleworkers_commute_rate)+(commuter_share*commuter_commuter_rate)
# First remove all fully WFH people
df[travel_col_list] = df[travel_col_list]*(1-wfh_share)
# Then apply the reduction factor to those remaining commuters and teleworkers, 
# to account for likelihood of travel in the given day
df[travel_col_list] = df[travel_col_list]*reduction_factor


In [13]:
reduction_factor

0.5698016

In [14]:
df['Total_IE'].sum()

27583.810783120636

In [15]:
df['Total_EI'].sum()

59198.66993698463

In [16]:
(df['Total_IE'].sum()+df['Total_EI'].sum())
# target 105k

86782.48072010526

# Keep JBLM trips from previous estimates
JBLM trips are based on a license-plate survey. Any trips to/from these zones should be replaced with 
previous version of the trip table

In [17]:
jblm_taz_list = [3061, 3070, 3346, 3348, 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356]

In [18]:
df_old = pd.read_csv(r'R:\e2projects_two\SoundCast\Inputs\db_inputs\hold\external_trip_distribution.csv')
df_old = df_old.groupby(['PSRC_TAZ','External_Station']).sum().reset_index()

In [19]:

df_jblm = df_old[(df_old['PSRC_TAZ'].isin(jblm_taz_list)) | (df_old['External_Station'].isin(jblm_taz_list))].copy()

In [20]:
df_jblm.drop(['GEOID','BKR_TAZ'], axis=1, inplace=True)

In [21]:
1169

1169

In [22]:
# Remove any LEHD data for workers living in JBLM zones
df = df[~(df['External_Station'].isin(jblm_taz_list))]
# Remove any LEHD data for workers commuting to JBLM zones
df = df[~(df['PSRC_TAZ'].isin(jblm_taz_list))]

# Append the jblm data to the dataframe
df = pd.concat([df, df_jblm])

In [23]:
df['Total_IE'].sum()

69257.27841946272

In [24]:
df['Total_EI'].sum()

101200.5468683411

In [25]:
(df['Total_IE'].sum()+df['Total_EI'].sum())

170457.82528780383

# Export

In [26]:
df.to_csv(r'R:\e2projects_two\2023_base_year\externals\external_trip_distribution.csv')

In [148]:
df

Unnamed: 0,PSRC_TAZ,External_Station,Total_IE,Total_EI,SOV_Per_IE,SOV_Per_EI,HOV2_Per_IE,HOV2_Per_EI,HOV3_Per_IE,HOV3_Per_EI,SOV_Veh_IE,SOV_Veh_EI,HOV2_Veh_IE,HOV2_Veh_EI,HOV3_Veh_IE,HOV3_Veh_EI
2,2203,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
22,189,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
23,1937,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
68,2428,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
135,343,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,3733,3351,2876.000000,3325.0,2139.744000,2473.800,347.708400,401.9925,187.227600,216.4575,2310.923520,2671.70400,187.762536,217.075950,57.773088,66.792600
17518,3733,3352,477.000000,490.0,354.888000,364.560,57.669300,59.2410,31.052700,31.8990,383.279040,393.72480,31.141422,31.990140,9.581976,9.843120
17519,3733,3353,2192.000000,2255.0,1630.848000,1677.720,265.012800,272.6295,142.699200,146.8005,1761.315840,1811.93760,143.106912,147.219930,44.032896,45.298440
17520,3733,3354,2173.000000,1863.0,1616.712000,1386.072,262.715700,225.2367,141.462300,121.2813,1746.048960,1496.95776,141.866478,121.627818,43.651224,37.423944


## Validation

In [149]:
# Load Validation Data
df_old = pd.read_csv(r'R:\e2projects_two\SoundCast\Inputs\db_inputs\hold\external_trip_distribution.csv')

In [150]:
df['External_Station'].min()

3061

In [151]:
df['External_Station'].max()

3750

In [152]:
df['PSRC_TAZ'].min()

1

In [153]:
df['PSRC_TAZ'].max()

3733

Validation: Internal-External

In [154]:
df['Total_IE'].sum()

75080.23387584

In [155]:
df_old['Total_IE'].sum()

82879

In [156]:
df['Total_IE'].sum()/df_old['Total_IE'].sum()

0.9059017830311659

In [157]:
(df['SOV_Veh_IE']+df['HOV2_Veh_IE']+df['HOV3_Veh_IE']).sum()

66504.37046538448

In [158]:
(df_old['SOV_Veh_IE']+df_old['HOV2_Veh_IE']+df_old['HOV3_Veh_IE']).sum()

69049.5032052

In [159]:
(df['SOV_Veh_IE']+df['HOV2_Veh_IE']+df['HOV3_Veh_IE']).sum()/(df_old['SOV_Veh_IE']+df_old['HOV2_Veh_IE']+df_old['HOV3_Veh_IE']).sum()

0.9631404626872991

Validation: External-Internal

In [160]:
df['Total_EI'].sum()

113697.30632064

In [161]:
df_old['Total_EI'].sum()

133708

In [162]:
df['Total_EI'].sum()/df_old['Total_EI'].sum()

0.8503403410464594

In [163]:
(df['SOV_Veh_EI']+df['HOV2_Veh_EI']+df['HOV3_Veh_EI']).sum()

100529.57147225077

In [164]:
(df_old['SOV_Veh_EI']+df_old['HOV2_Veh_EI']+df_old['HOV3_Veh_EI']).sum()

109251.53848080001

In [165]:
(df['SOV_Veh_EI']+df['HOV2_Veh_EI']+df['HOV3_Veh_EI']).sum()/(df_old['SOV_Veh_EI']+df_old['HOV2_Veh_EI']+df_old['HOV3_Veh_EI']).sum()

0.9201661859427083

In [166]:
# Total jobs
(df['Total_EI']+df['Total_IE']).sum()

188777.54019648

In [167]:
(df_old['Total_EI']+df_old['Total_IE']).sum()

216587

In [168]:
(df['Total_EI']+df['Total_IE']).sum()/(df_old['Total_EI']+df_old['Total_IE']).sum()

0.8716014358963372

In [169]:
# Craig's check for total jobs in adjacent counties, converted from R

# Neighboring external counties
external_counties = ['007', '067', '057', '029', '041', '037', '031', '045', '077']

# Blocks in PSRC Region
eg_conn = psrcelmerpy.ElmerGeoConn()
gdf = eg_conn.read_geolayer('block2020')
psrc_blocks = gdf["geoid20"].tolist()

# Load LODES data
import psrcelmerpy
lodes_od = pd.read_csv(r"R:\e2projects_two\2023_base_year\externals\wa_od_main_JT00_2018_v8.csv", 
                       usecols=["w_geocode", "h_geocode", "S000"])

# Convert geocode columns to string
lodes_od["w_geocode"] = lodes_od["w_geocode"].astype(str)
lodes_od["h_geocode"] = lodes_od["h_geocode"].astype(str)

# Filter data: exclude blocks outside PSRC and include only work geocodes within PSRC blocks
lodes_od_filtered = lodes_od[
    (~lodes_od["h_geocode"].isin(psrc_blocks)) & 
    (lodes_od["w_geocode"].isin(psrc_blocks))
]

# Add county information by extracting substring from h_geocode
lodes_od_filtered["h_county"] = lodes_od_filtered["h_geocode"].str[2:5]

# Filter rows where h_county is in external_counties
lodes_od_filtered = lodes_od_filtered[lodes_od_filtered["h_county"].isin(external_counties)]

# Sum of external job flows
total_ext_jobs = lodes_od_filtered["S000"].sum()

print(total_ext_jobs)


111696


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [170]:
lodes_od_filtered = lodes_od[
    (lodes_od["h_geocode"].isin(psrc_blocks)) & 
    (~lodes_od["w_geocode"].isin(psrc_blocks))
]

# Filter rows where h_county is in external_counties
lodes_od_filtered["w_county"] = lodes_od_filtered["w_geocode"].str[2:5]
lodes_od_filtered = lodes_od_filtered[lodes_od_filtered["w_county"].isin(external_counties)]

# Sum of external job flows
total_ext_jobs = lodes_od_filtered["S000"].sum()

print(total_ext_jobs)

54785


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [171]:
df

Unnamed: 0,PSRC_TAZ,External_Station,Total_IE,Total_EI,SOV_Per_IE,SOV_Per_EI,HOV2_Per_IE,HOV2_Per_EI,HOV3_Per_IE,HOV3_Per_EI,SOV_Veh_IE,SOV_Veh_EI,HOV2_Veh_IE,HOV2_Veh_EI,HOV3_Veh_IE,HOV3_Veh_EI
2,2203,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
22,189,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
23,1937,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
68,2428,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
135,343,3740,0.596881,0.0,0.477505,0.000,0.071626,0.0000,0.047751,0.0000,0.477505,0.00000,0.035813,0.000000,0.013643,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,3733,3351,2876.000000,3325.0,2139.744000,2473.800,347.708400,401.9925,187.227600,216.4575,2310.923520,2671.70400,187.762536,217.075950,57.773088,66.792600
17518,3733,3352,477.000000,490.0,354.888000,364.560,57.669300,59.2410,31.052700,31.8990,383.279040,393.72480,31.141422,31.990140,9.581976,9.843120
17519,3733,3353,2192.000000,2255.0,1630.848000,1677.720,265.012800,272.6295,142.699200,146.8005,1761.315840,1811.93760,143.106912,147.219930,44.032896,45.298440
17520,3733,3354,2173.000000,1863.0,1616.712000,1386.072,262.715700,225.2367,141.462300,121.2813,1746.048960,1496.95776,141.866478,121.627818,43.651224,37.423944
