### Obtain the total census tract-to-census tract commute flow for lower-paying work trips in Georgia

The travel time decay coefficient has to be estimated from observed travel behavior. I estimate it using [LEHD LODES](https://lehd.ces.census.gov/data/) origin-destination data. The objective is to calculate tract-to-tract travel times, weight those times by trip frequency, and use the resulting histogram to estimate the coefficient.

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Read the origin-destination table from ga_od_main_JT01_2021.csv.
# The working directory is switched to the project folder first so that the
# relative file name below resolves there.
# The Windows path is written as a raw string: in a normal string literal
# "\M", "\G", "\C" and "\J" are invalid escape sequences, which recent Python
# versions warn about. The resulting path is unchanged.
os.chdir(r"D:\M.S. GIS\Georgia Tech\GRA\CQGRD\Job_Accessibility")
df = pd.read_csv("ga_od_main_JT01_2021.csv")
df.dtypes

w_geocode     int64
h_geocode     int64
S000          int64
SA01          int64
SA02          int64
SA03          int64
SE01          int64
SE02          int64
SE03          int64
SI01          int64
SI02          int64
SI03          int64
createdate    int64
dtype: object

In [3]:
# Derive a tract-level code for the work (w) and home (h) side of every row.
# The geocodes are loaded as int64, so each one is cast to text and its first
# 11 characters are kept as the census-tract identifier.
for tract_col, geocode_col in (('ct_w', 'w_geocode'), ('ct_h', 'h_geocode')):
    df[tract_col] = df[geocode_col].astype(str).str.slice(stop=11)
df.dtypes

w_geocode      int64
h_geocode      int64
S000           int64
SA01           int64
SA02           int64
SA03           int64
SE01           int64
SE02           int64
SE03           int64
SI01           int64
SI02           int64
SI03           int64
createdate     int64
ct_w          object
ct_h          object
dtype: object

In [4]:
# Step 1 of collapsing duplicate OD pairs.
# Build a direction-independent key for each tract pair: the two tract codes
# are concatenated with the lexicographically smaller one first, so a pair
# gets the same key whichever tract is the workplace.
work_sorts_first = df['ct_w'] < df['ct_h']
df['concat'] = np.where(
    work_sorts_first,
    df['ct_w'] + df['ct_h'],
    df['ct_h'] + df['ct_w'],
)

# Keep only rows with a non-zero SE01 count, then attach to every remaining
# row the SE01 total of its tract pair.
has_se01 = df['SE01'] != 0
df1 = df.loc[has_se01].copy()
df1['freq'] = df1['SE01'].groupby(df1['concat']).transform('sum')
df1

Unnamed: 0,w_geocode,h_geocode,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,createdate,ct_w,ct_h,concat,freq
1,130019501001000,130019501001009,4,1,2,1,1,0,3,2,2,0,20231016,13001950100,13001950100,1300195010013001950100,10
19,130019501001000,130019501001067,1,1,0,0,1,0,0,1,0,0,20231016,13001950100,13001950100,1300195010013001950100,10
27,130019501001000,130019501002001,1,0,0,1,1,0,0,0,1,0,20231016,13001950100,13001950100,1300195010013001950100,10
31,130019501001000,130019501002018,2,1,1,0,1,0,1,1,1,0,20231016,13001950100,13001950100,1300195010013001950100,10
52,130019501001000,130019501003063,1,0,1,0,1,0,0,1,0,0,20231016,13001950100,13001950100,1300195010013001950100,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3544988,133219506002031,133219506001022,5,1,4,0,1,0,4,5,0,0,20231016,13321950600,13321950600,1332195060013321950600,6
3544995,133219506002031,133219506002010,1,0,1,0,1,0,0,1,0,0,20231016,13321950600,13321950600,1332195060013321950600,6
3545011,133219506002035,133219504002022,1,0,0,1,1,0,0,0,0,1,20231016,13321950600,13321950400,1332195040013321950600,2
3545015,133219506002044,130719702003008,1,1,0,0,1,0,0,1,0,0,20231016,13321950600,13071970200,1307197020013321950600,5


In [5]:
# Step 2 of collapsing duplicate OD pairs.
# Keep only the first row seen for each 'concat' key, leaving one row per
# tract pair.
first_row_of_pair = ~df1['concat'].duplicated()
df1 = df1.loc[first_row_of_pair]
df1

Unnamed: 0,w_geocode,h_geocode,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,createdate,ct_w,ct_h,concat,freq
1,130019501001000,130019501001009,4,1,2,1,1,0,3,2,2,0,20231016,13001950100,13001950100,1300195010013001950100,10
107,130019501001000,130019503012028,1,0,1,0,1,0,0,1,0,0,20231016,13001950100,13001950301,1300195010013001950301,26
127,130019501001000,130019503021042,1,0,0,1,1,0,0,1,0,0,20231016,13001950100,13001950302,1300195010013001950302,9
221,130019501001000,130299203033017,2,1,1,0,1,1,0,1,1,0,20231016,13001950100,13029920303,1300195010013029920303,1
255,130019501001000,130339501021022,3,1,2,0,1,0,2,2,1,0,20231016,13001950100,13033950102,1300195010013033950102,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3544965,133219506002031,130990905002022,1,0,0,1,1,0,0,1,0,0,20231016,13321950600,13099090500,1309909050013321950600,1
3544967,133219506002031,131319505022039,1,0,1,0,1,0,0,1,0,0,20231016,13321950600,13131950502,1313195050213321950600,1
3544969,133219506002031,131530211204017,1,0,1,0,1,0,0,1,0,0,20231016,13321950600,13153021120,1315302112013321950600,1
3544979,133219506002031,132050905012057,1,1,0,0,1,0,0,1,0,0,20231016,13321950600,13205090501,1320509050113321950600,1


In [6]:
# Write only the two tract codes and the summed frequency to CSV, without the
# DataFrame index.
export_columns = ['ct_w', 'ct_h', 'freq']
dfexport = df1.loc[:, export_columns]
dfexport.to_csv('all_lower_paying_jobs.csv', index=False)