In [2]:
import pandas as pd
import gzip

In [3]:
# Decompress the gzipped CSV in text mode and parse it into a DataFrame.
# low_memory=False makes pandas read the file in one pass rather than in
# internal chunks, so each column gets a single inferred dtype.
source_path = "data/il_od_main_JT00_2021.csv.gz"
with gzip.open(source_path, mode='rt') as handle:
    df = pd.read_csv(handle, low_memory=False)

In [4]:
# Preview the first rows to check the raw column names and values.
df.head()

Unnamed: 0,w_geocode,h_geocode,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,createdate
0,170010001001000,170010002022022,1,0,1,0,1,0,0,1,0,0,20231016
1,170010001001001,170010002012010,1,1,0,0,0,0,1,0,1,0,20231016
2,170010001001001,170010004002023,1,0,1,0,0,1,0,0,0,1,20231016
3,170010001001001,170010007002003,1,1,0,0,0,0,1,0,0,1,20231016
4,170010001001001,170010007002018,1,1,0,0,0,0,1,0,0,1,20231016


In [None]:
# Replace the terse source headers with descriptive column names.
# Earnings bands: low = $1250/month or less, mid = $1251-$3333/month,
# high = >$3333/month.
# NOTE(review): the preview of the loaded frame shows no 'h_geo' column, so
# that entry currently matches nothing (rename skips unknown keys by default)
# -- confirm whether it is still needed.
readable_names = dict([
    ('w_geocode', 'workplace_block_code'),
    ('h_geocode', 'home_block_code'),
    ('S000', 'total_jobs'),
    ('SA01', 'jobs_age_29_under'),
    ('SA02', 'jobs_age_30_54'),
    ('SA03', 'jobs_age_55_plus'),
    ('SE01', 'jobs_earnings_low'),
    ('SE02', 'jobs_earnings_mid'),
    ('SE03', 'jobs_earnings_high'),
    ('SI01', 'jobs_goods_producing'),
    ('SI02', 'jobs_trade_trans_util'),
    ('SI03', 'jobs_other_services'),
    ('h_geo', 'home_county_code'),
    ('createdate', 'created_date'),
])
df = df.rename(columns=readable_names)

In [6]:
# Preview again to confirm the descriptive column names were applied.
df.head()

Unnamed: 0,workplace_block_code,home_block_code,total_jobs,jobs_age_29_under,jobs_age_30_54,jobs_age_55_plus,jobs_earnings_low,jobs_earnings_mid,jobs_earnings_high,jobs_goods_producing,jobs_trade_trans_util,jobs_other_services,created_date
0,170010001001000,170010002022022,1,0,1,0,1,0,0,1,0,0,20231016
1,170010001001001,170010002012010,1,1,0,0,0,0,1,0,1,0,20231016
2,170010001001001,170010004002023,1,0,1,0,0,1,0,0,0,1,20231016
3,170010001001001,170010007002003,1,1,0,0,0,0,1,0,0,1,20231016
4,170010001001001,170010007002018,1,1,0,0,0,0,1,0,0,1,20231016


In [7]:
# Persist the renamed table. index=False keeps the default row index out of
# the file; writing it adds an extra unnamed leading column (the same artefact
# as the "Unnamed: 0" column already present in the input) on every
# save-and-reload round trip.
df.to_csv("data/LODES.csv", index=False)

# Optimality Score

In [9]:
# Derive tract identifiers from the block geocodes by keeping the first
# 11 characters. The results go into dedicated *_tract_code columns -- the
# columns every filter below reads -- instead of overwriting the block-code
# columns, so the cell works on a fresh kernel and can be re-run safely.
# NOTE(review): assumes the geocodes were parsed without losing a leading
# zero (true for the 15-digit values starting with 17 shown in the preview).
TRACT_CODE_LENGTH = 11
df["workplace_tract_code"] = df["workplace_block_code"].astype(str).str[:TRACT_CODE_LENGTH]
df["home_tract_code"] = df["home_block_code"].astype(str).str[:TRACT_CODE_LENGTH]

# Tracts that make up the area under evaluation.
tracts = ['17001000100', '17203030700']

# Row masks: does the job's workplace / the worker's home fall in the area?
works_in_tracts = df['workplace_tract_code'].isin(tracts)
lives_in_tracts = df['home_tract_code'].isin(tracts)

# Every flow that touches the area on at least one end.
filtered_df = df[works_in_tracts | lives_in_tracts]

# Job counts by residence, by workplace, and for flows with both ends inside.
total_live_in_tracts = df.loc[lives_in_tracts, 'total_jobs'].sum()
total_work_in_tracts = df.loc[works_in_tracts, 'total_jobs'].sum()
live_and_work_in_tracts = df.loc[works_in_tracts & lives_in_tracts, 'total_jobs'].sum()

# Guard against empty areas so the ratios never divide by zero.
percentage_live_and_work = (live_and_work_in_tracts / total_live_in_tracts) * 100 if total_live_in_tracts > 0 else 0
percentage_work_and_live = (live_and_work_in_tracts / total_work_in_tracts) * 100 if total_work_in_tracts > 0 else 0

print(f"Percentage of people living in the tracts who also work within those tracts: {percentage_live_and_work:.2f}%")
print(f"Percentage of people working in the tracts who also live within those tracts: {percentage_work_and_live:.2f}%")

Percentage of people living in the tracts who also work within those tracts: 18.02%
Percentage of people working in the tracts who also live within those tracts: 11.62%


In principle, we would want everyone who works in an area to also live there, but optimizing for that creates snake-like districts that attempt to match suburbs with their corresponding cities. Instead, we found that ensuring the people who live in an area also work there is more effective, so that percentage is weighted more heavily in the score below.

In [10]:
# Blend the two overlap percentages into one score. The share of residents who
# also work in the tracts gets three times the weight of the share of workers
# who also live there.
work_side_weight, live_side_weight = 0.25, 0.75
employment_congruity_score = (
    work_side_weight * percentage_work_and_live
    + live_side_weight * percentage_live_and_work
)