In [None]:
# Load Accessibility Measures for 2017 and 2025
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Read the 2017 tract-level transit accessibility layer and preview its first rows.
tract_transit_acc17 = gpd.read_file(filename="output/tract_transit_acc17.shp")
print(tract_transit_acc17.head(n=5))

In [None]:

# Read the 2025 tract-level transit accessibility layer and preview its first rows.
tract_transit_acc25 = gpd.read_file(filename="output/tract_transit_acc25.shp")
print(tract_transit_acc25.head(n=5))

In [None]:
# Join the 2025 and 2017 accessibility tables on the tract GEOID.
# The 2025 table is the left frame, so columns present in both tables must take
# the "_25" suffix on the left and "_17" on the right. The previous order
# ("_17", "_25") labelled every shared column (geometry, county code, ...) with
# the wrong year.
Access_Merged = tract_transit_acc25.merge(
    tract_transit_acc17,
    on="GEOID",
    suffixes=("_25", "_17"),
)
Access_Merged.head()

In [None]:
Access_Merged.columns.tolist()

In [None]:
import geopandas as gpd

# Step 1: Keep the tract identifiers, the accessibility scores for both years
# and one geometry column, renaming "geometry_25" to the conventional "geometry".
access_comp = Access_Merged[[
    "GEOID", "STATEFP_25", "COUNTYFP_25", "TRACTCE_25", "NAMELSAD_17", "from_id_25",
    "ALI22_45", "AMI22_45", "AHI22_45", "ATOT22_45",
    "ALI17_45", "AMI17_45", "AHI17_45", "ATOT17_45",
    "geometry_25"
]].rename(columns={"geometry_25": "geometry"})

# Re-wrap the geometry column as a GeoSeries and promote the table to a
# GeoDataFrame with "geometry" as the active geometry column.
access_comp["geometry"] = gpd.GeoSeries(access_comp["geometry"])
access_comp = gpd.GeoDataFrame(access_comp, geometry="geometry")

# Put the layer in EPSG:4326. If a CRS survived the merge, reproject so the
# coordinates are actually transformed; only label the data when no CRS is
# known. Overriding an existing CRS would change the label but leave the
# coordinates in the old reference system.
if access_comp.crs is None:
    access_comp = access_comp.set_crs("EPSG:4326")
else:
    access_comp = access_comp.to_crs("EPSG:4326")

# Step 2: Change in each accessibility score (2022-suffixed column minus 2017 column)
access_comp["diff_LI"] = access_comp["ALI22_45"] - access_comp["ALI17_45"]
access_comp["diff_MI"] = access_comp["AMI22_45"] - access_comp["AMI17_45"]
access_comp["diff_HI"] = access_comp["AHI22_45"] - access_comp["AHI17_45"]
access_comp["diff_TOT"] = access_comp["ATOT22_45"] - access_comp["ATOT17_45"]

# Preview
access_comp.head()

In [None]:
# Filter only Travis County tracts (county FIPS 453)
travis_access = access_comp[access_comp["COUNTYFP_25"] == "453"]

# Plot the change in accessibility to low-wage jobs. The title describes
# low-wage jobs, so the mapped column must be diff_LI; diff_TOT (total jobs)
# has its own map in the following cell.
fig, ax = plt.subplots(figsize=(12, 10))

travis_access.plot(
    column="diff_LI",
    cmap="RdBu",
    linewidth=0.3,
    edgecolor="white",
    legend=True,
    legend_kwds={"label": "Change in Job Accessibility (2025 - 2017)", "shrink": 0.4},
    ax=ax
)
ax.set_title("Overall Change in Transit Accessibility to Low-wage Jobs\n between 2017 to 2025 in Travis County, TX", fontsize=15)
ax.set_axis_off()
plt.tight_layout()
plt.show()


In [None]:
# Restrict the comparison table to Travis County (county FIPS 453).
travis_access = access_comp.loc[access_comp["COUNTYFP_25"] == "453"]

# Choropleth of the change in accessibility to all jobs (diff_TOT).
# NOTE(review): no vmin/vmax/norm is passed, so the RdBu scale is presumably
# not centred on zero - confirm the colours read as intended.
fig, ax = plt.subplots(figsize=(12, 10))
travis_access.plot(
    ax=ax,
    column="diff_TOT",
    cmap="RdBu",
    edgecolor="white",
    linewidth=0.3,
    legend=True,
    legend_kwds={"shrink": 0.4, "label": "Change in Job Accessibility (2025 - 2017)"},
)

ax.set_title(
    "Overall change in Transit Accessibility to Total Jobs \n between 2017 to 2025 in Travis County, TX",
    fontsize=15,
)
ax.axis("off")
fig.tight_layout()
plt.show()

In [None]:
# Restrict the comparison table to Travis County (county FIPS 453).
travis_access = access_comp.loc[access_comp["COUNTYFP_25"] == "453"]

# Choropleth of the change in accessibility to high-wage jobs (diff_HI).
fig, ax = plt.subplots(figsize=(12, 10))
travis_access.plot(
    ax=ax,
    column="diff_HI",
    cmap="RdBu",
    edgecolor="white",
    linewidth=0.3,
    legend=True,
    legend_kwds={"shrink": 0.4, "label": "Change in Job Accessibility (2025 - 2017)"},
)

ax.set_title(
    "Overall change in Transit Accessibility to High-wage Jobs \n between 2017 to 2025 in Travis County, TX",
    fontsize=15,
)
ax.axis("off")
fig.tight_layout()
plt.show()

# Join Demographic Data with Access Score

In [None]:

#Get Demographic Data by Census Tract
from pygris.data import get_census

# Request 2023 ACS 5-year variables for every census tract in Texas (state FIPS 48).
tx_demo23 = get_census(
    dataset="acs/acs5",
    year=2023,
    params={"for": "tract:*", "in": "state:48"},
    variables=[
        "B03002_001E",  # total population
        "B03002_003E",  # White alone
        "B03002_004E",  # Black alone
        "B03002_012E",  # Hispanic/Latino
        "B19013_001E",  # median household income
        "B08201_002E",  # no vehicle available
    ],
    return_geoid=True,
)


In [None]:
# Map the ACS variable codes to readable column names carrying a "23" year tag.
acs23_names = {
    "B03002_001E": "pop_total23",
    "B03002_003E": "pop_white23",
    "B03002_004E": "pop_black23",
    "B03002_012E": "pop_latino23",
    "B19013_001E": "med_income23",
    "B08201_002E": "no_vehicle23",
}
# Renamed in place, so re-running this cell after the first run is a no-op.
tx_demo23.rename(columns=acs23_names, inplace=True)

tx_demo23

In [None]:
# Attach the 2023 demographics to the accessibility table; the left join keeps
# every row of access_comp even when no demographic record matches its GEOID.
access_demo23 = access_comp.merge(
    right=tx_demo23,
    how="left",
    on="GEOID",
)
access_demo23

In [None]:
access_demo23[["pop_black23","pop_white23","pop_latino23", "pop_total23", "no_vehicle23","med_income23"]].dtypes

In [None]:
# Demographic columns that get converted to numbers in the next cell;
# report how many values are missing in each one.
cols_to_numeric = [
    "pop_black23",
    "pop_white23",
    "pop_latino23",
    "pop_total23",
    "no_vehicle23",
    "med_income23",
]
access_demo23[cols_to_numeric].isnull().sum()


In [None]:
# Convert the demographic columns to numbers; anything unparseable becomes NaN.
access_demo23[cols_to_numeric] = access_demo23[cols_to_numeric].apply(pd.to_numeric, errors='coerce')

# The Census API reports "no estimate available" with large negative sentinel
# values (e.g. -666666666 for median income). Counts and incomes cannot be
# negative, so treat any negative value as missing; otherwise the sentinels
# end up in the lowest income decile and distort every ratio built from them.
access_demo23[cols_to_numeric] = access_demo23[cols_to_numeric].where(
    access_demo23[cols_to_numeric] >= 0
)

In [None]:
import pandas as pd

# Split tracts into ten equal-count groups of median income, labelled D1..D10.
decile_labels = ["D%d" % rank for rank in range(1, 11)]
access_demo23["income_group"] = pd.qcut(
    access_demo23["med_income23"], q=10, labels=decile_labels
)

# Express each count as a fraction of the tract's total population.
# NOTE(review): no_vehicle23 comes from table B08201, which presumably counts
# households, while the denominator is persons - confirm zero_car_pct23 is the
# intended measure.
for share_col, count_col in (
    ("pct_white23", "pop_white23"),
    ("pct_black23", "pop_black23"),
    ("pct_latino23", "pop_latino23"),
    ("zero_car_pct23", "no_vehicle23"),
):
    access_demo23[share_col] = access_demo23[count_col] / access_demo23["pop_total23"]

access_demo23

In [None]:
# Step 1: Make sure the table is a GeoDataFrame with "geometry" as its active geometry.
access_demo23 = gpd.GeoDataFrame(access_demo23, geometry="geometry")

# Step 2: Label the layer as EPSG:4326 (lat/lon); modifies the frame in place.
access_demo23.set_crs(crs="EPSG:4326", inplace=True)

# Step 3: Write the layer out as an ESRI Shapefile.
# Later cells read this file back and use shortened field names such as
# "COUNTYFP_2" and "pct_white2" instead of the names written here.
access_demo23.to_file(filename="output/access_demo23.shp", driver="ESRI Shapefile")

In [None]:
# Also export the joined accessibility + demographics table as CSV, without the row index.
access_demo23.to_csv("output/access_demo23.csv", index=False)

In [None]:
#Get Demographic Data by Census Tract
from pygris.data import get_census

# Request the same ACS 5-year variables for every Texas tract, this time for 2017.
# NOTE(review): 2017 and 2023 ACS tracts may use different tract vintages, so
# GEOIDs might not line up one-to-one when the two tables are joined - confirm.
tx_demo17 = get_census(
    dataset="acs/acs5",
    year=2017,
    params={"for": "tract:*", "in": "state:48"},
    variables=[
        "B03002_001E",  # total population
        "B03002_003E",  # White alone
        "B03002_004E",  # Black alone
        "B03002_012E",  # Hispanic/Latino
        "B19013_001E",  # median household income
        "B08201_002E",  # no vehicle available
    ],
    return_geoid=True,
)


In [None]:
tx_demo17

In [None]:
# Map the ACS variable codes to short column names carrying a "17" year tag.
acs17_names = {
    "B03002_001E": "pop_tot17",
    "B03002_003E": "pop_wht17",
    "B03002_004E": "pop_blk17",
    "B03002_012E": "pop_lat17",
    "B19013_001E": "med_inc17",
    "B08201_002E": "no_veh17",
}
# Renamed in place, so re-running this cell after the first run is a no-op.
tx_demo17.rename(columns=acs17_names, inplace=True)

tx_demo17

In [None]:
# Put the 2017 demographics next to the 2023 ones, keeping every 2023 row (left join on GEOID).
tx_demo23_17 = tx_demo23.merge(
    right=tx_demo17,
    how="left",
    on="GEOID",
)
tx_demo23_17

In [None]:
# Re-point cols_to_numeric at the 2017 demographic columns and count the rows
# of the joined table that have no 2017 value for each of them.
cols_to_numeric = [
    "pop_blk17",
    "pop_wht17",
    "pop_lat17",
    "pop_tot17",
    "no_veh17",
    "med_inc17",
]
tx_demo23_17[cols_to_numeric].isnull().sum()

# Get Low/Med/High Income Workers in Tracts

In [None]:

from pygris.data import get_lodes

# Fetch the Texas LODES "rac" tables for 2022 and 2017, caching the downloads
# and requesting lon/lat columns with each record.
tx_lodes_rac22 = get_lodes(state="TX", year=2022, lodes_type="rac", cache=True, return_lonlat=True)

tx_lodes_rac17 = get_lodes(state="TX", year=2017, lodes_type="rac", cache=True, return_lonlat=True)

In [None]:
tx_lodes_rac22

In [None]:
tx_lodes_rac17

In [None]:
# Copy the LODES columns into readable 2022 names:
# CE01 -> LI22, CE02 -> MI22, CE03 -> HI22 and C000 -> TOT22.
for new_col, lodes_col in (
    ("LI22", "CE01"),
    ("MI22", "CE02"),
    ("HI22", "CE03"),
    ("TOT22", "C000"),
):
    tx_lodes_rac22[new_col] = tx_lodes_rac22[lodes_col]

# Keep only the home geocode and the four renamed counts.
keep_cols_22 = ["h_geocode", "LI22", "MI22", "HI22", "TOT22"]
residents_by_income22 = tx_lodes_rac22[keep_cols_22]

# Preview the first rows
residents_by_income22.head()

In [None]:
# Copy the LODES columns into readable 2017 names:
# CE01 -> LI17, CE02 -> MI17, CE03 -> HI17 and C000 -> TOT17.
for new_col, lodes_col in (
    ("LI17", "CE01"),
    ("MI17", "CE02"),
    ("HI17", "CE03"),
    ("TOT17", "C000"),
):
    tx_lodes_rac17[new_col] = tx_lodes_rac17[lodes_col]

# Keep only the home geocode and the four renamed counts.
keep_cols_17 = ["h_geocode", "LI17", "MI17", "HI17", "TOT17"]
residents_by_income17 = tx_lodes_rac17[keep_cols_17]

# Preview the first rows
residents_by_income17.head()

In [None]:
# Block-to-tract crosswalk: read every field as a string so the codes keep
# their exact text, then keep the block id and its tract id.
xwalk20 = pd.read_csv('tx_xwalk_LODES8.csv', dtype='str')[['tabblk2020', 'trct']]

# Look up the tract for each home block in both years. A left join keeps
# blocks that have no crosswalk match (their 'trct' is then missing).
# NOTE: re-running this cell merges the crosswalk columns in a second time.
residents_by_income22 = residents_by_income22.merge(
    xwalk20, left_on='h_geocode', right_on='tabblk2020', how='left'
)
residents_by_income17 = residents_by_income17.merge(
    xwalk20, left_on='h_geocode', right_on='tabblk2020', how='left'
)

In [None]:
# Persist the 2022 block-level table (with its tract id attached) and display it.
residents_by_income22.to_csv("output/residents_by_income22.csv", index=False)
residents_by_income22

In [None]:
# Persist the 2017 block-level table (with its tract id attached) and display it.
residents_by_income17.to_csv("output/residents_by_income17.csv", index=False)
residents_by_income17

In [None]:
# Roll the block-level resident counts up to tracts: one row per 'trct',
# summing the low / mid / high income and total columns for 2017 ...
tx_rac17 = residents_by_income17.groupby("trct", as_index=False)[["LI17", "MI17", "HI17", "TOT17"]].sum()
# ... and likewise for 2022.
tx_rac22 = residents_by_income22.groupby("trct", as_index=False)[["LI22", "MI22", "HI22", "TOT22"]].sum()

In [None]:
# Write the tract-level 2017 resident counts to CSV (no index column)
tx_rac17.to_csv(path_or_buf="output/tx_tract_residents_by_income_2017_lodes.csv", index=False)

# Write the tract-level 2022 resident counts to CSV (no index column)
tx_rac22.to_csv(path_or_buf="output/tx_tract_residents_by_income_2022_lodes.csv", index=False)

# Join the number of workers by income group to each tract

In [None]:

# Reload the accessibility + demographics layer that was written to disk earlier.
access_demo23 = gpd.read_file(filename="output/access_demo23.shp")

# Reload the tract-level RAC tables for 2017 and 2022, forcing the tract id to
# be read as a string.
rac_dtypes = {"trct": str}
tx_rac17 = pd.read_csv("output/tx_tract_residents_by_income_2017_lodes.csv", dtype=rac_dtypes)
tx_rac22 = pd.read_csv("output/tx_tract_residents_by_income_2022_lodes.csv", dtype=rac_dtypes)

# Preview each table: the layer, then 2017, then 2022.
print(access_demo23.head())

print(tx_rac17.head())

print("\n2022 Sample:")
print(tx_rac22.head())

In [None]:
# Cast both join keys to str so GEOID and trct compare as the same type.
access_demo23["GEOID"] = access_demo23["GEOID"].astype(str)
tx_rac17["trct"] = tx_rac17["trct"].astype(str)

# Attach the 2017 resident counts to every tract in the layer (left join).
access_demo_rac17 = access_demo23.merge(
    tx_rac17,
    how="left",
    left_on="GEOID",
    right_on="trct",
)


In [None]:
# Attach the 2022 resident counts as well (left join on GEOID = trct).
# The left table already carries a "trct" column from the 2017 join, so pandas
# disambiguates the two as "trct_x" and "trct_y" in the result.
access_demo_rac22_17 = access_demo_rac17.merge(
    tx_rac22,
    how="left",
    left_on="GEOID",
    right_on="trct",
)

In [None]:
# Resident-count columns that get a "_rac" tag
rac_columns = ['LI17', 'MI17', 'HI17', 'TOT17', 'LI22', 'MI22', 'HI22', 'TOT22']

# Build the old -> new mapping and apply it in place.
rac_renames = {name: name + "_rac" for name in rac_columns}
access_demo_rac22_17.rename(columns=rac_renames, inplace=True)

# Show the resulting column names
access_demo_rac22_17.columns.tolist()

# Join Total Number of Jobs in Census tracts by different income group


In [None]:
# Load the tract-level jobs-by-income tables for 2022 and 2017, reading the
# tract id as a string.
wac_dtypes = {"trct": str}
tx_wac22 = pd.read_csv("output/tx_tract_jobs_by_income_2022_lodes.csv", dtype=wac_dtypes)
tx_wac17 = pd.read_csv("output/tx_tract_jobs_by_income_2017_lodes.csv", dtype=wac_dtypes)

In [None]:
tx_wac22 .columns.tolist()

In [None]:
# Combine the 2022 and 2017 job tables on 'trct'. The outer join keeps tracts
# that appear in only one of the two years.
# tx_wac22 is the left frame, so a column name shared by both tables must get
# "_22" on the left and "_17" on the right; the previous order ("_17", "_22")
# would have tagged each year's columns with the other year.
tx_wac_combined = tx_wac22.merge(
    tx_wac17,
    on="trct",
    how="outer",
    suffixes=("_22", "_17"),
)

# Preview the merged dataframe
tx_wac_combined.head()

In [None]:
# Tag every job column with "_wac": suffix all columns, then restore the join
# key to its plain name 'trct'.
tx_wac_combined = tx_wac_combined.add_suffix("_wac").rename(columns={"trct_wac": "trct"})

# Preview the renamed table
tx_wac_combined.head()

In [None]:
# Attach the job counts to the accessibility/resident table (left join on
# GEOID = trct). Any column name present on both sides keeps its name on the
# left and gets "_wac" appended on the right.
merged_final = access_demo_rac22_17.merge(
    tx_wac_combined,
    how="left",
    left_on="GEOID",
    right_on="trct",
    suffixes=('', '_wac'),
)

# Show the first ten rows
merged_final.head(n=10)

In [None]:
merged_final.columns.tolist()

In [None]:
# Make sure the merged table is a GeoDataFrame with "geometry" active,
# label it as EPSG:4326 in place, and write it out as an ESRI Shapefile.
merged_final = gpd.GeoDataFrame(merged_final, geometry="geometry")

merged_final.set_crs(crs="EPSG:4326", inplace=True)

merged_final.to_file(filename="output/Austin_Transit_Access25_17.shp", driver="ESRI Shapefile")

In [None]:
# Export the full merged table (accessibility, demographics, residents and jobs) to CSV
merged_final.to_csv(path_or_buf="output/Austin_Transit_Access25_17.csv", index=False)

# Equity Analysis

In [None]:
# Read the merged layer back from disk; the equity analysis below works on this copy.
access_df = gpd.read_file(filename="output/Austin_Transit_Access25_17.shp")


In [None]:
access_df.columns.tolist()

In [None]:
# Distribution of Low Income people and Low Income Jobs

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point
import matplotlib.pyplot as plt

# Dot-density scale: one plotted dot stands for 50 persons or jobs.
DOT_RATIO = 50

def generate_dots(gdf, count_col, dot_ratio, seed=42):
    """Scatter random points inside each polygon, one point per ``dot_ratio`` units.

    Args:
        gdf: GeoDataFrame whose rows hold a polygon geometry and a count column.
        count_col: Name of the column holding the quantity to represent.
        dot_ratio: Number of units that a single dot stands for.
        seed: Seed for the random number generator, so the dots are reproducible.

    Returns:
        A GeoDataFrame of point geometries carrying the CRS of ``gdf``. Rows
        with a missing, empty or invalid geometry, or a missing count, add no
        dots.
    """
    # A private generator yields the same draws as np.random.seed(seed) would,
    # without resetting NumPy's global random state for the rest of the notebook.
    rng = np.random.RandomState(seed)
    dot_geoms = []

    for _, row in gdf.iterrows():
        polygon = row.geometry

        # Check for None first: reading .is_empty on None would raise.
        if polygon is None or polygon.is_empty or not polygon.is_valid:
            continue

        # A missing count cannot be converted to int; such rows get no dots.
        value = row[count_col]
        if pd.isna(value):
            continue
        count = int(value / dot_ratio)

        # The bounding box is the same for every dot of this polygon.
        minx, miny, maxx, maxy = polygon.bounds
        for _ in range(count):
            # Rejection sampling: draw inside the bounding box until the point
            # falls within the polygon itself.
            while True:
                pnt = Point(rng.uniform(minx, maxx), rng.uniform(miny, maxy))
                if polygon.contains(pnt):
                    dot_geoms.append(pnt)
                    break
    return gpd.GeoDataFrame(geometry=dot_geoms, crs=gdf.crs)

In [None]:
# --- Dot-density map of low-wage jobs (2022) ---
# NOTE(review): `travis` and `dots_jobs` are not created in any cell visible
# here; presumably they come from a generate_dots(...) call in a cell that was
# removed - confirm, otherwise this cell fails on a fresh kernel.
fig, ax = plt.subplots(figsize=(10, 10))

# Light grey tract fill underneath, so the dots stand out.
travis.plot(ax=ax, linewidth=.9, edgecolor="white", color="#f0f0f0")

# One marker per generated job dot.
dots_jobs.plot(ax=ax, markersize=1, color='deepskyblue', label='1 dot = 50 Low-wage Jobs')

# Title, legend and layout.
ax.set_title(
    "Distribution of Low-wage Jobs in Travis County (2022)",
    fontsize=16,
    weight="bold",
)
ax.axis("off")
ax.legend(loc="upper right", frameon=True, facecolor="white", edgecolor="black", fontsize=10)
fig.tight_layout()
plt.show()

In [None]:
# --- Dot-density map of low-wage workers (2022) ---
# NOTE(review): `travis` and `dots_workers` are not created in any cell visible
# here; presumably they come from a generate_dots(...) call in a cell that was
# removed - confirm, otherwise this cell fails on a fresh kernel.
fig, ax = plt.subplots(figsize=(10, 10))

# Light grey tract fill underneath, so the dots stand out.
travis.plot(ax=ax, linewidth=0.9, edgecolor="white", color="#f0f0f0")

# One marker per generated worker dot.
dots_workers.plot(ax=ax, markersize=1, color='deeppink', label='1 dot = 50 Low-wage Workers')

# Title, legend and layout.
ax.set_title(
    "Distribution of Low-wage Workers in Travis County (2022)",
    fontsize=16,
    weight="bold",
)
ax.axis("off")
ax.legend(loc="upper right", frameon=True, facecolor="white", edgecolor="black", fontsize=10)
fig.tight_layout()
plt.show()

# Weighted Analysis

In [None]:
# Normalise each accessibility score by the number of resident workers in the
# matching group, giving "<score>_n" columns:
#   low-wage jobs  / low-income residents   (ALI.._45 / LI.._rac)
#   mid-wage jobs  / mid-income residents   (AMI.._45 / MI.._rac)
#   high-wage jobs / high-income residents  (AHI.._45 / HI.._rac)
#   total jobs     / total residents        (ATOT.._45 / TOT.._rac)
#
# A tract with zero resident workers has no meaningful ratio. Dividing by zero
# would yield inf, which then turns the means and weighted sums computed
# further down into inf as well, so non-positive denominators are masked to
# NaN and the ratio is left missing for those tracts.
for access_col, workers_col in (
    ("ALI17_45", "LI17_rac"),
    ("ALI22_45", "LI22_rac"),
    ("AMI17_45", "MI17_rac"),
    ("AMI22_45", "MI22_rac"),
    ("AHI17_45", "HI17_rac"),
    ("AHI22_45", "HI22_rac"),
    ("ATOT17_45", "TOT17_rac"),
    ("ATOT22_45", "TOT22_rac"),
):
    workers = access_df[workers_col]
    access_df[f"{access_col}_n"] = access_df[access_col] / workers.where(workers > 0)


In [None]:
# Change in each normalised score: the "22" column minus the "17" column, for
# the low (ALI), mid (AMI), high (AHI) and total (ATOT) measures in that order.
for prefix in ("ALI", "AMI", "AHI", "ATOT"):
    access_df[f"{prefix}_diff_n"] = access_df[f"{prefix}22_45_n"] - access_df[f"{prefix}17_45_n"]


In [None]:
access_df

In [None]:
import matplotlib.pyplot as plt

# Travis County tracts only (county FIPS 453)
travis_map = access_df.loc[access_df["COUNTYFP_2"] == "453"]

# Fixed class breaks; the 2025 map in the next cell reuses this list so the
# two maps share one legend scale.
custom_bins = [0, 1, 3, 7, 15, 30, 70, 450]

# --- 2017 map, classified with the user-defined breaks above ---
fig, ax = plt.subplots(figsize=(10, 10))
travis_map.plot(
    ax=ax,
    column="ALI17_45_n",
    scheme="user_defined",
    classification_kwds={'bins': custom_bins},
    cmap="Blues",
    edgecolor="black",
    linewidth=0.4,
    legend=True,
    legend_kwds={"title": "Transit Access Score"},
)

ax.set_title(
    "Low-wage Jobs Reachable in 45 Minutes by Transit per Low-wage Worker\nEstimates for 7–9 am on Friday, June 16, 2017",
    fontsize=14,
)
ax.axis("off")
fig.tight_layout()
plt.show()

 

In [None]:
import matplotlib.pyplot as plt

# Travis County tracts only (county FIPS 453)
travis_map = access_df.loc[access_df["COUNTYFP_2"] == "453"]

# --- 2025 map, classified with the user-defined breaks in custom_bins
# (defined in the previous map cell) ---
fig, ax = plt.subplots(figsize=(10, 10))
travis_map.plot(
    ax=ax,
    column="ALI22_45_n",
    scheme="user_defined",
    classification_kwds={'bins': custom_bins},
    cmap="Blues",
    edgecolor="black",
    linewidth=0.4,
    legend=True,
    legend_kwds={"title": "Transit Access Score"},
)

ax.set_title(
    "Low-wage Jobs Reachable in 45 Minutes by Transit per Low-wage Worker\nEstimates for 7–9 am on Tuesday, April 1, 2025",
    fontsize=14,
)
ax.axis("off")
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Travis County tracts only (county FIPS 453)
travis_map = access_df.loc[access_df["COUNTYFP_2"] == "453"]

# --- Change map (2025 minus 2017) with a diverging palette and 8 natural-breaks classes ---
# NOTE(review): the class breaks come from the data, so the middle of the RdBu
# palette is presumably not pinned to zero - confirm that red/blue really
# separates losses from gains.
fig, ax = plt.subplots(figsize=(10, 10))
travis_map.plot(
    ax=ax,
    column="ALI_diff_n",
    scheme="natural_breaks",
    k=8,
    cmap="RdBu",
    edgecolor="grey",
    linewidth=0.3,
    legend=True,
    legend_kwds={"title": "Change in Access Score"},
)

ax.set_title(
    "Change in Low-wage Jobs Reachable in 45 Minutes by Transit per Low-wage Worker\n(2025 - 2017)",
    fontsize=14,
)
ax.axis("off")
fig.tight_layout()
plt.show()

# Racial Group Specific Analysis

In [None]:
# Population-share column for each racial group
share_cols = {"white": "pct_white2", "black": "pct_black2", "latino": "pct_latino"}

# Coerce the access score and the share columns to numbers (bad values -> NaN).
for col in ["ATOT22_45", *share_cols.values()]:
    access_df[col] = pd.to_numeric(access_df[col], errors="coerce")

# Per-tract contribution: total job access multiplied by the group's share.
for group, share_col in share_cols.items():
    access_df[f"access_{group}"] = access_df["ATOT22_45"] * access_df[share_col]

# Weighted average per group: summed contributions divided by summed shares.
# NOTE(review): the weights are tract shares, not head counts, so every tract
# counts according to its percentage regardless of its population, and all
# tracts in access_df are included - confirm this is the intended weighting.
avg_access_white, avg_access_black, avg_access_latino = (
    access_df[f"access_{group}"].sum() / access_df[share_col].sum()
    for group, share_col in share_cols.items()
)

# One row per group for plotting
racial_access_avg = pd.DataFrame({
    "race_group": ["White", "Black", "Latino"],
    "avg_access": [avg_access_white, avg_access_black, avg_access_latino],
})

# Append an "All" row holding the plain (unweighted) mean across tracts.
racial_access_avg.loc[len(racial_access_avg)] = ["All", access_df["ATOT22_45"].mean()]

In [None]:
import altair as alt

# Both layers share the same x/y encoding, so define it once.
racial_base = alt.Chart(racial_access_avg).encode(
    alt.X("avg_access:Q", title="Average Transit Job Access (2025)"),
    alt.Y("race_group:N", title="", sort=["All", "White", "Black", "Latino"]),
)

# Thin grey bar = the lollipop stick
sticks = racial_base.mark_bar(color="lightgrey", height=4)

# Filled circle at the value = the lollipop head
lollipop = racial_base.mark_circle(color="#1f77b4", size=250, opacity=1)

# Layer stick and head, then apply the title, size and styling.
(
    (sticks + lollipop)
    .properties(
        title="Transit Accessibility to Jobs by Race Group (2025)",
        width=420,
        height=140,
    )
    .configure(font="Arial")
    .configure_view(strokeWidth=0)
    .configure_axis(grid=False)
    .configure_axisY(labelFontWeight="bold")
)

In [None]:
## Only for low income 2025

In [None]:
# Population-share column for each racial group
share_cols = {"white": "pct_white2", "black": "pct_black2", "latino": "pct_latino"}

# Coerce the normalised low-wage score and the share columns to numbers.
for col in ["ALI22_45_n", *share_cols.values()]:
    access_df[col] = pd.to_numeric(access_df[col], errors="coerce")

# Per-tract contribution: normalised low-wage access times the group's share.
# This overwrites the access_<group> columns written by the total-jobs cell.
for group, share_col in share_cols.items():
    access_df[f"access_{group}"] = access_df["ALI22_45_n"] * access_df[share_col]

# Weighted average per group: summed contributions divided by summed shares.
# NOTE(review): weights are tract shares rather than head counts - confirm
# this is the intended weighting.
avg_access_white, avg_access_black, avg_access_latino = (
    access_df[f"access_{group}"].sum() / access_df[share_col].sum()
    for group, share_col in share_cols.items()
)

# One row per group for plotting
racial_access_avg_2025 = pd.DataFrame({
    "race_group": ["White", "Black", "Latino"],
    "avg_access": [avg_access_white, avg_access_black, avg_access_latino],
})

# Append an "All" row holding the plain (unweighted) mean across tracts.
racial_access_avg_2025.loc[len(racial_access_avg_2025)] = ["All", access_df["ALI22_45_n"].mean()]

In [None]:
import altair as alt

# Both layers share the same x/y encoding, so define it once.
racial_base_2025 = alt.Chart(racial_access_avg_2025).encode(
    alt.X("avg_access:Q", title="Average Transit Job Access (2025)"),
    alt.Y("race_group:N", title="", sort=["All", "White", "Black", "Latino"]),
)

# Thin grey bar = the lollipop stick
sticks_2025 = racial_base_2025.mark_bar(color="lightgrey", height=4)

# Filled circle at the value = the lollipop head
lollipop_2025 = racial_base_2025.mark_circle(color="#1f77b4", size=250, opacity=1)

# Layer stick and head, then apply the title, size and styling.
(
    (sticks_2025 + lollipop_2025)
    .properties(
        title="Transit Accessibility to Jobs by Race Group (2025)",
        width=420,
        height=140,
    )
    .configure(font="Arial")
    .configure_view(strokeWidth=0)
    .configure_axis(grid=False)
    .configure_axisY(labelFontWeight="bold")
)

In [None]:
# Filter for Travis County
travis_df = access_df[access_df["COUNTYFP_2"] == "453"].copy()

# Ensure relevant columns are numeric. The list now also covers med_income and
# the population counts, which are the columns the calculations below use.
cols = [
    "pop_total2", "pct_white2", "pct_black2", "pct_latino", "ALI17_45", "ALI22_45",
    "med_income", "pop_white2", "pop_black2", "pop_latino",
]
travis_df[cols] = travis_df[cols].apply(pd.to_numeric, errors="coerce")

# Create deciles of tract median income, labelled D1..D10
travis_df["inc_decile"] = pd.qcut(travis_df["med_income"], 10, labels=[f"D{i+1}" for i in range(10)])

# Access score times group population, per tract. These products are only the
# numerators of the population-weighted averages computed below.
race_pop_cols = {"white": "pop_white2", "black": "pop_black2", "latino": "pop_latino"}
year_access_cols = {"17": "ALI17_45", "22": "ALI22_45"}
for yr, access_col in year_access_cols.items():
    for race, pop_col in race_pop_cols.items():
        travis_df[f"{race}_access_{yr}"] = travis_df[access_col] * travis_df[pop_col]

# Population-weighted average access per decile:
#     sum(access * population) / sum(population).
# Averaging the raw products, as before, gives a number that scales with tract
# population and is not an access level. This matches the weighting used by
# the quintile chart in the next analysis cell.
by_decile = travis_df.groupby("inc_decile", observed=False)
summary = pd.DataFrame({
    f"{race}_access_{yr}": by_decile[f"{race}_access_{yr}"].sum() / by_decile[pop_col].sum()
    for yr in year_access_cols
    for race, pop_col in race_pop_cols.items()
}).reset_index()

# Reshape to long form and split the column name into race and year for plotting
plot_df = summary.melt(id_vars="inc_decile", var_name="group_year", value_name="avg_access")
plot_df["Race"] = plot_df["group_year"].str.extract(r'(\w+)_access')
plot_df["Year"] = plot_df["group_year"].str.extract(r'access_(\d+)')

# Plot one line per race and year
plt.figure(figsize=(12, 6))
for race in ["white", "black", "latino"]:
    for year in ["17", "22"]:
        line = plot_df[(plot_df["Race"] == race) & (plot_df["Year"] == year)]
        plt.plot(line["inc_decile"], line["avg_access"], marker='o', label=f"{race.title()} - 20{year}")

plt.title("Transit Access to Low-Wage Jobs by Income Decile and Race (Travis County)")
plt.xlabel("Median Income Decile (D1 = Most Poor)")
plt.ylabel("Avg. Low-Wage Job Access")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
summary.head()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# STEP 1: Filter Travis County
travis_df = access_df[access_df["COUNTYFP_2"] == "453"].copy()

# STEP 2: Ensure relevant columns are numeric
cols = ["med_income", "ALI17_45", "ALI22_45", "pop_white2", "pop_black2", "pop_latino"]
travis_df[cols] = travis_df[cols].apply(pd.to_numeric, errors="coerce")

# STEP 3: Create income quintiles (5 equal-count groups labelled D1..D5).
# The column keeps its historical name "income_decile".
travis_df["income_decile"] = pd.qcut(travis_df["med_income"], 5, labels=[f"D{i+1}" for i in range(5)])

# STEP 4: Calculate weighted access using actual population counts
weighted_data = []

# Walk the quintile labels in order. Iterating the categories instead of
# .unique() leaves out the NaN "group" produced by tracts with no median
# income, which would otherwise add an empty row with a missing x value.
for decile in travis_df["income_decile"].cat.categories:
    subset = travis_df[travis_df["income_decile"] == decile]
    for race, pop_col in zip(["white", "black", "latino"], ["pop_white2", "pop_black2", "pop_latino"]):
        for year, access_col in zip(["2017", "2025"], ["ALI17_45", "ALI22_45"]):
            access_weighted_sum = (subset[access_col] * subset[pop_col]).sum()
            population_sum = subset[pop_col].sum()
            # No population in this group -> no average to report
            weighted_avg = access_weighted_sum / population_sum if population_sum > 0 else None
            weighted_data.append({
                "income_decile": decile,
                "Race": race.title(),
                "Year": year,
                "Weighted_Access": weighted_avg
            })

# STEP 5: Create DataFrame, ordered by quintile label
plot_df = pd.DataFrame(weighted_data).sort_values(by="income_decile")

# STEP 6: Plot one line per race and year
plt.figure(figsize=(12, 6))
for race in plot_df["Race"].unique():
    for year in ["2017", "2025"]:
        df_sub = plot_df[(plot_df["Race"] == race) & (plot_df["Year"] == year)]
        plt.plot(df_sub["income_decile"], df_sub["Weighted_Access"], marker='o', label=f"{race} - {year}")

# The data are split into five groups, so the title says quintile to agree
# with the x-axis label.
plt.title("Population-Weighted Transit Access to Low-Wage Jobs\nby Median Income Quintile and Race (Travis County)", fontsize=14)
plt.xlabel("Income Quintiles (D1 = Lowest Income)")
plt.ylabel("Avg. Transit Access to Low-Wage Jobs")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


# Income Group


In [None]:
# Step 1: Travis County tracts only (county FIPS 453), as an independent copy
travis_df = access_df.loc[access_df["COUNTYFP_2"] == "453"].copy()

# Step 2: Coerce the 2022-suffixed access scores and resident counts to numbers
cols_to_convert = [
    "ALI22_45", "AMI22_45", "AHI22_45", "ATOT22_45",
    "LI22_rac", "MI22_rac", "HI22_rac", "TOT22_rac"
]
travis_df[cols_to_convert] = travis_df[cols_to_convert].apply(pd.to_numeric, errors="coerce")

# Step 3: Access score times resident count, one product column per wage group
wage_pairs_22 = {
    "li": ("ALI22_45", "LI22_rac"),
    "mi": ("AMI22_45", "MI22_rac"),
    "hi": ("AHI22_45", "HI22_rac"),
    "all": ("ATOT22_45", "TOT22_rac"),
}
for key, (access_col, workers_col) in wage_pairs_22.items():
    travis_df[f"access_{key}_22"] = travis_df[access_col] * travis_df[workers_col]

# Step 4: Resident-weighted average = summed products / summed residents
avg_access_li_22, avg_access_mi_22, avg_access_hi_22, avg_access_all_22 = (
    travis_df[f"access_{key}_22"].sum() / travis_df[workers_col].sum()
    for key, (_, workers_col) in wage_pairs_22.items()
)

# Step 5: Summary tables. Both hold the same four values; they differ only in
# the name of the value column. The second one is the table that is later
# combined with the 2017 summary.
income_groups = ["Low-wage Jobs", "Medium-wage Jobs", "High-wage Jobs", "All Jobs"]
weighted_avgs_22 = [avg_access_li_22, avg_access_mi_22, avg_access_hi_22, avg_access_all_22]

income_access_avg_travis_2025 = pd.DataFrame({
    "income_group": income_groups,
    "Population-weighted Avg Job Access (2025)": weighted_avgs_22,
})

income_access_avg_2025 = pd.DataFrame({
    "income_group": income_groups,
    "Population-weighted Average Job Access": weighted_avgs_22,
})

In [None]:
income_access_avg_2025

In [None]:
# Coerce the 2017 access scores and resident counts to numbers.
# travis_df is the Travis County copy created in the 2025 summary cell.
cols_2017 = [
    "ALI17_45", "AMI17_45", "AHI17_45", "ATOT17_45",
    "LI17_rac", "MI17_rac", "HI17_rac", "TOT17_rac"
]
travis_df[cols_2017] = travis_df[cols_2017].apply(pd.to_numeric, errors="coerce")

# Access score times resident count, one product column per wage group (2017)
wage_pairs_17 = {
    "li": ("ALI17_45", "LI17_rac"),
    "mi": ("AMI17_45", "MI17_rac"),
    "hi": ("AHI17_45", "HI17_rac"),
    "all": ("ATOT17_45", "TOT17_rac"),
}
for key, (access_col, workers_col) in wage_pairs_17.items():
    travis_df[f"access_{key}_17"] = travis_df[access_col] * travis_df[workers_col]

# Resident-weighted average for 2017 = summed products / summed residents
avg_access_li_17, avg_access_mi_17, avg_access_hi_17, avg_access_all_17 = (
    travis_df[f"access_{key}_17"].sum() / travis_df[workers_col].sum()
    for key, (_, workers_col) in wage_pairs_17.items()
)

# Summary table with one row per wage group
income_access_avg_travis_2017 = pd.DataFrame({
    "income_group": ["Low-wage Jobs", "Medium-wage Jobs", "High-wage Jobs", "All Jobs"],
    "Population-weighted Average Job Access": [
        avg_access_li_17, avg_access_mi_17, avg_access_hi_17, avg_access_all_17
    ],
})

In [None]:
income_access_avg_travis_2017

In [None]:
# Tag each summary table with its year (stored as a string), in place.
for summary_frame, year_label in (
    (income_access_avg_travis_2017, "2017"),
    (income_access_avg_2025, "2025"),
):
    summary_frame["year"] = year_label

# Stack 2017 on top of 2025 into one long-format table for Altair.
income_access_combined = pd.concat(
    objs=[income_access_avg_travis_2017, income_access_avg_2025],
    ignore_index=True,
)

In [None]:
income_access_combined

In [None]:
import altair as alt
import pandas as pd

# Row order for the wage groups on the y axis
income_order = ["Low-wage Jobs", "Medium-wage Jobs", "High-wage Jobs","All Jobs"]

# Shared x/y encoding for both layers
base = alt.Chart(income_access_combined).encode(
    y=alt.Y("income_group:N", sort=income_order, title=""),
    x=alt.X("Population-weighted Average Job Access:Q", title="Weighted-average Jobs"),
)

# Lollipop stick (thin grey bar) and head (filled circle)
bars = base.mark_bar(height=4, color="lightgray")
dots = base.mark_circle(size=250, color="#1f77b4")

# Layer the two marks, draw one panel per year, then style the figure.
layered = bars + dots
chart = (
    layered.facet(column=alt.Column("year:N", title=None))
    .properties(title="Transit Access to Jobs: 2017 vs 2025")
    .configure_axis(grid=False)
    .configure_axisY(labelFontWeight="bold")
    .configure_view(strokeWidth=0)
    .configure_title(fontSize=18, anchor="start")
)

chart

In [None]:
# Measure the Gini coefficient for the distribution of transit access to low-wage jobs per low-wage worker

In [None]:
import numpy as np

import numpy as np
import matplotlib.pyplot as plt

def lorenz_curve(values, weights):
    """Return cumulative population share and cumulative access share.

    Observations are ordered by ascending ``values``. Each point of the curve
    gives the share of total weight covered so far and the share of total
    weighted value (``values * weights``) held by that part of the population.

    Args:
        values: 1-D sequence of access scores, one per observation.
        weights: 1-D sequence of weights (e.g. population), same length as
            ``values``.

    Returns:
        Tuple ``(cum_weights_norm, cum_access_norm)`` of NumPy arrays, both
        starting at 0 and ending at 1, with ``len(values) + 1`` entries each.

    Raises:
        ValueError: If the inputs are empty, are not 1-D arrays of equal
            length, or if the total weight or total weighted value is not
            positive (including NaN), since the shares are then undefined.
    """
    vals = np.asarray(values, dtype=float)
    wts = np.asarray(weights, dtype=float)

    if vals.ndim != 1 or vals.shape != wts.shape:
        raise ValueError("values and weights must be 1-D sequences of equal length")
    if vals.size == 0:
        raise ValueError("values and weights must not be empty")

    # Sort by access value; a stable sort keeps tied observations in input
    # order, so the result is deterministic.
    order = np.argsort(vals, kind="stable")
    sorted_values = vals[order]
    sorted_weights = wts[order]

    cum_weights = np.cumsum(sorted_weights)
    cum_access = np.cumsum(sorted_values * sorted_weights)

    # `not x > 0` is also true for NaN, so missing data is rejected here
    # instead of silently producing an all-NaN curve.
    if not cum_weights[-1] > 0:
        raise ValueError("total weight must be positive")
    if not cum_access[-1] > 0:
        raise ValueError("total weighted value must be positive")

    cum_weights_norm = cum_weights / cum_weights[-1]
    cum_access_norm = cum_access / cum_access[-1]

    # Add (0,0) to the beginning for proper Lorenz curve
    cum_weights_norm = np.insert(cum_weights_norm, 0, 0)
    cum_access_norm = np.insert(cum_access_norm, 0, 0)

    return cum_weights_norm, cum_access_norm

def plot_lorenz(cum_pop, cum_access, gini_score, title="Lorenz Curve of Transit Access"):
    """Draw a Lorenz curve against the line of equality and show the figure.

    Parameters
    ----------
    cum_pop : array-like
        Cumulative population share (x-axis).
    cum_access : array-like
        Cumulative access share (y-axis), same length as ``cum_pop``.
    gini_score : float
        Gini coefficient, printed on the plot with three decimals.
    title : str
        Figure title.

    Returns nothing; the figure is rendered via ``plt.show()``.
    """
    _, axes = plt.subplots(figsize=(6, 6))

    # Curve first, diagonal second: the legend lists them in this order
    axes.plot(cum_pop, cum_access, color='blue', linewidth=2, label='Lorenz Curve')
    axes.plot([0, 1], [0, 1], color='gray', linestyle='--', label='Line of Equality')

    # Shade the gap between the curve and the diagonal
    axes.fill_between(cum_pop, cum_access, cum_pop, alpha=0.5, color='lightblue')

    # Gini annotation in a white box
    gini_label = f"Gini Coefficient = {gini_score:.3f}"
    axes.text(0.6, 0.2, gini_label, fontsize=12, bbox={'facecolor': 'white'})

    axes.set_title(title, fontsize=14)
    axes.set(
        xlabel="Cumulative Share of Population",
        ylabel="Cumulative Share of Transit Access",
    )
    axes.set_aspect('equal')
    axes.legend()
    axes.grid(False)

    plt.tight_layout()
    plt.show()

In [None]:
# Restrict the analysis to Travis County (county FIPS code 453)
is_travis = access_df["COUNTYFP_2"] == "453"
travis_df = access_df[is_travis]

# Keep tracts where both fields are present and the weight is strictly positive
# NOTE(review): the columns are named *22* while the plot title says 2025 - confirm the data year
valid = (
    travis_df[["ATOT22_45", "TOT22_rac"]]
    .dropna()
    .loc[lambda d: d["TOT22_rac"] > 0]
)

# Lorenz curve points: access values weighted by TOT22_rac
cum_pop, cum_access = lorenz_curve(valid["ATOT22_45"], valid["TOT22_rac"])

# Gini = 1 - 2 * (area under the Lorenz curve), area by the trapezoid rule
area_under_curve = np.trapz(cum_access, cum_pop)
gini_score = 1 - 2 * area_under_curve

plot_lorenz(
    cum_pop,
    cum_access,
    gini_score,
    title="Lorenz Curve: 2025 Transit Access to Jobs",
)

In [None]:
# 2017 Lorenz curve / Gini for transit access to all jobs, Travis County only.
# Uses travis_df (the Travis County subset built in the 2025 cell above) instead of
# the unfiltered access_df, so the 2017 figure covers the same tracts as the 2025
# and low-wage analyses and the year-to-year comparison is like-for-like.

# Drop NA and zero weights
valid = travis_df[["ATOT17_45", "TOT17_rac"]].dropna()
valid = valid[valid["TOT17_rac"] > 0]

# Get Lorenz Curve data (access values weighted by TOT17_rac)
cum_pop, cum_access = lorenz_curve(valid["ATOT17_45"], valid["TOT17_rac"])

# Compute Gini: 1 - 2 * (area under the Lorenz curve), area by the trapezoid rule
gini_score = 1 - 2 * np.trapz(cum_access, cum_pop)

# Plot
plot_lorenz(cum_pop, cum_access, gini_score, 
            title="Lorenz Curve: 2017 Transit Access to Jobs")

In [None]:
## Low-wage analysis (ALI22_45 weighted by LI22_rac), Travis County

# Keep tracts where both fields are present and the weight is strictly positive
valid = (
    travis_df[["ALI22_45", "LI22_rac"]]
    .dropna()
    .loc[lambda d: d["LI22_rac"] > 0]
)

# Lorenz curve points
cum_pop, cum_access = lorenz_curve(valid["ALI22_45"], valid["LI22_rac"])

# Gini = 1 - 2 * (area under the Lorenz curve), area by the trapezoid rule
area_under_curve = np.trapz(cum_access, cum_pop)
gini_score = 1 - 2 * area_under_curve

plot_lorenz(
    cum_pop,
    cum_access,
    gini_score,
    title="Lorenz Curve: 2025 Transit Access to Low-wage Jobs",
)

In [None]:
## Low-wage analysis (ALI17_45 weighted by LI17_rac), Travis County

# Keep tracts where both fields are present and the weight is strictly positive
valid = (
    travis_df[["ALI17_45", "LI17_rac"]]
    .dropna()
    .loc[lambda d: d["LI17_rac"] > 0]
)

# Lorenz curve points
cum_pop, cum_access = lorenz_curve(valid["ALI17_45"], valid["LI17_rac"])

# Gini = 1 - 2 * (area under the Lorenz curve), area by the trapezoid rule
area_under_curve = np.trapz(cum_access, cum_pop)
gini_score = 1 - 2 * area_under_curve

plot_lorenz(
    cum_pop,
    cum_access,
    gini_score,
    title="Lorenz Curve: 2017 Transit Access to Low-wage Jobs",
)

In [None]:
## Gentrification and Transit Accessibility Link


In [None]:
import geopandas as gpd

# Displacement-risk shapefile; its companion files must sit in the same folder as the .shp
austin_gent_path = "DataPortal/City of Austin Displacement Risk Areas 2020_20250420/geo_export_12d09481-a298-4c7d-a7f6-61f84da11512.shp"
austin_gent = gpd.read_file(austin_gent_path)

# Preview the first rows, then the column names
print(austin_gent.head(), austin_gent.columns, sep="\n")

In [None]:
# Check CRS: print the coordinate reference system attached to the displacement-risk layer
print("CRS:", austin_gent.crs)

# Check potential merge column: list the column names to find the tract identifier to join on
austin_gent.columns

In [None]:
# Make sure both GEOID columns are string type, so the merge keys share a dtype.
# Both frames are modified in place.
# NOTE(review): astype(str) only converts the current values; if either column was
# read as a numeric type, it will not restore leading zeros or remove a trailing ".0".
# Confirm the raw dtypes of geoid20 and GEOID.
austin_gent["geoid20"] = austin_gent["geoid20"].astype(str)
access_df["GEOID"] = access_df["GEOID"].astype(str)

In [None]:
# Join key plus the attributes to bring over from the displacement-risk layer
gent_columns = ["geoid20", "gentrifica", "vulnerable", "demographi", "housing_ma", "displaceme"]
gent_attributes = austin_gent[gent_columns]

# Left join on the tract identifier: every row of access_df is kept, and rows
# without a match in the displacement-risk layer get missing values
access_gent = access_df.merge(
    gent_attributes,
    how="left",
    left_on="GEOID",
    right_on="geoid20",
)
access_gent

In [None]:
# Get unique values in 'gentrifica' and 'displaceme' columns
# (read from the source layer austin_gent, not from the merged access_gent)
print("Gentrification Categories:\n", austin_gent["gentrifica"].unique())
print("\nDisplacement Risk Categories:\n", austin_gent["displaceme"].unique())

In [None]:
import pandas as pd
import IPython.display as disp

# Columns to average within each displacement category, mapped to display labels
summary_labels = {
    'ALI17_45': 'Low-Income Access 2017',
    'ALI22_45': 'Low-Income Access 2025',
    'ATOT17_45': 'Total Job Access 2017',
    'ATOT22_45': 'Total Job Access 2025',
    'diff_LI': 'Change in LI Access',
    'diff_TOT': 'Change in Total Access',
}

# Mean of every listed column per displacement category
mean_spec = dict.fromkeys(summary_labels, 'mean')
summary_table = access_gent.groupby('displaceme').agg(mean_spec).reset_index()

# Readable headers: the group key first, then the labels in the same order as the columns
summary_table.columns = ['Displacement', *summary_labels.values()]

# Display as table, rounded to two decimals
disp.display(summary_table.round(2))

In [None]:

import pandas as pd
import IPython.display as disp

# Column holding the change in low-wage job access used throughout this cell
change_col = "diff_LI"
change = access_gent[change_col]

# Step 1: Classify each tract as Gain / Loss / No change in low-wage access.
# Missing changes stay missing (and so drop out of the counts below) and exact
# zeros get their own category, instead of both being counted as "Loss".
change_type = pd.Series("No change", index=access_gent.index, dtype="object")
change_type = change_type.mask(change > 0, "Gain").mask(change < 0, "Loss")
access_gent["access_change_type"] = change_type.where(change.notna())

# Step 2: Group by gentrification category
grouped = access_gent.groupby("gentrifica")

# Step 3: Number of tracts per gentrification category and change type.
# reindex guarantees all three columns exist even if a category never occurs,
# so the percentage lines below cannot raise KeyError.
change_categories = ["Gain", "Loss", "No change"]
access_change_counts = (
    access_gent.groupby(["gentrifica", "access_change_type"])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=change_categories, fill_value=0)
)

# Add proportion columns (shares of tracts with a non-missing change)
access_change_counts["Total"] = access_change_counts.sum(axis=1)
access_change_counts["% Gained"] = (access_change_counts["Gain"] / access_change_counts["Total"]) * 100
access_change_counts["% Lost"] = (access_change_counts["Loss"] / access_change_counts["Total"]) * 100
access_change_counts["% No change"] = (access_change_counts["No change"] / access_change_counts["Total"]) * 100

# Step 4: Mean and Median change in access per gentrification group
summary_stats = grouped[change_col].agg(["mean", "median"]).rename(
    columns={"mean": "Mean Change", "median": "Median Change"}
)

# Step 5: Merge summary stats with gain/loss proportions
access_summary = access_change_counts.join(summary_stats)

# View the summary
disp.display(access_summary)

In [None]:

import pandas as pd
import IPython.display as disp

# Column holding the change in access to all jobs. The same column now drives both
# the gain/loss classification and the mean/median statistics; previously the
# statistics were taken from diff_LI, which mixed two different measures in one table.
# NOTE(review): confirm ATOT_diff_n is the intended total-jobs change column
# (the displacement summary table uses diff_TOT).
change_col = "ATOT_diff_n"
change = access_gent[change_col]

# Step 1: Classify each tract as Gain / Loss / No change in total job access.
# Missing changes stay missing (and so drop out of the counts below) and exact
# zeros get their own category, instead of both being counted as "Loss".
change_type = pd.Series("No change", index=access_gent.index, dtype="object")
change_type = change_type.mask(change > 0, "Gain").mask(change < 0, "Loss")
access_gent["access_change_type"] = change_type.where(change.notna())

# Step 2: Group by gentrification category
grouped = access_gent.groupby("gentrifica")

# Step 3: Number of tracts per gentrification category and change type.
# reindex guarantees all three columns exist even if a category never occurs,
# so the percentage lines below cannot raise KeyError.
change_categories = ["Gain", "Loss", "No change"]
access_change_counts = (
    access_gent.groupby(["gentrifica", "access_change_type"])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=change_categories, fill_value=0)
)

# Add proportion columns (shares of tracts with a non-missing change)
access_change_counts["Total"] = access_change_counts.sum(axis=1)
access_change_counts["% Gained"] = (access_change_counts["Gain"] / access_change_counts["Total"]) * 100
access_change_counts["% Lost"] = (access_change_counts["Loss"] / access_change_counts["Total"]) * 100
access_change_counts["% No change"] = (access_change_counts["No change"] / access_change_counts["Total"]) * 100

# Step 4: Mean and Median change in access per gentrification group
summary_stats = grouped[change_col].agg(["mean", "median"]).rename(
    columns={"mean": "Mean Change", "median": "Median Change"}
)

# Step 5: Merge summary stats with gain/loss proportions
access_summary = access_change_counts.join(summary_stats)

# View the summary
disp.display(access_summary)

In [None]:
# List every column of the merged frame (includes the access_change_type column added above)
access_gent.columns.tolist()

# Unweighted Analysis