In [1]:
# ===== Auto-download data (LODES8 2022 + Census TIGER) =====
# pip install pandas geopandas shapely fiona pyproj requests

import os, zipfile, io, requests, pandas as pd, geopandas as gpd
from pathlib import Path

DATA = Path("data"); DATA.mkdir(exist_ok=True)

def fetch(url: str, out_path: Path):
    if out_path.exists():
        print(f"✅ Exists: {out_path.name}")
        return out_path
    print(f"⬇️  Downloading: {url}")
    r = requests.get(url, stream=True, timeout=60)
    r.raise_for_status()
    out_path.write_bytes(r.content)
    print(f"✅ Saved: {out_path.name}")
    return out_path

def fetch_unzip(url: str, out_dir: Path):
    out_dir.mkdir(exist_ok=True, parents=True)
    marker = out_dir / ".unzipped"
    if marker.exists():
        print(f"✅ Unzipped: {out_dir.name}")
        return out_dir
    print(f"⬇️  Downloading & unzipping: {url}")
    r = requests.get(url, stream=True, timeout=60)
    r.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(out_dir)
    marker.touch()
    print(f"✅ Ready: {out_dir}")
    return out_dir

# --- LODES8 (WA) OD + crosswalk (2022) ---
od_url  = "https://lehd.ces.census.gov/data/lodes/LODES8/wa/od/wa_od_main_JT00_2022.csv.gz"
xw_url  = "https://lehd.ces.census.gov/data/lodes/LODES8/wa/wa_xwalk.csv.gz"
od_fp   = fetch(od_url, DATA / "wa_od_main_JT00_2022.csv.gz")
xw_fp   = fetch(xw_url, DATA / "wa_xwalk.csv.gz")

# --- TIGER/Line 2021 tracts (Washington; FIPS 53) ---
tracts_dir = fetch_unzip(
    "https://www2.census.gov/geo/tiger/TIGER2021/TRACT/tl_2021_53_tract.zip",
    DATA / "tl_2021_53_tract"
)

# --- TIGER/Line 2021 places (to extract Seattle boundary programmatically) ---
places_dir = fetch_unzip(
    "https://www2.census.gov/geo/tiger/TIGER2021/PLACE/tl_2021_53_place.zip",
    DATA / "tl_2021_53_place"
)

# Build Seattle city boundary from TIGER places (no need to hunt for a GeoJSON)
places = gpd.read_file(places_dir / "tl_2021_53_place.shp").to_crs(4326)
seattle = places.query("NAME == 'Seattle'").copy()
assert len(seattle) == 1, "Seattle boundary not found or multiple matches."
seattle_fp = DATA / "seattle_boundary.geojson"
seattle.to_file(seattle_fp, driver="GeoJSON")
print(f"✅ Wrote Seattle boundary: {seattle_fp}")

# Quick peek of LODES files (optional)
od = pd.read_csv(od_fp, nrows=5); xw = pd.read_csv(xw_fp, nrows=5)
print("OD columns:", od.columns.tolist()[:10])
print("Xwalk columns:", xw.columns.tolist()[:10])


⬇️  Downloading: https://lehd.ces.census.gov/data/lodes/LODES8/wa/od/wa_od_main_JT00_2022.csv.gz
✅ Saved: wa_od_main_JT00_2022.csv.gz
⬇️  Downloading: https://lehd.ces.census.gov/data/lodes/LODES8/wa/wa_xwalk.csv.gz
✅ Saved: wa_xwalk.csv.gz
⬇️  Downloading & unzipping: https://www2.census.gov/geo/tiger/TIGER2021/TRACT/tl_2021_53_tract.zip
✅ Ready: data\tl_2021_53_tract
⬇️  Downloading & unzipping: https://www2.census.gov/geo/tiger/TIGER2021/PLACE/tl_2021_53_place.zip
✅ Ready: data\tl_2021_53_place
✅ Wrote Seattle boundary: data\seattle_boundary.geojson
OD columns: ['w_geocode', 'h_geocode', 'S000', 'SA01', 'SA02', 'SA03', 'SE01', 'SE02', 'SE03', 'SI01']
Xwalk columns: ['tabblk2020', 'st', 'stusps', 'stname', 'cty', 'ctyname', 'trct', 'trctname', 'bgrp', 'bgrpname']


In [6]:
# ---- Step 1: Load LODES8 OD (keep geocodes as strings to preserve leading zeros)
import pandas as pd, geopandas as gpd

od = pd.read_csv(
    "data/wa_od_main_JT00_2022.csv.gz",
    dtype={"h_geocode":"string", "w_geocode":"string"}
)
# S000 = all jobs; keep as int for aggregation
od["S000"] = od["S000"].astype("int64")

print("OD sample cols:", od.columns.tolist()[:12])

OD sample cols: ['w_geocode', 'h_geocode', 'S000', 'SA01', 'SA02', 'SA03', 'SE01', 'SE02', 'SE03', 'SI01', 'SI02', 'SI03']


In [7]:
# ---- Step 2: Load crosswalk & build tract GEOIDs from st+cty+trct
xw = pd.read_csv("data/wa_xwalk.csv.gz", dtype={"tabblk2020":"string","st":"string","cty":"string","trct":"string"})
keep_cols = ["tabblk2020","st","cty","trct","blklatdd","blklondd"]
xw = xw[keep_cols].copy()

  xw = pd.read_csv("data/wa_xwalk.csv.gz", dtype={"tabblk2020":"string","st":"string","cty":"string","trct":"string"})


In [8]:
# zero-pad & construct tract geoid (2 + 3 + 6 = 11 digits)
xw["st"]  = xw["st"].str.zfill(2)
xw["cty"] = xw["cty"].str.zfill(3)
xw["trct"] = xw["trct"].str.zfill(6)
xw["tract_geoid"] = xw["st"] + xw["cty"] + xw["trct"]

xw_home = xw[["tabblk2020","tract_geoid"]].rename(columns={"tabblk2020":"h_geocode","tract_geoid":"tract_home"})
xw_work = xw[["tabblk2020","tract_geoid"]].rename(columns={"tabblk2020":"w_geocode","tract_geoid":"tract_work"})

In [9]:
# ---- Step 3: Attach tract IDs to OD (home & work)
od = od.merge(xw_home, on="h_geocode", how="left")
od = od.merge(xw_work, on="w_geocode", how="left", suffixes=("",""))

# sanity check: any missing tract IDs?
missing_home = od["tract_home"].isna().mean()
missing_work = od["tract_work"].isna().mean()
print(f"Missing tract_home: {missing_home:.3%} | Missing tract_work: {missing_work:.3%}")

Missing tract_home: 0.000% | Missing tract_work: 0.000%


In [10]:
# ---- Step 4: Aggregate to tract-to-tract flows
flows = (od.groupby(["tract_home","tract_work"], dropna=False, as_index=False)["S000"].sum()
           .rename(columns={"S000":"jobs"}))

# drop OD pairs with missing tract ids just in case
flows = flows.dropna(subset=["tract_home","tract_work"]).copy()

print("Flow pairs (tract→tract):", len(flows))

Flow pairs (tract→tract): 668211


In [11]:
# ---- Step 5: Load WA tracts (TIGER/Line 2021) and compute centroids
tracts = gpd.read_file("data/tl_2021_53_tract/tl_2021_53_tract.shp").to_crs(4326)
tracts = tracts.rename(columns={"GEOID":"tract"}).loc[:, ["tract","geometry"]].copy()
tracts["lon"] = tracts.geometry.centroid.x
tracts["lat"] = tracts.geometry.centroid.y


  tracts["lon"] = tracts.geometry.centroid.x

  tracts["lat"] = tracts.geometry.centroid.y


In [12]:
# ---- Step 6: Attach origin/destination coords to flows
flows = flows.merge(tracts[["tract","lon","lat"]], left_on="tract_home", right_on="tract", how="left")
flows = flows.rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["tract"])

flows = flows.merge(tracts[["tract","lon","lat"]], left_on="tract_work", right_on="tract", how="left")
flows = flows.rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["tract"])

In [13]:
# ---- Step 7: Filter to flows with WORK in Seattle (build Seattle from TIGER Places you saved earlier)
seattle = gpd.read_file("data/seattle_boundary.geojson").to_crs(4326)
flows_gdf = gpd.GeoDataFrame(flows, geometry=gpd.points_from_xy(flows["work_lon"], flows["work_lat"]), crs=4326)
flows_seattle = flows_gdf[flows_gdf.within(seattle.unary_union)].copy()

  flows_seattle = flows_gdf[flows_gdf.within(seattle.unary_union)].copy()


In [14]:
# ---- Step 8: Export for Kepler.gl Arc Layer
out_csv = "data/seattle_flows_kepler_2022.csv"
flows_seattle[["home_lon","home_lat","work_lon","work_lat","jobs"]].to_csv(out_csv, index=False)
print(f"✅ Exported for Kepler.gl: {out_csv}")

✅ Exported for Kepler.gl: data/seattle_flows_kepler_2022.csv


# ReRunning the code to export OD data with block, blockgroup, tract level flows

In [20]:
# ==========================================================
# Seattle Commuting Flows from LODES8 (WA, 2022)
# - Block, Block Group, and Tract level
# - Filtered to jobs with WORK in Seattle city boundary
# ==========================================================

import pandas as pd, geopandas as gpd, requests, zipfile, io
from pathlib import Path

DATA = Path("data"); DATA.mkdir(exist_ok=True)

# ----------------------------------------------------------
# 1) Fetch OD + Crosswalk + TIGER (tracts, block groups, places)
# ----------------------------------------------------------
def fetch(url, out_path):
    if not out_path.exists():
        print("⬇️", url)
        r = requests.get(url, timeout=60)
        r.raise_for_status()
        out_path.write_bytes(r.content)
    return out_path

def fetch_unzip(url, out_dir):
    shp_path = list(out_dir.glob("*.shp"))
    if shp_path: return shp_path[0]
    print("⬇️", url)
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(r.content)) as z: z.extractall(out_dir)
    return list(out_dir.glob("*.shp"))[0]

od_fp  = fetch("https://lehd.ces.census.gov/data/lodes/LODES8/wa/od/wa_od_main_JT00_2022.csv.gz",
               DATA/"wa_od_main_JT00_2022.csv.gz")
xw_fp  = fetch("https://lehd.ces.census.gov/data/lodes/LODES8/wa/wa_xwalk.csv.gz",
               DATA/"wa_xwalk.csv.gz")

tracts_shp = fetch_unzip("https://www2.census.gov/geo/tiger/TIGER2021/TRACT/tl_2021_53_tract.zip",
                         DATA/"tl_2021_53_tract")
bg_shp     = fetch_unzip("https://www2.census.gov/geo/tiger/TIGER2021/BG/tl_2021_53_bg.zip",
                         DATA/"tl_2021_53_bg")
places_shp = fetch_unzip("https://www2.census.gov/geo/tiger/TIGER2021/PLACE/tl_2021_53_place.zip",
                         DATA/"tl_2021_53_place")

In [21]:
# ----------------------------------------------------------
# 2) Load OD + Crosswalk
# ----------------------------------------------------------
od = pd.read_csv(od_fp, dtype={"h_geocode":"string","w_geocode":"string"})
od["S000"] = od["S000"].astype("int64")

xw = pd.read_csv(xw_fp, dtype={"tabblk2020":"string","st":"string","cty":"string","trct":"string"})
xw["st"]   = xw["st"].str.zfill(2)
xw["cty"]  = xw["cty"].str.zfill(3)
xw["trct"] = xw["trct"].str.zfill(6)
xw["tract_geoid"] = xw["st"] + xw["cty"] + xw["trct"]
xw["block_code4"] = xw["tabblk2020"].str[-4:]
xw["bgrp"] = xw["block_code4"].str[0]
xw["bg_geoid"] = xw["tract_geoid"] + xw["bgrp"]

# lookups
blk_coords = xw.rename(columns={"tabblk2020":"geocode","blklondd":"lon","blklatdd":"lat"})[["geocode","lon","lat"]]
blk_to_bg  = xw[["tabblk2020","bg_geoid"]].rename(columns={"tabblk2020":"geocode"})
blk_to_tr  = xw[["tabblk2020","tract_geoid"]].rename(columns={"tabblk2020":"geocode"})

  xw = pd.read_csv(xw_fp, dtype={"tabblk2020":"string","st":"string","cty":"string","trct":"string"})


In [22]:
# ----------------------------------------------------------
# 3) Seattle boundary
# ----------------------------------------------------------
places = gpd.read_file(places_shp).to_crs(4326)
seattle = places.query("NAME == 'Seattle'").copy()
print("Seattle boundary loaded.")

Seattle boundary loaded.


In [23]:
# ----------------------------------------------------------
# 4) Block → Block flows
# ----------------------------------------------------------
flows_block = od[["h_geocode","w_geocode","S000"]].rename(columns={"S000":"jobs"})

# attach coords
flows_block = flows_block.merge(blk_coords, left_on="h_geocode", right_on="geocode", how="left") \
                         .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["geocode"])
flows_block = flows_block.merge(blk_coords, left_on="w_geocode", right_on="geocode", how="left") \
                         .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["geocode"])

# filter Seattle destinations
flows_block_gdf = gpd.GeoDataFrame(flows_block,
                                   geometry=gpd.points_from_xy(flows_block["work_lon"], flows_block["work_lat"]),
                                   crs=4326)
flows_block_sea = flows_block_gdf[flows_block_gdf.within(seattle.unary_union)].dropna(
    subset=["home_lon","home_lat","work_lon","work_lat"]
).copy()
flows_block_sea["geo_level"] = "block"
print("Block-level flows:", len(flows_block_sea))

  flows_block_sea = flows_block_gdf[flows_block_gdf.within(seattle.unary_union)].dropna(


Block-level flows: 544781


In [24]:
# ----------------------------------------------------------
# 5) Block Group → Block Group flows
# ----------------------------------------------------------
od_bg = od.merge(blk_to_bg.rename(columns={"geocode":"h_geocode","bg_geoid":"bg_home"}),
                 on="h_geocode", how="left") \
          .merge(blk_to_bg.rename(columns={"geocode":"w_geocode","bg_geoid":"bg_work"}),
                 on="w_geocode", how="left")

flows_bg = (od_bg.groupby(["bg_home","bg_work"], as_index=False)["S000"].sum()
                 .rename(columns={"S000":"jobs"}))

# load BG shapes for centroids
bg = gpd.read_file(bg_shp).to_crs(4326).rename(columns={"GEOID":"bg_geoid"})
bg["lon"] = bg.geometry.centroid.x
bg["lat"] = bg.geometry.centroid.y

flows_bg = flows_bg.merge(bg[["bg_geoid","lon","lat"]], left_on="bg_home", right_on="bg_geoid", how="left") \
                   .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["bg_geoid"])
flows_bg = flows_bg.merge(bg[["bg_geoid","lon","lat"]], left_on="bg_work", right_on="bg_geoid", how="left") \
                   .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["bg_geoid"])

flows_bg_gdf = gpd.GeoDataFrame(flows_bg,
                                geometry=gpd.points_from_xy(flows_bg["work_lon"], flows_bg["work_lat"]),
                                crs=4326)
flows_bg_sea = flows_bg_gdf[flows_bg_gdf.within(seattle.unary_union)].dropna(
    subset=["home_lon","home_lat","work_lon","work_lat"]
).copy()
flows_bg_sea["geo_level"] = "blockgroup"
print("Block-group flows:", len(flows_bg_sea))


  bg["lon"] = bg.geometry.centroid.x

  bg["lat"] = bg.geometry.centroid.y


Block-group flows: 0


  flows_bg_sea = flows_bg_gdf[flows_bg_gdf.within(seattle.unary_union)].dropna(


In [25]:
# ----------------------------------------------------------
# 6) Tract → Tract flows
# ----------------------------------------------------------
od_tr = od.merge(blk_to_tr.rename(columns={"geocode":"h_geocode","tract_geoid":"tract_home"}),
                 on="h_geocode", how="left") \
          .merge(blk_to_tr.rename(columns={"geocode":"w_geocode","tract_geoid":"tract_work"}),
                 on="w_geocode", how="left")
od_tr = od_tr.dropna(subset=["tract_home","tract_work"])

flows_tr = (od_tr.groupby(["tract_home","tract_work"], as_index=False)["S000"].sum()
                 .rename(columns={"S000":"jobs"}))

tracts = gpd.read_file(tracts_shp).to_crs(4326).rename(columns={"GEOID":"tract"})
tracts["lon"] = tracts.geometry.centroid.x
tracts["lat"] = tracts.geometry.centroid.y

flows_tr = flows_tr.merge(tracts[["tract","lon","lat"]], left_on="tract_home", right_on="tract", how="left") \
                   .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["tract"])
flows_tr = flows_tr.merge(tracts[["tract","lon","lat"]], left_on="tract_work", right_on="tract", how="left") \
                   .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["tract"])

flows_tr_gdf = gpd.GeoDataFrame(flows_tr,
                                geometry=gpd.points_from_xy(flows_tr["work_lon"], flows_tr["work_lat"]),
                                crs=4326)
flows_tr_sea = flows_tr_gdf[flows_tr_gdf.within(seattle.unary_union)].dropna(
    subset=["home_lon","home_lat","work_lon","work_lat"]
).copy()
flows_tr_sea["geo_level"] = "tract"
print("Tract-level flows:", len(flows_tr_sea))


  tracts["lon"] = tracts.geometry.centroid.x

  tracts["lat"] = tracts.geometry.centroid.y


Tract-level flows: 0


  flows_tr_sea = flows_tr_gdf[flows_tr_gdf.within(seattle.unary_union)].dropna(


In [26]:
# ----------------------------------------------------------
# 7) Save outputs
# ----------------------------------------------------------
cols = ["home_lon","home_lat","work_lon","work_lat","jobs","geo_level"]

flows_block_sea[cols].to_csv(DATA/"seattle_flows_block_2022.csv", index=False)
flows_bg_sea[cols].to_csv(DATA/"seattle_flows_blockgroup_2022.csv", index=False)
flows_tr_sea[cols].to_csv(DATA/"seattle_flows_tract_2022.csv", index=False)

flows_all = pd.concat([flows_block_sea[cols], flows_bg_sea[cols], flows_tr_sea[cols]], ignore_index=True)
flows_all.to_csv(DATA/"seattle_flows_multilevel_2022.csv", index=False)

print("✅ Saved all CSVs in data/ folder")

✅ Saved all CSVs in data/ folder


In [27]:
sample = pd.read_csv("data/wa_od_main_JT00_2022.csv.gz", nrows=5)
print(sample.columns)
print(sample[["h_geocode","w_geocode","S000"]])
print(sample["S000"].describe())


Index(['w_geocode', 'h_geocode', 'S000', 'SA01', 'SA02', 'SA03', 'SE01',
       'SE02', 'SE03', 'SI01', 'SI02', 'SI03', 'createdate'],
      dtype='object')
         h_geocode        w_geocode  S000
0  530019501001039  530019501001000     2
1  530019502001089  530019501001000     1
2  530750003002015  530019501001000     1
3  530750003004008  530019501001000     1
4  530019501002042  530019501001011     1
count    5.000000
mean     1.200000
std      0.447214
min      1.000000
25%      1.000000
50%      1.000000
75%      1.000000
max      2.000000
Name: S000, dtype: float64


In [31]:
# ==========================================================
# Rebuild tract → tract flows (Seattle work destinations)
# ==========================================================

import pandas as pd, geopandas as gpd

# Ensure OD geocodes are strings
od["h_geocode"] = od["h_geocode"].astype(str)
od["w_geocode"] = od["w_geocode"].astype(str)

# Build block → tract lookup from crosswalk
xw["st"]   = xw["st"].astype(str).str.zfill(2)
xw["cty"]  = xw["cty"].astype(str).str.zfill(3)
xw["trct"] = xw["trct"].astype(str).str.zfill(6)
xw["tract_geoid"] = xw["st"] + xw["cty"] + xw["trct"]

blk_to_tr = xw[["tabblk2020","tract_geoid"]].rename(columns={"tabblk2020":"geocode"})
blk_to_tr["geocode"] = blk_to_tr["geocode"].astype(str)

# Attach tract IDs to OD
od_tr = (
    od.merge(blk_to_tr.rename(columns={"geocode":"h_geocode","tract_geoid":"tract_home"}),
             on="h_geocode", how="left")
      .merge(blk_to_tr.rename(columns={"geocode":"w_geocode","tract_geoid":"tract_work"}),
             on="w_geocode", how="left")
)

# Drop missing tract IDs
od_tr = od_tr.dropna(subset=["tract_home","tract_work"])

# Aggregate to tract → tract jobs
flows_tr = (
    od_tr.groupby(["tract_home","tract_work"], as_index=False)["S000"].sum()
          .rename(columns={"S000":"jobs"})
)

print("Tract flows created:", len(flows_tr))
print(flows_tr.head())

# --- Attach tract centroids
tracts = gpd.read_file("data/tl_2021_53_tract/tl_2021_53_tract.shp").to_crs(4326)
tracts["GEOID"] = tracts["GEOID"].astype(str).str.zfill(11)
tracts["lon"] = tracts.geometry.centroid.x
tracts["lat"] = tracts.geometry.centroid.y

flows_tr = flows_tr.merge(tracts[["GEOID","lon","lat"]],
                          left_on="tract_home", right_on="GEOID", how="left") \
                   .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["GEOID"])
flows_tr = flows_tr.merge(tracts[["GEOID","lon","lat"]],
                          left_on="tract_work", right_on="GEOID", how="left") \
                   .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["GEOID"])

# --- Filter to WORK in Seattle
flows_tr_gdf = gpd.GeoDataFrame(
    flows_tr,
    geometry=gpd.points_from_xy(flows_tr["work_lon"], flows_tr["work_lat"]),
    crs=4326
)
flows_tr_sea = flows_tr_gdf[flows_tr_gdf.within(seattle.union_all())].dropna(
    subset=["home_lon","home_lat","work_lon","work_lat"]
).copy()
flows_tr_sea["geo_level"] = "tract"

print("Seattle tract flows:", len(flows_tr_sea))
print(flows_tr_sea.sort_values("jobs", ascending=False).head())

# Save CSV
flows_tr_sea[["home_lon","home_lat","work_lon","work_lat","jobs","geo_level"]] \
    .to_csv("data/seattle_flows_tract_2022.csv", index=False)


Tract flows created: 668211
           tract_home          tract_work  jobs
0  535300153001950100  535300153001950100   401
1  535300153001950100  535300153001950200    62
2  535300153001950100  535300153001950301     2
3  535300153001950100  535300153001950302     1
4  535300153001950100  535300153001950400     6



  tracts["lon"] = tracts.geometry.centroid.x

  tracts["lat"] = tracts.geometry.centroid.y


Seattle tract flows: 0
Empty GeoDataFrame
Columns: [tract_home, tract_work, jobs, home_lon, home_lat, work_lon, work_lat, geometry, geo_level]
Index: []


In [32]:
pd.read_csv("data/seattle_flows_tract_2022.csv")["jobs"].describe()


count       0
unique      0
top       NaN
freq      NaN
Name: jobs, dtype: object

In [33]:
print("Seattle polygons:", seattle.shape[0])

# Intersect tracts with Seattle boundary
tracts_in_sea = tracts[tracts.intersects(seattle.union_all())]
print("Tracts intersecting Seattle:", len(tracts_in_sea))

print(tracts_in_sea.head())


Seattle polygons: 1
Tracts intersecting Seattle: 199
   STATEFP COUNTYFP TRACTCE        GEOID    NAME             NAMELSAD  MTFCC  \
67      53      033  001701  53033001701   17.01   Census Tract 17.01  G5020   
68      53      033  010401  53033010401  104.01  Census Tract 104.01  G5020   
79      53      033  010701  53033010701  107.01  Census Tract 107.01  G5020   
80      53      033  011001  53033011001  110.01  Census Tract 110.01  G5020   
96      53      033  004301  53033004301   43.01   Census Tract 43.01  G5020   

   FUNCSTAT    ALAND  AWATER     INTPTLAT      INTPTLON  \
67        S   892768       0  +47.6963619  -122.3544843   
68        S  1629874       0  +47.5532095  -122.2955875   
79        S  1457384    3664  +47.5451419  -122.3675265   
80        S  1080391       0  +47.5384101  -122.2861704   
96        S   979236       0  +47.6694019  -122.3002215   

                                             geometry         lon        lat  
67  POLYGON ((-122.36335 47.6942

In [2]:
# ==========================================================
# Seattle Commuting Flows (LODES8, WA 2022)
# - Block, Block Group, and Tract levels
# - Destinations filtered to polygons that INTERSECT Seattle
# - Centroids computed in projected CRS to avoid warnings
# - Exports: block / blockgroup / tract CSVs + combined CSV
# ==========================================================

import os, io, zipfile, requests
from pathlib import Path
import pandas as pd
import geopandas as gpd

# -----------------------------
# Config & helpers
# -----------------------------
DATA = Path("data"); DATA.mkdir(exist_ok=True)

def fetch(url: str, out_path: Path):
    if out_path.exists():
        print(f"✅ Exists: {out_path.name}")
        return out_path
    print(f"⬇️  Downloading: {url}")
    r = requests.get(url, timeout=120)
    r.raise_for_status()
    out_path.write_bytes(r.content)
    print(f"✅ Saved: {out_path.name}")
    return out_path

def fetch_unzip(url: str, out_dir: Path) -> Path:
    out_dir.mkdir(exist_ok=True, parents=True)
    shp = list(out_dir.glob("*.shp"))
    if shp:
        print(f"✅ Exists: {out_dir.name}")
        return shp[0]
    print(f"⬇️  Downloading & unzipping: {url}")
    r = requests.get(url, timeout=180)
    r.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(out_dir)
    shp = list(out_dir.glob("*.shp"))
    print(f"✅ Unzipped: {out_dir} ({shp[0].name})")
    return shp[0]

def geounion(gdf: gpd.GeoDataFrame):
    # use union_all() if available; fallback to unary_union for older versions
    return gdf.union_all() if hasattr(gdf, "union_all") else gdf.unary_union

# -----------------------------
# 1) Download inputs
# -----------------------------
od_fp  = fetch("https://lehd.ces.census.gov/data/lodes/LODES8/wa/od/wa_od_main_JT00_2022.csv.gz",
               DATA / "wa_od_main_JT00_2022.csv.gz")
xw_fp  = fetch("https://lehd.ces.census.gov/data/lodes/LODES8/wa/wa_xwalk.csv.gz",
               DATA / "wa_xwalk.csv.gz")

tracts_shp  = fetch_unzip("https://www2.census.gov/geo/tiger/TIGER2021/TRACT/tl_2021_53_tract.zip",
                          DATA / "tl_2021_53_tract")
bg_shp      = fetch_unzip("https://www2.census.gov/geo/tiger/TIGER2021/BG/tl_2021_53_bg.zip",
                          DATA / "tl_2021_53_bg")
places_shp  = fetch_unzip("https://www2.census.gov/geo/tiger/TIGER2021/PLACE/tl_2021_53_place.zip",
                          DATA / "tl_2021_53_place")

# -----------------------------
# 2) Load OD + crosswalk
# -----------------------------
od = pd.read_csv(od_fp, dtype={"h_geocode":"string","w_geocode":"string"})
od["S000"] = od["S000"].astype("int64")

xw = pd.read_csv(
    xw_fp,
    dtype={"tabblk2020":"string","st":"string","cty":"string","trct":"string"}
)

# build tract + block-group GEOIDs from crosswalk (LODES8 schema)
xw["st"]   = xw["st"].str.zfill(2)
xw["cty"]  = xw["cty"].str.zfill(3)
xw["trct"] = xw["trct"].str.zfill(6)
xw["tract_geoid"] = xw["st"] + xw["cty"] + xw["trct"]

xw["block_code4"] = xw["tabblk2020"].str[-4:]  # last 4 chars
xw["bgrp"] = xw["block_code4"].str[0]         # first char of block => block group
xw["bg_geoid"] = xw["tract_geoid"] + xw["bgrp"]

# handy lookups
blk_coords = xw.rename(columns={"tabblk2020":"geocode",
                                "blklondd":"lon",
                                "blklatdd":"lat"})[["geocode","lon","lat"]]
blk_to_bg  = xw[["tabblk2020","bg_geoid"]].rename(columns={"tabblk2020":"geocode"})
blk_to_tr  = xw[["tabblk2020","tract_geoid"]].rename(columns={"tabblk2020":"geocode"})

# -----------------------------
# 3) Load TIGER shapes (WGS84) + Seattle polygon
# -----------------------------
tracts = gpd.read_file(tracts_shp).to_crs(4326)
tracts["GEOID"] = tracts["GEOID"].astype(str).str.zfill(11)

bg = gpd.read_file(bg_shp).to_crs(4326).rename(columns={"GEOID":"bg_geoid"})
bg["bg_geoid"] = bg["bg_geoid"].astype(str).str.zfill(12)

places = gpd.read_file(places_shp).to_crs(4326)
seattle = places.query("NAME == 'Seattle'").copy()
assert len(seattle) == 1, "Seattle boundary not found or multiple matches."
SEA = geounion(seattle)  # unified polygon

print("Seattle boundary polygons:", len(seattle))

# -----------------------------
# 4) Helper: accurate centroids (project, centroid, back)
# -----------------------------
def centroid_lonlat(gdf: gpd.GeoDataFrame, proj_epsg=3857):
    # project to metric CRS for centroid calc
    g_proj = gdf.to_crs(epsg=proj_epsg)
    cents = g_proj.geometry.centroid
    # back to lon/lat
    cents_ll = gpd.GeoSeries(cents, crs=g_proj.crs).to_crs(4326)
    return pd.DataFrame({"lon": cents_ll.x.values, "lat": cents_ll.y.values})

# Precompute centroids for BG and tracts
bg_cent = centroid_lonlat(bg)
bg = pd.concat([bg.reset_index(drop=True), bg_cent], axis=1)

tr_cent = centroid_lonlat(tracts)
tracts = pd.concat([tracts.reset_index(drop=True), tr_cent], axis=1)

# -----------------------------
# 5) Block → Block flows (direct coords from crosswalk)
# -----------------------------
flows_block = od[["h_geocode","w_geocode","S000"]].rename(columns={"S000":"jobs"})

flows_block = flows_block.merge(blk_coords, left_on="h_geocode", right_on="geocode", how="left") \
                         .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["geocode"])
flows_block = flows_block.merge(blk_coords, left_on="w_geocode", right_on="geocode", how="left") \
                         .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["geocode"])

# polygon-based destination filter (intersects Seattle)
flows_block_gdf = gpd.GeoDataFrame(
    flows_block,
    geometry=gpd.points_from_xy(flows_block["work_lon"], flows_block["work_lat"]),
    crs=4326
)
flows_block_sea = flows_block_gdf[flows_block_gdf.intersects(SEA)].dropna(
    subset=["home_lon","home_lat","work_lon","work_lat"]
).copy()
flows_block_sea["geo_level"] = "block"
print("Block-level flows:", len(flows_block_sea))

# -----------------------------
# 6) Block Group → Block Group flows (ID-based destination filter)
# -----------------------------
# Map blocks to BGs
od_bg = (od.merge(blk_to_bg.rename(columns={"geocode":"h_geocode","bg_geoid":"bg_home"}),
                  on="h_geocode", how="left")
           .merge(blk_to_bg.rename(columns={"geocode":"w_geocode","bg_geoid":"bg_work"}),
                  on="w_geocode", how="left"))
od_bg = od_bg.dropna(subset=["bg_home","bg_work"])
od_bg["bg_home"] = od_bg["bg_home"].astype(str).str.zfill(12)
od_bg["bg_work"] = od_bg["bg_work"].astype(str).str.zfill(12)

# Which BG polygons touch Seattle?
bg_in_sea = set(bg[bg.intersects(SEA)]["bg_geoid"])
od_bg = od_bg[od_bg["bg_work"].isin(bg_in_sea)]

# Aggregate BG→BG
flows_bg = (od_bg.groupby(["bg_home","bg_work"], as_index=False)["S000"]
                 .sum().rename(columns={"S000":"jobs"}))

# Attach centroids
flows_bg = flows_bg.merge(bg[["bg_geoid","lon","lat"]],
                          left_on="bg_home", right_on="bg_geoid", how="left") \
                   .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["bg_geoid"])
flows_bg = flows_bg.merge(bg[["bg_geoid","lon","lat"]],
                          left_on="bg_work", right_on="bg_geoid", how="left") \
                   .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["bg_geoid"])

flows_bg = flows_bg.dropna(subset=["home_lon","home_lat","work_lon","work_lat"]).copy()
flows_bg["geo_level"] = "blockgroup"
print("Block-group flows:", len(flows_bg))

# -----------------------------
# 7) Tract → Tract flows (ID-based destination filter)
# -----------------------------
# Map blocks to tracts
od_tr = (od.merge(blk_to_tr.rename(columns={"geocode":"h_geocode","tract_geoid":"tract_home"}),
                  on="h_geocode", how="left")
           .merge(blk_to_tr.rename(columns={"geocode":"w_geocode","tract_geoid":"tract_work"}),
                  on="w_geocode", how="left"))
od_tr = od_tr.dropna(subset=["tract_home","tract_work"])
od_tr["tract_home"] = od_tr["tract_home"].astype(str).str.zfill(11)
od_tr["tract_work"] = od_tr["tract_work"].astype(str).str.zfill(11)

# Which tract polygons touch Seattle?
tracts_in_sea = set(tracts[tracts.intersects(SEA)]["GEOID"])
od_tr = od_tr[od_tr["tract_work"].isin(tracts_in_sea)]

# Aggregate tract→tract
flows_tr = (od_tr.groupby(["tract_home","tract_work"], as_index=False)["S000"]
                 .sum().rename(columns={"S000":"jobs"}))

# Attach centroids
flows_tr = flows_tr.merge(tracts[["GEOID","lon","lat"]],
                          left_on="tract_home", right_on="GEOID", how="left") \
                   .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["GEOID"])
flows_tr = flows_tr.merge(tracts[["GEOID","lon","lat"]],
                          left_on="tract_work", right_on="GEOID", how="left") \
                   .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["GEOID"])

flows_tr = flows_tr.dropna(subset=["home_lon","home_lat","work_lon","work_lat"]).copy()
flows_tr["geo_level"] = "tract"
print("Tract-level flows:", len(flows_tr))

# -----------------------------
# 8) Save outputs (and combined)
# -----------------------------
cols = ["home_lon","home_lat","work_lon","work_lat","jobs","geo_level"]

flows_block_sea[cols].to_csv(DATA/"seattle_flows_block_2022.csv", index=False)
flows_bg[cols].to_csv(DATA/"seattle_flows_blockgroup_2022.csv", index=False)
flows_tr[cols].to_csv(DATA/"seattle_flows_tract_2022.csv", index=False)

flows_all = pd.concat([flows_block_sea[cols], flows_bg[cols], flows_tr[cols]], ignore_index=True)
flows_all.to_csv(DATA/"seattle_flows_multilevel_2022.csv", index=False)

print("✅ Saved:")
print(" - data/seattle_flows_block_2022.csv")
print(" - data/seattle_flows_blockgroup_2022.csv")
print(" - data/seattle_flows_tract_2022.csv")
print(" - data/seattle_flows_multilevel_2022.csv  (all levels in one file)")

# -----------------------------
# 9) (Optional) Thin tiny flows for Kepler performance
# -----------------------------
# Example: keep flows with ≥ 20 jobs
# flows_all_thin = flows_all[flows_all["jobs"] >= 20].copy()
# flows_all_thin.to_csv(DATA/"seattle_flows_multilevel_2022_min20.csv", index=False)
# print("✅ Also saved: data/seattle_flows_multilevel_2022_min20.csv (jobs >= 20)")


✅ Exists: wa_od_main_JT00_2022.csv.gz
✅ Exists: wa_xwalk.csv.gz
✅ Exists: tl_2021_53_tract
✅ Exists: tl_2021_53_bg
✅ Exists: tl_2021_53_place


  xw = pd.read_csv(


Seattle boundary polygons: 1
Block-level flows: 544781
Block-group flows: 0
Tract-level flows: 0
✅ Saved:
 - data/seattle_flows_block_2022.csv
 - data/seattle_flows_blockgroup_2022.csv
 - data/seattle_flows_tract_2022.csv
 - data/seattle_flows_multilevel_2022.csv  (all levels in one file)


In [4]:
# ==========================================================
# Seattle Commuting Flows (LODES8, WA 2022)
# - Block, Block Group, and Tract levels
# - GEOIDs derived directly from 15-digit block codes (2020-compatible)
# - Destinations filtered to polygons that INTERSECT Seattle
# - Centroids computed in projected CRS; diagnostics printed
# - Exports: block / blockgroup / tract CSVs + combined CSV
# ==========================================================

import os, io, zipfile, requests
from pathlib import Path
import pandas as pd
import geopandas as gpd

# -----------------------------
# Config & helpers
# -----------------------------
DATA = Path("data"); DATA.mkdir(exist_ok=True)

def fetch(url: str, out_path: Path):
    if out_path.exists():
        print(f"✅ Exists: {out_path.name}")
        return out_path
    print(f"⬇️  Downloading: {url}")
    r = requests.get(url, timeout=180)
    r.raise_for_status()
    out_path.write_bytes(r.content)
    print(f"✅ Saved: {out_path.name}")
    return out_path

def fetch_unzip(url: str, out_dir: Path) -> Path:
    out_dir.mkdir(exist_ok=True, parents=True)
    shp = list(out_dir.glob("*.shp"))
    if shp:
        print(f"✅ Exists: {out_dir.name}")
        return shp[0]
    print(f"⬇️  Downloading & unzipping: {url}")
    r = requests.get(url, timeout=240)
    r.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(out_dir)
    shp = list(out_dir.glob("*.shp"))
    print(f"✅ Unzipped: {out_dir} ({shp[0].name})")
    return shp[0]

def geounion(gdf: gpd.GeoDataFrame):
    # union_all for newer versions; fallback to unary_union
    return gdf.union_all() if hasattr(gdf, "union_all") else gdf.unary_union

def centroid_lonlat(gdf: gpd.GeoDataFrame, proj_epsg=3857):
    g_proj = gdf.to_crs(proj_epsg)
    cents = g_proj.geometry.centroid
    cents_ll = gpd.GeoSeries(cents, crs=g_proj.crs).to_crs(4326)
    return pd.DataFrame({"lon": cents_ll.x.values, "lat": cents_ll.y.values})

def log(msg): print(f"▶ {msg}")

# -----------------------------
# 1) Download inputs
# -----------------------------
od_fp  = fetch("https://lehd.ces.census.gov/data/lodes/LODES8/wa/od/wa_od_main_JT00_2022.csv.gz",
               DATA / "wa_od_main_JT00_2022.csv.gz")
xw_fp  = fetch("https://lehd.ces.census.gov/data/lodes/LODES8/wa/wa_xwalk.csv.gz",
               DATA / "wa_xwalk.csv.gz")
tracts_shp  = fetch_unzip("https://www2.census.gov/geo/tiger/TIGER2021/TRACT/tl_2021_53_tract.zip",
                          DATA / "tl_2021_53_tract")
bg_shp      = fetch_unzip("https://www2.census.gov/geo/tiger/TIGER2021/BG/tl_2021_53_bg.zip",
                          DATA / "tl_2021_53_bg")
places_shp  = fetch_unzip("https://www2.census.gov/geo/tiger/TIGER2021/PLACE/tl_2021_53_place.zip",
                          DATA / "tl_2021_53_place")

# -----------------------------
# 2) Load OD and derive GEOIDs from block codes (2020-compatible)
# -----------------------------
od = pd.read_csv(od_fp, dtype={"h_geocode":"string","w_geocode":"string"})
od["S000"] = od["S000"].astype("int64")
log(f"OD rows: {len(od):,}")

# Ensure 15 digits; derive tract (11) and block group (12) from block IDs
od["h_geocode"] = od["h_geocode"].str.zfill(15)
od["w_geocode"] = od["w_geocode"].str.zfill(15)
od["tract_home"] = od["h_geocode"].str[:11]
od["tract_work"] = od["w_geocode"].str[:11]
od["bg_home"]    = od["h_geocode"].str[:12]
od["bg_work"]    = od["w_geocode"].str[:12]
log(f"Unique work tracts: {od['tract_work'].nunique():,} | work BGs: {od['bg_work'].nunique():,}")

# For block-level coordinates we can use crosswalk block coords (lat/lon)
xw = pd.read_csv(xw_fp, dtype={"tabblk2020":"string"})
xw["tabblk2020"] = xw["tabblk2020"].str.zfill(15)
blk_coords = xw.rename(columns={"tabblk2020":"geocode",
                                "blklondd":"lon",
                                "blklatdd":"lat"})[["geocode","lon","lat"]]

# -----------------------------
# 3) Load TIGER shapes (WGS84) + Seattle polygon
# -----------------------------
tracts = gpd.read_file(tracts_shp).to_crs(4326)
tracts["GEOID"] = tracts["GEOID"].astype(str).str.zfill(11)
bg = gpd.read_file(bg_shp).to_crs(4326)
bg["GEOID"] = bg["GEOID"].astype(str).str.zfill(12)

places = gpd.read_file(places_shp).to_crs(4326)
seattle = places.query("NAME == 'Seattle'").copy()
assert len(seattle) == 1, "Seattle boundary not found or multiple matches."
SEA = geounion(seattle)
log("Seattle polygon loaded")

# Precompute accurate centroids for BG & tracts (projected → back to lon/lat)
bg_cent = centroid_lonlat(bg)
bg = pd.concat([bg.reset_index(drop=True), bg_cent], axis=1)     # adds lon/lat
tracts_cent = centroid_lonlat(tracts)
tracts = pd.concat([tracts.reset_index(drop=True), tracts_cent], axis=1)

# -----------------------------
# 4) Build ID sets of destinations that touch Seattle (intersects)
# -----------------------------
# Do spatial tests in a metric CRS for robustness
bg_m = bg.to_crs(3857); tracts_m = tracts.to_crs(3857); seattle_m = seattle.to_crs(3857)
SEA_M = geounion(seattle_m)

bg_ids_in_sea  = set(bg_m[bg_m.intersects(SEA_M)].index)
tr_ids_in_sea  = set(tracts_m[tracts_m.intersects(SEA_M)].index)
bg_geoids_in_sea = set(bg.iloc[list(bg_ids_in_sea)]["GEOID"]) if bg_ids_in_sea else set()
tr_geoids_in_sea = set(tracts.iloc[list(tr_ids_in_sea)]["GEOID"]) if tr_ids_in_sea else set()
log(f"BGs intersecting Seattle: {len(bg_geoids_in_sea):,}")
log(f"Tracts intersecting Seattle: {len(tr_geoids_in_sea):,}")

# -----------------------------
# 5) BLOCK → BLOCK flows (coords from crosswalk; polygon filter)
# -----------------------------
flows_block = od[["h_geocode","w_geocode","S000"]].rename(columns={"S000":"jobs"})
flows_block = flows_block.merge(blk_coords, left_on="h_geocode", right_on="geocode", how="left") \
                         .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["geocode"])
flows_block = flows_block.merge(blk_coords, left_on="w_geocode", right_on="geocode", how="left") \
                         .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["geocode"])

flows_block_gdf = gpd.GeoDataFrame(
    flows_block,
    geometry=gpd.points_from_xy(flows_block["work_lon"], flows_block["work_lat"]),
    crs=4326
).to_crs(3857)

flows_block_sea = flows_block_gdf[flows_block_gdf.intersects(SEA_M)] \
                      .dropna(subset=["home_lon","home_lat","work_lon","work_lat"]).copy()
flows_block_sea["geo_level"] = "block"
log(f"Block-level flows (Seattle dest): {len(flows_block_sea):,}")

# -----------------------------
# 6) BLOCK GROUP → BLOCK GROUP flows (ID-based dest filter)
# -----------------------------
# Filter by destination BG GEOIDs that touch Seattle, then aggregate
od_bg = od[od["bg_work"].isin(bg_geoids_in_sea)].copy()
log(f"OD rows with BG-work in Seattle: {len(od_bg):,}")

flows_bg = (od_bg.groupby(["bg_home","bg_work"], as_index=False)["S000"]
                 .sum().rename(columns={"S000":"jobs"}))
log(f"BG→BG pairs: {len(flows_bg):,}")

# Attach BG centroids
bg_pts = bg[["GEOID","lon","lat"]].copy()
flows_bg = flows_bg.merge(bg_pts, left_on="bg_home", right_on="GEOID", how="left") \
                   .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["GEOID"])
flows_bg = flows_bg.merge(bg_pts, left_on="bg_work", right_on="GEOID", how="left") \
                   .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["GEOID"])
flows_bg = flows_bg.dropna(subset=["home_lon","home_lat","work_lon","work_lat"]).copy()
flows_bg["geo_level"] = "blockgroup"
log(f"Block-group flows (final): {len(flows_bg):,}")

# -----------------------------
# 7) TRACT → TRACT flows (ID-based dest filter)
# -----------------------------
od_tr = od[od["tract_work"].isin(tr_geoids_in_sea)].copy()
log(f"OD rows with tract-work in Seattle: {len(od_tr):,}")

flows_tr = (od_tr.groupby(["tract_home","tract_work"], as_index=False)["S000"]
                 .sum().rename(columns={"S000":"jobs"}))
log(f"Tract→Tract pairs: {len(flows_tr):,}")

# Attach tract centroids
tr_pts = tracts[["GEOID","lon","lat"]].copy()
flows_tr = flows_tr.merge(tr_pts, left_on="tract_home", right_on="GEOID", how="left") \
                   .rename(columns={"lon":"home_lon","lat":"home_lat"}).drop(columns=["GEOID"])
flows_tr = flows_tr.merge(tr_pts, left_on="tract_work", right_on="GEOID", how="left") \
                   .rename(columns={"lon":"work_lon","lat":"work_lat"}).drop(columns=["GEOID"])
flows_tr = flows_tr.dropna(subset=["home_lon","home_lat","work_lon","work_lat"]).copy()
flows_tr["geo_level"] = "tract"
log(f"Tract flows (final): {len(flows_tr):,}")

# -----------------------------
# 8) Save outputs (and combined)
# -----------------------------
cols = ["home_lon","home_lat","work_lon","work_lat","jobs","geo_level"]

flows_block_sea[cols].to_csv(DATA/"seattle_flows_block_2022.csv", index=False)
flows_bg[cols].to_csv(DATA/"seattle_flows_blockgroup_2022.csv", index=False)
flows_tr[cols].to_csv(DATA/"seattle_flows_tract_2022.csv", index=False)

flows_all = pd.concat([flows_block_sea[cols], flows_bg[cols], flows_tr[cols]], ignore_index=True)
flows_all.to_csv(DATA/"seattle_flows_multilevel_2022.csv", index=False)

print("✅ Saved:")
print(" - data/seattle_flows_block_2022.csv")
print(" - data/seattle_flows_blockgroup_2022.csv")
print(" - data/seattle_flows_tract_2022.csv")
print(" - data/seattle_flows_multilevel_2022.csv  (all levels in one file)")

# -----------------------------
# 9) (Optional) Thin tiny flows for Kepler performance
# -----------------------------
# flows_all_min20 = flows_all[flows_all["jobs"] >= 20].copy()
# flows_all_min20.to_csv(DATA/"seattle_flows_multilevel_2022_min20.csv", index=False)
# print("✅ Also saved: data/seattle_flows_multilevel_2022_min20.csv (jobs >= 20)")


✅ Exists: wa_od_main_JT00_2022.csv.gz
✅ Exists: wa_xwalk.csv.gz
✅ Exists: tl_2021_53_tract
✅ Exists: tl_2021_53_bg
✅ Exists: tl_2021_53_place
▶ OD rows: 2,905,296
▶ Unique work tracts: 1,771 | work BGs: 5,292


  xw = pd.read_csv(xw_fp, dtype={"tabblk2020":"string"})


▶ Seattle polygon loaded
▶ BGs intersecting Seattle: 571
▶ Tracts intersecting Seattle: 199
▶ Block-level flows (Seattle dest): 544,781
▶ OD rows with BG-work in Seattle: 563,744
▶ BG→BG pairs: 281,474
▶ Block-group flows (final): 281,474
▶ OD rows with tract-work in Seattle: 578,207
▶ Tract→Tract pairs: 122,950
▶ Tract flows (final): 122,950
✅ Saved:
 - data/seattle_flows_block_2022.csv
 - data/seattle_flows_blockgroup_2022.csv
 - data/seattle_flows_tract_2022.csv
 - data/seattle_flows_multilevel_2022.csv  (all levels in one file)
