# IBX walkshed uplift

End-to-end pipeline for the IBX walkshed and value uplift.

**Run with uv:**
- `uv sync` (installs the project in editable mode by default)
- start Jupyter with `uv run jupyter lab` (or `uv run jupyter notebook`)
- place data under `data/` (valuation CSV, MapPLUTO FGDB, stations JSON)

In [1]:
import sys
from pathlib import Path
import pandas as pd
import geopandas as gpd

# Make the project root and its src/ directory importable (for config and
# nyt_ibx_lvt) whether Jupyter was started from the project root or from a
# direct subdirectory such as notebooks/.
ROOT = Path.cwd().resolve()
for _base in (ROOT, ROOT.parent):
    if (_base / 'src').exists():
        # src/ is inserted second so it ends up ahead of the root on sys.path.
        sys.path.insert(0, str(_base))
        sys.path.insert(0, str(_base / 'src'))
        break

from nyt_ibx_lvt import (
    prepare_values_parquet,
    load_values,
    load_parcels,
    run_pipeline,
    export_walkshed_map,
)

# File name of the raw valuation CSV. It is also the marker file that
# locate_data_dir() looks for when discovering the data directory.
DATA_FILENAME = 'Property_Valuation_and_Assessment_Data_Tax_Classes_1,2,3,4_20251120.csv'

def locate_data_dir(start: Path, filename: str) -> Path:
    """Return the nearest ``data`` directory that contains *filename*.

    The search checks ``<base>/data/<filename>`` for ``start`` itself and then
    for its parent, grandparent and great-grandparent, nearest first.

    Args:
        start: Directory to begin searching from.
        filename: Name of the file that must exist inside ``data``.

    Returns:
        The resolved path of the first ``data`` directory containing *filename*.

    Raises:
        FileNotFoundError: If no candidate directory contains *filename*; the
            message names the file and lists every directory that was checked.
    """
    candidates = [start, start.parent, start.parent.parent, start.parent.parent.parent]
    for base in candidates:
        data_dir = (base / 'data').resolve()
        if (data_dir / filename).exists():
            return data_dir
    searched = [str((base / 'data').resolve()) for base in candidates]
    # Interpolate the real file name so the error says which file is missing.
    raise FileNotFoundError(f"Could not find data/{filename} under: {searched}")

# A DATA_DIR exported by the config module takes precedence. If that import
# fails for any reason, fall back to searching upward from ROOT for the
# valuation CSV.
try:
    from config import DATA_DIR as CONFIG_DATA_DIR  # type: ignore
except Exception:
    CONFIG_DATA_DIR = None

DATA_DIR = (
    Path(CONFIG_DATA_DIR).resolve()
    if CONFIG_DATA_DIR
    else locate_data_dir(ROOT, DATA_FILENAME)
)

print(f'DATA_DIR resolved to: {DATA_DIR}')

# Input files read by the notebook.
RAW_VAL_CSV = DATA_DIR.joinpath(DATA_FILENAME)
PLUTO_PATH = DATA_DIR.joinpath('nyc_mappluto_25v3_fgdb', 'MapPLUTO25v3.gdb')
STATIONS_PATH = DATA_DIR.joinpath('stations_ibx.json')

# Files produced by the notebook (Parquet cache, GeoJSON export, HTML map).
VALUES_PARQUET = DATA_DIR.joinpath('property_values.parquet')
OUT_GEOJSON = DATA_DIR.joinpath('ibx_walkshed.geojson')
MAP_HTML = DATA_DIR.joinpath('ibx_walkshed.html')

# Column names passed to the CSV-to-Parquet step and the parcel loader.
VALUES_FIELD = 'CURMKTTOT'
TAX_CLASS_FIELD = 'CURTAXCLASS'
PARCEL_BBL_FIELD = 'BBL'

# Analysis parameters handed to run_pipeline.
BUFFER_MILES = 0.5  # passed as radius_miles
UPLIFT_RATES = (0.04, 0.06, 0.08, 0.10)  # presumably fractional rates (4%-10%) -- confirm
EXCLUDE_TAX_CLASSES = ('0',)


In [2]:
# Convert the 5–7 GB valuation CSV to a slim Parquet file once; later runs
# find the Parquet file already present and skip the conversion.
if VALUES_PARQUET.exists():
    print(f"Using existing {VALUES_PARQUET}")
else:
    # Make sure the destination directory exists before writing.
    VALUES_PARQUET.parent.mkdir(parents=True, exist_ok=True)
    prepare_values_parquet(
        csv_path=RAW_VAL_CSV,
        out_parquet=VALUES_PARQUET,
        value_field=VALUES_FIELD,
        tax_class_field=TAX_CLASS_FIELD,
    )

NameError: name 'VALUES_PARQUET' is not defined

In [None]:
# Load the Parquet valuation table and the MapPLUTO parcels for a quick look.
# NOTE(review): the run_pipeline call in this notebook is given file paths,
# not these objects, so this cell appears to be a preview only -- confirm.
values_df = load_values(VALUES_PARQUET)
parcels_gdf = load_parcels(PLUTO_PATH, bbl_field=PARCEL_BBL_FIELD)
# The trailing tuple is what the notebook shows as this cell's output.
values_df.head(), parcels_gdf.head()

In [None]:
# Run the analysis in one call. The five results are unpacked as the baseline
# total, the uplift table, the taxable parcels, the route and the walkshed;
# the last three are reused by the cells that follow.
total, uplift_table, taxable_gdf, route, walkshed = run_pipeline(
    parcels_path=PLUTO_PATH,
    values_path=VALUES_PARQUET,
    stations_path=STATIONS_PATH,
    parcel_bbl_field=PARCEL_BBL_FIELD,
    # NOTE(review): "value" / "tax_class" differ from VALUES_FIELD and
    # TAX_CLASS_FIELD; presumably these are the column names used in the
    # Parquet file rather than the raw CSV headers -- confirm.
    value_field="value",
    tax_class_field="tax_class",
    exclude_tax_classes=EXCLUDE_TAX_CLASSES,
    radius_miles=BUFFER_MILES,
    uplift_rates=UPLIFT_RATES,
    output_geojson=OUT_GEOJSON,
)
print(f"Baseline taxable value: ${total:,.0f}")
# Trailing expression: displayed as the cell output.
uplift_table

In [None]:
# Preview the bbl, value and tax_class columns of the parcels returned by run_pipeline.
taxable_gdf[["bbl", "value", "tax_class"]].head()

In [None]:
# Export the route, walkshed and taxable parcels from run_pipeline to the
# HTML map at MAP_HTML.
export_walkshed_map(
    route=route,
    walkshed=walkshed,
    parcels=taxable_gdf,
    output_html=MAP_HTML,
    value_field="value",
)
# Trailing expression: shows the output path as the cell result.
MAP_HTML