# 40 - CDC SVI Ingestion (Tract Level)

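Place the downloaded tract-level CDC SVI CSVs for TX, LA, MS, AL, and FL under `data/raw/svi/`, then run this notebook to combine them, normalize the tract GEOID to 11 digits, and write `data/interim/svi_combined.csv` and `data/processed/svi_simple.csv`.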


In [None]:
from pathlib import Path
import pandas as pd

# NOTE: ROOT is an absolute, machine-specific path; edit it when running this
# notebook on another machine.
ROOT = Path('/Users/liamguest/LProjects/AURA/AURA')
RAW_SVI = ROOT / 'data' / 'raw' / 'svi'
INT = ROOT / 'data' / 'interim'
PROC = ROOT / 'data' / 'processed'

# Column names tried, in priority order, when looking for the tract identifier.
GEOID_CANDIDATES = ('FIPS', 'TRACTFIPS', 'TRACT', 'GEOID', 'geoid', 'TractFIPS')


def find_geoid_column(columns, candidates=GEOID_CANDIDATES):
    """Return the first name in *candidates* present in *columns*, else None.

    Args:
        columns: Any container of column names supporting ``in``
            (e.g. ``DataFrame.columns`` or a list).
        candidates: Column names to try, highest priority first.

    Returns:
        The matching column name, or ``None`` when no candidate is present.
    """
    for candidate in candidates:
        if candidate in columns:
            return candidate
    return None


def normalize_tract_geoid(values):
    """Normalize raw identifier strings to 11-character tract GEOIDs.

    Surrounding whitespace is stripped, the last 11 characters are kept and
    shorter values are left-padded with zeros. Missing or blank inputs stay
    missing instead of being turned into a padded ``'nan'`` string, which
    would look like a real key in later joins.

    Args:
        values: ``pandas.Series`` of raw identifiers (strings or missing).

    Returns:
        A ``pandas.Series`` aligned with *values* holding 11-character
        strings, with missing entries where no identifier was available.
    """
    text = values.astype(str).str.strip()
    # Decide what counts as "present" from the ORIGINAL values: astype(str)
    # may already have rendered NaN/None as the literal text 'nan'/'None'.
    present = values.notna() & text.ne('')
    return text.str[-11:].str.zfill(11).where(present)


# Run the ingestion only when executed as a notebook cell or script. The body
# stays at module level so `svi`, `svi_simple`, etc. remain available to
# later cells.
if __name__ == '__main__':
    INT.mkdir(parents=True, exist_ok=True)
    PROC.mkdir(parents=True, exist_ok=True)

    print('Looking for CSVs in:', RAW_SVI)
    frames = []
    for csv_path in sorted(RAW_SVI.glob('*.csv')):
        try:
            # dtype=str keeps leading zeros in FIPS codes.
            df = pd.read_csv(csv_path, dtype=str)
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print('Failed to read', csv_path, e)
            continue
        df['source_file'] = csv_path.name
        frames.append(df)

    if not frames:
        raise SystemExit('No SVI CSVs found. Place files under data/raw/svi and re-run.')

    svi = pd.concat(frames, ignore_index=True)
    print('SVI rows:', len(svi))

    # Try common GEOID column names
    geoid_col = find_geoid_column(svi.columns)
    if geoid_col is None:
        raise SystemExit('Could not find a GEOID-like column in SVI CSVs. Provide mapping.')

    # Normalize to 11-digit tract_geoid
    svi['tract_geoid'] = normalize_tract_geoid(svi[geoid_col])

    # Rows can lack a GEOID when a source file does not have the chosen
    # column (concat fills it with NaN) or the cell was empty; surface that
    # instead of silently writing a placeholder key.
    n_missing = int(svi['tract_geoid'].isna().sum())
    if n_missing:
        print(f'Warning: {n_missing} rows have no value in {geoid_col!r}; tract_geoid left blank.')

    # Keep a simple subset for now (adjust as needed): the key plus the first
    # 20 other columns in their existing order.
    keep_cols = ['tract_geoid']
    other_cols = [c for c in svi.columns if c not in keep_cols]
    svi_simple = svi[keep_cols + other_cols[:20]].copy()

    out_int = INT / 'svi_combined.csv'
    svi.to_csv(out_int, index=False)
    print('Wrote combined SVI (raw columns):', out_int)

    out_proc = PROC / 'svi_simple.csv'
    svi_simple.to_csv(out_proc, index=False)
    print('Wrote simple SVI (first columns):', out_proc)
