In [2]:
import pandas as pd
from pathlib import Path

# === SETUP DIRECTORIES BASED ON YOUR FILESYSTEM ===
# Project root on the local machine; the two working folders sit directly under it.
base_path = Path(r"C:\Users\JosephWhite\Documents\JeffersonTownshipTaxResearch")
data_raw = base_path.joinpath("data-raw")    # folder searched for the source workbooks
data_output = base_path.joinpath("data")     # folder that receives the combined CSVs
# Create the output folder if it is missing; an existing folder is not an error.
# Parent folders are not created, so base_path itself must already exist.
data_output.mkdir(parents=False, exist_ok=True)

# === FLEXIBLE LOADER ===
def process_excel_file(filepath, township=None):
    """Read one Excel workbook into an all-string DataFrame, optionally filtered.

    Args:
        filepath: Location of the workbook, given either as a ``pathlib.Path``
            or as a plain path string.
        township: When truthy and the sheet has a "Township" column, keep only
            rows whose whitespace-stripped "Township" value equals this string
            exactly (the comparison is case-sensitive). When the column is
            absent, no filtering is applied and all rows are kept.

    Returns:
        The loaded (and possibly filtered) rows, with an extra "SourceFile"
        column holding the workbook's file name. If reading or filtering
        fails for any reason, a warning is printed and an empty DataFrame is
        returned instead of raising, so one bad file does not stop a batch.
    """
    # Normalise up front: both the "SourceFile" column and the error message
    # need ``.name``, which a plain string path does not have. Without this,
    # a string argument made the error handler itself raise AttributeError.
    filepath = Path(filepath)
    try:
        # dtype=str keeps every cell as text (no numeric/date coercion).
        df = pd.read_excel(filepath, dtype=str)
        if township and "Township" in df.columns:
            df = df[df["Township"].str.strip() == township]
        df["SourceFile"] = filepath.name
        return df
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and hand back
        # an empty frame so the caller can carry on with the other files.
        print(f"⚠️ Error reading {filepath.name}: {e}")
        return pd.DataFrame()

# === 1. LOAD ALL ABATEMENT FILES ===
# Workbooks are matched by file-name pattern and processed in sorted name order.
abatement_files = sorted(data_raw.glob("AbatementDetails-TY*.xlsx"))
print(f"\n📂 Found {len(abatement_files)} abatement files\n")

# Load each workbook filtered to township "JEFFERSON TWP" and print the
# per-file row count so the result of the filter can be checked by eye.
abatement_dfs = []
for fp in abatement_files:
    df = process_excel_file(fp, township="JEFFERSON TWP")
    print(f"{fp.name}: {len(df)} rows")
    abatement_dfs.append(df)

# Combine and save.
# Only frames that actually contain rows are concatenated: a file that failed
# to load comes back as an empty DataFrame, and previously a run where every
# file failed still wrote an empty CSV and reported "Saved 0 rows".
nonempty_abatement_dfs = [frame for frame in abatement_dfs if not frame.empty]
if nonempty_abatement_dfs:
    combined_abatements = pd.concat(nonempty_abatement_dfs, ignore_index=True)
    combined_abatements.to_csv(data_output / "Jefferson_Abatement_Details_All_Years.csv", index=False)
    print(f"\n✅ Saved {len(combined_abatements)} rows to Jefferson_Abatement_Details_All_Years.csv")
else:
    print("❌ No abatement data was loaded.")

# === 2. LOAD ALL TIF FILES ===
# Workbooks are matched by file-name pattern and processed in sorted name order.
tif_files = sorted(data_raw.glob("TifDetails-TY*.xlsx"))
print(f"\n📂 Found {len(tif_files)} TIF files\n")

# Load each workbook filtered to township "JEFFERSON TWP" and print the
# per-file row count so the result of the filter can be checked by eye.
tif_dfs = []
for fp in tif_files:
    df = process_excel_file(fp, township="JEFFERSON TWP")
    print(f"{fp.name}: {len(df)} rows")
    tif_dfs.append(df)

# Combine and save.
# Only frames that actually contain rows are concatenated: a file that failed
# to load comes back as an empty DataFrame, and previously a run where every
# file failed still wrote an empty CSV and reported "Saved 0 rows".
nonempty_tif_dfs = [frame for frame in tif_dfs if not frame.empty]
if nonempty_tif_dfs:
    combined_tifs = pd.concat(nonempty_tif_dfs, ignore_index=True)
    combined_tifs.to_csv(data_output / "Jefferson_TIF_Details_All_Years.csv", index=False)
    print(f"\n✅ Saved {len(combined_tifs)} rows to Jefferson_TIF_Details_All_Years.csv")
else:
    print("❌ No TIF data was loaded.")



📂 Found 11 abatement files

AbatementDetails-TY2014-2025-05-22.xlsx: 3 rows
AbatementDetails-TY2015-2025-05-22.xlsx: 2 rows
AbatementDetails-TY2016-2025-05-22.xlsx: 3 rows
AbatementDetails-TY2017-2025-05-22.xlsx: 3 rows
AbatementDetails-TY2018-2025-05-22.xlsx: 2 rows
AbatementDetails-TY2019-2025-05-22.xlsx: 2 rows
AbatementDetails-TY2020-2025-05-22.xlsx: 3 rows
AbatementDetails-TY2021-2025-05-22.xlsx: 3 rows
AbatementDetails-TY2022-2025-05-22.xlsx: 3 rows
AbatementDetails-TY2023-2025-05-22.xlsx: 6 rows
AbatementDetails-TY2024-2025-05-22.xlsx: 6 rows

✅ Saved 36 rows to Jefferson_Abatement_Details_All_Years.csv

📂 Found 11 TIF files

TifDetails-TY2014-2025-05-22.xlsx: 281 rows
TifDetails-TY2015-2025-05-22.xlsx: 281 rows
TifDetails-TY2016-2025-05-22.xlsx: 280 rows
TifDetails-TY2017-2025-05-22.xlsx: 281 rows
TifDetails-TY2018-2025-05-22.xlsx: 326 rows
TifDetails-TY2019-2025-05-22.xlsx: 401 rows
TifDetails-TY2020-2025-05-22.xlsx: 491 rows
TifDetails-TY2021-2025-05-22.xlsx: 652 rows
TifDet