In [None]:
# Cell 1 — Tell Python where to find the project modules that later cells import

import sys
from pathlib import Path

# Locate the project root (.../1099-reconciliation-pipeline) so that `src` can be imported.
#
# A notebook has no __file__, so the root must be derived from the current working
# directory (cwd). Assuming cwd == notebooks/ breaks when the kernel is started from
# somewhere else (e.g. the repo root), so we walk upward until we find the folder that
# actually contains the `src` package.
def find_project_root(start, marker="src"):
    """Return the closest directory at or above ``start`` that contains ``marker``.

    Parameters
    ----------
    start : str or Path
        Directory to start searching from (normally the current working directory).
    marker : str
        Name of a sub-directory that identifies the project root.

    Returns
    -------
    Path
        The first directory, walking from ``start`` up to the filesystem root, that has
        a ``marker`` sub-directory. If none is found, ``start.parent`` is returned, which
        is the original "notebook is run from notebooks/" behaviour.
    """
    start = Path(start).absolute()
    for candidate in (start, *start.parents):
        if (candidate / marker).is_dir():
            return candidate
    return start.parent


project_root = find_project_root(Path.cwd())

# sys.path is the list of folders Python searches for modules. Only append when the entry
# is missing, so re-running this cell does not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

print("Project root: ", project_root)


'--------------------------------  Engine B: Load & Run Analysis  --------------------------------'

In [None]:
# Cell 2 — Imports, Load & Clean inputs (real paths)

from src import load_data
from src.clean_matrix import clean_matrix
from src.clean_relius_demo import clean_relius_demo
from src.age_taxcode_analysis import run_age_taxcode_analysis
from src.age_taxcode_visualization import (
    build_age_taxcode_metrics,
    plot_corrections_over_time,
    plot_mistake_breakdown,
)
from src.config import RAW_DATA_DIR, REPORTS_FIGURES_DIR

# Input workbooks, both located under the configured raw-data folder.
matrix_path = RAW_DATA_DIR / "real_all_matrix_2025.xlsx"
relius_demo_path = RAW_DATA_DIR / "real_demo_relius_2025.xlsx"

# Matrix: read the raw workbook, then pass it through clean_matrix.
# use_sample_if_none=False presumably disables any sample-file fallback — confirm in src.load_data.
matrix_raw = load_data.load_matrix_excel(
    path=matrix_path,
    use_sample_if_none=False,
)
matrix_clean = clean_matrix(matrix_raw)

# Relius demo: same two steps, using the Relius-specific loader and cleaner.
relius_demo_raw = load_data.load_relius_demo_excel(
    path=relius_demo_path,
    use_sample_if_none=False,
)
relius_demo_clean = clean_relius_demo(relius_demo_raw)

# Engine B: feed both cleaned inputs to the age / tax-code analysis.
age_matches = run_age_taxcode_analysis(
    matrix_clean,
    relius_demo_clean,
)

# Report the row count, then leave a 5-row preview as the cell's displayed output.
print("Engine B rows:", age_matches.shape[0])
age_matches.head(5)


'--------------------------------  Metrics & Plots  --------------------------------'

In [None]:
# Cell 3 — Build monthly correction metrics

# Derive the metrics table from the Engine B results (`age_matches`) produced in Cell 2.
# The resulting `metrics_df` is what Cell 4 passes to plot_corrections_over_time.
metrics_df = build_age_taxcode_metrics(age_matches)
# Last expression of the cell, so the notebook displays it: a 10-row preview
# (assumes `metrics_df` is a pandas DataFrame — confirm in src.age_taxcode_visualization).
metrics_df.head(10)


In [None]:
# Cell 4 — Plot corrections over time and save

# Only the figure is needed here. The second return value (presumably the Axes — confirm in
# src.age_taxcode_visualization) is bound to a throwaway name other than `_`, because
# assigning to `_` in a notebook shadows IPython's "last output" variable.
fig1, _ax1 = plot_corrections_over_time(metrics_df)

output_path = REPORTS_FIGURES_DIR / "engine_b_correction_rate_by_month.png"
# savefig does not create missing folders; make sure the target directory exists so a
# fresh checkout does not fail with FileNotFoundError.
output_path.parent.mkdir(parents=True, exist_ok=True)
fig1.savefig(output_path, dpi=150, bbox_inches="tight")
print("Saved:", output_path)


In [None]:
# Cell 5 — Plot mistake breakdown and save

# Only the figure is needed here. The second return value (presumably the Axes — confirm in
# src.age_taxcode_visualization) is bound to a throwaway name other than `_`, because
# assigning to `_` in a notebook shadows IPython's "last output" variable.
fig2, _ax2 = plot_mistake_breakdown(age_matches)

output_path = REPORTS_FIGURES_DIR / "engine_b_mistake_breakdown.png"
# savefig does not create missing folders; make sure the target directory exists so a
# fresh checkout does not fail with FileNotFoundError.
output_path.parent.mkdir(parents=True, exist_ok=True)
fig2.savefig(output_path, dpi=150, bbox_inches="tight")
print("Saved:", output_path)
