In [None]:
# Cell 1 - Tell python where to find the modules to import from

import sys
from pathlib import Path

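# Path.cwd() (and therefore Path.cwd().parent) depends on the directory this notebook is run from.
# Path(__file__).resolve().parents[1] would instead depend on where the file is located on disk,
# but __file__ is normally not defined inside a notebook kernel, so the cwd-based approach is used.
#
# project_root = .../1099-reconciliation-pipeline whether the notebook runs from the repo root or from notebooks/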
cwd = Path.cwd()
# The project root is the folder that contains src/. Use cwd when the notebook
# is launched from the root; otherwise assume it was launched from notebooks/
# and step one level up with .parent.
project_root = cwd if (cwd / "src").exists() else cwd.parent

# sys.path is the list of folders where Python looks for modules. Append the
# project root only when it is missing, so re-running this cell in the same
# kernel does not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

print("Project root: ", project_root)


'--------------------------------  Engine D: Load & Run Analysis  --------------------------------'


In [None]:
# Cell 2 — Imports, Load & Clean inputs (real paths)

from src.core import load_data
from src.cleaning.clean_matrix import clean_matrix
from src.engines.ira_rollover_analysis import run_ira_rollover_analysis
from src.visualization.ira_rollover_visualization import (
    build_ira_rollover_kpi_summary,
    plot_ira_rollover_kpi_summary,
    build_ira_rollover_metrics,
    plot_ira_rollover_correction_counts,
    plot_ira_rollover_correction_rate,
)
from src.config import RAW_DATA_DIR, USE_SAMPLE_DATA_DEFAULT, get_engine_figures_dir

# In sample-data mode the loader is called with path=None; otherwise it is
# pointed at the real 2025 Matrix workbook under RAW_DATA_DIR.
matrix_path = (
    None
    if USE_SAMPLE_DATA_DEFAULT
    else RAW_DATA_DIR / "real_all_matrix_2025.xlsx"
)

# Raw Matrix input -> cleaned Matrix data
matrix_raw = load_data.load_matrix_excel(path=matrix_path)
matrix_clean = clean_matrix(matrix_raw)

# Engine D: IRA rollover analysis on the cleaned Matrix data
ira_matches = run_ira_rollover_analysis(matrix_clean)

# Quick sanity check: row count, then a preview of the first rows
print("Engine D rows:", ira_matches.shape[0])
ira_matches.head(5)


'--------------------------------  Metrics & Plots  --------------------------------'


In [None]:
# Cell 3 — Build monthly correction metrics
# Feeds the Engine D result (ira_matches) into build_ira_rollover_metrics; the
# resulting metrics_df is the input for the two plotting cells that follow.

metrics_df = build_ira_rollover_metrics(ira_matches)
# Preview the first 10 rows (presumably a pandas DataFrame — TODO confirm).
metrics_df.head(10)


In [None]:
# Cell 4 — Corrections vs total: draw the chart and write it to disk as a PNG

# Resolve the destination first, then render and save.
output_path = get_engine_figures_dir("ira_rollover") / "engine_d_correction_counts_by_month.png"
fig1, _ = plot_ira_rollover_correction_counts(metrics_df)  # second returned item is unused here
fig1.savefig(output_path, bbox_inches="tight", dpi=150)
print("Saved:", output_path)


In [None]:
# Cell 5 — Correction rate: draw the chart and write it to disk as a PNG

# Resolve the destination first, then render and save.
output_path = get_engine_figures_dir("ira_rollover") / "engine_d_correction_rate_by_month.png"
fig2, _ = plot_ira_rollover_correction_rate(metrics_df)  # second returned item is unused here
fig2.savefig(output_path, bbox_inches="tight", dpi=150)
print("Saved:", output_path)


In [None]:
# Cell 6 — Match status KPI summary
# Builds the KPI summary from the Engine D result (ira_matches); the next cell
# passes kpi_summary to the KPI plotting function.

kpi_summary = build_ira_rollover_kpi_summary(ira_matches)
# Bare trailing expression so the notebook renders the summary as the cell output.
kpi_summary


In [None]:
# Cell 7 — Match status KPI summary: draw the chart and write it to disk as a PNG

# Resolve the destination first, then render and save.
output_path = get_engine_figures_dir("ira_rollover") / "engine_d_match_status_kpi.png"
fig3, _ = plot_ira_rollover_kpi_summary(kpi_summary)  # second returned item is unused here
fig3.savefig(output_path, bbox_inches="tight", dpi=150)
print("Saved:", output_path)
