In [1]:
# Cell 1 - Tell python where to find the modules to import from

import sys
from pathlib import Path

#Path.cwd().parent depends on where you run this code from
#Path(__file__).resolve().parents[1] depends on where this file is located on disk
#
#project_root = .../1099-reconciliation-pipeline if notebook runs from the repo root or from notebooks/
cwd = Path.cwd()
project_root = cwd if (cwd / "src").exists() else cwd.parent # running from notebooks/  folder (cwd = current working directory) - .parent gets us to the folder above
sys.path.append(str(project_root)) #sys.path is a list of folders where python looks for modules
                                    # we add the project root to that list with .append

print("Project root: ", project_root)


Project root:  /Users/manuelreyes/Desktop/dev/projects/1099_reconciliation_pipeline


'--------------------------------  Engine D: Load & Run Analysis (G/H tax codes only)  --------------------------------'


In [2]:
# Cell 2 — Imports, Load & Clean inputs (real paths)

from src.core import load_data
from src.cleaning.clean_matrix import clean_matrix
from src.engines.ira_rollover_analysis import run_ira_rollover_analysis
from src.visualization.ira_rollover_visualization import (
    build_ira_rollover_kpi_summary,
    plot_ira_rollover_kpi_summary,
    build_ira_rollover_metrics,
    plot_ira_rollover_correction_counts,
    plot_ira_rollover_correction_rate,
)
from src.config import RAW_DATA_DIR, USE_SAMPLE_DATA_DEFAULT, get_engine_figures_dir

if USE_SAMPLE_DATA_DEFAULT:
    matrix_path = None
else:
    matrix_path = RAW_DATA_DIR / "real_all_matrix_2025.xlsx"

# Load and clean Matrix raw data
matrix_raw = load_data.load_matrix_excel(path=matrix_path)
matrix_clean = clean_matrix(matrix_raw)

# Run Engine D analysis (G/H tax codes only)
ira_matches = run_ira_rollover_analysis(matrix_clean)

print("Engine D rows (G/H tax codes only):", ira_matches.shape[0])
ira_matches.head(5)


Engine D rows (G/H tax codes only): 266


  matrix_clean = clean_matrix(matrix_raw)


Unnamed: 0,plan_id,ssn,participant_name,state,gross_amt,fed_taxable_amt,txn_date,txn_method,tax_code_1,tax_code_2,...,amount_valid,date_valid,code_1099r_valid,validation_issues,match_status,action,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,correction_reason
36461,IRA29PLAT,193361668,Robert Klemow,PA,218.92,0.0,2025-11-04,Check Distribution,G,,...,True,True,True,[],match_needs_correction,UPDATE_1099,0,,0,ira_rollover_tax_form_1099r_expected_no_tax
36484,IRA29PLAT,161389368,Letitia Leitzel,FL,11.27,0.0,2025-10-06,Check Distribution,G,,...,True,True,True,[],match_needs_correction,UPDATE_1099,0,,0,ira_rollover_tax_form_1099r_expected_no_tax
36521,IRA29PLAT,182601810,Thomas Richards,PA,200000.0,0.0,2025-09-02,Check Distribution,G,,...,True,True,True,[],match_needs_correction,UPDATE_1099,0,,0,ira_rollover_tax_form_1099r_expected_no_tax
36544,IRA29PLAT,161389368,Letitia Leitzel,FL,15749.86,0.0,2025-07-31,Check Distribution,G,,...,True,True,True,[],match_needs_correction,UPDATE_1099,0,,0,ira_rollover_tax_form_1099r_expected_no_tax
36650,IRA32PLAT,181466683,Martha Laux,PA,177377.67,0.0,2025-07-28,Check Distribution,G,,...,True,True,True,[],match_needs_correction,UPDATE_1099,0,,0,ira_rollover_tax_form_1099r_expected_no_tax


'--------------------------------  Metrics & Plots (G/H tax codes only)  --------------------------------'


In [3]:
# Cell 3 — Build monthly correction metrics

metrics_df = build_ira_rollover_metrics(ira_matches)
metrics_df.head(10)


Unnamed: 0,txn_month,total_txns,correction_count,correction_rate
0,2025-06-01,26,26,1.0
1,2025-07-01,39,38,0.974359
2,2025-08-01,30,29,0.966667
3,2025-09-01,42,42,1.0
4,2025-10-01,34,34,1.0
5,2025-11-01,34,34,1.0
6,2025-12-01,61,61,1.0


In [None]:
# Cell 4 — Plot corrections vs total and save

fig1, _ = plot_ira_rollover_correction_counts(metrics_df)
output_path = get_engine_figures_dir("ira_rollover") / "engine_d_correction_counts_by_month.png"
fig1.savefig(output_path, dpi=150, bbox_inches="tight")
print("Saved:", output_path)


In [None]:
# Cell 5 — Plot correction rate and save

fig2, _ = plot_ira_rollover_correction_rate(metrics_df)
output_path = get_engine_figures_dir("ira_rollover") / "engine_d_correction_rate_by_month.png"
fig2.savefig(output_path, dpi=150, bbox_inches="tight")
print("Saved:", output_path)


In [None]:
# Cell 6 — Match status KPI summary

kpi_summary = build_ira_rollover_kpi_summary(ira_matches)
kpi_summary


In [None]:
# Cell 7 — Plot match status KPI summary and save

fig3, _ = plot_ira_rollover_kpi_summary(kpi_summary)
output_path = get_engine_figures_dir("ira_rollover") / "engine_d_match_status_kpi.png"
fig3.savefig(output_path, dpi=150, bbox_inches="tight")
print("Saved:", output_path)
