In [1]:
# Cell 1 - Tell python where to find the modules to import from

import sys
from pathlib import Path

#Path.cwd().parent depends on where you run this code from
#Path(__file__).resolve().parents[1] depends on where this file is located on disk
#
#project_root = .../1099-reconciliation-pipeline if notebook runs from the repo root or from notebooks/
cwd = Path.cwd()
project_root = cwd if (cwd / "src").exists() else cwd.parent # running from notebooks/  folder (cwd = current working directory) - .parent gets us to the folder above
sys.path.append(str(project_root)) #sys.path is a list of folders where python looks for modules
                                    # we add the project root to that list with .append

print("Project root: ", project_root)


Project root:  /Users/manuelreyes/Desktop/dev/projects/1099_reconciliation_pipeline


'--------------------------------  Engine D: Load & Run Analysis  --------------------------------'


In [2]:
# Cell 2 — Imports, Load & Clean inputs (real paths)

from src.core import load_data
from src.cleaning.clean_matrix import clean_matrix
from src.engines.ira_rollover_analysis import run_ira_rollover_analysis
from src.outputs.build_correction_file import build_correction_dataframe

from src.config import RAW_DATA_DIR, USE_SAMPLE_DATA_DEFAULT, DateFilterConfig

if USE_SAMPLE_DATA_DEFAULT:
    matrix_path = None
else:
    matrix_path = RAW_DATA_DIR / "real_all_matrix_2025.xlsx"

# Optional date filtering (set to None for "All")
date_filter = None
# date_filter = DateFilterConfig(date_start="2025-07-01", date_end="2025-09-30", months=["July", "Aug"])

# Load and clean Matrix raw data
matrix_raw = load_data.load_matrix_excel(path=matrix_path)
matrix_clean = clean_matrix(matrix_raw)

print(f"Matrix Cleaned DataFrame shape: {matrix_clean.shape}")


Matrix Cleaned DataFrame shape: (8689, 22)


  matrix_clean = clean_matrix(matrix_raw)


### Date filter options
Use `DateFilterConfig` to limit transactions by date range and/or months. Range and months intersect.
Set `date_filter = None` for all data. Missing/invalid dates are excluded when filters are active.

Examples:
- All data: `date_filter = None`
- Range only: `DateFilterConfig(date_start="2025-01-01", date_end="2025-01-31")`
- Months only: `DateFilterConfig(months=["July", 8])`
- Range + months: `DateFilterConfig(date_start="2025-07-01", date_end="2025-09-30", months=["July", "Aug"])`


In [3]:
# Cell 3 — Run Engine D analysis

ira_matches = run_ira_rollover_analysis(matrix_clean, date_filter=date_filter)

print("Engine D rows:", ira_matches.shape[0])
ira_matches.head(10)


Engine D rows: 656


Unnamed: 0,plan_id,ssn,participant_name,state,gross_amt,fed_taxable_amt,txn_date,txn_method,tax_code_1,tax_code_2,...,amount_valid,date_valid,code_1099r_valid,validation_issues,match_status,action,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,correction_reason
36427,IRA29PLAT,171421488,George Fetch,PA,22352.94,22352.94,2025-12-29,Check Distribution,7,,...,True,True,True,[],match_needs_review,,,,,federal_taxing_method_not_rollover
36428,IRA29PLAT,134447891,Deborah Pease,PA,10.0,10.0,2025-12-15,Check Distribution,7,,...,True,True,True,[],match_needs_review,,,,,federal_taxing_method_not_rollover
36446,IRA29PLAT,168363780,Charles Savage,PA,2787.45,2787.45,2025-12-03,Check Distribution,7,,...,True,True,True,[],match_needs_review,,,,,federal_taxing_method_not_rollover
36461,IRA29PLAT,193361668,Robert Klemow,PA,218.92,0.0,2025-11-04,Check Distribution,G,,...,True,True,True,[],match_needs_correction,UPDATE_1099,0.0,,0.0,ira_rollover_tax_form_1099r_expected_no_tax
36473,IRA29PLAT,175348995,Bert Burrell,PA,6596.42,6596.42,2025-11-04,Check Distribution,7,,...,True,True,True,[],match_needs_review,,,,,federal_taxing_method_not_rollover
36477,IRA29PLAT,197445134,Thomas Palubinsky,PA,4000.0,4000.0,2025-10-23,Check Distribution,7,,...,True,True,True,[],match_needs_review,,,,,federal_taxing_method_not_rollover
36478,IRA29PLAT,202382358,Robert Penkala,PA,5408.02,5408.02,2025-10-23,Check Distribution,7,,...,True,True,True,[],match_needs_review,,,,,federal_taxing_method_not_rollover
36481,IRA29PLAT,197445134,Thomas Palubinsky,PA,3500.0,3500.0,2025-10-14,Check Distribution,7,,...,True,True,True,[],match_needs_review,,,,,federal_taxing_method_not_rollover
36484,IRA29PLAT,161389368,Letitia Leitzel,FL,11.27,0.0,2025-10-06,Check Distribution,G,,...,True,True,True,[],match_needs_correction,UPDATE_1099,0.0,,0.0,ira_rollover_tax_form_1099r_expected_no_tax
36502,IRA29PLAT,201429866,Mary Derr,PA,1666.67,1666.67,2025-09-16,Check Distribution,7,,...,True,True,True,[],match_needs_review,,,,,federal_taxing_method_not_rollover


In [4]:
# Cell 4 — Match status counts

ira_matches["match_status"].value_counts(dropna=False)


match_status
match_needs_review        392
match_needs_correction    264
Name: count, dtype: int64

In [5]:
# Cell 5 — Correction file preview

corrections_df = build_correction_dataframe(ira_matches)
print("Corrections:", corrections_df.shape[0])
corrections_df.head(10)


Corrections: 264


Unnamed: 0,Transaction Id,Transaction Date,Participant SSN,Participant Name,Matrix Account,Current Tax Code 1,Current Tax Code 2,New Tax Code,New Taxable Amount,New First Year contrib,Reason,Action
0,41386906,2025-07-31,161389368,Letitia Leitzel,07P6LM3T,G,,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
1,43303313,2025-10-06,161389368,Letitia Leitzel,07P6LM3T,G,,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
2,42302481,2025-09-02,182601810,Thomas Richards,07P6LM3T,G,,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
3,44178532,2025-11-04,193361668,Robert Klemow,07P6LM3T,G,,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
4,40319558,2025-06-23,172448354,BARBARA KUMAR,07P6LM3V,G,,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
5,41292023,2025-07-28,181466683,Martha Laux,07P6LM3V,G,,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
6,42699653,2025-09-12,160467412,Larry Myers,07P6LM3W,G,,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
7,41982024,2025-08-20,170624760,Diana Kristobek,07P6LM3W,G,,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
8,45226226,2025-12-09,171400540,William Jackson,07P6LM3W,G,,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
9,45608755,2025-12-19,187569907,Paul Simpson,07P6LM3W,4,G,0,,,ira_rollover_tax_form_1099r_expected_no_tax,UPDATE_1099
