In [1]:
# Cell 1 - Tell python where to find the modules to import from

import sys
from pathlib import Path

#Path.cwd().parent depends on where you run this code from
#Path(__file__).resolve().parents[1] depends on where this file is located on disk
#
#project_root = .../1099-reconciliation-pipeline if notebook runs from the repo root or from notebooks/
cwd = Path.cwd()
project_root = cwd if (cwd / "src").exists() else cwd.parent # running from notebooks/  folder (cwd = current working directory) - .parent gets us to the folder above
sys.path.append(str(project_root)) #sys.path is a list of folders where python looks for modules
                                    # we add the project root to that list with .append

print("Project root: ", project_root)

Project root:  /Users/manuelreyes/Desktop/dev/projects/1099_reconciliation_pipeline


In [2]:
# Cell 2 — Imports, Load & clean inputs (real paths)

from src.core import load_data
from src.cleaning.clean_matrix import clean_matrix
from src.cleaning.clean_relius_roth_basis import clean_relius_roth_basis
from src.outputs.build_correction_file import build_correction_dataframe, write_correction_file

from src.cleaning.clean_relius_demo import clean_relius_demo

from src.config import RAW_DATA_DIR, USE_SAMPLE_DATA_DEFAULT, DateFilterConfig


if USE_SAMPLE_DATA_DEFAULT:
    matrix_path = None
    relius_roth_basis_path = None
    relius_demo_path = None
    sheet_name = 0
else:
    matrix_path = RAW_DATA_DIR / "real_all_matrix_2025.xlsx"
    relius_roth_basis_path = RAW_DATA_DIR / "real_roth_basis_relius_2025.xlsx"
    relius_demo_path = RAW_DATA_DIR / "real_demo_relius_2025.xlsx"
    sheet_name = "Sheet5"

# Optional date filtering (set to None for "All")
date_filter = None
# date_filter = DateFilterConfig(date_start="2025-07-01", date_end="2025-09-30", months=["July", "Aug", 9])
date_filter = DateFilterConfig(date_start="2025-11-20", date_end="2025-12-31", months=None)

# Load  and clean Matrix raw data
matrix_raw = load_data.load_matrix_excel(path=matrix_path)
matrix_clean = clean_matrix(matrix_raw, date_filter=date_filter)

# Load  and clean Relius Roth Basis raw data
relius_roth_basis_raw = load_data.load_relius_roth_basis_excel(path=relius_roth_basis_path, sheet_name=sheet_name)
relius_roth_basis_clean = clean_relius_roth_basis(relius_roth_basis_raw)

# Load and clean Relius Demo raw data
relius_demo_raw = load_data.load_relius_demo_excel(path=relius_demo_path)
relius_demo_clean = clean_relius_demo(relius_demo_raw)


#.shape is an attribute of pandas DataFrames that returns a tuple of
# (number of rows, number of columns) - e.g. (1000, 15)
print("matrix_raw DataFrame:    ", matrix_raw.shape)
print("matrix_clean DataFrame:  ", matrix_clean.shape)
print("matrix_clean 'ssn' dtype: ", matrix_clean["ssn"].dtype)
print("\n")

print("relius_roth_basis_raw:   ", relius_roth_basis_raw.shape)
print("relius_roth_basis_clean: ", relius_roth_basis_clean.shape)
print("relius_roth_basis_clean 'ssn' dtype: ", relius_roth_basis_clean["ssn"].dtype)
print("\n")
print("relius_demo_raw:       ", relius_demo_raw.shape)
print("relius_demo_clean:       ", relius_demo_clean.shape)
print("relius_demo_clean 'ssn' dtype: ", relius_demo_clean["ssn"].dtype)

  matrix_clean = clean_matrix(matrix_raw, date_filter=date_filter)


matrix_raw DataFrame:     (73940, 56)
matrix_clean DataFrame:   (1771, 21)
matrix_clean 'ssn' dtype:  string


relius_roth_basis_raw:    (7904, 6)
relius_roth_basis_clean:  (7904, 11)
relius_roth_basis_clean 'ssn' dtype:  string


relius_demo_raw:        (61584, 8)
relius_demo_clean:        (61584, 11)
relius_demo_clean 'ssn' dtype:  string


### Date filter options
Use `DateFilterConfig` to limit transactions by date range and/or months. Range and months intersect.
Set `date_filter = None` for all data. Missing/invalid dates are excluded when filters are active.

Examples:
- All data: `date_filter = None`
- Range only: `DateFilterConfig(date_start="2025-01-01", date_end="2025-01-31")`
- Months only: `DateFilterConfig(months=["July", 8])`
- Range + months: `DateFilterConfig(date_start="2025-07-01", date_end="2025-09-30", months=["July", "Aug"])`


In [3]:
# Cell 3 — Quick view of Relius Roth Basis clean data

print("relius_roth_basis_clean Head:")
relius_roth_basis_clean.head(10)


relius_roth_basis_clean Head:


Unnamed: 0,plan_id,ssn,first_name,last_name,first_roth_tax_year,roth_basis_amt,ssn_valid,amount_valid,date_valid,code_1099r_valid,validation_issues
0,100MBDII-R,201746826,Alyssa,Mihalik,2020,2500.0,True,True,,,[]
5277,406MBDII-R,208726408,Josh,Myers,2021,9600.0,True,True,,,[]
5275,406MBDII-R,167788536,Nina,Zanias-Anderson,2022,8400.0,True,True,,,[]
5274,406MBDII-R,192721010,Juliana,Rappo,2018,7700.0,True,True,,,[]
5273,406MBDII-R,165788891,Jacob,Augustus,2022,7700.0,True,True,,,[]
5272,406MBDII-R,205769452,Zackary,Hockman,2022,7300.0,True,True,,,[]
5271,406MBDII-R,530980202,Tristan,Boyd,2022,6975.0,True,True,,,[]
5270,406MBDII-R,158784228,Elizabeth,Burks,2021,6650.0,True,True,,,[]
5269,406MBDII-R,99763952,Katherine,Golebiewski,2023,6400.0,True,True,,,[]
5268,406MBDII-R,197749337,Dana,Salanik,2023,6300.0,True,True,,,[]


In [4]:
# Cell 4 — Check key data inside Relius Roth Basis clean data

print("'ssn' lenght and index:\n", relius_roth_basis_clean["ssn"].str.len().value_counts(dropna=False))
print("\n'ssn' duplicates?:\n", relius_roth_basis_clean.duplicated(["plan_id", "ssn"]).sum())

'ssn' lenght and index:
 ssn
9    7904
Name: count, dtype: Int64

'ssn' duplicates?:
 0


In [5]:
# Cell 5 — Test normalize_ssn function controls different types of formas from raw files

import pandas as pd
from src.core.normalizers import normalize_ssn

tests = pd.Series(["040511830", 40511830.0, "40511830.0", "040-51-1830"])
tests.map(normalize_ssn)

0    040511830
1    040511830
2    040511830
3    040511830
dtype: object

In [6]:
# Cell 6 — Check key SSNs that start with '0' to validate normalize function

ssn_zero_mask = (
    relius_roth_basis_clean["ssn"]
    .astype("string")
    .str.startswith("0")
    .fillna(False)
)
print(f"SSN count that starts with '0': {relius_roth_basis_clean.loc[ssn_zero_mask].shape[0]}")
relius_roth_basis_clean[relius_roth_basis_clean["ssn"].str.startswith("0")].head(10)

SSN count that starts with '0': 207


Unnamed: 0,plan_id,ssn,first_name,last_name,first_roth_tax_year,roth_basis_amt,ssn_valid,amount_valid,date_valid,code_1099r_valid,validation_issues
5269,406MBDII-R,99763952,Katherine,Golebiewski,2023,6400.0,True,True,,,[]
5285,406MBDII-R,44849206,Jennifer,Pichler,2018,43000.0,True,True,,,[]
5418,414MBD-R,97765552,Kyle,Essick,2022,14986.0,True,True,,,[]
5339,409MBDII-R,40827100,Kathleen,Hoy,2022,19750.0,True,True,,,[]
5010,391MBD-R,3541727,Stacy,Gasteiger,2008,15624.0,True,True,,,[]
4995,391MBD-R,68720835,Ryan T.,McGuire,2008,3300.0,True,True,,,[]
5022,391MBD-R,29625485,Christina,Briggs,2008,31100.0,True,True,,,[]
5026,391MBD-R,91742046,Daniel,Frake,2019,41500.0,True,True,,,[]
4939,387MBD-R,53820736,Sammantha,Nelson,2016,10030.0,True,True,,,[]
4981,38MBDII-R,82520305,Carol,Frye,2021,8900.0,True,True,,,[]


In [7]:
# Cell 7 — Check column(s) that are Python 'list'

list_cols = [
    col for col in relius_roth_basis_clean.columns
    if relius_roth_basis_clean[col].apply(lambda x: isinstance(x, list)).any()
]

list_cols

['validation_issues']

Notes:
- Code Crashed in Cell #6 -> `print(relius_roth_basis_clean[relius_roth_basis_clean["ssn"].str.startswith("0").fillna(False)].value_counts().sum())`
    - since there is a column that stores a List (not string, float, ints, dates) and Python `list`is not hashable.

In [8]:
# Cell 8 — Check column´s Dtypes in relius_roth_basis_clean DataFrame

relius_roth_basis_clean.dtypes

plan_id                string[python]
ssn                    string[python]
first_name             string[python]
last_name              string[python]
first_roth_tax_year             Int64
roth_basis_amt                float64
ssn_valid                     boolean
amount_valid                  boolean
date_valid                    boolean
code_1099r_valid              boolean
validation_issues              object
dtype: object

In [9]:
# Cell 9 — Validate required columns exist (pre-flight)

required_matrix_cols = {
    "plan_id","ssn","txn_date","transaction_id","participant_name","matrix_account",
    "gross_amt","fed_taxable_amt","roth_initial_contribution_year","tax_code_1","tax_code_2"
}

# The '-' operator between sets is set difference
# “Give me all items that are in required_matrix_cols but not in matrix_clean.columns.”
missing = required_matrix_cols - set(matrix_clean.columns)

# assert 'CONDITION', "error message if condition is False"
# if it's True  -> nothing happens; code continues normally.
# if it's False -> Python raises an 'AssertionError' with the provided message.
assert not missing, f"Matrix missing columns: {missing}"

required_demo_cols = {"plan_id","ssn","dob"}
missing = required_demo_cols - set(relius_demo_clean.columns)
assert not missing, f"Demo missing columns: {missing}"

required_basis_cols = {"plan_id","ssn","first_roth_tax_year","roth_basis_amt"}
missing = required_basis_cols - set(relius_roth_basis_clean.columns)
assert not missing, f"Roth basis missing columns: {missing}"

print("✓ Required columns present")

✓ Required columns present


In [10]:
# Cell 10 — Run Roth Basis Taxable Analysis Engine

from src.engines.roth_taxable_analysis import run_roth_taxable_analysis


relius_roth_basis = run_roth_taxable_analysis(
    matrix_clean,
    relius_demo_clean,
    relius_roth_basis_clean
)

print("relius_roth_basis_df:", relius_roth_basis.shape)
relius_roth_basis.head(10)

relius_roth_basis_df: (58, 23)


Unnamed: 0,transaction_id,txn_date,ssn,participant_name,matrix_account,plan_id,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,...,roth_initial_contribution_year,first_roth_tax_year,start_roth_year,roth_basis_amt,age_at_txn,suggested_taxable_amt,suggested_first_roth_tax_year,correction_reason,action,match_status
0,45769187,2025-12-29,165565033,Duane Swartz,07P6LM4G,300005MBD,H,,,,...,2019,2019,2019,42200.0,62.0,,,,,match_no_action
1,45269765,2025-12-10,163602230,Marcia Raubenstrauch,07P6LM4G,300005MBD,B,7.0,,,...,2025,2025,2025,0.0,62.0,,,- taxable_within_15pct_of_gross,INVESTIGATE,match_needs_review
2,44745362,2025-11-20,165723517,Rose E. Frankil,07P6LM4G,300005MBD,B,1.0,,,...,2008,2008,2008,1000.0,35.0,,,- taxable_within_15pct_of_gross,INVESTIGATE,match_needs_review
3,45380096,2025-12-12,208745035,Vivian Snell,07P6LM4H,300005MBDII,H,,,,...,2022,2022,2022,3400.0,31.0,,,,,match_no_action
4,45332054,2025-12-11,520922569,Jody Bradley,07P6LM4H,300005MBDII,B,1.0,,,...,2020,2020,2020,10800.0,46.0,,,,,match_no_action
5,44745380,2025-11-20,201461937,Margaret Braccio,07P6LM4H,300005MBDII,H,,,,...,2020,2020,2020,21000.0,69.0,,,,,match_no_action
6,45700161,2025-12-23,175486344,Kathy Lee Thornton,07P6LM4Z,IRA86PLATR,H,,,,...,2011,2011,2011,2102.49,70.0,,,,,match_no_action
7,45700160,2025-12-23,198561443,Lora Mayer,07P6LM4Z,IRA86PLATR,H,,,,...,2022,2022,2022,0.0,65.0,,,,,match_no_action
8,45672514,2025-12-22,171600987,Bianca Hegedus,07P6LM4Z,IRA86PLATR,H,,,,...,2022,2022,2022,0.0,58.0,,,,,match_no_action
9,45269772,2025-12-10,139666160,Karen Barwick,07P6LM4Z,IRA86PLATR,H,,,,...,2024,2024,2024,17164.72,62.0,,,,,match_no_action


In [11]:
# Cell 11 — Output schema check (builder-compatible canonical fields)

required_out_cols = {
    "transaction_id","txn_date","ssn","participant_name","matrix_account",
    "tax_code_1","tax_code_2","suggested_tax_code_1","suggested_tax_code_2",
    "correction_reason","action","match_status",
    "suggested_taxable_amt","suggested_first_roth_tax_year"
}
missing = required_out_cols - set(relius_roth_basis.columns)
assert not missing, f"Engine C output missing columns: {missing}"

print("✓ Engine C output schema OK (builder-compatible)")

✓ Engine C output schema OK (builder-compatible)


In [12]:
# Cell 12 — Filter validation (Roth-only + inherited excluded)

# Roth plan check based on plan_id rules:
is_roth = relius_roth_basis["plan_id"].astype(str).str.startswith("300005") | relius_roth_basis["plan_id"].astype(str).str.endswith("R")
assert is_roth.all(), "Found non-Roth plan_id rows in Engine C output." # .all() returns True only if every value in the Series is True.
                                                                        # If at least one row is False -> .all() returns False.

print("✓ Roth-only filter passed")

✓ Roth-only filter passed


In [13]:
# Cell 13 — Join coverage diagnostics (DOB + basis availability)

import pandas as pd

print("DOB missing in Engine C output:", relius_roth_basis.get("dob", pd.Series(dtype=object)).isna().sum() if "dob" in relius_roth_basis.columns else "DOB not retained")
print("first_roth_tax_year missing:", relius_roth_basis["suggested_first_roth_tax_year"].isna().sum(), "(note: this can be NA if not needed)")

DOB missing in Engine C output: DOB not retained
first_roth_tax_year missing: 48 (note: this can be NA if not needed)


In [14]:
# Cell 14 — Validate “basis coverage” rule is actually triggering

zero_taxable = relius_roth_basis[relius_roth_basis["suggested_taxable_amt"].fillna(pd.NA).eq(0.0)]
print("Rows suggesting taxable=0:", len(zero_taxable))
zero_taxable[
    [
        "plan_id","ssn","age_at_txn","gross_amt","fed_taxable_amt",
        "roth_initial_contribution_year","first_roth_tax_year", "roth_basis_amt",
        "suggested_first_roth_tax_year","suggested_taxable_amt","correction_reason","match_status","action"
    ]
].head(25)

Rows suggesting taxable=0: 4


Unnamed: 0,plan_id,ssn,age_at_txn,gross_amt,fed_taxable_amt,roth_initial_contribution_year,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action
25,2108MBDII-R,181802969,24.0,3.63,0.0,2025,0,2000.0,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE
51,416MBDII-R,180508295,61.0,400.0,400.0,2021,2021,19125.0,,0.0,- roth_basis_covers_2025_total\n- taxable_with...,match_needs_correction,UPDATE_1099\nINVESTIGATE
55,2516MBD-R,200529986,51.0,5000.0,247.96,2010,2010,22800.0,,0.0,- roth_basis_covers_2025_total,match_needs_correction,UPDATE_1099
57,360MBDII-R,164664134,40.0,1500.0,27.84,2024,2024,4500.0,,0.0,- roth_basis_covers_2025_total,match_needs_correction,UPDATE_1099


Notes:
- This checks that suggested_taxable_amt == 0 is being produced and why.
- Already fixed: 
    - if 'roth_basis_amt' > 'gross_amount' AND 'first_roth_tax_year' == 'roth_initial_contribution_year' -> 'no correction needed' or 'qualified_roth_distribution'
    - if ppt is older than 59 1/2 AND 'current year' >= 'roth_initial_contribution_year + '5 years' -> 'no correction needed' or 'qualified_roth_distribution'
    - if 'first_roth_tax_year' != 'roth_initial_contribution_year' -> needs_correction

In [15]:
# Cell 15 — Validate the 15% proximity flag (INVESTIGATE behavior)

investigate_df = relius_roth_basis[relius_roth_basis["action"].eq("INVESTIGATE")]
print("INVESTIGATE rows:", len(investigate_df))
investigate_df[["plan_id","ssn","gross_amt","fed_taxable_amt","correction_reason","match_status","action"]].head(25)

INVESTIGATE rows: 6


Unnamed: 0,plan_id,ssn,gross_amt,fed_taxable_amt,correction_reason,match_status,action
1,300005MBD,163602230,10000.0,10000.0,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE
2,300005MBD,165723517,1795.39,1795.39,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE
23,493MBDII-R,202781554,2131.94,1931.94,- missing_first_roth_tax_year\n- taxable_withi...,match_needs_review,INVESTIGATE
26,448PLAT-R,115689437,20075.68,0.0,- missing_first_roth_tax_year,match_needs_review,INVESTIGATE
53,202PLAT-R,147683532,14266.99,0.0,- missing_first_roth_tax_year,match_needs_review,INVESTIGATE
54,202PLAT-R,195623431,10253.41,0.0,- missing_first_roth_tax_year,match_needs_review,INVESTIGATE


In [16]:
# Cell 16 - Validate columns for quick export to Excel to present to stakeholders

action_df = relius_roth_basis[relius_roth_basis["action"].notna()]
print(f"NEED ACTION rows: {len(action_df)}")
export_roth_basis_df = action_df[
    [
        "plan_id","ssn","participant_name","age_at_txn", "tax_code_1",
        "tax_code_2", "suggested_tax_code_1",
        "suggested_tax_code_2", "new_tax_code", "gross_amt","fed_taxable_amt",
        "roth_initial_contribution_year","first_roth_tax_year","roth_basis_amt",
        "suggested_first_roth_tax_year","suggested_taxable_amt","correction_reason",
        "match_status","action", "matrix_account", "transaction_id", "txn_date",
    ]
]

export_roth_basis_df.head(20)

NEED ACTION rows: 20


Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,...,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action,matrix_account,transaction_id,txn_date
1,300005MBD,163602230,Marcia Raubenstrauch,62.0,B,7,,,,10000.0,...,2025,0.0,,,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE,07P6LM4G,45269765,2025-12-10
2,300005MBD,165723517,Rose E. Frankil,35.0,B,1,,,,1795.39,...,2008,1000.0,,,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE,07P6LM4G,44745362,2025-11-20
11,IRA86PLATR,60482899,Michael Wong,57.0,H,,,,,150830.64,...,2019,7000.0,2019.0,,- roth_initial_year_mismatch,match_needs_correction,UPDATE_1099,07P6LM4Z,45036772,2025-12-03
14,IRA139PLATR,195564752,Lori Melchiorre,65.0,H,,,,,93806.7,...,2022,18500.0,2022.0,,- roth_initial_year_mismatch,match_needs_correction,UPDATE_1099,07P6LM6J,44780430,2025-11-21
16,4881MBDII-R,165680383,Joseph Kittner,38.0,H,,,,,21797.08,...,2023,13900.0,2023.0,,- roth_initial_year_mismatch,match_needs_correction,UPDATE_1099,07P6LMFX,45004510,2025-12-02
17,2164MBD-R,184587732,ALBERT L PARRILLO,52.0,G,,H,,H,22397.27,...,2015,14775.0,2015.0,,- roth_rollover_code_fix_G_blank_to_H\n- roth_...,match_needs_correction,UPDATE_1099,07P6LMH9,45799969,2025-12-29
23,493MBDII-R,202781554,Beatriz Nunez,25.0,B,1,,,,2131.94,...,0,1800.0,,,- missing_first_roth_tax_year\n- taxable_withi...,match_needs_review,INVESTIGATE,07P6LMV2,45226238,2025-12-09
24,348MBDII-R,196780845,Ellisyn Mularski,29.0,H,,,,,4411.59,...,2023,3225.0,2023.0,,- roth_initial_year_mismatch,match_needs_correction,UPDATE_1099,07P6LN25,44849366,2025-11-26
25,2108MBDII-R,181802969,Lauren Beam,24.0,B,G,H,,H,3.63,...,0,2000.0,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LN6W,45220870,2025-12-09
26,448PLAT-R,115689437,Melissa Huber,45.0,H,,,,,20075.68,...,0,10159.72,,,- missing_first_roth_tax_year,match_needs_review,INVESTIGATE,07P6LN7L,45269770,2025-12-10


In [18]:
# Cell 17 - Validate engine behavior for tax codes 'B' and 'G'

export_roth_basis_df[export_roth_basis_df["tax_code_1"].eq("B") & export_roth_basis_df["tax_code_2"].eq("G")].head()

Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,...,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action,matrix_account,transaction_id,txn_date
25,2108MBDII-R,181802969,Lauren Beam,24.0,B,G,H,,H,3.63,...,0,2000.0,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LN6W,45220870,2025-12-09


In [19]:
# Cell 18 - Validate engine behavior for tax codes '4' and 'G'

export_roth_basis_df[export_roth_basis_df["tax_code_1"].eq("4") & export_roth_basis_df["tax_code_2"].eq("G")].head()

Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,...,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action,matrix_account,transaction_id,txn_date


In [20]:
# Cell 19 - Validate engine behavior for tax codes 'B' and '4'

export_roth_basis_df[export_roth_basis_df["tax_code_1"].eq("B") & export_roth_basis_df["tax_code_2"].eq("4")].head()

Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,...,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action,matrix_account,transaction_id,txn_date


In [21]:
# Cell 20 - Validate engine behavior for tax code '4'

export_roth_basis_df[export_roth_basis_df["tax_code_1"].eq("4")].head()

Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,...,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action,matrix_account,transaction_id,txn_date


In [22]:
# Cell 21 - Validate engine behavior for tax code 'G'

export_roth_basis_df[export_roth_basis_df["tax_code_2"].eq("G")].head(10)

Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,...,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action,matrix_account,transaction_id,txn_date
25,2108MBDII-R,181802969,Lauren Beam,24.0,B,G,H,,H,3.63,...,0,2000.0,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LN6W,45220870,2025-12-09


'--------------------------------  Test Quick Export to Excel File --------------------------------'

In [29]:
# Cell 22 — Use quick report export for manual DataFrame output to Excel for stakeholders

from src.outputs.export_utils import write_df_excel

path = write_df_excel(export_roth_basis_df, filename_prefix="export_roth_distribs", engine="roth_taxable")

print(f"Export was successful!\nFile path: {path}")

Export was successful!
File path: /Users/manuelreyes/Desktop/dev/projects/1099_reconciliation_pipeline/reports/outputs/roth_taxable/export_roth_distribs_20260109_121230.xlsx


In [23]:
# Cell 23 — Use build_correction_file module to build the 'official' correction file

#Build Excel correction file (same build as inherited corrections engine)
roth_basis_correction_df = build_correction_dataframe(relius_roth_basis)

print(roth_basis_correction_df.shape)
roth_basis_correction_df.head(15)

(20, 12)


Unnamed: 0,Transaction Id,Transaction Date,Participant SSN,Participant Name,Matrix Account,Current Tax Code 1,Current Tax Code 2,New Tax Code,New Taxable Amount,New First Year contrib,Reason,Action
0,45269765,2025-12-10,163602230,Marcia Raubenstrauch,07P6LM4G,B,7,,,,- taxable_within_15pct_of_gross,INVESTIGATE
1,44745362,2025-11-20,165723517,Rose E. Frankil,07P6LM4G,B,1,,,,- taxable_within_15pct_of_gross,INVESTIGATE
2,45036772,2025-12-03,60482899,Michael Wong,07P6LM4Z,H,,,,2019.0,- roth_initial_year_mismatch,UPDATE_1099
3,44780430,2025-11-21,195564752,Lori Melchiorre,07P6LM6J,H,,,,2022.0,- roth_initial_year_mismatch,UPDATE_1099
4,45004510,2025-12-02,165680383,Joseph Kittner,07P6LMFX,H,,,,2023.0,- roth_initial_year_mismatch,UPDATE_1099
5,45799969,2025-12-29,184587732,ALBERT L PARRILLO,07P6LMH9,G,,H,,2015.0,- roth_rollover_code_fix_G_blank_to_H\n- roth_...,UPDATE_1099
6,45226238,2025-12-09,202781554,Beatriz Nunez,07P6LMV2,B,1,,,,- missing_first_roth_tax_year\n- taxable_withi...,INVESTIGATE
7,44849366,2025-11-26,196780845,Ellisyn Mularski,07P6LN25,H,,,,2023.0,- roth_initial_year_mismatch,UPDATE_1099
8,45220870,2025-12-09,181802969,Lauren Beam,07P6LN6W,B,G,H,0.0,,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,UPDATE_1099\nINVESTIGATE
9,45269770,2025-12-10,115689437,Melissa Huber,07P6LN7L,H,,,,,- missing_first_roth_tax_year,INVESTIGATE


In [24]:
# Cell 24 — Use build_correction_file module to create/write/export the 'official' correction file

# 2) Write it to Excel with an auto-generated timestamped name
output_path = write_correction_file(roth_basis_correction_df, engine="roth_taxable")

# Run write_correction_file() function
output_path

PosixPath('/Users/manuelreyes/Desktop/dev/projects/1099_reconciliation_pipeline/reports/outputs/roth_taxable/correction_file_20260118_232610.xlsx')