In [1]:
import sys
from pathlib import Path

#Path.cwd().parent depends on where you run this code from
#Path(__file__).resolve().parents[1] depends on where this file is located on disk
#
#project_root = .../1099-reconciliation-pipeline
project_root = Path.cwd().parent  # running from notebooks/  folder (cwd = current working directory) - .parent gets us to the folder above
sys.path.append(str(project_root)) #sys.path is a list of folders where python looks for modules
                                    # we add the project root to that list with .append

print("Project root: ", project_root)

Project root:  /Users/manuelreyes/Desktop/dev/1099_reconciliation_pipeline


In [2]:
# Cell 2 — Imports, Load & clean inputs (real paths)

from src import load_data
from src.clean_matrix import clean_matrix
from src.clean_relius_roth_basis import clean_relius_roth_basis
from src.build_correction_file import build_correction_dataframe, write_correction_file

from src.age_taxcode_analysis import clean_relius_demo

from src.config import RAW_DATA_DIR

matrix_path = RAW_DATA_DIR / "real_all_matrix_2025.xlsx"
relius_roth_basis_path = RAW_DATA_DIR / "real_roth_basis_relius_2025.xlsx"
relius_demo_path = RAW_DATA_DIR / "real_demo_relius_2025.xlsx"


# Load  and clean Matrix raw data
matrix_raw = load_data.load_matrix_excel(path=matrix_path, use_sample_if_none=False)
matrix_clean = clean_matrix(matrix_raw)

# Load  and clean Relius Roth Basis raw data
relius_roth_basis_raw = load_data.load_relius_roth_basis_excel(path=relius_roth_basis_path, use_sample_if_none=False, sheet_name="Sheet5")
relius_roth_basis_clean = clean_relius_roth_basis(relius_roth_basis_raw)

# Load and clean Relius Demo raw data
relius_demo_clean = clean_relius_demo(relius_demo_path)   # currently one function to load and clean Relius raw Excel file


#.shape is an attribute of pandas DataFrames that returns a tuple of
# (number of rows, number of columns) - e.g. (1000, 15)
print("matrix_raw DataFrame:    ", matrix_raw.shape)
print("matrix_clean DataFrame:  ", matrix_clean.shape)
print("matrix_clean 'ssn' dtype: ", matrix_clean["ssn"].dtype)
print("\n")

print("relius_roth_basis_raw:   ", relius_roth_basis_raw.shape)
print("relius_roth_basis_clean: ", relius_roth_basis_clean.shape)
print("relius_roth_basis_clean 'ssn' dtype: ", relius_roth_basis_clean["ssn"].dtype)
print("\n")
print("relius_demo_clean:       ", relius_demo_clean.shape)
print("relius_demo_clean 'ssn' dtype: ", relius_demo_clean["ssn"].dtype)

  matrix_clean = clean_matrix(matrix_raw)
  return pd.to_datetime(series, errors=errors, format=format, dayfirst=dayfirst).dt.date


matrix_raw DataFrame:     (60972, 56)
matrix_clean DataFrame:   (6980, 16)
matrix_clean 'ssn' dtype:  string


relius_roth_basis_raw:    (7904, 6)
relius_roth_basis_clean:  (7904, 6)
relius_roth_basis_clean 'ssn' dtype:  string


relius_demo_clean:        (61584, 6)
relius_demo_clean 'ssn' dtype:  string


In [3]:
# Cell 3 — Quick view of Relius Roth Basis clean data

print("relius_roth_basis_clean Head:")
relius_roth_basis_clean.head(10)


relius_roth_basis_clean Head:


Unnamed: 0,plan_id,ssn,first_name,last_name,first_roth_tax_year,roth_basis_amt
0,100MBDII-R,201746826,Alyssa,Mihalik,2020,2500.0
5277,406MBDII-R,208726408,Josh,Myers,2021,9600.0
5275,406MBDII-R,167788536,Nina,Zanias-Anderson,2022,8400.0
5274,406MBDII-R,192721010,Juliana,Rappo,2018,7700.0
5273,406MBDII-R,165788891,Jacob,Augustus,2022,7700.0
5272,406MBDII-R,205769452,Zackary,Hockman,2022,7300.0
5271,406MBDII-R,530980202,Tristan,Boyd,2022,6975.0
5270,406MBDII-R,158784228,Elizabeth,Burks,2021,6650.0
5269,406MBDII-R,99763952,Katherine,Golebiewski,2023,6400.0
5268,406MBDII-R,197749337,Dana,Salanik,2023,6300.0


In [4]:
# Cell 4 — Check key data inside Relius Roth Basis clean data

print("'ssn' lenght and index:\n", relius_roth_basis_clean["ssn"].str.len().value_counts(dropna=False))
print("\n'ssn' duplicates?:\n", relius_roth_basis_clean.duplicated(["plan_id", "ssn"]).sum())

'ssn' lenght and index:
 ssn
9    7904
Name: count, dtype: Int64

'ssn' duplicates?:
 0


In [5]:
import pandas as pd
from src.normalizers import normalize_ssn

tests = pd.Series(["040511830", 40511830.0, "40511830.0", "040-51-1830"])
tests.map(normalize_ssn)

0    040511830
1    040511830
2    040511830
3    040511830
dtype: object

In [6]:
print(relius_roth_basis_clean[relius_roth_basis_clean["ssn"].str.startswith("0")].value_counts().sum())
relius_roth_basis_clean[relius_roth_basis_clean["ssn"].str.startswith("0")].head(10)

203


Unnamed: 0,plan_id,ssn,first_name,last_name,first_roth_tax_year,roth_basis_amt
5269,406MBDII-R,99763952,Katherine,Golebiewski,2023,6400.0
5285,406MBDII-R,44849206,Jennifer,Pichler,2018,43000.0
5418,414MBD-R,97765552,Kyle,Essick,2022,14986.0
5339,409MBDII-R,40827100,Kathleen,Hoy,2022,19750.0
5010,391MBD-R,3541727,Stacy,Gasteiger,2008,15624.0
4995,391MBD-R,68720835,Ryan T.,McGuire,2008,3300.0
5022,391MBD-R,29625485,Christina,Briggs,2008,31100.0
5026,391MBD-R,91742046,Daniel,Frake,2019,41500.0
4939,387MBD-R,53820736,Sammantha,Nelson,2016,10030.0
4981,38MBDII-R,82520305,Carol,Frye,2021,8900.0


In [7]:
relius_roth_basis_clean.dtypes

plan_id                string[python]
ssn                    string[python]
first_name             string[python]
last_name              string[python]
first_roth_tax_year             Int64
roth_basis_amt                float64
dtype: object

In [8]:
# Cell 5 — Validate required columns exist (pre-flight)

required_matrix_cols = {
    "plan_id","ssn","txn_date","transaction_id","participant_name","matrix_account",
    "gross_amt","fed_taxable_amt","roth_initial_contribution_year","tax_code_1","tax_code_2"
}

# The '-' operator between sets is set difference
# “Give me all items that are in required_matrix_cols but not in matrix_clean.columns.”
missing = required_matrix_cols - set(matrix_clean.columns)

# assert 'CONDITION', "error message if condition is False"
# if it's True  -> nothing happens; code continues normally.
# if it's False -> Python raises an 'AssertionError' with the provided message.
assert not missing, f"Matrix missing columns: {missing}"

required_demo_cols = {"plan_id","ssn","dob"}
missing = required_demo_cols - set(relius_demo_clean.columns)
assert not missing, f"Demo missing columns: {missing}"

required_basis_cols = {"plan_id","ssn","first_roth_tax_year","roth_basis_amt"}
missing = required_basis_cols - set(relius_roth_basis_clean.columns)
assert not missing, f"Roth basis missing columns: {missing}"

print("✓ Required columns present")

✓ Required columns present


In [9]:
# Cell 6 — Run Roth Basis Taxable Analysis Engine

from src.roth_taxable_analysis import run_roth_taxable_analysis


relius_roth_basis = run_roth_taxable_analysis(
    matrix_clean,
    relius_demo_clean,
    relius_roth_basis_clean
)

print("relius_roth_basis_df:", relius_roth_basis.shape)
relius_roth_basis.head(10)

relius_roth_basis_df: (287, 22)


Unnamed: 0,transaction_id,txn_date,ssn,participant_name,matrix_account,plan_id,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,...,gross_amt,roth_initial_contribution_year,first_roth_tax_year,roth_basis_amt,age_at_txn,suggested_taxable_amt,suggested_first_roth_tax_year,correction_reason,action,match_status
0,44745362,2025-11-20,165723517,Rose E. Frankil,07P6LM4G,300005MBD,B,1.0,,,...,1795.39,2008,2008.0,1000.0,35.0,,,- taxable_within_15pct_of_gross,INVESTIGATE,match_needs_review
1,44568463,2025-11-14,177749131,Selena Merrick,07P6LM4G,300005MBD,H,,,,...,22601.99,2015,2015.0,15925.0,32.0,,,,,excluded_from_age_engine_rollover_or_inherited
2,44568464,2025-11-14,170608226,Andrea Platts,07P6LM4G,300005MBD,H,,,,...,42774.7,2011,2011.0,0.0,61.0,,,,,excluded_from_age_engine_rollover_or_inherited
3,43961174,2025-10-27,163581720,Jeffrey Smith,07P6LM4G,300005MBD,B,7.0,,,...,1111.11,2025,2018.0,4350.0,60.0,0.0,2018.0,- roth_initial_year_mismatch\n- roth_basis_cov...,UPDATE_1099\nINVESTIGATE,match_needs_correction
4,43886241,2025-10-23,208725197,Casey Goebel,07P6LM4G,300005MBD,H,,,,...,25257.87,2025,2017.0,5500.0,34.0,,,,,excluded_from_age_engine_rollover_or_inherited
5,43144300,2025-10-01,172647883,Kathleen Stettner,07P6LM4G,300005MBD,H,,,,...,53925.27,2009,2009.0,0.0,59.0,,,,,excluded_from_age_engine_rollover_or_inherited
6,42490716,2025-09-08,167401805,Marianne Smith,07P6LM4G,300005MBD,B,7.0,,,...,3000.0,2025,2016.0,10600.0,69.0,0.0,2016.0,- roth_initial_year_mismatch\n- roth_basis_cov...,UPDATE_1099\nINVESTIGATE,match_needs_correction
7,42302462,2025-09-02,167401805,Marianne Smith,07P6LM4G,300005MBD,B,7.0,,,...,3000.0,2025,2016.0,10600.0,69.0,0.0,2016.0,- roth_initial_year_mismatch\n- roth_basis_cov...,UPDATE_1099\nINVESTIGATE,match_needs_correction
8,42034715,2025-08-22,184581413,Kimberly Criss,07P6LM4G,300005MBD,B,4.0,,,...,7160.56,2017,,,,,,,,excluded_from_age_engine_rollover_or_inherited
9,41909277,2025-08-18,192448025,Mary Powers,07P6LM4G,300005MBD,H,,,,...,8189.9,2009,2009.0,6000.0,73.0,,,,,excluded_from_age_engine_rollover_or_inherited


In [10]:
# Cell 7 — Output schema check (builder-compatible canonical fields)

required_out_cols = {
    "transaction_id","txn_date","ssn","participant_name","matrix_account",
    "tax_code_1","tax_code_2","suggested_tax_code_1","suggested_tax_code_2",
    "correction_reason","action","match_status",
    "suggested_taxable_amt","suggested_first_roth_tax_year"
}
missing = required_out_cols - set(relius_roth_basis.columns)
assert not missing, f"Engine C output missing columns: {missing}"

print("✓ Engine C output schema OK (builder-compatible)")

✓ Engine C output schema OK (builder-compatible)


In [11]:
# Cell 8 — Filter validation (Roth-only + inherited excluded)

# Roth plan check based on plan_id rules:
is_roth = relius_roth_basis["plan_id"].astype(str).str.startswith("300005") | relius_roth_basis["plan_id"].astype(str).str.endswith("R")
assert is_roth.all(), "Found non-Roth plan_id rows in Engine C output." # .all() returns True only if every value in the Series is True.
                                                                        # If at least one row is False -> .all() returns False.

print("✓ Roth-only filter passed")

✓ Roth-only filter passed


In [12]:
# Cell 9 — Join coverage diagnostics (DOB + basis availability)

import pandas as pd

print("DOB missing in Engine C output:", relius_roth_basis.get("dob", pd.Series(dtype=object)).isna().sum() if "dob" in relius_roth_basis.columns else "DOB not retained")
print("first_roth_tax_year missing:", relius_roth_basis["suggested_first_roth_tax_year"].isna().sum(), "(note: this can be NA if not needed)")

DOB missing in Engine C output: DOB not retained
first_roth_tax_year missing: 234 (note: this can be NA if not needed)


In [13]:
# Cell 10 — Validate “basis coverage” rule is actually triggering

zero_taxable = relius_roth_basis[relius_roth_basis["suggested_taxable_amt"].fillna(pd.NA).eq(0.0)]
print("Rows suggesting taxable=0:", len(zero_taxable))
zero_taxable[
    [
        "plan_id","ssn","age_at_txn","gross_amt","fed_taxable_amt",
        "roth_initial_contribution_year","first_roth_tax_year", "roth_basis_amt",
        "suggested_first_roth_tax_year","suggested_taxable_amt","correction_reason","match_status","action"
    ]
].head(25)

Rows suggesting taxable=0: 84


Unnamed: 0,plan_id,ssn,age_at_txn,gross_amt,fed_taxable_amt,roth_initial_contribution_year,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action
3,300005MBD,163581720,60.0,1111.11,1057.36,2025.0,2018.0,4350.0,2018.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE
6,300005MBD,167401805,69.0,3000.0,2910.58,2025.0,2016.0,10600.0,2016.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE
7,300005MBD,167401805,69.0,3000.0,2910.58,2025.0,2016.0,10600.0,2016.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE
12,300005MBD,196683352,45.0,5000.0,4963.24,2025.0,2018.0,8900.0,2018.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE
13,300005MBD,199383874,73.0,12457.63,0.0,2010.0,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE
14,300005MBD,175407849,75.0,12464.49,0.0,2010.0,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE
17,300005MBD,198568933,62.0,9990.0,9990.0,2019.0,2014.0,21750.0,2014.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE
24,300005MBDII,98443885,70.0,1666.67,0.0,2010.0,2010.0,0.0,,0.0,- qualified_roth_distribution,match_no_action,
35,300005MBDII,135506047,72.0,43801.12,0.0,2017.0,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE
36,300005MBDII,187442417,73.0,10421.16,0.0,2017.0,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE


Notes:
- This checks that suggested_taxable_amt == 0 is being produced and why.
- Already fixed: 
    - if 'roth_basis_amt' > 'gross_amount' AND 'first_roth_tax_year' == 'roth_initial_contribution_year' -> 'no correction needed' or 'qualified_roth_distribution'
    - if ppt is older than 59 1/2 AND 'current year' >= 'roth_initial_contribution_year + '5 years' -> 'no correction needed' or 'qualified_roth_distribution'
    - if 'first_roth_tax_year' != 'roth_initial_contribution_year' -> needs_correction

In [14]:
# Cell 11 — Validate the 15% proximity flag (INVESTIGATE behavior)

investigate_df = relius_roth_basis[relius_roth_basis["action"].eq("INVESTIGATE")]
print("INVESTIGATE rows:", len(investigate_df))
investigate_df[["plan_id","ssn","gross_amt","fed_taxable_amt","correction_reason","match_status","action"]].head(25)

INVESTIGATE rows: 19


Unnamed: 0,plan_id,ssn,gross_amt,fed_taxable_amt,correction_reason,match_status,action
0,300005MBD,165723517,1795.39,1795.39,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE
22,300005MBDII,201647061,5000.0,5000.0,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE
26,300005MBDII,161561122,23018.87,23018.87,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE
29,300005MBDII,171528065,20000.0,20000.0,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE
33,300005MBDII,164540292,8667.92,8667.92,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE
52,IRA62PLATR,135660656,2850.0,2824.94,- missing_first_roth_tax_year\n- taxable_withi...,match_needs_review,INVESTIGATE
53,IRA62PLATR,171709347,2850.0,2825.96,- missing_first_roth_tax_year\n- taxable_withi...,match_needs_review,INVESTIGATE
54,IRA62PLATR,190442307,11797.31,11797.31,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE
58,IRA109PLATR,163623685,19500.0,19500.0,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE
59,IRA109PLATR,172582491,19500.0,19500.0,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE


In [22]:
# Cell 12 - Validate columns for quick export to Excel to present to stakeholders

action_df = relius_roth_basis[relius_roth_basis["action"].notna()]
print(f"NEED ACTION rows: {len(action_df)}")
export_roth_basis_df = action_df[
    [
        "plan_id","ssn","participant_name","age_at_txn", "tax_code_1",
        "tax_code_2", "suggested_tax_code_1",
        "suggested_tax_code_2", "new_tax_code", "gross_amt","fed_taxable_amt",
        "roth_initial_contribution_year","first_roth_tax_year","roth_basis_amt",
        "suggested_first_roth_tax_year","suggested_taxable_amt","correction_reason",
        "match_status","action", "matrix_account", "transaction_id", "txn_date",
    ]
]

export_roth_basis_df.head(15)

NEED ACTION rows: 116


Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,...,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action,matrix_account,transaction_id,txn_date
0,300005MBD,165723517,Rose E. Frankil,35.0,B,1,,,,1795.39,...,2008.0,1000.0,,,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE,07P6LM4G,44745362,2025-11-20
3,300005MBD,163581720,Jeffrey Smith,60.0,B,7,,,,1111.11,...,2018.0,4350.0,2018.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LM4G,43961174,2025-10-27
6,300005MBD,167401805,Marianne Smith,69.0,B,7,,,,3000.0,...,2016.0,10600.0,2016.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LM4G,42490716,2025-09-08
7,300005MBD,167401805,Marianne Smith,69.0,B,7,,,,3000.0,...,2016.0,10600.0,2016.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LM4G,42302462,2025-09-02
12,300005MBD,196683352,Tara Rebar,45.0,B,1,,,,5000.0,...,2018.0,8900.0,2018.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LM4G,41330446,2025-07-29
13,300005MBD,199383874,John R. Cuneo,73.0,B,G,H,,H,12457.63,...,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LM4G,40319546,2025-06-23
14,300005MBD,175407849,Beverly S Cuneo,75.0,B,G,H,,H,12464.49,...,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LM4G,40319545,2025-06-23
16,300005MBD,162684618,Samantha Engler,38.0,B,G,H,,H,2541.15,...,,,,,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LM4G,40163174,2025-06-16
17,300005MBD,198568933,Scott Vairo,62.0,B,,B,7.0,B7,9990.0,...,2014.0,21750.0,2014.0,0.0,- roth_initial_year_mismatch\n- roth_basis_cov...,match_needs_correction,UPDATE_1099\nINVESTIGATE,07P6LM4G,40163173,2025-06-16
22,300005MBDII,201647061,Nancy Rhoades,41.0,B,1,,,,5000.0,...,2024.0,0.0,,,- taxable_within_15pct_of_gross,match_needs_review,INVESTIGATE,07P6LM4H,43358870,2025-10-07


In [18]:
export_roth_basis_df[export_roth_basis_df["tax_code_1"].eq("B") & export_roth_basis_df["tax_code_2"].eq("G")].head()

Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,fed_taxable_amt,roth_initial_contribution_year,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action
13,300005MBD,199383874,John R. Cuneo,73.0,B,G,H,,H,12457.63,0.0,2010,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE
14,300005MBD,175407849,Beverly S Cuneo,75.0,B,G,H,,H,12464.49,0.0,2010,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE
16,300005MBD,162684618,Samantha Engler,38.0,B,G,H,,H,2541.15,0.0,2013,,,,,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE
35,300005MBDII,135506047,Lynne Higgins,72.0,B,G,H,,H,43801.12,0.0,2017,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE
36,300005MBDII,187442417,Janet Kaufold,73.0,B,G,H,,H,10421.16,0.0,2017,,,,0.0,- roth_rollover_code_fix_B_G_to_H\n- missing_f...,match_needs_correction,UPDATE_1099\nINVESTIGATE


In [19]:
export_roth_basis_df[export_roth_basis_df["tax_code_1"].eq("4") & export_roth_basis_df["tax_code_2"].eq("G")].head()

Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,fed_taxable_amt,roth_initial_contribution_year,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action


In [20]:
export_roth_basis_df[export_roth_basis_df["tax_code_1"].eq("B") & export_roth_basis_df["tax_code_2"].eq("4")].head()

Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,fed_taxable_amt,roth_initial_contribution_year,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action


In [21]:
export_roth_basis_df[export_roth_basis_df["tax_code_1"].eq("4")].head()

Unnamed: 0,plan_id,ssn,participant_name,age_at_txn,tax_code_1,tax_code_2,suggested_tax_code_1,suggested_tax_code_2,new_tax_code,gross_amt,fed_taxable_amt,roth_initial_contribution_year,first_roth_tax_year,roth_basis_amt,suggested_first_roth_tax_year,suggested_taxable_amt,correction_reason,match_status,action
83,185MBDII-R,204400447,Joel Braverman,69.0,4,,B,4,B4,268.46,268.46,,2011,33800.0,2011,0.0,- roth_death_code_fix_4_to_B_4\n- roth_initial...,match_needs_correction,UPDATE_1099\nINVESTIGATE


In [23]:
# Cell 13 - 

from src.export_utils import write_df_excel

path = write_df_excel(export_roth_basis_df, filename_prefix="export_roth_distribs")

print(f"Export was successful!\nFile path: {path}")

Export was successful!
File path: /Users/manuelreyes/Desktop/dev/1099_reconciliation_pipeline/reports/outputs/export_roth_distribs_20251219_134806.xlsx
