In [1]:
# Cell 1 - Tell Python where to find the project's modules

import sys
from pathlib import Path

# Path.cwd() is the folder the kernel was started from, so two launch locations
# are handled:
#   - repo root   -> cwd itself contains src/
#   - notebooks/  -> src/ lives one level up (cwd.parent)
# If neither contains src/, cwd.parent is still used as the fallback.
cwd = Path.cwd()
project_root = cwd if (cwd / "src").exists() else cwd.parent

# sys.path is the list of folders Python searches when importing modules.
# Only add the project root when it is missing, so re-running this cell does
# not keep appending duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

print("Project root: ", project_root)

Project root:  /Users/manuelreyes/Desktop/dev/projects/1099_reconciliation_pipeline


---
### Test Load Data and Clean DataFrames
---

In [None]:
# Cell 2 — Imports, Load & Clean inputs (real paths)

from src.core import load_data
from src.cleaning.clean_matrix import clean_matrix
from src.cleaning.clean_relius_demo import clean_relius_demo
from src.engines.age_taxcode_analysis import run_age_taxcode_analysis
from src.outputs.build_correction_file import build_correction_dataframe, write_correction_file

from src.config import RAW_DATA_DIR, USE_SAMPLE_DATA_DEFAULT, DateFilterConfig

# Choose the input workbooks. When USE_SAMPLE_DATA_DEFAULT is on, no explicit
# path is passed (path=None) — presumably the loaders then fall back to their
# own default/sample files; confirm in src.core.load_data.
if USE_SAMPLE_DATA_DEFAULT:
    matrix_path = None
    relius_demo_path = None
else:
    matrix_path = RAW_DATA_DIR / "real_all_matrix_2025.xlsx"
    relius_demo_path = RAW_DATA_DIR / "real_demo_relius_2025.xlsx"

# Date filter passed to clean_matrix. Keep exactly ONE assignment here so the
# active setting is unambiguous; swap the line below for one of these to change it:
#   All data:        date_filter = None
#   Range + months:  date_filter = DateFilterConfig(date_start="2025-07-01", date_end="2025-09-30", months=["July", "Aug", 9])
date_filter = DateFilterConfig(date_start="2025-11-20", date_end="2025-12-31", months=None)

# Load and clean Matrix raw data
# NOTE(review): the saved output of this cell contains what looks like the tail
# of a warning pointing at the clean_matrix call — re-run and review it.
matrix_raw = load_data.load_matrix_excel(path=matrix_path)
matrix_clean = clean_matrix(matrix_raw, date_filter=date_filter)

# Load and clean Relius demo raw data (no date filter is passed here)
relius_demo_raw = load_data.load_relius_demo_excel(path=relius_demo_path)
relius_demo_clean = clean_relius_demo(relius_demo_raw)


# .shape is a DataFrame attribute holding a (number of rows, number of columns)
# tuple - e.g. (1000, 15)
print("\n")
print(f"Matrix Cleaned DataFrame shape: {matrix_clean.shape}")
print(f"Relius Demo Cleaned DataFrame shape: {relius_demo_clean.shape}")

  matrix_clean = clean_matrix(matrix_raw, date_filter=date_filter)




Matrix Cleaned DataFrame shape: (1771, 21)
Relius Demo Cleaned DataFrame shape: (61584, 11)


#### Date filter options
Use `DateFilterConfig` to limit transactions by date range and/or months. Range and months intersect.
Set `date_filter = None` for all data. Missing/invalid dates are excluded when filters are active.

Examples:
- All data: `date_filter = None`
- Range only: `DateFilterConfig(date_start="2025-01-01", date_end="2025-01-31")`
- Months only: `DateFilterConfig(months=["July", 8])`
- Range + months: `DateFilterConfig(date_start="2025-07-01", date_end="2025-09-30", months=["July", "Aug"])`


In [3]:
# Cell 3 — Review the dtype of each column and the first 10 rows of the cleaned Relius demo DataFrame

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() only adds a stray "None" line to the output.
relius_demo_clean.info()

# NOTE(review): the rendered rows appear to include participant names and SSNs —
# clear this cell's output before sharing or committing the notebook.
relius_demo_clean.head(10)

<class 'pandas.core.frame.DataFrame'>
Index: 61584 entries, 2110 to 58800
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   plan_id            61584 non-null  string 
 1   ssn                61584 non-null  string 
 2   first_name         61584 non-null  string 
 3   last_name          61584 non-null  string 
 4   dob                61527 non-null  object 
 5   term_date          12358 non-null  object 
 6   ssn_valid          61584 non-null  boolean
 7   amount_valid       0 non-null      boolean
 8   date_valid         12358 non-null  boolean
 9   code_1099r_valid   0 non-null      boolean
 10  validation_issues  61584 non-null  object 
dtypes: boolean(4), object(3), string(4)
memory usage: 4.2+ MB
None


Unnamed: 0,plan_id,ssn,first_name,last_name,dob,term_date,ssn_valid,amount_valid,date_valid,code_1099r_valid,validation_issues
2110,100MBD,113741850,Donald,Mangan,1988-11-12,NaT,True,,,,[]
2197,100MBD,116648243,Deborah,Mosloskie,1966-09-26,NaT,True,,,,[]
3595,100MBD,145609822,Beth,Conley,1968-03-22,NaT,True,,,,[]
4751,100MBD,159460981,Paul J,Petrosky,1952-04-12,2013-09-01,True,,True,,[]
5327,100MBD,159667416,William,Delaney,1969-06-21,NaT,True,,,,[]
6153,100MBD,160643429,Diana,Hanobeck,1973-07-25,NaT,True,,,,[]
6270,100MBD,160667435,Christina,Miller,1985-06-16,NaT,True,,,,[]
7795,100MBD,162581348,Yvonne,Benedict,1961-11-12,NaT,True,,,,[]
7958,100MBD,162642206,Melanie,Ellixson,1975-12-08,NaT,True,,,,[]
8886,100MBD,163607906,Richard,Borcky,1963-06-18,NaT,True,,,,[]


---
### Test Merging DataFrames and Analysis Engine
---

In [4]:
# Cell 4 — Run Merging and Analysis Engine

# Pass the cleaned Matrix and Relius demo DataFrames (built in Cell 2) to the
# age/tax-code analysis engine; the result is reused by every later cell.
age_matches = run_age_taxcode_analysis(matrix_clean, relius_demo_clean)

# info() prints the column / non-null / dtype summary of the result.
age_matches.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1713 entries, 0 to 1770
Data columns (total 44 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   plan_id                         1713 non-null   string 
 1   ssn                             1713 non-null   string 
 2   participant_name                1713 non-null   object 
 3   state                           1707 non-null   string 
 4   gross_amt                       1713 non-null   float64
 5   fed_taxable_amt                 1713 non-null   float64
 6   txn_date                        1713 non-null   object 
 7   txn_method                      1713 non-null   string 
 8   tax_code_1                      1713 non-null   string 
 9   tax_code_2                      9 non-null      string 
 10  tax_form                        1713 non-null   object 
 11  dist_type                       53 non-null     string 
 12  roth_initial_contribution_year  1 non-n

In [5]:
# Cell 5 — Review tax codes in Series (1 or 2 digits)

# Count how many rows carry each tax_code_1 value, most frequent first.
age_matches.value_counts(subset=["tax_code_1"])

tax_code_1
7             1251
G              270
2               67
11              51
4               43
1               29
15               2
Name: count, dtype: int64

In [6]:
# Cell 6 — Check the dtype of the age-at-distribution column and preview the first rows of the merged and analyzed DataFrame

# Look the column's dtype up in the frame-level dtypes table.
print("age_at_distribution_year dtype: ", age_matches.dtypes["age_at_distribution_year"])

# First 10 rows of the engine output.
age_matches.iloc[:10]

age_at_distribution_year dtype:  Float64


Unnamed: 0,plan_id,ssn,participant_name,state,gross_amt,fed_taxable_amt,txn_date,txn_method,tax_code_1,tax_code_2,...,attained_55_in_term_year,expected_tax_code_1,expected_tax_code_2,correction_reason,action,match_status,code_matches_expected,suggested_tax_code_1,suggested_tax_code_2,new_tax_code
0,IRA127PLAT,194362032,Sandra Marsh,PA,153.22,153.22,2025-12-03,ACH Distribution,7,,...,False,7,,,,match_no_action,True,,,
1,IRA127PLAT,209501562,Kathryn Clausen,PA,200.0,200.0,2025-12-03,ACH Distribution,7,,...,False,7,,,,match_no_action,True,,,
2,IRA29PLAT,167446818,Helen Peters,PA,6.0,6.0,2025-12-29,ACH Distribution,7,,...,False,7,,,,match_no_action,True,,,
3,IRA29PLAT,171421488,George Fetch,PA,22352.94,22352.94,2025-12-29,Check Distribution,7,,...,False,7,,,,match_no_action,True,,,
4,IRA29PLAT,134447891,Deborah Pease,PA,10.0,10.0,2025-12-15,Check Distribution,7,,...,False,7,,,,match_no_action,True,,,
5,IRA29PLAT,160383551,Elaine Walz,PA,31.57,31.57,2025-12-09,ACH Distribution,7,,...,False,7,,,,match_no_action,True,,,
6,IRA29PLAT,181448065,Debra L Kurtz,PA,400.0,400.0,2025-12-03,ACH Distribution,7,,...,False,7,,,,match_no_action,True,,,
7,IRA29PLAT,170389256,Ronald Sandt,PA,436.17,436.17,2025-12-03,ACH Distribution,7,,...,False,7,,,,match_no_action,True,,,
8,IRA29PLAT,191406733,Richard Houseknecht,PA,631.85,631.85,2025-12-03,ACH Distribution,7,,...,False,7,,,,match_no_action,True,,,
9,IRA29PLAT,191364773,Andrew Pennoni,PA,700.0,700.0,2025-12-03,ACH Distribution,7,,...,True,7,,,,match_no_action,True,,,


In [7]:
# Cell 7 — Test no Roth plans in merged DataFrame

# Despite its name, non_roth_test collects the rows this check treats as Roth:
# plan_id starting with "300005" or ending with "R". It must come back empty.
non_roth_test = age_matches.loc[
    age_matches["plan_id"].str.startswith("300005")
    | age_matches["plan_id"].str.endswith("R")
]
assert non_roth_test.empty, "WE HAVE ROTH DISBURSEMENTS!"

print("No Roth Disbursements in DataFrame!")

# Only reached when the assertion passed, so this renders an empty frame (headers only).
non_roth_test.iloc[:5]

No Roth Disbursements in DataFrame!


Unnamed: 0,plan_id,ssn,participant_name,state,gross_amt,fed_taxable_amt,txn_date,txn_method,tax_code_1,tax_code_2,...,attained_55_in_term_year,expected_tax_code_1,expected_tax_code_2,correction_reason,action,match_status,code_matches_expected,suggested_tax_code_1,suggested_tax_code_2,new_tax_code


In [11]:
# Cell 8 — Test rows with G tax code are excluded from analysis engine

# Visual check only (no assertion): pull the rows whose first tax code is "G"
# and inspect their match_status in the preview below.
rollover_test = age_matches.loc[age_matches["tax_code_1"] == "G"]
rollover_test.iloc[:5]

Unnamed: 0,plan_id,ssn,participant_name,state,gross_amt,fed_taxable_amt,txn_date,txn_method,tax_code_1,tax_code_2,...,attained_55_in_term_year,expected_tax_code_1,expected_tax_code_2,correction_reason,action,match_status,code_matches_expected,suggested_tax_code_1,suggested_tax_code_2,new_tax_code
45,IRA113PLAT,200489351,Mary Sidary,PA,398761.39,0.0,2025-12-16,Check Distribution,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
46,IRA113PLAT,171400540,William Jackson,PA,282773.14,0.0,2025-12-09,Check Distribution,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
53,IRA109PLAT,182428295,Shirley Berghold,PA,16000.0,0.0,2025-12-30,Tax Record Only,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
58,IRA109PLAT,182428295,Shirley Berghold,PA,4000.0,0.0,2025-12-17,Check Distribution,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
59,IRA109PLAT,182428295,Shirley Berghold,PA,16000.0,16000.0,2025-12-17,Tax Record Only,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,


In [10]:
# Cell 9 — Review rows whose match status is not 'match_no_action'

# Keep every row the engine did not mark as a clean match.
logic_test = age_matches.loc[age_matches["match_status"].ne("match_no_action")]

# Preview up to 15 of those rows.
logic_test.iloc[:15]

Unnamed: 0,plan_id,ssn,participant_name,state,gross_amt,fed_taxable_amt,txn_date,txn_method,tax_code_1,tax_code_2,...,attained_55_in_term_year,expected_tax_code_1,expected_tax_code_2,correction_reason,action,match_status,code_matches_expected,suggested_tax_code_1,suggested_tax_code_2,new_tax_code
37,IRA32PLAT,204546624,Brian Scott,PA,3197.33,3197.33,2025-11-26,Check Distribution,4,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
38,IRA32PLAT,204546658,Amy Scott,PA,3197.33,3197.33,2025-11-26,Check Distribution,4,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
39,IRA32PLAT,204546547,Jeffrey Scott,PA,3198.29,3198.29,2025-11-26,Check Distribution,4,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
42,IRA32PLAT,204546870,Karen Anthony,PA,3119.27,3119.27,2025-11-24,Check Distribution,4,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
44,IRA113PLAT,187569907,Paul Simpson,PA,454863.26,0.0,2025-12-19,Check Distribution,4,G,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
45,IRA113PLAT,200489351,Mary Sidary,PA,398761.39,0.0,2025-12-16,Check Distribution,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
46,IRA113PLAT,171400540,William Jackson,PA,282773.14,0.0,2025-12-09,Check Distribution,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
53,IRA109PLAT,182428295,Shirley Berghold,PA,16000.0,0.0,2025-12-30,Tax Record Only,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
58,IRA109PLAT,182428295,Shirley Berghold,PA,4000.0,0.0,2025-12-17,Check Distribution,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,
59,IRA109PLAT,182428295,Shirley Berghold,PA,16000.0,16000.0,2025-12-17,Tax Record Only,G,,...,False,,,,,excluded_from_age_engine_rollover_or_inherited,False,,,


In [12]:
# Cell 10 — Review values and their counts in 'match_status'

# Row-level counts: one count per row of age_matches, so an SSN that appears on
# several rows is counted once for each of them.
age_matches["match_status"].value_counts()

match_status
match_no_action                                   1265
excluded_from_age_engine_rollover_or_inherited     402
match_needs_correction                              32
age_rule_insufficient_data                          14
Name: count, dtype: int64

In [13]:
# Cell 11 — Review values and their counts in 'match_status' per unique SSN

# Keep one row per SSN. keep="first" (the pandas default) retains the first row
# in age_matches order, so each SSN is counted under the status of that first
# row only, even if its other rows carry a different status.
filter_df = age_matches.drop_duplicates(subset="ssn", keep="first")
print(filter_df["match_status"].value_counts())

# Preview up to 15 de-duplicated rows the engine could not evaluate.
filter_df.loc[filter_df["match_status"].eq("age_rule_insufficient_data")].iloc[:15]

match_status
match_no_action                                   1127
excluded_from_age_engine_rollover_or_inherited     368
match_needs_correction                              28
age_rule_insufficient_data                          12
Name: count, dtype: int64


Unnamed: 0,plan_id,ssn,participant_name,state,gross_amt,fed_taxable_amt,txn_date,txn_method,tax_code_1,tax_code_2,...,attained_55_in_term_year,expected_tax_code_1,expected_tax_code_2,correction_reason,action,match_status,code_matches_expected,suggested_tax_code_1,suggested_tax_code_2,new_tax_code
220,300001MBD,222856783,The Christian Gospel Fellowshi,PA,100.0,100.0,2025-12-03,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,
221,300001MBD,232169885,Lycoming Valley Baptist Church,PA,100.0,100.0,2025-12-03,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,
223,300001MBD,205940449,I-TEC 23,PA,200.0,200.0,2025-12-03,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,
231,300001MBD,232090782,Community Baptist Church of Mo,PA,500.0,500.0,2025-12-03,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,
239,300001MBD,251458329,Eldred First Church of God,PA,740.0,740.0,2025-12-03,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,
693,IRA69PLAT,201817371,Diane Brady,FL,150.0,150.0,2025-12-19,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,
694,IRA69PLAT,231355131,Diane Brady,FL,1200.0,1200.0,2025-12-19,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,
696,IRA69PLAT,580660607,Diane Brady,FL,3000.0,3000.0,2025-12-19,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,
697,IRA69PLAT,592332120,Diane Brady,FL,3000.0,3000.0,2025-12-19,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,
1000,245MBD,193282145,Barbara Yocum,PA,1000.0,1000.0,2025-12-03,Check Distribution,7,,...,False,,,,,age_rule_insufficient_data,False,,,


In [14]:
# Cell 12 — Review rows with 'match_needs_correction' status

# age_taxcode is reused by the following cells (age >= 54 review and quick export).
age_taxcode = age_matches.loc[age_matches["match_status"] == "match_needs_correction"]

# shape[0] is the row count of the filtered frame.
print(f" CORRECTION rows: {age_taxcode.shape[0]}")
age_taxcode.iloc[:15]

 CORRECTION rows: 32


Unnamed: 0,plan_id,ssn,participant_name,state,gross_amt,fed_taxable_amt,txn_date,txn_method,tax_code_1,tax_code_2,...,attained_55_in_term_year,expected_tax_code_1,expected_tax_code_2,correction_reason,action,match_status,code_matches_expected,suggested_tax_code_1,suggested_tax_code_2,new_tax_code
187,300004PLAT2,168701981,Deborah Barnett,PA,396.77,396.77,2025-12-03,ACH Distribution,7,,...,False,1,,no_term_date_under_55_in_txn_year,UPDATE_1099,match_needs_correction,False,1,,1
321,IRA66PLAT,192644776,Constance Testa,PA,1625.0,1625.0,2025-12-03,ACH Distribution,1,,...,False,2,,no_term_date_55_plus_in_txn_year,UPDATE_1099,match_needs_correction,False,2,,2
774,177PLAT,161649871,Mark Dempster,PA,500.0,500.0,2025-12-03,ACH Distribution,1,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
776,4881PLAT,201563368,Bernadette Mattica,PA,525.0,525.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
823,RIVVALLMBDII,190487259,Lisa Jackson,PA,599.74,599.74,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
949,137MBD,167603791,Gregory Knowles,PA,1000.0,1000.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
980,3425MBDII,202628064,Rodney Pallerino,PA,625.0,625.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
1041,362PLAT,176447029,Audrey Waddell,PA,1375.0,1375.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
1055,215MBDII,163585662,Jeffery Coover,DE,1200.0,1200.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
1092,425MBD,208568173,Joann Luzenski,PA,1200.0,1200.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2


In [15]:
# Cell 13 — Review analysis and logic engine for rows where age at distribution >= 54 years old

# From the rows needing correction, keep those aged 54.0 or more and preview up to 15.
age_taxcode[age_taxcode["age_at_distribution_year"] >= 54.0].iloc[:15]

Unnamed: 0,plan_id,ssn,participant_name,state,gross_amt,fed_taxable_amt,txn_date,txn_method,tax_code_1,tax_code_2,...,attained_55_in_term_year,expected_tax_code_1,expected_tax_code_2,correction_reason,action,match_status,code_matches_expected,suggested_tax_code_1,suggested_tax_code_2,new_tax_code
321,IRA66PLAT,192644776,Constance Testa,PA,1625.0,1625.0,2025-12-03,ACH Distribution,1,,...,False,2,,no_term_date_55_plus_in_txn_year,UPDATE_1099,match_needs_correction,False,2,,2
774,177PLAT,161649871,Mark Dempster,PA,500.0,500.0,2025-12-03,ACH Distribution,1,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
776,4881PLAT,201563368,Bernadette Mattica,PA,525.0,525.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
823,RIVVALLMBDII,190487259,Lisa Jackson,PA,599.74,599.74,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
949,137MBD,167603791,Gregory Knowles,PA,1000.0,1000.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
980,3425MBDII,202628064,Rodney Pallerino,PA,625.0,625.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
1041,362PLAT,176447029,Audrey Waddell,PA,1375.0,1375.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
1055,215MBDII,163585662,Jeffery Coover,DE,1200.0,1200.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
1092,425MBD,208568173,Joann Luzenski,PA,1200.0,1200.0,2025-12-03,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2
1095,425MBD,208568173,Joann Luzenski,PA,1200.0,1200.0,2025-11-21,ACH Distribution,7,,...,True,2,,terminated_at_or_after_55,UPDATE_1099,match_needs_correction,False,2,,2


---
### Test Quick Export to Excel File
---

In [15]:
# Cell 14 — Use quick report export for manual DataFrame output to Excel for stakeholders

# NOTE(review): this import sits mid-notebook; consider moving it to the import
# cell (Cell 2) so all dependencies are declared in one place.
from src.outputs.export_utils import write_df_excel

# Export the rows flagged "match_needs_correction" (age_taxcode from Cell 12).
# The returned value is printed below as the path of the written file.
path = write_df_excel(age_taxcode, filename_prefix="export_trad_distribs")

print(f"Export was successful!\nFile path: {path}")

Export was successful!
File path: /Users/manuelreyes/Desktop/dev/projects/1099_reconciliation_pipeline/reports/outputs/export_trad_distribs_20260109_105221.xlsx


---
### Test Official Build/Export to Excel Correction File
---

In [16]:
# Cell 15 — Use build_correction_file module to build the 'official' correction file

# Build the correction DataFrame from the full engine output
# (same build as inherited corrections engine).
age_correction_df = build_correction_dataframe(age_matches)

# (rows, columns) of the correction frame, then a preview of up to 15 rows.
print(age_correction_df.shape)
age_correction_df.iloc[:15]

(32, 12)


Unnamed: 0,Transaction Id,Transaction Date,Participant SSN,Participant Name,Matrix Account,Current Tax Code 1,Current Tax Code 2,New Tax Code,New Taxable Amount,New First Year contrib,Reason,Action
0,45044779,2025-12-03,168701981,Deborah Barnett,07P6LM4F,7,,1,,,no_term_date_under_55_in_txn_year,UPDATE_1099
1,45036648,2025-12-03,192644776,Constance Testa,07P6LM54,1,,2,,,no_term_date_55_plus_in_txn_year,UPDATE_1099
2,45044498,2025-12-03,161649871,Mark Dempster,07P6LMFF,1,,2,,,terminated_at_or_after_55,UPDATE_1099
3,45044977,2025-12-03,201563368,Bernadette Mattica,07P6LMFZ,7,,2,,,terminated_at_or_after_55,UPDATE_1099
4,45044573,2025-12-03,190487259,Lisa Jackson,07P6LMM2,7,,2,,,terminated_at_or_after_55,UPDATE_1099
5,45044985,2025-12-03,167603791,Gregory Knowles,07P6LN4P,7,,2,,,terminated_at_or_after_55,UPDATE_1099
6,45044772,2025-12-03,202628064,Rodney Pallerino,07P6LN8L,7,,2,,,terminated_at_or_after_55,UPDATE_1099
7,45044932,2025-12-03,176447029,Audrey Waddell,07P6LNGR,7,,2,,,terminated_at_or_after_55,UPDATE_1099
8,45044704,2025-12-03,163585662,Jeffery Coover,07P6LNJT,7,,2,,,terminated_at_or_after_55,UPDATE_1099
9,44780423,2025-11-21,208568173,Joann Luzenski,07P6LNMD,7,,2,,,terminated_at_or_after_55,UPDATE_1099


Notes:
- Currently the age_correction DataFrame is finding all discrepancies between G and codes 1, 2 or G.
    - Age correction analysis is working; however, code G is not based on age but on distribution type (G -> Rollovers).
    - We need to filter out distributions that are G (Traditional rollover) and H (Roth rollover) and exclude plans that are inherited, since these are always code 4.
- Some tax codes are two digits (like '11'); I need to change the logic to extract one or two tax code digits.

In [17]:
# Cell 16 — Use build_correction_file module to create/write/export the 'official' correction file

# Write the correction DataFrame built in Cell 15 to Excel. The recorded output
# shows a timestamped file name under reports/outputs/age_taxcode/.
output_path = write_correction_file(age_correction_df, engine="age_taxcode")

# Display the path returned by write_correction_file() (the file was already written above)
output_path

PosixPath('/Users/manuelreyes/Desktop/dev/projects/1099_reconciliation_pipeline/reports/outputs/age_taxcode/correction_file_20260109_114858.xlsx')