In [None]:
import json
import pandas as pd
import re

# Creating the Clean Test Data

## 1. Load Test Data

In [None]:
with open("data/data_processed/Train_Val_Test/df_test.json", 'r') as f:
    data = json.load(f)  

df_test = pd.DataFrame(data)
df_test.head(10)

Unnamed: 0,ID,question,answer,context,gold_context,operation,source
0,UNP/2015/page_80.pdf,what percentage of total minimum lease payment...,68%,['we maintain and operate the assets based on ...,{'table_7': 'millions the total minimum lease ...,"add(3430, 1587), divide(3430, #0)",FinQA
1,CMCSA/2004/page_30.pdf,Q: what was the value of the long-term strateg...,551,['management 2019s discussion and analysis of ...,{'text_18': 'additions to intangibles during 2...,"add(250, 133), add(168, #0)",ConvFinQA
2,MRO/2011/page_37.pdf,by how much did the wti crude oil benchmark in...,53.2%,"['item 7 .', 'management 2019s discussion and ...",{'table_1': 'benchmark the wti crude oil ( dol...,"subtract(95.11, 62.09), divide(#0, 62.09)",FinQA
3,a862932d,Risk mgmt for capital alloc. in accelerated re...,The changes in the terms of the Series B Prefe...,"[In conjunction with the Garrett spin-off, the...",,,FinDER
4,LMT/2012/page_44.pdf,"Q: what was the operating profit, in millions,...",7.4%,['aeronautics 2019 operating profit for 2011 i...,{'table_2': 'the operating profit of 2012 is 8...,"subtract(874, 814), divide(#0, 814)",ConvFinQA
5,6e5aff44,Revenue recognition timing for performance obl...,"Raymond James Financial, Inc. recognizes reven...",[Revenue from contracts with customers is reco...,,,FinDER
6,C/2008/page_26.pdf,what percentage of incremental risk-weighted a...,4%,"['commitments .', 'for a further description o...",{'table_4': 'in billions of dollars the studen...,"divide(3.5, 98.9)",FinQA
7,STT/2017/page_63.pdf,Q: what was the value of the s&p500 index in 2...,53%,"[""state street corporation | 52 shareholder re...",{'table_2': 'the s&p 500 index of 2012 is 100 ...,"subtract(153, 100), divide(#0, 100)",ConvFinQA
8,ETR/2008/page_376.pdf,what is the net change in net revenue during 2...,-1.4,"['entergy texas , inc .', ""management's financ...",{'table_1': 'the 2007 net revenue of amount ( ...,"subtract(440.9, 442.3)",FinQA
9,7ec5e05d,2024 total opex: Nordson Corp (NDSN) cost-of-s...,"For 2024, the cost of sales is $1,203,792 and ...",[Consolidated Statements of Income\nYears ende...,,Addition,FinDER


## 2. Categorizing Test Data By Operation Type

In [112]:
def hybrid_reasoning_classifier(row):
    op = str(row['operation']).lower()
    question = str(row['question']).lower()
    source = row['source']

    if pd.isna(op) or op.strip() == 'none':
        return 'Non-numerical'

    # FinDER: Use natural language tags but catch explicit math
    if source == 'FinDER':
        if 'compositional' in op or 'comparison' in op:
            return 'Compositional'
        elif any(kw in op for kw in ['multiply', 'division', 'addition', 'subtraction']):
            return 'Arithmetic'
        else:
            return 'Non-numerical'

    # ConvFinQA: Conversational → likely compositional
    if source == 'ConvFinQA':
        if question.count("q:") > 1:
            return 'Compositional'

    # Use regex to extract full operation calls like add(...), subtract(...), etc.
    base_ops = ['add', 'subtract', 'divide', 'multiply', 'average', 'max', 'min']
    ops = re.findall(r'\b(?:' + '|'.join(base_ops) + r')\s*\(.*?\)', op)

    used_ops = set()
    for o in ops:
        for fn in base_ops:
            if o.strip().startswith(fn):
                used_ops.add(fn)

    if len(used_ops) > 1:
        return 'Compositional'
    elif len(used_ops) == 1 and len(ops) > 1:
        return 'Compositional'
    else:
        return 'Arithmetic'

In [4]:
df_test['hybrid_reasoning'] = df_test.apply(hybrid_reasoning_classifier, axis=1)

In [5]:
df_test[
    (df_test['source'] == 'FinQA') &
    (df_test['operation'].str.contains('divide')) &
    (df_test['hybrid_reasoning'] == 'Arithmetic')
].head()

Unnamed: 0,ID,question,answer,context,gold_context,operation,source,hybrid_reasoning
6,C/2008/page_26.pdf,what percentage of incremental risk-weighted a...,4%,"['commitments .', 'for a further description o...",{'table_4': 'in billions of dollars the studen...,"divide(3.5, 98.9)",FinQA,Arithmetic
16,JPM/2013/page_132.pdf,what was the ratio of the firm 2019s cra loan ...,1.125,['management 2019s discussion and analysis 138...,"{'text_11': 'at december 31 , 2013 and 2012 , ...","divide(18, 16)",FinQA,Arithmetic
20,ADBE/2012/page_87.pdf,what is the yearly amortization rate related t...,10%,['goodwill is assigned to one or more reportin...,{'table_2': 'the customer contracts and relati...,"divide(const_100, 10)",FinQA,Arithmetic
29,ZBH/2004/page_65.pdf,what was the percentage change in accumulated ...,83%,"['z i m m e r h o l d i n g s , i n c .', 'a n...",{'table_1': 'the beginning balance at january ...,"divide(114.6, 138.7)",FinQA,Arithmetic
30,PKG/2006/page_27.pdf,what was the operating income margin for 2006?,10%,['results of operations year ended december 31...,{'table_1': '( in millions ) the net sales of ...,"divide(225.9, 2187.1)",FinQA,Arithmetic


In [87]:
df_test.groupby(['source', 'hybrid_reasoning']).size()

source     hybrid_reasoning
ConvFinQA  Compositional       530
FinDER     Arithmetic           25
           Compositional        52
           Non-numerical       493
FinQA      Arithmetic          368
           Compositional       252
dtype: int64

In [7]:
for reasoning in df_test['hybrid_reasoning'].unique():
    print(f"\n========== Reasoning Type: {reasoning.upper()} ==========\n")
    
    for source in df_test['source'].unique():
        subset = df_test[(df_test['source'] == source) & (df_test['hybrid_reasoning'] == reasoning)]

        if len(subset) == 0:
            continue

        print(f"\n--- Source: {source} ---")
        sample = subset.sample(n=min(3, len(subset)), random_state=42)

        for _, row in sample.iterrows():
            print(f"\nQ: {row['question']}")
            print(f"A: {row['answer']}")
            print(f"Operation: {row['operation']}")
            print(f"Gold Context: {row['gold_context']}")
            print("="*70)




--- Source: FinQA ---

Q: what is the growth rate in the risk-free interest rate from 2004 to 2005?
A: 38.7%
Operation: subtract(4.19, 3.02), divide(#0, 3.02)
Gold Context: {'table_3': 'the risk-free interest rate of 2006 is 4.60 ; the risk-free interest rate of 2005 is 4.19 ; the risk-free interest rate of 2004 is 3.02 ;'}

Q: what was the percent change in the value of commercial paper outstanding between 2010 and 2011?
A: 18%
Operation: subtract(2.80, 2.38), divide(#0, 2.38)
Gold Context: {'text_0': 'we maintain an effective universal shelf registration that allows for the public offering and sale of debt securities , capital securities , common stock , depositary shares and preferred stock , and warrants to purchase such securities , including any shares into which the preferred stock and depositary shares may be convertible , or any combination thereof .', 'text_5': 'at december 31 , 2011 , we had $ 2.38 billion of commercial paper outstanding , compared to $ 2.80 billion at de

# 3. Extend Sampling

In [None]:
# Define sampling plan by (source, reasoning type)
sample_plan = {
    ('FinQA', 'Arithmetic'): 25,
    ('FinQA', 'Compositional'): 25,
    ('ConvFinQA', 'Compositional'): 30,
    ('FinDER', 'Arithmetic'): 20,
    ('FinDER', 'Compositional'): 15,
    ('FinDER', 'Non-numerical'): 35,
}

# Sample from df_test (your labeled full dataset)
samples = []
for (source, reasoning), count in sample_plan.items():
    subset = df_test[(df_test['source'] == source) & (df_test['hybrid_reasoning'] == reasoning)]
    if len(subset) < count:
        print(f"Not enough samples in ({source}, {reasoning}), taking {len(subset)} instead of {count}")
        sample = subset.sample(n=len(subset), random_state=42)
    else:
        sample = subset.sample(n=count, random_state=42)
    samples.append(sample)

df_review = pd.concat(samples).reset_index(drop=True)

In [None]:
# Step 1: Track used questions to avoid duplication
used_questions = set(df_review['question'])

# Step 2: Define additional sampling plan
extended_sample_plan = {
    ('FinQA', 'Arithmetic'): 10,
    ('FinQA', 'Compositional'): 10,
    ('ConvFinQA', 'Compositional'): 30,
}

# Step 3: Sample new questions avoiding duplicates
new_samples = []
for (source, reasoning), count in extended_sample_plan.items():
    subset = df_test[
        (df_test['source'] == source) &
        (df_test['hybrid_reasoning'] == reasoning) &
        (~df_test['question'].isin(used_questions))
    ]
    
    if len(subset) < count:
        print(f"Not enough available samples in ({source}, {reasoning}), taking {len(subset)} instead of {count}")
        sample = subset.sample(n=len(subset), random_state=42)
    else:
        sample = subset.sample(n=count, random_state=42)
    
    new_samples.append(sample)

# Step 4: Combine with original review dataset
df_new = pd.concat(new_samples).reset_index(drop=True)
df_review = pd.concat([df_review, df_new]).reset_index(drop=True)

print(f"df_review now has {len(df_review)} entries.")

✅ df_review now has 200 entries.


In [None]:
# Step 1: Identify duplicate IDs
duplicate_ids = df_review[df_review.duplicated(subset='ID', keep=False)]['ID'].unique()

# Step 2: Extract and drop duplicates (keep only the first occurrence)
duplicates = df_review[df_review['ID'].isin(duplicate_ids)]
df_review = df_review.drop_duplicates(subset='ID', keep='first').reset_index(drop=True)

# Step 3: Track used questions
used_questions = set(df_review['question']) | set(duplicates['question'])

# Step 4: Resample replacements for the dropped duplicates
replacements = []

for _, dup_row in duplicates.iterrows():
    source = dup_row['source']
    reasoning = dup_row['hybrid_reasoning']
    
    subset = df_test[
        (df_test['source'] == source) &
        (df_test['hybrid_reasoning'] == reasoning) &
        (~df_test['question'].isin(used_questions))
    ]
    
    if not subset.empty:
        replacement = subset.sample(n=1, random_state=None)
        replacements.append(replacement)
        used_questions.add(replacement['question'].values[0])
        print(f"Replacement added for ({source}, {reasoning})")
    else:
        print(f"No replacement available for ({source}, {reasoning})")

# Step 5: Add replacements back
if replacements:
    df_review = pd.concat([df_review] + replacements).reset_index(drop=True)

✅ Replacement added for (ConvFinQA, Compositional)
✅ Replacement added for (FinQA, Compositional)
✅ Replacement added for (FinQA, Compositional)
✅ Replacement added for (FinQA, Arithmetic)
✅ Replacement added for (FinQA, Arithmetic)
✅ Replacement added for (FinQA, Arithmetic)
✅ Replacement added for (ConvFinQA, Compositional)
✅ Replacement added for (ConvFinQA, Compositional)
✅ Replacement added for (FinQA, Compositional)
✅ Replacement added for (ConvFinQA, Compositional)


In [78]:
for i, row in df_review.iterrows():
    print(f"\n[{i}] - Source: {row['source']} | Reasoning: {row['hybrid_reasoning']}")
    print(f"Q: {row['question']}")
    print(f"A: {row['answer']}")
    print(f"Operation: {row['operation']}")
    print(f"Gold Context: {row['gold_context']}")
    print("=" * 80)
 


[0] - Source: FinDER | Reasoning: Arithmetic
Q: GM operating margin 2023 vs 2022, GM.
A: To calculate the operating profit margin, we divide Operating Income by Total Net Sales and Revenue. For 2023, the calculation is as follows:

• 2023 Operating Profit Margin = 9,298 / 171,842 ≈ 0.0541, or about 5.41%.

For 2022, using the same method:

• 2022 Operating Profit Margin = 10,315 / 156,735 ≈ 0.0658, or about 6.58%.

This comparison shows that the operating margin declined from approximately 6.58% in 2022 to about 5.41% in 2023.
Operation: Division
Gold Context: 

[1] - Source: FinQA | Reasoning: Arithmetic
Q: what percentage of incremental risk-weighted assets are student loans at january 1 , 2010?
A: 4%
Operation: divide(3.5, 98.9)
Gold Context: {'table_4': 'in billions of dollars the student loans of incremental gaap assets is 14.4 ; the student loans of incremental risk- weighted assets is 3.5 ;', 'table_8': 'in billions of dollars the total of incremental gaap assets is $ 179.0 ; t

In [80]:
df_review.head(10)

Unnamed: 0,ID,question,answer,context,gold_context,operation,source,hybrid_reasoning,answer_verified,context_verified,notes,question_clean
0,182f0809,"GM operating margin 2023 vs 2022, GM.","To calculate the operating profit margin, we d...","[CONSOLIDATED INCOME STATEMENTS\n(In millions,...",,Division,FinDER,Arithmetic,,,,
1,C/2008/page_26.pdf,what percentage of incremental risk-weighted a...,4%,"['commitments .', 'for a further description o...",{'table_4': 'in billions of dollars the studen...,"divide(3.5, 98.9)",FinQA,Arithmetic,,,,
2,ETR/2004/page_20.pdf,what is the growth rate in net revenue in 2003...,0.1%,"[""entergy corporation and subsidiaries managem...",{'table_1': 'the 2002 net revenue of ( in mill...,"subtract(4214.5, 4209.6), divide(#0, 4209.6)",FinQA,Compositional,,,,
3,c593f878,NGC's cyber investments boost investor confide...,The provided information allows us to assess h...,[We recognize the critical importance of maint...,,,FinDER,Non-numerical,,,,
4,cb08b8b0,"SBA Comm., credit evals & DTA quality receivab...",The details provided illustrate that SBA Commu...,[Site leasing revenues\n\nRevenue from site le...,,,FinDER,Non-numerical,,,,
5,GPN/2009/page_85.pdf,Q: what was the fair value of share awards ves...,265% increase,['notes to consolidated financial statements 2...,{'text_2': 'the total fair value of share awar...,"subtract(6.2, 1.7), divide(#0, 1.7)",ConvFinQA,Compositional,,,,
6,AAPL/2012/page_36.pdf,Q: what was the difference between the net sal...,66%,['$ 43.3 million in 2011 compared to $ 34.1 mi...,{'table_1': 'the net sales of 2012 is $ 156508...,"subtract(108249, 65225), divide(#0, 65225)",ConvFinQA,Compositional,,,,
7,AMT/2012/page_123.pdf,what was the cost per tower in the colombia mo...,856067,['american tower corporation and subsidiaries ...,"{'text_8': '( 201ccolombia movil 201d ) , wher...","multiply(182.0, const_1000000), divide(#0, 2126)",FinQA,Compositional,,,,
8,C/2008/page_212.pdf,Q: what was the fair value of the msr in 2008?...,-33.2%,['the company has elected the fair-value optio...,{'table_1': 'in millions of dollars the carryi...,"subtract(4273, 6392), divide(#0, 6392)",ConvFinQA,Compositional,,,,
9,ETR/2016/page_175.pdf,Q: what is the implicit interest cost rate?\nA...,13225.7,['entergy corporation and subsidiaries notes t...,{'table_6': 'the years thereafter of amount ( ...,"divide(5.13, const_100), multiply(#0, 257812)",ConvFinQA,Compositional,,,,


In [None]:
# Snapshot by (Source, Reasoning Type)
snapshot = df_review.groupby(['source', 'hybrid_reasoning']).size().reset_index(name='count')

# Pivot for cleaner table view
pivot_snapshot = snapshot.pivot(index='source', columns='hybrid_reasoning', values='count').fillna(0).astype(int)

# Add total per source
pivot_snapshot['Total'] = pivot_snapshot.sum(axis=1)

# Add grand total at the bottom
pivot_snapshot.loc['Total'] = pivot_snapshot.sum()


import IPython.display as disp
disp.display(pivot_snapshot)

hybrid_reasoning,Arithmetic,Compositional,Non-numerical,Total
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ConvFinQA,0,60,0,60
FinDER,20,15,35,70
FinQA,37,37,0,74
Total,57,112,35,204


In [None]:
df_review.to_json("gold_test_data.json", orient="records", indent=2, force_ascii=False)

In [None]:
# Drop unwanted columns
df_cleaned = df_review.drop(
    columns=[
        "answer_verified",
        "context_verified",
        "notes",
        "question_clean"
    ],
    errors="ignore" 
)


df_cleaned = df_cleaned.rename(columns={"hybrid_reasoning": "reasoning_type"})


output_path = "data/data_processed/Train_Val_Test/gold_test_data.json"

df_cleaned.to_json(
    output_path,
    orient="records",
    indent=2,
    force_ascii=False
)

print(f"Cleaned gold test data saved to: {output_path}")

Cleaned gold test data saved to: /Users/alex/Documents/Data Science Master/thesis_RAG/data/data_processed/Train_Val_Test/gold_test_data.json


## Analyze Test Data for retrievable Questions and Contexts

In [None]:
with open("data/data_processed/Train_Val_Test/gold_test_data.json", 'r') as f:
    data = json.load(f)  


df_test = pd.DataFrame(data)
df_test.head(10)

Unnamed: 0,ID,question,answer,context,gold_context,operation,source,reasoning_type
0,182f0809,"GM operating margin 2023 vs 2022, GM.","To calculate the operating profit margin, we d...","[CONSOLIDATED INCOME STATEMENTS\n(In millions,...",,Division,FinDER,Arithmetic
1,C/2008/page_26.pdf,what percentage of incremental risk-weighted a...,4%,"['commitments .', 'for a further description o...",{'table_4': 'in billions of dollars the studen...,"divide(3.5, 98.9)",FinQA,Arithmetic
2,ETR/2004/page_20.pdf,what is the growth rate in net revenue in 2003...,0.1%,"[""entergy corporation and subsidiaries managem...",{'table_1': 'the 2002 net revenue of ( in mill...,"subtract(4214.5, 4209.6), divide(#0, 4209.6)",FinQA,Compositional
3,c593f878,NGC's cyber investments boost investor confide...,The provided information allows us to assess h...,[We recognize the critical importance of maint...,,,FinDER,Non-numerical
4,cb08b8b0,"SBA Comm., credit evals & DTA quality receivab...",The details provided illustrate that SBA Commu...,[Site leasing revenues\n\nRevenue from site le...,,,FinDER,Non-numerical
5,GPN/2009/page_85.pdf,Q: what was the fair value of share awards ves...,265% increase,['notes to consolidated financial statements 2...,{'text_2': 'the total fair value of share awar...,"subtract(6.2, 1.7), divide(#0, 1.7)",ConvFinQA,Compositional
6,AAPL/2012/page_36.pdf,Q: what was the difference between the net sal...,66%,['$ 43.3 million in 2011 compared to $ 34.1 mi...,{'table_1': 'the net sales of 2012 is $ 156508...,"subtract(108249, 65225), divide(#0, 65225)",ConvFinQA,Compositional
7,AMT/2012/page_123.pdf,what was the cost per tower in the colombia mo...,856067,['american tower corporation and subsidiaries ...,"{'text_8': '( 201ccolombia movil 201d ) , wher...","multiply(182.0, const_1000000), divide(#0, 2126)",FinQA,Compositional
8,C/2008/page_212.pdf,Q: what was the fair value of the msr in 2008?...,-33.2%,['the company has elected the fair-value optio...,{'table_1': 'in millions of dollars the carryi...,"subtract(4273, 6392), divide(#0, 6392)",ConvFinQA,Compositional
9,ETR/2016/page_175.pdf,Q: what is the implicit interest cost rate?\nA...,13225.7,['entergy corporation and subsidiaries notes t...,{'table_6': 'the years thereafter of amount ( ...,"divide(5.13, const_100), multiply(#0, 257812)",ConvFinQA,Compositional


# Adapted
"ID":"ETR\/2016\/page_175.pdf",
"ID":"C\/2008\/page_212.pdf",
"ID":"FIS\/2016\/page_64.pdf",
"ID":"HUM\/2018\/page_129.pdf",
"ID":"AWK\/2018\/page_142.pdf",
"ID":"ETR\/2008\/page_337.pdf",
"ID":"CB\/2010\/page_117.pdf",
"ID":"LMT\/2012\/page_44.pdf",
"ID":"MA\/2008\/page_116.pdf",
"ID":"ADBE\/2013\/page_84.pdf",
"ID":"APTV\/2013\/page_48.pdf",
"ID":"ETR\/2016\/page_403.pdf",
"ID":"GS\/2018\/page_165.pdf",
"ID":"GS\/2012\/page_56.pdf",
"ID":"SLB\/2009\/page_46.pdf",
"ID":"RE\/2012\/page_31.pdf",  
"ID":"DVN\/2011\/page_99.pdf", 
"ID":"APD\/2013\/page_44.pdf", 
"ID":"GS\/2013\/page_184.pdf",
"ID":"BLL\/2007\/page_75.pdf"
"ID":"AMT\/2010\/page_98.pdf",  
"ID":"DG\/2008\/page_73.pdf",
"ID":"HIG\/2012\/page_132.pdf",
"ID":"GS\/2016\/page_79.pdf",
"ID":"MRO\/2008\/page_146.pdf",
"ID":"HOLX\/2008\/page_132.pdf",   
"ID":"AAP\/2006\/page_85.pdf",
"ID":"ILMN\/2006\/page_92.pdf",
"ID":"MA\/2010\/page_107.pdf",
"ID":"UNP\/2015\/page_80.pdf",  
"ID":"APD\/2018\/page_113.pdf",
"ID":"ILMN\/2003\/page_79.pdf",
"ID":"LMT\/2007\/page_37.pdf",
"ID":"LMT\/2012\/page_29.pdf",  
"ID":"WRK\/2019\/page_103.pdf",
"ID":"INTC\/2018\/page_105.pdf",
"ID":"ETR\/2008\/page_336.pdf",
"ID":"HOLX\/2007\/page_127.pdf",
"ID":"MRO\/2015\/page_56.pdf",
"ID":"EMN\/2006\/page_108.pdf",
"ID":"HUM\/2004\/page_78.pdf",
"ID":"AWK\/2018\/page_152.pdf",
"ID":"CB\/2008\/page_216.pdf",  
"ID":"PPG\/2012\/page_76.pdf",  
"ID":"IPG\/2015\/page_79.pdf",
"ID":"EMN\/2016\/page_104.pdf",
"ID":"DVN\/2014\/page_88.pdf",
"ID":"AMT\/2002\/page_74.pdf",
"ID":"ABMD\/2003\/page_27.pdf",
"ID":"JPM\/2010\/page_236.pdf",
 "ID":"AMT\/2005\/page_84.pdf",
 "ID":"ETR\/2017\/page_401.pdf",
     "ID":"VLO\/2018\/page_99.pdf",

In [None]:
import pandas as pd
import json

# === Step 1: Load df_test and gold_test_data ===
df_test = pd.read_json("data/data_processed/Train_Val_Test/df_test.json")
df_review = pd.read_json("data/data_processed/Train_Val_Test/gold_test_data.json")

df_test['reasoning_type'] = df_test.apply(hybrid_reasoning_classifier, axis=1)

In [None]:
# === Step 2: Define FinDER IDs to replace ===
finder_ids_to_replace = {
    "abf9d50e", "dc4687be", "e4261e33", "79a01db8", "8fd9b081", "e75c04e9"
}

# === Step 3: Extract those rows and drop from df_review ===
df_to_replace = df_review[df_review['ID'].isin(finder_ids_to_replace)]
df_review = df_review[~df_review['ID'].isin(finder_ids_to_replace)]

# === Step 4: Track used questions to avoid duplication ===
used_questions = set(df_review['question'])

# === Step 5: Replacement logic ===
replacement_trace = {}
replacement_samples = []

for _, row in df_to_replace.iterrows():
    reasoning = row['reasoning_type']  
    subset = df_test[
        (df_test['source'] == "FinDER") &
        (df_test['reasoning_type'] == reasoning) &
        (~df_test['question'].isin(used_questions)) &
        (~df_test['ID'].isin(set(df_review['ID']))) 
    ]
    
    if subset.empty:
        print(f"No replacement for ID {row['ID']} with reasoning type {reasoning}")
        continue

    replacement = subset.sample(n=1, random_state=42)
    replacement_row = replacement.iloc[0]

    replacement_trace[row['ID']] = {
        "replaced_question": row['question'],
        "replacement_id": replacement_row['ID'],
        "replacement_question": replacement_row['question'],
    }

    used_questions.add(replacement_row['question'])
    replacement_samples.append(replacement)

# === Step 6: Combine replacements with df_review ===
df_replacements = pd.concat(replacement_samples).reset_index(drop=True)
df_review = pd.concat([df_review, df_replacements]).reset_index(drop=True)

# === Step 7: Save outputs ===
df_review.to_json("data/data_processed/Train_Val_Test/gold_test_data_updated.json", orient='records', indent=2)

with open("data/data_processed/Train_Val_Test/replacement_trace.json", "w") as f:
    json.dump(replacement_trace, f, indent=2)

print(" Replacement Mapping:")
for old_id, trace in replacement_trace.items():
    print(f"\nReplaced ID {old_id} → {trace['replacement_id']}")
    print(f"  OLD: {trace['replaced_question']}")
    print(f"  NEW: {trace['replacement_question']}")

print(f"\n New gold dataset has {len(df_review)} entries.")

 Replacement Mapping:

Replaced ID abf9d50e → 9aaf2bc7
  OLD: Impact of amortization on useful lives of intangible assets for PANW.
  NEW: Risk mgmt, board composition, & governance at Masco are key factors to consider for investors, symbol: MAS.

Replaced ID dc4687be → 2c82c639
  OLD: Sum of ORCL support functions employee count (R&D, Hardware, G&A) vs core investment analysis.
  NEW: GPM (Gross Profit/Net Revenues) FY23 ENPH.

Replaced ID e4261e33 → 34763c7c
  OLD: MPC 2023 capex breakdown for equity investees vs PPE, focusing on the ticker.
  NEW: The outcomes of historical legal cases affect ongoing disputes and settlements/losses, especially for AbbVie.

Replaced ID 79a01db8 → 54e238a9
  OLD: Lennar (LEN) faces legal claims that jeopardize profitability.
  NEW: CBRE, focusing on cybersecurity, is key to effective IR planning & risk mitigation.

Replaced ID 8fd9b081 → d3d8efd4
  OLD: Net sales & COGS for Ecolab from 2021-2023 have significantly impacted op. income & efficiency, ECL

In [None]:
df_review = pd.read_json("data/data_processed/Train_Val_Test/gold_test_data_updated.json")

# Snapshot by (Source, Reasoning Type)
snapshot = df_review.groupby(['source', 'reasoning_type']).size().reset_index(name='count')

# Pivot for cleaner table view
pivot_snapshot = snapshot.pivot(index='source', columns='reasoning_type', values='count').fillna(0).astype(int)

# Add total per source
pivot_snapshot['Total'] = pivot_snapshot.sum(axis=1)

# Add grand total at the bottom
pivot_snapshot.loc['Total'] = pivot_snapshot.sum()

import IPython.display as disp
disp.display(pivot_snapshot)

reasoning_type,Arithmetic,Compositional,Non-numerical,Total
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ConvFinQA,0,60,0,60
FinDER,20,15,33,68
FinQA,37,36,0,73
Total,57,111,33,201


In [None]:
with open("data/data_processed/Train_Val_Test/gold_test_data_updated.json", 'r') as f:
    data = json.load(f)  


df_test = pd.DataFrame(data)
df_test.head(10)

Unnamed: 0,ID,question,answer,context,gold_context,operation,source,reasoning_type
0,182f0809,"GM operating margin 2023 vs 2022, GM.","To calculate the operating profit margin, we d...","[CONSOLIDATED INCOME STATEMENTS\n(In millions,...",,Division,FinDER,Arithmetic
1,C/2008/page_26.pdf,"In the financial filing of Citigroup, what per...",4%,"['commitments .', 'for a further description o...",{'table_4': 'in billions of dollars the studen...,"divide(3.5, 98.9)",FinQA,Arithmetic
2,ETR/2004/page_20.pdf,what is the growth rate in net revenue in 2003...,0.1%,"[""entergy corporation and subsidiaries managem...",{'table_1': 'the 2002 net revenue of ( in mill...,"subtract(4214.5, 4209.6), divide(#0, 4209.6)",FinQA,Compositional
3,c593f878,NGC's cyber investments boost investor confide...,The provided information allows us to assess h...,[We recognize the critical importance of maint...,,,FinDER,Non-numerical
4,cb08b8b0,"SBA Comm., credit evals & DTA quality receivab...",The details provided illustrate that SBA Commu...,[Site leasing revenues\n\nRevenue from site le...,,,FinDER,Non-numerical
5,GPN/2009/page_85.pdf,"Q: For GPN, what was the fair value of share a...",265% increase,['notes to consolidated financial statements 2...,{'text_2': 'the total fair value of share awar...,"subtract(6.2, 1.7), divide(#0, 1.7)",ConvFinQA,Compositional
6,AAPL/2012/page_36.pdf,"Q: For Apple Inc., what was the difference bet...",66%,['$ 43.3 million in 2011 compared to $ 34.1 mi...,{'table_1': 'the net sales of 2012 is $ 156508...,"subtract(108249, 65225), divide(#0, 65225)",ConvFinQA,Compositional
7,AMT/2012/page_123.pdf,what was the cost per tower in American Tower’...,856067,['american tower corporation and subsidiaries ...,"{'text_8': '( 201ccolombia movil 201d ) , wher...","multiply(182.0, const_1000000), divide(#0, 2126)",FinQA,Compositional
8,C/2008/page_212.pdf,Q: what was the fair value of the msr reported...,-33.2%,['the company has elected the fair-value optio...,{'table_1': 'in millions of dollars the carryi...,"subtract(4273, 6392), divide(#0, 6392)",ConvFinQA,Compositional
9,ETR/2016/page_175.pdf,Q: what is the implicit interest cost rate rep...,13225.7,['entergy corporation and subsidiaries notes t...,{'table_6': 'the years thereafter of amount ( ...,"divide(5.13, const_100), multiply(#0, 257812)",ConvFinQA,Compositional


In [None]:
df_review = pd.read_json("notebooks/filtered_gold_eval_dataset.json")

# Snapshot by (Source, Reasoning Type)
snapshot = df_review.groupby(['source', 'reasoning_type']).size().reset_index(name='count')

# Pivot for cleaner table view
pivot_snapshot = snapshot.pivot(index='source', columns='reasoning_type', values='count').fillna(0).astype(int)

# Add total per source
pivot_snapshot['Total'] = pivot_snapshot.sum(axis=1)

# Add grand total at the bottom
pivot_snapshot.loc['Total'] = pivot_snapshot.sum()


import IPython.display as disp
disp.display(pivot_snapshot)

reasoning_type,Arithmetic,Compositional,Non-numerical,Total
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ConvFinQA,0,24,0,24
FinDER,20,15,33,68
FinQA,14,12,0,26
Total,34,51,33,118


In [None]:
with open("data/data_processed/Train_Val_Test/df_val.json", 'r') as f:
    data = json.load(f)  


df_test = pd.DataFrame(data)
df_test.head(10)

Unnamed: 0,ID,question,answer,context,gold_context,operation,source
0,UNP/2015/page_80.pdf,what percentage of total minimum lease payment...,68%,['we maintain and operate the assets based on ...,{'table_7': 'millions the total minimum lease ...,"add(3430, 1587), divide(3430, #0)",FinQA
1,CMCSA/2004/page_30.pdf,Q: what was the value of the long-term strateg...,551,['management 2019s discussion and analysis of ...,{'text_18': 'additions to intangibles during 2...,"add(250, 133), add(168, #0)",ConvFinQA
2,MRO/2011/page_37.pdf,by how much did the wti crude oil benchmark in...,53.2%,"['item 7 .', 'management 2019s discussion and ...",{'table_1': 'benchmark the wti crude oil ( dol...,"subtract(95.11, 62.09), divide(#0, 62.09)",FinQA
3,a862932d,Risk mgmt for capital alloc. in accelerated re...,The changes in the terms of the Series B Prefe...,"[In conjunction with the Garrett spin-off, the...",,,FinDER
4,LMT/2012/page_44.pdf,"Q: what was the operating profit, in millions,...",7.4%,['aeronautics 2019 operating profit for 2011 i...,{'table_2': 'the operating profit of 2012 is 8...,"subtract(874, 814), divide(#0, 814)",ConvFinQA
5,6e5aff44,Revenue recognition timing for performance obl...,"Raymond James Financial, Inc. recognizes reven...",[Revenue from contracts with customers is reco...,,,FinDER
6,C/2008/page_26.pdf,what percentage of incremental risk-weighted a...,4%,"['commitments .', 'for a further description o...",{'table_4': 'in billions of dollars the studen...,"divide(3.5, 98.9)",FinQA
7,STT/2017/page_63.pdf,Q: what was the value of the s&p500 index in 2...,53%,"[""state street corporation | 52 shareholder re...",{'table_2': 'the s&p 500 index of 2012 is 100 ...,"subtract(153, 100), divide(#0, 100)",ConvFinQA
8,ETR/2008/page_376.pdf,what is the net change in net revenue during 2...,-1.4,"['entergy texas , inc .', ""management's financ...",{'table_1': 'the 2007 net revenue of amount ( ...,"subtract(440.9, 442.3)",FinQA
9,7ec5e05d,2024 total opex: Nordson Corp (NDSN) cost-of-s...,"For 2024, the cost of sales is $1,203,792 and ...",[Consolidated Statements of Income\nYears ende...,,Addition,FinDER


In [113]:
df_test['reasoning_type'] = df_test.apply(hybrid_reasoning_classifier, axis=1)

In [117]:
from collections import defaultdict

def gold_context_included(sample):
    gold_ctxs = sample.get("gold_context", {})
    context_chunks = sample.get("context", "")

    if isinstance(gold_ctxs, dict):
        gold_values = [v.lower().strip() for v in gold_ctxs.values() if isinstance(v, str)]
    elif isinstance(gold_ctxs, str):
        gold_values = [gold_ctxs.lower().strip()]
    else:
        gold_values = []

    context_text = context_chunks.lower() if isinstance(context_chunks, str) else ""
    return any(gold in context_text for gold in gold_values)


def select_balanced_samples(df, dataset_name, total_samples=10):
    # Filter for dataset
    subset = df[df["source"] == dataset_name]

    # Group by reasoning_type
    grouped = subset.groupby("reasoning_type")
    selected_rows = []
    used_ids = set()

    # Select equal samples from each group
    per_type = total_samples // grouped.ngroups

    for _, group in grouped:
        rows = group.sample(n=min(per_type, len(group)), random_state=42).to_dict("records")
        selected_rows.extend(rows)
        used_ids.update(row["ID"] for row in rows)

    # Fill up remaining slots if some categories are too small
    remaining = total_samples - len(selected_rows)
    if remaining > 0:
        unused_rows = subset[~subset["ID"].isin(used_ids)]
        selected_rows.extend(unused_rows.sample(n=remaining, random_state=42).to_dict("records"))

    return pd.DataFrame(selected_rows)

In [118]:
finqa_selected = select_balanced_samples(df_test, "FinQA", total_samples=10)
convfinqa_selected = select_balanced_samples(df_test, "ConvFinQA", total_samples=10)

DG/2006/page_58.pdf


In [121]:
print("🔍 FinQA Samples:")
for idx, row in finqa_selected.iterrows():
    print(f"\n--- FinQA Sample {idx+1} ---")
    print(f"ID: {row['ID']}")
    print(f"Reasoning Type: {row['reasoning_type']}")
    print(f"Question: {row['question']}")
    print(f"Answer: {row['answer']}")
    print(f"Operation: {row.get('operation', 'N/A')}")
    print(f"Gold Context:\n{row['gold_context']}")
    print(f"Full Context:\n{row['context']}")

🔍 FinQA Samples:

--- FinQA Sample 1 ---
ID: DG/2006/page_58.pdf
Reasoning Type: Arithmetic
Question: what is the yearly depreciation rate on land improvements?
Answer: 5%
Operation: divide(const_100, 20)
Gold Context:
{'table_0': 'land improvements the land improvements of 20 is 20 ;'}
Full Context:
['consolidated 2005 results of operations was an estimated reduction of gross profit and a corresponding decrease to inventory , at cost , of $ 5.2 million .', 'store pre-opening costs pre-opening costs related to new store openings and the construction periods are expensed as incurred .', 'property and equipment property and equipment are recorded at cost .', 'the company provides for depreciation and amortization on a straight-line basis over the following estimated useful lives: .']
land improvements	20
buildings	39-40
furniture fixtures and equipment	3-10
['improvements of leased properties are amortized over the shorter of the life of the applicable lease term or the estimated useful 

In [122]:
print("🔍 FinQA Samples:")
for idx, row in convfinqa_selected.iterrows():
    print(f"\n--- FinQA Sample {idx+1} ---")
    print(f"ID: {row['ID']}")
    print(f"Reasoning Type: {row['reasoning_type']}")
    print(f"Question: {row['question']}")
    print(f"Answer: {row['answer']}")
    print(f"Operation: {row.get('operation', 'N/A')}")
    print(f"Gold Context:\n{row['gold_context']}")
    print(f"Full Context:\n{row['context']}")

🔍 FinQA Samples:

--- FinQA Sample 1 ---
ID: WRK/2019/page_103.pdf
Reasoning Type: Compositional
Question: Q: what was the value of gross unrecognized tax benefits at the end of 2018?
A: 127.1
Q: what was the value at the end of 2017?
A: 148.9
Q: what is the net difference?
A: A0
Q: what was the 2017 value?
A: 148.9
Q: what is the percent change?
Answer: -14.6%
Operation: subtract(127.1, 148.9), divide(#0, 148.9)
Gold Context:
{'table_9': 'the balance at end of fiscal year of 2019 is $ 224.3 ; the balance at end of fiscal year of 2018 is $ 127.1 ; the balance at end of fiscal year of 2017 is $ 148.9 ;'}
Full Context:
['westrock company notes to consolidated financial statements 2014 ( continued ) consistent with prior years , we consider a portion of our earnings from certain foreign subsidiaries as subject to repatriation and we provide for taxes accordingly .', 'however , we consider the unremitted earnings and all other outside basis differences from all other foreign subsidiaries t

In [123]:
def select_more_balanced_samples(df, dataset_name, already_selected_ids, total_new_samples=10):
    """
    Selects more balanced samples from df[dataset_name], excluding already selected IDs.
    """
    # Filter for dataset and remove already selected
    subset = df[(df["source"] == dataset_name) & (~df["ID"].isin(already_selected_ids))]

    # Group by reasoning type
    grouped = subset.groupby("reasoning_type")
    selected_rows = []
    newly_used_ids = set()

    per_type = total_new_samples // grouped.ngroups

    for _, group in grouped:
        available = group[~group["ID"].isin(newly_used_ids)]
        if len(available) > 0:
            rows = available.sample(n=min(per_type, len(available)), random_state=42).to_dict("records")
            selected_rows.extend(rows)
            newly_used_ids.update(row["ID"] for row in rows)

    # Fill remaining if needed
    remaining = total_new_samples - len(selected_rows)
    if remaining > 0:
        unused_rows = subset[~subset["ID"].isin(newly_used_ids)]
        selected_rows.extend(unused_rows.sample(n=remaining, random_state=42).to_dict("records"))

    return pd.DataFrame(selected_rows)

In [None]:
# Get already selected IDs
already_selected_finqa_ids = set(finqa_selected["ID"])
already_selected_convfinqa_ids = set(convfinqa_selected["ID"])

# Select 10 more from each, avoiding duplicates
more_finqa = select_more_balanced_samples(df_test, "FinQA", already_selected_finqa_ids, total_new_samples=10)
more_convfinqa = select_more_balanced_samples(df_test, "ConvFinQA", already_selected_convfinqa_ids, total_new_samples=10)


finqa_selected = pd.concat([finqa_selected, more_finqa], ignore_index=True)
convfinqa_selected = pd.concat([convfinqa_selected, more_convfinqa], ignore_index=True)

In [125]:
print("🔍 FinQA Samples:")
for idx, row in finqa_selected.iterrows():
    print(f"\n--- FinQA Sample {idx+1} ---")
    print(f"ID: {row['ID']}")
    print(f"Reasoning Type: {row['reasoning_type']}")
    print(f"Question: {row['question']}")
    print(f"Answer: {row['answer']}")
    print(f"Operation: {row.get('operation', 'N/A')}")
    print(f"Gold Context:\n{row['gold_context']}")
    print(f"Full Context:\n{row['context']}")

🔍 FinQA Samples:

--- FinQA Sample 1 ---
ID: DG/2006/page_58.pdf
Reasoning Type: Arithmetic
Question: what is the yearly depreciation rate on land improvements?
Answer: 5%
Operation: divide(const_100, 20)
Gold Context:
{'table_0': 'land improvements the land improvements of 20 is 20 ;'}
Full Context:
['consolidated 2005 results of operations was an estimated reduction of gross profit and a corresponding decrease to inventory , at cost , of $ 5.2 million .', 'store pre-opening costs pre-opening costs related to new store openings and the construction periods are expensed as incurred .', 'property and equipment property and equipment are recorded at cost .', 'the company provides for depreciation and amortization on a straight-line basis over the following estimated useful lives: .']
land improvements	20
buildings	39-40
furniture fixtures and equipment	3-10
['improvements of leased properties are amortized over the shorter of the life of the applicable lease term or the estimated useful 

In [126]:
print("🔍 ConvFinQA Samples:")
for idx, row in convfinqa_selected.iterrows():
    print(f"\n--- ConvFinQA Sample {idx+1} ---")
    print(f"ID: {row['ID']}")
    print(f"Reasoning Type: {row['reasoning_type']}")
    print(f"Question: {row['question']}")
    print(f"Answer: {row['answer']}")
    print(f"Operation: {row.get('operation', 'N/A')}")
    print(f"Gold Context:\n{row['gold_context']}")
    print(f"Full Context:\n{row['context']}")

🔍 ConvFinQA Samples:

--- ConvFinQA Sample 1 ---
ID: WRK/2019/page_103.pdf
Reasoning Type: Compositional
Question: Q: what was the value of gross unrecognized tax benefits at the end of 2018?
A: 127.1
Q: what was the value at the end of 2017?
A: 148.9
Q: what is the net difference?
A: A0
Q: what was the 2017 value?
A: 148.9
Q: what is the percent change?
Answer: -14.6%
Operation: subtract(127.1, 148.9), divide(#0, 148.9)
Gold Context:
{'table_9': 'the balance at end of fiscal year of 2019 is $ 224.3 ; the balance at end of fiscal year of 2018 is $ 127.1 ; the balance at end of fiscal year of 2017 is $ 148.9 ;'}
Full Context:
['westrock company notes to consolidated financial statements 2014 ( continued ) consistent with prior years , we consider a portion of our earnings from certain foreign subsidiaries as subject to repatriation and we provide for taxes accordingly .', 'however , we consider the unremitted earnings and all other outside basis differences from all other foreign subsid

In [None]:
import random

random.seed(42)

# Indices to resample
convfinqa_resample_indices = [5, 12, 13, 15, 16, 19]
finqa_resample_indices = [12, 20]

# Number of total samples available
total_convfinqa = len(convfinqa_selected)
total_finqa = len(finqa_selected)

# Function to resample an index ensuring it doesn't conflict with old ones
def resample_indices(original_indices, total_size):
    return [
        random.choice([i for i in range(total_size) if i not in original_indices])
        for _ in original_indices
    ]

# Resample
new_convfinqa_indices = resample_indices(convfinqa_resample_indices, total_convfinqa)
new_finqa_indices = resample_indices(finqa_resample_indices, total_finqa)

# Fetch new samples
convfinqa_resampled = convfinqa_selected.iloc[new_convfinqa_indices]
finqa_resampled = finqa_selected.iloc[new_finqa_indices]

# Combine for review
import pandas as pd
resampled_combined = pd.concat([convfinqa_resampled, finqa_resampled], ignore_index=True)


for idx, row in resampled_combined.iterrows():
    print(f"\n--- Resampled Sample {idx+1} ---")
    print(f"ID: {row['ID']}")
    print(f"Reasoning Type: {row['reasoning_type']}")
    print(f"Question: {row['question']}")
    print(f"Answer: {row['answer']}")
    print(f"Operation: {row.get('operation', 'N/A')}")
    print(f"Gold Context:\n{row['gold_context']}")
    print(f"Full Context:\n{row['context']}")


--- Resampled Sample 1 ---
ID: AMT/2008/page_14.pdf
Reasoning Type: Compositional
Question: Q: what was the number of owned sites in the us and mexico?
A: A0
Q: and including brazil?
Answer: 23200
Operation: add(19400, 2500), add(#0, 1100), add(#1, 200)
Gold Context:
{'table_1': 'country the united states of number of owned sites ( approx ) is 19400 ; the united states of coverage area is coverage spans 49 states and the district of columbia ; 90% ( 90 % ) of network provides coverage in the top 100 markets or core areas such as high traffic interstate corridors . ;', 'table_2': 'country the mexico of number of owned sites ( approx ) is 2500 ; the mexico of coverage area is coverage primarily concentrated in highly populated areas including mexico city monterrey guadalajara and acapulco . ;', 'table_3': 'country the brazil of number of owned sites ( approx ) is 1100 ; the brazil of coverage area is coverage primarily concentrated in major metropolitan areas in central and southern bra

In [None]:
# List of original question indices that were resampled
convfinqa_to_replace = [5, 12, 13, 15, 16, 19]
finqa_to_replace = [12, 20]

# Step 1: Drop original resampled rows from both datasets
convfinqa_final = convfinqa_selected.drop(index=convfinqa_to_replace, errors="ignore")
finqa_final = finqa_selected.drop(index=finqa_to_replace, errors="ignore")

# Step 2: Remove resampled sample 5 (index position 4)
convfinqa_resampled_final = convfinqa_resampled.drop(convfinqa_resampled.index[4])

# Step 3: Combine the cleaned datasets and resampled samples
import pandas as pd
gold_df = pd.concat([
    convfinqa_final,
    finqa_final,
    convfinqa_resampled_final,
    finqa_resampled
], ignore_index=True)


gold_df.to_json("final_gold_dataset.json", orient="records", indent=2)

print(f"Final gold dataset contains {len(gold_df)} samples.")

Final gold dataset contains 40 samples.


In [None]:
import pandas as pd

# Step 1: Load both JSON files
filtered_df = pd.read_json("notebooks/filtered_gold_eval_dataset.json")
final_df = pd.read_json("notebooks/final_gold_dataset.json")

# Step 2: Combine them
merged_df = pd.concat([filtered_df, final_df], ignore_index=True)

# Step 3: Check for ID uniqueness
num_total = len(merged_df)
num_unique = merged_df["ID"].nunique()

print(f"Total samples: {num_total}")
print(f"Unique IDs: {num_unique}")

if num_total == num_unique:
    print("All IDs are unique.")
else:
    print("Duplicate IDs detected!")


merged_df.to_json("merged_gold_eval_dataset.json", orient="records", indent=2)

Total samples: 150
Unique IDs: 149
⚠️ Duplicate IDs detected!


In [None]:
# Drop duplicate based on the "ID" column, keeping the first occurrence
merged_df = merged_df.drop_duplicates(subset="ID", keep="first")

# Recheck
num_total = len(merged_df)
num_unique = merged_df["ID"].nunique()

print(f"Total samples after removing duplicate: {num_total}")
print(f"Unique IDs: {num_unique}")

✅ Total samples after removing duplicate: 149
✅ Unique IDs: 149


In [134]:
merged_df.to_json("merged_gold_eval_dataset.json", orient="records", indent=2)

In [None]:
df_review = merged_df

# Snapshot by (Source, Reasoning Type)
snapshot = df_review.groupby(['source', 'reasoning_type']).size().reset_index(name='count')

# Pivot for cleaner table view
pivot_snapshot = snapshot.pivot(index='source', columns='reasoning_type', values='count').fillna(0).astype(int)

# Add total per source
pivot_snapshot['Total'] = pivot_snapshot.sum(axis=1)

# Add grand total at the bottom
pivot_snapshot.loc['Total'] = pivot_snapshot.sum()

import IPython.display as disp
disp.display(pivot_snapshot)

reasoning_type,Arithmetic,Compositional,Non-numerical,Total
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ConvFinQA,0,37,0,37
FinDER,20,15,33,68
FinQA,23,21,0,44
Total,43,73,33,149
