# Imports

In [1]:
import sys

import dotenv
import pandas as pd

from IPython.display import display, HTML

dotenv.load_dotenv()
sys.path.append("../../src")
import dataframe_utils
import GiveMeSomeCredit

# Variables

# Load the Dataset

This section loads the LLM text classification data into a DataFrame and displays its basic information.


In [2]:
# Load every LLM classification response into one DataFrame.
responses_df = GiveMeSomeCredit.load_classification_responses()

# Summary table from dataframe_utils.describe_df, shown without truncating
# rows or columns.
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    summary_df = dataframe_utils.describe_df(responses_df)
    display(summary_df)

# First five responses, left-aligned and with full-width cell contents.
left_aligned = {"text-align": "left"}
with pd.option_context("display.max_colwidth", None):
    preview_styler = responses_df.head(5).style.set_properties(**left_aligned)
    display(preview_styler)

# Frequency of each raw prediction string (casing variants are counted separately).
prediction_counts = responses_df["Prediction"].value_counts()
print(prediction_counts)

Unnamed: 0,dtype,count,non_null,null_count,unique,top,freq,mean,std,min,25%,50%,75%,max
Row ID,int64,90000,90000,0,,,,74548.148,43327.774348,4.0,36814.25,74261.0,111940.25,149993.0
Model,object,90000,90000,0,1.0,google/flan-t5-small,90000.0,,,,,,,
Description Column,int64,90000,90000,0,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Classification Question ID,int64,90000,90000,0,,,,1.0,0.816501,0.0,0.0,1.0,2.0,2.0
Prediction,object,90000,90000,0,3.0,no,61479.0,,,,,,,
Yes Probability,float64,90000,90000,0,,,,0.373138,0.080088,0.169391,0.273927,0.41593,0.435758,0.495583
No Probability,float64,90000,90000,0,,,,0.461125,0.155254,0.136118,0.255221,0.558641,0.577702,0.611146


Unnamed: 0,Row ID,Model,Description Column,Classification Question ID,Prediction,Yes Probability,No Probability
0,4,google/flan-t5-small,0,0,yes,0.248806,0.226628
1,12,google/flan-t5-small,0,0,yes,0.266791,0.23439
2,14,google/flan-t5-small,0,0,yes,0.224461,0.160168
3,24,google/flan-t5-small,0,0,yes,0.26158,0.260417
4,25,google/flan-t5-small,0,0,yes,0.25744,0.243585


Prediction
no     61479
yes    27960
Yes      561
Name: count, dtype: int64


# Convert Text Predictions

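This code converts the LLM predictions stored in `classification_prompts.csv` to binary predictions (`yes` → 1, `no` → 0, ignoring case and surrounding whitespace). A response that is neither `yes` nor `no` is coded as -1, and a row with no response is coded as -2. The process is applied separately to each combination of model, description column, and classification question, and each set of results is saved.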


In [3]:
# Integer codes written to the "pred" column.
LABEL_TO_PRED = {"YES": 1, "NO": 0}
PRED_UNPARSED = -1  # response text exists but is not a plain yes/no after cleaning
PRED_MISSING = -2   # no response text (NaN prediction, or a validation row absent from the group)

validation_ids = set(GiveMeSomeCredit.get_validation_row_ids())
display(responses_df.head())
group_columns = ["Model", "Description Column", "Classification Question ID"]

# One pass per (model, description column, question) combination; each pass
# is saved under its own "<model> D<description> Q<question>" name.
for (model, description_id, question_id), group_df in responses_df.groupby(group_columns):
    model_name = f"{model} D{description_id} Q{question_id}"
    display(HTML(f"<h1>{model_name}</h1>"))

    # "Row ID" becomes the index below, so it must identify each response
    # uniquely within the group.
    if not group_df["Row ID"].is_unique:
        raise ValueError(
            f"Duplicate Row IDs found for {model_name!r}! "
            "Every Row ID must be unique within a group."
        )

    with pd.option_context("display.max_colwidth", None):
        display(
            group_df.head(5).style.set_properties(**{"text-align": "left"})
        )

    # Keep the loop variable untouched; work on a Row ID-indexed copy.
    indexed_df = group_df.set_index("Row ID")

    # Add an all-NaN row for every validation ID that has no response in this
    # group, so those rows surface as PRED_MISSING instead of silently vanishing.
    missing_ids = list(validation_ids - set(indexed_df.index))
    if missing_ids:
        indexed_df = indexed_df.reindex(indexed_df.index.union(missing_ids))

    # Normalise the raw text: drop non-ASCII characters, upper-case, trim.
    pred_clean = (
        indexed_df["Prediction"]
        .str.replace(r"[^\x00-\x7F]", "", regex=True)
        .str.upper()
        .str.strip()
    )

    # Order matters: missing text is tagged first, so that the later fillna
    # only catches text that was present but is not a key of LABEL_TO_PRED.
    pred_numeric = (
        pred_clean
        .map(LABEL_TO_PRED)
        .where(pred_clean.notna(), PRED_MISSING)
        .fillna(PRED_UNPARSED)
        .astype(int)
    )

    # Share of "yes" within the combined yes/no probability; NaN for rows
    # that were added above without a response.
    yes_prob = indexed_df["Yes Probability"]
    no_prob = indexed_df["No Probability"]
    pred_probs = yes_prob / (yes_prob + no_prob)

    # All four Series share indexed_df's index, so the frame inherits it and
    # the dict order fixes the column order.
    predictions_df = pd.DataFrame(
        {
            "pred": pred_numeric,
            "pred_probs": pred_probs,
            "yes_prob": yes_prob,
            "no_prob": no_prob,
        }
    )

    with pd.option_context("display.max_rows", 5, "display.max_columns", None):
        display(predictions_df)
    GiveMeSomeCredit.save_train_validation_results(model_name, predictions_df)

Unnamed: 0,Row ID,Model,Description Column,Classification Question ID,Prediction,Yes Probability,No Probability
0,4,google/flan-t5-small,0,0,yes,0.248806,0.226628
1,12,google/flan-t5-small,0,0,yes,0.266791,0.23439
2,14,google/flan-t5-small,0,0,yes,0.224461,0.160168
3,24,google/flan-t5-small,0,0,yes,0.26158,0.260417
4,25,google/flan-t5-small,0,0,yes,0.25744,0.243585


Unnamed: 0,Row ID,Model,Description Column,Classification Question ID,Prediction,Yes Probability,No Probability
0,4,google/flan-t5-small,0,0,yes,0.248806,0.226628
1,12,google/flan-t5-small,0,0,yes,0.266791,0.23439
2,14,google/flan-t5-small,0,0,yes,0.224461,0.160168
3,24,google/flan-t5-small,0,0,yes,0.26158,0.260417
4,25,google/flan-t5-small,0,0,yes,0.25744,0.243585


Unnamed: 0_level_0,pred,pred_probs,yes_prob,no_prob
Row ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4,1,0.523325,0.248806,0.226628
12,1,0.532325,0.266791,0.234390
...,...,...,...,...
149992,1,0.522047,0.260023,0.238060
149993,1,0.521922,0.275122,0.252010


column:  ('google/flan-t5-small D0 Q0', 'pred')
column:  ('google/flan-t5-small D0 Q0', 'pred_probs')
column:  ('google/flan-t5-small D0 Q0', 'yes_prob')
column:  ('google/flan-t5-small D0 Q0', 'no_prob')


2025-09-12 20:29:26,406 - INFO - Saved DataFrame to processed directory: /Users/rina/llm-classification/data/GiveMeSomeCredit/processed/training_results.csv
2025-09-12 20:29:26,617 - INFO - Saved DataFrame to processed directory: /Users/rina/llm-classification/data/GiveMeSomeCredit/processed/validation_results.csv


Unnamed: 0,Row ID,Model,Description Column,Classification Question ID,Prediction,Yes Probability,No Probability
30000,4,google/flan-t5-small,0,1,no,0.451643,0.548016
30001,12,google/flan-t5-small,0,1,no,0.441889,0.557768
30002,14,google/flan-t5-small,0,1,no,0.459175,0.54049
30003,24,google/flan-t5-small,0,1,no,0.440042,0.559614
30004,25,google/flan-t5-small,0,1,no,0.444551,0.555098


Unnamed: 0_level_0,pred,pred_probs,yes_prob,no_prob
Row ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4,0,0.451797,0.451643,0.548016
12,0,0.442041,0.441889,0.557768
...,...,...,...,...
149992,0,0.443931,0.443778,0.555879
149993,0,0.432476,0.432323,0.567323


column:  ('google/flan-t5-small D0 Q1', 'pred')
column:  ('google/flan-t5-small D0 Q1', 'pred_probs')
column:  ('google/flan-t5-small D0 Q1', 'yes_prob')
column:  ('google/flan-t5-small D0 Q1', 'no_prob')


2025-09-12 20:29:27,281 - INFO - Saved DataFrame to processed directory: /Users/rina/llm-classification/data/GiveMeSomeCredit/processed/training_results.csv
2025-09-12 20:29:27,611 - INFO - Saved DataFrame to processed directory: /Users/rina/llm-classification/data/GiveMeSomeCredit/processed/validation_results.csv


Unnamed: 0,Row ID,Model,Description Column,Classification Question ID,Prediction,Yes Probability,No Probability
60000,4,google/flan-t5-small,0,2,no,0.421418,0.576574
60001,12,google/flan-t5-small,0,2,no,0.424796,0.573229
60002,14,google/flan-t5-small,0,2,no,0.446836,0.551045
60003,24,google/flan-t5-small,0,2,no,0.413422,0.58445
60004,25,google/flan-t5-small,0,2,no,0.421743,0.576147


Unnamed: 0_level_0,pred,pred_probs,yes_prob,no_prob
Row ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4,0,0.422266,0.421418,0.576574
12,0,0.425637,0.424796,0.573229
...,...,...,...,...
149992,0,0.422996,0.422142,0.575839
149993,0,0.419075,0.418228,0.579752


column:  ('google/flan-t5-small D0 Q2', 'pred')
column:  ('google/flan-t5-small D0 Q2', 'pred_probs')
column:  ('google/flan-t5-small D0 Q2', 'yes_prob')
column:  ('google/flan-t5-small D0 Q2', 'no_prob')


2025-09-12 20:29:28,312 - INFO - Saved DataFrame to processed directory: /Users/rina/llm-classification/data/GiveMeSomeCredit/processed/training_results.csv
2025-09-12 20:29:28,762 - INFO - Saved DataFrame to processed directory: /Users/rina/llm-classification/data/GiveMeSomeCredit/processed/validation_results.csv


# Check Results

In [4]:
# Reload the saved result tables and render each one in turn.
saved_results = GiveMeSomeCredit.load_training_validation_results()
display(*saved_results)

Unnamed: 0_level_0,Logistic Regression,Logistic Regression,Random Forest,Random Forest,Histogram-Based Gradient Boosting,Histogram-Based Gradient Boosting
Unnamed: 0_level_1,pred,pred_probs,pred,pred_probs,pred,pred_probs
Row ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1,1,0.756797,1,0.71,0,0.342372
2,0,0.464318,0,0.03,0,0.089232
3,1,0.707312,0,0.10,0,0.320130
5,0,0.209631,0,0.08,0,0.124813
6,0,0.249206,0,0.01,0,0.008083
...,...,...,...,...,...,...
149996,0,0.243774,0,0.00,0,0.004548
149997,0,0.451480,0,0.00,0,0.020155
149998,0,0.298296,0,0.00,0,0.016222
149999,1,0.504252,0,0.00,0,0.010798


Unnamed: 0_level_0,Logistic Regression,Logistic Regression,Random Forest,Random Forest,Histogram-Based Gradient Boosting,Histogram-Based Gradient Boosting,google/flan-t5-small D0 Q0,google/flan-t5-small D0 Q0,google/flan-t5-small D0 Q0,google/flan-t5-small D0 Q0,google/flan-t5-small D0 Q1,google/flan-t5-small D0 Q1,google/flan-t5-small D0 Q1,google/flan-t5-small D0 Q1,google/flan-t5-small D0 Q2,google/flan-t5-small D0 Q2,google/flan-t5-small D0 Q2,google/flan-t5-small D0 Q2
Unnamed: 0_level_1,pred,pred_probs,pred,pred_probs,pred,pred_probs,pred,pred_probs,yes_prob,no_prob,pred,pred_probs,yes_prob,no_prob,pred,pred_probs,yes_prob,no_prob
Row ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
4,1,0.518869,0,0.00,0,0.022168,1,0.523325,0.248806,0.226628,0,0.451797,0.451643,0.548016,0,0.422266,0.421418,0.576574
12,0,0.413919,0,0.00,0,0.013772,1,0.532325,0.266791,0.234390,0,0.442041,0.441889,0.557768,0,0.425637,0.424796,0.573229
14,1,0.947342,1,0.63,1,0.618585,1,0.583578,0.224461,0.160168,0,0.459329,0.459175,0.540490,0,0.447785,0.446836,0.551045
24,0,0.476525,0,0.00,0,0.014112,1,0.501114,0.261580,0.260417,0,0.440193,0.440042,0.559614,0,0.414304,0.413422,0.584450
25,0,0.340831,0,0.03,0,0.009455,1,0.513826,0.257440,0.243585,0,0.444707,0.444551,0.555098,0,0.422635,0.421743,0.576147
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149984,0,0.180238,0,0.00,0,0.005129,1,0.511567,0.266742,0.254680,0,0.446341,0.446193,0.553476,0,0.414935,0.414106,0.583897
149985,0,0.170122,0,0.00,0,0.005574,1,0.507404,0.279580,0.271420,0,0.435533,0.435390,0.564281,0,0.415646,0.414839,0.583221
149987,0,0.386821,0,0.03,0,0.025807,1,0.529707,0.288262,0.255930,0,0.454778,0.454623,0.545038,0,0.426753,0.425915,0.572120
149992,0,0.309497,0,0.00,0,0.012891,1,0.522047,0.260023,0.238060,0,0.443931,0.443778,0.555879,0,0.422996,0.422142,0.575839
