In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
import os
import re

In [11]:
# Load the merged confidence/correctness table.
# The path is built with os.path.join instead of a literal 'CSV\merged...'
# string: '\m' is an invalid escape sequence (it triggers a SyntaxWarning /
# DeprecationWarning and will become an error in a future Python), and a
# hard-coded backslash only works as a separator on Windows.
data = pd.read_csv(os.path.join('CSV', 'merged_confidence_correctness.csv'))
# Create a folder to save the plots
output_folder = '_final_calibration_curves'
os.makedirs(output_folder, exist_ok=True)

  data = pd.read_csv('CSV\merged_confidence_correctness.csv')


In [16]:
def process_model(model_name, confidence_col, correctness_col):
    """Plot and save the calibration curve for a single model.

    Reads the two named columns from the module-level ``data`` frame, drops
    rows where either value is missing, computes a quantile-binned calibration
    curve and writes ``<model_name>_calibration_curve.png`` into the
    module-level ``output_folder``.

    Args:
        model_name: Label used in the plot title, the legend and the output
            file name.
        confidence_col: Name of the column in ``data`` holding the confidences.
        correctness_col: Name of the column in ``data`` holding the
            correctness labels.

    Raises:
        ValueError: If no row has both a usable confidence and a usable
            correctness value.
    """
    # Placeholder strings mark rows without a usable confidence value.
    confidence = data[confidence_col].replace(
        ['not_mentioned', 'cell_empty', 'NoA', 'NOP', 'EOP'], np.nan
    ).astype(float)

    # Handle mixed data types in correctness column
    def parse_correctness(x):
        """Map one raw correctness cell to 1, 0 or NaN (missing)."""
        if isinstance(x, str):
            # NOTE(review): every string other than 'correct' (including
            # placeholders such as 'not_mentioned') counts as incorrect --
            # confirm this is intended.
            return 1 if x.lower() == 'correct' else 0
        if isinstance(x, (int, float, np.integer, np.floating)):
            # NaN is a float, so it must be caught here; otherwise a missing
            # cell would be scored as "incorrect" (NaN == 1 is False) and the
            # NaN filter below would never remove it. NaN is the only value
            # that is not equal to itself.
            if x != x:
                return np.nan
            return 1 if x == 1 else 0
        return np.nan

    correctness = data[correctness_col].apply(parse_correctness)

    # Remove rows with NaN values. notna() is used instead of np.isnan because
    # it also works when apply() yields an object-dtype Series.
    mask = confidence.notna() & correctness.notna()
    confidence = confidence[mask]
    correctness = correctness[mask]

    if confidence.empty:
        raise ValueError(
            f"No rows with both confidence and correctness for model '{model_name}' "
            f"(columns '{confidence_col}' / '{correctness_col}')."
        )

    # Normalize confidence to [0, 1] range if it's not already.
    # NOTE(review): dividing by 10 presumes a 0-10 confidence scale -- confirm;
    # values on a 0-100 scale would still exceed 1 after this step.
    if confidence.max() > 1:
        confidence = confidence / 10

    # Calculate calibration curve
    prob_true, prob_pred = calibration_curve(correctness, confidence, n_bins=25, strategy='quantile')

    # Plot the calibration curve on an explicit figure so it can be closed
    # reliably, even if saving fails.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot([0, 1], [0, 1], linestyle='--', label='Perfectly calibrated')
        ax.plot(prob_pred, prob_true, marker='o', label=model_name)

        ax.set_xlabel('Mean predicted probability')
        ax.set_ylabel('Fraction of positives')
        ax.set_title(f'Calibration Curve for {model_name}')
        ax.legend()
        ax.grid(True)

        # Save the plot
        fig.savefig(os.path.join(output_folder, f'{model_name}_calibration_curve.png'))
    finally:
        plt.close(fig)

# Function to find matching columns case-insensitively
def find_matching_columns(pattern, columns):
    """Return the entries of ``columns`` that match ``pattern``.

    Matching is case-insensitive and uses ``re.match``, so the pattern is
    anchored at the start of each name. The original order of ``columns`` is
    preserved in the returned list.
    """
    matched = []
    for name in columns:
        if re.match(pattern, name, re.IGNORECASE):
            matched.append(name)
    return matched

In [17]:
# Get all model names (assuming they follow the pattern: modelname_confidence, modelname_correctness)
confidence_columns = find_matching_columns(r'.*_confidence$', data.columns)
model_names = [re.sub(r'_confidence$', '', col, flags=re.IGNORECASE) for col in confidence_columns]

# Process each model. The confidence column is already known, so it is paired
# with its model name directly instead of being looked up again by regex.
for model, confidence_col in zip(model_names, confidence_columns):
    # re.escape: model names may contain regex metacharacters (e.g. '.', '+',
    # '(') that would otherwise mis-match or raise when used as a pattern.
    correctness_matches = find_matching_columns(f'{re.escape(model)}_correctness$', data.columns)
    if not correctness_matches:
        # Report the missing column by name rather than failing with a bare
        # IndexError; the remaining models are still processed.
        print(f"Skipping '{model}': no '{model}_correctness' column found.")
        continue
    process_model(model, confidence_col, correctness_matches[0])

print(f"Calibration curves have been saved in the '{output_folder}' folder.")

Calibration curves have been saved in the '_final_calibration_curves' folder.
