# Measure Length Bias
The script in this Jupyter notebook measures length bias in a dataset by evaluating model performance using influence functions. It computes influence scores against the shorter, longer, and total validation sets, plots a ROC curve for each, and compares their AUC (Area Under the Curve) values. Additionally, the script includes ROC curves plotted alongside the baseline models for comprehensive performance analysis.

## Load Data

In [None]:
import torch
import numpy as np
import datasets
import utils.influence as utils
# `os` is needed for the environment lookup below; the cell's import list omitted it,
# which made the cell fail with NameError.
import os

# Key used to select the stored gradients: 2^16, size of rapid grad.
D = 65536

# Set device: first CUDA device when available, otherwise CPU.
# NOTE(review): `device` is not used by any of the loads in this cell.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load data. Every path is rooted at the directory named by the IF_RLHF_HOME
# environment variable; a missing variable raises KeyError here.
_WORK_PATH = os.environ['IF_RLHF_HOME']
model_path = _WORK_PATH + "/logs/logs/Llama-3-8B_length"
train_data_path = _WORK_PATH + "/dataset/length_dataset/train"
val_data_path = _WORK_PATH + "/dataset/length_dataset/test"

# Each .pt file is indexed by D to pick out the gradients of that size.
# NOTE: torch.load unpickles the file, so only load artefacts you produced yourself.
rapid_grad_train = torch.load(f"{model_path}/rapid_grad_train.pt")[D]
rapid_grad_val = torch.load(f"{model_path}/rapid_grad_val.pt")[D]

# Indices of the training examples that were flipped, stored next to the training set.
flipped_indices = np.load(f"{train_data_path}/flipped_indices.npy")
val_data = datasets.load_from_disk(val_data_path)

## Calculate Influence

In [None]:
# Split the eval data into its two length groups; the index arrays select which
# validation gradients each influence computation uses.
shorter_indices, longer_indices = utils.get_length_indices(val_data)

# One influence result per validation group: shorter, longer, and every validation
# index (the "total" group).
rapid_datainf_shorter, rapid_datainf_longer, rapid_datainf_total = (
    utils.rapid_datainf(rapid_grad_train, rapid_grad_val, group_indices)
    for group_indices in (
        shorter_indices,
        longer_indices,
        np.arange(len(val_data)),
    )
)

## Plot ROC Curve

In [4]:
from matplotlib import pyplot as plt
from utils.influence import get_roc_auc

# Colors for the three ROC curves drawn by plot_roc_curve_for_length.
INFLUENCE_COLOR = '#359afa'
# The plotting function also reads BASELINE_COLOR1 and BASELINE_COLOR2, which were
# never defined and caused a NameError when plotting.
# NOTE(review): the hex values below are chosen to match the hues named in that
# function's inline comments (magenta, green) - adjust to the intended palette.
BASELINE_COLOR1 = '#d6336c'
BASELINE_COLOR2 = '#2f9e44'

In [5]:
def plot_roc_curve_for_length(influence, influence2, influence3, flipped_indices, title, fpr_llm=None, tpr_llm=None, llm_label=None):
    """
    Plots three ROC curves on one figure, one per influence array, and shows each AUC in the legend.

    Parameters:
    influence (np.array): Influence scores drawn as the "Concise" curve.
    influence2 (np.array): Influence scores drawn as the "Verbose" curve.
    influence3 (np.array): Influence scores drawn as the "Full" curve.
    flipped_indices (list): List of indices that were flipped; passed to get_roc_auc
        together with each influence array.
    title (str): Title placed above the axes.
    fpr_llm, tpr_llm, llm_label: Unused. The LLM scatter overlay is disabled, and these
        are accepted only so that existing calls passing them keep working. They default
        to None rather than a shared mutable list.

    Returns:
    None. The figure is displayed with plt.show().
    """
    # Render all text, including mathtext, in Times New Roman.
    # This updates matplotlib's global rcParams, so it also affects later figures.
    plt.rcParams.update({
        'font.family': 'Times New Roman',
        'mathtext.fontset': 'custom',
        'mathtext.it': 'Times New Roman:italic',
        'mathtext.rm': 'Times New Roman',
    })

    # ROC values for each of the three validation sets.
    auc_concise, fpr_concise, tpr_concise = get_roc_auc(influence, flipped_indices)
    auc_verbose, fpr_verbose, tpr_verbose = get_roc_auc(influence2, flipped_indices)
    auc_full, fpr_full, tpr_full = get_roc_auc(influence3, flipped_indices)

    fig, ax = plt.subplots(figsize=(6.5, 6))

    # A little headroom above TPR = 1 keeps the top of the curves from being clipped.
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])

    # One curve per validation set. The colors are module-level constants and must be
    # defined before this function is called.
    ax.plot(fpr_concise, tpr_concise, color=INFLUENCE_COLOR, lw=3,
            label=r'$\mathit{Concise}$, (AUC=%0.3f)' % auc_concise)
    ax.plot(fpr_verbose, tpr_verbose, color=BASELINE_COLOR1, lw=3, linestyle='-.',
            label=r'$\mathit{Verbose}$, (AUC=%0.3f)' % auc_verbose)
    ax.plot(fpr_full, tpr_full, color=BASELINE_COLOR2, lw=3, linestyle='--',
            label=r'Full, (AUC=%0.3f)' % auc_full)

    # Diagonal reference line representing a random classifier.
    ax.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--', alpha=0.6)

    # Labels, legend and title.
    ax.set_xlabel('False Positive Rate', fontsize=22)
    ax.set_ylabel('True Positive Rate', fontsize=22)
    ax.tick_params(axis='both', which='major', labelsize=16, length=0)
    ax.legend(loc="lower right", fontsize=16, title="Validation Set", title_fontsize=16)
    ax.set_title(title, fontsize=22)

    # Light dashed grid; hide every spine except the bottom one.
    ax.grid(True, which='major', linestyle='--', color='gray', alpha=0.3)
    for side in ('top', 'right', 'left'):
        ax.spines[side].set_visible(False)

    plt.show()

In [None]:
# Plot the ROC curves for the shorter, longer and total validation sets.
# The LLM baseline arguments (fpr_llm, tpr_llm, llm_labels) are not passed: those names
# are never defined in this notebook, and the plotting function does not use them.
plot_roc_curve_for_length(rapid_datainf_shorter, rapid_datainf_longer, rapid_datainf_total, flipped_indices, "Length Bias")