# OLMo Model Dataset Parsing

This notebook loads and processes the `allenai/multipref` preference dataset from Hugging Face for use with the ICAI framework.

In [1]:
# Install any missing packages
# !pip install datasets transformers pandas numpy matplotlib

In [2]:
# Import necessary libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datasets import load_dataset
from tqdm.auto import tqdm

## Load the Multipref Dataset from Hugging Face

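The Allen Institute for AI (AI2) released the OLMo model along with several datasets. Here we load the MultiPref dataset (`allenai/multipref`) from Hugging Face: its default config plus the `gpt4_overall_binarized` and `human_overall_binarized` configs.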

In [3]:
# Load the MultiPref dataset (allenai/multipref) from Hugging Face.
# `dataset` is the default config; `ds_gpt4` and `ds_human` are the named
# "gpt4_overall_binarized" and "human_overall_binarized" configs of the same repo.
dataset = load_dataset("allenai/multipref")
ds_gpt4 = load_dataset("allenai/multipref", "gpt4_overall_binarized")
ds_human = load_dataset("allenai/multipref", "human_overall_binarized")

In [4]:
# Convert the "train" split of the default config into a pandas DataFrame.
main_df = dataset["train"].to_pandas()

In [5]:
# Persist the unmodified train split. The output directory is created first so the
# export does not fail on a checkout where ../data/processed/allenai does not exist yet.
os.makedirs("../data/processed/allenai", exist_ok=True)
main_df.to_csv("../data/processed/allenai/multipref_original.csv", index=False)

In [None]:
# Gather every overall_pref label found in the normal_worker_annotations column;
# annotations that carry no 'overall_pref' key are skipped.
overall_prefs = [
    annotation['overall_pref']
    for annotations in main_df["normal_worker_annotations"]
    for annotation in annotations
    if 'overall_pref' in annotation
]

# Show how often each distinct label occurs
unique_prefs = pd.Series(overall_prefs).value_counts()
print("Unique overall_pref values:")
print(unique_prefs)

In [None]:
# Collapse the five overall_pref labels into three buckets: A better, B better, tie.
pref_series = pd.Series(overall_prefs)
num_a_better = pref_series.isin(["A-is-slightly-better", "A-is-clearly-better"]).sum()
num_b_better = pref_series.isin(["B-is-slightly-better", "B-is-clearly-better"]).sum()
num_tie = pref_series.eq("Tie").sum()

print(f"Number of A-is-better: {num_a_better}")
print(f"Number of B-is-better: {num_b_better}")
print(f"Number of Tie: {num_tie}")



In [None]:
# model distribution
# Count how often each model name appears in the model_a and model_b columns.

model_a_counts = main_df["model_a"].value_counts()
model_b_counts = main_df["model_b"].value_counts()

# The f-strings embed the printed form of each value_counts() Series.
print(f"Model A counts: {model_a_counts}")
print(f"\n\nModel B counts: {model_b_counts}")



In [None]:
# Create a function to flatten annotations and add them as columns to the main dataframe
def flatten_annotations(df, annotation_column, prefix):
    """Expand a column of per-row annotation lists into flat columns.

    For the i-th annotation of a row, every ``key: value`` pair becomes a column
    named ``{prefix}_{i}_{key}``. Array-like values (NumPy arrays, lists, tuples)
    are stored as a comma-separated string and additionally produce one
    ``{prefix}_{i}_{key}_{item}`` column per item, set to ``True`` for that row.
    Cells that a row never populates are left missing.

    Args:
        df: DataFrame containing the annotations. It is not modified.
        annotation_column: Name of the column whose cells are sequences of
            dict-like annotations.
        prefix: Label embedded in the generated column names and in the
            progress-bar description.

    Returns:
        A new DataFrame holding the columns of ``df`` followed by the generated
        columns. A generated column replaces a same-named column of ``df``.
    """
    flat_rows = []
    annotation_cells = df[annotation_column]

    for annotations in tqdm(annotation_cells, total=len(df), desc=f"Flattening {prefix} annotations"):
        flat_row = {}

        # A missing cell (None, or NaN which is a float) contributes no columns for this row.
        if annotations is None or isinstance(annotations, float):
            annotations = ()

        for i, annotation in enumerate(annotations):
            for key, value in annotation.items():
                column_name = f"{prefix}_{i}_{key}"

                # Handle special case for arrays like 'helpful_checked_reasons'
                if isinstance(value, (np.ndarray, list, tuple)):
                    # Comma-separated summary; an empty array yields ''. str() keeps
                    # the join from failing on non-string items.
                    flat_row[column_name] = ','.join(str(item) for item in value)

                    # One indicator column per item present in the array
                    for reason in value:
                        flat_row[f"{column_name}_{reason}"] = True
                else:
                    # For regular values, just add them directly
                    flat_row[column_name] = value

        flat_rows.append(flat_row)

    # Build all generated columns in a single frame instead of enlarging the
    # DataFrame one cell at a time, then attach them in one concat.
    flat_df = pd.DataFrame(flat_rows, index=df.index)

    # Generated columns take precedence over same-named columns already in df.
    stale_columns = [col for col in flat_df.columns if col in df.columns]
    base_df = df.drop(columns=stale_columns)

    return pd.concat([base_df, flat_df], axis=1)

# Flatten the normal worker annotations first, then the expert worker annotations
# on top of that intermediate result
main_df_with_normal = flatten_annotations(main_df, "normal_worker_annotations", "normal")
main_df_flattened = flatten_annotations(main_df_with_normal, "expert_worker_annotations", "expert")

# Report the column count before and after flattening
print(f"\nOriginal number of columns: {main_df.shape[1]}")
print(f"Number of columns after flattening: {main_df_flattened.shape[1]}")

# List the columns that flattening introduced, in their current order
added_mask = ~main_df_flattened.columns.isin(main_df.columns)
new_columns = main_df_flattened.columns[added_mask].tolist()
print(f"\nNumber of new columns added: {len(new_columns)}")
print("\nSample of new columns:")
print(new_columns[:20])

# Order the columns alphabetically (done after the report so the sample above
# reflects insertion order)
main_df_flattened = main_df_flattened.sort_index(axis="columns")

# Example of what an annotation looks like (for reference)
# example_evaluator = {'evaluator': 'keen_williams',
#  'harmless_checked_reasons': np.array([], dtype=object),
#  'harmless_confidence': 'absolutely-confident',
#  'harmless_own_reason': '',
#  'harmless_pref': 'Tie',
#  'helpful_checked_reasons': np.array(['well_formatted', 'coherent', 'creative', 'better_tone'],
#        dtype=object),
#  'helpful_confidence': 'absolutely-confident',
#  'helpful_own_reason': '',
#  'helpful_pref': 'B-is-clearly-better',
#  'overall_confidence': 'absolutely-confident',
#  'overall_pref': 'B-is-clearly-better',
#  'time_spent': 213,
#  'timestamp': '2024-05-03 04:17:06.661562',
#  'truthful_checked_reasons': np.array([], dtype=object),
#  'truthful_confidence': 'absolutely-confident',
#  'truthful_own_reason': '',
#  'truthful_pref': 'Tie'}

In [10]:
# merge

# Convert the binarized GPT-4 and human configs to DataFrames and suffix every
# column so its origin stays identifiable after merging.
df_gpt4 = ds_gpt4["train"].to_pandas().add_suffix("_gpt4")
df_human = ds_human["train"].to_pandas().add_suffix("_human")

# Left joins keep every row of main_df_flattened; a row without a match in a
# binarized config gets missing values in that config's suffixed columns.
merged_df = pd.merge(main_df_flattened, df_gpt4, left_on="comparison_id", right_on="comparison_id_gpt4", how="left")
merged_df = pd.merge(merged_df, df_human, left_on="comparison_id", right_on="comparison_id_human", how="left")

# Label which completion each judge chose by comparing the content of the last
# message of the "chosen" conversation with completion_a / completion_b.
# A single pass per judge is enough, and no loop variable named `dataset` is used,
# so the `dataset` object loaded earlier in the notebook is not overwritten.
for chat_col in ["gpt4", "human"]:
    label_col = f"preferred_text_{chat_col}"
    # Pre-create the column so rows without a chosen conversation stay missing
    merged_df[label_col] = None

    for idx, row in merged_df.iterrows():
        chosen = row[f"chosen_{chat_col}"]

        # Unmatched rows from the left join carry None/NaN (a float) here;
        # leave their label missing instead of failing on the subscript below.
        if chosen is None or isinstance(chosen, float) or len(chosen) == 0:
            continue

        selected_completion = chosen[-1]["content"]
        if selected_completion == row["completion_a"]:
            merged_df.at[idx, label_col] = "text_a"
        elif selected_completion == row["completion_b"]:
            merged_df.at[idx, label_col] = "text_b"
        else:
            print(f"Selected completion {selected_completion} not in {row['completion_a']} or {row['completion_b']}")
            merged_df.at[idx, label_col] = "invalid"


In [None]:
# Display the merged frame to inspect the result of the merge cell.
merged_df

In [12]:
# Build chat-style conversations text_a / text_b from the prompt ("text") and the
# matching completion column, e.g.
# text_a = [{'role': 'user', 'content': text}, {'role': 'assistant', 'content': completion_a}]

for side in ("a", "b"):
    completion_col = f"completion_{side}"
    merged_df[f"text_{side}"] = merged_df[["text", completion_col]].apply(
        # Bind the column name as a default so each lambda reads its own completion
        lambda pair, col=completion_col: [
            {'role': 'user', 'content': pair["text"]},
            {'role': 'assistant', 'content': pair[col]},
        ],
        axis=1,
    )




In [None]:
# Print the row labelled 0 of each conversation / preference column as a sanity check
for column in (
    "text_a",
    "text_b",
    "chosen_gpt4",
    "chosen_human",
    "preferred_text_gpt4",
    "preferred_text_human",
):
    print(merged_df[column][0])

In [None]:
# Show all column names of the merged frame as a plain list.
list(merged_df.columns)

In [None]:
# Convert normal and expert overall preferences into "text_a", "text_b" or "tie"
# Start by collecting the flattened overall-preference columns present in the frame,
# split by annotator group via the 'normal_' / 'expert_' substrings
overall_pref_cols = [col for col in merged_df.columns if '_overall_pref' in col]
normal_pref_cols = [col for col in overall_pref_cols if 'normal_' in col]
expert_pref_cols = [col for col in overall_pref_cols if 'expert_' in col]

# Function to convert preference notation to text_a/text_b/tie
def convert_pref_to_text(pref):
    """Translate an overall-preference label into 'text_a', 'text_b' or 'tie'.

    Both the "slightly" and "clearly" variants of a side map to that side.
    Any value that is not one of the five known labels yields 'invalid'.
    """
    if pref in ('A-is-slightly-better', 'A-is-clearly-better'):
        return 'text_a'
    if pref in ('B-is-slightly-better', 'B-is-clearly-better'):
        return 'text_b'
    return 'tie' if pref == 'Tie' else 'invalid'

# Derive a "..._preferred_text" column from every overall-preference column,
# normal annotators first, then experts
for pref_col in [*normal_pref_cols, *expert_pref_cols]:
    target_col = pref_col.replace('_overall_pref', '_preferred_text')
    merged_df[target_col] = merged_df[pref_col].apply(convert_pref_to_text)

print(f"Converted {len(normal_pref_cols)} normal annotator preferences and {len(expert_pref_cols)} expert annotator preferences")


In [16]:
# Remove the prompt / rejected / chosen columns that were brought in from the
# "_gpt4" and "_human" suffixed frames

raw_conversation_columns = [
    f"{field}_{judge}"
    for field in ("prompt", "rejected", "chosen")
    for judge in ("human", "gpt4")
]
merged_df = merged_df.drop(columns=raw_conversation_columns)

In [17]:
# Use the human-derived label as the default "preferred_text" column.
merged_df["preferred_text"] = merged_df["preferred_text_human"]

In [18]:
# Write the merged frame to CSV with the row index in a column named "index".
# NOTE: only the first 10000 rows are exported; any rows beyond that are not written.
merged_df.head(10000).to_csv(
    "../data/processed/allenai/multipref_gpt4_human_merged.csv",
    index=True,
    index_label="index",
)


In [None]:
# Show the first row as a {column: value} dict to inspect the final schema.
merged_df.iloc[0].to_dict()