In [1]:
# Notebook to preprocess feedback data for the Inverse Constitutional AI algorithm

import pathlib
import inverse_cai
import numpy as np
import pandas as pd

DATA_DIR = pathlib.Path('../data')

In [2]:
# Utility functions for saving and re-applying a fixed random row order.
# The random shuffle of the data is written to disk once,
# so that it can be reloaded and reproduced in later runs.

def save_random_index(df, path, seed=42):
    """Write a seeded random permutation of ``df.index`` to a CSV file.

    The file contains one index label per line, without a header row and
    without an index column.

    Args:
        df: DataFrame whose index labels are shuffled.
        path: Destination CSV path (string or path-like). Missing parent
            directories are created.
        seed: Seed for the permutation. The same seed and index always
            produce the same order.
    """
    # A private RandomState yields the same permutation as
    # np.random.seed(seed) followed by np.random.permutation(...), but does
    # not reseed NumPy's global generator behind the caller's back.
    rng = np.random.RandomState(seed)
    random_index_series = pd.Series(rng.permutation(df.index), index=df.index)

    # to_csv refuses to write into a directory that does not exist yet.
    path = pathlib.Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    random_index_series.to_csv(path, index=False, header=False)

def load_random_order_and_apply_to_df(df, path):
    """Return the rows of ``df`` reordered by the index labels stored at ``path``.

    The file is expected in the format written by ``save_random_index``:
    one index label per line, no header.

    Args:
        df: DataFrame to reorder.
        path: CSV file holding the stored order of index labels.

    Returns:
        The rows of ``df`` selected via ``df.loc`` in the stored order.

    Raises:
        AssertionError: If the number of stored labels differs from ``len(df)``.
    """
    # Squeeze only the column axis: a bare squeeze() would collapse a
    # single-row file into a scalar, which has no len() and no .values.
    rand_index = pd.read_csv(path, header=None).squeeze("columns")
    assert len(rand_index) == len(df), f"Length of random index {len(rand_index)} does not match length of dataframe {len(df)}"
    return df.loc[rand_index.values]

In [3]:
# OPTIONAL: Fetch the Anthropic hh-rlhf base datasets if not downloaded yet
import gzip
import shutil
from urllib.request import urlopen

# Pin the upstream commit so that every run downloads identical data.
dataset_revision = "354e5c3cb8960630860dd5774a6ac2a58313bf4d"

# The preprocessing step reads both the "helpful" and the "harmless" subset,
# so both are fetched here.
# NOTE(review): assumes the upstream repository stores the subsets as
# "<subset>-base/train.jsonl.gz" at this revision — confirm for "harmless".
for subset in ["helpful", "harmless"]:
    anthropic_target_dir = DATA_DIR / f"raw/anthropic/anthropic_{subset}_base"
    anthropic_target_dir.mkdir(parents=True, exist_ok=True)

    file_path = anthropic_target_dir / "train.jsonl"
    if file_path.exists():
        # Already downloaded: skip the network round trip on re-runs.
        continue

    # Download and unzip the file, streaming it to disk instead of holding
    # the whole decompressed payload in memory.
    url = f"https://github.com/anthropics/hh-rlhf/raw/{dataset_revision}/{subset}-base/train.jsonl.gz"
    partial_path = anthropic_target_dir / "train.jsonl.part"
    with urlopen(url) as response, gzip.open(response, 'rb') as gz, open(partial_path, 'wb') as f:
        shutil.copyfileobj(gz, f)

    # Rename only after a complete download, so an interrupted run never
    # leaves a truncated train.jsonl that later runs would skip over.
    partial_path.replace(file_path)

In [None]:
# Preprocess feedback data from Anthropic
# Origin: https://github.com/anthropics/hh-rlhf

# Number of rows, taken from the start of the canonical random order, that
# are written to each processed file (also embedded in the file name).
LEN_PROCESSED = 10000

for data_type in ["helpful", "harmless"]:
    ANTH_PATH = DATA_DIR / f"raw/anthropic/anthropic_{data_type}_base/train.jsonl"
    anth_df = inverse_cai.data.loader.anthropic.load_original_jsonl_file(ANTH_PATH)
    anth_canonical_order_path = DATA_DIR / f"meta_data/anthropic/anthropic_{data_type}_base_train_canonical_rand_order.csv"

    # OPTIONAL: save new random order
    if not anth_canonical_order_path.exists():
        # The meta_data directory may not exist on a fresh checkout; writing
        # the CSV would fail without it.
        anth_canonical_order_path.parent.mkdir(parents=True, exist_ok=True)
        save_random_index(anth_df, anth_canonical_order_path, seed=42)

    # Reorder the rows into the stored canonical order. Take a copy so the
    # in-place column edits below do not touch (or warn about) anth_df.
    anth_df_canonical_order = load_random_order_and_apply_to_df(anth_df, anth_canonical_order_path).copy()

    # Randomly flip text_a and text_b to balance the dataset.
    # Reseeding here makes the flip pattern independent of whatever consumed
    # random numbers earlier in the session.
    np.random.seed(42)
    flip_mask = np.random.rand(len(anth_df_canonical_order)) < 0.5

    # For rows where flip_mask is True, swap text_a and text_b
    temp_a = anth_df_canonical_order.loc[flip_mask, 'text_a'].copy()
    anth_df_canonical_order.loc[flip_mask, 'text_a'] = anth_df_canonical_order.loc[flip_mask, 'text_b']
    anth_df_canonical_order.loc[flip_mask, 'text_b'] = temp_a

    # Update preferred_text to match the flips.
    # NOTE(review): this overwrites preferred_text for every row and therefore
    # assumes the loader always places the preferred response in text_a —
    # confirm against load_original_jsonl_file.
    anth_df_canonical_order.loc[flip_mask, 'preferred_text'] = "text_b"
    anth_df_canonical_order.loc[~flip_mask, 'preferred_text'] = "text_a"

    print(f"Anthropic {data_type} base dataset loaded with {len(anth_df_canonical_order)} samples")

    # Write the first LEN_PROCESSED rows; the original index labels are kept
    # in a column named 'index'.
    processed_path = DATA_DIR / f"processed/anthropic/anthropic_{data_type}_base_train_{LEN_PROCESSED}canonrand_balanced.csv"
    processed_path.parent.mkdir(parents=True, exist_ok=True)
    anth_df_canonical_order[:LEN_PROCESSED].to_csv(processed_path, index_label='index')

In [None]:
# Preview the top rows of the slice that was written to disk for the last
# dataset processed by the loop.
anth_df_canonical_order.iloc[:LEN_PROCESSED].head()

In [None]:
# Display the frame as returned by the loader for the last dataset processed
# by the loop ("harmless"), i.e. before canonical reordering and flipping.
anth_df