In [1]:
from datasets import load_dataset

def load_shp_dataset(
    split: str = "train",
    max_samples: int | None = 1000,
    seed: int = 42,
):
    """
    Load Stanford Human Preferences (SHP) dataset
    and convert it into (prompt, chosen, rejected) format.

    Returns:
        List[Dict[str, str]]
    """

    dataset = load_dataset("stanfordnlp/SHP", split=split)

    if max_samples is not None:
        dataset = dataset.shuffle(seed=seed).select(range(max_samples))

    def convert(example):
        if example["labels"] == 1:
            chosen = example["human_ref_A"]
            rejected = example["human_ref_B"]
        else:
            chosen = example["human_ref_B"]
            rejected = example["human_ref_A"]

        return {
            "prompt": example["history"],
            "chosen": chosen,
            "rejected": rejected,
        }

    dataset = dataset.map(
        convert,
        remove_columns=dataset.column_names,
    )

    return dataset

# Build the shuffled 1000-example subset of the train split (the function's
# defaults, spelled out so the sampling settings are visible in this cell).
dataset = load_shp_dataset(split="train", max_samples=1000, seed=42)

Downloading readme:   0%|          | 0.00/19.1k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/18 [00:00<?, ?files/s]

Downloading data:   0%|          | 0/18 [00:00<?, ?files/s]

Downloading data:   0%|          | 0/18 [00:00<?, ?files/s]

Generating train split:   0%|          | 0/348718 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/18436 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/18409 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [2]:
# Show the first converted record; it carries the "prompt", "chosen" and
# "rejected" fields produced by load_shp_dataset.
dataset[0]

{'prompt': "[Harry Potter] If Harry uses the invisibility cloak backwards.. What does he see? Let's assume the invisibility cloak works only on one side. If Harry puts the inner side out, what does he see?",
 'chosen': "Magic primarily works through intent, rather than through hard mechanics. Drop your wand on the floor and it isn't going to shoot out spells. Someone needs to be casting them. Throw a flying broom as hard and as far as you can, and it isn't going to fly. Someone needs to be riding it.  The invisibility cloak is, for all intents and purposes, just a cloak until someone decides to wear it. I don't think it'd actually become invisible or *do* anything unless you were wearing it properly.",
 'rejected': "If the cloak were reversible it would be almost impossible to guarantee to be able to put it on right.  After all, it's barely visible anyway."}

In [4]:
# Pretty-print only the first record: each field under its own heading,
# followed by a separator line.
for sample in dataset:
    for heading, field in (
        ("PROMPT", "prompt"),
        ("CHOSEN", "chosen"),
        ("REJECTED", "rejected"),
    ):
        print(f"{heading}:\n", sample[field], end="\n\n")
    print("=" * 50)
    break  # stop after the first record


PROMPT:
 [Harry Potter] If Harry uses the invisibility cloak backwards.. What does he see? Let's assume the invisibility cloak works only on one side. If Harry puts the inner side out, what does he see?

CHOSEN:
 Magic primarily works through intent, rather than through hard mechanics. Drop your wand on the floor and it isn't going to shoot out spells. Someone needs to be casting them. Throw a flying broom as hard and as far as you can, and it isn't going to fly. Someone needs to be riding it.  The invisibility cloak is, for all intents and purposes, just a cloak until someone decides to wear it. I don't think it'd actually become invisible or *do* anything unless you were wearing it properly.

REJECTED:
 If the cloak were reversible it would be almost impossible to guarantee to be able to put it on right.  After all, it's barely visible anyway.

