In [None]:
!pip install datasets

In [None]:
import pandas as pd
from datasets import Dataset, DatasetDict, load_dataset

def convert_to_dpo_format(df: pd.DataFrame) -> list:
    """
    Converts a DataFrame into the required format for Direct Preference Optimization (DPO).

    Each row becomes one record holding a single-turn human message (the row's
    'prompt' wrapped in a fixed summarization instruction) together with the
    preferred and non-preferred responses.

    Args:
        df (pd.DataFrame): A DataFrame containing the columns 'prompt', 'chosen', and 'rejected'.
            Any additional columns are ignored.

    Returns:
        list[dict]: One dictionary per row, in row order, with the keys
                    'conversations' (a one-element list with the human turn),
                    'chosen' and 'rejected' (each a {'from': 'gpt', 'value': ...} dict).
                    An empty DataFrame yields an empty list.

    Raises:
        KeyError: If any of the required columns is missing from ``df``.
    """
    required_columns = ("prompt", "chosen", "rejected")
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        # Fail up front with a readable message instead of a bare per-row KeyError.
        raise KeyError(f"DataFrame is missing required column(s): {missing}")

    # Walk the three columns in lockstep rather than using iterrows(), which
    # builds a full Series for every row and is needlessly slow on large frames.
    return [
        {
            "conversations": [
                {
                    "from": "human",
                    "value": f"System: I want you to summarize this text\nDocument: {prompt}",
                }
            ],
            "chosen": {"from": "gpt", "value": chosen},
            "rejected": {"from": "gpt", "value": rejected},
        }
        for prompt, chosen, rejected in zip(df["prompt"], df["chosen"], df["rejected"])
    ]



# Load the source preference dataset from the Hugging Face Hub.
hf_dataset = load_dataset("Muadil/cleaned_openai_summary_comparisons")

# Convert the train and test splits to DPO format.
# Dataset.to_pandas() is the library's own conversion to a DataFrame; it avoids
# iterating the dataset row by row the way pd.DataFrame(dataset) does.
dpo_train = convert_to_dpo_format(hf_dataset["train"].to_pandas())
dpo_test = convert_to_dpo_format(hf_dataset["test"].to_pandas())
# Show the first few converted records as a quick sanity check.
print(dpo_train[:5])

# Wrap the converted records back into Dataset objects so they can be uploaded.
dpo_dataset = DatasetDict({
    "train": Dataset.from_list(dpo_train),
    "test": Dataset.from_list(dpo_test)
})

# Upload to the Hugging Face Hub.
# NOTE(review): "username/repository" looks like a placeholder repo id —
# replace it with the real target (and make sure you are authenticated) before running.
dpo_dataset.push_to_hub("username/repository")

print("Dataset successfully transformed and uploaded to Hugging Face Hub.")
