In [1]:
import argparse
import re
import os
from typing import cast
import json
import datasets
import pandas as pd

from datasets.load import Dataset, DatasetDict
from verl.utils.hdfs_io import copy, makedirs
from tqdm import tqdm


In [2]:
data_source = "BAAI/TACO"
dataset: DatasetDict = cast(DatasetDict, datasets.load_dataset(data_source, "ALL"))
train_dataset: Dataset = dataset["train"]
test_dataset: Dataset = dataset["train"]


In [3]:
def make_format(question: str, inputs: list[str], outputs: list[str]):
    prompts = []
    ground_truth = []
    for input, output in zip(inputs, outputs):
        if (isinstance(input, str) and isinstance(output, str)):
            query = question + "\n" + input
            answer = output
            prompts.append(query)
            ground_truth.append(answer)
    return prompts, ground_truth


In [4]:

prompts = []
ground_truth = []
for sample in tqdm(test_dataset):
    try:
        question = sample['question']
        inputs = json.loads(sample['input_output'])['inputs']
        outputs = json.loads(sample['input_output'])['outputs']
        a, b = make_format(question, inputs, outputs)
        prompts.extend(a)
        ground_truth.extend(b)
    except:
        continue
assert len(prompts) == len(ground_truth)

100%|██████████| 25443/25443 [00:40<00:00, 627.85it/s] 


In [5]:
data_source = ["BAAI/TACO"] * len(prompts)

In [6]:
def format_prompt(prompt : str) -> list[dict]:
    return [
        {
            "role": "system",
            "content": "answer or u die :)" 
        },
        {
            "role": "user",
            "content": prompt
        }
    ]

prompt = list(map(format_prompt, prompts))

In [7]:
ability = ["code"] * len(prompts)

In [8]:
def format_reward_model(input_output_pair : tuple[str, str]) -> dict:
    assert isinstance(input_output_pair[0], str)
    assert isinstance(input_output_pair[1], str)
    return {
        "style": "rule",
        "ground_truth": {"inputs": input_output_pair[0], "outputs": input_output_pair[1]},
    }
reward_model = list(map(format_reward_model, zip(prompts, ground_truth)))

In [9]:
df = pd.DataFrame({
    "data_source" : data_source,
    "prompt": prompt,
    "ability": ability,
    "reward_model": reward_model
})


In [10]:
df.head()

Unnamed: 0,data_source,prompt,ability,reward_model
0,BAAI/TACO,"[{'role': 'system', 'content': 'answer or u di...",code,"{'style': 'rule', 'ground_truth': {'inputs': '..."
1,BAAI/TACO,"[{'role': 'system', 'content': 'answer or u di...",code,"{'style': 'rule', 'ground_truth': {'inputs': '..."
2,BAAI/TACO,"[{'role': 'system', 'content': 'answer or u di...",code,"{'style': 'rule', 'ground_truth': {'inputs': '..."
3,BAAI/TACO,"[{'role': 'system', 'content': 'answer or u di...",code,"{'style': 'rule', 'ground_truth': {'inputs': '..."
4,BAAI/TACO,"[{'role': 'system', 'content': 'answer or u di...",code,"{'style': 'rule', 'ground_truth': {'inputs': '..."


In [11]:
df.to_parquet("TACO_test_processed.parquet")