In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the sampled generations of both models from disk. Each file is parsed
# as one JSON document; the grouping cell below iterates over the result and
# indexes every record as record["messages"][i]["content"].
import json

# JSON interchange files are UTF-8 (RFC 8259). Passing the encoding explicitly
# keeps the read independent of the kernel's locale settings.
with open('/gpfs/accounts/wangluxy_root/wangluxy2/shared_data/xin/data/sample_train_math_Qwen2.5-32B-5samples_unfiltered.json', 'r', encoding='utf-8') as f:
    large_base_cov_data = json.load(f)

with open('/gpfs/accounts/wangluxy_root/wangluxy2/shared_data/xin/data/sample_train_math_r1_qwen_1.5B_5samples_unfiltered.json', 'r', encoding='utf-8') as f:
    small_reasoning_cov_data = json.load(f)


In [3]:
# Group the sampled conversations of each model by their prompt, i.e. the
# content of the first message. Every prompt maps to the list of records that
# share it, in the order in which they appear in the loaded data.
large_base_data_dict = {}
small_reasoning_data_dict = {}

for samples, grouped in (
    (large_base_cov_data, large_base_data_dict),
    (small_reasoning_cov_data, small_reasoning_data_dict),
):
    for conv_item in samples:
        input_prompt = conv_item["messages"][0]["content"]
        # setdefault creates the bucket the first time a prompt is seen.
        grouped.setdefault(input_prompt, []).append(conv_item)

In [4]:
from eval.math_equivalence import is_equiv
# Reference data loaded from MATH/train.jsonl (one JSON object per line).
df = pd.read_json("MATH/train.jsonl", lines=True)

# Lookup table from the "query" column to the "answer" column. If a query
# occurs more than once, the answer of its last occurrence is kept.
question_mapping = dict(zip(df["query"], df["answer"]))

def extract_boxed_answer(text):
    """Return the contents of the last ``\\boxed{...}`` found in ``text``.

    Nested braces inside the box are kept, so ``\\boxed{\\frac{1}{2}}`` yields
    ``\\frac{1}{2}``. Surrounding whitespace is stripped from the result.

    Args:
        text: String to search.

    Returns:
        The text between the braces of the last ``\\boxed{`` occurrence, or an
        empty string when there is no ``\\boxed{`` or its braces never balance.
    """
    marker = r"\boxed{"
    marker_pos = text.rfind(marker)
    if marker_pos < 0:
        return ''

    body_start = marker_pos + len(marker)
    depth = 1  # the opening brace of the marker itself is already consumed
    for pos in range(body_start, len(text)):
        ch = text[pos]
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                # Matching close brace found: everything before it is the answer.
                return text[body_start:pos].strip()

    # Ran off the end of the text without closing the box.
    return ''

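# V1: pair every correct response of one model with every incorrect response of the other model (all combinations)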

In [6]:
from itertools import product
import random

# Build cross-model preference pairs for every prompt: each correct response
# of one model ("chosen") is combined with each incorrect response of the
# other model ("rejected"), in both directions. The pairs are then shuffled
# with a fixed seed and written to ./preference_pairs_math_filtered.json.

# Both models must have been sampled on exactly the same set of prompts.
assert large_base_data_dict.keys() == small_reasoning_data_dict.keys()

preference_pair_list = []

for input_prompt in large_base_data_dict.keys():
    large_base_item_list = large_base_data_dict[input_prompt]
    small_reasoning_item_list = small_reasoning_data_dict[input_prompt]

    # question_mapping is keyed by the question text, which is whatever
    # follows the last "Question: " marker in the prompt.
    correct_answer = question_mapping[input_prompt.split("Question: ")[-1]]

    # Grade every response exactly once and route it to the matching bucket
    # (assumes is_equiv is deterministic, so one call per response suffices).
    large_base_item_list_correct, large_base_item_list_incorrect = [], []
    small_reasoning_item_list_correct, small_reasoning_item_list_incorrect = [], []
    for item_list, correct_bucket, incorrect_bucket in (
        (large_base_item_list, large_base_item_list_correct, large_base_item_list_incorrect),
        (small_reasoning_item_list, small_reasoning_item_list_correct, small_reasoning_item_list_incorrect),
    ):
        for item in item_list:
            predicted_answer = extract_boxed_answer(item["messages"][-1]["content"])
            bucket = correct_bucket if is_equiv(predicted_answer, correct_answer) else incorrect_bucket
            bucket.append(item)

    # Direction order matters: it fixes the pre-shuffle order of the list and
    # therefore the result of the seeded shuffle below.
    for chosen_pool, rejected_pool in (
        (large_base_item_list_correct, small_reasoning_item_list_incorrect),
        (small_reasoning_item_list_correct, large_base_item_list_incorrect),
    ):
        # Enumerate all chosen/rejected combinations.
        for chosen_item, rejected_item in product(chosen_pool, rejected_pool):
            preference_pair_list.append({
                "conversations": [
                    {"from": "user", "value": input_prompt},
                ],
                "chosen": {"from": "assistant", "value": chosen_item["messages"][-1]["content"]},
                "rejected": {"from": "assistant", "value": rejected_item["messages"][-1]["content"]}
            })

# Shuffle the preference pairs reproducibly (seed 42).
random.seed(42)
random.shuffle(preference_pair_list)

# Save the preference pairs, pretty-printed with an indentation of 4.
with open('./preference_pairs_math_filtered.json', 'w') as f:
    json.dump(preference_pair_list, f, indent=4)

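# V2: pair correct and incorrect responses one-to-one (zip), so each response is used at most once per direction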

In [8]:
from itertools import product
import random

# Build cross-model preference pairs for every prompt, one-to-one: the i-th
# correct response of one model ("chosen") is paired with the i-th incorrect
# response of the other model ("rejected"), in both directions. The pairs are
# then shuffled with a fixed seed and written to
# ./preference_pairs_math_filtered_v2.json.

# Both models must have been sampled on exactly the same set of prompts.
assert large_base_data_dict.keys() == small_reasoning_data_dict.keys()

preference_pair_list = []

for input_prompt in large_base_data_dict.keys():
    large_base_item_list = large_base_data_dict[input_prompt]
    small_reasoning_item_list = small_reasoning_data_dict[input_prompt]

    # question_mapping is keyed by the question text, which is whatever
    # follows the last "Question: " marker in the prompt.
    correct_answer = question_mapping[input_prompt.split("Question: ")[-1]]

    # Grade every response exactly once and route it to the matching bucket
    # (assumes is_equiv is deterministic, so one call per response suffices).
    large_base_item_list_correct, large_base_item_list_incorrect = [], []
    small_reasoning_item_list_correct, small_reasoning_item_list_incorrect = [], []
    for item_list, correct_bucket, incorrect_bucket in (
        (large_base_item_list, large_base_item_list_correct, large_base_item_list_incorrect),
        (small_reasoning_item_list, small_reasoning_item_list_correct, small_reasoning_item_list_incorrect),
    ):
        for item in item_list:
            predicted_answer = extract_boxed_answer(item["messages"][-1]["content"])
            bucket = correct_bucket if is_equiv(predicted_answer, correct_answer) else incorrect_bucket
            bucket.append(item)

    # Direction order matters: it fixes the pre-shuffle order of the list and
    # therefore the result of the seeded shuffle below.
    for chosen_pool, rejected_pool in (
        (large_base_item_list_correct, small_reasoning_item_list_incorrect),
        (small_reasoning_item_list_correct, large_base_item_list_incorrect),
    ):
        # zip stops at the shorter pool, so no explicit truncation is needed.
        for chosen_item, rejected_item in zip(chosen_pool, rejected_pool):
            preference_pair_list.append({
                "conversations": [
                    {"from": "user", "value": input_prompt},
                ],
                "chosen": {"from": "assistant", "value": chosen_item["messages"][-1]["content"]},
                "rejected": {"from": "assistant", "value": rejected_item["messages"][-1]["content"]}
            })

# Shuffle the preference pairs reproducibly (seed 42).
random.seed(42)
random.shuffle(preference_pair_list)

# Save the preference pairs, pretty-printed with an indentation of 4.
with open('./preference_pairs_math_filtered_v2.json', 'w') as f:
    json.dump(preference_pair_list, f, indent=4)

# Number of V2 preference pairs produced by this cell.
len(preference_pair_list)

12831

In [7]:
# Number of preference pairs currently held in preference_pair_list.
# NOTE(review): this cell has execution count 7 (between the V1 cell, 6, and
# the V2 cell, 8) but sits after the V2 cell, so the value displayed below is
# presumably the V1 pair count. Confirm, and consider moving it under V1.
len(preference_pair_list)

55183