In [28]:
import os
import json
from typing import Any, Iterable, Union
from pathlib import Path

def load_jsonl(file: Union[str, Path]) -> Iterable[Any]:
    with open(file, "r", encoding="utf-8") as f:
        for line in f:
            try:
                yield json.loads(line)
            except:
                print("Error in loading:", line)
                exit()


def save_jsonl(samples, save_path):
    # ensure path
    folder = os.path.dirname(save_path)
    os.makedirs(folder, exist_ok=True)

    with open(save_path, "w", encoding="utf-8") as f:
        for sample in samples:
            f.write(json.dumps(sample, ensure_ascii=False) + "\n")
    print("Saved to", save_path)

In [29]:
math_path = "/home/sunyi/CoT/Time-Constrained-CoT/data/math/test.jsonl"

qwen_models = ["Qwen/Qwen2.5-0.5B-Instruct"]
mathstral_models = ["mistralai/Mathstral-7B-v0.1"]
skywork_models = ["Skywork/Skywork-o1-Open-Llama-3.1-8B"]
qwen_prompt_types = ["qwen25-step-by-step-hard", "qwen25-math-cot", "coarse-to-fine-qwen"]
mathstral_prompt_types = ["mathstral-step-by-step-hard", "mathstral-step-by-step", "mathstral-coarse-to-fine"]
skywork_prompt_types = ["skywork-step-by-step-hard", "skywork-step-by-step", "skywork-coarse-to-fine"]

todo_dir = "/home/sunyi/CoT/Time-Constrained-CoT/outputs/12_26"


In [30]:
def add_subject(model, prompt_type):
    math_data = list(load_jsonl(math_path))
    model_path = os.path.join(todo_dir, model, prompt_type, "math")
    for file in os.listdir(model_path):
        if file.endswith(".jsonl"):
            file_path = os.path.join(model_path, file)
            samples = list(load_jsonl(file_path))
            for index, sample in enumerate(samples):
                sample["subject"] = math_data[index]["subject"]
            save_jsonl(samples, file_path)

for model in qwen_models:
    for prompt_type in qwen_prompt_types:
        add_subject(model, prompt_type)




Saved to /home/sunyi/CoT/Time-Constrained-CoT/outputs/12_26/Qwen/Qwen2.5-0.5B-Instruct/qwen25-step-by-step-hard/math/test_qwen25-step-by-step-hard_-1_seed0_t0.0_s0_e-1_b450.jsonl
Saved to /home/sunyi/CoT/Time-Constrained-CoT/outputs/12_26/Qwen/Qwen2.5-0.5B-Instruct/qwen25-step-by-step-hard/math/test_qwen25-step-by-step-hard_-1_seed0_t0.0_s0_e-1_b75.jsonl
Saved to /home/sunyi/CoT/Time-Constrained-CoT/outputs/12_26/Qwen/Qwen2.5-0.5B-Instruct/qwen25-step-by-step-hard/math/test_qwen25-step-by-step-hard_-1_seed0_t0.0_s0_e-1_b1350.jsonl
Saved to /home/sunyi/CoT/Time-Constrained-CoT/outputs/12_26/Qwen/Qwen2.5-0.5B-Instruct/qwen25-step-by-step-hard/math/test_qwen25-step-by-step-hard_-1_seed0_t0.0_s0_e-1_b200.jsonl
Saved to /home/sunyi/CoT/Time-Constrained-CoT/outputs/12_26/Qwen/Qwen2.5-0.5B-Instruct/qwen25-step-by-step-hard/math/test_qwen25-step-by-step-hard_-1_seed0_t0.0_s0_e-1_b1250.jsonl
Saved to /home/sunyi/CoT/Time-Constrained-CoT/outputs/12_26/Qwen/Qwen2.5-0.5B-Instruct/qwen25-step-by-st

KeyboardInterrupt: 