In [10]:
import os
import json
from typing import Any, Iterable, Union
from pathlib import Path

def load_jsonl(file: Union[str, Path]) -> Iterable[Any]:
    """Lazily yield one decoded JSON value per non-blank line of a JSONL file.

    Args:
        file: Path of the UTF-8 encoded JSON Lines file to read.

    Yields:
        The Python object decoded from each non-blank line, in file order.

    Raises:
        ValueError: If a non-blank line is not valid JSON. The message names
            the file and the 1-based line number; the underlying
            ``json.JSONDecodeError`` is chained as the cause.
    """
    with open(file, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            # Whitespace-only lines (e.g. a trailing newline at EOF) hold no record.
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as exc:
                # Raise instead of print + exit(): a malformed line should be an
                # ordinary exception the caller can handle, not end the session.
                raise ValueError(
                    f"Error in loading {file}, line {line_no}: {line[:200]!r}"
                ) from exc
            # Yield outside the try block so that closing the generator early
            # (GeneratorExit raised at the yield) is never mistaken for a parse error.
            yield record


def save_jsonl(samples, save_path):
    """Write ``samples`` to ``save_path`` as JSON Lines (one JSON value per line).

    Args:
        samples: Iterable of JSON-serialisable objects; each becomes one line.
        save_path: Destination file path. Missing parent directories are
            created. An existing file is overwritten.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) in UTF-8.
    Prints the destination path once writing has finished.
    """
    # Ensure the parent directory exists. A bare filename has an empty dirname,
    # and os.makedirs("") raises FileNotFoundError, so skip creation in that case.
    folder = os.path.dirname(save_path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    with open(save_path, "w", encoding="utf-8") as f:
        for sample in samples:
            f.write(json.dumps(sample, ensure_ascii=False) + "\n")
    print("Saved to", save_path)

In [11]:
from matplotlib import pyplot as plt
import os
import numpy as np
import json


# Root directory under which the per-model result folders are looked up.
file_dir = "/data03/sunyi/time_constrained_cot/outputs/1_10"

# "<org>/<name>" model identifiers to process, grouped by vendor. Each
# identifier is also used verbatim as a sub-path below file_dir. The order of
# the concatenated list is the processing order.
model_list = (
    [  # Qwen
        "Qwen/QwQ-32B-Preview",
        "Qwen/Qwen2.5-32B-Instruct",
        "Qwen/Qwen2.5-14B-Instruct",
        "Qwen/Qwen2.5-7B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-1.5B-Instruct",
    ]
    + [  # Mistral AI
        "mistralai/Mathstral-7B-v0.1",
        "mistralai/Ministral-8B-Instruct-2410",
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mistral-Small-Instruct-2409",
    ]
    + [  # Google Gemma 2
        "google/gemma-2-2b-it",
        "google/gemma-2-9b-it",
        "google/gemma-2-27b-it",
    ]
    + [  # Microsoft Phi
        "microsoft/phi-4",
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-medium-128k-instruct",
        "microsoft/Phi-3-small-128k-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
    ]
)

In [12]:

# Suffixes appended to a series key to form that series' prompt-type names.
sbs_hard = "-sbs-hard"
sbs = "-sbs"
c2f = "-c2f"
kf = "-kf"
aav = "-aav"


# Model identifier -> series key; the key selects the model's prompt types in
# MODEL_SERIES_PROMPT_TYPE_MAP below.
MODEL_SERIES_MAP = {
    **dict.fromkeys(
        (
            "Qwen/QwQ-32B-Preview",
            "Qwen/Qwen2.5-32B-Instruct",
            "Qwen/Qwen2.5-14B-Instruct",
            "Qwen/Qwen2.5-7B-Instruct",
            "Qwen/Qwen2.5-3B-Instruct",
            "Qwen/Qwen2.5-1.5B-Instruct",
        ),
        "qwen",
    ),
    **dict.fromkeys(
        (
            "google/gemma-2-2b-it",
            "google/gemma-2-9b-it",
            "google/gemma-2-27b-it",
        ),
        "gemma",
    ),
    **dict.fromkeys(
        (
            "mistralai/Mathstral-7B-v0.1",
            "mistralai/Ministral-8B-Instruct-2410",
            "mistralai/Mistral-Nemo-Instruct-2407",
            "mistralai/Mistral-Small-Instruct-2409",
        ),
        "mistral",
    ),
    "microsoft/phi-4": "phi4",
    "microsoft/Phi-3-medium-128k-instruct": "phi3medium",
    "microsoft/Phi-3-small-128k-instruct": "phi3small",
    **dict.fromkeys(
        (
            "microsoft/Phi-3.5-mini-instruct",
            "microsoft/Phi-3-mini-128k-instruct",
        ),
        "phi3mini",
    ),
}


# Series key -> its prompt-type names: the key followed by each suffix, always
# in the order sbs_hard, sbs, c2f, kf, aav.
MODEL_SERIES_PROMPT_TYPE_MAP = {
    series: [series + suffix for suffix in (sbs_hard, sbs, c2f, kf, aav)]
    for series in (
        "qwen",
        "mistral",
        "gemma",
        "phi3mini",
        "phi3small",
        "phi3medium",
        "phi4",
    )
}

In [13]:
def gen_budget_list(budget, data_name, model):
    """Return the list of budget values to iterate over for a model/dataset pair.

    Args:
        budget: Selector for the kind of list returned. A negative value
            yields ``[-1]``, zero yields ``[25]``, and any positive value
            yields the full schedule for ``data_name`` and ``model``.
        data_name: Dataset name; ``"gsm8k"`` or ``"math"`` when ``budget`` is
            positive (not inspected otherwise).
        model: Model identifier. A small set of models gets a schedule with a
            higher upper limit; all other models share the default schedule.

    Returns:
        A list of ints. Full schedules run from 25 to 575 in steps of 25 and
        then from 600 up to a dataset/model specific maximum in steps of 50.

    Raises:
        ValueError: If ``budget`` is positive and ``data_name`` is not a
            supported dataset (previously this surfaced as an
            ``UnboundLocalError``).
    """
    if budget < 0:
        return [-1]
    if budget == 0:
        return [25]

    # Models whose schedules extend to a larger maximum budget.
    extended_budget_models = (
        "Qwen/QwQ-32B-Preview",
        "Skywork/Skywork-o1-Open-Llama-3.1-8B",
        "PowerInfer/SmallThinker-3B-Preview",
    )
    # Exclusive stop of the coarse (step 50) range starting at 600, per dataset.
    if model in extended_budget_models:
        coarse_stop = {"gsm8k": 1001, "math": 1801}
    else:
        # A stop of 601 makes the coarse range exactly [600], which reproduces
        # the original range(25, 601, 25) for gsm8k.
        coarse_stop = {"gsm8k": 601, "math": 1201}

    if data_name not in coarse_stop:
        raise ValueError(
            f"Unsupported data_name {data_name!r}; expected one of {sorted(coarse_stop)}"
        )

    return list(range(25, 600, 25)) + list(range(600, coarse_stop[data_name], 50))

In [14]:
# Dataset whose result files the cells below operate on. gen_budget_list only
# defines budget schedules for "gsm8k" and "math".
dataset = "math"
# Fixed fragment of every result filename, placed between the prompt type and
# the "_b<budget>" suffix. Presumably encodes run settings (seed, temperature,
# start/end index) — TODO confirm against the script that writes these files.
something = "_-1_seed0_t0.0_s0_e-1"

In [15]:
# # add missing jsonl files
# for model in model_list:
#     budget_list = gen_budget_list(1, dataset, model)
#     model_prompt_list = MODEL_SERIES_PROMPT_TYPE_MAP[MODEL_SERIES_MAP[model]]
#     for prompt_type in model_prompt_list:
#         for budget in budget_list:
#             full_jsonl_name = "test_" + prompt_type + something + ".jsonl"
#             full_jsonl_path = os.path.join(file_dir, model, prompt_type, dataset, full_jsonl_name)
#             metrics_name = "test_" + prompt_type + something + "_b" + str(int(budget)) + "_metrics.json"
#             metrics_path = os.path.join(file_dir, model, prompt_type, dataset, metrics_name)
#             jsonl_name = "test_" + prompt_type + something + "_b" + str(int(budget)) + ".jsonl"
#             jsonl_path = os.path.join(file_dir, model, prompt_type, dataset, jsonl_name)
            
#             # check if jsonl_path exists, if not, copy data from full_jsonl_path to jsonl_path
#             if not os.path.exists(jsonl_path):
#                 samples = list(load_jsonl(full_jsonl_path))
#                 save_jsonl(samples, jsonl_path)
#                 print(jsonl_path)

In [16]:
# # add level and subject accs to metrics.json

# def get_accs(jsonl_path):
#     metric_path = jsonl_path.replace(".jsonl", "_metrics.json")
#     # Read the existing contents of the metric file
#     with open(metric_path, "r", encoding="utf-8") as f:
#         data = json.load(f)
#     if "level_acc" in data and "subject_acc" in data:
#         print(f"Level and subject accs already exist in {metric_path}")
#         return data["acc"], data["level_acc"], data["subject_acc"]
#     else:
#         acc = data["acc"]
#         data_list = list(load_jsonl(jsonl_path))
#         level_num = {
#             "Level 1": 0,
#             "Level 2": 0,
#             "Level 3": 0,
#             "Level 4": 0,
#             "Level 5": 0
#         }
#         level_acc = {
#             "Level 1": 0,
#             "Level 2": 0,
#             "Level 3": 0,
#             "Level 4": 0,
#             "Level 5": 0
#         }
#         subject_num = {
#             "Prealgebra": 0,
#             "Precalculus": 0,
#             "Geometry": 0,
#             "Intermediate Algebra": 0,
#             "Counting & Probability": 0,
#             "Algebra": 0,
#             "Number Theory": 0
#         }
#         subject_acc = {
#             "Prealgebra": 0,
#             "Precalculus": 0,
#             "Geometry": 0,
#             "Intermediate Algebra": 0,
#             "Counting & Probability": 0,
#             "Algebra": 0,
#             "Number Theory": 0
#         }
#         for data in data_list:
#             level_num[data["level"]] += 1
#             subject_num[data["subject"]] += 1
#             # data["score"] is a list of true or false
#             level_acc[data["level"]] += data["score"][0]
#             subject_acc[data["subject"]] += data["score"][0]
#         print("level_num", level_num)
#         print("subject_num", subject_num)
#         print("sum of level_num", sum(level_num.values()))
#         print("sum of subject_num", sum(subject_num.values()))
#         for level in level_acc:
#             level_acc[level] = round(level_acc[level] / level_num[level] * 100, 1)
#         for subject in subject_acc:
#             subject_acc[subject] = round(subject_acc[subject] / subject_num[subject] * 100, 1)
            
#         # Read the existing contents of the metric file
#         with open(metric_path, "r", encoding="utf-8") as f:
#             data = json.load(f)
        
#         # Update or add the new statistics
#         data["level_acc"] = level_acc
#         data["subject_acc"] = subject_acc
        
#         # Write back to the file
#         with open(metric_path, "w", encoding="utf-8") as f:
#             json.dump(data, f)
#         print(f"Updated metrics in {metric_path}\n")
    
#     return acc, level_acc, subject_acc

# for model in model_list:
#     budget_list = gen_budget_list(1, dataset, model)
#     model_prompt_list = MODEL_SERIES_PROMPT_TYPE_MAP[MODEL_SERIES_MAP[model]]
#     for prompt_type in model_prompt_list:
#         for budget in budget_list:
#             jsonl_name = "test_" + prompt_type + something + "_b" + str(int(budget)) + ".jsonl"
#             jsonl_path = os.path.join(file_dir, model, prompt_type, dataset, jsonl_name)
#             acc, level_acc, subject_acc = get_accs(jsonl_path)

In [17]:
# math_path = "/home/sunyi/CoT/Time-Constrained-CoT/data/math/test.jsonl"

# def add_subject(jsonl_path):
#     math_data = list(load_jsonl(math_path))
#     data = list(load_jsonl(jsonl_path))
#     # check if data[0] has subject, if not, add it
#     if "subject" not in data[0]:
#         for index, sample in enumerate(data):
#             sample["subject"] = math_data[index]["subject"]
#         save_jsonl(data, jsonl_path)
#         print(f"Added subject to {jsonl_path}")
#     else:
#         print(f"Subject already exists in {jsonl_path}")

# for model in model_list:
#     budget_list = gen_budget_list(1, dataset, model)
#     model_prompt_list = MODEL_SERIES_PROMPT_TYPE_MAP[MODEL_SERIES_MAP[model]]
#     for prompt_type in model_prompt_list:
#         for budget in budget_list:
#             jsonl_name = "test_" + prompt_type + something + "_b" + str(int(budget)) + ".jsonl"
#             jsonl_path = os.path.join(file_dir, model, prompt_type, dataset, jsonl_name)
#             add_subject(jsonl_path)

In [18]:
# Pretty-print every metrics file: load each one and rewrite it in place with indent=4.
for model in model_list:
    budget_list = gen_budget_list(1, dataset, model)
    model_prompt_list = MODEL_SERIES_PROMPT_TYPE_MAP[MODEL_SERIES_MAP[model]]
    for prompt_type in model_prompt_list:
        for budget in budget_list:
            metrics_name = f"test_{prompt_type}{something}_b{int(budget)}_metrics.json"
            metrics_path = os.path.join(file_dir, model, prompt_type, dataset, metrics_name)
            # Read the whole file before reopening it: mode "w" truncates on open.
            with open(metrics_path, "r", encoding="utf-8") as f:
                data = json.loads(f.read())
            with open(metrics_path, "w", encoding="utf-8") as f:
                f.write(json.dumps(data, indent=4))
            print(f"Updated metrics in {metrics_path}\n")

Updated metrics in /data03/sunyi/time_constrained_cot/outputs/1_10/Qwen/QwQ-32B-Preview/qwen-sbs-hard/math/test_qwen-sbs-hard_-1_seed0_t0.0_s0_e-1_b25_metrics.json

Updated metrics in /data03/sunyi/time_constrained_cot/outputs/1_10/Qwen/QwQ-32B-Preview/qwen-sbs-hard/math/test_qwen-sbs-hard_-1_seed0_t0.0_s0_e-1_b50_metrics.json

Updated metrics in /data03/sunyi/time_constrained_cot/outputs/1_10/Qwen/QwQ-32B-Preview/qwen-sbs-hard/math/test_qwen-sbs-hard_-1_seed0_t0.0_s0_e-1_b75_metrics.json

Updated metrics in /data03/sunyi/time_constrained_cot/outputs/1_10/Qwen/QwQ-32B-Preview/qwen-sbs-hard/math/test_qwen-sbs-hard_-1_seed0_t0.0_s0_e-1_b100_metrics.json

Updated metrics in /data03/sunyi/time_constrained_cot/outputs/1_10/Qwen/QwQ-32B-Preview/qwen-sbs-hard/math/test_qwen-sbs-hard_-1_seed0_t0.0_s0_e-1_b125_metrics.json

Updated metrics in /data03/sunyi/time_constrained_cot/outputs/1_10/Qwen/QwQ-32B-Preview/qwen-sbs-hard/math/test_qwen-sbs-hard_-1_seed0_t0.0_s0_e-1_b150_metrics.json

Updated