In [None]:
import os, json
from copy import deepcopy

import numpy as np
from openai import OpenAI
from tqdm import tqdm

from mistakes_const import PARAPHRASE_PROMPT, ADD_MISTAKE_FEWSHOT
from const import model_name_dict, dataset_model_best_lr, LETTERS, datasets, model_name_to_path
from util import store_jsonl, load_results, load_specific_results

# Model paths to process. Each value is later split on "/" and its second
# component is used to look up the model's name in model_name_dict.
# NOTE(review): if model_name_to_path is a dict, this is a live view, not a list.
models = model_name_to_path.values()

In [None]:
# Take the API key from the OPENAI_API_KEY environment variable instead of a
# hardcoded literal, so no secret is ever stored in (or committed with) the notebook.
# If the variable is unset, None is passed and the client reports the missing key.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [None]:
def query_api(prompt, client, model="gpt-4o-mini"):
    """Send ``prompt`` as a single user message and return the raw response.

    Args:
        prompt: Text placed in the ``content`` field of the one user message.
        client: Object exposing ``chat.completions.create`` (e.g. an ``OpenAI`` client).
        model: Model identifier forwarded to the API; defaults to ``"gpt-4o-mini"``.

    Returns:
        Whatever ``client.chat.completions.create`` returns, unmodified.
    """
    user_message = {"role": "user", "content": prompt}
    return client.chat.completions.create(messages=[user_message], model=model)

In [None]:
def make_question(question, options):
    """Render a question followed by its answer options, one option per line.

    Every option gets an opening parenthesis prepended, so an option such as
    ``"A) foo"`` is rendered as ``"(A) foo"``. The question and the option
    lines are separated by one blank line.

    Args:
        question: The question text.
        options: Iterable of option strings.

    Returns:
        The combined question/options string.
    """
    option_block = "\n".join("(" + option for option in options)
    return "{}\n\n{}".format(question, option_block)

In [None]:
# For every (model, dataset) pair: load the stored results, query the API once
# per instance with the ADD_MISTAKE_FEWSHOT prompt, and write an augmented copy
# of the results to a JSONL file under PATH_ROOT.
PATH_ROOT = 'mistake_results'
for model in models:
    # The second "/"-separated component of the model path keys model_name_dict.
    model_name = model_name_dict[model.split("/")[1]]
    for dataset in datasets:

        resdir = f"{PATH_ROOT}/{dataset}/{model_name}"
        os.makedirs(resdir, exist_ok=True)
        # Per-dataset, per-model value (presumably the best learning rate -- confirm in const).
        lr = dataset_model_best_lr[dataset][model_name]
        path_to_store = f"{resdir}/npo_KL_{lr}_rs=1001_mistakes.jsonl"

        # The output file doubles as a cache marker: only query the API when it is absent.
        if not os.path.exists(path_to_store):
            print(f"Running for {dataset} & {model_name}")
            results = load_specific_results(model_name, dataset, lr)
            # Work on a deep copy so the loaded results are left untouched.
            augmented_results = deepcopy(results)

            progress = tqdm(enumerate(results), total=len(results))
            for idx, instance in progress:
                prompt = ADD_MISTAKE_FEWSHOT.format(
                    question=make_question(instance['question'], instance['options']),
                    sentence=instance['cot_step'],
                )
                response = query_api(prompt, client)

                # Record the generated text and the model name reported by the API.
                augmented = augmented_results[idx]
                augmented['mistake_cot_step'] = response.choices[0].message.content
                augmented['mistake_model'] = response.model

            # Written only after every instance has been processed.
            store_jsonl(augmented_results, path_to_store)
        else:
            print("Results exist, skipping")