In [None]:
import json
import math

import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [None]:
def _generate_batched(data, model_path, batch_size=128,
                      max_length=512, max_new_tokens=512):
    """Run a seq2seq checkpoint over ``data`` in batches and decode the outputs.

    Shared implementation behind ``generate_hints`` and ``generate_code``,
    which previously carried two copies of the same loop.

    Args:
        data: Sliceable sequence of prompt strings (e.g. a list).
        model_path: Identifier or path handed to ``from_pretrained`` for both
            the tokenizer and the model.
        batch_size: Number of prompts sent to the model per ``generate`` call.
        max_length: ``max_length`` passed to the tokenizer (with
            ``truncation=True``).
        max_new_tokens: ``max_new_tokens`` passed to ``model.generate``.

    Returns:
        A list with one decoded string per generated sequence, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(device)

    prompt_batches = [data[start:start + batch_size]
                      for start in range(0, len(data), batch_size)]

    responses = []
    for batch in tqdm(prompt_batches):
        inputs = tokenizer(batch, return_tensors="pt",
                           padding=True, truncation=True,
                           max_length=max_length)
        # Every tokenizer output must live on the same device as the model.
        inputs = {key: tensor.to(device) for key, tensor in inputs.items()}
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
        responses.extend(
            tokenizer.decode(output, skip_special_tokens=True)
            for output in outputs)
    return responses


def generate_hints(data, model_path, batch_size=128):
    """Generate a hint string for each prompt in ``data``.

    Args:
        data: Sliceable sequence of prompt strings.
        model_path: Identifier or path of the hint model checkpoint.
        batch_size: Prompts per generation batch (default 128).

    Returns:
        List of decoded hint strings, in the same order as ``data``.
    """
    return _generate_batched(data, model_path, batch_size=batch_size)


def generate_code(data, model_path, batch_size=128):
    """Generate a code string for each prompt in ``data``.

    Args:
        data: Sliceable sequence of prompt strings.
        model_path: Identifier or path of the code model checkpoint.
        batch_size: Prompts per generation batch (default 128).

    Returns:
        List of decoded code strings, in the same order as ``data``.
    """
    return _generate_batched(data, model_path, batch_size=batch_size)

In [None]:
def evaluate(predictions, golds, rel_tol=1e-9, abs_tol=1e-9):
    """Compute the fraction of gold answers matched by the predictions.

    Predictions and golds are compared pairwise. A prediction equal to the
    empty string ``""`` is the "no answer" sentinel and never counts as
    correct. Numeric values are compared with ``math.isclose`` rather than
    ``==`` so that answers differing only by floating-point rounding
    (e.g. ``0.1 + 0.2`` vs ``0.3``) are still counted as correct.

    Args:
        predictions: Sequence of predicted values; ``""`` marks a failure.
        golds: Sequence of reference values. Its length is the denominator,
            so golds without a paired prediction count as incorrect.
        rel_tol: Relative tolerance forwarded to ``math.isclose``.
        abs_tol: Absolute tolerance forwarded to ``math.isclose``.

    Returns:
        ``correct / len(golds)``, or ``0`` when ``golds`` is empty.
    """
    total = len(golds)
    if total == 0:
        return 0

    correct = 0
    for pred, gold in zip(predictions, golds):
        if pred == "":
            continue
        try:
            matched = math.isclose(pred, gold,
                                   rel_tol=rel_tol, abs_tol=abs_tol)
        except TypeError:
            # Non-numeric values: fall back to plain equality.
            matched = pred == gold
        if matched:
            correct += 1
    return correct / total


def inference(questions, golds, hints_model_path, code_model_path):
    """Generate code for each question, execute it, and score the results.

    When a hints model path is given, each question is first extended to
    ``question + " ## " + hint`` using ``generate_hints``; otherwise the
    questions are used as prompts unchanged. Each generated program is then
    executed, and the value it binds to ``result`` is converted to ``float``
    and used as the prediction.

    Args:
        questions: List of question strings.
        golds: Reference answers, passed to ``evaluate``.
        hints_model_path: Path of the hint model; ``None``, empty, or
            whitespace-only disables hint generation.
        code_model_path: Path of the code generation model.

    Returns:
        A ``(result, compilation_error_num)`` tuple: the value returned by
        ``evaluate`` and the number of programs that raised or did not yield
        a float-convertible ``result``.
    """
    # Test for None *before* calling .strip(); the reverse order raised
    # AttributeError for None instead of skipping hint generation.
    if hints_model_path is not None and hints_model_path.strip() != "":
        hints = generate_hints(questions, hints_model_path)
        prompts = [question + " ## " + hint
                   for question, hint in zip(questions, hints)]
    else:
        prompts = questions
    codes = generate_code(prompts, code_model_path)

    predictions, compilation_error_num = [], 0
    for code in codes:
        # One dict serves as both globals and locals. With separate dicts the
        # code runs as if inside a class body, so functions it defines cannot
        # see its own top-level imports/variables and fail with NameError.
        namespace = {}
        try:
            # SECURITY: this executes model-generated code with full
            # interpreter privileges. Only run it with trusted models, or
            # inside a sandboxed environment.
            exec(code, namespace)
            predictions.append(float(namespace['result']))
        except (Exception, SystemExit):
            # Best-effort: any failure (including the program calling
            # exit()) counts as an error, but KeyboardInterrupt still
            # propagates so the run can be stopped.
            compilation_error_num += 1
            predictions.append("")
    result = evaluate(predictions, golds)
    return result, compilation_error_num


# Prompt for the run configuration. Surrounding whitespace (easy to paste in
# by accident) is stripped so it does not end up inside a path.
data_path = input("Test Data Path: ").strip()
hints_model_path = input(
    "Hints Model Path (leave blank if you don't need hints): ").strip()
code_model_path = input("Code Model Path: ").strip()

# Read the test set with an explicit encoding so non-ASCII questions load the
# same way regardless of the platform's default locale encoding.
with open(data_path, 'r', encoding="utf-8") as file:
    data = json.load(file)

# Each item must provide a 'question' and a 'num_answer'. Thousands
# separators are removed from the answer before converting it to float.
questions = [item['question'] for item in data]
golds = [float(str(item['num_answer']).replace(",", "")) for item in data]

result, compilation_error_num = inference(
    questions, golds, hints_model_path, code_model_path)

print("The number of compilation errors:", compilation_error_num)
print("Accuracy:", result)