# Evaluate the Minimal-Edit Fine-Tuned LLM

## Imports

In [None]:
import os
from os import path

from tqdm.notebook import tqdm

from utils import load_finetuned_model_tokenizer, get_dataset, get_prompt

## Variables

In [None]:
# Name of the fine-tuned variant under evaluation; handed to the loader helpers.
version = "minimal"
# Token limit for tokenization and generation (well above the max essay length).
max_length = 2_048
# Decoded model outputs, filled in by the inference cell and keyed by essay id.
results = dict()

## Load Model and Dataset

In [None]:
# All three resources are selected by `version`.
# Prompt text; the inference cell prepends it to every essay before tokenizing.
prompt = get_prompt(version)
# Model/tokenizer pair; the inference cell calls `model.generate` and
# `tokenizer.decode`, so these are presumably Hugging Face objects — confirm in utils.
model, tokenizer = load_finetuned_model_tokenizer(version)
dataset = get_dataset(version)
# Only the "test" split is evaluated in this notebook.
test_dataset = dataset["test"]

## Run Model Inference

In [None]:
# Generate one output per test example and store the decoded text in
# `results`, keyed by the example's id.
for example in tqdm(test_dataset):
    essay_id = example["id"]
    source = prompt + example["source"]
    # Encode the single sequence at its natural length. Padding one example to
    # `max_length` only adds pad tokens the model then has to process, and it
    # fills the whole length budget before generation starts. Truncation is
    # requested explicitly so an over-long input is capped at `max_length`.
    inputs = tokenizer(
        source,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    )
    # Forward the tokenizer's attention mask (when it provides one) instead of
    # leaving `generate` to infer it from the token ids.
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
        max_length=max_length,
    )
    # A single sequence was submitted, so the first row is the only result.
    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    results[essay_id] = decoded_output

## Write Results to File

In [None]:
# Write all hypotheses to a single Markdown file, one
# "### essay_id = <id>" section per essay.
output_dir = "hypotheses"
output_file = "sv-swell_gold-hypo-test.md"
output_path = path.join(output_dir, output_file)

# Create the target directory if needed so the write cannot fail on a missing
# folder after inference has already finished.
os.makedirs(output_dir, exist_ok=True)

# Write-only mode is enough (the file is never read back), and UTF-8 is set
# explicitly so the result does not depend on the platform's default encoding.
with open(output_path, "w", encoding="utf-8") as f:
    f.writelines(
        f"### essay_id = {essay_id}\n{essay}\n\n"
        for essay_id, essay in results.items()
    )