# Evaluate the minimal-edit Tuned LLM

## Imports

In [None]:
from tqdm.notebook import tqdm
from prompts import minimal_prompt, fluency_prompt
from os import path, makedirs
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)
import torch
from torch.utils.data import DataLoader
from datasets import load_from_disk
from peft import AutoPeftModelForCausalLM

## Variables

In [None]:
# Run configuration: which fine-tuned variant of which base model to evaluate.
MAX_LENGTH = 2048  # Token limit used during inference; well above the max essay length
model_label = "Viking-7B"
version = "minimal"

# Fine-tuned checkpoints are stored under models/<model_label>/<version>.
model_path = path.join("models", model_label, version)

## Load Model and Dataset

## Ensure GPU is available

In [None]:
# The cells below place the model and its inputs on a CUDA device, so fail
# fast with a clear message when none is present. This notebook only runs
# inference, hence the wording of the error.
if not torch.cuda.is_available():
    raise RuntimeError("GPU is not available for inference!")
device = "cuda:0"

In [None]:
# Instruction prompt for each supported edit style; `version` picks the one
# used for this run (an unknown version raises KeyError here).
prompts = dict(minimal=minimal_prompt, fluency=fluency_prompt)
prompt = prompts[version]

In [None]:
# Load the fine-tuned PEFT checkpoint with 4-bit quantization.
# A bitsandbytes-quantized model has to be placed on its device at load time
# through `device_map`; moving it afterwards with `.to(device)` is rejected
# by transformers for 4-bit/8-bit models.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    load_in_4bit=True,
    device_map={"": device},  # put every module on the single target GPU
)
model.eval()  # inference only: make sure dropout and similar layers are disabled

# The tokenizer is read from the same directory as the fine-tuned checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [None]:
# Datasets are stored on disk per edit style under datasets/<version>;
# only the "test" split is evaluated in this notebook.
dataset_path = path.join("datasets", version)
dataset = load_from_disk(dataset_path)
test_dataset = dataset["test"]

In [None]:
# Number of essays sent through the model per generation call.
batch_size = 2

# Batches are drawn from the test split in its stored order
# (DataLoader does not shuffle unless asked to).
dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size)

## Run Model Inference

In [None]:
# Maps essay id -> text generated by the model for that essay.
results = {}

# A decoder-only model continues from the last position of every row, so
# batched prompts must be padded on the left. With right padding the pad
# tokens would sit between the prompt and the generated continuation.
tokenizer.padding_side = "left"

for batch in tqdm(dataloader):
    # Instruction-style prompt: instructions, the source essay, then an open
    # output section for the model to complete.
    inputs = [
        f"### Instruktioner:\n{prompt}\n### Indata:\n{source}\n### Utdata:\n\n"
        for source in batch["source"]
    ]

    # Pad only up to the longest prompt in the batch instead of a fixed
    # MAX_LENGTH; this avoids attending over thousands of pad positions.
    model_inputs = tokenizer(
        inputs,
        padding=True,
        return_tensors="pt",
    ).to(device)
    prompt_length = model_inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=model_inputs["input_ids"],
            attention_mask=model_inputs["attention_mask"],
            max_new_tokens=MAX_LENGTH,
            pad_token_id=tokenizer.pad_token_id,
        )

    # `generate` returns prompt + continuation for each row. Because all
    # prompts share the same (left-padded) length, slicing off the first
    # `prompt_length` tokens leaves exactly the newly generated tokens, so
    # no string matching on the prompt markers is needed.
    generated_tokens = outputs[:, prompt_length:]
    essay_texts = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    for essay_id, essay_text in zip(batch["id"], essay_texts):
        results[essay_id] = essay_text

In [None]:
def _essay_section(essay_id, essay):
    """Return one markdown section: an id header line followed by the essay text."""
    return f"### essay_id = {essay_id}\n{essay}\n\n"


# One section per essay, in the order the results were produced, then
# concatenated into the full document.
essays_md = [_essay_section(essay_id, essay) for essay_id, essay in results.items()]
output_md = "".join(essays_md)

Create the path to the output file. Its format depends on the edit style, as shown below.

| Version      | Path                                                                   |
| ------------ | ---------------------------------------------------------------------- |
| Minimal Edit | `./outputs/<model_label>/<version>/sv-swell_gold-hypo-test.md`         |
| Fluency Edit | `./outputs/<model_label>/<version>/sv-swell_gold-fluency-hypo-test.md` |


In [None]:
# Only the fluency variant carries an extra infix in its file name.
if version == "fluency":
    infix = "-fluency"
else:
    infix = ""
output_file_name = f"sv-swell_gold{infix}-hypo-test.md"

# Outputs live under outputs/<model_label>/<version>; create the directory
# on first use so the write below cannot fail on a missing folder.
output_dir = path.join("outputs", model_label, version)
makedirs(output_dir, exist_ok=True)
output_file_md = path.join(output_dir, output_file_name)

Write the full output string to the output file.

In [None]:
with open(output_file_md, "w+") as f:
    f.write(output_md)