# Evaluate the minimal-edit Tuned LLM

## Imports

In [None]:
from tqdm.notebook import tqdm
from prompts import minimal_prompt, fluency_prompt
from os import path, makedirs
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)
import torch
from datasets import load_from_disk

## Variables

In [None]:
# Sequence-length bound used when tokenizing prompts
# (presumably the model's context size in tokens — TODO confirm).
MAX_LENGTH = 4096

# Which fine-tuned model to evaluate: base-model label plus edit style.
# `version` also selects the prompt, the dataset directory and the output path.
model_label = "Viking-7B"
version = "minimal"

# Directory the model and tokenizer are loaded from: models/<model_label>/<version>
model_path = path.join("models", model_label, version)

## Load Model and Dataset

### Ensure GPU is available

In [None]:
# Tokenized inputs are moved to the GPU during inference, so fail fast with a
# clear error when no CUDA device is present (this notebook evaluates a model;
# it does not train one).
if not torch.cuda.is_available():
    raise RuntimeError("GPU is not available for inference!")
device = "cuda:0"  # first CUDA device

In [None]:
# Instruction text for each supported edit style, keyed by the `version` name.
prompts = dict(minimal=minimal_prompt, fluency=fluency_prompt)

# Instruction used for this run; raises KeyError for an unknown `version`.
prompt = prompts[version]

In [None]:
# Load the weights 4-bit quantized (NF4) and compute in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the fine-tuned model from `model_path`, let `device_map="auto"` place it,
# and switch it to evaluation mode (`eval()` returns the model itself).
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    quantization_config=quantization_config,
).eval()

# The tokenizer lives next to the model; the EOS token doubles as padding token.
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

In [None]:
# The dataset for the selected edit style is stored under datasets/<version>.
dataset_path = path.join("datasets", version)
dataset = load_from_disk(dataset_path)
# Only the "test" split is used for evaluation in this notebook.
test_dataset = dataset["test"]

## Run Model Inference

In [None]:
# Hard-coded length of the longest sequence
# (presumably measured in tokens on the dataset — TODO confirm).
longest_sequence = 849

# Generation budget: the longest sequence plus 20% headroom, truncated to an int.
max_new_tokens = int(longest_sequence * 1.2)
print(longest_sequence)

In [None]:
# Maps each test example's id to the text the model generated for it.
results = {}

for example in tqdm(test_dataset):

    # Same instruction / input / output layout as the prompt template above;
    # the model is expected to continue after the "### Utdata:" header.
    input_prompt = f"### Instruktioner:\n{prompt}\n\n### Indata:\n{example['source']}\n\n### Utdata:\n"

    # Tokenize the single prompt WITHOUT padding. Examples are generated one
    # at a time, so padding to a fixed length is unnecessary: it inflates
    # every forward pass to the padded length, and with right-side padding
    # the model would be asked to continue after a run of pad tokens rather
    # than after the prompt. As before, the prompt is not truncated.
    model_inputs = tokenizer(input_prompt, return_tensors="pt")
    input_ids = model_inputs["input_ids"].to(device)
    attention_mask = model_inputs["attention_mask"].to(device)
    prompt_length = input_ids.shape[-1]

    # Generate
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            # Pass the pad id explicitly so generate() does not have to guess it.
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens. The returned sequence starts
    # with the prompt tokens, so slicing them off is more reliable than
    # searching the decoded text for the "### Utdata:" header (which would
    # pick the wrong span if the model emitted that header again).
    generated_ids = outputs[0][prompt_length:]
    essay_text = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Clean and save
    clean = essay_text.strip()
    results[example["id"]] = clean

## Create Output

Create a Markdown-structured string to store each output essay.

In [None]:
# One Markdown section per essay: a "### essay_id = <id>" heading, the essay
# text, then a blank line. Sections follow the insertion order of `results`.
essays_md = [
    f"### essay_id = {key}\n{text}\n\n" for key, text in results.items()
]

# Concatenate all sections into the final document string.
output_md = "".join(essays_md)

Create the path to the output file, which has the following format depending on the edit style.

| Version      | Path                                                                   |
| ------------ | ---------------------------------------------------------------------- |
| Minimal Edit | `./outputs/<model_label>/<version>/sv-swell_gold-hypo-test.md`         |
| Fluency Edit | `./outputs/<model_label>/<version>/sv-swell_gold-fluency-hypo-test.md` |


In [None]:
# Outputs are grouped as outputs/<model_label>/<version>/; create it if missing.
output_dir = path.join("outputs", model_label, version)
makedirs(output_dir, exist_ok=True)

# Only the fluency variant carries an extra "-fluency" infix in the file name.
if version == "fluency":
    infix = "-fluency"
else:
    infix = ""

output_file_name = f"sv-swell_gold{infix}-hypo-test.md"
output_file_md = path.join(output_dir, output_file_name)

Write the full output string to the output file.

In [None]:
# Write as UTF-8 explicitly: the essays are presumably Swedish (non-ASCII
# characters such as å/ä/ö), and the platform default encoding is not UTF-8
# everywhere. Plain "w" is enough because the file is only written, never read.
with open(output_file_md, "w", encoding="utf-8") as f:
    f.write(output_md)