# Evaluate the Tuned LLM (minimal-edit or fluency-edit)

## Imports

In [None]:
from tqdm.notebook import tqdm
from prompts import minimal_prompt, fluency_prompt
from os import path, makedirs
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)
import torch
from datasets import load_from_disk

## Variables

Change the `version` and `model_label` variables to the version-model combination you want to generate texts with.

For instance `version = "fluency"` and `model_label = "Viking-13B"` evaluates the fluency-tuned Viking-13B model.


In [None]:
# Label of the tuned model and the correction style it was tuned for.
model_label = "Viking-7B"
version = "fluency"

# The tuned model is read from models/<model_label>/<version>.
model_path = path.join("models", model_label, version)

## Ensure GPU is available

In [None]:
# Fail fast when no CUDA device is visible: the generation loop further down
# moves the tokenized inputs to `device` before calling the model.
if not torch.cuda.is_available():
    raise RuntimeError("GPU is not available for inference!")
# Device that the tokenized inputs are moved to before generation.
device = "cuda:0"

## Load Prompt

Load the prompt corresponding to the correction style.

In [None]:
# One prompt per correction style; `version` selects which one is used.
# An unknown `version` raises KeyError here.
prompts = dict(minimal=minimal_prompt, fluency=fluency_prompt)
prompt = prompts[version]

## Load Model 

Since we trained the model with QLoRA, we need to load it with a quantization config.

The model is also set to evaluation mode, which disables training-only behaviour such as dropout during generation.

In [None]:
# 4-bit NF4 quantization with bfloat16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the tuned causal LM from `model_path` using the quantization settings.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    quantization_config=quantization_config,
)

# Switch the model to evaluation mode before generating.
model.eval()

## Load Tokenizer

In [None]:
# Load the tokenizer stored alongside the tuned model and pad on the left.
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.padding_side = "left"

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

# Mirror the padding settings on the model's generation config.
# NOTE(review): `pad_token_id` is the documented GenerationConfig field;
# confirm that the extra `pad_token` attribute is read anywhere.
model.generation_config.pad_token = tokenizer.pad_token
model.generation_config.pad_token_id = tokenizer.pad_token_id

## Load Dataset

Load the dataset corresponding to the correction style from disk.

In [None]:
# The dataset for the selected correction style is stored in datasets/<version>.
dataset_path = path.join("datasets", version)
dataset = load_from_disk(dataset_path)

# Keep a handle on the "test" split, which is what the generation loop iterates.
test_dataset = dataset["test"]

## Run Model Inference

Generate a correction for every example in the test set.

For each example, `max_new_tokens` (the maximum number of tokens that the model should generate) is set to the number of tokens in the tokenized reference text (`target`) multiplied by a "safety factor" of 1.2.

In [None]:
# Generate one correction per test example and store it under the example's id.
results = {}
bos = tokenizer.bos_token
eos = tokenizer.eos_token  # currently unused in this cell
# Header that precedes the answer in the prompt; everything after its last
# occurrence in the decoded text is kept as the model's output.
output_header = "### Utdata:"

for example in tqdm(test_dataset):
    source = example["source"]
    input_prompt = f"{prompt}\n### Indata:\n{source}\n### Utdata:\n{bos}"

    # Tokenize a single prompt (no padding or truncation needed)
    model_inputs = tokenizer(
        input_prompt, padding=False, truncation=False, return_tensors="pt"
    )
    # Budget for generation: 1.2x the token count of the reference text.
    tokenized_reference = tokenizer(
        example["target"], padding=False, truncation=False
    )
    # Clamp to at least one token so an empty tokenized reference cannot
    # request zero new tokens from `generate`.
    max_new_tokens = max(1, int(1.2 * len(tokenized_reference["input_ids"])))
    # Move to GPU
    model_inputs = {k: v.to(device) for k, v in model_inputs.items()}

    # Generate without tracking gradients
    with torch.no_grad():
        outputs = model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
        )

    # Decode the full sequence (prompt + continuation), dropping special tokens
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the text after the last output header and save it
    clean = output_text.split(output_header)[-1].strip()
    results[example["id"]] = clean

## Create Output

Create a Markdown-structured string to store each output essay.

In [None]:
# Render every generated essay as a Markdown section headed by its id,
# then concatenate the sections into one string.
essays_md = []
for essay_id, essay in results.items():
    essays_md.append(f"### essay_id = {essay_id}\n{essay}\n\n")
output_md = "".join(essays_md)

Create the path to the output file, which has the format below depending on the edit style.

| Version      | Path                                                                   |
| ------------ | ---------------------------------------------------------------------- |
| Minimal Edit | `./outputs/<model_label>/<version>/sv-swell_gold-hypo-test.md`         |
| Fluency Edit | `./outputs/<model_label>/<version>/sv-swell_gold-fluency-hypo-test.md` |


In [None]:
# File name: the fluency version gets an extra "-fluency" infix.
if version == "fluency":
    infix = "-fluency"
else:
    infix = ""
output_file_name = f"sv-swell_gold{infix}-hypo-test.md"

# Directory outputs/<model_label>/<version>, created if it does not exist yet.
output_dir = path.join("outputs", model_label, version)
makedirs(output_dir, exist_ok=True)

# Full path of the Markdown file to write.
output_file_md = path.join(output_dir, output_file_name)

Write the full output string to the output file.

In [None]:
# Write the Markdown string to the output file, replacing any existing content.
# The encoding is pinned to UTF-8 so non-ASCII characters in the essays are
# written the same way regardless of the platform's default encoding.
with open(output_file_md, "w", encoding="utf-8") as f:
    f.write(output_md)