Here, we run the fine-tuned model on the test dataset to check whether it has learned the definitions from the training dataset.

## 1 - Load the model and the test dataset

In [None]:
import torch
import os

# --- Session configuration ---------------------------------------------------
# `date` is the suffix of the training session folder ("results_<date>").
date = "09_02_2025-14h_17min"  # here, put the name of the folder of the training sessions
session_path = f"../bucket/fine_tuning_acronym/sessions/results_{date}"
checkpoint_name = "checkpoint-150"  # here, put the checkpoint name (inside the training session folder)

# Derived locations: checkpoint to load, input data, and output folder for test results.
model_path = os.path.join(session_path, "model", checkpoint_name)
data_dir = "../bucket/fine_tuning_acronym/data"
test_dir = os.path.join(session_path, "tests")

# exist_ok=True makes the creation idempotent and avoids the check-then-create
# race of testing os.path.exists() first.
os.makedirs(test_dir, exist_ok=True)

# Datatype used when loading the model.
dtype = torch.bfloat16

# Summary of the configuration, so a wrong path is spotted before loading the model.
print(f"""
    Model will be loaded from : {model_path},
    Datatype: {dtype},
    Tests will be saved at : {test_dir}
    Loads test data from : {data_dir}.
""")

In [None]:
# Loads data for evaluation

import json
import os

# Location of the held-out test set inside the data folder.
path_test_dataset = os.path.join(data_dir, "test_dataset.json")
print(f"Loading eval data from : {path_test_dataset}")

# Read explicitly as UTF-8: without it, open() uses the platform's default
# encoding, which can mis-decode non-ASCII characters in the JSON file.
with open(path_test_dataset, "rt", encoding="utf-8") as f:
    test_dataset = json.load(f)

print(test_dataset[1])  # example of an element of the dataset

In [None]:
from transformers import pipeline

# Text-generation pipeline built from the selected checkpoint.
pl = pipeline(
    "text-generation",
    model=model_path,    # checkpoint folder inside the training session
    torch_dtype=dtype,   # datatype chosen in the configuration cell
    do_sample=True,
    max_new_tokens=50,
)

In [None]:
# Smoke test: send one tiny prompt to check that the pipeline can generate.
# The end-of-sequence token id is reused as the padding token id.
eos_id = pl.tokenizer.eos_token_id
pl("1+1 ?", pad_token_id=eos_id)

## 2 - Model evaluation

Now that the model is trained, we can evaluate it automatically. To do so, we first ask the model every question in our test dataset.

In [None]:
# One single-message chat per test entry: only the first message of the first
# conversation (presumably the user question) is sent to the model.
all_test_convs = [
    [each_acro["conversation"][0][0]] for each_acro in test_dataset
]

# Pass pad_token_id explicitly, exactly as the availability check does, so both
# generation calls use the same padding token instead of an implicit fallback.
answers_raw = pl(all_test_convs, pad_token_id=pl.tokenizer.eos_token_id)  # ask all the questions

print(json.dumps(answers_raw[0], indent=4))  # example of answer

In [None]:
# Pair every generated conversation with its test entry (same position in both
# lists) and keep only the fields needed for evaluation.
answer_dataset = []

for position, generation in enumerate(answers_raw):
    sample = test_dataset[position]
    # Message 0 is read as the question, message 1 as the model's reply.
    turns = generation[0]["generated_text"]
    record = {
        "question": turns[0]["content"],
        "answer": turns[1]["content"],
        "expected_answer": sample["conversation"][0][1]["content"],
        "ground_truth": sample["ground_truth"],
        "acronym": sample["acronym"],
    }
    answer_dataset.append(record)

In [None]:
# Display the second record as an example: question, model answer,
# expected answer, ground truth and acronym.
answer_dataset[1] # example

In [None]:
# The answers are stored next to the other test artefacts of this session.
save_answer_dataset = os.path.join(test_dir, "answer_dataset.json")

print(f"Saving answer dataset to {save_answer_dataset}.")

# Serialize the whole list and write it out in one call.
with open(save_answer_dataset, "wt") as out_file:
    out_file.write(json.dumps(answer_dataset))
