In [2]:
import os
from typing import Literal
from datetime import datetime

which_infra:Literal["onyxia", "datalab_gcp", "local"] = os.environ["WHICH_INFRA"] if "WHICH_INFRA" in os.environ else "local"
date = datetime.now().strftime("%m_%d_%Y-%Hh_%Mmin")


training_dir: str | None = None # change to the name of the folder were the trained model is located ex : 


if training_dir is None :
    model_name: str = "meta-llama/Llama-3.2-1B-Instruct" # default to pre-trained model
    data_dir = "../bucket/data"
    test_dir = f"../bucket/tests/{date}"
else:
    match which_infra:
        case "onyxia":
            test_dir = "../bucket/test"
            model_name = os.path.join("../bucket/models/", training_dir)
            data_dir = "../bucket/data"
            test_dir = os.path.join("../bucket/tests", date)
        case "local":
            model_name = os.path.join("../bucket/models/", training_dir)
            data_dir = "../bucket/data"
            test_dir = os.path.join("../bucket/tests", date)
        case "datalab_gcp":
            model_name = os.path.join("../../bucket/models/", training_dir)
            data_dir = "../../bucket/data"
            test_dir = os.path.join("../../bucket/tests", date)
        case _:
            raise ValueError(f"Unexpected value for environment variable WHICH_INFRA : '{which_infra}'. Accepted values are : 'onyxia', 'datalab_gcp' and 'local'.")

os.mkdir(test_dir)
print(f"Created test dir at {test_dir}")

print(f"""
    Running on : {which_infra},
    Model will be loaded from : {model_name},
    Tests will be saved at : {test_dir}
""")

Created test dir at ../bucket/tests/05_12_2025-15h_18min

    Running on : local,
    Model will be loaded from : meta-llama/Llama-3.2-1B-Instruct,
    Tests will be saved at : ../bucket/tests/05_12_2025-15h_18min



In [None]:
import json
import os

# The evaluation set is a JSON file stored in the data directory chosen by the configuration cell.
path_eval_dataset = os.path.join(data_dir, "eval_dataset.json")
print(f"Loading eval data from : {path_eval_dataset}")

# Pin the encoding: JSON files are UTF-8, while open() would otherwise use the platform default.
with open(path_eval_dataset, "rt", encoding="utf-8") as f:
    eval_dataset = json.load(f)

print(eval_dataset[19]) # example of test

In [None]:
from transformers import pipeline

# Build the text-generation pipeline from `model_name`, which the configuration cell sets to
# either the pre-trained model id or the path of a trained model inside the bucket.
pl = pipeline("text-generation", model=model_name)


In [None]:
# Smoke test: send one short prompt to check that the model is loaded and able to generate.
# pad_token_id is set to the tokenizer's end-of-sequence token id for this call.
pl("1+1 ?", pad_token_id=pl.tokenizer.eos_token_id) # test model availability

In [None]:
from wordllama import WordLlama

# Load pre-trained WordLlama embeddings (truncate dimension to 64).
# `wl` is read as a global by test_model to score generated answers against the ground truth.
wl = WordLlama.load(trunc_dim=64)


def test_model(pl, dataset_elem):
    """Ask the model one evaluation question and score its answer against the ground truth.

    Args:
        pl: callable generation pipeline exposing ``tokenizer.eos_token_id``.
        dataset_elem: evaluation entry providing the keys ``"conversation"``,
            ``"ground_truth"`` and ``"acronym"``. Only the first message of the first
            conversation is sent to the model (presumably a chat message dict with
            "role"/"content" keys -- confirm against the dataset).

    Returns:
        dict with the entry's ``"acronym"`` and ``"sim_with_ground_truth"``, the absolute
        value of the similarity computed by the module-level ``wl`` between the ground
        truth and the generated answer.
    """
    first_message = dataset_elem["conversation"][0][0]
    outputs = pl([first_message], pad_token_id=pl.tokenizer.eos_token_id)
    # The generated text is indexed like a message list: position 1 is read as the
    # model's reply to the single message that was sent.
    generated_chat = outputs[0]['generated_text']
    reply = generated_chat[1]['content']
    similarity = wl.similarity(dataset_elem["ground_truth"], reply)
    result = {"acronym": dataset_elem["acronym"]}
    result["sim_with_ground_truth"] = abs(similarity)
    return result

import tqdm
# Evaluate every entry of the eval dataset, collecting one result dict per entry.
# tqdm wraps the iteration to show progress, since each entry triggers a model generation.
test_result = []
for data_elem in tqdm.tqdm(eval_dataset):
    test_result.append(test_model(pl, data_elem))

In [None]:
import pandas as pd
# One row per evaluated entry; the columns are the keys returned by test_model
# ("acronym" and "sim_with_ground_truth").
df = pd.DataFrame(test_result)

In [None]:
# Raise pandas' row display limit to 300, then show the results table.
pd.set_option("display.max_rows", 300)
df

In [None]:
# Name the results after the model that was actually evaluated: when training_dir is None the
# pre-trained (non fine-tuned) model ran, so its scores must not be stored under the "ft" name.
result_file = "test_result_no_ft.csv" if training_dir is None else "test_result_ft.csv"
df.to_csv(f"./{result_file}")
# Also keep a copy in this run's dated test directory, created by the configuration cell.
df.to_csv(os.path.join(test_dir, result_file))

In [None]:
# Reload the saved results of both evaluations: without fine-tuning ("no_ft") and with it ("ft").
# Both CSV files are expected in the current working directory; index_col=0 reads the first
# column back as the index.
df_no_ft = pd.read_csv("test_result_no_ft.csv", index_col=0)
df_ft = pd.read_csv("test_result_ft.csv", index_col=0)

In [None]:
# Side-by-side comparison table: start from the fine-tuned run's acronyms, then attach both
# similarity columns. Values are aligned on the fine-tuned frame's index.
dg = df_ft[["acronym"]].assign(
    sim_no_ft=df_no_ft["sim_with_ground_truth"],
    sim_ft=df_ft["sim_with_ground_truth"],
)

In [None]:
# Display the comparison table (similarity without and with fine-tuning, per acronym).
dg

In [None]:
# Average similarity with the ground truth, as a pair: (fine-tuned, not fine-tuned).
(dg["sim_ft"].mean(), dg["sim_no_ft"].mean())

In [None]:
# Manual spot check on a single acronym question.
# pad_token_id is passed explicitly, like in the other generation calls of this notebook,
# so the call does not depend on the pipeline picking a padding token by itself.
pl("What is ESF ?", pad_token_id=pl.tokenizer.eos_token_id)