# Baseline Evaluation for Helpdesk models

The folder `<project_root>/runs/helpdesk` contains one folder per training split, named `<percentage>%`, where `<percentage>` is the percentage of the dataset used to train the model.
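Each of these folders contains the training runs for that percentage; for every run there is a folder named after the best model found during the training phase according to validation accuracy (the `*.best_val_acc.pth` checkpoint name without the `.pth` extension).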
Inside each of these folders we have the following files:
- `constraints_satisfaction_rate.csv`: a CSV file containing the constraints satisfaction rate for each of the test traces.
- `constraints_satisfactions.csv`: a CSV file containing the constraints satisfaction for each of the test traces.
- `predicted_traces.txt`: a TXT file containing the traces generated by the model for each of the test traces.
- `predictions.csv`: a CSV file containing the predictions step by step for each of the test traces.
- `results.json`: a JSON file containing the results of the evaluation of the model on the test set.

In [1]:
# Dataset analysed in this notebook; it is used as the name of the sub-folder
# of `<project_root>/runs` that is searched for model checkpoints.
DATASET_NAME = "helpdesk"

In [2]:
from collections import namedtuple
import pathlib

# Absolute path of the project root.
# NOTE: `.parent` is purely lexical, so `Path("../../..").parent` is `../..`;
# the root therefore resolves to two levels above the working directory.
project_root = pathlib.Path("../../..").parent.resolve()

# Files that belong to one trained model:
#   model_args   -> `args.json` stored next to the checkpoint
#   model_path   -> the `*.best_val_acc.pth` checkpoint itself
#   results_path -> `results.json` inside the folder named like the
#                   checkpoint without its `.pth` extension
Info = namedtuple("Info", ["model_args", "model_path", "results_path"])

# Maps each training-set percentage (20, 40, ..., 100) to the models found
# for it under `<project_root>/runs/<DATASET_NAME>`.
models_path: dict[int, list[Info]] = {}

runs_root = project_root / "runs" / DATASET_NAME

for dataset_percentage in range(20, 101, 20):
    # Sorted so that the order of the models does not depend on the order in
    # which the filesystem happens to list directory entries.
    checkpoints = sorted(
        runs_root.rglob(f"{dataset_percentage}%/**/*.best_val_acc.pth")
    )
    # Derive every related path from the checkpoint directly instead of
    # zipping three parallel lists (which also shadowed the `args` name).
    models_path[dataset_percentage] = [
        Info(
            model_args=checkpoint.parent / "args.json",
            model_path=checkpoint,
            results_path=checkpoint.with_suffix("") / "results.json",
        )
        for checkpoint in checkpoints
    ]

## Comparison

In [3]:
import json
import pandas as pd

# Column order shared by every per-percentage results table.
result_columns = [
    "lr",
    "dropout",
    "loss",
    "acc",
    "dld",
    "norm_dld",
    "constraints",
    "constraints_multiplier",
]

# One table per training-set percentage. Each row describes one model and is
# indexed by the name of the folder that contains its checkpoint.
dataframes = {}

for percentage, infos in models_path.items():
    table = pd.DataFrame(columns=result_columns)
    dataframes[percentage] = table

    for info in infos:
        # A missing `args.json` is deliberately not handled here: it raises
        # and stops the cell.
        with open(info.model_args, encoding="utf-8") as f:
            args = json.load(f)

        # Only opening the results file is guarded, so the handler cannot hide
        # a FileNotFoundError coming from anywhere else.
        try:
            with open(info.results_path, encoding="utf-8") as f:
                results = json.load(f)
        except FileNotFoundError:
            print(f"Missing results for {info.model_path}")
            continue

        print(f"Processing {info.model_path}")
        # Assigning to a new `.loc` label appends the row; assigning to an
        # existing label overwrites it.
        table.loc[info.model_path.parent.name] = [
            args["learning_rate"],
            args["model"]["dropout"],
            results["loss"],
            results["acc"],
            results["dld"],
            results["norm_dld"],
            # Optional hyper-parameters: None when the run did not set them.
            args.get("constraints", None),
            args.get("constraints_multiplier", None),
        ]

Processing /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/20%/20250303.2239.no_constraint/model.epoch_20.best_val_acc.pth
Processing /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/20%/20250303.2242.no_constraint/model.epoch_96.best_val_acc.pth
Processing /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/20%/20250303.2340.no_constraint/model.epoch_82.best_val_acc.pth
Processing /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/20%/20250303.2341.no_constraint/model.epoch_18.best_val_acc.pth
Processing /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/40%/20250303.2242.no_constraint/model.epoch_34.best_val_acc.pth
Processing /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/40%/20250303.2243.no_constraint/model.epoch_8.best_val_acc.pth
Processing /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/40%/20250303.2244.no_constraint/model.epoch_58.best_val_acc.pth
Processing /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/4

In [4]:
# For every training-set percentage, show all of its models ranked from the
# highest to the lowest accuracy, under a `========== N% ==========` banner.
banner_rule = "=" * 10
for percentage in range(20, 101, 20):
    banner = banner_rule + f" {percentage}% " + banner_rule
    print(banner)
    ranked = dataframes[percentage].sort_values("acc", ascending=False)
    display(ranked)
    print()



Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2239.no_constraint,0.001,0.0,0.119979,0.843621,2.846154,0.117233,[],
20250303.2242.no_constraint,0.0001,0.2,0.113159,0.841564,2.923077,0.11874,[],
20250303.2340.no_constraint,0.0001,0.0,0.114421,0.841564,2.923077,0.11874,[],
20250303.2341.no_constraint,0.001,0.2,0.116057,0.839506,2.923077,0.119636,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2244.no_constraint,0.001,0.2,0.151329,0.851249,2.854167,0.12716,[],
20250303.2243.no_constraint,0.0001,0.0,0.1228,0.849077,2.895833,0.128483,[],
20250303.2245.no_constraint,0.0001,0.2,0.122933,0.849077,2.895833,0.128483,[],
20250303.2242.no_constraint,0.001,0.0,0.141922,0.846906,2.9375,0.126477,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2246.no_constraint,0.001,0.0,0.081523,0.876027,2.59375,0.089562,[],
20250303.2248.no_constraint,0.001,0.2,0.081756,0.876027,2.59375,0.089562,[],
20250303.2247.no_constraint,0.0001,0.0,0.079187,0.87528,2.609375,0.09012,[],
20250303.2250.no_constraint,0.0001,0.2,0.079518,0.874533,2.625,0.091682,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2256.no_constraint,0.0001,0.2,0.143383,0.854144,2.463636,0.134317,[],
20250303.2253.no_constraint,0.0001,0.0,0.144588,0.853606,2.463636,0.134317,[],
20250303.2251.no_constraint,0.001,0.0,0.174217,0.85253,2.472727,0.139421,[],
20250303.2254.no_constraint,0.001,0.2,0.16927,0.850377,2.509091,0.137879,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2307.no_constraint,0.0001,0.0,0.122277,0.874892,2.206107,0.111077,[],
20250303.2309.no_constraint,0.001,0.2,0.140553,0.874892,2.206107,0.123032,[],
20250303.2312.no_constraint,0.0001,0.2,0.11946,0.873593,2.221374,0.119007,[],
20250303.2258.no_constraint,0.001,0.0,0.146295,0.872727,2.244275,0.12435,[],





In [5]:
# For every training-set percentage, show only the first row of the table
# after sorting by accuracy in descending order, i.e. the top-ranked model.
banner_rule = "=" * 10
for percentage in range(20, 101, 20):
    banner = banner_rule + f" {percentage}% " + banner_rule
    print(banner)
    ranked = dataframes[percentage].sort_values("acc", ascending=False)
    best = ranked.head(1)
    display(best)
    print()



Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2239.no_constraint,0.001,0.0,0.119979,0.843621,2.846154,0.117233,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2244.no_constraint,0.001,0.2,0.151329,0.851249,2.854167,0.12716,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2246.no_constraint,0.001,0.0,0.081523,0.876027,2.59375,0.089562,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2256.no_constraint,0.0001,0.2,0.143383,0.854144,2.463636,0.134317,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2307.no_constraint,0.0001,0.0,0.122277,0.874892,2.206107,0.111077,[],



