# Baseline Evaluation for Helpdesk models

Inside the folder `<project_root>/runs/helpdesk` we have a list of folders named as `<percentage>%`, where `<percentage>` is the percentage of the dataset used for training the model.
In each of these folders we have a folder named as the best model found during the training phase based on the accuracy value.
Inside each of these folders we have the following files:
- `constraints_satisfaction_rate.csv`: a CSV file containing the constraints satisfaction rate for each of the test traces.
- `constraints_satisfactions.csv`: a CSV file containing the constraints satisfaction for each of the test traces.
- `predicted_traces.txt`: a TXT file containing the traces generated by the model for each of the test traces.
- `predictions.csv`: a CSV file containing the predictions step by step for each of the test traces.
- `results.json`: a JSON file containing the results of the evaluation of the model on the test set.

In [9]:
DATASET_NAME = "helpdesk"

In [10]:
from collections import namedtuple
import pathlib

project_root = pathlib.Path("../../..").parent.resolve()

Info = namedtuple("Info", ["model_args", "model_path", "results_path"])

models_path: dict[int, list[Info]] = {}

for dataset_percentage in range(20, 101, 20):
    checkpoints = [
        path
        for path in (project_root / "runs" / DATASET_NAME).rglob(
            f"{dataset_percentage}%/**/*.best_val_acc.pth"
        )
    ]
    results = [
        pathlib.Path(str(checkpoint).removesuffix(".pth")) / "results.json"
        for checkpoint in checkpoints
    ]
    args = [checkpoint.parent / "args.json" for checkpoint in checkpoints]
    models_path[dataset_percentage] = [
        Info(model_args=args, model_path=checkpoint, results_path=result)
        for args, checkpoint, result in zip(args, checkpoints, results)
    ]

## Comparison

In [11]:
import json
import pandas as pd

dataframes = {}

for percentage in models_path:
    dataframes[percentage] = pd.DataFrame(
        columns=[
            "lr",
            "dropout",
            "loss",
            "acc",
            "dld",
            "norm_dld",
            "constraints",
            "constraints_multiplier",
        ]
    )
    for info in models_path[percentage]:
        with open(info.model_args) as f:
            args = json.load(f)
        try:
            with open(info.results_path) as f:
                results = json.load(f)
                dataframes[percentage].loc[info.model_path.parent.name] = [
                    args["learning_rate"],
                    args["model"]["dropout"],
                    results["loss"],
                    results["acc"],
                    results["dld"],
                    results["norm_dld"],
                    args.get("constraints", None),
                    args.get("constraints_multiplier", None),
                ]
        except FileNotFoundError:
            print(f"Missing results for {info.model_path}")

Missing results for /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/80%/20250304.1305.constraints/model.epoch_2.best_val_acc.pth
Missing results for /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/80%/20250304.1315.constraints/model.epoch_60.best_val_acc.pth
Missing results for /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/80%/20250304.1324.constraints/model.epoch_3.best_val_acc.pth
Missing results for /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/80%/20250304.1333.constraints/model.epoch_85.best_val_acc.pth
Missing results for /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/80%/20250304.1341.constraints/model.epoch_36.best_val_acc.pth
Missing results for /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/80%/20250304.1350.constraints/model.epoch_93.best_val_acc.pth
Missing results for /home/salvatore/Workspace/pylon-experiments/runs/helpdesk/80%/20250304.1359.constraints/model.epoch_20.best_val_acc.pth
Missing results for /h

  dataframes[percentage].loc[info.model_path.parent.name] = [
  dataframes[percentage].loc[info.model_path.parent.name] = [
  dataframes[percentage].loc[info.model_path.parent.name] = [


In [12]:
for percentage in range(20, 101, 20):
    print("=" * 10 + f" {percentage}% " + "=" * 10)
    display(dataframes[percentage].sort_values("acc", ascending=False))
    print()



Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2239.no_constraint,0.001,0.0,0.119979,0.843621,2.846154,0.117233,[],
20250303.2242.no_constraint,0.0001,0.2,0.113159,0.841564,2.923077,0.11874,[],
20250303.2240.no_constraint,0.0001,0.0,0.114421,0.841564,2.923077,0.11874,[],
20250303.2241.no_constraint,0.001,0.2,0.116057,0.839506,2.923077,0.119636,[],
20250304.0025.constraints,0.0001,0.0,0.24167,0.839506,2.961539,0.131561,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250304.0111.constraints,0.0001,0.2,0.231207,0.839506,3.0,0.122863,"[Alternate Succession[Assign seriousness, Reso...",0.1
20250304.0022.constraints,0.001,0.2,0.245293,0.837449,3.038461,0.134861,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250304.0018.constraints,0.001,0.0,0.254159,0.835391,3.076923,0.137606,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250304.0057.constraints,0.0001,0.0,0.234063,0.833333,3.076923,0.137844,"[Alternate Succession[Assign seriousness, Reso...",0.1
20250304.0029.constraints,0.0001,0.2,0.239476,0.833333,3.076923,0.135167,"[Chain Succession[Resolve ticket, Closed]]",0.1





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2244.no_constraint,0.001,0.2,0.151329,0.851249,2.854167,0.12716,[],
20250303.2243.no_constraint,0.0001,0.0,0.1228,0.849077,2.895833,0.128483,[],
20250303.2245.no_constraint,0.0001,0.2,0.122933,0.849077,2.895833,0.128483,[],
20250304.0239.constraints,0.0001,0.0,0.235462,0.849077,2.895833,0.118674,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250303.2242.no_constraint,0.001,0.0,0.141922,0.846906,2.9375,0.126477,[],
20250304.0246.constraints,0.0001,0.2,0.225635,0.846906,2.9375,0.123492,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250304.0253.constraints,0.001,0.0,0.212228,0.840391,3.0625,0.148805,"[Alternate Succession[Assign seriousness, Reso...",0.1
20250304.0337.constraints,0.0001,0.0,0.223734,0.837134,3.125,0.13731,"[Alternate Succession[Assign seriousness, Reso...",0.1
20250304.0225.constraints,0.001,0.0,0.207805,0.836048,3.104167,0.125194,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250304.0232.constraints,0.001,0.2,0.211447,0.836048,3.145833,0.131878,"[Chain Succession[Resolve ticket, Closed]]",0.1





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2246.no_constraint,0.001,0.0,0.081523,0.876027,2.59375,0.089562,[],
20250303.2248.no_constraint,0.001,0.2,0.081756,0.876027,2.59375,0.089562,[],
20250303.2247.no_constraint,0.0001,0.0,0.079187,0.87528,2.609375,0.09012,[],
20250303.2250.no_constraint,0.0001,0.2,0.079518,0.874533,2.625,0.091682,[],
20250304.0640.constraints,0.0001,0.2,0.184343,0.867812,2.765625,0.096351,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250304.0630.constraints,0.0001,0.0,0.178658,0.867065,2.78125,0.096839,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250304.0611.constraints,0.001,0.0,0.164819,0.864824,2.828125,0.098027,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250304.0620.constraints,0.001,0.2,0.163577,0.864824,2.828125,0.09797,"[Chain Succession[Resolve ticket, Closed]]",0.1
20250304.0837.constraints,0.0001,0.2,0.172478,0.851382,3.078125,0.108063,"[Alternate Succession[Assign seriousness, Reso...",0.1
20250304.0723.constraints,0.001,0.2,0.165745,0.851382,3.109375,0.105468,"[Alternate Succession[Assign seriousness, Reso...",0.1





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2256.no_constraint,0.0001,0.2,0.143383,0.854144,2.463636,0.134317,[],
20250303.2253.no_constraint,0.0001,0.0,0.144588,0.853606,2.463636,0.134317,[],
20250303.2251.no_constraint,0.001,0.0,0.174217,0.85253,2.472727,0.139421,[],
20250303.2254.no_constraint,0.001,0.2,0.16927,0.850377,2.509091,0.137879,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2307.no_constraint,0.0001,0.0,0.122277,0.874892,2.206107,0.111077,[],
20250303.2309.no_constraint,0.001,0.2,0.140553,0.874892,2.206107,0.123032,[],
20250303.2312.no_constraint,0.0001,0.2,0.11946,0.873593,2.221374,0.119007,[],
20250303.2258.no_constraint,0.001,0.0,0.146295,0.872727,2.244275,0.12435,[],





In [13]:
for percentage in range(20, 101, 20):
    print("=" * 10 + f" {percentage}% " + "=" * 10)
    display(dataframes[percentage].sort_values("acc", ascending=False).head(1))
    print()



Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2239.no_constraint,0.001,0.0,0.119979,0.843621,2.846154,0.117233,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2244.no_constraint,0.001,0.2,0.151329,0.851249,2.854167,0.12716,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2246.no_constraint,0.001,0.0,0.081523,0.876027,2.59375,0.089562,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2256.no_constraint,0.0001,0.2,0.143383,0.854144,2.463636,0.134317,[],





Unnamed: 0,lr,dropout,loss,acc,dld,norm_dld,constraints,constraints_multiplier
20250303.2307.no_constraint,0.0001,0.0,0.122277,0.874892,2.206107,0.111077,[],



