In [78]:
from datasets.ml2hp import MotionLeap2Dataset
from datasets.mmhgdhgr import MultiModalHandGestureDatasetForHandGestureRecognition
from os.path import join, isdir
from os import listdir
import yaml
import pandas as pd

# Utilities

In [79]:
def parse_results(checkpoints_path, dataset_name):
    """Summarise every checkpoint under ``checkpoints_path`` as one table row.

    Layout read by this function::

        <checkpoints_path>/<checkpoint>/cfg.yaml
        <checkpoints_path>/<checkpoint>/<subject>/metrics.yaml

    A sub-folder of a checkpoint counts as a finished subject run when it
    contains a ``metrics.yaml``; only the first entry of that file is used.
    The per-subject metrics are reduced to their mean and standard deviation
    and rendered as ``"mean ± std"`` strings, then a handful of settings from
    ``cfg.yaml`` are attached to the row.

    Args:
        checkpoints_path: Directory holding one sub-directory per checkpoint.
            Entries that are not directories are ignored.
        dataset_name: One of ``"ml2hp"``, ``"mmhgdhgr"`` or ``"tiny_hgr"``.
            It is only validated; it does not otherwise affect the parsing.

    Returns:
        A ``pandas.DataFrame`` with one row per checkpoint that has at least
        one finished subject, using LaTeX-ready column labels, sorted by image
        backbone and then (numerically) by parameter count. If no checkpoint
        qualifies, an empty frame with the same columns is returned.

    Raises:
        AssertionError: If ``dataset_name`` is not one of the accepted names.
    """
    assert dataset_name in ["ml2hp", "mmhgdhgr", "tiny_hgr"], (
        "Dataset name must be 'ml2hp', 'mmhgdhgr' or 'tiny_hgr'."
    )

    # Settings copied verbatim from each checkpoint's cfg.yaml into its row.
    cfg_keys = [
        "validation",
        "normalize_landmarks",
        "image_backbone_name",
        "landmarks_backbone_name",
        "use_horizontal_landmarks",
        "use_vertical_landmarks",
        "use_horizontal_image",
        "use_vertical_image",
    ]
    # Raw column name -> label used in the manuscript tables. The insertion
    # order of this mapping is also the column order of the returned frame.
    column_labels = {
        "validation": "Validation",
        "image_backbone_name": "Image backbone",
        "use_horizontal_image": "H image",
        "use_vertical_image": "V image",
        "landmarks_backbone_name": "Landmarks backbone",
        "use_horizontal_landmarks": "H landmarks",
        "use_vertical_landmarks": "V landmarks",
        "normalize_landmarks": "Normalized landmarks",
        "cls_acc_test": "Accuracy (\\%) $\\uparrow$",
        "cls_f1_test": "F1 (\\%) $\\uparrow$",
        "cls_prec_test": "Precision (\\%) $\\uparrow$",
        "cls_rec_test": "Recall (\\%) $\\uparrow$",
        "cls_loss_test": "Loss $\\downarrow$",
        "num_params_test": "\\# Params (M)",
        "time_test": "Inference time (ms) $\\downarrow$",
    }
    params_label = column_labels["num_params_test"]

    rows = []
    for checkpoint_name in sorted(listdir(checkpoints_path)):
        checkpoint_dir = join(checkpoints_path, checkpoint_name)
        if not isdir(checkpoint_dir):
            # Stray files next to the checkpoints are not experiments and
            # would make the listdir() below fail.
            continue

        # A subject is complete once its folder contains a metrics.yaml.
        # Sorted so the result does not depend on the file-system order.
        subjects_in_checkpoint = sorted(
            folder
            for folder in listdir(checkpoint_dir)
            if isdir(join(checkpoint_dir, folder))
            and "metrics.yaml" in listdir(join(checkpoint_dir, folder))
        )
        if not subjects_in_checkpoint:
            print(f"Skipping checkpoint {checkpoint_name} as it has no valid subjects.")
            continue

        # Configuration used by every run of this checkpoint.
        with open(join(checkpoint_dir, "cfg.yaml")) as f:
            cfg = yaml.safe_load(f)

        # One record of metrics per subject (first entry of each metrics.yaml).
        runs = []
        for subject_id in subjects_in_checkpoint:
            with open(join(checkpoint_dir, subject_id, "metrics.yaml")) as f:
                runs.append(yaml.safe_load(f)[0])
        runs = pd.DataFrame(runs)
        runs_mean, runs_std = runs.mean(), runs.std()

        # Rescale to the units announced by the column labels: "cls_*" to
        # percent, parameter counts to millions, and "time_test*" by 1e3
        # (presumably seconds -> milliseconds; confirm against the trainer).
        for col in runs.columns:
            for stat in (runs_mean, runs_std):
                if col.startswith("cls_"):
                    stat[col] = (stat[col] * 100).round(3)
                elif col.startswith("num_params"):
                    stat[col] = stat[col] / 1e6
                elif col.startswith("time_test"):
                    stat[col] = (stat[col] * 1e3).round(2)

        row = runs_mean.astype(str) + " ± " + runs_std.astype(str)
        # The parameter count is reported as a single number without a spread.
        row["num_params_test"] = f"{runs_mean['num_params_test'].round(1):.1f}"
        for key in cfg_keys:
            row[key] = cfg[key]
        rows.append(row)

    if not rows:
        # Nothing to report: return an empty table with the final columns
        # instead of failing on the column selection below.
        return pd.DataFrame(columns=list(column_labels.values()))

    metrics = pd.DataFrame(rows)[list(column_labels)].rename(columns=column_labels)
    metrics = metrics.replace({
        "linear": "Linear",
        "mlp": "MLP",

        "clip-b": "CLIP-B \\cite{clip}",
        "convnextv2-b": "ConvNeXt V2-B \\cite{convnextv2}",
        "convnextv2-t": "ConvNeXt V2-T \\cite{convnextv2}",
        "dinov2-b": "DINOv2-B \\cite{dinov2}",
        "dinov2-s": "DINOv2-S \\cite{dinov2}",
        "resnet18": "ResNet-18 \\cite{resnet, resnet_strikes_back}",

        None: "-",
    })

    def _numeric_params(column):
        # Parameter counts are stored as strings; compare them as numbers so
        # that e.g. "4.2" sorts before "33.6".
        if column.name == params_label:
            return pd.to_numeric(column)
        return column

    metrics = metrics.sort_values(
        by=["Image backbone", params_label], key=_numeric_params
    )
    return metrics.reset_index(drop=True)

# Tables for the manuscript

## ML2HP

### Ablation table

In [80]:
# Ablation study on ML2HP: keep only the columns reported in the manuscript
# and emit the table as LaTeX.
ablation_checkpoints_path = join("checkpoints", "ablation")
metrics_ablation = parse_results(
    ablation_checkpoints_path, dataset_name="ml2hp"
).loc[
    :,
    [
        "Image backbone",
        "Landmarks backbone",
        "Precision (\\%) $\\uparrow$",
        "Recall (\\%) $\\uparrow$",
        "Accuracy (\\%) $\\uparrow$",
        "F1 (\\%) $\\uparrow$",
        "Inference time (ms) $\\downarrow$",
        "\\# Params (M)",
    ],
]
print(metrics_ablation.to_latex(index=False))

Skipping checkpoint ml2hp_loso_None_None-20250617-1111 as it has no valid subjects.
Skipping checkpoint ml2hp_loso_convnextv2-t_None-20250616-0937 as it has no valid subjects.
Skipping checkpoint ml2hp_loso_convnextv2-t_mlp-20250616-0937 as it has no valid subjects.
\begin{tabular}{llllllll}
\toprule
Image backbone & Landmarks backbone & Precision (\%) $\uparrow$ & Recall (\%) $\uparrow$ & Accuracy (\%) $\uparrow$ & F1 (\%) $\uparrow$ & Inference time (ms) $\downarrow$ & \# Params (M) \\
\midrule
- & Linear & 78.399 ± 8.842 & 77.416 ± 9.154 & 77.416 ± 9.154 & 76.533 ± 9.469 & 0.32 ± 0.01 & 2.0 \\
- & MLP & 82.658 ± 7.349 & 81.153 ± 7.941 & 81.153 ± 7.941 & 80.366 ± 8.295 & 0.34 ± 0.01 & 4.2 \\
CLIP-B \cite{clip} & - & 51.469 ± 4.204 & 47.695 ± 3.601 & 47.695 ± 3.601 & 46.472 ± 3.939 & 2.48 ± 0.03 & 89.1 \\
CLIP-B \cite{clip} & Linear & 82.626 ± 7.938 & 81.506 ± 7.848 & 81.506 ± 7.848 & 80.954 ± 8.017 & 2.53 ± 0.04 & 91.0 \\
CLIP-B \cite{clip} & MLP & 87.651 ± 4.957 & 85.946 ± 5.025 & 8

### Normalization table

In [81]:
# Landmark-normalisation study on ML2HP.
norm_checkpoints_path = join("checkpoints", "normalization")
metrics_norm = parse_results(norm_checkpoints_path, dataset_name="ml2hp")
print(metrics_norm.columns)
metrics_norm = (
    metrics_norm.loc[
        :,
        [
            "Normalized landmarks",
            "H landmarks",
            "H image",
            "Precision (\\%) $\\uparrow$",
            "Recall (\\%) $\\uparrow$",
            "Accuracy (\\%) $\\uparrow$",
            "F1 (\\%) $\\uparrow$",
            "Inference time (ms) $\\downarrow$",
            "\\# Params (M)",
        ],
    ]
    # booleans become the LaTeX check / cross marks used in the paper
    .replace({True: "\\cmark", False: "\\xmark"})
    .sort_values(by=["Normalized landmarks", "H image"])
)
print(metrics_norm.to_latex(index=False))

Index(['Validation', 'Image backbone', 'H image', 'V image',
       'Landmarks backbone', 'H landmarks', 'V landmarks',
       'Normalized landmarks', 'Accuracy (\%) $\uparrow$',
       'F1 (\%) $\uparrow$', 'Precision (\%) $\uparrow$',
       'Recall (\%) $\uparrow$', 'Loss $\downarrow$', '\# Params (M)',
       'Inference time (ms) $\downarrow$'],
      dtype='object')
\begin{tabular}{lllllllll}
\toprule
Normalized landmarks & H landmarks & H image & Precision (\%) $\uparrow$ & Recall (\%) $\uparrow$ & Accuracy (\%) $\uparrow$ & F1 (\%) $\uparrow$ & Inference time (ms) $\downarrow$ & \# Params (M) \\
\midrule
\cmark & \cmark & \cmark & 94.967 ± 2.522 & 94.432 ± 2.993 & 94.432 ± 2.993 & 94.319 ± 3.16 & 3.09 ± 0.04 & 33.6 \\
\cmark & \cmark & \xmark & 91.689 ± 3.067 & 90.887 ± 3.419 & 90.887 ± 3.419 & 90.599 ± 3.59 & 0.38 ± 0.02 & 4.2 \\
\xmark & \cmark & \cmark & 88.044 ± 3.369 & 86.314 ± 4.067 & 86.314 ± 4.067 & 86.363 ± 3.999 & 3.08 ± 0.04 & 33.6 \\
\xmark & \cmark & \xmark & 83.249

### Results table

In [82]:
# Final ML2HP results, compared against the published baseline.
results_checkpoints_path = join("checkpoints", "results")
metrics_results = parse_results(results_checkpoints_path, dataset_name="ml2hp")
print(metrics_results.columns)
metrics_results = metrics_results.assign(Method="Ours")
# Baseline numbers are entered by hand; columns not listed here stay empty
# and are rendered as "N/A" by the replace() below.
metrics_results.loc[-1] = {
    "Method": "Baseline \\cite{icaart_baseline}",
    "H landmarks": True,
    "V landmarks": True,
    "H image": True,
    "V image": True,
    "Precision (\\%) $\\uparrow$": "80.63 ± 0.09",
    "Recall (\\%) $\\uparrow$": "79.65 ± 0.09",
    "Accuracy (\\%) $\\uparrow$": "79.65 ± 0.09",
    "F1 (\\%) $\\uparrow$": "79.33 ± 0.09",
    "\\# Params (M)": "89.5",
}
metrics_results = (
    metrics_results.loc[
        :,
        [
            "Method",
            "H landmarks",
            "V landmarks",
            "H image",
            "V image",
            "Precision (\\%) $\\uparrow$",
            "Recall (\\%) $\\uparrow$",
            "Accuracy (\\%) $\\uparrow$",
            "F1 (\\%) $\\uparrow$",
            "Inference time (ms) $\\downarrow$",
            "\\# Params (M)",
        ],
    ]
    .replace({True: "\\cmark", False: "\\xmark", None: "N/A"})
    .sort_values(by=["Method", "H landmarks", "V landmarks", "H image", "V image"])
)
print(metrics_results.to_latex(index=False))

Skipping checkpoint ml2hp_loso_convnextv2-t_mlp_h-images_h-landmarks-20250710-1542 as it has no valid subjects.
Index(['Validation', 'Image backbone', 'H image', 'V image',
       'Landmarks backbone', 'H landmarks', 'V landmarks',
       'Normalized landmarks', 'Accuracy (\%) $\uparrow$',
       'F1 (\%) $\uparrow$', 'Precision (\%) $\uparrow$',
       'Recall (\%) $\uparrow$', 'Loss $\downarrow$', '\# Params (M)',
       'Inference time (ms) $\downarrow$'],
      dtype='object')
\begin{tabular}{lllllllllll}
\toprule
Method & H landmarks & V landmarks & H image & V image & Precision (\%) $\uparrow$ & Recall (\%) $\uparrow$ & Accuracy (\%) $\uparrow$ & F1 (\%) $\uparrow$ & Inference time (ms) $\downarrow$ & \# Params (M) \\
\midrule
Baseline \cite{icaart_baseline} & \cmark & \cmark & \cmark & \cmark & 80.63 ± 0.09 & 79.65 ± 0.09 & 79.65 ± 0.09 & 79.33 ± 0.09 & N/A & 89.5 \\
Ours & \cmark & \cmark & \cmark & \cmark & 94.967 ± 2.522 & 94.432 ± 2.993 & 94.432 ± 2.993 & 94.319 ± 3.16 & 3.0

## MMHGDHGR

### Results table

In [83]:
# MMHGDHGR results: our checkpoints plus two reference rows whose numbers are
# entered by hand. Columns missing from a reference row are left empty and
# rendered as "N/A" by the replace() below.
results_checkpoints_path = join("checkpoints", "mmhgdhgr_results")
metrics_results = parse_results(results_checkpoints_path, dataset_name="mmhgdhgr")
print(metrics_results.columns)
metrics_results["Method"] = "Ours"
metrics_results = pd.concat(
    [
        metrics_results,
        pd.DataFrame(
            [
                {
                    "Method": "MDAI paper \\cite{gilmartin2023hand}",
                    "Validation": "simple",
                    "H landmarks": True,
                    "H image": True,
                    "Precision (\\%) $\\uparrow$": "-",
                    "Recall (\\%) $\\uparrow$": "-",
                    "Accuracy (\\%) $\\uparrow$": "97.25 ± 0.25",
                    "F1 (\\%) $\\uparrow$": "97.23 ± 0.25",
                    "\\# Params (M)": "0.03",
                },
                {
                    "Method": "Baseline paper \\cite{MultiModalHandGestureDataset}",
                    "Validation": "simple",
                    "H landmarks": False,
                    "H image": True,
                    "Precision (\\%) $\\uparrow$": "-",
                    "Recall (\\%) $\\uparrow$": "-",
                    "Accuracy (\\%) $\\uparrow$": "96.02 ± N/A",
                    "F1 (\\%) $\\uparrow$": "-",
                },
            ]
        ),
    ],
    # Both frames are numbered from 0; renumber so that row labels stay unique.
    ignore_index=True,
)
metrics_results = metrics_results[
    [
        "Method",
        "Validation",
        "H landmarks",
        "H image",
        "Precision (\\%) $\\uparrow$",
        "Recall (\\%) $\\uparrow$",
        "Accuracy (\\%) $\\uparrow$",
        "F1 (\\%) $\\uparrow$",
        "Inference time (ms) $\\downarrow$",
        "\\# Params (M)",
    ]
]
# Booleans become the LaTeX check / cross marks; missing cells become "N/A".
metrics_results = metrics_results.replace(
    {
        True: "\\cmark",
        False: "\\xmark",
        None: "N/A",
    }
)
metrics_results = metrics_results.sort_values(
    by=["Method", "Validation", "H landmarks", "H image"]
)
print(metrics_results.to_latex(index=False))

Index(['Validation', 'Image backbone', 'H image', 'V image',
       'Landmarks backbone', 'H landmarks', 'V landmarks',
       'Normalized landmarks', 'Accuracy (\%) $\uparrow$',
       'F1 (\%) $\uparrow$', 'Precision (\%) $\uparrow$',
       'Recall (\%) $\uparrow$', 'Loss $\downarrow$', '\# Params (M)',
       'Inference time (ms) $\downarrow$'],
      dtype='object')
\begin{tabular}{llllllllll}
\toprule
Method & Validation & H landmarks & H image & Precision (\%) $\uparrow$ & Recall (\%) $\uparrow$ & Accuracy (\%) $\uparrow$ & F1 (\%) $\uparrow$ & Inference time (ms) $\downarrow$ & \# Params (M) \\
\midrule
Baseline paper \cite{MultiModalHandGestureDataset} & simple & \xmark & \cmark & - & - & 96.02 ± N/A & - & N/A & N/A \\
MDAI paper \cite{gilmartin2023hand} & simple & \cmark & \cmark & - & - & 97.25 ± 0.25 & 97.23 ± 0.25 & N/A & 0.03 \\
Ours & loso & \cmark & \cmark & 95.517 ± 3.331 & 94.927 ± 2.723 & 94.927 ± 2.723 & 94.882 ± 3.213 & 9.73 ± 0.35 & 32.7 \\
Ours & loso & \cmark & 

## Tiny HGR

### Results

In [84]:
# Tiny HGR results: our checkpoints plus two reference rows whose numbers are
# entered by hand. Columns missing from a reference row are left empty and
# rendered as "N/A" by the replace() below.
results_checkpoints_path = join("checkpoints", "tiny_hgr_results")
metrics_results = parse_results(results_checkpoints_path, dataset_name="tiny_hgr")
print(metrics_results.columns)
metrics_results["Method"] = "Ours"
metrics_results = pd.concat(
    [
        metrics_results,
        pd.DataFrame(
            [
                {
                    "Method": "MDAI paper \\cite{gilmartin2023hand}",
                    "Validation": "simple",
                    "H landmarks": True,
                    "H image": False,
                    "Precision (\\%) $\\uparrow$": "-",
                    "Recall (\\%) $\\uparrow$": "-",
                    "Accuracy (\\%) $\\uparrow$": "98.22 ± 0.06",
                    "F1 (\\%) $\\uparrow$": "98.23 ± 0.06",
                    "\\# Params (M)": "0.03",
                },
                {
                    "Method": "Baseline paper \\cite{TinyDatasetRecognition}",
                    "Validation": "simple",
                    "H landmarks": True,
                    "H image": True,
                    "Precision (\\%) $\\uparrow$": "-",
                    "Recall (\\%) $\\uparrow$": "-",
                    "Accuracy (\\%) $\\uparrow$": "85.30 ± N/A",
                    "F1 (\\%) $\\uparrow$": "-",
                    "\\# Params (M)": "-",
                },
            ]
        ),
    ],
    # Both frames are numbered from 0; renumber so that row labels stay unique.
    ignore_index=True,
)
metrics_results = metrics_results[
    [
        "Method",
        "Validation",
        "H landmarks",
        "H image",
        "Precision (\\%) $\\uparrow$",
        "Recall (\\%) $\\uparrow$",
        "Accuracy (\\%) $\\uparrow$",
        "F1 (\\%) $\\uparrow$",
        "Inference time (ms) $\\downarrow$",
        "\\# Params (M)",
    ]
]
# Booleans become the LaTeX check / cross marks; missing cells become "N/A".
metrics_results = metrics_results.replace(
    {
        True: "\\cmark",
        False: "\\xmark",
        None: "N/A",
    }
)
metrics_results = metrics_results.sort_values(
    by=["Method", "Validation", "H landmarks", "H image"]
)
print(metrics_results.to_latex(index=False))

Index(['Validation', 'Image backbone', 'H image', 'V image',
       'Landmarks backbone', 'H landmarks', 'V landmarks',
       'Normalized landmarks', 'Accuracy (\%) $\uparrow$',
       'F1 (\%) $\uparrow$', 'Precision (\%) $\uparrow$',
       'Recall (\%) $\uparrow$', 'Loss $\downarrow$', '\# Params (M)',
       'Inference time (ms) $\downarrow$'],
      dtype='object')
\begin{tabular}{llllllllll}
\toprule
Method & Validation & H landmarks & H image & Precision (\%) $\uparrow$ & Recall (\%) $\uparrow$ & Accuracy (\%) $\uparrow$ & F1 (\%) $\uparrow$ & Inference time (ms) $\downarrow$ & \# Params (M) \\
\midrule
Baseline paper \cite{TinyDatasetRecognition} & simple & \cmark & \cmark & - & - & 85.30 ± N/A & - & N/A & - \\
MDAI paper \cite{gilmartin2023hand} & simple & \cmark & \xmark & - & - & 98.22 ± 0.06 & 98.23 ± 0.06 & N/A & 0.03 \\
Ours & loso & \cmark & \cmark & 99.005 ± 1.64 & 98.702 ± 2.715 & 98.702 ± 2.715 & 98.716 ± 2.664 & 10.05 ± 0.33 & 32.7 \\
Ours & loso & \cmark & \xmark &