In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import yaml
# %matplotlib inline

from pathlib import Path
from omegaconf import OmegaConf
from IPython.display import HTML


In [3]:
def collect_one(run, csv_folder):
    """Load the metrics of one run and tag every row with the run's configuration.

    Reads ``run / 'config.json'``, derives descriptive names for the text
    model, motion model and loss from it, then loads every ``*.yaml`` file
    found recursively under ``run / csv_folder`` as one row of metrics.

    Metrics files are expected to be named ``<protocol>.yaml`` or
    ``<protocol>_<tag>-<threshold>.yaml``. A stem whose last ``_``-separated
    segment contains no ``-`` (e.g. ``threshold_0.95``) is treated as a plain
    protocol name instead of raising ``IndexError``.

    Args:
        run: ``pathlib.Path`` of the run directory containing ``config.json``.
        csv_folder: Name of the metrics folder inside ``run``. The part before
            the first ``_`` is used as the test-dataset name.

    Returns:
        pandas.DataFrame with one row per metrics file. Columns are
        ``protocol``, optionally ``len_thresh``, the metrics stored in the
        file, plus ``text_model``, ``motion_model``, ``loss``, ``dataset``,
        ``common_space_dim``, ``data_rep`` and ``run``. The frame has no rows
        when the folder contains no YAML file (a message is printed).
    """
    with open(run / 'config.json', 'r') as f:
        cfg = json.load(f)

    model_cfg = cfg['model']
    text_cfg = model_cfg['text_encoder']
    motion_cfg = model_cfg['motion_encoder']
    loss_cfg = model_cfg['contrastive_loss']

    # --- text model name -------------------------------------------------
    text_model = text_cfg['_target_'].replace('src.model.', '')
    if 'num_prompt_tokens' in text_cfg:
        text_model += "-tpt{}".format(text_cfg['num_prompt_tokens'])

    # --- loss name -------------------------------------------------------
    def loss_suffix(key, prefix='_'):
        # Suffix for an optional loss setting; empty when missing or None.
        value = loss_cfg.get(key)
        return f"{prefix}{value}" if value is not None else ''

    # A flag that is absent counts as enabled, so only an explicit falsy value adds the suffix.
    no_bkb = '_nobkb' if not loss_cfg.get('bkb_feats', True) else ''
    no_teacher_m2m = '_no-teacher-m2m' if not loss_cfg.get('text_teacher_affects_m2m', True) else ''

    infonce_fn = loss_cfg.get('infonce_loss_fn')
    contrastive_loss = f"_{infonce_fn['_target_'].replace('src.model.', '')}" if infonce_fn is not None else ''

    loss = (
        loss_cfg['_target_'].replace('src.model.', '')
        + no_bkb
        + contrastive_loss
        + loss_suffix('cross_consistent_type')
        + loss_suffix('lambda_start_epoch')
        + loss_suffix('lambda_end_epoch')
        + loss_suffix('use_length_as_pseudo_motion_label', prefix='_pseudolab')
        + no_teacher_m2m
    )
    # Filtering is configured but disabled (threshold explicitly None): drop the marker from the name.
    if 'threshold_selfsim' in loss_cfg and loss_cfg['threshold_selfsim'] is None:
        loss = loss.replace('_with_filtering', '')

    # --- dataset name ----------------------------------------------------
    dataset_test = csv_folder.split('_')[0]
    # NOTE(review): the training-set name is read from data.test.path — confirm this is intended.
    dataset_train = cfg['data']['test']['path'].split('/')[-1]
    dataset = dataset_train + '->' + dataset_test

    common_space_dim = cfg['common_space_dim']
    data_rep = cfg['data_rep']

    # --- motion model name -----------------------------------------------
    motion_model = motion_cfg['_target_'].replace('src.model.', '')

    if 'ACTOR' in motion_model:
        motion_debug = '-debug-joints' if motion_cfg.get('debug_encode_decode_joints') else ''
        with_feet = '-withfeet' if motion_cfg.get('with_feet') else ''
        motion_model = motion_model + motion_debug + with_feet

    if 'MoViT' in motion_model:
        time_mask = '-timemask' if motion_cfg.get('use_time_padding_mask') else ''
        graph_based = '-graph' if motion_cfg.get('use_skeleton_connection_mask') else ''
        uniform = '-uniform' if motion_cfg.get('uniform_sample') else ''
        attention_type = '-' + motion_cfg['attention_type']
        num_frames = f"-{motion_cfg['num_frames']}"
        ff_dim = f"-ff{motion_cfg['ff_dims']}" if 'ff_dims' in motion_cfg else ''
        num_layers = f"-{motion_cfg['num_transformer_layers']}layers" if 'num_transformer_layers' in motion_cfg else ''
        motion_model = (
            motion_model + '-' + motion_cfg['body_repres']
            + time_mask + graph_based + uniform + attention_type
            + num_frames + ff_dim + num_layers
        )

    # --- metrics files ---------------------------------------------------
    rows = []
    for yamlf in (run / csv_folder).rglob('*.yaml'):
        with open(yamlf, 'r') as f:
            # NOTE(review): FullLoader is more permissive than needed for plain metric
            # dictionaries; consider yaml.safe_load if these files may come from untrusted sources.
            m = yaml.load(f, Loader=yaml.FullLoader)

        stem = yamlf.stem
        protocol_name, sep, suffix = stem.rpartition('_')
        if sep and '-' in suffix:
            # "<protocol>_<tag>-<threshold>": keep what follows the first dash as the threshold.
            rows.append({'protocol': protocol_name, 'len_thresh': suffix.split('-')[1], **m})
        else:
            # No threshold suffix (this also covers protocol names that contain an underscore).
            rows.append({'protocol': stem, **m})

    data = pd.DataFrame(rows)
    if data.empty:
        print(f'Pred folder is empty: {csv_folder}')

    data['text_model'] = text_model
    data['motion_model'] = motion_model
    data['loss'] = loss
    data['dataset'] = dataset
    data['common_space_dim'] = common_space_dim
    data['data_rep'] = data_rep
    data['run'] = cfg['rep']

    return data

def collect_all(root, csv_folder):
    """Collect the metrics of every run found under ``root``.

    Args:
        root: Directory (str or Path) searched recursively.
        csv_folder: Glob pattern of the metrics folder name, e.g.
            ``'*latest_m2m-metrics'``. Each match is handed to ``collect_one``
            together with its parent directory (the run directory).

    Returns:
        pandas.DataFrame concatenating the per-run frames with a fresh index.

    Raises:
        ValueError: if nothing under ``root`` matches ``csv_folder``.
    """
    root = Path(root)
    frames = [collect_one(match.parent, match.name) for match in root.rglob(csv_folder)]
    if not frames:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(f"No metrics folder matching '{csv_folder}' found under '{root}'")
    return pd.concat(frames, ignore_index=True)

# Display formatters keyed by metric name: recalls with two decimals, mean rank
# with one, median rank as an integer, caption-similarity scores with three.
default_fields_dict = {
    **{recall: "{:.2f}".format for recall in ('R1', 'R5', 'R10')},
    'meanr': "{:.1f}".format,
    'medr': int,
    **{score: "{:.3f}".format for score in ('spice', 'spacy')},
}

In [4]:
# Compute metrics for each detected run

def summarize_metrics(
        metrics,
        protocol=None,
        dataset=None,
        text_model=None,
        motion_model=None,
        loss=None,
        common_space_dim=(256,),
        data_rep=('cont_6d_plus_rifke',),
        len_thresh=None,
        melt=True,
        run=None,
        decimals=2):
    """Aggregate the collected metrics and filter them down to a table of interest.

    Steps:
      1. Average the metrics over runs (``run is None``) or keep a single run.
      2. Append an ``'average'`` protocol holding the mean over all protocols.
      3. Apply the requested filters. When a filter keeps exactly one value,
         the corresponding column is dropped because it carries no information.
      4. Optionally reshape so that the identifying columns become the index.

    Args:
        metrics: DataFrame as returned by ``collect_all``. It is not modified.
        protocol, text_model, motion_model, loss, common_space_dim, data_rep,
            len_thresh: ``None`` (no filtering) or a list-like of accepted
            values for the column of the same name.
        dataset: ``None`` or a single dataset name; when given, the
            ``dataset`` column is always dropped.
        melt: If True, return a frame indexed by the remaining identifying
            columns with one column per metric.
        run: ``None`` to average over runs, otherwise the run id to keep.
        decimals: Number of decimals the numeric values are rounded to.

    Returns:
        The summarised pandas.DataFrame.
    """
    id_vars = ['dataset', 'protocol', 'text_model', 'motion_model', 'loss', 'common_space_dim', 'data_rep']
    # len_thresh only exists when at least one metrics file carried a threshold suffix.
    if 'len_thresh' in metrics.columns:
        id_vars.append('len_thresh')

    if run is None:
        # Average every metric over the repetitions of the same configuration.
        # NOTE(review): pandas groupby drops rows whose key columns contain NaN by default.
        metrics = metrics.groupby(id_vars).mean().reset_index()
    else:
        # Keep only the rows of the requested run.
        metrics = metrics[metrics['run'] == run]
    # Non-inplace drops throughout: the frame may be a filtered slice of the caller's data.
    metrics = metrics.drop(columns='run')

    # "average" protocol: mean of every metric across all protocols, per configuration.
    id_vars_without_protocol = [col for col in id_vars if col != 'protocol']
    metrics_avg = metrics.drop(columns='protocol').groupby(id_vars_without_protocol).mean().reset_index()
    metrics_avg['protocol'] = 'average'
    metrics = pd.concat([metrics, metrics_avg], ignore_index=True)

    list_filters = (
        ('protocol', protocol),
        ('common_space_dim', common_space_dim),
        ('data_rep', data_rep),
        ('text_model', text_model),
        ('motion_model', motion_model),
        ('loss', loss),
        ('len_thresh', len_thresh),
    )
    for column, accepted in list_filters:
        if accepted is None:
            continue
        metrics = metrics[metrics[column].isin(accepted)]
        if len(accepted) == 1:
            # A single accepted value makes the column constant: remove it.
            metrics = metrics.drop(columns=column)
            id_vars.remove(column)

    if dataset is not None:
        metrics = metrics[metrics['dataset'] == dataset]
        metrics = metrics.drop(columns='dataset')
        id_vars.remove('dataset')

    if melt:
        # Long format and back: moves the identifying columns into the index.
        metrics = metrics.melt(id_vars=id_vars, var_name="metric")
        metrics = metrics.pivot(index=id_vars, columns="metric", values="value")

    return metrics.round(decimals)

In [22]:
# rename content of the table
def rename_fn(v):
    """Translate an internal model/loss identifier into its paper name.

    Values without an entry in the table (including numeric cells) are
    returned unchanged.
    """
    paper_names = {
        # motion encoders
        'motions.MoViT_v2-body-parts-timemask-uniform-fact_encoder-200-ff1024-2layers': 'MoViT++',
        'motions.MoViT-body-parts-timemask-uniform-divided_space_time-200-3layers': 'MoViT',
        # cross-consistent losses (suffix encodes the lambda start/end epochs)
        'losses.InfoNCECrossConsistent_kldiv_0_1': 'CCL self',
        'losses.InfoNCECrossConsistent_kldiv_80_140': 'CCL 80-140',
        'losses.InfoNCECrossConsistent_kldiv_40_100': 'CCL 40-100',
        'losses.InfoNCECrossConsistent_kldiv_140_200': 'CCL 140-200',
        'losses.InfoNCECrossConsistent_kldiv_500_600': 'CCL supervised',
        'losses.InfoNCECrossConsistent_kldiv_80_140_no-teacher-m2m': 'CCL 80-140 no-teacher-m2m',
        'losses.InfoNCECrossConsistent_kldiv_40_100_no-teacher-m2m': 'CCL 40-100 no-teacher-m2m',
        # plain contrastive losses
        'losses.InfoNCE': 'InfoNCE',
        'losses.InfoNCE_with_filtering': 'InfoNCE+F',
        # baselines / text encoders
        'ACTORStyleEncoder': 'TMR',
        'texts.CLIP': 'CLIP',
    }
    return paper_names.get(v, v)

def paper_formatting(metrics, remapping, order=None, protocols_order=('guo', 'normal', 'nsim', 'threshold_0.95', 'average')):
    """Turn a summarised metrics table into the (protocol, method) layout used in the paper.

    Args:
        metrics: DataFrame with the columns ``protocol``, ``dataset``,
            ``text_model``, ``motion_model`` and ``loss`` plus the metric
            columns (i.e. ``summarize_metrics(..., melt=False)`` with none of
            those columns filtered away). It is not modified.
        remapping: Maps ``(dataset, text_model, motion_model, loss)`` tuples,
            expressed with the names produced by ``rename_fn``, to the method
            name shown in the table. Rows without an entry are discarded.
            Usually a dict; anything accepted by ``Series.map`` also works.
        order: Optional list of method names fixing the row order inside each
            protocol.
        protocols_order: Protocols to keep, in display order.

    Returns:
        DataFrame indexed by a ``(protocol, method)`` MultiIndex. Rows of
        selected methods are kept even when some of their metrics are NaN.
    """
    index_cols = ['protocol', 'dataset', 'text_model', 'motion_model', 'loss']
    method_cols = index_cols[1:]

    # Rename cell values to their paper names. DataFrame.applymap was renamed to
    # DataFrame.map in pandas 2.1, so prefer the new spelling when it exists.
    if hasattr(pd.DataFrame, 'map'):
        metrics = metrics.map(rename_fn)
    else:
        metrics = metrics.applymap(rename_fn)
    metrics = metrics.set_index(index_cols)

    # One (dataset, text_model, motion_model, loss) key per row.
    keys = list(zip(*(metrics.index.get_level_values(name) for name in method_cols)))
    if isinstance(remapping, dict) and not hasattr(remapping, '__missing__'):
        # Direct lookup avoids Series.map turning the tuple keys into a MultiIndex
        # and also works when there are no rows at all.
        methods = [remapping.get(key) for key in keys]
    else:
        methods = pd.Series(keys, dtype=object).map(remapping)

    # Replace the five-level index with (protocol, method).
    metrics.index = pd.MultiIndex.from_arrays(
        [metrics.index.get_level_values('protocol'), methods],
        names=['protocol', 'method'],
    )

    # Discard only the rows whose method is not part of the selection. A blanket
    # dropna() would also discard selected rows that merely miss one metric.
    metrics = metrics.loc[metrics.index.get_level_values('method').notna()]

    # Order the rows by protocol, then (optionally) by method.
    metrics = metrics.reindex(list(protocols_order), level=0)
    if order is not None:
        metrics = metrics.reindex(order, level=1)

    return metrics

def render_to_latex(metrics, rename_func=default_fields_dict, **latex_kwargs):
    """Render a metrics table as LaTeX with the best value of each group in bold.

    Rows are grouped by the first index level. Within each group the best
    entry of every column is bolded: the smallest value for rank metrics
    (column names containing ``medr`` or ``meanr``, case-insensitive) and the
    largest value for everything else.

    Args:
        metrics: DataFrame to render; it is copied, not modified.
        rename_func: Currently unused; kept so existing calls keep working.
        **latex_kwargs: Forwarded to ``Styler.to_latex`` (pass
            ``convert_css=True`` for the bold CSS to become LaTeX).

    Returns:
        The LaTeX source as a string.
    """
    # Rank metrics improve as they decrease. The match is case-insensitive so
    # that both 'MedR' and the lowercase 'medr' / 'meanr' spellings are covered.
    lower_is_better = ('medr', 'meanr')

    def highlight_best(data):
        # Build a frame of CSS strings, shaped like `data`, marking the best cells.
        attr = 'font-weight: bold'
        result = pd.DataFrame('', index=data.index, columns=data.columns)
        for col in data.columns:
            # Select with a list so that tuple labels (MultiIndex columns) are accepted.
            grouped = data.groupby(level=0)[[col]]
            col_name = str(col).lower()
            if any(tag in col_name for tag in lower_is_better):
                best_idx = grouped.idxmin()
            else:
                best_idx = grouped.idxmax()
            for idx in best_idx.values:
                result.loc[idx, col] = attr
        return result

    styled_df = metrics.copy().style.apply(highlight_best, axis=None)
    return styled_df.format(precision=3).to_latex(**latex_kwargs)

# Motion to Motion retrieval - Results on KIT

In [None]:
# Motion-to-motion metrics of the models trained and tested on KIT-ML
ROOT = "runs"

# Gather every run under ROOT and keep the two motion encoders being compared
metrics = summarize_metrics(
    collect_all(ROOT, '*latest_m2m-metrics'),
    motion_model=[
        'ACTORStyleEncoder',
        'motions.MoViT_v2-body-parts-timemask-uniform-fact_encoder-200-ff1024-2layers'
    ],
    data_rep=['cont_6d_plus_rifke_vels'],
    melt=False,
    decimals=4,
)

# (dataset, text model, motion model, loss) -> method name shown in the table
selection = {
    ('kitml->kitml', text_name, motion_name, loss_name): method
    for text_name, motion_name, loss_name, method in [
        ('TMR', 'TMR', 'InfoNCE+F', 'TMR'),
        ('CLIP', 'MoViT', 'InfoNCE', 'MoT'),
        ('TMR', 'MoViT++', 'CCL 40-100', 'MoT++'),
        ('TMR', 'MoViT++', 'CCL self', 'MoT++ self'),
        ('TMR', 'MoViT++', 'CCL supervised', 'MoT++ supervised'),
        ('TMR', 'MoViT++', 'CCL 80-140 no-teacher-m2m', 'MoT++ 80-140 ntm'),
        ('TMR', 'MoViT++', 'CCL 40-100 no-teacher-m2m', 'MoT++ 40-100 ntm'),
    ]
}

# Rows appear in the order the methods are listed in the selection
order = list(selection.values())
metrics = paper_formatting(metrics, selection, order)
metrics

# Motion to Motion retrieval - Train HumanML3D, Test KitML

In [None]:
# Motion-to-motion metrics of the models trained on HumanML3D and tested on KIT-ML
ROOT = "runs"

# Gather every run under ROOT and keep the two motion encoders being compared
metrics = summarize_metrics(
    collect_all(ROOT, '*latest_m2m-metrics'),
    motion_model=[
        'ACTORStyleEncoder',
        'motions.MoViT_v2-body-parts-timemask-uniform-fact_encoder-200-ff1024-2layers'
    ],
    data_rep=['cont_6d_plus_rifke_vels'],
    melt=False,
    decimals=4,
)

# (dataset, text model, motion model, loss) -> method name shown in the table
selection = {
    ('humanml3d->kitml', text_name, motion_name, loss_name): method
    for text_name, motion_name, loss_name, method in [
        ('TMR', 'TMR', 'InfoNCE+F', 'TMR'),
        ('CLIP', 'MoViT', 'InfoNCE', 'MoT'),
        ('TMR', 'MoViT++', 'CCL 40-100', 'MoT++'),
        ('TMR', 'MoViT++', 'CCL self', 'MoT++ self'),
        ('TMR', 'MoViT++', 'CCL supervised', 'MoT++ supervised'),
        ('TMR', 'MoViT++', 'CCL 80-140 no-teacher-m2m', 'MoT++ 80-140 ntm'),
        ('TMR', 'MoViT++', 'CCL 40-100 no-teacher-m2m', 'MoT++ 40-100 ntm'),
    ]
}

# Rows appear in the order the methods are listed in the selection
order = list(selection.values())
metrics = paper_formatting(metrics, selection, order)
metrics

# Motion to Motion retrieval - Train HumanML3D+KitML, Test KitML

In [None]:
# Motion-to-motion metrics of the models trained on HumanML3D + KIT-ML and
# tested on KIT-ML, exported as a LaTeX table.
ROOT = "runs"

metrics = collect_all(ROOT, '*latest_m2m-metrics')
# No motion_model filter is applied in this cell: every motion encoder found is
# summarised and the selection below decides which rows survive.
metrics = summarize_metrics(
    metrics,
    data_rep=['cont_6d_plus_rifke_vels'],
    decimals=4,
    melt=False,
    )

# (dataset, text model, motion model, loss) -> method name shown in the table
selection = {
    ('humanml3d_plus_kitml->kitml', 'TMR', 'TMR', 'InfoNCE+F'): 'TMR',
    ('humanml3d_plus_kitml->kitml', 'CLIP', 'MoViT', 'InfoNCE'): 'MoT',
    ('humanml3d_plus_kitml->kitml', 'TMR', 'MoViT++', 'CCL 40-100'): 'MoT++',
    ('humanml3d_plus_kitml->kitml', 'TMR', 'MoViT++', 'CCL self'): 'MoT++ self',
    ('humanml3d_plus_kitml->kitml', 'TMR', 'MoViT++', 'CCL supervised'): 'MoT++ supervised',
    ('humanml3d_plus_kitml->kitml', 'TMR', 'MoViT++', 'CCL 80-140 no-teacher-m2m'): 'MoT++ 80-140 ntm',
    ('humanml3d_plus_kitml->kitml', 'TMR', 'MoViT++', 'CCL 40-100 no-teacher-m2m'): 'MoT++ 40-100 ntm',
}

# Only these four methods are reported, in this order
order = ['TMR', 'MoT', 'MoT++', 'MoT++ self']
metrics = paper_formatting(metrics, selection, order)

# remove primary_label_idx/mAP and primary_label_idx/nDCG columns
metrics = metrics.drop(columns=['primary_label_idx/mAP', 'primary_label_idx/nDCG'])

# Strip the 'top_level_label_idx/' prefix from the column names. The pattern is
# anchored so only a leading occurrence is removed, and regex=True is explicit
# because the default of str.replace differs between pandas versions.
metrics.columns = metrics.columns.str.replace(r'^top_level_label_idx/', '', regex=True)

latex = render_to_latex(
    metrics,
    caption="M2M results on KIT. Train: KIT+HumanML",
    clines="skip-last;data",
    hrules=True,
    column_format="llccccccccccccc",
    convert_css=True
)

print(latex)