In [1]:
import ast
import os
import pickle as pkl

import numpy as np
import pandas as pd
import torch
from torchmetrics.functional.classification import (
    multilabel_average_precision,
)

In [2]:
file_path = "/home/kukushkin/imslowfast/dataset/results/training_progression/validation/model=slow_r50_feats=epoch-1_split=val.pkl"

with open(file_path, "rb") as f:
    data = pkl.load(f)

print(data.keys())

dict_keys(['names', 'preds', 'feats', 'labels'])


**Load annotations and results**

In [3]:
# model name
model_name = "slow_r50"
folder_path = "../dataset/results/training_progression"
splits = [
    "train",
    "validation",
]
metadata_file = "../dataset/metadata/metadata.csv"
behavioural_labels_file = "../dataset/metadata/behaviours.txt"
segements_file = "../dataset/metadata/segements.txt"

# where to save the results
result_file = "../dataset/results/training_progression/results_training_progression_behaviourwise.csv"
show_per_class = True

# list all result files in the folder which end with .pkl and contain the model name
result_info = {}

for split in splits:
    for file in os.listdir(os.path.join(folder_path, split)):
        if file.endswith(".pkl") and model_name in file:
            epoch = file.split("_")[-2].split("-")[-1]

            # get the split from the file name
            data_split = file.split("=")[-1].split(".")[0]

            # add model to the dictionary
            if model_name not in result_info:
                result_info[model_name] = {}
            # add epoch to the dictionary
            if epoch not in result_info[model_name]:
                result_info[model_name][epoch] = {}
            if split not in result_info[model_name][epoch]:
                result_info[model_name][epoch][data_split] = {}
            result_info[model_name][epoch][data_split] = {
                "file_path": os.path.join(folder_path, split, file),
            }

In [4]:
result_info

{'slow_r50': {'40': {'train': {'file_path': '../dataset/results/training_progression/train/model=slow_r50_feats=epoch-40_split=train.pkl'},
   'val': {'file_path': '../dataset/results/training_progression/validation/model=slow_r50_feats=epoch-40_split=val.pkl'}},
  '80': {'train': {'file_path': '../dataset/results/training_progression/train/model=slow_r50_feats=epoch-80_split=train.pkl'},
   'val': {'file_path': '../dataset/results/training_progression/validation/model=slow_r50_feats=epoch-80_split=val.pkl'}},
  '1': {'train': {'file_path': '../dataset/results/training_progression/train/model=slow_r50_feats=epoch-1_split=train.pkl'},
   'val': {'file_path': '../dataset/results/training_progression/validation/model=slow_r50_feats=epoch-1_split=val.pkl'}},
  '10': {'train': {'file_path': '../dataset/results/training_progression/train/model=slow_r50_feats=epoch-10_split=train.pkl'},
   'val': {'file_path': '../dataset/results/training_progression/validation/model=slow_r50_feats=epoch-10_s

In [5]:
metadata_df = pd.read_csv(metadata_file)

with open(behavioural_labels_file, "rb") as f:
    behaviours = [beh.decode("utf-8").strip() for beh in f.readlines()]

with open(segements_file, "rb") as f:
    segments = [seg.decode("utf-8").strip() for seg in f.readlines()]


def read_files(model_results, epoch):
    with open(model_results[epoch]["train"]["file_path"], "rb") as f:
        train_data = pkl.load(f)

    with open(model_results[epoch]["val"]["file_path"], "rb") as f:
        val_data = pkl.load(f)

    return train_data, val_data


def results2df(train_data, val_data, metadata_df):
    # Process subclips
    subclips = []
    for i, split in enumerate([train_data, val_data]):
        for name, pred, feat, label in zip(
            split["names"], split["preds"], split["feats"], split["labels"]
        ):
            subclips.append(
                {
                    "name": name,
                    "split": i,
                    "pred": pred,
                    "feat": feat,
                    "negative": True if sum(label) == 0 else False,
                }
            )

    df = pd.DataFrame(subclips, columns=["name", "split", "pred", "feat", "negative"])

    df["split"] = df.split.map({0: "train", 1: "val"})
    df = df.merge(metadata_df, how="left", left_on="name", right_on="subject_id")

    # Apply sigmoid to predictions
    df["pred"] = df.pred.apply(lambda x: torch.sigmoid(torch.tensor(x)))

    # Convert label from str to int
    df.label = df.label.apply(lambda x: np.array(ast.literal_eval(x)))

    # Add negative
    df["negative"] = df.label.apply(lambda x: sum(x) == 0)

    # Add global location count to dataframe
    df["location_count"] = df.utm.map(df.utm.value_counts())

    # Return train and val dataframes
    train_df = df[df.split == "train"]
    val_df = df[df.split == "val"]

    return train_df, val_df


def print_per_segement_performance(map, segment, show_per_class=True):
    res = []
    for i, (b, s) in enumerate(zip(map, segments)):
        if s == segment:
            res.append({behaviours[i]: b})
    agg_values = []
    for r in res:
        for _, value in r.items():
            agg_values.append(value)
    # if show_per_class:
    #    print(f"{segment}: {np.mean(agg_values):.2f} {res}")
    # else:
    #    print(f"{segment}: {np.mean(agg_values):.2f}")

    if show_per_class:
        return {
            segment: {
                "mean": np.round(np.mean(agg_values), 2),
                "values": res,
            }
        }
    else:
        return {
            segment: {
                "mean": np.round(np.mean(agg_values), 2),
            }
        }

In [6]:
def calculate_metrics(df, round_to=2, show_per_class=False):
    # Train performance
    map = multilabel_average_precision(
        torch.tensor(np.stack(df["pred"])),
        torch.tensor(np.stack(df["label"])),
        num_labels=14,
        average="none",
    )

    map_head = print_per_segement_performance(map, "head", show_per_class)
    map_tail = print_per_segement_performance(map, "tail", show_per_class)
    map_fs = print_per_segement_performance(map, "few_shot", show_per_class)

    if show_per_class:
        map_head_values = map_head["head"]["values"]
        map_tail_values = map_tail["tail"]["values"]
        map_fs_values = map_fs["few_shot"]["values"]

    map_head = round(float(map_head["head"]["mean"]), round_to)
    map_tail = round(float(map_tail["tail"]["mean"]), round_to)
    map_fs = round(float(map_fs["few_shot"]["mean"]), round_to)

    avg_map = round(map.mean().item(), round_to)

    if show_per_class:
        # round to 2 decimal places
        print(map_head_values)
        for i in range(len(map_head_values)):
            for key, value in map_head_values[i].items():
                map_head_values[i][key] = round(value.item(), round_to)
        for i in range(len(map_tail_values)):
            for key, value in map_tail_values[i].items():
                map_tail_values[i][key] = round(value.item(), round_to)

        for i in range(len(map_fs_values)):
            for key, value in map_fs_values[i].items():
                map_fs_values[i][key] = round(value.item(), round_to)

        return (
            avg_map,
            map_head,
            map_head_values,
            map_tail,
            map_tail_values,
            map_fs,
            map_fs_values,
        )

    return avg_map, map_head, map_tail, map_fs

In [7]:
# if file exists, delete it
if os.path.exists(result_file):
    os.remove(result_file)


for m in result_info:
    for epoch in result_info[m]:
        # print(f"Loading results for model: {m}, epoch: {epoch}")
        train_data, val_data = read_files(result_info[model_name], epoch)
        train_df, val_df = results2df(train_data, val_data, metadata_df)

        if show_per_class:
            (
                train_map,
                train_map_head,
                train_map_head_values,
                train_map_tail,
                train_map_tail_values,
                train_map_fs,
                train_map_fs_values,
            ) = calculate_metrics(train_df, show_per_class=show_per_class)

            (
                val_map,
                val_map_head,
                val_map_head_values,
                val_map_tail,
                val_map_tail_values,
                val_map_fs,
                val_map_fs_values,
            ) = calculate_metrics(val_df, show_per_class=show_per_class)

            with open(result_file, "a") as f:
                # check if file is existing and empty
                if os.stat(result_file).st_size == 0:
                    f.write(
                        "model;split;epoch;overall_map;map_head;map_head_values;map_tail;map_tail_values;map_fs;map_fs_values\n"
                    )

                f.write(
                    f"{m};train;{epoch};{train_map};{train_map_head};{train_map_head_values};{train_map_tail};{train_map_tail_values};{train_map_fs};{train_map_fs_values}\n"
                )
                f.write(
                    f"{m};val;{epoch};{val_map};{val_map_head};{val_map_head_values};{val_map_tail};{val_map_tail_values};{val_map_fs};{val_map_fs_values}\n"
                )

            # Write results to file # with columns: model, epoch, train_map, train_map_head, train_map_tail, train_map_fs, val_map, val_map_head, val_map_tail, val_map_fs
        else:
            with open(result_file, "a") as f:
                # check if file is existing and empty
                if os.stat(result_file).st_size == 0:
                    f.write("model;split;epoch;overall_map;map_head;map_tail;map_fs\n")

                f.write(
                    f"{m};train;{epoch};{train_map};{train_map_head};{train_map_tail};{train_map_fs}\n"
                )
                f.write(
                    f"{m};val;{epoch};{val_map};{val_map_head};{val_map_tail};{val_map_fs}\n"
                )


# open the csv reorder the columns and save it again
df = pd.read_csv(result_file, sep=";")

if show_per_class:
    df = df[
        [
            "model",
            "split",
            "epoch",
            "overall_map",
            "map_head",
            "map_head_values",
            "map_tail",
            "map_tail_values",
            "map_fs",
            "map_fs_values",
        ]
    ]
else:
    df = df[
        [
            "model",
            "split",
            "epoch",
            "overall_map",
            "map_head",
            "map_tail",
            "map_fs",
        ]
    ]
df.epoch = df.epoch.astype(int)
df = df.sort_values(
    [
        "model",
        "epoch",
        "split",
    ],
    ascending=[True, True, True],
)
df.to_csv(result_file, index=False, sep=";")

[{'resting': tensor(0.9601)}, {'travel': tensor(0.9739)}]
[{'resting': tensor(0.6850)}, {'travel': tensor(0.8118)}]
[{'resting': tensor(0.9942)}, {'travel': tensor(0.9899)}]
[{'resting': tensor(0.6567)}, {'travel': tensor(0.7795)}]
[{'resting': tensor(0.4930)}, {'travel': tensor(0.3583)}]
[{'resting': tensor(0.4871)}, {'travel': tensor(0.3139)}]
[{'resting': tensor(0.7470)}, {'travel': tensor(0.9032)}]
[{'resting': tensor(0.5894)}, {'travel': tensor(0.8289)}]
[{'resting': tensor(0.9994)}, {'travel': tensor(0.9986)}]
[{'resting': tensor(0.6571)}, {'travel': tensor(0.7116)}]
[{'resting': tensor(0.8854)}, {'travel': tensor(0.9520)}]
[{'resting': tensor(0.6755)}, {'travel': tensor(0.8202)}]
[{'resting': tensor(0.6577)}, {'travel': tensor(0.6468)}]
[{'resting': tensor(0.5887)}, {'travel': tensor(0.4751)}]
