In [3]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from diagnostics.inventory import ModelInventoryBuilder, QueryBuilder
from diagnostics.inventory import load_metric_csv, load_confidence_csv
from diagnostics.paper_utils import update_col_names, get_video_names, add_model_metadata

In [2]:
# Notebook configuration (edit these values by hand; nothing is prompted for).
#   dataset_name   - prefix used for the saved dataframe filenames
#   df_save_path   - directory the aggregated dataframes are written to
#   artifacts_path - directory containing the model config files to inventory
dataset_name = "mirror-mouse"
df_save_path = "/media/mattw/behavior/results/pose-estimation/mirror-mouse"
artifacts_path = os.path.join(df_save_path, "2022-11-17")

# Values used to filter the model inventory; kept as strings because the
# generated dataframe queries compare against quoted values.
train_frames_list = ["75"]
rng_seeds_list = ["0", "1"]

In [3]:
# Build a model registry from the config files found under `artifacts_path`
# and collect it into a single dataframe. Each row is later iterated as one
# model and is expected to carry a "path" entry (presumably one row per config
# file — TODO confirm against ModelInventoryBuilder).
model_inventory = ModelInventoryBuilder(artifacts_path)
total_df = model_inventory.build_dframe()

Building model registry from 8 configs...


100%|█████████████████████████████████████████████| 8/8 [00:00<00:00, 63.86it/s]


In [4]:
# Narrow the inventory down to the models of interest: the configured RNG seeds
# and training-frame counts, plus a timestamp window.
query_builder = QueryBuilder(total_df)
for config_key, allowed_values in (
    ("training.rng_seed_data_pt", rng_seeds_list),
    ("training.train_frames", train_frames_list),
):
    query_builder.add_query(config_key, "in", allowed_values)
query_builder.add_timestamp_query("2022-11-16", "2022-11-20")  # works

# Optional filters, currently disabled:
#   query_builder.add_query("model.losses_to_use", "in", ["[]", "[pca_multiview]"])
#       -> trying to grab both unimodal_mse and supervised; note there are no
#          quotes inside the brackets for each loss
#   query_builder.add_query("losses.pca_multiview.log_weight", ">", 4.)

# All individual queries must hold simultaneously ("and").
combined_query = query_builder.combine_queries("and")
total_df_queried = total_df.query(combined_query)
total_df_queried.shape

`training.rng_seed_data_pt` == '0' or `training.rng_seed_data_pt` == '1'
`training.train_frames` == '75'


(8, 92)

In [6]:
# Aggregate the per-model output files of every selected model into four dataframes:
#   df_labeled_preds   - predictions on labeled frames (InD and OOD)
#   df_labeled_metrics - precomputed error metrics + confidences on labeled frames
#   df_video_preds     - predictions on the videos found in each model's "video_preds" dir
#   df_video_metrics   - precomputed metrics + confidences for those videos

# Prediction-file stem for each labeled-data distribution; the per-metric files
# share the stem ("<stem>_<metric_name>.csv").
LABELED_PRED_STEMS = {"InD": "predictions", "OOD": "predictions_new"}
LABELED_METRIC_NAMES = ["pixel_error", "pca_singleview_error", "pca_multiview_error"]
VIDEO_METRIC_NAMES = ["temporal_norm", "pca_singleview_error", "pca_multiview_error"]


def _load_labeled_preds(model):
    """Load InD and OOD labeled-frame predictions for one model.

    Parameters
    ----------
    model : row of the queried model dataframe; must provide ``model["path"]``.

    Returns
    -------
    pd.DataFrame with two-level columns, tagged with a "distribution" column
    and with model metadata added.
    """
    dfs = []
    for distribution_type, stem in LABELED_PRED_STEMS.items():
        filename = os.path.join(model["path"], "%s.csv" % stem)
        # rows 1 and 2 of the csv form the two-level column header
        df = pd.read_csv(filename, header=[1, 2], index_col=0)
        df = update_col_names(df)
        df.loc[:, ("distribution", "")] = distribution_type
        if distribution_type == "OOD":
            # the "set" column is filled in explicitly only for OOD frames
            df.loc[:, ("set", "")] = "test"
        dfs.append(df)
    df_all = pd.concat(dfs)
    add_model_metadata(df_all, model, levels=2)  # in-place
    return df_all


def _load_labeled_metrics(model):
    """Load precomputed labeled-frame metrics and confidences for one model.

    Metric files that do not exist on disk are skipped; the confidences are
    always read from the predictions file itself.

    Parameters
    ----------
    model : row of the queried model dataframe; must provide ``model["path"]``.

    Returns
    -------
    pd.DataFrame tagged with a "distribution" column and model metadata.
    """
    dfs = []
    for distribution_type, stem in LABELED_PRED_STEMS.items():
        # load precomputed errors
        for metric_name in LABELED_METRIC_NAMES:
            filename = os.path.join(model["path"], "%s_%s.csv" % (stem, metric_name))
            if not os.path.isfile(filename):
                continue
            df = load_metric_csv(
                filename, metric_name, None, pd_kwargs={"header": [0], "index_col": 0})
            df["distribution"] = distribution_type
            if distribution_type == "OOD":
                df["set"] = "test"
            dfs.append(df)
        # load confidences from predictions
        df = load_confidence_csv(os.path.join(model["path"], "%s.csv" % stem))
        df["distribution"] = distribution_type
        if distribution_type == "OOD":
            df["set"] = "test"
        dfs.append(df)
    df_all = pd.concat(dfs)
    add_model_metadata(df_all, model, levels=1)  # in-place
    return df_all


def _load_video_preds(model, video_names):
    """Load the per-video prediction csvs for one model.

    Parameters
    ----------
    model : row of the queried model dataframe; must provide ``model["path"]``.
    video_names : list of video names; each maps to "video_preds/<name>.csv".

    Returns
    -------
    pd.DataFrame with two-level columns, tagged with a "video_name" column and
    model metadata.
    """
    dfs = []
    for video_name in video_names:
        filename = os.path.join(model["path"], "video_preds", "%s.csv" % video_name)
        # NOTE(review): unlike the labeled predictions, these are not passed
        # through update_col_names - confirm this is intended.
        df = pd.read_csv(filename, header=[1, 2], index_col=0)
        df.loc[:, ("video_name", "")] = video_name
        dfs.append(df)
    df_all = pd.concat(dfs)
    add_model_metadata(df_all, model, levels=2)  # in-place
    return df_all


def _load_video_metrics(model, video_names):
    """Load precomputed per-video metrics and confidences for one model.

    Metric files that do not exist on disk are skipped, matching how the
    labeled metrics are handled; confidences are always read from the
    per-video predictions file.

    Parameters
    ----------
    model : row of the queried model dataframe; must provide ``model["path"]``.
    video_names : list of video names found in the model's "video_preds" dir.

    Returns
    -------
    pd.DataFrame tagged with a "video_name" column and model metadata.
    """
    dfs = []
    for video_name in video_names:
        # load precomputed metrics
        for metric_name in VIDEO_METRIC_NAMES:
            filename = os.path.join(
                model["path"], "video_preds", "%s_%s.csv" % (video_name, metric_name))
            if not os.path.isfile(filename):
                continue
            df = load_metric_csv(
                filename, metric_name, None, pd_kwargs={"header": [0], "index_col": 0})
            df["video_name"] = video_name
            dfs.append(df)
        # load confidences from predictions
        filename = os.path.join(model["path"], "video_preds", "%s.csv" % video_name)
        df = load_confidence_csv(filename)
        df["video_name"] = video_name
        dfs.append(df)
    df_all = pd.concat(dfs)
    add_model_metadata(df_all, model, levels=1)  # in-place
    return df_all


if len(total_df_queried) == 0:
    # pd.concat would fail below with an opaque "No objects to concatenate"
    raise ValueError("no models matched the query; nothing to aggregate")

labeled_preds_per_model = []
labeled_metrics_per_model = []
video_preds_per_model = []
video_metrics_per_model = []
# iterrows() is a generator, so pass the length explicitly to get a real progress bar
for _idx, model in tqdm(total_df_queried.iterrows(), total=len(total_df_queried)):
    labeled_preds_per_model.append(_load_labeled_preds(model))
    labeled_metrics_per_model.append(_load_labeled_metrics(model))

    # list the video directory once and reuse the names for preds and metrics
    video_names = list(
        get_video_names(os.listdir(os.path.join(model["path"], "video_preds"))))
    video_preds_per_model.append(_load_video_preds(model, video_names))
    video_metrics_per_model.append(_load_video_metrics(model, video_names))

# concat all dfs
df_labeled_preds = pd.concat(labeled_preds_per_model)
df_labeled_metrics = pd.concat(labeled_metrics_per_model)
df_video_preds = pd.concat(video_preds_per_model)
df_video_metrics = pd.concat(video_metrics_per_model)

8it [00:05,  1.50it/s]


In [7]:
# Write each aggregated dataframe to `df_save_path` as a parquet file whose
# name is prefixed with the dataset name.
for frame, filename_template in (
    (df_labeled_preds, "%s_labeled_preds.pqt"),
    (df_labeled_metrics, "%s_labeled_metrics.pqt"),
    (df_video_preds, "%s_video_preds.pqt"),
    (df_video_metrics, "%s_video_metrics.pqt"),
):
    save_file = os.path.join(df_save_path, filename_template % dataset_name)
    frame.to_parquet(save_file)