# Analyze which class labels are predicted for sequences of a man walking with a ladder

Those sequences should probably be the easiest to classify.

In [1]:
import pandas as pd
from pathlib import Path

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import numpy as np
import torch

import plotly.io as pio

from ilids.towhee_utils.override.movinet import read_kinetics_600_classmap

In [2]:
# Parameters cell: it carries the "parameters" tag so papermill can override
# the values below at execution time.

# Name of the MoViNet variation; also the stem of the pickle file that is loaded
movinet_variation: str = "movineta0"
# Boolean switch for the feature normalization step applied after loading the pickle
normalize_features: bool = False
# Number of highest-scoring classes kept per sequence
topk: int = 5

# PNG
# ---
# In case you want to export to png to use in report, set to "png".
# jq + base64 decode cli:
#   `jq -r '.cells[X] | .outputs[Y].data."image/png"' | base64 -d > fileX.png`
#
# Default
# ---
# Default is "notebook_connected" which is interactive and works while being connected
# to the internet and works in a browser too
plotly_renderer: str = "notebook_connected"

In [3]:
# Use the renderer selected in the parameters cell for every plotly figure
pio.renderers.default = plotly_renderer

# Echo the run configuration so it is recorded in the executed notebook
print(
    f"Using variation: {movinet_variation}",
    f"Normalize features: {normalize_features}",
    sep="\n",
)

Using variation: movineta0
Normalize features: False


In [4]:
# Constants

# Current working directory as an absolute path (this template folder)
SOURCE_PATH = Path(".").resolve()

# Only the sequences whose "SubjectApproachType" equals this value are analyzed
SUBJECT_APPROACH_TYPE = "Walk With Ladder"

## Loading Ground Truth and Experiment Results

In [5]:
# Ground-truth sequences: the CSV lives two directory levels above this folder
tp_fp_sequences_path = SOURCE_PATH.parent.parent.joinpath(
    "data", "handcrafted-metadata", "tp_fp_sequences.csv"
)

# Columns worth keeping. "Classification" is left out on purpose: all sequences
# are "TP" when filtering only the "Walk With Ladder" type.
kept_columns = [
    "Distance",
    "SubjectApproachType",
    "SubjectDescription",
    "Stage",
]

# The first CSV column is the sequence file name (e.g. "SZTEA101a_00_05_37.mov")
# and becomes the index
sequences_raw = pd.read_csv(tp_fp_sequences_path, index_col=0)
SEQUENCES_DF = sequences_raw.loc[:, kept_columns]

# Prefix every index entry with the path to the sequence file
prefixed_index = "data/sequences/" + SEQUENCES_DF.index
SEQUENCES_DF = SEQUENCES_DF.set_index(prefixed_index)

## Load the pickled result features

In [6]:
# Read the pickled results stored next to this notebook as "<movinet_variation>.pkl".
# NOTE: unpickling can execute arbitrary code; only load pickles produced by this project.
pickle_file = SOURCE_PATH.joinpath(f"{movinet_variation}.pkl")
features_df = pd.read_pickle(pickle_file)

# MoViNet produces 600 features (the 600 classes of the Kinetics-600 dataset),
# stored under the integer column labels 0..599
FEATURES_COLUMNS_INDEXES = pd.RangeIndex(start=0, stop=600, step=1)

if normalize_features:
    # Rescale each row so that its L2 norm becomes 100
    features = features_df[FEATURES_COLUMNS_INDEXES].to_numpy()
    row_norms = np.linalg.norm(features, axis=-1, keepdims=True)
    features_df[FEATURES_COLUMNS_INDEXES] = (features / row_norms * 100).tolist()

# Attach the features to the ground-truth rows (join on the index, keeping every ground-truth row)
df = SEQUENCES_DF.join(features_df, how="left")

In [7]:
# Restrict to the subject approach type under study; once filtered, the column
# holds a single value, so it is dropped
is_selected_type = df["SubjectApproachType"] == SUBJECT_APPROACH_TYPE
df = df.loc[is_selected_type].drop(columns="SubjectApproachType")

# Number of sequences kept
len(df)

26

## Compute their Top K scores and labels

In [8]:
# Softmax module normalizing along dim=1, i.e. the class axis of the (N, 600)
# feature tensor it is applied to in a later cell
softmax = torch.nn.Softmax(dim=1)

In [9]:
# Initialize the class map. It is indexed with integer class indices in later
# cells (classmap[int(i)]) to obtain a class name — presumably the Kinetics-600
# label names, judging by the helper's name (to be confirmed against the helper).
classmap = read_kinetics_600_classmap()

In [10]:
# Feature values of the selected sequences as a tensor:
# N, 600 (600 being the number of classes in Kinetics-600)
features = torch.tensor(df[FEATURES_COLUMNS_INDEXES].to_numpy())

# Per-sequence distribution over the classes: N, 600
probabilities = softmax(features)

# The `topk` largest probabilities of each sequence and their class indices: N, topk
predictions_scores, predictions_classes = torch.topk(probabilities, topk)

In [11]:
# labels = [[classmap[int(i)] for i in video_pred_classes] for video_pred_classes in predictions_classes]  # list of string with topk elements
# scores = [[round(float(x), 5) for x in video_pred_scores] for video_pred_scores in predictions_scores]  # float percentages (e.g. 0.34 -> 34%; sum doesn't sum to 1, as topk from preds)

In [12]:
# Produce the data of a heatmap with the topk predictions.
#
# x axis: every class appearing in the topk of at least one sequence, ordered by
# ascending class index. `torch.unique` flattens its input and returns sorted
# values, so no separate sort is needed (`Tensor.sort()` is not in-place: calling
# it without using its result has no effect).
all_prediction_classes = torch.unique(predictions_classes, sorted=True)

all_prediction_classes_names = [classmap[int(i)] for i in all_prediction_classes]

# y axis: sequences
# color: score
#  -> z[s][c] is the score of class c for sequence s if c is in the topk of s,
#     else 0
# Implementation: scatter the topk scores of every sequence into a zero matrix
# with one column per class (N, 600), then keep only the x-axis columns.
dense_scores = torch.zeros_like(probabilities).scatter(
    1, predictions_classes, predictions_scores
)
z = dense_scores[:, all_prediction_classes].tolist()

# Reduce z to the mean score of each class over all sequences (a class missing
# from the topk of a sequence counts as 0 for that sequence)
classes_means = np.array(z).mean(axis=0)

In [13]:
# Two stacked panels: a thin bar chart on top of the (much taller) heatmap
fig = make_subplots(rows=2, cols=1, row_heights=[0.15, 0.85], vertical_spacing=0.02)

# Top panel: mean score of every class as bars, colored by that same mean
mean_scores_bar = go.Bar(
    x=all_prediction_classes_names,
    y=classes_means,
    marker_color=classes_means,
    marker_colorscale="dense",
)
fig.add_trace(mean_scores_bar, row=1, col=1)
# The tick labels of the bar plot are hidden: the heatmap below lists the same classes
fig.update_xaxes(showticklabels=False, row=1, col=1)

# Bottom panel: one row per sequence, one column per class, color = score
topk_heatmap = go.Heatmap(
    z=z,
    x=all_prediction_classes_names,
    y=df.index,
    colorscale="dense",
)
fig.add_trace(topk_heatmap, row=2, col=1)

fig.update_layout(title_text="Topk predictions heatmap", width=1600, height=800)

fig.show()

In [14]:
# Print the topk classes and their scores
# csv:
#       class,score,model_name,normalize_features
# One row per (sequence, rank) pair; the sequence name itself is not emitted.
# NOTE: this rebinds `df`, which until here held the filtered sequences frame
# used by the heatmap cell (y=df.index).
records = []
for video_pred_classes, video_pred_scores in zip(
    predictions_classes, predictions_scores
):
    for pred_class_idx, video_pred_score in zip(
        video_pred_classes, video_pred_scores
    ):
        records.append(
            (
                classmap[int(pred_class_idx)],  # class name
                round(float(video_pred_score), 5),  # score
                movinet_variation,  # model name
                normalize_features,  # normalize features
            )
        )

df = pd.DataFrame(
    records,
    columns=["class", "score", "model_name", "normalize_features"],
)
# Highest scores first
df = df.sort_values(by="score", ascending=False)

print(df.to_csv(index=False))

class,score,model_name,normalize_features
slapping,1.0,movineta0,False
finger snapping,1.0,movineta0,False
stretching arm,1.0,movineta0,False
shaking head,0.99838,movineta0,False
playing didgeridoo,0.97988,movineta0,False
pirouetting,0.95373,movineta0,False
headbutting,0.95309,movineta0,False
golf chipping,0.45458,movineta0,False
golf putting,0.39467,movineta0,False
golf putting,0.37473,movineta0,False
pirouetting,0.37171,movineta0,False
stretching arm,0.34994,movineta0,False
using a paint roller,0.281,movineta0,False
stretching arm,0.27108,movineta0,False
presenting weather forecast,0.25085,movineta0,False
golf driving,0.23779,movineta0,False
golf putting,0.23387,movineta0,False
presenting weather forecast,0.19667,movineta0,False
golf putting,0.19538,movineta0,False
golf putting,0.19503,movineta0,False
shooting goal (soccer),0.19082,movineta0,False
golf chipping,0.16002,movineta0,False
golf putting,0.1558,movineta0,False
presenting weather forecast,0.14924,movineta0,False
presenting w