In [69]:
import glob
import os
from math import trunc
from typing import Dict, List, Tuple

import open_clip
import torch
import plotly.express as px
import pandas as pd
from pathlib import Path

import numpy as np

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from ilids.models.actionclip.factory import create_models_and_transforms

In [70]:
# Repository root: the parent of the directory the kernel was started in.
SOURCE_PATH = Path(os.getcwd()).parent

# Column labels 0..511 holding the feature values in every features frame.
FEATURES_COLUMNS_INDEXES = pd.RangeIndex(start=0, stop=512, step=1)

# One pickle of features per ActionCLIP variation; the file stem is the variation name.
VARIATION_PATHS = [
    Path(result_file)
    for result_file in glob.glob(str(SOURCE_PATH / "results" / "actionclip" / "*.pkl"))
]
VARIATION_NAMES = sorted(result_path.stem for result_path in VARIATION_PATHS)

tp_fp_sequences_path = (
    SOURCE_PATH / "data" / "handcrafted-metadata" / "tp_fp_sequences.csv"
)
SEQUENCES_DF = (
    pd.read_csv(tp_fp_sequences_path, index_col=0)
    # Only keep relevant columns
    [
        [
            "Classification",
            "Duration",
            "Distance",
            "SubjectApproachType",
            "SubjectDescription",
            "Distraction",
            "Stage",
        ]
    ]
    # Prefix the index so that it lines up with the features frames when joining
    .pipe(lambda frame: frame.set_index("data/sequences/" + frame.index))
)


def load_variation_df(movinet_variation):
    """Load the features of one variation and join them onto the sequence metadata.

    Args:
        movinet_variation: stem of the pickle file to read from
            ``results/actionclip`` (despite the parameter name, the ActionCLIP
            results directory is the one that is read).

    Returns:
        ``SEQUENCES_DF`` joined with the pickled features frame, plus two
        derived columns: ``Alarm`` (``Classification == "TP"``) and
        ``Activation`` (row-wise maximum over the feature columns).
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted result files.
    features_df = pd.read_pickle(
        SOURCE_PATH / "results" / "actionclip" / f"{movinet_variation}.pkl"
    )

    return SEQUENCES_DF.join(features_df).assign(
        Alarm=lambda joined: joined["Classification"] == "TP",
        # For each sample, get the highest feature/signal
        Activation=lambda joined: joined[FEATURES_COLUMNS_INDEXES].max(axis=1),
    )


# Joined metadata + features frame for every variation found on disk, keyed by name.
ALL_DF = {name: load_variation_df(name) for name in VARIATION_NAMES}

# Only the second element returned by the factory is kept; it is used below to
# embed tokenized text.
model_text = create_models_and_transforms(
    actionclip_pretrained_ckpt=SOURCE_PATH / "ckpt" / "actionclip" / "vit-b-16-8f.pt",
    openai_model_name="ViT-B-16",
    extracted_frames=8,
    device=torch.device("cpu"),
)[1]

In [71]:
def get_text_features(text: str) -> np.ndarray:
    """Embed a single text with ``model_text`` and return it as a flat array.

    Args:
        text: the sentence to tokenize and embed.

    Returns:
        The model output for ``text``, flattened to one dimension.
    """
    tokens = open_clip.tokenize([text])

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        embedding = model_text(tokens)
        return embedding.numpy().ravel()

In [72]:
# Probe the text model once with an empty prompt to learn the embedding length.
TEXT_FEATURES_LEN = len(get_text_features(""))

# One "text" column followed by one column per feature position.
DF_COLUMNS = ["text", *range(TEXT_FEATURES_LEN)]

# Starts empty; rows are appended by `update_texts_dataframe`.
df = pd.DataFrame([], columns=DF_COLUMNS)

In [73]:
def update_texts_dataframe(new_text) -> np.ndarray:
    """Embed ``new_text`` and record it in the module-level ``df``.

    The global ``df`` is modified in place: a row is appended, then rows with
    an already-seen ``text`` are dropped (first occurrence wins) and the index
    is renumbered from zero.

    Args:
        new_text: the text to embed and store.

    Returns:
        The features computed for ``new_text``.
    """
    features = get_text_features(new_text)

    # Append under the next free integer label.
    df.loc[len(df)] = pd.Series([new_text, *features], index=DF_COLUMNS)

    # Keep a single row per text, then restore a gap-free 0..n-1 index.
    df.drop_duplicates(subset="text", inplace=True)
    df.reset_index(inplace=True, drop=True)

    return features

In [74]:
def downscale_outlier(arr: np.ndarray, m = 2.) -> np.ndarray:
    """Return a copy of ``arr`` with outliers clipped to ``mean ± m * std``.

    Values further than ``m`` standard deviations from the mean are pulled back
    to the nearest bound, so a low outlier stays low and a high outlier stays
    high. The input array is never modified.

    Args:
        arr: values to clip.
        m: number of standard deviations tolerated around the mean.

    Returns:
        A new array with the same shape and dtype as ``arr``.
    """
    if arr.size == 0:
        # Nothing to clip; also avoids the mean/std warnings on empty input.
        return arr.copy()

    center = np.mean(arr)
    threshold = m * np.std(arr)

    # Clip symmetrically around the mean; the sign of each outlier is preserved.
    clipped = np.clip(arr, center - threshold, center + threshold)

    return clipped.astype(arr.dtype, copy=False)

def plot_linear_heatmap(text: str, arr: np.ndarray, colorbar_y) -> go.Heatmap:
    """Build a single-row heatmap trace for a 1-D array of values.

    Args:
        text: label shown on the y axis for the row.
        arr: 1-D array of values; one heatmap cell per element.
        colorbar_y: vertical position passed to the trace's colorbar.

    Returns:
        A ``go.Heatmap`` trace, not yet attached to any figure.
    """
    trace = go.Heatmap(
        z=[arr],
        # Derive the x positions from the data itself rather than from a global,
        # so the trace is always consistent with the array that is passed in.
        x=list(range(len(arr))),
        y=[text],
        colorbar=dict(y=colorbar_y, len=.5),
        colorscale='Viridis')

    return trace

def add_new_text_and_plot(text: str) -> go.Figure:
    """Embed ``text``, record it, and plot its features as two stacked heatmaps.

    Args:
        text: the text to embed; it is also stored via ``update_texts_dataframe``.

    Returns:
        A two-row figure: the features as computed on the first row, the same
        features after ``downscale_outlier`` on the second row.
    """
    features = update_texts_dataframe(text)

    raw_trace = plot_linear_heatmap(text, features, .8)
    scaled_trace = plot_linear_heatmap(
        f"{text}<br><i>scaled outliers</i>", downscale_outlier(features), .2
    )

    fig = make_subplots(rows=2)
    for row, trace in enumerate((raw_trace, scaled_trace), start=1):
        fig.add_trace(trace, row=row, col=1)

    return fig

In [75]:
# Candidate text prompts, embedded and plotted in this order. The order matters:
# it determines the row order of `df`, and therefore the column order of the
# similarity matrix computed further down.
PROMPTS = [
    "picture of an empty field",
    "empty field",
    "field with a fence",
    "man climbing a ladder",
    "a human approaching a fence",
    "rabbits running around a field",
    "wind blowing",
    "plastic bag flying",
    "human walking towards a fence",
]

for prompt in PROMPTS:
    add_new_text_and_plot(prompt).show()


In [84]:
# Show every prompt embedded so far: one row per unique text, one column per feature.
df


Unnamed: 0,text,0,1,2,3,4,5,6,7,8,...,502,503,504,505,506,507,508,509,510,511
0,picture of an empty field,-0.35609,-0.271133,-0.137026,0.125804,-0.233744,0.522557,-0.208291,-0.043032,-0.154073,...,0.18801,0.039296,-0.124762,-0.06311,-0.368589,0.229237,-0.186146,-0.366585,0.342311,-0.199278
1,empty field,-0.295095,-0.218877,-0.192643,0.134075,-0.234198,0.441821,-0.233674,-0.02608,-0.085609,...,0.123664,0.061273,-0.098849,-0.04576,-0.3306,0.179777,-0.224275,-0.332272,0.361963,-0.246271
2,field with a fence,-0.12067,-0.450603,0.20947,0.009077,0.158471,0.168095,-0.322119,0.197069,-0.306551,...,-0.00165,-0.058275,0.236303,-0.208223,-0.166307,0.184759,0.092327,-0.010297,-0.034521,-0.076386
3,man climbing a ladder,-0.551713,-0.217681,0.183532,-0.022692,0.234883,0.137929,0.20579,0.351075,-0.315958,...,0.095843,-0.055543,0.162736,0.048026,0.045994,-0.306849,0.19423,-0.310613,0.080763,0.099642
4,a human approaching a fence,-0.005462,-0.365037,0.190679,-0.168931,0.18724,0.095736,-0.289104,0.212373,-0.266311,...,0.061439,-0.135051,0.35056,-0.050822,-0.004916,0.113562,0.155182,0.057085,-0.06201,-0.033774
5,rabbits running around a field,-0.163206,-0.56653,-0.147852,0.37715,-0.061112,0.129431,-0.265035,0.078672,-0.106596,...,-0.317187,0.210297,-0.057121,-0.147961,-0.439237,-0.121976,-0.006623,-0.174817,0.194057,0.035039
6,wind blowing,-0.215425,-0.094497,-0.131679,0.095478,0.309397,-0.094538,-0.461097,0.299907,0.397832,...,-0.366881,0.099616,-0.165579,-0.082181,-0.033465,0.007524,0.713355,0.440283,0.226067,0.025934
7,plastic bag flying,0.045433,0.021208,0.171719,0.038145,-0.073443,0.184898,0.096925,-0.055723,-0.129427,...,-0.304049,-0.078793,-0.054164,0.038147,-0.466997,-0.315993,0.435417,0.146395,0.034537,-0.245097
8,human walking towards a fence,-0.052189,-0.355159,0.157766,-0.140254,0.371432,-0.036416,-0.360644,0.227916,-0.180473,...,0.164948,-0.232989,0.308196,-0.062923,-0.090947,0.137682,0.205779,0.155847,-0.061114,0.006005


In [85]:
# Stack the stored prompt embeddings into a float64 tensor, one row per text.
text_features = torch.from_numpy(
    df[FEATURES_COLUMNS_INDEXES].to_numpy(dtype=np.float64)
)
# L2-normalise each row so that dot products against it behave like cosines.
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
text_features.shape


torch.Size([9, 512])

In [86]:
# Feature vectors of every sequence for the "vit-b-16-8f" variation, one row each.
images_features = torch.from_numpy(ALL_DF["vit-b-16-8f"][FEATURES_COLUMNS_INDEXES].to_numpy(dtype=np.float64))
# L2-normalise each row, mirroring what is done for the text features, so that
# the product computed afterwards is a true cosine similarity.
# NOTE(review): if the pickled features are already unit-norm this is a no-op
# (up to rounding) — confirm against the feature-extraction step.
images_features = images_features / images_features.norm(dim=-1, keepdim=True)
images_features.shape


torch.Size([864, 512])

In [87]:
# Scaled dot product between every sequence (rows) and every text prompt (columns).
# The scaling is applied to the image features before the matrix product.
similarity = torch.matmul(images_features.mul(100.), text_features.t())
similarity


tensor([[16.4574, 16.0481, 19.9589,  ..., 13.4874, 13.4469, 22.5220],
        [17.0473, 16.7116, 20.6221,  ..., 14.0103, 13.7152, 22.6304],
        [15.0031, 14.8112, 19.2253,  ..., 16.1342, 13.7557, 21.1092],
        ...,
        [10.5742, 10.0869, 13.5842,  ...,  9.5726, 12.4303, 17.8463],
        [10.7316, 10.3085, 13.7750,  ...,  9.3816, 12.1195, 17.8375],
        [10.6674, 10.2638, 13.3034,  ...,  9.0685, 11.5992, 17.0522]],
       dtype=torch.float64)

In [88]:
# Turn each row of similarities into a probability distribution over the prompts.
# Only `softmax_sim` is bound here: `similarity` keeps the raw scores, so
# re-running this cell does not apply softmax on top of a previous softmax.
softmax_sim = similarity.softmax(dim=-1)
softmax_sim


tensor([[1.5505e-03, 1.0297e-03, 5.1423e-02,  ..., 7.9546e-05, 7.6383e-05,
         6.6725e-01],
        [2.1957e-03, 1.5697e-03, 7.8361e-02,  ..., 1.0535e-04, 7.8427e-05,
         5.8384e-01],
        [1.3584e-03, 1.1212e-03, 9.2627e-02,  ..., 4.2100e-03, 3.9024e-04,
         6.0936e-01],
        ...,
        [4.5536e-04, 2.7971e-04, 9.2379e-03,  ..., 1.6725e-04, 2.9137e-03,
         6.5548e-01],
        [5.2147e-04, 3.4158e-04, 1.0939e-02,  ..., 1.3519e-04, 2.0893e-03,
         6.3576e-01],
        [1.0707e-03, 7.1518e-04, 1.4945e-02,  ..., 2.1642e-04, 2.7187e-03,
         6.3470e-01]], dtype=torch.float64)

In [89]:
# Heatmap of the per-sequence softmax scores: one column per prompt, one row per
# sequence.
fig = go.Figure(data=go.Heatmap(
    z=softmax_sim.numpy(),
    x=df["text"],
    # Remove the literal path prefix. `str.lstrip` would strip any leading run of
    # the *characters* in "data/sequences/", which can eat into the file name.
    y=ALL_DF["vit-b-16-8f"].index.str.replace(r"^data/sequences/", "", regex=True)
))
fig.update_layout(
    title="Softmax similarity between sequences and text prompts",
    xaxis_title="Text prompt",
    yaxis_title="Sequence",
)

fig.show()


In [98]:
# Position of the best-scoring prompt for every sequence.
top1_action_idx = softmax_sim.numpy().argmax(axis=1)
# Map those positions back to the prompt texts and put them next to the metadata
# columns of interest, aligned on the sequence index.
top1_action_df = pd.DataFrame(
    {"top1_action": df["text"].to_numpy()[top1_action_idx]},
    index=ALL_DF["vit-b-16-8f"].index,
).assign(
    Classification=ALL_DF["vit-b-16-8f"]["Classification"],
    SubjectApproachType=ALL_DF["vit-b-16-8f"]["SubjectApproachType"],
)
top1_action_df

Unnamed: 0_level_0,top1_action,Classification
id_sequence,Unnamed: 1_level_1,Unnamed: 2_level_1
data/sequences/SZTEA101a_00_00_42.mov,human walking towards a fence,FP
data/sequences/SZTEA101a_00_04_47.mov,human walking towards a fence,FP
data/sequences/SZTEA101a_00_05_37.mov,human walking towards a fence,TP
data/sequences/SZTEA101a_00_08_58.mov,picture of an empty field,TP
data/sequences/SZTEA101a_00_12_12.mov,human walking towards a fence,TP
...,...,...
data/sequences/SZTRN202d_00_09_52.mov,human walking towards a fence,FP
data/sequences/SZTRN202d_00_11_17.mov,human walking towards a fence,FP
data/sequences/SZTRN202d_00_12_32.mov,human walking towards a fence,FP
data/sequences/SZTRN203a_00_00_45.mov,human walking towards a fence,FP


In [99]:
# Inspect the joined metadata + features frame of the "vit-b-16-8f" variation.
ALL_DF["vit-b-16-8f"]


Unnamed: 0_level_0,Classification,Duration,Distance,SubjectApproachType,SubjectDescription,Distraction,Stage,0,1,2,...,504,505,506,507,508,509,510,511,Alarm,Activation
id_sequence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
data/sequences/SZTEA101a_00_00_42.mov,FP,00:01:07,,,,,1,-0.108582,-0.038727,0.019775,...,0.048340,0.195923,-0.029953,0.082764,0.057831,0.068481,-0.026535,-0.051636,False,0.301514
data/sequences/SZTEA101a_00_04_47.mov,FP,00:00:39,,,,,1,-0.107239,-0.029831,0.009163,...,0.047485,0.179077,-0.033600,0.080444,0.058655,0.058899,-0.046631,-0.056213,False,0.307373
data/sequences/SZTEA101a_00_05_37.mov,TP,00:01:00,30.0,Crouch Walk,One Person,,1,-0.122864,-0.010185,-0.027359,...,0.043396,0.136230,-0.044220,0.023636,0.041779,0.040894,-0.000834,-0.044769,True,0.171997
data/sequences/SZTEA101a_00_08_58.mov,TP,00:01:08,15.0,Crawl,One Person,,1,-0.122375,-0.007896,-0.003401,...,0.058044,0.124817,-0.065613,0.034058,0.061371,0.044281,-0.011253,-0.038239,True,0.198120
data/sequences/SZTEA101a_00_12_12.mov,TP,00:01:00,10.0,Run,One Person,,1,-0.097656,-0.042175,0.016876,...,0.064148,0.146240,-0.031113,0.061920,0.066467,0.071167,-0.022049,-0.059204,True,0.331787
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
data/sequences/SZTRN202d_00_09_52.mov,FP,00:01:10,,,,,2,-0.090088,-0.063049,-0.034454,...,-0.003462,0.139526,-0.013222,0.074951,0.000834,0.000526,-0.030273,-0.072388,False,0.415527
data/sequences/SZTRN202d_00_11_17.mov,FP,00:00:31,,,,,2,-0.095154,-0.062561,-0.046539,...,-0.013901,0.126709,-0.016663,0.077087,-0.010834,-0.000832,-0.032501,-0.075256,False,0.435059
data/sequences/SZTRN202d_00_12_32.mov,FP,00:01:27,,,,,2,-0.093750,-0.066406,-0.042358,...,-0.015884,0.128540,-0.017212,0.081238,-0.006020,0.004253,-0.024948,-0.073853,False,0.412354
data/sequences/SZTRN203a_00_00_45.mov,FP,00:01:40,,,,,2,-0.087158,-0.056122,-0.044067,...,-0.009903,0.125366,-0.016678,0.075134,-0.005947,0.000193,-0.026062,-0.076233,False,0.440186
