In [None]:
import glob
import os
from math import trunc
from typing import Dict, List, Tuple

import open_clip
import torch
import plotly.express as px
import pandas as pd
from pathlib import Path

import numpy as np

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.manifold import TSNE

from ilids.models.actionclip.factory import create_models_and_transforms

In [None]:
# Repository root: the parent directory of the notebook's working directory.
SOURCE_PATH = Path.cwd().parent

# Integer labels 0..511 used to select the feature columns of the result frames.
# NOTE(review): assumes the stored feature vectors have 512 dimensions — confirm.
FEATURES_COLUMNS_INDEXES = pd.RangeIndex(512)

# One pickle per model variation; the file stem is used as the variation name.
VARIATION_PATHS = [
    Path(result_file)
    for result_file in glob.glob(str(SOURCE_PATH / "results" / "actionclip" / "*.pkl"))
]
VARIATION_NAMES = sorted(result_path.stem for result_path in VARIATION_PATHS)

tp_fp_sequences_path = SOURCE_PATH.joinpath(
    "data", "handcrafted-metadata", "tp_fp_sequences.csv"
)
SEQUENCES_DF = (
    pd.read_csv(tp_fp_sequences_path, index_col=0)
    # Only keep relevant columns
    .loc[
        :,
        [
            "Classification",
            "Duration",
            "Distance",
            "SubjectApproachType",
            "SubjectDescription",
            "Distraction",
            "Stage",
        ],
    ]
    # Fix index prefix for join
    .pipe(lambda sequences: sequences.set_index("data/sequences/" + sequences.index))
)


def load_variation_df(movinet_variation):
    """Load the features of one variation and attach them to the sequence metadata.

    Args:
        movinet_variation: Stem of the pickle file to read from
            ``results/actionclip`` (despite the parameter name, the path built
            here points at the ``actionclip`` results directory).

    Returns:
        ``SEQUENCES_DF`` joined with the un-pickled feature frame, extended with
        an ``Alarm`` column (``Classification == "TP"``) and an ``Activation``
        column (row-wise maximum over the feature columns).
    """
    # NOTE(review): un-pickling can execute arbitrary code; only load trusted result files.
    features_df = pd.read_pickle(
        SOURCE_PATH / "results" / "actionclip" / f"{movinet_variation}.pkl"
    )

    return SEQUENCES_DF.join(features_df).assign(
        Alarm=lambda joined: joined["Classification"] == "TP",
        # For each sample, get the highest feature/signal
        Activation=lambda joined: joined[FEATURES_COLUMNS_INDEXES].max(axis=1),
    )


# Joined metadata/feature frame of every variation, keyed by variation name.
ALL_DF = dict(zip(VARIATION_NAMES, map(load_variation_df, VARIATION_NAMES)))

# Only the second object returned by the factory is kept; this notebook calls it
# on tokenized text to obtain text features.
model_text = create_models_and_transforms(
    actionclip_pretrained_ckpt=SOURCE_PATH.joinpath("ckpt", "actionclip", "vit-b-16-8f.pt"),
    openai_model_name="ViT-B-16",
    extracted_frames=8,
    device=torch.device("cpu"),
)[1]

In [None]:
def get_text_features(text: str) -> np.ndarray:
    """Return the text-model output for ``text`` as a flat NumPy array.

    The text is tokenized with ``open_clip.tokenize`` as a batch of one, passed
    through ``model_text`` without gradient tracking, and flattened.
    """
    tokens = open_clip.tokenize([text])

    with torch.no_grad():
        embedding = model_text(tokens)

    return embedding.numpy().ravel()

In [None]:
# Number of values in one text feature vector, probed with an empty prompt.
TEXT_FEATURES_LEN = get_text_features("").size

# Two descriptive columns followed by one integer-labelled column per feature.
DF_COLUMNS = ["text", "classification", *range(TEXT_FEATURES_LEN)]

# Starts empty; rows are appended by `update_texts_dataframe`.
df = pd.DataFrame(data=[], columns=DF_COLUMNS)

In [None]:
def update_texts_dataframe(new_text: str, classification: bool) -> np.ndarray:
    """Compute the features of ``new_text`` and record them in the global ``df``.

    The module-level ``df`` is modified in place. If ``new_text`` is already
    present, the earlier row is kept and the new one is dropped.

    Args:
        new_text: Text to encode and store.
        classification: Label stored next to the text.

    Returns:
        The feature vector computed for ``new_text``.
    """
    features = get_text_features(new_text)

    # Append under the next integer label (the index is kept contiguous below).
    df.loc[len(df)] = pd.Series([new_text, classification, *features], index=DF_COLUMNS)

    # Keep the first occurrence of each text, then renumber the rows from 0.
    df.drop_duplicates(subset="text", inplace=True)
    df.reset_index(drop=True, inplace=True)

    return features

In [None]:
def downscale_outlier(arr: np.ndarray, m: float = 2.) -> np.ndarray:
    """Clamp values lying more than ``m`` standard deviations away from the mean.

    Values above ``mean + m * std`` are replaced by that upper bound and values
    below ``mean - m * std`` by that lower bound, so an outlier keeps its side
    of the distribution. (Writing the bare ``m * std`` value into every outlier
    would ignore the mean and turn negative outliers positive.)

    Args:
        arr: Input values; the array is not modified.
        m: Number of standard deviations tolerated around the mean.

    Returns:
        A new array with the same shape as ``arr`` and the outliers clamped.
    """
    arr = np.asarray(arr)
    if arr.size == 0:
        # Nothing to clamp; also avoids the mean/std warnings on empty input.
        return arr.copy()

    center = np.mean(arr)
    threshold = m * np.std(arr)

    return np.clip(arr, center - threshold, center + threshold)

def plot_linear_heatmap(text: str, arr: np.ndarray, colorbar_y) -> go.Heatmap:
    """Build a one-row heatmap trace for a feature vector.

    Args:
        text: Label shown on the y axis for the single row.
        arr: One-dimensional vector to display; one heatmap cell per value.
        colorbar_y: Vertical position passed to the trace's colorbar.

    Returns:
        A ``go.Heatmap`` trace (not yet attached to any figure).
    """
    return go.Heatmap(
        z=[arr],
        # Derive the x positions from the vector itself so the trace is always
        # consistent with its data, whatever the length of `arr`.
        x=list(range(len(arr))),
        y=[text],
        colorbar=dict(y=colorbar_y, len=.5),
        colorscale='Viridis',
    )

def add_new_text_and_plot(text: str, classification: bool) -> go.Figure:
    """Record ``text`` in the texts table and plot its feature vector twice.

    The figure has two stacked rows: the raw features on top and the same
    features after ``downscale_outlier`` below.

    Args:
        text: Text to encode, store and plot.
        classification: Label stored with the text.

    Returns:
        The two-row figure; the caller decides whether to show it.
    """
    features = update_texts_dataframe(text, classification)

    # (row label, values, colorbar vertical position) for each subplot row.
    heatmap_rows = (
        (text, features, .8),
        (f"{text}<br><i>scaled outliers</i>", downscale_outlier(features), .2),
    )

    fig = make_subplots(rows=2)
    for row_number, (label, values, colorbar_y) in enumerate(heatmap_rows, start=1):
        fig.add_trace(plot_linear_heatmap(label, values, colorbar_y), row=row_number, col=1)

    return fig

In [None]:
# Register a text labelled False (not an alarm) and show its two feature heatmaps.
add_new_text_and_plot("picture of an empty field", False).show()


In [None]:
# Shorter variant of the empty-field text, also labelled False.
add_new_text_and_plot("empty field", False).show()


In [None]:
# Static scene text mentioning the fence, labelled False.
add_new_text_and_plot("field with a fence", False).show()


In [None]:
# Animal-motion text, labelled False.
add_new_text_and_plot("rabbits running around a field", False).show()


In [None]:
# Weather text, labelled False.
add_new_text_and_plot("wind blowing", False).show()


In [None]:
# Moving-object text without a human, labelled False.
add_new_text_and_plot("plastic bag flying", False).show()


In [None]:
# First text labelled True (alarm): a human climbing a ladder.
add_new_text_and_plot("human climbing a ladder", True).show()


In [None]:
# Text labelled True (alarm): a human approaching the fence.
add_new_text_and_plot("a human approaching a fence", True).show()


In [None]:
# Text labelled True (alarm): rephrasing of the approach as walking towards the fence.
add_new_text_and_plot("human walking towards a fence", True).show()


In [None]:
# Display the accumulated texts table: one row per distinct text with its label and feature columns.
df


In [None]:
# Stack the stored text features into a float64 tensor, one row per text.
text_features = torch.from_numpy(
    df[FEATURES_COLUMNS_INDEXES].to_numpy(dtype=np.float64)
)
# Scale every row to unit L2 norm.
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
text_features.shape


In [None]:
# Heatmap of the raw (un-normalised) text features: one row per text, one column per feature.
go.Figure(
    data=go.Heatmap(
        x=list(range(TEXT_FEATURES_LEN)),
        y=df["text"],
        z=df[FEATURES_COLUMNS_INDEXES],
        colorscale='Viridis',
    )
)

In [None]:
# Same heatmap as above, drawn from the unit-norm text features.
go.Figure(
    data=go.Heatmap(
        x=list(range(TEXT_FEATURES_LEN)),
        y=df["text"],
        z=text_features.numpy(),
        colorscale='Viridis',
    )
)

In [None]:
# Feature columns of the "vit-b-16-8f" variation as a float64 tensor, one row per sequence.
# NOTE(review): unlike `text_features`, these rows are not L2-normalised in this notebook;
# presumably the pickled features are already unit-norm — confirm.
images_features = torch.from_numpy(ALL_DF["vit-b-16-8f"][FEATURES_COLUMNS_INDEXES].to_numpy(dtype=np.float64))
images_features.shape


In [None]:
# Scaled dot product between every sequence row and every text row
# (result has one row per sequence and one column per text).
# NOTE(review): this is a cosine similarity only if `images_features` rows are unit-norm — confirm.
similarity = 100. * images_features @ text_features.T
similarity


In [None]:
# Softmax over the texts (last dimension): each sequence row sums to 1.
# `similarity` is deliberately left untouched so that re-running this cell does
# not apply the softmax to an already-softmaxed tensor.
softmax_sim = similarity.softmax(dim=-1)
softmax_sim


In [None]:
# Heatmap of the per-sequence softmax scores: texts on x, sequences on y.
fig = go.Figure(data=go.Heatmap(
    z=softmax_sim.numpy(),
    x=df["text"],
    # Remove the exact "data/sequences/" prefix. `str.lstrip` would instead strip
    # any leading run of the characters in that string and could eat the start
    # of a sequence name.
    y=ALL_DF["vit-b-16-8f"].index.str.replace(r"^data/sequences/", "", regex=True)
))

fig.show()


In [None]:
vit_b16_sequences_df = ALL_DF["vit-b-16-8f"]

# Column position of the best-scoring text for every sequence.
top1_action_idx = softmax_sim.numpy().argmax(axis=1)

# One row per sequence: the winning text, its label, and the sequence ground truth.
top1_action_df = pd.DataFrame(
    df["text"].to_numpy()[top1_action_idx],
    columns=["top1_action"],
    index=vit_b16_sequences_df.index,
)
top1_action_df["top1_text_classification"] = df["classification"].to_numpy()[top1_action_idx]
top1_action_df["Classification"] = vit_b16_sequences_df["Classification"] == "TP"
# True where the label of the winning text agrees with the sequence being a TP.
top1_action_df["Classification_Match"] = (
    top1_action_df["top1_text_classification"] == top1_action_df["Classification"]
)
# Carry over the sequence metadata used to inspect the results.
for metadata_column in ("SubjectApproachType", "SubjectDescription", "Distance", "Distraction"):
    top1_action_df[metadata_column] = vit_b16_sequences_df[metadata_column]
top1_action_df

In [None]:
# Sequences whose winning text label disagrees with the ground truth.
mismatch_df = top1_action_df.loc[
    ~top1_action_df["Classification_Match"],
    ["top1_action", "Classification", "SubjectApproachType", "SubjectDescription", "Distraction"],
]
# TP sequences whose winning text was labelled False.
mismatch_df.loc[mismatch_df["Classification"]]

In [None]:
# Remaining mismatches: sequences not labelled TP whose winning text was labelled True.
mismatch_df[~mismatch_df["Classification"]]


In [None]:
# Sequences whose winning text label agrees with the ground truth.
matching_df = top1_action_df.loc[top1_action_df["Classification_Match"]]
# Shapes of the agreeing TP rows and of the agreeing non-TP rows, in that order.
(
    matching_df.loc[matching_df["Classification"]].shape,
    matching_df.loc[~matching_df["Classification"]].shape,
)


In [None]:
# Colour category per sequence, in priority order:
# "Alarm" for TP sequences, else "Distraction" when a distraction is recorded,
# else "Background".
projection_color_df = pd.DataFrame(index=ALL_DF["vit-b-16-8f"].index)
projection_color_df["category"] = np.select(
    [
        (ALL_DF["vit-b-16-8f"]["Classification"] == "TP").to_numpy(),
        ALL_DF["vit-b-16-8f"]["Distraction"].notnull().to_numpy(),
    ],
    ["Alarm", "Distraction"],
    default="Background",
)
projection_color_df["Classification"] = ALL_DF["vit-b-16-8f"]["Classification"]
projection_color_df

In [None]:
# 2-D t-SNE embedding of the sequence features (PCA initialisation, fixed seed).
sequences_tsne_2d = TSNE(n_components=2, init='pca', random_state=16896375)
sequences_projections_2d = sequences_tsne_2d.fit_transform(
    ALL_DF["vit-b-16-8f"][FEATURES_COLUMNS_INDEXES]
)

# Scatter of the embedding, coloured by category, with the sequence name on hover.
fig = px.scatter(
    sequences_projections_2d,
    x=0,
    y=1,
    color=projection_color_df["category"],
    hover_data=dict(sequence=ALL_DF["vit-b-16-8f"].index),
    render_mode='svg',
)
fig.show()


In [None]:
# 3-D t-SNE embedding of the same sequence features (default initialisation, fixed seed).
sequences_tsne_3d = TSNE(n_components=3, random_state=16896375)
sequences_projections_3d = sequences_tsne_3d.fit_transform(
    ALL_DF["vit-b-16-8f"][FEATURES_COLUMNS_INDEXES]
)

# 3-D scatter of the embedding, coloured by category, with the sequence name on hover.
fig = px.scatter_3d(
    sequences_projections_3d,
    x=0,
    y=1,
    z=2,
    color=projection_color_df["category"],
    hover_data=dict(sequence=ALL_DF["vit-b-16-8f"].index),
)
fig.show()


In [None]:
# 2-D t-SNE embedding of the unit-norm text features (fixed seed, perplexity 5).
texts_tsne_2d = TSNE(n_components=2, perplexity=5., random_state=16896375)
texts_projections_2d = texts_tsne_2d.fit_transform(text_features)

# Scatter of the embedding, coloured by the text label, with the text on hover.
fig = px.scatter(
    texts_projections_2d,
    x=0,
    y=1,
    color=df["classification"],
    hover_data=dict(text=df["text"]),
    render_mode='svg',
)
fig.show()

In [None]:
# 3-D t-SNE embedding of the unit-norm text features (fixed seed, perplexity 5).
texts_tsne_3d = TSNE(n_components=3, perplexity=5., random_state=16896375)
texts_projections_3d = texts_tsne_3d.fit_transform(text_features)

# 3-D scatter of the embedding, coloured by the text label, with the text on hover.
fig = px.scatter_3d(
    texts_projections_3d,
    x=0,
    y=1,
    z=2,
    color=df["classification"],
    hover_data=dict(text=df["text"]),
)
fig.show()
