In [1]:
import functools
import glob
import json
import math
import os
from enum import Enum
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import numpy as np
import open_clip
import pandas as pd
import plotly.express as px
import torch
from ilids.models.actionclip.factory import create_models_and_transforms
from plotly.subplots import make_subplots
from pydantic import BaseModel
from sklearn.manifold import TSNE
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score

from main import get_similarity, SimilarityRequest, TopClassificationMethod, images_features_df, VARIATION_NAMES

In [2]:
def infer(TEXTS: List[str], TEXT_CLASSIFICATIONS: List[bool], VARIATION: str) -> None:
    """Score every clip against a set of text prompts and plot the results.

    Three figures are shown, in this order:

    1. Per ``y_true`` facet: a scatter of the similarity of each (clip, text)
       pair next to a split violin of the similarity distribution, coloured by
       the class of the text (``y_predict``).
    2. A scatter (with marginal violin) of the per-clip ratio
       ``mean(similarity | y_predict=True) / mean(similarity | y_predict=False)``.
    3. The ROC curve obtained by using that ratio as the score for ``y_true``,
       with the AUC in the title.

    Args:
        TEXTS: Text prompts forwarded to ``get_similarity`` as ``texts``.
        TEXT_CLASSIFICATIONS: Class of each prompt, forwarded as
            ``classifications`` (presumably aligned one-to-one with ``TEXTS``
            -- confirm against ``SimilarityRequest``).
        VARIATION: Model variation name; used both for the similarity request
            and to look up the ``"Alarm"`` column used as ground truth.

    Raises:
        ValueError: If no similarities are returned, or if the results do not
            contain both a ``True`` and a ``False`` ``y_predict`` class (the
            ratio is undefined without both).
    """
    similarities = get_similarity(
        SimilarityRequest(
            texts=TEXTS,
            classifications=TEXT_CLASSIFICATIONS,
            model_variation=VARIATION,
            text_classification_method=TopClassificationMethod.any,
            texts_to_subtract=[],
            apply_softmax=False,
        )
    ).similarities

    # Ground truth per clip; indexed by clip name (it is looked up by clip below).
    alarms_series = images_features_df(VARIATION)["Alarm"]

    # One record per (clip, text) pair.
    records = [
        dict(clip=clip, y_true=alarms_series[clip], **text_similarity.dict())
        for clip, text_similarities in similarities.items()
        for text_similarity in text_similarities
    ]
    if not records:
        raise ValueError("get_similarity returned no similarities to plot")

    df = (
        pd.DataFrame(records)
        .rename(columns={"classification": "y_predict"})
        .sort_values(["clip", "y_true", "y_predict", "text"])
        .reset_index(drop=True)
    )

    unique_y_true = df["y_true"].unique()
    n_facets = len(unique_y_true)

    # Two columns (scatter + violin) per y_true value. The widths are relative
    # and normalised by plotly, so the 0.3/0.2 pattern scales to any facet count.
    fig = make_subplots(
        rows=1,
        cols=2 * n_facets,
        column_widths=[0.3, 0.2] * n_facets,
        shared_yaxes=True,
        y_title="similarity",
        subplot_titles=[f"y_true={y_true}" for y_true in unique_y_true for _ in range(2)],
    )

    # Fixed class -> colour mapping so a class keeps its colour even when the
    # other class is absent from a facet.
    class_colors = {False: "CornflowerBlue", True: "Tomato"}

    # group by
    #  1. y_true (facet)
    #  2. y_predict / text class (color)
    for i, y_true in enumerate(unique_y_true):
        facet_df = df[df["y_true"] == y_true]
        scatter_col = i * 2 + 1
        violin_col = scatter_col + 1

        # Axis title does not depend on the class, so set it once per facet.
        fig.update_xaxes(title_text="text", row=1, col=scatter_col)

        for y_predict in sorted(facet_df["y_predict"].unique()):
            class_color = class_colors[bool(y_predict)]
            facet_color_df = facet_df[facet_df["y_predict"] == y_predict]

            # Positive class on the right half of the split violin, negative on the left.
            violin_side = "positive" if y_predict else "negative"

            fig.add_scatter(
                x=facet_color_df["text"],
                y=facet_color_df["similarity"],
                marker=dict(color=class_color, size=3),
                hovertext=facet_color_df["clip"],
                mode="markers",
                name=f"y_predict={str(y_predict)}",
                legendgroup=f"y_true={str(y_true)}",
                legendgrouptitle=dict(text=f"y_true={str(y_true)}"),
                row=1,
                col=scatter_col,
            )
            fig.add_violin(
                x=np.repeat(str(y_true), len(facet_color_df)),
                y=facet_color_df["similarity"],
                box=dict(visible=True),
                scalegroup=str(y_true),
                scalemode="count",
                width=1,
                meanline=dict(visible=True),
                side=violin_side,
                marker=dict(color=class_color),
                showlegend=False,
                row=1,
                col=violin_col,
            )

    fig.update_layout(height=900, violingap=0, violinmode="overlay")
    fig.show()

    # Mean similarity per clip and class, pivoted to one column per class.
    class_means = (
        df.groupby(["clip", "y_predict"])["similarity"].mean().unstack("y_predict")
    )
    for label in (True, False):
        if label not in class_means.columns:
            raise ValueError(
                f"no text with classification={label}; "
                "the True/False similarity ratio needs both classes"
            )

    # NOTE(review): a False-class mean of 0 yields an infinite ratio, which the
    # ROC computation below is expected to reject -- confirm if this can occur.
    ratio_df = (class_means[True] / class_means[False]).to_frame("ratio")
    ratio_df["y_true"] = alarms_series.loc[ratio_df.index]

    fig = px.scatter(
        ratio_df.sort_values(["y_true", "ratio"]),
        y="ratio",
        color="y_true",
        # Documented values are "auto", "svg" and "webgl"; the previous value
        # "line" is not one of them and appears to have fallen through to SVG.
        render_mode="svg",
        marginal_y="violin",
        height=900,
    )
    fig.show()

    fpr, tpr, thresholds = roc_curve(ratio_df["y_true"], ratio_df["ratio"])
    auc_score = roc_auc_score(ratio_df["y_true"], ratio_df["ratio"])

    roc_df = pd.DataFrame(
        {
            "False Positive Rate": fpr,
            "True Positive Rate": tpr,
        },
        columns=pd.Index(["False Positive Rate", "True Positive Rate"], name="Rate"),
        index=pd.Index(thresholds, name="Thresholds"),
    )
    # Dashed diagonal marks the chance-level classifier.
    fig = px.line(
        roc_df,
        x="False Positive Rate",
        y="True Positive Rate",
        title=f"{VARIATION} - AUC: {auc_score:.3f}",
        color_discrete_sequence=["orange"],
        range_x=[0, 1],
        range_y=[0, 1],
        width=600,
        height=450,
    ).add_shape(type="line", line=dict(dash="dash"), x0=0, x1=1, y0=0, y1=1)

    fig.show()

In [3]:
# Prompt set whose positive prompts mention the fence explicitly.
TEXTS_TRUE = [
    "human",
    "a video of a human approaching a fence",
    "human going through a fence",
    "human cutting a fence",
]
TEXTS_FALSE = [
    "birds flying",
    "plastic bag laying on the floor",
    "rabbits",
    "insects",
    "foxes",
]
# The two lists differ in length here (4 vs 5), so the equal-length check
# is left disabled for this cell.
TEXTS = [*TEXTS_TRUE, *TEXTS_FALSE]
TEXT_CLASSIFICATIONS = [True for _ in TEXTS_TRUE] + [False for _ in TEXTS_FALSE]

infer(TEXTS, TEXT_CLASSIFICATIONS, VARIATION_NAMES[1])

In [4]:
# Marked by the author as the best prompt set so far ("BEST!!").
TEXTS_TRUE = [
    "human",
    "a video of a human approaching",
    "human going through",
    "human cutting",
]
TEXTS_FALSE = [
    "birds flying",
    "plastic bag laying on the floor",
    "rabbits",
    "animals",
]
# This cell requires the same number of positive and negative prompts.
assert len(TEXTS_FALSE) == len(TEXTS_TRUE)
TEXTS = [*TEXTS_TRUE, *TEXTS_FALSE]
TEXT_CLASSIFICATIONS = [True for _ in TEXTS_TRUE] + [False for _ in TEXTS_FALSE]

infer(TEXTS, TEXT_CLASSIFICATIONS, VARIATION_NAMES[1])

In [5]:
# Same positive prompts as the previous cell, with a larger negative set.
TEXTS_TRUE = [
    "human",
    "a video of a human approaching",
    "human going through",
    "human cutting",
]
TEXTS_FALSE = [
    "birds flying",
    "plastic bag laying on the floor",
    "rabbits",
    "insects",
    "animals",
    "empty scene",
]
# The two lists differ in length here (4 vs 6), so the equal-length check
# is left disabled for this cell.
TEXTS = [*TEXTS_TRUE, *TEXTS_FALSE]
TEXT_CLASSIFICATIONS = [True for _ in TEXTS_TRUE] + [False for _ in TEXTS_FALSE]

infer(TEXTS, TEXT_CLASSIFICATIONS, VARIATION_NAMES[1])


In [6]:
# Mostly single-word prompts: person-like nouns as positives, scene and
# animal nouns as negatives.
TEXTS_TRUE = [
    "human",
    "person",
    "adult",
    "thief",
    "terrorist",
]
TEXTS_FALSE = [
    "birds flying",
    "bag",
    "rabbits",
    "insects",
    "animals",
    "empty scene",
    "fence",
    "wall",
    "barbed wire",
    "field",
]
# The two lists differ in length here (5 vs 10), so the equal-length check
# is left disabled for this cell.
TEXTS = [*TEXTS_TRUE, *TEXTS_FALSE]
TEXT_CLASSIFICATIONS = [True for _ in TEXTS_TRUE] + [False for _ in TEXTS_FALSE]

infer(TEXTS, TEXT_CLASSIFICATIONS, VARIATION_NAMES[1])


In [9]:
# A single positive prompt against a handful of negative prompts.
TEXTS_TRUE = [
    "human",
]
TEXTS_FALSE = [
    "birds flying",
    "bag",
    "rabbits",
    "insects",
    "animals",
    "barbed wire",
]
# The two lists differ in length here (1 vs 6), so the equal-length check
# is left disabled for this cell.
TEXTS = [*TEXTS_TRUE, *TEXTS_FALSE]
TEXT_CLASSIFICATIONS = [True for _ in TEXTS_TRUE] + [False for _ in TEXTS_FALSE]

infer(TEXTS, TEXT_CLASSIFICATIONS, VARIATION_NAMES[1])
