In [None]:
"""Evaluate the triplets from Shuki available at- https://github.com/ysterin/deep_cluster/tree/master/deep_cluster/triplets/data

According to Shuki: selected_triplets.csv contains the most data
the files:
selected_triplets_strong.csv
selected_triplets_robust.csv
both contain less data but clearer classifications

selected_triplets_fixed.csv should contain a fix for reading the data; according to Shuki there was a redundant comma or some other small mistake


"""
import pandas as pd
import os
import numpy as np
import re
from vame.util.auxiliary import read_config
import ast
from clean_up_triplets import clean_raw_triplets_file
from vame.analysis.kinutils import KinVideo, create_grid_video
from IPython import display
from ipywidgets import Output, GridspecLayout

# Triplets were downloaded from GitHub and saved locally.
# CAUTION: some of the csv files contain the file header
# (video_file, anchor, sample1, sample2, selected) several times within the csv file!
# -> additional data cleaning needed!
TRIPLETS_PATH = "/home/katharina/vame_approach/triplets_shuki"

# Project path with already predicted latent vectors -> run the themis_pipeline.py script first.
# Also create aligned videos to visualize the triplets by running create_aligned_videos.py!
PROJECT_PATH = "/home/katharina/vame_approach/tb_align_0089"
LATENT_VEC_PATH = os.path.join(PROJECT_PATH, "inference", "results", "06-08-2022-09-36")
# Derived from PROJECT_PATH (same resulting path as before) so that both locations
# stay in sync when the project path is changed.
ALIGNED_VIDEO_PATH = os.path.join(PROJECT_PATH, "videos", "aligned_videos")

# The model config is read from <PROJECT_PATH>/model/<name of the results folder>/config.yaml
config = read_config(
    os.path.join(
        PROJECT_PATH, "model", os.path.basename(LATENT_VEC_PATH), "config.yaml"
    )
)

In [None]:
# Load the triplet table through the clean-up helper.
# Alternative input file: "strong_triplets.csv"
triplets_csv = os.path.join(TRIPLETS_PATH, "selected_triplets_fixed.csv")
triplets = clean_raw_triplets_file(triplets_csv)

# The video id is the video file name up to its first dot.
triplets["video_id"] = triplets["video_file"].map(
    lambda video_file: os.path.basename(video_file).split(".")[0]
)

# anchor / sample1 / sample2 are stored as Python literals in text form; parse them.
for field in ("anchor", "sample1", "sample2"):
    triplets[field] = triplets[field].map(ast.literal_eval)

video_ids = np.unique(triplets["video_id"].to_numpy())

In [None]:
# Show the unique video ids that occur in the triplet table.
print(video_ids)

In [None]:
# Map video id -> latent vector file for every video referenced by the triplets.
# The video id of a file is taken to be the first run of digits in its name.
latent_vector_files = {}
for file in os.listdir(LATENT_VEC_PATH):
    id_match = re.search(r"\d+", file)
    # Files without any digit in their name are skipped instead of raising an IndexError.
    if id_match is not None and id_match.group() in video_ids:
        latent_vector_files[id_match.group()] = os.path.join(LATENT_VEC_PATH, file)

# Load the latent vectors of each matched file, keyed by video id.
latent_vectors = {
    video_id: np.load(file) for video_id, file in latent_vector_files.items()
}

In [None]:
# Map video id -> aligned video file ("a<video_id>.MP4"); ids without a file on disk are left out.
aligned_video_files = {}
for v_id in video_ids:
    candidate_file = os.path.join(ALIGNED_VIDEO_PATH, "a" + v_id + ".MP4")
    if os.path.exists(candidate_file):
        aligned_video_files[v_id] = candidate_file

In [None]:
# Probe the first available aligned video; its fps, width and height are reused below.
reference_video_file = list(aligned_video_files.values())[0]
video = KinVideo(reference_video_file, view="")
video.probevid()

# Length of the triplet snippet in seconds, derived from the frame span of the
# first triplet's anchor.
first_anchor_start = triplets.iloc[0]["anchor"][0]
first_anchor_end = triplets.iloc[0]["anchor"][1]
video_clip_duration = (first_anchor_end - first_anchor_start) / video.getfps()

In [None]:
# Remove all triplets which have no corresponding aligned video file or no latent vectors.
# Both are needed below: the videos for the visualization and the latent vectors for the
# distance calculation (a missing video id would otherwise raise a KeyError there).
has_aligned_video = triplets["video_id"].isin(list(aligned_video_files.keys()))
has_latent_vectors = triplets["video_id"].isin(list(latent_vectors.keys()))
triplets = triplets[has_aligned_video & has_latent_vectors]

### The "selected" field in the triplets has values 0, 1 or 2. 0: impossible to decide which of the two samples is closer to the anchor. 1: sample1 is closer to the anchor than sample2. 2: sample2 is closer to the anchor than sample1.

In [None]:
# Remove all triplets labelled with 0 in the selected field.
# The labels are compared numerically so the filter works no matter whether the column
# holds integers or their string form ("0", "1", "2"); a string column compared against
# the integer 0 would never match and nothing would be removed.
# Values that cannot be parsed become NaN and are kept, as before.
selected_labels = pd.to_numeric(triplets["selected"], errors="coerce")
triplets = triplets[selected_labels != 0]
print(triplets)

## Visualize the Triplets

In [None]:
# Show a random sample of at most 15 triplets. Each grid row holds one video built from
# the clips (anchor, sample1, sample2) of a triplet together with its "selected" label.
selected_ids = np.random.choice(len(triplets), min(15, len(triplets)), replace=False)
grid = GridspecLayout(len(selected_ids), 1)
fps = video.getfps()  # loop-invariant, query once
for i_row, s_id in enumerate(selected_ids):
    triplet = triplets.iloc[s_id]
    video_clip_data = [
        (
            aligned_video_files[triplet["video_id"]],
            triplet[field][0] / fps,  # clip start: frame index -> seconds
            (0, 0, video.width, video.height),  # presumably the crop region (full frame) - TODO confirm
        )
        for field in ["anchor", "sample1", "sample2"]
    ]
    # Triplets do not all have the same length, so the clip duration is derived from
    # this triplet's anchor instead of reusing the duration of the first triplet.
    clip_duration = (triplet["anchor"][1] - triplet["anchor"][0]) / fps
    grid_video_name = create_grid_video(
        video_clip_data, clip_duration, speed=0.5
    )  # duration is in seconds!!
    out = Output()
    with out:
        # Print inside the output widget so the label is shown next to its video.
        print("Selected id: ", triplet["selected"])
        display.display(
            display.Video(
                grid_video_name,
                embed=True,
                html_attributes="loop autoplay",
                width=600,
                height=200,
            )
        )
    grid[i_row, 0] = out
grid

## Calculate distances between latent vectors
Check whether the distance between the anchor and the closer sample is smaller than the distance between the anchor and the more distant sample.

In [None]:
# TODO: the triplets and the temporal window used to create the latent vectors differ
# in size (triplets: 60 or 120 frames, time window of the latent vectors: 144 frames).
# It is therefore assumed that the additional frames at the end of the time window
# have only a small effect.
# Extract the first entry (start frame index) of every anchor / sample1 / sample2 tuple.
start_idx_anchor, start_idx_sample1, start_idx_sample2 = (
    np.vstack(triplets[field].values)[:, 0]
    for field in ("anchor", "sample1", "sample2")
)

In [None]:
def _embeddings_at(start_indices):
    """Collect, for every triplet, the latent vector of its video at the given index.

    start_indices holds one index per row of `triplets`, in row order.
    Returns the looked-up latent vectors stacked into one array.
    """
    return np.array(
        [
            latent_vectors[video_id][time_idx]
            for video_id, time_idx in zip(triplets["video_id"].values, start_indices)
        ]
    )


embeddings_anchor = _embeddings_at(start_idx_anchor)
embeddings_sample1 = _embeddings_at(start_idx_sample1)
embeddings_sample2 = _embeddings_at(start_idx_sample2)

In [None]:
# Row-wise Euclidean (L2) distances between the embeddings of each triplet.
diff_anchor_s1 = embeddings_anchor - embeddings_sample1
diff_anchor_s2 = embeddings_anchor - embeddings_sample2
diff_s1_s2 = embeddings_sample1 - embeddings_sample2

dist_anchor_s1 = np.linalg.norm(diff_anchor_s1, axis=1)
dist_anchor_s2 = np.linalg.norm(diff_anchor_s2, axis=1)
dist_s1_s2 = np.linalg.norm(diff_s1_s2, axis=1)

In [None]:
# Calculate the number of correctly arranged distances: a triplet counts as correct when
# the sample labelled as closer to the anchor is also closer in the latent space.
# The labels are compared numerically so the count is right no matter whether the
# "selected" column holds integers or their string form; comparing an integer column
# against the strings "1" / "2" would never match and always yield 0.
selected_labels = pd.to_numeric(triplets["selected"], errors="coerce").to_numpy()
sample1_closer = dist_anchor_s1 < dist_anchor_s2
sample2_closer = dist_anchor_s1 > dist_anchor_s2
correctly_arranged = np.sum(sample1_closer & (selected_labels == 1)) + np.sum(
    sample2_closer & (selected_labels == 2)
)
# Report the result, otherwise the cell produces no visible output.
print(f"{correctly_arranged} of {len(triplets)} triplets are arranged correctly")

In [None]:
# Show every remaining triplet: each grid row holds one video built from the clips
# (anchor, sample1, sample2) together with its label and the latent-space distances.
grid = GridspecLayout(len(triplets), 1)
fps = video.getfps()  # loop-invariant, query once

for i_row in range(len(triplets)):
    triplet = triplets.iloc[i_row]
    video_clip_data = [
        (
            aligned_video_files[triplet["video_id"]],
            triplet[field][0] / fps,  # clip start: frame index -> seconds
            (0, 0, video.width, video.height),  # presumably the crop region (full frame) - TODO confirm
        )
        for field in ["anchor", "sample1", "sample2"]
    ]
    # Triplets do not all have the same length, so the clip duration is derived from
    # this triplet's anchor instead of reusing the duration of the first triplet.
    clip_duration = (triplet["anchor"][1] - triplet["anchor"][0]) / fps
    grid_video_name = create_grid_video(
        video_clip_data, clip_duration, speed=0.5
    )  # duration is in seconds!!
    out = Output()

    with out:
        # Print inside the output widget so the numbers are shown next to their video.
        print("Selected id", triplet["selected"])
        print("distance anchor-sample1", dist_anchor_s1[i_row])
        print("distance anchor-sample2", dist_anchor_s2[i_row])
        print("distance sample1-sample2", dist_s1_s2[i_row])
        display.display(
            display.Video(
                grid_video_name,
                embed=True,
                html_attributes="loop autoplay",
                width=600,
                height=200,
            )
        )
    grid[i_row, 0] = out

grid