In [1]:
import asyncio
from collections import OrderedDict
import logging
import os
from pathlib import Path
import sys

import imageio.v3 as iio
from sklearn.cluster import HDBSCAN
from matplotlib import colormaps, colors
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
import umap

from mime_db import MimeDb
from pose_functions import *

In [4]:
VIDEO_FILE = "JuliusCaesar—WinterMainStage23.mp4" # Just the name of the video file, no path

video_path = Path("videos", VIDEO_FILE)

# Connect to the database
db = await MimeDb.create()

# Get video metadata
video_name = video_path.name
video_id = await db.get_video_id(video_name)

video_data = await db.get_video_by_id(video_id)
video_fps = video_data["fps"]

video_movelets = await db.get_movelet_data_from_video(video_id)
movelets_df = pd.DataFrame.from_records(video_movelets, columns=video_movelets[0].keys())

video_poses = await db.get_pose_data_from_video(video_id)
#video_poses = await db.get_poses_with_faces(video_id)

poses_df = pd.DataFrame.from_records(video_poses, columns=video_poses[0].keys())

print(poses_df.memory_usage())

IndexError: list index out of range

In [None]:
# Summary statistics for the per-movelet 'movement' column.
movement_col = movelets_df['movement']
missing_movement = movelets_df[movement_col.isna()]
zero_movement = movelets_df[movement_col == 0]
slow_movement = movelets_df[(movement_col >= 0) & (movement_col < 10)]

print("TOTAL MOVELETS:", len(movelets_df))
print("NON-MOTION MOVELETS:", len(missing_movement))
print("MOVELETS WITH STILL MOTION:", len(zero_movement))
print("MOVELETS WITH MOVEMENT < 10px/sec:", len(slow_movement))

# NaN-aware aggregates, so movelets without a movement value are ignored
print("MEAN MOVEMENT PER MOVELET (norm px/sec):", np.nanmean(movement_col))
print("MEDIAN MOVEMENT PER MOVELET (norm px/sec):", np.nanmedian(movement_col))

In [None]:
# Histogram of per-movelet movement, used to estimate the modal movement value
# (movement_mode) that later serves as the "frozen pose" threshold.

# Replace missing movement values with a -1 sentinel. This builds a new frame
# instead of calling fillna(inplace=True) on a column selection, which is a
# chained assignment that pandas deprecates and that does nothing under
# Copy-on-Write.
nonnull_movelets_df = movelets_df.assign(movement=movelets_df['movement'].fillna(-1))
hist_movement = nonnull_movelets_df.loc[nonnull_movelets_df['movement'] <= 500, 'movement']
n, bins, patches = plt.hist(hist_movement, bins=300)
plt.xlabel("Movement (normalized pixels/sec)")
plt.ylabel("# Movelets")

# The first bin holds the -1 sentinel values, so it is excluded from the search
# for the most populated bin. argmax() on n[1:] is relative to the slice, hence
# the +1 to turn it back into an index into n / bins.
top_bin = n[1:].argmax() + 1
print('most frequent bin: (' + str(bins[top_bin]) + ',' + str(bins[top_bin+1]) + ')')
print('mode: '+ str((bins[top_bin] + bins[top_bin+1])/2))
# Midpoint of the most populated bin
movement_mode = (bins[top_bin] + bins[top_bin+1])/2

In [None]:
# "Frozen" movelets: those with a known, non-negative movement value that is
# below the modal movement found from the histogram.
movement_values = movelets_df['movement']
below_mode_mask = (movement_values >= 0) & (movement_values < movement_mode)
frozen_movelets = movelets_df[below_mode_mask].reset_index()

# One entry per frozen movelet, taken from the 'norm' column
# (presumably the flattened, normalized pose vector; verify against the DB schema)
frozen_poses = frozen_movelets['norm'].tolist()

In [None]:
# 2-D UMAP projection of the frozen poses with default parameters
# (fixed random_state so the layout is repeatable).
standard_reducer = umap.UMAP(random_state=42)
standard_embedding = standard_reducer.fit_transform(frozen_poses)

plt.scatter(standard_embedding[:, 0], standard_embedding[:, 1], s=4)

In [None]:
# UMAP projection of the frozen poses with the parameters used for the
# embedding that is clustered further down.
clusterable_umap_params = dict(
    n_neighbors=10,
    min_dist=1.0,
    n_components=2,
    random_state=42,
)
clusterable_embedding = umap.UMAP(**clusterable_umap_params).fit_transform(frozen_poses)

plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], s=4)

In [None]:
print("fitting clustering model")

# Cluster the frozen pose vectors directly (no dimensionality reduction first).
hdb = HDBSCAN(min_cluster_size=3, min_samples=4) # , max_cluster_size=15
hdb.fit(frozen_poses)
labels = hdb.labels_.tolist()

# Map each cluster label to the positional indices (into frozen_poses /
# frozen_movelets) of its members. HDBSCAN labels noise points as -1.
cluster_to_poses = {}
for i, cluster_id in enumerate(labels):
    cluster_to_poses.setdefault(cluster_id, []).append(i)

assigned_poses = 0
tracks_per_cluster = []
poses_per_track_per_cluster = []

# Iterate over the labels that actually occur, in ascending order. Looking up
# every id in range(-1, max+1) would raise KeyError whenever the fit produced
# no noise points (no -1 label).
for cluster_id, member_indices in sorted(cluster_to_poses.items()):
    if cluster_id == -1:
        # Noise is not a cluster; it is excluded from all statistics below
        continue
    #print("Poses in cluster", cluster_id, len(member_indices))

    # Number of member poses contributed by each track
    cluster_track_poses = {}
    for movelet_id in member_indices:
        movelet_track = frozen_movelets.iloc[movelet_id]['track_id']
        cluster_track_poses[movelet_track] = cluster_track_poses.get(movelet_track, 0) + 1

    assigned_poses += len(member_indices)
    tracks_per_cluster.append(len(cluster_track_poses))
    poses_per_track_per_cluster.append(len(member_indices) / len(cluster_track_poses))

    #print("Tracks in cluster", cluster_id, len(cluster_track_poses))

print("assigned", assigned_poses, "poses out of", len(labels), round(assigned_poses/len(labels),4))

# Cluster assignments shown on the 2-D UMAP layout
fig = plt.figure(figsize=(10,10))
plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], c=labels, cmap='Spectral', s=4)

fig2 = plt.figure(figsize=(10,4))
ax = fig2.gca()
n, bins, patches = plt.hist(tracks_per_cluster, bins=20)
ax.set_title("Tracks per cluster")
ax.set_xlabel("# Tracks")
ax.set_ylabel("# Clusters")
plt.show()

fig3 = plt.figure(figsize=(10,4))
ax = fig3.gca()
n, bins, patches = plt.hist(poses_per_track_per_cluster, bins=30)
ax.set_title("Poses per track per cluster")
ax.set_xlabel("Poses/track")
ax.set_ylabel("# Clusters")
plt.show()

In [None]:
print("fitting UMAP preclustered model")

# Cluster the 2-D UMAP embedding of the frozen poses rather than the raw vectors.
hdb = HDBSCAN(min_cluster_size=3, min_samples=4) # , max_cluster_size=15
hdb.fit(clusterable_embedding)
labels = hdb.labels_.tolist()

# Map each cluster label to the positional indices of its members.
# HDBSCAN labels noise points as -1.
cluster_to_poses = {}
for i, cluster_id in enumerate(labels):
    cluster_to_poses.setdefault(cluster_id, []).append(i)

# Build an alternative, filtered movelet set that is
# filtered down to just one movelet per track in a cluster
# i.e., when more than one pose per track is in a given
# cluster, just keep the first one. This has the effect
# of stripping out repeated poses that are part of the
# same low-motion movelet.

filtered_movelet_indices = []

assigned_poses = 0
tracks_per_cluster = []
poses_per_track_per_cluster = []

# Iterate over the labels that actually occur, in ascending order. Looking up
# every id in range(-1, max+1) would raise KeyError whenever the fit produced
# no noise points (no -1 label).
for cluster_id, member_indices in sorted(cluster_to_poses.items()):
    if cluster_id == -1:
        # Non-clustered poses are currently left out of the filtered set
        # and of the statistics. Include non-clustered poses?
        continue
    # print("Poses in cluster", cluster_id, len(member_indices))

    # Number of member poses contributed by each track
    cluster_track_poses = {}
    for movelet_id in member_indices:
        movelet_track = frozen_movelets.iloc[movelet_id]['track_id']
        if movelet_track not in cluster_track_poses:
            # First pose of this track in this cluster: keep it as the representative
            filtered_movelet_indices.append(movelet_id)
        cluster_track_poses[movelet_track] = cluster_track_poses.get(movelet_track, 0) + 1

    assigned_poses += len(member_indices)
    tracks_per_cluster.append(len(cluster_track_poses))
    poses_per_track_per_cluster.append(len(member_indices) / len(cluster_track_poses))

    # print("Tracks in cluster", cluster_id, len(cluster_track_poses))

print("assigned", assigned_poses, "poses out of", len(labels), round(assigned_poses/len(labels),4))

# Cluster assignments shown on the 2-D UMAP layout
fig = plt.figure(figsize=(10,10))
plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], c=labels, cmap='Spectral', s=4)

fig2 = plt.figure(figsize=(10,4))
ax = fig2.gca()
n, bins, patches = plt.hist(tracks_per_cluster, bins=20)
ax.set_title("Tracks per cluster")
ax.set_xlabel("# Tracks")
ax.set_ylabel("# Clusters")
plt.show()

fig3 = plt.figure(figsize=(10,4))
ax = fig3.gca()
n, bins, patches = plt.hist(poses_per_track_per_cluster, bins=30)
ax.set_title("Poses per track per cluster")
ax.set_xlabel("Poses/track")
ax.set_ylabel("# Clusters")
plt.show()

In [None]:
# Get the full pose data for each representative movelet from a track in a cluster,
# to be used to display armatures and source image excerpts in the cluster plot and
# in visualizations of the cluster averages

print(len(filtered_movelet_indices))

# De-duplicate the representative indices (set iteration order is kept as-is,
# so the row order of filtered_movelets matches what list(set(...)) yields).
unique_filtered_indices = list(set(filtered_movelet_indices))
print("Filtered movelets:", len(unique_filtered_indices))

# Build a fresh frame rather than resetting the index in place on a selection
# of frozen_movelets.
filtered_movelets = frozen_movelets.iloc[unique_filtered_indices].reset_index()

# Pose vectors of the representatives, with NaN coordinates replaced by the -1
# sentinel. nan_to_num returns new arrays, so these do not alias the arrays
# stored in frozen_movelets.
filtered_poses = [np.nan_to_num(pose, nan=-1) for pose in filtered_movelets['norm'].tolist()]
len(filtered_poses)

In [None]:
print("visualizing UMAP preclustered model")
show_poses = True   # draw the average pose of every cluster
plot_images = True  # draw a pose thumbnail at each clustered point of the embedding

if show_poses:
    # Cluster ids ordered from the largest cluster to the smallest
    ord_cluster_to_poses = OrderedDict(sorted(cluster_to_poses.items(), key = lambda x : len(x[1]), reverse=True)).keys()
    for cluster_id in ord_cluster_to_poses:
        fig, ax = plt.subplots()
        fig.set_size_inches(UPSCALE * 100 / fig.dpi, UPSCALE * 100 / fig.dpi)
        fig.canvas.draw()
        print("CLUSTER:", cluster_id, "POSES:", len(cluster_to_poses[cluster_id]))
        # Turn the -1 "missing" sentinel into NaN on a *copy* of each pose.
        # Assigning into the original arrays would also alter frozen_poses and
        # frozen_movelets['norm'], so re-running the UMAP/HDBSCAN cells would
        # then be fed NaNs.
        cluster_poses = [
            np.where(frozen_poses[pose_index] == -1, np.nan, frozen_poses[pose_index])
            for pose_index in cluster_to_poses[cluster_id]
        ]
        # Coordinate-wise mean that ignores missing values
        cluster_average = np.nanmean(np.array(cluster_poses), axis=0).tolist()
        armature_prevalences = get_armature_prevalences(cluster_poses)
        # Regroup the flat vector into consecutive pairs of values
        cluster_average = np.array_split(cluster_average, len(cluster_average) // 2)
        #print("Average pose in cluster", cluster_id, cluster_average)
        cluster_average_img = draw_normalized_and_unflattened_pose(
            cluster_average, armature_prevalences=armature_prevalences
        )
        #plt.figure(figsize=(2,2))
        plt.imshow(cluster_average_img)
        plt.show()


fig = plt.figure(figsize=(40,40))
ax = fig.gca()
cm = colormaps["Spectral"]
norm = colors.Normalize(vmin=-1, vmax=max(labels))

if plot_images:

    # Invisible scatter: only there so the axes limits span the embedding
    ax.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], alpha=0)
    for i, cluster_id in enumerate(labels):
        if cluster_id == -1:
            continue
        # Copy with the -1 sentinel replaced by NaN (see the note above)
        cl_pose = np.where(frozen_poses[i] == -1, np.nan, frozen_poses[i])
        cluster_pose = np.array_split(cl_pose, len(cl_pose) // 2)
        cluster_pose_img = draw_normalized_and_unflattened_pose(
            cluster_pose, armature_prevalences=[1] * 19
        )
        #img = Image.fromarray(img_region)
        img = cluster_pose_img
        img.thumbnail((40, 40), resample=Image.Resampling.LANCZOS)
        ab = AnnotationBbox(OffsetImage(np.asarray(img)), (clusterable_embedding[i, 0], clusterable_embedding[i, 1]), frameon=False)
        #ab.patch.set_linewidth(0)
        #ab.patch.set(color=cm(norm(cluster_id)))

        ax.add_artist(ab)
else:
    ax.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], c=labels, cmap='Spectral', s=4)

In [None]:
# Default-parameter 2-D UMAP projection of the filtered (one pose per track
# per cluster) set; replaces the earlier standard_embedding.
filtered_standard_reducer = umap.UMAP(random_state=42)
standard_embedding = filtered_standard_reducer.fit_transform(filtered_poses)

plt.scatter(standard_embedding[:, 0], standard_embedding[:, 1], s=4)

In [None]:
# UMAP projection of the filtered poses with the parameters used for
# clustering; replaces the earlier clusterable_embedding.
filtered_umap_params = dict(
    n_neighbors=10,
    min_dist=1.0,
    n_components=2,
    random_state=42,
)
clusterable_embedding = umap.UMAP(**filtered_umap_params).fit_transform(filtered_poses)

plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], s=4)

In [None]:
print("fitting clustering model")

# Cluster the filtered pose vectors directly (no dimensionality reduction first).
hdb = HDBSCAN(min_cluster_size=3, min_samples=4) # , max_cluster_size=15
hdb.fit(filtered_poses)
labels = hdb.labels_.tolist()

# Map each cluster label to the positional indices (into filtered_poses /
# filtered_movelets) of its members. HDBSCAN labels noise points as -1.
cluster_to_poses = {}
for i, cluster_id in enumerate(labels):
    cluster_to_poses.setdefault(cluster_id, []).append(i)

assigned_poses = 0
tracks_per_cluster = []
poses_per_track_per_cluster = []

# Iterate over the labels that actually occur, in ascending order. Looking up
# every id in range(-1, max+1) would raise KeyError whenever the fit produced
# no noise points (no -1 label).
for cluster_id, member_indices in sorted(cluster_to_poses.items()):
    if cluster_id == -1:
        # Noise is not a cluster; it is excluded from all statistics below
        continue
    # print("Poses in cluster", cluster_id, len(member_indices))

    # Number of member poses contributed by each track
    cluster_track_poses = {}
    for movelet_id in member_indices:
        movelet_track = filtered_movelets.iloc[movelet_id]['track_id']
        cluster_track_poses[movelet_track] = cluster_track_poses.get(movelet_track, 0) + 1

    assigned_poses += len(member_indices)
    tracks_per_cluster.append(len(cluster_track_poses))
    poses_per_track_per_cluster.append(len(member_indices) / len(cluster_track_poses))

    # print("Tracks in cluster", cluster_id, len(cluster_track_poses))

print("assigned", assigned_poses, "poses out of", len(labels), round(assigned_poses/len(labels),4))

# Cluster assignments shown on the 2-D UMAP layout
fig = plt.figure(figsize=(10,10))
plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], c=labels, cmap='Spectral', s=4)

fig2 = plt.figure(figsize=(10,4))
ax = fig2.gca()
n, bins, patches = plt.hist(tracks_per_cluster, bins=20)
ax.set_title("Tracks per cluster")
ax.set_xlabel("# Tracks")
ax.set_ylabel("# Clusters")
plt.show()

fig3 = plt.figure(figsize=(10,4))
ax = fig3.gca()
n, bins, patches = plt.hist(poses_per_track_per_cluster, bins=30)
ax.set_title("Poses per track per cluster")
ax.set_xlabel("Poses/track")
ax.set_ylabel("# Clusters")
plt.show()

In [None]:
print("fitting UMAP preclustered model")

# Cluster the 2-D UMAP embedding of the filtered poses rather than the raw vectors.
hdb = HDBSCAN(min_cluster_size=3, min_samples=4) # , max_cluster_size=15
hdb.fit(clusterable_embedding)
labels = hdb.labels_.tolist()

# Map each cluster label to the positional indices of its members.
# HDBSCAN labels noise points as -1.
cluster_to_poses = {}
for i, cluster_id in enumerate(labels):
    cluster_to_poses.setdefault(cluster_id, []).append(i)

# Build an alternative, filtered movelet set that is
# filtered down to just one movelet per track in a cluster
# i.e., when more than one pose per track is in a given
# cluster, just keep the first one. This has the effect
# of stripping out repeated poses that are part of the
# same low-motion movelet.

filtered_movelet_indices = []

assigned_poses = 0
tracks_per_cluster = []
poses_per_track_per_cluster = []

# Iterate over the labels that actually occur, in ascending order. Looking up
# every id in range(-1, max+1) would raise KeyError whenever the fit produced
# no noise points (no -1 label).
for cluster_id, member_indices in sorted(cluster_to_poses.items()):
    if cluster_id == -1:
        # Non-clustered poses are currently left out of the filtered set
        # and of the statistics. Include non-clustered poses?
        continue
    # print("Poses in cluster", cluster_id, len(member_indices))

    # Number of member poses contributed by each track
    cluster_track_poses = {}
    for movelet_id in member_indices:
        movelet_track = filtered_movelets.iloc[movelet_id]['track_id']
        if movelet_track not in cluster_track_poses:
            # First pose of this track in this cluster: keep it as the representative
            filtered_movelet_indices.append(movelet_id)
        cluster_track_poses[movelet_track] = cluster_track_poses.get(movelet_track, 0) + 1

    assigned_poses += len(member_indices)
    tracks_per_cluster.append(len(cluster_track_poses))
    poses_per_track_per_cluster.append(len(member_indices) / len(cluster_track_poses))

    # print("Tracks in cluster", cluster_id, len(cluster_track_poses))

print("assigned", assigned_poses, "poses out of", len(labels), round(assigned_poses/len(labels),4))

# Cluster assignments shown on the 2-D UMAP layout
fig = plt.figure(figsize=(10,10))
plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], c=labels, cmap='Spectral', s=4)

fig2 = plt.figure(figsize=(10,4))
ax = fig2.gca()
n, bins, patches = plt.hist(tracks_per_cluster, bins=20)
ax.set_title("Tracks per cluster")
ax.set_xlabel("# Tracks")
ax.set_ylabel("# Clusters")
plt.show()

fig3 = plt.figure(figsize=(10,4))
ax = fig3.gca()
n, bins, patches = plt.hist(poses_per_track_per_cluster, bins=30)
ax.set_title("Poses per track per cluster")
ax.set_xlabel("Poses/track")
ax.set_ylabel("# Clusters")
plt.show()

In [None]:
print("visualizing UMAP preclustered model")
show_poses = False
plot_images = False
save_images = True

if show_poses:
    ord_cluster_to_poses = res = OrderedDict(sorted(cluster_to_poses.items(), key = lambda x : len(x[1]), reverse=True)).keys()
    for cluster_id in ord_cluster_to_poses:
        fig, ax = plt.subplots()
        fig.set_size_inches(UPSCALE * 100 / fig.dpi, UPSCALE * 100 / fig.dpi)
        fig.canvas.draw()
        
        cluster_poses = []
        print("CLUSTER:", cluster_id, "POSES:", len(cluster_to_poses[cluster_id]))
        for pose_index in cluster_to_poses[cluster_id]:
            cl_pose = filtered_poses[pose_index]
            cl_pose[cl_pose==-1] = np.nan
            cluster_poses.append(cl_pose)
        cluster_average = np.nanmean(np.array(cluster_poses), axis=0).tolist()
        armature_prevalences = get_armature_prevalences(cluster_poses)
        cluster_average = np.array_split(cluster_average, len(cluster_average) / 2)
        #print("Average pose in cluster", cluster_id, cluster_average)
        cluster_average_img = draw_normalized_and_unflattened_pose(
            cluster_average, armature_prevalences=armature_prevalences
        )
        #plt.figure(figsize=(2,2))
        plt.imshow(cluster_average_img)
        plt.show()
        

fig = plt.figure(figsize=(50,50))
ax = fig.gca()
cm = colormaps["Spectral"]
norm = colors.Normalize(vmin=-1, vmax=max(labels))

if save_images:
    images_dir = f"pose_images/{video_name}"
    if not os.path.isdir("pose_images"):
        os.mkdir("pose_images")
    if not os.path.isdir(images_dir):
        os.mkdir(images_dir)
    img_metadata_file = open(f"{video_name}.csv", "w", encoding="utf-8")
    
    # PixPlot metadata elements: 
    # year is an integer but doesn't need to be a year
    # label can be the cluster the pose is in
    # description is plain text
    # can also supply any number of "tags", but it's not clear how these would be useful
    img_metadata_file.write(",".join(["filename", "description", "year", "label"]) + "\n")
    
    features_dir = f"pose_features/{video_name}"
    if not os.path.isdir("pose_features"):
        os.mkdir("pose_features")
    if not os.path.isdir(features_dir):
        os.mkdir(features_dir)
    

if plot_images or save_images:
    ax.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], alpha=0)

    for i, cluster_id in enumerate(labels):
        #if cluster_id == -1:
        #    continue

        # Use this code block if we want to draw the normalized pose anywhere
#         cl_pose = filtered_poses[i]
#         cl_pose[cl_pose==-1] = np.nan
#         cluster_pose = np.array_split(cl_pose, len(cl_pose) / 2)
#         cluster_pose_img = draw_normalized_and_unflattened_pose(
#             cluster_pose, armature_prevalences=[1] * 19
#         )

        cluster_movelet = filtered_movelets.iloc[i]
        # Prefer a target frame in the middle of the movelet, but if the actual pose index
        # is missing from this frame (which can happen sometimes), just use the first frame
        # of the movelet
        try:
            target_frame = round((cluster_movelet['end_frame'] + cluster_movelet['start_frame']) / 2)
            target_movelet = poses_df[(poses_df['frame'] == target_frame) & (poses_df['track_id'] == cluster_movelet['track_id'])]
            if len(target_movelet) == 0:
                target_frame = cluster_movelet['start_frame']
                target_movelet = poses_df[(poses_df['frame'] == target_frame) & (poses_df['track_id'] == cluster_movelet['track_id'])]
            target_pose = poses_df[(poses_df['frame'] == target_frame) & (poses_df['track_id'] == cluster_movelet['track_id'])].iloc[0]
        except Exception as e:
            print("Couldn't find representative pose from movelet middle or beginning, skipping")
            continue

        save_name = f"{images_dir}/{target_frame}_{target_pose['pose_idx']}.jpg"

        if not os.path.isfile(save_name):
            
            bbox = [round(v) for v in target_pose['bbox']]
            print("frame", target_frame, "pose", target_pose["pose_idx"], "pose bbox", bbox)

            frame_faces = await db.get_frame_faces(video_id, target_frame)

            if len(frame_faces):
                faces_df = pd.DataFrame.from_records(frame_faces, columns=frame_faces[0].keys())

                target_face = faces_df[faces_df['pose_idx'] == target_pose['pose_idx']]
                if (len(target_face)):
                    target_face_df = target_face.iloc[0]
                    face_bbox = [round(v) for v in target_face_df['bbox']]
                    print("face_bbox", face_bbox)

                    min_x = min(bbox[0], face_bbox[0])
                    min_y = min(bbox[1], face_bbox[1])
                    max_x = max(bbox[0] + bbox[2], face_bbox[0] + face_bbox[2])
                    max_y = max(bbox[1] + bbox[3], face_bbox[1] + face_bbox[3])
                    b_w = max_x - min_x
                    b_h = max_y - min_y

                    # A bbox that includes the body and the face (if detected)
                    bbox = [min_x, min_y, b_w, b_h]
                    print("Combined bbox", bbox)
                
            pose_frame_image = iio.imread(f"/videos/{video_name}", index=target_frame - 1, plugin="pyav")

            pose_img = Image.fromarray(pose_frame_image)

            img_size = pose_img.size

            pose_img = pose_img.resize((img_size[0] * UPSCALE, img_size[1] * UPSCALE))
            drawing = ImageDraw.Draw(pose_img)
            keypoints_triples = [(target_pose['keypoints'][i], target_pose['keypoints'][i+1], target_pose['keypoints'][i+2]) for i in range(0, len(target_pose['keypoints']), 3)]
            drawing = draw_armatures(keypoints_triples, drawing)

            pose_img = pose_img.resize(
                (img_size[0], img_size[1]), resample=Image.Resampling.LANCZOS
            )

            cropped_pose_frame_image = pose_img.crop([bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]])

            img = cropped_pose_frame_image

            print("saving image", save_name)
            img.save(save_name)
            
            cropped_pose_frame_image.close()
            pose_img.close()

            # Assume there's always a features file if there's an image file
            img_features = filtered_poses[i]
            np.save(f"{features_dir}/{target_frame}_{target_pose['pose_idx']}.npy", img_features)
            
        else:
            img = Image.open(save_name)
            
        frame_minute = round(target_frame / video_fps / 60)
        
        img_metadata_file.write(",".join([f"{target_frame}_{target_pose['pose_idx']}.jpg", f"Frame {target_frame} | pose {target_pose['pose_idx']} | track {target_pose['track_id']}", str(frame_minute), str(cluster_id)]) + "\n")

        if plot_images:
            img.thumbnail((100, 100), resample=Image.Resampling.LANCZOS)
            ab = AnnotationBbox(OffsetImage(np.asarray(img)), (clusterable_embedding[i, 0], clusterable_embedding[i, 1]), frameon=False)
            #ab.patch.set_linewidth(0)
            #ab.patch.set(color=cm(norm(cluster_id)))
            ax.add_artist(ab)
            ax.text(clusterable_embedding[i,0], clusterable_embedding[i, 1], cluster_id, color="red") 

        img.close()

    img_metadata_file.close()

else:
    ax.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], c=labels, cmap='Spectral', s=4)


In [None]:
# Delete pose images (and their feature files) that are not listed in the
# metadata CSV written for this video.
imgs_to_keep = set()

with open(f"{video_name}.csv", "r", encoding="utf-8") as img_metadata_file:
    for la in img_metadata_file:
        # The filename is the first comma-separated field of each row
        img_fn = la.strip().split(",")[0]
        if not img_fn or img_fn == "filename":
            # Skip blank lines and the header row
            continue
        imgs_to_keep.add(img_fn)

print(len(imgs_to_keep),"unique images in metadata file")

for fn in os.listdir(images_dir):
    image_path = Path(images_dir, fn)
    if not image_path.is_file() or fn in imgs_to_keep:
        continue
    print("Deleting image", fn)
    image_path.unlink()
    # Swap only the extension; a plain str.replace('jpg', 'npy') would also
    # rewrite a "jpg" appearing elsewhere in the name.
    features_path = Path(features_dir, fn).with_suffix(".npy")
    # A missing feature file must not abort the rest of the cleanup
    if features_path.is_file():
        features_path.unlink()
