In [None]:
import asyncio
import logging
import os
from pathlib import Path
import sys

import imageio.v3 as iio
from sklearn.cluster import HDBSCAN
from matplotlib import colormaps, colors
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
import umap

from mime_db import MimeDb


In [None]:
from pose_functions import *

In [None]:
VIDEO_FILE = "JuliusCaesar—WinterMainStage23.mp4" # Just the name of the video file, no path

video_path = Path("videos", VIDEO_FILE)

# Connect to the database
db = await MimeDb.create()

# Get video metadata
video_name = video_path.name
video_id = await db.get_video_id(video_name)
video_id = video_id[0]["id"]

video_movelets = await db.get_movelet_data_from_video(video_id)

movelets_df = pd.DataFrame.from_records(video_movelets, columns=video_movelets[0].keys())

# XXX Not sure why a few of the movement values are NaNs
movelets_df['movement'].fillna(0, inplace=True)

In [None]:
# Row counts: all movelets, movelets with exactly zero movement,
# and movelets whose movement lies strictly between 0 and 10
print(movelets_df.shape[0])
print(int(movelets_df['movement'].eq(0).sum()))
print(int((movelets_df['movement'].gt(0) & movelets_df['movement'].lt(10)).sum()))

# Central tendency of the movement column
print(np.mean(movelets_df['movement']))
print(np.median(movelets_df['movement']))

In [None]:
# Histogram (300 bins) of the movement values that are at most 500
plt.hist(movelets_df.loc[movelets_df['movement'] <= 500, 'movement'], bins=300)

In [None]:
# Alternative selection kept for reference: only movelets with exactly zero movement
#frozen_movelets = movelets_df[movelets_df['movement'] == 0]
# Current selection: movement strictly greater than 0 and strictly less than 10
frozen_movelets = movelets_df.loc[
    movelets_df['movement'].gt(0) & movelets_df['movement'].lt(10)
]
# One entry per selected movelet, taken from its 'norm' column
frozen_poses = frozen_movelets['norm'].tolist()

In [None]:
# Preview only the first few poses; echoing the entire list floods the cell output
frozen_poses[:5]

In [None]:
# Embed the frozen poses with UMAP, leaving everything at the library defaults
# except for a fixed random_state
standard_embedding = umap.UMAP(random_state=42).fit_transform(frozen_poses)

# Scatter the first two embedding coordinates against each other
plt.scatter(*standard_embedding[:, :2].T, s=4)

In [None]:
# Second UMAP embedding of the same poses with explicit n_neighbors / min_dist settings;
# this is the embedding the later clustering cells fit and plot against
clusterable_embedding = umap.UMAP(
    n_components=2,
    n_neighbors=10,
    min_dist=1.0,
    random_state=42,
).fit_transform(frozen_poses)

# Scatter the two embedding coordinates against each other
plt.scatter(*clusterable_embedding[:, :2].T, s=4)

In [None]:
print("fitting clustering model")

# Fit HDBSCAN directly on the pose vectors (not on the UMAP embedding)
hdb = HDBSCAN(min_cluster_size=3, min_samples=4) # , max_cluster_size=15
labels = hdb.fit(frozen_poses).labels_.tolist()

# Print the size of every label from -1 up to the highest cluster id
for cluster_id in range(-1, max(labels) + 1):
    print("Poses in cluster", cluster_id, labels.count(cluster_id))

# Every pose whose label is not -1 counts as assigned to a cluster
assigned_poses = len(labels) - labels.count(-1)

print("assigned", assigned_poses, "poses out of", len(labels), round(assigned_poses/len(labels),4))

# Colour the points of the clusterable UMAP embedding by these cluster labels
plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], c=labels, cmap='Spectral', s=4)

In [None]:
print("fitting UMAP preclustered model")

# Fit HDBSCAN on the UMAP embedding rather than on the pose vectors themselves
hdb = HDBSCAN(min_cluster_size=3, min_samples=4) # , max_cluster_size=15
labels = hdb.fit(clusterable_embedding).labels_.tolist()

# Walk every label from -1 to the highest cluster id, printing its size and
# accumulating the sizes of all labels other than -1
assigned_poses = 0
for cluster_id in range(-1, max(labels) + 1):
    print("Poses in cluster", cluster_id, labels.count(cluster_id))
    assigned_poses += labels.count(cluster_id) if cluster_id != -1 else 0

print("assigned", assigned_poses, "poses out of", len(labels), round(assigned_poses/len(labels),4))

# Same embedding, coloured by the labels just computed
plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], c=labels, cmap='Spectral', s=4)

In [None]:
print("fitting UMAP preclustered model")
# Switches read by the rendering code later in this cell
show_poses = True
plot_images = True

# Fit HDBSCAN on the UMAP embedding and keep the per-pose labels as a plain list
hdb = HDBSCAN(min_cluster_size=3, min_samples=4) # , max_cluster_size=15
labels = hdb.fit(clusterable_embedding).labels_.tolist()

# Map each cluster label to the list of positions (in `labels` order) carrying that label
cluster_to_poses = {}
for i, cluster_id in enumerate(labels):
    cluster_to_poses.setdefault(cluster_id, []).append(i)

# Print the size of every label from -1 up to the highest cluster id
for cluster_id in range(-1, max(labels) + 1):
    print("Poses in cluster", cluster_id, labels.count(cluster_id))

# Every pose whose label is not -1 counts as assigned to a cluster
assigned_poses = len(labels) - labels.count(-1)

print("assigned", assigned_poses, "poses out of", len(labels), round(assigned_poses/len(labels),4))

if show_poses:
    # Draw one small figure per cluster showing the NaN-aware mean of its member poses
    for cluster_id, pose_indices in cluster_to_poses.items():
        cluster_poses = []
        print("CLUSTER:", cluster_id, "POSES:", len(pose_indices))
        for pose_index in pose_indices:
            # Work on a float copy: writing NaN into the original array would also
            # change frozen_poses (and the DataFrame column it came from), so that
            # re-running the earlier UMAP/HDBSCAN cells would then fail on NaN input.
            cl_pose = np.array(frozen_poses[pose_index], dtype=float)
            # -1 entries are replaced by NaN so np.nanmean leaves them out of the average
            cl_pose[cl_pose == -1] = np.nan
            cluster_poses.append(cl_pose)
        cluster_average = np.nanmean(np.array(cluster_poses), axis=0).tolist()
        armature_prevalences = get_armature_prevalences(cluster_poses)
        # Regroup the flat vector into consecutive pairs; integer division keeps the
        # section count an int
        cluster_average = np.array_split(cluster_average, len(cluster_average) // 2)
        #print("Average pose in cluster", cluster_id, cluster_average)
        cluster_average_img = draw_normalized_and_unflattened_pose(
            cluster_average, armature_prevalences=armature_prevalences
        )
        plt.figure(figsize=(2,2))
        plt.imshow(cluster_average_img)


fig = plt.figure(figsize=(40,40))
ax = fig.gca()
cm = colormaps["Spectral"]
norm = colors.Normalize(vmin=-1, vmax=max(labels))

if plot_images:
    # Invisible scatter: only there so the axes limits cover every embedded point
    ax.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], alpha=0)
    for i, cluster_id in enumerate(labels):
        # Poses labelled -1 are not drawn
        if cluster_id == -1:
            continue
        # Float copy again, so frozen_poses itself is never modified
        cl_pose = np.array(frozen_poses[i], dtype=float)
        cl_pose[cl_pose == -1] = np.nan
        cluster_pose = np.array_split(cl_pose, len(cl_pose) // 2)
        cluster_pose_img = draw_normalized_and_unflattened_pose(
            cluster_pose, armature_prevalences=[1] * 19
        )
        #img = Image.fromarray(img_region)
        img = cluster_pose_img
        # Shrink in place to at most 40x40 pixels before placing it on the plot
        img.thumbnail((40, 40), resample=Image.Resampling.LANCZOS)
        ab = AnnotationBbox(OffsetImage(np.asarray(img)), (clusterable_embedding[i, 0], clusterable_embedding[i, 1]), frameon=False)
        #ab.patch.set_linewidth(0)
        #ab.patch.set(color=cm(norm(cluster_id)))

        ax.add_artist(ab)
else:
    # Plain scatter of the embedding, coloured by cluster label
    ax.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1], c=labels, cmap='Spectral', s=4)