In [136]:
# conda env: dataPy_NWB2

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from glob import glob
import os
from os.path import join
from pathlib import Path
import sys

import cv2
import h5py
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import median_filter
from tqdm.auto import tqdm


print("Python interpreter binary location:", sys.executable)

Python interpreter binary location: /home/jop9552/miniconda3/envs/dataPy_NWB2/bin/python


NB: this currently expects one video per session. Making QC vids with multiple one-min vids per session is harder.

# TODO
* allow user to specify start frame, so that we can look in middle of session as well as beginning
* add to kpt pipeline


# Pipeline testing

In [5]:
from multicamera_airflow_pipeline.tim_240731.keypoints.validation_videos import KeypointVideoCreator

base_dir = "/n/groups/datta/kpts_pipeline/tim_240731/results"
# session = "24-09-29-12-40-04-238868"
session = "24-09-28-11-44-04-693209"
predictions_2d_directory = join(base_dir, "2D_predictions", session)
predictions_triang_directory = join(base_dir, "triangulation", session)
# camera_calibration_directory = join(base_dir, "camera_calibration", "24-09-29-13-56-13-243339/jarvis/CalibrationParameters")
camera_calibration_directory = join(base_dir, "camera_calibration", "24-09-28-12-57-16-037945/jarvis/CalibrationParameters")
output_directory_keypoint_vids = join(base_dir, "keypoint_validation_videos", session)
# raw_video_directory = f"/n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/20240929_J07901_6cam_PBN/{session}"
raw_video_directory = f"/n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/20240928_J07901_6cam_PBN/{session}"

k = KeypointVideoCreator(
    predictions_2d_directory,
    predictions_triang_directory,
    camera_calibration_directory,
    raw_video_directory,
    output_directory_keypoint_vids,
    max_frames=40,
    recompute_completed=True,
)

k.run()


Processing frames:   0%|          | 0/40 [00:00<?, ?it/s]

Processing frames:   0%|          | 0/40 [00:00<?, ?it/s]

Processing frames:   0%|          | 0/40 [00:00<?, ?it/s]

Processing frames:   0%|          | 0/40 [00:00<?, ?it/s]

Processing frames:   0%|          | 0/40 [00:00<?, ?it/s]

Processing frames:   0%|          | 0/40 [00:00<?, ?it/s]

ffmpeg version 3.3.3-static http://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2017 the FFmpeg developers
  built with gcc 6.4.0 (Debian 6.4.0-2) 20170724
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc-6 --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gray --enable-libass --enable-libfreetype --enable-libfribidi --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libzimg
  libavutil      55. 58.100 / 55. 58.100
  libavcodec     57. 89.100 / 57. 89.100
  libavformat    57. 71.100 / 57. 71.100
  libavdevice    57.  6.100 / 57.  6.100
  libavfilter     6. 82.100 /  6. 82.100
  libswscale   

# generate_keypoint_video function

In [3]:
def generate_keypoint_video(
    output_directory: Path,
    video_path: Path,
    keypoint_coords: np.ndarray,
    keypoint_conf: np.ndarray,  # New parameter for keypoint confidence
    keypoint_info: dict,
    skeleton_info: dict,
    vid_suffix: str,
    detection_coords: np.ndarray=None,
    max_frames=None,
):
    """
    Generates a video with keypoint predictions overlaid on the original video frames.

    Parameters:
    -----------
    output_directory : Path
        Directory where the output video will be saved.

    video_path : Path
        Path to the input video file.

    keypoint_coords : np.ndarray
        Array of shape (#frames, #keypoints, 2) containing the coordinates of keypoints for each frame.

    keypoint_conf : np.ndarray
        Array of shape (#frames, #keypoints) containing the confidence values (0-1) for each keypoint in each frame.

    keypoint_info : dict
        Dictionary containing information about the keypoints. Each key in the dictionary represents a keypoint ID, and the
        value is another dictionary with the following structure:
        {
            'name': str,       # Keypoint name
            'id': int,         # Keypoint ID
            'color': list,     # RGB color for the keypoint [R, G, B]
            'type': str,       # Keypoint type (e.g., 'upper', 'lower')
            'swap': str        # Name of the corresponding left/right keypoint to be swapped (for symmetry)
        }

    skeleton_info : dict
        Dictionary containing information about the skeleton. Each key in the dictionary represents a skeleton link ID, and
        the value is another dictionary with the following structure:
        {
            'link': tuple,     # Tuple containing the names of the two keypoints that form the link
            'id': int,         # Link ID
            'color': list      # RGB color for the link [R, G, B]
        }

    vid_suffix : str
        Suffix to add to the video file name. Ie "with_2D_keypoints" or "with_3D_keypoints"

    Returns:
    --------
    None
        The function saves the output video with keypoints and skeletons overlaid to the specified output directory.

    Raises:
    -------
    ValueError
        If the input video cannot be opened.

    Example:
    --------
    output_directory = Path('/output/directory')
    video_path = Path('/path/to/video.mp4')
    keypoint_coords = np.load('keypoint_coords.npy')  # Load your keypoints array
    keypoint_conf = np.load('keypoint_conf.npy')  # Load your keypoint confidence array
    keypoint_info = {
        0: {'name': 'nose_tip', 'id': 0, 'color': [120, 184, 181], 'type': 'upper', 'swap': ''},
        # Add other keypoints as needed
    }
    skeleton_info = {
        0: {'link': ('tail_base', 'spine_low'), 'id': 0, 'color': [173, 160, 183]},
        # Add other links as needed
    }

    generate_keypoint_video(output_directory, video_path, keypoint_coords, keypoint_conf, keypoint_info, skeleton_info)
    """

    # Open the input video
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    # Get video properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Create the VideoWriter object
    output_path = output_directory / (video_path.stem + "_" + vid_suffix + ".mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (frame_width, frame_height))

    frame_idx = 0
    if total_frames < 0 and max_frames is None:
        raise ValueError(
            "Could not determine total number of frames in the video -- please specify max_frames."
        )
    elif total_frames < 0:
        total_frames = max_frames
    elif max_frames is not None:
        total_frames = np.min([max_frames, total_frames])

    print(f"Total frames: {total_frames}")

    with tqdm(total=total_frames, desc="Processing frames") as pbar:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Create an overlay for drawing
            overlay = frame.copy()

            # Draw keypoints
            for kp_idx, kp_info in keypoint_info.items():
                if (
                    frame_idx < len(keypoint_coords)
                    and kp_idx < keypoint_coords.shape[1]
                ):
                    x, y = keypoint_coords[frame_idx, kp_idx]
                    if np.isnan(x) or np.isnan(y):
                        continue
                    conf = keypoint_conf[frame_idx, kp_idx]
                    color = tuple(kp_info["color"])
                    alpha = conf  # Alpha value is based on the confidence (0-1)
                    if conf > 0:  # Only draw if confidence is greater than 0
                        overlay = cv2.circle(
                            overlay,
                            (int(x), int(y)),
                            radius=4,
                            color=color,
                            thickness=-1,
                        )

            # Apply the overlay with alpha blending for keypoints
            cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)

            # Draw skeleton
            for link_info in skeleton_info.values():
                kp1_name, kp2_name = link_info["link"]
                kp1_id = next(
                    (
                        kp["id"]
                        for kp in keypoint_info.values()
                        if kp["name"] == kp1_name
                    ),
                    None,
                )
                kp2_id = next(
                    (
                        kp["id"]
                        for kp in keypoint_info.values()
                        if kp["name"] == kp2_name
                    ),
                    None,
                )

                if kp1_id is not None and kp2_id is not None:
                    if (
                        frame_idx < len(keypoint_coords)
                        and kp1_id < keypoint_coords.shape[1]
                        and kp2_id < keypoint_coords.shape[1]
                    ):
                        x1, y1 = keypoint_coords[frame_idx, kp1_id]
                        x2, y2 = keypoint_coords[frame_idx, kp2_id]
                        kp1_conf = keypoint_conf[frame_idx, kp1_id]
                        kp2_conf = keypoint_conf[frame_idx, kp2_id]
                        color = tuple(link_info["color"])
                        alpha = min(
                            kp1_conf, kp2_conf
                        )  # Alpha value is the minimum confidence of the link
                        if (
                            kp1_conf > 0 and kp2_conf > 0 and not np.isnan(x1)
                        ):  # Only draw if both confidence values are greater than 0
                            overlay = cv2.line(
                                overlay,
                                (int(x1), int(y1)),
                                (int(x2), int(y2)),
                                color=color,
                                thickness=2,
                            )

            # Apply the overlay with alpha blending for skeleton
            cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)

            # Find centroid of bounding box
            # x1, y1, x2, y2 = detection_coords[frame_idx, 0, :]
            # centroid = (int((x1 + x2) / 2), int((y1 + y2) / 2))
            # overlay = cv2.circle(
            #     overlay, centroid, radius=4, color=(0, 255, 0), thickness=-1
            # )

            # Draw the detection bounding box on the frame
            # if frame_idx < len(detection_coords):
            #     x1, y1, x2, y2 = detection_coords[frame_idx,0,:]
            #     overlay = cv2.rectangle(
            #         overlay, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2
            #     )

            # # Apply the overlay
            alpha = 0.5
            cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)

            # Write the frame with keypoints and skeletons to the output video
            out.write(frame)
            frame_idx += 1
            pbar.update(1)
            if max_frames and frame_idx >= max_frames:
                break

    # Release video objects
    cap.release()
    out.release()
    print(f"Video saved to: {output_path}")

In [4]:
def crop_and_stich_vids(
    output_directory: Path,
    single_vid_suffix: str,
    bbox_coords_by_camera: dict[np.ndarray]=None,
    detection_coords_by_camera: dict[np.ndarray]=None,
    bbox_crop_size=(400,400),
    max_frames=None,
    ):
    """
    Take keypoint videos and crop the mouse out, and stitch together the cropped videos into one row.

    Parameters:
    -----------
    output_directory : Path
        Directory where the single videos will be found + output video will be saved.

    single_vid_suffix : str
        Suffix to identify the single videos to be stitched together.

    bbox_coords_by_camera : dict or None
        Dictionary containing the bounding box coordinates for each camera. The keys are camera names and the values are
        numpy arrays of shape (#frames, 4) containing the bounding box coordinates (x1, y1, x2, y2) for each frame.
        If None, must provide detection coordinates instead, which wil be treated as centroids.

    detection_coords_by_camera : dict or None
        Dictionary containing the detection coordinates for each camera. The keys are camera names and the values are
        numpy arrays of shape (#frames, 4) containing the detection (ie centroid) coordinates (x, y) for each frame.
        If None, must provide bbox coordinates instead, which will be used to infer a centroid + crop 
        (the bboxes from mmpose aren't uniform size, so we infer centroid + crop to standard size).
    
    """

    assert bbox_coords_by_camera is not None or detection_coords_by_camera is not None, "Must provide either bbox or detection coordinates."
    assert bbox_coords_by_camera is None or detection_coords_by_camera is None, "Must provide either bbox or detection coordinates, not both."

    out_vids = list(output_directory.glob(f"*{single_vid_suffix}.mp4"))
    timestamp, cam, vid_suffix = out_vids[0].stem.split(".")
    stitched_vid_name = ".".join([timestamp, "stitched", vid_suffix, ".mp4"])
    
    # Get the total number of frames to use
    tmp_cap = cv2.VideoCapture(str(out_vids[0]))
    total_frames = int(tmp_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames < 0 and max_frames is None:
        raise ValueError(
            "Could not determine total number of frames in the video -- please specify max_frames."
        )
    elif total_frames < 0:
        total_frames = max_frames
    elif max_frames is not None:
        total_frames = np.min([max_frames, total_frames])
    tmp_cap.release()

    # Calculate bbox centroids for the cropping
    bbox_centroids_by_camera = {}
    if bbox_coords_by_camera is not None:
        for vid in out_vids:
            recording_id, camera, frame, ext = os.path.basename(vid).split(".")
            detn_coords = bbox_coords_by_camera[camera]
            bbox_centroids_by_camera[camera] = np.array(
                [
                    [(x1 + x2) / 2, (y1 + y2) / 2]
                    for x1, y1, x2, y2 in detn_coords
                ]
            )
            # Apply median filter smoothing to reduce jitter
            bbox_centroids_by_camera[camera] = median_filter(bbox_centroids_by_camera[camera], size=(12, 1))
    elif detection_coords_by_camera is not None:
        for vid in out_vids:
            recording_id, camera, frame, ext = os.path.basename(vid).split(".")
            bbox_centroids_by_camera[camera] = median_filter(detection_coords_by_camera[camera], size=(12, 1))


    # Open the output video
    out_vid_path = output_directory / Path(stitched_vid_name)
    print(f"Output video path: {out_vid_path}")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    output_frame_size = (bbox_crop_size[0] * len(out_vids), bbox_crop_size[1])
    out = cv2.VideoWriter(str(out_vid_path), fourcc, 30, output_frame_size)

    with tqdm(total=total_frames, desc="Processing frames") as pbar:

        # Open the input videos
        cap_by_camera = {}
        for vid in out_vids:
            cap = cv2.VideoCapture(str(vid))
            cap_by_camera[os.path.basename(vid).split(".")[1]] = cap
        
        frame_idx = 0
        while True:
            frames = []
            for camera, cap in cap_by_camera.items():

                # Read the frame
                ret, frame = cap.read()
                if not ret:
                    break

                # Crop the frame
                x, y = bbox_centroids_by_camera[camera][frame_idx]
                x1, y1 = x - bbox_crop_size[0] // 2, y - bbox_crop_size[1] // 2
                x2, y2 = x + bbox_crop_size[0] // 2, y + bbox_crop_size[1] // 2
                frame = frame[int(y1) : int(y2), int(x1) : int(x2)]
                frames.append(frame)

            if not ret:
                break

            # Stitch the frames together
            stitched_frame = np.zeros(
                (bbox_crop_size[1], bbox_crop_size[0] * len(out_vids), 3), dtype=np.uint8
            )
            for i, frame in enumerate(frames):
                stitched_frame[
                    0 : frame.shape[0], i * frame.shape[1] : (i + 1) * frame.shape[1]
                ] = frame
            
            # Write the stitched frame to the output video
            out.write(stitched_frame)

            # Loop control
            frame_idx += 1
            pbar.update(1)
            if max_frames and frame_idx >= max_frames:
                break

    

# Setup

In [5]:
sessions = [
    "24-09-28-11-44-04-693209",
    "24-09-29-12-40-04-238868",
    # "24-09-30-15-43-47-490092",  # one camera's 2d pred keeps failing
    "24-10-01-18-48-38-861115",
    "24-10-04-14-07-59-928846",
    # "24-10-08-17-56-50-878824",  # missing triggerdata file

]
calibration_timestamps = [
    "24-09-28-12-57-16-037945",
    "24-09-29-13-56-13-243339",
    # "24-09-30-16-49-17-341423"
    "24-10-01-20-49-29-153123",
    "24-10-04-13-52-07-229882",
    # "24-10-08-19-08-13-461143",
]
recording_dir = (
    "/n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901"  # path to raw videos
)
results_dir = "/n/groups/datta/kpts_pipeline/tim_240731/results"
pred_2d_dir = join(results_dir, "2D_predictions")  # path to 2D kp predictions
triang_3d_dir = join(results_dir, "triangulation")  # path to triangulated 3D kp predictions
calibration_dir = join(results_dir, "camera_calibration")  # path to calibration files
video_output_directory = Path(
    join(recording_dir, "tim_240731_keypoint_videos")
)  # path to save output videos
video_output_directory.mkdir(parents=True, exist_ok=True)
max_frames = 120 * 20  # number of frames to process per video
# max_frames = 60

In [6]:
session_dict = {}

In [7]:
# CP data

sessions = [
    "20240723",
]
recording_dir = (
    "/n/groups/datta/charlotte/DATA/Internal_state_MOSEQ/Male_03"
)

video_output_directory = Path(
    join(recording_dir, "tim_240731_keypoint_videos")
)  # path to save output videos
# video_output_directory.mkdir(parents=True, exist_ok=True)
max_frames = 120 * 60  # number of frames to process per video
# max_frames = 60

# Generate the videos of the raw 2D predictions

## Load the necessary data / info

In [8]:
# Using 25 kpt model for now
from multicamera_airflow_pipeline.tim_240731.skeletons.sainburg25pt import dataset_info
print(dataset_info.keys())

dict_keys(['dataset_name', 'paper_info', 'keypoint_info', 'skeleton_info', 'upper_body_ids', 'lower_body_ids', 'joint_weights', 'sigmas'])


In [9]:
for session in sessions:

    prediction_files = glob(join(pred_2d_dir, session, f"{session}*.h5"))
    
    detection_coords_by_vid = {}
    kp_coords_by_vid = {}
    kp_conf_by_vid = {}
    video_paths_by_vid = {}
    session_dict[session] = {}
    for h5_file in prediction_files:
        
        # Load the data
        with h5py.File(h5_file, "r") as file:
            keypoint_coords = np.array(file["keypoint_coords"])  # shape: (n_frames, n_keypoints, 2)
            keypoint_conf = np.array(file["keypoint_conf"])
            detection_conf = np.array(file["detection_conf"])
            detection_coords = np.array(file["detection_coords"])
        keypoint_conf[
            keypoint_conf > 1
        ] = 1  # not sure if this is the right way to fix this? Unhelpful discussion at https://github.com/open-mmlab/mmpose/issues/884

        # TODO: make overall qc plots
        # print(keypoint_coords.shape)
        # print(keypoint_conf.shape)
        # plt.figure()
        # plt.matshow(keypoint_conf[:, 0, :].T, aspect="auto")
        # plt.title(
        #     f'Keypoint confidence\n{session}\nCam {os.path.basename(h5_file).split(".")[1]}'
        # )

        # Find video
        # NB: may need to change this per user depending on how / where the videos are stored
        recording_id, camera, frame, ext = os.path.basename(h5_file).split(".")
        assert frame == "0"
        video_path = glob(join(recording_dir, "**", session, f"*{camera}*.mp4"))[0]

        detection_coords_by_vid[camera] = detection_coords.squeeze()
        kp_coords_by_vid[camera] = keypoint_coords.squeeze()
        kp_conf_by_vid[camera] = keypoint_conf.squeeze()
        video_paths_by_vid[camera] = video_path

        
    session_dict[session]["2D_bbox_coords"] = detection_coords_by_vid
    session_dict[session]["2D_kp_coords"] = kp_coords_by_vid
    session_dict[session]["2D_kp_conf"] = kp_conf_by_vid
    session_dict[session]["video_paths"] = video_paths_by_vid

## Create QC plots

In [145]:
for session in sessions:

    kp_confs = session_dict[session]["2D_kp_conf"]

    all_kp_confs = np.stack([kp_confs[cam] for cam in kp_confs.keys()], axis=-1)
    max_kp_confs_per_frame = np.max(all_kp_confs, axis=-1)

    plt.figure()
    plt.matshow(max_kp_confs_per_frame.T, aspect="auto", cmap="PiYG", vmin=0, vmax=1)
    cbar = plt.colorbar()
    cbar.set_label("Max keypoint confidence")
    cbar.set_ticks([0, 0.5, 1])
    plt.ylabel("Keypoint")
    plt.title(f"Max keypoint confidences across cameras\n{session}")
    plt.savefig(join(video_output_directory, f"{session}_max_kp_confs.png"))
    plt.close()



<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

## Create the detection videos for each camera

In [146]:
for session in sessions:
    session_output_directory = video_output_directory / session
    session_output_directory.mkdir(parents=True, exist_ok=True)
    
    vid_suffix = "with_2D_keypoints"
    output_vids = list(session_output_directory.glob(f"*{vid_suffix}.mp4"))
    if len(output_vids) == len(prediction_files):
        print(f"Videos already processed: {session_output_directory}")
        continue

    for h5_file in prediction_files:
        
        recording, camera, frame, ext = os.path.basename(h5_file).split(".")

        # Load the data
        video_path = session_dict[session]["video_paths"][camera]
        keypoint_coords = session_dict[session]["2D_kp_coords"][camera]
        keypoint_conf = session_dict[session]["2D_kp_conf"][camera]
        bbox_coords = session_dict[session]["2D_bbox_coords"][camera]

        generate_keypoint_video(
            output_directory=session_output_directory,
            video_path=Path(video_path),
            keypoint_coords=keypoint_coords,
            keypoint_conf=keypoint_conf,
            keypoint_info=dataset_info["keypoint_info"],
            vid_suffix=vid_suffix,
            detection_coords=bbox_coords,
            skeleton_info=dataset_info["skeleton_info"],
            max_frames=max_frames,
        )


Videos already processed: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/20240928_J07901_6cam_PBN/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.BackBottom.0.mp4
Videos already processed: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/20240928_J07901_6cam_PBN/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.BackLeft.0.mp4
Videos already processed: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/20240928_J07901_6cam_PBN/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.BackRight.0.mp4
Videos already processed: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/20240928_J07901_6cam_PBN/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.FrontBottom.0.mp4
Videos already processed: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/20240928_J07901_6cam_PBN/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.FrontLeft.0.mp4
Videos already processed: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/20240928_J07901_6cam_PBN/24-09-28-11-44-04-693209/24-09-2

Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.BackBottom.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.BackLeft.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.BackRight.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.FrontBottom.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.FrontLeft.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.FrontRight.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.BackBottom.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.BackLeft.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.BackRight.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.FrontBottom.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.FrontLeft.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.FrontRight.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-04-14-07-59-928846/24-10-04-14-07-59-928846.BackBottom.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-04-14-07-59-928846/24-10-04-14-07-59-928846.BackLeft.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-04-14-07-59-928846/24-10-04-14-07-59-928846.BackRight.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-04-14-07-59-928846/24-10-04-14-07-59-928846.FrontBottom.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-04-14-07-59-928846/24-10-04-14-07-59-928846.FrontLeft.0_with_2D_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-04-14-07-59-928846/24-10-04-14-07-59-928846.FrontRight.0_with_2D_keypoints.mp4


## Crop and stitch the detection videos into one big one

In [147]:
for session in sessions:
    session_output_directory = video_output_directory / session
    bbox_coords_by_vid = session_dict[session]["2D_bbox_coords"]
    crop_and_stich_vids(
        output_directory=session_output_directory,
        bbox_coords_by_camera=bbox_coords_by_vid,
        single_vid_suffix="with_2D_keypoints",
        max_frames=max_frames,
    )


Output video path: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.stitched.0_with_2D_keypoints..mp4


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Output video path: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.stitched.0_with_2D_keypoints..mp4


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Output video path: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.stitched.0_with_2D_keypoints..mp4


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Output video path: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-04-14-07-59-928846/24-10-04-14-07-59-928846.stitched.0_with_2D_keypoints..mp4


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

# Generate the videos of the 3D triangulation predictions

## Funcs

In [6]:
p = "/n/groups/datta/kpts_pipeline/tim_240731/results/triangulation/24-05-01-16-26-23-467239/predictions_2d.float32.270027x6x25x2.mmap"
a = np.memmap(p, dtype="float32", mode="r", shape=(270027, 6, 25, 2))


Python interpreter binary location: /home/jop9552/miniconda3/envs/dataPy_NWB2/bin/python


In [7]:
a.shape

(270027, 6, 25, 2)

In [14]:
def load_memmap_from_filename(filename):
    # Extract the metadata from the filename
    parts = filename.name.rsplit(".", 4)  # Split the filename into parts
    dtype_str = parts[-3]  # Get the dtype part of the filename
    shape_str = parts[-2]  # Get the shape part of the filename
    shape = tuple(map(int, shape_str.split("x")))  # Convert shape string to a tuple of integers
    # Load the array using numpy memmap
    array = np.memmap(filename, dtype=dtype_str, mode="r", shape=shape)
    return array


In [15]:
def nan_to_preceding(arr):
    # Make a copy of the array to avoid modifying the original array
    result = arr.copy()

    # Iterate over each element in the first dimension
    for i in range(1, arr.shape[0]):
        mask = np.isnan(result[i])  # Identify the NaN values
        result[i][mask] = result[i - 1][mask]  # Replace NaNs with preceding values

    return result

## Load the data

In [16]:
import multicam_calibration as mcc

In [17]:
# Using 25 kpt model for now
from multicamera_airflow_pipeline.tim_240731.skeletons.sainburg25pt import dataset_info
print(dataset_info.keys())

dict_keys(['dataset_name', 'paper_info', 'keypoint_info', 'skeleton_info', 'upper_body_ids', 'lower_body_ids', 'joint_weights', 'sigmas'])


In [18]:
for session, calibration_timestamp in zip(sessions, calibration_timestamps):
    prediction_files = glob(join(triang_3d_dir, session, "predictions_3d*.mmap"))
    confidence_files = glob(join(triang_3d_dir, session, "confidences_3d*.mmap"))
    reproj_err_files = glob(join(triang_3d_dir, session, "reprojection_errors*.mmap"))
    
    kp_reproj_coords_by_cam = {}
    video_paths_by_cam = {}
    session_dict[session] = {}
    for pred_file, conf_file, reproj_err_file in zip(prediction_files, confidence_files, reproj_err_files):
        
        # Load the data
        keypoint_coords = load_memmap_from_filename(Path(pred_file))
        keypoint_confs = load_memmap_from_filename(Path(conf_file))  # n_frames, n_kpts
        reproj_err = load_memmap_from_filename(Path(reproj_err_file))
        
        all_extrinsics, all_intrinsics, camera_names = mcc.load_calibration(
            Path(join(calibration_dir, calibration_timestamp, "jarvis", "CalibrationParameters")).as_posix(),
            load_format="jarvis",
        )

        # TODO: make overall qc plots

        # get reprojections  (n_frames, n_cams, n_kpts, 2)
        positions_2D_reprojections = (
            np.zeros((keypoint_coords.shape[0], len(camera_names), keypoint_coords.shape[1], 2)) * np.nan
        )  
        videos = glob(join(recording_dir, "**", session, "*.0.mp4"))
        for vid_path in videos:
            cam = os.path.basename(vid_path).split(".")[1]
            calibration_cam_idx = camera_names.index(cam)
            extrinsics = all_extrinsics[calibration_cam_idx]
            camera_matrix, dist_coefs = all_intrinsics[calibration_cam_idx]
            positions_2D_reprojections[:, calibration_cam_idx, :, :] = mcc.project_points(
                keypoint_coords,
                extrinsics=extrinsics,
                camera_matrix=camera_matrix,
                dist_coefs=dist_coefs,
            )
            
            kp_reproj_coords_by_cam[cam] = positions_2D_reprojections[:, calibration_cam_idx, :, :]
            video_paths_by_cam[cam] = vid_path
        
    session_dict[session]["triang_kp_reproj_coords"] = kp_reproj_coords_by_cam
    session_dict[session]["video_paths"] = video_paths_by_cam
    session_dict[session]["triang_keypoint_confs"] = keypoint_confs

## Create the 3D reprojection videos for each camera

In [153]:
for session in sessions:
    session_output_directory = video_output_directory / session
    session_output_directory.mkdir(parents=True, exist_ok=True)
    
    for cam in session_dict[session]["triang_kp_reproj_coords"].keys():
        
        # Grab the data
        video_path = session_dict[session]["video_paths"][cam]
        reproj_keypoint_coords = session_dict[session]["triang_kp_reproj_coords"][cam]
        keypoint_confs = session_dict[session]["triang_keypoint_confs"]

        generate_keypoint_video(
            output_directory=session_output_directory,
            video_path=Path(video_path),
            keypoint_coords=reproj_keypoint_coords,
            keypoint_conf=keypoint_confs,
            keypoint_info=dataset_info["keypoint_info"],
            skeleton_info=dataset_info["skeleton_info"],
            vid_suffix="with_triang_keypoints",
            max_frames=max_frames,
        )

Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.BackBottom.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.BackLeft.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.BackRight.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.FrontBottom.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.FrontLeft.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.FrontRight.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.BackBottom.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.BackLeft.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.BackRight.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.FrontBottom.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.FrontLeft.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.FrontRight.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.BackBottom.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.BackLeft.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.BackRight.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.FrontBottom.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.FrontLeft.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Video saved to: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.FrontRight.0_with_triang_keypoints.mp4
Total frames: 2400


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

ValueError: cannot convert float NaN to integer

## Crop and stitch the 3D reprojection videos into one big one

In [19]:
for session in sessions:
    session_output_directory = video_output_directory / session
    session_output_directory.mkdir(parents=True, exist_ok=True)

    # Instead of using the raw detections, use the centroid of the triang'd kps, which will be cleaner.
    # detection_coords_by_vid = session_dict[session]["2D_detection_coords"]
    detection_coords_by_vid = {}
    for cam in session_dict[session]["triang_kp_reproj_coords"].keys():
        reproj_keypoint_coords = session_dict[session]["triang_kp_reproj_coords"][cam]
        centroids = np.nanmean(reproj_keypoint_coords, axis=1)
        centroids = nan_to_preceding(centroids)
        detection_coords_by_vid[cam] = centroids

    crop_and_stich_vids(
        output_directory=session_output_directory,
        detection_coords_by_camera=detection_coords_by_vid,
        single_vid_suffix="with_triang_keypoints",
        max_frames=max_frames,
    )


  centroids = np.nanmean(reproj_keypoint_coords, axis=1)


Output video path: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-28-11-44-04-693209/24-09-28-11-44-04-693209.stitched.0_with_triang_keypoints..mp4


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Output video path: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-09-29-12-40-04-238868/24-09-29-12-40-04-238868.stitched.0_with_triang_keypoints..mp4


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

Output video path: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-01-18-48-38-861115/24-10-01-18-48-38-861115.stitched.0_with_triang_keypoints..mp4


Processing frames:   0%|          | 0/2400 [00:00<?, ?it/s]

[mov,mp4,m4a,3gp,3g2,mj2 @ 0x55ee8899a280] moov atom not found


Output video path: /n/groups/datta/Jonah/20240925_PBN_npx/raw_data/J07901/tim_240731_keypoint_videos/24-10-04-14-07-59-928846/24-10-04-14-07-59-928846.stitched.0_with_triang_keypoints..mp4


Processing frames: 0it [00:00, ?it/s]

[mov,mp4,m4a,3gp,3g2,mj2 @ 0x55ee88a97b80] moov atom not found
