In [1]:
import cv2
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt

In [2]:
def read_grayscale_video(path: str) -> np.ndarray:
    cap = cv2.VideoCapture(path)
    frames = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Convert to grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frames.append(gray)

    cap.release()

    if not frames:
        return np.empty((0, 0, 0), dtype=np.uint8)

    arr = np.array(frames, dtype=np.uint8)
    return arr

def get_video_metadata(path):
    cap = cv2.VideoCapture(path)
    fps         = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return fps, frame_count, width, height

def resample_gaze_to_frames(gaze: np.ndarray,
                            fps: float,
                            frame_count: int,
                            offset: float = 10.0):
    """
    Resample gaze samples to one (x, y) per video frame, starting at `offset` seconds.
    """
    # Time of each frame
    all_frame_times = np.arange(frame_count) / fps
    # Only keep frames at or after the gaze start time
    valid_frames = all_frame_times >= offset
    target_times = all_frame_times[valid_frames]

    # Duration of the full video
    duration = frame_count / fps
    # Original gaze timestamps span from offset to end
    orig_times = np.linspace(offset, duration, num=gaze.shape[0])

    # Interpolate x and y separately
    x_resampled = np.interp(target_times, orig_times, gaze[:, 0])
    y_resampled = np.interp(target_times, orig_times, gaze[:, 1])

    gaze_per_frame = np.vstack((x_resampled, y_resampled)).T
    return gaze_per_frame, valid_frames


In [3]:
path = "bin/bangbangyouredead_v4-1.mp4"
video_array = read_grayscale_video(path)
fps, frame_count, width, height = get_video_metadata(path)
print(fps, frame_count, width, height)

29.97002997002997 14665 720 480


In [4]:
video_array.shape

(14664, 480, 720)

In [5]:
gaze = np.load("bin/gaze.npy")
gaze.shape # (num_frames, 2)

(239419, 2)

In [6]:
gaze_on_frames, frame_mask = resample_gaze_to_frames(gaze, fps, frame_count, offset=10.45)
gaze_on_frames.shape, frame_mask.shape, np.nanmax(gaze_on_frames), np.nanmin(gaze_on_frames), frame_mask.sum()

((14351, 2),
 (14665,),
 np.float64(626.4181468771467),
 np.float64(19.17312396882625),
 np.int64(14351))

In [7]:
def overlay_gaze_on_frames(frames: np.ndarray,
                           gaze_frames: np.ndarray,
                           valid_mask: np.ndarray,
                           pad_x: int,
                           circle_radius: int = 20,
                           circle_color: int = 255) -> np.ndarray:
    """
    Draws a filled circle at each gaze point on a stack of grayscale frames.

    Parameters
    ----------
    frames : np.ndarray
        Video frames as (N, H, W), dtype=uint8.
    gaze_frames : np.ndarray
        Array of (x, y) gaze coordinates, one per valid frame: (M, 2).
    valid_mask : np.ndarray
        Boolean mask of length N indicating which frames have gaze.
    pad_x : int
        Horizontal offset (in px) to map original gaze coords into frame coords.
    circle_radius : int, optional
        Radius of the overlay circle, by default 20.
    circle_color : int, optional
        Gray‐level for the circle (0–255), by default 255 (white).

    Returns
    -------
    np.ndarray
        A new array of frames with the circles drawn, same shape as `frames`.
    """
    out = frames.copy()
    gaze_idx = 0
    N = frames.shape[0]

    for i in tqdm(range(N)):
        if valid_mask[i] and gaze_idx < len(gaze_frames):
            x, y = gaze_frames[gaze_idx]
            gaze_idx += 1
            
            # skip NaNs or out-of-bounds
            if np.isnan(x) or np.isnan(y):
                continue
            xi = int(round(x + pad_x))
            yi = int(round(y))
            
            if (0 <= xi < out.shape[2]) and (0 <= yi < out.shape[1]):
                cv2.circle(out[i], (xi, yi), circle_radius, circle_color, thickness=-1)

    return out

In [8]:
def save_frames_to_video(frames: np.ndarray,
                         out_path: str,
                         fps: float,
                         is_color: bool = False) -> None:
    """
    Saves a stack of frames to an MP4 file.

    Parameters
    ----------
    frames : np.ndarray
        Array of frames. If is_color=False, shape is (N, H, W);
        if is_color=True, shape is (N, H, W, 3).
    out_path : str
        Path to write the MP4.
    fps : float
        Frames per second for the output video.
    is_color : bool, optional
        Whether the frames are BGR color, by default False.
    """
    N, H, W = frames.shape[:3]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(out_path, fourcc, fps, (W, H), isColor=is_color)

    for i in tqdm(range(N)):
        frame = frames[i]
        # if grayscale, VideoWriter expects a BGR frame only if isColor=True,
        # otherwise it will accept (H, W) when isColor=False.
        writer.write(frame)

    writer.release()


In [9]:
modified = overlay_gaze_on_frames(video_array, gaze_on_frames, frame_mask, pad_x=40)

100%|██████████| 14664/14664 [00:00<00:00, 262099.32it/s]


In [10]:
save_frames_to_video(modified, "bin/gaze_overlay.mp4", fps, is_color=False)

100%|██████████| 14664/14664 [00:13<00:00, 1077.47it/s]


In [11]:
duration_s = float(14664 / fps)
seconds = np.arange(0, int(np.floor(duration_s)) + 1)
indices = np.array([int(round(fps * s)) for s in seconds])

In [12]:
cvid = modified[indices]

In [13]:
for i in tqdm(range(len(cvid))):
    plt.figure()
    plt.imshow(cvid[i], cmap='gray')
    plt.axis('off')
    plt.title(f"Frame {i}")
    plt.savefig(f"export/export_{i}.jpg")
    plt.close()

100%|██████████| 490/490 [00:18<00:00, 26.06it/s]


In [14]:
# zip the export folder
!zip -r export.zip export

updating: export/ (stored 0%)
updating: export/export_34.jpg (deflated 13%)
updating: export/export_307.jpg (deflated 17%)
updating: export/export_65.jpg (deflated 15%)
updating: export/export_75.jpg (deflated 14%)
updating: export/export_423.jpg (deflated 12%)
updating: export/export_74.jpg (deflated 14%)
updating: export/export_333.jpg (deflated 12%)
updating: export/export_162.jpg (deflated 13%)
updating: export/export_205.jpg (deflated 12%)
updating: export/export_135.jpg (deflated 18%)
updating: export/export_168.jpg (deflated 12%)
updating: export/export_356.jpg (deflated 12%)
updating: export/export_222.jpg (deflated 21%)
updating: export/export_250.jpg (deflated 11%)
updating: export/export_484.jpg (deflated 13%)
updating: export/export_313.jpg (deflated 14%)
updating: export/export_430.jpg (deflated 13%)
updating: export/export_407.jpg (deflated 11%)
updating: export/export_341.jpg (deflated 11%)
updating: export/export_464.jpg (deflated 13%)
updating: export/export_448.jpg (d