# Filmstrip Chart Generator

This notebook generates a filmstrip visualization showing video thumbnails, state sizes, and input events over time.

## Imports

Import required libraries for data processing and visualization.


In [None]:
%matplotlib inline
import glob
import json
import os
import re
from collections import Counter

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import msgpack
import numpy as np
from matplotlib.pyplot import bar, yscale, subplots, tight_layout
from numpy import arange, array, concatenate
from PIL import Image

## Data Directory

List available data directories.

In [None]:
# Shell out to list the contents of data/ (useful for picking the folder assigned to `current_filepath`).
!ls data/

## Configuration

Set the filepath to the data directory containing the video and state files.

In [None]:
# Folder of the recording session to visualise; every other input path is derived from it.
current_filepath = "data/DSL for Thesis/8c239eac-d539-4290-a7da-b6e06930b498"

# The state snapshots live in a fixed-name file directly inside that folder.
savestream_filename = f"{current_filepath}/states.savestream"

## Load State Data

Read and unpack the savestream file containing the state snapshots.

In [None]:
# Read and unpack the savestream file.
# The file is decoded as a stream of msgpack objects; the first top-level object
# is taken to be the sequence of state snapshots.
with open(savestream_filename, 'rb') as f:
    unpacker = msgpack.Unpacker(f, raw=False)
    # Drain the whole stream so that corruption anywhere in the file still raises here.
    decoded_objects = list(unpacker)

# Fail with a descriptive message instead of an opaque IndexError when the file is empty.
if not decoded_objects:
    raise ValueError(f"No msgpack objects found in {savestream_filename!r}")

states = decoded_objects[0]

print(f"Number of states: {len(states)}")

## Calculate State Sizes

Compute the encoded size (in bytes) of each state snapshot.

In [None]:
# Re-encode each snapshot with msgpack and record how many bytes the encoding takes.
sizes = [len(encoded) for encoded in map(msgpack.packb, states)]
print(f"Computed sizes for {len(sizes)} states")

## Generate Video Thumbnails

Extract thumbnail frames from the video at 1 frame per second using ffmpeg.

In [None]:
# Create thumbnails directory
# video_file is the recording to sample; thumbs_dir is where the extracted PNG frames are written.
video_file = os.path.join(current_filepath, "response.webm")
thumbs_dir = os.path.join(current_filepath, "thumbnails")
os.makedirs(thumbs_dir, exist_ok=True)  # exist_ok=True keeps re-runs from failing when the folder already exists

# Generate thumbnails at 1fps (one per second).
#   fps=1        -> emit one frame per second of video
#   scale=-1:768 -> resize to 768 px tall, width chosen automatically to keep the aspect ratio
#   thumb_%04d   -> sequentially numbered, zero-padded output names (thumb_0001.png, ...)
# NOTE(review): -q:v is a quality scale for lossy encoders; PNG output is lossless, so it
# probably has no effect here (PNG compression is set with -compression_level) — confirm.
!ffmpeg -i "{video_file}" -vf "fps=1,scale=-1:768" -q:v 1 "{thumbs_dir}/thumb_%04d.png"

## Additional Imports

Import additional libraries needed for image processing, file parsing, and event handling.


In [None]:
import msgpack
import os
import json
import glob
import re
from collections import Counter
from PIL import Image

## Visualization Configuration

Configure parameters for the filmstrip visualization, including thumbnail processing and event display settings.


In [None]:
# --- Thumbnail strip ---------------------------------------------------------
k = 80                 # subsampling step: only every k-th thumbnail is placed in the strip
thumb_height = 500     # pixel height each thumbnail is resized to
jitter_amount = 0.05   # half-width of the random offset used to spread overlapping event markers

# --- Cropping ----------------------------------------------------------------
# Each value is the fraction (0-1) of the image trimmed from that edge; smaller = less cropping.
crop_left_frac, crop_right_frac = 0.32, 0.37   # fractions of the image width
crop_top_frac, crop_bottom_frac = 0.16, 0.21   # fractions of the image height

# --- Border ------------------------------------------------------------------
border_width = 2                  # border thickness in pixels, added on every side
border_color = (255, 255, 255)    # RGB tuple (white)

## Load and Process Thumbnails

Load thumbnail images, apply cropping, and create a filmstrip by concatenating processed thumbnails.


In [None]:
# Discover the extracted frames. Lexicographic sorting matches frame order as long as the
# numeric suffix stays within its zero-padded width.
thumb_files = sorted(glob.glob(os.path.join(thumbs_dir, "thumb_*.png")))

print(f"Loaded {len(thumb_files)} thumbnails, showing every {k}th")

# Process thumbnails: crop, resize, and add borders.
# Only the sampled files are opened, and each one is closed as soon as its pixels have been
# copied out, so a long recording cannot exhaust the process's open-file limit.
thumb_arrays = []
for thumb_path in thumb_files[::k]:
    with Image.open(thumb_path) as thumb:
        w, h = thumb.size

        # Horizontal crop: trim the configured fraction of the width from each side
        left = int(w * crop_left_frac)
        right = w - int(w * crop_right_frac)

        # Vertical crop: trim the configured fraction of the height from top and bottom
        top = int(h * crop_top_frac)
        bottom = h - int(h * crop_bottom_frac)

        cropped = thumb.crop((left, top, right, bottom))

        # Resize to the strip height, deriving the width from the cropped aspect ratio
        cw, ch = cropped.size
        aspect = cw / ch
        new_width = int(thumb_height * aspect)

        resized = cropped.resize((new_width, thumb_height), Image.Resampling.NEAREST)

    # Paste onto a slightly larger solid canvas so the uncovered margin forms the border
    bordered = Image.new('RGB',
                         (new_width + 2 * border_width, thumb_height + 2 * border_width),
                         border_color)
    bordered.paste(resized, (border_width, border_width))

    thumb_arrays.append(np.array(bordered))

# Concatenate thumbnails side by side (axis=1 is the width axis); None when nothing was found
filmstrip = np.concatenate(thumb_arrays, axis=1) if thumb_arrays else None
print(f"Created filmstrip with {len(thumb_arrays)} thumbnails (with {border_width}px borders)")


## Parse Input Events

Parse the stimulus.vtt file to extract keyboard and mouse input events, recording the start timestamp (in seconds) of each event.


In [None]:
import os
import re
import json

# Helper function to convert a WebVTT timestamp to total seconds (float)
def parse_timestamp(time_str):
    """Convert a WebVTT timestamp string to seconds.

    Accepts both the full "HH:MM:SS.mmm" form and the hour-less "MM:SS.mmm"
    form that WebVTT also permits.

    Args:
        time_str: Timestamp text such as "00:01:02.500" or "01:02.500".

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        ValueError: If the string does not have two or three colon-separated
            fields, or a field is not numeric.
    """
    parts = time_str.strip().split(':')
    if len(parts) == 2:
        # Hours are optional in WebVTT; treat a missing field as zero hours.
        parts = ['0'] + parts
    if len(parts) != 3:
        raise ValueError(f"Unrecognized WebVTT timestamp: {time_str!r}")

    hours, minutes, seconds = parts
    return int(hours) * 3600 + int(minutes) * 60 + float(seconds)

stimulus_file = os.path.join(current_filepath, "stimulus.vtt")
keyboard_events = []
mouse_click_events = []
mouse_delta_events = []

# Route each recognised event name to the list that collects its timestamps.
events_by_name = {
    "keyboard-code": keyboard_events,
    "mouse-click": mouse_click_events,
    "mouse-delta": mouse_delta_events,
}

# Matches a VTT cue timing line: 00:00:00.000 --> 00:00:00.000
cue_timing_re = re.compile(r'\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}')

if os.path.exists(stimulus_file):
    # WebVTT files are UTF-8 by specification; don't depend on the platform default encoding.
    with open(stimulus_file, "r", encoding="utf-8") as f:
        lines = f.read().split("\n")

    line_iter = iter(lines)
    for raw_line in line_iter:
        line = raw_line.strip()
        if not cue_timing_re.match(line):
            continue

        # The cue's start time is the part before " --> "
        current_time = parse_timestamp(line.split(' --> ')[0])

        # The line directly after the timing line carries the JSON payload. It is consumed
        # here so it is never re-examined as a potential timing line.
        event_line = next(line_iter, "").strip()
        if not event_line:
            continue

        try:
            payload = json.loads(event_line)
        except json.JSONDecodeError:
            continue  # Best effort: skip cues whose payload is not valid JSON

        # Valid JSON that is not an object (or has a non-object "event") carries no event name.
        event_data = payload.get("event") if isinstance(payload, dict) else None
        if not isinstance(event_data, dict):
            continue

        # Record the exact cue time for recognised event types; ignore all others.
        target = events_by_name.get(event_data.get("name", ""))
        if target is not None:
            target.append(current_time)

total_events = len(keyboard_events) + len(mouse_click_events) + len(mouse_delta_events)
print(f"Found {total_events} input events with exact timestamps")
print(f"  - Keyboard: {len(keyboard_events)}")
print(f"  - Mouse clicks: {len(mouse_click_events)}")
print(f"  - Mouse movements: {len(mouse_delta_events)}")

# Example: Print first 5 keyboard times to verify
print(f"Sample keyboard times: {keyboard_events[:5]}")

## Helper Functions

Define utility functions for visualization, such as adding jitter to event markers for better visibility.


In [None]:
def jitter(vals, amount=None, rng=None):
    """Add random jitter to values to separate overlapping points.

    Args:
        vals: 1-D sequence or array of numeric values.
        amount: Half-width of the uniform noise; each value is shifted by a
            draw from [-amount, amount). When None, the notebook-level
            `jitter_amount` is looked up at call time, so editing the
            configuration cell takes effect without re-running this cell.
        rng: Optional `numpy.random.Generator` (e.g. `np.random.default_rng(seed)`)
            for reproducible output. When None, NumPy's global random state is used.

    Returns:
        A NumPy array with the same length as `vals`, with noise added.
    """
    if amount is None:
        amount = jitter_amount
    values = np.asarray(vals)
    # np.random (module) and Generator both expose uniform(low, high, size=...)
    source = np.random if rng is None else rng
    return values + source.uniform(-amount, amount, size=len(values))


## Generate Filmstrip Visualization

Create the final three-row visualization showing thumbnails, state sizes, and input events.


In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

# Base font size for all text in the figure; increase it to enlarge titles, labels and ticks together.
fontSize = 24

# -------------------------------------------------------
# CONFIG FOR PLOTTING
# -------------------------------------------------------
# Each bar's x position is simply the state index.
# NOTE(review): labelling this axis in seconds assumes one state snapshot per second — confirm.
time_axis = np.arange(len(sizes))
duration = len(sizes)

# 1.0 makes neighbouring bars touch; choose a value below 1.0 to leave a gap between bars.
bar_width = 1.0

# -------------------------------------------------------
# PLOT
# -------------------------------------------------------
fig, (ax1, ax2, ax3) = plt.subplots(
    3, 1,
    figsize=(24, 14),  # large canvas so the big fonts fit
    gridspec_kw={"height_ratios": [1.5, 1.1, 1]},
)

# -----------------------------
# 1. Filmstrip
# -----------------------------
if filmstrip is not None:
    ax1.imshow(filmstrip, interpolation='nearest', aspect='equal')
    ax1.set_title(f"Video Thumbnails (every {k} frames)", fontsize=fontSize)
# The image row never needs ticks or a frame, even when there is no filmstrip to show.
ax1.axis("off")

# -----------------------------
# 2. State Sizes
# -----------------------------
# align="edge" puts bar i over [i, i + bar_width], so the data spans 0..duration.
ax2.bar(time_axis, sizes, width=bar_width, align="edge", edgecolor="none")

# Use the same x-range as the events row so the two time axes line up.
ax2.set_xlim(0, duration)
ax2.set_yscale("log")
ax2.set_ylabel("Size (bytes)", fontsize=fontSize)
ax2.set_title("Encoded State Sizes (one bar per frame)", fontsize=fontSize)

# Set tick label size and force integer x ticks
ax2.tick_params(axis='both', which='major', labelsize=fontSize)
ax2.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
ax2.grid(True, alpha=0.3)

# -----------------------------
# 3. Input Events
# -----------------------------
# One row per event type: (timestamps, y level, colour, marker, alpha, legend name)
event_series = [
    (keyboard_events,    1.2, "#0066FF", "^", 0.5, "Keyboard"),
    (mouse_click_events, 1.0, "#FF3333", "o", 0.8, "Mouse Click"),
    (mouse_delta_events, 0.8, "#00AA00", "s", 0.4, "Mouse Move"),
]

if total_events > 0:
    for event_times, y_level, color, marker, alpha, legend_name in event_series:
        if not event_times:
            continue  # nothing to draw (and no legend entry) for this event type
        times = np.array(event_times)
        # Jitter both coordinates so coincident events remain individually visible.
        ax3.scatter(jitter(times), jitter(np.full(len(times), y_level)),
                    s=100, c=color, marker=marker, alpha=alpha,
                    edgecolors="none",
                    label=f"{legend_name} ({len(event_times)})")

    ax3.set_ylim(0.5, 1.5)
    ax3.set_title(f"Input Events – {total_events} total", fontsize=fontSize)
    ax3.legend(loc="lower right", fontsize=fontSize * 0.8)  # legend slightly smaller
    ax3.grid(True, alpha=0.3, axis="x")
else:
    # Axes-fraction coordinates keep the message centred whatever the x-range is.
    ax3.text(0.5, 0.5, "No input events found",
             ha="center", va="center", fontsize=fontSize,
             transform=ax3.transAxes)

# Axis setup shared by both cases; the x-range matches the state-size row above.
ax3.set_xlim(0, duration)
ax3.set_yticks([])  # the y position only separates event types, so hide its ticks
ax3.set_xlabel("Time (Seconds)", fontsize=fontSize)
ax3.tick_params(axis='x', which='major', labelsize=fontSize)
ax3.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))

plt.subplots_adjust(hspace=0.3)
plt.show()