Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. This source code is licensed under the license found in the LICENSE file in the root directory of this source tree.

# Video Seal - Video inference, optimized for low RAM

In [None]:
# run in the root of the repository
# autoreload (mode 2) re-imports edited modules before each cell executes,
# so changes to the library are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
 
# Move one directory up; the relative paths used later in this notebook
# ("assets/videos/1.mp4", "./outputs") are resolved from that directory.
%cd ..

/private/home/pfz/09-videoseal/videoseal


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [None]:
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import logging
logging.getLogger("matplotlib.image").setLevel(logging.ERROR)
from IPython.display import HTML, display

import pandas as pd
from tqdm import tqdm
import numpy as np
import ffmpeg
import os
import cv2
import subprocess
import torch

from videoseal.utils.display import save_vid
from videoseal.utils import Timer
from videoseal.evals.full import setup_model_from_checkpoint
from videoseal.evals.metrics import bit_accuracy, pvalue, capacity, psnr, ssim, msssim, linf
from videoseal.data.datasets import VideoDataset
from videoseal.augmentation import Identity, H264, Crop
from videoseal.models.videoseal import Videoseal
from videoseal.modules.jnd import JND

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Uncomment one of the two lines below to force a specific device:
# device = "cpu" 
# device = "cuda" 

def get_video_info(input_path):
    """Read basic stream properties of a video file through OpenCV.

    Args:
        input_path: Path of the video file, passed to ``cv2.VideoCapture``.

    Returns:
        dict with the keys ``"width"`` and ``"height"`` (ints), ``"fps"``
        (value reported by OpenCV), ``"codec"`` (FourCC code decoded to a
        4-character string) and ``"num_frames"`` (int).

    Raises:
        IOError: If OpenCV cannot open ``input_path``. Without this check a
            missing or unreadable file would silently produce zero-valued
            properties.
    """
    # Open the video file
    video = cv2.VideoCapture(input_path)
    try:
        if not video.isOpened():
            raise IOError(f"Could not open video file: {input_path}")

        # Get video properties
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        codec = int(video.get(cv2.CAP_PROP_FOURCC))
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Always close the video file, even if a property read failed
        video.release()

    # The FourCC packs four ASCII characters, lowest byte first
    codec_str = "".join(chr((codec >> 8 * i) & 0xFF) for i in range(4))

    return {
        "width": width,
        "height": height,
        "fps": fps,
        "codec": codec_str,
        "num_frames": num_frames
    }


# NOTE(review): this repeats the device selection made earlier in this cell
# (just before get_video_info); re-evaluating it yields the same value.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu" 
# device = "cuda" 

  from .autonotebook import tqdm as notebook_tqdm


## Load the model

The videoseal library provides pretrained models for embedding and extracting watermarks.

In [None]:
# Load the VideoSeal model, switch it to evaluation mode and place it on the selected device.
model = setup_model_from_checkpoint("videoseal").eval().to(device)
model.compile()

# Step size used by the model: a bigger value makes embedding faster
# at the cost of a bit of robustness.
model.step_size = 8

Model loaded successfully from ckpts/y_128b_img.pth with message: <All keys matched successfully>


## Embedding

Embedding hides the watermark (a binary message) inside the frames of the video.

In [None]:
def check_and_add_ffmpeg():
    """Locate an ffmpeg binary and make sure its directory is on ``PATH``.

    Candidates are tried in order by running ``<candidate> -version``: the
    bare ``ffmpeg`` command first, then two fixed install locations. For the
    first candidate that can be launched, its directory is prepended to
    ``os.environ['PATH']`` unless it is already one of the PATH entries.

    Raises:
        RuntimeError: If no candidate can be launched, or if the check fails
            for any other reason. The original exception is chained.
    """
    # Try multiple possible ffmpeg paths
    ffmpeg_paths = [
        'ffmpeg',
        '/opt/homebrew/bin/ffmpeg',
        '/usr/local/bin/ffmpeg'
    ]

    try:
        for path in ffmpeg_paths:
            try:
                subprocess.run([path, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            except OSError:
                # Missing (FileNotFoundError) or not launchable (e.g.
                # PermissionError): move on to the next candidate.
                continue

            print(f"ffmpeg found at: {path}")
            bin_dir = os.path.dirname(path)
            current_path = os.environ.get('PATH', '')
            # Compare against whole PATH entries: a substring test would treat
            # "/opt/homebrew/bin" as present when only "/opt/homebrew/bin2" is listed.
            if bin_dir and bin_dir not in current_path.split(os.pathsep):
                print(f"Adding {bin_dir} to PATH")
                os.environ['PATH'] = (
                    bin_dir + os.pathsep + current_path if current_path else bin_dir
                )
            return

        raise FileNotFoundError("No ffmpeg installation found")
    except Exception as e:
        raise RuntimeError(f"ffmpeg check failed: {str(e)}") from e

# Make sure an ffmpeg binary can be found before any video is read or written.
check_and_add_ffmpeg()  # add path to the ffmpeg binary, from Mac homebrew or system


In [None]:
def embed_video_clip(
    model: Videoseal,
    clip: np.ndarray,
    msgs: torch.Tensor
) -> np.ndarray:
    """Watermark one clip of frames and return it as 8-bit pixels.

    Args:
        model: Object exposing ``embed(imgs, msgs=..., is_video=...,
            lowres_attenuation=...)`` that returns a dict with an
            ``"imgs_w"`` tensor.
        clip: Frames laid out as (frames, height, width, channels) with
            values in 0..255 (this is how ``embed_video`` builds its chunks).
        msgs: Message passed through unchanged to ``model.embed``.

    Returns:
        uint8 array with the same (frames, height, width, channels) layout.
    """
    # Channels-last 0..255 -> channels-first 0..1
    clip_tensor = torch.tensor(clip, dtype=torch.float32).permute(0, 3, 1, 2) / 255.0
    with torch.no_grad():  # inference only, no autograd graph needed
        outputs = model.embed(clip_tensor, msgs=msgs, is_video=True, lowres_attenuation=True)
    # detach/cpu make the NumPy conversion safe wherever the model put its result
    processed_clip = outputs["imgs_w"].detach().cpu()
    # Round instead of truncating (x / 255 * 255 can land just below x in
    # float32) and clamp so out-of-range values saturate instead of relying
    # on an undefined float -> uint8 cast.
    processed_clip = (processed_clip * 255.0).round().clamp(0, 255).byte()
    return processed_clip.permute(0, 2, 3, 1).numpy()

def embed_video(
    model: Videoseal,
    input_path: str,
    output_path: str,
    chunk_size: int,
    crf: int = 23
) -> torch.Tensor:
    """Watermark a video file chunk by chunk, streaming frames through ffmpeg.

    Frames are decoded by one ffmpeg process, watermarked ``chunk_size``
    frames at a time with ``embed_video_clip`` and piped into a second ffmpeg
    process that encodes them with libx264. Only one chunk of frames is held
    in memory at a time. The random message is also written as a string of
    bits to a ``.txt`` file next to ``output_path``.

    Args:
        model: Watermarking model; must provide ``get_random_msg()`` and
            whatever ``embed_video_clip`` needs.
        input_path: Video to read.
        output_path: Video to write (overwritten if it exists).
        chunk_size: Number of frames processed per model call (> 0).
        crf: Constant rate factor passed to the libx264 encoder. The default
            of 23 is also libx264's own default.

    Returns:
        The message returned by ``model.get_random_msg()``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or the frame size of
            the input could not be determined.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Read video dimensions
    video_info = get_video_info(input_path)
    width = int(video_info['width'])
    height = int(video_info['height'])
    fps = float(video_info['fps'])
    num_frames = int(video_info['num_frames'])
    if width <= 0 or height <= 0:
        raise ValueError(f"Could not determine the frame size of {input_path}")
    frame_dims = '{}x{}'.format(width, height)

    process1 = None
    process2 = None
    try:
        # NOTE(review): stderr of both ffmpeg processes is piped but never
        # read; a very verbose ffmpeg run could fill that pipe and stall.
        # Consider lowering ffmpeg's log level if this is ever observed.
        # Open the input video
        process1 = (
            ffmpeg
            .input(input_path)
            .output('pipe:', format='rawvideo', pix_fmt='rgb24', s=frame_dims, r=fps)
            .run_async(pipe_stdout=True, pipe_stderr=subprocess.PIPE)
        )
        # Open the output video
        process2 = (
            ffmpeg
            .input('pipe:', format='rawvideo', pix_fmt='rgb24', s=frame_dims, r=fps)
            .output(output_path, vcodec='libx264', pix_fmt='yuv420p', r=fps, crf=crf)
            .overwrite_output()
            .run_async(pipe_stdin=True, pipe_stderr=subprocess.PIPE)
        )

        # Create a random message and store it next to the output video.
        # splitext only swaps the final extension, whatever it is.
        msgs = model.get_random_msg()
        with open(os.path.splitext(output_path)[0] + ".txt", "w") as f:
            f.write("".join([str(msg.item()) for msg in msgs[0]]))

        # Process the video
        frame_size = width * height * 3
        chunk = np.zeros((chunk_size, height, width, 3), dtype=np.uint8)
        frame_count = 0
        with tqdm(total=num_frames, desc="Watermark embedding") as pbar:
            while True:
                in_bytes = process1.stdout.read(frame_size)
                if not in_bytes or len(in_bytes) < frame_size:
                    break  # end of stream (or a truncated last frame)
                frame = np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3])
                chunk[frame_count % chunk_size] = frame
                frame_count += 1
                pbar.update(1)
                if frame_count % chunk_size == 0:
                    processed_frame = embed_video_clip(model, chunk, msgs)
                    process2.stdin.write(processed_frame.tobytes())

            # Flush the frames left over when the video length is not a
            # multiple of chunk_size; otherwise they would be dropped.
            remainder = frame_count % chunk_size
            if remainder:
                processed_frame = embed_video_clip(model, chunk[:remainder], msgs)
                process2.stdin.write(processed_frame.tobytes())
    finally:
        # Close the pipes first so both ffmpeg processes can terminate
        if process1 is not None:
            process1.stdout.close()
        if process2 is not None:
            process2.stdin.close()
        if process1 is not None:
            process1.wait()
        if process2 is not None:
            process2.wait()

    return msgs

You are free to upload any video and change the `video_path`.

The watermarked video is saved in the `outputs` folder.

In [6]:
# Input video to watermark
video_path = "assets/videos/1.mp4"

# The output keeps the input file name and goes into ./outputs
output_dir = "./outputs"
os.makedirs(output_dir, exist_ok=True)
video_name = os.path.basename(video_path)
output_path = os.path.join(output_dir, video_name)

# Watermark the video with a random message, 32 frames per chunk
msgs_ori = embed_video(model, video_path, output_path, chunk_size=32)
print(f"\nSaved watermarked video to {output_path}")

Watermark embedding: 100%|██████████| 256/256 [00:14<00:00, 18.20it/s]


Saved watermarked video to ./outputs/1.mp4





## Extraction

Load the watermarked video produced by the embedding step and extract the watermark from it.

In [7]:
def detect_video_clip(
    model: Videoseal,
    clip: np.ndarray
) -> torch.Tensor:
    """Run the detector on one clip and return its message predictions.

    Args:
        model: Object exposing ``detect(imgs, is_video=...)`` that returns a
            dict with a ``"preds"`` tensor.
        clip: Frames laid out as (frames, height, width, channels) with
            values in 0..255.

    Returns:
        ``preds`` without its first column (that column may be used for
        detection rather than for message bits).
    """
    frames = torch.tensor(clip, dtype=torch.float32)
    # channels-last 0..255 -> channels-first 0..1
    frames = frames.permute(0, 3, 1, 2) / 255.0
    preds = model.detect(frames, is_video=True)["preds"]
    return preds[:, 1:]

def detect_video(
    model: Videoseal,
    input_path: str,
    num_frames_for_extraction: int,
    chunk_size: int
) -> torch.Tensor:
    """Extract the watermark message from the first frames of a video.

    Frames are decoded by an ffmpeg process and passed to
    ``detect_video_clip`` in chunks of ``chunk_size`` frames; a final,
    shorter chunk is processed as well. Reading stops after
    ``num_frames_for_extraction`` frames or at the end of the stream.

    Args:
        model: Detector passed to ``detect_video_clip``.
        input_path: Video to analyse.
        num_frames_for_extraction: Maximum number of frames to read.
        chunk_size: Number of frames per detector call (> 0).

    Returns:
        The per-frame predictions averaged over all processed frames.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        RuntimeError: If no frame could be processed.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Read video dimensions
    video_info = get_video_info(input_path)
    width = int(video_info['width'])
    height = int(video_info['height'])
    num_frames = int(video_info['num_frames'])

    soft_msgs = []
    process1 = None
    pbar = None

    try:
        # Open the input video
        process1 = (
            ffmpeg
            .input(input_path)
            .output('pipe:', format='rawvideo', pix_fmt='rgb24')
            .run_async(pipe_stdout=True, pipe_stderr=subprocess.PIPE)
        )

        # Process the video
        frame_size = width * height * 3
        chunk = np.zeros((chunk_size, height, width, 3), dtype=np.uint8)
        frame_count = 0
        current_chunk_size = 0
        # Only the requested frames are read, so size the bar accordingly
        # (fall back to the request when the frame count is unknown).
        if num_frames > 0:
            total = min(num_frames, num_frames_for_extraction)
        else:
            total = num_frames_for_extraction
        pbar = tqdm(total=total, desc="Watermark extraction")

        try:
            while frame_count < num_frames_for_extraction:
                in_bytes = process1.stdout.read(frame_size)
                if not in_bytes or len(in_bytes) < frame_size:
                    break  # end of stream (or a truncated last frame)
                frame = np.frombuffer(in_bytes, np.uint8).reshape([height, width, 3])
                chunk[frame_count % chunk_size] = frame
                frame_count += 1
                current_chunk_size += 1
                pbar.update(1)

                if frame_count % chunk_size == 0:
                    soft_msgs.append(detect_video_clip(model, chunk))
                    current_chunk_size = 0
        except BrokenPipeError:
            print("Pipe closed unexpectedly. Finalizing extraction...")

        # Process any remaining frames in the last chunk. This is done on the
        # normal path only, so the detector is not invoked again while
        # another exception is propagating.
        if current_chunk_size > 0:
            last_chunk = chunk[:current_chunk_size]
            soft_msgs.append(detect_video_clip(model, last_chunk))
    except Exception as e:
        print(f"Error during video detection: {str(e)}")
        raise
    finally:
        # Ensure all resources are properly closed
        if pbar is not None:
            pbar.close()
        if process1 is not None:
            try:
                process1.stdout.close()
                process1.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # ffmpeg did not exit after its output pipe was closed
                process1.kill()
                process1.wait()
            except Exception as e:
                print(f"Error closing process: {str(e)}")

    if not soft_msgs:
        raise RuntimeError("No frames were successfully processed for watermark extraction")

    soft_msgs = torch.cat(soft_msgs, dim=0)
    soft_msgs = soft_msgs.mean(dim=0)  # Average the predictions across all frames
    return soft_msgs

In [8]:
# Detect the watermark from the first frames of the watermarked video
num_frames_for_extraction = 32
soft_msgs = detect_video(
    model,
    output_path,
    num_frames_for_extraction=num_frames_for_extraction,
    chunk_size=16,
)

# Compare the extracted message with the embedded one (as a percentage)
bit_acc = bit_accuracy(soft_msgs, msgs_ori).item() * 100
print(f"\nBinary message extracted with {bit_acc:.1f}% bit accuracy")

Watermark extraction:  12%|█▎        | 32/256 [00:00<00:06, 33.27it/s]


Binary message extracted with 99.2% bit accuracy



