
# Video ROI Tool — Draw ROI on a Random Frame and Export to JSON

This notebook helps you:
1. Load a video from disk.
2. Pick a random frame from that video.
3. Draw an ROI (rectangle or polygon) on the frame.
4. Export ROI coordinates to a JSON file and save crops (full frame + ROI crop).

### Requirements
- Python 3.8+
- Packages: `opencv-python` (`cv2`), `numpy`, `matplotlib`, `ipympl` (required by `%matplotlib widget`), `ipywidgets`

> **Tip:** For best interactivity, use JupyterLab and run `%matplotlib widget`.


In [1]:

# If you're in JupyterLab, this enables interactive widgets/plots.
# If you see issues, you can comment this out and try `%matplotlib inline`.
%matplotlib widget

import os, json, uuid, math, random
from pathlib import Path
from datetime import datetime

import numpy as np
import cv2
import matplotlib.pyplot as plt
from matplotlib.widgets import RectangleSelector, PolygonSelector
from matplotlib.path import Path as MplPath

import ipywidgets as W

print("Imports OK. Matplotlib backend:", plt.get_backend())


Imports OK. Matplotlib backend: widget


In [2]:

def ensure_dir(d):
    """Create directory ``d`` together with any missing parents.

    Does nothing when the directory is already present.
    """
    target = Path(d)
    target.mkdir(exist_ok=True, parents=True)

def bgr_to_rgb(frame_bgr):
    """Convert an image from OpenCV's BGR channel order to RGB."""
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    return frame_rgb

def load_random_frame(video_path):
    """Read one randomly chosen frame from a video file.

    Args:
        video_path: Path (str or path-like) of the video to open.

    Returns:
        Tuple ``(frame_rgb, idx, total, fps, ts_sec)``: the frame converted to
        RGB, its frame index, the frame count reported by the container, the
        reported FPS (``0.0`` when unavailable) and the frame timestamp in
        seconds (``None`` when the FPS is not positive).

    Raises:
        RuntimeError: If the video cannot be opened, reports no frames, or the
            selected frame cannot be read.
    """
    cap = cv2.VideoCapture(str(video_path))
    # try/finally guarantees the capture handle is released on every exit
    # path, including exceptions raised while seeking, decoding or converting.
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Cannot open video: {video_path}")
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
        fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
        if total <= 0:
            raise RuntimeError("Video has 0 frames (or unknown frame count).")
        idx = random.randint(0, total - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ok, frame = cap.read()
        if not ok or frame is None:
            raise RuntimeError(f"Failed to read frame at index {idx}.")
        frame_rgb = bgr_to_rgb(frame)
    finally:
        cap.release()
    ts_sec = idx / fps if fps > 0 else None
    return frame_rgb, idx, total, fps, ts_sec

def crop_rect(img_rgb, x1, y1, x2, y2):
    """Crop an axis-aligned rectangle out of an image.

    The two corners may be given in any order (e.g. a drag from bottom-right
    to top-left). Coordinates are clipped to the image, and the box is treated
    as half-open: rows ``y1 <= y < y2`` and columns ``x1 <= x < x2``.

    Args:
        img_rgb: Image array whose first two axes are (rows, columns).
        x1, y1: One corner of the rectangle.
        x2, y2: The opposite corner.

    Returns:
        Tuple ``(crop, (x1, y1, x2, y2))``: a copy of the cropped pixels and
        the normalized, clipped box actually used (may be empty).
    """
    h, w = img_rgb.shape[:2]
    # Order the corners BEFORE clipping/reassigning; computing the max from an
    # already-overwritten x1/y1 collapses reversed drags to an empty box.
    left, right = min(x1, x2), max(x1, x2)
    top, bottom = min(y1, y2), max(y1, y2)
    # The end coordinates are exclusive in the slice below, so clipping them
    # to w/h (not w-1/h-1) keeps the last column/row reachable.
    left = int(np.clip(left, 0, w))
    right = int(np.clip(right, 0, w))
    top = int(np.clip(top, 0, h))
    bottom = int(np.clip(bottom, 0, h))
    return img_rgb[top:bottom, left:right].copy(), (left, top, right, bottom)

def crop_polygon(img_rgb, points):
    """Mask an image with a polygon and crop to the polygon's bounding box.

    Pixels outside the polygon are set to zero in the returned crop.

    Args:
        img_rgb: Image array whose first two axes are (rows, columns).
        points: Sequence of ``(x, y)`` vertices; values are converted to int32.

    Returns:
        Tuple ``(crop, (x1, y1, x2, y2))``: a copy of the masked pixels inside
        the bounding box, and that box clipped to the image (``x2``/``y2``
        exclusive).
    """
    pts = np.array(points, dtype=np.int32)
    h, w = img_rgb.shape[:2]
    mask = np.zeros((h, w), dtype=np.uint8)
    cv2.fillPoly(mask, [pts], 255)
    masked = cv2.bitwise_and(img_rgb, img_rgb, mask=mask)
    x, y, bw, bh = cv2.boundingRect(pts)
    # Clip the box to the frame: a negative origin would otherwise be read by
    # numpy as "count from the end", and the reported box could lie outside
    # the image.
    x1 = min(max(int(x), 0), w)
    y1 = min(max(int(y), 0), h)
    x2 = min(max(int(x + bw), 0), w)
    y2 = min(max(int(y + bh), 0), h)
    crop = masked[y1:y2, x1:x2].copy()
    return crop, (x1, y1, x2, y2)

def save_json(data, out_path):
    """Write ``data`` to ``out_path`` as indented UTF-8 JSON and return ``out_path``.

    Non-ASCII characters are written as-is rather than as ``\\u`` escapes.
    """
    with open(out_path, mode="w", encoding="utf-8") as fh:
        json.dump(data, fh, indent=2, ensure_ascii=False)
    return out_path

def save_image(img_rgb, out_path):
    """Write an RGB image to disk with OpenCV.

    Args:
        img_rgb: Image array in RGB channel order.
        out_path: Destination file path; the extension selects the format.

    Returns:
        ``out_path``, unchanged.

    Raises:
        OSError: If OpenCV reports that the file could not be written.
    """
    # cv2 expects BGR for imwrite
    bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    # imwrite signals failure through its boolean result rather than an
    # exception, so it has to be checked explicitly.
    if not cv2.imwrite(str(out_path), bgr):
        raise OSError(f"Failed to write image: {out_path}")
    return out_path


In [3]:

# ===== App State =====
# Single mutable dictionary shared by all widget callbacks below.
state = dict(
    # --- source video / currently loaded frame ---
    video_path=None,
    frame_rgb=None,
    frame_index=None,
    frame_count=None,
    fps=None,
    timestamp_sec=None,
    # --- ROI being edited ---
    roi_type="Rectangle",  # or "Polygon"
    rect_coords=None,      # (x1,y1,x2,y2)
    poly_points=[],        # [(x,y), ...]
    # --- matplotlib handles for the active figure ---
    selector=None,
    ax=None,
    fig=None,
    image_obj=None,
    overlay=None,
)

# ===== Widgets =====
# Text box holding the location of the video to sample from.
video_path_txt = W.Text(
    description="Video Path:",
    placeholder="Enter path to video file (e.g., /path/to/video.mp4)",
    layout=W.Layout(width="100%"),
)

# Toolbar controls, listed in the order they appear on screen.
pick_btn = W.Button(button_style="primary", description="Pick Random Frame")
roi_type_dd = W.Dropdown(
    description="ROI Type:",
    options=["Rectangle", "Polygon"],
    value="Rectangle",
)
reset_roi_btn = W.Button(button_style="warning", description="Reset ROI")
export_btn = W.Button(button_style="success", description="Export JSON + Save Crops")

# Status line and the output area that hosts the figure and export log.
status_html = W.HTML(value="")
out = W.Output()

display(
    W.VBox(
        children=[
            video_path_txt,
            W.HBox(children=[pick_btn, roi_type_dd, reset_roi_btn, export_btn]),
            status_html,
            out,
        ]
    )
)

def clear_plot():
    """Close the current figure, if any, and drop every plot handle kept in ``state``."""
    current_fig = state["fig"]
    if current_fig is not None:
        plt.close(current_fig)
    # Forget all references tied to the closed figure.
    for key in ("fig", "ax", "image_obj", "overlay", "selector"):
        state[key] = None

def update_status(msg):
    """Show ``msg`` in the status line below the toolbar.

    The message is treated as plain text: it is HTML-escaped before being
    placed in the HTML widget, so file paths or exception text containing
    ``<``, ``>`` or ``&`` are displayed literally instead of being read as
    markup.

    Args:
        msg: Text to display (converted with ``str``).
    """
    import html  # local import: only needed here

    status_html.value = f"<b>Status:</b> {html.escape(str(msg), quote=False)}"

def on_rectangle_select(eclick, erelease):
    """RectangleSelector callback: remember the dragged rectangle in ``state``.

    ``eclick`` and ``erelease`` are the press and release mouse events. If any
    data coordinate is missing, the selection is ignored.
    """
    corners = (eclick.xdata, eclick.ydata, erelease.xdata, erelease.ydata)
    if None in corners:
        return
    # Store rectangle coordinates as integers: (x1, y1, x2, y2)
    state["rect_coords"] = tuple(int(round(value)) for value in corners)
    update_status(f"Rectangle ROI selected: {state['rect_coords']}")

def start_rectangle_selector():
    """Attach a RectangleSelector to the current axes; no-op when no axes exist."""
    target_ax = state["ax"]
    if target_ax is None:
        return
    selector_options = dict(
        useblit=True,
        interactive=True,
        button=[1],  # left mouse button only
        drag_from_anywhere=True,
    )
    # The selector is kept in ``state`` so it stays referenced after this call.
    state["selector"] = RectangleSelector(target_ax, on_rectangle_select, **selector_options)
    update_status("Rectangle selector active. Drag on the image to select.")

def on_polygon_select(verts):
    """PolygonSelector callback: remember the polygon vertices in ``state``.

    Args:
        verts: Sequence of ``(x, y)`` vertex coordinates; an empty or ``None``
            value is ignored.
    """
    # verts: list of (x, y)
    if not verts:
        return
    pts = [(int(round(x)), int(round(y))) for (x, y) in verts]
    state["poly_points"] = pts
    # This callback receives the selected vertices, so the status must not
    # tell the user to do anything further to "finish" the polygon.
    update_status(f"Polygon ROI with {len(pts)} points selected.")

def start_polygon_selector():
    """Attach a PolygonSelector to the current axes; no-op when no axes exist."""
    ax = state["ax"]
    if ax is None:
        return
    # Do not pass `lineprops`/`markerprops`: they were deprecated in
    # Matplotlib 3.5 and later removed (replaced by `props`/`handle_props`),
    # so passing them raises TypeError on current releases. The defaults
    # work on every version.
    state["selector"] = PolygonSelector(ax, on_polygon_select, useblit=True)
    update_status(
        "Polygon selector active. Click to add points, "
        "then click the first point again to close the polygon."
    )

def draw_frame(frame_rgb):
    """Show ``frame_rgb`` in a fresh figure and start the selector for the current ROI type.

    Any previously shown figure is closed first. The new figure, axes and
    image handle are stored in ``state``.

    Args:
        frame_rgb: Image array accepted by ``Axes.imshow``.
    """
    clear_plot()
    fig, ax = plt.subplots(figsize=(8, 6))
    img = ax.imshow(frame_rgb)
    ax.set_axis_off()
    state["fig"] = fig
    state["ax"] = ax
    state["image_obj"] = img
    # Start selector
    if state["roi_type"] == "Rectangle":
        start_rectangle_selector()
    else:
        start_polygon_selector()
    # pyplot.show() accepts only the keyword `block`; passing the Figure
    # positionally is silently misread or rejected depending on the backend.
    plt.show()

def handle_pick_clicked(_):
    """Button callback: load a random frame from the entered video path and display it.

    The frame and its metadata are stored in ``state`` and any previous ROI is
    discarded. Failures are reported through the status line.
    """
    video_path = video_path_txt.value.strip()
    if not video_path:
        update_status("Please enter a valid video path.")
        return

    try:
        frame_rgb, idx, total, fps, ts = load_random_frame(video_path)
        # Record the new frame and clear the ROI left over from the previous one.
        state.update(
            video_path=video_path,
            frame_rgb=frame_rgb,
            frame_index=idx,
            frame_count=total,
            fps=fps,
            timestamp_sec=ts,
            rect_coords=None,
            poly_points=[],
        )
        with out:
            out.clear_output(wait=True)
            if ts is not None:
                summary = f"Loaded frame {idx} / {total-1} from '{video_path}'. FPS={fps:.3f}, t={ts:.3f}s"
            else:
                summary = f"Loaded frame {idx} / {total-1}."
            print(summary)
            draw_frame(frame_rgb)
        update_status("Random frame loaded. Draw your ROI.")
    except Exception as e:
        update_status(f"Error: {e}")

def handle_roi_type_change(change):
    """Dropdown observer: switch between the rectangle and polygon tools.

    Args:
        change: Change notification dict; only changes of the ``value`` trait
            are acted on.
    """
    if change["name"] != "value":
        return
    state["roi_type"] = change["new"]
    # The redraw below starts with an empty selector, so drop any ROI captured
    # earlier; otherwise an export could use coordinates that are no longer
    # visible on screen.
    state["rect_coords"] = None
    state["poly_points"] = []
    # Recreate selector on same frame if loaded
    if state["frame_rgb"] is not None:
        # Draw inside the Output widget: output produced in a widget callback
        # outside of it is not shown in the UI area.
        with out:
            out.clear_output(wait=True)
            draw_frame(state["frame_rgb"])

def handle_reset_roi(_):
    """Button callback: discard the current ROI and restart the selector on the same frame."""
    state["rect_coords"] = None
    state["poly_points"] = []
    # Recreate selector
    if state["frame_rgb"] is not None:
        # Draw inside the Output widget: output produced in a widget callback
        # outside of it is not shown in the UI area.
        with out:
            out.clear_output(wait=True)
            draw_frame(state["frame_rgb"])
    update_status("ROI reset.")

def handle_export(_):
    """Button callback: save the current frame, the ROI crop and a JSON description.

    Files are written to the ``exports`` directory (relative to the current
    working directory) under a shared, unique base name:
    ``<base>_frame.png``, ``<base>_roi.png`` and ``<base>.json``.
    Problems (no frame, no ROI, empty ROI, write failures) are reported through
    the status line instead of raising.
    """
    frame = state["frame_rgb"]
    if frame is None:
        update_status("No frame loaded yet.")
        return

    if state["roi_type"] == "Rectangle":
        if not state["rect_coords"]:
            update_status("Please draw a rectangle ROI first.")
            return
        x1, y1, x2, y2 = state["rect_coords"]
        crop_img, crop_box = crop_rect(frame, x1, y1, x2, y2)
        roi_data = {
            "type": "rectangle",
            "x1": int(x1), "y1": int(y1), "x2": int(x2), "y2": int(y2)
        }
    else:
        if not state["poly_points"]:
            update_status("Please draw a polygon ROI first (click the first point again to close it).")
            return
        crop_img, crop_box = crop_polygon(frame, state["poly_points"])
        roi_data = {
            "type": "polygon",
            "points": [{"x": int(x), "y": int(y)} for (x, y) in state["poly_points"]]
        }

    # A zero-area ROI (e.g. a click without a drag) yields an empty crop that
    # cannot be encoded; stop before anything is written to disk.
    if crop_img.size == 0:
        update_status("ROI is empty (zero width or height). Please redraw it.")
        return

    try:
        # Prepare exports
        export_dir = Path("exports")
        ensure_dir(export_dir)
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Timestamp plus a short random suffix keeps repeated exports distinct.
        base = f"roi_{stamp}_{uuid.uuid4().hex[:8]}"

        # Save full frame
        full_frame_path = export_dir / f"{base}_frame.png"
        save_image(frame, full_frame_path)

        # Save cropped ROI image
        roi_img_path = export_dir / f"{base}_roi.png"
        save_image(crop_img, roi_img_path)

        # Save JSON
        h, w = frame.shape[:2]
        json_payload = {
            "video_path": state["video_path"],
            "frame_index": int(state["frame_index"]),
            "frame_count": int(state["frame_count"]),
            "fps": float(state["fps"]),
            "timestamp_sec": (float(state["timestamp_sec"]) if state["timestamp_sec"] is not None else None),
            "frame_size": {"width": int(w), "height": int(h)},
            "roi": roi_data,
            "roi_crop_bbox": {"x1": int(crop_box[0]), "y1": int(crop_box[1]), "x2": int(crop_box[2]), "y2": int(crop_box[3])},
            "outputs": {
                "full_frame_png": str(full_frame_path),
                "roi_crop_png": str(roi_img_path)
            },
            "coordinate_system": "x=column (left→right), y=row (top→bottom), origin=(0,0) at top-left"
        }
        json_path = export_dir / f"{base}.json"
        save_json(json_payload, json_path)
    except Exception as e:
        # Same reporting style as handle_pick_clicked: surface the failure in
        # the status line rather than losing it inside the widget callback.
        update_status(f"Error: {e}")
        return

    with out:
        out.clear_output(wait=True)
        print("Export complete!")
        print("JSON:", json_path)
        print("Full frame:", full_frame_path)
        print("ROI crop:", roi_img_path)

    update_status("Exported JSON and images to ./exports")

# Wire up events
pick_btn.on_click(handle_pick_clicked)
# Subscribe to the dropdown's `value` trait only, so the handler is not
# invoked for changes to its other traits.
roi_type_dd.observe(handle_roi_type_change, names="value")
reset_roi_btn.on_click(handle_reset_roi)
export_btn.on_click(handle_export)

print("UI ready. Enter a video path and click 'Pick Random Frame'.")


VBox(children=(Text(value='', description='Video Path:', layout=Layout(width='100%'), placeholder='Enter path …

UI ready. Enter a video path and click 'Pick Random Frame'.



## Notes & Troubleshooting
- If the interactive ROI tools don't appear, try switching to JupyterLab and ensure the first cell runs `%matplotlib widget` successfully.
- Rectangle ROI: click and drag on the image. You can adjust corners before exporting.
- Polygon ROI: click to place vertices; **click the first vertex again** to close and finalize the polygon (press `Esc` to start over).
- Outputs are saved under `./exports`, relative to the kernel's working directory (normally the folder containing this notebook): the full frame image, the cropped ROI image, and a JSON metadata file.
- Coordinate system used in JSON: **x** is left→right (columns), **y** is top→bottom (rows), origin at top-left.
