# **Video Time Length Correction**

This section resamples both videos onto a common frame rate (the slowest input fps) so that the generated videos have the same number of frames and the same duration.

In [None]:
#!/usr/bin/env python3
import os
import cv2
import bisect
import math
import pickle
from typing import List, Tuple

# -----------------------------
# Config
# -----------------------------
# Each entry: (input video, pickled timestamp metadata, output video, fps of the input).
INPUTS = [
    ("cam_0_rgb_video.avi", "cam_0_rgb_video.metadata", "cam_0_synced_ref_fps.mp4", 12.0),
    ("cam_2_rgb_video.avi", "cam_2_rgb_video.metadata", "cam_2_synced_ref_fps.mp4", 15.0),
]

# The shared reference time grid runs at the slowest fps listed in INPUTS.
REF_FPS = min(entry[3] for entry in INPUTS)

# Matching threshold: configured in milliseconds, converted to seconds for
# comparison against timestamp differences.
THRESH_MS = 33.0
THRESH_S = THRESH_MS / 1000.0


# -----------------------------
# Helpers
# -----------------------------
def load_timestamps(meta_path: str) -> List[float]:
    """Load per-frame timestamps from a pickled metadata file, in seconds.

    The pickle may hold either a list of timestamps or a dict carrying them
    under the key ``"timestamps"`` (preferred) or ``"ts"``.

    The unit is guessed from the span between the first and last timestamp:
    a span above 1e10 is treated as nanoseconds, a span above 1e4 as
    milliseconds, anything else as seconds.

    NOTE: this is a heuristic. A recording in seconds that spans more than
    1e4 s, or one in milliseconds that spans less than 1e4 ms, would be
    classified incorrectly.

    Args:
        meta_path: Path to the pickled metadata file.

    Returns:
        The timestamps converted to seconds. When they are already in
        seconds the loaded sequence is returned unchanged.

    Raises:
        KeyError: The metadata is a dict without a ``"timestamps"`` or
            ``"ts"`` key.
        ValueError: The metadata has an unsupported type, or holds fewer
            than two timestamps.
    """
    # SECURITY: pickle.load can execute arbitrary code; only open metadata
    # files that come from a trusted source.
    with open(meta_path, "rb") as f:
        md = pickle.load(f)

    if isinstance(md, dict):
        if "timestamps" in md:
            ts = md["timestamps"]
        elif "ts" in md:
            ts = md["ts"]
        else:
            raise KeyError(f"{meta_path}: no 'timestamps' or 'ts' key")
    elif isinstance(md, list):
        ts = md
    else:
        raise ValueError(f"{meta_path}: unsupported metadata type {type(md)}")

    if len(ts) < 2:
        raise ValueError(f"{meta_path}: not enough timestamps")

    span = ts[-1] - ts[0]
    if span > 1e10:     # nanoseconds -> seconds
        return [t / 1e9 for t in ts]
    if span > 1e4:      # milliseconds -> seconds
        return [t / 1e3 for t in ts]
    return ts           # already seconds

def make_grid(start_t: float, duration_s_int: int, fps: float) -> List[float]:
    """Build evenly spaced sample times starting at ``start_t``.

    The grid holds ``int(duration_s_int * fps)`` entries spaced ``1 / fps``
    seconds apart; the first entry equals ``start_t``.
    """
    count = int(duration_s_int * fps)
    period = 1.0 / fps
    grid: List[float] = []
    for k in range(count):
        grid.append(start_t + k * period)
    return grid

def pick_closest_index(ts: List[float], target_t: float, lo: int, hi: int) -> Tuple[int, float]:
    """Find the entry of ``ts[lo:hi]`` (hi exclusive) nearest to ``target_t``.

    ``ts`` is searched with ``bisect``, so it is expected to be sorted.

    Returns:
        ``(index, abs_difference)`` of the nearest entry, or
        ``(-1, inf)`` when the window is empty. On an exact tie the
        earlier index is returned.
    """
    nothing = (-1, float("inf"))
    if lo >= hi:
        return nothing

    pos = bisect.bisect_left(ts, target_t, lo, hi)

    # Only the neighbours on either side of the insertion point can be nearest.
    before = (pos - 1, abs(ts[pos - 1] - target_t)) if pos > lo else None
    after = (pos, abs(ts[pos] - target_t)) if pos < hi else None

    if before is None and after is None:
        return nothing
    if before is None:
        return after
    if after is None:
        return before
    # Strict comparison keeps the earlier index when both are equally close.
    return after if after[1] < before[1] else before

def open_writer_like(cap: cv2.VideoCapture, out_path: str, fps: float):
    """Open an ``mp4v`` VideoWriter whose frame size is read from ``cap``.

    Args:
        cap: Capture whose frame width and height are queried.
        out_path: Destination path of the video to write.
        fps: Frame rate passed to the writer.

    Returns:
        The opened ``cv2.VideoWriter``.

    Raises:
        RuntimeError: The writer reports that it is not opened.
    """
    frame_size = (
        int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    codec = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(out_path, codec, fps, frame_size)
    if writer.isOpened():
        return writer
    raise RuntimeError(f"Failed to open VideoWriter for {out_path}")


# -----------------------------
# Main
# -----------------------------
def main():
    """Resample every input video onto one shared reference time grid.

    Steps:
      1. Load the timestamps of every stream listed in ``INPUTS``.
      2. Compute the temporal overlap of all streams and floor its length
         to whole seconds.
      3. Build a reference grid over that overlap at ``REF_FPS``.
      4. For each grid time pick the closest source frame within
         ``THRESH_S``; when none qualifies, hold the last matched frame.
      5. Write one output frame per grid time for every stream, so frame
         ``k`` of each output corresponds to the same reference time.

    Raises:
        FileNotFoundError: A video or metadata file is missing.
        ValueError: The streams do not overlap, or overlap for less than
            one second.
        RuntimeError: A video cannot be opened for reading or writing.
    """
    # 1) Load streams
    streams = []
    for vid, meta, out, fps in INPUTS:
        if not os.path.exists(vid):
            raise FileNotFoundError(f"Video not found: {vid}")
        if not os.path.exists(meta):
            raise FileNotFoundError(f"Metadata not found: {meta}")
        ts = load_timestamps(meta)
        streams.append({"video": vid, "meta": meta, "out": out, "fps": fps, "ts": ts})

    # 2) Overlap window
    overlap_start = max(s["ts"][0] for s in streams)
    overlap_end = min(s["ts"][-1] for s in streams)
    if overlap_end <= overlap_start:
        raise ValueError("No temporal overlap between streams.")

    duration_int = int(math.floor(overlap_end - overlap_start))
    if duration_int < 1:
        raise ValueError("Overlap < 1 second; nothing to sync.")

    # 3) Reference grid (slow fps)
    ref_grid = make_grid(overlap_start, duration_int, REF_FPS)
    print(f"Overlap: [{overlap_start:.6f}, {overlap_end:.6f}] duration_int={duration_int}s")
    print(f"REF_FPS={REF_FPS} ref_frames={len(ref_grid)} THRESH={THRESH_MS:.1f}ms")

    # 4) Trim each stream to overlap & build indices to write (one output frame per ref_grid time)
    for s in streams:
        ts = s["ts"]
        lo = bisect.bisect_left(ts, overlap_start)
        hi = bisect.bisect_right(ts, overlap_start + duration_int)
        s["trim_lo"], s["trim_hi"] = lo, hi

        write_indices = []
        last_good = lo if lo < hi else 0

        for t in ref_grid:
            idx, diff = pick_closest_index(ts, t, lo, hi)
            if idx >= 0 and diff <= THRESH_S:
                last_good = idx
            # hold-last if no match
            write_indices.append(last_good)

        s["write_indices"] = write_indices
        print(f"{s['out']}: will write {len(write_indices)} frames at {REF_FPS} FPS")

    # 5) Write videos (both at REF_FPS so frame k aligns across videos)
    for s in streams:
        _write_synced_video(s["video"], s["out"], s["write_indices"], REF_FPS)
        print(f"Wrote: {s['out']} at {REF_FPS} FPS")

    print("Done.")


def _write_synced_video(video_path, out_path, frame_indices, fps):
    """Write one output frame per entry of ``frame_indices`` to ``out_path``.

    A repeated index re-uses the cached frame. ``cap.read()`` always returns
    the frame after the current position, so reading again without seeking
    would emit the following source frame instead of holding the current one.

    A failed read also falls back to the cached frame, which keeps the output
    at the requested length. Writing stops early only when no frame has been
    read successfully yet.

    Raises:
        RuntimeError: The input video or the output writer cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Failed to open {video_path}")

    writer = None
    try:
        writer = open_writer_like(cap, out_path, fps)

        prev = -1
        last_frame = None
        read_failures = 0
        for idx in frame_indices:
            if idx != prev:
                cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
                ok, frame = cap.read()
                if ok:
                    last_frame = frame
                elif last_frame is None:
                    print(f"Warning: failed to read frame {idx} for {out_path}; stopping.")
                    break
                else:
                    # Keep the previous good frame so the output length is preserved.
                    read_failures += 1
            writer.write(last_frame)
            prev = idx

        if read_failures:
            print(
                f"Warning: {read_failures} frame read(s) failed for {out_path}; "
                "repeated the last good frame instead."
            )
    finally:
        # Release both handles even if opening the writer or writing fails.
        if writer is not None:
            writer.release()
        cap.release()


if __name__ == "__main__":
    main()


Overlap: [1765411790.435866, 1765411945.703777] duration_int=155s
REF_FPS=12.0 ref_frames=1860 THRESH=33.0ms
cam_0_synced_ref_fps.mp4: will write 1860 frames at 12.0 FPS
cam_2_synced_ref_fps.mp4: will write 1860 frames at 12.0 FPS
Wrote: cam_0_synced_ref_fps.mp4 at 12.0 FPS
Wrote: cam_2_synced_ref_fps.mp4 at 12.0 FPS
Done.


# **Synchronization**
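1. The reference start and end times are the start and end of the overlap between the two videos' timestamps, and the reference time grid is sampled at the slower fps.
2. For each reference time, the closest frame of each video is matched if it lies within the threshold (`THRESH_MS`, set to 33 ms in the code below); otherwise the last matched frame is held.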

In [None]:
#!/usr/bin/env python3
import os
import bisect
import math
import pickle
import csv
from typing import List, Tuple

# -----------------------------
# Config
# -----------------------------
# Each entry: (input video, pickled timestamp metadata, fps of the input).
INPUTS = [
    ("cam_0_rgb_video.avi", "cam_0_rgb_video.metadata", 12.0),
    ("cam_2_rgb_video.avi", "cam_2_rgb_video.metadata", 15.0),
]

# The reference time grid runs at the slowest fps listed in INPUTS.
REF_FPS = min(entry[2] for entry in INPUTS)

# Matching threshold: configured in milliseconds, converted to seconds for
# comparison against timestamp differences.
THRESH_MS = 33.0
THRESH_S = THRESH_MS / 1000.0

# Destination of the per-reference-frame matching table.
CSV_PATH = "timestamps_synced_refgrid.csv"

# -----------------------------
# Helpers
# -----------------------------
def load_timestamps(meta_path: str) -> List[float]:
    """Read per-frame timestamps from a pickled metadata file, in seconds.

    The pickle may be a list of timestamps, or a dict holding them under
    ``"timestamps"`` (checked first) or ``"ts"``. The unit is guessed from
    the span between the first and last value: above 1e10 the values are
    divided by 1e9, above 1e4 by 1e3, otherwise they are returned unchanged.

    Raises:
        KeyError: Dict metadata without a ``"timestamps"`` or ``"ts"`` key.
        ValueError: Unsupported metadata type, or fewer than two timestamps.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(meta_path, "rb") as fh:
        payload = pickle.load(fh)

    if isinstance(payload, dict):
        for key in ("timestamps", "ts"):
            if key in payload:
                stamps = payload[key]
                break
        else:
            raise KeyError(f"{meta_path}: no 'timestamps' or 'ts' key")
    elif isinstance(payload, list):
        stamps = payload
    else:
        raise ValueError(f"{meta_path}: unsupported metadata type {type(payload)}")

    if len(stamps) < 2:
        raise ValueError(f"{meta_path}: not enough timestamps")

    extent = stamps[-1] - stamps[0]
    if extent > 1e10:
        divisor = 1e9
    elif extent > 1e4:
        divisor = 1e3
    else:
        return stamps
    return [t / divisor for t in stamps]

def make_grid(start_t: float, duration_s_int: int, fps: float) -> List[float]:
    """Return ``int(duration_s_int * fps)`` sample times, ``1 / fps`` apart.

    The first sample equals ``start_t``.
    """
    total = int(duration_s_int * fps)
    spacing = 1.0 / fps
    samples: List[float] = []
    k = 0
    while k < total:
        samples.append(start_t + k * spacing)
        k += 1
    return samples

def pick_closest_index(ts: List[float], target_t: float, lo: int, hi: int) -> Tuple[int, float]:
    """Return ``(index, abs_diff)`` of the entry in ``ts[lo:hi]`` nearest ``target_t``.

    ``hi`` is exclusive. ``ts`` is searched with ``bisect``, so it is expected
    to be sorted. An empty window yields ``(-1, inf)``; on an exact tie the
    earlier index is returned.
    """
    miss = (-1, float("inf"))
    if lo >= hi:
        return miss

    split = bisect.bisect_left(ts, target_t, lo, hi)

    # The nearest entry is one of the two neighbours of the insertion point.
    left = None
    if split > lo:
        left = (split - 1, abs(ts[split - 1] - target_t))
    right = None
    if split < hi:
        right = (split, abs(ts[split] - target_t))

    if left is None:
        return right if right is not None else miss
    if right is None:
        return left
    # Strict comparison keeps the earlier index on a tie.
    if right[1] < left[1]:
        return right
    return left

# -----------------------------
# Main
# -----------------------------
def main():
    """Match every stream against the reference grid and save the result as CSV.

    Loads the timestamps of each stream in ``INPUTS``, builds a reference grid
    at ``REF_FPS`` over the whole-second part of their temporal overlap, and
    for every grid time records the closest frame of each stream that lies
    within ``THRESH_S`` (holding the last matched frame otherwise). One CSV
    row is written per grid time to ``CSV_PATH``.

    Raises:
        FileNotFoundError: A metadata file is missing.
        ValueError: The streams do not overlap, or overlap for less than
            one second.
    """
    streams = []
    for video_path, meta_path, stream_fps in INPUTS:
        if not os.path.exists(meta_path):
            raise FileNotFoundError(f"Metadata not found: {meta_path}")
        streams.append({
            "video": video_path,
            "meta": meta_path,
            "fps": stream_fps,
            "ts": load_timestamps(meta_path),
        })

    # Overlap window shared by all streams.
    overlap_start = max(stream["ts"][0] for stream in streams)
    overlap_end = min(stream["ts"][-1] for stream in streams)
    if overlap_end <= overlap_start:
        raise ValueError("No temporal overlap between streams.")

    duration_int = int(math.floor(overlap_end - overlap_start))
    if duration_int < 1:
        raise ValueError("Overlap < 1 second; nothing to sync.")

    ref_grid = make_grid(overlap_start, duration_int, REF_FPS)

    # Trim each stream to the window and map every grid time to a frame.
    for stream in streams:
        stamps = stream["ts"]
        first = bisect.bisect_left(stamps, overlap_start)
        stop = bisect.bisect_right(stamps, overlap_start + duration_int)
        stream["trim_lo"], stream["trim_hi"] = first, stop

        held = first if first < stop else 0
        matches = []
        for grid_t in ref_grid:
            cand_idx, cand_diff = pick_closest_index(stamps, grid_t, first, stop)
            if cand_idx >= 0 and cand_diff <= THRESH_S:
                held = cand_idx
            # The recorded difference belongs to the frame actually used,
            # which may be a held one.
            matches.append((held, abs(stamps[held] - grid_t)))
        stream["picked"] = matches

    # NOTE: the camera ids in the header are hard-coded as 0 and 2, while the
    # row columns follow the order of INPUTS; keep the two in agreement.
    header = ["ref_frame_idx", "ref_time_s"]
    for cam_id in (0, 2):
        header.extend([
            f"cam_{cam_id}_picked_idx",
            f"cam_{cam_id}_picked_ts_s",
            f"cam_{cam_id}_abs_diff_s",
        ])

    with open(CSV_PATH, "w", newline="") as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)

        for frame_no, grid_t in enumerate(ref_grid):
            row = [frame_no, f"{grid_t:.9f}"]
            for stream in streams:
                picked_idx, picked_diff = stream["picked"][frame_no]
                row.extend([
                    picked_idx,
                    f"{stream['ts'][picked_idx]:.9f}",
                    f"{picked_diff:.9f}",
                ])
            out.writerow(row)

    print(f"Saved CSV: {CSV_PATH}")
    print(f"Overlap: [{overlap_start:.6f}, {overlap_end:.6f}] duration_int={duration_int}s")
    print(f"REF_FPS={REF_FPS} ref_frames={len(ref_grid)} THRESH={THRESH_MS:.1f}ms")


if __name__ == "__main__":
    main()


Saved CSV: timestamps_synced_refgrid.csv
Overlap: [1765061633.203466, 1765061791.083266] duration_int=157s
REF_FPS=12.0 ref_frames=1884 THRESH=33.0ms
