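Reads splits.json (first rewriting stale path prefixes in place so every video path points at the current project root), computes video lengths, generates overlapping 16-frame clips (stride 8; videos with fewer than 32 frames are skipped), and writes the central index clips_index.json, in which each entry has video_path, frame_indices, label, and split.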

In [1]:
import json
import os

# Root of the project checkout. Set the TAD_PROJECT_ROOT environment variable
# to run the notebook from another location; when it is unset the original
# hard-coded path is used, so existing setups behave exactly as before.
PROJECT_ROOT = os.environ.get(
    "TAD_PROJECT_ROOT", "/home/olzhas/programming/traffic-accident-edge"
)
DATA_ROOT = os.path.join(PROJECT_ROOT, "TAD-benchmark")
SPLITS_PATH = os.path.join(DATA_ROOT, "splits.json")

# splits.json maps split names (e.g. "acc_train") to lists of video paths.
with open(SPLITS_PATH, "r") as f:
    splits = json.load(f)

# Project-root prefixes that paths stored in splits.json may still start with;
# fix_path() swaps any of them for PROJECT_ROOT.
old_prefixes = [
    "/content/drive/MyDrive/traffic-accident-edge",
    "/home/olzhas/Desktop/traffic-accident-edge",
]


def fix_path(p):
    """
    Rewrite one video path from splits.json so it is valid under PROJECT_ROOT.

    - A path located under one of ``old_prefixes`` gets that leading prefix
      swapped for PROJECT_ROOT.
    - A relative path is resolved against DATA_ROOT.
    - Any other absolute path is returned unchanged.

    The function is idempotent: a path already under PROJECT_ROOT is returned
    as-is (provided PROJECT_ROOT is not itself listed in ``old_prefixes``).
    """
    for old in old_prefixes:
        # Match whole path components only, so a sibling directory such as
        # "<old>-backup" is not mistaken for the old project root.
        if p == old or p.startswith(old + "/"):
            # Swap only the leading prefix; str.replace() would also rewrite
            # any later occurrence of the same text inside the path.
            return PROJECT_ROOT + p[len(old):]
    if not os.path.isabs(p):
        return os.path.join(DATA_ROOT, p)
    return p


# Normalise the path list of every split in place, then write the result back
# to splits.json so the file on disk carries the corrected paths.
for key in ("acc_train", "acc_val", "norm_train", "norm_val", "acc_test", "norm_test"):
    splits[key] = list(map(fix_path, splits[key]))

with open(SPLITS_PATH, mode="w") as f:
    f.write(json.dumps(splits, indent=2))

print("Fixed and saved splits.json")

Fixed and saved splits.json


In [2]:
# Re-read the (already path-corrected) split definitions from disk.
with open(SPLITS_PATH) as f:
    splits = json.loads(f.read())

# One list of video paths per class/split pair, grouped train / val / test.
acc_train, norm_train = splits["acc_train"], splits["norm_train"]
acc_val, norm_val = splits["acc_val"], splits["norm_val"]
acc_test, norm_test = splits["acc_test"], splits["norm_test"]

# Spot-check that the first accident training video resolves to a real file.
print("Example acc_train path:", acc_train[0])
print("Exists:", os.path.exists(acc_train[0]))

Example acc_train path: /home/olzhas/programming/traffic-accident-edge/TAD-benchmark/train/accident_1/videox3_10.mp4
Exists: True


In [3]:
# Report how many videos each split contains, in the same order as before.
for heading, split_paths in (
    ("acc_train:", acc_train),
    ("acc_val:", acc_val),
    ("norm_train:", norm_train),
    ("norm_val:", norm_val),
    ("acc_test:", acc_test),
    ("norm_test:", norm_test),
):
    print(heading, len(split_paths))

acc_train: 208
acc_val: 53
norm_train: 88
norm_val: 23
acc_test: 16
norm_test: 16


In [4]:
import os
import cv2


def video_length_info(video_path):
    """
    Return ``(total_frames, fps, duration_sec)`` for a video file.

    The frame count and frame rate are read from the capture's
    CAP_PROP_FRAME_COUNT / CAP_PROP_FPS properties; no frames are decoded.

    - If the video cannot be opened, a message is printed and
      ``(0, 0, 0.0)`` is returned.
    - If the reported fps is not positive, 1 is used instead so the duration
      can still be computed (it then equals the frame count).
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Could not open:", video_path)
            return 0, 0, 0.0

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        # Release on every path, including the failed-open early return and
        # any exception raised while reading the properties.
        cap.release()

    if fps <= 0:
        fps = 1
    duration_sec = total_frames / fps
    return total_frames, fps, duration_sec


# Show length information for the first accident and first normal training
# video as a quick sanity check.
for heading, sample_paths in (
    ("Sample accident video length info:", acc_train),
    ("\nSample normal video length info:", norm_train),
):
    print(heading)
    f, fps, dur = video_length_info(sample_paths[0])
    print(" frames:", f, "fps:", fps, "seconds:", round(dur, 1))

Sample accident video length info:
 frames: 799 fps: 25.0 seconds: 32.0

Sample normal video length info:
 frames: 274 fps: 25.0 seconds: 11.0


In [7]:
import cv2

CLIP_LEN = 16    # number of consecutive frames in one clip
STRIDE = 8       # offset between the first frames of consecutive clips
MIN_FRAMES = 32  # videos reporting fewer frames than this yield no clips


def get_video_clips(video_path, clip_len=CLIP_LEN, stride=STRIDE, min_frames=MIN_FRAMES):
    """
    For a given video, return a list of frame index lists.
    Each element is something like [start, start+1, ..., start+clip_len-1].

    Clips start at frames 0, stride, 2*stride, ... and only clips that fit
    entirely inside the video are kept, so trailing frames that do not fill
    a whole clip are not covered.

    Returns an empty list when the video cannot be opened (a message is
    printed, matching video_length_info) or when it reports fewer than
    ``min_frames`` frames.

    Raises ValueError if ``clip_len`` or ``stride`` is not positive; a
    non-positive stride would otherwise never advance past the first clip.

    NOTE: the frame count is taken from the capture's CAP_PROP_FRAME_COUNT
    property; whether every listed index can actually be decoded is not
    checked here.
    """
    if clip_len <= 0 or stride <= 0:
        raise ValueError(
            f"clip_len and stride must be positive, got clip_len={clip_len}, stride={stride}"
        )

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Make skipped videos visible instead of silently dropping them.
            print("Could not open:", video_path)
            return []
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()

    if total < min_frames:
        return []

    # The last valid start is total - clip_len; the range is empty when the
    # video is shorter than a single clip.
    return [
        list(range(start, start + clip_len))
        for start in range(0, total - clip_len + 1, stride)
    ]


# Use the first accident and first normal training video as smoke tests.
acc_sample, norm_sample = acc_train[0], norm_train[0]

acc_clips = get_video_clips(acc_sample)
norm_clips = get_video_clips(norm_sample)

# For each sample: file name, number of clips, and both ends of the first clip.
for heading, sample, sample_clips in (
    ("Sample accident video:", acc_sample, acc_clips),
    ("Sample normal video:", norm_sample, norm_clips),
):
    print(heading, os.path.basename(sample))
    print("  total clips:", len(sample_clips))
    print("  first clip indices:", sample_clips[0][:5], "...", sample_clips[0][-5:])

Sample accident video: videox3_10.mp4
  total clips: 98
  first clip indices: [0, 1, 2, 3, 4] ... [11, 12, 13, 14, 15]
Sample normal video: 20220518_acci-bg20.mp4
  total clips: 33
  first clip indices: [0, 1, 2, 3, 4] ... [11, 12, 13, 14, 15]


In [8]:
from collections import Counter


def build_split_clips_debug(video_paths, label, split_name, max_videos=None):
    """
    Expand a list of videos into one sample record per clip.

    video_paths: video files to process, in order.
    label:       value stored under "label" in every record.
    split_name:  value stored under "split"; also shown in progress lines.
    max_videos:  if not None, only the first max_videos paths are processed
                 (handy for quick test runs).

    A progress line is printed for the 1st, 11th, 21st, ... video.

    Returns a list of dicts with the keys "video_path", "frame_indices",
    "label" and "split", one per clip returned by get_video_clips.
    """
    selected = video_paths[:max_videos] if max_videos is not None else video_paths
    n_videos = len(selected)

    samples = []
    for position, path in enumerate(selected, start=1):
        if position % 10 == 1:
            print(f"[{split_name}] processing video {position}/{n_videos}:",
                  os.path.basename(path))
        samples.extend(
            {
                "video_path": path,
                "frame_indices": frames,
                "label": label,
                "split": split_name,
            }
            for frames in get_video_clips(path)
        )
    return samples


# Build the full clip list split by split: accident videos get label 1,
# normal videos label 0. The order below fixes the order of entries in `clips`.
clips = []
for split_paths, split_label, split_name in (
    (acc_train, 1, "train"),
    (norm_train, 0, "train"),
    (acc_val, 1, "val"),
    (norm_val, 0, "val"),
    (acc_test, 1, "test"),
    (norm_test, 0, "test"),
):
    clips += build_split_clips_debug(split_paths, split_label, split_name)

print("Total clips:", len(clips))

# Clip counts per split and per class label.
split_counts = Counter(c["split"] for c in clips)
label_counts = Counter(c["label"] for c in clips)

print("By split:", split_counts)
print("By label:", label_counts)
print("Sample entry:", clips[0])

[train] processing video 1/208: videox3_10.mp4
[train] processing video 11/208: videox13_1.mp4
[train] processing video 21/208: video105.mp4
[train] processing video 31/208: video36.mp4
[train] processing video 41/208: video163.mp4
[train] processing video 51/208: video85.mp4
[train] processing video 61/208: video151.mp4
[train] processing video 71/208: video102.mp4
[train] processing video 81/208: videox14_3.mp4
[train] processing video 91/208: video137.mp4
[train] processing video 101/208: video141.mp4
[train] processing video 111/208: video125_2.mp4
[train] processing video 121/208: video168.mp4
[train] processing video 131/208: videox8_1.mp4
[train] processing video 141/208: video71.mp4
[train] processing video 151/208: video45_2.mp4
[train] processing video 161/208: video158.mp4
[train] processing video 171/208: video82.mp4
[train] processing video 181/208: video107.mp4
[train] processing video 191/208: video16.mp4
[train] processing video 201/208: video69.mp4
[train] processing v

In [9]:
# The clip index is stored next to splits.json inside DATA_ROOT.
CLIPS_INDEX_PATH = os.path.join(DATA_ROOT, "clips_index.json")

# Serialise every clip record (video_path, frame_indices, label, split) as
# indented JSON and write it out in one go.
with open(CLIPS_INDEX_PATH, mode="w") as f:
    f.write(json.dumps(clips, indent=2))

print("Saved", len(clips), "clips to", CLIPS_INDEX_PATH)

Saved 22037 clips to /home/olzhas/programming/traffic-accident-edge/TAD-benchmark/clips_index.json
