In [2]:
# Install the notebook's dependencies into the environment of the running kernel.
# `%pip` targets the kernel's interpreter, whereas `!pip` runs whichever `pip`
# is first on the shell PATH, which may belong to a different environment.
%pip install scipy numpy opencv-python tqdm h5py



In [8]:
# Converting .seq files to images

import cv2
import os
from glob import glob
from tqdm import tqdm

# Dataset root. Set the DATA_AND_LABELS_DIR environment variable to run the
# notebook on another machine; the original location remains the default.
base_path = os.environ.get(
    "DATA_AND_LABELS_DIR", "/Users/melissachang/Desktop/data_and_labels"
)

# glob() returns matches in arbitrary order; sort so that sequences are always
# processed in the same order from run to run.
train_seq_files = sorted(glob(os.path.join(base_path, "Train", "set*", "set*", "V*.seq")))
test_seq_files = sorted(glob(os.path.join(base_path, "Test", "set*", "set*", "V*.seq")))

def extract_frames_from_seq(seq_file, output_dir):
    """Decode every frame of a video file and save each one as a JPEG.

    Frames are written to ``output_dir`` as ``<basename>_<index:05d>.jpg``,
    where ``<basename>`` is the file name of ``seq_file`` without its
    extension and ``<index>`` starts at 0.

    Args:
        seq_file: Path to the video (.seq) file to decode.
        output_dir: Directory receiving the JPEGs; created if missing.

    Returns:
        The number of frames written (0 if the file could not be opened).

    Raises:
        OSError: If a decoded frame could not be written to disk.

    NOTE(review): the output name is derived from the file's base name only.
    If sequences in different set directories share a name (e.g. several
    ``V000.seq``) and are extracted into the same ``output_dir``, later ones
    overwrite earlier ones. The file names are left unchanged here because the
    label-writing code looks images up by this exact pattern.
    """
    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(seq_file))[0]

    cap = cv2.VideoCapture(seq_file)
    count = 0
    try:
        if not cap.isOpened():
            # VideoCapture does not raise on an unreadable file; say so
            # explicitly instead of silently producing no frames.
            print(f"Warning: could not open {seq_file}; no frames extracted")
            return count

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            filename = os.path.join(output_dir, f"{base_name}_{count:05d}.jpg")
            # imwrite reports failure through its return value. Skipping a
            # frame would shift every later index, so fail loudly instead.
            if not cv2.imwrite(filename, frame):
                raise OSError(f"Failed to write frame to {filename}")
            count += 1
    finally:
        # Release the capture even when decoding or writing fails.
        cap.release()

    return count

# Extract the frames of each split into its own images/<split> directory.
extraction_jobs = (
    (train_seq_files, "images/Train", "Extracting train frames"),
    (test_seq_files, "images/Test", "Extracting test frames"),
)

for split_seq_files, image_subdir, progress_label in extraction_jobs:
    for seq_file in tqdm(split_seq_files, desc=progress_label):
        extract_frames_from_seq(seq_file, os.path.join(base_path, image_subdir))


Extracting train frames: 100%|██████████████████| 71/71 [10:09<00:00,  8.58s/it]
Extracting test frames: 100%|███████████████████| 66/66 [07:55<00:00,  7.20s/it]


In [12]:
# Sanity check: count the extracted Test frames and show a few file names.
import os
from glob import glob

output_dir = "/Users/melissachang/Desktop/data_and_labels/images/Test"

# List all JPGs
jpg_files = glob(os.path.join(output_dir, "*.jpg"))
print(f"Found {len(jpg_files)} .jpg images")
print("First 5 image files:", jpg_files[:5])

Found 35533 .jpg images
First 5 image files: ['/Users/melissachang/Desktop/data_and_labels/images/Test/V015_00541.jpg', '/Users/melissachang/Desktop/data_and_labels/images/Test/V006_00479.jpg', '/Users/melissachang/Desktop/data_and_labels/images/Test/V005_01237.jpg', '/Users/melissachang/Desktop/data_and_labels/images/Test/V017_01026.jpg', '/Users/melissachang/Desktop/data_and_labels/images/Test/V007_00750.jpg']


In [3]:
# Parse .vbb annotation files to txt
from scipy.io import loadmat
import os
# Path of the Train label directory.
# NOTE(review): this module-level name is not read by the code visible in this
# cell -- write_labels() takes `label_dir` as a parameter and its callers pass
# the directory explicitly. Confirm whether it is still needed.
label_dir="/Users/melissachang/Desktop/data_and_labels/labels/Train"

def parse_vbb(vbb_path):
    """Read a .vbb annotation file and return the visible 'person' boxes per frame.

    The file is loaded with ``scipy.io.loadmat``. Objects whose label is not
    exactly ``'person'`` and objects flagged as occluded are dropped.

    Args:
        vbb_path: Path to the .vbb (MATLAB) annotation file.

    Returns:
        A list with one entry per frame; each entry is a list of boxes, each
        box being the ``[x, y, w, h]`` array stored in the annotation.
    """
    mat = loadmat(vbb_path)
    A = mat['A'][0][0]
    obj_lists = A[1][0]   # per-frame object lists (objLists in the vbb layout)
    obj_labels = A[4][0]  # label string per object id (objLbl in the vbb layout)

    # An empty label cell maps to '' so it can never match 'person'.
    label_map = [str(l[0]) if l.size else '' for l in obj_labels]
    frames = []

    for objs in obj_lists:
        bboxes = []
        # Frames without annotations load as empty arrays (e.g. shape (0, 0));
        # indexing them with [0] would raise IndexError, so skip them.
        if objs.size:
            for obj in objs.ravel():
                label = label_map[int(obj['id'][0][0]) - 1]  # object ID is 1-based
                if label != 'person':
                    continue
                # Read the occlusion flag by field name: each object record is
                # laid out as (id, pos, occl, lock, posv), so positional
                # index 3 is the `lock` flag, not the occlusion flag.
                if obj['occl'][0][0]:
                    continue
                bboxes.append(obj['pos'][0])  # Save [x, y, w, h]
        frames.append(bboxes)

    return frames  # List of lists of boxes per frame

# Convert boxes to YOLO format
def convert_to_yolo(box, img_w, img_h):
    """Format a pixel-space box as a YOLO label line for class 0.

    The box is clipped to the image first, so the normalised centre and size
    always lie within [0, 1] even when the annotation extends past the image
    border. Boxes that already lie inside the image are left unchanged.

    Args:
        box: ``[x, y, w, h]`` with ``(x, y)`` the top-left corner, in pixels.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``"0 <x_center> <y_center> <w> <h>"`` with each value normalised by
        the image size and printed with six decimals.
    """
    x, y, w, h = box

    # Trim whatever sticks out past the left/top edges.
    if x < 0:
        w += x
        x = 0
    if y < 0:
        h += y
        y = 0

    # Keep the corner inside the image, then trim the right/bottom overhang.
    # A box lying completely outside the image collapses to zero size.
    x = min(x, img_w)
    y = min(y, img_h)
    w = max(min(w, img_w - x), 0)
    h = max(min(h, img_h - y), 0)

    x_center = (x + w / 2) / img_w
    y_center = (y + h / 2) / img_h
    w /= img_w
    h /= img_h
    return f"0 {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"

# Writing .txt annotation files
import cv2

def write_labels(vbb_path, image_dir, label_dir):
    """Write one YOLO label file per annotated frame that has an extracted image.

    For frame ``i`` of the annotation file, the image
    ``<image_dir>/<vbb_name>_<i:05d>.jpg`` is looked up, where ``<vbb_name>``
    is the annotation file name without extension. Frames without a matching
    image are skipped. The image is read only to obtain its width and height
    for normalisation. A label file is written even when the frame has no
    boxes (it is then empty).

    Args:
        vbb_path: Path to the .vbb annotation file.
        image_dir: Directory containing the extracted frame JPEGs.
        label_dir: Directory receiving the .txt label files; created if missing.

    Returns:
        The number of label files written.
    """
    os.makedirs(label_dir, exist_ok=True)

    frame_boxes = parse_vbb(vbb_path)
    vbb_name = os.path.splitext(os.path.basename(vbb_path))[0]

    written = 0
    for i, boxes in enumerate(frame_boxes):
        stem = f"{vbb_name}_{i:05d}"
        img_file = os.path.join(image_dir, stem + ".jpg")
        if not os.path.exists(img_file):
            continue

        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread returns None (it does not raise) for a corrupt or
            # unreadable file; skip the frame instead of crashing on .shape.
            print(f"Skipped unreadable image: {img_file}")
            continue
        h, w = img.shape[:2]

        label_file = os.path.join(label_dir, stem + ".txt")
        with open(label_file, 'w') as f:
            f.writelines(convert_to_yolo(box, w, h) + '\n' for box in boxes)
        written += 1

    # One summary line per annotation file instead of one line per frame,
    # which would flood the notebook output.
    print(f"Wrote {written} label files for {vbb_name} to {label_dir}")
    return written


# Apply to Annotations
from glob import glob

annotations_dir = "/Users/melissachang/Desktop/data_and_labels/annotations"

# Test annotations: set06-set10.
test_vbb_files = sorted(
    glob(os.path.join(annotations_dir, "set0[6-9]", "V*.vbb"))
    + glob(os.path.join(annotations_dir, "set10", "V*.vbb"))
)

# Train annotations: every set that is not a test set. Globbing "set*" alone
# would also pick up set06-set10 and write test annotations into the Train
# labels for any matching frame name.
vbb_files = sorted(
    set(glob(os.path.join(annotations_dir, "set*", "V*.vbb"))) - set(test_vbb_files)
)

# NOTE(review): images and labels are keyed only by the V* file name, so
# same-named annotation files from different sets within one split map to the
# same label files. Confirm whether file names need to include the set.
for vbb in vbb_files:
    write_labels(
        vbb_path=vbb,
        image_dir="/Users/melissachang/Desktop/data_and_labels/images/Train",
        label_dir="/Users/melissachang/Desktop/data_and_labels/labels/Train"
    )

for vbb in test_vbb_files:
    write_labels(
        vbb_path=vbb,
        image_dir="/Users/melissachang/Desktop/data_and_labels/images/Test",
        label_dir="/Users/melissachang/Desktop/data_and_labels/labels/Test"
    )
