In [9]:
import shutil
import cv2
import os

In [10]:
CWD = os.getcwd()
BASE_DIR = os.path.join(CWD,"resources","city")
vids = ["la_lower","shanghai"]

In [11]:
video_dir = os.path.join(BASE_DIR,"video","train")
img_output_dir = os.path.join(BASE_DIR,"preprocessed","images")
raw_data_dir = os.path.join(BASE_DIR,"raw-labels")
labels_output_dir = os.path.join(BASE_DIR,"preprocessed","labels")


In [15]:
def extract_frames(video_name, output_folder, ls, inds):
    """
    Extracts frames from a video and saves them as individual images.

    Args:
        video_name (str): Name of video file.
        output_folder (str): Directory to save the extracted frames.
        range (tuple): (from,to) Image extraction range inclusive
    """

    video_path = os.path.join(video_dir,video_name+".mp4")

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return

    #true_frames = set([int(labels[i][6:-4]) for i in targets])
    #print(true_frames)
    
    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:  # No more frames to read
            break

        if frame_count in inds:
            base_file = ls[frame_count][6:-4]
            frame_filename = os.path.join(output_folder, f"{video_name}_{base_file}.jpg")
            cv2.imwrite(frame_filename, frame)

        #print(f"Extracted frame# {frame_count} to {output_folder}")
        frame_count += 1

    cap.release()



In [16]:
def copy_label(video_name, output_folder, ls, inds):

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    #go thru list of given filenames (frame_123456.jpg) and copy them and rename them
    for ind in inds:
        base_file = ls[ind][6:-4]
        from_f = os.path.join(raw_data_dir, video_name,"obj_train_data", ls[ind])
        shutil.copy(from_f,output_folder)

        os.rename(os.path.join(output_folder, ls[ind]), os.path.join(output_folder, f"{video_name}_{base_file}.txt"))



In [17]:
for vid in vids:
    # process labels first to get the ones with actual content (and the images with actual labels)
    targets = []
    data = os.path.join(raw_data_dir,vid,"obj_train_data")
    labels = os.listdir(data)

    for i in range(len(labels)):
        with open(os.path.join(data,labels[i])) as f:
            if len(f.readlines())!=0:
                targets.append(i)

    print(targets)

    extract_frames(vid,str(os.path.join(img_output_dir,vid)),labels,targets)
    copy_label(vid,str(os.path.join(labels_output_dir,vid)),labels,targets)

[36, 70, 76, 147, 149, 159, 180, 240, 250, 322, 352, 444, 527, 532, 547, 707, 772, 876, 884, 903, 905, 911, 988, 994, 1019, 1028, 1082, 1159, 1193, 1274, 1283, 1287, 1317, 1406, 1458, 1475, 1538, 1546, 1618, 1718, 1750, 1758, 1832, 1841, 1842, 1865, 1913, 1917, 2007, 2036, 2112, 2210, 2223, 2246, 2378, 2448, 2560, 2589, 2591, 2593, 2664, 2674, 2705, 2761, 2856, 2893, 2967, 2975, 2984, 3139, 3156, 3216, 3300, 3381, 3563, 3618, 3638, 3703, 3783, 3814, 3826, 3907, 3932, 3997, 4006, 4083, 4116, 4193, 4196, 4201, 4227, 4409, 4437, 4552, 4564, 4633, 4676, 4760, 4761, 4785, 4832, 4833, 4845, 4923, 4957, 5031, 5037, 5068, 5233, 5237, 5283, 5303, 5318, 5365, 5483, 5502, 5504, 5512, 5584, 5627, 5678, 5687, 5761, 5769, 5790, 5799, 5878, 5880, 5881, 5906, 6082, 6134, 6246, 6252, 6321, 6342, 6430, 6435, 6471, 6517, 6525, 6527, 6607, 6637, 6715, 6721, 6745, 6826, 6834, 6857, 6862, 6938, 6943, 6993, 7014, 7016, 7019, 7091, 7128, 7224, 7240, 7286, 7433, 7459, 7516, 7556, 7560, 7561, 7631, 7642, 7663, 