In [7]:
# At the top of your notebook
%load_ext autoreload
%autoreload 2
    
import os
import cv2
import torch
import numpy as np
import supervision as sv
from PIL import Image
from sam2.build_sam import build_sam2_video_predictor, build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection 
from utils.track_utils import sample_points_from_masks
from utils.video_utils import create_video_from_images
from utils.common_utils import CommonUtils
from utils.mask_dictionary_model import MaskDictionaryModel, ObjectInfo
import json
import copy

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
!nvidia-smi

Wed Jun 11 19:42:19 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA H100 80GB HBM3          On  | 00000000:53:00.0 Off |                    0 |
| N/A   23C    P0              67W / 700W |      3MiB / 81559MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA H100 80GB HBM3          On  | 00000000:64:00.0 Off |  

In [9]:
"""
Step 1: Environment settings and model initialization
"""
# use bfloat16 for the entire notebook
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()

if torch.cuda.get_device_properties(0).major >= 8:
    # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

# init sam image predictor and video predictor model
sam2_checkpoint = "./checkpoints/sam2.1_hiera_large.pt"
model_cfg = "configs/sam2.1/sam2.1_hiera_l.yaml"
device = "cuda" if torch.cuda.is_available() else "cpu"
print("device", device)

device cuda


In [10]:
device = "cuda:0"

In [11]:
video_predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint)
sam2_image_model = build_sam2(model_cfg, sam2_checkpoint, device=device)
image_predictor = SAM2ImagePredictor(sam2_image_model)

# init grounding dino model from huggingface
model_id = "IDEA-Research/grounding-dino-tiny"
processor = AutoProcessor.from_pretrained(model_id)
grounding_model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device)

In [14]:
import glob
clips = glob.glob("outputs/frames_to_run/*")
clips

['outputs/frames_to_run/232f98b4-9293-46ea-95de-4fb4ae230648-part2-clip_00000003-0',
 'outputs/frames_to_run/9a15e066-f54e-4fe4-98cb-58f7d0f655c7-clip_00000121-4',
 'outputs/frames_to_run/Yca14HAvris-clip_00000079-2',
 'outputs/frames_to_run/677951670-clip_00000010-0',
 'outputs/frames_to_run/e5957a13-8fdd-4696-b8a5-11df9bdbb98d-part1-clip_00000039-0',
 'outputs/frames_to_run/6b887cf3-e579-4543-802d-708b7d598207-part3-clip_00000045-0',
 'outputs/frames_to_run/bLUOFZTDxXI-clip_00000010-2',
 'outputs/frames_to_run/xNVfw6I7cCA_part_000-clip_00000025-0',
 'outputs/frames_to_run/278950132-clip_00000001-0',
 'outputs/frames_to_run/442422784-clip_00000007-0',
 'outputs/frames_to_run/d7e18703-e8cd-4366-9821-e50e0c499176-part3-clip_00000037-0_anime',
 'outputs/frames_to_run/_fhf8qnFihk_part_000-clip_00000006-3',
 'outputs/frames_to_run/320150f9-253c-4d1a-b075-44e6eb264ec1-part6-clip_00000027-1',
 'outputs/frames_to_run/fOwKMumK46E-clip_00000006-2',
 'outputs/frames_to_run/cFKDTfrIpGQ_part_000-c

In [15]:
for video_dir in clips[:5]:
    # setup the input image and text prompt for SAM 2 and Grounding DINO
    # VERY important: text queries need to be lowercased + end with a dot
    #text = "car."
    #text = "person. motorcycle."
    #vid_id = "car
    text = "octopus. person. motorcycle. noodles. tree."
    
    #vid_id = "0alex_anime_3d_action_ufotable"
    #vid_id = "xNVfw6I7cCA_part_000-clip_00000025-0"
    vid_id = video_dir.split("/")[-1] #"tHEcy2tC6ug_part_000-clip_00000015-0"
    print(f"doing {vid_id}")
    
    import shutil
    
    # `video_dir` a directory of JPEG frames with filenames like `<frame_index>.jpg`  
    # video_dir = f"notebooks/videos/{vid_id}"
    
    # 'output_dir' is the directory to save the annotated frames
    output_dir = "./outputs"
    
    # 'output_video_path' is the path to save the final video
    output_video_path = f"./outputs/output_vids/{vid_id}.mp4"
    
    # create the output directory
    CommonUtils.creat_dirs(output_dir)
    mask_data_dir = os.path.join(output_dir, "mask_data")
    json_data_dir = os.path.join(output_dir, "json_data")
    result_dir = os.path.join(output_dir, "result")
    
    for d in [mask_data_dir, json_data_dir, result_dir]: # keep fresh for each vid
        if os.path.exists(d): shutil.rmtree(d)
            
    CommonUtils.creat_dirs(mask_data_dir)
    CommonUtils.creat_dirs(json_data_dir)
    # scan all the JPEG frame names in this directory
    frame_names = [
        p for p in os.listdir(video_dir)
        if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG", ".png", ".PNG"]
    ]
    frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))
    
    # init video predictor state
    inference_state = video_predictor.init_state(video_path=video_dir, offload_video_to_cpu=True, async_loading_frames=True)
    step = 20 # the step to sample frames for Grounding DINO predictor
    
    sam2_masks = MaskDictionaryModel()
    PROMPT_TYPE_FOR_VIDEO = "mask" # box, mask or point
    objects_count = 0
    
    """
    Step 2: Prompt Grounding DINO and SAM image predictor to get the box and mask for all frames
    """
    print("Total frames:", len(frame_names))
    
    for start_frame_idx in range(0, len(frame_names), step):
    # prompt grounding dino to get the box coordinates on specific frame
        print("start_frame_idx", start_frame_idx)
        # continue
        img_path = os.path.join(video_dir, frame_names[start_frame_idx])
        image = Image.open(img_path)
        image_base_name = frame_names[start_frame_idx].split(".")[0]
        mask_dict = MaskDictionaryModel(promote_type = PROMPT_TYPE_FOR_VIDEO, mask_name = f"mask_{image_base_name}.npy")
    
        # run Grounding DINO on the image
        inputs = processor(images=image, text=text, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = grounding_model(**inputs)
    
        results = processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            box_threshold=0.25,
            text_threshold=0.25,
            target_sizes=[image.size[::-1]]
        )
    
        # prompt SAM image predictor to get the mask for the object
        image_predictor.set_image(np.array(image.convert("RGB")))
    
        # process the detection results
        input_boxes = results[0]["boxes"] # .cpu().numpy()
        # print("results[0]",results[0])
        OBJECTS = results[0]["labels"]
        if input_boxes.shape[0] != 0:
            # prompt SAM 2 image predictor to get the mask for the object
            masks, scores, logits = image_predictor.predict(
                point_coords=None,
                point_labels=None,
                box=input_boxes,
                multimask_output=False,
            )
            # convert the mask shape to (n, H, W)
            if masks.ndim == 2:
                masks = masks[None]
                scores = scores[None]
                logits = logits[None]
            elif masks.ndim == 4:
                masks = masks.squeeze(1)
    
            """
            Step 3: Register each object's positive points to video predictor
            """
    
            # If you are using point prompts, we uniformly sample positive points based on the mask
            if mask_dict.promote_type == "mask":
                mask_dict.add_new_frame_annotation(mask_list=torch.tensor(masks).to(device), box_list=torch.tensor(input_boxes), label_list=OBJECTS)
            else:
                raise NotImplementedError("SAM 2 video predictor only support mask prompts")
    
    
            """
            Step 4: Propagate the video predictor to get the segmentation results for each frame
            """
            objects_count = mask_dict.update_masks(tracking_annotation_dict=sam2_masks, iou_threshold=0.8, objects_count=objects_count)
            print("objects_count", objects_count)
        else:
            print("No object detected in the frame, skip merge the frame merge {}".format(frame_names[start_frame_idx]))
            mask_dict = sam2_masks
    
        
        if len(mask_dict.labels) == 0:
            mask_dict.save_empty_mask_and_json(mask_data_dir, json_data_dir, image_name_list = frame_names[start_frame_idx:start_frame_idx+step])
            print("No object detected in the frame, skip the frame {}".format(start_frame_idx))
            continue
        else: 
            video_predictor.reset_state(inference_state)
    
            for object_id, object_info in mask_dict.labels.items():
                frame_idx, out_obj_ids, out_mask_logits = video_predictor.add_new_mask(
                        inference_state,
                        start_frame_idx,
                        object_id,
                        object_info.mask,
                    )
            
            video_segments = {}  # output the following {step} frames tracking masks
            for out_frame_idx, out_obj_ids, out_mask_logits in video_predictor.propagate_in_video(inference_state, max_frame_num_to_track=step, start_frame_idx=start_frame_idx):
                frame_masks = MaskDictionaryModel()
                
                for i, out_obj_id in enumerate(out_obj_ids):
                    out_mask = (out_mask_logits[i] > 0.0) # .cpu().numpy()
                    object_info = ObjectInfo(instance_id = out_obj_id, mask = out_mask[0], class_name = mask_dict.get_target_class_name(out_obj_id))
                    object_info.update_box()
                    frame_masks.labels[out_obj_id] = object_info
                    image_base_name = frame_names[out_frame_idx].split(".")[0]
                    frame_masks.mask_name = f"mask_{image_base_name}.npy"
                    frame_masks.mask_height = out_mask.shape[-2]
                    frame_masks.mask_width = out_mask.shape[-1]
    
                video_segments[out_frame_idx] = frame_masks
                sam2_masks = copy.deepcopy(frame_masks)
    
            print("video_segments:", len(video_segments))
        """
        Step 5: save the tracking masks and json files
        """
        for frame_idx, frame_masks_info in video_segments.items():
            mask = frame_masks_info.labels
            mask_img = torch.zeros(frame_masks_info.mask_height, frame_masks_info.mask_width)
            for obj_id, obj_info in mask.items():
                mask_img[obj_info.mask == True] = obj_id
    
            mask_img = mask_img.numpy().astype(np.uint16)
            np.save(os.path.join(mask_data_dir, frame_masks_info.mask_name), mask_img)
    
            json_data = frame_masks_info.to_dict()
            json_data_path = os.path.join(json_data_dir, frame_masks_info.mask_name.replace(".npy", ".json"))
            with open(json_data_path, "w") as f:
                json.dump(json_data, f)

        
    """
    Step 6: Draw the results and save the video
    """
    CommonUtils.draw_masks_and_box_with_supervision(video_dir, mask_data_dir, json_data_dir, result_dir)
    
    create_video_from_images(result_dir, output_video_path, frame_rate=15)


doing 232f98b4-9293-46ea-95de-4fb4ae230648-part2-clip_00000003-0
Path './outputs' already exists.
Path './outputs/mask_data' did not exist and has been created.
Path './outputs/json_data' did not exist and has been created.


frame loading (JPEG):   8%|█▋                    | 5/65 [00:00<00:01, 43.20it/s]

Total frames: 65
start_frame_idx 0
objects_count 3


frame loading (JPEG):  15%|███▏                 | 10/65 [00:00<00:01, 40.92it/s]
frame loading (JPEG):  23%|████▊                | 15/65 [00:00<00:01, 40.35it/s][A
frame loading (JPEG):  31%|██████▍              | 20/65 [00:00<00:01, 39.82it/s][A
frame loading (JPEG):  43%|█████████            | 28/65 [00:00<00:00, 39.42it/s][A
frame loading (JPEG):  49%|██████████▎          | 32/65 [00:00<00:00, 39.04it/s][A
frame loading (JPEG):  55%|███████████▋         | 36/65 [00:00<00:00, 38.99it/s][A
frame loading (JPEG):  68%|██████████████▏      | 44/65 [00:01<00:00, 38.76it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 23.12it/s][A
frame loading (JPEG):  80%|████████████████▊    | 52/65 [00:01<00:00, 38.97it/s]

video_segments: 21
start_frame_idx 20


frame loading (JPEG):  92%|███████████████████▍ | 60/65 [00:01<00:00, 38.33it/s]

objects_count 3



frame loading (JPEG): 100%|█████████████████████| 65/65 [00:01<00:00, 39.06it/s][A

propagate in video:  19%|████▌                   | 4/21 [00:00<00:00, 32.22it/s][A
propagate in video:  38%|█████████▏              | 8/21 [00:00<00:00, 28.53it/s][A
propagate in video:  52%|████████████           | 11/21 [00:00<00:00, 27.52it/s][A
propagate in video:  67%|███████████████▎       | 14/21 [00:00<00:00, 26.84it/s][A
propagate in video:  81%|██████████████████▌    | 17/21 [00:00<00:00, 26.56it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 27.03it/s][A


video_segments: 21
start_frame_idx 40
objects_count 5


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 25.15it/s]


video_segments: 21
start_frame_idx 60
objects_count 5


propagate in video: 100%|█████████████████████████| 5/5 [00:00<00:00, 31.89it/s]

video_segments: 5





Path './outputs/result' did not exist and has been created.
raw image name list ['00000.jpg', '00001.jpg', '00002.jpg', '00003.jpg', '00004.jpg', '00005.jpg', '00006.jpg', '00007.jpg', '00008.jpg', '00009.jpg', '00010.jpg', '00011.jpg', '00012.jpg', '00013.jpg', '00014.jpg', '00015.jpg', '00016.jpg', '00017.jpg', '00018.jpg', '00019.jpg', '00020.jpg', '00021.jpg', '00022.jpg', '00023.jpg', '00024.jpg', '00025.jpg', '00026.jpg', '00027.jpg', '00028.jpg', '00029.jpg', '00030.jpg', '00031.jpg', '00032.jpg', '00033.jpg', '00034.jpg', '00035.jpg', '00036.jpg', '00037.jpg', '00038.jpg', '00039.jpg', '00040.jpg', '00041.jpg', '00042.jpg', '00043.jpg', '00044.jpg', '00045.jpg', '00046.jpg', '00047.jpg', '00048.jpg', '00049.jpg', '00050.jpg', '00051.jpg', '00052.jpg', '00053.jpg', '00054.jpg', '00055.jpg', '00056.jpg', '00057.jpg', '00058.jpg', '00059.jpg', '00060.jpg', '00061.jpg', '00062.jpg', '00063.jpg', '00064.jpg']
Annotated image saved as ./outputs/result/00000.jpg
Annotated image saved 

100%|██████████████████████████████████████████| 65/65 [00:00<00:00, 129.25it/s]


Video saved at ./outputs/output_vids/232f98b4-9293-46ea-95de-4fb4ae230648-part2-clip_00000003-0.mp4
doing 9a15e066-f54e-4fe4-98cb-58f7d0f655c7-clip_00000121-4
Path './outputs' already exists.
Path './outputs/mask_data' did not exist and has been created.
Path './outputs/json_data' did not exist and has been created.


frame loading (JPEG):   0%|                             | 0/214 [00:00<?, ?it/s]

Total frames: 214
start_frame_idx 0


frame loading (JPEG):   4%|▊                    | 8/214 [00:00<00:07, 29.14it/s]

objects_count 2



frame loading (JPEG):   7%|█▎                  | 14/214 [00:00<00:07, 28.02it/s][A
frame loading (JPEG):   8%|█▌                  | 17/214 [00:00<00:07, 27.82it/s][A
frame loading (JPEG):   9%|█▊                  | 20/214 [00:00<00:06, 27.85it/s][A
frame loading (JPEG):  12%|██▍                 | 26/214 [00:00<00:06, 27.64it/s][A
frame loading (JPEG):  14%|██▋                 | 29/214 [00:01<00:06, 27.87it/s][A
frame loading (JPEG):  15%|██▉                 | 32/214 [00:01<00:06, 28.30it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 24.70it/s][A
frame loading (JPEG):  18%|███▌                | 38/214 [00:01<00:06, 28.78it/s]

video_segments: 21


frame loading (JPEG):  23%|████▋               | 50/214 [00:01<00:05, 29.10it/s]

start_frame_idx 20
objects_count 3



frame loading (JPEG):  25%|████▉               | 53/214 [00:01<00:05, 29.08it/s][A
frame loading (JPEG):  28%|█████▌              | 59/214 [00:02<00:05, 29.27it/s][A
frame loading (JPEG):  29%|█████▊              | 62/214 [00:02<00:05, 29.27it/s][A
frame loading (JPEG):  32%|██████▎             | 68/214 [00:02<00:05, 28.84it/s][A
frame loading (JPEG):  33%|██████▋             | 71/214 [00:02<00:04, 28.70it/s][A
frame loading (JPEG):  35%|██████▉             | 74/214 [00:02<00:04, 28.74it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 23.64it/s][A
frame loading (JPEG):  39%|███████▊            | 83/214 [00:02<00:04, 28.38it/s]

video_segments: 21


frame loading (JPEG):  43%|████████▌           | 92/214 [00:03<00:04, 28.72it/s]

start_frame_idx 40


frame loading (JPEG):  44%|████████▉           | 95/214 [00:03<00:04, 28.83it/s]

objects_count 3



frame loading (JPEG):  46%|█████████▏          | 98/214 [00:03<00:04, 28.82it/s][A
frame loading (JPEG):  49%|█████████▏         | 104/214 [00:03<00:03, 28.91it/s][A
frame loading (JPEG):  50%|█████████▌         | 107/214 [00:03<00:03, 28.89it/s][A
frame loading (JPEG):  51%|█████████▊         | 110/214 [00:03<00:03, 28.96it/s][A
frame loading (JPEG):  53%|██████████         | 113/214 [00:03<00:03, 28.99it/s][A
frame loading (JPEG):  54%|██████████▎        | 116/214 [00:04<00:03, 28.99it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 24.97it/s][A
frame loading (JPEG):  58%|███████████        | 125/214 [00:04<00:03, 28.89it/s]

video_segments: 21


frame loading (JPEG):  63%|███████████▉       | 134/214 [00:04<00:02, 28.04it/s]

start_frame_idx 60
objects_count 3



frame loading (JPEG):  64%|████████████▏      | 137/214 [00:04<00:02, 27.84it/s][A
frame loading (JPEG):  67%|████████████▋      | 143/214 [00:05<00:02, 27.83it/s][A
frame loading (JPEG):  68%|████████████▉      | 146/214 [00:05<00:02, 27.88it/s][A
frame loading (JPEG):  70%|█████████████▏     | 149/214 [00:05<00:02, 27.84it/s][A
frame loading (JPEG):  71%|█████████████▍     | 152/214 [00:05<00:02, 27.76it/s][A
frame loading (JPEG):  72%|█████████████▊     | 155/214 [00:05<00:02, 27.63it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 25.70it/s][A
frame loading (JPEG):  77%|██████████████▌    | 164/214 [00:05<00:01, 28.23it/s]

video_segments: 21


frame loading (JPEG):  81%|███████████████▎   | 173/214 [00:06<00:01, 28.12it/s]

start_frame_idx 80
objects_count 4



frame loading (JPEG):  82%|███████████████▋   | 176/214 [00:06<00:01, 28.06it/s][A
frame loading (JPEG):  85%|████████████████▏  | 182/214 [00:06<00:01, 28.60it/s][A
frame loading (JPEG):  86%|████████████████▍  | 185/214 [00:06<00:01, 28.91it/s][A
frame loading (JPEG):  89%|████████████████▉  | 191/214 [00:06<00:00, 29.19it/s][A
frame loading (JPEG):  91%|█████████████████▏ | 194/214 [00:06<00:00, 29.34it/s][A
frame loading (JPEG):  92%|█████████████████▍ | 197/214 [00:06<00:00, 29.25it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 23.46it/s][A
frame loading (JPEG):  96%|██████████████████▎| 206/214 [00:07<00:00, 29.13it/s]

video_segments: 21


frame loading (JPEG): 100%|███████████████████| 214/214 [00:07<00:00, 28.60it/s]


start_frame_idx 100
objects_count 4


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 26.95it/s]


video_segments: 21
start_frame_idx 120
objects_count 4


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 26.67it/s]


video_segments: 21
start_frame_idx 140
objects_count 4


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 27.02it/s]


video_segments: 21
start_frame_idx 160
objects_count 4


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 27.43it/s]


video_segments: 21
start_frame_idx 180
objects_count 4


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 27.33it/s]


video_segments: 21
start_frame_idx 200
objects_count 6


propagate in video: 100%|███████████████████████| 14/14 [00:00<00:00, 26.20it/s]


video_segments: 14
Path './outputs/result' did not exist and has been created.
raw image name list ['00000.jpg', '00001.jpg', '00002.jpg', '00003.jpg', '00004.jpg', '00005.jpg', '00006.jpg', '00007.jpg', '00008.jpg', '00009.jpg', '00010.jpg', '00011.jpg', '00012.jpg', '00013.jpg', '00014.jpg', '00015.jpg', '00016.jpg', '00017.jpg', '00018.jpg', '00019.jpg', '00020.jpg', '00021.jpg', '00022.jpg', '00023.jpg', '00024.jpg', '00025.jpg', '00026.jpg', '00027.jpg', '00028.jpg', '00029.jpg', '00030.jpg', '00031.jpg', '00032.jpg', '00033.jpg', '00034.jpg', '00035.jpg', '00036.jpg', '00037.jpg', '00038.jpg', '00039.jpg', '00040.jpg', '00041.jpg', '00042.jpg', '00043.jpg', '00044.jpg', '00045.jpg', '00046.jpg', '00047.jpg', '00048.jpg', '00049.jpg', '00050.jpg', '00051.jpg', '00052.jpg', '00053.jpg', '00054.jpg', '00055.jpg', '00056.jpg', '00057.jpg', '00058.jpg', '00059.jpg', '00060.jpg', '00061.jpg', '00062.jpg', '00063.jpg', '00064.jpg', '00065.jpg', '00066.jpg', '00067.jpg', '00068.jpg', '00

100%|█████████████████████████████████████████| 214/214 [00:03<00:00, 58.27it/s]


Video saved at ./outputs/output_vids/9a15e066-f54e-4fe4-98cb-58f7d0f655c7-clip_00000121-4.mp4
doing Yca14HAvris-clip_00000079-2
Path './outputs' already exists.
Path './outputs/mask_data' did not exist and has been created.
Path './outputs/json_data' did not exist and has been created.


frame loading (JPEG):   2%|▍                    | 5/251 [00:00<00:05, 43.06it/s]

Total frames: 251
start_frame_idx 0
objects_count 6


frame loading (JPEG):   4%|▊                   | 10/251 [00:00<00:06, 40.05it/s]
frame loading (JPEG):   6%|█▏                  | 15/251 [00:00<00:06, 39.18it/s][A
frame loading (JPEG):   8%|█▌                  | 19/251 [00:00<00:06, 38.35it/s][A
frame loading (JPEG):  11%|██▏                 | 27/251 [00:00<00:05, 38.19it/s][A
frame loading (JPEG):  12%|██▍                 | 31/251 [00:00<00:05, 38.18it/s][A
frame loading (JPEG):  14%|██▊                 | 35/251 [00:00<00:05, 38.16it/s][A
frame loading (JPEG):  16%|███                 | 39/251 [00:01<00:05, 38.07it/s][A
frame loading (JPEG):  19%|███▋                | 47/251 [00:01<00:05, 37.91it/s][A
frame loading (JPEG):  20%|████                | 51/251 [00:01<00:05, 37.97it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 18.47it/s][A
frame loading (JPEG):  24%|████▋               | 59/251 [00:01<00:05, 38.05it/s]

video_segments: 21
start_frame_idx 20


frame loading (JPEG):  27%|█████▎              | 67/251 [00:01<00:04, 37.91it/s]

objects_count 7



frame loading (JPEG):  28%|█████▋              | 71/251 [00:01<00:04, 37.53it/s][A
frame loading (JPEG):  31%|██████▎             | 79/251 [00:02<00:04, 36.92it/s][A
frame loading (JPEG):  33%|██████▌             | 83/251 [00:02<00:04, 36.97it/s][A
frame loading (JPEG):  36%|███████▎            | 91/251 [00:02<00:04, 36.72it/s][A
frame loading (JPEG):  38%|███████▌            | 95/251 [00:02<00:04, 37.08it/s][A
frame loading (JPEG):  39%|███████▉            | 99/251 [00:02<00:04, 37.31it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 22.15it/s][A
frame loading (JPEG):  44%|████████▍          | 111/251 [00:02<00:03, 37.86it/s]

video_segments: 21
start_frame_idx 40


frame loading (JPEG):  47%|█████████          | 119/251 [00:03<00:03, 37.94it/s]

objects_count 11



frame loading (JPEG):  49%|█████████▎         | 123/251 [00:03<00:03, 37.67it/s][A
frame loading (JPEG):  51%|█████████▌         | 127/251 [00:03<00:03, 37.80it/s][A
frame loading (JPEG):  54%|██████████▏        | 135/251 [00:03<00:03, 37.93it/s][A
frame loading (JPEG):  57%|██████████▊        | 143/251 [00:03<00:02, 38.13it/s][A
frame loading (JPEG):  59%|███████████▏       | 147/251 [00:03<00:02, 38.12it/s][A
frame loading (JPEG):  60%|███████████▍       | 151/251 [00:03<00:02, 38.29it/s][A
frame loading (JPEG):  62%|███████████▋       | 155/251 [00:04<00:02, 38.37it/s][A
frame loading (JPEG):  63%|████████████       | 159/251 [00:04<00:02, 38.56it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 19.25it/s][A
frame loading (JPEG):  67%|████████████▋      | 167/251 [00:04<00:02, 38.34it/s]

video_segments: 21
start_frame_idx 60


frame loading (JPEG):  70%|█████████████▏     | 175/251 [00:04<00:01, 38.30it/s]

objects_count 11



frame loading (JPEG):  71%|█████████████▌     | 179/251 [00:04<00:01, 38.42it/s][A
frame loading (JPEG):  73%|█████████████▊     | 183/251 [00:04<00:01, 38.11it/s][A
frame loading (JPEG):  75%|██████████████▏    | 187/251 [00:04<00:01, 38.25it/s][A
frame loading (JPEG):  78%|██████████████▊    | 195/251 [00:05<00:01, 38.55it/s][A
frame loading (JPEG):  79%|███████████████    | 199/251 [00:05<00:01, 38.65it/s][A
frame loading (JPEG):  81%|███████████████▎   | 203/251 [00:05<00:01, 38.60it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 26.28it/s][A
frame loading (JPEG):  84%|███████████████▉   | 211/251 [00:05<00:01, 38.58it/s]

video_segments: 21
start_frame_idx 80


frame loading (JPEG):  87%|████████████████▌  | 219/251 [00:05<00:00, 38.28it/s]

objects_count 15



frame loading (JPEG):  89%|████████████████▉  | 223/251 [00:05<00:00, 38.18it/s][A
frame loading (JPEG):  92%|█████████████████▍ | 231/251 [00:06<00:00, 38.29it/s][A
frame loading (JPEG):  94%|█████████████████▊ | 235/251 [00:06<00:00, 38.39it/s][A
frame loading (JPEG):  95%|██████████████████ | 239/251 [00:06<00:00, 38.41it/s][A
frame loading (JPEG):  98%|██████████████████▋| 247/251 [00:06<00:00, 38.47it/s][A
frame loading (JPEG): 100%|███████████████████| 251/251 [00:06<00:00, 38.11it/s][A

propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 21.88it/s][A


video_segments: 21
start_frame_idx 100
objects_count 19


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 18.95it/s]


video_segments: 21
start_frame_idx 120
objects_count 22


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 19.32it/s]


video_segments: 21
start_frame_idx 140
objects_count 30


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 15.31it/s]


video_segments: 21
start_frame_idx 160
objects_count 39


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 14.55it/s]


video_segments: 21
start_frame_idx 180
objects_count 40


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 19.81it/s]


video_segments: 21
start_frame_idx 200
objects_count 42


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 19.72it/s]


video_segments: 21
start_frame_idx 220
objects_count 45


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 19.19it/s]


video_segments: 21
start_frame_idx 240
objects_count 52


propagate in video: 100%|███████████████████████| 11/11 [00:00<00:00, 17.06it/s]


video_segments: 11
Path './outputs/result' did not exist and has been created.
raw image name list ['00000.jpg', '00001.jpg', '00002.jpg', '00003.jpg', '00004.jpg', '00005.jpg', '00006.jpg', '00007.jpg', '00008.jpg', '00009.jpg', '00010.jpg', '00011.jpg', '00012.jpg', '00013.jpg', '00014.jpg', '00015.jpg', '00016.jpg', '00017.jpg', '00018.jpg', '00019.jpg', '00020.jpg', '00021.jpg', '00022.jpg', '00023.jpg', '00024.jpg', '00025.jpg', '00026.jpg', '00027.jpg', '00028.jpg', '00029.jpg', '00030.jpg', '00031.jpg', '00032.jpg', '00033.jpg', '00034.jpg', '00035.jpg', '00036.jpg', '00037.jpg', '00038.jpg', '00039.jpg', '00040.jpg', '00041.jpg', '00042.jpg', '00043.jpg', '00044.jpg', '00045.jpg', '00046.jpg', '00047.jpg', '00048.jpg', '00049.jpg', '00050.jpg', '00051.jpg', '00052.jpg', '00053.jpg', '00054.jpg', '00055.jpg', '00056.jpg', '00057.jpg', '00058.jpg', '00059.jpg', '00060.jpg', '00061.jpg', '00062.jpg', '00063.jpg', '00064.jpg', '00065.jpg', '00066.jpg', '00067.jpg', '00068.jpg', '00

100%|█████████████████████████████████████████| 251/251 [00:02<00:00, 96.93it/s]


Video saved at ./outputs/output_vids/Yca14HAvris-clip_00000079-2.mp4
doing 677951670-clip_00000010-0
Path './outputs' already exists.
Path './outputs/mask_data' did not exist and has been created.
Path './outputs/json_data' did not exist and has been created.


frame loading (JPEG):   1%|▎                    | 4/300 [00:00<00:07, 38.25it/s]

Total frames: 300
start_frame_idx 0


frame loading (JPEG):   3%|▌                    | 8/300 [00:00<00:08, 33.48it/s]

objects_count 1



frame loading (JPEG):   4%|▊                   | 12/300 [00:00<00:09, 31.56it/s][A
frame loading (JPEG):   5%|█                   | 16/300 [00:00<00:09, 31.11it/s][A
frame loading (JPEG):   7%|█▎                  | 20/300 [00:00<00:08, 31.13it/s][A
frame loading (JPEG):   8%|█▌                  | 24/300 [00:00<00:08, 30.86it/s][A
frame loading (JPEG):   9%|█▊                  | 28/300 [00:00<00:08, 30.93it/s][A
propagate in video:  81%|██████████████████▌    | 17/21 [00:00<00:00, 26.12it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 26.69it/s][A
frame loading (JPEG):  12%|██▍                 | 36/300 [00:01<00:08, 30.44it/s]

video_segments: 21


frame loading (JPEG):  17%|███▎                | 50/300 [00:01<00:08, 29.08it/s]

start_frame_idx 20
objects_count 5



frame loading (JPEG):  18%|███▌                | 53/300 [00:01<00:08, 28.76it/s][A
frame loading (JPEG):  20%|███▉                | 59/300 [00:01<00:08, 29.14it/s][A
frame loading (JPEG):  22%|████▎               | 65/300 [00:02<00:07, 29.53it/s][A
frame loading (JPEG):  23%|████▌               | 69/300 [00:02<00:07, 29.80it/s][A
frame loading (JPEG):  24%|████▊               | 73/300 [00:02<00:07, 29.92it/s][A
frame loading (JPEG):  25%|█████               | 76/300 [00:02<00:07, 29.86it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 22.69it/s][A
frame loading (JPEG):  28%|█████▋              | 85/300 [00:02<00:07, 29.31it/s]

video_segments: 21


frame loading (JPEG):  31%|██████▎             | 94/300 [00:03<00:06, 29.49it/s]

start_frame_idx 40


frame loading (JPEG):  32%|██████▍             | 97/300 [00:03<00:06, 29.30it/s]

objects_count 11



frame loading (JPEG):  33%|██████▎            | 100/300 [00:03<00:06, 29.19it/s][A
frame loading (JPEG):  35%|██████▌            | 104/300 [00:03<00:06, 29.52it/s][A
frame loading (JPEG):  37%|███████            | 112/300 [00:03<00:06, 29.96it/s][A
frame loading (JPEG):  39%|███████▎           | 116/300 [00:03<00:06, 30.13it/s][A
frame loading (JPEG):  40%|███████▌           | 120/300 [00:03<00:05, 30.19it/s][A
propagate in video:  67%|███████████████▎       | 14/21 [00:00<00:00, 17.82it/s][A
frame loading (JPEG):  41%|███████▊           | 124/300 [00:04<00:05, 29.37it/s][A
frame loading (JPEG):  43%|████████           | 128/300 [00:04<00:05, 29.72it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 18.22it/s][A
frame loading (JPEG):  45%|████████▌          | 136/300 [00:04<00:05, 30.00it/s]

video_segments: 21


frame loading (JPEG):  52%|█████████▉         | 156/300 [00:05<00:04, 29.96it/s]

start_frame_idx 60
objects_count 11



frame loading (JPEG):  53%|██████████▏        | 160/300 [00:05<00:04, 30.10it/s][A
frame loading (JPEG):  55%|██████████▍        | 164/300 [00:05<00:04, 29.15it/s][A
frame loading (JPEG):  57%|██████████▊        | 170/300 [00:05<00:04, 28.32it/s][A
frame loading (JPEG):  58%|██████████▉        | 173/300 [00:05<00:04, 28.08it/s][A
frame loading (JPEG):  59%|███████████▏       | 176/300 [00:05<00:04, 27.94it/s][A
frame loading (JPEG):  60%|███████████▎       | 179/300 [00:06<00:04, 27.89it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 23.61it/s][A
frame loading (JPEG):  63%|███████████▉       | 188/300 [00:06<00:04, 27.58it/s]

video_segments: 21


frame loading (JPEG):  66%|████████████▍      | 197/300 [00:06<00:03, 27.60it/s]

start_frame_idx 80


frame loading (JPEG):  67%|████████████▋      | 200/300 [00:06<00:03, 27.43it/s]

objects_count 12



frame loading (JPEG):  68%|████████████▊      | 203/300 [00:06<00:03, 27.63it/s][A
frame loading (JPEG):  69%|█████████████      | 206/300 [00:07<00:03, 27.50it/s][A
frame loading (JPEG):  70%|█████████████▏     | 209/300 [00:07<00:03, 27.08it/s][A
frame loading (JPEG):  72%|█████████████▌     | 215/300 [00:07<00:03, 27.06it/s][A
frame loading (JPEG):  73%|█████████████▊     | 218/300 [00:07<00:03, 26.99it/s][A
frame loading (JPEG):  74%|█████████████▉     | 221/300 [00:07<00:02, 27.01it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 25.30it/s][A
frame loading (JPEG):  76%|██████████████▍    | 227/300 [00:07<00:02, 27.14it/s]

video_segments: 21


frame loading (JPEG):  79%|██████████████▉    | 236/300 [00:08<00:02, 27.15it/s]

start_frame_idx 100


frame loading (JPEG):  80%|███████████████▏   | 239/300 [00:08<00:02, 27.36it/s]

objects_count 18



frame loading (JPEG):  81%|███████████████▎   | 242/300 [00:08<00:02, 27.30it/s][A
frame loading (JPEG):  82%|███████████████▌   | 245/300 [00:08<00:02, 27.40it/s][A
frame loading (JPEG):  84%|███████████████▉   | 251/300 [00:08<00:01, 27.23it/s][A
frame loading (JPEG):  85%|████████████████   | 254/300 [00:08<00:01, 27.11it/s][A
frame loading (JPEG):  86%|████████████████▎  | 257/300 [00:08<00:01, 27.18it/s][A
frame loading (JPEG):  87%|████████████████▍  | 260/300 [00:08<00:01, 27.29it/s][A
frame loading (JPEG):  88%|████████████████▋  | 263/300 [00:09<00:01, 27.02it/s][A
frame loading (JPEG):  89%|████████████████▊  | 266/300 [00:09<00:01, 27.03it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 18.63it/s][A
frame loading (JPEG):  92%|█████████████████▍ | 275/300 [00:09<00:00, 27.00it/s]

video_segments: 21


frame loading (JPEG):  97%|██████████████████▎| 290/300 [00:10<00:00, 27.04it/s]

start_frame_idx 120


frame loading (JPEG):  98%|██████████████████▌| 293/300 [00:10<00:00, 26.64it/s]

objects_count 20



frame loading (JPEG):  99%|██████████████████▋| 296/300 [00:10<00:00, 26.73it/s][A
frame loading (JPEG): 100%|███████████████████| 300/300 [00:10<00:00, 28.62it/s][A

propagate in video:  33%|████████                | 7/21 [00:00<00:00, 22.95it/s][A
propagate in video:  48%|██████████▉            | 10/21 [00:00<00:00, 20.94it/s][A
propagate in video:  62%|██████████████▏        | 13/21 [00:00<00:00, 20.11it/s][A
propagate in video:  76%|█████████████████▌     | 16/21 [00:00<00:00, 19.63it/s][A
propagate in video:  86%|███████████████████▋   | 18/21 [00:00<00:00, 19.44it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 20.23it/s][A


video_segments: 21
start_frame_idx 140
objects_count 20


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 25.45it/s]


video_segments: 21
start_frame_idx 160
objects_count 26


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 18.36it/s]


video_segments: 21
start_frame_idx 180
objects_count 33


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 18.53it/s]


video_segments: 21
start_frame_idx 200
objects_count 34


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 23.65it/s]


video_segments: 21
start_frame_idx 220
objects_count 37


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 19.80it/s]


video_segments: 21
start_frame_idx 240
objects_count 37


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 25.57it/s]


video_segments: 21
start_frame_idx 260
objects_count 39


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 20.36it/s]


video_segments: 21
start_frame_idx 280
objects_count 41


propagate in video: 100%|███████████████████████| 20/20 [00:00<00:00, 23.72it/s]


video_segments: 20
Path './outputs/result' did not exist and has been created.
raw image name list ['00000.jpg', '00001.jpg', '00002.jpg', '00003.jpg', '00004.jpg', '00005.jpg', '00006.jpg', '00007.jpg', '00008.jpg', '00009.jpg', '00010.jpg', '00011.jpg', '00012.jpg', '00013.jpg', '00014.jpg', '00015.jpg', '00016.jpg', '00017.jpg', '00018.jpg', '00019.jpg', '00020.jpg', '00021.jpg', '00022.jpg', '00023.jpg', '00024.jpg', '00025.jpg', '00026.jpg', '00027.jpg', '00028.jpg', '00029.jpg', '00030.jpg', '00031.jpg', '00032.jpg', '00033.jpg', '00034.jpg', '00035.jpg', '00036.jpg', '00037.jpg', '00038.jpg', '00039.jpg', '00040.jpg', '00041.jpg', '00042.jpg', '00043.jpg', '00044.jpg', '00045.jpg', '00046.jpg', '00047.jpg', '00048.jpg', '00049.jpg', '00050.jpg', '00051.jpg', '00052.jpg', '00053.jpg', '00054.jpg', '00055.jpg', '00056.jpg', '00057.jpg', '00058.jpg', '00059.jpg', '00060.jpg', '00061.jpg', '00062.jpg', '00063.jpg', '00064.jpg', '00065.jpg', '00066.jpg', '00067.jpg', '00068.jpg', '00

100%|█████████████████████████████████████████| 300/300 [00:04<00:00, 66.61it/s]


Video saved at ./outputs/output_vids/677951670-clip_00000010-0.mp4
doing e5957a13-8fdd-4696-b8a5-11df9bdbb98d-part1-clip_00000039-0
Path './outputs' already exists.
Path './outputs/mask_data' did not exist and has been created.
Path './outputs/json_data' did not exist and has been created.


frame loading (JPEG):   4%|▋                    | 5/142 [00:00<00:03, 41.90it/s]

Total frames: 142
start_frame_idx 0


frame loading (JPEG):   7%|█▍                  | 10/142 [00:00<00:03, 39.16it/s]

objects_count 8



frame loading (JPEG):  10%|█▉                  | 14/142 [00:00<00:03, 38.35it/s][A
frame loading (JPEG):  15%|███                 | 22/142 [00:00<00:03, 37.77it/s][A
frame loading (JPEG):  21%|████▏               | 30/142 [00:00<00:02, 37.58it/s][A
frame loading (JPEG):  24%|████▊               | 34/142 [00:00<00:02, 37.84it/s][A
frame loading (JPEG):  27%|█████▎              | 38/142 [00:01<00:02, 37.15it/s][A
frame loading (JPEG):  30%|█████▉              | 42/142 [00:01<00:02, 37.25it/s][A
frame loading (JPEG):  32%|██████▍             | 46/142 [00:01<00:02, 37.36it/s][A
frame loading (JPEG):  38%|███████▌            | 54/142 [00:01<00:02, 37.51it/s][A
frame loading (JPEG):  41%|████████▏           | 58/142 [00:01<00:02, 37.50it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 16.99it/s][A
frame loading (JPEG):  46%|█████████▎          | 66/142 [00:01<00:02, 37.33it/s]

video_segments: 21


frame loading (JPEG):  58%|███████████▌        | 82/142 [00:02<00:01, 37.60it/s]

start_frame_idx 20


frame loading (JPEG):  61%|████████████        | 86/142 [00:02<00:01, 37.63it/s]

objects_count 11



frame loading (JPEG):  63%|████████████▋       | 90/142 [00:02<00:01, 37.79it/s][A
frame loading (JPEG):  69%|█████████████▊      | 98/142 [00:02<00:01, 38.10it/s][A
frame loading (JPEG):  72%|█████████████▋     | 102/142 [00:02<00:01, 38.22it/s][A
frame loading (JPEG):  75%|██████████████▏    | 106/142 [00:02<00:00, 38.38it/s][A
frame loading (JPEG):  80%|███████████████▎   | 114/142 [00:03<00:00, 38.55it/s][A
frame loading (JPEG):  83%|███████████████▊   | 118/142 [00:03<00:00, 38.35it/s][A
frame loading (JPEG):  86%|████████████████▎  | 122/142 [00:03<00:00, 38.55it/s][A
frame loading (JPEG):  89%|████████████████▊  | 126/142 [00:03<00:00, 38.57it/s][A
propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 17.93it/s][A
frame loading (JPEG):  97%|██████████████████▍| 138/142 [00:03<00:00, 38.84it/s]

video_segments: 21


frame loading (JPEG): 100%|███████████████████| 142/142 [00:03<00:00, 38.03it/s]


start_frame_idx 40
objects_count 14


propagate in video: 100%|███████████████████████| 21/21 [00:01<00:00, 16.49it/s]


video_segments: 21
start_frame_idx 60
objects_count 14


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 23.71it/s]


video_segments: 21
start_frame_idx 80
objects_count 14


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 24.19it/s]


video_segments: 21
start_frame_idx 100
objects_count 14


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 24.10it/s]


video_segments: 21
start_frame_idx 120
objects_count 14


propagate in video: 100%|███████████████████████| 21/21 [00:00<00:00, 26.71it/s]


video_segments: 21
start_frame_idx 140
objects_count 15


propagate in video: 100%|█████████████████████████| 2/2 [00:00<00:00, 48.99it/s]

video_segments: 2
Path './outputs/result' did not exist and has been created.
raw image name list ['00000.jpg', '00001.jpg', '00002.jpg', '00003.jpg', '00004.jpg', '00005.jpg', '00006.jpg', '00007.jpg', '00008.jpg', '00009.jpg', '00010.jpg', '00011.jpg', '00012.jpg', '00013.jpg', '00014.jpg', '00015.jpg', '00016.jpg', '00017.jpg', '00018.jpg', '00019.jpg', '00020.jpg', '00021.jpg', '00022.jpg', '00023.jpg', '00024.jpg', '00025.jpg', '00026.jpg', '00027.jpg', '00028.jpg', '00029.jpg', '00030.jpg', '00031.jpg', '00032.jpg', '00033.jpg', '00034.jpg', '00035.jpg', '00036.jpg', '00037.jpg', '00038.jpg', '00039.jpg', '00040.jpg', '00041.jpg', '00042.jpg', '00043.jpg', '00044.jpg', '00045.jpg', '00046.jpg', '00047.jpg', '00048.jpg', '00049.jpg', '00050.jpg', '00051.jpg', '00052.jpg', '00053.jpg', '00054.jpg', '00055.jpg', '00056.jpg', '00057.jpg', '00058.jpg', '00059.jpg', '00060.jpg', '00061.jpg', '00062.jpg', '00063.jpg', '00064.jpg', '00065.jpg', '00066.jpg', '00067.jpg', '00068.jpg', '000




Annotated image saved as ./outputs/result/00000.jpg
Annotated image saved as ./outputs/result/00001.jpg
Annotated image saved as ./outputs/result/00002.jpg
Annotated image saved as ./outputs/result/00003.jpg
Annotated image saved as ./outputs/result/00004.jpg
Annotated image saved as ./outputs/result/00005.jpg
Annotated image saved as ./outputs/result/00006.jpg
Annotated image saved as ./outputs/result/00007.jpg
Annotated image saved as ./outputs/result/00008.jpg
Annotated image saved as ./outputs/result/00009.jpg
Annotated image saved as ./outputs/result/00010.jpg
Annotated image saved as ./outputs/result/00011.jpg
Annotated image saved as ./outputs/result/00012.jpg
Annotated image saved as ./outputs/result/00013.jpg
Annotated image saved as ./outputs/result/00014.jpg
Annotated image saved as ./outputs/result/00015.jpg
Annotated image saved as ./outputs/result/00016.jpg
Annotated image saved as ./outputs/result/00017.jpg
Annotated image saved as ./outputs/result/00018.jpg
Annotated im

100%|████████████████████████████████████████| 142/142 [00:01<00:00, 121.41it/s]

Video saved at ./outputs/output_vids/e5957a13-8fdd-4696-b8a5-11df9bdbb98d-part1-clip_00000039-0.mp4





In [9]:
video_dir

'notebooks/videos/tHEcy2tC6ug_part_000-clip_00000015-0'

Path './outputs/result' did not exist and has been created.
raw image name list ['.ipynb_checkpoints', '00000.jpg', '00001.jpg', '00002.jpg', '00003.jpg', '00004.jpg', '00005.jpg', '00006.jpg', '00007.jpg', '00008.jpg', '00009.jpg', '00010.jpg', '00011.jpg', '00012.jpg', '00013.jpg', '00014.jpg', '00015.jpg', '00016.jpg', '00017.jpg', '00018.jpg', '00019.jpg', '00020.jpg', '00021.jpg', '00022.jpg', '00023.jpg', '00024.jpg', '00025.jpg', '00026.jpg', '00027.jpg', '00028.jpg', '00029.jpg', '00030.jpg', '00031.jpg', '00032.jpg', '00033.jpg', '00034.jpg', '00035.jpg', '00036.jpg', '00037.jpg', '00038.jpg', '00039.jpg', '00040.jpg', '00041.jpg', '00042.jpg', '00043.jpg', '00044.jpg', '00045.jpg', '00046.jpg', '00047.jpg', '00048.jpg', '00049.jpg', '00050.jpg', '00051.jpg', '00052.jpg', '00053.jpg', '00054.jpg', '00055.jpg', '00056.jpg', '00057.jpg', '00058.jpg', '00059.jpg', '00060.jpg', '00061.jpg', '00062.jpg', '00063.jpg', '00064.jpg', '00065.jpg', '00066.jpg', '00067.jpg', '00068.jpg', 

100%|███████████████████████████████████████████████████████████████| 258/258 [00:03<00:00, 69.24it/s]

Video saved at ./outputs/tHEcy2tC6ug_part_000-clip_00000015-0.mp4



