In [2]:
import os
import glob
import shutil
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pycocotools import mask as cocomask
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from collections import defaultdict
import torch
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.sam2_image_predictor import SAM2ImagePredictor
from sam2.build_sam import build_sam2_video_predictor

In [3]:
# SAM2 model setup
checkpoint = r"C:\Users\dell\Desktop\Assignment\sam2_hiera_tiny.pt"
model_cfg = r"C:\Users\dell\Desktop\Assignment\sam2\configs\sam2\sam2_hiera_t.yaml"


# Initialize SAM2 components
predictor_prompt = SAM2ImagePredictor(build_sam2(model_cfg, checkpoint, device='cpu'))
sam2 = build_sam2(model_cfg, checkpoint, device='cpu', apply_postprocessing=False)
mask_generator = SAM2AutomaticMaskGenerator(sam2)
predictor_vid = build_sam2_video_predictor(model_cfg, checkpoint, device='cpu')

In [4]:
# Temporary directory for processing
tempfolder = "./tempdir"

def create_if_not_exists(dirname):
    if not os.path.exists(dirname):
        os.mkdir(dirname)

def cleardir(tempfolder):
    filepaths = glob.glob(tempfolder+"/*")
    for filepath in filepaths:
        os.unlink(filepath)

In [5]:
def show_mask(mask, ax, obj_id=None, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        cmap = plt.get_cmap("tab10")
        cmap_idx = 0 if obj_id is None else obj_id
        color = np.array([*cmap(cmap_idx)[:3], 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

In [6]:
def track_item_boxes(imgpath1, imgpath2, img1boxclasslist, visualize=True):
    """Track objects between two images using SAM2"""
    create_if_not_exists(tempfolder)
    cleardir(tempfolder)
    shutil.copy(imgpath1, os.path.join(tempfolder, "00000.jpg"))
    shutil.copy(imgpath2, os.path.join(tempfolder, "00001.jpg"))
    
    inference_state = predictor_vid.init_state(video_path="./tempdir")
    predictor_vid.reset_state(inference_state)
    ann_frame_idx = 0
    
    for img1boxclass in img1boxclasslist:
        ([xmin, xmax, ymin, ymax], objectnumint) = img1boxclass
        box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
        _, out_obj_ids, out_mask_logits = predictor_vid.add_new_points_or_box(
            inference_state=inference_state,
            frame_idx=ann_frame_idx,
            obj_id=objectnumint,
            box=box,
        )
    
    video_segments = {}
    for out_frame_idx, out_obj_ids, out_mask_logits in predictor_vid.propagate_in_video(inference_state):
        print(f"\nDebug: Frame {out_frame_idx} has {len(out_obj_ids)} objects")  # Debug print
        print(f"Object IDs: {out_obj_ids}")  # Debug print
        
        video_segments[out_frame_idx] = {
            out_obj_id: (out_mask_logits[i] > 0.0).cpu().numpy()
            for i, out_obj_id in enumerate(out_obj_ids)
        }
    
    if visualize:
        fig, ax = plt.subplots()
        plt.title(f"original image object ::")
        ax.imshow(Image.open(os.path.join(tempfolder, "00000.jpg")))
        rect = patches.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, 
                               linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        plt.show()
        
        out_frame_idx = 1
        plt.figure(figsize=(6, 4))
        plt.title(f"detected object in test image ::")
        plt.imshow(Image.open(os.path.join(tempfolder, "00001.jpg")))
        for out_obj_id, out_mask in video_segments[out_frame_idx].items():
            show_mask(out_mask, plt.gca(), obj_id=out_obj_id)
        plt.show()
    
    return video_segments

In [7]:
def extract_category_from_filename(filename):
    """Extract category name from filename"""
    return '_'.join(os.path.basename(filename).split('_')[:-1])

def group_files_by_category(data_dir):
    """Group image and mask files by their product category"""
    image_files = glob.glob(os.path.join(data_dir, "*.jpg"))
    mask_files = glob.glob(os.path.join(data_dir, "*_gt.png"))
    
    category_dict = defaultdict(lambda: {'images': [], 'masks': []})
    
    for img in image_files:
        category = extract_category_from_filename(img)
        category_dict[category]['images'].append(img)
    
    for mask in mask_files:
        base_name = '_'.join(os.path.basename(mask).split('_')[:-2])
        category = extract_category_from_filename(base_name + '.jpg')
        
        matching_images = [img for img in category_dict[category]['images'] 
                         if base_name in img]
        
        for img in matching_images:
            category_dict[category]['masks'].append((img, mask))
    
    return category_dict

In [8]:
def process_img_png_mask(img_path, mask_path, visualize=False):
    """Extract bounding box coordinates from mask"""
    try:
        img = Image.open(img_path)
        mask = Image.open(mask_path)
        mask_array = np.array(mask)
        
        if mask_array.size == 0:
            print(f"Warning: Empty mask in {mask_path}")
            return 0, 0, 0, 0
            
        rows = np.any(mask_array, axis=1)
        cols = np.any(mask_array, axis=0)
        
        ymin, ymax = np.where(rows)[0][[0, -1]]
        xmin, xmax = np.where(cols)[0][[0, -1]]
        
        if visualize:
            fig, ax = plt.subplots(1, 2, figsize=(10, 5))
            ax[0].imshow(img)
            ax[0].set_title("Original Image")
            ax[1].imshow(mask_array)
            rect = plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, 
                               linewidth=1, edgecolor='r', facecolor='none')
            ax[1].add_patch(rect)
            ax[1].set_title("Mask with Bounding Box")
            plt.show()
        
        return xmin, xmax, ymin, ymax
        
    except Exception as e:
        print(f"Error processing {img_path}: {str(e)}")
        return 0, 0, 0, 0

In [9]:
def mask_to_bbox(mask):
    """Convert binary mask to COCO-style bounding box [x,y,width,height]"""
    # Handle 3D mask by taking first channel if needed
    if len(mask.shape) == 3:
        mask = mask[0] if mask.shape[0] == 1 else mask
    
    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
    
    if not np.any(rows) or not np.any(cols):
        return [0, 0, 0, 0]  # Return empty bbox if no True values
    
    ymin, ymax = np.where(rows)[0][[0, -1]]
    xmin, xmax = np.where(cols)[0][[0, -1]]
    
    return [int(xmin), int(ymin), int(xmax-xmin), int(ymax-ymin)]

In [10]:
def evaluate_product_category(category_name, image_mask_pairs):
    """Evaluate SAM2 performance on one product category"""
    print(f"\nEvaluating {category_name}...")
    
    if not image_mask_pairs or len(image_mask_pairs) < 2:
        print("Error: Need at least 2 image-mask pairs for evaluation")
        return None
    
    # Use first image-mask pair as reference
    first_img, first_mask = image_mask_pairs[0]
    print(f"Using reference image: {os.path.basename(first_img)}")
    print(f"Using reference mask: {os.path.basename(first_mask)}")
    
    xmin, xmax, ymin, ymax = process_img_png_mask(first_img, first_mask)
    print(f"Reference bounding box: x=[{xmin},{xmax}], y=[{ymin},{ymax}]")
    
    coco_gt = {"images": [], "annotations": [], "categories": [{"id": 1, "name": "product"}]}
    coco_dt = []
    
    for i, (img_path, mask_path) in enumerate(image_mask_pairs[1:]):
        print(f"\nProcessing image {i+1}/{len(image_mask_pairs)-1}: {os.path.basename(img_path)}")
        
        try:
            # Process ground truth
            gt_mask = np.array(Image.open(mask_path))
            gt_bbox = mask_to_bbox(gt_mask)
            print(f"Ground truth bbox: {gt_bbox}")
            
            # Add to COCO ground truth
            image_id = i+1
            coco_gt["images"].append({"id": image_id, "file_name": img_path})
            
            rle = cocomask.encode(np.asfortranarray(gt_mask))
            rle['counts'] = rle['counts'].decode('ascii')
            
            coco_gt["annotations"].append({
                "id": image_id,
                "image_id": image_id,
                "category_id": 1,
                "bbox": gt_bbox,
                "area": int(gt_bbox[2] * gt_bbox[3]),
                "iscrowd": 0,
                "segmentation": rle
            })
            
            # Get SAM2 prediction
            video_segments = track_item_boxes(first_img, img_path, [([xmin, xmax, ymin, ymax], 1)], False)
            
            if not video_segments or 1 not in video_segments:
                print("Warning: No valid segments found in frame 1")
                continue
                
            # Get the prediction mask
            pred_mask = video_segments[1][1]  # Frame 1, Object ID 1
            
            # Convert mask to correct format if needed
            if len(pred_mask.shape) == 3:
                pred_mask = pred_mask[0] if pred_mask.shape[0] == 1 else pred_mask
            
            pred_bbox = mask_to_bbox(pred_mask)
            print(f"Predicted bbox: {pred_bbox}")
            
            # Skip if prediction is empty
            if pred_bbox == [0, 0, 0, 0]:
                print("Warning: Empty prediction - skipping")
                continue
                
            # Convert prediction to COCO format
            pred_rle = cocomask.encode(np.asfortranarray(pred_mask.astype(np.uint8)))
            pred_rle['counts'] = pred_rle['counts'].decode('ascii')
            
            coco_dt.append({
                "image_id": image_id,
                "category_id": 1,
                "bbox": pred_bbox,
                "score": 1.0,
                "segmentation": pred_rle
            })
            
        except Exception as e:
            print(f"Error processing {img_path}: {str(e)}")
            continue
    
    if not coco_dt:
        print("Error: No valid predictions generated!")
        return None
    
    try:
        coco_gt_obj = COCO()
        coco_gt_obj.dataset = coco_gt
        coco_gt_obj.createIndex()
        
        coco_dt_obj = coco_gt_obj.loadRes(coco_dt)
        coco_eval = COCOeval(coco_gt_obj, coco_dt_obj, 'bbox')
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        
        return coco_eval.stats
        
    except Exception as e:
        print(f"Error in COCO evaluation: {str(e)}")
        return None

In [11]:
def main():
    data_dir = r"C:\Users\dell\Desktop\Assignment\CMU10_3D\data_2D"
    
    # Group files by category
    category_dict = group_files_by_category(data_dir)
    
    # Evaluate each category
    results = {}
    for category, files in category_dict.items():
        # Create image-mask pairs
        image_mask_pairs = []
        for img in files['images']:
            base_name = os.path.splitext(os.path.basename(img))[0]
            masks = [mask for (img_path, mask) in files['masks'] 
                    if base_name in os.path.basename(mask)]
            
            if masks:
                image_mask_pairs.append((img, masks[0]))  # Using first mask
        
        if len(image_mask_pairs) < 2:
            print(f"Skipping {category} - needs at least 2 images with masks")
            continue
            
        stats = evaluate_product_category(category, image_mask_pairs)
        
        if stats is not None:
            results[category] = {
                "AP": stats[0],  # AP @ IoU=0.50:0.95
                "AP50": stats[1],  # AP @ IoU=0.50
                "AP75": stats[2],  # AP @ IoU=0.75
                "AR": stats[8]     # AR @ maxDets=100
            }
    
    # Print results
    print("\n=== Final Results ===")
    for product, metrics in results.items():
        print(f"\n{product}:")
        print(f"  AP: {metrics['AP']:.3f}")
        print(f"  AP50: {metrics['AP50']:.3f}")
        print(f"  AP75: {metrics['AP75']:.3f}")
        print(f"  AR: {metrics['AR']:.3f}")

if __name__ == "__main__":
    main()


Evaluating can_chowder...
Using reference image: can_chowder_000001.jpg
Using reference mask: can_chowder_000001_1_gt.png
Reference bounding box: x=[371,464], y=[150,290]

Processing image 1/49: can_chowder_000002.jpg
Ground truth bbox: [249, 148, 87, 130]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00,  9.73it/s]

Skipping the post-processing step due to the error above. You can still use SAM 2 and it's OK to ignore the error above, although some post-processing functionality may be limited (which doesn't affect the results in most cases; see https://github.com/facebookresearch/sam2/blob/main/INSTALL.md).
  pred_masks_gpu = fill_holes_in_mask_scores(
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.66s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [249, 150, 82, 128]

Processing image 2/49: can_chowder_000003.jpg
Ground truth bbox: [398, 92, 105, 146]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.43it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.68s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [404, 93, 97, 145]

Processing image 3/49: can_chowder_000004.jpg
Ground truth bbox: [397, 96, 100, 125]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.12it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.74s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 4/49: can_chowder_000005.jpg
Ground truth bbox: [87, 108, 129, 166]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.29it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.81s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [451, 98, 118, 158]

Processing image 5/49: can_chowder_000006.jpg
Ground truth bbox: [175, 189, 103, 143]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 10.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.76s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [439, 132, 94, 120]

Processing image 6/49: can_chowder_000007.jpg
Ground truth bbox: [415, 9, 116, 152]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.08it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.33s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [299, 213, 70, 110]

Processing image 7/49: can_chowder_000008.jpg
Ground truth bbox: [365, 217, 80, 132]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.80it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [333, 200, 146, 145]

Processing image 8/49: can_chowder_000009.jpg
Ground truth bbox: [383, 107, 78, 100]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.50it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.36s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [385, 106, 76, 101]

Processing image 9/49: can_chowder_000010.jpg
Ground truth bbox: [473, 65, 102, 127]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.17it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.35s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [477, 65, 95, 125]

Processing image 10/49: can_chowder_000011.jpg
Ground truth bbox: [212, 186, 149, 130]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.89it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.30s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [471, 115, 94, 128]

Processing image 11/49: can_chowder_000012.jpg
Ground truth bbox: [85, 46, 172, 138]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.79it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.30s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 12/49: can_chowder_000013.jpg
Ground truth bbox: [392, 152, 111, 162]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.89it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.32s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 13/49: can_chowder_000014.jpg
Ground truth bbox: [191, 169, 70, 110]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.75it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.33s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [189, 167, 74, 110]

Processing image 14/49: can_chowder_000015.jpg
Ground truth bbox: [138, 121, 72, 96]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.95it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.32s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 15/49: can_chowder_000016.jpg
Ground truth bbox: [245, 172, 91, 107]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.07it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.29s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 16/49: can_chowder_000017.jpg
Ground truth bbox: [306, 203, 195, 269]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.86it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.27s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [310, 201, 189, 270]

Processing image 17/49: can_chowder_000018.jpg
Ground truth bbox: [109, 94, 108, 146]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.72it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.34s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 18/49: can_chowder_000019.jpg
Ground truth bbox: [226, 149, 102, 115]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.73it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.31s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 19/49: can_chowder_000020.jpg
Ground truth bbox: [143, 49, 73, 115]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.97it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.36s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 20/49: can_chowder_000021.jpg
Ground truth bbox: [135, 130, 102, 142]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.29s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [250, 131, 166, 149]

Processing image 21/49: can_chowder_000022.jpg
Ground truth bbox: [151, 229, 98, 134]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.36it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.31s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [150, 193, 143, 170]

Processing image 22/49: can_chowder_000023.jpg
Ground truth bbox: [219, 149, 121, 175]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.56it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.32s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 23/49: can_chowder_000024.jpg
Ground truth bbox: [197, 184, 93, 136]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.29it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.35s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 24/49: can_chowder_000025.jpg
Ground truth bbox: [190, 116, 106, 151]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.29it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.33s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 25/49: can_chowder_000026.jpg
Ground truth bbox: [210, 160, 111, 159]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.49it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.30s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [419, 104, 97, 137]

Processing image 26/49: can_chowder_000027.jpg
Ground truth bbox: [101, 117, 129, 198]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.73it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.70s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 120, 219, 190]

Processing image 27/49: can_chowder_000028.jpg
Ground truth bbox: [250, 122, 130, 188]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 10.21it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 28/49: can_chowder_000029.jpg
Ground truth bbox: [312, 44, 111, 169]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.78it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.39s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 29/49: can_chowder_000030.jpg
Ground truth bbox: [182, 122, 95, 144]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.63it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.37s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 30/49: can_chowder_000031.jpg
Ground truth bbox: [82, 136, 169, 218]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.13it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.37s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [81, 134, 284, 223]

Processing image 31/49: can_chowder_000032.jpg
Ground truth bbox: [70, 119, 178, 117]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.67it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.37s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [206, 270, 265, 209]

Processing image 32/49: can_chowder_000033.jpg
Ground truth bbox: [124, 138, 135, 165]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 10.58it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [28, 64, 517, 415]

Processing image 33/49: can_chowder_000034.jpg
Ground truth bbox: [178, 36, 114, 179]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.14it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 20, 381, 204]

Processing image 34/49: can_chowder_000035.jpg
Ground truth bbox: [268, 109, 98, 137]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.07it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 35/49: can_chowder_000036.jpg
Ground truth bbox: [182, 98, 117, 148]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.78it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 36/49: can_chowder_000037.jpg
Ground truth bbox: [191, 80, 120, 182]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.26it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 37/49: can_chowder_000038.jpg
Ground truth bbox: [198, 64, 124, 172]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.11it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 38/49: can_chowder_000039.jpg
Ground truth bbox: [137, 136, 131, 183]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.45it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 39/49: can_chowder_000040.jpg
Ground truth bbox: [173, 139, 111, 165]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.57it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.39s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 40/49: can_chowder_000041.jpg
Ground truth bbox: [229, 161, 117, 168]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [229, 167, 84, 131]

Processing image 41/49: can_chowder_000042.jpg
Ground truth bbox: [217, 113, 92, 137]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.92it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 42/49: can_chowder_000043.jpg
Ground truth bbox: [365, 108, 128, 176]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.80it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [367, 108, 121, 178]

Processing image 43/49: can_chowder_000044.jpg
Ground truth bbox: [167, 144, 72, 109]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.96it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.55s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 44/49: can_chowder_000045.jpg
Ground truth bbox: [159, 182, 112, 149]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.91it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 45/49: can_chowder_000046.jpg
Ground truth bbox: [206, 75, 121, 149]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.73it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 46/49: can_chowder_000047.jpg
Ground truth bbox: [167, 275, 114, 163]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.25it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 47/49: can_chowder_000048.jpg
Ground truth bbox: [97, 235, 120, 167]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.38it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 48/49: can_chowder_000049.jpg
Ground truth bbox: [134, 81, 84, 124]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.94it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [35, 53, 326, 150]

Processing image 49/49: can_chowder_000050.jpg
Ground truth bbox: [207, 123, 74, 94]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.38it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [176, 322, 95, 109]
creating index...
index created!
Error in COCO evaluation: 'info'

Evaluating can_soymilk...
Using reference image: can_soymilk_000001.jpg
Using reference mask: can_soymilk_000001_1_gt.png
Reference bounding box: x=[432,508], y=[101,229]

Processing image 1/49: can_soymilk_000002.jpg
Ground truth bbox: [426, 174, 91, 133]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.12it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [427, 178, 84, 128]

Processing image 2/49: can_soymilk_000003.jpg
Ground truth bbox: [75, 34, 92, 144]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.07it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 3/49: can_soymilk_000004.jpg
Ground truth bbox: [204, 25, 80, 127]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.05it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 4/49: can_soymilk_000005.jpg
Ground truth bbox: [44, 288, 73, 115]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.24it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [500, 243, 103, 153]

Processing image 5/49: can_soymilk_000006.jpg
Ground truth bbox: [365, 168, 59, 109]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.68it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 6/49: can_soymilk_000007.jpg
Ground truth bbox: [208, 195, 65, 102]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.57it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.59s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 7/49: can_soymilk_000008.jpg
Ground truth bbox: [382, 162, 61, 127]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.87it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.67s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [434, 153, 57, 115]

Processing image 8/49: can_soymilk_000009.jpg
Ground truth bbox: [464, 131, 61, 117]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.55it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:06<00:00,  3.15s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [524, 127, 58, 113]

Processing image 9/49: can_soymilk_000010.jpg
Ground truth bbox: [444, 290, 67, 116]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00,  9.72it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.96s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [458, 278, 108, 124]

Processing image 10/49: can_soymilk_000011.jpg
Ground truth bbox: [45, 116, 76, 121]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 10.21it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:06<00:00,  3.07s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [198, 216, 116, 187]

Processing image 11/49: can_soymilk_000012.jpg
Ground truth bbox: [80, 180, 71, 117]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00,  7.95it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:06<00:00,  3.03s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [78, 162, 343, 231]

Processing image 12/49: can_soymilk_000013.jpg
Ground truth bbox: [161, 102, 55, 103]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 10.10it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.90s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [338, 233, 54, 102]

Processing image 13/49: can_soymilk_000014.jpg
Ground truth bbox: [271, 81, 69, 126]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.58it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.65s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [148, 85, 307, 150]

Processing image 14/49: can_soymilk_000015.jpg
Ground truth bbox: [261, 40, 72, 114]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.58it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.57s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [482, 210, 53, 132]

Processing image 15/49: can_soymilk_000016.jpg
Ground truth bbox: [251, 89, 61, 113]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.48it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.56s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 16/49: can_soymilk_000017.jpg
Ground truth bbox: [441, 204, 79, 131]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.28it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.59s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [448, 282, 170, 145]

Processing image 17/49: can_soymilk_000018.jpg
Ground truth bbox: [254, 71, 55, 94]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.14it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.56s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 18/49: can_soymilk_000019.jpg
Ground truth bbox: [118, 165, 69, 114]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.54it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.74s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 19/49: can_soymilk_000020.jpg
Ground truth bbox: [341, 125, 49, 89]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.43it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.68s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [348, 124, 137, 103]

Processing image 20/49: can_soymilk_000021.jpg
Ground truth bbox: [282, 253, 53, 102]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.37it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [382, 254, 56, 100]

Processing image 21/49: can_soymilk_000022.jpg
Ground truth bbox: [412, 110, 128, 76]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.76it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [516, 64, 123, 131]

Processing image 22/49: can_soymilk_000023.jpg
Ground truth bbox: [231, 197, 101, 173]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.23it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 23/49: can_soymilk_000024.jpg
Ground truth bbox: [349, 213, 104, 179]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.34it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [352, 217, 102, 155]

Processing image 24/49: can_soymilk_000025.jpg
Ground truth bbox: [183, 166, 96, 166]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.19it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 25/49: can_soymilk_000026.jpg
Ground truth bbox: [170, 160, 114, 206]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.38it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [479, 34, 114, 295]

Processing image 26/49: can_soymilk_000027.jpg
Ground truth bbox: [282, 136, 82, 144]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.42it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [280, 134, 128, 145]

Processing image 27/49: can_soymilk_000028.jpg
Ground truth bbox: [171, 172, 92, 149]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 10.81it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 28/49: can_soymilk_000029.jpg
Ground truth bbox: [304, 175, 114, 179]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.85it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [307, 175, 112, 167]

Processing image 29/49: can_soymilk_000030.jpg
Ground truth bbox: [228, 166, 104, 187]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.59it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [230, 168, 237, 187]

Processing image 30/49: can_soymilk_000031.jpg
Ground truth bbox: [175, 169, 100, 169]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.05it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [180, 132, 311, 208]

Processing image 31/49: can_soymilk_000032.jpg
Ground truth bbox: [191, 168, 115, 169]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.71it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.38s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 32/49: can_soymilk_000033.jpg
Ground truth bbox: [305, 124, 84, 150]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.06it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [305, 128, 187, 146]

Processing image 33/49: can_soymilk_000034.jpg
Ground truth bbox: [246, 115, 89, 151]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.72it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [249, 116, 217, 160]

Processing image 34/49: can_soymilk_000035.jpg
Ground truth bbox: [450, 241, 96, 174]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.89it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [449, 247, 97, 167]

Processing image 35/49: can_soymilk_000036.jpg
Ground truth bbox: [285, 108, 91, 165]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.46it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [455, 67, 176, 179]

Processing image 36/49: can_soymilk_000037.jpg
Ground truth bbox: [117, 137, 108, 155]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.94it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [433, 84, 97, 143]

Processing image 37/49: can_soymilk_000038.jpg
Ground truth bbox: [182, 156, 93, 141]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.57it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [439, 191, 192, 205]

Processing image 38/49: can_soymilk_000039.jpg
Ground truth bbox: [217, 138, 105, 179]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.67it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [220, 142, 98, 172]

Processing image 39/49: can_soymilk_000040.jpg
Ground truth bbox: [245, 152, 95, 157]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [244, 55, 137, 252]

Processing image 40/49: can_soymilk_000041.jpg
Ground truth bbox: [273, 158, 99, 163]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.04it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 41/49: can_soymilk_000042.jpg
Ground truth bbox: [117, 157, 112, 160]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.16it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [534, 196, 105, 157]

Processing image 42/49: can_soymilk_000043.jpg
Ground truth bbox: [139, 125, 189, 130]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.31it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 43/49: can_soymilk_000044.jpg
Ground truth bbox: [276, 136, 91, 169]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.93it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [276, 135, 93, 171]

Processing image 44/49: can_soymilk_000045.jpg
Ground truth bbox: [283, 194, 87, 144]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.16it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [287, 194, 83, 144]

Processing image 45/49: can_soymilk_000046.jpg
Ground truth bbox: [249, 106, 97, 154]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.39it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [98, 97, 363, 154]

Processing image 46/49: can_soymilk_000047.jpg
Ground truth bbox: [306, 168, 59, 133]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.62it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 47/49: can_soymilk_000048.jpg
Ground truth bbox: [275, 102, 89, 164]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.82it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [278, 103, 86, 159]

Processing image 48/49: can_soymilk_000049.jpg
Ground truth bbox: [182, 149, 124, 206]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00,  9.88it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 49/49: can_soymilk_000050.jpg
Ground truth bbox: [83, 113, 76, 141]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.80it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [507, 176, 103, 158]
creating index...
index created!
Error in COCO evaluation: 'info'

Evaluating can_tomatosoup...
Using reference image: can_tomatosoup_000001.jpg
Using reference mask: can_tomatosoup_000001_1_gt.png
Reference bounding box: x=[127,202], y=[135,239]

Processing image 1/49: can_tomatosoup_000002.jpg
Ground truth bbox: [171, 133, 61, 88]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.45it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [169, 131, 63, 93]

Processing image 2/49: can_tomatosoup_000003.jpg
Ground truth bbox: [400, 168, 60, 99]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.29it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [405, 172, 133, 117]

Processing image 3/49: can_tomatosoup_000004.jpg
Ground truth bbox: [385, 106, 64, 105]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.93it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [313, 105, 186, 139]

Processing image 4/49: can_tomatosoup_000005.jpg
Ground truth bbox: [374, 62, 86, 140]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.98it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 5/49: can_tomatosoup_000006.jpg
Ground truth bbox: [438, 22, 73, 114]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.93it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [136, 268, 190, 192]

Processing image 6/49: can_tomatosoup_000007.jpg
Ground truth bbox: [369, 7, 72, 114]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.29it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 7/49: can_tomatosoup_000008.jpg
Ground truth bbox: [398, 61, 60, 90]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.54it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [142, 274, 156, 169]

Processing image 8/49: can_tomatosoup_000009.jpg
Ground truth bbox: [340, 145, 67, 120]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.29it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 9/49: can_tomatosoup_000010.jpg
Ground truth bbox: [371, 99, 90, 142]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.37it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 10/49: can_tomatosoup_000011.jpg
Ground truth bbox: [49, 14, 120, 155]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.31it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [48, 2, 120, 166]

Processing image 11/49: can_tomatosoup_000012.jpg
Ground truth bbox: [109, 43, 80, 110]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.54it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [111, 0, 80, 151]

Processing image 12/49: can_tomatosoup_000013.jpg
Ground truth bbox: [327, 90, 77, 125]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.86it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 13/49: can_tomatosoup_000014.jpg
Ground truth bbox: [260, 81, 79, 126]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.98it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.56s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 14/49: can_tomatosoup_000015.jpg
Ground truth bbox: [227, 142, 79, 108]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.31it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 15/49: can_tomatosoup_000016.jpg
Ground truth bbox: [283, 188, 79, 128]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.72it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 16/49: can_tomatosoup_000017.jpg
Ground truth bbox: [238, 53, 90, 130]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.86it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 17/49: can_tomatosoup_000018.jpg
Ground truth bbox: [193, 167, 88, 144]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.23it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 18/49: can_tomatosoup_000019.jpg
Ground truth bbox: [279, 255, 75, 132]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.03it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [230, 216, 227, 169]

Processing image 19/49: can_tomatosoup_000020.jpg
Ground truth bbox: [394, 209, 51, 90]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.42it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 20/49: can_tomatosoup_000021.jpg
Ground truth bbox: [198, 112, 81, 116]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.00it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 21/49: can_tomatosoup_000022.jpg
Ground truth bbox: [251, 127, 86, 153]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00,  8.85it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 22/49: can_tomatosoup_000023.jpg
Ground truth bbox: [407, 99, 92, 161]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.54it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 23/49: can_tomatosoup_000024.jpg
Ground truth bbox: [237, 81, 75, 126]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.22it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [316, 169, 232, 210]

Processing image 24/49: can_tomatosoup_000025.jpg
Ground truth bbox: [236, 219, 78, 126]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.03it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [150, 197, 219, 148]

Processing image 25/49: can_tomatosoup_000026.jpg
Ground truth bbox: [277, 137, 71, 120]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.56s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 26/49: can_tomatosoup_000027.jpg
Ground truth bbox: [324, 152, 80, 133]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 10.38it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 27/49: can_tomatosoup_000028.jpg
Ground truth bbox: [289, 237, 70, 121]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.48it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 28/49: can_tomatosoup_000029.jpg
Ground truth bbox: [438, 180, 67, 95]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.65it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 29/49: can_tomatosoup_000030.jpg
Ground truth bbox: [314, 265, 89, 126]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.28it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 30/49: can_tomatosoup_000031.jpg
Ground truth bbox: [256, 221, 75, 131]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.08it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 31/49: can_tomatosoup_000032.jpg
Ground truth bbox: [254, 169, 108, 170]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.54it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 32/49: can_tomatosoup_000033.jpg
Ground truth bbox: [427, 156, 116, 155]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.21it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 33/49: can_tomatosoup_000034.jpg
Ground truth bbox: [199, 161, 99, 164]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.20it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 34/49: can_tomatosoup_000035.jpg
Ground truth bbox: [232, 168, 93, 148]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.88it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 35/49: can_tomatosoup_000036.jpg
Ground truth bbox: [234, 87, 103, 165]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 36/49: can_tomatosoup_000037.jpg
Ground truth bbox: [166, 53, 81, 122]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.90it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 37/49: can_tomatosoup_000038.jpg
Ground truth bbox: [221, 177, 71, 120]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.65it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [220, 152, 111, 145]

Processing image 38/49: can_tomatosoup_000039.jpg
Ground truth bbox: [174, 91, 76, 113]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.13it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 39/49: can_tomatosoup_000040.jpg
Ground truth bbox: [179, 112, 97, 159]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.91it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 40/49: can_tomatosoup_000041.jpg
Ground truth bbox: [135, 98, 126, 155]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.55it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 41/49: can_tomatosoup_000042.jpg
Ground truth bbox: [94, 123, 111, 176]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.17it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [101, 145, 538, 242]

Processing image 42/49: can_tomatosoup_000043.jpg
Ground truth bbox: [97, 144, 119, 160]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.67it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 43/49: can_tomatosoup_000044.jpg
Ground truth bbox: [111, 116, 92, 127]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.54it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.39s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [220, 176, 311, 235]

Processing image 44/49: can_tomatosoup_000045.jpg
Ground truth bbox: [265, 159, 75, 127]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.03it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.56s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 45/49: can_tomatosoup_000046.jpg
Ground truth bbox: [287, 169, 66, 126]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.07it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [186, 160, 432, 262]

Processing image 46/49: can_tomatosoup_000047.jpg
Ground truth bbox: [207, 125, 67, 109]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.64it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 47/49: can_tomatosoup_000048.jpg
Ground truth bbox: [327, 231, 93, 150]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 48/49: can_tomatosoup_000049.jpg
Ground truth bbox: [113, 190, 125, 190]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.97it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 49/49: can_tomatosoup_000050.jpg
Ground truth bbox: [139, 222, 98, 154]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.93it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [138, 226, 95, 149]
creating index...
index created!
Error in COCO evaluation: 'info'

Evaluating carton_oj...
Using reference image: carton_oj_000001.jpg
Using reference mask: carton_oj_000001_1_gt.png
Reference bounding box: x=[118,225], y=[80,270]

Processing image 1/49: carton_oj_000002.jpg
Ground truth bbox: [464, 97, 173, 218]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.88it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [49, 134, 164, 193]

Processing image 2/49: carton_oj_000003.jpg
Ground truth bbox: [323, 11, 122, 224]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.60it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.39s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [326, 14, 116, 221]

Processing image 3/49: carton_oj_000004.jpg
Ground truth bbox: [486, 213, 98, 169]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.90it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 4/49: carton_oj_000005.jpg
Ground truth bbox: [10, 121, 224, 186]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.13it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 5/49: carton_oj_000006.jpg
Ground truth bbox: [3, 240, 198, 174]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.80it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [478, 0, 161, 240]

Processing image 6/49: carton_oj_000007.jpg
Ground truth bbox: [189, 236, 176, 186]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.65it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [373, 16, 125, 241]

Processing image 7/49: carton_oj_000008.jpg
Ground truth bbox: [233, 259, 146, 116]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.23it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [380, 65, 89, 176]

Processing image 8/49: carton_oj_000009.jpg
Ground truth bbox: [377, 121, 112, 203]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.17it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [407, 111, 153, 190]

Processing image 9/49: carton_oj_000010.jpg
Ground truth bbox: [368, 113, 101, 201]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.98it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [371, 105, 238, 211]

Processing image 10/49: carton_oj_000011.jpg
Ground truth bbox: [233, 267, 90, 167]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.16it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [228, 229, 97, 201]

Processing image 11/49: carton_oj_000012.jpg
Ground truth bbox: [133, 12, 117, 212]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.97it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [255, 52, 95, 166]

Processing image 12/49: carton_oj_000013.jpg
Ground truth bbox: [335, 57, 190, 114]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.25it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [364, 221, 116, 214]

Processing image 13/49: carton_oj_000014.jpg
Ground truth bbox: [347, 114, 168, 95]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.66it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [389, 245, 97, 181]

Processing image 14/49: carton_oj_000015.jpg
Ground truth bbox: [291, 83, 124, 239]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [248, 88, 164, 229]

Processing image 15/49: carton_oj_000016.jpg
Ground truth bbox: [99, 183, 228, 133]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.57it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 16/49: carton_oj_000017.jpg
Ground truth bbox: [51, 175, 199, 91]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.00it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 17/49: carton_oj_000018.jpg
Ground truth bbox: [115, 83, 180, 96]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.99it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 18/49: carton_oj_000019.jpg
Ground truth bbox: [337, 112, 110, 184]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.60it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [506, 121, 107, 193]

Processing image 19/49: carton_oj_000020.jpg
Ground truth bbox: [249, 137, 85, 168]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.55it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [393, 167, 85, 180]

Processing image 20/49: carton_oj_000021.jpg
Ground truth bbox: [173, 118, 181, 286]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.06it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [171, 117, 184, 256]

Processing image 21/49: carton_oj_000022.jpg
Ground truth bbox: [227, 199, 134, 198]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.43it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [71, 61, 337, 408]

Processing image 22/49: carton_oj_000023.jpg
Ground truth bbox: [236, 131, 102, 234]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.14it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [234, 142, 105, 223]

Processing image 23/49: carton_oj_000024.jpg
Ground truth bbox: [197, 110, 172, 301]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.75it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [201, 105, 167, 307]

Processing image 24/49: carton_oj_000025.jpg
Ground truth bbox: [248, 73, 168, 293]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.68it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [248, 78, 152, 283]

Processing image 25/49: carton_oj_000026.jpg
Ground truth bbox: [138, 71, 135, 262]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.61it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [137, 72, 138, 261]

Processing image 26/49: carton_oj_000027.jpg
Ground truth bbox: [140, 66, 198, 375]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.49it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [141, 75, 197, 404]

Processing image 27/49: carton_oj_000028.jpg
Ground truth bbox: [258, 62, 129, 230]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.82it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [260, 66, 126, 225]

Processing image 28/49: carton_oj_000029.jpg
Ground truth bbox: [447, 72, 153, 280]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.11it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 29/49: carton_oj_000030.jpg
Ground truth bbox: [111, 103, 176, 295]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.75it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [114, 104, 169, 291]

Processing image 30/49: carton_oj_000031.jpg
Ground truth bbox: [245, 144, 170, 250]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.13it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [188, 146, 230, 233]

Processing image 31/49: carton_oj_000032.jpg
Ground truth bbox: [235, 78, 165, 309]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.07it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [234, 79, 165, 241]

Processing image 32/49: carton_oj_000033.jpg
Ground truth bbox: [154, 169, 137, 233]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.08it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [165, 142, 243, 214]

Processing image 33/49: carton_oj_000034.jpg
Ground truth bbox: [181, 43, 153, 268]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.31it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 34/49: carton_oj_000035.jpg
Ground truth bbox: [295, 31, 141, 274]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.68it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [236, 31, 200, 272]

Processing image 35/49: carton_oj_000036.jpg
Ground truth bbox: [392, 78, 83, 190]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.90it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [395, 81, 81, 184]

Processing image 36/49: carton_oj_000037.jpg
Ground truth bbox: [223, 165, 124, 233]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.35it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [224, 166, 123, 230]

Processing image 37/49: carton_oj_000038.jpg
Ground truth bbox: [357, 68, 141, 203]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.47it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 38/49: carton_oj_000039.jpg
Ground truth bbox: [233, 97, 89, 190]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.26it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [253, 113, 201, 172]

Processing image 39/49: carton_oj_000040.jpg
Ground truth bbox: [298, 122, 99, 174]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.95it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [306, 154, 197, 144]

Processing image 40/49: carton_oj_000041.jpg
Ground truth bbox: [299, 80, 114, 233]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.11it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [305, 84, 229, 229]

Processing image 41/49: carton_oj_000042.jpg
Ground truth bbox: [175, 56, 111, 220]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.19it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [180, 61, 108, 201]

Processing image 42/49: carton_oj_000043.jpg
Ground truth bbox: [238, 100, 143, 228]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.58it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [241, 100, 141, 232]

Processing image 43/49: carton_oj_000044.jpg
Ground truth bbox: [243, 84, 123, 215]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.06it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [212, 61, 154, 236]

Processing image 44/49: carton_oj_000045.jpg
Ground truth bbox: [88, 129, 178, 301]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.63it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.55s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [110, 133, 151, 251]

Processing image 45/49: carton_oj_000046.jpg
Ground truth bbox: [254, 118, 136, 267]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.35it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [257, 123, 120, 263]

Processing image 46/49: carton_oj_000047.jpg
Ground truth bbox: [369, 103, 121, 201]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.04it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [371, 107, 204, 200]

Processing image 47/49: carton_oj_000048.jpg
Ground truth bbox: [93, 320, 114, 158]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.46it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.59s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [398, 0, 241, 171]

Processing image 48/49: carton_oj_000049.jpg
Ground truth bbox: [232, 109, 114, 173]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.51it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [246, 66, 207, 183]

Processing image 49/49: carton_oj_000050.jpg
Ground truth bbox: [103, 102, 139, 205]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.04it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [358, 156, 63, 172]
creating index...
index created!
Error in COCO evaluation: 'info'

Evaluating carton_soymilk...
Using reference image: carton_soymilk_000001.jpg
Using reference mask: carton_soymilk_000001_1_gt.png
Reference bounding box: x=[355,439], y=[141,304]

Processing image 1/49: carton_soymilk_000002.jpg
Ground truth bbox: [182, 164, 121, 200]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.17it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [186, 169, 110, 168]

Processing image 2/49: carton_soymilk_000003.jpg
Ground truth bbox: [146, 133, 134, 224]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.07it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 3/49: carton_soymilk_000004.jpg
Ground truth bbox: [455, 245, 129, 216]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [465, 250, 121, 174]

Processing image 4/49: carton_soymilk_000005.jpg
Ground truth bbox: [309, 8, 260, 188]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.71it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [331, 13, 287, 141]

Processing image 5/49: carton_soymilk_000006.jpg
Ground truth bbox: [388, 73, 87, 150]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.72it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [392, 71, 83, 152]

Processing image 6/49: carton_soymilk_000007.jpg
Ground truth bbox: [390, 8, 133, 243]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.27it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.57s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [397, 11, 180, 239]

Processing image 7/49: carton_soymilk_000008.jpg
Ground truth bbox: [375, 181, 131, 195]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.47it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [378, 168, 184, 211]

Processing image 8/49: carton_soymilk_000009.jpg
Ground truth bbox: [371, 14, 94, 217]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.27it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [507, 28, 94, 182]

Processing image 9/49: carton_soymilk_000010.jpg
Ground truth bbox: [345, 167, 123, 230]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.94it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [419, 144, 91, 173]

Processing image 10/49: carton_soymilk_000011.jpg
Ground truth bbox: [277, 97, 70, 153]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.28it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [371, 96, 62, 132]

Processing image 11/49: carton_soymilk_000012.jpg
Ground truth bbox: [339, 69, 179, 94]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.38it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [502, 204, 85, 163]

Processing image 12/49: carton_soymilk_000013.jpg
Ground truth bbox: [337, 19, 215, 133]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.67it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [467, 161, 95, 120]

Processing image 13/49: carton_soymilk_000014.jpg
Ground truth bbox: [464, 0, 153, 236]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.00it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [469, 0, 147, 240]

Processing image 14/49: carton_soymilk_000015.jpg
Ground truth bbox: [374, 230, 179, 91]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.46it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [511, 15, 104, 188]

Processing image 15/49: carton_soymilk_000016.jpg
Ground truth bbox: [101, 152, 166, 137]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.25it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [392, 163, 179, 102]

Processing image 16/49: carton_soymilk_000017.jpg
Ground truth bbox: [43, 192, 175, 91]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.53it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [41, 188, 297, 154]

Processing image 17/49: carton_soymilk_000018.jpg
Ground truth bbox: [11, 157, 269, 176]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.88it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 194, 483, 187]

Processing image 18/49: carton_soymilk_000019.jpg
Ground truth bbox: [58, 9, 189, 103]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.24it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [81, 40, 318, 137]

Processing image 19/49: carton_soymilk_000020.jpg
Ground truth bbox: [180, 80, 57, 118]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.89it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [296, 94, 60, 117]

Processing image 20/49: carton_soymilk_000021.jpg
Ground truth bbox: [299, 49, 98, 168]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.32it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [468, 26, 116, 190]

Processing image 21/49: carton_soymilk_000022.jpg
Ground truth bbox: [206, 31, 162, 290]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.04it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [210, 45, 194, 274]

Processing image 22/49: carton_soymilk_000023.jpg
Ground truth bbox: [234, 124, 150, 273]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.28it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 23/49: carton_soymilk_000024.jpg
Ground truth bbox: [270, 93, 153, 266]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.38it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 24/49: carton_soymilk_000025.jpg
Ground truth bbox: [208, 69, 145, 238]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.21it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 25/49: carton_soymilk_000026.jpg
Ground truth bbox: [189, 101, 121, 206]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.33it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [176, 110, 160, 244]

Processing image 26/49: carton_soymilk_000027.jpg
Ground truth bbox: [197, 72, 163, 228]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.96it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [206, 90, 152, 210]

Processing image 27/49: carton_soymilk_000028.jpg
Ground truth bbox: [222, 131, 146, 222]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.94it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [232, 135, 138, 218]

Processing image 28/49: carton_soymilk_000029.jpg
Ground truth bbox: [161, 112, 122, 199]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.70it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [156, 91, 277, 185]

Processing image 29/49: carton_soymilk_000030.jpg
Ground truth bbox: [191, 60, 210, 319]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [195, 51, 253, 328]

Processing image 30/49: carton_soymilk_000031.jpg
Ground truth bbox: [237, 85, 153, 277]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.77it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [235, 90, 176, 248]

Processing image 31/49: carton_soymilk_000032.jpg
Ground truth bbox: [254, 104, 154, 239]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.36it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.55s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 32/49: carton_soymilk_000033.jpg
Ground truth bbox: [234, 125, 138, 254]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.85it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [208, 134, 233, 242]

Processing image 33/49: carton_soymilk_000034.jpg
Ground truth bbox: [268, 27, 126, 207]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.72it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [238, 105, 185, 128]

Processing image 34/49: carton_soymilk_000035.jpg
Ground truth bbox: [369, 22, 174, 279]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.01it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [376, 22, 177, 280]

Processing image 35/49: carton_soymilk_000036.jpg
Ground truth bbox: [432, 31, 171, 244]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 19.19it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [413, 62, 53, 130]

Processing image 36/49: carton_soymilk_000037.jpg
Ground truth bbox: [144, 291, 90, 156]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.47it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [455, 0, 167, 231]

Processing image 37/49: carton_soymilk_000038.jpg
Ground truth bbox: [441, 15, 101, 186]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.19it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 38/49: carton_soymilk_000039.jpg
Ground truth bbox: [124, 338, 115, 137]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.01it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [498, 0, 141, 203]

Processing image 39/49: carton_soymilk_000040.jpg
Ground truth bbox: [210, 3, 142, 227]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.15it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [217, 6, 134, 225]

Processing image 40/49: carton_soymilk_000041.jpg
Ground truth bbox: [323, 39, 131, 227]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 41/49: carton_soymilk_000042.jpg
Ground truth bbox: [276, 2, 177, 310]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.17it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 42/49: carton_soymilk_000043.jpg
Ground truth bbox: [85, 144, 174, 281]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.17it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [11, 138, 389, 275]

Processing image 43/49: carton_soymilk_000044.jpg
Ground truth bbox: [208, 59, 159, 249]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.64it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [366, 105, 152, 181]

Processing image 44/49: carton_soymilk_000045.jpg
Ground truth bbox: [190, 32, 110, 220]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.07it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [126, 51, 267, 201]

Processing image 45/49: carton_soymilk_000046.jpg
Ground truth bbox: [247, 17, 117, 220]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.25it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [251, 46, 192, 193]

Processing image 46/49: carton_soymilk_000047.jpg
Ground truth bbox: [206, 132, 109, 223]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.82it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [150, 141, 211, 217]

Processing image 47/49: carton_soymilk_000048.jpg
Ground truth bbox: [127, 40, 127, 251]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.08it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [127, 36, 226, 214]

Processing image 48/49: carton_soymilk_000049.jpg
Ground truth bbox: [263, 2, 229, 270]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.91it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [183, 277, 123, 134]

Processing image 49/49: carton_soymilk_000050.jpg
Ground truth bbox: [203, 27, 111, 188]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.89it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.56s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]
creating index...
index created!
Error in COCO evaluation: 'info'

Evaluating diet_coke...
Using reference image: diet_coke_000001.jpg
Using reference mask: diet_coke_000001_1_gt.png
Reference bounding box: x=[354,445], y=[104,277]

Processing image 1/49: diet_coke_000002.jpg
Ground truth bbox: [400, 139, 77, 142]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.16it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [401, 141, 77, 139]

Processing image 2/49: diet_coke_000003.jpg
Ground truth bbox: [210, 65, 113, 209]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.86it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.60s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [209, 65, 111, 206]

Processing image 3/49: diet_coke_000004.jpg
Ground truth bbox: [226, 207, 78, 134]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.83it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 4/49: diet_coke_000005.jpg
Ground truth bbox: [211, 91, 102, 185]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.61it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 5/49: diet_coke_000006.jpg
Ground truth bbox: [302, 94, 105, 219]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.46it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.58s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [333, 99, 73, 187]

Processing image 6/49: diet_coke_000007.jpg
Ground truth bbox: [257, 85, 84, 161]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 7/49: diet_coke_000008.jpg
Ground truth bbox: [351, 118, 61, 112]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.87it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [354, 117, 51, 115]

Processing image 8/49: diet_coke_000009.jpg
Ground truth bbox: [369, 115, 103, 177]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.37it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [368, 115, 102, 177]

Processing image 9/49: diet_coke_000010.jpg
Ground truth bbox: [192, 102, 107, 180]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 10.05it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 10/49: diet_coke_000011.jpg
Ground truth bbox: [98, 120, 122, 194]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.68it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [367, 137, 122, 220]

Processing image 11/49: diet_coke_000012.jpg
Ground truth bbox: [114, 102, 97, 138]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.00it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [454, 105, 80, 150]

Processing image 12/49: diet_coke_000013.jpg
Ground truth bbox: [215, 23, 61, 108]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.08it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [214, 22, 101, 104]

Processing image 13/49: diet_coke_000014.jpg
Ground truth bbox: [215, 227, 96, 174]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.38it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [219, 190, 194, 210]

Processing image 14/49: diet_coke_000015.jpg
Ground truth bbox: [102, 129, 124, 181]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.62it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [121, 105, 297, 180]

Processing image 15/49: diet_coke_000016.jpg
Ground truth bbox: [82, 162, 143, 199]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.18it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [81, 161, 250, 201]

Processing image 16/49: diet_coke_000017.jpg
Ground truth bbox: [349, 195, 105, 178]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.08it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.55s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [352, 200, 98, 174]

Processing image 17/49: diet_coke_000018.jpg
Ground truth bbox: [100, 174, 114, 175]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.68it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [106, 173, 225, 179]

Processing image 18/49: diet_coke_000019.jpg
Ground truth bbox: [239, 112, 84, 166]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.95it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [158, 12, 274, 326]

Processing image 19/49: diet_coke_000020.jpg
Ground truth bbox: [210, 125, 91, 155]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.97it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [210, 128, 166, 177]

Processing image 20/49: diet_coke_000021.jpg
Ground truth bbox: [53, 197, 114, 142]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.76it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [6, 165, 195, 172]

Processing image 21/49: diet_coke_000022.jpg
Ground truth bbox: [154, 121, 92, 172]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.27it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [188, 126, 181, 256]

Processing image 22/49: diet_coke_000023.jpg
Ground truth bbox: [95, 131, 109, 173]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.34it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [229, 12, 72, 176]

Processing image 23/49: diet_coke_000024.jpg
Ground truth bbox: [147, 71, 85, 158]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.44it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 24/49: diet_coke_000025.jpg
Ground truth bbox: [255, 95, 94, 171]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.49it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [260, 94, 74, 166]

Processing image 25/49: diet_coke_000026.jpg
Ground truth bbox: [199, 124, 90, 165]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.70it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [203, 126, 85, 165]

Processing image 26/49: diet_coke_000027.jpg
Ground truth bbox: [189, 148, 79, 125]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.48it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 27/49: diet_coke_000028.jpg
Ground truth bbox: [141, 98, 94, 172]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.65it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 28/49: diet_coke_000029.jpg
Ground truth bbox: [98, 166, 115, 197]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.12it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [90, 165, 231, 188]

Processing image 29/49: diet_coke_000030.jpg
Ground truth bbox: [57, 149, 125, 174]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.95it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 30/49: diet_coke_000031.jpg
Ground truth bbox: [181, 180, 84, 159]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.31it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [178, 184, 213, 128]

Processing image 31/49: diet_coke_000032.jpg
Ground truth bbox: [139, 229, 86, 169]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.65it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [130, 218, 211, 178]

Processing image 32/49: diet_coke_000033.jpg
Ground truth bbox: [194, 77, 71, 147]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.01it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [193, 102, 82, 116]

Processing image 33/49: diet_coke_000034.jpg
Ground truth bbox: [171, 173, 69, 136]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.51it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 34/49: diet_coke_000035.jpg
Ground truth bbox: [265, 180, 71, 139]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.81it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [267, 182, 155, 145]

Processing image 35/49: diet_coke_000036.jpg
Ground truth bbox: [442, 32, 93, 169]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.19it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [443, 30, 176, 172]

Processing image 36/49: diet_coke_000037.jpg
Ground truth bbox: [115, 185, 60, 90]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.76it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [410, 48, 130, 182]

Processing image 37/49: diet_coke_000038.jpg
Ground truth bbox: [194, 136, 86, 176]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.61it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [346, 156, 142, 298]

Processing image 38/49: diet_coke_000039.jpg
Ground truth bbox: [238, 62, 61, 130]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.15it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 47, 405, 154]

Processing image 39/49: diet_coke_000040.jpg
Ground truth bbox: [295, 119, 59, 111]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.17it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 40/49: diet_coke_000041.jpg
Ground truth bbox: [248, 113, 82, 156]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.13it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 41/49: diet_coke_000042.jpg
Ground truth bbox: [239, 127, 94, 172]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.20it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [240, 131, 92, 166]

Processing image 42/49: diet_coke_000043.jpg
Ground truth bbox: [217, 107, 74, 132]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.96it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 43/49: diet_coke_000044.jpg
Ground truth bbox: [221, 70, 81, 155]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.33it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.55s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [141, 72, 161, 283]

Processing image 44/49: diet_coke_000045.jpg
Ground truth bbox: [65, 174, 102, 179]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.24it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [199, 171, 154, 176]

Processing image 45/49: diet_coke_000046.jpg
Ground truth bbox: [195, 81, 68, 148]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.73it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 46/49: diet_coke_000047.jpg
Ground truth bbox: [330, 62, 74, 123]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.51it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [378, 137, 126, 228]

Processing image 47/49: diet_coke_000048.jpg
Ground truth bbox: [232, 198, 96, 155]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.73it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 48/49: diet_coke_000049.jpg
Ground truth bbox: [122, 135, 91, 144]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.66it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [439, 218, 115, 210]

Processing image 49/49: diet_coke_000050.jpg
Ground truth bbox: [179, 227, 70, 106]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.50it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]
creating index...
index created!
Error in COCO evaluation: 'info'

Evaluating hc_potroastsoup...
Using reference image: hc_potroastsoup_000001.jpg
Using reference mask: hc_potroastsoup_000001_1_gt.png
Reference bounding box: x=[147,288], y=[191,339]

Processing image 1/49: hc_potroastsoup_000002.jpg
Ground truth bbox: [147, 52, 119, 105]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.39it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.57s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [152, 42, 115, 106]

Processing image 2/49: hc_potroastsoup_000003.jpg
Ground truth bbox: [272, 217, 91, 77]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [277, 223, 87, 76]

Processing image 3/49: hc_potroastsoup_000004.jpg
Ground truth bbox: [263, 275, 94, 91]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [259, 280, 96, 86]

Processing image 4/49: hc_potroastsoup_000005.jpg
Ground truth bbox: [316, 260, 124, 114]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.74it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [316, 269, 123, 107]

Processing image 5/49: hc_potroastsoup_000006.jpg
Ground truth bbox: [350, 161, 109, 109]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.62it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [352, 162, 109, 172]

Processing image 6/49: hc_potroastsoup_000007.jpg
Ground truth bbox: [133, 195, 91, 79]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.66it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 7/49: hc_potroastsoup_000008.jpg
Ground truth bbox: [284, 53, 87, 71]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.55it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [278, 54, 93, 137]

Processing image 8/49: hc_potroastsoup_000009.jpg
Ground truth bbox: [54, 106, 123, 116]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.47it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 9/49: hc_potroastsoup_000010.jpg
Ground truth bbox: [151, 109, 142, 122]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.64it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 10/49: hc_potroastsoup_000011.jpg
Ground truth bbox: [229, 162, 111, 109]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.82it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.64s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 11/49: hc_potroastsoup_000012.jpg
Ground truth bbox: [262, 205, 90, 86]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.51it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 12/49: hc_potroastsoup_000013.jpg
Ground truth bbox: [311, 172, 134, 147]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.47it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [310, 174, 138, 143]

Processing image 13/49: hc_potroastsoup_000014.jpg
Ground truth bbox: [191, 163, 118, 100]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.47it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 14/49: hc_potroastsoup_000015.jpg
Ground truth bbox: [233, 137, 111, 89]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.58it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 15/49: hc_potroastsoup_000016.jpg
Ground truth bbox: [222, 195, 106, 86]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 16/49: hc_potroastsoup_000017.jpg
Ground truth bbox: [244, 95, 94, 97]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.12it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 17/49: hc_potroastsoup_000018.jpg
Ground truth bbox: [176, 111, 129, 144]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.44it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 18/49: hc_potroastsoup_000019.jpg
Ground truth bbox: [249, 146, 108, 116]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [264, 148, 99, 190]

Processing image 19/49: hc_potroastsoup_000020.jpg
Ground truth bbox: [220, 134, 133, 137]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.35it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [221, 132, 136, 140]

Processing image 20/49: hc_potroastsoup_000021.jpg
Ground truth bbox: [240, 118, 126, 137]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.97it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [240, 123, 124, 134]

Processing image 21/49: hc_potroastsoup_000022.jpg
Ground truth bbox: [267, 150, 132, 133]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.39it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [292, 155, 104, 130]

Processing image 22/49: hc_potroastsoup_000023.jpg
Ground truth bbox: [101, 62, 128, 123]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.16it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 23/49: hc_potroastsoup_000024.jpg
Ground truth bbox: [285, 111, 92, 87]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.91it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 24/49: hc_potroastsoup_000025.jpg
Ground truth bbox: [294, 30, 107, 117]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.44it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 25/49: hc_potroastsoup_000026.jpg
Ground truth bbox: [134, 106, 157, 162]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.63it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [133, 115, 154, 154]

Processing image 26/49: hc_potroastsoup_000027.jpg
Ground truth bbox: [209, 74, 105, 117]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.43it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [209, 79, 104, 116]

Processing image 27/49: hc_potroastsoup_000028.jpg
Ground truth bbox: [304, 130, 121, 124]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.03it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [306, 134, 124, 287]

Processing image 28/49: hc_potroastsoup_000029.jpg
Ground truth bbox: [151, 145, 94, 94]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.98it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [319, 173, 188, 176]

Processing image 29/49: hc_potroastsoup_000030.jpg
Ground truth bbox: [228, 155, 95, 88]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.13it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [226, 158, 225, 139]

Processing image 30/49: hc_potroastsoup_000031.jpg
Ground truth bbox: [315, 23, 81, 77]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.74it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [158, 383, 76, 81]

Processing image 31/49: hc_potroastsoup_000032.jpg
Ground truth bbox: [144, 210, 88, 86]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.32it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 32/49: hc_potroastsoup_000033.jpg
Ground truth bbox: [215, 17, 111, 101]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.61it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [209, 287, 102, 98]

Processing image 33/49: hc_potroastsoup_000034.jpg
Ground truth bbox: [175, 153, 93, 92]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.00it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 34/49: hc_potroastsoup_000035.jpg
Ground truth bbox: [111, 135, 159, 157]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.64it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [239, 101, 121, 122]

Processing image 35/49: hc_potroastsoup_000036.jpg
Ground truth bbox: [265, 165, 95, 95]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.74it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 36/49: hc_potroastsoup_000037.jpg
Ground truth bbox: [248, 211, 122, 124]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.04it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 37/49: hc_potroastsoup_000038.jpg
Ground truth bbox: [272, 192, 123, 122]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.10it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [281, 234, 116, 81]

Processing image 38/49: hc_potroastsoup_000039.jpg
Ground truth bbox: [113, 123, 118, 103]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.08it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 39/49: hc_potroastsoup_000040.jpg
Ground truth bbox: [198, 175, 90, 70]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.88it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 40/49: hc_potroastsoup_000041.jpg
Ground truth bbox: [262, 153, 140, 143]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.34it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [263, 154, 144, 143]

Processing image 41/49: hc_potroastsoup_000042.jpg
Ground truth bbox: [257, 263, 118, 129]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.91it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 42/49: hc_potroastsoup_000043.jpg
Ground truth bbox: [250, 212, 119, 119]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.85it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [250, 220, 111, 112]

Processing image 43/49: hc_potroastsoup_000044.jpg
Ground truth bbox: [262, 179, 101, 105]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.73it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [263, 172, 208, 112]

Processing image 44/49: hc_potroastsoup_000045.jpg
Ground truth bbox: [240, 28, 104, 86]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.12it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [238, 28, 104, 90]

Processing image 45/49: hc_potroastsoup_000046.jpg
Ground truth bbox: [128, 205, 82, 90]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.14it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [404, 150, 198, 195]

Processing image 46/49: hc_potroastsoup_000047.jpg
Ground truth bbox: [190, 157, 102, 76]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.25it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [199, 149, 193, 83]

Processing image 47/49: hc_potroastsoup_000048.jpg
Ground truth bbox: [168, 207, 115, 99]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.18it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [297, 222, 181, 120]

Processing image 48/49: hc_potroastsoup_000049.jpg
Ground truth bbox: [145, 196, 130, 130]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.66it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [155, 128, 225, 195]

Processing image 49/49: hc_potroastsoup_000050.jpg
Ground truth bbox: [213, 233, 152, 163]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.25it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.57s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [352, 145, 65, 113]
creating index...
index created!
Error in COCO evaluation: 'info'

Evaluating juicebox...
Using reference image: juicebox_000001.jpg
Using reference mask: juicebox_000001_1_gt.png
Reference bounding box: x=[267,333], y=[224,332]

Processing image 1/49: juicebox_000002.jpg
Ground truth bbox: [317, 225, 61, 100]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.03it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [322, 231, 51, 86]

Processing image 2/49: juicebox_000003.jpg
Ground truth bbox: [282, 161, 133, 85]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.43it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [306, 345, 56, 77]

Processing image 3/49: juicebox_000004.jpg
Ground truth bbox: [215, 103, 64, 118]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.72it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 4/49: juicebox_000005.jpg
Ground truth bbox: [199, 110, 126, 71]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.59it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 5/49: juicebox_000006.jpg
Ground truth bbox: [243, 186, 67, 118]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.60it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 6/49: juicebox_000007.jpg
Ground truth bbox: [190, 227, 74, 116]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.35it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 7/49: juicebox_000008.jpg
Ground truth bbox: [199, 149, 135, 86]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.45it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.59s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 8/49: juicebox_000009.jpg
Ground truth bbox: [177, 99, 65, 116]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 9/49: juicebox_000010.jpg
Ground truth bbox: [440, 268, 65, 116]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.91it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 10/49: juicebox_000011.jpg
Ground truth bbox: [482, 7, 80, 128]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.97it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 11/49: juicebox_000012.jpg
Ground truth bbox: [125, 186, 111, 87]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.57it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 12/49: juicebox_000013.jpg
Ground truth bbox: [342, 214, 76, 130]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.36it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 13/49: juicebox_000014.jpg
Ground truth bbox: [196, 228, 129, 87]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.89it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.58s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 14/49: juicebox_000015.jpg
Ground truth bbox: [250, 109, 66, 112]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.37it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 15/49: juicebox_000016.jpg
Ground truth bbox: [283, 140, 78, 133]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.28it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 16/49: juicebox_000017.jpg
Ground truth bbox: [400, 136, 53, 122]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.69it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [407, 144, 50, 98]

Processing image 17/49: juicebox_000018.jpg
Ground truth bbox: [355, 169, 72, 141]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.51it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 18/49: juicebox_000019.jpg
Ground truth bbox: [205, 153, 86, 156]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.95it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 19/49: juicebox_000020.jpg
Ground truth bbox: [161, 263, 75, 126]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.48it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.72s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 20/49: juicebox_000021.jpg
Ground truth bbox: [227, 151, 124, 74]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.82it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 21/49: juicebox_000022.jpg
Ground truth bbox: [227, 221, 147, 119]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.34it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 22/49: juicebox_000023.jpg
Ground truth bbox: [228, 192, 83, 165]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.78it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [234, 197, 65, 161]

Processing image 23/49: juicebox_000024.jpg
Ground truth bbox: [238, 117, 67, 117]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.14it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 24/49: juicebox_000025.jpg
Ground truth bbox: [147, 167, 115, 97]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.13it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 25/49: juicebox_000026.jpg
Ground truth bbox: [264, 138, 96, 164]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.58it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 26/49: juicebox_000027.jpg
Ground truth bbox: [277, 132, 97, 164]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [280, 132, 99, 164]

Processing image 27/49: juicebox_000028.jpg
Ground truth bbox: [165, 187, 96, 170]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.11it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [166, 193, 94, 166]

Processing image 28/49: juicebox_000029.jpg
Ground truth bbox: [168, 213, 95, 168]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.60it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 29/49: juicebox_000030.jpg
Ground truth bbox: [241, 151, 78, 149]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.80it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 30/49: juicebox_000031.jpg
Ground truth bbox: [356, 198, 82, 155]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.45it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 31/49: juicebox_000032.jpg
Ground truth bbox: [304, 151, 63, 152]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.06it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 32/49: juicebox_000033.jpg
Ground truth bbox: [149, 205, 77, 148]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.87it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 33/49: juicebox_000034.jpg
Ground truth bbox: [228, 66, 144, 86]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.43it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 34/49: juicebox_000035.jpg
Ground truth bbox: [120, 113, 143, 82]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.86it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 35/49: juicebox_000036.jpg
Ground truth bbox: [90, 296, 136, 81]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.03it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 36/49: juicebox_000037.jpg
Ground truth bbox: [157, 116, 78, 151]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.77it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 37/49: juicebox_000038.jpg
Ground truth bbox: [132, 202, 170, 156]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.74it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 38/49: juicebox_000039.jpg
Ground truth bbox: [199, 94, 76, 149]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.99it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 39/49: juicebox_000040.jpg
Ground truth bbox: [167, 169, 120, 156]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.99it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 40/49: juicebox_000041.jpg
Ground truth bbox: [287, 120, 93, 166]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.85it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 41/49: juicebox_000042.jpg
Ground truth bbox: [232, 159, 106, 92]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.18it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 42/49: juicebox_000043.jpg
Ground truth bbox: [212, 181, 70, 137]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 43/49: juicebox_000044.jpg
Ground truth bbox: [140, 183, 60, 126]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.69it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [141, 187, 80, 101]

Processing image 44/49: juicebox_000045.jpg
Ground truth bbox: [102, 172, 98, 160]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.68it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.55s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 45/49: juicebox_000046.jpg
Ground truth bbox: [53, 256, 91, 165]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.82it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [51, 263, 91, 137]

Processing image 46/49: juicebox_000047.jpg
Ground truth bbox: [70, 212, 81, 167]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.75it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 47/49: juicebox_000048.jpg
Ground truth bbox: [264, 247, 107, 115]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.84it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 48/49: juicebox_000049.jpg
Ground truth bbox: [179, 148, 84, 149]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.39it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 49/49: juicebox_000050.jpg
Ground truth bbox: [376, 247, 72, 130]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.56it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]
creating index...
index created!
Error in COCO evaluation: 'info'

Evaluating ricepilaf...
Using reference image: ricepilaf_000001.jpg
Using reference mask: ricepilaf_000001_1_gt.png
Reference bounding box: x=[417,505], y=[104,242]

Processing image 1/49: ricepilaf_000002.jpg
Ground truth bbox: [136, 120, 127, 144]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.72it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 2/49: ricepilaf_000003.jpg
Ground truth bbox: [314, 29, 116, 110]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.95it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [280, 27, 151, 110]

Processing image 3/49: ricepilaf_000004.jpg
Ground truth bbox: [275, 213, 85, 122]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 10.51it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [406, 207, 112, 158]

Processing image 4/49: ricepilaf_000005.jpg
Ground truth bbox: [150, 164, 86, 134]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.11it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:06<00:00,  3.25s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 5/49: ricepilaf_000006.jpg
Ground truth bbox: [307, 136, 77, 127]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.43it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [83, 331, 183, 142]

Processing image 6/49: ricepilaf_000007.jpg
Ground truth bbox: [166, 115, 89, 160]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.74it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 7/49: ricepilaf_000008.jpg
Ground truth bbox: [40, 115, 143, 165]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.93it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 8/49: ricepilaf_000009.jpg
Ground truth bbox: [64, 122, 98, 125]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.04it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.58s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 9/49: ricepilaf_000010.jpg
Ground truth bbox: [297, 167, 141, 113]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.96it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 10/49: ricepilaf_000011.jpg
Ground truth bbox: [179, 188, 166, 174]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.57it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 11/49: ricepilaf_000012.jpg
Ground truth bbox: [266, 111, 142, 246]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.94it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 12/49: ricepilaf_000013.jpg
Ground truth bbox: [245, 153, 110, 190]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.85it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.57s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 13/49: ricepilaf_000014.jpg
Ground truth bbox: [247, 109, 94, 164]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.13it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 14/49: ricepilaf_000015.jpg
Ground truth bbox: [218, 142, 111, 159]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.57it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 15/49: ricepilaf_000016.jpg
Ground truth bbox: [255, 160, 109, 179]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.88it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 16/49: ricepilaf_000017.jpg
Ground truth bbox: [317, 123, 139, 150]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.62it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 17/49: ricepilaf_000018.jpg
Ground truth bbox: [192, 123, 93, 182]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.47it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 18/49: ricepilaf_000019.jpg
Ground truth bbox: [158, 88, 147, 244]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.02it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 19/49: ricepilaf_000020.jpg
Ground truth bbox: [123, 80, 124, 244]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.77it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 20/49: ricepilaf_000021.jpg
Ground truth bbox: [156, 100, 132, 150]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.03it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 21/49: ricepilaf_000022.jpg
Ground truth bbox: [117, 158, 174, 155]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.82it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [432, 152, 36, 104]

Processing image 22/49: ricepilaf_000023.jpg
Ground truth bbox: [316, 101, 113, 246]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.55it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.58s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 23/49: ricepilaf_000024.jpg
Ground truth bbox: [227, 123, 132, 224]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.35it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.59s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 24/49: ricepilaf_000025.jpg
Ground truth bbox: [326, 101, 99, 236]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.22it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 25/49: ricepilaf_000026.jpg
Ground truth bbox: [238, 88, 122, 247]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.61it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 26/49: ricepilaf_000027.jpg
Ground truth bbox: [88, 94, 178, 273]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.98it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 27/49: ricepilaf_000028.jpg
Ground truth bbox: [293, 241, 219, 151]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.02it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 28/49: ricepilaf_000029.jpg
Ground truth bbox: [132, 208, 116, 220]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.10it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [426, 275, 27, 23]

Processing image 29/49: ricepilaf_000030.jpg
Ground truth bbox: [191, 42, 138, 168]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.06it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 30/49: ricepilaf_000031.jpg
Ground truth bbox: [118, 112, 112, 247]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.07it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 31/49: ricepilaf_000032.jpg
Ground truth bbox: [137, 138, 168, 176]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.90it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 32/49: ricepilaf_000033.jpg
Ground truth bbox: [165, 206, 89, 221]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 13.61it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.54s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [430, 317, 69, 143]

Processing image 33/49: ricepilaf_000034.jpg
Ground truth bbox: [37, 122, 162, 163]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 11.10it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 34/49: ricepilaf_000035.jpg
Ground truth bbox: [149, 164, 146, 217]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.40it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 35/49: ricepilaf_000036.jpg
Ground truth bbox: [210, 109, 165, 302]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.99it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.57s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 36/49: ricepilaf_000037.jpg
Ground truth bbox: [142, 19, 214, 275]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.36it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 37/49: ricepilaf_000038.jpg
Ground truth bbox: [175, 212, 125, 186]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.11it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 38/49: ricepilaf_000039.jpg
Ground truth bbox: [314, 75, 76, 190]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.45it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.56s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 39/49: ricepilaf_000040.jpg
Ground truth bbox: [195, 175, 198, 171]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.54it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 40/49: ricepilaf_000041.jpg
Ground truth bbox: [311, 176, 157, 241]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.85it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [131, 227, 192, 107]

Processing image 41/49: ricepilaf_000042.jpg
Ground truth bbox: [223, 89, 79, 237]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.66it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 42/49: ricepilaf_000043.jpg
Ground truth bbox: [275, 121, 92, 216]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.09it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 43/49: ricepilaf_000044.jpg
Ground truth bbox: [249, 149, 128, 160]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 44/49: ricepilaf_000045.jpg
Ground truth bbox: [314, 126, 134, 216]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.89it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 45/49: ricepilaf_000046.jpg
Ground truth bbox: [292, 160, 161, 150]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.19it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 46/49: ricepilaf_000047.jpg
Ground truth bbox: [124, 134, 224, 153]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.60it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [473, 0, 58, 13]

Processing image 47/49: ricepilaf_000048.jpg
Ground truth bbox: [321, 255, 90, 158]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.80it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [321, 257, 88, 157]

Processing image 48/49: ricepilaf_000049.jpg
Ground truth bbox: [128, 108, 156, 214]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.17it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 49/49: ricepilaf_000050.jpg
Ground truth bbox: [229, 111, 114, 167]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.97it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]
creating index...
index created!
Error in COCO evaluation: 'info'

Evaluating rice_tuscan...
Using reference image: rice_tuscan_000001.jpg
Using reference mask: rice_tuscan_000001_1_gt.png
Reference bounding box: x=[217,307], y=[66,227]

Processing image 1/49: rice_tuscan_000002.jpg
Ground truth bbox: [193, 186, 91, 133]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.67it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [192, 188, 94, 129]

Processing image 2/49: rice_tuscan_000003.jpg
Ground truth bbox: [245, 155, 84, 147]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.83it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.53s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [199, 56, 128, 247]

Processing image 3/49: rice_tuscan_000004.jpg
Ground truth bbox: [125, 200, 156, 91]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.38it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [38, 83, 230, 171]

Processing image 4/49: rice_tuscan_000005.jpg
Ground truth bbox: [320, 127, 84, 130]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.76it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [323, 101, 126, 106]

Processing image 5/49: rice_tuscan_000006.jpg
Ground truth bbox: [174, 186, 87, 117]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.87it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [194, 154, 156, 50]

Processing image 6/49: rice_tuscan_000007.jpg
Ground truth bbox: [244, 135, 75, 128]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.38it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [55, 34, 413, 407]

Processing image 7/49: rice_tuscan_000008.jpg
Ground truth bbox: [121, 44, 100, 141]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.64it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [119, 36, 119, 143]

Processing image 8/49: rice_tuscan_000009.jpg
Ground truth bbox: [225, 104, 133, 270]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.37it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 9/49: rice_tuscan_000010.jpg
Ground truth bbox: [244, 118, 103, 209]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 19.10it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.55s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [244, 121, 97, 204]

Processing image 10/49: rice_tuscan_000011.jpg
Ground truth bbox: [258, 91, 135, 250]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.16it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 11/49: rice_tuscan_000012.jpg
Ground truth bbox: [219, 100, 108, 205]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.61it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [223, 105, 103, 196]

Processing image 12/49: rice_tuscan_000013.jpg
Ground truth bbox: [265, 142, 107, 174]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.99it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [263, 149, 107, 168]

Processing image 13/49: rice_tuscan_000014.jpg
Ground truth bbox: [314, 90, 82, 173]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.30it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 14/49: rice_tuscan_000015.jpg
Ground truth bbox: [227, 54, 92, 193]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.00it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 15/49: rice_tuscan_000016.jpg
Ground truth bbox: [316, 94, 108, 207]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.49it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 16/49: rice_tuscan_000017.jpg
Ground truth bbox: [208, 14, 118, 194]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.84it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [206, 16, 120, 195]

Processing image 17/49: rice_tuscan_000018.jpg
Ground truth bbox: [270, 129, 129, 240]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.93it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 18/49: rice_tuscan_000019.jpg
Ground truth bbox: [103, 198, 147, 154]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.92it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 19/49: rice_tuscan_000020.jpg
Ground truth bbox: [86, 165, 184, 158]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.64it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [75, 167, 194, 159]

Processing image 20/49: rice_tuscan_000021.jpg
Ground truth bbox: [129, 95, 178, 248]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.23it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [238, 0, 170, 128]

Processing image 21/49: rice_tuscan_000022.jpg
Ground truth bbox: [166, 69, 107, 252]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 12.89it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [255, 0, 158, 100]

Processing image 22/49: rice_tuscan_000023.jpg
Ground truth bbox: [166, 125, 110, 207]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.95it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [156, 126, 116, 221]

Processing image 23/49: rice_tuscan_000024.jpg
Ground truth bbox: [241, 32, 90, 181]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.11it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [79, 244, 108, 219]

Processing image 24/49: rice_tuscan_000025.jpg
Ground truth bbox: [96, 149, 189, 278]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.30it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 25/49: rice_tuscan_000026.jpg
Ground truth bbox: [157, 224, 111, 216]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.32it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [260, 261, 41, 169]

Processing image 26/49: rice_tuscan_000027.jpg
Ground truth bbox: [54, 100, 169, 236]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.31it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 27/49: rice_tuscan_000028.jpg
Ground truth bbox: [100, 133, 139, 247]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.72it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.39s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [283, 110, 150, 284]

Processing image 28/49: rice_tuscan_000029.jpg
Ground truth bbox: [68, 146, 150, 251]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.18it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [70, 83, 261, 313]

Processing image 29/49: rice_tuscan_000030.jpg
Ground truth bbox: [114, 126, 158, 262]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.49it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.51s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [111, 128, 264, 259]

Processing image 30/49: rice_tuscan_000031.jpg
Ground truth bbox: [78, 75, 151, 209]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.75it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [80, 75, 163, 209]

Processing image 31/49: rice_tuscan_000032.jpg
Ground truth bbox: [167, 177, 158, 184]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.85it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.44s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [170, 107, 271, 247]

Processing image 32/49: rice_tuscan_000033.jpg
Ground truth bbox: [94, 151, 127, 163]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 18.66it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 33/49: rice_tuscan_000034.jpg
Ground truth bbox: [394, 240, 142, 147]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.17it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [397, 109, 221, 287]

Processing image 34/49: rice_tuscan_000035.jpg
Ground truth bbox: [363, 192, 122, 166]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.86it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [8, 136, 136, 148]

Processing image 35/49: rice_tuscan_000036.jpg
Ground truth bbox: [238, 161, 68, 179]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.70it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 36/49: rice_tuscan_000037.jpg
Ground truth bbox: [187, 222, 159, 181]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.83it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.40s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 37/49: rice_tuscan_000038.jpg
Ground truth bbox: [228, 133, 75, 183]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.41it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.50s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [319, 172, 107, 144]

Processing image 38/49: rice_tuscan_000039.jpg
Ground truth bbox: [111, 149, 122, 201]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.81it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [114, 142, 107, 206]

Processing image 39/49: rice_tuscan_000040.jpg
Ground truth bbox: [84, 72, 146, 249]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 17.04it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.43s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [81, 73, 128, 247]

Processing image 40/49: rice_tuscan_000041.jpg
Ground truth bbox: [136, 193, 96, 187]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.52it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [371, 173, 84, 134]

Processing image 41/49: rice_tuscan_000042.jpg
Ground truth bbox: [111, 144, 103, 221]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.01it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 42/49: rice_tuscan_000043.jpg
Ground truth bbox: [160, 180, 206, 48]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.11it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 43/49: rice_tuscan_000044.jpg
Ground truth bbox: [39, 182, 94, 173]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.44it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.57s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [38, 187, 153, 166]

Processing image 44/49: rice_tuscan_000045.jpg
Ground truth bbox: [242, 134, 112, 145]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.48it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.49s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 45/49: rice_tuscan_000046.jpg
Ground truth bbox: [364, 178, 126, 159]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 15.81it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 46/49: rice_tuscan_000047.jpg
Ground truth bbox: [107, 207, 227, 175]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.26it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:05<00:00,  2.52s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 47/49: rice_tuscan_000048.jpg
Ground truth bbox: [416, 149, 100, 214]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 14.05it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.47s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 48/49: rice_tuscan_000049.jpg
Ground truth bbox: [162, 143, 115, 165]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.00it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.48s/it]



Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [0, 0, 0, 0]

Processing image 49/49: rice_tuscan_000050.jpg
Ground truth bbox: [347, 234, 119, 205]


frame loading (JPEG): 100%|██████████| 2/2 [00:00<00:00, 16.24it/s]
propagate in video:   0%|          | 0/2 [00:00<?, ?it/s]


Debug: Frame 0 has 1 objects
Object IDs: [1]


propagate in video: 100%|██████████| 2/2 [00:04<00:00,  2.46s/it]


Debug: Frame 1 has 1 objects
Object IDs: [1]
Predicted bbox: [6, 137, 562, 339]
creating index...
index created!
Error in COCO evaluation: 'info'

=== Final Results ===





In [None]:
import os
import glob
import shutil
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pycocotools import mask as cocomask
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from collections import defaultdict
import torch
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.sam2_image_predictor import SAM2ImagePredictor
from sam2.build_sam import build_sam2_video_predictor


# SAM2 model setup
checkpoint = r"C:\Users\dell\Desktop\Assignment\sam2_hiera_tiny.pt"
model_cfg = r"C:\Users\dell\Desktop\Assignment\sam2\configs\sam2\sam2_hiera_t.yaml"


# Initialize SAM2 components
predictor_prompt = SAM2ImagePredictor(build_sam2(model_cfg, checkpoint, device='cpu'))
sam2 = build_sam2(model_cfg, checkpoint, device='cpu', apply_postprocessing=False)
mask_generator = SAM2AutomaticMaskGenerator(sam2)
predictor_vid = build_sam2_video_predictor(model_cfg, checkpoint, device='cpu')

# Temporary directory for processing
tempfolder = "./tempdir"

def create_if_not_exists(dirname):
    if not os.path.exists(dirname):
        os.mkdir(dirname)

def cleardir(tempfolder):
    filepaths = glob.glob(tempfolder+"/*")
    for filepath in filepaths:
        os.unlink(filepath)

def show_mask(mask, ax, obj_id=None, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        cmap = plt.get_cmap("tab10")
        cmap_idx = 0 if obj_id is None else obj_id
        color = np.array([*cmap(cmap_idx)[:3], 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

def track_item_boxes(imgpath1, imgpath2, img1boxclasslist, visualize=True):
    """Track objects between two images using SAM2"""
    create_if_not_exists(tempfolder)
    cleardir(tempfolder)
    shutil.copy(imgpath1, os.path.join(tempfolder, "00000.jpg"))
    shutil.copy(imgpath2, os.path.join(tempfolder, "00001.jpg"))
    
    inference_state = predictor_vid.init_state(video_path="./tempdir")
    predictor_vid.reset_state(inference_state)
    ann_frame_idx = 0
    
    for img1boxclass in img1boxclasslist:
        ([xmin, xmax, ymin, ymax], objectnumint) = img1boxclass
        box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
        _, out_obj_ids, out_mask_logits = predictor_vid.add_new_points_or_box(
            inference_state=inference_state,
            frame_idx=ann_frame_idx,
            obj_id=objectnumint,
            box=box,
        )
    
    video_segments = {}
    for out_frame_idx, out_obj_ids, out_mask_logits in predictor_vid.propagate_in_video(inference_state):
        print(f"\nDebug: Frame {out_frame_idx} has {len(out_obj_ids)} objects")  # Debug print
        print(f"Object IDs: {out_obj_ids}")  # Debug print
        
        video_segments[out_frame_idx] = {
            out_obj_id: (out_mask_logits[i] > 0.0).cpu().numpy()
            for i, out_obj_id in enumerate(out_obj_ids)
        }
    
    if visualize:
        fig, ax = plt.subplots()
        plt.title(f"original image object ::")
        ax.imshow(Image.open(os.path.join(tempfolder, "00000.jpg")))
        rect = patches.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, 
                               linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        plt.show()
        
        out_frame_idx = 1
        plt.figure(figsize=(6, 4))
        plt.title(f"detected object in test image ::")
        plt.imshow(Image.open(os.path.join(tempfolder, "00001.jpg")))
        for out_obj_id, out_mask in video_segments[out_frame_idx].items():
            show_mask(out_mask, plt.gca(), obj_id=out_obj_id)
        plt.show()
    
    return video_segments

def extract_category_from_filename(filename):
    """Extract category name from filename"""
    return '_'.join(os.path.basename(filename).split('_')[:-1])

def group_files_by_category(data_dir):
    """Group image and mask files by their product category"""
    image_files = glob.glob(os.path.join(data_dir, "*.jpg"))
    mask_files = glob.glob(os.path.join(data_dir, "*_gt.png"))
    
    category_dict = defaultdict(lambda: {'images': [], 'masks': []})
    
    for img in image_files:
        category = extract_category_from_filename(img)
        category_dict[category]['images'].append(img)
    
    for mask in mask_files:
        base_name = '_'.join(os.path.basename(mask).split('_')[:-2])
        category = extract_category_from_filename(base_name + '.jpg')
        
        matching_images = [img for img in category_dict[category]['images'] 
                         if base_name in img]
        
        for img in matching_images:
            category_dict[category]['masks'].append((img, mask))
    
    return category_dict

def process_img_png_mask(img_path, mask_path, visualize=False):
    """Extract bounding box coordinates from mask"""
    try:
        img = Image.open(img_path)
        mask = Image.open(mask_path)
        mask_array = np.array(mask)
        
        if mask_array.size == 0:
            print(f"Warning: Empty mask in {mask_path}")
            return 0, 0, 0, 0
            
        rows = np.any(mask_array, axis=1)
        cols = np.any(mask_array, axis=0)
        
        ymin, ymax = np.where(rows)[0][[0, -1]]
        xmin, xmax = np.where(cols)[0][[0, -1]]
        
        if visualize:
            fig, ax = plt.subplots(1, 2, figsize=(10, 5))
            ax[0].imshow(img)
            ax[0].set_title("Original Image")
            ax[1].imshow(mask_array)
            rect = plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, 
                               linewidth=1, edgecolor='r', facecolor='none')
            ax[1].add_patch(rect)
            ax[1].set_title("Mask with Bounding Box")
            plt.show()
        
        return xmin, xmax, ymin, ymax
        
    except Exception as e:
        print(f"Error processing {img_path}: {str(e)}")
        return 0, 0, 0, 0

def mask_to_bbox(mask):
    """Convert binary mask to COCO-style bounding box [x,y,width,height]"""
    # Handle 3D mask by taking first channel if needed
    if len(mask.shape) == 3:
        mask = mask[0] if mask.shape[0] == 1 else mask
    
    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
    
    if not np.any(rows) or not np.any(cols):
        return [0, 0, 0, 0]  # Return empty bbox if no True values
    
    ymin, ymax = np.where(rows)[0][[0, -1]]
    xmin, xmax = np.where(cols)[0][[0, -1]]
    
    return [int(xmin), int(ymin), int(xmax-xmin), int(ymax-ymin)]

def evaluate_product_category(category_name, image_mask_pairs):
    """Evaluate SAM2 performance on one product category"""
    print(f"\nEvaluating {category_name}...")
    
    if not image_mask_pairs or len(image_mask_pairs) < 2:
        print("Error: Need at least 2 image-mask pairs for evaluation")
        return None
    
    # Use first image-mask pair as reference
    first_img, first_mask = image_mask_pairs[0]
    print(f"Using reference image: {os.path.basename(first_img)}")
    print(f"Using reference mask: {os.path.basename(first_mask)}")
    
    xmin, xmax, ymin, ymax = process_img_png_mask(first_img, first_mask)
    print(f"Reference bounding box: x=[{xmin},{xmax}], y=[{ymin},{ymax}]")
    
    coco_gt = {"images": [], "annotations": [], "categories": [{"id": 1, "name": "product"}]}
    coco_dt = []
    
    for i, (img_path, mask_path) in enumerate(image_mask_pairs[1:]):
        print(f"\nProcessing image {i+1}/{len(image_mask_pairs)-1}: {os.path.basename(img_path)}")
        
        try:
            # Process ground truth
            gt_mask = np.array(Image.open(mask_path))
            gt_bbox = mask_to_bbox(gt_mask)
            print(f"Ground truth bbox: {gt_bbox}")
            
            # Add to COCO ground truth
            image_id = i+1
            coco_gt["images"].append({"id": image_id, "file_name": img_path})
            
            rle = cocomask.encode(np.asfortranarray(gt_mask))
            rle['counts'] = rle['counts'].decode('ascii')
            
            coco_gt["annotations"].append({
                "id": image_id,
                "image_id": image_id,
                "category_id": 1,
                "bbox": gt_bbox,
                "area": int(gt_bbox[2] * gt_bbox[3]),
                "iscrowd": 0,
                "segmentation": rle
            })
            
            # Get SAM2 prediction
            video_segments = track_item_boxes(first_img, img_path, [([xmin, xmax, ymin, ymax], 1)], False)
            
            if not video_segments or 1 not in video_segments:
                print("Warning: No valid segments found in frame 1")
                continue
                
            # Get the prediction mask
            pred_mask = video_segments[1][1]  # Frame 1, Object ID 1
            
            # Convert mask to correct format if needed
            if len(pred_mask.shape) == 3:
                pred_mask = pred_mask[0] if pred_mask.shape[0] == 1 else pred_mask
            
            pred_bbox = mask_to_bbox(pred_mask)
            print(f"Predicted bbox: {pred_bbox}")
            
            # Skip if prediction is empty
            if pred_bbox == [0, 0, 0, 0]:
                print("Warning: Empty prediction - skipping")
                continue
                
            # Convert prediction to COCO format
            pred_rle = cocomask.encode(np.asfortranarray(pred_mask.astype(np.uint8)))
            pred_rle['counts'] = pred_rle['counts'].decode('ascii')
            
            coco_dt.append({
                "image_id": image_id,
                "category_id": 1,
                "bbox": pred_bbox,
                "score": 1.0,
                "segmentation": pred_rle
            })
            
        except Exception as e:
            print(f"Error processing {img_path}: {str(e)}")
            continue
    
    if not coco_dt:
        print("Error: No valid predictions generated!")
        return None
    
    try:
        coco_gt_obj = COCO()
        coco_gt_obj.dataset = coco_gt
        coco_gt_obj.createIndex()
        
        coco_dt_obj = coco_gt_obj.loadRes(coco_dt)
        coco_eval = COCOeval(coco_gt_obj, coco_dt_obj, 'bbox')
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        
        return coco_eval.stats
        
    except Exception as e:
        print(f"Error in COCO evaluation: {str(e)}")
        return None
    
def main():
    data_dir = r"C:\Users\dell\Desktop\Assignment\CMU10_3D\data_2D"
    
    # Group files by category
    category_dict = group_files_by_category(data_dir)
    
    # Evaluate each category
    results = {}
    for category, files in category_dict.items():
        # Create image-mask pairs
        image_mask_pairs = []
        for img in files['images']:
            base_name = os.path.splitext(os.path.basename(img))[0]
            masks = [mask for (img_path, mask) in files['masks'] 
                    if base_name in os.path.basename(mask)]
            
            if masks:
                image_mask_pairs.append((img, masks[0]))  # Using first mask
        
        if len(image_mask_pairs) < 2:
            print(f"Skipping {category} - needs at least 2 images with masks")
            continue
            
        stats = evaluate_product_category(category, image_mask_pairs)
        
        if stats is not None:
            results[category] = {
                "AP": stats[0],  # AP @ IoU=0.50:0.95
                "AP50": stats[1],  # AP @ IoU=0.50
                "AP75": stats[2],  # AP @ IoU=0.75
                "AR": stats[8]     # AR @ maxDets=100
            }
    
    # Print results
    print("\n=== Final Results ===")
    for product, metrics in results.items():
        print(f"\n{product}:")
        print(f"  AP: {metrics['AP']:.3f}")
        print(f"  AP50: {metrics['AP50']:.3f}")
        print(f"  AP75: {metrics['AP75']:.3f}")
        print(f"  AR: {metrics['AR']:.3f}")

if __name__ == "__main__":
    main()

In [12]:
import os
import glob
import shutil
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pycocotools import mask as cocomask
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from collections import defaultdict
import torch
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.sam2_image_predictor import SAM2ImagePredictor
from sam2.build_sam import build_sam2_video_predictor
from IPython.display import clear_output
import sys
import warnings

# Suppress warnings and limit output
warnings.filterwarnings('ignore')

# SAM2 model setup
checkpoint = r"C:\Users\dell\Desktop\Assignment\sam2_hiera_tiny.pt"
model_cfg = r"C:\Users\dell\Desktop\Assignment\sam2\configs\sam2\sam2_hiera_t.yaml"

# Initialize SAM2 components
predictor_prompt = SAM2ImagePredictor(build_sam2(model_cfg, checkpoint, device='cpu'))
sam2 = build_sam2(model_cfg, checkpoint, device='cpu', apply_postprocessing=False)
mask_generator = SAM2AutomaticMaskGenerator(sam2)
predictor_vid = build_sam2_video_predictor(model_cfg, checkpoint, device='cpu')

# Temporary directory for processing
tempfolder = "./tempdir"

def create_if_not_exists(dirname):
    if not os.path.exists(dirname):
        os.mkdir(dirname)

def cleardir(tempfolder):
    filepaths = glob.glob(tempfolder+"/*")
    for filepath in filepaths:
        os.unlink(filepath)

def show_mask(mask, ax, obj_id=None, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        cmap = plt.get_cmap("tab10")
        cmap_idx = 0 if obj_id is None else obj_id
        color = np.array([*cmap(cmap_idx)[:3], 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

def track_item_boxes(imgpath1, imgpath2, img1boxclasslist, visualize=True):
    """Enhanced tracking with padding and better error handling"""
    try:
        create_if_not_exists(tempfolder)
        cleardir(tempfolder)
        
        # Load and verify images
        img1 = Image.open(imgpath1)
        img2 = Image.open(imgpath2)
        if img1.size != img2.size:
            img2 = img2.resize(img1.size)
        
        img1.save(os.path.join(tempfolder, "00000.jpg"))
        img2.save(os.path.join(tempfolder, "00001.jpg"))
        
        inference_state = predictor_vid.init_state(video_path="./tempdir")
        predictor_vid.reset_state(inference_state)
        
        # Process boxes with padding
        for ([xmin, xmax, ymin, ymax], obj_id) in img1boxclasslist:
            # Add 10% padding to the bounding box
            width, height = img1.size
            pad_x = int(0.1 * (xmax - xmin))
            pad_y = int(0.1 * (ymax - ymin))
            
            xmin = max(0, xmin - pad_x)
            xmax = min(width, xmax + pad_x)
            ymin = max(0, ymin - pad_y)
            ymax = min(height, ymax + pad_y)
            
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            _, out_obj_ids, _ = predictor_vid.add_new_points_or_box(
                inference_state=inference_state,
                frame_idx=0,
                obj_id=obj_id,
                box=box,
            )
        
        # Get predictions with threshold adjustment
        video_segments = {}
        for out_frame_idx, out_obj_ids, out_mask_logits in predictor_vid.propagate_in_video(inference_state):
            video_segments[out_frame_idx] = {
                obj_id: (out_mask_logits[i] > 0.5).cpu().numpy().astype(bool)
                for i, obj_id in enumerate(out_obj_ids)
            }
        
        if visualize:
            fig, ax = plt.subplots()
            plt.title(f"Original image object")
            ax.imshow(img1)
            rect = patches.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin,
                                   linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            plt.show()
            
            plt.figure(figsize=(6, 4))
            plt.title(f"Detected object in test image")
            plt.imshow(img2)
            if 1 in video_segments:
                for out_obj_id, out_mask in video_segments[1].items():
                    show_mask(out_mask, plt.gca(), obj_id=out_obj_id)
            plt.show()
        
        return video_segments
        
    except Exception as e:
        print(f"Tracking error: {str(e)}")
        return None

def extract_category_from_filename(filename):
    """Extract category name from filename"""
    return '_'.join(os.path.basename(filename).split('_')[:-1])

def group_files_by_category(data_dir):
    """Group image and mask files by their product category"""
    image_files = glob.glob(os.path.join(data_dir, "*.jpg"))
    mask_files = glob.glob(os.path.join(data_dir, "*_gt.png"))
    
    category_dict = defaultdict(lambda: {'images': [], 'masks': []})
    
    for img in image_files:
        category = extract_category_from_filename(img)
        category_dict[category]['images'].append(img)
    
    for mask in mask_files:
        base_name = '_'.join(os.path.basename(mask).split('_')[:-2])
        category = extract_category_from_filename(base_name + '.jpg')
        
        matching_images = [img for img in category_dict[category]['images'] 
                         if base_name in img]
        
        for img in matching_images:
            category_dict[category]['masks'].append((img, mask))
    
    return category_dict

def process_img_png_mask(img_path, mask_path, visualize=False):
    """Extract bounding box coordinates from mask"""
    try:
        img = Image.open(img_path)
        mask = Image.open(mask_path)
        mask_array = np.array(mask)
        
        if mask_array.size == 0:
            print(f"Warning: Empty mask in {mask_path}")
            return 0, 0, 0, 0
            
        rows = np.any(mask_array, axis=1)
        cols = np.any(mask_array, axis=0)
        
        ymin, ymax = np.where(rows)[0][[0, -1]]
        xmin, xmax = np.where(cols)[0][[0, -1]]
        
        if visualize:
            fig, ax = plt.subplots(1, 2, figsize=(10, 5))
            ax[0].imshow(img)
            ax[0].set_title("Original Image")
            ax[1].imshow(mask_array)
            rect = plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, 
                               linewidth=1, edgecolor='r', facecolor='none')
            ax[1].add_patch(rect)
            ax[1].set_title("Mask with Bounding Box")
            plt.show()
        
        return xmin, xmax, ymin, ymax
        
    except Exception as e:
        print(f"Error processing {img_path}: {str(e)}")
        return 0, 0, 0, 0

def mask_to_bbox(mask):
    """Improved mask to bbox conversion with better handling"""
    # Handle 3D mask by taking first channel if needed
    if len(mask.shape) == 3:
        mask = mask[0] if mask.shape[0] == 1 else mask
    
    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
    
    if not np.any(rows) or not np.any(cols):
        return [0, 0, 0, 0]  # Return empty bbox if no True values
    
    ymin, ymax = np.where(rows)[0][[0, -1]]
    xmin, xmax = np.where(cols)[0][[0, -1]]
    
    return [int(xmin), int(ymin), int(xmax-xmin), int(ymax-ymin)]

def evaluate_product_category(category_name, image_mask_pairs, max_display=10):
    """Enhanced evaluation with output management"""
    print(f"\nEvaluating {category_name} (showing first {max_display} outputs)...")
    
    if not image_mask_pairs or len(image_mask_pairs) < 2:
        print("Error: Need at least 2 image-mask pairs for evaluation")
        return None
    
    # Use first image-mask pair as reference
    first_img, first_mask = image_mask_pairs[0]
    print(f"Using reference image: {os.path.basename(first_img)}")
    print(f"Using reference mask: {os.path.basename(first_mask)}")
    
    xmin, xmax, ymin, ymax = process_img_png_mask(first_img, first_mask)
    print(f"Reference bounding box: x=[{xmin},{xmax}], y=[{ymin},{ymax}]")
    
    coco_gt = {"images": [], "annotations": [], "categories": [{"id": 1, "name": "product"}]}
    coco_dt = []
    
    for i, (img_path, mask_path) in enumerate(image_mask_pairs[1:]):
        if i >= max_display:  # Only show first N outputs
            clear_output(wait=True)
            print(f"Processing {len(image_mask_pairs)-1} images, showing first {max_display}...")
            break
            
        print(f"\nProcessing image {i+1}/{len(image_mask_pairs)-1}: {os.path.basename(img_path)}")
        
        try:
            # Process ground truth
            gt_mask = np.array(Image.open(mask_path))
            gt_bbox = mask_to_bbox(gt_mask)
            print(f"Ground truth bbox: {gt_bbox}")
            
            # Add to COCO ground truth
            image_id = i+1
            coco_gt["images"].append({"id": image_id, "file_name": img_path})
            
            rle = cocomask.encode(np.asfortranarray(gt_mask))
            rle['counts'] = rle['counts'].decode('ascii')
            
            coco_gt["annotations"].append({
                "id": image_id,
                "image_id": image_id,
                "category_id": 1,
                "bbox": gt_bbox,
                "area": int(gt_bbox[2] * gt_bbox[3]),
                "iscrowd": 0,
                "segmentation": rle
            })
            
            # Get SAM2 prediction
            video_segments = track_item_boxes(first_img, img_path, [([xmin, xmax, ymin, ymax], 1)], False)
            
            if not video_segments or 1 not in video_segments:
                print("Warning: No valid segments found in frame 1")
                continue
                
            # Get the prediction mask
            pred_mask = video_segments[1][1]  # Frame 1, Object ID 1
            
            # Convert mask to correct format if needed
            if len(pred_mask.shape) == 3:
                pred_mask = pred_mask[0] if pred_mask.shape[0] == 1 else pred_mask
            
            pred_bbox = mask_to_bbox(pred_mask)
            print(f"Predicted bbox: {pred_bbox}")
            
            # Skip if prediction is empty
            if pred_bbox == [0, 0, 0, 0]:
                print("Warning: Empty prediction - skipping")
                continue
                
            # Convert prediction to COCO format
            pred_rle = cocomask.encode(np.asfortranarray(pred_mask.astype(np.uint8)))
            pred_rle['counts'] = pred_rle['counts'].decode('ascii')
            
            coco_dt.append({
                "image_id": image_id,
                "category_id": 1,
                "bbox": pred_bbox,
                "score": 1.0,
                "segmentation": pred_rle
            })
            
        except Exception as e:
            print(f"Error processing {img_path}: {str(e)}")
            continue
    
    if not coco_dt:
        print("Error: No valid predictions generated!")
        return None
    
    try:
        coco_gt_obj = COCO()
        coco_gt_obj.dataset = coco_gt
        coco_gt_obj.createIndex()
        
        coco_dt_obj = coco_gt_obj.loadRes(coco_dt)
        coco_eval = COCOeval(coco_gt_obj, coco_dt_obj, 'bbox')
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        
        return coco_eval.stats
        
    except Exception as e:
        print(f"Error in COCO evaluation: {str(e)}")
        return None

def main():
    data_dir = r"C:\Users\dell\Desktop\Assignment\CMU10_3D\data_2D"
    
    # Group files by category
    category_dict = group_files_by_category(data_dir)
    
    # Evaluate each category
    results = {}
    for category, files in category_dict.items():
        # Create image-mask pairs
        image_mask_pairs = []
        for img in files['images']:
            base_name = os.path.splitext(os.path.basename(img))[0]
            masks = [mask for (img_path, mask) in files['masks'] 
                    if base_name in os.path.basename(mask)]
            
            if masks:
                image_mask_pairs.append((img, masks[0]))  # Using first mask
        
        if len(image_mask_pairs) < 2:
            print(f"Skipping {category} - needs at least 2 images with masks")
            continue
            
        # Save output to file
        output_file = f"{category}_results.txt"
        with open(output_file, 'w') as f:
            original_stdout = sys.stdout
            sys.stdout = f
            
            stats = evaluate_product_category(category, image_mask_pairs)
            
            sys.stdout = original_stdout
            print(f"Results saved to {output_file}")
        
        if stats is not None:
            results[category] = {
                "AP": stats[0],  # AP @ IoU=0.50:0.95
                "AP50": stats[1],  # AP @ IoU=0.50
                "AP75": stats[2],  # AP @ IoU=0.75
                "AR": stats[8]     # AR @ maxDets=100
            }
    
    # Print final results
    print("\n=== Final Results ===")
    for product, metrics in results.items():
        print(f"\n{product}:")
        print(f"  AP: {metrics['AP']:.3f}")
        print(f"  AP50: {metrics['AP50']:.3f}")
        print(f"  AP75: {metrics['AP75']:.3f}")
        print(f"  AR: {metrics['AR']:.3f}")

if __name__ == "__main__":
    main()

Results saved to rice_tuscan_results.txt

=== Final Results ===


In [13]:
import os
import glob
import shutil
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pycocotools import mask as cocomask
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from collections import defaultdict
import torch
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.sam2_image_predictor import SAM2ImagePredictor
from sam2.build_sam import build_sam2_video_predictor
from IPython.display import clear_output
import sys
import warnings
from io import StringIO

# Suppress warnings and limit output
warnings.filterwarnings('ignore')

# SAM2 model setup
checkpoint = r"C:\Users\dell\Desktop\Assignment\sam2_hiera_tiny.pt"
model_cfg = r"C:\Users\dell\Desktop\Assignment\sam2\configs\sam2\sam2_hiera_t.yaml"

# Initialize SAM2 components
predictor_prompt = SAM2ImagePredictor(build_sam2(model_cfg, checkpoint, device='cpu'))
sam2 = build_sam2(model_cfg, checkpoint, device='cpu', apply_postprocessing=False)
mask_generator = SAM2AutomaticMaskGenerator(sam2)
predictor_vid = build_sam2_video_predictor(model_cfg, checkpoint, device='cpu')

# Temporary directory for processing
tempfolder = "./tempdir"

def create_if_not_exists(dirname):
    if not os.path.exists(dirname):
        os.mkdir(dirname)

def cleardir(tempfolder):
    filepaths = glob.glob(tempfolder+"/*")
    for filepath in filepaths:
        os.unlink(filepath)

def show_mask(mask, ax, obj_id=None, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        cmap = plt.get_cmap("tab10")
        cmap_idx = 0 if obj_id is None else obj_id
        color = np.array([*cmap(cmap_idx)[:3], 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

def track_item_boxes(imgpath1, imgpath2, img1boxclasslist, visualize=True):
    """Enhanced tracking with padding and better error handling"""
    try:
        create_if_not_exists(tempfolder)
        cleardir(tempfolder)
        
        # Load and verify images
        img1 = Image.open(imgpath1)
        img2 = Image.open(imgpath2)
        if img1.size != img2.size:
            img2 = img2.resize(img1.size)
        
        img1.save(os.path.join(tempfolder, "00000.jpg"))
        img2.save(os.path.join(tempfolder, "00001.jpg"))
        
        inference_state = predictor_vid.init_state(video_path="./tempdir")
        predictor_vid.reset_state(inference_state)
        
        # Process boxes with padding
        for ([xmin, xmax, ymin, ymax], obj_id) in img1boxclasslist:
            # Add 10% padding to the bounding box
            width, height = img1.size
            pad_x = int(0.1 * (xmax - xmin))
            pad_y = int(0.1 * (ymax - ymin))
            
            xmin = max(0, xmin - pad_x)
            xmax = min(width, xmax + pad_x)
            ymin = max(0, ymin - pad_y)
            ymax = min(height, ymax + pad_y)
            
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            _, out_obj_ids, _ = predictor_vid.add_new_points_or_box(
                inference_state=inference_state,
                frame_idx=0,
                obj_id=obj_id,
                box=box,
            )
        
        # Get predictions with threshold adjustment
        video_segments = {}
        for out_frame_idx, out_obj_ids, out_mask_logits in predictor_vid.propagate_in_video(inference_state):
            video_segments[out_frame_idx] = {
                obj_id: (out_mask_logits[i] > 0.5).cpu().numpy().astype(bool)
                for i, obj_id in enumerate(out_obj_ids)
            }
        
        if visualize:
            fig, ax = plt.subplots()
            plt.title(f"Original image object")
            ax.imshow(img1)
            rect = patches.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin,
                                   linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            plt.show()
            
            plt.figure(figsize=(6, 4))
            plt.title(f"Detected object in test image")
            plt.imshow(img2)
            if 1 in video_segments:
                for out_obj_id, out_mask in video_segments[1].items():
                    show_mask(out_mask, plt.gca(), obj_id=out_obj_id)
            plt.show()
        
        return video_segments
        
    except Exception as e:
        print(f"Tracking error: {str(e)}")
        return None

def extract_category_from_filename(filename):
    """Extract category name from filename"""
    return '_'.join(os.path.basename(filename).split('_')[:-1])

def group_files_by_category(data_dir):
    """Group image and mask files by their product category"""
    image_files = glob.glob(os.path.join(data_dir, "*.jpg"))
    mask_files = glob.glob(os.path.join(data_dir, "*_gt.png"))
    
    category_dict = defaultdict(lambda: {'images': [], 'masks': []})
    
    for img in image_files:
        category = extract_category_from_filename(img)
        category_dict[category]['images'].append(img)
    
    for mask in mask_files:
        base_name = '_'.join(os.path.basename(mask).split('_')[:-2])
        category = extract_category_from_filename(base_name + '.jpg')
        
        matching_images = [img for img in category_dict[category]['images'] 
                         if base_name in img]
        
        for img in matching_images:
            category_dict[category]['masks'].append((img, mask))
    
    return category_dict

def process_img_png_mask(img_path, mask_path, visualize=False):
    """Extract bounding box coordinates from mask"""
    try:
        img = Image.open(img_path)
        mask = Image.open(mask_path)
        mask_array = np.array(mask)
        
        if mask_array.size == 0:
            print(f"Warning: Empty mask in {mask_path}")
            return 0, 0, 0, 0
            
        rows = np.any(mask_array, axis=1)
        cols = np.any(mask_array, axis=0)
        
        ymin, ymax = np.where(rows)[0][[0, -1]]
        xmin, xmax = np.where(cols)[0][[0, -1]]
        
        if visualize:
            fig, ax = plt.subplots(1, 2, figsize=(10, 5))
            ax[0].imshow(img)
            ax[0].set_title("Original Image")
            ax[1].imshow(mask_array)
            rect = plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, 
                               linewidth=1, edgecolor='r', facecolor='none')
            ax[1].add_patch(rect)
            ax[1].set_title("Mask with Bounding Box")
            plt.show()
        
        return xmin, xmax, ymin, ymax
        
    except Exception as e:
        print(f"Error processing {img_path}: {str(e)}")
        return 0, 0, 0, 0

def mask_to_bbox(mask):
    """Improved mask to bbox conversion with better handling"""
    # Handle 3D mask by taking first channel if needed
    if len(mask.shape) == 3:
        mask = mask[0] if mask.shape[0] == 1 else mask
    
    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
    
    if not np.any(rows) or not np.any(cols):
        return [0, 0, 0, 0]  # Return empty bbox if no True values
    
    ymin, ymax = np.where(rows)[0][[0, -1]]
    xmin, xmax = np.where(cols)[0][[0, -1]]
    
    return [int(xmin), int(ymin), int(xmax-xmin), int(ymax-ymin)]

def evaluate_product_category(category_name, image_mask_pairs, max_display=10):
    """Fixed evaluation with proper COCO format and output handling"""
    print(f"\nEvaluating {category_name} (showing first {max_display} outputs)...")
    
    if not image_mask_pairs or len(image_mask_pairs) < 2:
        print("Error: Need at least 2 image-mask pairs for evaluation")
        return None
    
    # Initialize COCO ground truth with all required fields
    coco_gt = {
        "info": {
            "description": f"{category_name} product detection",
            "version": "1.0",
            "year": 2023,
            "contributor": "User",
            "date_created": "2023-01-01"
        },
        "licenses": [{
            "id": 1,
            "name": "Academic",
            "url": ""
        }],
        "images": [],
        "annotations": [],
        "categories": [{
            "id": 1,
            "name": "product",
            "supercategory": "object"
        }]
    }
    
    # Use first image-mask pair as reference
    first_img, first_mask = image_mask_pairs[0]
    print(f"Using reference image: {os.path.basename(first_img)}")
    print(f"Using reference mask: {os.path.basename(first_mask)}")
    
    xmin, xmax, ymin, ymax = process_img_png_mask(first_img, first_mask)
    print(f"Reference bounding box: x=[{xmin},{xmax}], y=[{ymin},{ymax}]")
    
    coco_dt = []
    
    for i, (img_path, mask_path) in enumerate(image_mask_pairs[1:]):
        if i >= max_display:  # Only show first N outputs
            clear_output(wait=True)
            print(f"Processing {len(image_mask_pairs)-1} images, showing first {max_display}...")
            break
            
        print(f"\nProcessing image {i+1}/{len(image_mask_pairs)-1}: {os.path.basename(img_path)}")
        
        try:
            # Process ground truth
            gt_mask = np.array(Image.open(mask_path))
            gt_bbox = mask_to_bbox(gt_mask)
            print(f"Ground truth bbox: {gt_bbox}")
            
            # Add to COCO ground truth
            image_id = i+1
            coco_gt["images"].append({
                "id": image_id,
                "width": gt_mask.shape[1],
                "height": gt_mask.shape[0],
                "file_name": img_path
            })
            
            rle = cocomask.encode(np.asfortranarray(gt_mask))
            rle['counts'] = rle['counts'].decode('ascii')
            
            coco_gt["annotations"].append({
                "id": image_id,
                "image_id": image_id,
                "category_id": 1,
                "bbox": gt_bbox,
                "area": int(gt_bbox[2] * gt_bbox[3]),
                "iscrowd": 0,
                "segmentation": rle
            })
            
            # Get SAM2 prediction
            video_segments = track_item_boxes(first_img, img_path, [([xmin, xmax, ymin, ymax], 1)], False)
            
            if not video_segments or 1 not in video_segments:
                print("Warning: No valid segments found in frame 1")
                continue
                
            # Get the prediction mask
            pred_mask = video_segments[1][1]  # Frame 1, Object ID 1
            
            # Convert mask to correct format if needed
            if len(pred_mask.shape) == 3:
                pred_mask = pred_mask[0] if pred_mask.shape[0] == 1 else pred_mask
            
            pred_bbox = mask_to_bbox(pred_mask)
            print(f"Predicted bbox: {pred_bbox}")
            
            # Skip if prediction is empty
            if pred_bbox == [0, 0, 0, 0]:
                print("Warning: Empty prediction - skipping")
                continue
                
            # Convert prediction to COCO format
            pred_rle = cocomask.encode(np.asfortranarray(pred_mask.astype(np.uint8)))
            pred_rle['counts'] = pred_rle['counts'].decode('ascii')
            
            coco_dt.append({
                "image_id": image_id,
                "category_id": 1,
                "bbox": pred_bbox,
                "score": 1.0,
                "segmentation": pred_rle
            })
            
            # Visual inspection for first few images
            if i < 3:
                plt.figure(figsize=(10,5))
                plt.subplot(1,2,1)
                plt.imshow(Image.open(img_path))
                plt.title("Original")
                plt.subplot(1,2,2)
                plt.imshow(pred_mask)
                plt.title("Prediction")
                plt.show()
            
        except Exception as e:
            print(f"Error processing {img_path}: {str(e)}")
            continue
    
    if not coco_dt:
        print("Error: No valid predictions generated!")
        return None
    
    try:
        # Create COCO objects
        coco_gt_obj = COCO()
        coco_gt_obj.dataset = coco_gt
        coco_gt_obj.createIndex()
        
        coco_dt_obj = coco_gt_obj.loadRes(coco_dt)
        
        # Initialize COCOeval
        coco_eval = COCOeval(coco_gt_obj, coco_dt_obj, 'bbox')
        
        # Evaluate and accumulate results
        coco_eval.evaluate()
        coco_eval.accumulate()
        
        # Capture the summary output
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        coco_eval.summarize()
        sys.stdout = old_stdout
        
        # Print the summary
        print(mystdout.getvalue())
        
        return coco_eval.stats
        
    except Exception as e:
        print(f"Error in COCO evaluation: {str(e)}")
        return None

def main():
    data_dir = r"C:\Users\dell\Desktop\Assignment\CMU10_3D\data_2D"
    
    # Group files by category
    category_dict = group_files_by_category(data_dir)
    
    # Evaluate each category
    results = {}
    for category, files in category_dict.items():
        # Create image-mask pairs
        image_mask_pairs = []
        for img in files['images']:
            base_name = os.path.splitext(os.path.basename(img))[0]
            masks = [mask for (img_path, mask) in files['masks'] 
                    if base_name in os.path.basename(mask)]
            
            if masks:
                image_mask_pairs.append((img, masks[0]))  # Using first mask
        
        if len(image_mask_pairs) < 2:
            print(f"Skipping {category} - needs at least 2 images with masks")
            continue
            
        # Save output to file
        output_file = f"{category}_results.txt"
        with open(output_file, 'w') as f:
            original_stdout = sys.stdout
            sys.stdout = f
            
            stats = evaluate_product_category(category, image_mask_pairs)
            
            sys.stdout = original_stdout
            print(f"Results saved to {output_file}")
        
        if stats is not None:
            results[category] = {
                "AP": stats[0],  # AP @ IoU=0.50:0.95
                "AP50": stats[1],  # AP @ IoU=0.50
                "AP75": stats[2],  # AP @ IoU=0.75
                "AR": stats[8]     # AR @ maxDets=100
            }
    
    # Print final results
    print("\n=== Final Results ===")
    if not results:
        print("No valid results generated - check individual category logs")
    else:
        for product, metrics in results.items():
            print(f"\n{product}:")
            print(f"  AP: {metrics['AP']:.3f}")
            print(f"  AP50: {metrics['AP50']:.3f}")
            print(f"  AP75: {metrics['AP75']:.3f}")
            print(f"  AR: {metrics['AR']:.3f}")

if __name__ == "__main__":
    main()

Results saved to rice_tuscan_results.txt

=== Final Results ===

can_chowder:
  AP: 0.198
  AP50: 0.257
  AP75: 0.257
  AR: 0.250

can_soymilk:
  AP: 0.087
  AP50: 0.109
  AP75: 0.109
  AR: 0.080

can_tomatosoup:
  AP: 0.128
  AP50: 0.142
  AP75: 0.142
  AR: 0.180

carton_oj:
  AP: 0.000
  AP50: 0.000
  AP75: 0.000
  AR: 0.000

carton_soymilk:
  AP: 0.289
  AP50: 0.693
  AP75: 0.022
  AR: 0.330

diet_coke:
  AP: 0.347
  AP50: 0.505
  AP75: 0.366
  AR: 0.380

hc_potroastsoup:
  AP: 0.298
  AP50: 0.590
  AP75: 0.230
  AR: 0.320

juicebox:
  AP: 0.136
  AP50: 0.234
  AP75: 0.083
  AR: 0.200

ricepilaf:
  AP: 0.044
  AP50: 0.054
  AP75: 0.054
  AR: 0.080

rice_tuscan:
  AP: 0.106
  AP50: 0.149
  AP75: 0.109
  AR: 0.110


In [14]:
import os
import glob
import shutil
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pycocotools import mask as cocomask
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from collections import defaultdict
import torch
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from sam2.sam2_image_predictor import SAM2ImagePredictor
from sam2.build_sam import build_sam2_video_predictor
from IPython.display import clear_output
import sys
import warnings
from io import StringIO

# Suppress warnings and limit output
warnings.filterwarnings('ignore')

# SAM2 model setup
checkpoint = r"C:\Users\dell\Desktop\Assignment\sam2_hiera_tiny.pt"
model_cfg = r"C:\Users\dell\Desktop\Assignment\sam2\configs\sam2\sam2_hiera_t.yaml"

# Initialize SAM2 components
predictor_prompt = SAM2ImagePredictor(build_sam2(model_cfg, checkpoint, device='cpu'))
sam2 = build_sam2(model_cfg, checkpoint, device='cpu', apply_postprocessing=False)
mask_generator = SAM2AutomaticMaskGenerator(sam2)
predictor_vid = build_sam2_video_predictor(model_cfg, checkpoint, device='cpu')

# Temporary directory for processing
tempfolder = "./tempdir"

def create_if_not_exists(dirname):
    if not os.path.exists(dirname):
        os.mkdir(dirname)

def cleardir(tempfolder):
    filepaths = glob.glob(tempfolder+"/*")
    for filepath in filepaths:
        os.unlink(filepath)

def show_mask(mask, ax, obj_id=None, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        cmap = plt.get_cmap("tab10")
        cmap_idx = 0 if obj_id is None else obj_id
        color = np.array([*cmap(cmap_idx)[:3], 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

TRACKING_THRESHOLD = 0.6  # Increased from 0.5
MIN_MASK_COVERAGE = 0.05  # Minimum % of mask coverage to consider valid
MAX_PADDING_RATIO = 0.2   # Increased from 0.1

def track_item_boxes(imgpath1, imgpath2, img1boxclasslist, visualize=True):
    """Enhanced tracking with dynamic padding and thresholding"""
    try:
        create_if_not_exists(tempfolder)
        cleardir(tempfolder)
        
        img1 = Image.open(imgpath1)
        img2 = Image.open(imgpath2)
        if img1.size != img2.size:
            img2 = img2.resize(img1.size)
        
        img1.save(os.path.join(tempfolder, "00000.jpg"))
        img2.save(os.path.join(tempfolder, "00001.jpg"))
        
        inference_state = predictor_vid.init_state(video_path="./tempdir")
        predictor_vid.reset_state(inference_state)
        
        # Dynamic padding based on object size
        for ([xmin, xmax, ymin, ymax], obj_id) in img1boxclasslist:
            box_width = xmax - xmin
            box_height = ymax - ymin
            pad_x = min(int(MAX_PADDING_RATIO * box_width), 100)  # Limit padding
            pad_y = min(int(MAX_PADDING_RATIO * box_height), 100)
            
            xmin = max(0, xmin - pad_x)
            xmax = min(img1.width, xmax + pad_x)
            ymin = max(0, ymin - pad_y)
            ymax = min(img1.height, ymax + pad_y)
            
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            _, out_obj_ids, _ = predictor_vid.add_new_points_or_box(
                inference_state=inference_state,
                frame_idx=0,
                obj_id=obj_id,
                box=box,
            )
        
        # Get predictions with adaptive threshold
        video_segments = {}
        for out_frame_idx, out_obj_ids, out_mask_logits in predictor_vid.propagate_in_video(inference_state):
            video_segments[out_frame_idx] = {
                obj_id: (out_mask_logits[i] > TRACKING_THRESHOLD).cpu().numpy().astype(bool)
                for i, obj_id in enumerate(out_obj_ids)
            }
        
        return video_segments
        
    except Exception as e:
        print(f"Tracking error: {str(e)}")
        return None

def extract_category_from_filename(filename):
    """Extract category name from filename"""
    return '_'.join(os.path.basename(filename).split('_')[:-1])

def group_files_by_category(data_dir):
    """Group image and mask files by their product category"""
    image_files = glob.glob(os.path.join(data_dir, "*.jpg"))
    mask_files = glob.glob(os.path.join(data_dir, "*_gt.png"))
    
    category_dict = defaultdict(lambda: {'images': [], 'masks': []})
    
    for img in image_files:
        category = extract_category_from_filename(img)
        category_dict[category]['images'].append(img)
    
    for mask in mask_files:
        base_name = '_'.join(os.path.basename(mask).split('_')[:-2])
        category = extract_category_from_filename(base_name + '.jpg')
        
        matching_images = [img for img in category_dict[category]['images'] 
                         if base_name in img]
        
        for img in matching_images:
            category_dict[category]['masks'].append((img, mask))
    
    return category_dict

def process_img_png_mask(img_path, mask_path, visualize=False):
    """Extract bounding box coordinates from mask"""
    try:
        img = Image.open(img_path)
        mask = Image.open(mask_path)
        mask_array = np.array(mask)
        
        if mask_array.size == 0:
            print(f"Warning: Empty mask in {mask_path}")
            return 0, 0, 0, 0
            
        rows = np.any(mask_array, axis=1)
        cols = np.any(mask_array, axis=0)
        
        ymin, ymax = np.where(rows)[0][[0, -1]]
        xmin, xmax = np.where(cols)[0][[0, -1]]
        
        if visualize:
            fig, ax = plt.subplots(1, 2, figsize=(10, 5))
            ax[0].imshow(img)
            ax[0].set_title("Original Image")
            ax[1].imshow(mask_array)
            rect = plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, 
                               linewidth=1, edgecolor='r', facecolor='none')
            ax[1].add_patch(rect)
            ax[1].set_title("Mask with Bounding Box")
            plt.show()
        
        return xmin, xmax, ymin, ymax
        
    except Exception as e:
        print(f"Error processing {img_path}: {str(e)}")
        return 0, 0, 0, 0

def mask_to_bbox(mask):
    """Improved mask to bbox conversion with better handling"""
    # Handle 3D mask by taking first channel if needed
    if len(mask.shape) == 3:
        mask = mask[0] if mask.shape[0] == 1 else mask
    
    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
    
    if not np.any(rows) or not np.any(cols):
        return [0, 0, 0, 0]  # Return empty bbox if no True values
    
    ymin, ymax = np.where(rows)[0][[0, -1]]
    xmin, xmax = np.where(cols)[0][[0, -1]]
    
    return [int(xmin), int(ymin), int(xmax-xmin), int(ymax-ymin)]

def evaluate_product_category(category_name, image_mask_pairs, max_display=10):
    """Fixed evaluation with proper COCO format and output handling"""
    print(f"\nEvaluating {category_name} (showing first {max_display} outputs)...")
    
    if not image_mask_pairs or len(image_mask_pairs) < 2:
        print("Error: Need at least 2 image-mask pairs for evaluation")
        return None
    
    # Initialize COCO ground truth with all required fields
    coco_gt = {
        "info": {
            "description": f"{category_name} product detection",
            "version": "1.0",
            "year": 2023,
            "contributor": "User",
            "date_created": "2023-01-01"
        },
        "licenses": [{
            "id": 1,
            "name": "Academic",
            "url": ""
        }],
        "images": [],
        "annotations": [],
        "categories": [{
            "id": 1,
            "name": "product",
            "supercategory": "object"
        }]
    }
    
    # Use first image-mask pair as reference
    first_img, first_mask = image_mask_pairs[0]
    print(f"Using reference image: {os.path.basename(first_img)}")
    print(f"Using reference mask: {os.path.basename(first_mask)}")
    
    xmin, xmax, ymin, ymax = process_img_png_mask(first_img, first_mask)
    print(f"Reference bounding box: x=[{xmin},{xmax}], y=[{ymin},{ymax}]")
    
    coco_dt = []
    
    for i, (img_path, mask_path) in enumerate(image_mask_pairs[1:]):
        if i >= max_display:  # Only show first N outputs
            clear_output(wait=True)
            print(f"Processing {len(image_mask_pairs)-1} images, showing first {max_display}...")
            break
            
        print(f"\nProcessing image {i+1}/{len(image_mask_pairs)-1}: {os.path.basename(img_path)}")
        
        try:
            # Process ground truth
            gt_mask = np.array(Image.open(mask_path))
            gt_bbox = mask_to_bbox(gt_mask)
            print(f"Ground truth bbox: {gt_bbox}")
            
            # Add to COCO ground truth
            image_id = i+1
            coco_gt["images"].append({
                "id": image_id,
                "width": gt_mask.shape[1],
                "height": gt_mask.shape[0],
                "file_name": img_path
            })
            
            rle = cocomask.encode(np.asfortranarray(gt_mask))
            rle['counts'] = rle['counts'].decode('ascii')
            
            coco_gt["annotations"].append({
                "id": image_id,
                "image_id": image_id,
                "category_id": 1,
                "bbox": gt_bbox,
                "area": int(gt_bbox[2] * gt_bbox[3]),
                "iscrowd": 0,
                "segmentation": rle
            })
            
            # Get SAM2 prediction
            best_pred = None
            for attempt in range(2):  # Try up to 2 different parameter sets
                video_segments = track_item_boxes(
                    first_img, img_path, 
                    [([xmin, xmax, ymin, ymax], 1)], 
                    visualize=(attempt==0 and i<3)
                )
                
                if not video_segments or 1 not in video_segments:
                    continue
                    
                pred_mask = video_segments[1][1]
                if len(pred_mask.shape) == 3:
                    pred_mask = pred_mask[0] if pred_mask.shape[0] == 1 else pred_mask
                
                coverage = np.mean(pred_mask)
                if coverage < MIN_MASK_COVERAGE:
                    print(f"Low coverage ({coverage:.2%}), retrying with adjusted parameters")
                    global TRACKING_THRESHOLD
                    TRACKING_THRESHOLD = max(0.3, TRACKING_THRESHOLD - 0.1)  # Lower threshold
                    continue
                
                best_pred = pred_mask
                break
            
            if best_pred is None:
                print("Warning: No valid prediction after retries")
                continue
                
            pred_bbox = mask_to_bbox(best_pred)
            coverage = np.mean(best_pred)
            print(f"Predicted bbox: {pred_bbox}, Coverage: {coverage:.1%}")
            
            if coverage < MIN_MASK_COVERAGE:
                print("Warning: Insufficient coverage - skipping")
                continue
            
            # Skip if prediction is empty
            if pred_bbox == [0, 0, 0, 0]:
                print("Warning: Empty prediction - skipping")
                continue
                
            # Convert prediction to COCO format
            pred_rle = cocomask.encode(np.asfortranarray(pred_mask.astype(np.uint8)))
            pred_rle['counts'] = pred_rle['counts'].decode('ascii')
            
            coco_dt.append({
                "image_id": image_id,
                "category_id": 1,
                "bbox": pred_bbox,
                "score": 1.0,
                "segmentation": pred_rle
            })
            
            # Visual inspection for first few images
            if i < 3:
                plt.figure(figsize=(10,5))
                plt.subplot(1,2,1)
                plt.imshow(Image.open(img_path))
                plt.title("Original")
                plt.subplot(1,2,2)
                plt.imshow(pred_mask)
                plt.title("Prediction")
                plt.show()
            
        except Exception as e:
            print(f"Error processing {img_path}: {str(e)}")
            continue
    
    if not coco_dt:
        print("Error: No valid predictions generated!")
        return None
    
    try:
        # Create COCO objects
        coco_gt_obj = COCO()
        coco_gt_obj.dataset = coco_gt
        coco_gt_obj.createIndex()
        
        coco_dt_obj = coco_gt_obj.loadRes(coco_dt)
        
        # Initialize COCOeval
        coco_eval = COCOeval(coco_gt_obj, coco_dt_obj, 'bbox')
        
        # Evaluate and accumulate results
        coco_eval.evaluate()
        coco_eval.accumulate()
        
        # Capture the summary output
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        coco_eval.summarize()
        sys.stdout = old_stdout
        
        # Print the summary
        print(mystdout.getvalue())
        
        return coco_eval.stats
        
    except Exception as e:
        print(f"Error in COCO evaluation: {str(e)}")
        return None

def main():
    data_dir = r"C:\Users\dell\Desktop\Assignment\CMU10_3D\data_2D"
    
    # Group files by category
    category_dict = group_files_by_category(data_dir)
    
    # Evaluate each category
    results = {}
    for category, files in category_dict.items():
        # Create image-mask pairs
        image_mask_pairs = []
        for img in files['images']:
            base_name = os.path.splitext(os.path.basename(img))[0]
            masks = [mask for (img_path, mask) in files['masks'] 
                    if base_name in os.path.basename(mask)]
            
            if masks:
                image_mask_pairs.append((img, masks[0]))  # Using first mask
        
        if len(image_mask_pairs) < 2:
            print(f"Skipping {category} - needs at least 2 images with masks")
            continue
            
        # Save output to file
        output_file = f"{category}_results.txt"
        with open(output_file, 'w') as f:
            original_stdout = sys.stdout
            sys.stdout = f
            
            stats = evaluate_product_category(category, image_mask_pairs)
            
            sys.stdout = original_stdout
            print(f"Results saved to {output_file}")
        
        if stats is not None:
            results[category] = {
                "AP": stats[0],  # AP @ IoU=0.50:0.95
                "AP50": stats[1],  # AP @ IoU=0.50
                "AP75": stats[2],  # AP @ IoU=0.75
                "AR": stats[8]     # AR @ maxDets=100
            }
    
    # Print final results
    print("\n=== Final Results ===")
    if not results:
        print("No valid results generated - check individual category logs")
    else:
        for product, metrics in results.items():
            print(f"\n{product}:")
            print(f"  AP: {metrics['AP']:.3f}")
            print(f"  AP50: {metrics['AP50']:.3f}")
            print(f"  AP75: {metrics['AP75']:.3f}")
            print(f"  AR: {metrics['AR']:.3f}")

if __name__ == "__main__":
    main()

Results saved to rice_tuscan_results.txt

=== Final Results ===

can_chowder:
  AP: 0.000
  AP50: 0.000
  AP75: 0.000
  AR: 0.000

carton_oj:
  AP: 0.028
  AP50: 0.069
  AP75: 0.036
  AR: 0.080

carton_soymilk:
  AP: 0.156
  AP50: 0.437
  AP75: 0.000
  AR: 0.180

diet_coke:
  AP: 0.178
  AP50: 0.208
  AP75: 0.208
  AR: 0.170

hc_potroastsoup:
  AP: 0.000
  AP50: 0.000
  AP75: 0.000
  AR: 0.000

ricepilaf:
  AP: 0.000
  AP50: 0.000
  AP75: 0.000
  AR: 0.000

rice_tuscan:
  AP: 0.098
  AP50: 0.109
  AP75: 0.109
  AR: 0.090
