In [1]:
from sort import Sort
from pathlib import Path
import argparse
from copy import deepcopy
import json
import os
import os.path as osp
from pathlib import Path
import time
from typing import Tuple

import numpy as np
from PIL import Image

from pycocotools import mask as cocomask

In [2]:
# Arguments
# max_age is forwarded to Sort(max_age=...) when the tracker is created.
# NOTE(review): presumably the number of consecutive frames a track may stay
# unmatched before the tracker drops it — confirm against the local `sort` module.
max_age = 30

In [5]:
def rle_ann_to_mask(rle: str, image_size: Tuple[int, int]) -> np.ndarray:
    """Decode a COCO run-length-encoded annotation into a boolean mask.

    Args:
        rle: The RLE "counts" payload as text; it is UTF-8 encoded to bytes
            before being handed to pycocotools.
        image_size: Passed through unchanged as the RLE "size" field.
            NOTE(review): pycocotools expects (height, width) — confirm callers.

    Returns:
        The decoded mask cast to ``bool``.
    """
    rle_object = {"size": image_size, "counts": rle.encode("utf-8")}
    decoded = cocomask.decode(rle_object)
    return decoded.astype(bool)
    
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are indexable as ``(x1, y1, x2, y2)``. The result is
    ``intersection / union``, or ``0`` when the union area is not positive.
    """
    # Corners of the overlapping rectangle (may be "inverted" if disjoint).
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    # Clamp each side at zero so disjoint boxes contribute no overlap.
    overlap = max(0, right - left) * max(0, bottom - top)

    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = first_area + second_area - overlap

    if union > 0:
        return overlap / union
    return 0

def find_box_with_max_iou(boxes, target_box):
    """Find the entry of ``boxes`` that overlaps ``target_box`` the most.

    ``None`` entries are skipped. Only a strictly positive IoU can win, so
    when nothing overlaps the result is ``(-1, 0)``; callers must check for
    the ``-1`` index before using it to subscript a list.

    Returns:
        ``(index, iou)`` of the best match; on ties the earliest index wins.
    """
    best_index, best_iou = -1, 0
    for index, candidate in enumerate(boxes):
        if candidate is not None:
            overlap = calculate_iou(candidate, target_box)
            if overlap > best_iou:
                best_index, best_iou = index, overlap
    return best_index, best_iou


In [6]:
# Run the SORT tracker over per-frame detections and save, for every frame,
# a uint16 ID map in which each tracked object's mask is filled with its track ID.
#
# NOTE(review): the paths below are absolute and user-specific; consider moving
# them to a configurable DATA_DIR so the notebook runs on other machines.
boxes_masks_path = "/home/uig93971/src/data/street_obstacle_sequences/detections/boxes_masks"
boxes_masks_path = Path(boxes_masks_path)
heatmaps_path = "/home/uig93971/src/data/street_obstacle_sequences/detections/heatmaps"
track_masks_path = "/home/uig93971/src/data/street_obstacle_sequences/detections/ood_prediction_tracked"

# Set to a directory name to process a single video; None processes every
# sub-directory of boxes_masks_path.
video_id = None
if video_id is not None:
    video_ids = [video_id]
else:
    video_ids = [os.path.basename(str(f)) for f in boxes_masks_path.iterdir() if f.is_dir()]

frame_size = (1080, 1920)   # shape of the decoded masks and of the saved ID map
min_mask_area = 100         # detections whose mask has fewer pixels are ignored
iou_warn_threshold = 0.5    # warn when a track matches its detection only loosely

videos_processed = 0
videos_total = len(video_ids)
inference_start_time = time.time()

for video_id in video_ids:
    # A fresh tracker per video: sharing one instance would let tracks (and
    # their motion state) from the previous sequence be matched against the
    # first frames of this one.
    sort_tracker = Sort(max_age=max_age)

    # Frames are processed in lexicographic file-name order.
    frame_data_paths = sorted(str(f) for f in (boxes_masks_path / video_id).iterdir() if f.is_file())

    Path(osp.join(track_masks_path, video_id)).mkdir(parents=True, exist_ok=True)

    for frame_path in frame_data_paths:
        with open(frame_path, "r") as f:
            frame_data = json.load(f)

        # Keep boxes and decoded masks of the detections that pass the area
        # filter, index-aligned with what is fed to the tracker. This avoids
        # decoding each mask twice and guarantees a track is never painted
        # with a detection the tracker did not see.
        kept_boxes = []
        kept_masks = []
        dets_to_feed = []
        for score, box, enc_mask in zip(frame_data["scores"], frame_data["boxes"], frame_data["masks"]):
            mask = rle_ann_to_mask(enc_mask, frame_size)
            if np.sum(mask) < min_mask_area:
                continue
            kept_boxes.append(box)
            kept_masks.append(mask)
            dets_to_feed.append(np.concatenate([box, [score]]))

        # The tracker must be stepped on every frame, detections or not.
        if len(dets_to_feed) > 0:
            trackers = sort_tracker.update(np.array(dets_to_feed))
        else:
            trackers = sort_tracker.update()

        final_track_mask = np.zeros(frame_size, dtype=np.uint16)
        for tracker in trackers:
            # Each tracker row is read as [box (4 values), track id].
            # NOTE(review): IDs are stored as uint16; values above 65535
            # would not fit — confirm the tracker's ID range.
            tr_id = tracker[4]
            tr_box = tracker[:4]
            box_index, box_iou = find_box_with_max_iou(kept_boxes, tr_box)
            if box_index < 0:
                # No detection overlaps this track. Indexing with -1 would
                # silently paint the *last* detection's mask, so skip it.
                continue
            if box_iou < iou_warn_threshold:
                print(f"Box IoU is less than {iou_warn_threshold}: {box_iou}")
            final_track_mask[kept_masks[box_index]] = tr_id

        np.save(osp.join(track_masks_path, video_id, osp.splitext(os.path.basename(frame_path))[0] + ".npy"), final_track_mask)

    videos_processed += 1
    elapsed = time.time() - inference_start_time
    print(f"Processed videos {videos_processed}/{videos_total}")
    print(f"Time passed: {elapsed:.4f} seconds, Avg time per video: {elapsed / videos_processed:.4f} seconds")
        
        

Box IoU is less than 0.5: 0.38895432246433936
Box IoU is less than 0.5: 0.46495470712962117
Box IoU is less than 0.5: 0.48758612688749264
Box IoU is less than 0.5: 0.4804536747233322
Box IoU is less than 0.5: 0.4329689682461474
Box IoU is less than 0.5: 0.4565374858821196
Box IoU is less than 0.5: 0.48174447734601017
Box IoU is less than 0.5: 0.4904056580557013
Box IoU is less than 0.5: 0.41511312642582165
Box IoU is less than 0.5: 0.48903275204728364
Box IoU is less than 0.5: 0.43893915016897717
Box IoU is less than 0.5: 0.49440959926805306
Box IoU is less than 0.5: 0.4901235966552905
Box IoU is less than 0.5: 0.3937044213068359
Box IoU is less than 0.5: 0.41988963919403505
Box IoU is less than 0.5: 0.3920225790912589
Box IoU is less than 0.5: 0.4960182495734719
Processed videos 1/20
Time passed: 63.7069 seconds, Avg time per video: 63.7069 seconds
Box IoU is less than 0.5: 0.48046501078619147
Box IoU is less than 0.5: 0.46647350531282306
Box IoU is less than 0.5: 0.42815739621629123


In [11]:
# Scratch check: decode one RLE string and count its foreground pixels.
# The recorded result (1425) is above the 100-pixel area cut-off applied to
# detections in the tracking cell.
enc_mask = "Q\\SQ14`Q15L4N2O0O1O100O1O1O10000O10000000001O00000000001O00000000001O000000001O0001O0001O0001O00000010O0001O0000001O001O001N101O1O1O001O1O1O1O1N2O3Lcgjk0"
rle_ann_to_mask(enc_mask, (1080, 1920)).sum()

1425

In [12]:
# Scratch check: appending a score to a 4-value box yields one flat 5-element row.
np.concatenate(([1, 2, 3, 4], [100]))

array([  1,   2,   3,   4, 100])