In [5]:
import cv2
import numpy as np
from tqdm import tqdm
import pickle
import tqdm
from evaluation import mAP
from data_utils import extract_rectangles_from_xml

In [6]:
def process_frame(frame, background_model, min_width=50, min_height=50, ignore_shadows=False):
    """Update the background model with one frame and extract foreground boxes.

    Args:
        frame: Image handed to ``background_model.apply``.
        background_model: Object exposing ``apply(frame)`` that returns a
            single-channel foreground mask (e.g. an OpenCV background subtractor).
        min_width: Minimum bounding-box width, in pixels, for a detection to be kept.
        min_height: Minimum bounding-box height, in pixels, for a detection to be kept.
        ignore_shadows: When True, contours are searched only on mask pixels
            whose value is above 127. OpenCV's MOG2/KNN subtractors mark
            shadows with 127 by default, so this excludes shadow regions from
            the boxes. Defaults to False, which keeps every non-zero mask pixel.

    Returns:
        A ``(boxes, mask)`` tuple. ``boxes`` is a list of ``(x1, y1, x2, y2)``
        tuples (top-left and bottom-right corners) and ``mask`` is the
        unmodified mask returned by ``background_model.apply``.
    """
    frameMask = background_model.apply(frame)

    contour_source = frameMask
    if ignore_shadows:
        # Keep only confident foreground (255); shadow pixels (127) become 0.
        _, contour_source = cv2.threshold(frameMask, 127, 255, cv2.THRESH_BINARY)

    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contour list on both.
    contours = cv2.findContours(contour_source, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Discard small blobs, which are mostly noise.
        if w >= min_width and h >= min_height:
            boxes.append((x, y, x + w, y + h))

    return boxes, frameMask

In [8]:
# Load video and get some variables
video_path = 'data/AICity_data/train/S03/c010/vdo.avi'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this check a wrong path silently yields n_frames == 0 and an empty run.
    raise IOError(f"Could not open video '{video_path}'")

width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# We only use the last 75% of the video
starting_frame = int(n_frames // 4)

# Initialize background model
history_to_use = 200 # How many past frames to use to build the background model

# Seek `history_to_use` frames before the evaluated segment so the model can warm up.
# NOTE(review): assumes starting_frame >= history_to_use (true for videos longer
# than 4 * history_to_use frames) -- confirm for other inputs.
cap.set(cv2.CAP_PROP_POS_FRAMES, starting_frame-history_to_use)

background_algorithm = "MOG2"
if background_algorithm == "MOG2":
    background_model = cv2.createBackgroundSubtractorMOG2(history=history_to_use, detectShadows=True)

elif background_algorithm == "LSBP":
    # cv2.bgsegm is part of the opencv-contrib modules.
    background_model = cv2.bgsegm.createBackgroundSubtractorLSBP(
        noiseRemovalThresholdFacFG=0.008,
        minCount=4
    )

elif background_algorithm == "KNN":
    background_model = cv2.createBackgroundSubtractorKNN(
        history=history_to_use,
        detectShadows=True
    )

else:
    # Fail here rather than with a NameError on `background_model` in a later cell.
    raise ValueError(
        f"Unknown background_algorithm '{background_algorithm}'; expected 'MOG2', 'LSBP' or 'KNN'"
    )

In [11]:
# Feed the warm-up frames and the evaluated segment through the background model.
# NOTE: every annotated frame and mask is kept in memory; for long or
# high-resolution videos consider writing them to disk as they are produced.
processed_frames, processed_masks, frame_detections = [], [], []
try:
    for i in tqdm.tqdm(range(starting_frame-history_to_use, n_frames)):
        ret, frame = cap.read()
        if not ret:
            # Capture closed, end of stream or decode failure: `frame` is None,
            # so stop instead of passing it to the background model.
            break

        bboxes, mask = process_frame(frame, background_model)

        # Frames before `starting_frame` only warm up the background model.
        if i < starting_frame:
            continue

        # Save frame detections
        frame_detections.append(bboxes)

        # The mask is single-channel: drawing (0,255,0) on it would use only the
        # first component (0) and paint a black box. Convert to BGR so the box
        # is visible and the image matches a colour video writer.
        mask_bgr = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

        # Add bounding boxes to frames/foreground masks
        for x1, y1, x2, y2 in bboxes:
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 3)
            cv2.rectangle(mask_bgr, (x1, y1), (x2, y2), (0,255,0), 3)
        processed_masks.append(mask_bgr)
        processed_frames.append(frame)
finally:
    # Release the capture even if processing raises part-way through.
    cap.release()

100%|██████████| 1806/1806 [02:12<00:00, 13.59it/s]


In [12]:
def _write_video(path, frames, video_fps, frame_size):
    """Write `frames` to `path` as a colour MP4.

    Args:
        path: Output file path.
        frames: Iterable of images; 2-D (single-channel) images are converted
            to BGR because the writer is opened with isColor=True.
        video_fps: Frame rate of the output video.
        frame_size: ``(width, height)`` of the output video.

    Raises:
        IOError: If the writer cannot be opened (e.g. missing output directory
            or unavailable codec).
    """
    # An explicit FourCC keeps the run reproducible; -1 relies on an interactive
    # codec-selection dialog that is only available on some platforms.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(path, fourcc, video_fps, frame_size, True)
    if not writer.isOpened():
        raise IOError(f"Could not open video writer for '{path}'")
    try:
        for image in frames:
            if image.ndim == 2:
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
            writer.write(image)
    finally:
        writer.release()


# Save detections first so they are kept even if video encoding fails
with open(f"output/{background_algorithm}_detections.pkl", "wb") as f:
    pickle.dump(frame_detections, f)

# Create videos with detected bounding boxes (frames and foreground masks)
_write_video(f'output/output_{background_algorithm}2.mp4', processed_frames, fps, (width, height))
_write_video(f'output/output_{background_algorithm}_mask.mp4', processed_masks, fps, (width, height))

In [9]:
# Parse the ground-truth annotation file
annotation = extract_rectangles_from_xml('data/ai_challenge_s03_c010-full_annotation.xml')

# The boxes stored under key 0 are treated as the parked cars
parked_cars = annotation[0]

# Build the per-frame GT from `starting_frame` onwards, dropping every box
# that is exactly equal to one of the parked-car boxes
gt_bbox = []
for frame_boxes in list(annotation.values())[starting_frame:]:
    moving_boxes = [box for box in frame_boxes if box not in parked_cars]
    gt_bbox.append([list(np.array(box)) for box in moving_boxes])

In [10]:
# Load the background-subtraction detections saved for `background_algorithm`
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open(f"output/{background_algorithm}_detections.pkl", "rb") as f:
    frame_detections = pickle.load(f)


def _mean_corner_distance(box_a, box_b):
    """Return the mean of the two corner-to-corner distances between two boxes.

    Elements 0-1 of each box are compared as one corner and elements 2-3 as the
    other (top-left / bottom-right for the ``(x1, y1, x2, y2)`` detections).
    """
    first_corner = np.linalg.norm((box_a[0]-box_b[0], box_a[1]-box_b[1]))
    second_corner = np.linalg.norm((box_a[2]-box_b[2], box_a[3]-box_b[3]))
    return np.mean([first_corner, second_corner])


# Filter those predictions that correspond to parked cars: a detection is
# dropped when it lies closer than `dist_thr` to any parked-car box.
dist_thr = 30
filtered_predictions = []
for detections in frame_detections:
    filtered_predictions.append([
        bbox for bbox in detections
        if not any(_mean_corner_distance(bbox, gt) < dist_thr for gt in parked_cars)
    ])

In [24]:
# Compute mAP of the parked-car-filtered detections against the GT boxes
# (parked cars were removed from the GT as well).
# NOTE(review): N=10 is forwarded to evaluation.mAP; its meaning is defined
# there -- TODO confirm.
mAP(filtered_predictions, gt_bbox, N=10)

0.24006015360456048