# Task 2: Multi-target single-camera (MTSC) tracking

In [1]:
import os
import gc
import bz2
import pickle
import _pickle as cPickle
import torch
import cv2
import numpy as np
from PIL import Image
from VehicleDetection import *
from itertools import chain
from tqdm import tqdm


# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# Import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2_dataset_loader import *
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

In [2]:
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True
torch.cuda.empty_cache()
# PATHS
DATASET = "../datasets/aic19-track1-mtmc-train/train/"
SEQUENCES = [DATASET+seq+"/" for seq in os.listdir(DATASET)]
CAMERAS = [[seq+cam+"/" for cam in os.listdir(seq)]for seq in SEQUENCES]
SEQUENCES = [seq.replace(DATASET, "").replace("/", "") for seq in SEQUENCES]
CAMERAS = dict(zip(SEQUENCES, CAMERAS))

# DEFINE SPLITS
train = ["S01", "S04"]
test = ["S03"]

# Model Parameters
selected_model = 'COCO-Detection/retinanet_R_101_FPN_3x.yaml'

In [3]:
def extract_video(path, div_frames, skip):
    vidcap = cv2.VideoCapture(path)
    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    num_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    frames = []
    # Read Half the frames 
    for _ in range(num_frames//div_frames):
        for i in range(skip):
            frame = vidcap.read()[1]
            if i == 0:
                frames.append(frame.astype(np.float32)) # Reduce soze
    return iter(frames) # Iterator

def readDetections(path):
  #Generates detection dictionary where the frame number is the key and the values are the info of the corresponding detection/s
  
    with open(path) as f:
        lines = f.readlines()

    detections = {}
    for line in lines:
        data = line.split(',')
        if data[0] in detections:
            detections[data[0]].append(VehicleDetection(int(data[0]), int(data[1]), float(data[2]), float(data[3]), float(data[4]), float(data[5]), float(data[6])))
        else:
            detections[data[0]] = [VehicleDetection(int(data[0]), int(data[1]), float(data[2]), float(data[3]), float(data[4]), float(data[5]), float(data[6]))]

    return detections


In [9]:
seq_data = []

# For each training seq move through cameras and extact even frames and even gt
for i, seq in enumerate(test):
    for j, cam in tqdm(enumerate(CAMERAS[seq]), total = len(CAMERAS[seq]), desc = f"Processing {seq}..."):
        data = {}
        data["div"] = 1
        data["base_path"] = cam + "frames/" # To Save Frames
        data["gt_detected"] = readDetections(cam + "gt/gt.txt")
        data["gt_detected"] = {key:data["gt_detected"][key] for key in data["gt_detected"].keys() if int(key) % data["div"] == 0}
        data["frames"] = extract_video(cam + "vdo.avi", 10,data["div"])
        seq_data.append(data)


Processing S03...: 100%|██████████| 6/6 [00:32<00:00,  5.39s/it]


In [10]:

DatasetCatalog.clear()
DatasetCatalog.register("AICity_eval" , lambda d=seq_data: get_AICity_dicts_big(d))
MetadataCatalog.get("AICity_eval").set(thing_classes=["car"])
AICity_metadata = MetadataCatalog.get("AICity_eval")

gc.collect()

44518

In [11]:
# Training
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(selected_model))
cfg.DATASETS.VAL = ('AICity_eval',)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(selected_model)  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 1e-3
cfg.SOLVER.MAX_ITER = 5000
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 # (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.MODEL.BACKBONE.FREEZE_AT = 1

cfg.OUTPUT_DIR = "./results_train_seq01-04"

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

evaluator = COCOEvaluator("AICity_eval", output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "AICity_eval")
print(inference_on_dataset(predictor.model, val_loader, evaluator))
!rm -rf last_id.pkl

Loading config /home/group05/anaconda3/lib/python3.7/site-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


[32m[04/06 10:21:34 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|    car     | 489          |
|            |              |[0m
[32m[04/06 10:21:34 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[04/06 10:21:34 d2.data.common]: [0mSerializing 481 elements to byte tensors and concatenating them all ...
[32m[04/06 10:21:34 d2.data.common]: [0mSerialized dataset takes 0.13 MiB
[32m[04/06 10:21:34 d2.evaluation.evaluator]: [0mStart inference on 481 batches
[32m[04/06 10:21:35 d2.evaluation.evaluator]: [0mInference done 11/481. Dataloading: 0.0117 s/iter. Inference: 0.0447 s/iter. Eval: 0.0002 s/iter. Total: 0.0566 s/iter. ETA=0:00:26
[32m[04/06 10:21:40 d2.evaluation.evaluator]: [0mInference done 91/481. Dataloading: 0.0172 s/iter. Inference: 0.0448 s/iter. Eval: 0.0

AssertionError: Results do not correspond to current coco set