# Task 2: Multi-target single-camera (MTSC) tracking

In [3]:
import os
import gc
import bz2
import pickle
import _pickle as cPickle
import torch
import cv2
import numpy as np
from PIL import Image
from VehicleDetection import *
from itertools import chain
from tqdm import tqdm
# from pqdm.processes import pqdm


# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# Import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2_dataset_loader import *


In [4]:
# cuDNN: enable it together with its benchmark mode, then release cached CUDA memory
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True
torch.cuda.empty_cache()

# PATHS
DATASET = "../datasets/aic19-track1-mtmc-train/train/"
# os.listdir() returns entries in arbitrary order and also lists plain files,
# so sort the names and keep directories only to get a reproducible layout.
SEQUENCES = sorted(seq for seq in os.listdir(DATASET) if os.path.isdir(DATASET + seq))
# Sequence name -> list of its camera directories (every path ends with "/")
CAMERAS = {
    seq: [
        DATASET + seq + "/" + cam + "/"
        for cam in sorted(os.listdir(DATASET + seq))
        if os.path.isdir(DATASET + seq + "/" + cam)
    ]
    for seq in SEQUENCES
}

# DEFINE SPLITS
train = ["S01", "S04"]
test = ["S03"]

# Model Parameters
selected_model = 'COCO-Detection/retinanet_R_101_FPN_3x.yaml'

In [1]:
def extract_video(path, div_frames, skip):
    """Read a subsampled prefix of a video into memory.

    The capture is read in ``num_frames // div_frames`` groups of ``skip``
    consecutive frames; only the first frame of every group is kept. Reading
    stops early if the capture fails to deliver a frame.

    Args:
        path: Path of the video file handed to ``cv2.VideoCapture``.
        div_frames: Divisor applied to the reported frame count to obtain the
            number of groups to read.
        skip: Number of frames read per group (one kept, the rest dropped).

    Returns:
        An iterator over the kept frames, each converted to ``np.float32``.
    """
    vidcap = cv2.VideoCapture(path)
    frames = []
    try:
        num_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        for _ in range(num_frames // div_frames):
            for i in range(skip):
                ok, frame = vidcap.read()
                # The reported frame count can exceed what is actually decodable;
                # stop cleanly instead of failing on a missing frame.
                if not ok or frame is None:
                    return iter(frames)
                if i == 0:
                    # NOTE(review): float32 is kept for compatibility with existing
                    # consumers; if frames arrive as uint8 this enlarges them - confirm.
                    frames.append(frame.astype(np.float32))
    finally:
        # Always free the underlying capture handle, including on early exit or error
        vidcap.release()
    return iter(frames)  # Iterator

def readDetections(path):
    """Parse a comma-separated detection file into a per-frame dictionary.

    Every non-blank line must hold at least seven comma-separated fields. The
    first seven are converted (int, int, then five floats) and passed
    positionally to ``VehicleDetection``. Blank lines are skipped.

    Args:
        path: Path of the text file to read.

    Returns:
        dict mapping the first field of each line (the frame number, kept as
        the raw string) to the list of ``VehicleDetection`` objects built from
        the lines sharing that field, in file order.
    """
    detections = {}
    with open(path) as f:
        for line in f:
            # Blank lines (e.g. padding at the end of the file) carry no detection
            if not line.strip():
                continue
            data = line.split(',')
            detection = VehicleDetection(
                int(data[0]), int(data[1]),
                float(data[2]), float(data[3]), float(data[4]), float(data[5]), float(data[6]),
            )
            # Keys stay as strings: callers convert them with int() where needed
            detections.setdefault(data[0], []).append(detection)

    return detections


In [6]:
# Ground-truth detections of every camera in the test split, keyed by camera directory path
data = {}
for i, seq in enumerate(test):
    cams = CAMERAS[seq]
    progress = tqdm(enumerate(cams), total=len(cams), desc=f"Processing {seq}...")
    for j, cam in progress:
        gt_file = cam + "gt/gt.txt"
        data[cam] = readDetections(gt_file)

Processing S03...: 100%|██████████| 6/6 [00:00<00:00, 252.69it/s]


In [8]:
# Persist the detections of each camera as one pickle file under cam_pred/
# open(..., "wb") does not create missing directories, so make sure it exists first
os.makedirs("cam_pred", exist_ok=True)
for key, detections in data.items():
    # Keys are camera directory paths ending in "/"; the last path component is
    # the camera name (e.g. ".../S03/c010/" -> "c010"), whatever its length.
    camName = os.path.basename(os.path.normpath(key))
    with open(f'cam_pred/seq3_{camName}_gt.pkl', "wb") as output_file:
        pickle.dump(detections, output_file)

In [5]:
seq_data = []

# For every camera of every training sequence, collect its ground truth and a
# subsampled set of video frames. With div = 1 no ground-truth frame is dropped.
for i, seq in enumerate(train):
    cams = CAMERAS[seq]
    for j, cam in tqdm(enumerate(cams), total=len(cams), desc=f"Processing {seq}..."):
        div = 1
        gt_all = readDetections(cam + "gt/gt.txt")
        data = {
            "div": div,
            "base_path": cam + "frames/",  # To Save Frames
            # Keep only the frames whose number is a multiple of div
            "gt_detected": {frame: dets for frame, dets in gt_all.items() if int(frame) % div == 0},
            "frames": extract_video(cam + "vdo.avi", 30, div),
        }
        seq_data.append(data)


Processing S01...: 100%|██████████| 5/5 [00:03<00:00,  1.46it/s]
Processing S04...: 100%|██████████| 25/25 [00:04<00:00,  5.09it/s]


In [6]:

# Empty the catalog first - presumably so this cell can be re-run without a
# duplicate-registration error (TODO confirm).
DatasetCatalog.clear()


def _aicity_train_dicts(d=seq_data):
    # seq_data is captured as a default argument when this function is defined
    return get_AICity_dicts_big(d)


DatasetCatalog.register("AICity_train", _aicity_train_dicts)

# Single-class dataset metadata
AICity_metadata = MetadataCatalog.get("AICity_train")
AICity_metadata.set(thing_classes=["car"])

gc.collect()

20

In [7]:
# Training
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(selected_model))
cfg.DATASETS.TRAIN = ("AICity_train",)
#cfg.DATASETS.VAL = ('AICity_valid',)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(selected_model)  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 1e-3
cfg.SOLVER.MAX_ITER = 500
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 # (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
cfg.MODEL.BACKBONE.FREEZE_AT = 1

cfg.OUTPUT_DIR = "./results_train_seq01-04"

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

Loading config /home/group05/anaconda3/lib/python3.7/site-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


[32m[04/06 09:57:33 d2.engine.defaults]: [0mModel:
RetinaNet(
  (backbone): FPN(
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelP6P7(
      (p6): Conv2d(2048, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (p7): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    )
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
   

The checkpoint state_dict contains keys that are not used by the model:
  [35mpixel_mean[0m
  [35mpixel_std[0m


[32m[04/06 09:58:12 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[04/06 09:58:55 d2.utils.events]: [0m eta: 0:18:46  iter: 19  total_loss: 2.068  loss_cls: 1.606  loss_box_reg: 0.5386  time: 2.0830  data_time: 0.0223  lr: 3.8962e-05  max_mem: 11849M
[32m[04/06 09:59:05 d2.utils.events]: [0m eta: 0:04:09  iter: 39  total_loss: 0.9569  loss_cls: 0.417  loss_box_reg: 0.493  time: 1.2646  data_time: 0.0041  lr: 7.8922e-05  max_mem: 11849M
[32m[04/06 09:59:12 d2.utils.events]: [0m eta: 0:01:49  iter: 59  total_loss: 0.6539  loss_cls: 0.2873  loss_box_reg: 0.3734  time: 0.9369  data_time: 0.0038  lr: 0.00011888  max_mem: 11849M
[32m[04/06 09:59:28 d2.utils.events]: [0m eta: 0:01:44  iter: 79  total_loss: 0.4684  loss_cls: 0.2033  loss_box_reg: 0.2795  time: 0.9060  data_time: 0.0041  lr: 0.00015884  max_mem: 11849M
[32m[04/06 09:59:34 d2.utils.events]: [0m eta: 0:01:39  iter: 99  total_loss: 0.3716  loss_cls: 0.1498  loss_box_reg: 0.2496  time: 0.7851  data_time: 0.0040  lr: 0.0001988  max_mem: 11849M
[32m[04/06 09:59:45 d2.utils.events]: 