# Global Tracking Transformers

## Requirements
- Linux or macOS with Python ≥ 3.6
- PyTorch ≥ 1.8.
  Install them together at [pytorch.org](https://pytorch.org) to make sure of this. Note, please check
  PyTorch version matches that is required by Detectron2.
- Detectron2: follow [Detectron2 installation instructions](https://detectron2.readthedocs.io/tutorials/install.html).

In [1]:
try:
    import detectron2
except ImportError:
    import torch
    TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
    CUDA_VERSION = torch.__version__.split("+")[-1] if torch.cuda.is_available() else 'cpu'
    print("Detectron2 not found. Installing now..")
    !python -m pip install detectron2 -f \
        https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html -q

In [2]:
from os.path import exists as path_exists
if not path_exists('GTR'):
    !git clone https://github.com/xingyizhou/GTR.git --recurse-submodules
    # !curl https://www.dropbox.com/s/eufigxmmkv5woop/RealBasicVSR.pth?dl=0 --create-dirs -o $ModelWeights

Cloning into 'GTR'...
remote: Enumerating objects: 128, done.[K
remote: Counting objects: 100% (128/128), done.[K
remote: Compressing objects: 100% (104/104), done.[Kpressing objects:   5% (6/104)[K
remote: Total 128 (delta 21), reused 126 (delta 19), pack-reused 0[K
Receiving objects: 100% (128/128), 2.66 MiB | 26.96 MiB/s, done.
Resolving deltas: 100% (21/21), done.
Submodule 'third_party/CenterNet2' (https://github.com/xingyizhou/CenterNet2) registered for path 'third_party/CenterNet2'
Cloning into '/home/studio-lab-user/global-tracking-transformers-demo/GTR/third_party/CenterNet2'...
remote: Enumerating objects: 13905, done.        
remote: Counting objects: 100% (3017/3017), done.        
remote: Compressing objects: 100% (1300/1300), done.        
remote: Total 13905 (delta 1997), reused 2437 (delta 1698), pack-reused 10888        
Receiving objects: 100% (13905/13905), 5.70 MiB | 27.67 MiB/s, done.
Resolving deltas: 100% (9890/9890), done.
Submodule path 'third_party/Center

## Run demo

In [29]:
if 'GTR' not in os.getcwd(): os.chdir('GTR')

In [42]:
import argparse
from faulthandler import disable
import glob
import multiprocessing as mp
import numpy as np
import os
import tempfile
import time
import warnings
import cv2
import tqdm
import sys

from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger
from detectron2.utils.colormap import *

sys.path.insert(0, 'third_party/CenterNet2/projects/CenterNet2/')
from centernet.config import add_centernet_config
from gtr.config import add_gtr_config

from gtr.predictor import VisualizationDemo

In [66]:
try:
    import importlib.resources as pkg_resources
except ImportError:
    # Try backported to PY<37 `importlib_resources`.
    import importlib_resources as pkg_resources

from detectron2 import utils

template = pkg_resources.read_text(utils, 'colormap.py')

In [70]:
from detectron2.utils.colormap import _COLORS
import random

In [71]:
def random_colors(N, rgb=False, maximum=255):
    """
    Args:
        N (int): number of unique colors needed
        rgb (bool): whether to return RGB colors or BGR colors.
        maximum (int): either 255 or 1
    Returns:
        ndarray: a list of random_color
    """
    indices = random.sample(range(len(_COLORS)), N)
    ret = [_COLORS[i] * maximum for i in indices]
    if not rgb:
        ret = [x[::-1] for x in ret]
    return ret

In [72]:
def setup_cfg(args):
    cfg = get_cfg()
    if args.cpu:
        cfg.MODEL.DEVICE="cpu"
    add_centernet_config(cfg)
    add_gtr_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # Set score_threshold for builtin models
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
    cfg.freeze()
    return cfg

In [73]:
def test_opencv_video_format(codec, file_ext):
    with tempfile.TemporaryDirectory(prefix="video_format_test") as dir:
        filename = os.path.join(dir, "test_file" + file_ext)
        writer = cv2.VideoWriter(
            filename=filename,
            fourcc=cv2.VideoWriter_fourcc(*codec),
            fps=float(30),
            frameSize=(10, 10),
            isColor=True,
        )
        [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)]
        writer.release()
        if os.path.isfile(filename):
            return True
        return False

In [74]:
# constants
WINDOW_NAME = "GTR"

class args:
    config_file = "configs/GTR_TAO_DR2101.yaml"
    confidence_threshold = 0.5
    cpu = True
    input = None
    video_input = 'docs/yfcc_v_acef1cb6d38c2beab6e69e266e234f.mp4'
    output = 'output/demo_yfcc.mp4'
    opts = ['MODEL.WEIGHTS','models/GTR_TAO_DR2101.pth']

In [78]:
mp.set_start_method("spawn", force=True)
# args = get_parser().parse_args()
setup_logger(name="fvcore")
logger = setup_logger()
logger.info("Arguments: " + str(args))

cfg = setup_cfg(args)
demo = VisualizationDemo(cfg)

if args.input: # image folder
    if len(args.input) == 1:
        args.input = glob.glob(os.path.expanduser(args.input[0]))
        assert args.input, "The input path(s) was not found"
    assert len(args.input) > 1, "Input must be more than one image"
    args.input = sorted(args.input)
    frames = []
    for path in args.input:
        img = read_image(path, format="BGR")
        frames.append(img)

    for path, visualized_output in zip(
        args.input, demo.run_on_images(frames)):
        if args.output:
            if not os.path.exists(args.output):
                os.mkdir(args.output)
            out_filename = os.path.join(args.output, os.path.basename(path))
            cv2.imwrite(out_filename, visualized_output)
        else:
            cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.imshow(WINDOW_NAME, visualized_output)
            if cv2.waitKey(0) == 27:
                break  # esc to quit
elif args.video_input:
    video = cv2.VideoCapture(args.video_input)
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frames_per_second = video.get(cv2.CAP_PROP_FPS)
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    basename = os.path.basename(args.video_input)
    codec, file_ext = (
        ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
    )
    if codec == ".mp4v":
        warnings.warn("x264 codec not available, switching to mp4v")
    if args.output:
        if os.path.isdir(args.output):
            output_fname = os.path.join(args.output, basename)
            output_fname = os.path.splitext(output_fname)[0] + file_ext
        else:
            folder_name = os.path.dirname(args.output)
            if not os.path.exists(folder_name):
                os.makedirs(folder_name)
            output_fname = args.output
        # assert not os.path.isfile(output_fname), output_fname
        output_file = cv2.VideoWriter(
            filename=output_fname,
            # some installation of opencv may not support x264 (due to its license),
            # you can try other format (e.g. MPEG)
            fourcc=cv2.VideoWriter_fourcc(*codec),
            fps=float(frames_per_second),
            frameSize=(width, height),
            isColor=True,
        )
    assert os.path.isfile(args.video_input)
    for vis_frame in demo.run_on_video(video):
        if args.output:
            output_file.write(vis_frame)
        else:
            cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
            cv2.imshow(basename, vis_frame)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
    video.release()
    if args.output:
        output_file.release()
    else:
        cv2.destroyAllWindows()
print('Done')

[32m[03/27 23:03:12 detectron2]: [0mArguments: <class '__main__.args'>
[32m[03/27 23:03:14 fvcore.common.checkpoint]: [0m[Checkpointer] Loading from models/GTR_TAO_DR2101.pth ...
[34mroi_heads.box_predictor.0.freq_weight[0m
[34mroi_heads.box_predictor.1.freq_weight[0m
[34mroi_heads.box_predictor.2.freq_weight[0m
  [35mroi_heads.pos_emb.weight[0m


OpenCV: FFMPEG: tag 0x34363278/'x264' is not supported with codec id 27 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x31637661/'avc1'


NameError: name 'random_colors' is not defined

In [77]:
random_colors

<function __main__.random_colors(N, rgb=False, maximum=255)>

## Reference

```bibtex
@inproceedings{zhou2022global,
  title={Global Tracking Transformers},
  author={Zhou, Xingyi and Yin, Tianwei and Koltun, Vladlen and Kr{\"a}henb{\"u}hl, Philipp},
  booktitle={CVPR},
  year={2022}
}
```