# DETR for TensorFlow

This notebook is a friendly tool for setting up and training my DETR object-detection and multi-instance classification models on the COCO dataset.

My models are implemented in TensorFlow from first principles, following the paper [End-to-End Object Detection with Transformers](https://ai.facebook.com/research/publications/end-to-end-object-detection-with-transformers) by Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko.

In [1]:
# Autoreload is switched off: the IPython magics below are wrapped in a string
# literal, so this cell only evaluates (and echoes) that string. Remove the
# surrounding triple quotes to re-enable live reloading of the custom modules.
"""
# automatically reload imports as they change (for debugging cusom imports)
%load_ext autoreload
%autoreload 2
"""

'\n# automatically reload imports as they change (for debugging cusom imports)\n%load_ext autoreload\n%autoreload 2\n'

In [2]:
"""
# Fiftyone data downloader
!pip install -q fiftyone
import fiftyone as fiftyone
import fiftyone.zoo as foz
fiftyone.config.default_ml_backend = "tensorflow"
fiftyone.config.show_progress_bars = True
"""

# ML
!pip install -q tensorflow-addons
import tensorflow as tf
import tensorflow_addons as tfa
import pandas as pd
import numpy as np

# file system
import sys
import os
import glob
import shutil
import json
from zipfile import ZipFile
!pip install -q wget

# custom imports
sys.path.insert(0, '/content/drive/MyDrive/GitHub/DETR_for_TF/ModelComponents')  # if using GDrive
import model
import model_pretrainer
import learning_rate_schedulers
import parameters
import datasets
import pipeline

Load Modules

In [3]:
# Default hyperparameters, taken from a ModelParameters object configured with
# dataset_name='COCO'. `params` is later unpacked into both the data pipeline
# and the DETR model constructor.
model_parameters = parameters.ModelParameters(dataset_name='COCO')
params = model_parameters.default_params()

# NOTE(review): this name mentions Fashionpedia although the notebook targets COCO,
# and it is not referenced again in the cells shown here — confirm it is still needed.
fashionpedia_filepaths = parameters.Filepaths()
# Distribution strategy with mixed precision requested; the models are built and
# compiled under STRATEGY.scope() further down.
strategies = parameters.StrategyOptions(mixed_precision=True)
STRATEGY = strategies.strategy()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: Tesla V100-SXM2-16GB, compute capability 7.0


## Load data

In [4]:
# COCO dataset helper. local_base_dir is a directory on the runtime's local disk;
# archive_base_dir points into the mounted Google Drive (presumably where the
# dataset archives are kept — confirm against datasets.COCOStandard).
coco = datasets.COCOStandard(local_base_dir='/content',
                             archive_base_dir='/content/drive/MyDrive/datasets/')

In [5]:
# Fetch/extract the dataset without downloading. Per the recorded output, folders
# that were already extracted are reused unless force_rebuild=True.
# NOTE(review): the recorded output for val2017.zip reports /content/COCO/images/test
# rather than a validation folder — confirm the val split gets its own directory.
coco.get_data(download=False, unzip=True, force_rebuild=False)

extracting: train2017.zip
/content/COCO/images/train found. Using previously extracted data. (Note: set force_rebuild=True to override)
/content/COCO/annotations/train found. Using previously extracted data. (Note: set force_rebuild=True to override)
/content/COCO/annotations/train found. Using previously extracted data. (Note: set force_rebuild=True to override)
/content/COCO/annotations/train found. Using previously extracted data. (Note: set force_rebuild=True to override)
extracting: val2017.zip
/content/COCO/images/test found. Using previously extracted data. (Note: set force_rebuild=True to override)
extracting: test2017.zip
/content/COCO/images/test found. Using previously extracted data. (Note: set force_rebuild=True to override)


Prepare dataframes

In [6]:
# Build the annotation tables for each split from the COCO JSON files.
# Per the recorded output, each call returns a dict with the keys 'annotations_df',
# 'categories_df' and 'meta_info', and bboxes are in normalized COCO format
# [xmin, ymin, width, height].
all_info_train = coco.prepare_COCO_from_json(subset='train', force_rebuild=False)
all_info_valid = coco.prepare_COCO_from_json(subset='val', force_rebuild=False)

Returns dictionary with keys: dict_keys(['annotations_df', 'categories_df', 'meta_info'])

annotations_df: Index(['id_num', 'width', 'height', 'coco_url', 'file_name', 'num_boxes',
       'bbox', 'category', 'segmentation', 'num_keypoints', 'area', 'iscrowd',
       'keypoints', 'image_path'],
      dtype='object')

note: bbox provided in normalized COCO format: [xmin, ymin, width, height]
Returns dictionary with keys: dict_keys(['annotations_df', 'categories_df', 'meta_info'])

annotations_df: Index(['id_num', 'width', 'height', 'coco_url', 'file_name', 'num_boxes',
       'bbox', 'category', 'segmentation', 'num_keypoints', 'area', 'iscrowd',
       'keypoints', 'image_path'],
      dtype='object')

note: bbox provided in normalized COCO format: [xmin, ymin, width, height]


In [7]:
# Sanity check: length of each split's annotations dataframe.
print('train samples:', len(all_info_train['annotations_df']))
print('valid samples:', len(all_info_valid['annotations_df']))

train samples: 64115
valid samples: 2693


Create TF Datasets

In [8]:
# Input pipeline configured from the same hyperparameter dict that is later passed
# to the DETR model, plus the helper that applies image augmentations to a dataset.
data_pipeline = pipeline.Pipeline(**params)
image_augmentations = pipeline.Augmentations()

In [9]:
# Both splits are generated with identical settings; only the annotations
# dataframe differs, so the shared keyword arguments are collected once.
generator_options = dict(text_pad_val=params['pad_value'],
                         cache_before_image_load=True,
                         decode_images=True,
                         stream_from_directory=False)

# training split, plus an augmented view of it
ds_train = data_pipeline.data_generator(labels_df=all_info_train['annotations_df'],
                                        **generator_options)
ds_train_augmented = image_augmentations.apply_image_augmentations(ds_train)
# NOTE(review): ds_train_augmented is not referenced by the training cells shown in
# this notebook (they read ds_train) — confirm that training without augmentation
# is intended.

# validation split (no augmentation)
ds_valid = data_pipeline.data_generator(labels_df=all_info_valid['annotations_df'],
                                        **generator_options)

{'image_id': TensorSpec(shape=(), dtype=tf.int64, name=None), 'category': TensorSpec(shape=(20, 1), dtype=tf.string, name=None), 'attribute': TensorSpec(shape=(20, 1), dtype=tf.string, name=None), 'bbox': TensorSpec(shape=(20, 4), dtype=tf.float32, name=None), 'num_objects': TensorSpec(shape=(), dtype=tf.int64, name=None), 'image': TensorSpec(shape=(560, 560, 3), dtype=tf.float32, name=None)} 

{'image_id': TensorSpec(shape=(), dtype=tf.int64, name=None), 'category': TensorSpec(shape=(14, 1), dtype=tf.string, name=None), 'attribute': TensorSpec(shape=(14, 1), dtype=tf.string, name=None), 'bbox': TensorSpec(shape=(14, 4), dtype=tf.float32, name=None), 'num_objects': TensorSpec(shape=(), dtype=tf.int64, name=None), 'image': TensorSpec(shape=(560, 560, 3), dtype=tf.float32, name=None)} 



In [10]:
# helper function
def prep_ds(val):
    """Turn one dataset element (a dict of features) into a positional tuple.

    Args:
        val: mapping with at least the keys 'image', 'category', 'attribute',
            'bbox' and 'num_objects'.

    Returns:
        The tuple (image, category, attribute, bbox, num_objects); any other
        keys in `val` are dropped.
    """
    image = val['image']
    category = val['category']
    attribute = val['attribute']
    bbox = val['bbox']
    num_objects = val['num_objects']
    return image, category, attribute, bbox, num_objects

## Prepare Model

Set Checkpoints

In [11]:
# Checkpoint locations on the mounted Google Drive: one directory per model.
# os.path.join is used for the file paths because gluing './name' onto a directory
# that already ends in '/' produced a redundant '/./' segment; the files referred
# to are unchanged.
CLASS_CHECKPOINT_DIR = '/content/drive/MyDrive/datasets/COCO/ModelCheckpoints/classification/'
class_checkpoint_path = os.path.join(CLASS_CHECKPOINT_DIR, 'coco_class.ckpt')

DETECTION_CHECKPOINT_DIR = '/content/drive/MyDrive/datasets/COCO/ModelCheckpoints/detection/'
detection_checkpoint_path = os.path.join(DETECTION_CHECKPOINT_DIR, 'coco_detect.ckpt')


# Keras callbacks that save weights only (not the full model) to the paths above.
class_checkpoint = tf.keras.callbacks.ModelCheckpoint(class_checkpoint_path,
                                                      save_weights_only=True)

detection_checkpoint = tf.keras.callbacks.ModelCheckpoint(detection_checkpoint_path,
                                                          save_weights_only=True)

### Load Detection & Classification Models

In [15]:
# The detection checkpoint is always restored into the base DETR model.
# True  -> the classifier checkpoint is then loaded into the classification model
# False -> only the detection checkpoint is used
LOAD_CLASS_WEIGHTS = True


def _build_optimizer(lr_schedule):
    """Return AdamW on `lr_schedule`, loss-scaled when mixed precision is active.

    Must be called inside STRATEGY.scope() so the optimizer is created under the
    distribution strategy, as the inline code it replaces was.
    """
    optimizer = tfa.optimizers.AdamW(learning_rate=lr_schedule, weight_decay=.001)
    if strategies.precision():  # if using mixed precision
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
    return optimizer


def _run_one_train_step(keras_model, batch):
    """Run a single `train_step` of `keras_model` on `batch` through STRATEGY."""
    @tf.function
    def _step(inputs):
        return keras_model.train_step(inputs)

    return STRATEGY.run(_step, args=(batch,))


def _latest_checkpoint_or_raise(checkpoint_dir):
    """Return the newest checkpoint prefix found in `checkpoint_dir`.

    Raises:
        FileNotFoundError: if the directory holds no checkpoint.
            tf.train.latest_checkpoint returns None in that case, and passing
            None on to load_weights fails with a much less helpful error.
    """
    checkpoint_prefix = tf.train.latest_checkpoint(checkpoint_dir)
    if checkpoint_prefix is None:
        raise FileNotFoundError(f'No checkpoint found in {checkpoint_dir!r}')
    return checkpoint_prefix


with STRATEGY.scope():
    # One small validation batch (3 samples); iterated each time a model has to
    # be built or smoke-tested.
    sample_batches = ds_valid.map(prep_ds).batch(3).take(1)

    # DETECTION MODEL
    # load base model
    detection_model = model.DETR(**params)

    # build: a forward pass on real data
    for val in sample_batches:
        out_detect_0 = detection_model(val)

    # compile
    lr = learning_rate_schedulers.LRScheduleAIAYN()
    optimizer_detect = _build_optimizer(lr)
    detection_model.compile(optimizer=optimizer_detect)  # loss functions are built in

    # test training step (runs before the weights are restored below)
    for val in sample_batches:
        out_detect_1 = _run_one_train_step(detection_model, val)

    # load weights
    detect_checkpoint_filename = _latest_checkpoint_or_raise(DETECTION_CHECKPOINT_DIR)
    detection_model.load_weights(detect_checkpoint_filename)


    # CLASSIFICATION
    # initialize: the classifier wraps the detection model as its base
    classification_model = model_pretrainer.DETR_MultiClassifier(base_model=detection_model,
                                                      vocab_dict=model_parameters.vocab_dict('COCO'),
                                                      hidden_dim=128,
                                                      name='COCO_Classifier_DETR')
    # build
    for val in sample_batches:
        out_class_0 = classification_model(val)

    # compile
    lr = learning_rate_schedulers.LRScheduleAIAYN(10.0)
    optimizer_class = _build_optimizer(lr)
    classification_model.compile(loss=tfa.losses.SigmoidFocalCrossEntropy(),
                                 optimizer=optimizer_class)

    # test
    for val in sample_batches:
        out_class_1 = _run_one_train_step(classification_model, val)

    # load weights
    if LOAD_CLASS_WEIGHTS:
        class_checkpoint_filename = _latest_checkpoint_or_raise(CLASS_CHECKPOINT_DIR)
        classification_model.load_weights(class_checkpoint_filename)
    else:
        # The classifier shares detection_model as its base, so the smoke-test
        # step above presumably also updated the freshly restored detection
        # weights (TODO confirm against DETR_MultiClassifier.train_step).
        # Restore them again so "only detection weights" really holds.
        detection_model.load_weights(detect_checkpoint_filename)



In [16]:
# examine: per-layer parameter counts of the classifier; the recorded output lists
# the DETR base model and its sub-layers inside it
classification_model.summary()

Model: "COCO_Classifier_DETR"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
DETR (DETR)                  multiple                  12912498  
_________________________________________________________________
EncoderBackbone (EncoderBack multiple                  7768569   
_________________________________________________________________
BackboneNeck (BackboneNeck)  multiple                  361728    
_________________________________________________________________
ImageEncoderAttention (Image multiple                  1666048   
_________________________________________________________________
DecoderPrep (DecoderPrep)    multiple                  24576     
_________________________________________________________________
DecoderBlock_0 (DecoderBlock multiple                  395776    
_________________________________________________________________
DecoderBlock_1 (DecoderBlock multiple         

# Training

### Train Classifier Model

In [None]:
# Classifier training run: hyperparameters first, then the two input pipelines.
NUM_EPOCHS = 20
BATCH_SIZE = 64

# NOTE(review): this trains on ds_train; the augmented dataset built earlier in the
# notebook (ds_train_augmented) is not used — confirm that is intended.
class_train_batches = (ds_train.map(prep_ds)
                       .shuffle(5000)
                       .batch(BATCH_SIZE)
                       .prefetch(tf.data.experimental.AUTOTUNE))
class_valid_batches = (ds_valid.map(prep_ds)
                       .batch(BATCH_SIZE)
                       .prefetch(tf.data.experimental.AUTOTUNE))

# Weights are checkpointed through class_checkpoint, and training stops on a NaN loss.
# (Add steps_per_epoch=2 for a quick smoke run.)
classification_model.fit(class_train_batches,
                         epochs=NUM_EPOCHS,
                         validation_data=class_valid_batches,
                         callbacks=[class_checkpoint,
                                    tf.keras.callbacks.TerminateOnNaN()])

Epoch 6/20
 133/1002 [==>...........................] - ETA: 5:26 - loss: 9.4413 - Loss: 0.0000e+00 - Category_Loss: 0.0000e+00 - Attribute_Loss: 0.0000e+00 - Box_Loss: 0.0000e+00 - Existence_Loss: 0.0000e+00 - IOU: 0.0000e+00 - mAP_50: 0.0000e+00 - mAP_50_95: 0.0000e+00

In [None]:
# Toggle for writing the detection (base) model's current weights to its checkpoint.
save_weights_to_base = True

# save updated weights into base model
# (writes detection_model's weights to detection_checkpoint_path; presumably meant to
# be run after classifier training so the base checkpoint picks up the updated
# weights — confirm)
if save_weights_to_base:
    detection_model.save_weights(detection_checkpoint_path)

### Train Detection Model

In [None]:
# Detection training run: hyperparameters first, then the two input pipelines.
NUM_EPOCHS = 1
BATCH_SIZE = 64

# NOTE(review): this trains on ds_train; the augmented dataset built earlier in the
# notebook (ds_train_augmented) is not used — confirm that is intended.
detect_train_batches = (ds_train.map(prep_ds)
                        .shuffle(5000)
                        .batch(BATCH_SIZE)
                        .prefetch(tf.data.experimental.AUTOTUNE))
detect_valid_batches = (ds_valid.map(prep_ds)
                        .batch(BATCH_SIZE)
                        .prefetch(tf.data.experimental.AUTOTUNE))

# NOTE(review): steps_per_epoch=2 together with NUM_EPOCHS = 1 looks like a leftover
# smoke-test setting (the classifier cell has it commented out) — confirm before a
# real training run.
detection_model.fit(detect_train_batches,
                    epochs=NUM_EPOCHS,
                    validation_data=detect_valid_batches,
                    callbacks=[detection_checkpoint,
                               tf.keras.callbacks.TerminateOnNaN()],
                    steps_per_epoch=2)