In [None]:
!pip install --upgrade git+https://github.com/EmGarr/kerod.git

In [None]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

In [1]:
import sys

# Make the sources under ../../kerod/src/ (relative to the working directory)
# importable. The entry is appended, so it has the lowest lookup priority.
# NOTE(review): the first cell also pip-installs kerod, so the installed copy
# presumably takes precedence over this path - confirm which one is intended.
# The membership test keeps the cell idempotent: re-running it does not add
# the same entry to sys.path a second time.
if "../../kerod/src/" not in sys.path:
    sys.path.append("../../kerod/src/")

# Download COCO/2017

Download and preprocess COCO/2017 into the following format (required by the kerod object detection networks):

```python
dataset = {
        'images' : A tensor of float32 and shape [1, height, width, 3],
        'images_info': A tensor of float32 and shape [1, 2] ,
        'bbox': A tensor of float32 and shape [1, num_boxes, 4],
        'labels': A tensor of int32 and shape [1, num_boxes],
        'num_boxes': A tensor of int32 and shape [1, 1],
        'weights': A tensor of float32 and shape [1, num_boxes]
    }
```

If you need to download the dataset into a specific directory, you can use the `data_dir` argument of `tfds.load`.

In [2]:
import functools
import tensorflow as tf
import tensorflow_datasets as tfds
from kerod.dataset.preprocessing import preprocess, expand_dims_for_single_batch
from kerod.core.standard_fields import BoxField

def _prepare_coco_split(split_ds, **preprocess_kwargs):
    """Turn a raw tfds COCO split into the input pipeline given to the model.

    The steps are applied in this order:
      1. `preprocess`, called with `preprocess_kwargs`;
      2. a filter on the number of ground-truth boxes left after preprocessing;
      3. `expand_dims_for_single_batch`;
      4. prefetching.

    Args:
        split_ds: The dataset returned by `tfds.load` for one split.
        **preprocess_kwargs: Keyword arguments forwarded to `preprocess`.

    Returns:
        The transformed `tf.data.Dataset`.
    """
    autotune = tf.data.experimental.AUTOTUNE
    prepared = split_ds.map(
        functools.partial(preprocess, **preprocess_kwargs),
        num_parallel_calls=autotune,
    )
    # Keeps only the examples whose box tensor has a first dimension > 1.
    # NOTE(review): the original comment said "filter examples with no boxes",
    # but `> 1` also drops examples with exactly one box - confirm whether
    # `> 0` was intended.
    prepared = prepared.filter(
        lambda inputs, targets: tf.shape(targets[BoxField.BOXES])[0] > 1)
    prepared = prepared.map(expand_dims_for_single_batch, num_parallel_calls=autotune)
    return prepared.prefetch(autotune)


# Training split: shuffled files; `preprocess` runs with its default
# horizontal_flip setting.
ds_train, ds_info = tfds.load(name="coco/2017", split="train", shuffle_files=True, with_info=True)
ds_train = _prepare_coco_split(ds_train, bgr=True)

# Validation split: file order preserved and horizontal flipping disabled.
ds_test = tfds.load(name="coco/2017", split="validation", shuffle_files=False)
ds_test = _prepare_coco_split(ds_test, horizontal_flip=False, bgr=True)

In [3]:
# Display the dataset metadata that tfds.load returned alongside the train
# split (requested with with_info=True).
ds_info

tfds.core.DatasetInfo(
    name='coco',
    full_name='coco/2017/1.1.0',
    description="""
    COCO is a large-scale object detection, segmentation, and
    captioning dataset.
    
    Note:
     * Some images from the train and validation sets don't have annotations.
     * Coco 2014 and 2017 uses the same images, but different train/val/test splits
     * The test split don't have any annotations (only images).
     * Coco defines 91 classes but the data only uses 80 classes.
     * Panotptic annotations defines defines 200 classes but only uses 133.
    """,
    config_description="""
    
    This version contains images, bounding boxes and labels for the 2017 version.
    
    """,
    homepage='http://cocodataset.org/#home',
    data_path='/home/nikasmohan/tensorflow_datasets/coco/2017/1.1.0',
    download_size=25.20 GiB,
    dataset_size=24.98 GiB,
    features=FeaturesDict({
        'image': Image(shape=(None, None, 3), dtype=tf.uint8),
        'image/filename': Text(shape=(

In [4]:
# Pull a single element out of the training pipeline to inspect what the
# model will receive.
next(iter(ds_train))

({'images': <tf.Tensor: shape=(1, 800, 1199, 3), dtype=float32, numpy=
  array([[[[59.      , 58.      , 60.      ],
           [59.      , 58.      , 60.      ],
           [59.      , 58.      , 60.      ],
           ...,
           [52.834473, 54.834473, 54.834473],
           [52.30066 , 54.30066 , 54.30066 ],
           [52.      , 54.      , 54.      ]],
  
          [[58.39875 , 57.39875 , 59.39875 ],
           [58.48914 , 57.48914 , 59.48914 ],
           [58.649605, 57.649605, 59.649605],
           ...,
           [52.081882, 54.081882, 54.081882],
           [52.029503, 54.029503, 54.029503],
           [52.      , 54.      , 54.      ]],
  
          [[57.33125 , 56.33125 , 58.33125 ],
           [57.58212 , 56.58212 , 58.58212 ],
           [58.027493, 57.027493, 59.027493],
           ...,
           [50.74568 , 52.74568 , 52.74568 ],
           [51.548073, 53.548073, 53.548073],
           [52.      , 54.      , 54.      ]],
  
          ...,
  
          [[31.171906, 

# Load and train the network


In [5]:
from kerod.core.standard_fields import BoxField
from kerod.core.learning_rate_schedule import LearningRateScheduler
from kerod.model import factory

from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

# The COCO label names come from the dataset metadata; the model is built for
# that many classes.
classes = ds_info.features['objects']['label'].names
num_classes = len(classes)

model_faster_rcnn = factory.build_model(num_classes)

# SGD with momentum. `compile` receives loss=None.
# NOTE(review): presumably the kerod model registers its own losses
# internally - confirm in kerod.
base_lr = 0.02
optimizer = tf.keras.optimizers.SGD(learning_rate=base_lr, momentum=0.9)
model_faster_rcnn.compile(optimizer=optimizer, loss=None)

# LearningRateScheduler numbers epochs from 0, whereas the `fit` log numbers
# them from 1. The first milestone (8) therefore takes effect during the epoch
# that the log displays as 9 (8 + 1).
# NOTE(review): the second positional argument (1) is not named here - check
# its meaning against LearningRateScheduler's signature.
callbacks = [
    LearningRateScheduler(base_lr, 1, epochs=[8, 10]),
    TensorBoard(),
    ModelCheckpoint('checkpoints/'),
]

model_faster_rcnn.fit(
    ds_train,
    validation_data=ds_test,
    epochs=12,
    callbacks=callbacks,
)

Downloading data from https://files.heuritech.com/raw_files/resnet50_tensorpack_conversion.h5
Epoch 1/12
<dtype: 'bool'> <dtype: 'float32'> <dtype: 'float32'>
Instructions for updating:
Use fn_output_signature instead
Instructions for updating:
Use fn_output_signature instead
<dtype: 'bool'> <dtype: 'float32'> <dtype: 'float32'>
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
<dtype: 'bool'> <dtype: 'float32'> <dtype: 'float32'>
<dtype: 'bool'> <dtype: 'float32'> <dtype: 'float32'>
      1/Unknown - 37s 37s/step - loss: 72.8336 - rpn_recall: 0.3667 - region_proposal_network_classification_loss: 7.8147 - region_proposal_network_localization_loss: 2.7242 - accuracy: 0.0000e+00 - fg_accuracy: 0.0000e+00 - false_negative: 0.0000e+00 - fast_rcnn_classification_loss: 61.2057 - fast_rcnn_loca

KeyboardInterrupt: 

In [None]:
# Save the trained weights to `final_weights.h5` so they can be reused for serving.
model_faster_rcnn.save_weights('final_weights.h5')

In [None]:
# Export the model for serving purposes.
# NOTE(review): 'serving' is presumably the output directory of the exported
# model - confirm against kerod's `export_for_serving`.
model_faster_rcnn.export_for_serving('serving')

## Visualisation of a few images

In [None]:
# Every other import in this notebook comes from the `kerod` package (the one
# installed in the first cell), so BoxDrawer is imported from there as well.
from kerod.utils.drawing import BoxDrawer

drawer = BoxDrawer(classes)

# Iterate over `ds_test`, the preprocessed validation pipeline built earlier:
# it yields the (inputs, ground_truths) pairs unpacked here. The raw `ds_val`
# dataset is only created further down and yields plain tfds dictionaries.
for i, (inputs, ground_truths) in enumerate(ds_test):
    boxes, scores, labels, valid_detections = model_faster_rcnn.predict_on_batch(inputs)
    # Draw the predicted boxes on top of the input image.
    drawer(
        inputs['images'],
        boxes,
        scores=scores,
        labels=labels,
        num_valid_detections=valid_detections,
    )
    # Stop once six images (indices 0 to 5) have been drawn.
    if i == 5:
        break

## Tensorboard

In [None]:
# Load the TensorBoard notebook extension.
%load_ext tensorboard
# Start TensorBoard on the `logs` directory.
# NOTE(review): the training cell creates `TensorBoard()` without arguments;
# `logs` is presumably that callback's default output directory - confirm.
%tensorboard --logdir logs

## Coco evaluation


### Load the dataset

In [None]:
import tensorflow_datasets as tfds

# Load the raw COCO/2017 validation split (file order kept, no preprocessing
# applied here) together with the dataset metadata.
ds_val, ds_info = tfds.load(name="coco/2017", split="validation", shuffle_files=False, with_info=True)
# `category_ids[k]` is the COCO category id of the k-th label index,
# e.g. 0 -> 1, 2 -> 3, 79 -> 90.
# The ids span 1..90 with ten values left out, which gives 80 entries in
# increasing order.
category_ids = [
    category_id
    for category_id in range(1, 91)
    if category_id not in (12, 26, 29, 30, 45, 66, 68, 69, 71, 83)
]

### Super dirty but the evaluation works

1. perform the analysis

In [None]:
import json
import numpy as np
import tensorflow as tf

from kerod.core.standard_fields import DatasetField, BoxField             
from kerod.core.box_ops import convert_to_center_coordinates              
from kerod.dataset.preprocessing import resize_to_min_dim                 
                                                                       
def _to_coco_xywh(box_yxyx):
    """Convert one box from [y_min, x_min, y_max, x_max] to COCO's [x_min, y_min, width, height].

    Args:
        box_yxyx: A sequence of four numbers ordered [y_min, x_min, y_max, x_max].

    Returns:
        A list of four Python floats [x_min, y_min, width, height], each
        rounded to 4 decimals.
    """
    y_min, x_min, y_max, x_max = box_yxyx
    # Width and height are computed before the float() conversion so the
    # subtraction happens in the dtype of the incoming values.
    return [round(float(c), 4) for c in (x_min, y_min, x_max - x_min, y_max - y_min)]


# One dictionary per detection, in the layout written to `coco_results.json`.
results = []

for image_index, example in enumerate(ds_val):
    # Light progress report (every 100 images) instead of one line per image.
    if image_index % 100 == 0:
        print(image_index)

    # Preprocess the image: reverse the channel axis, then resize.
    # NOTE(review): the channel reversal presumably mirrors `bgr=True` of the
    # training pipeline, and 800.0 / 1333.0 are presumably the min / max side
    # lengths expected by `resize_to_min_dim` - confirm both in kerod.
    image = example['image'][:, :, ::-1]
    image = resize_to_min_dim(image, 800.0, 1333.0)
    image_information = tf.cast(tf.shape(image)[:2], dtype=tf.float32)
    inputs = {
        'images': tf.expand_dims(image, axis=0),
        'images_information': tf.expand_dims(image_information, axis=0),
    }

    # Predict
    boxes, scores, labels, valid_detections = model_faster_rcnn.predict_on_batch(inputs)

    # Multiply the boxes by [height, width, height, width] of the ORIGINAL image.
    # NOTE(review): assumes the model returns boxes normalized by the image
    # size - confirm.
    original_hw = tf.cast(example['image'].shape[:2], tf.float32)
    bbox = (boxes[0] * tf.tile(tf.expand_dims(original_hw, axis=0), [1, 2])).numpy()

    image_id = int(example['image/id'])
    # A dedicated index name: the outer loop index is no longer overwritten.
    for detection_index in range(int(valid_detections[0])):
        results.append({
            'image_id': image_id,
            # Map the label index back to the COCO category id.
            'category_id': category_ids[int(labels[0][detection_index])],
            'bbox': _to_coco_xywh(bbox[detection_index]),
            'score': round(float(scores[0][detection_index]), 4),
        })

with open('coco_results.json', 'w') as f:
    json.dump(results, f)

2. install the coco library to compute the performances

In [None]:
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip annotations_trainval2017.zip
!pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

3. compute the performances

In [None]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Read back the detections written by the inference cell. That cell writes
# `coco_results.json`; no cell of this notebook produces a "_corrected" file.
with open('coco_results.json', 'r') as f:
    results = json.load(f)

# Ground-truth annotations extracted from annotations_trainval2017.zip.
coco = COCO('./annotations/instances_val2017.json')

# Register the detections against the ground truth and run the bounding-box
# evaluation; `summarize` prints the metrics.
cocoDt = coco.loadRes(results)
cocoEval = COCOeval(coco, cocoDt, 'bbox')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()