### Mask R-CNN training on the Balloon dataset

In [1]:
import os

# The project packages imported below (samples, preprocess, training, model,
# common) are presumably resolved from the working directory, so the notebook
# steps up to the parent directory before importing them.
# The guard keeps this cell idempotent: an unconditional os.chdir('..') climbs
# one more level on every re-run, which breaks these imports and every
# relative path used later in the notebook.
if not os.path.isdir('samples'):
    os.chdir('..')

import tensorflow as tf

from samples.balloon import balloon
from preprocess import preprocess
from preprocess import augmentation as aug
from training import train_model
from model import mask_rcnn_functional
from common.utils import tf_limit_gpu_memory

In [2]:
# Record the environment this notebook was run in, for reproducibility.
# Requires the `watermark` IPython extension to be installed.
%load_ext watermark
# Timestamp, Python/IPython versions and hardware summary.
%watermark
# Versions of the packages imported so far in this kernel.
%watermark --iversions

Last updated: 2021-06-24T01:28:19.527328+03:00

Python implementation: CPython
Python version       : 3.7.7
IPython version      : 7.16.1

Compiler    : GCC 7.3.0
OS          : Linux
Release     : 5.4.0-65-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 12
Architecture: 64bit

tensorflow: 2.2.0



In [3]:
# Cap TensorFlow's GPU memory before the model is built further down.
# The helper echoes "Memory limit: 4500"; the unit is presumably megabytes
# -- TODO confirm against common.utils.tf_limit_gpu_memory.
tf_limit_gpu_memory(tf, 4500)

1 Physical GPUs, 1 Logical GPUs Memory limit: 4500
Physical GPU-devices: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
from common.config import CONFIG

In [5]:
# Balloon-specific overrides applied on top of the project defaults:
# a single foreground class plus background, trained for 30 epochs.
CONFIG.update(
    class_dict={'balloon': 1, 'background': 0},
    num_classes=2,
    epochs=30,
)

# meta_shape depends on num_classes, so it is recomputed only after the
# override above has been applied.
# NOTE(review): the fixed terms (1 + 3 + 3 + 4 + 1) presumably follow the usual
# Mask R-CNN image-meta layout -- confirm against the data loader.
CONFIG.update(meta_shape=1 + 3 + 3 + 4 + 1 + CONFIG['num_classes'])

# Display the effective configuration.
CONFIG

{'image_shape': (512, 512, 3),
 'img_size': 512,
 'backbone': 'mobilenet',
 'meta_shape': 14,
 'num_classes': 2,
 'class_dict': {'balloon': 1, 'background': 0},
 'normalization': None,
 'image_min_dim': 300,
 'image_min_scale': 0,
 'image_max_dim': 512,
 'image_resize_mode': 'square',
 'use_mini_masks': False,
 'mini_mask_shape': (32, 32),
 'mask_shape': (28, 28),
 'epochs': 30,
 'gpu_num': 1,
 'batch_size': 1,
 'images_per_gpu': 1,
 'training': True,
 'log_per_steps': 5,
 'use_multiprocessing': True,
 'workers': 6,
 'callback': {'log_dir': 'logs/scalars',
  'reduce_lr_on_plateau': 0.98,
  'reduce_lr_on_plateau_patience': 10,
  'save_weights_only': True,
  'save_best_only': True,
  'histogram_freq': 0,
  'profile_batch': '1,2'},
 'backbone_strides': [4, 8, 16, 32, 64],
 'top_down_pyramid_size': 256,
 'rpn_anchor_scales': (32, 64, 128, 256, 512),
 'rpn_anchor_ratios': [0.5, 1, 2],
 'rpn_anchor_stride': 1,
 'rpn_train_anchors_per_image': 256,
 'max_gt_instances': 100,
 'rpn_bbox_std_dev'

In [6]:
# Build the Mask R-CNN model from the effective configuration. CONFIG must
# already carry the balloon overrides (num_classes, meta_shape) at this point.
model = mask_rcnn_functional(config=CONFIG)

[MaskRCNN] Training mode


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

[MaskRCNN] Backbone architecture: mobilenet




In [7]:
# Sanity check: the configuration is in training mode.
CONFIG['training']

True

In [8]:
# Sanity check: which backbone the configuration selects.
CONFIG['backbone']

'mobilenet'

In [9]:
# The dataset lives in a `balloon` directory that is a sibling of the current
# working directory (the project's `src` folder after the chdir in the first
# cell). Build that path explicitly: calling str.replace('src', 'balloon') on
# the whole absolute path rewrites *every* occurrence of 'src' (for example in
# /home/user/src/project/src) and silently does nothing if 'src' is absent.
base_dir = os.path.join(os.path.dirname(os.getcwd()), 'balloon')
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')

# Training split: uses the training augmentation pipeline.
# The remaining CONFIG entries are forwarded as keyword arguments.
train_dataset = balloon.BalloonDataset(images_dir=train_dir,
                                       class_key='object',
                                       classes_dict=CONFIG['class_dict'],
                                       augmentation=aug.get_training_augmentation(
                                           image_size=CONFIG['img_size'],
                                           normalize=CONFIG['normalization']
                                       ),
                                       json_annotation_key=None,
                                       **CONFIG
                                       )

# Validation split: same arguments, but with the validation augmentation
# pipeline.
val_dataset = balloon.BalloonDataset(images_dir=val_dir,
                                     class_key='object',
                                     classes_dict=CONFIG['class_dict'],
                                     augmentation=aug.get_validation_augmentation(
                                         image_size=CONFIG['img_size'],
                                         normalize=CONFIG['normalization']
                                     ),
                                     json_annotation_key=None,
                                     **CONFIG
                                     )

Found annotation file: via_region_data.json in dataset path: /home/alexander/Documents/py_projects/bitbucket/mask-rcnn/balloon/train
Found annotation file: via_region_data.json in dataset path: /home/alexander/Documents/py_projects/bitbucket/mask-rcnn/balloon/val


In [10]:
# Run the training loop with the datasets built above; the epoch count and
# callback settings are taken from CONFIG.
# NOTE(review): weights_path=None presumably means no pre-trained checkpoint
# is loaded -- confirm in training.train_model.
train_model(model, 
            train_dataset=train_dataset,
            val_dataset=val_dataset,
            config=CONFIG, 
            weights_path=None)

train DataLoader. Steps per epoch: 61
val DataLoader. Steps per epoch: 13
MaskRCNN Losses:
rpn_class_loss: <layers.losses.RPNClassLoss object at 0x7fcda01eddd0>
rpn_bbox_loss: <layers.losses.RPNBboxLoss object at 0x7fcda051d190>
mrcnn_class_loss: <layers.losses.MRCNNClassLoss object at 0x7fcda01ede50>
mrcnn_bbox_loss: <layers.losses.MRCNNBboxLoss object at 0x7fcda07aa7d0>
mrcnn_mask_loss: <layers.losses.MRCNNMaskLoss object at 0x7fcda01edd90>
l2_regularizer: <layers.losses.L2RegLoss object at 0x7fcda0626950>

Epoch 1/30
Instructions for updating:
Use tf.identity instead.
Epoch 00001: val_loss_sum improved from inf to 8.33637, saving model to logs/scalars/maskrcnn_mobilenet_ed3e7dd4c2e064d9dd92df2088834243_cp-0001.ckpt
Epoch 2/30
Epoch 00002: val_loss_sum improved from 8.33637 to 2.20023, saving model to logs/scalars/maskrcnn_mobilenet_ed3e7dd4c2e064d9dd92df2088834243_cp-0002.ckpt
Epoch 3/30
Epoch 00003: val_loss_sum improved from 2.20023 to 1.71945, saving model to logs/scalars/maskrcn

Epoch 11/30
Epoch 00011: val_loss_sum did not improve from 1.53026
Epoch 12/30
Epoch 00012: val_loss_sum improved from 1.53026 to 1.39191, saving model to logs/scalars/maskrcnn_mobilenet_ed3e7dd4c2e064d9dd92df2088834243_cp-0012.ckpt
Epoch 13/30
Epoch 00013: val_loss_sum did not improve from 1.39191
Epoch 14/30
Epoch 00014: val_loss_sum did not improve from 1.39191
Epoch 15/30
Epoch 00015: val_loss_sum did not improve from 1.39191
Epoch 16/30
Epoch 00016: val_loss_sum improved from 1.39191 to 1.22282, saving model to logs/scalars/maskrcnn_mobilenet_ed3e7dd4c2e064d9dd92df2088834243_cp-0016.ckpt
Epoch 17/30
Epoch 00017: val_loss_sum did not improve from 1.22282
Epoch 18/30
Epoch 00018: val_loss_sum did not improve from 1.22282
Epoch 19/30
Epoch 00019: val_loss_sum did not improve from 1.22282
Epoch 20/30
Epoch 00020: val_loss_sum did not improve from 1.22282
Epoch 21/30
Epoch 00021: val_loss_sum improved from 1.22282 to 1.20168, saving model to logs/scalars/maskrcnn_mobilenet_ed3e7dd4c2e0

Epoch 22/30
Epoch 00022: val_loss_sum did not improve from 1.20168
Epoch 23/30
Epoch 00023: val_loss_sum improved from 1.20168 to 1.06905, saving model to logs/scalars/maskrcnn_mobilenet_ed3e7dd4c2e064d9dd92df2088834243_cp-0023.ckpt
Epoch 24/30
Epoch 00024: val_loss_sum did not improve from 1.06905
Epoch 25/30
Epoch 00025: val_loss_sum did not improve from 1.06905
Epoch 26/30
Epoch 00026: val_loss_sum did not improve from 1.06905
Epoch 27/30
Epoch 00027: val_loss_sum did not improve from 1.06905
Epoch 28/30
Epoch 00028: val_loss_sum did not improve from 1.06905
Epoch 29/30
Epoch 00029: val_loss_sum improved from 1.06905 to 0.99173, saving model to logs/scalars/maskrcnn_mobilenet_ed3e7dd4c2e064d9dd92df2088834243_cp-0029.ckpt
Epoch 30/30
Epoch 00030: val_loss_sum did not improve from 0.99173
