# Mask R-CNN - Train FCN using MRCNN in Predict Mode 

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os, sys, math, io, time, gc, argparse, platform, pprint
from datetime import datetime   
import numpy as np
import tensorflow as tf
import keras
import keras.backend as KB
sys.path.append('../')
import mrcnn.model_mrcnn  as mrcnn_modellib
import mrcnn.model_fcn    as fcn_modellib
import mrcnn.visualize    as visualize
import mrcnn.new_shapes   as shapes
from mrcnn.utils        import command_line_parser, Paths
from mrcnn.config       import Config
from mrcnn.dataset      import Dataset 
from mrcnn.utils        import log, stack_tensors, stack_tensors_3d, write_stdout
from mrcnn.datagen      import data_generator, load_image_gt
from mrcnn.callbacks    import get_layer_output_1,get_layer_output_2
from mrcnn.coco         import CocoDataset, CocoConfig, CocoInferenceConfig, evaluate_coco, build_coco_results
from mrcnn.prep_notebook import mrcnn_coco_train, prep_coco_dataset, coco_dataset

# Output formatting used by later cells: numpy arrays are printed with 4 decimals
# and no scientific notation; `pp` pretty-prints nested structures 100 columns wide.
np.set_printoptions(precision=4, threshold=1000, linewidth=100, suppress=True)
pp = pprint.PrettyPrinter(width=100, indent=2)

# Record when this run started and report the framework versions in use.
start_time = datetime.now().strftime("%m-%d-%Y @ %H:%M:%S")
print()
print('--> Execution started at:', start_time)
print(
    "    Tensorflow Version: {}   Keras Version : {} ".format(
        tf.__version__,
        keras.__version__,
    )
)

####  Pass input parameters to argparse

# args = parser.parse_args("--epochs 100 --steps_in_epoch 128  --last_epoch 1264 --batch_size 8  --lr 0.5               --logs_dir train_fcn_adagrad --model /home/kbardool/models/train_mrcnn/shapes20180621T1554/mask_rcnn_shapes_1119.h5 --fcn_model init".split())
# input_parms = "--epochs 100 --steps_in_epoch 100  --last_epoch 1264 --batch_size 25 --lr 0.8 --val_steps 5 --logs_dir train_fcn_adagrad --model /home/kbardool/models/train_mrcnn/shapes20180621T1554/mask_rcnn_shapes_1119.h5 --fcn_model /home/kbardool/models/train_fcn_adagrad/shapes20180709T1732/fcn_shapes_1167.h5"
# input_parms +=" --model     /home/kbardool/models/train_mrcnn/shapes20180621T1554/mask_rcnn_shapes_1119.h5 "
##------------------------------------------------------------------------------------
## Parse command line arguments
##   A notebook has no real command line, so the argument string is assembled here
##   from its fragments and handed to the project's argument parser.
##------------------------------------------------------------------------------------
parser = command_line_parser()
input_parms = "".join([
    "--epochs 2 --steps_in_epoch 32  --last_epoch 0 --batch_size 1 --lr 0.00001 --val_steps 8 ",
    "--mrcnn_logs_dir train_mrcnn_coco ",
    "--fcn_logs_dir   train_fcn8_coco ",
    "--mrcnn_model    last ",
    "--fcn_model      init ",
    "--opt            adagrad ",
    "--fcn_arch       fcn8 ",
    "--fcn_layers     all ",
    "--sysout        screen ",
    "--new_log_folder    ",
])
# Disabled alternative: point --fcn_model at a saved weight file instead of 'init'.
# input_parms +="--fcn_model /home/kbardool/models/train_fcn_adagrad/shapes20180709T1732/fcn_shapes_1167.h5"
print(input_parms)

# Whitespace-split the assembled string exactly as a shell would deliver argv.
args = parser.parse_args(input_parms.split())
# args = parser.parse_args()

##----------------------------------------------------------------------------------------------
## When args.sysout is 'FILE', replace sys.stdout with an in-memory StringIO buffer
## so that everything printed from here on is captured instead of shown on screen.
##----------------------------------------------------------------------------------------------
if args.sysout == 'FILE':
    sys.stdout = io.StringIO()

## Echo the parsed arguments so the run's settings are recorded in the cell output.
# print("    Dataset            : ", args.dataset)
# print("    Logs               : ", args.logs)
# print("    Limit              : ", args.limit)
print("    MRCNN Model        : ", args.mrcnn_model)
print("    FCN Model          : ", args.fcn_model)
print("    MRCNN Log Dir      : ", args.mrcnn_logs_dir)
print("    FCN Log Dir        : ", args.fcn_logs_dir)
print("    FCN Arch           : ", args.fcn_arch)
# This line prints the layer selection; it was previously mislabelled "FCN Log Dir".
print("    FCN Layers         : ", args.fcn_layers)
print("    Last Epoch         : ", args.last_epoch)
print("    Epochs to run      : ", args.epochs)
print("    Steps in each epoch: ", args.steps_in_epoch)
print("    Validation steps   : ", args.val_steps)
print("    Batch Size         : ", args.batch_size)
print("    Optimizer          : ", args.opt)
print("    sysout             : ", args.sysout)
# print("    OS Platform        : ", syst)

##------------------------------------------------------------------------------------
## Set up project directories
##   The Paths object is built from the MRCNN and FCN training folder names given
##   in the arguments; later cells read model and dataset locations from it.
##------------------------------------------------------------------------------------
paths = Paths(
    mrcnn_training_folder = args.mrcnn_logs_dir,
    fcn_training_folder   = args.fcn_logs_dir,
)
paths.display()

Using TensorFlow backend.


Tensorflow Version: 1.8.0   Keras Version : 2.1.6 

--> Execution started at: 10-31-2018 @ 16:46:55
    Tensorflow Version: 1.8.0   Keras Version : 2.1.6 
--epochs 2 --steps_in_epoch 32  --last_epoch 0 --batch_size 1 --lr 0.00001 --val_steps 8 --mrcnn_logs_dir train_mrcnn_coco --fcn_logs_dir   train_fcn8_coco --mrcnn_model    last --fcn_model      init --opt            adagrad --fcn_arch       fcn8 --fcn_layers     all --sysout        screen --new_log_folder    
    MRCNN Model        :  last
    FCN Model          :  init
    MRCNN Log Dir      :  train_mrcnn_coco
    FCN Log Dir        :  train_fcn8_coco
    FCN Arch           :  FCN8
    FCN Log Dir        :  ['all']
    Last Epoch         :  0
    Epochs to run      :  2
    Steps in each epoch:  32
    Validation steps   :  8
    Batch Size         :  1
    Optimizer          :  ADAGRAD
    sysout             :  SCREEN


In [3]:
##------------------------------------------------------------------------------------
## Build the configuration object for the Mask R-CNN model
##   Starts from the project's CocoConfig defaults and overrides paths, batch size
##   and run-length settings with values from `paths` and the parsed arguments.
##------------------------------------------------------------------------------------                          
mrcnn_config                    = CocoConfig()
mrcnn_config.NAME               = 'mrcnn'              
mrcnn_config.TRAINING_PATH      = paths.MRCNN_TRAINING_PATH
mrcnn_config.COCO_DATASET_PATH  = paths.COCO_DATASET_PATH 
mrcnn_config.COCO_MODEL_PATH    = paths.COCO_MODEL_PATH   
mrcnn_config.RESNET_MODEL_PATH  = paths.RESNET_MODEL_PATH 
mrcnn_config.VGG16_MODEL_PATH   = paths.VGG16_MODEL_PATH  
mrcnn_config.COCO_CLASSES       = None 
mrcnn_config.DETECTION_PER_CLASS = 200
# The FCN config cell divides IMAGE_SHAPE by this factor to get the FCN input shape.
mrcnn_config.HEATMAP_SCALE_FACTOR = 4
mrcnn_config.BATCH_SIZE         = int(args.batch_size)                  # Taken from --batch_size
mrcnn_config.IMAGES_PER_GPU     = int(args.batch_size)                  # Must match BATCH_SIZE

mrcnn_config.STEPS_PER_EPOCH    = int(args.steps_in_epoch)
mrcnn_config.LEARNING_RATE      = float(args.lr)
mrcnn_config.EPOCHS_TO_RUN      = int(args.epochs)
# First two entries of IMAGE_SHAPE, i.e. the image shape without the channel entry.
mrcnn_config.FCN_INPUT_SHAPE    = mrcnn_config.IMAGE_SHAPE[0:2]
mrcnn_config.LAST_EPOCH_RAN     = int(args.last_epoch)

# Disabled overrides, kept for reference (these settings are applied to the
# FCN config in a later cell instead):
# mrcnn_config.WEIGHT_DECAY       = 2.0e-4
# mrcnn_config.VALIDATION_STEPS   = int(args.val_steps)
# mrcnn_config.REDUCE_LR_FACTOR   = 0.5
# mrcnn_config.REDUCE_LR_COOLDOWN = 30
# mrcnn_config.REDUCE_LR_PATIENCE = 40
# mrcnn_config.EARLY_STOP_PATIENCE= 80
# mrcnn_config.EARLY_STOP_MIN_DELTA = 1.0e-4
# mrcnn_config.MIN_LR             = 1.0e-10
# mrcnn_config.OPTIMIZER          = args.opt.upper()
# mrcnn_model.config.OPTIMIZER    = 'ADAGRAD'
# Hard-coded to False here, unlike the FCN config, which takes --new_log_folder.
mrcnn_config.NEW_LOG_FOLDER       = False
mrcnn_config.SYSOUT               = args.sysout
mrcnn_config.display() 



Configuration Parameters:
-------------------------
BACKBONE_SHAPES                [[256 256]
 [128 128]
 [ 64  64]
 [ 32  32]
 [ 16  16]]
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COCO_CLASSES                   None
COCO_DATASET_PATH              /home/kbardool/MLDatasets/coco2014
COCO_MODEL_PATH                /home/kbardool/PretrainedModels/mask_rcnn_coco.h5
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
DETECTION_PER_CLASS            200
EPOCHS_TO_RUN                  2
FCN_INPUT_SHAPE                [1024 1024]
GPU_COUNT                      1
HEATMAP_SCALE_FACTOR           4
IMAGES_PER_GPU                 1
IMAGE_MAX_DIM                  1024
IMAGE_MIN_DIM                  800
IMAGE_PADDING                  True
IMAGE_SHAPE                    [1024 1024    3]
LAST_EPOCH_RAN                 0
LEARNING_MOMENTUM              0.9
LEARNIN

In [4]:
##------------------------------------------------------------------------------------
## Build the Mask R-CNN model in 'trainfcn' mode
##   mrcnn_coco_train returns the model together with a config object, which
##   is rebound to `mrcnn_config` for use by the following cells.
##------------------------------------------------------------------------------------
mrcnn_model, mrcnn_config = mrcnn_coco_train(
    mrcnn_config = mrcnn_config,
    mode         = 'trainfcn',
)

>>> Initialize ModelBase model 
   Mode      :  trainfcn
   Model dir :  /home/kbardool/models/train_mrcnn_coco
>>> ModelBase initialiation complete
>>> ---Initialize MRCNN model, mode:  trainfcn

----------------------------
>>> Resnet Graph 
----------------------------
     Input_image shape : (?, 1024, 1024, 3)
     After ZeroPadding2D  : (?, 1030, 1030, 3) (?, 1030, 1030, 3)
     After Conv2D padding : (?, 512, 512, 64) (?, 512, 512, 64)
     After BatchNorm      : (?, 512, 512, 64) (?, 512, 512, 64)
     C1 Shape: (?, 256, 256, 64) (?, 256, 256, 64)
     C2 Shape:  (?, 256, 256, 256) (?, 256, 256, 256)
     C3 Shape:  (?, 128, 128, 512) (?, 128, 128, 512)
     C4 Shape:  (?, 64, 64, 1024) (?, 64, 64, 1024)
     C5 Shape:  (?, 32, 32, 2048) (?, 32, 32, 2048)

>>> Feature Pyramid Network (FPN) Graph 
     FPN P2 shape : (None, 256, 256, 256)
     FPN P3 shape : (None, 128, 128, 256)
     FPN P4 shape : (None, 64, 64, 256)
     FPN P5 shape : (None, 32, 32, 256)
     FPN P6 shape : 

    class_grid             :  <class 'tensorflow.python.framework.ops.Tensor'>  shape  (1, 81, 200)
    batch_grid             :  <class 'tensorflow.python.framework.ops.Tensor'>  shape  (1, 81, 200)
    gather_inds            :  (1, 81, 200, 3)
    gt_tensor.shape        :  (1, 81, 200, 8) (1, 81, 200, 8)

 
  > build_heatmap() for  ['gt_heatmap']
    in_tensor shape        :  (1, 81, 200, 8)
    num bboxes per class   :  200
    heatmap scale        :  4 Dimensions:  w: 256  h: 256
    pt2_sum shape  :  (1, 81, 200)
    pt2_ind shape  :  (?, 3)
    pt2_dense shape:  (?, 8)
     Prob_grid shape :  (?, 256, 256)
    prob_grid_clipped      :  (?, 256, 256)
    scores_scattered shape :  (1, 81, 200, 3)
    gauss_scores           :  (1, 81, 200, 11)  Name:    cntxt_layer_gt/gt_heatmap_scores:0
    gauss_scores  (FINAL)  :  (1, 81, 200, 11)  Keras tensor  False

    Scatter out the probability distributions based on class --------------
    pt2_ind shape   :  (?, 3)
    prob_grid shape :  

In [5]:
##------------------------------------------------------------------------------------
## Build configuration for FCN model
##   Starts from the project's CocoConfig defaults and overrides paths, input shape,
##   run-length settings and learning-rate / early-stopping settings for FCN training.
##------------------------------------------------------------------------------------
fcn_config = CocoConfig()
# Disabled overrides, kept for reference (note the last three refer to
# mrcnn_config and to path names that are not defined in this notebook):
# fcn_config.IMAGE_MAX_DIM        = 600
# fcn_config.IMAGE_MIN_DIM        = 480      
# mrcnn_config.COCO_DATASET_PATH  = COCO_DATASET_PATH 
# mrcnn_config.COCO_MODEL_PATH    = COCO_MODEL_PATH   
# mrcnn_config.RESNET_MODEL_PATH  = RESNET_MODEL_PATH 
fcn_config.NAME                 = 'fcn'              
fcn_config.TRAINING_PATH        = paths.FCN_TRAINING_PATH
fcn_config.VGG16_MODEL_PATH     = paths.FCN_VGG16_MODEL_PATH
# FCN input is the MRCNN image shape (without channels) scaled down by the heatmap factor.
fcn_config.FCN_INPUT_SHAPE      = mrcnn_config.IMAGE_SHAPE[0:2] // mrcnn_config.HEATMAP_SCALE_FACTOR 

fcn_config.BATCH_SIZE           = int(args.batch_size)                  # Taken from --batch_size
fcn_config.IMAGES_PER_GPU       = int(args.batch_size)                  # Must match BATCH_SIZE
fcn_config.EPOCHS_TO_RUN        = int(args.epochs)
fcn_config.STEPS_PER_EPOCH      = int(args.steps_in_epoch)
fcn_config.LEARNING_RATE        = float(args.lr)
fcn_config.LAST_EPOCH_RAN       = int(args.last_epoch)
fcn_config.VALIDATION_STEPS     = int(args.val_steps)

fcn_config.WEIGHT_DECAY         = 2.0e-4     ## FCN Weight decays are 5.0e-4 or 2.0e-4
fcn_config.BATCH_MOMENTUM       = 0.9
# Learning-rate reduction and early-stopping settings.
# NOTE(review): presumably consumed by the training callbacks - confirm in model_fcn.
fcn_config.REDUCE_LR_FACTOR     = 0.5
fcn_config.REDUCE_LR_COOLDOWN   = 5
fcn_config.REDUCE_LR_PATIENCE   = 5
fcn_config.REDUCE_LR_MIN_DELTA  = 1e-5
fcn_config.EARLY_STOP_PATIENCE  = 15
fcn_config.EARLY_STOP_MIN_DELTA = 1.0e-4
fcn_config.MIN_LR               = 1.0e-10
fcn_config.CHECKPOINT_PERIOD    = 1

# Remaining settings come straight from the parsed arguments.
fcn_config.NEW_LOG_FOLDER       = args.new_log_folder
fcn_config.OPTIMIZER            = args.opt.upper()
fcn_config.SYSOUT               = args.sysout

fcn_config.display()


Configuration Parameters:
-------------------------
BACKBONE_SHAPES                [[256 256]
 [128 128]
 [ 64  64]
 [ 32  32]
 [ 16  16]]
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_MOMENTUM                 0.9
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
CHECKPOINT_PERIOD              1
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
DETECTION_PER_CLASS            200
EARLY_STOP_MIN_DELTA           0.0001
EARLY_STOP_PATIENCE            15
EPOCHS_TO_RUN                  2
FCN_INPUT_SHAPE                [256 256]
GPU_COUNT                      1
HEATMAP_SCALE_FACTOR           4
IMAGES_PER_GPU                 1
IMAGE_MAX_DIM                  1024
IMAGE_MIN_DIM                  800
IMAGE_PADDING                  True
IMAGE_SHAPE                    [1024 1024    3]
LAST_EPOCH_RAN                 0
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  1e-05
MASK_POOL_SIZE 

## Define FCN model

In [6]:
##------------------------------------------------------------------------------------
## Build FCN Model in Training Mode
##------------------------------------------------------------------------------------
# When this cell is re-run, drop the previous model first and force a garbage
# collection before building a new one. On a fresh kernel the name does not
# exist yet; that NameError is the only error that should be ignored here.
try:
    del fcn_model
except NameError:
    pass
else:
    gc.collect()
fcn_model = fcn_modellib.FCN(mode="training", arch = args.fcn_arch, config=fcn_config)


##------------------------------------------------------------------------------------
## Display model configuration information
##------------------------------------------------------------------------------------
# paths.display()
# fcn_config.display()  
fcn_model.layer_info()


>>> Initialize ModelBase model 
   Mode      :  training
   Model dir :  /home/kbardool/models/train_fcn8_coco
>>> ModelBase initialiation complete
>>> Initialize FCN model, mode:  training architecture:  FCN8
>>> set_log_dir(): model_path:  None
    set_log_dir(): model_path has NOT been provided : None 
                  NewFolder: False  config.NEW_LOG_FOLDER: True 
    set_log_dir(): weight file template (self.checkpoint_path): /home/kbardool/models/train_fcn8_coco/fcn20181031T1647/fcn_{epoch:04d}.h5 
    set_log_dir(): weight file dir      (self.log_dir)        : /home/kbardool/models/train_fcn8_coco/fcn20181031T1647 
    set_log_dir(): Last completed epoch (self.epoch)          : 0 
arch set to FCN8
<function fcn8_graph at 0x7f1928db4f28>


---------------------------------------------------
   Arch:  FCN8  Adding  FCN layers
---------------------------------------------------

---------------
>>> FCN8 Layer 
---------------
     feature map      : (?, 256, 256, 81)
     height :

##  Load model weights

In [7]:
##------------------------------------------------------------------------------------
## Load the Mask R-CNN weight file
##   `init_with` is the --mrcnn_model argument; the exclude list is empty.
##------------------------------------------------------------------------------------
# Disabled alternatives kept for reference:
# exclude_list = ["mrcnn_class_logits"]
#load_model(model, init_with = args.model)   
exclude_list = list()
mrcnn_model.load_model_weights(
    exclude   = exclude_list,
    init_with = args.mrcnn_model,
)

-----------------------------------------------
 Load Model with init parm: [ last ]
 Exclude layers: 
-----------------------------------------------
 ---> last
>>> find_last checkpoint in :  /home/kbardool/models/train_mrcnn_coco
    Key : > mrcnn <
    Dir names:  ['mrcnn20181011T1100']
    Folder:  /home/kbardool/models/train_mrcnn_coco/mrcnn20181011T1100
    Checkpoints:  ['mrcnn_0103.h5']
    find_last():   dir_name: /home/kbardool/models/train_mrcnn_coco/mrcnn20181011T1100
    find_  last(): checkpoint: /home/kbardool/models/train_mrcnn_coco/mrcnn20181011T1100/mrcnn_0103.h5
   Last file is : /home/kbardool/models/train_mrcnn_coco/mrcnn20181011T1100/mrcnn_0103.h5
>>> find_last checkpoint in :  /home/kbardool/models/train_mrcnn_coco
    Key : > mrcnn <
    Dir names:  ['mrcnn20181011T1100']
    Folder:  /home/kbardool/models/train_mrcnn_coco/mrcnn20181011T1100
    Checkpoints:  ['mrcnn_0103.h5']
    find_last():   dir_name: /home/kbardool/models/train_mrcnn_coco/mrcnn20181011T1100

In [8]:
##------------------------------------------------------------------------------------
## Load FCN Model weights
##   'init' means no weight file is loaded; any other value is passed on as the
##   `init_with` argument of load_model_weights.
##------------------------------------------------------------------------------------
if args.fcn_model == 'init':
    print(' FCN Training starting from randomly initialized weights ...')
else:
    fcn_model.load_model_weights(verbose = 1, init_with = args.fcn_model)


 FCN Training starting from randomly initialized weights ...


## Define training and validation datasets

In [9]:
##------------------------------------------------------------------------------------
## Build & Load Training and Validation datasets
##   Training uses the "val35k" subset and validation the "minival" subset.
##   `coco_dataset` is imported with the other mrcnn.prep_notebook names in the
##   import cell at the top of the notebook.
##------------------------------------------------------------------------------------
# Disabled alternative (adds the "train" subset to the training data):
# dataset_train = prep_coco_dataset(["train",  "val35k"], mrcnn_config, generator = False)
# dataset_val   = prep_coco_dataset(["minival"]         , mrcnn_config, generator = False)
dataset_train = coco_dataset(["val35k"], mrcnn_config)
dataset_val   = coco_dataset(["minival"], mrcnn_config)


loading annotations into memory...
Done (t=4.72s)
creating index...
index created!
 image dir        :  /home/kbardool/MLDatasets/coco2014/val2014
 json_path_dir    :  /home/kbardool/MLDatasets/coco2014/annotations/instances_valminusminival2014.json
 number of images :  35185
loading annotations into memory...
Done (t=1.33s)
creating index...
index created!
 image dir        :  /home/kbardool/MLDatasets/coco2014/val2014
 json_path_dir    :  /home/kbardool/MLDatasets/coco2014/annotations/instances_minival2014.json
 number of images :  4952


In [None]:
##--------------------------------------------------------------------------------
## Data generators
##--------------------------------------------------------------------------------
# train_generator = data_generator(dataset_train, mrcnn_model.config, shuffle=True,
#                                  batch_size=mrcnn_config.BATCH_SIZE)
# val_generator   = data_generator(dataset_val, mrcnn_model.config, shuffle=True,
#                                  batch_size=mrcnn_config.BATCH_SIZE,
#                                  augment=False)

In [None]:
# Build the training generator in this cell: the data-generator cell above is
# commented out, so without this `train_generator` is undefined on a fresh kernel
# and next() raises NameError.
train_generator = data_generator(dataset_train, mrcnn_model.config, shuffle=True,
                                 batch_size=mrcnn_config.BATCH_SIZE)

# Pull a single batch and show the type and shape of every array it contains.
train_batch_x, train_batch_y = next(train_generator)

for i in train_batch_x:
    print(type(i), i.shape)
for i in train_batch_y:
    print(type(i), i.shape)
print(train_batch_y)  

####  Print model layer and weight information

In [None]:
# Walk the FCN Keras model and list, for each layer, its weight names and
# (when available) the type and output shape of the layer.
for layer in fcn_model.keras_model.layers:
    print('layer: ', layer.name)
    for weight in layer.weights:
        print('   mapped_weight_name : ',weight.name)
    # hasattr() is False when reading `.output` raises AttributeError; skip those layers.
    if not hasattr(layer, 'output'):
        continue
    print('   layer output ', type(layer),' shape: ',layer.output.shape )

## Call `train_in_batches()`

In [10]:
##----------------------------------------------------------------------------------------------
## Train the FCN only
##   `layers` is the list given by --fcn_layers (e.g. ['all']); the training output
##   shows it being mapped to a layer-name regular expression ('.*' for 'all').
##   The starting epoch is taken from the FCN config before training begins.
##----------------------------------------------------------------------------------------------            
fcn_model.epoch                  = fcn_config.LAST_EPOCH_RAN
loss_names   = ['fcn_heatmap_loss']
train_layers = args.fcn_layers

fcn_model.train_in_batches(
            mrcnn_model,    
            dataset_train,
            dataset_val, 
            losses = loss_names,
            layers = train_layers,
            # Optional overrides, currently left at their defaults:
            # learning_rate   = fcn_config.LEARNING_RATE,  
            # epochs          = 25,                             # total number of epochs to run (across multiple trainings)
            # epochs_to_run   = fcn_config.EPOCHS_TO_RUN,
            # batch_size      = fcn_config.BATCH_SIZE,          # gets value from self.config.BATCH_SIZE
            # steps_per_epoch = fcn_config.STEPS_PER_EPOCH ,    # gets value from self.config.STEPS_PER_EPOCH
            # min_LR          = fcn_config.MIN_LR
            )


['all']
['.*']
layers regex : .*

Selecting layers to train
-------------------------
Layer    Layer Name               Layer Type
   0  input_pr_hm_norm       (InputLayer          )   ............................no weights to train ]
   1  block1_conv1           (Conv2D              )   TRAIN 
   2  block1_conv2           (Conv2D              )   TRAIN 
   3  block1_pool            (MaxPooling2D        )   ............................no weights to train ]
   4  block2_conv1           (Conv2D              )   TRAIN 
   5  block2_conv2           (Conv2D              )   TRAIN 
   6  block2_pool            (MaxPooling2D        )   ............................no weights to train ]
   7  block3_conv1           (Conv2D              )   TRAIN 
   8  block3_conv2           (Conv2D              )   TRAIN 
   9  block3_conv3           (Conv2D              )   TRAIN 
  10  block3_pool            (MaxPooling2D        )   ............................no weights to train ]
  11  block4_conv1        





    val_batch_sizes     :  [1, 1, 1, 1, 1, 1, 1, 1]
    val_outs_per_batch - shape : (8, 2)
        batch:  0    [6.523567e-05, 6.523567e-05]
        batch:  1    [0.00013274401, 0.00013274401]
        batch:  2    [0.00019663242, 0.00019663242]
        batch:  3    [3.7198046e-05, 3.7198046e-05]
        batch:  4    [6.409755e-05, 6.409755e-05]
        batch:  5    [5.188294e-05, 5.188294e-05]
        batch:  6    [4.3492804e-05, 4.3492804e-05]
        batch:  7    [5.7984664e-05, 5.7984664e-05]
val_averages : [8.115851278489572e-05, 8.115851278489572e-05]

Epoch 00001: val_loss improved from inf to 0.0000812, saving model to /home/kbardool/models/train_fcn8_coco/fcn20181031T1647/fcn_0001.h5
Epoch 2/2
    val_batch_sizes     :  [1, 1, 1, 1, 1, 1, 1, 1]
    val_outs_per_batch - shape : (8, 2)
        batch:  0    [0.00017297259, 0.00017297259]
        batch:  1    [0.00012016448, 0.00012016448]
        batch:  2    [0.00022886808, 0.00022886808]
        batch:  3    [2.2509123e-05, 2.2

## `train_in_batches` development code

In [None]:
def train_in_batches_dev(self,
          mrcnn_model,
          train_dataset, 
          val_dataset, 
          learning_rate, 
          layers            = None,
          losses            = None,              
          epochs            = 0,
          epochs_to_run     = 1, 
          batch_size        = 0, 
          steps_per_epoch   = 0,
          min_LR            = 0.00001):

    '''
    Train the model.
    train_dataset, 
    val_dataset:    Training and validation Dataset objects.

    learning_rate:  The learning rate to train with

    epochs:         Number of training epochs. Note that previous training epochs
                    are considered to be done already, so this actually determines
                    the epochs to train in total rather than in this particaular
                    call.

    layers:         Allows selecting wich layers to train. It can be:
                    - A regular expression to match layer names to train
                    - One of these predefined values:
                    heads: The RPN, classifier and mask heads of the network
                    all: All the layers
                    3+: Train Resnet stage 3 and up
                    4+: Train Resnet stage 4 and up
                    5+: Train Resnet stage 5 and up
    '''
    assert self.mode == "training", "Create model in training mode."

    if batch_size == 0 :
        batch_size = self.config.BATCH_SIZE

    # if epochs_to_run > 0 :
    epochs = self.epoch + epochs_to_run

    if steps_per_epoch == 0:
        steps_per_epoch = self.config.STEPS_PER_EPOCH

    # use Pre-defined layer regular expressions
    # if layers in self.layer_regex.keys():
        # layers = self.layer_regex[layers]
    print(layers)
    # train_regex_list = []
    # for x in layers:
        # print( ' layers ias : ',x)
        # train_regex_list.append(x)
    train_regex_list = [self.layer_regex[x] for x in layers]
    print(train_regex_list)
    layers = '|'.join(train_regex_list)        
    print('layers regex :', layers)


    ##--------------------------------------------------------------------------------
    ## Data generators
    ##--------------------------------------------------------------------------------
    train_generator = data_generator(train_dataset, self.config, shuffle=True,
                                     batch_size=batch_size)
    val_generator   = data_generator(val_dataset, self.config, shuffle=True,
                                     batch_size=batch_size,
                                     augment=False)

    ##--------------------------------------------------------------------------------
    ## Set trainable layers and compile
    ##--------------------------------------------------------------------------------
    self.set_trainable(layers)            
    self.compile(learning_rate, self.config.LEARNING_MOMENTUM, losses)        


    ##--------------------------------------------------------------------------------
    ## Create checkpoint folder if it doesn't exists
    ##--------------------------------------------------------------------------------
    from tensorflow.python.platform import gfile
    if not gfile.IsDirectory(self.log_dir):
        log('Creating checkpoint folder : {}'.format(self.log_dir))
        gfile.MakeDirs(self.log_dir)
    else:
        log('Checkpoint folder already exists: {}'.format(self.log_dir))                                   
    # my_callback = MyCallback()

    ##--------------------------------------------------------------------------------
    ## Callbacks
    ##--------------------------------------------------------------------------------
    # call back for model checkpoint was originally (?) loss. chanegd to val_loss (which is default) 2-5-18

    # copied from \keras\engine\training.py
    # def _get_deduped_metrics_names(self):
    ## get metrics from keras_model.metrics_names
    out_labels = self.get_deduped_metrics_names()
    print()
    print(' out_labels from get_deduped_metrics_names() : ')
    print(' --------------------------------------------- ')
    print(out_labels)

    ## setup Progress Bar callback
    callback_metrics = out_labels + ['val_' + n for n in out_labels]
    print()
    print(' Callback metrics monitored by progbar :')
    print(' ---------------------------------------')
    pp.pprint(callback_metrics)

    # progbar = keras.callbacks.ProgbarLogger(count_mode='steps')
    # progbar.set_model(self.keras_model)
    # progbar.set_params({
        # 'epochs': epochs,
        # 'steps': steps_per_epoch,
        # 'verbose': 1,
        # 'do_validation': False,
        # 'metrics': callback_metrics,
    # })


    # progbar.set_model(self.keras_model) 

    ## setup Checkpoint callback
    # chkpoint = keras.callbacks.ModelCheckpoint(self.checkpoint_path, 
                                               # monitor='val_loss', 
                                               # verbose=1, 
                                               # save_best_only = True, 
                                               # save_weights_only=True)
    # chkpoint.set_model(self.keras_model)

    # progbar.on_train_begin()



    callbacks_list = [
        keras.callbacks.ProgbarLogger(count_mode='steps'),

        keras.callbacks.TensorBoard(log_dir=self.log_dir,
                                      histogram_freq=0,
                                      batch_size=32,
                                      write_graph=True,
                                      write_grads=False,
                                      write_images=True,
                                      embeddings_freq=0,
                                      embeddings_layer_names=None,
                                      embeddings_metadata=None)

        , keras.callbacks.ModelCheckpoint(self.checkpoint_path, 
                                          mode = 'auto', 
                                          period = 1, 
                                          monitor='val_loss', 
                                          verbose=1, 
                                          save_best_only = True, 
                                          save_weights_only=True)

        , keras.callbacks.ReduceLROnPlateau(monitor='val_loss', 
                                            mode     = 'auto', 
                                            factor   = self.config.REDUCE_LR_FACTOR,   
                                            cooldown = self.config.REDUCE_LR_COOLDOWN,
                                            patience = self.config.REDUCE_LR_PATIENCE,
                                            min_lr   = self.config.MIN_LR, 
                                            verbose  = 1)                                            

        , keras.callbacks.EarlyStopping(monitor='val_loss', 
                                            mode      = 'auto', 
                                            min_delta = 0.00001, 
                                            patience  = self.config.EARLY_STOP_PATIENCE, 
                                            verbose   = 1)                                            
        # , my_callback
    ]


    callbacks =  keras.callbacks.CallbackList(callbacks = callbacks_list)
    callbacks.set_model(self.keras_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': 1,
        'do_validation': False,
        'metrics': callback_metrics,
    })



    log("Starting at epoch {} of {} epochs. LR={}\n".format(self.epoch, epochs, learning_rate))
    log("Steps per epochs {} ".format(steps_per_epoch))
    log("    Last epoch completed : {} ".format(self.epoch))
    log("    Starting from epoch  : {} for {} epochs".format(self.epoch, epochs_to_run))
    log("    Learning Rate        : {} ".format(learning_rate))
    log("    Steps per epoch      : {} ".format(steps_per_epoch))
    log("    Batch Size           : {} ".format(batch_size))
    log("    Checkpoint Folder    : {} ".format(self.checkpoint_path))



    ##--------------------------------------------------------------------------------
    ## Start main training loop
    ##--------------------------------------------------------------------------------
    epoch_idx = self.epoch
    # progbar.on_train_begin()
    callbacks.on_train_begin()

    if epoch_idx >= epochs:
        print('Final epoch {} has already completed - Training will not proceed'.format(epochs))
    else:

        while epoch_idx < epochs :
            # progbar.on_epoch_begin(epoch_idx)
            callbacks.on_epoch_begin(epoch_idx)

            for steps_index in range(steps_per_epoch):

                batch_logs = {}
                print(' self.epoch {}   epochs {}  step {} '.format(self.epoch, epochs, steps_index))
                batch_logs['batch'] = steps_index
                batch_logs['size']  = batch_size
                # progbar.on_batch_begin(steps_index, batch_logs)
                callbacks.on_batch_begin(steps_index, batch_logs)

                train_batch_x, train_batch_y = next(train_generator)
                # print('length of train_batch_x:', len(train_batch_x))
                # print('length of train_batch_y:', len(train_batch_y))


                # # model_output   = get_layer_output_2(mrcnn_model.keras_model, train_batch_x, training_flag = False)
                # # model_output = get_layer_output_1(model.keras_model, train_batch_x, [ 26], 1)

                # print(len(model_output))
                # # print(type(output_rois))
                # for i in model_output:
                    # print( i.shape)                    

                results = mrcnn_model.keras_model.predict(train_batch_x)
#                 print('# of items in results:', len(results))


                pr_hm_norm, gt_hm_norm, pr_hm_scores, gt_hm_scores = results[11:]                 

                # print('pr_hm_norm shape   :', pr_hm_norm.shape)
                # print('pr_hm_scores shape :', pr_hm_scores.shape)
                # print('gt_hm_norm shape   :', gt_hm_norm.shape)
                # print('gt_hm_scores shape :', gt_hm_scores.shape)

                outs = self.keras_model.train_on_batch([pr_hm_norm,  pr_hm_scores,gt_hm_norm, gt_hm_scores], train_batch_y)

#                 print(' outs: ', outs)
                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                # progbar.on_batch_end(steps_index, batch_logs)
                callbacks.on_batch_end(steps_index, batch_logs)

                # print(outs)

            ## end of epoch operations     
            ##-------------------------------
            val_batch_x, val_batch_y = next(val_generator)
            val_outs = self.keras_model.test_on_batch(X_val, Y_val)
            # write_log(callback, val_names, logs, batch_no//10)
            print(' validation logs output: ', val_outs)
            if not isinstance(val_outs, list):
                val_outs = [val_outs]
            for l, o in zip(out_labels, outs):
                batch_logs[l] = o




            # progbar.on_epoch_end(epoch_idx, {})
            # if (epoch_idx % 10) == 0:
            # chkpoint.on_epoch_end(epoch_idx  , batch_logs)
            callbacks.on_epoch_end(epoch_idx, batch_logs)
            epoch_idx += 1

        ## end of all epochs operations
        ##--------------------------------
        # if epoch_idx != self.epoch:
        # chkpoint.on_epoch_end(epoch_idx -1, batch_logs)
        callbacks.on_train_end()
        self.epoch = max(epoch_idx - 1, epochs)
        print('Final : self.epoch {}   epochs {}'.format(self.epoch, epochs))

    ##--------------------------------------------------------------------------------
    ## End main training loop
    ##--------------------------------------------------------------------------------




## `train_in_batches` step by step

In [None]:
# Show the layer-group keys that were requested for training.
print(train_layers)

# Look up the pattern registered in fcn_model.layer_regex for each requested key.
train_regex_list = [fcn_model.layer_regex[layer_key] for layer_key in train_layers]
print(train_regex_list)

# Join the individual patterns with '|' so a single string covers all of them.
layers = '|'.join(train_regex_list)
print('layers regex :', layers)

##--------------------------------------------------------------------------------
## Set trainable layers and compile
##--------------------------------------------------------------------------------
# `layers` is the combined pattern built above; learning_rate and loss_names
# must already be defined in the notebook namespace.
fcn_model.set_trainable(layers)
fcn_model.compile(
    learning_rate,
    fcn_model.config.LEARNING_MOMENTUM,
    loss_names,
)

In [None]:
# Labels returned by fcn_model.get_deduped_metrics_names(); later cells pair
# them with the values returned by train_on_batch.
out_labels = fcn_model.get_deduped_metrics_names()

# Blank line, banner, underline, then the labels -- one item per output line.
print(
    '',
    ' out_labels from get_deduped_metrics_names() : ',
    ' --------------------------------------------- ',
    out_labels,
    sep='\n',
)

In [None]:
##--------------------------------------------------------------------------------
## Data generators
##--------------------------------------------------------------------------------
# Both generators are configured from the MRCNN model's config and share the
# same batch size.

# Training batches.
train_generator = data_generator(
    dataset_train,
    mrcnn_model.config,
    batch_size=batch_size,
    shuffle=True,
)

# Validation batches: same settings, with augmentation explicitly switched off.
val_generator = data_generator(
    dataset_val,
    mrcnn_model.config,
    batch_size=batch_size,
    shuffle=True,
    augment=False,
)

In [None]:
# Echo the EPOCHS_TO_RUN setting held by fcn_config (displayed as the cell's output).
# NOTE(review): the following cells read a separate `epochs_to_run` variable --
# presumably derived from this setting; confirm they agree.
fcn_config.EPOCHS_TO_RUN

In [None]:

# Final epoch number for this run: the FCN model's last completed epoch plus
# the number of epochs requested.
epochs = fcn_model.epoch + epochs_to_run

# Run summary, emitted through log() one message at a time, in order.
run_summary = (
    "Starting at epoch {} of {} epochs. LR={}\n".format(fcn_model.epoch, epochs, learning_rate),
    "Steps per epochs {} ".format(steps_per_epoch),
    "    Last epoch completed : {} ".format(fcn_model.epoch),
    "    Starting from epoch  : {} for {} epochs".format(fcn_model.epoch, epochs_to_run),
    "    Learning Rate        : {} ".format(learning_rate),
    "    Steps per epoch      : {} ".format(steps_per_epoch),
    "    Batch Size           : {} ".format(batch_size),
    "    Checkpoint Folder    : {} ".format(fcn_model.checkpoint_path),
)
for summary_line in run_summary:
    log(summary_line)

In [None]:
# Index of the batch being walked through by hand; the following cells use it
# as the batch number passed to the callbacks and stored in batch_logs.
steps_index = 0 

In [None]:
# Trace the current position in the run.
print(' self.epoch {}   epochs {}  step {} '.format(fcn_model.epoch, epochs, steps_index))

# Per-batch log dict handed to the callbacks: batch index and batch size.
batch_logs = {
    'batch': steps_index,
    'size': batch_size,
}

# NOTE(review): `callbacks` must already exist in the notebook namespace; no
# top-level cell in this section creates it -- confirm before running.
callbacks.on_batch_begin(steps_index, batch_logs)

In [None]:
# Pull one (inputs, targets) pair from the training generator.
train_batch_x, train_batch_y = next(train_generator)

# Print the shape of every array in the batch inputs.
for batch_input in train_batch_x:
    print(batch_input.shape)
  

In [None]:
# Run the MRCNN Keras model's predict() on the batch inputs and report how
# many outputs it produced.
results = mrcnn_model.keras_model.predict(train_batch_x)
print('# of items in results:', len(results))

# Print the shape of each output array.
for model_output in results:
    print(model_output.shape)


In [None]:
# Everything from index 11 onward in the MRCNN outputs must be exactly four
# arrays, otherwise this unpacking raises ValueError.
pr_hm_norm, gt_hm_norm, pr_hm_scores, gt_hm_scores = results[11:]

# Print the four shapes in unpacking order.
for hm_array in (pr_hm_norm, gt_hm_norm, pr_hm_scores, gt_hm_scores):
    print(hm_array.shape)

In [None]:
# Run a single training step of the FCN Keras model, using the four arrays taken
# from the MRCNN prediction as inputs and the generator's batch targets as y.
# The inputs are passed as (pr_hm_norm, pr_hm_scores, gt_hm_norm, gt_hm_scores),
# which is NOT the order they were unpacked in (pr_norm, gt_norm, pr_scores, gt_scores).
# NOTE(review): this order must match the FCN model's input order -- confirm.
outs = fcn_model.keras_model.train_on_batch([pr_hm_norm,  pr_hm_scores,gt_hm_norm, gt_hm_scores], train_batch_y)

In [None]:
# Show the raw return value of train_on_batch, pretty-printed and then plain.
pp.pprint(outs)
print(' outs: ', outs)

# Wrap a non-list result in a list so it can be paired with the labels below.
outs = outs if isinstance(outs, list) else [outs]

# Store each value in batch_logs under its label; zip() stops at the shorter
# of the two sequences.
batch_logs.update(zip(out_labels, outs))


## Misc

In [None]:
# Echo the FCN Keras model's metrics_names attribute (displayed as the cell's output).
fcn_model.keras_model.metrics_names

In [None]:
# Inspect the Keras model's registered losses and its metric names.
# A bare expression is only displayed when it is the last statement of a cell,
# so the losses are printed explicitly; otherwise their value is discarded.
# NOTE(review): the other cells in this section use `fcn_model` / `mrcnn_model`;
# confirm that `model` is defined in the notebook namespace before running.
print(model.keras_model.losses)
print(model.keras_model.metrics_names)

In [None]:
# model.keras_model.summary(line_length=132, positions=[0.30,0.75, .83, 1. ])