In [1]:
# # Uncomment this cell when running on Google Colab
# from google.colab import drive
# drive.mount('/content/gdrive/', force_remount=True)

# import os
# # Make sure to change the following line to reflect the location of your root folder
# os.chdir('/content/gdrive/My Drive/2/Mask_RCNN_TF2')

In [2]:
# # Uncomment the following line to install all required packages recursively
# !pip install --upgrade -r requirements.txt

In [3]:
import os
import json
import datetime
import numpy as np
print("Numpy version: ",np.__version__) # 1.19.2
import skimage.draw
import tensorflow as tf
print("TensorFlow version: ",tf.__version__) #2.5.0
import random
import matplotlib.pyplot as plt

Numpy version:  1.19.2


2021-07-29 15:44:05.254210: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-07-29 15:44:05.254236: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


TensorFlow version:  2.5.0


In [4]:
# Report how many GPUs TensorFlow can see on this machine.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Number of GPUs available: ", len(gpu_devices))

Number of GPUs available:  0


2021-07-29 15:44:06.664009: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-07-29 15:44:06.664028: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-07-29 15:44:06.664041: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kiprono-pc): /proc/driver/nvidia/version does not exist


In [5]:
import sys
sys.path.append("./mrcnn")  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

In [6]:
# Define the locations used by the rest of the notebook.
# All paths are relative to the current working directory.

# Folder in which the COCO pre-trained weights file is looked up.
ROOT_DIR = './'
# Dataset root; load_fruit() appends the subset name ("train" or "val") to it.
DATA_DIR = './datasets/fruits2'
# Passed to the model as `model_dir`; the "last" initialization mode also
# searches this folder for previously saved .h5 checkpoints.
DEFAULT_LOGS_DIR = './assets/logs'

In [7]:
# Where the COCO pre-trained weights are expected on disk
COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# Reuse the local copy when present; otherwise fetch it from the releases
if os.path.exists(COCO_WEIGHTS_PATH):
    print("Info: COCO weights already exists")
else:
    utils.download_trained_weights(COCO_WEIGHTS_PATH)

Info: COCO weights already exists


In [8]:
class FruitsConfig(Config):
    """
    Configuration for training on the fruits dataset.

    Derives from the base Config class and overrides some values.
    """
    # Give the configuration a recognizable name
    NAME = "fruits2"

    # Images per GPU. Kept at 1 here; increase it only if your GPU has
    # enough memory to hold more than one image at a time.
    IMAGES_PER_GPU =   1

    # Number of classes (including background)
    NUM_CLASSES = 2  # background and fruit

    # Number of training steps per epoch
    STEPS_PER_EPOCH = 200

    # Skip detections whose confidence is below this threshold
    DETECTION_MIN_CONFIDENCE = 0.90

    # Which weights to initialize the model with: "imagenet", "coco" or "last".
    # Read by the training cell to decide which weights file to load.
    INIT_IT = "imagenet"

In [9]:
############################################################
#  Dataset
############################################################

class FruitsDataset(utils.Dataset):
    """Fruit images whose instances are outlined as polygons in a VIA JSON file."""

    def load_fruit(self, dataset_dir, subset):
        """
        Load a subset of the Fruits dataset.

        dataset_dir: Root directory of the dataset.
        subset: Subset to load: train or val

        Reads `<dataset_dir>/<subset>/via_project_fruits.json` and registers
        every annotated image (path, size and polygons) with this dataset.
        """
        # `import skimage.draw` at the top of the notebook does not guarantee
        # that the `skimage.io` submodule is loaded, so import it explicitly.
        import skimage.io

        # Add classes. We have only one class to add.
        self.add_class("fruit", 1, "fruit")

        # Train or validation dataset?
        assert subset in ["train", "val"]
        dataset_dir = os.path.join(dataset_dir, subset)

        # Load annotations
        # VGG Image Annotator saves each image in the form:
        # { 'filename': '28503151_5b5b7ec140_b.jpg',
        #   'regions': {
        #       '0': {
        #           'region_attributes': {},
        #           'shape_attributes': {
        #               'all_points_x': [...],
        #               'all_points_y': [...],
        #               'name': 'polygon'}},
        #       ... more regions ...
        #   },
        #   'size': 100202
        # }
        # 'regions' may also be a plain list of such region dicts; both
        # layouts are handled below.
        #
        # We are mainly interested in the x and y values.

        # Load the JSON annotations file (closing the file handle afterwards)
        annotations_path = os.path.join(dataset_dir, "via_project_fruits.json")
        with open(annotations_path) as annotations_file:
            annotations = json.load(annotations_file)
        annotations = list(annotations.values())  # don't need the dict keys

        # The VIA tool saves images in the JSON even if they don't have any
        # annotations. Skip unannotated images.
        annotations = [a for a in annotations if a['regions']]

        # Add images
        for a in annotations:
            # Get the x, y coordinates of the points of the polygons that make
            # up the outline of each object instance. These are stored in
            # shape_attributes (see the JSON format above).
            regions = a['regions']
            if isinstance(regions, dict):
                # Dict layout: the region dicts are the values, keyed by index.
                regions = regions.values()
            polygons = [r['shape_attributes'] for r in regions]

            # load_mask() needs the image size to convert polygons to masks.
            # The size is not taken from the JSON, so we must read the image.
            # This is only manageable because the dataset is small
            # (around 3K images).
            image_path = os.path.join(dataset_dir, a['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]

            self.add_image(
                "fruit",
                image_id=a['filename'],  # use file name as a unique image id
                path=image_path,
                width=width, height=height,
                polygons=polygons)

    def load_mask(self, image_id):
        """
        Generate instance masks for an image.

        Returns:
            masks: A uint8 array (values 0/1) of shape
                [height, width, instance count] with one mask per instance.
            class_ids: a 1D float32 array of ones, one entry per instance mask.
        """
        # If not a fruit dataset image, delegate to parent class.
        info = self.image_info[image_id]
        if info["source"] != "fruit":
            # Zero-argument super() stays correct when this class is
            # subclassed; super(self.__class__, self) would recurse forever.
            return super().load_mask(image_id)

        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        height, width = info["height"], info["width"]
        mask = np.zeros([height, width, len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1.
            # `shape` clips the polygon to the image, so vertices on or
            # beyond the border cannot index outside the mask.
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'],
                                          shape=(height, width))
            mask[rr, cc, i] = 1

        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID only, we return an array of 1s.
        # NOTE(review): the IDs are float32 (np.int32 was commented out by the
        # author); confirm downstream code does not index with these values.
        return mask, np.ones([mask.shape[-1]], dtype=np.float32) #dtype=np.int32

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "fruit":
            return info["path"]
        # Hand the parent's answer back to the caller instead of dropping it.
        return super().image_reference(image_id)

In [10]:
def _load_fruit_subset(subset):
    """Create a FruitsDataset, load `subset` from DATA_DIR and prepare it."""
    dataset = FruitsDataset()
    dataset.load_fruit(DATA_DIR, subset)
    dataset.prepare()
    return dataset


# Training dataset
dataset_train = _load_fruit_subset("train")

# Validation dataset
dataset_val = _load_fruit_subset("val")

In [11]:
# Create model in training mode
config = FruitsConfig()

model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=DEFAULT_LOGS_DIR)

# Which weights to start with?
init_with = config.INIT_IT  # imagenet, coco, or last
if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Use the COCO weights path defined earlier in the notebook; the former
    # `opt.COCO_MODEL_PATH` referred to an object that does not exist here.
    if not os.path.exists(COCO_WEIGHTS_PATH):
        utils.download_trained_weights(COCO_WEIGHTS_PATH)

    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_WEIGHTS_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Look for the latest model to continue training. If this is not what
    # you want, provide the path to the model you wish to resume training on.

    # by kiprono@aims.ac.za
    from pathlib import Path

    # Walk the run folders from newest to oldest and take the newest .h5
    # checkpoint of the first run that has one. An interrupted run leaves a
    # folder without checkpoints, which must not hide older, usable runs.
    run_dirs = sorted((p for p in Path(DEFAULT_LOGS_DIR).iterdir() if p.is_dir()),
                      key=os.path.getmtime, reverse=True)
    latest_model_file = None
    for run_dir in run_dirs:
        checkpoints = sorted((f for f in run_dir.iterdir()
                              if f.name.endswith('.h5')),
                             key=os.path.getmtime)
        if checkpoints:
            latest_model_file = checkpoints[-1]
            break
    if latest_model_file is None:
        raise FileNotFoundError(
            "No .h5 checkpoint found under '{}'".format(DEFAULT_LOGS_DIR))

    # Load the last model you trained and continue training.
    # Passed as str in case load_weights does not accept Path objects.
    model.load_weights(str(latest_model_file), by_name=True)
else:
    # No branch matched, so no pre-trained weights were loaded.
    print("Warning: unknown INIT_IT value {!r}; no weights were loaded".format(init_with))

# Training options:
# 1. Only the heads. This freezes all the backbone layers and trains only the
#    head layers. To do so, pass layers='heads' to the train() function.
# 2. Fine-tune all layers. Pass layers='all' to train all layers.
# You can also pass a regular expression to select which layers to train
# by name pattern.

# Here we fine-tune "all" layers.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=200,
            layers='all')

# model.train(dataset_train, dataset_val,
#             learning_rate=config.LEARNING_RATE,
#             epochs=80,
#             layers='all')

# model.train(dataset_train, dataset_val,
#             learning_rate=config.LEARNING_RATE/10,
#             epochs=120,
#             layers='all')
# model.train(dataset_train, dataset_val,
#             learning_rate=config.LEARNING_RATE/20,
#             epochs=200,
#             layers='all')
# model.train(dataset_train, dataset_val,
#             learning_rate=config.LEARNING_RATE/30,
#             epochs=280,
#             layers='all')
# model.train(dataset_train, dataset_val,
#             learning_rate=config.LEARNING_RATE/30,
#             epochs=360,
#             layers='all')


Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


2021-07-29 15:44:35.583947: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-07-29 15:44:35.936242: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2112000000 Hz



Starting at epoch 0. LR=0.001

Checkpoint Path: ./assets/logs/fruits220210729T1544/mask_rcnn_fruits2_{epoch:04d}.h5
Selecting layers to train
conv1                  (Conv2D)
bn_conv1               (BatchNorm)
res2a_branch2a         (Conv2D)
bn2a_branch2a          (BatchNorm)
res2a_branch2b         (Conv2D)
bn2a_branch2b          (BatchNorm)
res2a_branch2c         (Conv2D)
res2a_branch1          (Conv2D)
bn2a_branch2c          (BatchNorm)
bn2a_branch1           (BatchNorm)
res2b_branch2a         (Conv2D)
bn2b_branch2a          (BatchNorm)
res2b_branch2b         (Conv2D)
bn2b_branch2b          (BatchNorm)
res2b_branch2c         (Conv2D)
bn2b_branch2c          (BatchNorm)
res2c_branch2a         (Conv2D)
bn2c_branch2a          (BatchNorm)
res2c_branch2b         (Conv2D)
bn2c_branch2b          (BatchNorm)
res2c_branch2c         (Conv2D)
bn2c_branch2c          (BatchNorm)
res3a_branch2a         (Conv2D)
bn3a_branch2a          (BatchNorm)
res3a_branch2b         (Conv2D)
bn3a_branch2b        

  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
2021-07-29 15:46:31.318795: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 134217728 exceeds 10% of free system memory.
2021-07-29 15:46:32.186145: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 134217728 exceeds 10% of free system memory.
2021-07-29 15:46:32.186275: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 134217728 exceeds 10% of free system memory.
2021-07-29

  1/200 [..............................] - ETA: 2:52:42 - batch: 0.0000e+00 - size: 1.0000 - loss: 5.7494 - rpn_class_loss: 3.8310 - rpn_bbox_loss: 1.9046 - mrcnn_class_loss: 0.0137 - mrcnn_bbox_loss: 0.0000e+00 - mrcnn_mask_loss: 0.0000e+00

2021-07-29 15:47:03.461402: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session initializing.
2021-07-29 15:47:03.461442: I tensorflow/core/profiler/lib/profiler_session.cc:141] Profiler session started.
2021-07-29 15:47:30.915350: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2021-07-29 15:47:31.001865: I tensorflow/core/profiler/lib/profiler_session.cc:159] Profiler session tear down.
2021-07-29 15:47:31.108912: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./assets/logs/fruits220210729T1544/plugins/profile/2021_07_29_15_47_31


  2/200 [..............................] - ETA: 1:31:55 - batch: 0.5000 - size: 1.0000 - loss: 5.5309 - rpn_class_loss: 2.6166 - rpn_bbox_loss: 2.9059 - mrcnn_class_loss: 0.0084 - mrcnn_bbox_loss: 0.0000e+00 - mrcnn_mask_loss: 0.0000e+00    

2021-07-29 15:47:31.150639: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to ./assets/logs/fruits220210729T1544/plugins/profile/2021_07_29_15_47_31/kiprono-pc.trace.json.gz
2021-07-29 15:47:31.276351: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./assets/logs/fruits220210729T1544/plugins/profile/2021_07_29_15_47_31
2021-07-29 15:47:31.276579: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for memory_profile.json.gz to ./assets/logs/fruits220210729T1544/plugins/profile/2021_07_29_15_47_31/kiprono-pc.memory_profile.json.gz
2021-07-29 15:47:31.283759: I tensorflow/core/profiler/rpc/client/capture_profile.cc:251] Creating directory: ./assets/logs/fruits220210729T1544/plugins/profile/2021_07_29_15_47_31Dumped tool data for xplane.pb to ./assets/logs/fruits220210729T1544/plugins/profile/2021_07_29_15_47_31/kiprono-pc.xplane.pb
Dumped tool data for overview_page.pb to

KeyboardInterrupt: 