In [None]:
'''
Rice grain quality recognition project
1.Build data structure for Mask_RCNN
    - setup training data heiarchy
    - parse and load image and annotation file
    

'''

In [None]:
!pip install keras==2.1.0; pip install tensorflow==1.14.0

In [15]:
import cv2
import os
import re
import json
import warnings
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from mrcnn import visualize
from mrcnn.utils import Dataset, extract_bboxes, resize_image, resize_mask
from mrcnn.config import Config
from mrcnn.model import MaskRCNN, mold_image, load_image_gt, log

%matplotlib inline
warnings.filterwarnings('ignore')

train_dir = os.getcwd() + '/training/'
test_dir = os.getcwd() + '/test/'

In [2]:
def extract_boxes(filename):
    
    with open(filename) as f:
        ann = json.load(f)
        
    height = ann['imageHeight']
    width = ann['imageWidth']
    all_boxes = ann['shapes']
    
    boxes = [(shape['label'], 
            np.array(shape['points'], 
            dtype=np.int16)) for shape in all_boxes]
    return height, width, boxes

In [3]:
class riceDataset(Dataset):
    def load_dataset(self, dataset_dir, select_labels):
        
        images_dir = dataset_dir + 'imgs/'
        annots_dir = dataset_dir + 'annots/'
        
        with open(dataset_dir + 'train_labels.csv') as tl:
            labels = re.split('[,\n]', tl.read()[:-1])
            print('labels: ', labels)
        
        for i, label in zip(range(len(labels)), labels):
            if label in select_labels:
                self.add_class('dataset', i, label)
        
        for filename in os.listdir(images_dir):
            for l in select_labels:
                if l in filename:
                    image_id = filename[:-4]
                    image_path = images_dir + filename
                    annots_path = annots_dir + image_id + '.json'

                    height, width, boxes = extract_boxes(annots_path)
                    self.add_image('dataset', 
                                   image_id=image_id, 
                                   path=image_path,
                                   width=width,
                                   height=height,
                                   boxes=boxes,
                                   annotation=annots_path)
        
    def load_mask(self, image_id):
        
        info = self.image_info[image_id]
        path = info['annotation']
        masks = np.zeros([info['height'], 
                          info['width'], 
                          len(info['boxes'])], dtype='uint8')
        
        class_ids = []
        for i in range(len(info['boxes'])):
            box = info['boxes'][i]
            row_s, row_e = box[1][0][1], box[1][1][1]
            col_s, col_e = box[1][0][0], box[1][1][0]
            masks[row_s:row_e, col_s:col_e, i] = 1
            class_ids.append(self.class_names.index(box[0]))
        
        return masks, np.asarray(class_ids, dtype='int32')
    
    def image_reference(self, image_id):
        
        info = self.image_info[image_id]
        return info['path']

In [24]:
class RiceConfig(Config):
    BACKBONE = 'resnet50'
    IMAGE_RESIZE_MODE = "crop"
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    NAME = 'rice_config'
    STEPS_PER_EPOCH = 1
    NUM_CLASSES = 2
    LEARNING_RATE = 0.001
    BATCH_SIZE = 3

In [25]:
config = RiceConfig()
config.display()


Configurations:
BACKBONE                       resnet50
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     2
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 2
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  512
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  512
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              crop
IMAGE_SHAPE                    [512 512   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE                

In [6]:
train = riceDataset()
train.load_dataset(train_dir, ['full'])
train.prepare()

test = riceDataset()
test.load_dataset(test_dir, ['full'])
test.prepare()
print('Train: %d' % len(train.image_ids))

labels:  ['bad', 'full', 'cracked', 'heat', 'white']
labels:  ['bad', 'full', 'cracked', 'heat', 'white']
Train: 1


In [7]:
# image_id = 1
# image = train.load_image(image_id)
# print(image.shape)

# mask, class_ids = train.load_mask(image_id)
# print(mask.shape)
# plt.imshow(image)
# plt.imshow(mask[:, :, 3], cmap='Reds', alpha=.5)
# plt.show()

In [8]:
for image_id in train.image_ids:
    info = train.image_info[image_id]
    print(info)

{'id': 'full', 'source': 'dataset', 'path': '/Users/kyy/riceCV/training/imgs/full.jpg', 'width': 960, 'height': 1706, 'boxes': [('full', array([[465, 710],
       [503, 743]], dtype=int16)), ('full', array([[541, 750],
       [572, 792]], dtype=int16)), ('full', array([[596, 695],
       [630, 739]], dtype=int16)), ('full', array([[672, 691],
       [709, 730]], dtype=int16)), ('full', array([[519, 623],
       [562, 654]], dtype=int16)), ('full', array([[440, 592],
       [476, 638]], dtype=int16)), ('full', array([[334, 710],
       [378, 740]], dtype=int16)), ('full', array([[427, 823],
       [463, 857]], dtype=int16)), ('full', array([[494, 811],
       [530, 850]], dtype=int16)), ('full', array([[503, 896],
       [534, 937]], dtype=int16)), ('full', array([[604, 909],
       [637, 951]], dtype=int16)), ('full', array([[705, 876],
       [739, 911]], dtype=int16)), ('full', array([[699, 776],
       [740, 809]], dtype=int16)), ('full', array([[768, 734],
       [804, 767]], dtype

In [28]:
# image_id = 0
# # load the image
# image = train.load_image(image_id)
# # load the masks and the class ids
# mask, class_ids = train.load_mask(image_id)
# # extract bounding boxes from the masks
# bbox = extract_bboxes(mask)
# # display image with masks and bounding boxes
# display_instances(image, bbox, mask, class_ids, train.class_names)

In [None]:
def get_ax(rows=1, cols=1, size=16):
    """Return a Matplotlib Axes array to be used in
    all visualizations in the notebook. Provide a
    central point to control graph sizes.
    
    Adjust the size attribute to control how big to render images
    """
    _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
    return ax

In [None]:
# limit = 4
# image_id = np.random.choice(train.image_ids, 1)[0]
# ax = get_ax(rows=2, cols=limit//2)
# for i in range(limit):
#     image, image_meta, class_ids, bbox, mask = modellib.load_image_gt(
#         train, config, image_id, use_mini_mask=False)
#     visualize.display_instances(image, bbox, mask, class_ids,
#                                 train.class_names, ax=ax[i//2, i % 2],
#                                 show_mask=False, show_bbox=False)

In [None]:
# # Load random image and mask.
# image_id = np.random.choice(train.image_ids, 1)[0]
# image = train.load_image(image_id)
# mask, class_ids = train.load_mask(image_id)
# original_shape = image.shape
# # Resize
# image, window, scale, padding, _ = resize_image(
#     image, 
#     min_dim=config.IMAGE_MIN_DIM, 
#     max_dim=config.IMAGE_MAX_DIM,
#     mode=config.IMAGE_RESIZE_MODE)
# mask = resize_mask(mask, scale, padding)
# # Compute Bounding box
# bbox = extract_bboxes(mask)

# # Display image and additional stats
# print("image_id: ", image_id, train.image_reference(image_id))
# print("Original shape: ", original_shape)
# log("image", image)
# log("mask", mask)
# log("class_ids", class_ids)
# log("bbox", bbox)
# # Display image and instances
# visualize.display_instances(image, bbox, mask, class_ids, train.class_names)

In [None]:
# image_id = 0
# image = train.load_image(image_id)
# mask, class_ids = train.load_mask(image_id)
# original_shape = image.shape
# # resize
# image, window, scale, padding, _ = resize_image(image,
#                                                 min_dim=config.IMAGE_MIN_DIM,
#                                                 max_dim=config.IMAGE_MAX_DIM,
#                                                 mode=config.IMAGE_RESIZE_MODE)

In [21]:
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     2
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 2
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  256
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  256
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              crop
IMAGE_SHAPE                    [256 256   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE               

In [26]:
model = MaskRCNN(mode='training', model_dir='./', config=config)
model.load_weights('mask_rcnn_coco.h5', by_name=True, exclude=['mrcnn_class_logits', 
                                                                'mrcnn_bbox_fc', 
                                                                "mrcnn_bbox", 
                                                                "mrcnn_mask"])
model.train(train, test, learning_rate=config.LEARNING_RATE, epochs=5, layers='heads')


Starting at epoch 0. LR=0.001

Checkpoint Path: ./rice_config20200405T1509/mask_rcnn_rice_config_{epoch:04d}.h5
Selecting layers to train
fpn_c5p5               (Conv2D)
fpn_c4p4               (Conv2D)
fpn_c3p3               (Conv2D)
fpn_c2p2               (Conv2D)
fpn_p5                 (Conv2D)
fpn_p2                 (Conv2D)
fpn_p3                 (Conv2D)
fpn_p4                 (Conv2D)
In model:  rpn_model
    rpn_conv_shared        (Conv2D)
    rpn_class_raw          (Conv2D)
    rpn_bbox_pred          (Conv2D)
mrcnn_mask_conv1       (TimeDistributed)
mrcnn_mask_bn1         (TimeDistributed)
mrcnn_mask_conv2       (TimeDistributed)
mrcnn_mask_bn2         (TimeDistributed)
mrcnn_class_conv1      (TimeDistributed)
mrcnn_class_bn1        (TimeDistributed)
mrcnn_mask_conv3       (TimeDistributed)
mrcnn_mask_bn3         (TimeDistributed)
mrcnn_class_conv2      (TimeDistributed)
mrcnn_class_bn2        (TimeDistributed)
mrcnn_mask_conv4       (TimeDistributed)
mrcnn_mask_bn4         (T

In [None]:

# rice_data = riceDataset()
# h, w, data = extract_polygon('./training/annots/bad.json')
# rice_data.load_dataset(train_dir)

In [17]:
# def evaluate_model(dataset, model, cfg):
# 	APs = list()
# 	for image_id in dataset.image_ids:
# 		# load image, bounding boxes and masks for the image id
# 		image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(dataset, cfg, image_id, use_mini_mask=False)
# 		# convert pixel values (e.g. center)
# 		scaled_image = mold_image(image, cfg)
# 		# convert image into one sample
# 		sample = np.expand_dims(scaled_image, 0)
# 		# make prediction
# 		yhat = model.detect(sample, verbose=0)
# 		# extract results for first sample
# 		r = yhat[0]
# 		# calculate statistics, including AP
# 		AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'])
# 		# store
# 		APs.append(AP)
# 	# calculate the mean AP across all images
# 	mAP = mean(APs)
# 	return mAP

In [27]:
# model = MaskRCNN(mode='inference', model_dir='./', config=config)
# model.load_weights('mask_rcnn_rice_config_0005.h5', by_name=True)
# train_mAP = evaluate_model(train, model, config)
# print('Train mAP:', train_mAP)