In [1]:
import sys
import random
import math
import numpy as np
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.config import Config


Using TensorFlow backend.


In [85]:
# Weights file that is later handed to model.load_weights().
model_path = "model/mask_rcnn_barcode.h5"
# Sample image read with cv2.imread() for the single-image detection cells.
test_image = "test_images/20190806112122_0074_receiptNo5241_imgRect.png"
# Directory passed to MaskRCNN as its model_dir argument.
MODEL_DIR = "logs"
def apply_mask(image, mask):
    """Replace the first three channels of ``image`` with a binary mask.

    Every pixel where ``mask == 1`` is set to 255 and every other pixel is
    set to 0. ``image`` is modified in place and the same array is returned.
    """
    binary = np.where(mask == 1, 255, 0)
    for channel in (0, 1, 2):
        image[:, :, channel] = binary
    return image

In [3]:
class BarcodeConfig(Config):
    """Configuration for the barcode Mask R-CNN model.

    Derives from the base Config class and overrides some values.
    """
    # Give the configuration a recognizable name
    NAME = "barcode"

    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # Background + barcode

    # Number of training steps per epoch
    STEPS_PER_EPOCH = 100

    # Skip detections with < 90% confidence
    DETECTION_MIN_CONFIDENCE = 0.9

In [4]:
class InferenceConfig(BarcodeConfig):
    """BarcodeConfig variant that runs on one GPU with one image per batch."""
    # Set batch size to 1 since we'll be running inference on
    # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

# Instantiate the inference configuration and print all of its values.
config = InferenceConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.9
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE         

In [86]:
# Build the Mask R-CNN model in inference mode with the InferenceConfig instance
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Load the barcode weights stored at model_path (not the MS-COCO weights)
model.load_weights(model_path, by_name=True)

In [90]:
import imutils
import cv2
import colorsys

# Load the test image. cv2.imread reports a missing or unreadable file by
# returning None rather than raising, so fail here with a clear message
# instead of letting cvtColor raise an obscure error.
image = cv2.imread(test_image)
if image is None:
    raise FileNotFoundError("Could not read image: {}".format(test_image))

# Convert OpenCV's BGR channel order to RGB and resize to a width of 512
# pixels before handing the image to the model.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = imutils.resize(image, width=512)

# perform a forward pass of the network to obtain the results
print("[INFO] making predictions with Mask R-CNN...")
r = model.detect([image], verbose=1)[0]

[INFO] making predictions with Mask R-CNN...
Processing 1 images
image                    shape: (625, 512, 3)         min:   53.00000  max:  255.00000  uint8
molded_images            shape: (1, 1024, 1024, 3)    min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 14)               min:    0.00000  max: 1024.00000  float64
anchors                  shape: (1, 261888, 4)        min:   -0.35390  max:    1.29134  float32


In [91]:
CLASS_NAMES = ["BG",'barcode']
# One hue per class, converted to an RGB tuple with components in [0, 1].
hsv = [(i / len(CLASS_NAMES), 1, 1.0) for i in range(len(CLASS_NAMES))]
COLORS = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))

# Reload the test image at the width used for detection. It is kept in
# OpenCV's native BGR order because it is drawn on and shown with cv2 only.
origin = cv2.imread(test_image)
if origin is None:
    raise FileNotFoundError("Could not read image: {}".format(test_image))
origin = imutils.resize(origin, width=512)

# rects[i] is the rotated rectangle (center, size, angle) of detection i.
# Each detection is processed on its own mask so that one mask never erases
# another and the list stays index-aligned with r["rois"] / r["scores"].
rects = []
for i in range(r["rois"].shape[0]):
    # binary (0 / 255) single-channel image of the current detection's mask
    mask = r["masks"][:, :, i]
    binary = np.where(mask == 1, 255, 0).astype(np.uint8)

    # findContours returns 2 or 3 values depending on the OpenCV version;
    # the contour list is always the second-to-last one.
    cnts = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(cnts) > 0:
        # a fragmented mask yields several contours: keep the largest blob
        rect = cv2.minAreaRect(max(cnts, key=cv2.contourArea))
    else:
        # empty mask: fall back to the axis-aligned ROI so alignment is kept
        (startY, startX, endY, endX) = r["rois"][i]
        rect = (
            ((startX + endX) / 2.0, (startY + endY) / 2.0),
            (float(endX - startX), float(endY - startY)),
            0.0,
        )
    rects.append(rect)
    print(rect[-1])

    # np.int0 was an alias of np.intp and no longer exists in NumPy 2.x
    box = cv2.boxPoints(rect).astype(np.intp)
    cv2.drawContours(origin,[box],0,(0,0,255),2)
cv2.imshow("test", origin)
cv2.waitKey(0)
cv2.destroyAllWindows()


-55.08059310913086


In [96]:
# Reload the test image at the width used for detection. It stays in BGR
# order because it is only sliced, warped and shown with cv2 functions.
image = cv2.imread(test_image)
if image is None:
    raise FileNotFoundError("Could not read image: {}".format(test_image))
image = imutils.resize(image, width=512)

print(r["scores"])

# rects is indexed by detection below, so it must cover every detection.
if len(rects) < len(r["scores"]):
    raise RuntimeError(
        "rects has {} entries for {} detections; re-run the cell that builds rects".format(
            len(rects), len(r["scores"])
        )
    )

# loop over the detections and show each ROI next to its rotated version
for i in range(0, len(r["scores"])):
    # crop the axis-aligned bounding box of the detection
    (startY, startX, endY, endX) = r["rois"][i]
    img = image[startY:endY, startX:endX]

    # rotated rectangle of the same detection: (center, size, angle)
    (center_x, center_y), _, angle = rects[i]
    # NOTE(review): this normalisation assumes minAreaRect reports angles in
    # [-90, 0); confirm for the installed OpenCV version, and confirm that the
    # resulting rotation direction is the intended one.
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle

    # The rectangle center is expressed in full-image coordinates, while the
    # rotation is applied to the crop: shift it by the crop's top-left corner.
    center = (float(center_x - startX), float(center_y - startY))
    height, width = img.shape[0], img.shape[1]

    # calculate the rotation matrix
    M = cv2.getRotationMatrix2D(center, angle, 1)
    # rotate the cropped region
    img_rot = cv2.warpAffine(img, M, (width, height))

    cv2.imshow("origin", img)
    cv2.imshow("output", img_rot)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

[0.99848914]


In [None]:
import glob
import os

# Directory that receives the cropped detections.
output = "crop/Lv3"

# Every entry of the Lv.3 dataset folder.
# NOTE(review): this is an absolute, machine-specific location; adjust it
# before running the notebook on another machine.
image_path = glob.glob("/Users/apple/Desktop/machine_learning/projects/barcode_segment/dataset/Lv.3/*")
print(image_path)

In [None]:
def crop(path, padding = 0):
    """Detect objects in one image and save every detection as a PNG crop.

    The image at ``path`` is read, resized to a width of 512 pixels and
    passed to ``model.detect``. Each returned ROI is cut out of the resized
    image, enlarged by ``padding`` pixels on every side, and written into the
    ``output`` directory as ``"<basename>.png_<index>.png"`` where the index
    starts at 1.

    Args:
        path: Path of the image file to process.
        padding: Number of pixels added around each ROI. The padded box is
            clamped to the image bounds.

    Returns:
        None. Unreadable images and empty crops are skipped with a message.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread returns None for files it cannot decode
        print("[WARN] could not read image, skipping: {}".format(path))
        return

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = imutils.resize(image, width=512)
    r = model.detect([image], verbose=1)[0]
    basename = os.path.basename(path)

    # back to BGR, the channel order cv2.imwrite expects
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    height, width = image.shape[0], image.shape[1]

    # cv2.imwrite does not create missing directories
    os.makedirs(output, exist_ok=True)

    for i in range(0, len(r['scores'])):
        (startY, startX, endY, endX) = r["rois"][i]
        # Clamp the padded box: a negative start index would otherwise be
        # interpreted as "count from the end" and produce a wrong crop.
        top = max(0, startY - padding)
        left = max(0, startX - padding)
        bottom = min(height, endY + padding)
        right = min(width, endX + padding)
        img = image[top:bottom, left:right]
        if img.size == 0:
            print("[WARN] empty crop for detection {} of {}".format(i + 1, path))
            continue

        target = os.path.join(output, "{}.png_{}.png".format(basename, i+1))
        if not cv2.imwrite(target, img):
            print("[WARN] could not write {}".format(target))

In [None]:
# Run detection and cropping on every collected path, without extra padding.
for path in image_path:
    crop(path, padding=0)