In [1]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf

plt.figure(figsize=(10,10))
plt.axis('off')

train_path = '/home/mattias/projects/GANs/data/train'

# files = os.listdir(train_path)
# max_dim = 0
# image_name = ''
# for image_id in files:
#     path = os.path.join(train_path,image_id)
#     image = Image.open(path)
#     nimage = np.array(image)
#     width,height,channels = nimage.shape
#     if(width > max_dim):
#         max_dim = width
#         image_name = image_id
#     if(height > max_dim):
#         max_dim = height
#         image_name = image_id
        
# im = Image.open(os.path.join(train_path,image_name))
# im.show()
# print(np.array(im).shape)

min_dim = 5121

In [2]:
sys.path.append('/home/mattias/projects/GANs/data_utilities')
import aug_util as aug
import wv_util as wv
from tfr_util import *

In [3]:
#set your root directory
ROOT_DIR = os.path.abspath("/home/mattias/projects/GANs")

In [4]:
# add Mask_RCNN to the path
sys.path.append('/home/mattias/projects/Mask_RCNN')
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

%matplotlib inline

MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

Using TensorFlow backend.


In [5]:
class ShapesConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"

    # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
    # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    # Number of classes (including background)
    NUM_CLASSES = 84  # background + 3 shapes

    # Use small images for faster training. Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 5121
    IMAGE_MAX_DIM = 5121

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = ( 16, 32, 64, 128, 256)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5
    
config = ShapesConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     8
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 8
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  5121
IMAGE_META_SIZE                96
IMAGE_MIN_DIM                  5121
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [5121 5121    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE        

In [6]:
def get_ax(rows=1, cols=1, size=8):
    """Return a Matplotlib Axes array to be used in
    all visualizations in the notebook. Provide a
    central point to control graph sizes.
    
    Change the default size attribute to control the size
    of rendered images
    """
    _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
    return ax

## Dataset

Create a synthetic dataset

Extend the Dataset class and add a method to load the shapes dataset, `load_shapes()`, and override the following methods:

* load_image()
* load_mask()
* image_reference()

In [7]:
# #Loading our labels
# coords, chips, classes = wv.get_labels('data/xView_train.geojson')

In [10]:


class xViewTrainDataset(utils.Dataset):
    """Generates the shapes synthetic dataset. The dataset consists of simple
    shapes (triangles, squares, circles) placed randomly on a blank surface.
    The images are generated on the fly. No file access required.
    """
    def __init__(self,data_dir,**kwargs):
        self.data_dir = data_dir
#         self.coords = coords
#         self.chips = chips
#         self.classes = classes
        self.feature_set = {
            'image/height':tf.FixedLenFeature([], tf.int64),
            'image/width': tf.FixedLenFeature([], tf.int64),
            'image/encoded':  tf.FixedLenFeature([], tf.string),
            'image/format': tf.FixedLenFeature([],tf.string),
            'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
            'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
            'image/object/class/label': tf.VarLenFeature(tf.int64),
        }
        super(xViewTrainDataset, self).__init__(kwargs)
    def load_xview(self):
        with open(os.path.join(self.data_dir, "xview_class_labels.txt")) as f:
            l = f.readline()
            while l:
                parts = l.split(':')
                self.add_class('xview',parts[0],parts[1].strip())
                l = f.readline()
            
    def read_tfrecord(self):
        reader = tf.TFRecordReader()
        fpath = os.path.join(self.data_dir, "tfrecords/xview_train_t1.record")
        filename_queue = tf.train.string_input_producer([fpath])
        _, serialized_example = reader.read(filename_queue)
        features = tf.parse_single_example( serialized_example, features= self.feature_set )
        height = features['image/height']
        print(features)
     
     
    def load_image(self, image_id):
        """Retrieve the image from the xview dataset train folder
        """
        path = self.image_info[image_id]['path']
        im = Image.open(path)
        nim = np.array(image) 
        return self.resize_image(nim,min_dim=min_dim)[0]

    def image_reference(self, image_id):
        return "not sure what image_regerence does exactly"

    def load_mask(self, image_id):
       
        img_coords = self.coords[self.chips==image_id]
        img_classes = self.classes[self.chips==image_id].astype(np.int64)
        print("number of masks is",len(img_classes))
#         mask = np.zeros([5121, 5121, len(img_classes)], dtype=np.uint8)
        mask = np.zeros([512, 512, int(len(img_classes))], dtype=np.bool)
 
        for i in range(len(img_classes)):
            coord = coords[i]
            coord = list(map(int, coord))
            mask[:,:,i] = cv2.rectangle(mask[:,:,i].copy(), (coord[0], coord[1]), (coord[2], coord[3]), (255,0,0), -1)
            
        return mask.astype(np.bool), img_classes.astype(np.int32)

  

In [None]:
dataset = xViewTrainDataset('/home/mattias/projects/GANs/data/')
dataset.read_tfrecord()

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Tensor("ParseSingleExample/ParseSingleExample:17", shape=(), dtype=int64)


In [None]:
# Training dataset
dataset_train = ShapesDataset()
dataset_train.load_shapes(500, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset_train.prepare()

# Validation dataset
dataset_val = ShapesDataset()
dataset_val.load_shapes(50, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset_val.prepare()

In [None]:
# Load and display random samples
image_ids = np.random.choice(dataset_train.image_ids, 4)
for image_id in image_ids:
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)

    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

## Ceate Model

In [None]:
# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

In [None]:
# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)

## Training

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all` to train all layers.

In [None]:
# Train the head branches
# Passing layers="heads" freezes all layers except the head
# layers. You can also pass a regular expression to select
# which layers to train by name pattern.
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE, 
            epochs=1, 
            layers='heads')

In [None]:
# Fine tune all layers
# Passing layers="all" trains all layers. You can also 
# pass a regular expression to select which layers to
# train by name pattern.
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE / 10,
            epochs=2, 
            layers="all")

In [None]:
# Save weights
# Typically not needed because callbacks save after every epoch
# Uncomment to save manually
# model_path = os.path.join(MODEL_DIR, "mask_rcnn_shapes.h5")
# model.keras_model.save_weights(model_path)

In [None]:
class InferenceConfig(ShapesConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [None]:
# Test on a random image
image_id = random.choice(dataset_val.image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset_val, inference_config, 
                           image_id, use_mini_mask=False)

log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
                            dataset_train.class_names, figsize=(8, 8))