In [1]:
import os
import sys
import itertools
import math
import logging
import json
import re
import random
from collections import OrderedDict
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
from matplotlib.patches import Polygon
from pathlib import Path

# Root directory of the project
ROOT_DIR = os.path.abspath("../final_project/")
MASK_PATH = '/Akamai/rgradilla/final_project/Mask_RCNN'
DATA_DIR = Path('/Akamai/rgradilla/final_project/imaterialist')
COCO_WEIGHTS_PATH = os.path.join(MASK_PATH, "mask_rcnn_coco.h5")
DEFAULT_LOGS_DIR = os.path.join(MASK_PATH, "logs")

# Import Mask RCNN
sys.path.append(MASK_PATH)  # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log


from fashion import *

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


### Configurations

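The base `Config` class is not defined in this notebook (it is presumably provided by `from fashion import *`); the iMaterialist-specific overrides are defined in the cell below.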

In [2]:
class imaterialistConfig(Config):
    """Training configuration for the imaterialist images.

    Subclasses the base Config and overrides only the values listed below;
    everything else keeps the base-class setting.
    """
    # --- Identity -----------------------------------------------------------
    # Recognizable name for this configuration
    NAME = "imaterialist"

    # --- Classes ------------------------------------------------------------
    # 46 classIds plus one slot for the background
    NUM_CLASSES = 46 + 1

    # --- Batch / schedule ---------------------------------------------------
    # Two images fit on a GPU with 12GB memory; lower this on a smaller GPU.
    IMAGES_PER_GPU = 2
    # Training steps per epoch
    STEPS_PER_EPOCH = 100
    # Keep validation short since the epoch itself is small
    VALIDATION_STEPS = 5

    # --- Input size (TESTING) -----------------------------------------------
    IMAGE_MIN_DIM = IMAGE_MAX_DIM = 256

    # --- Proposals ----------------------------------------------------------
    # Smaller anchors because the images and objects are small
    # (anchor side in pixels)
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)
    # Fewer training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # --- Detection ----------------------------------------------------------
    # Detections below 90% confidence are skipped
    DETECTION_MIN_CONFIDENCE = 0.9

In [3]:
# Single configuration instance shared by the model and training cells below.
config = imaterialistConfig()
# NOTE(review): imaterialist_dir is not referenced by any other cell visible in
# this notebook — confirm it is still needed.
imaterialist_dir = '/Akamai/rgradilla/final_project/Mask_RCNN/train'

## Data preparation

In [4]:
# One row per annotated segment; the attribute column is dropped because no
# later cell uses it. Reading and dropping in one expression keeps the cell
# idempotent on re-run.
train_df = pd.read_csv(DATA_DIR / 'train.csv').drop(columns='AttributesIds')

# Collapse to one row per image: the per-segment masks and class ids become
# parallel lists.
# NOTE: selecting several columns from a groupby needs a *list*
# (double brackets); the tuple form `['A', 'B']` is rejected by current pandas.
segments_by_image = train_df.groupby('ImageId')
image_df = segments_by_image[['EncodedPixels', 'ClassId']].agg(list)

# Height/Width are aggregated per image with mean(), which returns floats on
# current pandas. They are cast back to int because they are later used as
# array dimensions when decoding masks.
size_df = segments_by_image[['Height', 'Width']].mean().astype(int)
image_df = image_df.join(size_df, on='ImageId')

In [5]:
# Category metadata for the dataset; the 'categories' entries (id, name) are
# registered as classes by the dataset class below.
with open('/Akamai/rgradilla/final_project/imaterialist/label_descriptions.json', 'r') as file:
    label_desc = json.loads(file.read())

In [6]:
class imaterialistDataset(utils.Dataset):
    """iMaterialist images and run-length-encoded masks as a Mask R-CNN dataset.

    Every image and every instance mask is resized to a fixed square of
    ``IMAGE_SIZE`` pixels when it is loaded.
    """

    def __init__(self, df, image_size=256,
                 image_dir='/Akamai/rgradilla/final_project/imaterialist/train'):
        """Register the categories and one image entry per row of ``df``.

        Args:
            df: DataFrame indexed by image id, one row per image, with the
                columns 'ClassId' and 'EncodedPixels' (parallel lists) and
                'Height' / 'Width'.
            image_size: side length in pixels of the loaded images and masks.
            image_dir: directory that holds the '<image id>.jpg' files.
        """
        # The base initializer is called without arguments; the previous code
        # passed `self` a second time as a positional argument.
        super().__init__()

        self.IMAGE_SIZE = image_size

        # Add classes
        for cat in label_desc['categories']:
            self.add_class('fashion', cat.get('id'), cat.get('name'))

        # Add images (the index label of each row is the image id)
        for image_id, row in df.iterrows():
            self.add_image("fashion",
                           image_id=image_id,
                           path=os.path.join(image_dir, '{}.jpg'.format(image_id)),
                           labels=row['ClassId'],
                           annotations=row['EncodedPixels'],
                           # Stored as ints: they are used as array dimensions
                           # in load_mask, and an aggregated frame may carry floats.
                           height=int(row['Height']), width=int(row['Width']))

    def _resize_image(self, image_path):
        """Read ``image_path`` as RGB and resize it to IMAGE_SIZE x IMAGE_SIZE.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(image_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.IMAGE_SIZE, self.IMAGE_SIZE), interpolation=cv2.INTER_AREA)
        return img

    def load_image(self, image_id):
        """Return the resized RGB image registered under ``image_id``."""
        return self._resize_image(self.image_info[image_id]['path'])

    def load_mask(self, image_id):
        """Decode the instance masks of one image.

        Returns:
            mask: uint8 array of shape (IMAGE_SIZE, IMAGE_SIZE, n_instances).
            labels: int32 array with one class id per instance (ClassId + 1).
        """
        info = self.image_info[image_id]
        height, width = int(info['height']), int(info['width'])

        mask = np.zeros((self.IMAGE_SIZE, self.IMAGE_SIZE, len(info['annotations'])), dtype=np.uint8)
        labels = []

        for m, (annotation, label) in enumerate(zip(info['annotations'], info['labels'])):
            # The annotation is a flat "start length start length ..." string.
            rle = [int(x) for x in annotation.split()]
            flat_mask = np.zeros(height * width, dtype=np.uint8)

            # NOTE(review): starts are used as 0-based offsets here. Kaggle-style
            # RLE is often 1-indexed — confirm against the dataset description.
            for start_pixel, run_length in zip(rle[::2], rle[1::2]):
                flat_mask[start_pixel: start_pixel + run_length] = 1

            # Runs are laid out column by column, hence Fortran order.
            sub_mask = flat_mask.reshape((height, width), order='F')
            sub_mask = cv2.resize(sub_mask, (self.IMAGE_SIZE, self.IMAGE_SIZE), interpolation=cv2.INTER_NEAREST)

            mask[:, :, m] = sub_mask
            # Shift by one — presumably so that 0 stays reserved for the background.
            labels.append(int(label) + 1)

        return mask, np.array(labels, dtype='int32')

    def image_reference(self, image_id):
        """Return the image path and a copy of its label list."""
        info = self.image_info[image_id]
        return info['path'], list(info['labels'])

In [7]:
dataset = imaterialistDataset(image_df)
dataset.prepare()

In [8]:
len(dataset.image_ids)

45623

# Training

In [9]:
# NOTE(review): LR and EPOCHS are not referenced by any cell visible in this
# notebook; the training function below uses config.LEARNING_RATE and epochs=30.
LR = 1e-4
EPOCHS = [1, 6, 8]

# Suppresses every Python warning for the rest of the session.
import warnings 
warnings.filterwarnings("ignore")

In [71]:
from sklearn.model_selection import StratifiedKFold, KFold

# This code partially supports k-fold training, 
# you can specify the fold to train and the total number of folds here
FOLD = 0
N_FOLDS = 2

kf = KFold(n_splits=N_FOLDS, random_state=42, shuffle=True)
# kf.split() returns a one-shot generator. It is materialized here so a fold
# can be looked up any number of times (re-running get_fold used to find the
# generator exhausted and return None).
splits = list(kf.split(image_df)) # ideally, this should be multilabel stratification

def get_fold(fold=None):
    """Return the (train, validation) frames of one fold of ``image_df``.

    Args:
        fold: index of the fold to use; defaults to the notebook-level FOLD.

    Raises:
        ValueError: if ``fold`` is not in ``range(len(splits))``.
    """
    if fold is None:
        fold = FOLD
    if not 0 <= fold < len(splits):
        raise ValueError(
            'fold must be in [0, {}), got {}'.format(len(splits), fold))
    train_index, valid_index = splits[fold]
    return image_df.iloc[train_index], image_df.iloc[valid_index]

# NOTE: from here on `train_df` is the training fold of image_df (one row per
# image), no longer the raw per-segment frame read from train.csv.
train_df, valid_df = get_fold()

train_dataset = imaterialistDataset(train_df)
train_dataset.prepare()

valid_dataset = imaterialistDataset(valid_df)
valid_dataset.prepare()

In [72]:
print(train_df.shape)
print(valid_df.shape)

(22811, 4)
(22812, 4)


In [73]:
# Data augmentation

import imgaug as ia
from imgaug import augmenters as iaa

# Augmentation pipeline passed to training; it contains a single step,
# a horizontal flip (Fliplr(0.5)).
augmentation = iaa.Sequential([iaa.Fliplr(0.5)])

In [74]:
# Passed as model_dir to MaskRCNN below; the checkpoint path printed during
# training lies beneath this directory.
# NOTE(review): DEFAULT_LOGS_DIR from the first cell is not used here — confirm
# that writing into the mrcnn/ folder is intended.
MODEL_DIR = '/Akamai/rgradilla/final_project/Mask_RCNN/mrcnn/'

### Transfer learning

In [75]:
model = modellib.MaskRCNN(mode='training', 
                          config=config, 
                          model_dir=MODEL_DIR)

In [76]:
model.keras_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        (None, None, None, 3 0                                            
__________________________________________________________________________________________________
zero_padding2d_3 (ZeroPadding2D (None, None, None, 3 0           input_image[0][0]                
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        zero_padding2d_3[0][0]           
__________________________________________________________________________________________________
bn_conv1 (BatchNorm)            (None, None, None, 6 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

Let's load the COCO pre-trained weights into our model.

In [77]:
# Load the weights
# NOTE: this re-assigns COCO_WEIGHTS_PATH to the same value it was given in the
# first cell.
COCO_WEIGHTS_PATH = os.path.join(MASK_PATH, "mask_rcnn_coco.h5")
    
# For the coco dataset exclude the last layers because 
# it requires a matching number of classes
# (the four layer names in `exclude` are skipped; all others are loaded by name)
print("loading...", end='')
model.load_weights(COCO_WEIGHTS_PATH, by_name=True, exclude=[
    "mrcnn_class_logits", "mrcnn_bbox_fc",
    "mrcnn_bbox", "mrcnn_mask"])

print("loaded.")

loading...loaded.


In [78]:
def train(model, train=None, validate=None, epochs=30):
    """Train the head layers of ``model`` and return what ``model.train`` returns.

    Args:
        model: object exposing ``train(train, validate, learning_rate=...,
            epochs=..., layers=..., augmentation=...)``.
        train: training dataset; defaults to the notebook-level
            ``train_dataset`` as it exists when this function is called.
        validate: validation dataset; defaults to the notebook-level
            ``valid_dataset`` as it exists when this function is called.
        epochs: value forwarded as ``epochs`` to ``model.train``.

    Returns:
        Whatever ``model.train`` returns.
    """
    # Resolve the defaults at call time. Binding them in the signature froze the
    # datasets that existed when this cell was run, so re-running the fold cell
    # had no effect on training.
    if train is None:
        train = train_dataset
    if validate is None:
        validate = valid_dataset

    # Only the head layers are trained (layers='heads'). The learning rate and
    # the augmentation pipeline come from the notebook-level `config` and
    # `augmentation`.
    print("Training network heads")
    history = model.train(train, validate,
                          learning_rate=config.LEARNING_RATE,
                          epochs=epochs,
                          layers='heads',
                          augmentation=augmentation)

    return history

In [79]:
history = train(model)

Training network heads

Starting at epoch 0. LR=0.001

Checkpoint Path: /Akamai/rgradilla/final_project/Mask_RCNN/mrcnn/imaterialist20200506T1750/mask_rcnn_imaterialist_{epoch:04d}.h5
Selecting layers to train
fpn_c5p5               (Conv2D)
fpn_c4p4               (Conv2D)
fpn_c3p3               (Conv2D)
fpn_c2p2               (Conv2D)
fpn_p5                 (Conv2D)
fpn_p2                 (Conv2D)
fpn_p3                 (Conv2D)
fpn_p4                 (Conv2D)
In model:  rpn_model
    rpn_conv_shared        (Conv2D)
    rpn_class_raw          (Conv2D)
    rpn_bbox_pred          (Conv2D)
mrcnn_mask_conv1       (TimeDistributed)
mrcnn_mask_bn1         (TimeDistributed)
mrcnn_mask_conv2       (TimeDistributed)
mrcnn_mask_bn2         (TimeDistributed)
mrcnn_class_conv1      (TimeDistributed)
mrcnn_class_bn1        (TimeDistributed)
mrcnn_mask_conv3       (TimeDistributed)
mrcnn_mask_bn3         (TimeDistributed)
mrcnn_class_conv2      (TimeDistributed)
mrcnn_class_bn2        (TimeDistribu

In [84]:
import math

def plot_history(history):
    """Plot every training metric in ``history`` next to its validation twin.

    One subplot is drawn per key of ``history.history`` that does not start
    with 'val_'. The matching 'val_<key>' curve is added when it exists, so
    metrics without a validation counterpart no longer raise KeyError.

    Args:
        history: object with ``epoch`` (x values) and ``history`` (dict mapping
            a metric name to its per-epoch values).
    """
    epoch = history.epoch
    records = history.history

    # The loss is in pairs, one for train, one for val
    loss_stats = [k for k in records if not k.startswith('val_')]

    n_cols = 4
    n_rows = math.ceil(len(loss_stats) / n_cols)

    fig = plt.figure(figsize=(16, 10))

    for i, k in enumerate(loss_stats):
        ax = fig.add_subplot(n_rows, n_cols, i + 1)
        ax.plot(epoch, records[k], label=k)

        val_k = 'val_' + k
        if val_k in records:
            ax.plot(epoch, records[val_k], label=val_k)

        ax.set_xlabel('Epochs')
        ax.set_ylabel('Loss')
        ax.set_title(str(i) + ' - ' + k)
        # Legend set on the axes itself rather than through pyplot's current axes.
        ax.legend(shadow=True, fancybox=True)

    fig.tight_layout()
    plt.show()

## Inference

In [87]:
# Find last trained weights
weights_path = model.find_last()
weights_path

'/Akamai/rgradilla/final_project/Mask_RCNN/mrcnn/imaterialist20200506T1750/mask_rcnn_imaterialist_0030.h5'