In [8]:
import os
import sys
import itertools
import math
import logging
import json
import re
import random
from collections import OrderedDict
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
from matplotlib.patches import Polygon
from pathlib import Path

# Root directory of the project
ROOT_DIR = os.path.abspath("../final_project/")
MASK_PATH = '/Akamai/rgradilla/final_project/Mask_RCNN'
DATA_DIR = Path('/Akamai/rgradilla/final_project/imaterialist')
COCO_WEIGHTS_PATH = os.path.join(MASK_PATH, "mask_rcnn_coco.h5")
DEFAULT_LOGS_DIR = os.path.join(MASK_PATH, "logs")

# Import Mask RCNN
sys.path.append(MASK_PATH)  # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log

from fashion import *

In [3]:
from mrcnn.utils import compute_ap

In [34]:

class EvalImage():
  def __init__(self,dataset,model,cfg):
    self.dataset = dataset
    self.model   = model
    self.cfg     = cfg

 
 
  def evaluate_model(self , len = 50):
    APs = list()
    precisions_dict = {}
    recall_dict     = {}
    for index,image_id in enumerate(self.dataset.image_ids):
      if(index > len):
         break; 
      # load image, bounding boxes and masks for the image id
      image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(self.dataset, self.cfg,image_id, use_mini_mask=False)
      # convert pixel values (e.g. center)
      #scaled_image = modellib.mold_image(image, self.cfg)
      # convert image into one sample
      sample = np.expand_dims(image, 0)
      #print(len(image))
      # make prediction
      yhat = self.model.detect(sample, verbose=1)
      # extract results for first sample
      r = yhat[0]
      # calculate statistics, including AP
      AP, precisions, recalls, _ = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'])
      precisions_dict[image_id] = np.mean(precisions)
      recall_dict[image_id] = np.mean(recalls)
      # store
      APs.append(AP)

    # calculate the mean AP across all images
    mAP = np.mean(APs)
    return mAP,precisions_dict,recall_dict

In [9]:
class imaterialistConfig(Config):
    """ Configuration for training on imaterialist images.
    Derives from the base Config class and overrides some values.
    """
    # Give the configuration a recognizable name
    NAME = "imaterialist"

    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2

    # Number of classes (including background)
    NUM_CLASSES = 1 + 46  # Background + classIds

    # Number of training steps per epoch
    STEPS_PER_EPOCH = 100

    # Skip detections with < 90% confidence
    DETECTION_MIN_CONFIDENCE = 0.9
    
    ## TESTING
    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256
    
    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels
    
    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32
    
    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5

In [10]:
config = imaterialistConfig()
imaterialist_dir = '/Akamai/rgradilla/final_project/Mask_RCNN/train'

In [31]:
class InferenceConfig(config.__class__):
    # Run detection on one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    
config = InferenceConfig()

### Data preparation

In [12]:
train_df = pd.read_csv('/Akamai/rgradilla/final_project/imaterialist/train.csv')
train_df = train_df.drop('AttributesIds', axis=1)

image_df = train_df.groupby('ImageId')['EncodedPixels', 'ClassId'].agg(lambda x: list(x))
size_df = train_df.groupby('ImageId')['Height', 'Width'].mean()
image_df = image_df.join(size_df, on='ImageId')

In [16]:
with open('/Akamai/rgradilla/final_project/imaterialist/label_descriptions.json', 'r') as file:
            label_desc = json.load(file)

In [13]:
class imaterialistDataset(utils.Dataset):
    
    def __init__(self, df):
        super().__init__(self)
        
        self.IMAGE_SIZE = 256
        
        # Add classes
        for cat in label_desc['categories']:
            self.add_class('fashion', cat.get('id'), cat.get('name'))
        
        # Add images
        for i, row in df.iterrows():
            self.add_image("fashion", 
                           image_id=row.name, 
                           path= '/Akamai/rgradilla/final_project/imaterialist/train/{}.jpg'.format(row.name), 
                           labels=row['ClassId'],
                           annotations=row['EncodedPixels'], 
                           height=row['Height'], width=row['Width'])
            
    def _resize_image(self, image_path):
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.IMAGE_SIZE, self.IMAGE_SIZE), interpolation=cv2.INTER_AREA)  
        return img
        
    def load_image(self, image_id):
        return self._resize_image(self.image_info[image_id]['path'])
       
    def load_mask(self, image_id):
        info = self.image_info[image_id]
                
        mask = np.zeros((self.IMAGE_SIZE, self.IMAGE_SIZE, len(info['annotations'])), dtype=np.uint8)
        labels = []
        
        for m, (annotation, label) in enumerate(zip(info['annotations'], info['labels'])):
            sub_mask = np.full(info['height']*info['width'], 0, dtype=np.uint8)
            annotation = [int(x) for x in annotation.split(' ')]
            
            for i, start_pixel in enumerate(annotation[::2]):
                sub_mask[start_pixel: start_pixel+annotation[2*i+1]] = 1

            sub_mask = sub_mask.reshape((info['height'], info['width']), order='F')
            sub_mask = cv2.resize(sub_mask, (self.IMAGE_SIZE, self.IMAGE_SIZE), interpolation=cv2.INTER_NEAREST)
            
            mask[:, :, m] = sub_mask
            labels.append(int(label)+1)
            
        return mask, np.array(labels, dtype='int32')
    
    def image_reference(self, image_id):
        info = self.image_info[image_id]
        return info['path'], [x for x in info['labels']]

In [23]:
dataset = imaterialistDataset(image_df)
dataset.prepare()

In [60]:
len(image_df)

45623

# Evaluate

### Prepare dataset:

In [39]:
import tensorflow as tf
DEVICE = "/gpu:0" 

# Create model in inference mode
with tf.device(DEVICE):
    model = modellib.MaskRCNN(mode="inference", 
                          model_dir=MASK_PATH,
                          config=config)

In [52]:
# this is the last model's weights
IMATERIALIST_WEIGHTS_PATH = '/Akamai/rgradilla/final_project/Mask_RCNN/mrcnn/imaterialist20200506T1750/mask_rcnn_imaterialist_0030.h5'

weights_path = IMATERIALIST_WEIGHTS_PATH

# Load weights
print("Loading weights ", weights_path)
model.load_weights(weights_path, by_name=True)

Loading weights  /Akamai/rgradilla/final_project/Mask_RCNN/mrcnn/imaterialist20200506T1750/mask_rcnn_imaterialist_0030.h5
Re-starting from epoch 30


In [53]:
eval = EvalImage(dataset,model,config)

In [61]:
evaluation = eval.evaluate_model(len=2000)

Processing 1 images
image                    shape: (256, 256, 3)         min:    8.00000  max:  247.00000  uint8
molded_images            shape: (1, 256, 256, 3)      min: -115.70000  max:  142.10000  float64
image_metas              shape: (1, 59)               min:    0.00000  max:  256.00000  int64
anchors                  shape: (1, 16368, 4)         min:   -0.35494  max:    1.10396  float32
Processing 1 images
image                    shape: (256, 256, 3)         min:   19.00000  max:  232.00000  uint8
molded_images            shape: (1, 256, 256, 3)      min:  -84.90000  max:  123.10000  float64
image_metas              shape: (1, 59)               min:    0.00000  max:  256.00000  int64
anchors                  shape: (1, 16368, 4)         min:   -0.35494  max:    1.10396  float32
Processing 1 images
image                    shape: (256, 256, 3)         min:    0.00000  max:  254.00000  uint8
molded_images            shape: (1, 256, 256, 3)      min: -116.80000  max:  149.10000

In [62]:
evaluation[0]

0.17089238242217966