In [1]:
import os

import cv2
import numpy as np
from PIL import Image
#from IPython.display import Image 

In [2]:
def load_custom_dataset(root_dir, resolution, instance_per_class):
    """
    Load every class folder under root_dir into shuffled, aligned arrays.

    input:
    root_dir: path of the data folder; every sub-directory is treated as one class
    resolution: a tuple (width, height), forwarded to load_class_datapoint
    instance_per_class: forwarded to load_class_datapoint
    output:
    images: np.array built from the 'image' entry of every datapoint
    bbox: np.array built from the 'bbox' entry of every datapoint
    masks: np.array built from convert_mask(...) of every datapoint
    class_meta: a dictionary, key is class name and value is index number
    The three arrays share the same (shuffled) order.
    """
    dataset = []
    class_meta = {}
    # sorted() keeps the class -> index mapping stable between runs;
    # os.listdir returns entries in arbitrary order.
    for class_name in sorted(os.listdir(root_dir)):
        # Only directories are classes; skip stray files such as .DS_Store.
        if not os.path.isdir(os.path.join(root_dir, class_name)):
            continue
        class_meta[class_name] = len(class_meta)
        datapoints = load_class_datapoint(root_dir, class_name, resolution, instance_per_class)
        dataset.extend(datapoints)
    np.random.shuffle(dataset)
    images = []
    bbox = []
    masks = []
    # class_meta is complete at this point, so every converted mask has the
    # same number of channels.
    for datapoint in dataset:
        images.append(datapoint['image'])
        bbox.append(datapoint['bbox'])
        masks.append(convert_mask(datapoint['mask'], datapoint['class'], class_meta))
    images = np.array(images)
    bbox = np.array(bbox)
    masks = np.array(masks)
    return images, bbox, masks, class_meta

In [3]:
def load_class_datapoint(root_dir, class_name, resolution, instance_per_class):
    """
    Load the image/mask pairs stored under one class folder.

    Images are read from <root_dir>/<class_name>/images and the matching
    masks from <root_dir>/<class_name>/masks/<image id>.jpg.

    input:
    root_dir: relative path of the data folder
    class_name: the string name of the class, same as the folder
    resolution: a tuple (width, height)
    instance_per_class: a int specify the maximum number of instance to load
        for this class; None loads every image found
    output:
    a list of dictionaries, one per loaded image
    {
        'image': img_data, //result of load_image
        'mask': mask_data, //result of load_mask
        'bbox': bbox, //result of calculate_bbox(mask_data)
        'class': class_name //string class name
    }
    """
    img_path = os.path.join(root_dir, class_name, 'images')
    mask_path = os.path.join(root_dir, class_name, 'masks')
    datapoints = []
    # sorted() makes the selected subset reproducible; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(img_path)):
        if instance_per_class is not None and len(datapoints) >= instance_per_class:
            break
        # Hidden files (e.g. .DS_Store) are not images.
        if filename.startswith('.'):
            continue
        # splitext keeps dots inside the name ("a.b.jpg" -> "a.b").
        img_id = os.path.splitext(filename)[0]
        img_filename = os.path.join(img_path, filename)
        mask_filename = os.path.join(mask_path, img_id + '.jpg')
        img_data = load_image(img_filename, resolution)
        mask_data = load_mask(mask_filename, resolution)
        bbox = calculate_bbox(mask_data)
        datapoint = {
            'image': img_data,
            'mask': mask_data,
            'bbox': bbox,
            'class': class_name
        }
        datapoints.append(datapoint)
    return datapoints

In [7]:
def normalize_image(img):
    """
    Scale pixel values by 1/255 and zero-center them.

    input:
    img: image data, an array with at least two dimensions; integer data
        (e.g. the uint8 arrays produced by np.array on an 8-bit image) is
        converted to float64, floating-point data keeps its dtype
    output:
    a new normalized arr with the same shape; the input is left untouched
    """
    pixels = np.asarray(img)
    # In-place `img /= 255` fails on integer arrays (float result cannot be
    # cast back) and would overwrite the caller's data for float arrays, so
    # work on a float copy instead.
    if not np.issubdtype(pixels.dtype, np.floating):
        pixels = pixels.astype(np.float64)
    scaled = pixels / 255  # normalize to range of (0,1)
    # Mean over the first two axes: one mean per channel for a 3-D array,
    # a single scalar mean for a 2-D array.
    return scaled - np.mean(scaled, (0, 1))

In [30]:
def load_image(img_filename, resolution):
    """
    Read an image file and resize it.

    input:
    img_filename: the full relative path to the image file
    resolution: a tuple (width, height)
    output:
    the resized image as an array. The values are NOT normalized (call
    normalize_image for that). np.array on a PIL image puts rows first, so
    the shape starts with (resolution[1], resolution[0]); whether a channel
    axis follows depends on the mode of the file (not converted here).
    """
    # The context manager closes the underlying file handle once the pixel
    # data has been copied into the array.
    with Image.open(img_filename) as im:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        resized = im.resize(resolution, Image.LANCZOS)
        np_im = np.array(resized)

    return np_im

In [31]:
#load_image "test" code
# The sample lives at a machine-specific absolute path, so the check is
# skipped when the file is absent instead of breaking Restart & Run All.
sample_image_path = "/Users/stephenhansen/Code/background_subtraction/cup2/cup2_Right_regularlight_image22.jpg"

if os.path.isfile(sample_image_path):
    npim = load_image(sample_image_path, (120,120))

    # Cast to float first: the loaded array is integer-typed for 8-bit
    # images, and integer arrays cannot be divided in place.
    res = normalize_image(npim.astype(np.float64))
    avg = np.mean(res,(0,1))
    # After normalization every channel mean should be (numerically) zero.
    assert np.allclose(avg, 0), avg
else:
    print("skipping load_image check, sample not found:", sample_image_path)

In [32]:
def load_mask(mask_filename, resolution):
    """
    Load a single-channel mask image and normalize it.

    input:
    mask_filename: the full relative path to the mask image file
    resolution: a tuple (width, height)
    output:
    normalize_image applied to the loaded mask, an array with shape
    (resolution[1], resolution[0]) i.e. (height, width)
    raises:
    ValueError when the loaded array is not 2-D with that shape (for
    example when the mask file has colour channels)
    """
    np_image = load_image(mask_filename, resolution)
    np_im_shape = np.shape(np_image)
    # An image resized to (width, height) becomes an array with rows first,
    # so the expected array shape is (height, width).
    correct_dim = (resolution[1], resolution[0])

    if np_im_shape != correct_dim:
        raise ValueError("The shape of {} is {}. The shape should be {}".format(mask_filename, np_im_shape, correct_dim))

    return normalize_image(np_image)

In [44]:
# load_mask "test" code
# The sample lives at a machine-specific absolute path. When it is absent,
# fall back to an empty mask of the same size so later cells that read
# `msk` still run on other machines.
sample_mask_path = "/Users/stephenhansen/Code/background_subtraction/cup2/matlab/cup2_Right_regularlight_image20.jpg"

if os.path.isfile(sample_mask_path):
    msk = load_mask(sample_mask_path, (120,120))
else:
    print("sample mask not found, using an all-zero placeholder:", sample_mask_path)
    msk = np.zeros((120, 120))

In [11]:
def calculate_bbox(mask_data):
    """
    Bounding box of the largest contour found in a mask.

    input:
    mask_data: 2-D arr of mask image. uint8 data is thresholded as-is
        (pixels above 50 count as foreground). Any other dtype is first
        stretched linearly so that its minimum maps to 0 and its maximum
        to 255 -- this assumes the mask contains both background and
        foreground pixels; a constant non-uint8 mask yields None.
    output:
    a list [x, y, width, height] as reported by cv2.boundingRect (x, y is
    the top-left corner in image coordinates) for the contour whose
    bounding rectangle has the largest area, or None when no contour is found
    """
    mask = np.asarray(mask_data)
    if mask.size == 0:
        return None

    if mask.dtype != np.uint8:
        # cv2.findContours needs an 8-bit single-channel image, but
        # normalized masks are floats; bring them back to a 0..255 scale.
        mask = mask.astype(np.float64)
        low, high = mask.min(), mask.max()
        if high == low:
            return None
        mask = ((mask - low) * (255.0 / (high - low))).astype(np.uint8)

    _, thresh = cv2.threshold(mask, 50, 120, cv2.THRESH_BINARY)
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 2.x/4.x; the contours are always second from last.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    best_area = 0
    best_rect = None
    for contour in contours:
        # get the bounding rect
        x, y, w, h = cv2.boundingRect(contour)
        # >= keeps the later contour when two rectangles tie on area
        if w * h >= best_area:
            best_area = w * h
            best_rect = [x, y, w, h]

    return best_rect

In [50]:
def convert_mask(mask_data, class_name, class_meta):
    """
    Spread a single-class mask into a per-class channel stack.

    input:
    mask_data: arr of mask image (2-D)
    class_name: the string name of the class the mask belongs to
    class_meta: a dictionary, key is class name and value is index number
    output:
    an array of shape (mask_data.shape[0], mask_data.shape[1], len(class_meta));
    the channel at class_meta[class_name] holds mask_data, all others are zero
    """
    channel = class_meta[class_name]
    dims = np.shape(mask_data)
    stacked = np.zeros((dims[0], dims[1], len(class_meta)))
    # Only the channel of this mask's class is filled in.
    stacked[..., channel] = mask_data
    return stacked

In [56]:
#test convert_mask
# Uses a synthetic mask so the check does not depend on `msk`, which an
# earlier cell loads from a machine-specific file.

class_meta = {"cup":0, "bottle":1, "glasses":2, "bla":3}
test_mask = np.ones((120, 120))
mask_Result = convert_mask(test_mask, "cup", class_meta)
# The mask must land in the "cup" channel and nowhere else.
assert np.array_equal(mask_Result[:, :, class_meta["cup"]], test_mask)
assert not mask_Result[:, :, 1:].any()
print(np.shape(mask_Result))

(120, 120, 4)
