## Speeding up the code

In [1]:
# About parmap: it runs a list comprehension in parallel.
# Sequential version:
# y = [myfunction(x, argument1, mykeyword=argument2) for x in mylist]
# Parallel version:
# y = parmap.map(myfunction, mylist, argument1, mykeyword=argument2)

In [3]:
import numpy as np
from skimage.segmentation import slic
from skimage.transform import resize
import os
import warnings
from matplotlib import image as mpimg
import sys
from numpy import unique
from numpy import random
import time
import parmap

In [126]:
def load_image(infilename):
    """Read the image stored at ``infilename`` and return what ``mpimg.imread`` yields."""
    return mpimg.imread(infilename)


def load_batch(path, pimg, pgt, nfiles, batch_size=1000):
    """
    Load a random batch of images together with their ground-truth maps.

    :param path: base directory (string, concatenated as-is with the sub-directories)
    :param pimg: image sub-directory, appended to ``path``
    :param pgt: ground-truth sub-directory, appended to ``path``
    :param nfiles: population size from which indices into the directory listing are drawn
    :param batch_size: number of indices to draw, without replacement
    :return: imgs, gts: arrays built from the loaded images and from the files
             named ``'gt_' + <image file name>`` in the ground-truth directory
    :raises ValueError: from ``np.random.choice`` when ``batch_size > nfiles``

    Sampled entries whose name does not end in ".jpg" are skipped, so fewer than
    ``batch_size`` images may be returned.
    """
    # sample randomly
    randomise = np.random.choice(nfiles, size=batch_size, replace=False)
    # generate file lists
    print('Reading file names ..')
    # List the directory once instead of once per sampled index.
    all_names = os.listdir(path + pimg)
    filelist = [all_names[i] for i in randomise]
    gtlist = ['gt_' + fname for fname in filelist]
    print('read')
    # initialise datasets
    imgs = []
    gts = []
    # read files
    print('Reading ', batch_size, ' files...')
    for fname, gtfname in zip(filelist, gtlist):
        name = path + pimg + fname
        if not name.endswith(".jpg"):
            # The previous while-loop never advanced past such an entry and
            # therefore never terminated; skip it instead.
            continue
        imgs.append(load_image(name))
        gts.append(load_image(path + pgt + gtfname))

    n_read = len(imgs)
    imgs = np.asarray(imgs)
    gts = np.asarray(gts)
    print('Read ', n_read, ' files.')
    print('Check: img size', imgs.shape, '\tgt size', gts.shape)
    return imgs, gts

def old_box(seg, i):
    """Return the bounding box of label ``i`` in the 2-D label map ``seg``.

    The result is ``np.array([row_max, col_max, row_min, col_min])``, where each
    value is the index of a row/column that actually contains the label.
    Raises ValueError when no element of ``seg`` equals ``i``.
    """
    rows, cols = np.nonzero(seg == i)
    return np.array([rows.max(), cols.max(), rows.min(), cols.min()])

def box(seg):
    """Return one bounding box per label of the 2-D label map ``seg``.

    Entry ``k`` of the returned list is ``np.array([row_max, col_max, row_min,
    col_min])`` for label ``k``, for ``k`` in ``range(np.max(seg))``.
    Raises ValueError when one of those labels does not occur in ``seg``.
    """
    # NOTE(review): range(np.max(seg)) stops one short of the largest label, so
    # that label gets no box — confirm this is intended.
    hits = (np.nonzero(seg == label) for label in range(np.max(seg)))
    return [np.array([r.max(), c.max(), r.min(), c.min()]) for r, c in hits]

def old_patch_cat(gt, SLIC, i, thres1, thres2):
    """Label superpixel ``i`` as 1 or 0 from the ground-truth map ``gt``.

    A ``gt`` value above 125 counts as positive. The label is 1 when either
      * positives inside the superpixel / superpixel size > ``thres1``, or
      * positives inside the superpixel / positives in the whole ``gt`` > ``thres2``
        (only evaluated when ``gt`` has at least one positive value);
    otherwise it is 0.
    Raises ZeroDivisionError when no element of ``SLIC`` equals ``i``.
    """
    inside = gt[SLIC == i]
    hits = float(np.sum(inside > 125))
    if hits / float(inside.size) > thres1:
        return 1
    total_hits = float(np.sum(gt > 125))
    return 1 if total_hits > 0 and hits / total_hits > thres2 else 0

def patch_cat(gt_SLIC, thres1, thres2):
    """Label every superpixel of one image as 1 or 0.

    :param gt_SLIC: pair ``(gt, SLIC)``: ground-truth map and superpixel label map
    :param thres1: label = 1 if positives in the superpixel / superpixel size > thres1
    :param thres2: label = 1 if positives in the superpixel / positives in the whole
                   gt > thres2 (only checked when gt has at least one positive value)
    :return: list with one 0/1 label per superpixel index in ``range(np.max(SLIC))``
    :raises ZeroDivisionError: when one of those indices does not occur in ``SLIC``

    A ``gt`` value above 125 counts as positive.
    """
    gt = gt_SLIC[0]
    SLIC = gt_SLIC[1]
    # Loop-invariant: total number of positive pixels, computed once.
    size_true = float(np.sum(gt > 125))
    label_list = []
    # NOTE(review): range(np.max(SLIC)) stops one short of the largest label, so
    # that superpixel gets no label — confirm this is intended.
    for i in range(np.max(SLIC)):
        # Select the superpixel's pixels once and reuse them for both counts.
        inside = gt[SLIC == i]
        num = float(np.sum(inside > 125))
        denom = float(inside.size)
        if num / denom > thres1:
            label_list.append(1)
        elif size_true > 0 and num / size_true > thres2:
            label_list.append(1)
        else:
            label_list.append(0)
    return label_list

def old_xpatchify(img, SLIC, boxed, i):
    """Cut superpixel ``i`` out of ``img`` and resize it to 80x80.

    Every pixel whose ``SLIC`` label differs from ``i`` is zeroed on a copy of
    ``img``; the copy is then cropped to rows ``boxed[2]:boxed[0]`` and columns
    ``boxed[3]:boxed[1]`` and passed to ``resize(..., (80, 80))``.
    """
    rows_out, cols_out = np.nonzero(SLIC != i)
    masked = np.copy(img)
    masked[rows_out, cols_out, :] = 0
    # NOTE(review): slice stops are exclusive, so row boxed[0] and column
    # boxed[1] themselves are not part of the crop — confirm this is intended.
    row_stop, col_stop, row_start, col_start = (int(boxed[k]) for k in range(4))
    cropped = masked[row_start:row_stop, col_start:col_stop]
    return resize(cropped, (80, 80))

def xpatchify(img_SLIC_boxed):
    """Build the list of 80x80 patches for one image.

    ``img_SLIC_boxed`` is indexed as ``(img, SLIC, boxed)``. For each label in
    ``range(np.max(SLIC))`` the pixels belonging to other labels are zeroed on a
    copy of ``img``, the copy is cropped to rows ``boxed[label][2]:boxed[label][0]``
    and columns ``boxed[label][3]:boxed[label][1]``, and the crop is passed to
    ``resize(..., (80, 80))``.
    """
    img, SLIC, boxed = img_SLIC_boxed[0], img_SLIC_boxed[1], img_SLIC_boxed[2]

    def _one_patch(label):
        # Blank out everything outside the current superpixel.
        rows_out, cols_out = np.nonzero(SLIC != label)
        masked = np.copy(img)
        masked[rows_out, cols_out, :] = 0
        # NOTE(review): slice stops are exclusive, so the row/column stored as
        # the box maximum is not part of the crop — confirm this is intended.
        row_stop, col_stop, row_start, col_start = (int(boxed[label][k]) for k in range(4))
        return resize(masked[row_start:row_stop, col_start:col_stop], (80, 80))

    return [_one_patch(label) for label in range(np.max(SLIC))]

def old_get_labeled_patches(imgs, gts, n_segments = 100, thres1 = 0.2, thres2 = 0.2):
    """
    Sequential reference implementation: get all the patches from the set of images.
    :param imgs: images
    :param gts: masks
    :param n_segments: forwarded to slic as the requested number of segments
    :param thres1: label = 1 if a proportion bigger than thres1 in the patch is masked as 1
    :param thres2: label = 1 if pixels masked as 1 in patch / total number of pixels masked as 1 in the picture > thres2
    :return: patches: one list per image holding the 80x80 resized patches from old_xpatchify
    :return: labels: one list per image holding the 0/1 label of each patch
    """
    n = len(imgs)
    segmentations = [slic(imgs[k, :], n_segments, compactness=20, sigma=10) for k in range(n)]
    SLIC_list = np.asarray(segmentations)

    # bounding box of every superpixel, image by image
    boxes = [
        [old_box(SLIC_list[k, :], label) for label in range(np.max(SLIC_list[k, :]))]
        for k in range(n)
    ]

    # masked, cropped and resized patch of every superpixel
    patches = [
        [old_xpatchify(imgs[k, :], SLIC_list[k, :], boxes[k][label], label)
         for label in range(np.max(SLIC_list[k, :]))]
        for k in range(n)
    ]

    # 0/1 label of every superpixel
    labels = [
        [old_patch_cat(gts[k, :], SLIC_list[k, :], label, thres1, thres2)
         for label in range(np.max(SLIC_list[k, :]))]
        for k in range(n)
    ]

    return patches, labels

def get_labeled_patches(imgs, gts, n_segments = 100, thres1 = 0.2, thres2 = 0.2):
    """
    Get all the patches from the set of images, mapping the per-image work with parmap.
    :param imgs: images
    :param gts: masks
    :param n_segments: forwarded to slic as the requested number of segments
    :param thres1: label = 1 if a proportion bigger than thres1 in the patch is masked as 1
    :param thres2: label = 1 if pixels masked as 1 in patch / total number of pixels masked as 1 in the picture > thres2
    :return: patches: result of parmap.map(xpatchify, ...), i.e. per image the list of 80x80 resized patches
    :return: labels: result of parmap.map(patch_cat, ...), i.e. per image the list of 0/1 patch labels
    """
    n_images = len(imgs)
    segmentations = [slic(imgs[k, :], n_segments, compactness=20, sigma=10) for k in range(n_images)]
    SLIC_list = np.asarray(segmentations)

    # bounding boxes of all superpixels, one task per image
    boxes = parmap.map(box, SLIC_list)

    # patches: each task receives (image, segmentation, boxes of that image)
    patches = parmap.map(xpatchify, zip(imgs,  SLIC_list, boxes))

    # labels: each task receives (mask, segmentation); thresholds are shared extra arguments
    labels = parmap.map(patch_cat, zip(gts,  SLIC_list), thres1, thres2)

    return patches, labels




def balanced_sample_maker(X, y, random_seed=None):
    """ return a class-balanced resample of (X, y)

    The same number of observations, ``int(n_negative / 10)``, is drawn with
    replacement from class 0 and from class 1; the drawn indices are shuffled
    and used to index both X and y. The current version is developed on the
    assumption that the positive class is the minority.

    Parameters:
    ===========
    X: {numpy.ndarray} observations, indexed as X[idx, :]
    y: {numpy.ndarray} labels; must contain both 0 and 1
    random_seed: optional seed; when given, numpy's global random state is re-seeded

    Returns:
    ========
    (X_balanced, y_balanced)

    Raises:
    =======
    ValueError: y holds fewer than two distinct labels
    KeyError: y holds two or more labels but 0 or 1 is missing
    """
    uniq_levels = unique(y)
    if len(uniq_levels) < 2:
        # Raise instead of exit(0): a helper must not end the interpreter, and
        # certainly not with a success status, when its input is unusable.
        raise ValueError("Not enough data, there are no images with a boat!")

    if random_seed is not None:
        random.seed(random_seed)

    # observation indices of each class level (vectorised lookup)
    groupby_levels = {level: np.flatnonzero(y == level) for level in uniq_levels}

    # both classes are sampled down/up to a tenth of the negative count
    sample_size = len(groupby_levels[0])  # number of negative samples
    n_draws = int(sample_size / 10)

    # downsampling on observations of negative label
    down_sample_idx = random.choice(groupby_levels[0], size=n_draws, replace=True).tolist()

    # oversampling on observations of positive label
    over_sample_idx = random.choice(groupby_levels[1], size=n_draws, replace=True).tolist()
    balanced_copy_idx = down_sample_idx + over_sample_idx
    random.shuffle(balanced_copy_idx)

    return X[balanced_copy_idx, :], y[balanced_copy_idx]

In [128]:
path = '../data/'
pimg = 'train_sample/'
pgt = 'train_maps/'
nfiles = len(os.listdir(path + pimg))

# time.perf_counter() is the clock intended for measuring elapsed intervals;
# time.time() follows the wall clock and can jump if the system time is adjusted.
startTime = time.perf_counter()
imgs, gts = load_batch(path, pimg, pgt, nfiles, 10)
print("Load_batch takes {}".format(time.perf_counter() - startTime))

# sequential reference implementation
startTime = time.perf_counter()
list_patches_old, list_labels_old = old_get_labeled_patches(imgs, gts)
print("Old get_labeled_patches takes {}".format(time.perf_counter() - startTime))

# parmap-based implementation
startTime = time.perf_counter()
list_patches, list_labels = get_labeled_patches(imgs, gts)
print("New get_labeled_patches takes {}".format(time.perf_counter() - startTime))

Reading file names ..
read
Reading  10  files...
Read  10  files.
Check: img size (10, 768, 768, 3) 	gt size (10, 768, 768)
Load_batch takes 0.30666327476501465


  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


Old get_labeled_patches takes 28.56762981414795
New get_labeled_patches takes 17.667948007583618


In [134]:
# DEBUG: every patch from the parmap version must equal the sequential one.
# One vectorised comparison per patch replaces a Python-level loop over every
# single value; "bad" is printed once per patch that differs (in value or shape).
for i, image_patches in enumerate(list_patches):
    for j, patch in enumerate(image_patches):
        if not np.array_equal(patch, list_patches_old[i][j]):
            print("bad")

## boxes

With parmap, computing the boxes takes roughly half the time of the old method.

In [70]:
def old_box(seg, i):
    """Return the bounding box of label ``i`` in the 2-D label map ``seg``.

    The result is ``np.array([row_max, col_max, row_min, col_min])``, where each
    value is the index of a row/column that actually contains the label.
    Raises ValueError when no element of ``seg`` equals ``i``.
    """
    rows, cols = np.nonzero(seg == i)
    return np.array([rows.max(), cols.max(), rows.min(), cols.min()])

def box(seg):
    """Return one bounding box per label of the 2-D label map ``seg``.

    Entry ``k`` of the returned list is ``np.array([row_max, col_max, row_min,
    col_min])`` for label ``k``, for ``k`` in ``range(np.max(seg))``.
    Raises ValueError when one of those labels does not occur in ``seg``.
    """
    # NOTE(review): range(np.max(seg)) stops one short of the largest label, so
    # that label gets no box — confirm this is intended.
    hits = (np.nonzero(seg == label) for label in range(np.max(seg)))
    return [np.array([r.max(), c.max(), r.min(), c.min()]) for r, c in hits]

n = len(imgs)
SLIC_list = np.asarray([slic(imgs[i, :], n_segments = 100, compactness=20, sigma=10) for i in range(len(imgs))])

# Run the box functions to find the extent of every superpixel patch.
# time.perf_counter() is the clock intended for measuring elapsed intervals.

# 1) sequential: one old_box call per (image, label)
startTime = time.perf_counter()
boxes1 = [
    [old_box(SLIC_list[i, :], j) for j in range(np.max(SLIC_list[i, :]))]
    for i in range(n)
]
print("Boxes old method takes {}".format(time.perf_counter() - startTime))

# 2) parallel: one box call per image, distributed by parmap
startTime = time.perf_counter()
boxes = parmap.map(box, SLIC_list)
print("Boxes parmap takes {}".format(time.perf_counter() - startTime))

# 3) sequential: one box call per image
startTime = time.perf_counter()
# Each boxes0[i] is a one-element list wrapping the list returned by box().
boxes0 = [[box(SLIC_list[i, :])] for i in range(n)]
# Unwrap one level; the loop variable is not called `box`, so it cannot be
# confused with the function of that name.
flat_boxes = [image_boxes for i in range(n) for image_boxes in boxes0[i]]
print(len(flat_boxes))
print("Boxes takes {}".format(time.perf_counter() - startTime))

Boxes old method takes 2.691939115524292
Boxes parmap takes 1.3860588073730469
10
Boxes takes 2.731315851211548


Boxes old method takes 2.731771230697632


Boxes parmap takes 1.4589712619781494


Boxes takes 2.75345516204834

In [66]:
# DEBUG: walk boxes0 and print "bad" wherever boxes1[i][j][h] differs from
# boxes0[i][j][h].
# NOTE(review): this cell carries execution count In [66], i.e. it ran before
# the cell (In [70]) that now builds boxes0 and boxes1, so it may have been
# checked against differently nested lists — TODO confirm that boxes0 and
# boxes1 have the same nesting before relying on this check.
for i in range(len(boxes0)):
    for j in range(len(boxes0[i])):
        for h in range(len(boxes0[i][j])):
            if not boxes1[i][j][h] == boxes0[i][j][h]:
                print("bad")

## Patches

With parmap, building the patches takes roughly half the time of the old method.

In [94]:
def old_xpatchify(img, SLIC, boxed, i):
    """Cut superpixel ``i`` out of ``img`` and resize it to 80x80.

    Every pixel whose ``SLIC`` label differs from ``i`` is zeroed on a copy of
    ``img``; the copy is then cropped to rows ``boxed[2]:boxed[0]`` and columns
    ``boxed[3]:boxed[1]`` and passed to ``resize(..., (80, 80))``.
    """
    rows_out, cols_out = np.nonzero(SLIC != i)
    masked = np.copy(img)
    masked[rows_out, cols_out, :] = 0
    # NOTE(review): slice stops are exclusive, so row boxed[0] and column
    # boxed[1] themselves are not part of the crop — confirm this is intended.
    row_stop, col_stop, row_start, col_start = (int(boxed[k]) for k in range(4))
    cropped = masked[row_start:row_stop, col_start:col_stop]
    return resize(cropped, (80, 80))

def xpatchify(img_SLIC_boxed):
    """Build the list of 80x80 patches for one image.

    ``img_SLIC_boxed`` is indexed as ``(img, SLIC, boxed)``. For each label in
    ``range(np.max(SLIC))`` the pixels belonging to other labels are zeroed on a
    copy of ``img``, the copy is cropped to rows ``boxed[label][2]:boxed[label][0]``
    and columns ``boxed[label][3]:boxed[label][1]``, and the crop is passed to
    ``resize(..., (80, 80))``.
    """
    img, SLIC, boxed = img_SLIC_boxed[0], img_SLIC_boxed[1], img_SLIC_boxed[2]

    def _one_patch(label):
        # Blank out everything outside the current superpixel.
        rows_out, cols_out = np.nonzero(SLIC != label)
        masked = np.copy(img)
        masked[rows_out, cols_out, :] = 0
        # NOTE(review): slice stops are exclusive, so the row/column stored as
        # the box maximum is not part of the crop — confirm this is intended.
        row_stop, col_stop, row_start, col_start = (int(boxed[label][k]) for k in range(4))
        return resize(masked[row_start:row_stop, col_start:col_stop], (80, 80))

    return [_one_patch(label) for label in range(np.max(SLIC))]

In [96]:
# time.perf_counter() is the clock intended for measuring elapsed intervals.

# sequential: one old_xpatchify call per (image, label)
startTime = time.perf_counter()
patches_old = [
    [old_xpatchify(imgs[i, :], SLIC_list[i, :], boxes[i][j], j)
     for j in range(np.max(SLIC_list[i, :]))]
    for i in range(n)
]
print("Patches old method takes {}".format(time.perf_counter() - startTime))

# parallel: one xpatchify call per image, distributed by parmap
startTime = time.perf_counter()
patches = parmap.map(xpatchify, zip(imgs,  SLIC_list, boxes))
print("Patches new method takes {}".format(time.perf_counter() - startTime))

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


Patches old method takes 14.836807012557983
Patches new method takes 8.277411699295044


In [115]:
# DEBUG: every patch from the parmap version must equal the sequential one.
# One vectorised comparison per patch replaces a Python-level loop over every
# single value; "bad" is printed once per patch that differs (in value or shape).
for i, image_patches in enumerate(patches):
    for j, patch in enumerate(image_patches):
        if not np.array_equal(patch, patches_old[i][j]):
            print("bad")

## Labels

In [122]:
def old_patch_cat(gt, SLIC, i, thres1, thres2):
    """Label superpixel ``i`` as 1 or 0 from the ground-truth map ``gt``.

    A ``gt`` value above 125 counts as positive. The label is 1 when either
      * positives inside the superpixel / superpixel size > ``thres1``, or
      * positives inside the superpixel / positives in the whole ``gt`` > ``thres2``
        (only evaluated when ``gt`` has at least one positive value);
    otherwise it is 0.
    Raises ZeroDivisionError when no element of ``SLIC`` equals ``i``.
    """
    inside = gt[SLIC == i]
    hits = float(np.sum(inside > 125))
    if hits / float(inside.size) > thres1:
        return 1
    total_hits = float(np.sum(gt > 125))
    return 1 if total_hits > 0 and hits / total_hits > thres2 else 0

def patch_cat(gt_SLIC, thres1, thres2):
    """Label every superpixel of one image as 1 or 0.

    :param gt_SLIC: pair ``(gt, SLIC)``: ground-truth map and superpixel label map
    :param thres1: label = 1 if positives in the superpixel / superpixel size > thres1
    :param thres2: label = 1 if positives in the superpixel / positives in the whole
                   gt > thres2 (only checked when gt has at least one positive value)
    :return: list with one 0/1 label per superpixel index in ``range(np.max(SLIC))``
    :raises ZeroDivisionError: when one of those indices does not occur in ``SLIC``

    A ``gt`` value above 125 counts as positive.
    """
    gt = gt_SLIC[0]
    SLIC = gt_SLIC[1]
    # Loop-invariant: total number of positive pixels, computed once.
    size_true = float(np.sum(gt > 125))
    label_list = []
    # NOTE(review): range(np.max(SLIC)) stops one short of the largest label, so
    # that superpixel gets no label — confirm this is intended.
    for i in range(np.max(SLIC)):
        # Select the superpixel's pixels once and reuse them for both counts.
        inside = gt[SLIC == i]
        num = float(np.sum(inside > 125))
        denom = float(inside.size)
        if num / denom > thres1:
            label_list.append(1)
        elif size_true > 0 and num / size_true > thres2:
            label_list.append(1)
        else:
            label_list.append(0)
    return label_list

In [123]:
thres1 = 0.1
thres2 = 0.1

# time.perf_counter() is the clock intended for measuring elapsed intervals.

# sequential: one old_patch_cat call per (image, label)
startTime = time.perf_counter()
labels_old = [
    [old_patch_cat(gts[j, :], SLIC_list[j, :], i, thres1, thres2)
     for i in range(np.max(SLIC_list[j, :]))]
    for j in range(n)
]
print("Labels old method takes {}".format(time.perf_counter() - startTime))

# parallel: one patch_cat call per image, distributed by parmap
startTime = time.perf_counter()
labels = parmap.map(patch_cat, zip(gts,  SLIC_list), thres1, thres2)
print("Labels new method takes {}".format(time.perf_counter() - startTime))

Labels old method takes 1.3308868408203125
Labels new method takes 0.8495790958404541


In [125]:
# DEBUG: print "bad" for every label of the parmap version that differs from
# the label produced by the sequential version.
for i, image_labels in enumerate(labels):
    for j, label in enumerate(image_labels):
        if label != labels_old[i][j]:
            print("bad")