In [2]:
import os
from shutil import copy2
import numpy as np
import utils
from scipy.misc import imsave
from scipy.ndimage import imread

In [3]:
# Copy every training image into one flat folder.
# Each sample is stored as <path>/<image_id>/images/<image_id>.png.
path = "../data/stage1_train/"
dst_dir = "../data/stage1_train_images/"
# Create the destination first: copy2 into a missing directory fails.
os.makedirs(dst_dir, exist_ok=True)
for x in os.listdir(path):
    # Only sample directories hold images; this skips .DS_Store and any
    # other stray file sitting next to them.
    if os.path.isdir(os.path.join(path, x)):
        copy2(os.path.join(path, x, "images", x + ".png"), dst_dir)

In [10]:
# Sanity check: list the flattened training folder and pipe it through `wc`;
# the first number printed is the line count, i.e. the number of entries.
!ls ../data/stage1_train_images/ | wc 

     670     670   46230


In [11]:
# Copy every test image into one flat folder.
# Each sample is stored as <path>/<image_id>/images/<image_id>.png.
path = "../data/stage1_test/"
dst_dir = "../data/stage1_test_images/"
# Create the destination first: copy2 into a missing directory fails.
os.makedirs(dst_dir, exist_ok=True)
for x in os.listdir(path):
    # Only sample directories hold images; this skips .DS_Store and any
    # other stray file sitting next to them.
    if os.path.isdir(os.path.join(path, x)):
        copy2(os.path.join(path, x, "images", x + ".png"), dst_dir)

In [9]:
"""Extend the given ndarray of shape H*W*C to 9 times its area (shape 3H*3W*C) by mirroring it.
    The 2D view of the transformation, with lowercase letters marking mirrored copies, is:
    from: |---|---|
          | A | B |
          |---|---|
          | C | D |
          |---|---|
    to:   
          |---|---|---|---|---|---|
          | d | c | c | d | d | c |
          |---|---|---|---|---|---|
          | b | a | a | b | b | a |
          |---|---|---|---|---|---|
          | b | a | A | B | b | a |
          |---|---|---|---|---|---|
          | d | c | C | D | d | c |
          |---|---|---|---|---|---|
          | d | c | c | d | d | c |
          |---|---|---|---|---|---|
          | b | a | a | b | b | a |
          |---|---|---|---|---|---|
    """
# Toy 2x2 "image" with 3 channels; the string labels make each flip easy to follow.
origin = np.array([
    [['a1', 'a2', 'a3'], ['b1', 'b2', 'a3']],
    [['c1', 'c2', 'a3'], ['d1', 'd2', 'a3']],
])


def _describe(arr):
    """Print the shape of `arr` followed by its first two channels."""
    print(arr.shape)
    for channel in (0, 1):
        print(arr[:, :, channel])


print("----------------------origin--------------------------------")
print(origin)
_describe(origin)

print("----------------------horizontal--------------------------------")
# Reversing axis 1 mirrors left <-> right (what np.fliplr does).
horizontal = origin[:, ::-1]
_describe(horizontal)

print("----------------------vertical--------------------------------")
# Reversing axis 0 mirrors top <-> bottom (what np.flipud does).
vertical = origin[::-1]
_describe(vertical)

print("----------------------cornor--------------------------------")
# Mirrored along both axes: the tile used at the four corners.
cornor = horizontal[::-1]
_describe(cornor)

print("----------------------row1--------------------------------")
row1 = np.concatenate([cornor, vertical, cornor], axis=1)
_describe(row1)

print("----------------------row2--------------------------------")
row2 = np.concatenate([horizontal, origin, horizontal], axis=1)
_describe(row2)

print("----------------------row3--------------------------------")
row3 = np.concatenate([cornor, vertical, cornor], axis=1)
_describe(row3)

print("----------------------final--------------------------------")
# Stack the three tile rows into the full 3H x 3W mosaic.
final = np.concatenate([row1, row2, row3], axis=0)
_describe(final)

def extend_sides(origin, margin=1):
    """Pad an image on all four sides with mirrored copies of its own border.

    For ``margin <= min(H, W)`` the result is exactly what you get by tiling
    the image 3x3 with flipped copies (flipped left/right beside it, up/down
    above and below, both ways at the corners) and cropping a window that
    extends ``margin`` pixels beyond the original on every side. ``np.pad``
    builds that window directly instead of materialising the 3H x 3W mosaic.

    Args:
        origin: array of shape (H, W) or (H, W, C, ...). Only the first two
            axes are padded; any further axes are left untouched.
        margin: number of mirrored pixels to add on each side (default 1,
            the value that used to be hard-coded). Must be >= 0.

    Returns:
        A new array of shape (H + 2*margin, W + 2*margin, ...) with the same
        dtype as ``origin``.

    Raises:
        ValueError: if ``origin`` has fewer than 2 dimensions or ``margin``
            is negative.
    """
    origin = np.asarray(origin)
    if origin.ndim < 2:
        raise ValueError("origin must have at least 2 dimensions (H, W[, C])")
    if margin < 0:
        raise ValueError("margin must be non-negative")
    # Mirror only the two spatial axes; zero padding width everywhere else.
    pad_width = [(margin, margin), (margin, margin)] + [(0, 0)] * (origin.ndim - 2)
    # "symmetric" repeats the edge pixel itself, matching the flip-and-concatenate layout.
    return np.pad(origin, pad_width, mode="symmetric")

# Try the helper on the toy array and show the shape plus the first two channels.
a = extend_sides(origin)
print(a.shape)
print(a[..., 0])
print(a[..., 1])

In [4]:
# Scan the training set for the largest and the smallest image height / width.
max_h, max_w = 0, 0
min_h, min_w = 99999, 99999
for image_id in utils.TRAIN_IMAGE_IDS:
    h, w, _ = utils.image2ndarry(image_id, training=True).shape
    # Keep running extremes for each dimension independently.
    max_h, max_w = max(max_h, h), max(max_w, w)
    min_h, min_w = min(min_h, h), min(min_w, w)
print(max_h, max_w)
print(min_h, min_w)

1040 1388
256 256


In [25]:
# Slice every training image and its mask into TILE x TILE tiles.
TILE = 256  # tile edge length in pixels
image_src = utils.DATA_ROOT + "stage1_train_images/"
image_dst = utils.DATA_ROOT + "stage1_train_256x256/images/"
mask_dst = utils.DATA_ROOT + "stage1_train_256x256/masks/"


def _tile_starts(length, tile):
    """Return the start offsets of the `tile`-wide windows that cover `length`.

    Windows start every `tile` pixels. A final window that would run past
    `length` is moved back to start at `length - tile`, so it overlaps the
    previous window instead of being cut short. Requires `length >= tile`.
    """
    return [min(start, length - tile) for start in range(0, length, tile)]


# Start from clean output folders: create them if they are missing and delete
# files left over from a previous run (pure Python instead of shelling out to
# `rm`, so failures are not silently ignored and it works on any platform).
for out_dir in (image_dst, mask_dst):
    os.makedirs(out_dir, exist_ok=True)
    for stale in os.listdir(out_dir):
        stale_path = os.path.join(out_dir, stale)
        if os.path.isfile(stale_path):
            os.remove(stale_path)

# splitext strips only the extension, so ids containing dots stay intact.
image_ids = [os.path.splitext(x)[0] for x in os.listdir(image_src) if x.endswith(".png")]
for image_id in image_ids:
    img_np = imread(image_src + image_id + ".png")
    H, W, C = img_np.shape
    mask_np = utils.mask2ndarry(image_id, H, W)
    if H < TILE or W < TILE:
        # Smaller than one tile: report and skip.
        print("image {} size is {}x{}x{}, this program can't handle".format(image_id, H, W, C))
        continue
    if H == TILE and W == TILE:
        # Already exactly one tile: save under the plain id, without offsets.
        imsave(image_dst + image_id + ".png", img_np)
        imsave(mask_dst + image_id + ".png", mask_np)
        continue
    for start_h in _tile_starts(H, TILE):
        end_h = start_h + TILE
        for start_w in _tile_starts(W, TILE):
            end_w = start_w + TILE
            # The tile's top-left corner is encoded in the file name.
            tile_name = image_id + "_" + str(start_h) + "_" + str(start_w) + ".png"
            slice_img = img_np[start_h:end_h, start_w:end_w, :]
            imsave(image_dst + tile_name, slice_img)
            slice_mask = mask_np[start_h:end_h, start_w:end_w, :]
            imsave(mask_dst + tile_name, slice_mask)

In [27]:
# Verify that the sample saved under its plain id in the sliced folders
# round-trips unchanged.
verify_id = "4a424e0cb845cf6fd4d9fe62875552c7b89a4e0276cf16ebf46babe4656a794e"
img_before = imread(utils.DATA_ROOT + "stage1_train_images/" + verify_id + ".png")
img_after = imread(image_dst + verify_id + ".png")
# Compare element by element. `a.all() == b.all()` only compares two scalar
# booleans ("is every value non-zero?"), so it passes for almost any pair of
# images and verifies nothing.
print(np.array_equal(img_before, img_after))
mask_before = utils.mask2ndarry(verify_id, img_before.shape[0], img_before.shape[1])
mask_after = imread(mask_dst + verify_id + ".png")
# NOTE(review): array_equal is also False when shapes differ or when the mask's
# value range was rescaled on save -- if this prints False, check what
# mask2ndarry returns (dtype / range / channels) against the saved PNG.
print(np.array_equal(mask_before, mask_after))

True
True


In [8]:
# Group the training images into one sub-folder per resolution ("<H>x<W>").
from scipy.ndimage import imread
path = "../data/stage1_train_images/"
dst_root = "../data/stage1_train_size_classifier/"
for x in os.listdir(path):
    src = os.path.join(path, x)
    # Only PNG files are images: skip sub-directories and stray files such as
    # .DS_Store, which imread cannot decode.
    if x.endswith(".png") and not os.path.isdir(src):
        # Take just the first two axes so single-channel (2-D) images work too.
        h, w = imread(src).shape[:2]
        folder = str(h) + 'x' + str(w)
        target_dir = os.path.join(dst_root, folder)
        # makedirs also creates dst_root itself on the first run.
        os.makedirs(target_dir, exist_ok=True)
        copy2(src, target_dir)

In [9]:
# Group the test images into one sub-folder per resolution ("<H>x<W>").
from scipy.ndimage import imread
path = "../data/stage1_test_images/"
dst_root = "../data/stage1_test_size_classifier/"
for x in os.listdir(path):
    src = os.path.join(path, x)
    # Only PNG files are images: skip sub-directories and stray files such as
    # .DS_Store, which imread cannot decode.
    if x.endswith(".png") and not os.path.isdir(src):
        # Take just the first two axes so single-channel (2-D) images work too.
        h, w = imread(src).shape[:2]
        folder = str(h) + 'x' + str(w)
        target_dir = os.path.join(dst_root, folder)
        # makedirs also creates dst_root itself on the first run.
        os.makedirs(target_dir, exist_ok=True)
        copy2(src, target_dir)