In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import zipfile
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm
# Root directory of the dataset; the entries listed from it are the case folders.
base_path = '../input/mobilewsis/mobilewsis/'
# os.listdir returns entries in arbitrary order, so sort them to keep runs reproducible.
all_cases = sorted(os.listdir(base_path))

In [None]:
def get_mask(i2, thresh=10):
    """Paint the convex hulls of the dark regions of an image onto a white canvas.

    The image is converted to grayscale and inverse-thresholded, so every pixel
    whose gray level is at most ``thresh`` becomes foreground. The external
    contours of that foreground are located and the convex hull of each one is
    filled with black on an all-white canvas.

    Args:
        i2: Colour image, interpreted as BGR by the grayscale conversion.
        thresh: Gray level at or below which a pixel counts as "dark".

    Returns:
        A uint8 array with the same shape as ``i2`` holding 0 inside the
        convex hulls and 255 everywhere else.
    """
    gray = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
    # THRESH_BINARY_INV: pixels <= thresh become 255, all others become 0.
    _, dark_regions = cv2.threshold(gray, thresh, 255, cv2.THRESH_BINARY_INV)
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contours are always second to last.
    contours = cv2.findContours(dark_regions, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    output_img = np.full(i2.shape, 255, dtype='uint8')
    for cnt in contours:
        # compute a convex hull and fill its inside with black
        hull = cv2.convexHull(cnt)
        cv2.fillPoly(output_img, pts=[hull], color=(0, 0, 0))
    return output_img

In [None]:
def _split_into_tiles(arr, sz):
    """Cut an (H, W, C) array, with H and W multiples of ``sz``, into (n, sz, sz, C) tiles."""
    channels = arr.shape[2]
    grid = arr.reshape(arr.shape[0] // sz, sz, arr.shape[1] // sz, sz, channels)
    # Bring the two grid axes to the front so tiles are enumerated row by row.
    return grid.transpose(0, 2, 1, 3, 4).reshape(-1, sz, sz, channels)


def tile(img, mask, sz=512):
    """Split an image and its mask into non-overlapping ``sz`` x ``sz`` tiles.

    Both arrays are padded so that height and width become multiples of
    ``sz``. The padding is split between the two sides of each axis, with any
    odd pixel going to the bottom/right. The image is padded with 255 and the
    mask with 0.

    Args:
        img: Array of shape (H, W, C).
        mask: Array of shape (H, W, C_mask) with the same H and W as ``img``.
        sz: Edge length of a tile in pixels. Defaults to 512.

    Returns:
        A list of dicts, one per tile, with keys ``'img'`` (sz, sz, C),
        ``'mask'`` (sz, sz, C_mask) and ``'idx'`` (position of the tile when
        the tile grid is read row by row, left to right).

    Raises:
        ValueError: If ``img`` and ``mask`` differ in height or width.
    """
    if img.shape[:2] != mask.shape[:2]:
        raise ValueError(
            f'img and mask must share height and width, got {img.shape[:2]} and {mask.shape[:2]}'
        )

    pad0 = (sz - img.shape[0] % sz) % sz
    pad1 = (sz - img.shape[1] % sz) % sz
    pad_width = [[pad0 // 2, pad0 - pad0 // 2], [pad1 // 2, pad1 - pad1 // 2], [0, 0]]

    img_tiles = _split_into_tiles(np.pad(img, pad_width, constant_values=255), sz)
    mask_tiles = _split_into_tiles(np.pad(mask, pad_width, constant_values=0), sz)

    return [
        {'img': img_tiles[i], 'mask': mask_tiles[i], 'idx': i}
        for i in range(len(img_tiles))
    ]

In [None]:
# Case folders whose images are tiled into the training archives.
training_cases = [
    'Case 6',
    'Case 1',
    'Case 10',
    'Case 9',
    'Case 8',
    'Case 4',
    'Case 2',
    'Case 7',
    'Case 3',
    'Case 2-B',
    'Case 5',
]
# Presumably the case folders held out for validation; not used in the cells shown here.
valid_cases = [
    'Case 13',
    'Case 12',
    'Case 11',
]

In [None]:
# Per-tile channel statistics: mean of the [0, 1]-scaled pixels (x_tot) and
# mean of their squares (x2_tot), one 3-vector per tile.
x_tot,x2_tot = [],[]
OUT_TRAIN = 'train_patches.zip'
OUT_MASKS = 'train_masks.zip'
with zipfile.ZipFile(OUT_TRAIN, 'w') as img_out,\
 zipfile.ZipFile(OUT_MASKS, 'w') as mask_out:

    for selected_case in tqdm(training_cases):
        case_dir = os.path.join(base_path, selected_case)
        # List the folder once per case: sorted for a reproducible archive order,
        # plus a set so the " copy.JPG" lookup does not rescan the directory per file.
        sub_images = sorted(os.listdir(case_dir))
        available_files = set(sub_images)

        for file_name in sub_images:
            image_path = os.path.join(case_dir, file_name)
            name = file_name.split('.')[0]

            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread reports failure by returning None instead of raising.
                print(f'Skipping unreadable file: {image_path}')
                continue

            if name + " copy.JPG" in available_files:
                # NOTE(review): the mask is derived from this image itself, not from
                # its " copy.JPG" sibling, and the sibling is tiled as an ordinary
                # image with an empty mask -- confirm this is intended.
                full_mask = (get_mask(image, 10) // 255).astype('uint8', copy=False)
            else:
                # uint8 keeps the dtype consistent with the branch above.
                full_mask = np.zeros(shape=image.shape, dtype='uint8')

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            tiles = tile(image, full_mask)

            for t in tiles:
                tile_img, tile_mask, idx = t['img'], t['mask'], t['idx']

                scaled = (tile_img / 255.0).reshape(-1, 3)
                x_tot.append(scaled.mean(0))
                x2_tot.append((scaled ** 2).mean(0))

                # NOTE(review): entry names ignore the case folder, so identically
                # named files in different cases would yield duplicate archive entries.
                entry_name = f'{name}_{idx}.png'

                # imencode treats its input as BGR; converting the RGB tile back
                # makes the stored PNG decode to the right colours in RGB readers (e.g. PIL).
                png_img = cv2.imencode('.png', cv2.cvtColor(tile_img, cv2.COLOR_RGB2BGR))[1]
                img_out.writestr(entry_name, png_img.tobytes())

                # All mask channels are identical, so only the first one is stored.
                png_mask = cv2.imencode('.png', np.ascontiguousarray(tile_mask[:, :, 0]))[1]
                mask_out.writestr(entry_name, png_mask.tobytes())
