In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
sys.path.append('../')

import os

import numpy as np
import pandas as pd
from sklearn.externals import joblib
from tqdm import tqdm_notebook as tqdm
import ipywidgets as ipy
import matplotlib.pyplot as plt

from common_blocks.utils import plot_list, load_image, save_image, resize_image, get_cut_coordinates

# Common root of every file this notebook reads or writes.
FILES_DIR = '/mnt/ml-team/minerva/open-solutions/salt/files'

# Metadata CSV that is loaded at the start of the notebook.
METADATA_FILEPATH = os.path.join(FILES_DIR, 'metadata.csv')
# Folder (and its 'masks' sub-folder) that receives the generated files.
IMG_DIR = os.path.join(FILES_DIR, 'auxiliary_data')
IMG_DIR_MASKS = os.path.join(IMG_DIR, 'masks')
# Destination CSV for the metadata describing the generated files.
AUXILIARY_METADATA_FILEPATH = os.path.join(FILES_DIR, 'auxiliary_metadata.csv')

In [None]:
# Load the metadata table and keep only the rows flagged as training data.
metadata = pd.read_csv(METADATA_FILEPATH)
# .copy() makes metadata_train an independent frame, so columns can be added
# to it later without pandas raising SettingWithCopyWarning.
metadata_train = metadata[metadata['is_train'] == 1].copy()
metadata_train.head()

# Filter larger masks

In [None]:
# Inclusive bounds on the mask size (sum of the mask's values) for a mask to be kept.
MIN_MASK_SIZE = 300
MAX_MASK_SIZE = 8000

# One size per training mask: the sum over everything load_image returns for it.
sizes = [load_image(file_path, is_mask=True).sum()
         for file_path in tqdm(metadata_train.file_path_mask)]

# assign() returns a new frame instead of writing into a filtered slice, which
# avoids SettingWithCopyWarning and keeps this cell safe to re-run.
metadata_train = metadata_train.assign(size=sizes)

# Series.between() includes both end points by default.
metadata_large_masks = metadata_train[metadata_train['size'].between(MIN_MASK_SIZE, MAX_MASK_SIZE)]
metadata_large_masks.head()

# Explore cut results

In [None]:
# The slider range follows the number of available rows, so every position it
# can take is a valid positional index into metadata_large_masks.
@ipy.interact(idx=ipy.IntSlider(min=0, max=max(len(metadata_large_masks) - 1, 0), value=0, step=1))
def present_cut(idx):
    """Plot an image/mask pair next to its cropped and resized version.

    Args:
        idx: positional index of the row in ``metadata_large_masks`` to show.

    Nothing is plotted when ``idx`` is out of range (e.g. the frame is empty)
    or when ``get_cut_coordinates`` reports that no crop was made.
    """
    if idx >= len(metadata_large_masks):
        return

    row = metadata_large_masks.iloc[idx]
    image = load_image(row.file_path_image, is_mask=False)
    mask = load_image(row.file_path_mask, is_mask=True)
    (x1, x2), (y1, y2), was_cropped = get_cut_coordinates(mask, step=4, min_size=50, max_size=300)
    if not was_cropped:
        return

    # Apply the same crop window to the image and its mask, then bring both to 101x101.
    synthetic_mask = resize_image(mask[x1:x2, y1:y2], (101, 101), is_mask=True)
    synthetic_image = resize_image(image[x1:x2, y1:y2], (101, 101))
    plot_list(images=[image, synthetic_image], labels=[mask, synthetic_mask])

# Prepare synthetic data

In [None]:
# Make sure both output folders exist before anything is written to them.
images_dir = os.path.join(IMG_DIR, 'images')
os.makedirs(images_dir, exist_ok=True)
os.makedirs(IMG_DIR_MASKS, exist_ok=True)

# Parallel lists, one entry per saved image/mask pair; they feed the metadata frame.
file_path_images, file_path_masks, ids, zs = [], [], [], []

# iterrows() is a generator with no length, so tqdm needs an explicit total
# to display progress.
for _, row in tqdm(metadata_large_masks.iterrows(), total=len(metadata_large_masks)):
    image = load_image(row.file_path_image, is_mask=False)
    mask = load_image(row.file_path_mask, is_mask=True)
    (x1, x2), (y1, y2), was_cropped = get_cut_coordinates(mask, step=4, min_size=50, max_size=300)
    if not was_cropped:
        # No crop was made for this mask, so there is nothing to save.
        continue

    # Apply the same crop window to the image and its mask, then bring both to 101x101.
    synthetic_mask = resize_image(mask[x1:x2, y1:y2], (101, 101), is_mask=True).astype(np.uint8)
    # NOTE(review): the *255 scaling presumably expects resize_image to return
    # floats in [0, 1] -- confirm against common_blocks.utils.
    synthetic_image = (resize_image(image[x1:x2, y1:y2], (101, 101)) * 255.).astype(np.uint8)
    idx = row.id

    # Generated files reuse the id of the row they were cut from.
    file_path_image = os.path.join(images_dir, '{}.png'.format(idx))
    file_path_mask = os.path.join(IMG_DIR_MASKS, '{}.png'.format(idx))
    save_image(synthetic_image, file_path_image)
    save_image(synthetic_mask, file_path_mask)

    file_path_images.append(file_path_image)
    file_path_masks.append(file_path_mask)
    ids.append(idx)
    zs.append(row.z)

In [None]:
# Collect the per-file lists into one table and flag every generated
# sample as training data.
metadata_small_masks = pd.DataFrame(
    {
        'file_path_image': file_path_images,
        'file_path_mask': file_path_masks,
        'id': ids,
        'z': zs,
    }
).assign(is_train=1)

In [None]:
# Quick sanity check: table dimensions first, then the first five rows.
small_masks_shape = metadata_small_masks.shape
display(small_masks_shape)
metadata_small_masks.head(5)

In [None]:
# The frame carries only a default integer index; index=False keeps it out of
# the file so that reading the CSV back does not yield an 'Unnamed: 0' column.
metadata_small_masks.to_csv(AUXILIARY_METADATA_FILEPATH, index=False)