In [1]:
import os
import gc
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from zipfile import ZipFile
from joblib import Parallel, delayed
from tqdm.notebook import tqdm


def create_path(sizes):
    """Create the output folder layout for every requested size.

    For each entry ``s`` in ``sizes`` the directories ``s/Train`` and
    ``s/Test`` are created relative to the current working directory.

    Parameters
    ----------
    sizes : iterable of str
        Folder names, e.g. ``['512x512']``.

    Notes
    -----
    The function is idempotent (existing directories are left untouched) and
    never changes the process working directory, so re-running the cell or a
    failure half-way through cannot leave the kernel in an unexpected folder.
    """
    for s in sizes:
        for subset in ('Train', 'Test'):
            # makedirs also creates `s` itself; exist_ok makes re-runs safe.
            os.makedirs(os.path.join(s, subset), exist_ok=True)
        
# Output folder names, one per target resolution; `sizes[0]` is read again by
# the packaging cell further down.
sizes = ['512x512']
# Creates <size>/Train and <size>/Test relative to the current working directory.
create_path(sizes)


def scale(type_,data):
    """Rescale ``data`` with one of two schemes.

    Parameters
    ----------
    type_ : {'norm', 'std'}
        ``'std'``  -> subtract the mean and divide by the standard deviation.
        ``'norm'`` -> map the minimum to 0 and the maximum to 1.
    data : array-like
        Values to rescale; statistics are taken over the whole array.

    Returns
    -------
    The rescaled values.

    Raises
    ------
    AssertionError
        If ``type_`` is not one of the supported names.
    """
    assert type_ in ['norm','std']
    if type_ == 'norm':
        lowest = np.min(data)
        highest = np.max(data)
        return (data - lowest)/(highest - lowest)
    if type_ == 'std':
        centred = data - np.mean(data)
        return centred/np.std(data)
    
def color_constancy(img, power=6, gamma=None):
    """Balance the colour channels of an image using per-channel power means.

    Each channel is divided by its Minkowski (power) mean, after the vector of
    channel means has been normalised to unit length and scaled by sqrt(3), so
    an image whose channels already have equal means is left unchanged.
    (This looks like the "Shades of Gray" colour-constancy scheme - confirm
    against the reference you are following.)

    Parameters
    ----------
    img : array-like, shape (H, W, 3) or (H, W, 4)
        Input image. A fourth channel, if present, is dropped. The channel
        order (RGB or BGR) does not matter because every channel is handled
        independently and symmetrically.
    power : int or float, default 6
        Exponent of the power mean used to estimate the illuminant.
    gamma : float or None, default None
        If given, the image is first cast to ``uint8`` and passed through a
        256-entry lookup table ``255 * (i / 255) ** (1 / gamma)``.

    Returns
    -------
    numpy.ndarray
        Corrected image with 3 channels and the same dtype as the input.

    Raises
    ------
    ValueError
        If ``img`` is not a 3-D array with 3 or 4 channels.
    """
    img = np.asarray(img)
    if img.ndim != 3 or img.shape[2] not in (3, 4):
        raise ValueError(
            f'color_constancy expects an (H, W, 3|4) image, got shape {img.shape}'
        )
    img = img[..., :3]
    img_dtype = img.dtype

    if gamma is not None:
        img = img.astype('uint8')
        # Same table for every channel; fancy indexing applies it per pixel.
        look_up_table = np.array(
            [255 * pow(i / 255, 1 / gamma) for i in range(256)]
        ).astype('uint8')
        img = look_up_table[img]

    img = img.astype('float32')
    img_power = np.power(img, power)
    rgb_vec = np.power(np.mean(img_power, (0, 1)), 1 / power)
    rgb_norm = np.sqrt(np.sum(np.power(rgb_vec, 2.0)))

    # An all-zero channel (or image) yields 0/0 or 1/0 here; those gains are
    # replaced by 1 below, which is harmless because the channel is zero anyway.
    with np.errstate(divide='ignore', invalid='ignore'):
        # np.float32(...) keeps the arithmetic in float32 regardless of the
        # NumPy scalar-promotion rules in effect.
        gain = 1 / ((rgb_vec / rgb_norm) * np.float32(np.sqrt(3)))
    gain = np.where(np.isfinite(gain), gain, np.float32(1))
    img = np.multiply(img, gain)

    # Gains above 1 can push values past the range of an integer dtype;
    # clip first so the cast saturates instead of wrapping around.
    if np.issubdtype(img_dtype, np.integer):
        limits = np.iinfo(img_dtype)
        img = np.clip(img, limits.min, limits.max)
    return img.astype(img_dtype)



def circle_crop(img, sigmaX=10):
    """Black out everything outside the largest centred circle.

    Parameters
    ----------
    img : numpy.ndarray, shape (H, W, C)
        Image to mask; must be 3-dimensional.
    sigmaX : int, default 10
        Not used by the function body; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray
        Copy of ``img`` with pixels outside the circle set to zero.
    """
    rows, cols, _channels = img.shape
    centre = (int(cols/2), int(rows/2))
    # The radius is the shorter half-extent so the circle fits inside the frame.
    radius = int(np.amin(centre))

    # Filled disc of ones on a zero background; any non-zero value marks "keep".
    mask = np.zeros((rows, cols), np.uint8)
    cv2.circle(mask, centre, radius, 1, thickness=-1)

    return cv2.bitwise_and(img, img, mask=mask)


def hair_remove(src):
    """Inpaint thin dark structures (presumably hairs) in a BGR image.

    Steps:
      1. Convert to grayscale (input is interpreted as BGR).
      2. Black-hat morphology with a 17x17 cross-shaped kernel to highlight
         dark details that are narrower than the kernel.
      3. Binary threshold at 10 to obtain a mask of those details.
      4. Inpaint the masked pixels with the Telea method (radius 1).

    Parameters
    ----------
    src : numpy.ndarray
        3-channel image in BGR order.

    Returns
    -------
    numpy.ndarray
        Image with the masked pixels filled in from their surroundings.
    """
    gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)

    cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (17, 17))
    dark_details = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, cross_kernel)

    _, hair_mask = cv2.threshold(dark_details, 10, 255, cv2.THRESH_BINARY)

    return cv2.inpaint(src, hair_mask, 1, cv2.INPAINT_TELEA)

def proccess_and_save(in_path,file,size,subset,out_root='./../../working'):
    """Load one image, preprocess it and save it into the output tree.

    Pipeline: read -> colour constancy -> hair removal -> resize to
    ``size`` x ``size`` -> circular crop -> save.

    Parameters
    ----------
    in_path : str
        Directory containing the source image.
    file : str
        File name inside ``in_path``. Any ``'_downsampled'`` substring is
        stripped from the name used for the output file.
    size : int
        Edge length of the square output image.
    subset : str
        Sub-folder of the size directory to write into (e.g. ``'Train'``).
    out_root : str, default './../../working'
        Root of the output tree; the file is written to
        ``{out_root}/{size}x{size}/{subset}/{file}``. The default is relative
        to the current working directory.

    Raises
    ------
    IOError
        If the source image cannot be read.
    """
    src_path = os.path.join(in_path,file)
    img = cv2.imread(src_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError(f'Could not read image: {src_path}')

    file = file.replace('_downsampled',"")
    img = color_constancy(img)
    img = hair_remove(img)
    path = f'{out_root}/{size}x{size}/{subset}/{file}'
    # `interpolation` must be passed by keyword: the third positional
    # argument of cv2.resize is `dst`, not the interpolation flag.
    img = cv2.resize(img,(size,size),interpolation=cv2.INTER_AREA)
    img = circle_crop(img)
    # cv2.imread yields BGR while plt.imsave expects RGB; convert so the
    # saved file keeps the original colours instead of swapping red and blue.
    plt.imsave(path,cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    
    
# Dataset locations, relative to the notebook's starting directory.
PATH_org = './../input/siim-isic-melanoma-classification'
PATH_ext = './../input/melanoma-merged-external-data-512x512-jpeg'

# Every relative path below - including the './../../working' output path
# built inside proccess_and_save - is resolved from this directory.
os.chdir(PATH_ext)

TRAIN_DIR = '512x512-dataset-melanoma/512x512-dataset-melanoma/'
TEST_DIR = '512x512-test/512x512-test/'
N_JOBS = 7  # number of parallel worker processes

size = [512]
file_list_train = os.listdir(TRAIN_DIR)
file_list_test = os.listdir(TEST_DIR)
for s in size:
    for src_dir, subset, names in (
        (TRAIN_DIR, 'Train', file_list_train),
        (TEST_DIR, 'Test', file_list_test),
    ):
        # The workers only write files; their (None) results are not kept.
        Parallel(n_jobs=N_JOBS)(
            delayed(proccess_and_save)(src_dir, name, s, subset)
            for name in tqdm(names)
        )
    

HBox(children=(FloatProgress(value=0.0, max=60487.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10982.0), HTML(value='')))




In [2]:
# Move to the output root and bundle every file produced for the first
# configured size into a single archive next to the size folder.
os.chdir('./../../working/')
file_path = [
    os.path.join(root, f)
    for root, direc, files_ in os.walk(f'./{sizes[0]}')
    for f in files_
]

with ZipFile(f'Melanoma_JPEG_{sizes[0]}.zip','w') as z:
    for member in file_path:
        z.write(member)

In [3]:
!rm -rf 512x512