In [1]:
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import skimage.transform

In [3]:
def get_filelist(img_loc):
    """Return the sorted list of paths matching ``img_loc + '*.jpg'``.

    ``img_loc`` is used as a plain string prefix of the glob pattern, so a
    directory has to be passed with its trailing path separator
    (e.g. ``'images/'``).
    """
    pattern = img_loc + '*.jpg'
    return sorted(glob.glob(pattern))

In [4]:
# Directory holding the training images. The trailing slash is required:
# get_filelist appends the '*.jpg' pattern directly to this string.
img_loc = '../ISIC2018_Task3_Training_Input/'

# Sorted list of every .jpg path found under img_loc.
filelist = get_filelist(img_loc)

In [8]:
# Load images as arrays

def get_X_orig(filelist, input_shape):
    """Load images from disk and resize them into a single 4-D array.

    Parameters
    ----------
    filelist : sequence of str
        Image paths; at most the first ``bs`` entries are read.
    input_shape : sequence of four ints
        ``(bs, n_H, n_W, n_C)``: the maximum number of images to load and the
        height, width and channel count each image is resized to.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(m, n_H, n_W, n_C)`` with
        ``m = min(bs, len(filelist))``. Sizing by the number of files actually
        read guarantees that no row is left as uninitialised ``np.empty``
        memory when ``filelist`` is shorter than ``bs``.

    Raises
    ------
    ValueError
        If ``bs`` is negative.
    """
    bs, n_H, n_W, n_C = input_shape
    if bs < 0:
        raise ValueError('batch size must be non-negative, got {}'.format(bs))

    fnames = filelist[:bs]
    n_imgs = len(fnames)
    X_orig = np.empty([n_imgs, n_H, n_W, n_C])

    for i, fname in enumerate(fnames):
        img = plt.imread(fname)
        img = skimage.transform.resize(img, (n_H, n_W), mode='constant')
        X_orig[i] = img
        # Report progress every 100 images and once more after the last one.
        if i % 100 == 99 or i == n_imgs - 1:
            print('{} files loaded'.format(i+1))

    return X_orig

In [9]:
# Load every image that was found: the batch size is simply the file count.
bs = len(filelist)
# [batch, height, width, channels]; get_X_orig resizes each image to 224x224.
input_shape = [bs, 224, 224, 3]

X_orig = get_X_orig(filelist, input_shape)

  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


100 files loaded
200 files loaded
300 files loaded
400 files loaded
500 files loaded
600 files loaded
700 files loaded
800 files loaded
900 files loaded
1000 files loaded
1100 files loaded
1200 files loaded
1300 files loaded
1400 files loaded
1500 files loaded
1600 files loaded
1700 files loaded
1800 files loaded
1900 files loaded
2000 files loaded
2100 files loaded
2200 files loaded
2300 files loaded
2400 files loaded
2500 files loaded
2600 files loaded
2700 files loaded
2800 files loaded
2900 files loaded
3000 files loaded
3100 files loaded
3200 files loaded
3300 files loaded
3400 files loaded
3500 files loaded
3600 files loaded
3700 files loaded
3800 files loaded
3900 files loaded
4000 files loaded
4100 files loaded
4200 files loaded
4300 files loaded
4400 files loaded
4500 files loaded
4600 files loaded
4700 files loaded
4800 files loaded
4900 files loaded
5000 files loaded
5100 files loaded
5200 files loaded
5300 files loaded
5400 files loaded
5500 files loaded
5600 files loaded
5

In [12]:
def get_Y_orig(labels_loc):
    """Read the label CSV at ``labels_loc`` and return its values as an array.

    The first column of the file is dropped; all remaining columns are kept in
    file order, one output row per CSV row.
    """
    labels_df = pd.read_csv(labels_loc)
    label_columns = labels_df.iloc[:, 1:]
    return np.array(label_columns)

In [21]:
# CSV with the ground-truth labels; get_Y_orig drops its first column and
# returns the remaining columns as an array.
labels_loc = '../ISIC2018_Task3_Training_GroundTruth/ISIC2018_Task3_Training_GroundTruth.csv'

# NOTE(review): later cells index X_orig and Y_orig by the same row number, so
# the CSV rows are presumably in the same order as the sorted filelist — confirm.
Y_orig = get_Y_orig(labels_loc)

In [22]:
def get_random_crops(filelist, input_shape, Y_orig, num_crops, label):
    """Build randomly jittered, resized crops of every image tagged ``label``.

    For each selected image, ``num_crops`` crops are taken. A crop starts from
    the fixed window rows 50:400 / columns 50:550 and shifts each of its four
    edges independently by a random integer in [-20, 19] drawn from NumPy's
    global RNG; the crop is then resized to ``(n_H, n_W)``.

    Parameters
    ----------
    filelist : sequence of str
        Image paths; entry ``i`` must correspond to row ``i`` of ``Y_orig``.
        Only the first ``bs`` entries are considered.
    input_shape : sequence of four ints
        ``(bs, n_H, n_W, n_C)``.
    Y_orig : numpy.ndarray, 2-D
        Label matrix; an image is selected when its entry in the column of
        ``label`` equals 1.
    num_crops : int
        Number of crops generated per selected image.
    label : str
        One of 'MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC'.

    Returns
    -------
    X_rc : numpy.ndarray
        Shape ``(k * num_crops, n_H, n_W, n_C)`` where ``k`` is the number of
        selected images.
    Y_rc : numpy.ndarray
        Shape ``(k * num_crops, Y_orig.shape[1])``; the label row of each
        selected image repeated ``num_crops`` times, aligned with ``X_rc``.

    Raises
    ------
    KeyError
        If ``label`` is not one of the known class names.
    """
    # rc = random crop

    label_dict = {'MEL':0, 'NV':1, 'BCC': 2, 'AKIEC': 3, 'BKL': 4, 'DF': 5, 'VASC': 6}
    label_num = label_dict[label]
    bs, n_H, n_W, n_C = input_shape

    # Select only rows that have both a file and a label row. Using one index
    # list for allocation, the loop and the labels keeps X_rc and Y_rc the
    # same length and leaves no uninitialised np.empty rows behind.
    fnames = filelist[:bs]
    n_usable = min(len(fnames), len(Y_orig))
    selected = np.flatnonzero(Y_orig[:n_usable, label_num] == 1)

    X_rc = np.empty([len(selected) * num_crops, n_H, n_W, n_C])
    row = 0
    for i in selected:
        img = plt.imread(fnames[i])
        for _ in range(num_crops):
            # Four independent edge offsets: top, bottom, left, right.
            c = np.random.choice(range(-20, 20), 4)
            # NOTE(review): the window assumes images of at least ~420x570
            # pixels with a channel axis — confirm for the dataset in use.
            img_cropped = img[50+c[0]:400+c[1], 50+c[2]:550+c[3], :]
            # Resize the CROP. The previous code resized the full image here,
            # so all "random crops" were identical copies of the whole picture.
            X_rc[row] = skimage.transform.resize(img_cropped, (n_H, n_W), mode='constant')
            row += 1

    # Repeat each selected label row once per crop; this keeps a 2-D result
    # (0 rows) even when no image matches.
    Y_rc = np.repeat(Y_orig[selected], num_crops, axis=0)
    return X_rc, Y_rc

In [23]:
# Number of random crops generated per selected image.
num_crops = 4

# Augment the 'DF' class with random crops.
# NOTE(review): get_random_crops draws from NumPy's global RNG and no seed is
# set in this notebook, so the crops differ between runs.
X_DF_rc, Y_DF_rc = get_random_crops(filelist, input_shape, Y_orig, num_crops, label='DF')

  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


In [24]:
# Sanity check: one shape per line for the originals and the DF crops.
print(*(arr.shape for arr in (X_orig, Y_orig, X_DF_rc, Y_DF_rc)), sep='\n')

(10015, 224, 224, 3)
(10015, 7)
(460, 224, 224, 3)
(460, 7)


In [25]:
# Augment the 'VASC' class the same way, reusing num_crops from above.
X_VASC_rc, Y_VASC_rc = get_random_crops(filelist, input_shape, Y_orig, num_crops, label='VASC')

  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


In [26]:
# Sanity check: shapes of the VASC crops and their labels, one per line.
print(*(arr.shape for arr in (X_VASC_rc, Y_VASC_rc)), sep='\n')

(568, 224, 224, 3)
(568, 7)


In [27]:
def flip_vert(X_orig, Y_orig):
    """Flip a batch of images upside down by reversing axis 1.

    ``X_orig`` must be 4-D (its shape is unpacked into four values). The result
    is obtained by slicing ``X_orig``, and ``Y_orig`` is handed back as the very
    same object because flipping does not change the labels.
    """
    _, _, _, _ = X_orig.shape  # unpacking rejects anything that is not 4-D
    return X_orig[:, ::-1, :, :], Y_orig

In [35]:
# Label column 3 is 'AKIEC' (see label_dict in get_random_crops); keep only
# the images and label rows of that class.
is_akiec = Y_orig[:, 3] == 1
X_AKIEC = X_orig[is_akiec]
Y_AKIEC = Y_orig[is_akiec]

In [36]:
# Augment AKIEC with upside-down copies (height axis reversed); labels unchanged.
X_AKIEC_flip, Y_AKIEC_flip = flip_vert(X_AKIEC, Y_AKIEC)

In [37]:
# (number of AKIEC samples, number of label columns)
print(Y_AKIEC.shape)

(327, 7)


In [38]:
def flip_hor(X_orig, Y_orig):
    """Mirror a batch of images left-to-right by reversing axis 2.

    ``X_orig`` must be 4-D (its shape is unpacked into four values). The result
    is obtained by slicing ``X_orig``, and ``Y_orig`` is handed back as the very
    same object because mirroring does not change the labels.
    """
    _, _, _, _ = X_orig.shape  # unpacking rejects anything that is not 4-D
    return X_orig[:, :, ::-1, :], Y_orig

In [39]:
# Left-right mirrored copies of every set built so far.
X_orig_mirror, Y_orig_mirror = flip_hor(X_orig, Y_orig)
X_VASC_mirror, Y_VASC_mirror = flip_hor(X_VASC_rc, Y_VASC_rc)
X_DF_mirror, Y_DF_mirror = flip_hor(X_DF_rc, Y_DF_rc)
# Mirroring the vertically flipped AKIEC images reverses both spatial axes.
X_AKIEC_flip_mirror, Y_AKIEC_flip_mirror = flip_hor(X_AKIEC_flip, Y_AKIEC_flip)

In [40]:
# Stack originals and all augmented sets along the sample axis. Both tuples
# list the sets in the same order so image i stays paired with label row i.
X_final = np.concatenate(
    (X_orig, X_DF_rc, X_VASC_rc, X_AKIEC_flip,
     X_orig_mirror, X_VASC_mirror, X_DF_mirror, X_AKIEC_flip_mirror),
    axis=0,
)
Y_final = np.concatenate(
    (Y_orig, Y_DF_rc, Y_VASC_rc, Y_AKIEC_flip,
     Y_orig_mirror, Y_VASC_mirror, Y_DF_mirror, Y_AKIEC_flip_mirror),
    axis=0,
)

In [41]:
# Sanity check: the stacked images and labels must have the same row count.
print(*(arr.shape for arr in (X_final, Y_final)), sep='\n')

(22740, 224, 224, 3)
(22740, 7)


In [42]:
# Persist the augmented images to disk in NumPy's .npy format.
X_final_file = 'X_final.npy'
np.save(X_final_file, X_final)

# Persist the matching labels (stacked in the same order as X_final).
Y_final_file = 'Y_final.npy'
np.save(Y_final_file, Y_final)
# NOTE(review): the two lines below are disabled; X_orig_inception is not
# defined in the visible part of this notebook.
#X_orig_inception_file = 'X_orig_inception.npy'
#np.save(X_orig_inception_file, X_orig_inception)

In [2]:
# Reload the saved image array from disk instead of rebuilding it.
# NOTE(review): the execution counter restarts at this cell, so this was
# presumably run in a fresh kernel after the imports cell — confirm.
X_final_file = 'X_final.npy'
X = np.load(X_final_file)

In [15]:
# Per-image, per-channel minimum: reduce over the height and width axes in one
# call, keeping them as length-1 axes so the result broadcasts against X.
colmins = np.min(X, axis=(1, 2), keepdims=True)

In [16]:
# Axes 1 and 2 were reduced with keepdims, so they should both have length 1.
colmins.shape

(22740, 1, 1, 3)

In [18]:
# Subtract each image's per-channel minimum (broadcast over height and width),
# so every channel of every image has a minimum of exactly 0.
X_colcorr = X-colmins

In [23]:
# Spot-check the corrected pixel values of one image (index 3).
# NOTE(review): this displays the whole image array; a small slice would do.
X_colcorr[3]

array([[[0.38878989, 0.36769395, 0.36061049],
        [0.36700211, 0.35849996, 0.34213154],
        [0.37427158, 0.35836491, 0.34569171],
        ...,
        [0.39551821, 0.39559543, 0.39552727],
        [0.39446998, 0.39948323, 0.39885548],
        [0.39468975, 0.38943484, 0.39620004]],

       [[0.36590699, 0.33059192, 0.3360391 ],
        [0.37720745, 0.35730042, 0.35294493],
        [0.38922944, 0.36806004, 0.37188563],
        ...,
        [0.38820466, 0.41144333, 0.41202606],
        [0.38166767, 0.40444303, 0.38929353],
        [0.38450912, 0.39435368, 0.38316014]],

       [[0.35849434, 0.33065851, 0.33803052],
        [0.37265437, 0.35213679, 0.35257541],
        [0.38865859, 0.37048163, 0.36414785],
        ...,
        [0.39536346, 0.40255477, 0.40310093],
        [0.39895396, 0.40018945, 0.3966446 ],
        [0.38436281, 0.38413866, 0.38675939]],

       ...,

       [[0.30574917, 0.35555316, 0.29819522],
        [0.31595232, 0.37002895, 0.31051639],
        [0.32269251, 0

In [25]:
# The per-channel minima that were subtracted from image 3.
colmins[3]

array([[[0.4350981 , 0.10952381, 0.17475303]]])