In [9]:
import glob
import numpy as np
from PIL import Image

In [11]:
# glob.glob() gives no ordering guarantee (the order depends on the file
# system), so both lists are sorted explicitly: the loading loop pairs
# images[i] with masks[i] purely by position.
# NOTE(review): assumes an image and its mask sort to the same position in
# their respective directories (e.g. identical file names) — confirm.
images = sorted(glob.glob('COVID19-DL/dataset/im/256/*'))
masks = sorted(glob.glob('COVID19-DL/dataset/mask/multi/256/*'))

In [49]:
# Gray levels looked up in each mask after it is converted to 8-bit grayscale;
# the value at position i of this list becomes channel i of the one-hot mask.
# presumably 0 is background and 85/170/255 are the labelled regions — TODO confirm
classes = [85, 170, 255, 0]

In [50]:
# Build the dataset: X collects float32 images scaled by 1/255, Y collects
# boolean one-hot masks with one channel per entry of `classes`.
X = []
Y = []
class_values = np.asarray(classes)  # hoisted: identical for every image
for index, image_path in enumerate(images):
    # Context managers release the underlying file handles deterministically
    # once the pixel data has been copied out.
    with Image.open(image_path) as img:
        im = np.asarray(img, dtype='float32') / 255
    with Image.open(masks[index]) as mask_img:
        mask = mask_img.convert('L')  # independent 8-bit grayscale copy
    mask_arr = np.asarray(mask)  # converted once instead of once per class
    if mask_arr.shape != im.shape[:2]:
        # Fail with a readable message instead of an obscure broadcast error.
        raise ValueError(
            f'mask {masks[index]!r} has shape {mask_arr.shape}, '
            f'but image {image_path!r} has shape {im.shape[:2]}'
        )
    # Compare every pixel against every class value in one broadcast:
    # (H, W, 1) == (C,) -> boolean (H, W, C), channel i <-> classes[i].
    cmsk = mask_arr[:, :, None] == class_values
    if im.ndim != 3: # b&w -> RGB
        im = np.repeat(im[:, :, None], 3, axis=-1)
    X.append(im)
    Y.append(cmsk)

In [51]:
# shuffle images and masks with one shared permutation so pairs stay aligned.
# A dedicated, seeded generator makes the train/validation split (and every
# number derived from it) reproducible after a kernel restart, without
# touching NumPy's global random state.
SHUFFLE_SEED = 42
idx = np.random.default_rng(SHUFFLE_SEED).permutation(len(X))
X = [X[i] for i in idx]
Y = [Y[i] for i in idx]

In [52]:
# Split the shuffled data 80/20 into training and validation parts and write
# each part to a compressed .npz archive under the key 'db'.
# NOTE(review): the dicts are presumably stored as pickled object arrays, so
# readers likely need np.load(..., allow_pickle=True) — confirm.
split_at = int(len(X)*0.8)
traindb = {'X': X[:split_at], 'Y': Y[:split_at]}
valdb = {'X': X[split_at:], 'Y': Y[split_at:]}

print('Saving validation set ...')
np.savez_compressed('covid-val', db=valdb)
print('Saving training set ...')
np.savez_compressed('covid-train', db=traindb)

Saving validation set ...
Saving training set ...


## Calculate weights

In [53]:
# Gather the training masks into one array for the class statistics below.
train_masks = traindb.get('Y')
all_masks = np.array(train_masks)

In [54]:
# Sanity check on the stacked masks; the expected layout is
# (n_images, height, width, n_classes).
all_masks.shape

(80, 256, 256, 4)

In [71]:
# Fraction of pixels that belong to each class: average the boolean masks over
# the first three axes, leaving one value per class channel.
freq = all_masks.mean(axis=(0, 1, 2))
freq

array([0.04567184, 0.02231903, 0.00148087, 0.93052826])

In [72]:
# Class weights as median(freq) / freq, so rarer classes get larger weights.
# NOTE(review): a class with zero frequency would produce an infinite weight.
median_freq = np.median(freq)
weights = median_freq / freq
weights

array([ 0.74434125,  1.52315923, 22.95646574,  0.03653348])