In [1]:
import pylidc as pl
import numpy as np
from tqdm import tqdm

### Full image

In [2]:
# Full-slice extraction: for every scan in the id range, keep each axial slice
# that intersects at least one annotation bounding box, paired with a
# two-channel mask (channel 0 = annotated, channel 1 = not annotated).
# part = (first scan id, inclusive; last scan id, exclusive; part number used in the output file name)
part = (500,700,3)
X = []
Y = []
for scan in tqdm(pl.query(pl.Scan).filter(pl.Scan.id >= part[0], pl.Scan.id < part[1]).all()):
    vol = scan.to_volume(verbose=False)
    # Union of all annotation masks. A boolean volume with logical OR gives the
    # same result as summing into a float volume and thresholding at > 0, but
    # uses one byte per voxel instead of eight.
    mask = np.zeros(vol.shape, dtype=bool)
    annotated_z = set()
    for ann in scan.annotations:
        bbox = ann.bbox()  # computed once per annotation and reused below
        mask[bbox] |= np.asarray(ann.boolean_mask(), dtype=bool)
        annotated_z.update(range(bbox[2].start, bbox[2].stop))
    # Ascending, de-duplicated slice indexes (same order np.unique produced).
    for z in sorted(annotated_z):
        foreground = mask[:, :, z]
        cmsk = np.stack([foreground, np.logical_not(foreground)], axis=-1)
        # Copy the slice: a bare view would keep the entire volume referenced
        # from X for the rest of the run.
        X.append(vol[:, :, z].copy())
        Y.append(cmsk)
partdb = {'X': X, 'Y': Y}
# The dict is stored as a pickled object array inside the .npz archive.
np.savez_compressed('lidc-small-part' + str(part[2]), db=partdb)

100%|██████████| 200/200 [15:48<00:00,  4.74s/it]


### Part of image

In [2]:
# Cropped extraction: for every annotation, emit one sample per slice of the
# annotation's bounding box -- the cropped image slice plus a two-channel mask
# (channel 0 = annotated, channel 1 = not annotated).
# part = (first scan id, inclusive; last scan id, exclusive; part number used in the output file name)
part = (0,300,1)
X = []
Y = []
for scan in tqdm(pl.query(pl.Scan).filter(pl.Scan.id >= part[0], pl.Scan.id < part[1]).all()):
    vol = scan.to_volume(verbose=False)
    for ann in scan.annotations:
        mask = ann.boolean_mask() > 0
        # Crop the volume once per annotation instead of once per slice.
        crop = vol[ann.bbox()]
        for z in range(mask.shape[2]):
            foreground = mask[:, :, z]
            cmsk = np.stack([foreground, np.logical_not(foreground)], axis=-1)
            # Copy the slice: a bare view would keep the entire volume
            # referenced from X for the rest of the run.
            X.append(crop[:, :, z].copy())
            Y.append(cmsk)
partdb = {'X': X, 'Y': Y}
# The dict is stored as a pickled object array inside the .npz archive.
np.savez_compressed('lidc-small-part' + str(part[2]), db=partdb)

100%|██████████| 299/299 [17:43<00:00,  3.56s/it]


## Joining parts

In [9]:
# Concatenate the samples of part files 1 and 2 into flat X / Y lists.
X = []
Y = []
for i in tqdm(range(1,3)):
    # NOTE(security): allow_pickle=True unpickles arbitrary objects; only load
    # part files produced by this notebook.
    # The context manager closes the underlying archive once the dict is read.
    with np.load('lidc-small-part' + str(i) + '.npz', allow_pickle=True) as npz:
        db = npz['db'][()]  # [()] unwraps the 0-d object array back to the dict
    # extend() appends in place instead of re-copying the accumulated list.
    X.extend(db['X'])
    Y.extend(db['Y'])

100%|██████████| 2/2 [00:01<00:00,  1.19it/s]


In [11]:
# Trim every image (and its mask) so both spatial dimensions are multiples of 3.
# Rows/columns are dropped from the end; the mask's channel axis is untouched.
for i, (image, target) in enumerate(zip(X, Y)):
    rows = image.shape[0] - image.shape[0] % 3
    cols = image.shape[1] - image.shape[1] % 3
    X[i] = image[:rows, :cols]
    Y[i] = target[:rows, :cols, :]

In [14]:
# Shuffle X and Y with one shared permutation so pairs stay aligned, then save.
# A fixed-seed local RandomState makes the saved ordering reproducible across
# kernel restarts and leaves the global NumPy RNG state untouched.
idx = np.random.RandomState(42).permutation(len(X))
X = [X[i] for i in idx]
Y = [Y[i] for i in idx]
print('Saving training set ...')
traindb = {'X': X, 'Y': Y}
# The dict is stored as a pickled object array inside the .npz archive.
np.savez_compressed('lidc-small-train', db=traindb)

Saving training set ...


In [3]:
# Build the validation set from part file 3: load, shuffle, save.
X = []
Y = []
for i in tqdm(range(3,4)):
    # NOTE(security): allow_pickle=True unpickles arbitrary objects; only load
    # part files produced by this notebook.
    # The context manager closes the underlying archive once the dict is read.
    with np.load('lidc-small-part' + str(i) + '.npz', allow_pickle=True) as npz:
        db = npz['db'][()]  # [()] unwraps the 0-d object array back to the dict
    # extend() appends in place instead of re-copying the accumulated list.
    X.extend(db['X'])
    Y.extend(db['Y'])
# One shared permutation keeps X/Y pairs aligned; the fixed-seed local
# RandomState makes the saved ordering reproducible.
idx = np.random.RandomState(42).permutation(len(X))
X = [X[i] for i in idx]
Y = [Y[i] for i in idx]
print('Saving validation set ...')
# NOTE(review): this dict holds the *validation* split despite its name. The
# name is kept because the class-weight cell further down reads `traindb`;
# confirm which split the weights are meant to be computed from.
traindb = {'X': X, 'Y': Y}
np.savez_compressed('lidc-small-val', db=traindb)

100%|██████████| 1/1 [00:18<00:00, 18.31s/it]


Saving validation set ...


## Calculate weights

In [9]:
# Stack every mask in the current `traindb` dict into a single array.
# NOTE(review): `traindb` is assigned by both the training-set cell and the
# validation-set cell, so this uses whichever of them ran last -- confirm it is
# the intended split.
all_masks = np.array(
    traindb.get('Y')
)

In [10]:
# Sanity check: display the dimensions of the stacked mask array.
np.shape(all_masks)

(11708, 512, 512, 2)

In [11]:
# Average over the first three axes; for the 4-D mask array this leaves one
# value per entry of the last axis, i.e. the fraction of True pixels per channel.
freq = all_masks.mean(axis=(0, 1, 2))
freq

array([6.59386779e-04, 9.99340613e-01])

In [12]:
# Weight each channel by (median frequency) / (its own frequency), so the
# rarer channel receives the larger weight.
median_freq = np.median(freq)
weights = median_freq / freq
weights

array([7.58280294e+02, 5.00329911e-01])