In [2]:
import pylidc as pl
import numpy as np
from tqdm import tqdm

In [3]:
# Extract every annotated axial slice (plus a two-channel mask) for one
# partition of the scan database and save it as a compressed .npz part file.
#
# part = (first scan id, inclusive; last scan id, exclusive; part number used
#         as the suffix of the output filename)
part = (300,500,1)
X = []  # one 2-D image slice per entry
Y = []  # matching boolean mask, channel 0 = annotated, channel 1 = background
scans = pl.query(pl.Scan).filter(pl.Scan.id >= part[0], pl.Scan.id < part[1]).all()
for scan in tqdm(scans):
    annotations = scan.annotations
    if not annotations:
        # Nothing would be appended for this scan, so skip the expensive
        # volume load entirely.
        continue
    vol = scan.to_volume(verbose=False)
    # bbox() is indexed as a tuple of slices (one per volume axis); compute it
    # once per annotation and reuse it for both the z-range and the mask.
    bboxes = [ann.bbox() for ann in annotations]
    # Sorted, de-duplicated z indexes covered by at least one bounding box.
    z_indexes = sorted({z for bbox in bboxes
                        for z in range(bbox[2].start, bbox[2].stop)})
    # Union of all annotation masks. A bool array with in-place OR gives the
    # same result as summing into a float64 array and thresholding at > 0,
    # at one eighth of the memory.
    mask = np.zeros(vol.shape, dtype=bool)
    for ann, bbox in zip(annotations, bboxes):
        mask[bbox] |= ann.boolean_mask()
    for z in z_indexes:
        cmsk = np.zeros((mask.shape[0], mask.shape[1], 2), dtype='bool')
        cmsk[:,:, 0] = mask[:,:,z]
        cmsk[:,:, 1] = np.logical_not(mask[:,:,z])
        # Copy the slice: vol[:,:,z] is a view, and keeping views in X would
        # keep every full volume alive until the end of the loop.
        X.append(vol[:,:,z].copy())
        Y.append(cmsk)
partdb = {'X': X, 'Y': Y}
# The dict is stored as a pickled 0-d object array; readers need
# allow_pickle=True and must unwrap it with [()].
np.savez_compressed('lidc-part' + str(part[2]), db=partdb)

100%|██████████| 200/200 [18:41<00:00,  5.61s/it]


## Joining parts

In [5]:
# Load one saved part (number 3) to inspect it. The archive holds a pickled
# dict in a 0-d object array, so [()] unwraps it back into the dict.
# allow_pickle=True executes pickle data: only use on files you produced.
db = np.load('lidc-part{}.npz'.format(3), allow_pickle=True)['db'][()]

In [3]:
# Concatenate the slices and masks of all four part files (0..3) into X and Y.
X = []
Y = []
for i in tqdm(range(4)):
    # np.load returns a lazily-read archive for .npz files; the context
    # manager closes its file handle as soon as the payload is extracted.
    # allow_pickle=True executes pickle data: only use on files you produced.
    with np.load('lidc-part' + str(i) + '.npz', allow_pickle=True) as archive:
        db = archive['db'][()]  # unwrap the 0-d object array into the dict
    # extend() appends in place instead of rebuilding the whole list each time.
    X.extend(db['X'])
    Y.extend(db['Y'])

100%|██████████| 4/4 [01:34<00:00, 23.57s/it]


In [7]:
# shuffle
# Use a dedicated, seeded generator so the permutation (and therefore the
# train/validation split derived from it) is reproducible on a fresh run.
SEED = 42
shuffle_rng = np.random.RandomState(SEED)
idx = shuffle_rng.permutation(len(X))
# The same permutation is applied to both lists to keep image/mask pairs aligned.
# NOTE: X and Y are rebound in place, so re-running this cell shuffles the
# already-shuffled lists again; run it once after the join cell.
# NOTE(review): shuffling individual slices presumably lets slices of the same
# scan land in both train and validation -- confirm whether a scan-level split
# is wanted.
X = [X[i] for i in idx]
Y = [Y[i] for i in idx]

In [8]:
# 80/20 split: the first 80% of the (shuffled) samples go to training and
# the remainder to validation. Each set is written as a compressed .npz
# archive holding a single pickled dict under the key 'db'.
bound = int(len(X)*0.8)
valdb = dict(X=X[bound:], Y=Y[bound:])
traindb = dict(X=X[:bound], Y=Y[:bound])
for message, filename, payload in (
    ('Saving validation set ...', 'lidc-val', valdb),
    ('Saving training set ...', 'lidc-train', traindb),
):
    print(message)
    np.savez_compressed(filename, db=payload)

Saving validation set ...
Saving training set ...


## Calculate weights

In [9]:
# Stack the per-slice training masks into one array for the class statistics.
train_masks = traindb.get('Y')
all_masks = np.array(train_masks)

In [10]:
# Sanity check of the stacked mask array; the last axis holds the 2 mask channels.
all_masks.shape

(11708, 512, 512, 2)

In [11]:
# Per-channel frequency: the fraction of True pixels in each mask channel,
# averaged over every sample, row and column (all axes except the last).
freq = all_masks.mean(axis=(0, 1, 2))
freq

array([6.59386779e-04, 9.99340613e-01])

In [12]:
# Class weights: each channel is weighted by median(freq) / freq, so the
# rarer channel receives the larger weight.
median_freq = np.median(freq)
weights = median_freq / freq
weights

array([7.58280294e+02, 5.00329911e-01])