In [None]:
import os
from vsiprocesssor.vsi_file import VSIFile
from vsiprocesssor.vsi_entropy import vsi_has_sufficient_information
from model.macenko import MacenkoNormalizer
from setup.settings_module import Settings
from numpy.random import randint, seed
import cv2

In [None]:
# Load the project configuration; every path used below is derived from it.
settings = Settings('settings.cfg')

data_source = settings.data_source
data_dir = settings.data_root

# Output folders for the HE / p63 train and test splits, all located under data_dir.
train_he_dir, train_p63_dir, test_he_dir, test_p63_dir = (
    os.path.join(data_dir, subfolder)
    for subfolder in (
        settings.data_train_he,
        settings.data_train_p63,
        settings.data_test_he,
        settings.data_test_p63,
    )
)

# Make sure the root and every split folder exist (no-op when already present).
for output_dir in (data_dir, train_he_dir, train_p63_dir, test_he_dir, test_p63_dir):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# Collect every .vsi file located directly inside data_source.
# - The directory test is done on the joined path: os.listdir() returns bare
#   names, which os.path.isdir() would otherwise resolve against the current
#   working directory instead of data_source.
# - os.listdir() returns entries in arbitrary order, so the names are sorted to
#   keep the index-based WSI selection (done after seeding the RNG) stable
#   across runs and machines.
vsi_images = [
    os.path.join(data_source, name)
    for name in sorted(os.listdir(data_source))
    if name.endswith('.vsi') and not os.path.isdir(os.path.join(data_source, name))
]
print('Found {} vsi files'.format(len(vsi_images)))
print(vsi_images[0:6])
print()

# Get Test file
test_he = os.path.join(data_source, settings.test_he)
test_p63 = os.path.join(data_source, settings.test_p63)
print(test_p63)
print(test_he)
print()

# Remove test files from dataset. Matching is a substring test on the full
# path, so a configured test name also matches when it is only a prefix of
# the actual file name.
to_remove = [path for path in vsi_images if test_he in path or test_p63 in path]
vsi_images = [path for path in vsi_images if path not in to_remove]

# Split into HE and p63 WSIs. Only the file name is inspected so that an
# 'HE' / 'p63' fragment in the data_source directory path cannot make every
# file match.
vsi_he_images = [path for path in vsi_images if 'HE' in os.path.basename(path)]
vsi_p63_images = [path for path in vsi_images if 'p63' in os.path.basename(path)]
print('Found {} HE vsi files'.format(len(vsi_he_images)))
print('Found {} p63 vsi files'.format(len(vsi_p63_images)))

In [None]:
# Fit the Macenko normalizer on the reference image named in the settings.
macenko_target_path = os.path.join(data_dir, settings.macenko_target)
macenko_target = cv2.imread(macenko_target_path)
if macenko_target is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with the offending path rather than inside cvtColor.
    raise FileNotFoundError(f'Could not read Macenko target image: {macenko_target_path}')
# OpenCV loads images as BGR; the normalizer is fitted on the RGB version.
macenko_target = cv2.cvtColor(macenko_target, cv2.COLOR_BGR2RGB)
normalizer = MacenkoNormalizer()
normalizer.fit(macenko_target)

# Fixed seed so the random WSI selection below is repeatable.
seed(42)

# select_by: size of the smaller WSI set, train_by: size of the larger one.
select_by = min(len(vsi_he_images), len(vsi_p63_images))
train_by = max(len(vsi_he_images), len(vsi_p63_images))

# Draw 20% of the smaller set as validation indices (sampled with replacement,
# so duplicates are possible); every remaining index is a training index.
valid_wsis = randint(0, select_by, int(select_by * 0.2))
# sorted() instead of list(): set iteration order is not guaranteed.
train_wsis = sorted(set(range(train_by)) - set(valid_wsis))

# Limits for this export pass.
max_preview_wsis = 6      # number of random picks to process
max_patches_per_wsi = 1   # patches written per WSI before moving on

# NOTE(review): the random values in valid_wsis are used as *positions* into
# train_wsis, i.e. they select random training WSIs - confirm this is intended.
for index in valid_wsis[0:max_preview_wsis]:
    if index >= len(train_wsis):
        # train_wsis is shorter than train_by, so a drawn position can be out of range.
        continue
    wsi_index = train_wsis[index]
    if wsi_index >= len(vsi_he_images):
        # train_wsis spans the larger set; skip indices that do not exist for HE.
        continue
    with VSIFile(vsi_he_images[wsi_index]) as vsi:
        counter = 0
        for (patch, x, y) in vsi:
            if not vsi_has_sufficient_information(patch):
                continue
            normalized_patch = normalizer.transform(patch)
            # cv2.cvtColor returns the converted image and leaves its input
            # untouched, so the result must be kept: cv2.imwrite expects BGR.
            # Assumes transform() returns RGB like the image it was fitted on - TODO confirm.
            normalized_patch = cv2.cvtColor(normalized_patch, cv2.COLOR_RGB2BGR)
            cv2.imwrite(os.path.join(train_he_dir, f'{vsi.vsi_name}_{x}_{y}.png'), normalized_patch)
            counter += 1
            if counter >= max_patches_per_wsi:
                break
