# Inicjalizacja

## Inicjalizacja zmiennych

In [1]:
malignant_data = 'drive/My Drive/Data/Malignant/'
benign_data = 'drive/My Drive/Data/Benign/'

tfrecord_path_train_mal = 'drive/My Drive/Data/im_mal_dataset_train.tfrecords'
tfrecord_path_test_mal = 'drive/My Drive/Data/im_mal_dataset_test.tfrecords'
tfrecord_path_train_ben = 'drive/My Drive/Data/im_ben_dataset_train.tfrecords'
tfrecord_path_test_ben = 'drive/My Drive/Data/im_ben_dataset_test.tfrecords'

data_split = (0.7, 0.3, 0)
char_irrel = 7
num_patches_per_image = 10
batch_size = 10
patch_size = 256

generate_malignant = True
generate_benign = True

## Inicjalizacja bibliotek

In [2]:
!pip uninstall tensorflow
!pip uninstall tensorflow-io
!pip install tensorflow-gpu
!pip install --no-deps tensorflow-io
!pip install tensorflow_addons
!pip install pyyaml h5py

Found existing installation: tensorflow-io 0.26.0
Uninstalling tensorflow-io-0.26.0:
  Would remove:
    /usr/local/lib/python3.7/dist-packages/tensorflow_io-0.26.0.dist-info/*
    /usr/local/lib/python3.7/dist-packages/tensorflow_io/*
Proceed (y/n)? n
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
from google.colab import drive
import glob
import tensorflow as tf
import tensorflow_io as tfio
import tensorflow_addons as tfa
import os
import random
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Funkcje

In [5]:
############ Augmentations ######################################
##################################
def random_rotation_im_im(image, target):
    """Rotate augmentation
    Args:
        image: Input image
        target: Target image
    Returns:
        Augmented image and augemented target image
    """
    num_input_channels = tf.shape(image)[2]
    num_target_channels = tf.shape(target)[2]
    im_tar = tf.concat([image, target], axis=2)
    angle = random.randint(1,359)
    im_tar = tfa.image.rotate(im_tar, 
                             angle, 
                             interpolation='NEAREST', 
                             name=None)
    image, target = tf.split(im_tar, num_or_size_splits=[num_input_channels, num_target_channels], axis=2)
    return image, target

##################################
def flip_up_down_im_im(image, target):
    """Flip augmentation
    Args:
        image: Input image
        target: Target image
    Returns:
        Augmented image and augemented target image
    """
    num_input_channels = tf.shape(image)[2]
    num_target_channels = tf.shape(target)[2]
    im_tar = tf.concat([image, target], axis=2)
    im_tar = tf.image.flip_up_down(im_tar)
    image, target = tf.split(im_tar, num_or_size_splits=[num_input_channels, num_target_channels], axis=2)
    return image, target

##################################
def flip_left_right_im_im(image, target):
    """Flip augmentation
    Args:
        image: Input image
        target: Target image
    Returns:
        Augmented image and augemented target image
    """
    num_input_channels = tf.shape(image)[2]
    num_target_channels = tf.shape(target)[2]
    im_tar = tf.concat([image, target], axis=2)
    im_tar = tf.image.flip_left_right(im_tar)
    image, target = tf.split(im_tar, num_or_size_splits=[num_input_channels, num_target_channels], axis=2)
    return image, target


#############################
def rotate_im_im(image, target):
    """Rotation augmentation
    Randomly chosen rotation angle:  0, 90, 180, 270 degrees
    Args:
        image: Input image
        target: Target image
    Returns:
        Augmented image and augemented target image
    """
    num_input_channels = tf.shape(image)[2]
    num_target_channels = tf.shape(target)[2]
    im_tar = tf.concat([image, target], axis=2) 
    im_tar = tf.image.rot90(im_tar, tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32))
    image, target = tf.split(im_tar, num_or_size_splits=[num_input_channels, num_target_channels], axis=2)
    return image, target


#################################
def random_rotation_im_lab(image, target):
    """Rotate augmentation
    Args:
        image: Image
        target: Class label
    Returns:
        Augmented image and label
    """
    angle = random.randint(1,359)
    image = tfa.image.rotate(image, 
                          angle, 
                          interpolation='NEAREST', 
                          name=None)
    return image, target

##################################
def flip_up_down_im_lab(image, target):
    """Flip augmentation
    Args:
        image: Image
        target: Class label
    Returns:
        Augmented image and label
    """
    image = tf.image.flip_up_down(image)
    return image, target

##################################
def flip_left_right_im_lab(image, target):
    """Flip augmentation
    Args:
        image: Image
        target: Class label
    Returns:
        Augmented image and label
    """
    image = tf.image.flip_left_right(image)
    return image, target


#############################
def rotate_im_lab(image, target):
    """Rotation augmentation
    Randomly chosen rotation angle:  0, 90, 180, 270 degrees
    Args:
        image: Image
        target: Class label
    Returns:
        Augmented image and label
    """
    image = tf.image.rot90(image, tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32))
    return image, target

def image_image_feeder(path_base_input, 
                       path_base_target,
                       filenames_input,
                       filenames_target,
                       from_disk=True,
                       num_patches_per_image=6,
                       patch_size = 128,
                       shuffle_buffer_size_images=5,
                       shuffle_buffer_size_patches=10,
                       batch_size=6,
                       intensity_thresh_input = 0.1,
                       intensity_thresh_target = 0.1,
                       accept_thresh_input = 0.5,
                       accept_thresh_target = 0.5,
                       invert_input=False,
                       invert_target=False,
                       max_tries = 10,
                       p_augment=0.5,
                       augmentations=[rotate_im_im, flip_up_down_im_im, flip_left_right_im_im],
                       shuffle_images=True,
                       shuffle_patches=True,
                       standarize=True,
                       num_prefetched_patches=1,
                       num_parallel_calls=4):
    
    num_all_pixels =  patch_size *  patch_size;
    file_paths_input = [os.path.join(path_base_input, fn) for fn in filenames_input]
    file_paths_target = [os.path.join(path_base_target, fn) for fn in filenames_target]
    
    if from_disk:
        dataset = tf.data.Dataset.from_tensor_slices((file_paths_input, file_paths_target))
    else:
        images = load_image_image_ram(file_paths_input, file_paths_target)
        dataset = tf.data.Dataset.from_tensor_slices(images)
    
    if shuffle_images:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size_images)
    
    if from_disk:
        dataset = dataset.map(load_paired_images)
    
    if standarize:
        dataset = dataset.map(standarize_image_image, num_parallel_calls=num_parallel_calls)
        
    get_patches_fn_train = lambda image_input, image_target: get_patches_im_im(image_input, image_target, 
                                                                               num_all_pixels, 
                                                                               num_patches=num_patches_per_image, 
                                                                               patch_size=patch_size, 
                                                                               max_tries=max_tries,
                                                                               input_pixel_threshold=intensity_thresh_input, 
                                                                               input_accept_threshold=accept_thresh_input,
                                                                               input_invert=invert_input, 
                                                                               target_pixel_threshold=intensity_thresh_target, 
                                                                               target_accept_threshold=accept_thresh_target,
                                                                               target_invert=invert_target)
    dataset = dataset.map(get_patches_fn_train, num_parallel_calls=num_parallel_calls)
    dataset = dataset.unbatch()
    if len(augmentations) != 0:
        for f in augmentations:
            dataset = dataset.map(lambda x, y: tf.cond(tf.random.uniform([], 0, 1) > (1 - p_augment), lambda: f(x, y), lambda: (x, y)), 
                                              num_parallel_calls=num_parallel_calls)
    if shuffle_patches:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size_patches)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(num_prefetched_patches)
    dataset = dataset.repeat()
    return dataset

def load_image_image_ram(input_paths, target_paths):
    #Loading images into memory
    print('Loading input and target images into RAM ...')
    
    imgs_input = None
    imgs_target = None
    
    for filename_input, filename_target in zip(input_paths, target_paths):
        img_input, img_target = load_paired_images(filename_input, filename_target)
        
        img_input = tf.expand_dims(img_input, axis=0)
        img_target = tf.expand_dims(img_target, axis=0)
        if imgs_input is None:
            imgs_input = img_input
        else:
            imgs_input = tf.concat([imgs_input, img_input], axis=0)
        if imgs_target is None:
            imgs_target = img_target
        else:
            imgs_target = tf.concat([imgs_target, img_target], axis=0)
    return imgs_input, imgs_target

def load_paired_images(filename_input, filename_target):
    image_string_input = tf.io.read_file(filename_input)
    image_decoded_input = tfio.experimental.image.decode_tiff(image_string_input)
    image_input = tf.image.convert_image_dtype(image_decoded_input, tf.float32)

    image_string_target = tf.io.read_file(filename_target)
    image_decoded_target = tfio.experimental.image.decode_tiff(image_string_target)
    image_target = tf.image.convert_image_dtype(image_decoded_target, tf.float32)

    return image_input[:,:,0:3], image_target[:,:,0:3]

def standarize_image_image(image_input, image_target):
    return tf.image.per_image_standardization(image_input), tf.image.per_image_standardization(image_target)

def get_patches_im_im(image_input, image_target, 
                      num_all_pixels, 
                      num_patches=10, 
                      patch_size=32, 
                      max_tries=2, 
                      input_pixel_threshold=0, 
                      input_accept_threshold=0,
                      input_invert=False, 
                      target_pixel_threshold=0, 
                      target_accept_threshold=0,
                      target_invert=False):
 
    epsilon = 0.000001
    patches = []
    input_invert = tf.constant(input_invert, dtype=tf.bool)
    target_invert = tf.constant(target_invert, dtype=tf.bool)
    num_input_channels = tf.shape(image_input)[2]
    num_target_channels = tf.shape(image_target)[2]
    num_concat_channels = num_input_channels + num_target_channels
    
    image_input_target_paired = tf.concat([image_input, image_target], axis=2)
    for i in range(num_patches):
        patch = tf.zeros([patch_size, patch_size, num_concat_channels], dtype=tf.float32)
        
        k = 1
        while tf.constant(True, dtype=tf.bool):
            #patch = tf.image.random_crop(image_input_target_paired, [patch_size, patch_size, num_concat_channels])
            patch = tf.image.random_crop(image_input_target_paired, [patch_size, patch_size, tf.shape(image_input)[2]+tf.shape(image_target)[2]])
            img_input = patch[:,:,0:num_input_channels]
            if num_input_channels > 1:
                img_input = tf.image.rgb_to_grayscale(img_input)
            
            #min_input = tf.math.reduce_min(img_input) 
            #max_input = tf.math.reduce_max(img_input) 
            #range_input = max_input - min_input
            #img_input = (img_input - min_input + epsilon) / (range_input + 2*epsilon)
            
            img_target = patch[:,:,num_input_channels:num_concat_channels]
            if num_target_channels > 1:
                img_target = tf.image.rgb_to_grayscale(img_target)  
            
            #min_target = tf.math.reduce_min(img_target) 
            #max_target = tf.math.reduce_max(img_target) 
            #range_target = max_target - min_target
            #img_target = (img_target - min_target + epsilon) / (range_target + 2*epsilon)
            
            img_input = tf.math.floor(img_input - input_pixel_threshold) + 1
            img_input = tf.clip_by_value(img_input, clip_value_min=0, clip_value_max=1)
            img_target = tf.math.floor(img_target - target_pixel_threshold) + 1
            img_target  = tf.clip_by_value(img_target, clip_value_min=0, clip_value_max=1)
            num_input_accepted_pixels = tf.math.reduce_sum(img_input)
            num_target_accepted_pixels = tf.math.reduce_sum(img_target)
            if (k >= max_tries):
                break
            if (((num_input_accepted_pixels/num_all_pixels) >= input_accept_threshold) and 
                ((num_target_accepted_pixels/num_all_pixels) >= target_accept_threshold)) and (~input_invert) and (~target_invert):
                break

            if ((((num_all_pixels - num_input_accepted_pixels)/num_all_pixels) >= input_accept_threshold) and 
                ((num_target_accepted_pixels/num_all_pixels) >= target_accept_threshold)) and (input_invert) and (~target_invert):
                break

            if ((((num_all_pixels - num_input_accepted_pixels)/num_all_pixels) >= input_accept_threshold) and 
                (((num_all_pixels - num_target_accepted_pixels)/num_all_pixels) >= target_accept_threshold)) and (input_invert) and (target_invert):
                break

            if (((num_input_accepted_pixels/num_all_pixels) >= input_accept_threshold) and 
                (((num_all_pixels - num_target_accepted_pixels)/num_all_pixels) >= target_accept_threshold)) and (~input_invert) and (target_invert):
                break

            k = k + 1                
        patches.append(patch)
    patches = tf.stack(patches)
    patches_input, patches_target = tf.split(patches, num_or_size_splits=[num_input_channels, num_target_channels], axis=3)
   
    return patches_input, patches_target

def img_img_dataset_2_tfrecord(dataset, tfrecord_path, num_batches_gen):
    it_dataset = iter(dataset)
    with tf.io.TFRecordWriter(tfrecord_path) as tfrecord_writer:
        for k in range(num_batches_gen): 
            im_input, im_target = it_dataset.next()
            img_img_store_batch_tfrecord(im_input, im_target, tfrecord_writer)

def img_img_store_batch_tfrecord(img_input_batch, img_target_batch, writer):
    num_samples = img_input_batch.shape[0]
    num_samples_target = img_target_batch.shape[0]
    assert num_samples == num_samples_target
    for i in range(num_samples):
        img_input_str = tf.io.serialize_tensor(img_input_batch[i, :, :, :])
        img_target_str = tf.io.serialize_tensor(img_target_batch[i, :, :, :])
        example = image_image_example(img_input_str, img_target_str)
        writer.write(example.SerializeToString())

def image_image_example(input_image_string, target_image_string):
    feature = {
          'input_image_raw': _bytes_feature(input_image_string),
          'target_image_raw': _bytes_feature(target_image_string) 
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

def _bytes_feature(value):
  """Returns a bytes_list from a string / byte."""
  if isinstance(value, type(tf.constant(0))):
    value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def split_data(filepath, proportion, irrelevant_characters):
  files = glob.glob(filepath+'*')
  random.shuffle(files)
  images = [file[:-irrelevant_characters] for file in files]

  counts = dict()
  for i in images:
    counts[i] = counts.get(i, 0) + 1

  i = 0
  train_data_paths = []
  val_data_paths = []
  test_data_paths = []

  for key, value in counts.items():
    if i < len(files)*sum(data_split[0:1]):
      train_data_paths += glob.glob(key + '*')
      i += value
    elif i < len(files)*sum(data_split[0:2]):
      val_data_paths += glob.glob(key + '*')
      i += value
    elif i < len(files)*sum(data_split[0:3]):
      test_data_paths += glob.glob(key + '*')
      i += value
  
  train_data_paths = [path.replace(filepath,'') for path in train_data_paths]
  val_data_paths = [path.replace(filepath,'') for path in val_data_paths]
  test_data_paths = [path.replace(filepath,'') for path in test_data_paths]

  return train_data_paths, val_data_paths, test_data_paths

# Generacja TFRecords

In [10]:
if generate_malignant:
  path_base_input_train = malignant_data[:-1]
  path_base_target_train = malignant_data[:-1]
  train_files_mal, test_files_mal, _ = split_data(malignant_data, data_split, char_irrel)

  train_dset_mal = image_image_feeder(path_base_input_train, path_base_target_train, train_files_mal, train_files_mal, from_disk=True, batch_size = batch_size,
                                    standarize=False, invert_input=True, invert_target=True, patch_size=patch_size, num_patches_per_image=num_patches_per_image)
  
  
  test_dset_mal = image_image_feeder(path_base_input_train, path_base_target_train, test_files_mal, test_files_mal, from_disk=True, batch_size = batch_size,
                                    standarize=False, invert_input=True, invert_target=True, patch_size=patch_size, num_patches_per_image=num_patches_per_image)

  img_img_dataset_2_tfrecord(train_dset_mal, tfrecord_path_train_mal, len(train_files_mal))
  img_img_dataset_2_tfrecord(test_dset_mal, tfrecord_path_test_mal, len(test_files_mal))

NotImplementedError: ignored

In [None]:
if generate_benign:
  path_base_input_train = benign_data[:-1]
  path_base_target_train = benign_data[:-1]
  train_files_ben, test_files_ben, _ = split_data(benign_data, data_split, char_irrel)

  train_dset_ben = image_image_feeder(path_base_input_train, path_base_target_train, train_files_ben, train_files_ben, from_disk=True, batch_size = batch_size,
                                    standarize=False, invert_input=True, invert_target=True, patch_size=patch_size, num_patches_per_image=num_patches_per_image)
  
  test_dset_ben = image_image_feeder(path_base_input_train, path_base_target_train, test_files_ben, test_files_ben, from_disk=True, batch_size = batch_size,
                                    standarize=False, invert_input=True, invert_target=True, patch_size=patch_size, num_patches_per_image=num_patches_per_image)

  img_img_dataset_2_tfrecord(train_dset_ben, tfrecord_path_train_ben,  len(train_files_ben))
  img_img_dataset_2_tfrecord(test_dset_ben, tfrecord_path_test_ben,  len(test_files_ben))