<h3>This notebook contains the training code for a neural network that performs "Region of Interest" (ROI) detection. The network is trained on both clean and noisy images. Its output will be used at a later stage to preprocess images before they are fed to the actual classifier: the ROI network will detect bounding boxes surrounding lesions, and a separate algorithm will then crop the images to the aspect ratio required by the InceptionResNetV2 network.</h3>

In [4]:
import os
import absl.logging
import PIL.Image

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from typing import Callable
from tensorflow import keras
from typing import Tuple

# Raise absl's logging threshold to ERROR so that lower-severity messages
# (info, warnings) do not clutter the notebook output.
absl.logging.set_verbosity(absl.logging.ERROR)

In [5]:
def get_names(root_path: str) -> list:
    """Return the extension-less names of all files found under ``root_path``.

    The directory tree is walked recursively with ``os.walk`` and names are
    returned in traversal order. Directory components are not included, so
    files sharing a stem in different sub-directories appear more than once.

    Args:
        root_path: Directory to walk.

    Returns:
        A list with one entry per file: the file name without its final
        extension.
    """
    return [
        # splitext removes only the last extension, so 'img.v2.jpg' yields
        # 'img.v2' instead of being truncated at the first dot.
        os.path.splitext(file_name)[0]
        for _, _, file_names in os.walk(root_path)
        for file_name in file_names
    ]


def get_paths(path: str) -> list:
    """Return the path of every file found recursively under ``path``.

    Args:
        path: Directory to walk.

    Returns:
        A list of paths, each formed by joining the containing directory (as
        reported by ``os.walk``) with the file name.
    """
    return [
        os.path.join(root, file_name)
        for root, _, file_names in os.walk(path)
        for file_name in file_names
    ]


# Dataset location (relative path) and its training / validation sub-directories.
base_dir = os.path.join('..', 'data', 'images_original_inception_resnet_v2_200x150_splitted_with_augmentation')
train_dir = os.path.join(base_dir, 'training')
valid_dir = os.path.join(base_dir, 'validation')

In [7]:
# Reduced image size: 600 and 450 integer-divided by 3, i.e. 200 x 150
# (the same size that appears in the dataset directory name).
SMALLER_WIDTH, SMALLER_HEIGHT = 600 // 3, 450 // 3

In [6]:
def parse_csv(file_path: str) -> Tuple[np.ndarray, np.ndarray]:
    """Read image file names and bounding-box corners from a CSV file.

    Args:
        file_path: Path to a CSV file containing the columns ``filename``,
            ``x1``, ``y1``, ``x2`` and ``y2``.

    Returns:
        A pair ``(filenames, coords)``. ``filenames`` is a 1-D NumPy array
        holding the ``filename`` column; ``coords`` is a NumPy array of shape
        ``(n_rows, 4)`` whose columns are ``x1, y1, x2, y2`` in that order.

    Raises:
        KeyError: If one of the required columns is missing from the file.
    """
    df = pd.read_csv(file_path)

    # to_numpy() always yields a plain ndarray, whereas .values may hand back
    # a pandas extension array for some column dtypes.
    filenames = df['filename'].to_numpy()

    # Stack the four coordinate columns as rows, then transpose so that each
    # row of the result describes one bounding box.
    coords = np.array([df[column].to_numpy() for column in ('x1', 'y1', 'x2', 'y2')]).T

    return filenames, coords


def process_path(image_path: str, coords: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
    """Load one JPEG image from disk and pair it with its bounding-box coordinates.

    The file is read, decoded as a 3-channel JPEG and converted to ``float32``
    with ``tf.image.convert_image_dtype``. No resizing is performed here.

    NOTE(review): this function is passed to ``tf.data.Dataset.map`` in this
    file, so at runtime the arguments are presumably tensors rather than a
    Python ``str`` / NumPy array as the annotations suggest -- confirm.

    Args:
        image_path: Path of the JPEG file to load.
        coords: Bounding-box coordinates belonging to that image; returned
            unchanged.

    Returns:
        A pair ``(image, coords)`` where ``image`` is the decoded ``float32``
        image tensor.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)

    return tf.image.convert_image_dtype(decoded, tf.float32), coords


def load_and_preprocess_data(csv_file_path: str, img_dir: str, batch_size: int = 32) -> tf.data.Dataset:
    """Build a batched dataset of (image, bounding-box coordinates) pairs.

    File names and coordinates are read from the CSV via ``parse_csv``; each
    file name is resolved against ``img_dir`` and loaded with ``process_path``.

    Args:
        csv_file_path: Path to the CSV file listing file names and box corners.
        img_dir: Directory containing the image files named in the CSV.
        batch_size: Number of elements per batch. Defaults to 32.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``(image, coords)``.
    """
    image_filenames, coords = parse_csv(csv_file_path)
    # os.path.join (used for every other path in this notebook) avoids a
    # doubled separator when img_dir already ends with one.
    image_paths = [os.path.join(img_dir, fname) for fname in image_filenames]

    return (
        tf.data.Dataset
        .from_tensor_slices((image_paths, coords))
        .map(process_path)
        .batch(batch_size)
    )