In [1]:
from multiprocessing import Pool
import pickle
import gzip
import numpy as np
import os, os.path
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm
from PIL import Image, ImageOps
from itertools import repeat
from sklearn.model_selection import train_test_split
import re
import keras
from keras import backend as K
from data_manager import DataManager, load_img, list_images
from data_generator import CustomDataGenerator
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from datetime import datetime
import tensorflow
from model import UNet
import tensorflow.keras.activations as activations
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

In [2]:
# Restrict TensorFlow to one physical GPU and enable memory growth on it.
gpus = tf.config.experimental.list_physical_devices('GPU')

print("Available GPUs: ", [gpu.name for gpu in gpus])
gpu_index = 3
if gpu_index < len(gpus):
    print(f"Running on GPU {gpu_index}")
    tf.config.experimental.set_visible_devices(devices=gpus[gpu_index], device_type='GPU')
    tf.config.experimental.set_memory_growth(device=gpus[gpu_index], enable=True)
else:
    # Indexing gpus[gpu_index] would raise IndexError on a machine with fewer GPUs,
    # so report the problem and keep TensorFlow's default device configuration.
    print(f"GPU {gpu_index} is not available ({len(gpus)} GPU(s) found); "
          "leaving TensorFlow's device configuration unchanged")

Available GPUs:  ['/physical_device:GPU:0', '/physical_device:GPU:1', '/physical_device:GPU:2', '/physical_device:GPU:3']
Running on GPU 3


In [3]:
# Run configuration shared by the cells below.
EPOCHS = 200
BATCH_SIZE = 6
BASE = os.getcwd()  # working directory; gen_dataset writes under BASE + '/data/'
manager = DataManager()

In [85]:
def load_zipped_pickle(filename):
    """Read a gzip-compressed pickle file and return the unpickled object.

    Args:
        filename: Path of the gzip file to open in binary read mode.

    Returns:
        Whatever object was pickled into the file.

    Note:
        ``pickle.load`` can execute arbitrary code; only use this on files
        from a trusted source.
    """
    with gzip.open(filename, mode='rb') as handle:
        return pickle.load(handle)

def resize2SquareKeepingAspectRation(img, size, interpolation):
    """Zero-pad ``img`` to a centered square, then resize it to ``size`` x ``size``.

    Args:
        img: Image array whose first two axes are (height, width); an optional
            third axis is treated as channels.
        size: Side length, in pixels, of the square output.
        interpolation: OpenCV interpolation flag (e.g. ``cv2.INTER_AREA``).

    Returns:
        The array produced by ``cv2.resize`` for the (padded) image.
    """
    h, w = img.shape[:2]
    if h == w:
        # Already square: nothing to pad.
        square = img
    else:
        side = max(h, w)
        # Offsets that center the original image on the square canvas.
        x_pos = (side - w) // 2
        y_pos = (side - h) // 2
        # Zero canvas with the same dtype; keeps the channel axis when present.
        square = np.zeros((side, side) + img.shape[2:], dtype=img.dtype)
        square[y_pos:y_pos + h, x_pos:x_pos + w] = img
    # interpolation must be passed by keyword: the third positional parameter
    # of cv2.resize is `dst`, so a positional flag is not used as interpolation.
    return cv2.resize(square, (size, size), interpolation=interpolation)

def grays_to_RGB(img):
    """Convert an image array to RGB mode through PIL and return it as an array."""
    rgb_image = Image.fromarray(img).convert("RGB")
    return np.array(rgb_image)

def save_img(img, img_idx, path, pid, is_mask=False):
    """Write one frame (or its mask) to ``path`` as a PNG file.

    The file is named ``<pid>_<img_idx>.png`` for images and
    ``<pid>_<img_idx>_mask.png`` for masks.

    Args:
        img: Image or mask array to write.
        img_idx: Index of the frame, used in the file name.
        path: Directory the file is written into.
        pid: Identifier used as the file-name prefix.
        is_mask: When True, ``img`` is cast to uint8 and saved with the
            ``_mask`` suffix; otherwise it is converted to RGB first.
    """
    stem = path + '/' + str(pid) + '_' + str(img_idx)
    if not is_mask:
        target = stem + '.png'
        payload = grays_to_RGB(img)
    else:
        target = stem + '_mask.png'
        # cv2 cannot write boolean arrays, so store the mask as uint8.
        payload = np.asarray(img, dtype="uint8")

    cv2.imwrite(target, payload)

def make_dir(path):
    """Create directory ``path``, including any missing parent directories.

    Calling it for a directory that already exists is a no-op. Other failures
    (e.g. permissions, or ``path`` being an existing file) are reported on
    stdout instead of being raised, so callers keep running.

    Args:
        path: Directory to create.
    """
    try:
        # makedirs also creates missing parents (os.mkdir fails without them).
        os.makedirs(path, exist_ok=True)
    except OSError:
        print(f"Creation of the directory {path} failed", end='\r')

def gen_dataset(imgs, dataset, pid, labels=None, typeof_dataset=None):
    """Dump a sequence of frames (and optional masks) as PNG files.

    Files go to ``BASE/data/<dataset>/`` or, when ``typeof_dataset`` is given,
    to ``BASE/data/<dataset>/<typeof_dataset>``. Missing directories are
    created with ``make_dir``.

    Args:
        imgs: Iterable of frames to save.
        dataset: Name of the dataset folder (e.g. "train" or "test").
        pid: Identifier used as file-name prefix for every frame.
        labels: Optional masks, indexed like ``imgs``; saved next to each frame.
        typeof_dataset: Optional sub-folder name (only used for train data).
    """
    output_dir = BASE + '/data/' + dataset + '/'
    if not os.path.isdir(output_dir):
        make_dir(output_dir)
    if typeof_dataset is not None:  # this is only for train
        output_dir = output_dir + typeof_dataset
        if not os.path.isdir(output_dir):
            make_dir(output_dir)

    has_labels = labels is not None  # this is only for train
    for frame_idx, frame in enumerate(imgs):
        save_img(frame, frame_idx, output_dir, pid)
        if has_labels:
            save_img(labels[frame_idx], frame_idx, output_dir, pid, is_mask=True)
    
def list_images(directory, ext='jpg|jpeg|bmp|png|tif'):
    """Return the paths of image files located directly inside ``directory``.

    A file is kept when its name starts with one or more word characters
    followed by a dot and one of the extensions in ``ext``. The pattern is
    only anchored at the start of the name (``re.match``), so trailing
    characters after the extension are tolerated.

    Note: this definition shadows the ``list_images`` imported from
    ``data_manager`` at the top of the notebook.

    Args:
        directory: Directory to scan (not recursive).
        ext: Regex alternation of accepted extensions.

    Returns:
        List of ``os.path.join(directory, name)`` for each matching file, in
        ``os.listdir`` order.
    """
    # Raw string: '\w' and '\.' are invalid escape sequences in a plain literal.
    # Compiled once instead of re-parsing the pattern for every file.
    pattern = re.compile(r'([\w]+\.(?:' + ext + '))')
    return [os.path.join(directory, f) for f in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, f)) and pattern.match(f)]

def preprocess(img, denoise=False):
    """
    Prepare a single image for the conv net (runs after augmentation).

    The image is optionally denoised, padded/resized to a square of side
    ``DataManager.EX_IMG_TARGET_COLS`` using ``cv2.INTER_AREA``, and given a
    trailing axis.

    Args:
        img: Image array to process.
        denoise: When True, apply ``cv2.fastNlMeansDenoising`` (h=7) first.

    Returns:
        The resized image with one extra axis appended at the end.
    """
    source = cv2.fastNlMeansDenoising(img, h=7) if denoise else img
    squared = resize2SquareKeepingAspectRation(
        source, DataManager.EX_IMG_TARGET_COLS, cv2.INTER_AREA)
    return np.expand_dims(squared, axis=-1)


def transform(img, mask, augment=True):
    """
    Preprocess an (img, mask) pair and wrap each in a batch of one.

    Args:
        img: Input image, passed through ``preprocess``.
        mask: Matching mask, passed through ``preprocess`` and cast to float32.
        augment: Accepted for API compatibility; augmentation is currently
            disabled, so this flag has no effect.

    Returns:
        Tuple ``(img_batch, mask_batch)`` of arrays with a leading axis of 1.
    """
    img_batch = np.array([preprocess(img)])
    mask_batch = np.array([preprocess(mask).astype('float32')])
    return img_batch, mask_batch

In [6]:
# train_data = load_zipped_pickle("data/train.pkl")
# test_data = load_zipped_pickle("data/test.pkl")
# for data in tqdm(train_data, total=len(train_data)):
#     imgs = data['video'].T
#     typeof_ds = data['dataset']
#     labels = data['label'].T
#     pacient = data['name']
#     gen_dataset(imgs, "train", pacient, labels, typeof_ds)

# for data in tqdm(test_data, total=len(test_data)):
#     imgs = data['video'].T
#     pacient = data['name']
#     gen_dataset(imgs, "test", pacient)
# min_w = 1000
# max_w = 0
# min_h = 1000
# max_h = 0
# for data in tqdm(test_data, total=len(test_data)):
#     imgs = data['video'].T
#     pacient = data['name']
#     if min_w > imgs.shape[1]:
#         min_w = imgs.shape[1]
#     if max_w < imgs.shape[1]:
#         max_w = imgs.shape[1]
#     if min_h > imgs.shape[2]:
#         min_h = imgs.shape[2]
#     if max_h < imgs.shape[2]:
#         max_h = imgs.shape[2]

# print("Min width: {} Max width: {}".format(min_w, max_w))
# print("Min height: {} Max height: {}".format(min_h, max_h))

In [8]:
# Build the training data through DataManager; progress is printed every 100 images (see output below).
manager.create_train_data()

Loading training amateur images...
Done: 0/8170 images
Done: 100/8170 images
Done: 200/8170 images
Done: 300/8170 images
Done: 400/8170 images
Done: 500/8170 images
Done: 600/8170 images
Done: 700/8170 images
Done: 800/8170 images
Done: 900/8170 images
Done: 1000/8170 images
Done: 1100/8170 images
Done: 1200/8170 images
Done: 1300/8170 images
Done: 1400/8170 images
Done: 1500/8170 images
Done: 1600/8170 images
Done: 1700/8170 images
Done: 1800/8170 images
Done: 1900/8170 images
Done: 2000/8170 images
Done: 2100/8170 images
Done: 2200/8170 images
Done: 2300/8170 images
Done: 2400/8170 images
Done: 2500/8170 images
Done: 2600/8170 images
Done: 2700/8170 images
Done: 2800/8170 images
Done: 2900/8170 images
Done: 3000/8170 images
Done: 3100/8170 images
Done: 3200/8170 images
Done: 3300/8170 images
Done: 3400/8170 images
Done: 3500/8170 images
Done: 3600/8170 images
Done: 3700/8170 images
Done: 3800/8170 images
Done: 3900/8170 images
Done: 4000/8170 images
Done: 4100/8170 images
Done: 4200/

In [12]:
# Build the test data through DataManager; per the output below, the result is saved to .npy files.
manager.create_test_data()

Creating test images...
Done: 0/1572 images
Done: 100/1572 images
Done: 200/1572 images
Done: 300/1572 images
Done: 400/1572 images
Done: 500/1572 images
Done: 600/1572 images
Done: 700/1572 images
Done: 800/1572 images
Done: 900/1572 images
Done: 1000/1572 images
Done: 1100/1572 images
Done: 1200/1572 images
Done: 1300/1572 images
Done: 1400/1572 images
Done: 1500/1572 images
Saving test samples...
Saving to .npy files done.


In [9]:
# Load the train/validation split for the "expert" subset (images X_*, targets y_*).
X_train, X_val, y_train, y_val = manager.load_train_val_data("expert")

In [89]:
# Sanity check: dimensions of the loaded training array.
X_train.shape

(1470, 1007, 732, 1)

In [127]:
# Keyword options for the training generator, gathered in one place so they are easy to tweak.
datagen_options = dict(
    batch_size=2,
    shuffle=True,
    seed=42,
    # featurewise_center=True,
    # featurewise_std_normalization=True,
    rotation_range=5,       # degrees
    width_shift_range=10,   # pixels, if <1 fraction
    height_shift_range=10,
    horizontal_flip=False,
    shear_range=5,
    rescale=1./255,
    # preprocessing_function=preprocess,
)

# Positional arguments: images, targets, (224, 224) size tuple, per-image preprocess callable.
datagen = CustomDataGenerator(X_train, y_train, (224, 224), preprocess, **datagen_options)

# NOTE(review): presumably only needed when the featurewise_* options above are enabled — confirm.
datagen.generator.fit(X_train)

In [96]:
# Instantiate the U-Net, build it for input shape (None, 224, 224, 1) and print its summary.
net = UNet(True)
net.build((None, 224, 224, 1))
net.summary()

# Training callbacks. Each run logs to its own TensorBoard directory named after the start time.
run_id = str(datetime.now())
tb = TensorBoard(log_dir='./logs/{}'.format(run_id), histogram_freq=1)
# Both callbacks below monitor 'val_loss', which Keras only reports when fit() receives validation data.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.25, patience=4, min_lr=1e-6)
model_checkpoint = ModelCheckpoint('./results/net.hdf5', monitor='val_loss', save_best_only=True)

print('Training on model')
net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Short training run fed by the augmenting generator; steps_per_epoch = len(X_train)//2
# matches the batch_size=2 the generator was created with.
# NOTE(review): no validation_data is passed here, while model_checkpoint and reduce_lr
# monitor 'val_loss' — confirm these callbacks actually take effect during this run.
net.fit(datagen, epochs=2, steps_per_epoch=len(X_train)//2, callbacks=[model_checkpoint, reduce_lr, tb])

Epoch 1/2
Epoch 2/2
153/735 [=====>........................] - ETA: 5:58 - loss: 0.0196 - accuracy: 0.9999

In [None]:
# NOTE(review): `model`, `train_generator` and `val_generator` are not defined in any cell
# visible in this notebook (the network above is `net`, the generator is `datagen`), so this
# cell raises NameError on a fresh kernel unless they are defined elsewhere — confirm or remove.
# NOTE(review): `fit_generator` is deprecated in TensorFlow 2.x; `fit` accepts generators directly.
model.fit_generator(train_generator, validation_data=val_generator, validation_steps=X_val.shape[0],
                     steps_per_epoch=X_train.shape[0], epochs=EPOCHS, verbose=2,
                     callbacks=[model_checkpoint, reduce_lr, tb], max_queue_size=1000)