In [1]:
import os
import random
import math
from contextlib import redirect_stdout

import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from IPython.display import display
from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

from utils import PlotLosses

# Project root. Set the CONVCERVIX_DIR environment variable to run the
# notebook on another machine without editing this cell.
ruta = os.environ.get('CONVCERVIX_DIR', 'C:/Users/marco/Documents/ConvCervix')
directorio_experimento = f'{ruta}/segmentacion_nucleo'
# makedirs(exist_ok=True) also creates missing parents and is safe on re-run.
os.makedirs(directorio_experimento, exist_ok=True)
# Every relative path used later in the notebook resolves against this folder.
os.chdir(directorio_experimento)

# Restrict TensorFlow to a single GPU: the second device when there is one
# (the original choice), otherwise the first. With no GPU nothing is changed
# and TensorFlow keeps its default device instead of raising IndexError.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    gpu_elegida = gpus[1] if len(gpus) > 1 else gpus[0]
    tf.config.experimental.set_visible_devices(gpu_elegida, 'GPU')
tf.compat.v1.disable_eager_execution()

In [10]:
def conv2d_block(input_tensor, n_filters, kernel_size=3, batchnorm=True):
    """Apply two consecutive Conv2D -> [BatchNormalization] -> ReLU stages.

    Args:
        input_tensor: Tensor fed to the first convolution.
        n_filters: Number of filters used by both convolutions.
        kernel_size: Side of the square convolution kernel.
        batchnorm: When true, a BatchNormalization layer follows each
            convolution, before the activation.

    Returns:
        The output tensor of the second ReLU activation.
    """
    layers = tf.keras.layers
    x = input_tensor
    # Both stages are configured identically, so build them in a loop.
    for _ in range(2):
        x = layers.Conv2D(
            filters=n_filters,
            kernel_size=(kernel_size, kernel_size),
            kernel_initializer="he_normal",
            padding="same",
        )(x)
        if batchnorm:
            x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
    return x

def unet(input_img, n_filters=16, dropout=0.5, batchnorm=True, optimizer='adam', loss='binary_crossentropy', metrics='accuracy'):
    """Build and compile a four-level U-Net with a one-channel sigmoid output.

    Args:
        input_img: Shape of a single input sample, passed to
            ``tf.keras.layers.Input``. The two spatial sizes should be
            multiples of 16: the encoder halves them four times and every
            skip concatenation needs matching spatial shapes.
        n_filters: Filters of the first level; deeper levels use 2x, 4x, 8x
            and 16x this value.
        dropout: Dropout rate applied after each pooling and each skip
            concatenation (the first encoder level uses half of it).
        batchnorm: Forwarded to ``conv2d_block``.
        optimizer: Forwarded to ``Model.compile``.
        loss: Forwarded to ``Model.compile``. The previous default,
            ``'binary-crossentropy'``, is not a Keras loss identifier.
        metrics: Forwarded to ``Model.compile``; a single metric given as a
            string is wrapped in a list.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    layers = tf.keras.layers
    if isinstance(metrics, str):
        metrics = [metrics]

    # contracting path
    inputTensor = layers.Input(input_img)
    c1 = conv2d_block(inputTensor, n_filters=n_filters*1, kernel_size=3, batchnorm=batchnorm)
    p1 = layers.MaxPooling2D((2, 2))(c1)
    p1 = layers.Dropout(dropout*0.5)(p1)

    c2 = conv2d_block(p1, n_filters=n_filters*2, kernel_size=3, batchnorm=batchnorm)
    p2 = layers.MaxPooling2D((2, 2))(c2)
    p2 = layers.Dropout(dropout)(p2)

    c3 = conv2d_block(p2, n_filters=n_filters*4, kernel_size=3, batchnorm=batchnorm)
    p3 = layers.MaxPooling2D((2, 2))(c3)
    p3 = layers.Dropout(dropout)(p3)

    c4 = conv2d_block(p3, n_filters=n_filters*8, kernel_size=3, batchnorm=batchnorm)
    p4 = layers.MaxPooling2D(pool_size=(2, 2))(c4)
    p4 = layers.Dropout(dropout)(p4)

    # bottleneck
    c5 = conv2d_block(p4, n_filters=n_filters*16, kernel_size=3, batchnorm=batchnorm)

    # expansive path: upsample, concatenate the matching encoder output, convolve
    u6 = layers.Conv2DTranspose(n_filters*8, (3, 3), strides=(2, 2), padding='same')(c5)
    u6 = layers.concatenate([u6, c4])
    u6 = layers.Dropout(dropout)(u6)
    c6 = conv2d_block(u6, n_filters=n_filters*8, kernel_size=3, batchnorm=batchnorm)

    u7 = layers.Conv2DTranspose(n_filters*4, (3, 3), strides=(2, 2), padding='same')(c6)
    u7 = layers.concatenate([u7, c3])
    u7 = layers.Dropout(dropout)(u7)
    c7 = conv2d_block(u7, n_filters=n_filters*4, kernel_size=3, batchnorm=batchnorm)

    u8 = layers.Conv2DTranspose(n_filters*2, (3, 3), strides=(2, 2), padding='same')(c7)
    u8 = layers.concatenate([u8, c2])
    u8 = layers.Dropout(dropout)(u8)
    c8 = conv2d_block(u8, n_filters=n_filters*2, kernel_size=3, batchnorm=batchnorm)

    u9 = layers.Conv2DTranspose(n_filters*1, (3, 3), strides=(2, 2), padding='same')(c8)
    u9 = layers.concatenate([u9, c1], axis=3)
    u9 = layers.Dropout(dropout)(u9)
    c9 = conv2d_block(u9, n_filters=n_filters*1, kernel_size=3, batchnorm=batchnorm)

    # 1x1 convolution producing one sigmoid value per pixel
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)
    model = tf.keras.Model(inputs=[inputTensor], outputs=[outputs])
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    return model

def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient of two tensors.

    Both tensors are flattened, so the value is computed over every element
    at once. The additive constant of 1 in numerator and denominator keeps
    the ratio defined when both inputs sum to zero.
    """
    K = tf.keras.backend
    smooth = 1.
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    total = K.sum(truth) + K.sum(pred)
    return (2. * overlap + smooth) / (total + smooth)

def bce_dice_loss(y_true, y_pred):
    """Combined loss: half the binary cross-entropy minus the Dice coefficient.

    Subtracting the Dice coefficient rewards overlap, so the value can be
    negative.
    """
    bce_term = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    dice_term = dice_coef(y_true, y_pred)
    return 0.5 * bce_term - dice_term

def carga_datos(df, x, y):
    """Load the images and masks listed in ``df`` into ``x`` and ``y``.

    Rows are written by position (0, 1, 2, ...), not by the DataFrame index
    label, so a subset that kept the labels of its parent frame (for example
    the result of ``.iloc``) fills the arrays correctly.

    Args:
        df: DataFrame with a ``file`` column (image path) and a ``file_mask``
            column (mask path).
        x: Preallocated array receiving one image per row, as returned by
            ``cv2.imread``.
        y: Preallocated array receiving one mask per row, read as grayscale,
            given a trailing channel axis and divided by 255.

    Returns:
        The same ``x`` and ``y`` arrays, filled in place.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image or a mask
            (``cv2.imread`` returns ``None`` in that case).
    """
    for pos, (ruta_imagen, ruta_mascara) in enumerate(zip(df['file'], df['file_mask'])):
        imagen = cv2.imread(ruta_imagen)
        if imagen is None:
            raise FileNotFoundError(f'No se pudo leer la imagen: {ruta_imagen}')
        mascara = cv2.imread(ruta_mascara, cv2.IMREAD_GRAYSCALE)
        if mascara is None:
            raise FileNotFoundError(f'No se pudo leer la mascara: {ruta_mascara}')
        x[pos] = imagen
        # Add the channel axis and scale the 8-bit mask to [0, 1].
        y[pos] = mascara[:, :, np.newaxis] / 255
    return x, y


In [12]:
# --- Experiment configuration -------------------------------------------------
BATCH_SIZE = 64
WIDTH = 256
HEIGHT = 256
HEIGTH = HEIGHT  # misspelled alias kept so code using the old name still runs
LR = 1e-4
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
OPT = tf.keras.optimizers.Adam(learning_rate=LR)
SEED = 111091
DROPOUT = 0.5
EPOCHS = 50
FILTERS = 16
BATCHNORM = True
LOSS_FUNC = bce_dice_loss
NUM_CLASSES = 2
# The explicit name makes Keras report the metric as 'mean_iou'/'val_mean_iou';
# without it the default name derived from the class is 'mean_io_u'.
# NOTE(review): MeanIoU casts predictions to integer labels; the sigmoid
# outputs are not thresholded first - confirm the reported IoU is meaningful.
METRICS = [tf.keras.metrics.MeanIoU(num_classes=NUM_CLASSES, name='mean_iou'), 'accuracy']
KFOLD_SPLITS = 10
INPUT_FORM = (WIDTH, HEIGHT, 3)
EVALUACIONES = []

# Seed every random number generator the notebook relies on.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Per-fold validation results, filled by the training loop.
avg_acc = []
avg_loss = []
avg_mean_iou = []
avg_io = avg_mean_iou  # old name, kept as an alias of the same list
kf = KFold(n_splits=KFOLD_SPLITS, shuffle=True, random_state=SEED)

datos = pd.read_csv(f'{ruta}/database/augment_database/database_augment.csv')

# Shuffle once with a fixed seed so the row order is reproducible.
datos_random = datos.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Hyperparameter table: labels and values are declared together so they cannot
# drift apart. The 'HEIGTH' label is kept as it was in previously written CSVs.
hiperparametros = {
    'BATCH_SIZE': BATCH_SIZE,
    'WIDTH': WIDTH,
    'HEIGTH': HEIGHT,
    'LR': LR,
    'OPT': OPT,
    'SEED': SEED,
    'EPOCHS': EPOCHS,
    'DROPOUT': DROPOUT,
    'BATCHNORM': BATCHNORM,
    'KFOLD_SPLITS': KFOLD_SPLITS,
    'NUM_CLASSES': NUM_CLASSES,
}
HYP = {'Valor': list(hiperparametros.values())}
df_hyp = pd.DataFrame(HYP, index=list(hiperparametros))
display(df_hyp)
df_hyp.to_csv('hiperparametros.csv')

In [13]:
# K-fold cross-validation: train one U-Net per fold and collect the validation
# loss, accuracy and mean IoU of the last epoch of each fold.

# Start from empty accumulators so re-running this cell does not append a
# second set of folds to results left over from a previous run.
avg_loss, avg_acc, avg_mean_iou = [], [], []

# Name under which Keras logs the MeanIoU metric (its `name` attribute);
# used for the checkpoint monitor and for reading the training history.
iou_name = next(
    (m.name for m in METRICS if isinstance(m, tf.keras.metrics.MeanIoU)),
    'mean_iou',
)

# Array shapes follow the model input shape so data and model always agree.
image_shape = tuple(INPUT_FORM)
mask_shape = image_shape[:2] + (1,)

for i, (train_indices, val_indices) in enumerate(kf.split(datos_random)):
    fold_dir = f'fold_{i}'
    print(f'Iniciando Fold: {i}')
    os.makedirs(fold_dir, exist_ok=True)

    print(f'Datos de entrenamiento: {len(train_indices)}')
    print(f'Datos de validacion: {len(val_indices)}')

    print('Dividiendo datos')
    # The indices come from splitting `datos_random`, so rows are taken from it.
    train = datos_random.iloc[train_indices]
    val = datos_random.iloc[val_indices]
    train.to_csv(os.path.join(fold_dir, "datos_train.csv"))
    val.to_csv(os.path.join(fold_dir, "datos_val.csv"))

    # Renumber the rows 0..n-1 (after saving, so the CSVs keep the original
    # row ids): the loader then fills the arrays in order either way.
    train = train.reset_index(drop=True)
    val = val.reset_index(drop=True)

    xtrain = np.empty(shape=(len(train.index),) + image_shape, dtype=np.float16)
    ytrain = np.empty(shape=(len(train.index),) + mask_shape, dtype=np.float16)

    xval = np.empty(shape=(len(val.index),) + image_shape, dtype=np.float16)
    yval = np.empty(shape=(len(val.index),) + mask_shape, dtype=np.float16)

    xtrain, ytrain = carga_datos(train, xtrain, ytrain)
    xval, yval = carga_datos(val, xval, yval)

    print('Construyendo modelo')
    # Each fold gets its own optimizer built from the configured one, so no
    # optimizer state is carried over from the previous fold.
    fold_optimizer = type(OPT).from_config(OPT.get_config()) if hasattr(OPT, 'get_config') else OPT
    model = unet(INPUT_FORM,
                 n_filters=FILTERS,
                 dropout=DROPOUT,
                 batchnorm=BATCHNORM,
                 optimizer=fold_optimizer,
                 loss=LOSS_FUNC,
                 metrics=METRICS)

    print('Creando generadores')
    data_gen_args = dict(horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode='constant')
    # Image and mask generators share the seed so shuffling and random flips
    # stay aligned between an image and its mask.
    X_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**data_gen_args)
    Y_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**data_gen_args)
    X_train_augmented = X_datagen.flow(xtrain, batch_size=BATCH_SIZE, shuffle=True, seed=SEED)
    Y_train_augmented = Y_datagen.flow(ytrain, batch_size=BATCH_SIZE, shuffle=True, seed=SEED)

    X_datagen_val = tf.keras.preprocessing.image.ImageDataGenerator()
    Y_datagen_val = tf.keras.preprocessing.image.ImageDataGenerator()
    X_test_augmented = X_datagen_val.flow(xval, batch_size=BATCH_SIZE, shuffle=True, seed=SEED)
    Y_test_augmented = Y_datagen_val.flow(yval, batch_size=BATCH_SIZE, shuffle=True, seed=SEED)

    # Combine generators into (image batch, mask batch) pairs.
    train_generator = zip(X_train_augmented, Y_train_augmented)
    test_generator = zip(X_test_augmented, Y_test_augmented)

    checkpoint = tf.keras.callbacks.ModelCheckpoint(f"{fold_dir}/model.best.h5",
                                                    monitor=f"val_{iou_name}",
                                                    verbose=1,
                                                    mode='max',
                                                    save_best_only=True)

    plot_losses = PlotLosses(figsize=(10,6))
    callbacks_list = [
        plot_losses,
        checkpoint,
        tf.keras.callbacks.CSVLogger(os.path.join(fold_dir, 'log.csv')),
    ]

    # The architecture is the same for every fold, so these two files are
    # simply overwritten on each iteration.
    with open('summary.txt', 'w') as f:
        with redirect_stdout(f):
            model.summary()

    tf.keras.utils.plot_model(
        model,
        to_file='model.png',
        show_shapes=True,
        show_layer_names=True,
        rankdir='TB'
    )

    print('Iniciando entrenamiento')
    history = model.fit(train_generator,
                        validation_data=test_generator,
                        validation_steps=math.ceil(len(val_indices) / BATCH_SIZE),
                        steps_per_epoch=math.ceil(len(train_indices) / BATCH_SIZE),
                        epochs=EPOCHS,
                        callbacks=callbacks_list,
                        verbose=1,
                        shuffle=True)

    val_accuracy = history.history['val_accuracy']
    val_mean_iou = history.history[f'val_{iou_name}']
    val_loss = history.history['val_loss']

    # Keep the last-epoch validation values of this fold.
    avg_loss.append(val_loss[-1])
    avg_acc.append(val_accuracy[-1]*100)
    avg_mean_iou.append(val_mean_iou[-1]*100)

    print('Salvando modelo')
    model.save(os.path.join(fold_dir, 'model.h5'))

print(f'Resultados de la validación cruzada (K = {KFOLD_SPLITS})')
# The loss is not a percentage, so its spread is printed without '%'.
print(f'Pérdida {np.mean(avg_loss)} (+/- {np.std(avg_loss)})')
print(f'Precisión {np.mean(avg_acc)}% (+/- {np.std(avg_acc)}%)')
print(f'IOU promedio {np.mean(avg_mean_iou)}% (+/- {np.std(avg_mean_iou)}%)')

In [None]:
# Collect the per-fold validation results in one table (one row per fold,
# indexed by fold number) and write it to disk.
rendimiento = {'acc': avg_acc, 'loss': avg_loss, 'mean_iou': avg_mean_iou}
fold_ids = [*range(KFOLD_SPLITS)]
df_rendimiento = pd.DataFrame(rendimiento, index=fold_ids)
df_rendimiento.index.name = 'fold'
df_rendimiento.to_csv('rendimiento_experimento.csv')