In [1]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Activation, Conv2D, MaxPooling2D, AveragePooling2D, UpSampling2D, Conv2DTranspose, Concatenate, BatchNormalization, Cropping2D

import keras

import os
from glob import glob, iglob
from typing import List, Tuple, Union
import enum

import matplotlib.pyplot as plt
import numpy as np

from utils import multipredict

2024-04-04 17:04:30.473263: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Sanity check: show the physical devices TensorFlow can see in this kernel
# (the result is displayed as the cell output).
tf.config.list_physical_devices()

2024-04-04 17:04:33.648741: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-04 17:04:33.657036: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-04 17:04:33.657327: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Dataset

In [3]:
# --- Input pipeline configuration -------------------------------------------
IMG_HEIGHT: int = 512    # height every image/mask is resized to
IMG_WIDTH: int = 512     # width every image/mask is resized to
IMG_CHANNELS: int = 3    # colour channels requested when decoding images
NUM_CLASSES: int = 5     # depth of the one-hot encoded masks
BATCH_SIZE: int = 4      # default batch size used by data_generator

# Root folder of the dataset
DATA_DIR: str = "dataset"

def read_image(image_path:str) -> tf.Tensor:
    '''
    Read an image file and return it as a fixed-size uint8 tensor.

    Args:
        image_path: path of the encoded image file.

    Returns:
        A tensor of shape (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS) and dtype uint8.
    '''

    image = tf.io.read_file(image_path)
    # NOTE(review): the training globs match *.jpg files; decode_png is
    # documented to decode JPEG data as well, but tf.io.decode_image would
    # state the intent more clearly - confirm before switching.
    image = tf.image.decode_png(image, channels=IMG_CHANNELS)
    image.set_shape([None, None, IMG_CHANNELS])
    # tf.image.resize expects size=[new_height, new_width]; the previous
    # [IMG_WIDTH, IMG_HEIGHT] order only worked because both values are equal.
    image = tf.image.resize(images=image, size=[IMG_HEIGHT, IMG_WIDTH])
    # The default (bilinear) resize returns float32, so cast back to uint8.
    image = tf.cast(image, dtype=tf.uint8)

    return image


def read_mask(mask_path:str, num_classes:int=NUM_CLASSES) -> tf.Tensor:
    '''
    Read a segmentation mask and return it as a (one-hot encoded) tensor.

    Args:
        mask_path: path of the PNG file whose single channel stores class ids.
        num_classes: number of classes. When greater than 1 the mask is
            one-hot encoded with this depth.

    Returns:
        A float32 tensor of shape (IMG_HEIGHT, IMG_WIDTH, num_classes) when
        num_classes > 1, otherwise a uint8 tensor of shape
        (IMG_HEIGHT, IMG_WIDTH, 1) holding the raw values.
    '''

    mask = tf.io.read_file(mask_path)
    mask = tf.image.decode_png(mask, channels=1)
    mask.set_shape([None, None, 1])
    # Class ids are categorical: interpolating between them (the default
    # bilinear method) invents labels that are not in the mask, so resize with
    # nearest-neighbour instead. size is [new_height, new_width].
    mask = tf.image.resize(
        images=mask,
        size=[IMG_HEIGHT, IMG_WIDTH],
        method="nearest",
    )
    mask = tf.cast(mask, dtype=tf.uint8)
    # NOTE(review): the original comment said an "other" class may be stored as
    # value 8 (unchecked). tf.one_hot maps any id >= num_classes to an all-zero
    # vector - confirm that this is the intended handling.
    if num_classes > 1:
        mask = tf.squeeze(mask, axis=2)  # drop the channel axis before encoding
        mask = tf.one_hot(mask, depth=num_classes)

    return mask

def load_data(image_list:List[str], mask_list:List[str]) -> Tuple[tf.Tensor,tf.Tensor]:
    '''
    Load one (image, mask) pair.

    The arguments are forwarded unchanged to `read_image` and `read_mask`.
    When this function is used through `Dataset.map` in `data_generator`,
    each argument is a single element of the sliced path lists, i.e. one path,
    despite the `*_list` parameter names.
    '''
    return read_image(image_list), read_mask(mask_list)

def data_generator(image_list:List[str], mask_list:List[str],batch_size:int=BATCH_SIZE, shuffle:bool=False) -> tf.data.Dataset:
    '''
    Build a batched dataset of (image, mask) pairs from two lists of paths.

    Args:
        image_list: image file paths.
        mask_list: mask file paths, aligned index by index with image_list.
        batch_size: number of samples per batch; an incomplete final batch is
            dropped.
        shuffle: when True, the (image, mask) path pairs are reshuffled on
            every iteration over the dataset. Defaults to False, which keeps
            the order of the input lists.

    Returns:
        A tf.data.Dataset yielding (images, masks) batches.

    Raises:
        ValueError: if the two lists differ in length or are empty.
    '''
    num_samples = len(image_list)
    if num_samples != len(mask_list):
        raise ValueError(
            f"Got {num_samples} images but {len(mask_list)} masks; "
            "every image needs exactly one mask."
        )
    if num_samples == 0:
        raise ValueError("No image/mask paths were given; check the dataset folders.")

    dataset = tf.data.Dataset.from_tensor_slices((image_list, mask_list))
    if shuffle:
        # Shuffle the path pairs (cheap strings) before decoding; a buffer as
        # large as the dataset gives a full shuffle.
        dataset = dataset.shuffle(buffer_size=num_samples, reshuffle_each_iteration=True)
    dataset = dataset.map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size, drop_remainder=True)
    # Overlap input loading/decoding with the training step.
    return dataset.prefetch(tf.data.AUTOTUNE)

# Images and masks are paired by position after sorting the file names.
# The training set is shuffled here because Keras does not shuffle tf.data
# inputs itself.
train_dataset = data_generator(
    sorted(glob(os.path.join(DATA_DIR, 'augmented_images_train', '*.jpg'))),
    sorted(glob(os.path.join(DATA_DIR, 'augmented_masks_train', '*.png'))),
    shuffle=True,
)
val_dataset = data_generator(
    sorted(glob(os.path.join(DATA_DIR, 'augmented_images_val', '*.jpg'))),
    sorted(glob(os.path.join(DATA_DIR, 'augmented_masks_val', '*.png'))),
)

# Adding class weights: one weight per class index.
# Alternative weighting kept for reference: [1, 1, 5, 1, 0.8]
class_weights = tf.constant([1,1,2,1,0.8])

def map_weights(image, label):
    '''
    Append a sample-weight tensor to an (image, label) element.

    The label is multiplied by `class_weights` along its last axis and the
    products are summed over that axis, so for a one-hot label every position
    receives the weight of its class.
    '''
    weighted_label = tf.multiply(label, class_weights)
    return image, label, tf.reduce_sum(weighted_label, axis=-1)

# Only the training set gets the extra sample-weight component
train_dataset = train_dataset.map(map_weights)

# Show the element specs of both datasets
for _title, _ds in (("Train Dataset:  ", train_dataset), ("Val Dataset:  ", val_dataset)):
    print(_title, _ds)

2024-04-04 17:04:35.534833: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-04 17:04:35.535268: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-04 17:04:35.535600: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Train Dataset:   <_MapDataset element_spec=(TensorSpec(shape=(4, 512, 512, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(4, 512, 512, 5), dtype=tf.float32, name=None), TensorSpec(shape=(4, 512, 512), dtype=tf.float32, name=None))>
Val Dataset:   <_BatchDataset element_spec=(TensorSpec(shape=(4, 512, 512, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(4, 512, 512, 5), dtype=tf.float32, name=None))>


# Model: DeepLabV3+

In [4]:
def convolution_block(
    block_input,
    num_filters=256,
    kernel_size=3,
    dilation_rate=1,
    use_bias=False,
):
    '''
    Apply Conv2D -> BatchNormalization -> ReLU to `block_input`.

    The convolution uses "same" padding and He-normal kernel initialisation;
    the filter count, kernel size, dilation rate and bias usage come from the
    arguments.
    '''
    conv_layer = Conv2D(
        filters=num_filters,
        kernel_size=kernel_size,
        dilation_rate=dilation_rate,
        padding="same",
        use_bias=use_bias,
        kernel_initializer=keras.initializers.HeNormal(),
    )
    features = conv_layer(block_input)
    normalized = BatchNormalization()(features)
    return tf.keras.ops.nn.relu(normalized)


def DilatedSpatialPyramidPooling(dspp_input):
    '''
    Build the dilated spatial pyramid pooling head on top of `dspp_input`.

    Five branches are concatenated along the channel axis and fused by a 1x1
    convolution block: an average-pooled branch that is upsampled back to the
    input resolution, one 1x1 branch, and three 3x3 branches with dilation
    rates 6, 12 and 18.
    '''
    height, width = dspp_input.shape[-3], dspp_input.shape[-2]

    # Image-level branch: pool over the whole feature map, then restore its size
    pooled = AveragePooling2D(pool_size=(height, width))(dspp_input)
    pooled = convolution_block(pooled, kernel_size=1, use_bias=True)
    upsample_factor = (height // pooled.shape[1], width // pooled.shape[2])
    out_pool = UpSampling2D(size=upsample_factor, interpolation="bilinear")(pooled)

    # (kernel_size, dilation_rate) of each convolutional branch, in build order
    branch_specs = ((1, 1), (3, 6), (3, 12), (3, 18))
    branches = [out_pool]
    for branch_kernel, branch_rate in branch_specs:
        branches.append(
            convolution_block(
                dspp_input, kernel_size=branch_kernel, dilation_rate=branch_rate
            )
        )

    merged = Concatenate(axis=-1)(branches)
    return convolution_block(merged, kernel_size=1)


def DeeplabV3Plus(image_size, num_classes):
    '''
    Assemble a DeepLabV3+ model on a frozen ImageNet ResNet50 backbone.

    Args:
        image_size: side length of the (square) RGB input.
        num_classes: number of channels of the output map.

    Returns:
        A keras.Model mapping (image_size, image_size, 3) inputs to a
        per-pixel map with `num_classes` channels. The last layer is a 1x1
        Conv2D without activation, so the outputs are unnormalised scores.
    '''
    inputs = keras.Input(shape=(image_size, image_size, 3))
    backbone = keras.applications.ResNet50(
        weights="imagenet",
        include_top=False,
        input_tensor=keras.applications.resnet50.preprocess_input(inputs),
    )
    # Keep every backbone layer frozen
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Encoder: deep backbone features followed by the pyramid pooling head
    encoded = backbone.get_layer("conv4_block6_2_relu").output
    encoded = DilatedSpatialPyramidPooling(encoded)

    # Bring the encoder output to 1/4 of the input resolution
    quarter_size = image_size // 4
    input_a = UpSampling2D(
        size=(quarter_size // encoded.shape[1], quarter_size // encoded.shape[2]),
        interpolation="bilinear",
    )(encoded)

    # Low-level features from an early backbone stage, reduced to 48 channels
    input_b = convolution_block(
        backbone.get_layer("conv2_block3_2_relu").output,
        num_filters=48,
        kernel_size=1,
    )

    # Decoder: fuse both streams, refine, and upsample to the input size
    decoded = Concatenate(axis=-1)([input_a, input_b])
    for _ in range(2):
        decoded = convolution_block(decoded)
    decoded = UpSampling2D(
        size=(image_size // decoded.shape[1], image_size // decoded.shape[2]),
        interpolation="bilinear",
    )(decoded)

    model_output = Conv2D(num_classes, kernel_size=(1, 1), padding="same")(decoded)
    return keras.Model(inputs=inputs, outputs=model_output)


# The builder takes a single side length, so IMG_WIDTH is used for both
# spatial dimensions of the input.
model = DeeplabV3Plus(IMG_WIDTH, NUM_CLASSES)
model.summary()


# Train

In [5]:
model.compile(
    optimizer=Adam(
        learning_rate=1e-3,
        beta_1=.99,
        beta_2=.999,
    ),
    # The model's last layer is a Conv2D without activation, so it outputs
    # logits. The string 'categorical_crossentropy' assumes probabilities
    # (from_logits=False), which yields a wrong loss; state it explicitly.
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=[
        tf.keras.metrics.OneHotIoU(
            num_classes=NUM_CLASSES,
            target_class_ids=list(range(NUM_CLASSES)),
            sparse_y_pred=False,  # when False, predictions are retrieved with argmax
        ),
    ],
)


In [6]:
# Stop training once val_loss has not improved for 6 epochs
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=6,
)

# Multiply the learning rate by 0.1 after 3 epochs without val_loss improvement
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
    verbose=1,
    mode='auto',
    min_delta=0.0001,
)

tensorboard = tf.keras.callbacks.TensorBoard(log_dir='logs')

# Keep only the full model with the best val_loss, checked once per epoch
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    os.path.join('models','checkpoints','Checkpoint.model.keras'),
    monitor='val_loss',
    verbose=2,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch',
    initial_value_threshold=None,
)

callbacks = [
    early_stopping,
    reduce_lr,
    tensorboard,
    tf.keras.callbacks.History(),
    checkpoint,
]

In [7]:
# NOTE: Keras documents `shuffle` as having no effect when `x` is a
# tf.data.Dataset; any shuffling has to happen in the input pipeline itself.
fit_options = dict(
    epochs=100,
    verbose=2,
    callbacks=callbacks,
    validation_data=val_dataset,
    shuffle=True,
    initial_epoch=0,
)
history = model.fit(train_dataset, **fit_options)

Epoch 1/100


I0000 00:00:1712243120.975347    8078 service.cc:145] XLA service 0x763f80004230 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1712243120.975508    8078 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce GTX 1050, Compute Capability 6.1
W0000 00:00:1712243121.739859    8078 assert_op.cc:38] Ignoring Assert operator Assert/Assert
2024-04-04 17:05:21.790037: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-04-04 17:05:23.597836: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907
2024-04-04 17:05:37.082479: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 38.91GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were ava

In [None]:
# Make sure the target folder exists instead of relying on another step
# (e.g. a checkpoint callback) having created it.
os.makedirs('models', exist_ok=True)
# Saves the model in its current state, i.e. as left by the last training epoch.
model.save('models/DeepLabV3+.model.keras')

In [None]:
# Show which series were recorded during training; these keys are the ones
# indexed by the plotting code.
history.history.keys()

In [None]:
# Loss (red) and IoU (blue) curves; solid = validation, dashed = training
ax = plt.gca()
curves = (
    ('val_loss', 'r-', 'Validation Loss'),
    ('loss', 'r--', 'Loss'),
    ('one_hot_io_u', 'b--', 'IoU'),
    ('val_one_hot_io_u', 'b-', 'Validation IOU'),
)
for series_key, line_format, series_label in curves:
    ax.plot(history.history[series_key], line_format, label=series_label)

ax.set_xlabel('Epochs')
# Start both axes at zero
ax.set_xlim(0)
ax.set_ylim(0)
ax.legend()

In [None]:
# Read the list of test file names once and close the file afterwards.
# Blank lines are skipped so they do not turn into bare directory paths.
with open(os.path.join(DATA_DIR, 'test.txt')) as test_file:
    test_names = [line.strip() for line in test_file if line.strip()]

# The same file name is looked up in the images folder and in the masks folder.
test_images = [os.path.join(DATA_DIR, 'images', name) for name in test_names]
test_masks = [os.path.join(DATA_DIR, 'masks', name) for name in test_names]

In [None]:
# Quick check only: evaluates on the first two validation batches (`take(2)`),
# not on the whole validation set.
model.evaluate(val_dataset.take(2))

In [None]:
# Run the project helper `utils.multipredict` on two test samples (indices 8 and 9).
# NOTE(review): presumably it displays predictions next to the given masks -
# confirm against utils.multipredict.
multipredict(test_images[8:10],test_masks[8:10],model)
# Optional: uncomment to save the resulting figure.
# plt.savefig("DeepLabV3+512x512.jpg")