In [None]:
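# Google Colab only: uncomment the two lines below to mount Google Drive.
# The checkpoint paths used further down point into '/content/drive/My Drive/'.
#from google.colab import drive
#drive.mount('/content/drive/')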

In [None]:
import os
import tensorflow as tf
import numpy as np
import cv2 as cv

In [None]:
# Defining meta-information for model
IMG_WIDTH = 256
IMG_HEIGHT = 256
# Reasoning for INTER_AREA: it seems to close gaps well but results in smoother
# lines, which is nice for the longer lines found on the ids.
interpolation_method = cv.INTER_AREA

# Defining Paths
# `__file__` is not defined inside a notebook kernel, so fall back to the current
# working directory there. When this code runs as a script, the script's directory
# is used exactly as before.
try:
    dirname = os.path.dirname(__file__)
except NameError:
    dirname = os.getcwd()
base_path = os.path.join(dirname, 'correct_path_to_dataset/2_segmentation/2_segmentation/') # define the correct path
train_path = os.path.join(base_path, 'Train/')
test_path = os.path.join(base_path, 'Test/')

# Each split holds the input images in 'Ids' and the masks in 'GroundTruth'.
train_ids_dir = os.path.join(train_path, 'Ids')
train_masks_dir = os.path.join(train_path, 'GroundTruth')

# Sorted listings: images and masks are later paired purely by position.
train_ids_list = sorted(os.listdir(train_ids_dir))
train_masks_list = sorted(os.listdir(train_masks_dir))

test_ids_dir = os.path.join(test_path, 'Ids')
test_masks_dir = os.path.join(test_path, 'GroundTruth')

test_ids_list = sorted(os.listdir(test_ids_dir))
test_masks_list = sorted(os.listdir(test_masks_dir))


def _load_image_mask_pairs(ids_dir, ids_list, masks_dir, masks_list):
    """Load, resize and normalise one dataset split.

    Images and masks are paired by position in the two (sorted) file lists.

    Args:
        ids_dir: Directory containing the input images.
        ids_list: File names inside `ids_dir`.
        masks_dir: Directory containing the ground-truth masks.
        masks_list: File names inside `masks_dir`, in the same order as `ids_list`.

    Returns:
        Tuple `(images, masks)` of numpy arrays scaled by 1/255. Images are read
        with the default `cv.imread` flags, masks as grayscale; both are resized
        to (IMG_WIDTH, IMG_HEIGHT) with `interpolation_method`.

    Raises:
        ValueError: If the two lists differ in length (positional pairing would
            silently mismatch or drop samples) or a file cannot be decoded.
    """
    if len(ids_list) != len(masks_list):
        raise ValueError(
            f"Number of images ({len(ids_list)}) in '{ids_dir}' does not match "
            f"number of masks ({len(masks_list)}) in '{masks_dir}'"
        )

    images = []
    masks = []
    for img_name, mask_name in zip(ids_list, masks_list):
        img_path = os.path.join(ids_dir, img_name)
        mask_path = os.path.join(masks_dir, mask_name)

        # cv.imread signals failure by returning None instead of raising.
        img = cv.imread(img_path)
        if img is None:
            raise ValueError(f"Could not read image file: {img_path}")
        images.append(cv.resize(img, (IMG_WIDTH, IMG_HEIGHT), interpolation=interpolation_method))

        # Masks are assumed to be grayscale.
        mask = cv.imread(mask_path, cv.IMREAD_GRAYSCALE)
        if mask is None:
            raise ValueError(f"Could not read mask file: {mask_path}")
        masks.append(cv.resize(mask, (IMG_WIDTH, IMG_HEIGHT), interpolation=interpolation_method))

    # Convert to numpy arrays and scale pixel values by 1/255.
    return np.array(images) / 255.0, np.array(masks) / 255.0


# Handle Training Data
train_images, train_masks = _load_image_mask_pairs(
    train_ids_dir, train_ids_list, train_masks_dir, train_masks_list
)

# Handle Testing Data
test_images, test_masks = _load_image_mask_pairs(
    test_ids_dir, test_ids_list, test_masks_dir, test_masks_list
)

In [None]:
# Defining our U-Net (4 down, 1 bottleneck, 4 up)
IMG_CHANNELS = 3


def _double_conv(x, filters, dropout_rate):
    """Apply two 3x3 'same'-padded ReLU convolutions with dropout in between.

    Both convolutions use He-normal initialisation.
    """
    x = tf.keras.layers.Conv2D(filters, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(x)
    x = tf.keras.layers.Dropout(dropout_rate)(x)
    return tf.keras.layers.Conv2D(filters, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same')(x)


def _upsample_and_merge(x, skip, filters):
    """Upsample `x` by 2 with a transposed convolution and concatenate the skip tensor."""
    x = tf.keras.layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2))(x)
    return tf.keras.layers.concatenate([x, skip])


# Keras image tensors are (height, width, channels); this matches the arrays
# produced by cv.resize(img, (IMG_WIDTH, IMG_HEIGHT)).
inputs = tf.keras.layers.Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))

# downsampling
c1 = _double_conv(inputs, 16, 0.05)
p1 = tf.keras.layers.MaxPooling2D((2, 2))(c1)

c2 = _double_conv(p1, 32, 0.05)
p2 = tf.keras.layers.MaxPooling2D((2, 2))(c2)

c3 = _double_conv(p2, 64, 0.1)
p3 = tf.keras.layers.MaxPooling2D((2, 2))(c3)

c4 = _double_conv(p3, 128, 0.15)
p4 = tf.keras.layers.MaxPooling2D((2, 2))(c4)

# bottleneck
c5 = _double_conv(p4, 256, 0.15)

# upsampling (each stage is merged with the encoder output of the same resolution)
u6 = _upsample_and_merge(c5, c4, 128)
c6 = _double_conv(u6, 128, 0.1)

u7 = _upsample_and_merge(c6, c3, 64)
c7 = _double_conv(u7, 64, 0.1)

u8 = _upsample_and_merge(c7, c2, 32)
c8 = _double_conv(u8, 32, 0.05)

u9 = _upsample_and_merge(c8, c1, 16)
c9 = _double_conv(u9, 16, 0.05)

# Single-channel sigmoid output: one value in [0, 1] per pixel.
outputs = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')(c9)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Print the layer-by-layer summary of `model` to verify the architecture.
model.summary()

In [None]:
# https://stackoverflow.com/questions/63952338/how-to-save-best-weights-and-best-model-using-keras

# Metric names are set explicitly so that the history keys and the checkpoint
# monitor do not depend on an automatically generated name suffix (the '_4' was
# previously produced implicitly and is kept here so existing history lookups
# continue to work).
IOU_METRIC_NAME = 'binary_io_u_4'
RECALL_METRIC_NAME = 'recall'

# Keep only the weights of the epoch with the highest validation IoU.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    '/content/drive/My Drive/ipda/models/ipda_task_2_clean_best_weights.h5',
    monitor=f'val_{IOU_METRIC_NAME}',
    verbose=1,
    save_best_only=True,
    mode='max',
)
callbacks_list = [checkpoint]

# Why adam for optimizer? Generally good performance according to https://stackoverflow.com/questions/37214884/how-do-i-choose-an-optimizer-for-my-tensorflow-model and common use in practice according to lecture slides
# Why binary_focal_crossentropy for loss? We work with binary segmentation (imbalanced problem).
# Why these metrics? The problem is imbalanced, so Recall and IoU are added to the mix instead of relying on accuracy alone.
# https://www.tensorflow.org/api_docs/python/tf/keras/metrics/MeanIoU
model.compile(
    optimizer='adam',
    loss='binary_focal_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(name=RECALL_METRIC_NAME),
        tf.keras.metrics.BinaryIoU(target_class_ids=[1], threshold=0.5, name=IOU_METRIC_NAME),
    ],
)
fitting_information = model.fit(train_images, train_masks, validation_split=0.2, epochs=100, batch_size=16, callbacks=callbacks_list)

In [None]:
# Test model and print training, validation and test metrics
model.load_weights('/content/drive/My Drive/ipda/models/ipda_task_2_clean_best_weights.h5')
test_loss, test_acc, test_recall, test_binary_io_u = model.evaluate(test_images, test_masks)


def _metric_key(history, prefix):
    """Return the first key of `history` that starts with `prefix`.

    Metric names may carry an automatically generated numeric suffix
    (e.g. 'binary_io_u_4'), so the key is looked up by prefix instead of
    being hard-coded.

    Raises:
        KeyError: If no key starts with `prefix`.
    """
    for key in history:
        if key.startswith(prefix):
            return key
    raise KeyError(f"no metric starting with '{prefix}' found in training history")


train_history = fitting_information.history
recall_key = _metric_key(train_history, 'recall')
iou_key = _metric_key(train_history, 'binary_io_u')

# NOTE: the training/validation values below are those of the LAST epoch ([-1]),
# whereas the test values are computed with the weights loaded from the checkpoint file above.
print(f"training loss: {train_history['loss'][-1]}, training accuracy: {train_history['accuracy'][-1]}, training recall: {train_history[recall_key][-1]}, training binary_io_u: {train_history[iou_key][-1]}")
print(f"validation loss: {train_history['val_loss'][-1]}, validation accuracy: {train_history['val_accuracy'][-1]}, validation recall: {train_history['val_' + recall_key][-1]}, validation binary_io_u: {train_history['val_' + iou_key][-1]}")
print(f"test loss: {test_loss}, test accuracy: {test_acc}, test recall: {test_recall}, test binary_io_u: {test_binary_io_u}")

training loss: 0.0008193336543627083, training accuracy: 0.9937513470649719, training recall: 0.9707491397857666, training binary_io_u: 0.9680373072624207
validation loss: 0.010075886733829975, validation accuracy: 0.9886512756347656, validation recall: 0.9129956960678101, validation binary_io_u: 0.9271654486656189
test loss: 0.0058863540180027485, test accuracy: 0.9904333353042603, test recall: 0.9397119879722595, test binary_io_u: 0.947098970413208


In [None]:
# Save Model
# Writes the in-memory `model` to 'models/ipda_task_2_clean_6' below `dirname`.
# NOTE(review): the path has no file extension; which on-disk format this
# produces (and whether it is accepted at all) depends on the installed Keras
# version - confirm before relying on it.
model_save_path = os.path.join(dirname,'models/ipda_task_2_clean_6')
model.save(model_save_path)

In [None]:
# Performing Segmentation w/ Mask
# Cut-off used by the helpers below to binarise a mask.
threshold_value = 0.2

def threshold_mask(mask,threshold_value):
    """Binarise `mask`: entries >= `threshold_value` become 1, all others 0 (dtype uint8)."""
    is_foreground = mask >= threshold_value
    return np.asarray(is_foreground).astype(np.uint8)

def resize_mask_to_image(image, mask):
    """Resize `mask` to the height and width of `image` using bicubic interpolation.

    Both arguments are numpy arrays, e.g. an image of shape (256, 256, 3)
    and a mask of shape (256, 256, 1).
    """
    target_height, target_width = image.shape[:2]
    # cv.resize takes the target size as (width, height).
    return cv.resize(mask, (target_width, target_height), interpolation=cv.INTER_CUBIC)

def apply_mask_to_image(image, mask):
    """Return `image` with only the pixels selected by `mask` kept.

    The mask is first resized to the image size and then binarised with the
    module-level `threshold_value`; the binary mask is passed to
    `cv.bitwise_and` as its `mask` argument.
    """
    binary_mask = threshold_mask(resize_mask_to_image(image, mask), threshold_value)
    return cv.bitwise_and(image, image, mask=binary_mask)

def create_bounding_box(mask):
    """Return the bounding box (x, y, w, h) of the largest external contour in `mask`.

    The mask is binarised with the module-level `threshold_value` first.
    Returns None when no contour is found.
    """
    binary_mask = threshold_mask(mask, threshold_value)

    # Only the outermost contours are retrieved.
    found_contours, _hierarchy = cv.findContours(binary_mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not found_contours:
        return None

    # "Largest" is measured by contour area.
    biggest = max(found_contours, key=cv.contourArea)
    left, top, width, height = cv.boundingRect(biggest)
    return left, top, width, height

def crop_image_using_mask(image, mask):
    """Crop `image` to the bounding box of the largest region in `mask`.

    Args:
        image: Image as a numpy array.
        mask: Mask as a numpy array; it is resized to the image size before
            the bounding box is computed.

    Returns:
        The cropped sub-array of `image`, or None when the mask contains no
        contour (i.e. `create_bounding_box` returns None).
    """
    # Resize Mask
    resized_mask = resize_mask_to_image(image, mask)

    # Create bounding box around the mask. The result must be checked BEFORE
    # unpacking: create_bounding_box returns None (not a 4-tuple) when nothing
    # is found, and unpacking None would raise a TypeError.
    bounding_box = create_bounding_box(resized_mask)
    if bounding_box is None:
        return None

    # Crop the image using the bounding box coordinates
    x, y, w, h = bounding_box
    return image[y:y+h, x:x+w]