In [1]:
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()
# TOKEN = user_secrets.get_secret("GITHUB_TOKEN")
#
# !git clone https://{TOKEN}@github.com/nicoloalbergoni/DeeplabV3Plus-TF2.git
# %load_ext tensorboard
# %cd ./DeeplabV3Plus-TF2

In [2]:
#!python download_and_prepare_voc.py --remove_cmap --download_berkley

In [3]:
import os
import random
import h5py
import numpy as np
import seaborn as sns
from tqdm import tqdm
import tensorflow as tf
from model import DeeplabV3Plus
import tensorflow_addons as tfa
from matplotlib import pyplot as plt
from superresolution import Superresolution
from utils import load_image, get_prediction, create_mask, plot_prediction
from superres_utils import get_img_paths, filter_by_class, min_max_normalization, plot_image, plot_histogram, list_precomputed_data_paths, check_hdf5_validity
import keras_tuner as kt

In [4]:
BASE_DIR = os.getcwd()
DATA_DIR = os.path.join(BASE_DIR, "data")
PASCAL_ROOT = os.path.join(DATA_DIR, "VOCdevkit", "VOC2012")
IMGS_PATH = os.path.join(PASCAL_ROOT, "JPEGImages")

SUPERRES_ROOT = os.path.join(DATA_DIR, "superres_root")
PRECOMPUTED_OUTPUT_DIR = os.path.join(SUPERRES_ROOT, "precomputed_features")
STANDARD_OUTPUT_DIR = os.path.join(SUPERRES_ROOT, "standard_output")
SUPERRES_OUTPUT_DIR = os.path.join(SUPERRES_ROOT, "superres_output")

# SEED = np.random.randint(0, 1000)
SEED = 1234
IMG_SIZE = (512, 512)
BATCH_SIZE = 2
BUFFER_SIZE = 1000
EPOCHS = 30
CLASSES = 21
RESHAPE_MASKS = True
NUM_AUG = 50
CLASS_ID = 8
NUM_SAMPLES = 300

In [5]:
image_list_path = os.path.join(DATA_DIR, "augmented_file_lists", "valaug.txt")
image_paths = get_img_paths(image_list_path, IMGS_PATH)

if NUM_SAMPLES is not None:
    image_paths = image_paths[:NUM_SAMPLES]

images_dict = filter_by_class(image_paths, class_id=CLASS_ID)

print(f"Valid images: {len(images_dict)} (Initial:  {len(image_paths)})")

valid_filenames = list(images_dict.keys())

model_no_upsample = DeeplabV3Plus(
    input_shape=(512, 512, 3),
    classes=21,
    OS=16,
    last_activation=None,
    load_weights=True,
    backbone="mobilenet",
    alpha=1.).build_model(final_upsample=False)

model_standard = DeeplabV3Plus(
    input_shape=(512, 512, 3),
    classes=21,
    OS=16,
    last_activation=None,
    load_weights=True,
    backbone="mobilenet",
    alpha=1.).build_model(final_upsample=True)

Valid images: 26 (Initial:  300)


# Compute standard output for comparison

In [6]:
def compute_standard_output(image_dict, model, dest_folder, filter_class_id=None):
    standard_masks = {}
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    for key in tqdm(image_dict):
        standard_mask = get_prediction(model, image_dict[key])
        if filter_class_id is not None:
            standard_mask = tf.where(standard_mask == filter_class_id, standard_mask, 0) # Set to 0 all predictions different from the given class
        tf.keras.utils.save_img(f"{dest_folder}/{key}.png", standard_mask, scale=False)
        standard_masks[key] = standard_mask

    return standard_masks

In [7]:
standard_masks_dict = compute_standard_output(images_dict, model_standard, dest_folder=STANDARD_OUTPUT_DIR, filter_class_id=CLASS_ID)

100%|██████████| 26/26 [00:06<00:00,  3.90it/s]


# Precompute Augmented Output Features

In [8]:
def create_augmented_copies(image, num_aug, angle_max, shift_max):

    batched_images = tf.tile(tf.expand_dims(image, axis=0), [num_aug, 1, 1, 1])  # Size [num_aug, 512, 512, 3]
    angles = np.random.uniform(-angle_max, angle_max, num_aug)
    shifts = np.random.uniform(-shift_max, shift_max, (num_aug, 2))
    # First sample is not augmented
    angles[0] = 0
    shifts[0] = np.array([0, 0])
    angles = angles.astype("float32")
    shifts = shifts.astype("float32")

    rotated_images = tfa.image.rotate(batched_images, angles, interpolation="bilinear")
    translated_images = tfa.image.translate(rotated_images, shifts, interpolation="bilinear")

    return translated_images, angles, shifts

def compute_augmented_features(image_filenames, model, dest_folder, filter_class_id, mode="slice", num_aug=100, angle_max=0.5, shift_max=30, save_output=False, relu_output=False):

    augmented_features = {}

    for filename in tqdm(image_filenames):

        # Load image
        image_path = os.path.join(IMGS_PATH, f"{filename}.jpg")
        image = load_image(image_path, image_size=IMG_SIZE, normalize=True)

        # Create augmented copies
        augmented_images, angles, shifts = create_augmented_copies(image, num_aug=num_aug, angle_max=angle_max, shift_max=shift_max)

        # Create destination folder
        output_folder = os.path.join(dest_folder, filename)
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        # Get model prediction for each aumented copy
        predictions = model.predict(augmented_images, batch_size=BATCH_SIZE)

        class_masks = []
        max_masks = []

        for i, prediction in enumerate(predictions):

            if mode == "slice":
                class_slice = prediction[:, :, filter_class_id]
                class_mask = class_slice[..., np.newaxis]

                no_class_prediction = np.delete(prediction, filter_class_id, axis=-1)
                max_mask = no_class_prediction.max(axis=-1)
                max_mask = max_mask[..., np.newaxis]

                # ReLU is only needed when working with slices
                if relu_output:
                    class_mask = (tf.nn.relu(class_mask)).numpy()
                    max_mask = (tf.nn.relu(max_mask)).numpy()

                max_masks.append(max_mask)

            elif mode == "argmax":
                class_mask = create_mask(prediction)
                # Set to 0 all predictions different from the given class
                class_mask = tf.where(class_mask == filter_class_id, class_mask, 0)
                class_mask = tf.cast(class_mask, tf.float32) # Necessary for super-resolution operations
                class_mask = class_mask.numpy()

            class_masks.append(class_mask)

            if save_output:
                tf.keras.utils.save_img(f"{output_folder}/{i}_class.png", class_mask, scale=True)
                if mode == "slice":
                    tf.keras.utils.save_img(f"{output_folder}/{i}_max.png", max_mask, scale=True)

            # np.save(os.path.join(output_folder, f"{filename}_angles"), angles)
            # np.save(os.path.join(output_folder, f"{filename}_shifts"), shifts)

        file = h5py.File(f"{output_folder}/{filename}.hdf5", "w")
        file.create_dataset("class_masks", data=class_masks)

        if mode == "slice":
            file.create_dataset("max_masks", data=max_masks)

        file.create_dataset("angles", data=angles)
        file.create_dataset("shifts", data=shifts)
        file.attrs["filename"] = filename
        file.attrs["mode"] = mode

        file.close()

        augmented_features[filename] = { "class": class_masks, "max": max_masks }

    return augmented_features

In [9]:
angle_max = 0.5  # in radians
shift_max = 30

augmented_features_dict = compute_augmented_features(images_dict, model_no_upsample, mode="slice", dest_folder=PRECOMPUTED_OUTPUT_DIR, filter_class_id=CLASS_ID, num_aug=NUM_AUG, angle_max=angle_max, shift_max=shift_max, save_output=True, relu_output=False)

100%|██████████| 26/26 [01:36<00:00,  3.72s/it]


# Compute Super-Resolution Output

In [10]:
def threshold_image(class_mask, max_mask=None, th_val=.15):
    if max_mask is not None:
        th_mask = tf.where(class_mask >= max_mask, CLASS_ID, 0)
    else:
        sample_th = tf.cast(tf.reduce_max(class_mask), tf.float32) * th_val
        th_mask = tf.where(class_mask > sample_th, CLASS_ID, 0)

    return th_mask.numpy()


def custom_IOU(y_true, y_pred, class_id):
    y_true_squeeze = tf.squeeze(y_true)
    y_pred_squeeze = tf.squeeze(y_pred)
    classes = [0, class_id] # Only check in background and given class

    y_true_squeeze = tf.where(y_true_squeeze != class_id, 0, y_true_squeeze)

    ious = []
    for i in classes:
        true_labels = tf.equal(y_true_squeeze, i)
        pred_labels = tf.equal(y_pred_squeeze, i)
        inter = tf.cast(true_labels & pred_labels, tf.int32)
        union = tf.cast(true_labels | pred_labels, tf.int32)

        iou = tf.reduce_sum(inter) / tf.reduce_sum(union)
        ious.append(iou)

    ious = tf.stack(ious)
    legal_labels = ~tf.math.is_nan(ious)
    ious = tf.gather(ious, indices=tf.where(legal_labels))
    return tf.reduce_mean(ious)


def evaluate_IOU(true_mask, superres_mask, img_size=(512, 512)):
    true_mask = tf.reshape(true_mask, (img_size[0] * img_size[1], 1))
    superres_mask = tf.reshape(superres_mask, (img_size[0] * img_size[1], 1))

    superres_IOU = custom_IOU(true_mask, superres_mask, class_id=CLASS_ID)

    return superres_IOU.numpy()

In [11]:
def compute_superresolution_output(precomputed_data_paths, superres_args, dest_folder, mode="slice", num_aug=100, global_normalize=True, save_output=False):

    superres_masks = {}
    class_losses = {}
    ious = {}

    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    for file_path in tqdm(precomputed_data_paths):

        file = h5py.File(f"{file_path}", "r")

        if not check_hdf5_validity(file, num_aug=num_aug):
            print(f"File: {file_path} is invalid, skipping...")
            file.close()
            continue


        filename = file.attrs["filename"]
        angles = file["angles"][:]
        shifts = file["shifts"][:]

        class_masks = file["class_masks"][:]
        class_masks = tf.stack(class_masks)

        if mode == "slice":
            max_masks = file["max_masks"][:]
            max_masks = tf.stack(max_masks)

        file.close()


        superresolution_obj = Superresolution(
            **superres_args,
            num_aug=NUM_AUG,
            verbose=False
        )


        global_min = tf.reduce_min(class_masks) if global_normalize else None
        global_max = tf.reduce_max(class_masks) if global_normalize else None

        class_masks = tf.map_fn(fn=lambda image: min_max_normalization(image.numpy(), new_min=0.0, new_max=1.0, global_min=global_min, global_max=global_max), elems=class_masks)

        target_image_class, class_loss = superresolution_obj.compute_output(class_masks, angles, shifts)
        target_image_class = (target_image_class[0]).numpy()
        print(f"Final class loss for image {filename}: {class_loss}")

        if mode == "slice":

            global_min = tf.reduce_min(max_masks) if global_normalize else None
            global_max = tf.reduce_max(max_masks) if global_normalize else None

            max_masks = tf.map_fn(fn=lambda image: min_max_normalization(image.numpy(), new_min=0.0, new_max=1.0, global_min=global_min, global_max=global_max), elems=max_masks)


            target_image_max, max_loss = superresolution_obj.compute_output(max_masks, angles, shifts)
            target_image_max = (target_image_max[0]).numpy()
            print(f"Final max loss for image {filename}: {max_loss}")



        class_losses[filename] = class_loss

        th_image = threshold_image(target_image_class, max_mask= None if mode == "argmax" else target_image_max)

        true_mask_path = os.path.join(DATA_DIR, "VOCdevkit/VOC2012/SegmentationClassAug", f"{filename}.png")
        true_mask = load_image(true_mask_path, image_size=IMG_SIZE, normalize=False,
                               is_png=True, resize_method="nearest")

        iou = evaluate_IOU(true_mask, th_image)
        ious[filename] = iou

        # superres_masks[filename] = { "class": target_image_class, "max": target_image_max } if mode == "slice" else target_image_class

        if save_output:
            tf.keras.utils.save_img(f"{dest_folder}/{filename}_th_{mode}.png", th_image, scale=True)



    mean_iou = np.mean(np.fromiter(ious.values(), dtype=float))

    with open(os.path.join(DATA_DIR, "ious.txt"), "a") as f:
        f.write(mean_iou)

    print(f"Final Mean IOU: {mean_iou}")

    return 1.0 - mean_iou

In [12]:
class SuperresTuner(kt.RandomSearch):

    precomputed_data_paths = list_precomputed_data_paths(PRECOMPUTED_OUTPUT_DIR)

    def run_trial(self, trial, **kwargs):
        hp = trial.hyperparameters

        superres_args = {
            "lambda_tv": hp.Float("lambda_tv", min_value=0.002, max_value= 0.1, step=0.01),
            "lambda_eng": hp.Float("lambda_eng", min_value=0.001, max_value= 0.1, step=0.01),
            "num_iter": hp.Int("num_iter", min_value=400, max_value=1000, step=50),
            "learning_rate" : hp.Float("learning_rate", min_value=1e-4, max_value= 1e-2, step=0.1),
        }

        return compute_superresolution_output(self.precomputed_data_paths, superres_args, dest_folder=SUPERRES_OUTPUT_DIR, mode="slice",  num_aug=NUM_AUG, global_normalize=False, save_output=True)

In [12]:
tuner = SuperresTuner(
    # No hypermodel or objective specified.
    max_trials=30,
    overwrite=True,
    directory=DATA_DIR,
    project_name="Tuner_Trials",
)

tuner.search()
#print(tuner.get_best_hyperparameters()[0].get("x"))

Trial 12 Complete [00h 23m 37s]
default_objective: 0.20777671702730172

Best default_objective So Far: 0.20755350577812637
Total elapsed time: 04h 18m 00s

Search: Running Trial #13

Hyperparameter    |Value             |Best Value So Far 
lambda_tv         |0.082             |0.092             
lambda_eng        |0.011             |0.071             
num_iter          |1000              |950               
learning_rate     |0.0001            |0.0001            



  0%|          | 0/26 [00:00<?, ?it/s]

Final class loss for image 2007_008815: 25782.349609375


  4%|▍         | 1/26 [01:17<32:19, 77.56s/it]

Final max loss for image 2007_008815: 55640.72265625
Final class loss for image 2007_009346: 21520.96484375


  8%|▊         | 2/26 [02:35<31:11, 77.96s/it]

Final max loss for image 2007_009346: 51747.12890625
Final class loss for image 2008_000345: 26179.8828125


 12%|█▏        | 3/26 [03:53<29:52, 77.95s/it]

Final max loss for image 2008_000345: 47305.8671875
Final class loss for image 2008_002152: 27288.84375


 15%|█▌        | 4/26 [05:11<28:30, 77.76s/it]

Final max loss for image 2008_002152: 38853.34765625
Final class loss for image 2008_002623: 26135.75


 19%|█▉        | 5/26 [06:28<27:08, 77.52s/it]

Final max loss for image 2008_002623: 55755.4140625
Final class loss for image 2008_006341: 31263.8671875


 23%|██▎       | 6/26 [07:46<25:58, 77.91s/it]

Final max loss for image 2008_006341: 42193.4375
Final class loss for image 2008_007548: 18146.265625


 27%|██▋       | 7/26 [09:04<24:38, 77.83s/it]

Final max loss for image 2008_007548: 38399.296875
Final class loss for image 2009_000080: 17032.734375


 27%|██▋       | 7/26 [10:25<28:18, 89.38s/it]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "c:\users\alber\desktop\deeplabv3plus-tf2\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\alber\AppData\Local\Temp/ipykernel_13656/2040541666.py", line 9, in <module>
    tuner.search()
  File "c:\users\alber\desktop\deeplabv3plus-tf2\venv\lib\site-packages\keras_tuner\engine\base_tuner.py", line 179, in search
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "C:\Users\alber\AppData\Local\Temp/ipykernel_13656/1002174236.py", line 15, in run_trial
    return compute_superresolution_output(self.precomputed_data_paths, superres_args, dest_folder=SUPERRES_OUTPUT_DIR, mode="slice",  num_aug=NUM_AUG, global_normalize=False, save_output=True)
  File "C:\Users\alber\AppData\Local\Temp/ipykernel_13656/3666301978.py", line 58, in compute_superresolution_output
    target_image_max, max_loss = superresolution_obj.compute_output(max_ma

TypeError: object of type 'NoneType' has no len()

In [None]:
# super resolution parameters
learning_rate = 1e-3
lambda_eng = 0.0001 * NUM_AUG
lambda_tv = 0.002 * NUM_AUG
num_iter = 400


precomputed_data_paths = list_precomputed_data_paths(PRECOMPUTED_OUTPUT_DIR)

superres_masks_dict, losses = compute_superresolution_output(precomputed_data_paths, mode="argmax", dest_folder=SUPERRES_OUTPUT_DIR, num_aug=NUM_AUG, global_normalize=False, save_output=True)

In [45]:
superres_masks_dict_th = {}

for key in superres_masks_dict:
    target_dict = superres_masks_dict[key]
    th_mask = tf.where(target_dict["class"] >= target_dict["max"], CLASS_ID, 0)
    superres_masks_dict_th[key] = th_mask.numpy()

In [13]:
superres_masks_dict_th = {}

for key in superres_masks_dict:
    sample_th = tf.cast(tf.reduce_max(superres_masks_dict[key]), tf.float32) * 0.15
    th_mask = tf.where(superres_masks_dict[key] > sample_th, CLASS_ID, 0)
    superres_masks_dict_th[key] = th_mask

# Evaluation

In [14]:
def custom_IOU(y_true, y_pred, class_id):
    y_true_squeeze = tf.squeeze(y_true)
    y_pred_squeeze = tf.squeeze(y_pred)
    classes = [0, class_id] # Only check in background and given class

    y_true_squeeze = tf.where(y_true_squeeze != class_id, 0, y_true_squeeze)

    ious = []
    for i in classes:
        true_labels = tf.equal(y_true_squeeze, i)
        pred_labels = tf.equal(y_pred_squeeze, i)
        inter = tf.cast(true_labels & pred_labels, tf.int32)
        union = tf.cast(true_labels | pred_labels, tf.int32)

        iou = tf.reduce_sum(inter) / tf.reduce_sum(union)
        ious.append(iou)

    ious = tf.stack(ious)
    legal_labels = ~tf.math.is_nan(ious)
    ious = tf.gather(ious, indices=tf.where(legal_labels))
    return tf.reduce_mean(ious)


def evaluate_IOU(true_mask, standard_mask, superres_mask, img_size=(512, 512)):
    true_mask = tf.reshape(true_mask, (img_size[0] * img_size[1], 1))
    standard_mask = tf.reshape(standard_mask, (img_size[0] * img_size[1], 1))
    superres_mask = tf.reshape(superres_mask, (img_size[0] * img_size[1], 1))

    # standard_IOU = Mean_IOU(true_mask, standard_mask)
    # superres_IOU = Mean_IOU(true_mask, superres_mask)

    standard_IOU = custom_IOU(true_mask, standard_mask, class_id=CLASS_ID)
    superres_IOU = custom_IOU(true_mask, superres_mask, class_id=CLASS_ID)

    return standard_IOU.numpy(), superres_IOU.numpy()

def compare_results(image_dict, standard_dict, superres_dict, image_size=(512, 512), verbose=False):
    standard_IOUs = []
    superres_IOUs = []

    for key in image_dict:
        true_mask_path = os.path.join(DATA_DIR, "VOCdevkit/VOC2012/SegmentationClassAug", f"{key}.png")
        true_mask = load_image(true_mask_path, image_size=image_size, normalize=False,
                               is_png=True, resize_method="nearest")

        standard_mask = standard_dict[key]
        superres_image = superres_dict[key]

        standard_IOU, superres_IOU = evaluate_IOU(true_mask, standard_mask, superres_image, img_size=image_size)
        standard_IOUs.append(standard_IOU)
        superres_IOUs.append(superres_IOU)

        if verbose:
            print(f"IOUs for image {key} - Standard: {str(standard_IOU)}, Superres: {str(superres_IOU)}")

    return standard_IOUs, superres_IOUs

In [15]:
standard_IOUs, superres_IOUs = compare_results(images_dict, standard_masks_dict, superres_masks_dict_th, image_size=IMG_SIZE, verbose=False)

In [16]:
print(f"Standard mean IOU: {np.mean(standard_IOUs)},  Superres mean IOU: {np.mean(superres_IOUs)}")

Standard mean IOU: 0.8432970924901599,  Superres mean IOU: 0.8232844974653806


# Tests

In [None]:
def plot_standard_superres(input_image, ground_truth, standard_mask, superres_mask):
    plt.figure(figsize=(18, 18))

    plt.subplot(1, 3, 1)
    plt.title("Ground Truth")
    plt.imshow(tf.keras.preprocessing.image.array_to_img(input_image))
    plt.imshow(tf.keras.preprocessing.image.array_to_img(ground_truth), alpha=0.5)
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.title("Sandard predicted Mask")
    plt.imshow(tf.keras.preprocessing.image.array_to_img(input_image))
    plt.imshow(tf.keras.preprocessing.image.array_to_img(standard_mask), alpha=0.5)
    plt.axis('off')

    plt.subplot(1, 3, 3)
    plt.title("Superresolution Mask")
    plt.imshow(tf.keras.preprocessing.image.array_to_img(input_image))
    plt.imshow(tf.keras.preprocessing.image.array_to_img(superres_mask), alpha=0.5)
    plt.axis('off')


    plt.show()

In [None]:
sample_key = random.choice(valid_filenames)
sample_image = images_dict[sample_key]
sample_standard = standard_masks_dict[sample_key]
sample_superres = superres_masks_dict[sample_key]["class"]
sample_superres_th = superres_masks_dict_th[sample_key]

true_mask_path = os.path.join(DATA_DIR, "VOCdevkit/VOC2012/SegmentationClassAug", f"{sample_key}.png")
true_mask = load_image(true_mask_path, image_size=IMG_SIZE, normalize=False, is_png=True, resize_method="nearest")

#plot_prediction([sample_image, true_mask, sample_standard], only_prediction=False, show_overlay=True)
#print_labels([true_mask, sample_standard])

In [None]:
#plot_image(sample_superres_th)

In [None]:
plot_standard_superres(sample_image, true_mask, sample_standard, sample_superres)
plot_standard_superres(sample_image, true_mask, sample_standard, sample_superres_th)
evaluate_IOU(true_mask, sample_standard, sample_superres_th)

In [20]:
th_ranges = np.arange(0.50, 1.0, 0.05)
results = {}

for th_value in th_ranges:
    superres_masks_dict_th_test = {}
    for key in superres_masks_dict:
        target_dict = superres_masks_dict[key]
        target_mask = target_dict["class"]

        th_mask_value = tf.cast(tf.reduce_max(target_mask), tf.float32) * th_value
        th_mask = tf.where(target_mask > th_mask_value, CLASS_ID, 0)
        superres_masks_dict_th_test[key] = th_mask.numpy()

    standard_IOUs, superres_IOUs = compare_results(images_dict, standard_masks_dict, superres_masks_dict_th_test, image_size=IMG_SIZE, verbose=False)

    mean_standard_IOUs = np.mean(standard_IOUs)
    mean_superres_IOUs = np.mean(superres_IOUs)
    results[th_value] = (mean_standard_IOUs, mean_superres_IOUs)
    print(f"Threshold Value: {th_value} -- Standard mean IOU: {np.mean(standard_IOUs)},  Superres mean IOU: {np.mean(superres_IOUs)}")

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [None]:
plot_image(augmented_features_dict[sample_key]["class"][0])

In [None]:
plt.figure(figsize=(40, 40))
ax = sns.heatmap(a[..., -1], annot=True, cbar=False, fmt=".2f")