In [1]:
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()
# TOKEN = user_secrets.get_secret("GITHUB_TOKEN")
#
# !git clone -b parameters_search https://{TOKEN}@github.com/nicoloalbergoni/DeeplabV3Plus-TF2.git
# %load_ext tensorboard
# %cd ./DeeplabV3Plus-TF2

In [2]:
#!python download_and_prepare_voc.py --remove_cmap --download_berkley

In [3]:
import os
import random
import h5py
import numpy as np
import seaborn as sns
from tqdm import tqdm
import tensorflow as tf
from model import DeeplabV3Plus
import tensorflow_addons as tfa
from matplotlib import pyplot as plt
from superresolution import Superresolution
from utils import load_image, get_prediction, create_mask, plot_prediction
from superres_utils import get_img_paths, filter_by_class, min_max_normalization, plot_image, plot_histogram, list_precomputed_data_paths, check_hdf5_validity
import keras_tuner as kt

In [4]:
# --- Filesystem layout (everything lives under <cwd>/data) -------------------
BASE_DIR = os.getcwd()
DATA_DIR = os.path.join(BASE_DIR, "data")
PASCAL_ROOT = os.path.join(DATA_DIR, "VOCdevkit", "VOC2012")
IMGS_PATH = os.path.join(PASCAL_ROOT, "JPEGImages")

SUPERRES_ROOT = os.path.join(DATA_DIR, "superres_root")
PRECOMPUTED_OUTPUT_DIR = os.path.join(SUPERRES_ROOT, "precomputed_features")
STANDARD_OUTPUT_DIR = os.path.join(SUPERRES_ROOT, "standard_output")
SUPERRES_OUTPUT_DIR = os.path.join(SUPERRES_ROOT, "superres_output")

# --- Run parameters ----------------------------------------------------------
# SEED = np.random.randint(0, 1000)
SEED = 1234
IMG_SIZE = (512, 512)   # (height, width) passed to load_image
BATCH_SIZE = 1          # batch size passed to model.predict
BUFFER_SIZE = 1000      # not referenced by the cells in this notebook
EPOCHS = 30             # not referenced by the cells in this notebook
CLASSES = 21            # number of output classes of the segmentation model
RESHAPE_MASKS = True    # not referenced by the cells in this notebook
NUM_AUG = 150           # augmented copies generated per image
CLASS_ID = 8            # the single class that masks and IoU are restricted to
NUM_SAMPLES = 500       # keep only the first N listed images; None keeps all

# Apply the seed: the augmentation angles/shifts are drawn from np.random, so
# without this every Restart & Run All produces different augmented copies.
# NOTE(review): if Superresolution relies on TensorFlow random ops, also call
# tf.random.set_seed(SEED) here - confirm against superresolution.py.
random.seed(SEED)
np.random.seed(SEED)

In [5]:
# Resolve the image list, optionally truncate it, and keep only the images
# that filter_by_class returns for CLASS_ID.
image_list_path = os.path.join(DATA_DIR, "augmented_file_lists", "trainaug.txt")
image_paths = get_img_paths(image_list_path, IMGS_PATH)

if NUM_SAMPLES is not None:
    image_paths = image_paths[:NUM_SAMPLES]

images_dict = filter_by_class(image_paths, class_id=CLASS_ID)

print(f"Valid images: {len(images_dict)} (Initial:  {len(image_paths)})")

valid_filenames = list(images_dict.keys())

# Both models share one configuration, derived from the config-cell constants so
# the input size / class count cannot drift from IMG_SIZE / CLASSES.
DEEPLAB_KWARGS = dict(
    input_shape=(*IMG_SIZE, 3),
    classes=CLASSES,
    OS=16,
    last_activation=None,
    load_weights=True,
    backbone="mobilenet",
    alpha=1.,
)

# Same network, built without and with the final upsampling stage.
model_no_upsample = DeeplabV3Plus(**DEEPLAB_KWARGS).build_model(final_upsample=False)
model_standard = DeeplabV3Plus(**DEEPLAB_KWARGS).build_model(final_upsample=True)

Valid images: 47 (Initial:  500)


# Compute Standard Output for Comparison

In [6]:
def compute_standard_output(image_dict, model, dest_folder, filter_class_id=None):
    """Predict a mask for every image, save it as PNG and return all masks.

    Args:
        image_dict: Mapping from an image key to the image handed to
            ``get_prediction``; the key is also used as the PNG file name.
        model: Model forwarded to ``get_prediction``.
        dest_folder: Directory that receives ``<key>.png`` (created if missing).
        filter_class_id: When given, every predicted label different from this
            id is replaced by 0 before saving.

    Returns:
        dict mapping each key of ``image_dict`` to its (optionally filtered) mask.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(dest_folder, exist_ok=True)

    standard_masks = {}
    for key in tqdm(image_dict):
        standard_mask = get_prediction(model, image_dict[key])
        if filter_class_id is not None:
            # Set to 0 all predictions different from the given class
            standard_mask = tf.where(standard_mask == filter_class_id, standard_mask, 0)
        # scale=False: pixel values are written unchanged, not rescaled
        tf.keras.utils.save_img(os.path.join(dest_folder, f"{key}.png"), standard_mask, scale=False)
        standard_masks[key] = standard_mask

    return standard_masks

In [7]:
# Baseline: masks predicted by the model built with the final upsampling stage,
# restricted to CLASS_ID and written to STANDARD_OUTPUT_DIR.
standard_masks_dict = compute_standard_output(
    images_dict,
    model_standard,
    filter_class_id=CLASS_ID,
    dest_folder=STANDARD_OUTPUT_DIR,
)

100%|██████████| 47/47 [00:22<00:00,  2.10it/s]


# Precompute Augmented Output Features

In [8]:
def create_augmented_copies(image, num_aug, angle_max, shift_max, chunk_size=20):
    """Create randomly rotated and translated copies of ``image`` in chunks.

    Angles are drawn uniformly from ``[-angle_max, angle_max]`` and shifts
    uniformly from ``[-shift_max, shift_max]`` on both axes. The first copy is
    always the unmodified image (angle 0, shift (0, 0)).

    Args:
        image: Single image tensor; a leading batch axis is added internally.
        num_aug: Total number of copies; must be a multiple of ``chunk_size``.
        angle_max: Maximum absolute rotation passed to ``tfa.image.rotate``.
        shift_max: Maximum absolute translation passed to ``tfa.image.translate``.
        chunk_size: Number of copies transformed per batch.

    Returns:
        Tuple ``(augmented_chunks, angles, shifts)``: a list of
        ``num_aug // chunk_size`` numpy arrays holding ``chunk_size`` images
        each, the float32 angles of length ``num_aug`` and the float32 shifts of
        shape ``(num_aug, 2)``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or does not divide
            ``num_aug``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")
    if (num_aug % chunk_size) != 0:
        raise ValueError(f"num_aug ({num_aug}) must be a multiple of chunk_size ({chunk_size})")

    num_chunks = num_aug // chunk_size

    angles = np.random.uniform(-angle_max, angle_max, num_aug)
    shifts = np.random.uniform(-shift_max, shift_max, (num_aug, 2))
    # Keep the first copy untouched so it serves as the reference image.
    angles[0] = 0
    shifts[0] = np.array([0, 0])
    angles = angles.astype("float32")
    shifts = shifts.astype("float32")

    angles_chunks = np.split(angles, num_chunks)
    shifts_chunks = np.split(shifts, num_chunks)

    # The tiled batch is identical for every chunk, so build it only once.
    images_chunk = tf.tile(tf.expand_dims(image, axis=0), [chunk_size, 1, 1, 1])

    augmented_chunks = []
    for chunk_angles, chunk_shifts in zip(angles_chunks, shifts_chunks):
        rotated_chunk = tfa.image.rotate(images_chunk, chunk_angles, interpolation="bilinear")
        translated_chunk = tfa.image.translate(rotated_chunk, chunk_shifts, interpolation="bilinear")
        augmented_chunks.append(translated_chunk.numpy())

    return augmented_chunks, angles, shifts

def compute_augmented_features(image_filenames, model, dest_folder, filter_class_id, mode="slice", num_aug=100, angle_max=0.5, shift_max=30, save_output=False, relu_output=False):
    """Run the model on augmented copies of each image and store the features.

    For every filename the image ``IMGS_PATH/<filename>.jpg`` is loaded,
    ``num_aug`` augmented copies are generated and predicted one at a time, and
    the results are written to ``<dest_folder>/<filename>/<filename>.hdf5``
    with the datasets ``class_masks``, ``angles``, ``shifts`` (plus
    ``max_masks`` in slice mode) and the attributes ``filename`` and ``mode``.

    Args:
        image_filenames: Iterable of image file names without extension.
        model: Model whose ``predict`` output is indexed as
            ``[batch, row, col, class]``.
        dest_folder: Root directory for the per-image output folders.
        filter_class_id: Class whose channel / label is extracted.
        mode: ``"slice"`` stores the raw channel of ``filter_class_id`` together
            with the per-pixel maximum over all other channels; ``"argmax"``
            stores the label mask with every other class set to 0.
        num_aug: Number of augmented copies per image.
        angle_max: Maximum absolute rotation of a copy.
        shift_max: Maximum absolute translation of a copy.
        save_output: Also save every mask as a PNG in the image's folder.
        relu_output: Slice mode only; apply ``tf.nn.relu`` to both masks.

    Returns:
        dict mapping each filename to ``{"class": [...], "max": [...]}``;
        ``"max"`` stays empty in argmax mode.

    Raises:
        ValueError: If ``mode`` is neither ``"slice"`` nor ``"argmax"``.
    """
    # Fail fast: an unknown mode would otherwise surface as a NameError only
    # after the first (expensive) prediction.
    if mode not in ("slice", "argmax"):
        raise ValueError(f"Unsupported mode {mode!r}; expected 'slice' or 'argmax'")

    augmented_features = {}

    for filename in tqdm(image_filenames):

        # Load image
        image_path = os.path.join(IMGS_PATH, f"{filename}.jpg")
        image = load_image(image_path, image_size=IMG_SIZE, normalize=True)

        # Create augmented copies; chunk_size=1 yields one single-image batch per copy
        augmented_images, angles, shifts = create_augmented_copies(image, num_aug=num_aug, angle_max=angle_max, shift_max=shift_max, chunk_size=1)

        # Create destination folder (no-op when it already exists)
        output_folder = os.path.join(dest_folder, filename)
        os.makedirs(output_folder, exist_ok=True)

        class_masks = []
        max_masks = []

        for i, augmented_copy in enumerate(augmented_images):
            predictions = model.predict(augmented_copy, batch_size=BATCH_SIZE)
            prediction = predictions[0]

            if mode == "slice":
                class_slice = prediction[:, :, filter_class_id]
                class_mask = class_slice[..., np.newaxis]

                # Strongest competing score: maximum over every other channel
                no_class_prediction = np.delete(prediction, filter_class_id, axis=-1)
                max_mask = no_class_prediction.max(axis=-1)
                max_mask = max_mask[..., np.newaxis]

                # ReLU is only needed when working with slices
                if relu_output:
                    class_mask = (tf.nn.relu(class_mask)).numpy()
                    max_mask = (tf.nn.relu(max_mask)).numpy()

                max_masks.append(max_mask)

            else:  # mode == "argmax"
                class_mask = create_mask(prediction)
                # Set to 0 all predictions different from the given class
                class_mask = tf.where(class_mask == filter_class_id, class_mask, 0)
                class_mask = tf.cast(class_mask, tf.float32) # Necessary for super-resolution operations
                class_mask = class_mask.numpy()

            class_masks.append(class_mask)

            if save_output:
                tf.keras.utils.save_img(f"{output_folder}/{i}_class.png", class_mask, scale=True)
                if mode == "slice":
                    tf.keras.utils.save_img(f"{output_folder}/{i}_max.png", max_mask, scale=True)

        # The context manager closes the file even if a dataset write fails.
        with h5py.File(f"{output_folder}/{filename}.hdf5", "w") as hdf5_file:
            hdf5_file.create_dataset("class_masks", data=class_masks)

            if mode == "slice":
                hdf5_file.create_dataset("max_masks", data=max_masks)

            hdf5_file.create_dataset("angles", data=angles)
            hdf5_file.create_dataset("shifts", data=shifts)
            hdf5_file.attrs["filename"] = filename
            hdf5_file.attrs["mode"] = mode

        augmented_features[filename] = { "class": class_masks, "max": max_masks }

    return augmented_features

In [9]:
# Augmentation ranges shared by all precomputed copies.
angle_max = 0.5  # in radians
shift_max = 30

# Precompute the slice-mode features with the model built without final
# upsampling; PNG previews are written alongside the HDF5 files.
augmented_features_dict = compute_augmented_features(
    images_dict,
    model_no_upsample,
    dest_folder=PRECOMPUTED_OUTPUT_DIR,
    filter_class_id=CLASS_ID,
    mode="slice",
    num_aug=NUM_AUG,
    angle_max=angle_max,
    shift_max=shift_max,
    save_output=True,
    relu_output=False,
)

100%|██████████| 47/47 [16:19<00:00, 20.83s/it]


# Compute Super-Resolution Output

In [10]:
def threshold_image(class_mask, max_mask=None, th_val=.15):
    """Turn a class score image into a label mask holding CLASS_ID or 0.

    With ``max_mask`` a pixel is labelled CLASS_ID when its class score is at
    least the corresponding ``max_mask`` value. Without it, a pixel is labelled
    CLASS_ID when its score exceeds ``th_val`` times the maximum of
    ``class_mask``.

    Returns:
        The label mask as a numpy array.
    """
    if max_mask is None:
        # Relative threshold: a fraction of the strongest response in the image
        peak = tf.cast(tf.reduce_max(class_mask), tf.float32)
        selected = class_mask > peak * th_val
    else:
        selected = class_mask >= max_mask

    return tf.where(selected, CLASS_ID, 0).numpy()


def custom_IOU(y_true, y_pred, class_id):
    """Mean IoU over the background label (0) and ``class_id`` only.

    Every ground-truth label different from ``class_id`` is first mapped to 0.
    A label whose union is empty yields NaN and is left out of the mean.

    Returns:
        Scalar tensor with the mean of the valid per-label IoU values.
    """
    truth = tf.squeeze(y_true)
    prediction = tf.squeeze(y_pred)

    # Collapse all other classes of the ground truth into background
    truth = tf.where(truth != class_id, 0, truth)

    def _label_iou(label):
        truth_hit = tf.equal(truth, label)
        pred_hit = tf.equal(prediction, label)
        overlap = tf.cast(truth_hit & pred_hit, tf.int32)
        combined = tf.cast(truth_hit | pred_hit, tf.int32)
        return tf.reduce_sum(overlap) / tf.reduce_sum(combined)

    # Only check in background and given class
    per_label = tf.stack([_label_iou(label) for label in (0, class_id)])

    valid_positions = tf.where(~tf.math.is_nan(per_label))
    return tf.reduce_mean(tf.gather(per_label, indices=valid_positions))


def evaluate_IOU(true_mask, superres_mask, img_size=(512, 512)):
    """Flatten both masks to a column and return their CLASS_ID IoU.

    Args:
        true_mask: Ground-truth mask with ``img_size[0] * img_size[1]`` elements.
        superres_mask: Predicted mask with the same number of elements.
        img_size: ``(height, width)`` used to compute the flattened length.

    Returns:
        The value of ``custom_IOU`` for CLASS_ID, converted with ``.numpy()``.
    """
    column_shape = (img_size[0] * img_size[1], 1)
    flat_true = tf.reshape(true_mask, column_shape)
    flat_superres = tf.reshape(superres_mask, column_shape)

    return custom_IOU(flat_true, flat_superres, class_id=CLASS_ID).numpy()

In [11]:
def _normalize_masks(masks, global_normalize):
    """Rescale every mask of the stack to [0, 1] with min_max_normalization."""
    # With global_normalize the min/max of the whole stack is shared by all
    # masks; otherwise None is passed for both bounds.
    global_min = tf.reduce_min(masks) if global_normalize else None
    global_max = tf.reduce_max(masks) if global_normalize else None

    return tf.map_fn(fn=lambda image: min_max_normalization(image.numpy(), new_min=0.0, new_max=1.0, global_min=global_min, global_max=global_max), elems=masks)


def compute_superresolution_output(precomputed_data_paths, superres_args, dest_folder, mode="slice", num_aug=100, global_normalize=True, save_output=False):
    """Run super-resolution on precomputed features and score it with IoU.

    Each HDF5 file is validated with ``check_hdf5_validity`` (invalid files are
    reported and skipped), its masks are normalised, a ``Superresolution``
    object computes the target image(s), the result is thresholded and compared
    with the ground-truth mask from ``SegmentationClassAug``. The mean IoU is
    printed and appended as one line to ``DATA_DIR/ious.txt``.

    Args:
        precomputed_data_paths: Iterable of paths to the precomputed HDF5 files.
        superres_args: Keyword arguments forwarded to ``Superresolution``.
        dest_folder: Directory for the thresholded PNGs (created if missing).
        mode: ``"slice"`` also processes ``max_masks`` and thresholds the class
            image against it; ``"argmax"`` thresholds the class image alone.
        num_aug: Expected number of augmented copies per file; used for the
            validity check and forwarded to ``Superresolution``.
        global_normalize: Normalise with the min/max of the whole mask stack.
        save_output: Save each thresholded image as
            ``<filename>_th_<mode>.png``.

    Returns:
        ``1.0 - mean IoU`` over all processed files.

    Raises:
        ValueError: If ``mode`` is neither ``"slice"`` nor ``"argmax"``.
    """
    # Fail fast: an unknown mode would otherwise surface as a NameError only
    # after the first (expensive) super-resolution run.
    if mode not in ("slice", "argmax"):
        raise ValueError(f"Unsupported mode {mode!r}; expected 'slice' or 'argmax'")

    ious = {}

    os.makedirs(dest_folder, exist_ok=True)

    for file_path in tqdm(precomputed_data_paths):

        # Read everything up front; the context manager closes the file on every
        # path out of the block (skip, success or exception).
        with h5py.File(f"{file_path}", "r") as file:
            if not check_hdf5_validity(file, num_aug=num_aug):
                print(f"File: {file_path} is invalid, skipping...")
                continue

            filename = file.attrs["filename"]
            angles = file["angles"][:]
            shifts = file["shifts"][:]
            class_masks = tf.stack(file["class_masks"][:])
            max_masks = tf.stack(file["max_masks"][:]) if mode == "slice" else None

        # Use the num_aug argument (not the NUM_AUG global) so the object agrees
        # with the validity check above.
        superresolution_obj = Superresolution(
            **superres_args,
            num_aug=num_aug,
            verbose=False
        )

        class_masks = _normalize_masks(class_masks, global_normalize)

        target_image_class, class_loss = superresolution_obj.compute_output(class_masks, angles, shifts)
        target_image_class = (target_image_class[0]).numpy()
        print(f"Final class loss for image {filename}: {class_loss}")

        # Stays None in argmax mode, which selects the relative threshold
        target_image_max = None
        if mode == "slice":
            max_masks = _normalize_masks(max_masks, global_normalize)

            target_image_max, max_loss = superresolution_obj.compute_output(max_masks, angles, shifts)
            target_image_max = (target_image_max[0]).numpy()
            print(f"Final max loss for image {filename}: {max_loss}")

        th_image = threshold_image(target_image_class, max_mask=target_image_max)

        true_mask_path = os.path.join(DATA_DIR, "VOCdevkit/VOC2012/SegmentationClassAug", f"{filename}.png")
        true_mask = load_image(true_mask_path, image_size=IMG_SIZE, normalize=False,
                               is_png=True, resize_method="nearest")

        ious[filename] = evaluate_IOU(true_mask, th_image)

        if save_output:
            tf.keras.utils.save_img(f"{dest_folder}/{filename}_th_{mode}.png", th_image, scale=True)

    mean_iou = np.mean(np.fromiter(ious.values(), dtype=float))

    # One value per line, so results of successive trials stay separable
    with open(os.path.join(DATA_DIR, "ious.txt"), "a") as f:
        f.write(f"{mean_iou}\n")

    print(f"Final Mean IOU: {mean_iou}")

    return 1.0 - mean_iou

In [12]:
class SuperresTuner(kt.RandomSearch):
    """Random search over the super-resolution parameters.

    Each trial samples ``lambda_tv``, ``lambda_eng`` and ``global_normalize``
    and returns the value of ``compute_superresolution_output``
    (``1.0 - mean IoU``) for the precomputed features.
    """

    # Evaluated once, when the class body runs; shared by every trial.
    precomputed_data_paths = list_precomputed_data_paths(PRECOMPUTED_OUTPUT_DIR)

    def run_trial(self, trial, **kwargs):
        """Sample one hyperparameter set and return its super-resolution score."""
        hp = trial.hyperparameters

        # Sampling order is kept: lambda_tv, lambda_eng, then global_normalize
        lambda_tv = hp.Float("lambda_tv", min_value=0.01, max_value= 2.0)
        lambda_eng = hp.Float("lambda_eng", min_value=0.01, max_value= 4.0)

        superres_args = dict(
            lambda_tv=lambda_tv,
            lambda_eng=lambda_eng,
            # num_iter=hp.Int("num_iter", min_value=400, max_value=800, step=50),
            num_iter=450,
            learning_rate=1e-3,
            loss_coeff=False,
        )

        use_global_normalization = hp.Boolean("global_normalize")

        return compute_superresolution_output(
            self.precomputed_data_paths,
            superres_args,
            dest_folder=SUPERRES_OUTPUT_DIR,
            mode="slice",
            num_aug=NUM_AUG,
            global_normalize=use_global_normalization,
            save_output=False,
        )

In [13]:
# The hyperparameters are sampled inside SuperresTuner.run_trial, so neither a
# hypermodel nor an objective is passed here. Trial data goes to
# DATA_DIR/Tuner_Trials.
tuner = SuperresTuner(
    project_name="Tuner_Trials",
    directory=DATA_DIR,
    overwrite=True,
    max_trials=30,
)

tuner.search()

Trial 9 Complete [01h 15m 32s]
default_objective: 0.19258085407634706

Best default_objective So Far: 0.19252010934839425
Total elapsed time: 11h 33m 56s

Search: Running Trial #10

Hyperparameter    |Value             |Best Value So Far 
lambda_tv         |1.455             |0.64807           
lambda_eng        |1.7508            |3.2873            
global_normalize  |False             |True              



  0%|          | 0/47 [00:00<?, ?it/s]

Final class loss for image 2007_000549: 115478.0703125


  2%|▏         | 1/47 [01:41<1:17:48, 101.50s/it]

Final max loss for image 2007_000549: 96140.125
Final class loss for image 2007_008575: 111697.09375


  4%|▍         | 2/47 [03:24<1:16:46, 102.38s/it]

Final max loss for image 2007_008575: 103427.5
Final class loss for image 2008_003045: 57006.66015625


  6%|▋         | 3/47 [05:06<1:15:05, 102.40s/it]

Final max loss for image 2008_003045: 104134.4296875
Final class loss for image 2008_003303: 80741.1953125


  9%|▊         | 4/47 [06:48<1:13:08, 102.05s/it]

Final max loss for image 2008_003303: 101143.078125
Final class loss for image 2008_005134: 85563.71875


 11%|█         | 5/47 [08:31<1:11:43, 102.47s/it]

Final max loss for image 2008_005134: 96781.015625
Final class loss for image 2008_005247: 81417.296875


 13%|█▎        | 6/47 [10:14<1:10:12, 102.75s/it]

Final max loss for image 2008_005247: 104669.1484375
Final class loss for image 2008_005337: 65533.6484375


 15%|█▍        | 7/47 [11:58<1:08:46, 103.17s/it]

Final max loss for image 2008_005337: 82504.03125
Final class loss for image 2008_005566: 65017.21875


 17%|█▋        | 8/47 [13:42<1:07:10, 103.36s/it]

Final max loss for image 2008_005566: 73830.5390625
Final class loss for image 2008_005600: 49153.890625


 19%|█▉        | 9/47 [15:25<1:05:23, 103.25s/it]

Final max loss for image 2008_005600: 111111.1015625
Final class loss for image 2008_006099: 102411.5


 21%|██▏       | 10/47 [17:04<1:02:49, 101.88s/it]

Final max loss for image 2008_006099: 92926.9609375
Final class loss for image 2008_007085: 49882.109375


 23%|██▎       | 11/47 [18:40<59:57, 99.92s/it]   

Final max loss for image 2008_007085: 96853.09375
Final class loss for image 2008_007130: 112320.359375


 26%|██▌       | 12/47 [20:15<57:30, 98.58s/it]

Final max loss for image 2008_007130: 79084.828125
Final class loss for image 2008_007324: 69137.6015625


 28%|██▊       | 13/47 [21:51<55:29, 97.93s/it]

Final max loss for image 2008_007324: 109762.046875
Final class loss for image 2009_000150: 63699.56640625


 30%|██▉       | 14/47 [23:28<53:33, 97.38s/it]

Final max loss for image 2009_000150: 112030.3984375
Final class loss for image 2009_000304: 54090.48046875


 32%|███▏      | 15/47 [25:04<51:45, 97.03s/it]

Final max loss for image 2009_000304: 129774.296875
Final class loss for image 2009_002053: 91448.421875


 34%|███▍      | 16/47 [26:41<50:05, 96.96s/it]

Final max loss for image 2009_002053: 105071.3046875
Final class loss for image 2009_002813: 75594.859375


 36%|███▌      | 17/47 [28:16<48:16, 96.55s/it]

Final max loss for image 2009_002813: 84229.828125
Final class loss for image 2009_002972: 51719.24609375


 38%|███▊      | 18/47 [29:53<46:44, 96.69s/it]

Final max loss for image 2009_002972: 102494.515625
Final class loss for image 2009_003697: 39724.9140625


 40%|████      | 19/47 [31:30<45:04, 96.59s/it]

Final max loss for image 2009_003697: 126783.6484375
Final class loss for image 2009_003698: 90656.65625


 43%|████▎     | 20/47 [33:06<43:27, 96.58s/it]

Final max loss for image 2009_003698: 104470.171875
Final class loss for image 2009_005177: 100651.6328125


 45%|████▍     | 21/47 [34:42<41:48, 96.47s/it]

Final max loss for image 2009_005177: 100323.5625
Final class loss for image 2010_000054: 130455.21875


 47%|████▋     | 22/47 [36:17<40:01, 96.07s/it]

Final max loss for image 2010_000054: 103454.4453125
Final class loss for image 2010_000157: 109937.3984375


 49%|████▉     | 23/47 [37:54<38:27, 96.13s/it]

Final max loss for image 2010_000157: 81159.953125
Final class loss for image 2010_000172: 40770.22265625


 51%|█████     | 24/47 [39:30<36:48, 96.04s/it]

Final max loss for image 2010_000172: 126644.65625
Final class loss for image 2010_000586: 90273.078125


 53%|█████▎    | 25/47 [41:05<35:11, 95.96s/it]

Final max loss for image 2010_000586: 112090.4453125
Final class loss for image 2010_000661: 65435.9296875


 55%|█████▌    | 26/47 [42:43<33:47, 96.55s/it]

Final max loss for image 2010_000661: 98763.296875
Final class loss for image 2010_001555: 110490.8046875


 57%|█████▋    | 27/47 [44:20<32:09, 96.48s/it]

Final max loss for image 2010_001555: 100347.9921875
Final class loss for image 2010_001647: 77252.9296875


 60%|█████▉    | 28/47 [45:56<30:31, 96.41s/it]

Final max loss for image 2010_001647: 115035.078125
Final class loss for image 2010_001884: 44815.97265625


 62%|██████▏   | 29/47 [47:36<29:17, 97.63s/it]

Final max loss for image 2010_001884: 138669.734375
Final class loss for image 2010_002040: 127950.390625


 62%|██████▏   | 29/47 [49:16<30:34, 101.93s/it]


KeyboardInterrupt: 

In [None]:
# Look up the best trial's hyperparameters once, then report each tuned value
# in the same order as before.
best_hyperparameters = tuner.get_best_hyperparameters()[0]
for hp_name in ("lambda_tv", "lambda_eng", "global_normalize"):
    print(best_hyperparameters.get(hp_name))