# Image Colorization Using Deep Learning

# Set-Up

In [1]:
import numpy as np
import keras
#import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *
from tensorflow.keras.applications.vgg16 import *

tf.random.set_seed(42) 
np.random.seed(42)

Using TensorFlow backend.


# Data Preparation

## Key Variables

In [0]:
IMG_SIZE     = [224, 224]
#IMG_WIDTH     = 224
#IMG_HEIGHT    = 224
N_CHANNELS    = 3
N_CLASSES     = 338
#TEST_SIZE    = 0.15
DATA_DIR   = "/content/drive/My Drive/sample_10/"

## Loading Data

In [0]:
# Data preprocessing is already done, now load the .npy files to load train and test data

x_train = np.load(file=DATA_DIR+"x_train.npy", allow_pickle=True)
y_train = np.load(file= DATA_DIR+"y_train.npy", allow_pickle=True)

#x_test = np.load(file=DATA_DIR+"x_test.npy", allow_pickle=True)
#y_test = np.load(file=DATA_DIR+"y_test.npy", allow_pickle=True)

In [4]:
print(len(x_train))

8


In [5]:
print(x_train.shape)

(8, 224, 224, 3)


In [6]:
print(y_train.shape)

(8, 224, 224)


In [7]:
print(y_train[0])

[[ 84  84  84 ... 112 112 112]
 [ 84  84  84 ... 112 112 112]
 [ 84  84  84 ... 112 112 112]
 ...
 [ 73  86  86 ...  73  86  85]
 [ 73  73  86 ...  73  86  85]
 [ 73  73  85 ...  73  86  86]]


In [22]:
y_train_new = tf.keras.utils.to_categorical(y=y_train, num_classes=N_CLASSES)
print(y_train_new.shape)

(8, 224, 224, 338)


# Model Architecture

## Model Building

In [0]:
def fcn_8s(image_size, ch_in=3, ch_out=3):
    """
    Build a FCN-8s Keras model, with the VGG-16 layers pretrained on ImageNet.
    :param image_size:  Image size (H x W)
    :param ch_in:       Number of input channels
    :param ch_out:      Number of output cannels
    :return:            Keras model
    """
    
    inputs = Input(shape=(*image_size, ch_in), name='input')

    # Building a pre-trained VGG-16 feature extractor (i.e., without the final FC layers)
    vgg16 = VGG16(include_top=False, weights='imagenet', input_tensor=inputs)
    # Recovering the feature maps generated by each of the 3 final blocks:
    f3 = vgg16.get_layer('block3_pool').output  # shape: (28, 28, 256)
    f4 = vgg16.get_layer('block4_pool').output  # shape: (14, 14, 512)
    f5 = vgg16.get_layer('block5_pool').output  # shape: ( 7,  7, 512)

    # Replacing VGG dense layers by convolutions:
    f5_conv1 = Conv2D(filters=4086, kernel_size=7, padding='same',
                      activation='relu')(f5)
    f5_drop1 = Dropout(0.5)(f5_conv1)
    f5_conv2 = Conv2D(filters=4086, kernel_size=1, padding='same',
                      activation='relu')(f5_drop1)
    f5_drop2 = Dropout(0.5)(f5_conv2)
    f5_conv3 = Conv2D(filters=ch_out, kernel_size=1, padding='same',
                      activation=None)(f5_drop2)


    # Using a transposed conv (w/ s=2) to upscale `f5` into a 14 x 14 map
    # so it can be merged with features from `f4_conv1` obtained from `f4`:
    f5_conv3_x2 = Conv2DTranspose(filters=ch_out, kernel_size=4, strides=2,
                                use_bias=False, padding='same', activation='relu')(f5)
    f4_conv1 = Conv2D(filters=ch_out, kernel_size=1, padding='same',
                      activation=None)(f4)

    # Merging the 2 feature maps (addition):
    merge1 = add([f4_conv1, f5_conv3_x2])

    # We repeat the operation to merge `merge1` and `f3` into a 28 x 28 map:
    merge1_x2 = Conv2DTranspose(filters=ch_out, kernel_size=4, strides=2,
                                use_bias=False, padding='same', activation='relu')(merge1)
    f3_conv1 = Conv2D(filters=ch_out, kernel_size=1, padding='same',
                      activation=None)(f3)
    merge2 = add([f3_conv1, merge1_x2])

    # Finally, we use another transposed conv to decode and up-scale the feature map
    # to the original shape, i.e., using a stride 8 to go from 28 x 28 to 224 x 224 here:
    outputs = Conv2DTranspose(filters=ch_out, kernel_size=16, strides=8,
                              padding='same', activation='softmax')(merge2)
    
    fcn8s_model = Model(inputs, outputs)
    return fcn8s_model

## Model Instantiation and summary

In [10]:
fcn8s_model = fcn_8s(IMG_SIZE, N_CHANNELS, N_CLASSES)
fcn8s_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 224, 224, 64) 1792        input[0][0]                      
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 224, 224, 64) 36928       block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 112, 112, 64) 0           block1_conv2[0][0]               
______________________________________________________________________________________________

# Training Preparation 

## Metrics and Loss

**Metrices:** Accuracy and IoU


**Loss:** cross-entropy vs. Dice

In [0]:
accuracy = tf.metrics.Accuracy()
meanIoU  = tf.metrics.MeanIoU(num_classes=N_CLASSES)
#loss_c = tf.losses.SparseCategoricalCrossentropy()
loss_c = tf.keras.losses.CategoricalCrossentropy()

#loss_c = tf.keras.losses.SparseCategoricalCrossentropy()

Since we are doing multi-class classification task (pixel-level classification), we can use **cross-entropy loss**. However, to prevent the model from developing bias towards larger classes, **dice loss function** will be used because it is not affected by class proportions.

In [0]:
# TODO: Dice loss

## Optimizer and Callbacks

In [0]:
optimizer = tf.keras.optimizers.Adam()

# TODO: Callbacks, Maybe no need of Callbacks.. just use history.history

## Hyper Parameters

In [0]:
BATCH_SIZE     = 8
NUM_EPOCHS     = 1

## Helper Functions

# Training

In [0]:
fcn8s_model.compile(optimizer=optimizer, 
                    loss=loss_c, 
                    metrics=[accuracy, meanIoU])

In [0]:
#y_train = keras.utils.to_categorical(y_train, N_CLASSES, "int64")

In [26]:
# Note: Train dataset is 85%, so saving 17.65% for validation set gives 70% train set, 15% validation set and 15% test set
history = fcn8s_model.fit(
    x=x_train, y=y_train_new, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_split=0.1765 
)

print(history.history)

{'loss': [16.05471420288086], 'accuracy': [0.8869365453720093], 'mean_io_u_1': [0.4982612431049347], 'val_loss': [13.421496391296387], 'val_accuracy': [0.9892381429672241], 'val_mean_io_u_1': [0.5424519777297974]}


In [0]:
# Plot accuracy and loss plots

'''
fig, ax = plt.subplots(2, 2, figsize=(15, 10), sharex='col')
ax[0, 0].set_title("loss")
ax[0, 1].set_title("val-loss")
ax[1, 0].set_title("acc")
ax[1, 1].set_title("val-acc")

ax[0, 0].plot(history.history['loss'])
ax[0, 1].plot(history.history['val_loss'])
ax[1, 0].plot(history.history['acc'])
ax[1, 1].plot(history.history['val_acc'])'''

# Testing

In [0]:
# For test_dataset, model.evaluate() returns the loss value & metrics values for the model in test mode

# Prediction

In [0]:
# Predict label map 
# label_map = np.argmax(fcn8s_model.predict(image), axis=-1)

# Note:  maybe save the model after training and load for prediction? See Exam 1 for reference

# Post-process Image

In [44]:
gpu = tf.test.is_gpu_available(
    cuda_only=False, min_cuda_compute_capability=None
)

print(gpu)

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True
