# Deep Colorization

This notebook creates a model that is able to colorize images to a certain extent, combining a deep Convolutional Neural Network architecture. The dataset to be used for the training originally is MS-COCO but we will use CIFAR10 in this notebook and later use the original dataset.

This notebook is getting original implementation in https://github.com/titu1994/keras-mobile-colorizer to ipynb. There would a sepearate notebook with improvements to above mentioned implementation.

## Install required modules

In [89]:
# Install pip packages in the current Jupyter kernel
import sys
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install keras
!{sys.executable} -m pip install scikit-image
!{sys.executable} -m pip install tensorflow



In [90]:
## Import required modules
import keras
from keras.layers import Conv2D, Input, Reshape, RepeatVector, concatenate, UpSampling2D, Flatten, Conv2DTranspose
from keras.models import Model

from keras import backend as K
from keras.callbacks import ModelCheckpoint, TensorBoard

from keras.losses import mean_squared_error
from keras.optimizers import Adam

import numpy as np

from skimage.color import rgb2gray
from skimage.transform import resize

import tensorflow as tf
import utils

weights_file_name = 'weights/mobilenet_model_v2.h5'

## Build the Neural Network


### Hyperparameters

In [91]:
### Hyperparameters
batch_size = 100
epochs = 100
image_size = 256
# nb_train_images = 60000 # there are 82783 images in MS-COCO, set this to how many samples you want to train on.

## Get the Data
Run the following cell to download the dataset.

In [92]:
from keras.datasets import cifar10

# Load data
(y_train, _), (y_test, _) = cifar10.load_data()
x_train = np.expand_dims(rgb2gray(y_train), axis=3)
x_test = np.expand_dims(rgb2gray(y_test), axis=3)

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(50000, 32, 32, 1)
(50000, 32, 32, 3)
(10000, 32, 32, 1)
(10000, 32, 32, 3)


## Preprocess all the data and save it
Running the code cell below will preprocess all the data and save it to file.

In [93]:
img_height, img_width = x_train.shape[1], x_train.shape[2]
print("Image Height : {}, Weight : {}".format(img_height, img_width))

image_size = img_height

Image Height : 32, Weight : 32


### Check the Version of TensorFlow and Access to GPU¶

In [94]:
import warnings

# Check TensorFlow Version
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.8.0


  


### Metrics and Loss function


In [95]:
mse_weight = 1.0 #1e-3

# set these to zeros to prevent learning
perceptual_weight = 1. / (2. * 128. * 128.) # scaling factor
attention_weight = 1.0 # 1.0


# shows the minimum value of the AB channels
def y_true_min(yt, yp):
    return K.min(yt)


# shows the maximum value of the RGB AB channels
def y_true_max(yt, yp):
    return K.max(yt)


# shows the minimum value of the predicted AB channels
def y_pred_min(yt, yp):
    return K.min(yp)


# shows the maximum value of the predicted AB channels
def y_pred_max(yt, yp):
    return K.max(yp)


def gram_matrix(x):
    assert K.ndim(x) == 4

    with K.name_scope('gram_matrix'):
        if K.image_data_format() == "channels_first":
            batch, channels, width, height = K.int_shape(x)
            features = K.batch_flatten(x)
        else:
            batch, width, height, channels = K.int_shape(x)
            features = K.batch_flatten(K.permute_dimensions(x, (0, 3, 1, 2)))

        gram = K.dot(features, K.transpose(features)) # / (channels * width * height)
    return gram


def l2_norm(x):
    return K.sqrt(K.sum(K.square(x)))


def attention_vector(x):
    if K.image_data_format() == "channels_first":
        batch, channels, width, height = K.int_shape(x)
        filters = K.batch_flatten(K.permute_dimensions(x, (1, 0, 2, 3)))  # (channels, batch*width*height)
    else:
        batch, width, height, channels = K.int_shape(x)
        filters = K.batch_flatten(K.permute_dimensions(x, (3, 0, 1, 2)))  # (channels, batch*width*height)

    filters = K.mean(K.square(filters), axis=0)  # (batch*width*height,)
    filters = filters / l2_norm(filters)  # (batch*width*height,)
    return filters


def total_loss(y_true, y_pred):
    mse_loss = mse_weight * mean_squared_error(y_true, y_pred)
    perceptual_loss = perceptual_weight * K.sum(K.square(gram_matrix(y_true) - gram_matrix(y_pred)))
    attention_loss = attention_weight * l2_norm(attention_vector(y_true) - attention_vector(y_pred))

    return mse_loss + perceptual_loss + attention_loss

### Build Model


In [96]:
def build_mobilenet_model(img_size, lr=1e-3):
    '''
    Creates a Colorizer model. Note the difference from the report
    - https://github.com/baldassarreFe/deep-koalarization/blob/master/report.pdf
    I use a long skip connection network to speed up convergence and
    boost the output quality.
    '''
    ## Encoder Model
    encoder_input = Input(shape=(img_size, img_size, 1,))
    encoder1 = Conv2D(64, (3, 3), padding='same', activation='relu', strides=(2, 2))(encoder_input)
    encoder = Conv2D(128, (3, 3), padding='same', activation='relu')(encoder1)
    encoder2 = Conv2D(128, (3, 3), padding='same', activation='relu', strides=(2, 2))(encoder)
    encoder = Conv2D(256, (3, 3), padding='same', activation='relu')(encoder2)
    encoder = Conv2D(256, (3, 3), padding='same', activation='relu', strides=(2, 2))(encoder)
    encoder = Conv2D(512, (3, 3), padding='same', activation='relu')(encoder)
    encoder = Conv2D(512, (3, 3), padding='same', activation='relu')(encoder)
    encoder = Conv2D(256, (3, 3), padding='same', activation='relu')(encoder)

    ## Input Fusion
    # Decide the image shape at runtime to allow prediction on
    # any size image, even if training is on 128x128
    batch, height, width, channels = K.int_shape(encoder)

    #mobilenet_features_ip = Input(shape=(1000,))
    #fusion = RepeatVector(height * width)(mobilenet_features_ip)
    #fusion = Reshape((height, width, 1000))(fusion)
    #fusion = concatenate([encoder, fusion], axis=-1)
    fusion = Conv2D(256, (1, 1), padding='same', activation='relu')(encoder)

    ## Decoder Model
    decoder = Conv2D(128, (3, 3), padding='same', activation='relu')(fusion)
    decoder = UpSampling2D()(decoder)
    #decoder = Conv2DTranspose(128, (4, 4), strides=(2, 2), padding='same', activation='relu')(decoder)
    decoder = concatenate([decoder, encoder2], axis=-1)
    decoder = Conv2D(64, (3, 3), padding='same', activation='relu')(decoder)
    decoder = Conv2D(64, (3, 3), padding='same', activation='relu')(decoder)
    decoder = UpSampling2D()(decoder)
    #decoder = Conv2DTranspose(64, (4, 4), strides=(2, 2), padding='same', activation='relu')(decoder)
    decoder = concatenate([decoder, encoder1], axis=-1)
    decoder = Conv2D(32, (3, 3), padding='same', activation='relu')(decoder)
    decoder = Conv2DTranspose(3, (4, 4), strides=(2, 2), padding='same', activation='tanh')(decoder)
    # decoder = Conv2D(2, (3, 3), padding='same', activation='tanh')(decoder)
    # decoder = UpSampling2D((2, 2))(decoder)

    model = Model([encoder_input], decoder, name='Colorizer')
    model.compile(optimizer=Adam(lr), loss=total_loss, metrics=[y_true_max,
                                                                y_true_min,
                                                                y_pred_max,
                                                                y_pred_min])

    print("Model built and compiled")
    return model

In [97]:
# Model Summary
model = build_mobilenet_model(image_size, 1e-3)
model.summary()

Model built and compiled
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_24 (InputLayer)           (None, 32, 32, 1)    0                                            
__________________________________________________________________________________________________
conv2d_119 (Conv2D)             (None, 16, 16, 64)   640         input_24[0][0]                   
__________________________________________________________________________________________________
conv2d_120 (Conv2D)             (None, 16, 16, 128)  73856       conv2d_119[0][0]                 
__________________________________________________________________________________________________
conv2d_121 (Conv2D)             (None, 8, 8, 128)    147584      conv2d_120[0][0]                 
____________________________________________________________________________________

## Training the Neural Network

In [99]:
import os

# Continue training if weights are available
if os.path.exists(weights_file_name):
    model.load_weights(weights_file_name)

# Use Batchwise TensorBoard callback
tensorboard = TensorBoard(batch_size=batch_size)
checkpoint = ModelCheckpoint(weights_file_name, monitor='loss', verbose=1, save_best_only=True)
callbacks_list = [checkpoint, tensorboard]

# Train Network
print(x_train.shape)
print(y_train.shape)
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=callbacks_list,
          verbose=1,
          validation_data=(x_test, y_test))

(50000, 32, 32, 1)
(50000, 32, 32, 3)
Train on 50000 samples, validate on 10000 samples
Epoch 1/100
 8500/50000 [====>.........................] - ETA: 16:41 - loss: 711206676549969.3750 - y_true_max: 255.0000 - y_true_min: 0.0000e+00 - y_pred_max: 1.0000 - y_pred_min: -1.0000

KeyboardInterrupt: 

## Test Model
Test the model against the test dataset.

In [88]:
# Load the best weights
from keras.models import load_model
best_model = load_model(weights_file_name)

# Test the model
x_test, y_test = utils.prepare_input_image_batch(test_data, batch_size=batch_size)

score = best_model.evaluate(x_test, y_test, batch_size, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

OSError: Unable to open file (unable to open file: name = 'weights/mobilenet_model_v2.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
# Show images for test data
To be added
# predictions = model.predict(x_test, batch_size, verbose=1)
#postprocess_output(x_test, predictions, image_size=image_size)

## Test Model on other images
Test the model against other images.

In [None]:
# To be added