- The architecture of the model used is as follows:
    - The model follows an encoder-decoder architecture, where the encoder extracts the low-level features from the input grayscale image and the decoder predicts the ab channels in the LAB color space.
    - The fusion layer concatenates the low-level features, which capture texture information, with the high-level features, which capture semantic information; this helps the decoder predict the ab channels more accurately.
    - The encoder consists of 7 convolution layers, which downsample the image and extract the low-level features.
    - The decoder consists of 3 convolution layers (conv8_1, conv8_2, conv8_3), which upsample the low-level feature maps to predict the ab channels.
    - The output of the decoder (the predicted ab channels) is concatenated with the L channel of the input image to obtain the colorized image.

- During testing, the pre-trained model takes an input grayscale image, passes it through the encoder and decoder, and predicts the ab color channels, which are then concatenated with the L channel of the input image to obtain the final colorized image.

In [5]:
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Colorize an image with the pre-trained Caffe colorization network via OpenCV's
# dnn module: load the network, feed it the L (lightness) channel of the input,
# predict the a/b chroma channels, and recombine them with the original L channel.

convNet_architecture_path = 'models/colorization_deploy_v2.prototxt'    # Caffe definition of the network architecture
preTrained_weights_path = 'models/colorization_release_v2.caffemodel'   # Pre-trained weights used to predict the a,b channels
clusterCenters_path = 'models/hull_pts.npy'    # Cluster centres of the quantized a,b color space (reshaped to 313 bins below)

img_path = 'nature.jpg'

model = cv2.dnn.readNetFromCaffe(convNet_architecture_path, preTrained_weights_path)
points = np.load(clusterCenters_path)

# Install the cluster centres as a 1x1 convolution kernel (2 output channels,
# 313 input bins) in 'class8_ab', the layer that turns the per-bin predictions
# into a,b values.
points = points.transpose().reshape(2, 313, 1, 1)
model.getLayer(model.getLayerId('class8_ab')).blobs = [points.astype(np.float32)]
# 'conv8_313_rh' gets a constant factor of 2.606 for each of the 313 bins
# (value taken from the reference sample for this model).
model.getLayer(model.getLayerId('conv8_313_rh')).blobs = [np.full([1, 313], 2.606, dtype="float32")]

img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("Could not read image: {!r}".format(img_path))

# Scale to [0, 1] float32 so that cvtColor yields L in [0, 100].
bw_img = img.astype("float32") / 255.0
lab_img = cv2.cvtColor(bw_img, cv2.COLOR_BGR2LAB)

# The network input is 224x224 (see the resize below).
resized_img = cv2.resize(lab_img, (224, 224))
L = cv2.split(resized_img)[0]       # L channel of the resized LAB image
L -= 50                             # mean-centre L; the reference preprocessing for this model subtracts 50

model.setInput(cv2.dnn.blobFromImage(L))    # The L channel is the only input to the network
# Take the first (only) item of the batch and reorder from (C, H, W) to (H, W, C).
ab_channel = model.forward()[0, :, :, :].transpose((1, 2, 0))

# Upsample the predicted a,b channels back to the original image size and pair
# them with the full-resolution (un-centred) L channel.
ab_channel = cv2.resize(ab_channel, (img.shape[1], img.shape[0]))
L = cv2.split(lab_img)[0]

colorized = np.concatenate((L[:, :, np.newaxis], ab_channel), axis=2)   # L + predicted ab -> full LAB image
colorized = cv2.cvtColor(colorized, cv2.COLOR_LAB2BGR)  # Convert LAB to BGR, the channel order cv2.imshow expects
# Clamp to [0, 1] before scaling: out-of-range values would otherwise wrap
# around when cast to uint8 and show up as speckle artefacts.
colorized = np.clip(colorized, 0, 1)
colorized = (255.0 * colorized).astype("uint8")

cv2.imshow("Input Image", bw_img)
cv2.imshow("Output Image", colorized)
cv2.waitKey(0)



(224, 224)


-1