# Final Project for IANNwTF 2022/23 


Learning to colorize grayscale dog pictures with the Stanford Dog Dataset.  

In [1]:
import tensorflow as tf
import numpy as np
import tensorboard
from PIL import Image
import os
from datetime import datetime
from skimage.color import rgb2lab, rgb2gray, lab2rgb
from skimage.io import imread, imshow
import matplotlib.pyplot as plt
import random
from tqdm import tqdm
from keras.layers import Dense, Conv2D, Reshape, GlobalAveragePooling2D, MaxPooling2D, UpSampling2D, Flatten


2023-02-23 15:06:26.982038: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-23 15:06:27.353817: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-02-23 15:06:27.523062: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-02-23 15:06:28.258279: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: li

In [2]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)


1 Physical GPUs, 1 Logical GPUs


2023-02-23 15:06:29.471482: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-23 15:06:29.513133: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-23 15:06:29.513483: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-23 15:06:29.518636: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compi

In [3]:
%load_ext tensorboard

In [4]:
# prepare data

# makes images same size and fills gaps at the edges with black pixels

def distortion_free_resize(image, img_size):
    w, h = img_size
    image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)
    # Check tha amount of padding needed to be done.
    pad_height = h - tf.shape(image)[0]
    pad_width = w - tf.shape(image)[1]

    # Only necessary if you want to do same amount of padding on both sides.
    if pad_height % 2 != 0:
        height = pad_height // 2
        pad_height_top = height + 1
        pad_height_bottom = height
    else:
        pad_height_top = pad_height_bottom = pad_height // 2

    if pad_width % 2 != 0:
        width = pad_width // 2
        pad_width_left = width + 1
        pad_width_right = width
    else:
        pad_width_left = pad_width_right = pad_width // 2

    image = tf.pad(
        image,
        paddings=[
            [pad_height_top, pad_height_bottom],
            [pad_width_left, pad_width_right],
            [0, 0],
        ],
    )

    #image = tf.transpose(image, perm=[1, 0, 2])
    return image


In [5]:
def prepare_datasets():
    # go through folders 
    # make pairs of images + breed
    # (not needed for grayscale but might need it later)
    # divide into test and train
    base_path = "data/Images"
    lookup_table_breeds = {}
    train_img = []
    train_lbl = []
    test_img = []
    test_lbl = []
    for num,folder in enumerate(os.listdir(base_path)):
        lookup_table_breeds[folder[10:]] = num
        image_paths = os.path.join(base_path, folder)
        for count, image_path in enumerate(os.listdir(image_paths)):
            path = os.path.join(image_paths, image_path)
            if 0.9 * len(list(folder)) < count:
                # makes images same size and fills gaps at the edges with black pixels
                image = distortion_free_resize(tf.image.decode_jpeg(tf.io.read_file(path),3), (128,128))
                # convert into Lab color space
                train_img.append(rgb2lab(image/255))
                train_lbl.append(lookup_table_breeds[folder[10:]])

            else:
                # makes images same size and fills gaps at the edges with black pixels
                image = distortion_free_resize(tf.image.decode_jpeg(tf.io.read_file(path),3), (128,128))
                # convert into Lab color space
                test_img.append(rgb2lab(image/255))            
                test_lbl.append(lookup_table_breeds[folder[10:]])

    train_images = tf.data.Dataset.from_tensor_slices(train_img)
    tf.data.Dataset.save(train_images, "saved_datasets/train_images")
    print(train_images)
    train_labels = tf.data.Dataset.from_tensor_slices(train_lbl)
    tf.data.Dataset.save(train_labels, "saved_datasets/train_labels")
    print(train_labels)

    test_images = tf.data.Dataset.from_tensor_slices(test_img)
    tf.data.Dataset.save(test_images, "saved_datasets/test_images")
    print(test_images)
    test_labels = tf.data.Dataset.from_tensor_slices(test_lbl)
    tf.data.Dataset.save(test_labels, "saved_datasets/test_labels")
    print(test_labels)

    return train_images, train_labels, test_images, test_labels


In [6]:
def load_datasets():
    train_images = tf.data.Dataset.load("saved_datasets/train_images")
    train_labels = tf.data.Dataset.load("saved_datasets/train_labels")
    test_images = tf.data.Dataset.load("saved_datasets/test_images")
    test_labels = tf.data.Dataset.load("saved_datasets/test_labels")

    return train_images, train_labels, test_images, test_labels

datasets_stored = True

if datasets_stored:
    train_images, train_labels, test_images, test_labels = load_datasets()
else:
    train_images, train_labels, test_images, test_labels = prepare_datasets()


In [49]:
batch_size = 64

def preprocess_dataset(images, labels):
    
    # flip each image left-right with a chance of 0.3
    images = images.map(lambda x: (tf.reverse(x, axis=[-2])) if random.random() < 0.5 else (x))
    images = images.map(lambda x: (tf.reverse(x, axis=[-3])) if random.random() < 0.5 else (x))

    # divide into greyscale input and color output

    images = images.map(lambda x: ((tf.expand_dims(x[:,:,0], -1))/100, (x[:,:,1:]/128)))
    labels = labels.map(lambda x: tf.one_hot(x, 120))
    labels = labels.map(lambda x: (tf.cast(x, tf.int16)))

    zipped = tf.data.Dataset.zip((images, labels))
    
    zipped = zipped.cache().shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return zipped



train_dataset = preprocess_dataset(train_images, train_labels)
test_dataset = preprocess_dataset(test_images, test_labels)


print(train_dataset)
print(test_dataset)

# the dataset has the format
# greyscale images (64,64), a and b terms from lab color space (64,64,2), onehotted labels (120)

<PrefetchDataset element_spec=((TensorSpec(shape=(None, 128, 128, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 128, 128, 2), dtype=tf.float32, name=None)), TensorSpec(shape=(None, 120), dtype=tf.int16, name=None))>
<PrefetchDataset element_spec=((TensorSpec(shape=(None, 128, 128, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 128, 128, 2), dtype=tf.float32, name=None)), TensorSpec(shape=(None, 120), dtype=tf.int16, name=None))>


In [50]:


# or take different crops from the pictures

# show sample pictures from dataset


In [110]:
class Low_Level_Features(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2D(64, 3, activation='relu', padding='same', strides=1) 
        self.conv2 = Conv2D(128, 3, activation='relu', padding='same', strides=1) 
        self.conv3 = Conv2D(128, 3, activation='relu', padding='same', strides=2) 
        self.conv4 = Conv2D(256, 3, activation='relu', padding='same', strides=1) 
        self.conv5 = Conv2D(256, 3, activation='relu', padding='same', strides=2) 
        self.conv6 = Conv2D(512, 3, activation='relu', padding='same', strides=1) 

    def __call__(self, x, training=False):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)

        return x

In [111]:
class Mid_Level_Features(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2D(512, 3, activation='relu', padding='same', strides=1) 
        self.conv2 = Conv2D(256, 3, activation='relu', padding='same', strides=1) 

    def __call__(self, x, training=False):
        x = self.conv1(x)
        x = self.conv2(x)

        return x

In [112]:
class High_Level_Features(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2D(512, 3, activation='relu', padding='same', strides=2) 
        self.conv2 = Conv2D(512, 3, activation='relu', padding='same', strides=1) 
        self.conv3 = Conv2D(512, 3, activation='relu', padding='same', strides=2) 
        self.conv4 = Conv2D(512, 3, activation='relu', padding='same', strides=1) 
        self.flatten = Flatten()
        self.dense1 = Dense(1024, activation="relu")
        self.dense2 = Dense(512, activation="relu")
        self.dense3 = Dense(256, activation="relu")

    def __call__(self, x, training=False):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)

        return x

In [113]:
class Classification_Network(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense1 = Dense(256, activation="relu")
        self.dense2 = Dense(120, activation="softmax")

    def __call__(self, x, training=False):
        x = self.dense1(x)
        x = self.dense2(x)
        return x

In [132]:
class Fusion_Layer(tf.keras.Model):
    def __init__(self):
        super().__init__()
        #32,256,256
        #256
        self.dense1 = Dense(256, activation="relu")
        self.conv = Conv2D(256,1, activation="relu", padding="same", strides=1)

    def __call__(self, mid_level, global_vector, training=False):
        #x = tf.concat((mid_level, global_vector), axis=-1)
        #print(x)
        #x = self.dense1(x) 
        global_vector = tf.expand_dims(tf.expand_dims(global_vector,axis=1),axis=1)
        print(mid_level, global_vector)

        x = self.conv(tf.concat((mid_level, global_vector), axis = -1))
        return x

In [133]:
class Colorization_Network(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2D(128, 3, activation='relu', padding='same', strides=1) 
        self.upsampling1 = UpSampling2D(2)
        self.conv2 = Conv2D(64, 3, activation='relu', padding='same', strides=1) 
        self.conv3 = Conv2D(64, 3, activation='relu', padding='same', strides=1)
        self.upsampling2 = UpSampling2D(2) 
        self.conv4 = Conv2D(32, 3, activation='relu', padding='same', strides=1)
        self.upsampling3 = UpSampling2D(2) 
        self.conv5 = Conv2D(2, 3, activation='relu', padding='same', strides=1) 

    def __call__(self):
        x = self.conv1(x)
        x = self.upsampling1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.upsampling2(x)
        x = self.conv4(x)
        x = self.upsampling3(x)
        x = self.conv5(x)

        return x


In [134]:
class Colorization_Model(tf.keras.Model):
    def __init__(self, optimizer, loss_function_color, loss_function_category):
        super().__init__()
        self.low_level = Low_Level_Features()        
        self.mid_level = Mid_Level_Features()
        self.high_level = High_Level_Features()
        self.fusion = Fusion_Layer()
        self.colorization = Colorization_Network()
        self.classification = Classification_Network()

        self.metrics_list = [
            tf.keras.metrics.Mean(name="loss_color"),
            tf.keras.metrics.Mean(name="loss_category")]

        self.optimizer = optimizer
        self.loss_function_color = loss_function_color
        self.loss_function_category = loss_function_category

    @property
    def metrics(self):
        return self.metrics_list

    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_state()

    def call(self, input, training=False):
        low = self.low_level(input)
        middle = self.mid_level(low)
        high = self.high_level(low)
        fused = self.fusion(middle, high)
        colored = self.colorization(fused)
        label = self.classification(high)
        return colored, label

    @tf.function
    def train_step(self, data):
        images,  label = data
        grey_image, color_image = images
        with tf.GradientTape() as tape: 
            predicted_color, predicted_label = self(grey_image, training = True)
            loss_color = self.loss_function_color(color_image, predicted_color)
            loss_category  = self.loss_function_category(label, predicted_label)

        gradients = tape.gradient([loss_color, loss_category], self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients,self.trainable_variables))
        self.metrics[0].update_state(loss_color)  
        self.metrics[1].update_state(loss_category)  
        return gradients

    @tf.function
    def test_step(self, data):
        images, label = data
        grey_image, color_image = images    
        predicted_color, predicted_label = self(grey_image, training = True)
        loss_color = self.loss_function_color(color_image, predicted_color)
        loss_category  = self.loss_function_category(label, predicted_label)            
        self.metrics[0].update_state(loss_color)  
        self.metrics[1].update_state(loss_category)  
        return predicted_color, color_image, predicted_label


In [135]:
# autoencoder from https://arxiv.org/pdf/1712.03400.pdf

# model

# create the whole autoencoder model
# (steal from https://towardsdatascience.com/image-colorization-using-convolutional-autoencoders-fdabc1cb1dbe )

#encoder
class Encoder(tf.keras.Model):
  def __init__(self):
    super().__init__()
    #input 1,128,128
    self.conv1 = Conv2D(64, 3, activation='relu', padding='same', strides=1) 
    self.conv2 = Conv2D(128, 3, activation='relu', padding='same', strides=2) 
    self.conv3 = Conv2D(128, 3, activation='relu', padding='same', strides=1) 
    self.conv4 = Conv2D(256, 3, activation='relu', padding='same', strides=2) 
    self.conv5 = Conv2D(256, 3, activation='relu', padding='same', strides=1) 
    self.conv6 = Conv2D(512, 3, activation='relu', padding='same', strides=1) 
    self.conv7 = Conv2D(512, 3, activation='relu', padding='same', strides=1) 
    self.conv8 = Conv2D(256, 3, activation='relu', padding='same', strides=1) 

    self.flatten = Flatten()



  @tf.function
  def __call__(self, x, training=False):
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.conv4(x)
    x = self.conv5(x)
    x = self.conv6(x)
    x = self.conv7(x)
    x = self.conv8(x)
    x = self.flatten(x)
    return x


# decoder
class Decoder(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.reshape = Reshape((32, 32, 256))
        
        self.conv1 = Conv2D(256, 3, activation="relu", padding="same", strides=1)
        self.conv2 = Conv2D(128, 3, activation="relu", padding="same")
        self.upsampling2 = UpSampling2D(2)
        self.conv3 = Conv2D(64, 3, activation="relu", padding="same")
        self.conv4 = Conv2D(64, 3, activation="tanh", padding="same")
        self.upsampling4 = UpSampling2D(2)
        self.conv5 = Conv2D(32, 3, activation="tanh", padding="same")
        self.conv5 = Conv2D(2, 3, activation="tanh", padding="same")


    @tf.function
    def __call__(self, x, training=False):
        x = self.reshape(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.upsampling2(x)        
        x = self.conv3(x)
        x = self.conv4(x)        
        x = self.upsampling4(x)
        x = self.conv5(x)
        return x

class Autoencoder(tf.keras.Model):
  def __init__(self, optimizer, loss_function):
    super().__init__()
    self.enc = Encoder()
    self.dec = Decoder()

    self.metrics_list = [
      tf.keras.metrics.Mean(name="loss")]

    self.optimizer = optimizer
    self.loss_function = loss_function

  @property
  def metrics(self):
    return self.metrics_list
  
  def get_encoder(self):
    return self.enc
   
  def get_decoder(self):
    return self.dec
    
  def reset_metrics(self):
     for metric in self.metrics:
        metric.reset_state()

  def call(self, input, training=False):
    embedding = self.enc(input)
    output = self.dec(embedding)
    return output

  @tf.function
  def train_step(self, data):
    images,  label = data
    grey_image, color_image = images
    with tf.GradientTape() as tape: 
      prediction = self(grey_image, training = True)
      loss = self.loss_function(color_image, prediction)

    gradients = tape.gradient(loss, self.trainable_variables)
    self.optimizer.apply_gradients(zip(gradients,self.trainable_variables))
    self.metrics[0].update_state(loss)  
    return gradients

  @tf.function
  def test_step(self, data):
    images, label = data
    grey_image, color_image = images    
    prediction = self(grey_image, training = False)
    loss = self.loss_function(color_image, prediction)
    self.metrics[0].update_state(loss)
    return prediction, color_image


In [136]:
# training loop

# log results with tensorboard 
# save model to be able to reuse it

def training_loop(model, train_ds, test_ds, epochs, train_summary_writer, test_summary_writer, save_path):
    for epoch in range(epochs):
        model.reset_metrics()

        for data in tqdm(train_ds, position=0, leave=True):
            model.train_step(data)

        with train_summary_writer.as_default():
            tf.summary.scalar(model.metrics[0].name, model.metrics[0].result(), step=epoch)
            tf.summary.scalar(model.metrics[1].name, model.metrics[1].result(), step=epoch)
        
        print("Epoch: ", epoch+1)
        print("Loss Color: ", model.metrics[0].result().numpy(), "(Train)")
        print("Loss Category: ", model.metrics[1].result().numpy(), "(Train)")
        model.reset_metrics()

        for data in tqdm(test_ds, position=0, leave=True):
            prediction, target = model.test_step(data)

        with test_summary_writer.as_default():
            tf.summary.scalar(model.metrics[0].name, model.metrics[0].result(), step=epoch)
            tf.summary.scalar(model.metrics[1].name, model.metrics[1].result(), step=epoch)
            
        print("Loss Color: ", model.metrics[0].result().numpy(), "(Test)")
        print("Loss Category: ", model.metrics[1].result().numpy(), "(Test)")



In [137]:
# train

epochs = 30
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_function_color = tf.keras.losses.MeanSquaredError()
loss_function_category = tf.keras.losses.CategoricalCrossentropy()
#autoencoder = Autoencoder(optimizer=optimizer, loss_function=loss_function)

colorization_model= Colorization_Model(optimizer=optimizer, loss_function_color=loss_function_color, loss_function_category=loss_function_category)

current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
save_path = f"models/{current_time}"
train_log_path = f"logs/{current_time}/train"
test_log_path = f"logs/{current_time}/test"
train_summary_writer = tf.summary.create_file_writer(train_log_path)
test_summary_writer = tf.summary.create_file_writer(test_log_path)
training_loop(colorization_model, train_dataset, test_dataset, epochs, train_summary_writer, test_summary_writer, save_path)

  0%|          | 0/282 [00:00<?, ?it/s]

Tensor("colorization__model_6/conv2d_228/Relu:0", shape=(64, 32, 32, 256), dtype=float32) Tensor("colorization__model_6/ExpandDims_1:0", shape=(64, 1, 1, 256), dtype=float32)





ValueError: in user code:

    File "/tmp/ipykernel_5121/1995622918.py", line 41, in train_step  *
        predicted_color, predicted_label = self(grey_image, training = True)
    File "/home/lotta/apps/anaconda3/envs/tf-gpu/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "/tmp/__autograph_generated_filebhopfgqs.py", line 13, in tf__call
        fused = ag__.converted_call(ag__.ld(self).fusion, (ag__.ld(middle), ag__.ld(high)), None, fscope)
    File "/tmp/__autograph_generated_file5x0viw1p.py", line 12, in tf____call__
        x = ag__.converted_call(ag__.ld(self).conv, (ag__.converted_call(ag__.ld(tf).concat, ((ag__.ld(mid_level), ag__.ld(global_vector)),), dict(axis=-1), fscope),), None, fscope)

    ValueError: Exception encountered when calling layer "colorization__model_6" "                 f"(type Colorization_Model).
    
    in user code:
    
        File "/tmp/ipykernel_5121/1995622918.py", line 31, in call  *
            fused = self.fusion(middle, high)
        File "/tmp/ipykernel_5121/1992193503.py", line 16, in __call__  *
            x = self.conv(tf.concat((mid_level, global_vector), axis = -1))
    
        ValueError: Dimension 1 in both shapes must be equal, but are 32 and 1. Shapes are [64,32,32] and [64,1,1]. for '{{node colorization__model_6/concat}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32](colorization__model_6/conv2d_228/Relu, colorization__model_6/ExpandDims_1, colorization__model_6/concat/axis)' with input shapes: [64,32,32,256], [64,1,1,256], [] and with computed input tensors: input[2] = <-1>.
    
    
    Call arguments received by layer "colorization__model_6" "                 f"(type Colorization_Model):
      • input=tf.Tensor(shape=(64, 128, 128, 1), dtype=float32)
      • training=True


In [None]:
%tensorboard --logdir logs