# **CAP 5404 Deep Learning for Computer Graphics**
# *Project II. Neural Networks & Computer Graphics*

Pranath Reddy Kumbam (**UFID**: 8512-0977)


## Part 4: Transfer Learning for Colorization

### Load Datasets

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so the
# project's data and output folders can be reached through the file system.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Path to Working Directory
# IPython line magic: changes the notebook's working directory so that the
# relative './Data/...' and './Out/...' paths used below resolve inside the
# project folder. The path is relative to the current directory.
%cd drive/My Drive/Acad/DLCG/Project2

/content/drive/My Drive/Acad/DLCG/Project2


In [None]:
# Import libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import math
import random
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from skimage.metrics import structural_similarity as ssim

In [None]:
# Reshape to single channel
def shape(images):
    """Give every image an explicit leading channel axis of size 1.

    Each element of `images` is reshaped to (1, 128, 128) and the results
    are stacked into a single array, so a stack of N 128x128 images comes
    back with shape (N, 1, 128, 128).
    """
    return np.asarray([img.reshape(1, 128, 128) for img in images])

# Import Data
# The stored arrays are divided by 255 to bring them into [0, 1] and then
# given an explicit channel axis by shape().
l_train = shape(np.load('./Data/arrays/NCD/L_train.npy')/255)
a_train = shape(np.load('./Data/arrays/NCD/a_train.npy')/255)
b_train = shape(np.load('./Data/arrays/NCD/b_train.npy')/255)

l_test = shape(np.load('./Data/arrays/NCD/L_test.npy')/255)
a_test = shape(np.load('./Data/arrays/NCD/a_test.npy')/255)
b_test = shape(np.load('./Data/arrays/NCD/b_test.npy')/255)

# Network input is the L channel; the target stacks a and b on the channel axis
x_train = l_train
y_train = np.concatenate((a_train, b_train), axis=1)

# Split the held-out arrays in half: first half -> validation, second half -> test.
# A single split index is used for L, a and b so the three channels of every
# image always land in the same subset.
split = int(l_test.shape[0]*0.5)
x_val, x_test = l_test[:split], l_test[split:]
y_val = np.concatenate((a_test[:split], b_test[:split]), axis=1)
y_test = np.concatenate((a_test[split:], b_test[split:]), axis=1)

# Shuffle Data (the same random_state keeps inputs and targets aligned)
x_train, y_train = shuffle(x_train, y_train, random_state=0)
x_test, y_test = shuffle(x_test, y_test, random_state=0)

# Split into batches
# Only complete batches are kept; any trailing samples that do not fill a
# whole batch are dropped so the result stacks into one regular array.
batch_size = 100
n_batches = x_train.shape[0] // batch_size
batch_starts = range(0, n_batches*batch_size, batch_size)
x_train = np.asarray([x_train[s:s+batch_size] for s in batch_starts])
y_train = np.asarray([y_train[s:s+batch_size] for s in batch_starts])

# Print data shape
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
print(x_val.shape)
print(y_val.shape)

(57, 100, 1, 128, 128)
(57, 100, 2, 128, 128)
(73, 1, 128, 128)
(73, 2, 128, 128)
(72, 1, 128, 128)
(72, 2, 128, 128)


### Define and load pre-trained Models

In [None]:
# Models
class _ColorizationAutoencoder(nn.Module):
    """Convolutional autoencoder mapping an L channel to the a/b channels.

    Shared implementation behind DCA and DCA2, which differ only in the
    activation applied to the decoder output.

    The encoder reduces a (N, 1, 128, 128) input to a 64x5x5 feature map,
    passes it through a 1600 -> 512 -> 1600 fully connected bottleneck, and
    the decoder upsamples it back to (N, 2, 128, 128). The flattened size of
    1600 and the reshape in forward() are fixed for 128x128 inputs.

    The layer order inside both nn.Sequential containers is kept exactly as
    in the original definitions so parameter names (e.g. 'encoder.15.weight')
    stay the same.

    Args:
        output_activation: module applied to the final decoder output.
    """

    def __init__(self, output_activation):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 8, 4, stride=2, padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.Conv2d(8, 16, 4, stride=2, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 32, 4, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, 4, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, 4),
            nn.BatchNorm2d(64),
            nn.Flatten(),
            nn.Linear(1600, 512),
            nn.BatchNorm1d(512),
            nn.Linear(512, 1600)
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, 4),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 32, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 8, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(8, 2, 4, stride=2, padding=1),
            output_activation
        )

    def forward(self, x):
        """Return the predicted 2-channel output for a batch of L images."""
        x = self.encoder(x)
        # Undo the Flatten: 1600 features -> 64 channels of 5x5
        x = x.reshape(-1, 64, 5, 5)
        x = self.decoder(x)
        return x


class DCA(_ColorizationAutoencoder):
    """Autoencoder with a Sigmoid output, i.e. predictions in [0, 1]."""

    def __init__(self):
        super().__init__(nn.Sigmoid())


class DCA2(_ColorizationAutoencoder):
    """Autoencoder with a Tanh output, i.e. predictions in [-1, 1]."""

    def __init__(self):
        super().__init__(nn.Tanh())

# Load the pre-trained models and place them on the available device.
# The .pth files are read with torch.load and assigned directly as the models,
# so no freshly initialised DCA()/DCA2() instance is needed first.
# map_location lets the same cell run on a CPU-only runtime.
# NOTE(review): torch.load unpickles arbitrary objects -- only load files you
# trust. Recent PyTorch releases default to weights_only=True, which rejects
# whole-module pickles; pass weights_only=False there if loading fails.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model1 = torch.load('./Out/DCA_Faces_Sigmoid.pth', map_location=device)
model2 = torch.load('./Out/DCA_Faces_Tanh.pth', map_location=device)

### Train/Save Tanh Model on NCD and Test 

In [None]:
# The rest of this cell trains and evaluates `model`; here it is the network
# loaded from './Out/DCA_Faces_Tanh.pth' (model2).
model = model2

# Function to calculate PSNR
def calculate_psnr(img1, img2, max_value=1.0):
    """Return the peak signal-to-noise ratio (in dB) between two arrays.

    Args:
        img1: first image or stack of images (array-like).
        img2: second image or stack of images, same shape as `img1`.
        max_value: largest possible pixel value. Defaults to 1.0, which
            matches data scaled to [0, 1]; use 255 for 8-bit data.

    Returns:
        20 * log10(max_value / sqrt(MSE)), where the MSE is taken over every
        element of the inputs, or float('inf') when the inputs are identical.
    """
    # Work in float64 so low-precision inputs do not lose accuracy
    img1 = np.asarray(img1, dtype=np.float64)
    img2 = np.asarray(img2, dtype=np.float64)
    mse = np.mean((img1 - img2)**2)
    if mse == 0:
        # Identical inputs: log10 of a division by zero is undefined
        return float('inf')
    return 20 * math.log10(max_value / math.sqrt(mse))

# Run on the GPU when one is present, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss Function
criteria = torch.nn.MSELoss()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
n_epochs = 1000

# Training
loss_array = []
# Make sure BatchNorm uses (and updates from) batch statistics while training
model.train()
pbar = tqdm(range(1, n_epochs+1))
for epoch in pbar:
    train_loss = 0.0

    for x_batch, y_batch in zip(x_train, y_train):

        data = torch.from_numpy(x_batch.astype('float32')).to(device)
        # Scale a and b to range [-1,1] as mentioned in the project description
        labels = torch.tensor((y_batch-0.5)*2, dtype=torch.float, device=device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criteria(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() yields a plain float; summing the loss tensor itself would
        # keep every batch's autograd graph alive until the end of the epoch
        train_loss += loss.item()

    train_loss_avg = train_loss/x_train.shape[0]
    loss_array.append(train_loss_avg)
    pbar.set_postfix({ 'Training Loss': train_loss_avg })

# Export Training Loss for Plot
np.save('./Out/DCA_train_loss_NCD_Tanh.npy', loss_array)

# Export Trained Model for Transfer Learning
torch.save(model, './Out/DCA_NCD_Tanh.pth')

# Testing on Test Data
# eval() makes BatchNorm use its running statistics instead of the statistics
# of the test batch; no_grad() skips building an autograd graph for inference.
model.eval()
test_data = torch.from_numpy(x_test.astype('float32')).to(device)
test_labels = torch.tensor(((y_test-0.5)*2), dtype=torch.float, device=device)
with torch.no_grad():
    test_outputs1 = model(test_data)
test_outputs1 = test_outputs1.cpu().numpy()
# Scale it back to [0,1] for testing
test_outputs1 = (test_outputs1/2)+0.5

pred_a, pred_b = test_outputs1[:, 0], test_outputs1[:, 1]
true_a, true_b = y_test[:, 0], y_test[:, 1]
n_test = test_outputs1.shape[0]

a_loss = calculate_psnr(pred_a, true_a)
b_loss = calculate_psnr(pred_b, true_b)
# SSIM is computed image by image and averaged: passing the stacked
# (N,128,128) array would be treated as a single 3-D volume and the sliding
# window would mix unrelated images. data_range is given explicitly because
# the inputs are floating point images in [0,1].
a_loss2 = float(np.mean([ssim(p.astype(np.float64), t, data_range=1.0)
                         for p, t in zip(pred_a, true_a)]))
b_loss2 = float(np.mean([ssim(p.astype(np.float64), t, data_range=1.0)
                         for p, t in zip(pred_b, true_b)]))
# Sum of per-image MSE; divided by the number of test images when printed
a_loss3 = sum(mean_squared_error(p, t) for p, t in zip(pred_a, true_a))
b_loss3 = sum(mean_squared_error(p, t) for p, t in zip(pred_b, true_b))
print("Test a PSNR Result: " + str(a_loss))
print("Test b PSNR Result: " + str(b_loss))
print("Test a SSIM Result: " + str(a_loss2))
print("Test b SSIM Result: " + str(b_loss2))
print("Test a MSE Result: " + str(a_loss3/n_test))
print("Test b MSE Result: " + str(b_loss3/n_test))

  0%|          | 0/1000 [00:00<?, ?it/s]



Test a PSNR Result: 24.78775010723101
Test b PSNR Result: 25.229870835519645
Test a SSIM Result: 0.8033417885131019
Test b SSIM Result: 0.8370629943623971
Test a MSE Result: 0.003320664423298037
Test b MSE Result: 0.002999251719068501


### Visualizing Reconstructions for Tanh Model

In [None]:
import cv2
from google.colab.patches import cv2_imshow

# Reconstruction
# Make sure the output folder exists before any figure is written to it
os.makedirs('./Out/NCD_Recon/Tanh', exist_ok=True)

# Scale the [0,1] channels back to [0,255] and move channels last (N,H,W,C)
test_L = (x_test*255)
test_L = np.transpose(test_L, (0,2,3,1))
test_LAB = np.concatenate((x_test*255, y_test*255), axis=1)
test_LAB = np.transpose(test_LAB, (0,2,3,1))
# Round before casting: a bare astype('uint8') truncates, so a value such as
# 254.9999 left by the /255 -> *255 round trip would become 254
test_LAB = np.ascontiguousarray(np.clip(np.rint(test_LAB), 0, 255).astype('uint8'))

# The predicted Lab stack is the same for every image, so build it once here
# instead of once per loop iteration
y_pred = test_outputs1
pred_LAB = np.concatenate((x_test*255, y_pred*255), axis=1)
pred_LAB = np.transpose(pred_LAB, (0,2,3,1))
pred_LAB = np.ascontiguousarray(np.clip(np.rint(pred_LAB), 0, 255).astype('uint8'))

# Plot Reconstructions
for i in tqdm(range(x_test.shape[0])):
  img_l = test_L[i]
  img_gt = cv2.cvtColor(test_LAB[i], cv2.COLOR_LAB2RGB)
  img_pred = cv2.cvtColor(pred_LAB[i], cv2.COLOR_LAB2RGB)

  fig = plt.figure(figsize=(8, 8))
  ax1 = fig.add_subplot(1, 3, 1)
  plt.imshow(img_l.reshape(128,128), cmap='gray')
  ax2 = fig.add_subplot(1, 3, 2)
  plt.imshow(img_pred)
  ax3 = fig.add_subplot(1, 3, 3)
  plt.imshow(img_gt)
  ax1.title.set_text('Input grayscale image')
  ax2.title.set_text('Colorized image')
  ax3.title.set_text('Ground truth')
  plt.savefig('./Out/NCD_Recon/Tanh/Result_' + str(i+1) + '.png', format='png', dpi=300)
  # Display the figure, then release it so figures do not pile up in memory
  plt.show()
  plt.close(fig)

Output hidden; open in https://colab.research.google.com to view.

### Train/Save Sigmoid Model on NCD and Test 

In [None]:
# The rest of this cell trains and evaluates `model`; here it is the network
# loaded from './Out/DCA_Faces_Sigmoid.pth' (model1).
model = model1

# Function to calculate PSNR
def calculate_psnr(img1, img2, max_value=1.0):
    """Return the peak signal-to-noise ratio (in dB) between two arrays.

    Args:
        img1: first image or stack of images (array-like).
        img2: second image or stack of images, same shape as `img1`.
        max_value: largest possible pixel value. Defaults to 1.0, which
            matches data scaled to [0, 1]; use 255 for 8-bit data.

    Returns:
        20 * log10(max_value / sqrt(MSE)), where the MSE is taken over every
        element of the inputs, or float('inf') when the inputs are identical.
    """
    # Work in float64 so low-precision inputs do not lose accuracy
    img1 = np.asarray(img1, dtype=np.float64)
    img2 = np.asarray(img2, dtype=np.float64)
    mse = np.mean((img1 - img2)**2)
    if mse == 0:
        # Identical inputs: log10 of a division by zero is undefined
        return float('inf')
    return 20 * math.log10(max_value / math.sqrt(mse))

# Run on the GPU when one is present, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss Function
criteria = torch.nn.MSELoss()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
n_epochs = 1000

# Training
loss_array = []
# Make sure BatchNorm uses (and updates from) batch statistics while training
model.train()
pbar = tqdm(range(1, n_epochs+1))
for epoch in pbar:
    train_loss = 0.0

    for x_batch, y_batch in zip(x_train, y_train):

        data = torch.from_numpy(x_batch.astype('float32')).to(device)
        # Targets stay in [0,1], the range of the Sigmoid output
        labels = torch.tensor(y_batch, dtype=torch.float, device=device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criteria(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() yields a plain float; summing the loss tensor itself would
        # keep every batch's autograd graph alive until the end of the epoch
        train_loss += loss.item()

    train_loss_avg = train_loss/x_train.shape[0]
    loss_array.append(train_loss_avg)
    pbar.set_postfix({ 'Training Loss': train_loss_avg })

# Export Training Loss for Plot
np.save('./Out/DCA_train_loss_NCD_Sigmoid.npy', loss_array)

# Export Trained Model for Transfer Learning
torch.save(model, './Out/DCA_NCD_Sigmoid.pth')

# Testing on Test Data
# eval() makes BatchNorm use its running statistics instead of the statistics
# of the test batch; no_grad() skips building an autograd graph for inference.
model.eval()
test_data = torch.from_numpy(x_test.astype('float32')).to(device)
test_labels = torch.tensor(y_test, dtype=torch.float, device=device)
with torch.no_grad():
    test_outputs2 = model(test_data)
test_outputs2 = test_outputs2.cpu().numpy()

pred_a, pred_b = test_outputs2[:, 0], test_outputs2[:, 1]
true_a, true_b = y_test[:, 0], y_test[:, 1]
n_test = test_outputs2.shape[0]

a_loss = calculate_psnr(pred_a, true_a)
b_loss = calculate_psnr(pred_b, true_b)
# SSIM is computed image by image and averaged: passing the stacked
# (N,128,128) array would be treated as a single 3-D volume and the sliding
# window would mix unrelated images. data_range is given explicitly because
# the inputs are floating point images in [0,1].
a_loss2 = float(np.mean([ssim(p.astype(np.float64), t, data_range=1.0)
                         for p, t in zip(pred_a, true_a)]))
b_loss2 = float(np.mean([ssim(p.astype(np.float64), t, data_range=1.0)
                         for p, t in zip(pred_b, true_b)]))
# Sum of per-image MSE against the same y_test reference used for PSNR and
# SSIM; divided by the number of test images when printed
a_loss3 = sum(mean_squared_error(p, t) for p, t in zip(pred_a, true_a))
b_loss3 = sum(mean_squared_error(p, t) for p, t in zip(pred_b, true_b))
print("Test a PSNR Result: " + str(a_loss))
print("Test b PSNR Result: " + str(b_loss))
print("Test a SSIM Result: " + str(a_loss2))
print("Test b SSIM Result: " + str(b_loss2))
print("Test a MSE Result: " + str(a_loss3/n_test))
print("Test b MSE Result: " + str(b_loss3/n_test))

  0%|          | 0/1000 [00:00<?, ?it/s]



Test a PSNR Result: 22.973361160442238
Test b PSNR Result: 24.74351522727649
Test a SSIM Result: 0.7061590330740382
Test b SSIM Result: 0.8046572475751508
Test a MSE Result: 0.005042709130116049
Test b MSE Result: 0.003354659480639185


### Visualizing Reconstructions for Sigmoid Model

In [None]:
import cv2
from google.colab.patches import cv2_imshow

# Reconstruction
# Make sure the output folder exists before any figure is written to it
os.makedirs('./Out/NCD_Recon/Sigmoid', exist_ok=True)

# Scale the [0,1] channels back to [0,255] and move channels last (N,H,W,C)
test_L = (x_test*255)
test_L = np.transpose(test_L, (0,2,3,1))
test_LAB = np.concatenate((x_test*255, y_test*255), axis=1)
test_LAB = np.transpose(test_LAB, (0,2,3,1))
# Round before casting: a bare astype('uint8') truncates, so a value such as
# 254.9999 left by the /255 -> *255 round trip would become 254
test_LAB = np.ascontiguousarray(np.clip(np.rint(test_LAB), 0, 255).astype('uint8'))

# The predicted Lab stack is the same for every image, so build it once here
# instead of once per loop iteration
y_pred = test_outputs2
pred_LAB = np.concatenate((x_test*255, y_pred*255), axis=1)
pred_LAB = np.transpose(pred_LAB, (0,2,3,1))
pred_LAB = np.ascontiguousarray(np.clip(np.rint(pred_LAB), 0, 255).astype('uint8'))

# Plot Reconstructions
for i in tqdm(range(x_test.shape[0])):
  img_l = test_L[i]
  img_gt = cv2.cvtColor(test_LAB[i], cv2.COLOR_LAB2RGB)
  img_pred = cv2.cvtColor(pred_LAB[i], cv2.COLOR_LAB2RGB)

  fig = plt.figure(figsize=(8, 8))
  ax1 = fig.add_subplot(1, 3, 1)
  plt.imshow(img_l.reshape(128,128), cmap='gray')
  ax2 = fig.add_subplot(1, 3, 2)
  plt.imshow(img_pred)
  ax3 = fig.add_subplot(1, 3, 3)
  plt.imshow(img_gt)
  ax1.title.set_text('Input grayscale image')
  ax2.title.set_text('Colorized image')
  ax3.title.set_text('Ground truth')
  plt.savefig('./Out/NCD_Recon/Sigmoid/Result_' + str(i+1) + '.png', format='png', dpi=300)
  # Display the figure, then release it so figures do not pile up in memory
  plt.show()
  plt.close(fig)

Output hidden; open in https://colab.research.google.com to view.

### Compare Visualizations

In [None]:
import cv2
from google.colab.patches import cv2_imshow

# Reconstruction
# Make sure the output folder exists before any figure is written to it
os.makedirs('./Out/NCD_Recon/Compare', exist_ok=True)

# Scale the [0,1] channels back to [0,255] and move channels last (N,H,W,C)
test_L = (x_test*255)
test_L = np.transpose(test_L, (0,2,3,1))
test_LAB = np.concatenate((x_test*255, y_test*255), axis=1)
test_LAB = np.transpose(test_LAB, (0,2,3,1))
# Round before casting: a bare astype('uint8') truncates, so a value such as
# 254.9999 left by the /255 -> *255 round trip would become 254
test_LAB = np.ascontiguousarray(np.clip(np.rint(test_LAB), 0, 255).astype('uint8'))

# Both predicted Lab stacks are the same for every image, so build them once
# here instead of once per loop iteration
y_pred1 = test_outputs1
y_pred2 = test_outputs2
pred_LAB1 = np.concatenate((x_test*255, y_pred1*255), axis=1)
pred_LAB1 = np.transpose(pred_LAB1, (0,2,3,1))
pred_LAB1 = np.ascontiguousarray(np.clip(np.rint(pred_LAB1), 0, 255).astype('uint8'))
pred_LAB2 = np.concatenate((x_test*255, y_pred2*255), axis=1)
pred_LAB2 = np.transpose(pred_LAB2, (0,2,3,1))
pred_LAB2 = np.ascontiguousarray(np.clip(np.rint(pred_LAB2), 0, 255).astype('uint8'))

for i in tqdm(range(x_test.shape[0])):
  img_l = test_L[i]
  img_gt = cv2.cvtColor(test_LAB[i], cv2.COLOR_LAB2RGB)
  img_pred1 = cv2.cvtColor(pred_LAB1[i], cv2.COLOR_LAB2RGB)
  img_pred2 = cv2.cvtColor(pred_LAB2[i], cv2.COLOR_LAB2RGB)

  fig = plt.figure(figsize=(12, 8))
  ax1 = fig.add_subplot(1, 4, 1)
  plt.imshow(img_l.reshape(128,128), cmap='gray')
  ax2 = fig.add_subplot(1, 4, 2)
  plt.imshow(img_pred1)
  ax3 = fig.add_subplot(1, 4, 3)
  plt.imshow(img_pred2)
  ax4 = fig.add_subplot(1, 4, 4)
  plt.imshow(img_gt)
  ax1.title.set_text('Input grayscale image')
  ax2.title.set_text('Colorized image (Tanh)')
  ax3.title.set_text('Colorized image (Sigmoid)')
  ax4.title.set_text('Ground truth')
  plt.savefig('./Out/NCD_Recon/Compare/Result_' + str(i+1) + '.png', format='png', dpi=300)
  # Display the figure, then release it so figures do not pile up in memory
  plt.show()
  plt.close(fig)

Output hidden; open in https://colab.research.google.com to view.