# **CAP 5404 Deep Learning for Computer Graphics**
# *Project II. Neural Networks & Computer Graphics*

Pranath Reddy Kumbam (**UFID**: 8512-0977)



## Part 3: Colorization with Tanh

### Load Datasets

In [None]:
# Mount Google Drive at /content/drive so the notebook can reach files stored there
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Change into the project working directory on the mounted Drive.
# NOTE(review): the path is relative, so this presumably expects the current
# directory to be /content -- confirm before running the cell a second time.
%cd drive/My Drive/Acad/DLCG/Project2

/content/drive/My Drive/Acad/DLCG/Project2


In [None]:
# Import libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
import math
import random
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:
# Reshape to single channel
def shape(images):
    """Give every image an explicit leading channel axis.

    Each element of ``images`` is reshaped to ``(1, 128, 128)`` and the
    reshaped samples are stacked into a single NumPy array, which is returned.
    """
    return np.asarray([img.reshape(1, 128, 128) for img in images])

# Import Data
# Each channel array is divided by 255 and given an explicit channel axis.
# NOTE(review): the /255 presumably maps 8-bit channel values to [0, 1] -- confirm
# against the script that produced the .npy files.
l_train = shape(np.load('./Data/arrays/Faces/L_train.npy')/255)
a_train = shape(np.load('./Data/arrays/Faces/a_train.npy')/255)
b_train = shape(np.load('./Data/arrays/Faces/b_train.npy')/255)

l_test = shape(np.load('./Data/arrays/Faces/L_test.npy')/255)
a_test = shape(np.load('./Data/arrays/Faces/a_test.npy')/255)
b_test = shape(np.load('./Data/arrays/Faces/b_test.npy')/255)

# Network input is the L channel; the target is a and b stacked on the channel axis
x_train = l_train
y_train = np.concatenate((a_train, b_train), axis=1)

# Split the held-out images once: first half -> validation, second half -> test
half = l_test.shape[0] // 2
x_val = l_test[:half]
y_val = np.concatenate((a_test[:half], b_test[:half]), axis=1)
x_test = l_test[half:]
y_test = np.concatenate((a_test[half:], b_test[half:]), axis=1)

# Shuffle Data (inputs and targets are permuted together)
x_train, y_train = shuffle(x_train, y_train, random_state=0)
x_test, y_test = shuffle(x_test, y_test, random_state=0)

# Split into batches
# Samples that do not fill a complete batch are dropped, as before; a reshape
# replaces the manual slicing loop and avoids copying the training set again.
batch_size = 100
n_batches = x_train.shape[0] // batch_size
n_used = n_batches * batch_size
x_train = x_train[:n_used].reshape(n_batches, batch_size, *x_train.shape[1:])
y_train = y_train[:n_used].reshape(n_batches, batch_size, *y_train.shape[1:])

# Print data shape
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
print(x_val.shape)
print(y_val.shape)

(60, 100, 1, 128, 128)
(60, 100, 2, 128, 128)
(75, 1, 128, 128)
(75, 2, 128, 128)
(75, 1, 128, 128)
(75, 2, 128, 128)


### Define Model

In [None]:
# As described in the project description
# A simple encoder-decoder model with five conv blocks and batchnorm
class DCA(nn.Module):
    """Convolutional encoder-decoder mapping 1-channel input to 2-channel output.

    Encoder: four stride-2 ``Conv2d -> BatchNorm2d -> ReLU`` stages, one
    un-padded 4x4 convolution with batch norm, then a flattened
    ``Linear(1600, 512) -> BatchNorm1d -> Linear(512, 1600)`` bottleneck.
    Decoder: five transposed convolutions, ReLU between them and ``Tanh`` on
    the output, so predictions lie in [-1, 1].

    The bottleneck size (1600 = 64 * 5 * 5) fixes the spatial input size to
    128x128: 128 -> 64 -> 32 -> 16 -> 8 -> 5.
    """

    def __init__(self):
        super().__init__()

        # Layers are created in the same order as a literal listing would
        # create them, so Sequential indices (state-dict keys) are unchanged.
        enc_layers = []
        for c_in, c_out in ((1, 8), (8, 16), (16, 32), (32, 32)):
            enc_layers += [
                nn.Conv2d(c_in, c_out, 4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
            ]
        enc_layers += [
            nn.Conv2d(32, 64, 4),
            nn.BatchNorm2d(64),
            nn.Flatten(),
            nn.Linear(1600, 512),
            nn.BatchNorm1d(512),
            nn.Linear(512, 1600),
        ]
        self.encoder = nn.Sequential(*enc_layers)

        dec_layers = [nn.ConvTranspose2d(64, 32, 4), nn.ReLU()]
        for c_in, c_out in ((32, 32), (32, 16), (16, 8)):
            dec_layers += [
                nn.ConvTranspose2d(c_in, c_out, 4, stride=2, padding=1),
                nn.ReLU(),
            ]
        dec_layers += [
            nn.ConvTranspose2d(8, 2, 4, stride=2, padding=1),
            nn.Tanh(),
        ]
        self.decoder = nn.Sequential(*dec_layers)

    def forward(self, x):
        """Encode ``x``, restore the 64x5x5 feature map and decode it."""
        latent = self.encoder(x)
        # The encoder ends in a Linear layer, so its output is flat per sample
        feature_map = latent.reshape(-1, 64, 5, 5)
        return self.decoder(feature_map)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DCA().to(device) # Push model to device

### Train Model

In [None]:
# Reset Model
# model.children() only yields the top-level `encoder` / `decoder`
# nn.Sequential containers, which define no reset_parameters(), so iterating
# over it re-initialises nothing. model.modules() walks every nested layer
# (Conv2d, ConvTranspose2d, Linear, BatchNorm*) and resets each of them.
for layer in model.modules():
    if hasattr(layer, 'reset_parameters'):
        layer.reset_parameters()

# Function to calculate PSNR
def calculate_psnr(img1, img2):
    """Return the peak signal-to-noise ratio (dB) for a peak value of 1.0.

    Both inputs are cast to float64 and the mean squared error is taken over
    all elements. Identical inputs (zero error) give ``inf``.
    """
    residual = img1.astype(np.float64) - img2.astype(np.float64)
    mse = np.mean(residual ** 2)
    return float('inf') if mse == 0 else 20 * math.log10(1.0 / math.sqrt(mse))

# Loss Function
criteria = torch.nn.MSELoss()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5)
n_epochs = 100

# Training
model.train()  # BatchNorm must use batch statistics while fitting
loss_array = []
pbar = tqdm(range(1, n_epochs+1))
for epoch in pbar:
    train_loss = 0.0

    for x_batch, y_batch in zip(x_train, y_train):
        # Inputs and labels both follow `device`, so the cell also runs without a GPU
        data = torch.from_numpy(x_batch.astype('float32')).to(device)
        # Scale a and b to range [-1,1] as mentioned in the project description
        labels = torch.tensor((y_batch-0.5)*2, dtype=torch.float, device=device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criteria(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() yields a plain float, so the running sum does not keep
        # graph-attached tensors alive for the whole epoch
        train_loss += loss.item()

    train_loss_avg = train_loss/x_train.shape[0]
    loss_array.append(train_loss_avg)
    pbar.set_postfix({ 'Training Loss': train_loss_avg })

# Validation Result for Hyperparameter Search
# eval() makes BatchNorm use its running statistics (and stops it from updating
# them with validation data); no_grad() skips building the autograd graph.
model.eval()
val_data = torch.from_numpy(x_val.astype('float32')).to(device)
val_labels = torch.tensor(((y_val-0.5)*2), dtype=torch.float, device=device)
with torch.no_grad():
    val_outputs = model(val_data).cpu().numpy()
# Scale it back to [0,1] for testing
val_outputs = (val_outputs/2)+0.5
val_psnr = calculate_psnr(val_outputs, y_val)
print("Val PSNR Result: " + str(val_psnr))

  0%|          | 0/100 [00:00<?, ?it/s]

Val PSNR Result: 32.18249875883066


### Save Best Model and Test 

In [None]:
# Reset Model
# Start the final run from a freshly initialised network.
# The reset loop that used to precede this line iterated model.children(),
# i.e. only the two nn.Sequential containers, which have no
# reset_parameters(); it changed nothing and the instance it touched was
# replaced right here anyway.
model = DCA().to(device)

# Function to calculate PSNR
def calculate_psnr(img1, img2):
    """Return the peak signal-to-noise ratio (dB) for a peak value of 1.0.

    Both inputs are cast to float64 and the mean squared error is taken over
    all elements. Identical inputs (zero error) give ``inf``.
    """
    residual = img1.astype(np.float64) - img2.astype(np.float64)
    mse = np.mean(residual ** 2)
    return float('inf') if mse == 0 else 20 * math.log10(1.0 / math.sqrt(mse))

# Loss Function
criteria = torch.nn.MSELoss()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
n_epochs = 500

# Training
model.train()  # BatchNorm must use batch statistics while fitting
loss_array = []
pbar = tqdm(range(1, n_epochs+1))
for epoch in pbar:
    train_loss = 0.0

    for x_batch, y_batch in zip(x_train, y_train):
        # Inputs and labels both follow `device`, so the cell also runs without a GPU
        data = torch.from_numpy(x_batch.astype('float32')).to(device)
        # Scale a and b to range [-1,1] as mentioned in the project description
        labels = torch.tensor((y_batch-0.5)*2, dtype=torch.float, device=device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criteria(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() yields a plain float, so the running sum does not keep
        # graph-attached tensors alive for the whole epoch
        train_loss += loss.item()

    train_loss_avg = train_loss/x_train.shape[0]
    loss_array.append(train_loss_avg)
    pbar.set_postfix({ 'Training Loss': train_loss_avg })

# Make sure the output directory exists before writing results of a long run
os.makedirs('./Out', exist_ok=True)

# Export Training Loss for Plot
np.save('./Out/DCA_train_loss_Faces_Tanh.npy', loss_array)

# Export Trained Model for Transfer Learning
# NOTE(review): this pickles the whole module, so loading it requires the DCA
# class to be importable; kept as-is because consumers of the file are not
# visible here.
torch.save(model, './Out/DCA_Faces_Tanh.pth')

# Testing on Test Data
# eval() makes BatchNorm use its running statistics (and stops it from updating
# them with test data); no_grad() skips building the autograd graph.
model.eval()
test_data = torch.from_numpy(x_test.astype('float32')).to(device)
test_labels = torch.tensor(((y_test-0.5)*2), dtype=torch.float, device=device)
with torch.no_grad():
    test_outputs = model(test_data).cpu().numpy()
# Scale it back to [0,1] for testing
test_outputs = (test_outputs/2)+0.5
a_loss = calculate_psnr(test_outputs[:, 0], y_test[:, 0])
b_loss = calculate_psnr(test_outputs[:, 1], y_test[:, 1])
# Per-image MSE (mean over the two spatial axes), then averaged over the test set
a_mse = np.mean((test_outputs[:, 0] - y_test[:, 0]) ** 2, axis=(1, 2)).mean()
b_mse = np.mean((test_outputs[:, 1] - y_test[:, 1]) ** 2, axis=(1, 2)).mean()
print("Test a PSNR Result: " + str(a_loss))
print("Test b PSNR Result: " + str(b_loss))
print("Test a MSE Result: " + str(a_mse))
print("Test b MSE Result: " + str(b_mse))

  0%|          | 0/500 [00:00<?, ?it/s]

Test a PSNR Result: 36.52177502507118
Test b PSNR Result: 34.199743860274495
Test a MSE Result: 0.0002227524541331855
Test b MSE Result: 0.00038021181992561


### Visualising Reconstructions

In [None]:
import cv2
from google.colab.patches import cv2_imshow

# Reconstruction
def _to_lab_uint8(l_channel, ab_channels):
    """Stack channel-first L and ab arrays into uint8 LAB images, channel-last.

    Values are multiplied by 255 and rounded before the cast; a bare
    ``astype('uint8')`` truncates, so e.g. 126.9999 would turn into 126.
    """
    lab = np.concatenate((l_channel, ab_channels), axis=1) * 255
    lab = np.clip(np.rint(lab), 0, 255).astype('uint8')
    # cv2 is handed plain C-contiguous (N, H, W, 3) data
    return np.ascontiguousarray(np.transpose(lab, (0, 2, 3, 1)))


out_dir = './Out/Face_Recon/Tanh'
os.makedirs(out_dir, exist_ok=True)

# Build all LAB stacks once; they do not change from image to image
test_L = np.transpose(x_test*255, (0,2,3,1))
test_LAB = _to_lab_uint8(x_test, y_test)
pred_LAB = _to_lab_uint8(x_test, test_outputs)

# Plot Reconstructions
for i in tqdm(range(x_test.shape[0])):
    img_l = test_L[i]
    img_gt = cv2.cvtColor(test_LAB[i], cv2.COLOR_LAB2RGB)
    img_pred = cv2.cvtColor(pred_LAB[i], cv2.COLOR_LAB2RGB)

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(8, 8))
    ax1.imshow(img_l.reshape(128,128), cmap='gray')
    ax2.imshow(img_pred)
    ax3.imshow(img_gt)
    ax1.title.set_text('Input grayscale image')
    ax2.title.set_text('Colorized image')
    ax3.title.set_text('Ground truth')
    fig.savefig(f'{out_dir}/Result_{i+1}.png', format='png', dpi=300)
    # Show the figure in the notebook, then release it so that one figure per
    # test image does not stay open until the end of the cell
    plt.show()
    plt.close(fig)

Output hidden; open in https://colab.research.google.com to view.