In [0]:
# Install google-drive-ocamlfuse (and fuse) from the alessandro-strada PPA so
# that Google Drive can be mounted as a local directory.
# NOTE(review): `2>&1 > /dev/null` discards stdout only -- stderr is still
# written to the cell output. If both streams were meant to be silenced the
# order would have to be `> /dev/null 2>&1`.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate the Colab user and fetch the application-default credentials;
# their client id / secret are handed to google-drive-ocamlfuse below.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First headless run: only the output line containing "URL" is kept (grep).
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Read the verification code without echoing it, then pipe it into a second
# headless run.
# NOTE(review): {vcode} is interpolated into the shell command unquoted, so
# shell metacharacters in the pasted value would be interpreted by the shell.
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [0]:
# Create the mount point (no error if it already exists) and mount Google
# Drive on it; ROOT in the next cell is a path below this directory.
!mkdir -p drive
!google-drive-ocamlfuse drive

In [0]:
# Lab directory inside the mounted Drive. The training data, the Set5 test
# images and the model checkpoints are all addressed relative to ROOT.
ROOT = 'drive/20180424_SK_Lab2/super_resolution'

# Put ROOT first on the module search path so that modules stored in the lab
# directory can be imported.
import sys
sys.path.insert(0, ROOT)

# The notebook uses the TensorFlow 1.x graph API (tf.placeholder, tf.Session,
# tf.reset_default_graph).
import tensorflow as tf
import numpy as np
import time
import glob
from PIL import Image

from matplotlib import pyplot as plt
%matplotlib inline
# Large figures (three images are shown side by side) without grid lines.
plt.rcParams['figure.figsize'] = (15, 9)
plt.rcParams['axes.grid'] = False

# presumably resolves to ROOT/layers.py through the sys.path entry above --
# TODO confirm. The wildcard import hides which helper names the network
# definitions are expected to use.
from layers import *

In [0]:
# Compute PSNR
def PSNR(y_true, y_pred, shave_border=4, maxVal=255):
    """Peak signal-to-noise ratio (in dB) between two images.

    Args:
        y_true: reference image (array-like); converted to float32.
        y_pred: image under evaluation, same shape as ``y_true``.
        shave_border: number of pixels dropped from every side of the first
            two axes before the error is measured; ``0`` keeps all pixels.
        maxVal: peak signal value (255 for 8-bit data).

    Returns:
        ``20 * log10(maxVal / rmse)``. When the two images are identical
        inside the evaluated region the result is ``inf``.
    """
    target_data = np.array(y_true, dtype=np.float32)
    ref_data = np.array(y_pred, dtype=np.float32)
    diff = ref_data - target_data
    if shave_border > 0:
        diff = diff[shave_border:-shave_border, shave_border:-shave_border]
    rmse = np.sqrt(np.mean(np.square(diff)))
    # A zero error would otherwise be divided by, which only yields inf as a
    # side effect of a NumPy RuntimeWarning; return it explicitly instead.
    if rmse == 0:
        return float('inf')
    return 20 * np.log10(maxVal / rmse)

# RGB2YCbCr
def _rgb2ycbcr(img, maxVal=255):
    O = np.array([[16],
                  [128],
                  [128]])
    T = np.array([[0.256788235294118, 0.504129411764706, 0.097905882352941],
                  [-0.148223529411765, -0.290992156862745, 0.439215686274510],
                  [0.439215686274510, -0.367788235294118, -0.071427450980392]])
    if maxVal == 1:
        O = O / 255.0
    t = np.reshape(img, (img.shape[0]*img.shape[1], img.shape[2]))
    t = np.dot(t, np.transpose(T))
    t[:, 0] += O[0]
    t[:, 1] += O[1]
    t[:, 2] += O[2]
    ycbcr = np.reshape(t, [img.shape[0], img.shape[1], img.shape[2]])
    return ycbcr

# SRCNN

In [0]:
# Network parameters
# NOTE: these are plain module-level globals that the later VDSR cells assign
# again, and do_SRCNN() reads them when it is called -- so this cell has to be
# the most recently executed parameter cell before do_SRCNN() is run.
B = 4    # mini-batch size (number of patches fed per training step)
H = 32   # height of a training input patch
W = 32   # width of a training input patch
C = 1    # channels per patch (only the Y channel is used)
r = 2    # scale factor for SR
lr_init = 0.0001   # learning rate fed to the `lr` placeholder while training
momentum = 0.9     # momentum of both MomentumOptimizers in do_SRCNN()

# Start from an empty default graph so that re-running the cell does not
# accumulate ops and variables from earlier runs.
tf.reset_default_graph()

# Network
def SRCNN(x):
    '''
    Your code here

    Exercise placeholder: as written, the input tensor is returned unchanged.

    NOTE(review): the rest of this cell hints at what the finished network
    has to satisfy -- confirm against the lab handout:
      * do_SRCNN() crops the training labels by 6 pixels per side
        (H - 12, W - 12), so the output is presumably 12 pixels smaller than
        the input in each spatial dimension;
      * do_SRCNN() trains variables whose name contains 'conv3' with a 10x
        smaller learning rate, so the last layer is presumably named 'conv3'.
    '''

    return x

# Whole model
# Four-dimensional single-channel placeholders; every dimension except the
# channel count is left open, so training patches and full test images can be
# fed through the same graph.
inputs = tf.placeholder(tf.float32, shape=[None, None, None, 1])
labels = tf.placeholder(tf.float32, shape=[None, None, None, 1])
# Scalar learning rate, supplied through feed_dict at every training step.
lr = tf.placeholder(tf.float32, shape=[])

# do_SRCNN() selects the variables to train by the 'SRCNN/' name prefix that
# this scope produces.
with tf.variable_scope('SRCNN') as scope:
    outputs = SRCNN(inputs)
    
# Mean squared error between the network output and the ground-truth labels.
# (The original "Your code here" placeholder left `loss =` without a
# right-hand side, which is a SyntaxError for the whole cell; this is the
# expression the VDSR cell further down uses for the same loss.)
loss = tf.reduce_mean(tf.square(outputs - labels))

def do_SRCNN(phase):
    """Train or evaluate the SRCNN graph defined in this cell.

    Reads the module-level graph tensors (``inputs``, ``labels``, ``lr``,
    ``outputs``, ``loss``) and settings (``B``, ``H``, ``W``, ``C``, ``r``,
    ``lr_init``, ``momentum``, ``ROOT``) at call time.

    Args:
        phase: ``'Train'`` runs 200000 iterations from freshly initialised
            variables, logging every 100 iterations and saving a checkpoint
            under ``ROOT/SRCNN_models`` every 10000 iterations. ``'Test'``
            restores ``SRCNN_iter10000.ckpt`` and, for every ``.bmp`` file in
            ``ROOT/Set5``, plots the bicubic input, the network output and
            the ground truth with the PSNR of the output in the title.

    Raises:
        AssertionError: if ``phase`` is neither ``'Train'`` nor ``'Test'``.
    """
    assert phase == 'Train' or phase == 'Test'

    # Momentum optimizers: the variables whose name contains 'conv3' are
    # trained with a 10x smaller learning rate than the other SRCNN variables.
    if phase == 'Train':
        trainable_vars_last_layer = [v for v in tf.trainable_variables() if 'conv3' in v.name]
        trainable_vars = [v for v in tf.trainable_variables()
                          if v.name.startswith('SRCNN/') and v not in trainable_vars_last_layer]

        opt1 = tf.train.MomentumOptimizer(lr, momentum)
        opt2 = tf.train.MomentumOptimizer(lr*0.1, momentum)

        # One backward pass for both groups; split the gradient list afterwards.
        grads = tf.gradients(loss, trainable_vars + trainable_vars_last_layer)
        grads1 = grads[:len(trainable_vars)]
        grads2 = grads[len(trainable_vars):]

        train_op1 = opt1.apply_gradients(zip(grads1, trainable_vars))
        train_op2 = opt2.apply_gradients(zip(grads2, trainable_vars_last_layer))
        train_SRCNN = tf.group(train_op1, train_op2)

    # Data preparation
    def PrepareTrainImages(train_data):
        """Build one epoch of (input, label) Y-channel patches in [0, 1].

        For every image a random H x W patch is taken, flipped left-right
        with probability 0.5, and paired with the same patch of the
        down/up-sampled image. Labels are cropped by 6 pixels per side.
        Patches are stored as uint8 first, so the fractional part of the
        YCbCr values is dropped before the division by 255.
        """
        train_labels = np.zeros((len(train_data), H - 12, W - 12, C), dtype=np.uint8)
        train_images = np.zeros((len(train_data), H, W, C), dtype=np.uint8)
        for i in range(len(train_data)):
            img = Image.fromarray(train_data[i])
            # Down-sample by r, then bicubic up-sample back to the original size.
            # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
            # a name that Pillow 10 removed.
            w, h = img.size
            img_input = img.resize((w//r, h//r), Image.LANCZOS)
            img_input = img_input.resize((w, h), Image.BICUBIC)
            img = np.asarray(img)
            img_input = np.asarray(img_input)
            # Random crop. randint's upper bound is exclusive, hence the + 1:
            # it makes the last valid offset reachable and lets an image of
            # exactly H x W pixels through instead of raising ValueError.
            r_y = np.random.randint(img.shape[0] - H + 1)
            r_x = np.random.randint(img.shape[1] - W + 1)
            img = img[r_y:r_y+H, r_x:r_x+W, :]
            img_input = img_input[r_y:r_y+H, r_x:r_x+W, :]
            # Random LR flip
            if np.random.random() < 0.5:
                img = np.copy(img[:, ::-1, :])
                img_input = np.copy(img_input[:, ::-1, :])
            train_labels[i, :, :, :] = _rgb2ycbcr(img)[6:-6, 6:-6, 0:1]
            train_images[i, :, :, :] = _rgb2ycbcr(img_input)[:, :, 0:1]
        train_labels = train_labels / 255.0
        train_images = train_images / 255.0
        num_training_samples = train_images.shape[0]

        return train_images, train_labels, num_training_samples

    if phase == 'Train':
        # Only training needs the training set; testing no longer loads it.
        # NOTE(review): if train_91.npy stores an object array (images of
        # different sizes), recent NumPy needs allow_pickle=True here -- confirm.
        train_data = np.load(ROOT + '/train_91.npy')
        train_images, train_labels, num_training_samples = PrepareTrainImages(train_data)
    else:
        # Sorted so that "Test image #" refers to the same file on every run.
        test_filenames = sorted(glob.glob(ROOT + '/Set5/*.bmp'))
        num_test_samples = len(test_filenames)

    # TF saver (created after the optimizers, so their variables are included)
    saver = tf.train.Saver()

    # TF Session
    with tf.Session() as sess:
        # Load or init variables
        if phase == 'Train':
            tf.global_variables_initializer().run()
        else:
            saver.restore(sess, ROOT + '/SRCNN_models/SRCNN_iter10000.ckpt')
            print("Model restored.")

        if phase == 'Train':
            e = 0 # epoch
            p = 0 # pointer
            lr_curr = lr_init  # learning rate

            # Training
            for i in range(0, 200000):
                t = time.time()
                l_total,  _= sess.run([loss, train_SRCNN], feed_dict={inputs: train_images[p:p+B], labels: train_labels[p:p+B], lr:lr_curr})
                dT = time.time() - t
                if (i + 1) % 100 == 0:
                    print('Epoch: {:3d} | Iter: {:4d} | Loss: {:4.3e} | dT: {:4.3f}s'.format(e + 1, i + 1, l_total, dT))

                p += B
                if p >= num_training_samples:
                    # End of epoch: draw fresh random crops and flips.
                    e += 1
                    p = 0
                    train_images, train_labels, num_training_samples = PrepareTrainImages(train_data)

                if (i + 1) % 10000 == 0:
                    save_path = saver.save(sess, ROOT + '/SRCNN_models/SRCNN_iter'+str(i + 1)+'.ckpt')
                    print("Model saved in file: %s" % save_path)
        elif phase == 'Test':
            # Test
            for i in range(0, num_test_samples):
                # Read a test image
                img = Image.open(test_filenames[i])
                img = img.convert('RGB')
                # Down-sample by r, then bicubic up-sample back to the original size
                w, h = img.size
                img_input = img.resize((w//r, h//r), Image.LANCZOS)
                img_input = img_input.resize((w, h), Image.BICUBIC)
                img = np.asarray(img)
                img = _rgb2ycbcr(img)[:, :, 0:1]
                img_input = np.asarray(img_input)
                img_input = _rgb2ycbcr(img_input)[:, :, 0:1]
                img_input = img_input / 255.0

                t = time.time()
                output = sess.run(outputs, feed_dict={inputs: img_input[np.newaxis, ]})
                dT = time.time() - t
                res = (np.clip(output[0,:,:,0],0,1)*255).astype(np.uint8)

                # Input and ground truth are cropped by 6 pixels per side, the
                # same crop that is applied to the training labels.
                fig = plt.figure()
                ax1 = fig.add_subplot(1, 3, 1)
                ax1.imshow(img_input[6:-6,6:-6,0], cmap='gray')
                ax1.set_xlabel('BICUBIC')
                ax2 = fig.add_subplot(1, 3, 2)
                ax2.imshow(np.clip(output[0,:,:,0],0,1), cmap='gray')
                ax2.set_xlabel('SRCNN')
                ax3 = fig.add_subplot(1, 3, 3)
                ax3.imshow(img[6:-6, 6:-6, 0] / 255.0, cmap='gray')
                ax3.set_xlabel('GROUND TRUTH')
                plt.title('Test image #: {:3d} | PSNR: {:3f} | dT: {:4.3f}s'.format(i, PSNR(img[6:-6, 6:-6, 0], res), dT))

In [0]:
# Train SRCNN from freshly initialised variables; a checkpoint is written
# every 10000 iterations.
do_SRCNN('Train')

In [0]:
# Evaluate on the Set5 images using the checkpoint saved at iteration 10000.
do_SRCNN('Test')

# VDSR

In [0]:
# Network parameters
# NOTE: these assignments replace the values set in the SRCNN cell; the do_*()
# functions read the globals when they are called, so run this cell right
# before do_VDSR().
B = 4    # mini-batch size (number of patches fed per training step)
H = 41   # height of a training patch
W = 41   # width of a training patch
C = 1    # channels per patch (only the Y channel is used)
r = 2    # scale factor for SR

# Discard the graph built by earlier cells before the VDSR graph is defined.
tf.reset_default_graph()

# Network
def VDSR(x):
    '''
    Your code here

    Exercise placeholder: as written, the input tensor is returned unchanged.

    NOTE(review): do_VDSR() builds inputs and labels with the same H x W size,
    so the finished network presumably has to preserve the spatial size of
    its input -- confirm against the lab handout.
    '''   
    return x

# Whole model
# Four-dimensional single-channel placeholders; every dimension except the
# channel count is left open, so training patches and full test images can be
# fed through the same graph.
inputs = tf.placeholder(tf.float32, shape=[None, None, None, 1])
labels = tf.placeholder(tf.float32, shape=[None, None, None, 1])

# do_VDSR() selects the variables to train by the 'VDSR/' name prefix that
# this scope produces.
with tf.variable_scope('VDSR') as scope:
    outputs = VDSR(inputs)
    
# Mean squared error between the network output and the labels, averaged over
# every element of the batch.
loss = tf.reduce_mean(tf.square(outputs - labels))

def do_VDSR(phase):
    """Train or evaluate the VDSR graph defined in this cell.

    Reads the module-level graph tensors (``inputs``, ``labels``,
    ``outputs``, ``loss``) and settings (``B``, ``H``, ``W``, ``C``, ``r``,
    ``ROOT``) at call time.

    Args:
        phase: ``'Train'`` runs 50000 iterations from freshly initialised
            variables, logging every 100 iterations and saving a checkpoint
            under ``ROOT/VDSR_models`` every 10000 iterations. ``'Test'``
            restores ``VDSR_iter10000.ckpt`` and, for every ``.bmp`` file in
            ``ROOT/Set5``, plots the bicubic input, the network output and
            the ground truth with the PSNR of the output in the title.

    Raises:
        AssertionError: if ``phase`` is neither ``'Train'`` nor ``'Test'``.
    """
    assert phase == 'Train' or phase == 'Test'

    # Adam optimizer; every gradient is clipped to an L2 norm of at most 0.1
    # before it is applied.
    if phase == 'Train':
        trainable_vars = [v for v in tf.trainable_variables() if v.name.startswith('VDSR/')]
        opt = tf.train.AdamOptimizer(0.001)
        grads_and_vars = opt.compute_gradients(loss, var_list=trainable_vars)
        grads_and_vars = [(tf.clip_by_norm(x[0], 0.1), x[1]) for x in grads_and_vars]
        train_VDSR = opt.apply_gradients(grads_and_vars)

    # Data preparation
    def PrepareTrainImages(train_data):
        """Build one epoch of (input, label) Y-channel patches in [0, 1].

        For every image a random H x W patch is taken, flipped left-right
        with probability 0.5, and paired with the same patch of the
        down/up-sampled image. Patches are stored as uint8 first, so the
        fractional part of the YCbCr values is dropped before the division
        by 255.
        """
        train_labels = np.zeros((len(train_data), H, W, C), dtype=np.uint8)
        train_images = np.zeros((len(train_data), H, W, C), dtype=np.uint8)
        for i in range(len(train_data)):
            img = Image.fromarray(train_data[i])
            # Down-sample by r, then bicubic up-sample back to the original size.
            # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
            # a name that Pillow 10 removed.
            w, h = img.size
            img_input = img.resize((w//r, h//r), Image.LANCZOS)
            img_input = img_input.resize((w, h), Image.BICUBIC)
            img = np.asarray(img)
            img_input = np.asarray(img_input)
            # Random crop. randint's upper bound is exclusive, hence the + 1:
            # it makes the last valid offset reachable and lets an image of
            # exactly H x W pixels through instead of raising ValueError.
            r_y = np.random.randint(img.shape[0] - H + 1)
            r_x = np.random.randint(img.shape[1] - W + 1)
            img = img[r_y:r_y+H, r_x:r_x+W, :]
            img_input = img_input[r_y:r_y+H, r_x:r_x+W, :]
            # Random LR flip
            if np.random.random() < 0.5:
                img = np.copy(img[:, ::-1, :])
                img_input = np.copy(img_input[:, ::-1, :])
            train_labels[i, :, :, :] = _rgb2ycbcr(img)[:, :, 0:1]
            train_images[i, :, :, :] = _rgb2ycbcr(img_input)[:, :, 0:1]
        train_labels = train_labels / 255.0
        train_images = train_images / 255.0
        num_training_samples = train_images.shape[0]

        return train_images, train_labels, num_training_samples

    if phase == 'Train':
        # Only training needs the training set; testing no longer loads it.
        # NOTE(review): if train_91.npy stores an object array (images of
        # different sizes), recent NumPy needs allow_pickle=True here -- confirm.
        train_data = np.load(ROOT + '/train_91.npy')
        train_images, train_labels, num_training_samples = PrepareTrainImages(train_data)
    else:
        # Sorted so that "Test image #" refers to the same file on every run.
        test_filenames = sorted(glob.glob(ROOT + '/Set5/*.bmp'))
        num_test_samples = len(test_filenames)

    # TF saver (created after the optimizer, so its variables are included)
    saver = tf.train.Saver()

    # TF Session
    with tf.Session() as sess:
        # Load or init variables
        if phase == 'Train':
            tf.global_variables_initializer().run()
        else:
            saver.restore(sess, ROOT + '/VDSR_models/VDSR_iter10000.ckpt')
            print("Model restored.")

        if phase == 'Train':
            e = 0 # epoch
            p = 0 # pointer

            # Training
            for i in range(0, 50000):
                t = time.time()
                l_total, _ = sess.run([loss, train_VDSR], feed_dict={inputs: train_images[p:p+B], labels: train_labels[p:p+B]})
                dT = time.time() - t
                if (i + 1) % 100 == 0:
                    print('Epoch: {:3d} | Iter: {:4d} | Loss: {:4.3e} | dT: {:4.3f}s'.format(e + 1, i + 1, l_total, dT))

                p += B
                if p >= num_training_samples:
                    # End of epoch: draw fresh random crops and flips.
                    e += 1
                    p = 0
                    train_images, train_labels, num_training_samples = PrepareTrainImages(train_data)

                if (i + 1) % 10000 == 0:
                    save_path = saver.save(sess, ROOT + '/VDSR_models/VDSR_iter'+str(i + 1)+'.ckpt')
                    print("Model saved in file: %s" % save_path)
        elif phase == 'Test':
            # Test
            for i in range(0, num_test_samples):
                # Read a test image
                img = Image.open(test_filenames[i])
                img = img.convert('RGB')
                # Down-sample by r, then bicubic up-sample back to the original size
                w, h = img.size
                img_input = img.resize((w//r, h//r), Image.LANCZOS)
                img_input = img_input.resize((w, h), Image.BICUBIC)
                img = np.asarray(img)
                img = _rgb2ycbcr(img)[:, :, 0:1]
                img_input = np.asarray(img_input)
                img_input = _rgb2ycbcr(img_input)[:, :, 0:1]
                img_input = img_input / 255.0

                t = time.time()
                output = sess.run(outputs, feed_dict={inputs: img_input[np.newaxis, ]})
                dT = time.time() - t
                res = (np.clip(output[0,:,:,0],0,1)*255).astype(np.uint8)

                fig = plt.figure()
                ax1 = fig.add_subplot(1, 3, 1)
                ax1.imshow(img_input[:,:,0], cmap='gray')
                ax1.set_xlabel('BICUBIC')
                ax2 = fig.add_subplot(1, 3, 2)
                ax2.imshow(np.clip(output[0,:,:,0],0,1), cmap='gray')
                ax2.set_xlabel('VDSR')
                ax3 = fig.add_subplot(1, 3, 3)
                ax3.imshow(img[:, :, 0] / 255.0, cmap='gray')
                ax3.set_xlabel('GROUND TRUTH')
                plt.title('Test image #: {:3d} | PSNR: {:3f} | dT: {:4.3f}s'.format(i, PSNR(img[:, :, 0], res), dT))

In [0]:
# Train VDSR from freshly initialised variables; a checkpoint is written
# every 10000 iterations.
do_VDSR('Train')

In [0]:
# Evaluate on the Set5 images using the checkpoint saved at iteration 10000.
do_VDSR('Test')

# VDSR SP

In [0]:
# Huber error
def Huber(y_true, y_pred, delta, axis=None):
    """Mean Huber loss between ``y_pred`` and ``y_true``.

    Absolute errors up to ``delta`` contribute ``0.5 * error**2``; the part
    of an error that exceeds ``delta`` contributes linearly with slope
    ``delta``. The per-element losses are averaged over ``axis`` (over all
    elements when ``axis`` is None).
    """
    residual = tf.abs(y_pred - y_true)
    # Portion of each residual that falls inside the quadratic zone.
    capped = tf.minimum(residual, delta)
    # Portion beyond delta. It has the same value as
    # tf.maximum(residual - delta, 0), but written as a difference its
    # gradient at residual == delta is 0 (tf.maximum would give 1 there).
    # The quadratic term already contributes a gradient at that point, so
    # this form avoids counting it twice.
    overshoot = residual - capped
    quadratic_term = 0.5 * capped**2
    linear_term = delta * overshoot
    return tf.reduce_mean(quadratic_term + linear_term, axis=axis)

In [0]:
# Network parameters
# NOTE: these assignments replace the values set in the earlier cells; the
# do_*() functions read the globals when they are called, so run this cell
# right before do_VDSR_SP().
B = 4    # mini-batch size (number of patches fed per training step)
H = 24   # height of a low-resolution input patch (labels are H*r high)
W = 24   # width of a low-resolution input patch (labels are W*r wide)
C = 1    # channels per patch (only the Y channel is used)
r = 2    # scale factor for SR

# Discard the graph built by earlier cells before the VDSR SP graph is defined.
tf.reset_default_graph()

# Network
def VDSR_SP(x):
    '''
    Your code here

    Exercise placeholder: as written, the input tensor is returned unchanged.

    NOTE(review): do_VDSR_SP() feeds H x W inputs and compares the output with
    (H*r) x (W*r) labels, so the finished network presumably has to enlarge
    its input by the factor r -- confirm against the lab handout.
    '''
    return x

# Whole model
# Four-dimensional single-channel placeholders; every dimension except the
# channel count is left open, so training patches and full test images can be
# fed through the same graph.
inputs = tf.placeholder(tf.float32, shape=[None, None, None, 1])
labels = tf.placeholder(tf.float32, shape=[None, None, None, 1])

# The scope is named 'VDSR' (not 'VDSR_SP'); do_VDSR_SP() selects the
# variables to train by the matching 'VDSR/' name prefix.
with tf.variable_scope('VDSR') as scope:
    outputs = VDSR_SP(inputs)
    
# Huber loss with delta = 0.01, averaged over every element of the batch
# (this cell does not use the mean squared error of the previous models).
loss = Huber(labels, outputs, 0.01)

def do_VDSR_SP(phase):
    """Train or evaluate the VDSR SP graph defined in this cell.

    Unlike the two earlier models, the network input is the low-resolution
    image itself and the labels are ``r`` times larger. Reads the
    module-level graph tensors (``inputs``, ``labels``, ``outputs``,
    ``loss``) and settings (``B``, ``H``, ``W``, ``C``, ``r``, ``ROOT``) at
    call time.

    Args:
        phase: ``'Train'`` runs 50000 iterations from freshly initialised
            variables, logging every 100 iterations and saving a checkpoint
            under ``ROOT/VDSR_models`` every 10000 iterations. ``'Test'``
            restores ``VDSR_SP_iter10000.ckpt`` and, for every ``.bmp`` file
            in ``ROOT/Set5``, plots the low-resolution input, the network
            output and the ground truth with the PSNR of the output in the
            title.

    Raises:
        AssertionError: if ``phase`` is neither ``'Train'`` nor ``'Test'``.
    """
    assert phase == 'Train' or phase == 'Test'

    # Adam optimizer; every gradient is clipped to an L2 norm of at most 0.1
    # before it is applied.
    if phase == 'Train':
        trainable_vars = [v for v in tf.trainable_variables() if v.name.startswith('VDSR/')]
        opt = tf.train.AdamOptimizer(0.001)
        grads_and_vars = opt.compute_gradients(loss, var_list=trainable_vars)
        grads_and_vars = [(tf.clip_by_norm(x[0], 0.1), x[1]) for x in grads_and_vars]
        train_VDSR = opt.apply_gradients(grads_and_vars)

    # Data preparation
    def PrepareTrainImages(train_data):
        """Build one epoch of (input, label) Y-channel patches in [0, 1].

        For every image a random H x W patch of the down-sampled image is
        paired with the corresponding (H*r) x (W*r) patch of the original;
        both are flipped left-right together with probability 0.5. Patches
        are stored as uint8 first, so the fractional part of the YCbCr
        values is dropped before the division by 255.
        """
        train_labels = np.zeros((len(train_data), H*r, W*r, C), dtype=np.uint8)
        train_images = np.zeros((len(train_data), H, W, C), dtype=np.uint8)
        for i in range(len(train_data)):
            img = Image.fromarray(train_data[i])
            # Down-sample by r. Image.LANCZOS is the filter formerly exposed
            # as Image.ANTIALIAS, a name that Pillow 10 removed.
            w, h = img.size
            img_input = img.resize((w//r, h//r), Image.LANCZOS)
            img = np.asarray(img)
            img_input = np.asarray(img_input)
            # Random crop, chosen on the low-resolution grid. randint's upper
            # bound is exclusive, hence the + 1: it makes the last valid
            # offset reachable and lets a low-resolution image of exactly
            # H x W pixels through instead of raising ValueError.
            r_y = np.random.randint(img_input.shape[0] - H + 1)
            r_x = np.random.randint(img_input.shape[1] - W + 1)
            img = img[r_y*r:(r_y+H)*r, r_x*r:(r_x+W)*r, :]
            img_input = img_input[r_y:r_y+H, r_x:r_x+W, :]
            # Random LR flip
            if np.random.random() < 0.5:
                img = np.copy(img[:, ::-1, :])
                img_input = np.copy(img_input[:, ::-1, :])
            train_labels[i, :, :, :] = _rgb2ycbcr(img)[:, :, 0:1]
            train_images[i, :, :, :] = _rgb2ycbcr(img_input)[:, :, 0:1]
        train_labels = train_labels / 255.0
        train_images = train_images / 255.0
        num_training_samples = train_images.shape[0]

        return train_images, train_labels, num_training_samples

    if phase == 'Train':
        # Only training needs the training set; testing no longer loads it.
        # NOTE(review): if train_91.npy stores an object array (images of
        # different sizes), recent NumPy needs allow_pickle=True here -- confirm.
        train_data = np.load(ROOT + '/train_91.npy')
        train_images, train_labels, num_training_samples = PrepareTrainImages(train_data)
    else:
        # Sorted so that "Test image #" refers to the same file on every run.
        test_filenames = sorted(glob.glob(ROOT + '/Set5/*.bmp'))
        num_test_samples = len(test_filenames)

    # TF saver (created after the optimizer, so its variables are included)
    saver = tf.train.Saver()

    # TF Session
    with tf.Session() as sess:
        # Load or init variables
        if phase == 'Train':
            tf.global_variables_initializer().run()
        else:
            saver.restore(sess, ROOT + '/VDSR_models/VDSR_SP_iter10000.ckpt')
            print("Model restored.")

        if phase == 'Train':
            e = 0 # epoch
            p = 0 # pointer

            # Training
            for i in range(0, 50000):
                t = time.time()
                l_total, _ = sess.run([loss, train_VDSR], feed_dict={inputs: train_images[p:p+B], labels: train_labels[p:p+B]})
                dT = time.time() - t
                if (i + 1) % 100 == 0:
                    print('Epoch: {:3d} | Iter: {:4d} | Loss: {:4.3e} | dT: {:4.3f}s'.format(e + 1, i + 1, l_total, dT))

                p += B
                if p >= num_training_samples:
                    # End of epoch: draw fresh random crops and flips.
                    e += 1
                    p = 0
                    train_images, train_labels, num_training_samples = PrepareTrainImages(train_data)

                if (i + 1) % 10000 == 0:
                    save_path = saver.save(sess, ROOT + '/VDSR_models/VDSR_SP_iter'+str(i + 1)+'.ckpt')
                    print("Model saved in file: %s" % save_path)
        elif phase == 'Test':
            # Test
            for i in range(0, num_test_samples):
                # Read a test image
                img = Image.open(test_filenames[i])
                img = img.convert('RGB')
                # Trim the image so both sides are multiples of r. The input
                # fed to the network is (h//r, w//r) and the labels used for
                # training are r times the input size, so without the trim an
                # odd-sized ground truth would not match the output in PSNR().
                w, h = img.size
                w, h = w - w % r, h - h % r
                img = img.crop((0, 0, w, h))
                # Down-sampling only; the network receives the low-resolution image
                img_input = img.resize((w//r, h//r), Image.LANCZOS)
                img = np.asarray(img)
                img = _rgb2ycbcr(img)[:, :, 0:1]
                img_input = np.asarray(img_input)
                img_input = _rgb2ycbcr(img_input)[:, :, 0:1]
                img_input = img_input / 255.0

                t = time.time()
                output = sess.run(outputs, feed_dict={inputs: img_input[np.newaxis, ]})
                dT = time.time() - t
                res = (np.clip(output[0,:,:,0],0,1)*255).astype(np.uint8)

                fig = plt.figure()
                ax1 = fig.add_subplot(1, 3, 1)
                ax1.imshow(img_input[:,:,0], cmap='gray')
                ax1.set_xlabel('BICUBIC')
                ax2 = fig.add_subplot(1, 3, 2)
                ax2.imshow(np.clip(output[0,:,:,0],0,1), cmap='gray')
                ax2.set_xlabel('VDSR SP')
                ax3 = fig.add_subplot(1, 3, 3)
                ax3.imshow(img[:, :, 0] / 255.0, cmap='gray')
                ax3.set_xlabel('GROUND TRUTH')
                plt.title('Test image #: {:3d} | PSNR: {:3f} | dT: {:4.3f}s'.format(i, PSNR(img[:, :, 0], res), dT))

In [0]:
# Train VDSR SP from freshly initialised variables; a checkpoint is written
# every 10000 iterations.
do_VDSR_SP('Train')

In [0]:
# Evaluate on the Set5 images using the checkpoint saved at iteration 10000.
do_VDSR_SP('Test')