# Model selection

A baseline model is proposed first, and several modifications to it are then tested to see whether the performance can be improved further. The modifications cover the location of the upsampling layer, the patch size, the loss function, and the use of a subpixel layer for upsampling. In this notebook, 7 models are trained using 4000 patches, and their performance is evaluated on the first 10 images of the DIV2K validation set.

In [1]:
from tensorflow import config

# Turn on memory growth for every GPU that TensorFlow reports.
gpu_devices = config.experimental.list_physical_devices('GPU')

for gpu in gpu_devices:
    config.experimental.set_memory_growth(gpu, True)

## Load images from directory

In [None]:
# load training images from directory

import os
import numpy as np
from PIL import Image

LR_train_path = './datasets/DIV2K_train_LR_bicubic/X2/'
HR_train_path = './datasets/DIV2K_train_HR/'


def _load_images(root):
    """Load every image found under ``root`` as a numpy array.

    Directories are walked recursively; within each directory the files are
    read in sorted name order and '.DS_Store' entries are skipped.

    Args:
        root: Directory to walk.

    Returns:
        A list with one numpy array per image, in traversal order.
    """
    images = []
    for path, subdirs, files in os.walk(root):
        # Sorting in place makes os.walk descend in a deterministic order.
        subdirs.sort()
        for name in sorted(files):
            if name == '.DS_Store':
                continue
            # Join with the directory being walked (not the root) so files in
            # sub-folders resolve, and close each file once it is converted.
            with Image.open(os.path.join(path, name)) as img:
                images.append(np.asarray(img))
    return images


# LR and HR images are paired by position, so both lists rely on the same
# sorted file order.
LR_train_imgs = _load_images(LR_train_path)
HR_train_imgs = _load_images(HR_train_path)

print(len(LR_train_imgs))
print(len(HR_train_imgs))

## Preprocess (patch extraction + normalization)

In [None]:
# randomly extract patches from training images (X2 upscaling)

from extract_patches import *

# Patch-sampling settings passed to train_patch (imported from
# extract_patches, which is not part of this file).
patch_height, patch_width = 48, 48
patch_num = 4000
up_scale = 2

LR_patch_train, HR_patch_train = train_patch(
    LR_train_imgs, HR_train_imgs,
    patch_height, patch_width, patch_num, up_scale,
)

# Print the LR shape first, then the HR shape.
for patches in (LR_patch_train, HR_patch_train):
    print(patches.shape)

In [4]:
# normalize imgs from 0~255 to 0~1

def normalize(imgs):
    """Divide pixel values by 255, mapping the 0..255 range onto 0..1."""
    max_pixel_value = 255
    return imgs / max_pixel_value

# Rescale both patch sets with normalize() before training.
HR_patch_train, LR_patch_train = (
    normalize(patches) for patches in (HR_patch_train, LR_patch_train)
)

# Normalization must not change the shapes; print LR first, then HR.
for patches in (LR_patch_train, HR_patch_train):
    print(patches.shape)

(4000, 48, 48, 3)
(4000, 96, 96, 3)


## Build different network architecture for comparison

In [5]:
# define subpixel layer for up-sampling

import tensorflow as tf
from keras.layers import Lambda

def pixelshuffler(input_shape, batch_size, scale=2):
    """Build a sub-pixel (pixel-shuffle) upsampling layer.

    The returned Lambda layer applies ``tf.nn.depth_to_space`` with block size
    ``scale``: height and width grow by ``scale`` and the channel count
    shrinks by ``scale ** 2``.

    Args:
        input_shape: Default for the shape function, indexed as
            (batch, height, width, channels). Keras supplies the real input
            shape when it calls the shape function, so this is only a
            fallback.
        batch_size: Kept so existing callers keep working. The batch dimension
            of the reported output shape now comes from the incoming shape
            instead of being pinned to this value.
        scale: Upsampling factor (block size for depth_to_space).

    Returns:
        A keras ``Lambda`` layer performing the pixel shuffle.
    """
    def subpixel_shape(input_shape=input_shape, batch_size=batch_size):
        # Output shape as a function of the input shape. The batch dimension
        # is passed through untouched and unknown (None) spatial dimensions
        # stay unknown instead of raising on None * scale.
        height, width = input_shape[1], input_shape[2]
        return (input_shape[0],
                None if height is None else height * scale,
                None if width is None else width * scale,
                input_shape[3] // (scale ** 2))

    def pixelshuffle_upscale(x):
        return tf.nn.depth_to_space(input=x, block_size=scale)

    return Lambda(function=pixelshuffle_upscale, output_shape=subpixel_shape)

Using TensorFlow backend.


In [5]:
# define baseline model architecture

from keras.models import Model, Sequential
from keras.layers import PReLU, Input, Conv2D, UpSampling2D, Dropout, add
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

def res_block(inputs):
    """Residual block: 3x3 conv -> PReLU -> 3x3 conv, added back onto ``inputs``.

    Both convolutions emit 64 channels with 'same' padding, so ``inputs`` has
    to carry 64 channels for the final element-wise add to line up.
    """
    conv_kwargs = dict(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')

    residual = Conv2D(**conv_kwargs)(inputs)
    residual = PReLU(shared_axes=[1, 2])(residual)
    residual = Conv2D(**conv_kwargs)(residual)

    # Skip connection (no batch normalization is applied in this block).
    return add([residual, inputs])


def baseline(patch_height, patch_width, channel, upscale=2):
    """Build the baseline network: convolve at low resolution, then upsample.

    Layout: 9x9 conv + PReLU, eight residual blocks, a 3x3 conv joined to the
    first conv's output by a skip connection, one upsampling stage, then a
    9x9 conv followed by a 1x1 sigmoid conv that emits 3 channels.

    Args:
        patch_height: Height of the input patches.
        patch_width: Width of the input patches.
        channel: Number of input channels.
        upscale: Spatial factor applied by the UpSampling2D layer. It used to
            be accepted but ignored (the factor was fixed at 2); the default
            keeps the previous behaviour.

    Returns:
        An uncompiled keras ``Model`` whose output is ``upscale`` times larger
        than the input in height and width, with 3 channels.
    """
    inputs = Input(shape=(patch_height, patch_width, channel))
    x_init = Conv2D(filters=64, kernel_size=(9, 9), strides=(1, 1), padding='same')(inputs)
    x = PReLU(shared_axes=[1, 2])(x_init)

    # residual_block
    for _ in range(8):
        x = res_block(x)

    # Long skip connection around the whole residual stack.
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = add([x, x_init])

    # up_block
    x = Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = UpSampling2D(size=(upscale, upscale))(x)  # size: upsampling factor
    x = PReLU(shared_axes=[1, 2])(x)

    # output_block: the sigmoid keeps predictions inside 0..1.
    output = Conv2D(filters=3, kernel_size=(9, 9), strides=(1, 1), padding='same')(x)
    output = Conv2D(3, (1, 1), activation='sigmoid', padding='same')(output)

    return Model(inputs=inputs, outputs=output)



Using TensorFlow backend.


In [16]:
# modify the location of upsampling layer
# define pre-upsampling network architecture

from keras.models import Model, Sequential
from keras.layers import PReLU, Input, Conv2D, UpSampling2D, Dropout, add
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

def res_block(inputs):
    """Return ``inputs`` plus a conv -> PReLU -> conv branch computed from it.

    Each convolution uses 64 filters, a 3x3 kernel, stride 1 and 'same'
    padding, so the branch keeps the spatial size of ``inputs``.
    """
    def conv3x3():
        # A fresh layer per call: the two convolutions do not share weights.
        return Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')

    branch = conv3x3()(inputs)
    branch = PReLU(shared_axes=[1, 2])(branch)
    branch = conv3x3()(branch)
    return add([branch, inputs])

def model1(patch_height, patch_width, channel, upscale=2):
    """Build the pre-upsampling variant: upsample first, then run the residual blocks.

    Layout: 9x9 conv + PReLU, a 3x3 conv joined to the first conv's output by
    a skip connection, two 3x3 convs (128 then 64 filters), one upsampling
    stage, eight residual blocks at the upsampled resolution, then a 9x9 conv
    followed by a 1x1 sigmoid conv that emits 3 channels.

    Args:
        patch_height: Height of the input patches.
        patch_width: Width of the input patches.
        channel: Number of input channels.
        upscale: Spatial factor applied by the UpSampling2D layer. It used to
            be accepted but ignored (the factor was fixed at 2); the default
            keeps the previous behaviour.

    Returns:
        An uncompiled keras ``Model`` whose output is ``upscale`` times larger
        than the input in height and width, with 3 channels.
    """
    inputs = Input(shape=(patch_height, patch_width, channel))
    x_init = Conv2D(filters=64, kernel_size=(9, 9), strides=(1, 1), padding='same')(inputs)
    x = PReLU(shared_axes=[1, 2])(x_init)

    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = add([x, x_init])

    # up_block: ends with 64 channels so the residual blocks' adds line up.
    x = Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = UpSampling2D(size=(upscale, upscale))(x)
    x = PReLU(shared_axes=[1, 2])(x)

    # residual_block (runs at the upsampled resolution)
    for _ in range(8):
        x = res_block(x)

    # output_block: the sigmoid keeps predictions inside 0..1.
    output = Conv2D(filters=3, kernel_size=(9, 9), strides=(1, 1), padding='same')(x)
    output = Conv2D(3, (1, 1), activation='sigmoid', padding='same')(output)

    return Model(inputs=inputs, outputs=output)

In [6]:
# modify upsampling layer to subpixel layer

from keras.models import Model, Sequential
from keras.layers import PReLU, Input, Conv2D, UpSampling2D, Dropout, add
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

def res_block(inputs):
    """Apply two 64-filter 3x3 convolutions (PReLU in between) with an identity skip."""
    first_conv = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(inputs)
    activated = PReLU(shared_axes=[1, 2])(first_conv)
    second_conv = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(activated)

    # Element-wise sum of the conv branch and the untouched input.
    return add([second_conv, inputs])


def model2(patch_height, patch_width, channel, upscale=2):
    """Build the baseline layout with a sub-pixel (pixel-shuffle) upsampling layer.

    Same structure as the conv-then-upsample network, except that the
    upsampling stage is ``pixelshuffler`` (depth_to_space) instead of
    UpSampling2D.

    Args:
        patch_height: Height of the input patches.
        patch_width: Width of the input patches.
        channel: Number of input channels.
        upscale: Pixel-shuffle factor. The conv in front of the shuffle emits
            ``64 * upscale ** 2`` channels (256 for the default of 2, as
            before) so that 64 channels remain after the shuffle.

    Returns:
        An uncompiled keras ``Model`` whose output is ``upscale`` times larger
        than the input in height and width, with 3 channels.
    """
    inputs = Input(shape=(patch_height, patch_width, channel))
    x_init = Conv2D(filters=64, kernel_size=(9, 9), strides=(1, 1), padding='same')(inputs)
    x = PReLU(shared_axes=[1, 2])(x_init)

    # residual_block
    for _ in range(8):
        x = res_block(x)

    # Long skip connection around the whole residual stack.
    x = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    x = add([x, x_init])

    # up_block: depth_to_space divides the channel count by upscale ** 2, so
    # the preceding conv has to provide a multiple of that.
    shuffle_filters = 64 * upscale ** 2
    x = Conv2D(filters=shuffle_filters, kernel_size=(3, 3), strides=(1, 1), padding='same')(x)
    # Describe the tensor that actually enters the shuffle as
    # (batch, height, width, channels) instead of a fixed (48, 48, 3).
    x = pixelshuffler(input_shape=(None, patch_height, patch_width, shuffle_filters),
                      batch_size=4, scale=upscale)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    # output_block: the sigmoid keeps predictions inside 0..1.
    output = Conv2D(filters=3, kernel_size=(9, 9), strides=(1, 1), padding='same')(x)
    output = Conv2D(3, (1, 1), activation='sigmoid', padding='same')(output)

    return Model(inputs=inputs, outputs=output)

## Define perceptual loss

In [6]:
# define perceptual loss based on the first 5 layers of VGG19 model

from keras.applications.vgg19 import VGG19
from keras.layers import Input, Lambda
import keras

# get VGG network
def get_VGG19(input_size):
    """Create a frozen VGG19 and expose its 'block2_conv2' activations.

    Args:
        input_size: Shape handed to ``Input`` for the VGG input tensor.

    Returns:
        A ``(vgg_input, vgg_output)`` pair: the input tensor and the output
        tensor of the 'block2_conv2' layer.
    """
    vgg_input = Input(input_size)
    # include_top=False leaves out the classifier head; the weights argument
    # keeps its library default.
    vgg = VGG19(include_top=False, input_tensor=vgg_input)

    # Freeze every layer so VGG is never updated during training.
    for layer in vgg.layers:
        layer.trainable = False

    return vgg_input, vgg.get_layer('block2_conv2').output

def perceptual_loss(y_true, y_pred):
    """Mean squared error between VGG feature maps of target and prediction.

    Both tensors go through the module-level ``vgg_content`` model, which has
    to exist by the time this loss is evaluated.

    NOTE(review): the images are fed to VGG exactly as received; elsewhere in
    this notebook they are scaled to 0..1 - confirm whether VGG19's own input
    preprocessing is expected here.
    """
    true_features = vgg_content(y_true)
    predicted_features = vgg_content(y_pred)
    return keras.losses.mean_squared_error(true_features, predicted_features)


# Feature extractor used by perceptual_loss. The fixed 96x96x3 input size
# matches the HR patch shape printed earlier in this notebook.
vgg_input, vgg_output = get_VGG19(input_size=(96, 96, 3))
vgg_content = Model(inputs=vgg_input, outputs=vgg_output)
#vgg_content.summary()

## Train 7 models for 10 epochs (batch_size=4, validation_split=0.2)

In [9]:
# baseline model

# Baseline network on 48x48 patches, trained with pixel-wise MSE.
model = baseline(patch_height=48, patch_width=48, channel=3)
model.compile(
    loss='mse',
    optimizer=Adam(lr=1e-4),
    metrics=['accuracy'],
)

# Only overwrite the saved model when the validation loss improves.
checkpointer = ModelCheckpoint(
    filepath='./model_selection/baseline.h5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)

# validation_split=0.2 holds out 800 of the 4000 patches (see the log below).
history = model.fit(
    x=LR_patch_train,
    y=HR_patch_train,
    batch_size=4,
    epochs=10,
    validation_split=0.2,
    callbacks=[checkpointer],
    verbose=1,
)

Train on 3200 samples, validate on 800 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.00338, saving model to ./model_selection/baseline.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.00338 to 0.00231, saving model to ./model_selection/baseline.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.00231 to 0.00184, saving model to ./model_selection/baseline.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.00184 to 0.00168, saving model to ./model_selection/baseline.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.00168 to 0.00153, saving model to ./model_selection/baseline.h5
Epoch 6/10

Epoch 00006: val_loss improved from 0.00153 to 0.00144, saving model to ./model_selection/baseline.h5
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.00144
Epoch 8/10

Epoch 00008: val_loss improved from 0.00144 to 0.00131, saving model to ./model_selection/baseline.h5
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.00131
Epoch 10/10

Epoch 00010: val_los

In [18]:
# pre-upsampling network

# Pre-upsampling variant (model1) on 48x48 patches, trained with MSE.
model = model1(patch_height=48, patch_width=48, channel=3)
model.compile(
    loss='mse',
    optimizer=Adam(lr=1e-4),
    metrics=['accuracy'],
)

# Only overwrite the saved model when the validation loss improves.
checkpointer = ModelCheckpoint(
    filepath='./model_selection/architecture.h5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)

history = model.fit(
    x=LR_patch_train,
    y=HR_patch_train,
    batch_size=4,
    epochs=10,
    validation_split=0.2,
    callbacks=[checkpointer],
    verbose=1,
)

Train on 3200 samples, validate on 800 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.00507, saving model to ./model_selection/architecture.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.00507 to 0.00245, saving model to ./model_selection/architecture.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.00245 to 0.00198, saving model to ./model_selection/architecture.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.00198 to 0.00194, saving model to ./model_selection/architecture.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.00194 to 0.00187, saving model to ./model_selection/architecture.h5
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.00187
Epoch 7/10

Epoch 00007: val_loss improved from 0.00187 to 0.00158, saving model to ./model_selection/architecture.h5
Epoch 8/10

Epoch 00008: val_loss improved from 0.00158 to 0.00156, saving model to ./model_selection/architecture.h5
Epoch 9/10

Epoch 00009: val_loss improved from 0.00156 to 0.00130, 

In [8]:
# change loss function from MSE to defined perceptual loss

# Baseline architecture again, but optimised with perceptual_loss instead of
# MSE, so the val_loss values are not on the same scale as the MSE runs.
model = baseline(patch_height=48, patch_width=48, channel=3)
model.compile(
    loss=perceptual_loss,
    optimizer=Adam(lr=1e-4),
    metrics=['accuracy'],
)

# Only overwrite the saved model when the validation loss improves.
checkpointer = ModelCheckpoint(
    filepath='./model_selection/loss_function.h5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)

history = model.fit(
    x=LR_patch_train,
    y=HR_patch_train,
    batch_size=4,
    epochs=10,
    validation_split=0.2,
    callbacks=[checkpointer],
    verbose=1,
)

Train on 3200 samples, validate on 800 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.43804, saving model to ./model_selection/loss_function.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.43804 to 0.36274, saving model to ./model_selection/loss_function.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.36274 to 0.33961, saving model to ./model_selection/loss_function.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.33961 to 0.32908, saving model to ./model_selection/loss_function.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.32908 to 0.32740, saving model to ./model_selection/loss_function.h5
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.32740
Epoch 7/10

Epoch 00007: val_loss improved from 0.32740 to 0.29939, saving model to ./model_selection/loss_function.h5
Epoch 8/10

Epoch 00008: val_loss improved from 0.29939 to 0.29723, saving model to ./model_selection/loss_function.h5
Epoch 9/10

Epoch 00009: val_loss improved from 0.29723 to 0.

In [9]:
# modify the upsampling layer to subpixel layer

# Sub-pixel upsampling variant (model2) on 48x48 patches, trained with MSE.
model = model2(patch_height=48, patch_width=48, channel=3)
model.compile(
    loss='mse',
    optimizer=Adam(lr=1e-4),
    metrics=['accuracy'],
)

# Only overwrite the saved model when the validation loss improves.
checkpointer = ModelCheckpoint(
    filepath='./model_selection/subpixel_up1.h5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)

history = model.fit(
    x=LR_patch_train,
    y=HR_patch_train,
    batch_size=4,
    epochs=10,
    validation_split=0.2,
    callbacks=[checkpointer],
    verbose=1,
)

Train on 3200 samples, validate on 800 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.00408, saving model to ./model_selection/subpixel_up1.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.00408 to 0.00379, saving model to ./model_selection/subpixel_up1.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.00379 to 0.00234, saving model to ./model_selection/subpixel_up1.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.00234 to 0.00218, saving model to ./model_selection/subpixel_up1.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.00218 to 0.00175, saving model to ./model_selection/subpixel_up1.h5
Epoch 6/10

Epoch 00006: val_loss improved from 0.00175 to 0.00170, saving model to ./model_selection/subpixel_up1.h5
Epoch 7/10

Epoch 00007: val_loss improved from 0.00170 to 0.00145, saving model to ./model_selection/subpixel_up1.h5
Epoch 8/10

Epoch 00008: val_loss improved from 0.00145 to 0.00139, saving model to ./model_selection/subpixel_up1.h5
Epoch 9/10

E

In [6]:
# input patch size = 16*16

# Baseline network built for 16x16 input patches.
# NOTE(review): the extraction cell in this notebook uses 48x48 patches;
# presumably LR_patch_train / HR_patch_train were re-extracted with
# patch size 16 before this cell ran - confirm.
model = baseline(patch_height=16, patch_width=16, channel=3)
model.compile(
    loss='mse',
    optimizer=Adam(lr=1e-4),
    metrics=['accuracy'],
)

# No ModelCheckpoint callback is attached in this cell, so fit() does not
# write a model file for this run.
history = model.fit(
    x=LR_patch_train,
    y=HR_patch_train,
    batch_size=4,
    epochs=10,
    validation_split=0.2,
    verbose=1,
)

Train on 3200 samples, validate on 800 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [5]:
# input patch size = 32*32

# Baseline network built for 32x32 input patches.
# NOTE(review): the extraction cell in this notebook uses 48x48 patches;
# presumably LR_patch_train / HR_patch_train were re-extracted with
# patch size 32 before this cell ran - confirm.
model = baseline(patch_height=32, patch_width=32, channel=3)
model.compile(
    loss='mse',
    optimizer=Adam(lr=1e-4),
    metrics=['accuracy'],
)

# No ModelCheckpoint callback is attached in this cell, so fit() does not
# write a model file for this run.
history = model.fit(
    x=LR_patch_train,
    y=HR_patch_train,
    batch_size=4,
    epochs=10,
    validation_split=0.2,
    verbose=1,
)

Train on 3200 samples, validate on 800 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [5]:
# input patch size = 64*64

# Baseline network built for 64x64 input patches.
# NOTE(review): the extraction cell in this notebook uses 48x48 patches;
# presumably LR_patch_train / HR_patch_train were re-extracted with
# patch size 64 before this cell ran - confirm.
model = baseline(patch_height=64, patch_width=64, channel=3)
model.compile(
    loss='mse',
    optimizer=Adam(lr=1e-4),
    metrics=['accuracy'],
)

# Only overwrite the saved model when the validation loss improves.
checkpointer = ModelCheckpoint(
    filepath='./model_selection/patch6464_model.h5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)

history = model.fit(
    x=LR_patch_train,
    y=HR_patch_train,
    batch_size=4,
    epochs=10,
    validation_split=0.2,
    callbacks=[checkpointer],
    verbose=1,
)

Train on 3200 samples, validate on 800 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.00330, saving model to ./model_selection/patch6464_model.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.00330 to 0.00226, saving model to ./model_selection/patch6464_model.h5
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.00226
Epoch 4/10

Epoch 00004: val_loss improved from 0.00226 to 0.00210, saving model to ./model_selection/patch6464_model.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.00210 to 0.00147, saving model to ./model_selection/patch6464_model.h5
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.00147
Epoch 7/10

Epoch 00007: val_loss improved from 0.00147 to 0.00134, saving model to ./model_selection/patch6464_model.h5
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.00134
Epoch 9/10

Epoch 00009: val_loss improved from 0.00134 to 0.00123, saving model to ./model_selection/patch6464_model.h5
Epoch 10/10

Epoch 00010: val_loss improve

## Compare performance on the first 10 test images

In [None]:
# load test images from directory

import os
import numpy as np
from PIL import Image

# Validation data gets its own names: the previous version stored it in the
# LR_train_* / HR_train_* variables (overwriting the training set) and then
# sliced LR_valid_imgs / HR_valid_imgs, which were never defined.
LR_valid_path = './datasets/DIV2K_valid_LR_bicubic/X2/'
HR_valid_path = './datasets/DIV2K_valid_HR//'


def _load_images(root, limit=None):
    """Load images found under ``root`` as numpy arrays.

    Directories are walked recursively; within each directory the files are
    read in sorted name order and '.DS_Store' entries are skipped.

    Args:
        root: Directory to walk.
        limit: Stop after this many images; ``None`` loads everything.

    Returns:
        A list with one numpy array per image, in traversal order.
    """
    images = []
    for path, subdirs, files in os.walk(root):
        # Sorting in place makes os.walk descend in a deterministic order.
        subdirs.sort()
        for name in sorted(files):
            if name == '.DS_Store':
                continue
            if limit is not None and len(images) >= limit:
                return images
            # Join with the directory being walked (not the root) so files in
            # sub-folders resolve, and close each file once it is converted.
            with Image.open(os.path.join(path, name)) as img:
                images.append(np.asarray(img))
    return images


# Only the first 10 validation images are evaluated, so only those are read.
LR_valid_imgs = _load_images(LR_valid_path, limit=10)
HR_valid_imgs = _load_images(HR_valid_path, limit=10)

print(len(LR_valid_imgs))
print(len(HR_valid_imgs))

In [None]:
# load trained models

import tensorflow as tf
from keras.models import load_model

# Load the baseline checkpoint. 'tf' is registered as a custom object so the
# name is available while the saved model is rebuilt.
model = load_model(
    './model_selection/baseline.h5',
    custom_objects={'tf': tf},
)

In [None]:
# normalize images for predicting

def normalize(imgs):
    """Map pixel values from the 0..255 range to 0..1 by dividing by 255."""
    scaled = imgs / 255
    return scaled

def denormalize(imgs):
    """Convert images in the 0..1 range back to 8-bit pixel values.

    Values are scaled by 255, rounded to the nearest integer and clipped to
    0..255 before the cast. A bare ``astype(np.uint8)`` truncates the
    fraction (0.999 * 255 would become 254) and does not saturate values
    that fall outside the 0..255 range.

    Args:
        imgs: numpy array of floats, nominally in [0, 1].

    Returns:
        A ``np.uint8`` array with the same shape as ``imgs``.
    """
    scaled = np.rint(imgs * 255)
    return np.clip(scaled, 0, 255).astype(np.uint8)

# Replace each low-resolution validation image with its normalized copy,
# keeping the same container object.
for idx, lr_img in enumerate(LR_valid_imgs):
    LR_valid_imgs[idx] = normalize(lr_img)

### Use trained model to predict test images

In [None]:
# predict and reconstruct test images
# stride should be smaller than patch size to cover all the pixels

import time
from extract_patches import *

# Settings handed to test_patch (imported from extract_patches, which is not
# part of this file). The stride is kept below the patch size.
test_num = 10
patch_height = patch_width = 48
stride = 40
up_scale = 2

# Wall-clock time for the whole prediction call.
time_start = time.time()
predicted_HR_list = test_patch(
    LR_valid_imgs, test_num, patch_height, patch_width, stride, model, up_scale,
)
time_end = time.time()
print('Time cost to predict: ', time_end-time_start, 's')

# Convert every prediction with denormalize(), in place.
for idx, prediction in enumerate(predicted_HR_list):
    predicted_HR_list[idx] = denormalize(prediction)

In [None]:
# compare with HR images 
# calculate PSNR(peak_signal_noise_ratio) and SSIM(structural_similarity) metrics

from skimage.metrics import peak_signal_noise_ratio, structural_similarity

# One PSNR and one SSIM value per predicted image, in list order.
PSNR_val = []
SSIM_val = []

for idx, predicted in enumerate(predicted_HR_list):
    reference = HR_valid_imgs[idx]
    PSNR_val.append(peak_signal_noise_ratio(reference, predicted))
    SSIM_val.append(structural_similarity(reference, predicted, multichannel=True))

print('PSNR: ', PSNR_val)
print('SSIM: ', SSIM_val)