In [1]:
import dataloader
import numpy as np

In [2]:
# Override two entries of the dataloader module's global option dict; this runs
# before the DataLoader is constructed in a later cell.
# NOTE(review): the exact meaning of 'train_scales' is defined in dataloader.py — confirm there.
dataloader.args['train_scales'] = 2
dataloader.args['train'] = 'data/*.jpg'

In [21]:
# Display the complete option dict to confirm the overrides took effect.
dataloader.args

{'batch_shape': 192,
 'batch_size': 15,
 'buffer_fraction': 5,
 'buffer_size': 1500,
 'train': 'data/*.jpg',
 'train_blur': None,
 'train_jpeg': [],
 'train_noise': None,
 'train_scales': 2,
 'zoom': 2}

In [3]:
# Build the project's DataLoader (presumably configured from dataloader.args — verify in dataloader.py).
loader = dataloader.DataLoader()

In [4]:
# Preallocate float32 NCHW buffers for one batch: 15 full-size 192x192 RGB images
# and their 96x96 "seed" counterparts (192 // 2).
# NOTE(review): 15, 192 and 2 mirror 'batch_size', 'batch_shape' and 'zoom' in the
# dataloader.args dump shown in this notebook — keep them in sync.
seed_size = 192 // 2
images = np.zeros((15, 3, 192, 192), dtype=np.float32)
seeds = np.zeros((15, 3, seed_size, seed_size), dtype=np.float32)

In [5]:
# Fill the preallocated arrays in place (presumably image crops and their
# downscaled seeds — verify against DataLoader.copy).
loader.copy(images, seeds)

In [25]:
# Sanity check: the buffers keep their allocated shapes after the copy.
images.shape, seeds.shape

((15, 3, 192, 192), (15, 3, 96, 96))

In [8]:
# Peek at one seed to confirm the buffer now holds non-zero pixel data.
seeds[11]

array([[[-0.3392157 , -0.3392157 , -0.33529413, ..., -0.00588235,
         -0.06470588, -0.07647058],
        [-0.35882354, -0.35882354, -0.32745099, ...,  0.10392159,
          0.07647061,  0.0411765 ],
        [-0.34705883, -0.3392157 , -0.32745099, ...,  0.08039218,
          0.1156863 ,  0.11960787],
        ..., 
        [-0.31176472, -0.32352942, -0.31960785, ..., -0.10784313,
         -0.07254902, -0.04509804],
        [-0.31960785, -0.31176472, -0.32745099, ..., -0.09999999,
         -0.08823529, -0.06470588],
        [-0.31568629, -0.31960785, -0.31960785, ..., -0.09607843,
         -0.08823529, -0.10784313]],

       [[-0.28039217, -0.28039217, -0.2764706 , ...,  0.13921571,
          0.08431375,  0.07254905],
        [-0.30000001, -0.30000001, -0.26862746, ...,  0.23725492,
          0.21764708,  0.18235296],
        [-0.28823531, -0.28039217, -0.26862746, ...,  0.21764708,
          0.25294119,  0.25686276],
        ..., 
        [-0.21372548, -0.22156861, -0.22156861, ...,

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd

In [10]:
# Shape check for PixelShuffle(2): channels are divided by 4 and both spatial
# dimensions are doubled.
ps = nn.PixelShuffle(2)
# torch.zeros instead of torch.Tensor(sizes): the latter returns uninitialized
# memory, which makes the cell non-reproducible and may contain NaN/Inf.
input = autograd.Variable(torch.zeros(1, 64, 192, 192))
output = ps(input)
output.size()

torch.Size([1, 16, 384, 384])

In [6]:
# try the nets
class BasicLayer(nn.Module):
    """A 2-D convolution immediately followed by a PReLU activation.

    Args:
        input_channel: number of channels of the incoming feature map.
        output_channel: number of channels produced by the convolution.
        kernel: convolution kernel size.
        stride: convolution stride.
        pad: zero padding applied on each side before the convolution.
        alpha: initial value of the PReLU slope parameter.
    """

    def __init__(self, input_channel, output_channel, kernel=3, stride=1, pad=1, alpha=0.25):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=input_channel,
            out_channels=output_channel,
            kernel_size=kernel,
            stride=stride,
            padding=pad,
        )
        self.prelu = nn.PReLU(init=alpha)

    def forward(self, input):
        """Return ``prelu(conv(input))``."""
        return self.prelu(self.conv(input))

In [28]:
# 64 -> 6 channels with a 5x5 kernel, stride 2 and padding 2.
layer = BasicLayer(64, 6, 5, 2, 2)

In [29]:
# Shape check for the layer built in the previous cell.
# torch.zeros instead of torch.Tensor(sizes): the latter returns uninitialized
# memory, which makes the cell non-reproducible and may contain NaN/Inf.
input = autograd.Variable(torch.zeros(1, 64, 192, 192))
output = layer(input)
output.shape

torch.Size([1, 6, 96, 96])

In [7]:
class ResidualBlockLayer(nn.Module):
    """Residual block: ``input + BasicLayer(input)`` with the channel count preserved.

    The inner BasicLayer is a 3x3, stride-1, padding-1 convolution with a PReLU
    whose slope starts at 0.1, so the spatial size is unchanged and the skip
    connection can be added element-wise.
    """

    def __init__(self, input_channel):
        super().__init__()
        self.basic_layer = BasicLayer(
            input_channel, input_channel, kernel=3, stride=1, pad=1, alpha=0.1
        )

    def forward(self, input):
        """Return the input plus the block's transformation of it."""
        residual = self.basic_layer(input)
        return input + residual

In [15]:
# Shape check: a residual block must return the same shape it receives.
# NOTE(review): relies on `input` left in the kernel by an earlier cell.
res_layer = ResidualBlockLayer(64)
output = res_layer(input)
output.shape

torch.Size([1, 64, 192, 192])

In [9]:
class Generator(nn.Module):
    """2x super-resolution generator.

    Pipeline: 7x7 conv (3 -> 64) -> ``residual_size`` residual blocks ->
    3x3 conv (64 -> 256) -> PixelShuffle(2) (256 -> 64 channels, 2x spatial)
    -> 7x7 conv (64 -> 3).
    """

    def __init__(self):
        super(Generator, self).__init__()
        self.residual_size = 4

        # init
        self.init_layer = BasicLayer(3, 64, kernel=7, pad=3)

        # residual layers
        # nn.ModuleList rather than a plain Python list: sub-modules held in a
        # plain list are not registered, so their weights would be missing from
        # parameters() / state_dict() / .cuda() and would never be optimized.
        self.block_layer = nn.ModuleList(
            [ResidualBlockLayer(64) for _ in range(self.residual_size)]
        )

        # upscale layers: produce 4x the channels, which PixelShuffle(2)
        # rearranges into a 2x larger feature map with 64 channels.
        self.upscale_layer = BasicLayer(64, 64 * 4)
        self.pixel_shuffle = nn.PixelShuffle(2)

        self.output_layer = nn.Conv2d(64, 3, 7, padding=3)

    def forward(self, input):
        """Map a (N, 3, H, W) batch to a (N, 3, 2H, 2W) batch."""
        x = self.init_layer(input)
        for block in self.block_layer:
            x = block(x)

        x = self.upscale_layer(x)
        x = self.pixel_shuffle(x)
        return self.output_layer(x)

In [32]:
# Run the generator on the first seed only; the output should have twice the
# seed's spatial size (96 -> 192).
gen = Generator()
input = autograd.Variable(torch.from_numpy(seeds[:1]))
gen_output = gen(input)
gen_output.shape

torch.Size([1, 3, 192, 192])

In [11]:
# preload vgg19
from torchvision.models import vgg

In [19]:
# Load torchvision's VGG19 with pretrained weights and display its layer
# listing; the indices shown are what the feature taps below refer to.
vgg_pretrained = vgg.vgg19(pretrained=True)
vgg_pretrained

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU(

In [8]:
class LambdaLayer(nn.Module):
    """Wrap an arbitrary callable so it can be used like an ``nn.Module``.

    Args:
        lambd: callable applied to the input in ``forward``.
    """

    def __init__(self, lambd):
        super().__init__()
        self.lambd = lambd

    def forward(self, input):
        """Return ``lambd(input)``."""
        fn = self.lambd
        return fn(input)

In [23]:
# Shape check for the preprocessing lambda: shift by 0.5, scale to 0..255 and
# subtract a per-channel offset that broadcasts over (N, 3, H, W).
offset = np.array([103.939, 116.779, 123.680], dtype=np.float32).reshape((1,3,1,1))
offset = autograd.Variable(torch.from_numpy(offset), requires_grad=False)
lambd = LambdaLayer(lambda x: ((x+0.5)*255.0) - offset)
# torch.zeros instead of torch.Tensor(sizes): the latter returns uninitialized
# memory, which makes the cell non-reproducible and may contain NaN/Inf.
input = autograd.Variable(torch.zeros(1, 3, 192, 192))
output = lambd(input)
output.shape

torch.Size([1, 3, 192, 192])

In [9]:
class Perceptual(nn.Module):
    """Feature extractor built from the first 32 layers of pretrained VGG19.

    ``forward`` rescales the input with ``(x + 0.5) * 255 - offset`` and returns
    three intermediate activations plus the output of the last kept layer.
    """

    def __init__(self):
        super(Perceptual, self).__init__()

        # Per-channel offset, shaped (1, 3, 1, 1) so it broadcasts over NCHW.
        # NOTE(review): these look like the Caffe-style BGR ImageNet means on a
        # 0..255 scale, while torchvision's VGG weights were presumably trained
        # on RGB inputs normalized in 0..1 — confirm the preprocessing is intended.
        offset_ = np.array([103.939, 116.779, 123.680], dtype=np.float32).reshape((1,3,1,1))
        self.offset = autograd.Variable(torch.from_numpy(offset_), requires_grad=False)
        self.lambd = LambdaLayer(lambda x: ((x+0.5)*255.0) - self.offset)

        # init with pretrained vgg19
        original = vgg.vgg19(pretrained=True)
        # nn.ModuleList rather than a plain Python list so the VGG layers are
        # registered as sub-modules (visible to state_dict(), .cuda(), .eval()).
        self.features = nn.ModuleList(list(original.features.children())[:32])

    def forward(self, input):
        """Return ``(conv_1_2, conv_2_2, conv_3_2, last)`` for an NCHW batch.

        The taps are taken after layer indices 3, 7 and 11 of ``self.features``.
        NOTE(review): per the VGG19 listing printed in this notebook, index 3 is
        the ReLU after the second 64-channel conv, index 7 is the second
        128-channel conv *before* its ReLU, and index 11 is the ReLU after the
        first 256-channel conv — so the ``conv_3_2`` name may not match the
        layer actually tapped; confirm the intended indices.
        """
        conv_1_2, conv_2_2, conv_3_2 = None, None, None
        x = self.lambd(input)
        for i, layer in enumerate(self.features):
            x = layer(x)
            # clone() keeps the tapped activation intact: VGG's ReLUs are
            # in-place and would otherwise overwrite a tensor tapped right
            # before one of them.
            if i == 3:
                conv_1_2 = x.clone()
            elif i == 7:
                conv_2_2 = x.clone()
            elif i == 11:
                conv_3_2 = x.clone()

        return conv_1_2, conv_2_2, conv_3_2, x

In [12]:
# Shape check for the four feature maps returned by Perceptual.
perc = Perceptual()
# torch.zeros instead of torch.Tensor(sizes): the latter returns uninitialized
# memory, so the features (and any loss computed from them) would be
# non-reproducible and could contain NaN/Inf.
input = autograd.Variable(torch.zeros(1, 3, 192, 192))
conv_1_2, conv_2_2, conv_3_2, conv_5_4 = perc(input)
conv_1_2.shape, conv_2_2.shape, conv_3_2.shape, conv_5_4.shape

(torch.Size([1, 64, 192, 192]),
 torch.Size([1, 128, 96, 96]),
 torch.Size([1, 256, 48, 48]),
 torch.Size([1, 512, 12, 12]))

In [17]:
class Discriminator(nn.Module):
    """Multi-scale discriminator operating on three feature maps.

    The three inputs are expected to carry 64, 128 and 256 channels (the sizes
    of the BatchNorm layers below) at full, half and quarter resolution
    respectively. Each branch is brought to quarter resolution, the branches
    are concatenated (``channels + 2*channels + 3*channels`` channels) and
    reduced to a single-channel score map.

    Args:
        channels: base width of the discriminator's convolutions.
    """

    def __init__(self, channels):
        super(Discriminator, self).__init__()

        self.channels = channels
        self.input_channel = 64

        # Branch 1: 64-channel input, two stride-2 convs (resolution / 4).
        self.batch_norm1 = nn.BatchNorm2d(self.input_channel)
        self.conv_layer1_1 = BasicLayer(self.input_channel, self.channels, 5, 2, 2)
        self.conv_layer1_2 = BasicLayer(self.channels, self.channels, 5, 2, 2)

        # Branch 2: 128-channel input, one stride-2 conv (resolution / 2).
        self.batch_norm2 = nn.BatchNorm2d(2 * self.input_channel)
        self.conv_layer2 = BasicLayer(2 * self.input_channel, 2 * self.channels, 5, 2, 2)

        # Branch 3: 256-channel input, stride-1 conv (resolution unchanged).
        self.batch_norm3 = nn.BatchNorm2d(4 * self.input_channel)
        self.conv_layer3 = BasicLayer(4 * self.input_channel, 3 * self.channels, 3, 1, 1)

        # Trunk applied to the concatenated branches (6 * channels wide).
        self.conv_layer4 = BasicLayer(6 * self.channels, 4 * self.channels, 1, 1, 0)
        self.conv_layer5 = BasicLayer(4 * self.channels, 3 * self.channels, 3, stride=2)
        self.conv_layer6 = BasicLayer(3 * self.channels, 2 * self.channels, 1, 1, 0)

        self.batch_norm7 = nn.BatchNorm2d(2 * self.channels)
        self.conv_layer7 = nn.Conv2d(2 * self.channels, 1, 1)

    def forward(self, conv_1_2, conv_2_2, conv_3_2):
        """Return a (N, 1, h, w) score map for the three feature maps.

        The branch outputs must share the same spatial size, otherwise the
        channel-wise concatenation fails.
        """
        x1 = self.batch_norm1(conv_1_2)
        x1 = self.conv_layer1_1(x1)
        x1 = self.conv_layer1_2(x1)

        x2 = self.batch_norm2(conv_2_2)
        x2 = self.conv_layer2(x2)

        x3 = self.batch_norm3(conv_3_2)
        x3 = self.conv_layer3(x3)

        # Concatenate along the channel axis. The per-call debug print of the
        # shapes was removed: it fired on every forward pass.
        x = torch.cat([x1, x2, x3], dim=1)

        x = self.conv_layer4(x)
        x = self.conv_layer5(x)
        x = self.conv_layer6(x)

        x = self.batch_norm7(x)
        return self.conv_layer7(x)

In [19]:
# Shape check for a base-width-32 discriminator; detach() feeds it the VGG
# features as plain inputs, cut off from the graph that produced them.
disc = Discriminator(32)
disc_output = disc(conv_1_2.detach(), conv_2_2.detach(), conv_3_2.detach())
disc_output.shape

torch.Size([1, 32, 48, 48]) torch.Size([1, 64, 48, 48]) torch.Size([1, 96, 48, 48]) torch.Size([1, 192, 48, 48])


torch.Size([1, 1, 24, 24])

In [14]:
def mse_loss(input, target):
    """Sum of squared differences divided by the number of elements of ``input``."""
    diff = input - target
    squared_error_sum = torch.sum(diff ** 2)
    return squared_error_sum / input.data.nelement()

def loss_perceptual(input, target):
    """Perceptual loss: the mean squared error between two feature maps."""
    loss = mse_loss(input, target)
    return loss

In [15]:
def loss_total_variation(x):
    """Total-variation smoothness penalty on a 4-D tensor.

    For every position except the last row and column, sums the squared
    difference to the next element along dim 2 and along dim 3, raises that sum
    to the power 1.25 and averages over all positions.
    """
    anchor = x[:, :, :-1, :-1]
    diff_dim2 = anchor - x[:, :, 1:, :-1]
    diff_dim3 = anchor - x[:, :, :-1, 1:]
    energy = diff_dim2 ** 2 + diff_dim3 ** 2
    return torch.mean(energy ** 1.25)

In [32]:
# Smoke test: sum a perceptual term and a total-variation term and check that
# backward() runs on the result.
# detach() removes the target from the autograd graph, so the perceptual term
# only back-propagates through conv_5_4.
target_5_4 = conv_5_4.detach()
# target_5_4.requires_grad = False

out1 = loss_perceptual(conv_5_4, target_5_4 * 0.5)
out2 = loss_total_variation(gen_output)
output2 = out1 + out2
print(output2)
output2.backward()
# Printed again to confirm backward() does not alter the loss value.
print(output2)

Variable containing:
1.00000e-02 *
  8.8149
[torch.FloatTensor of size 1]

Variable containing:
1.00000e-02 *
  8.8149
[torch.FloatTensor of size 1]



In [16]:
def softminus(x):
    """Return ``x - softplus(x)`` element-wise (always negative, approaching 0 for large x)."""
    soft_plus = F.softplus(x)
    return x - soft_plus

# Compare softminus and softplus on a few sample points: the printed values are
# mirror images (softminus(x) == -softplus(-x)).
tx = autograd.Variable(torch.Tensor([-5, -1, 0, 1, 5]))
print(tx)
print(softminus(tx))
print(F.softplus(tx))

Variable containing:
-5
-1
 0
 1
 5
[torch.FloatTensor of size 5]

Variable containing:
-5.0067
-1.3133
-0.6931
-0.3133
-0.0067
[torch.FloatTensor of size 5]

Variable containing:
 0.0067
 0.3133
 0.6931
 1.3133
 5.0067
[torch.FloatTensor of size 5]



In [17]:
def loss_adversarial(input):
    """Generator-side adversarial loss: the mean of ``1 - softminus(input)``."""
    penalty = 1 - softminus(input)
    return penalty.mean()

# Evaluate the adversarial loss on a few sample discriminator scores.
tx = autograd.Variable(torch.Tensor([-5, -1, 0, 1, 5]))
output = loss_adversarial(tx)
output

Variable containing:
 2.4666
[torch.FloatTensor of size 1]

In [18]:
def loss_discriminator(input, target):
    """Discriminator loss: the mean of ``softminus(input) - softplus(target)``.

    Both arguments are discriminator score tensors of the same shape.
    """
    per_element = softminus(input) - F.softplus(target)
    return per_element.mean()

# Evaluate the discriminator loss on two small sample score vectors.
i = autograd.Variable(torch.Tensor([-5, -1, 0, 1, 5]))
t = autograd.Variable(torch.Tensor([-2, -1, 0, 1, 2]))
output = loss_discriminator(i, t)
output

Variable containing:
-2.3813
[torch.FloatTensor of size 1]

In [19]:
# construct full network
class Enhancer(nn.Module):
    """Full network: generator, VGG-based perceptual extractor and discriminator."""

    def __init__(self):
        super(Enhancer, self).__init__()

        self.generator = Generator()
        self.perceptual = Perceptual()
        self.discriminator = Discriminator(32)

    def create_new_discriminator(self, size):
        """Replace the discriminator with a freshly initialized one of base width ``size``.

        The original signature lacked ``self``, so every call on an instance
        raised a TypeError/NameError.
        """
        self.discriminator = Discriminator(size)

    def forward(self, inputs, seeds):
        """Run the whole pipeline on one batch.

        Args:
            inputs: numpy array of reference images (converted with torch.from_numpy).
            seeds: numpy array of low-resolution inputs for the generator.

        Returns:
            ``(gen_out, c12, c22, c32, perc_out, disc_out)``. The perceptual and
            discriminator outputs are computed on ``inputs`` and ``gen_out``
            concatenated along the batch axis, so their first half belongs to
            the reference images and their second half to the generated ones.
        """
        inputs = autograd.Variable(torch.from_numpy(inputs))
        seeds = autograd.Variable(torch.from_numpy(seeds))

        gen_out = self.generator(seeds)
        c12, c22, c32, perc_out = self.perceptual(torch.cat([inputs, gen_out], dim=0))
        disc_out = self.discriminator(c12, c22, c32)

        return gen_out, c12, c22, c32, perc_out, disc_out

    def discriminator_clone(self):
        """Return an independent copy of the current discriminator.

        state_dict()/load_state_dict() copy the buffers (BatchNorm running
        statistics) as well as the parameters; iterating over parameters() alone
        would leave the clone with freshly initialized running statistics.
        """
        disc = Discriminator(self.discriminator.channels)
        disc.load_state_dict(self.discriminator.state_dict())
        return disc

    def assign_back_discriminator(self, disc):
        """Copy the weights and buffers of ``disc`` into this network's discriminator.

        The copy is done in place, so optimizers that already hold references to
        this discriminator's parameters stay valid.
        """
        self.discriminator.load_state_dict(disc.state_dict())

In [71]:
# Shape check: concatenating along dim 0 stacks the two batches.
# torch.zeros instead of torch.Tensor(sizes): the latter returns uninitialized
# memory, which makes the cell non-reproducible and may contain NaN/Inf.
i1 = autograd.Variable(torch.zeros(1, 3, 192, 192))
i2 = autograd.Variable(torch.zeros(1, 3, 192, 192))
torch.cat([i1, i2], dim=0).shape

torch.Size([2, 3, 192, 192])

In [20]:
# Run the full network on the first image/seed pair. Feature and discriminator
# outputs have batch size 2: index 0 is the reference image, index 1 the
# generated one (see Enhancer.forward's torch.cat).
enhancer = Enhancer()
gen_out, c12, c22, c32, c52, disc_out = enhancer(images[:1], seeds[:1])

In [21]:
# Clone the full network's discriminator into a standalone module so it can be
# run and trained separately from the Enhancer.
disc = enhancer.discriminator_clone()

In [22]:
# Score the detached VGG features with the clone, so its loss does not
# back-propagate into the generator.
disc_out2 = disc(c12.detach(), c22.detach(), c32.detach())

In [23]:
import torch.optim as optim

In [24]:
# Adam over the generator's parameters only.
# NOTE(review): lr = 1 looks like a placeholder for exercising the decay helper, not a real training rate — confirm.
optimizer1 = optim.Adam(enhancer.generator.parameters(), lr = 1)

In [62]:
# Read the current learning rate back from the optimizer. -1000 is a sentinel
# that survives only if there are no param groups; with several groups the last
# group's value wins.
lr = -1000
for param_group in optimizer1.param_groups:
    lr = param_group['lr']
lr

1

In [69]:
# Apply one decay check at step 0 with period 75 and factor 0.5. Since
# 0 % 75 == 0 the rate is halved on every run of this cell, so the cell is not
# idempotent.
# NOTE(review): learning_rate_for_step is defined in a later cell; that cell
# must be executed first on a fresh kernel.
learning_rate_for_step(optimizer1, lr, 75, 0.5, 0)

# Read the updated rate back from the optimizer.
for param_group in optimizer1.param_groups:
    lr = param_group['lr']
lr

0.0625

In [25]:
# Apply one update using whatever gradients are currently stored on the
# generator's parameters.
optimizer1.step()

In [117]:
# Generator loss = perceptual * 1e0 + total variation * 2e5 + adversarial * 5e2.
# Index [:1] selects the reference-image half of the batch and [1:] the
# generated half (Enhancer.forward concatenates [inputs, gen_out]).
# Perceptual term: use c22 while pretraining (50 epochs) and c52 for the actual
# training (250 epochs).
gen_loss = loss_perceptual(c52[:1], c52[1:]) * 1e0 \
    + loss_total_variation(gen_out) * 2e5 \
    + loss_adversarial(disc_out[1:]) * 5e2
gen_loss

Variable containing:
 1922.5225
[torch.FloatTensor of size 1]

In [116]:
# Discriminator loss on the cloned discriminator's scores: first argument is
# the reference-image half of the batch, second the generated half.
disc_loss = loss_discriminator(disc_out2[:1], disc_out2[1:])
disc_loss

Variable containing:
-1.3808
[torch.FloatTensor of size 1]

In [118]:
# Back-propagate the generator loss through the Enhancer graph.
gen_loss.backward()

In [119]:
# Back-propagate the discriminator loss; its inputs were detached, so gradients
# stop at the cloned discriminator.
disc_loss.backward()

In [121]:
# After both networks have been updated, copy the clone's values back into the
# enhancer's discriminator.
enhancer.assign_back_discriminator(disc)

In [126]:
# pretrain
# --smoothness-weight=1e7 --adversary-weight=0.0
# add_arg('--generator-start',    default=0, type=int,                help='Epoch count to start training generator.')
# add_arg('--discriminator-start',default=1, type=int,                help='Epoch count to update the discriminator.')
# add_arg('--adversarial-start',  default=2, type=int,                help='Epoch for generator to use discriminator.')
# add_arg('--perceptual-weight',  default=1e0, type=float,            help='Weight for VGG-layer perceptual loss.')
# add_arg('--smoothness-weight',  default=2e5, type=float,            help='Weight of the total-variation loss.')
# add_arg('--adversary-weight',   default=5e2, type=float,            help='Weight of adversarial loss component.')
# train
# --smoothness-weight=2e4 --adversary-weight=1e3 --generator-start=5 --discriminator-start=0 --adversarial-start=5 
# --discriminator-size=64

In [65]:
# decay learning rate
def learning_rate_for_step(optimizer, current_lr, lr_period, decay_lr, step):
    """Write the learning rate for ``step`` into every param group of ``optimizer``.

    The rate is ``current_lr * decay_lr`` when ``step`` is a multiple of
    ``lr_period`` (this includes step 0) and ``current_lr`` otherwise.
    Nothing is returned; read the new rate back from ``optimizer.param_groups``.
    """
    on_decay_boundary = (step % lr_period == 0)
    new_rate = current_lr * decay_lr if on_decay_boundary else current_lr

    for group in optimizer.param_groups:
        group['lr'] = new_rate