In [0]:
import chainer

# Report the Chainer/CuPy runtime configuration, then the accelerator flags.
chainer.print_runtime_info()
for label, flag in (('GPU availability:', chainer.cuda.available),
                    ('cuDNN availablility:', chainer.cuda.cudnn_enabled)):
    print(label, flag)

Platform: Linux-4.14.79+-x86_64-with-Ubuntu-18.04-bionic
Chainer: 5.4.0
NumPy: 1.16.3
CuPy:
  CuPy Version          : 5.4.0
  CUDA Root             : /usr/local/cuda
  CUDA Build Version    : 10000
  CUDA Driver Version   : 10000
  CUDA Runtime Version  : 10000
  cuDNN Build Version   : 7301
  cuDNN Version         : 7301
  NCCL Build Version    : 2402
  NCCL Runtime Version  : 2402
iDeep: 2.0.0.post3
GPU availability: True
cuDNN availablility: True


In [0]:
from google.colab import drive
# Mount Google Drive at /content/drive/ so later cells can read files stored
# there (e.g. the trained generator snapshot loaded in the inference cell).
drive.mount('/content/drive/')

In [0]:
# Network_______________________________________________________________________
import numpy as np

import chainer
import chainer.links as L
import chainer.distributions as D
from chainer import functions as F
from chainer import backend


def add_noise(h, sigma=0.2):
    """Add Gaussian noise to ``h`` while training; pass it through otherwise.

    Args:
        h: Variable whose ``array`` and ``shape`` attributes are read.
        sigma: Multiplier applied to the standard-normal noise.

    Returns:
        ``h`` plus scaled noise when ``chainer.config.train`` is true,
        otherwise ``h`` itself.
    """
    if not chainer.config.train:
        return h
    # Draw the noise with the same array module (NumPy/CuPy) that holds ``h``.
    array_module = backend.get_array_module(h.array)
    noise = array_module.random.randn(*h.shape)
    return h + sigma * noise


class Generator(chainer.Chain):
    """Generator that maps a latent vector to an RGB image.

    A linear layer projects the latent vector to a
    ``(ch, bottom_width, bottom_width)`` feature map, which is then upsampled
    three times by a factor of two (unpooling followed by a 3x3 convolution,
    batch normalisation and ReLU). A final 3x3 convolution with a sigmoid
    produces a 3-channel image with values in [0, 1].

    Args:
        n_hidden: Size of the latent vector.
        bottom_width: Spatial size of the first feature map. The output image
            is ``8 * bottom_width`` pixels on each side (32 for the default).
        ch: Number of channels of the first feature map.
        wscale: Standard deviation of the normal weight initialiser.
    """

    def __init__(self, n_hidden=128, bottom_width=4, ch=512, wscale=0.02):
        super(Generator, self).__init__()
        self.n_hidden = n_hidden
        self.ch = ch
        self.bottom_width = bottom_width

        with self.init_scope():
            w = chainer.initializers.Normal(wscale)
            self.l0 = L.Linear(self.n_hidden, bottom_width * bottom_width * ch,
                               initialW=w)
            self.c1 = L.Convolution2D(ch, ch // 2, 3, 1, 1, initialW=w)
            self.c2 = L.Convolution2D(ch // 2, ch // 4, 3, 1, 1, initialW=w)
            self.c3 = L.Convolution2D(ch // 4, ch // 8, 3, 1, 1, initialW=w)
            self.c4 = L.Convolution2D(ch // 8, 3, 3, 1, 1, initialW=w)
            self.bn0 = L.BatchNormalization(bottom_width * bottom_width * ch)
            self.bn1 = L.BatchNormalization(ch // 2)
            self.bn2 = L.BatchNormalization(ch // 4)
            self.bn3 = L.BatchNormalization(ch // 8)

    def make_hidden(self, batchsize):
        """Sample a batch of latent vectors uniformly from [-1, 1).

        Returns:
            A NumPy array of shape ``(batchsize, n_hidden, 1, 1)`` in Chainer's
            configured dtype.
        """
        dtype = chainer.get_dtype()
        return np.random.uniform(-1, 1, (batchsize, self.n_hidden, 1, 1))\
            .astype(dtype)

    def forward(self, z):
        """Generate images from the latent batch ``z``."""
        bw = self.bottom_width
        h = F.reshape(F.relu(self.bn0(self.l0(z))),
                      (len(z), self.ch, bw, bw))
        # Each stage doubles the resolution. The target size is derived from
        # ``bottom_width`` instead of being hard-coded to 8/16/32, so that a
        # non-default ``bottom_width`` upsamples correctly.
        stages = ((2, self.c1, self.bn1),
                  (4, self.c2, self.bn2),
                  (8, self.c3, self.bn3))
        for factor, conv, bn in stages:
            size = bw * factor
            h = F.unpooling_2d(h, ksize=2, outsize=(size, size))
            h = F.relu(bn(conv(h)))
        x = F.sigmoid(self.c4(h))
        return x

    
# Discriminator with Variational Discriminator Bottleneck
class Discriminator(chainer.Chain):
    """Convolutional discriminator with a stochastic bottleneck.

    Seven convolutions (three of them strided) feed two 1x1 convolutions that
    produce the mean and log standard deviation of a Normal distribution over
    the bottleneck. A linear layer followed by a sigmoid maps the bottleneck
    code to a single output per sample.

    Args:
        n_latent: Number of channels of the bottleneck.
        ch: Number of channels of the widest convolution.
        bottom_width: Spatial size assumed for the bottleneck feature map when
            sizing the final linear layer.
        wscale: Standard deviation of the normal weight initialiser.
        k: Number of samples drawn from the bottleneck distribution and
            averaged in ``forward`` when ``gen`` is false.
    """

    def __init__(self, n_latent=256, ch=256, bottom_width=4, wscale=0.02, k=1):
        super(Discriminator, self).__init__()
        self.k = k
        full, half, quarter, eighth = ch // 1, ch // 2, ch // 4, ch // 8
        with self.init_scope():
            w = chainer.initializers.Normal(wscale)
            self.c0_0 = L.Convolution2D(3, eighth, 3, 1, 1, initialW=w)
            self.c0_1 = L.Convolution2D(eighth, quarter, 4, 2, 1, initialW=w)
            self.c1_0 = L.Convolution2D(quarter, quarter, 3, 1, 1, initialW=w)
            self.c1_1 = L.Convolution2D(quarter, half, 4, 2, 1, initialW=w)
            self.c2_0 = L.Convolution2D(half, half, 3, 1, 1, initialW=w)
            self.c2_1 = L.Convolution2D(half, full, 4, 2, 1, initialW=w)
            self.c3_0 = L.Convolution2D(full, full, 3, 1, 1, initialW=w)
            self.mu = L.Convolution2D(full, n_latent, 1, 1, 0, initialW=w)
            self.ln_sigma = L.Convolution2D(full, n_latent, 1, 1, 0, initialW=w)
            self.out = L.Linear(n_latent * bottom_width**2, 1, initialW=w)
            self.bn0_1 = L.BatchNormalization(quarter, use_gamma=False)
            self.bn1_0 = L.BatchNormalization(quarter, use_gamma=False)
            self.bn1_1 = L.BatchNormalization(half, use_gamma=False)
            self.bn2_0 = L.BatchNormalization(half, use_gamma=False)
            self.bn2_1 = L.BatchNormalization(full, use_gamma=False)
            self.bn3_0 = L.BatchNormalization(full, use_gamma=False)

    def forward(self, x, gen=False):
        """Score ``x`` and return the bottleneck distribution.

        Args:
            x: Batch of input images.
            gen: When true, the score is computed from the bottleneck mean;
                otherwise from the average of ``k`` samples of the bottleneck
                distribution.

        Returns:
            A tuple ``(out, dist)`` of the sigmoid score and the
            ``chainer.distributions.Normal`` bottleneck distribution.
        """
        # The first convolution has no batch normalisation.
        h = F.leaky_relu(self.c0_0(x))
        conv_bn_pairs = (
            (self.c0_1, self.bn0_1),
            (self.c1_0, self.bn1_0),
            (self.c1_1, self.bn1_1),
            (self.c2_0, self.bn2_0),
            (self.c2_1, self.bn2_1),
            (self.c3_0, self.bn3_0),
        )
        for conv, bn in conv_bn_pairs:
            h = F.leaky_relu(bn(conv(h)))

        mu = self.mu(h)
        ln_sigma = self.ln_sigma(h)  # log(sigma)
        dist = D.Normal(loc=mu, log_scale=ln_sigma)

        if gen:
            code = mu
        else:
            # Monte-Carlo average over k draws from the bottleneck.
            samples = dist.sample(self.k)
            code = F.sum(samples, axis=0) / self.k
        return F.sigmoid(self.out(code)), dist

In [0]:
# Updater_______________________________________________________________________
import numpy as np

import chainer
import chainer.functions as F
from chainer import Variable

# Classic Adversarial Loss
def loss_dcgan_dis(dis_fake, dis_real):
    """Discriminator loss: mean softplus(-real) plus mean softplus(fake)."""
    real_term = F.mean(F.softplus(-dis_real))
    fake_term = F.mean(F.softplus(dis_fake))
    return real_term + fake_term


def loss_dcgan_gen(dis_fake):
    """Generator loss: mean softplus of the negated discriminator output."""
    return F.mean(F.softplus(-dis_fake))


# Hinge Loss
def loss_hinge_dis(dis_fake, dis_real):
    """Hinge discriminator loss: mean relu(1 - real) plus mean relu(1 + fake)."""
    real_term = F.mean(F.relu(1. - dis_real))
    fake_term = F.mean(F.relu(1. + dis_fake))
    return real_term + fake_term


def loss_hinge_gen(dis_fake):
    """Hinge generator loss: the negated mean discriminator output."""
    return -F.mean(dis_fake)


# VGAN Loss
class Prior(chainer.Link):
    """Unit Normal prior over the discriminator bottleneck.

    Holds a zero ``loc`` and a unit ``scale`` array of shape
    ``(batchsize, n_latent, bottom_width, bottom_width)`` as persistent
    values.

    Args:
        n_latent: Number of bottleneck channels.
        bottom_width: Spatial size of the bottleneck feature map.
        batchsize: Leading dimension of the arrays. It is passed straight into
            the array shape, so callers are expected to supply an integer.
    """

    def __init__(self, n_latent=256, bottom_width=4, batchsize=None):
        super(Prior, self).__init__()

        shape = (batchsize, n_latent, bottom_width, bottom_width)
        self.loc = np.zeros(shape, np.float32)
        self.scale = np.ones(shape, np.float32)
        for name in ('loc', 'scale'):
            self.register_persistent(name)

    def forward(self):
        """Return the prior as a ``chainer.distributions.Normal``."""
        return D.Normal(self.loc, scale=self.scale)

    
def loss_vgan_dis(dis_fake, dis_real, latent_gx, latent_px, r_z, Ic=0.1, beta=1.0):
    """Discriminator loss with a KL bottleneck penalty.

    Args:
        dis_fake: Discriminator output for generated samples.
        dis_real: Discriminator output for real samples.
        latent_gx: Bottleneck distribution for generated samples.
        latent_px: Bottleneck distribution for real samples.
        r_z: Prior distribution the bottleneck is compared against.
        Ic: Value subtracted from the averaged KL term.
        beta: Weight of the ``(kl - Ic)`` penalty.

    Returns:
        A tuple ``(loss, kl)`` of the total loss and the averaged KL term.
    """
    def mean_kl(latent):
        # NOTE(review): the KL is summed over the last axis only and averaged
        # over all remaining axes -- confirm this is the intended reduction.
        return F.mean(F.sum(chainer.kl_divergence(latent, r_z), axis=-1))

    adversarial = F.mean(F.softplus(-dis_real)) + F.mean(F.softplus(dis_fake))
    kl = (mean_kl(latent_gx) + mean_kl(latent_px)) * 0.5
    penalty = (kl - Ic) * beta
    return adversarial + penalty, kl


def loss_vgan_gen(dis_fake):
    """Generator loss for the VGAN setting: mean softplus(-dis_fake)."""
    return F.mean(F.softplus(-dis_fake))


class Updater(chainer.training.StandardUpdater):
    """Alternating generator/discriminator updater with an adaptive beta.

    Keyword Args (all removed from ``kwargs`` before the base class sees them):
        models: Dict with ``'gen'`` and ``'dis'``; ``'prior'`` is additionally
            required when ``loss_type`` is ``"vgan"``.
        n_dis: Number of discriminator updates per generator update.
        beta: Initial weight of the KL penalty.
        beta_step: Step size of the beta update.
        Ic: Target value of the averaged KL term.
        k: Stored on the updater; not read by the updater itself.
        loss_type: ``'dcgan'``, ``'hinge'`` or ``'vgan'``.

    Raises:
        NotImplementedError: If ``loss_type`` is not one of the above.
    """

    def __init__(self, *args, **kwargs):
        self.models = kwargs.pop('models')
        self.n_dis = kwargs.pop('n_dis')
        self.beta = kwargs.pop("beta")
        self.beta_step = kwargs.pop("beta_step")
        self.Ic = kwargs.pop("Ic")
        self.k = kwargs.pop("k")
        self.loss_type = kwargs.pop('loss_type')
        if self.loss_type == 'dcgan':
            self.loss_gen = loss_dcgan_gen
            self.loss_dis = loss_dcgan_dis
        elif self.loss_type == 'hinge':
            self.loss_gen = loss_hinge_gen
            self.loss_dis = loss_hinge_dis
        elif self.loss_type == "vgan":
            self.loss_gen = loss_vgan_gen
            self.loss_dis = loss_vgan_dis
        else:
            raise NotImplementedError
        super(Updater, self).__init__(*args, **kwargs)

    @staticmethod
    def _as_float(value):
        """Convert a Variable, 0-d array or Python number to a float."""
        if isinstance(value, Variable):
            value = value.array
        return float(value)

    def update_beta(self, avg_kl):
        """Move beta towards satisfying ``avg_kl <= Ic`` and clamp it at zero.

        Beta is kept as a plain Python float. Wrapping the clamped value in a
        ``Variable`` fails once the clamp selects the integer ``0``, because
        ``Variable`` only accepts arrays.
        """
        kl_value = self._as_float(avg_kl)
        new_beta = self._as_float(self.beta) - self.beta_step * (self.Ic - kl_value)
        self.beta = max(new_beta, 0.0)

    def update_core(self):
        """Run one generator update and ``n_dis`` discriminator updates."""
        gen = self.models['gen']
        dis = self.models['dis']
        gen_optimizer = self.get_optimizer('opt_gen')
        dis_optimizer = self.get_optimizer('opt_dis')
        xp = gen.xp
        use_bottleneck = self.loss_type == "vgan"

        for i in range(self.n_dis):
            x_real = self.get_iterator("main").next()
            x_real = Variable(self.converter(x_real, self.device)) / 255.
            batchsize = len(x_real)

            # The generator is updated once, on the first inner iteration.
            if i == 0:
                z = Variable(xp.asarray(gen.make_hidden(batchsize)))
                x_fake = gen(z)
                dis_fake, _ = dis(x_fake, gen=True)
                loss_gen = self.loss_gen(dis_fake)
                gen.cleargrads()
                loss_gen.backward()
                gen_optimizer.update()
                chainer.reporter.report({'loss_gen': loss_gen})

            z = Variable(xp.asarray(gen.make_hidden(batchsize)))
            x_fake = gen(z)
            dis_fake, latent_gx = dis(x_fake, gen=True)
            dis_real, latent_px = dis(x_real)
            # Cut the graph at the generated images so the discriminator's
            # backward pass does not reach the generator.
            x_fake.unchain_backward()

            if use_bottleneck:
                r_z = self.models["prior"]()
                loss_dis, avg_kl = self.loss_dis(
                    dis_fake, dis_real, latent_gx, latent_px, r_z,
                    self.Ic, self.beta)
            else:
                # The dcgan and hinge losses take only the two discriminator
                # outputs and return a single value.
                loss_dis = self.loss_dis(dis_fake, dis_real)
                avg_kl = None

            dis.cleargrads()
            loss_dis.backward()
            dis_optimizer.update()
            chainer.reporter.report({'loss_dis': loss_dis})

            if avg_kl is not None:
                self.update_beta(avg_kl)  # Updating beta

# Before training, set the variable "out" in main() below to the directory where the snapshots should be saved.

In [0]:
# Train_________________________________________________________________________
import os

import chainer
from chainer import training
from chainer.training import extension
from chainer.training import extensions
from chainer.datasets import split_dataset
from chainer import serializers


def make_optimizer(model, alpha=0.0002, beta1=0., beta2=0.9):
    """Create an Adam optimizer with the given hyper-parameters for ``model``.

    Returns:
        The optimizer after ``setup(model)`` has been called on it.
    """
    adam = chainer.optimizers.Adam(alpha=alpha, beta1=beta1, beta2=beta2)
    adam.setup(model)
    return adam

def main():
    """Train the generator/discriminator pair on CIFAR-10 and save both models.

    All settings are local variables at the top of the function. The final
    generator and discriminator are written to ``gen.npz`` and ``dis.npz``
    inside ``out``.
    """
    import numpy as np  # local import: used only to seed the NumPy RNG

    # Prepare parameters
    batchsize = 256
    n_epoch = 200
    gpu = 0
    dim_z = 256
    n_dis = 1
    AdamAlpha = 0.0002
    beta = 0.1
    beta_step = 0.00001
    Ic = 0.1
    k = 1  # If the "batchsize" is lower than 128, "k" must be set more than 1.
    loss_type = "vgan"  # dcgan or hinge or vgan
    out = ""
    resume = ""
    seed = 0
    display_interval = 10
    snapshot_interval = 40  # snapshot per 40 epochs

    # Apply the seed (it was previously declared but never used). NumPy drives
    # weight initialisation, latent sampling and iterator shuffling.
    np.random.seed(seed)

    # Dataset: keep 80% of the CIFAR-10 training images for training.
    train, _ = chainer.datasets.get_cifar10(withlabel=False, scale=255.)
    train, _ = split_dataset(train, int(len(train) * 0.8))

    # Derive the schedule from the actual dataset size rather than a
    # hard-coded 40000.
    iteration = (len(train) * n_epoch) // batchsize
    iteration_decay_start = 0
    iteration_decay_end = iteration

    print('# data-size: {}'.format(len(train)))
    # With withlabel=False each item is the image array itself, so the sample
    # shape is train[0].shape (train[0][0] would be a single channel).
    print('# data-shape: {}'.format(train[0].shape))
    print('')
    train_iter = chainer.iterators.SerialIterator(train, batchsize)

    print('GPU: {}'.format(gpu))
    print('# Minibatch-size: {}'.format(batchsize))
    print('# epoch: {}'.format(int((batchsize*iteration)/len(train))))

    # Set up the models
    gen = Generator()
    dis = Discriminator(n_latent=dim_z, k=k)
    prior = Prior(dim_z, batchsize=batchsize)

    if gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu).use()
        chainer.cuda.cupy.random.seed(seed)
        gen.to_gpu()  # Copy the model to the GPU
        dis.to_gpu()
        prior.to_gpu()

    # Optimizers (the same initial alpha that the LinearShift extensions use)
    opt_gen = make_optimizer(gen, alpha=AdamAlpha)
    opt_dis = make_optimizer(dis, alpha=AdamAlpha)

    # Trainer
    updater = Updater(
        models={"gen": gen, "dis": dis, "prior": prior},
        n_dis=n_dis,
        beta=beta,
        beta_step=beta_step,
        Ic=Ic,
        k=k,
        loss_type=loss_type,
        iterator=train_iter,
        optimizer={"opt_gen": opt_gen, "opt_dis": opt_dis},
        device=gpu)
    trainer = training.Trainer(updater, (iteration, "iteration"), out=out)

    # Set up logging
    snapshot_trigger = (snapshot_interval, "epoch")
    display_trigger = (display_interval, "iteration")
    trainer.extend(
        extensions.snapshot_object(gen, 'gen_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_trigger)
    trainer.extend(
        extensions.snapshot_object(dis, 'dis_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_trigger)
    trainer.extend(extensions.LogReport(trigger=display_trigger))
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'iteration', 'loss_gen', "loss_dis", "elapsed_time"]),
        trigger=display_trigger)
    trainer.extend(
        extensions.PlotReport(
            ['loss_gen', "loss_dis"], x_key='iteration',
            trigger=display_trigger, file_name="plot.png"))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Linearly decay both learning rates to zero over the whole run.
    for optimizer in (opt_gen, opt_dis):
        trainer.extend(extensions.LinearShift(
            'alpha', (AdamAlpha, 0.),
            (iteration_decay_start, iteration_decay_end), optimizer))

    if resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(resume, trainer)

    # Run the training
    print("start training")
    trainer.run()

    # Save the generator and discriminator models
    if gpu >= 0:
        gen.to_cpu()
        dis.to_cpu()
    chainer.serializers.save_npz(os.path.join(out, 'gen.npz'), gen)
    chainer.serializers.save_npz(os.path.join(out, "dis.npz"), dis)


if __name__ == '__main__':
    main()

KeyboardInterrupt: ignored

In [0]:
from chainer import serializers
import matplotlib.pyplot as plt
from PIL import Image
from chainer.cuda import to_cpu
from chainer import Variable


# Prepare parameters
gpu = 0
batchsize = 1

# Build the generator
inf_gen = Generator()

# Load the trained generator parameters from the snapshot path below.
serializers.load_npz(
    "drive/My Drive/Google_Colab/DCGAN_VDB/results/DCGAN_VDB_nonAdaptive/gen.npz", inf_gen)

inf_gen.to_gpu(gpu)

# Generate one batch in test mode without building a backward graph.
with chainer.using_config("train", False), chainer.using_config("enable_backprop", False):
    inf_z = Variable(inf_gen.xp.asarray(inf_gen.make_hidden(batchsize)))
    inf_x = inf_gen(z=inf_z)

inf_x = to_cpu(inf_x.array)
# The generator ends with a sigmoid, so its output is already in [0, 1] and
# must not be rescaled. Only if the generator's last activation is changed to
# tanh should the following line be enabled:
# inf_x = (inf_x + 1) / 2
inf_x = inf_x * 255

# Take the first image and reorder it from (channel, height, width) to
# (height, width, channel) for display.
inf_x_show = inf_x[0].transpose(1, 2, 0)
pil_img = Image.fromarray(np.uint8(inf_x_show))
plt.imshow(pil_img)