In [12]:
from utils.data_loader import get_data_loader

from models.dcgan import DCGAN_MODEL
import sys
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch import autograd
import time as t
import matplotlib.pyplot as plt
import os
from utils.tensorboard_logger import Logger
from itertools import chain
from torchvision import utils

In [13]:


SAVE_PER_TIMES = 100

class WGAN_Generator(torch.nn.Module):
    def __init__(self, channels):
        super().__init__()
        # Filters [1024, 512, 256]
        # Input_dim = 100
        # Output_dim = C (number of channels)
        self.main_module = nn.Sequential(
            # Z latent vector 100
            nn.ConvTranspose2d(in_channels=100, out_channels=1024, kernel_size=4, stride=1, padding=0),
            nn.BatchNorm2d(num_features=1024),
            nn.ReLU(True),

            # State (1024x4x4)
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(num_features=512),
            nn.ReLU(True),

            # State (512x8x8)
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(True),

            # State (256x16x16)
            nn.ConvTranspose2d(in_channels=256, out_channels=channels, kernel_size=4, stride=2, padding=1))
            # output of main module --> Image (Cx32x32)

        self.output = nn.Tanh()

    def forward(self, x):
        x = self.main_module(x)
        return self.output(x)


class WGAN_Discriminator(torch.nn.Module):
    def __init__(self, channels):
        super().__init__()
        # Filters [256, 512, 1024]
        # Input_dim = channels (Cx64x64)
        # Output_dim = 1
        self.main_module = nn.Sequential(
            # Omitting batch normalization in critic because our new penalized training objective (WGAN with gradient penalty) is no longer valid
            # in this setting, since we penalize the norm of the critic's gradient with respect to each input independently and not the enitre batch.
            # There is not good & fast implementation of layer normalization --> using per instance normalization nn.InstanceNorm2d()
            # Image (Cx32x32)
            nn.Conv2d(in_channels=channels, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.InstanceNorm2d(256, affine=True),
            nn.LeakyReLU(0.2, inplace=True),

            # State (256x16x16)
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.InstanceNorm2d(512, affine=True),
            nn.LeakyReLU(0.2, inplace=True),

            # State (512x8x8)
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=4, stride=2, padding=1),
            nn.InstanceNorm2d(1024, affine=True),
            nn.LeakyReLU(0.2, inplace=True))
            # output of main module --> State (1024x4x4)

        self.output = nn.Sequential(
            # The output of D is no longer a probability, we do not apply sigmoid at the output of D.
            nn.Conv2d(in_channels=1024, out_channels=1, kernel_size=4, stride=1, padding=0))


    def forward(self, x):
        x = self.main_module(x)
        return self.output(x)

    def feature_extraction(self, x):
        # Use discriminator for feature extraction then flatten to vector of 16384
        x = self.main_module(x)
        return x.view(-1, 1024*4*4)


class WGAN_GP(object):
    def __init__(self, args):
        print("WGAN_GradientPenalty init model.")
        self.G = WGAN_Generator(args.channels)
        self.D = WGAN_Discriminator(args.channels)
        self.C = args.channels

        # Check if cuda is available
        self.check_cuda(args.cuda)

        # WGAN values from paper
        self.learning_rate = 1e-4
        self.b1 = 0.5
        self.b2 = 0.999
        self.batch_size = 64

        # WGAN_gradient penalty uses ADAM
        self.d_optimizer = optim.Adam(self.D.parameters(), lr=self.learning_rate, betas=(self.b1, self.b2))
        self.g_optimizer = optim.Adam(self.G.parameters(), lr=self.learning_rate, betas=(self.b1, self.b2))

        # Set the logger
        self.logger = Logger('./logs')
        self.logger.writer.flush()
        self.number_of_images = 10

        self.generator_iters = args.generator_iters
        self.critic_iter = 5
        self.lambda_term = 10

    def get_torch_variable(self, arg):
        if self.cuda:
            return Variable(arg).cuda(self.cuda_index)
        else:
            return Variable(arg)

    def check_cuda(self, cuda_flag=False):
        print(cuda_flag)
        if cuda_flag:
            self.cuda_index = 0
            self.cuda = True
            self.D.cuda(self.cuda_index)
            self.G.cuda(self.cuda_index)
            print("Cuda enabled flag: {}".format(self.cuda))
        else:
            self.cuda = False


    def train(self, train_loader):
        print("STARTING")
        # Now batches are callable self.data.next()
        self.data = self.get_infinite_batches(train_loader)
        results = [[],[]]
        one = torch.tensor(1, dtype=torch.float)
        mone = one * -1
        if self.cuda:
            one = one.cuda(self.cuda_index)
            mone = mone.cuda(self.cuda_index)

        for g_iter in range(self.generator_iters):
            # Requires grad, Generator requires_grad = False
            for p in self.D.parameters():
                p.requires_grad = True

            d_loss_real = 0
            d_loss_fake = 0
            Wasserstein_D = 0
            # Train Dicriminator forward-loss-backward-update self.critic_iter times while 1 Generator forward-loss-backward-update
            for d_iter in range(self.critic_iter):
                print(d_iter)
                self.D.zero_grad()

                images = self.data.__next__()
                # Check for batch to have full batch_size
                if (images.size()[0] != self.batch_size):
                    continue

                z = torch.rand((self.batch_size, 100, 1, 1))

                images, z = self.get_torch_variable(images), self.get_torch_variable(z)

                # Train discriminator
                # WGAN - Training discriminator more iterations than generator
                # Train with real images
                d_loss_real = self.D(images)
                d_loss_real = d_loss_real.mean()
                d_loss_real.backward(mone)

                # Train with fake images
                z = self.get_torch_variable(torch.randn(self.batch_size, 100, 1, 1))

                fake_images = self.G(z)
                d_loss_fake = self.D(fake_images)
                d_loss_fake = d_loss_fake.mean()
                d_loss_fake.backward(one)

                # Train with gradient penalty
                gradient_penalty = self.calculate_gradient_penalty(images.data, fake_images.data)
                gradient_penalty.backward()


                d_loss = d_loss_fake - d_loss_real + gradient_penalty
                Wasserstein_D = d_loss_real - d_loss_fake
                self.d_optimizer.step()
                print(f'  Discriminator iteration: {d_iter}/{self.critic_iter}, loss_fake: {d_loss_fake}, loss_real: {d_loss_real}')

            # Generator update
            for p in self.D.parameters():
                p.requires_grad = False  # to avoid computation

            self.G.zero_grad()
            # train generator
            # compute loss with fake images
            z = self.get_torch_variable(torch.randn(self.batch_size, 100, 1, 1))
            fake_images = self.G(z)
            g_loss = self.D(fake_images)
            g_loss = g_loss.mean()
            g_loss.backward(mone)
            g_cost = -g_loss
            self.g_optimizer.step()
            print(f'Generator iteration: {g_iter}/{self.generator_iters}, g_loss: {g_loss}')
            # Saving model and sampling images every 1000th generator iterations
            if (g_iter) % SAVE_PER_TIMES == 0:
                results[0].append((float(g_loss))
                results[1].append(float(d_loss)))
                print(results[-1])

                # Denormalize images and save them in grid 8x8
                # Testing
                #print("Real Inception score: {}".format(inception_score))
                print("Generator iter: {}".format(g_iter))

        return results


    def calculate_gradient_penalty(self, real_images, fake_images):
        eta = torch.FloatTensor(self.batch_size,1,1,1).uniform_(0,1)
        eta = eta.expand(self.batch_size, real_images.size(1), real_images.size(2), real_images.size(3))
        if self.cuda:
            eta = eta.cuda(self.cuda_index)
        else:
            eta = eta

        interpolated = eta * real_images + ((1 - eta) * fake_images)

        if self.cuda:
            interpolated = interpolated.cuda(self.cuda_index)
        else:
            interpolated = interpolated

        # define it to calculate gradient
        interpolated = Variable(interpolated, requires_grad=True)

        # calculate probability of interpolated examples
        prob_interpolated = self.D(interpolated)

        # calculate gradients of probabilities with respect to examples
        gradients = autograd.grad(outputs=prob_interpolated, inputs=interpolated,
                               grad_outputs=torch.ones(
                                   prob_interpolated.size()).cuda(self.cuda_index) if self.cuda else torch.ones(
                                   prob_interpolated.size()),
                               create_graph=True, retain_graph=True)[0]

        grad_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * self.lambda_term
        return grad_penalty

    def real_images(self, images, number_of_images):
        if (self.C == 3):
            return self.to_np(images.view(-1, self.C, 32, 32)[:self.number_of_images])
        else:
            return self.to_np(images.view(-1, 32, 32)[:self.number_of_images])

    def generate_img(self, z, number_of_images):
        samples = self.G(z).data.cpu().numpy()[:number_of_images]
        generated_images = []
        for sample in samples:
            if self.C == 3:
                generated_images.append(sample.reshape(self.C, 32, 32))
            else:
                generated_images.append(sample.reshape(32, 32))
        return generated_images

    def to_np(self, x):
        return x.data.cpu().numpy()


    def get_infinite_batches(self, data_loader):
        while True:
            for i, (images, _) in enumerate(data_loader):
                yield images

    def generate_latent_walk(self, number):
        if not os.path.exists('interpolated_images/'):
            os.makedirs('interpolated_images/')

        number_int = 10
        # interpolate between twe noise(z1, z2).
        z_intp = torch.FloatTensor(1, 100, 1, 1)
        z1 = torch.randn(1, 100, 1, 1)
        z2 = torch.randn(1, 100, 1, 1)
        if self.cuda:
            z_intp = z_intp.cuda()
            z1 = z1.cuda()
            z2 = z2.cuda()

        z_intp = Variable(z_intp)
        images = []
        alpha = 1.0 / float(number_int + 1)
        print(alpha)
        for i in range(1, number_int + 1):
            z_intp.data = z1*alpha + z2*(1.0 - alpha)
            alpha += alpha
            fake_im = self.G(z_intp)
            fake_im = fake_im.mul(0.5).add(0.5) #denormalize
            images.append(fake_im.view(self.C,32,32).data.cpu())

        grid = utils.make_grid(images, nrow=number_int )
        utils.save_image(grid, 'interpolated_images/interpolated_{}.png'.format(str(number).zfill(3)))
        print("Saved interpolated images.")


In [20]:
def main(args):
    model = None
    if args.model == 'DCGAN':
        model = DCGAN_MODEL(args)
    elif args.model == 'WGAN':
        model = WGAN_GP(args)
    else:
        print("Model type non-existing. Try again.")
        return 0

    print(model)
    # Load datasets to train and test loaders
    train_loader, test_loader = get_data_loader(args)
    #feature_extraction = FeatureExtractionTest(train_loader, test_loader, args.cuda, args.batch_size)

    # Start model training
    results = model.train(train_loader)
    for i in range(5):
        model.generate_latent_walk(i)
    
    return results


In [15]:
DEBUG = True

if DEBUG:
    ITERS = "2"
    EPOCHS = "2"
    CUDA = False
else:
    ITERS = "40000"
    EPOCHS = "400"
    CUDA = True

def parse_args(args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='DCGAN', choices=['DCGAN', 'WGAN'])
    parser.add_argument('--is_train', type=str, default='True')
    parser.add_argument('--dataroot', required=True)
    parser.add_argument('--dataset', type=str, default='mnist', choices=['fashion-mnist'])
    parser.add_argument('--download', type=str, default='False')
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--cuda',  type=bool, default='False')
    parser.add_argument('--load_D', type=str, default='False')
    parser.add_argument('--load_G', type=str, default='False')
    parser.add_argument('--generator_iters', type=int, default=10000)
    parser.add_argument('--channels',type=int,default=1)
    return parser.parse_args(args)

 
    
wgan_args = ["--model", "WGAN",
           "--is_train", "True",
           "--epochs", EPOCHS,
           "--cuda",CUDA,

           "--download", "False",
           "--dataroot", "datasets/fashion-mnist",
           "--dataset", "fashion-mnist",
           "--generator_iters", ITERS,
           "--cuda", "False",
           "--batch_size" ,"64"]

dcgan_args = ["--model", "DCGAN",
           "--is_train", "True",
           "--epochs", EPOCHS,
           "--cuda",CUDA,
           "--download", "False",
           "--dataroot", "datasets/fashion-mnist",
           "--dataset", "fashion-mnist",
           "--generator_iters", ITERS,
           "--cuda", "False",
           "--batch_size" ,"64"]

In [21]:
args = parse_args(wgan_args)
args.cuda = CUDA
results = main(args)
results

WGAN_GradientPenalty init model.
False
<__main__.WGAN_GP object at 0x0000012D64059908>
STARTING
0
  Discriminator iteration: 0/5, loss_fake: -0.06678056716918945, loss_real: -0.273993581533432
1
  Discriminator iteration: 1/5, loss_fake: -2.2785754203796387, loss_real: 5.808892250061035
2
  Discriminator iteration: 2/5, loss_fake: -4.645873069763184, loss_real: 10.705155372619629
3
  Discriminator iteration: 3/5, loss_fake: -7.492238998413086, loss_real: 14.515768051147461
4
  Discriminator iteration: 4/5, loss_fake: -10.970818519592285, loss_real: 17.995986938476562
Generator iteration: 0/2, g_loss: -14.342090606689453
(tensor(-14.3421, grad_fn=<MeanBackward0>), tensor(-20.2938, grad_fn=<AddBackward0>))
Generator iter: 0
0
  Discriminator iteration: 0/5, loss_fake: 12.766979217529297, loss_real: 20.55562973022461
1
  Discriminator iteration: 1/5, loss_fake: 6.900254249572754, loss_real: 23.583248138427734
2
  Discriminator iteration: 2/5, loss_fake: -3.0525715351104736, loss_real: 23.

[(tensor(-14.3421, grad_fn=<MeanBackward0>),
  tensor(-20.2938, grad_fn=<AddBackward0>))]

In [17]:
args = parse_args(dcgan_args)
main(args)

DCGAN model initalization.


AssertionError: Torch not compiled with CUDA enabled

In [28]:
final_
for a,b in results:
    a,b = print(float(a),float(b))

-14.342090606689453 -20.2938175201416


TypeError: cannot unpack non-iterable NoneType object

  This is separate from the ipykernel package so we can avoid doing imports until


ModuleNotFoundError: No module named 'matplotlib.backends.backend_a'