In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.datasets import make_moons

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torchvision.utils import make_grid

In [None]:
def plot_gan_training(losses, title):
    """Draw a loss curve against the iteration index.

    Args:
        losses: Sequence of loss values, one per iteration.
        title: Text shown as the figure title.
    """
    iterations = np.arange(len(losses))

    fig, ax = plt.subplots(figsize=(7, 5))
    ax.plot(iterations, losses)
    ax.set_title(title)
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Loss')
    plt.show()

# Task 1: Vanilla GAN for a 1D task

In this task you will train a simple GAN model on a 1D distribution.

In [None]:
def generate_1d_data(count):
    """Sample a 1D two-component Gaussian mixture, min-max scaled to [0, 1].

    The first `count // 2` points are drawn from N(-1, 0.25^2) and the
    remaining points from N(0.5, 0.5^2). The pooled sample is then rescaled
    so that its minimum is 0 and its maximum is 1.

    Args:
        count: Total number of points to generate (use at least 2, otherwise
            the min-max scaling divides by zero).

    Returns:
        numpy array of shape (count, 1).
    """
    n_first = count // 2
    # Assign the remainder to the second component so that an odd `count`
    # still yields exactly `count` points instead of silently dropping one.
    n_second = count - n_first
    gaussian1 = np.random.normal(loc=-1, scale=0.25, size=(n_first,))
    gaussian2 = np.random.normal(loc=0.5, scale=0.5, size=(n_second,))
    # Local name chosen so it does not shadow the `torch.utils.data` alias.
    samples = (np.concatenate([gaussian1, gaussian2]) + 1).reshape([-1, 1])
    return (samples - samples.min()) / (samples.max() - samples.min())


def visualize_1d_data(data):
    """Show a 50-bin histogram of `data`."""
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.hist(data, bins=50)
    plt.show()

In [None]:
# Number of points drawn for the 1D training set.
COUNT = 20000

# Draw the training sample (shape (COUNT, 1), values in [0, 1]) and plot its histogram.
train_data = generate_1d_data(COUNT)
visualize_1d_data(train_data)

In [None]:
def make_step(loss_fn, x, generator, discriminator, optimizer):
    """Run one optimisation step and return the loss tensor.

    Args:
        loss_fn: Callable invoked as `loss_fn(generator, discriminator, x)`;
            must return a tensor that supports `.backward()`.
        x: Batch forwarded to `loss_fn`.
        generator: Generator model forwarded to `loss_fn`.
        discriminator: Discriminator model forwarded to `loss_fn`.
        optimizer: Optimizer whose parameters are updated by this step.

    Returns:
        The loss tensor computed before the parameter update.
    """
    step_loss = loss_fn(generator, discriminator, x)

    # Clear stale gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    step_loss.backward()
    optimizer.step()

    return step_loss


def train_epoch(
    generator,
    discriminator,
    gen_loss_fn,
    discr_loss_fn,
    train_loader,
    gen_optimizer,
    discr_optimizer,
    discr_steps=1
):
    """Train the GAN for one pass over `train_loader`.

    The discriminator is updated on every batch; the generator is updated on
    batches whose index is a multiple of `discr_steps`.

    Args:
        generator: Generator module.
        discriminator: Discriminator module.
        gen_loss_fn: Generator loss, called as `fn(generator, discriminator, x)`.
        discr_loss_fn: Discriminator loss with the same call signature.
        train_loader: Iterable yielding batches of real data as tensors.
        gen_optimizer: Optimizer for the generator parameters.
        discr_optimizer: Optimizer for the discriminator parameters.
        discr_steps: Number of discriminator updates per generator update.

    Returns:
        Dict with keys 'generator_losses' and 'discriminator_losses', each a
        list of Python floats (one entry per corresponding update).
    """
    generator.train()
    discriminator.train()

    # Move batches to wherever the generator lives instead of hard-coding
    # CUDA, so the loop also works on CPU or on a non-default GPU. A generator
    # without parameters falls back to CUDA, matching the previous behaviour.
    first_param = next(generator.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    gen_losses, discr_losses = [], []
    for idx, x in enumerate(train_loader):
        x = x.to(device).float()
        discr_loss = make_step(discr_loss_fn, x, generator, discriminator, discr_optimizer)
        discr_losses.append(discr_loss.item())

        if idx % discr_steps == 0:
            gen_loss = make_step(gen_loss_fn, x, generator, discriminator, gen_optimizer)
            gen_losses.append(gen_loss.item())
    return {
        'generator_losses': gen_losses,
        'discriminator_losses': discr_losses
    }


def train_model(
    generator,
    discriminator,
    gen_loss_fn,
    discr_loss_fn,
    train_loader,
    epochs,
    lr,
    discr_steps=1
):
    """Train a GAN for `epochs` epochs and collect the loss histories.

    Both networks get their own Adam optimizer with the same learning rate
    and betas=(0, 0.9).

    Args:
        generator: Generator module.
        discriminator: Discriminator module.
        gen_loss_fn: Generator loss, called as `fn(generator, discriminator, x)`.
        discr_loss_fn: Discriminator loss with the same call signature.
        train_loader: Iterable yielding batches of real data.
        epochs: Number of passes over `train_loader`.
        lr: Learning rate shared by both optimizers.
        discr_steps: Forwarded to `train_epoch`.

    Returns:
        Dict mapping each key returned by `train_epoch` to the concatenation
        of its per-epoch lists (empty dict when `epochs` is 0).
    """
    adam_kwargs = dict(lr=lr, betas=(0, 0.9))
    gen_optimizer = optim.Adam(generator.parameters(), **adam_kwargs)
    discr_optimizer = optim.Adam(discriminator.parameters(), **adam_kwargs)

    history = {}
    for _ in range(epochs):
        epoch_losses = train_epoch(
            generator,
            discriminator,
            gen_loss_fn,
            discr_loss_fn,
            train_loader,
            gen_optimizer,
            discr_optimizer,
            discr_steps=discr_steps,
        )
        # Append this epoch's values to the running history of each loss.
        for name, values in epoch_losses.items():
            history.setdefault(name, []).extend(values)

    return history

Generator and Discriminator models are simple MLP models.

The objective function is
$$\min_{G} \max_{D} \mathbb{E}_{\mathbf{x} \sim \pi(\mathbf{x})} [\log D(\mathbf{x})] + \mathbb{E}_{\mathbf{z} \sim p(\mathbf{z})}[\log (1-D(G(\mathbf{z})))]$$

In [None]:
class FullyConnectedMLP(nn.Module):
    # do not change this class
    """Fully connected network wrapped around an `nn.Sequential`.

    Exercise template: the layer list is left empty, so until it is filled in
    `self.net` is an empty `nn.Sequential`.
    """

    def __init__(self, input_dim, hiddens, output_dim):
        """Store the layer sizes and build `self.net`.

        Args:
            input_dim: Number of input features.
            hiddens: List of hidden layer widths (must be a `list`).
            output_dim: Number of output features.
        """
        assert isinstance(hiddens, list)
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hiddens = hiddens

        model = []
        # your code: append the layers to `model`; they are wrapped into
        # nn.Sequential below (presumably input_dim -> hiddens -> output_dim).
        self.net = nn.Sequential(*model)

    def forward(self, x):
        """Flatten each sample, apply the network, return (batch, output_dim)."""
        batch_size = x.shape[0]
        # Collapse every non-batch dimension into one feature axis.
        x = x.view(batch_size, -1)
        return self.net(x).view(batch_size, self.output_dim)


class MLPGenerator(nn.Module):
    # Generator built on a FullyConnectedMLP(latent_dim, hiddens, data_dim).
    # NOTE: exercise template - `forward` and `sample` have no bodies yet, so
    # this class does not parse until they are implemented.
    def __init__(self, latent_dim, hiddens, data_dim):
        super().__init__()
        # Stored so that `sample` can know the latent size.
        self.latent_dim = latent_dim
        self.mlp = FullyConnectedMLP(latent_dim, hiddens, data_dim)
    
    def forward(self, z):
        # TODO: apply self.mlp to z, then a sigmoid activation, and return the result
        
    def sample(self, n):
        # TODO: draw n latent vectors from a uniform distribution, apply the
        # model and return the result (callers use the return value as a tensor
        # of generated data)

class MLPDiscriminator(nn.Module):
    # Discriminator built on a FullyConnectedMLP.
    # NOTE: the argument names mirror MLPGenerator; here `latent_dim` is passed
    # as the MLP input size and `data_dim` as its output size.
    # NOTE: exercise template - `forward` has no body yet, so this class does
    # not parse until it is implemented.
    def __init__(self, latent_dim, hiddens, data_dim):
        super().__init__()
        self.mlp = FullyConnectedMLP(latent_dim, hiddens, data_dim)
    
    def forward(self, z):
        # TODO: apply self.mlp to z, then a sigmoid activation, and return the result

In [None]:
# Hyperparameters for the saturating GAN (exercise template: fill in values).
# GEN_HIDDENS / DISCR_HIDDENS must be lists (FullyConnectedMLP asserts this).
# DISCR_STEPS: number of discriminator updates per generator update.
BATCH_SIZE = 
GEN_HIDDENS = 
DISCR_HIDDENS = 
EPOCHS = 
LR = 
DISCR_STEPS = 

# Shuffled mini-batches over the 1D training data.
train_loader = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# model
# Both networks map 1 input feature to 1 output feature and are placed on the GPU.
generator = MLPGenerator(latent_dim=1, hiddens=GEN_HIDDENS, data_dim=1).cuda()
discriminator = MLPDiscriminator(latent_dim=1, hiddens=DISCR_HIDDENS, data_dim=1).cuda()


def gen_loss(generator, discriminator, x):
    # Saturating generator loss (exercise template). It is invoked by make_step
    # as loss_fn(generator, discriminator, x) and must return a scalar tensor,
    # because make_step calls .backward() on it.
    # sample data from generator (number of samples = x.shape[0])
    fake_data = 
    return # log(1 - D(x_fake)), where x_fake is the generated batch


def discr_loss(generator, discriminator, x):
    # Discriminator loss (exercise template). It is invoked by make_step as
    # loss_fn(generator, discriminator, x) and must return a scalar tensor,
    # because make_step calls .backward() on it. `x` is the batch of real data.
    # sample data from generator (number of samples = x.shape[0])
    fake_data = 
    return # -log(D(x_real)) - log(1 - D(x_fake))

# Train the saturating GAN; the result maps loss names to per-step histories.
train_losses = train_model(
    generator=generator,
    discriminator=discriminator,
    gen_loss_fn=gen_loss,
    discr_loss_fn=discr_loss,
    train_loader=train_loader,
    epochs=EPOCHS,
    lr=LR,
    discr_steps=DISCR_STEPS,
)

In [None]:
# Loss curves of the run above (one point per optimisation step of each network).
plot_gan_training(train_losses['discriminator_losses'], 'Discriminator loss')
plot_gan_training(train_losses['generator_losses'], 'Generator loss')

In [None]:
def make_inference(generator, discriminator, n_samples=5000):
    """Sample from the generator and evaluate the discriminator on [0, 1].

    Both models are switched to eval mode (and left in it).

    Args:
        generator: Model exposing `sample(n)` that returns a tensor.
        discriminator: Model applied to a (1000, 1) float32 grid of inputs.
        n_samples: Number of generator samples to draw.

    Returns:
        Tuple `(samples, xs, discr_output)` of numpy arrays: the generated
        samples, the 1000 evenly spaced evaluation points in [0, 1], and the
        discriminator output at those points.
    """
    generator.eval()
    discriminator.eval()

    # Evaluate on the discriminator's own device rather than hard-coding CUDA.
    # A discriminator without parameters falls back to CUDA as before.
    first_param = next(discriminator.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    xs = np.linspace(0, 1, 1000)
    # No gradients are needed for inference; skipping graph construction does
    # not change the returned values.
    with torch.no_grad():
        samples = generator.sample(n_samples).cpu().detach().numpy()
        grid = torch.as_tensor(xs, dtype=torch.float32, device=device).unsqueeze(1)
        discr_output = discriminator(grid).cpu().detach().numpy()
    return samples, xs, discr_output


def plot_results(data, samples, xs, ys, title):
    """Overlay density histograms of fake and real data with a curve.

    Args:
        data: Real data, drawn as the 'real' histogram.
        samples: Generated data, drawn as the 'fake' histogram.
        xs: x-coordinates of the curve labelled 'discrim'.
        ys: y-coordinates of the curve labelled 'discrim'.
        title: Figure title.
    """
    hist_style = dict(bins=50, density=True, alpha=0.7)

    fig, ax = plt.subplots(figsize=(7, 5))
    ax.hist(samples, label='fake', **hist_style)
    ax.hist(data, label='real', **hist_style)

    ax.plot(xs, ys, label='discrim')
    ax.legend()
    ax.set_title(title)

In [None]:
# Compare generated samples with the training data and overlay the
# discriminator output evaluated on a grid over [0, 1].
samples, xs, discr_output = make_inference(generator, discriminator)
plot_results(train_data, samples, xs, discr_output, 'Results')

Now we'll use the non-saturating formulation of the GAN objective. We now have two separate losses, each of which is minimized by its own network:
$$L^{(D)} = - \mathbb{E}_{\mathbf{x} \sim \pi(\mathbf{x})} [\log D(\mathbf{x})] - \mathbb{E}_{\mathbf{z} \sim p(\mathbf{z})}[\log (1-D(G(\mathbf{z})))]$$
$$L^{(G)} = - \mathbb{E}_{\mathbf{z} \sim p(\mathbf{z})} [\log D(G(\mathbf{z}))]$$

In [None]:
# Hyperparameters for the non-saturating GAN (exercise template: fill in values).
# GEN_HIDDENS / DISCR_HIDDENS must be lists (FullyConnectedMLP asserts this).
# DISCR_STEPS: number of discriminator updates per generator update.
BATCH_SIZE = 
GEN_HIDDENS = 
DISCR_HIDDENS = 
EPOCHS = 
LR = 
DISCR_STEPS = 


# Shuffled mini-batches over the 1D training data.
train_loader = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# model
# Fresh networks: this rebinds `generator` / `discriminator` from the previous experiment.
generator = MLPGenerator(latent_dim=1, hiddens=GEN_HIDDENS, data_dim=1).cuda()
discriminator = MLPDiscriminator(latent_dim=1, hiddens=DISCR_HIDDENS, data_dim=1).cuda()


def gen_loss(generator, discriminator, x):
    # Non-saturating generator loss (exercise template). This definition
    # replaces the earlier `gen_loss` in the notebook namespace. It must return
    # a scalar tensor, because make_step calls .backward() on it.
    # sample data from generator (number of samples = x.shape[0])
    fake_data = 
    return # -log(D(x_fake)), where x_fake is the generated batch


# Train the non-saturating GAN. `discr_loss` is the function defined for the
# saturating experiment; only the generator loss differs.
train_losses = train_model(
    generator=generator,
    discriminator=discriminator,
    gen_loss_fn=gen_loss,
    discr_loss_fn=discr_loss,
    train_loader=train_loader,
    epochs=EPOCHS,
    lr=LR,
    discr_steps=DISCR_STEPS,
)

In [None]:
# Loss curves of the non-saturating run (one point per optimisation step of each network).
plot_gan_training(train_losses['discriminator_losses'], 'Discriminator loss')
plot_gan_training(train_losses['generator_losses'], 'Generator loss')

In [None]:
# Compare generated samples with the training data and overlay the
# discriminator output evaluated on a grid over [0, 1].
samples, xs, discr_output = make_inference(generator, discriminator)
plot_results(train_data, samples, xs, discr_output, 'Results')

# Task 2: WGAN-GP for CIFAR 10

In this task you will fit a Wasserstein GAN with Gradient Penalty (WGAN-GP) model to the CIFAR-10 dataset (https://drive.google.com/file/d/16j3nrJV821VOkkuRz7aYam8TyIXLnNme/view?usp=sharing).  

In [None]:
def load_pickle(path):
    """Read a pickled dict and return its 'train' and 'test' entries.

    Args:
        path: Path of the pickle file.

    Returns:
        Tuple `(train_data, test_data)`.

    Note:
        Unpickling can execute arbitrary code; only load trusted files.
    """
    with open(path, 'rb') as handle:
        splits = pickle.load(handle)
    return splits['train'], splits['test']


def show_samples(samples, title, nrow=10):
    """Display a batch of images as a single grid.

    Args:
        samples: Batch of images with values on a 0-255 scale; the channel
            axis is expected last (it is moved to position 1 for `make_grid`).
        title: Figure title.
        nrow: Forwarded to `make_grid` as `nrow`.
    """
    images = torch.FloatTensor(samples) / 255
    # make_grid works on channel-first batches.
    images = images.permute(0, 3, 1, 2)
    grid = make_grid(images, nrow=nrow)

    plt.figure()
    plt.title(title)
    # imshow wants the channel axis last again.
    plt.imshow(grid.permute(1, 2, 0))
    plt.axis('off')
    plt.show()


def visualize_data(data, title):
    """Show a grid of up to 100 distinct, randomly chosen images from `data`.

    Args:
        data: Image collection that supports `len()` and indexing with an
            integer array (assumed to be laid out as `show_samples` expects:
            channel axis last, 0-255 scale - TODO confirm with the caller).
        title: Figure title.
    """
    # Cap the sample size so collections with fewer than 100 items still work.
    n_images = min(100, len(data))
    idxs = np.random.choice(len(data), replace=False, size=(n_images,))
    # Index the `data` argument; the function previously read the global
    # `train_data` and ignored what the caller passed in.
    images = data[idxs]
    show_samples(images, title)

In [None]:
# Load the CIFAR-10 pickle from the mounted drive folder and preview random images.
# NOTE: this rebinds `train_data`, which held the 1D dataset in Task 1.
train_data, test_data = load_pickle(os.path.join('drive', 'My Drive', 'DGM2020', 'homework_supplementary', 'cifar10.pkl'))
visualize_data(train_data, 'CIFAR10 samples')

In [None]:
def gradient_penalty(critic, real_data, fake_data):
    # Gradient penalty term: the batch mean of (||grad_x critic(x_t)|| - 1)^2,
    # where x_t interpolates between real and fake samples.
    # NOTE: exercise template - `t`, `interpolated`, `d_output` and
    # `gradients_norm` are blanks that must be filled in before this parses.
    batch_size = real_data.shape[0]

    # Calculate interpolation
    # x_t = t * x_real + (1 - t) * x_fake
    
    # sample t
    # (presumably one value in [0, 1] per sample, broadcastable over the
    # remaining dimensions, on the same device as the data - TODO confirm)
    t = 
    
    # create x_t
    interpolated = 
    # Gradients are taken with respect to the interpolated points themselves.
    interpolated.requires_grad = True

    # apply critic to x_t
    d_output = 
    # create_graph=True keeps the gradient computation differentiable, so the
    # penalty can itself be backpropagated through.
    gradients = torch.autograd.grad(outputs=d_output, inputs=interpolated,
                                    grad_outputs=torch.ones(d_output.size()).cuda(),
                                    create_graph=True, retain_graph=True)[0]

    # One flat gradient vector per sample.
    gradients = gradients.reshape(batch_size, -1)
    
    # compute gradient norm
    # (presumably the per-sample L2 norm over dim 1 - TODO confirm)
    gradients_norm = 
    return ((gradients_norm - 1) ** 2).mean()

def train(
    generator, 
    critic, 
    gen_optimizer,
    critic_optimizer,
    train_loader,
    critic_steps, 
    batch_size,
    n_epochs
):
    # WGAN-GP training loop. The critic is updated on every batch; the
    # generator is updated whenever the running iteration counter is a
    # multiple of `critic_steps`.
    # Returns a numpy array with the mean generator loss of each epoch.
    # NOTE: exercise template - `d_loss` and `g_loss` are blanks that must be
    # filled in before this parses.
    train_losses = []

    critic.train()
    generator.train()

    # Global iteration counter; it is not reset between epochs.
    curr_iter = 0
    for epoch_i in range(n_epochs):
        batch_loss_history = []

        for batch_i, x in enumerate(tqdm(train_loader, desc=f'epoch {epoch_i}', leave=False)):
            curr_iter += 1
            x = x.clone().detach().cuda().float()
            # Rescale as 2 * (x - 0.5); assumes x is in [0, 1] so the result
            # is in [-1, 1] - TODO confirm against the data loader.
            x = 2 * (x - 0.5)

            # do a critic update
            critic_optimizer.zero_grad()
            fake_data = generator.sample(x.shape[0])
            gp = gradient_penalty(critic, x, fake_data)
            # D(x_fake) - D(x_real) + 10 * grad_pen
            d_loss = 
            d_loss.backward()
            critic_optimizer.step()
            # generator update
            if curr_iter % critic_steps == 0:
                gen_optimizer.zero_grad()
                fake_data = generator.sample(batch_size)
                # -D(x_fake)
                g_loss = 
                g_loss.backward()
                gen_optimizer.step()

                batch_loss_history.append(g_loss.data.cpu().numpy())

        # NOTE(review): if no generator update happened during this epoch the
        # list is empty and this is the mean of an empty list.
        epoch_loss = np.mean(batch_loss_history)
        train_losses.append(epoch_loss)

    train_losses = np.array(train_losses)
    return train_losses

In [None]:
class Generator(nn.Module):
    """Convolutional generator producing (N, 3, 32, 32) outputs in [-1, 1].

    Exercise template: `preprocess`, `block1` and `block2` are empty
    `nn.Sequential()` placeholders to be filled in as the comments describe.
    """

    def __init__(self, dim=64):
        super().__init__()
        self.dim = dim
        
        # apply linear layer with output units 4 * 4 * 4 * dim, then relu
        # (the input is the noise from `sample`, which has 128 features per sample)
        self.preprocess = nn.Sequential()
        
        # apply transposed conv with stride 2, kernel size 2
        # then BN, then relu
        # (receives 4 * dim channels, see the view in `forward`)
        self.block1 = nn.Sequential()
        
        # apply transposed conv with stride 2, kernel size 2
        # then BN, then relu
        # (must output `dim` channels, which is what `deconv_out` expects)
        self.block2 = nn.Sequential()
        self.deconv_out = nn.ConvTranspose2d(dim, 3, 2, stride=2)
        self.tanh = nn.Tanh()
        # Standard normal distribution used to draw the latent noise.
        self.noise = torch.distributions.Normal(torch.tensor(0.), torch.tensor(1.))

    def forward(self, input):
        """Map a batch of noise vectors to a batch of 3x32x32 images."""
        output = self.preprocess(input)
        # Reshape the flat features into a (4 * dim)-channel 4x4 map.
        output = output.view(-1, 4 * self.dim, 4, 4)
        output = self.block1(output)
        output = self.block2(output)
        output = self.deconv_out(output)
        # tanh bounds the output to [-1, 1].
        output = self.tanh(output)
        return output.view(-1, 3, 32, 32)

    def sample(self, n_samples):
        """Draw `n_samples` noise vectors of size 128 on the GPU and generate images."""
        z = self.noise.sample([n_samples, 128]).cuda()
        return self.forward(z)


class Critic(nn.Module):
    """Convolutional critic returning one unbounded score per input sample.

    Exercise template: `net` is an empty `nn.Sequential()` placeholder to be
    filled in as the comment describes.
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim
        
        # sequence of Conv2D with stride 2 and LeakyReLU
        # (its output must hold 4 * 4 * 4 * dim values per sample, see `forward`)
        self.net = nn.Sequential()
        self.linear = nn.Linear(4 * 4 * 4 * dim, 1)

    def forward(self, input):
        """Apply the conv stack, flatten, and project to a single score."""
        output = self.net(input)
        # Flatten the feature map to (N, 4 * 4 * 4 * dim) for the linear layer.
        output = output.view(-1, 4 * 4 * 4 * self.dim)
        output = self.linear(output)
        return output

In [None]:
# Reload CIFAR-10, move the channel axis to position 1 and divide by 255
# (presumably giving values in [0, 1] - TODO confirm the raw value range).
train_data, test_data = load_pickle(os.path.join('drive', 'My Drive', 'DGM2020', 'homework_supplementary', 'cifar10.pkl'))
train_data = np.transpose(train_data, (0, 3, 1, 2)) / 255.0

# Exercise template: fill in the batch size.
BATCH_SIZE = 
train_loader = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# Exercise template: fill in the values below.
# DIM is passed as `dim` to both networks.
# N_ITERATIONS is presumably the desired number of generator updates;
# CRITIC_STEPS is the number of critic updates per generator update.
DIM = 
N_ITERATIONS = 
CRITIC_STEPS = 
# Total batches to process divided by batches per epoch (integer division).
N_EPOCHS = CRITIC_STEPS * N_ITERATIONS // len(train_loader)
print('Total number of epochs:', N_EPOCHS)

generator = Generator(dim=DIM).cuda()
critic = Critic(dim=DIM).cuda()

# Separate Adam optimizers with identical settings for the two networks.
gen_optimizer = torch.optim.Adam(generator.parameters(), lr=2e-4, betas=(0, 0.9))
critic_optimizer = torch.optim.Adam(critic.parameters(), lr=2e-4, betas=(0, 0.9))

# Run WGAN-GP training; `losses` holds the mean generator loss of each epoch.
losses = train(
    generator=generator,
    critic=critic,
    gen_optimizer=gen_optimizer,
    critic_optimizer=critic_optimizer,
    train_loader=train_loader,
    critic_steps=CRITIC_STEPS,
    batch_size=BATCH_SIZE,
    n_epochs=N_EPOCHS,
)

plot_gan_training(losses, 'Generator loss')

# Switch to eval mode and draw samples without tracking gradients.
generator.eval()
critic.eval()
with torch.no_grad():
    samples = generator.sample(1000)
    # Move channels last and map the tanh range [-1, 1] to [0, 1].
    samples = samples.permute(0, 2, 3, 1).cpu().detach().numpy() * 0.5 + 0.5
    

# show_samples divides by 255, so scale the first 100 images back up to 0-255.
show_samples(samples[:100] * 255.0, title='CIFAR-10 generated samples')