# Generative adversarial network (using the GAN class)

In [1]:
from __future__ import print_function
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image
from tensorboardX import SummaryWriter

from tqdm import tqdm

batch_size = 64
epochs = 10
seed = 1
torch.manual_seed(seed)

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [2]:
root = '../data'
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Lambda(lambd=lambda x: x.view(-1))])
kwargs = {'batch_size': batch_size, 'num_workers': 1, 'pin_memory': True}

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root=root, train=True, transform=transform, download=True),
    shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root=root, train=False, transform=transform),
    shuffle=False, **kwargs)

In [3]:
from pixyz.distributions import Deterministic, DataDistribution
from pixyz.distributions import Normal
from pixyz.models import GAN
from pixyz.utils import print_latex

In [4]:
x_dim = 784
z_dim = 100

# generator model p(x|z)    
class Generator(Deterministic):
    def __init__(self):
        super(Generator, self).__init__(cond_var=["z"], var=["x"], name="p")

        def block(in_feat, out_feat, normalize=True):
            layers = [nn.Linear(in_feat, out_feat)]
            if normalize:
                layers.append(nn.BatchNorm1d(out_feat, 0.8, affine=False))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers

        self.model = nn.Sequential(
            *block(z_dim, 128, normalize=False),
            *block(128, 256),
            *block(256, 512),
            *block(512, 1024),
            nn.Linear(1024, x_dim),
            nn.Sigmoid()
        )

    def forward(self, z):
        x = self.model(z)
        return {"x": x}

# prior model p(z)
prior = Normal(loc=torch.tensor(0.), scale=torch.tensor(1.),
               var=["z"], features_shape=[z_dim], name="p_{prior}").to(device)

# generative model
p_g = Generator()
p = (p_g*prior).marginalize_var("z").to(device)

# data distribution
p_data = DataDistribution(["x"]).to(device)

In [5]:
print(p)
print_latex(p)

Distribution:
  p(x) = \int p(x|z)p_{prior}(z)dz
Network architecture:
  Normal(
    name=p_{prior}, distribution_name=Normal,
    var=['z'], cond_var=[], input_var=[], features_shape=torch.Size([100])
    (loc): torch.Size([1, 100])
    (scale): torch.Size([1, 100])
  )
  Generator(
    name=p, distribution_name=Deterministic,
    var=['x'], cond_var=['z'], input_var=['z'], features_shape=torch.Size([])
    (model): Sequential(
      (0): Linear(in_features=100, out_features=128, bias=True)
      (1): LeakyReLU(negative_slope=0.2, inplace)
      (2): Linear(in_features=128, out_features=256, bias=True)
      (3): BatchNorm1d(256, eps=0.8, momentum=0.1, affine=False, track_running_stats=True)
      (4): LeakyReLU(negative_slope=0.2, inplace)
      (5): Linear(in_features=256, out_features=512, bias=True)
      (6): BatchNorm1d(512, eps=0.8, momentum=0.1, affine=False, track_running_stats=True)
      (7): LeakyReLU(negative_slope=0.2, inplace)
      (8): Linear(in_features=512, out_feat

<IPython.core.display.Math object>

In [6]:
print(p_data)
print_latex(p_data)

Distribution:
  p_{data}(x)
Network architecture:
  DataDistribution(
    name=p_{data}, distribution_name=Data distribution,
    var=['x'], cond_var=[], input_var=['x'], features_shape=torch.Size([])
  )


<IPython.core.display.Math object>

In [7]:
# discriminator model p(t|x)
class Discriminator(Deterministic):
    def __init__(self):
        super(Discriminator, self).__init__(cond_var=["x"], var=["t"], name="d")

        self.model = nn.Sequential(
            nn.Linear(x_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        t = self.model(x)
        return {"t": t}
    
d = Discriminator().to(device)

In [8]:
print(d)
print_latex(d)

Distribution:
  d(t|x)
Network architecture:
  Discriminator(
    name=d, distribution_name=Deterministic,
    var=['t'], cond_var=['x'], input_var=['x'], features_shape=torch.Size([])
    (model): Sequential(
      (0): Linear(in_features=784, out_features=512, bias=True)
      (1): LeakyReLU(negative_slope=0.2, inplace)
      (2): Linear(in_features=512, out_features=256, bias=True)
      (3): LeakyReLU(negative_slope=0.2, inplace)
      (4): Linear(in_features=256, out_features=1, bias=True)
      (5): Sigmoid()
    )
  )


<IPython.core.display.Math object>

In [9]:
model = GAN(p_data, p, d,
            optimizer=optim.Adam, optimizer_params={"lr":0.0002},
            d_optimizer=optim.Adam, d_optimizer_params={"lr":0.0002})
print(model)
print_latex(model)

Distributions (for training): 
  p(x) 
Loss function: 
  mean(D_{JS}^{Adv} \left[p_{data}(x)||p(x) \right]) 
Optimizer: 
  Adam (
  Parameter Group 0
      amsgrad: False
      betas: (0.9, 0.999)
      eps: 1e-08
      lr: 0.0002
      weight_decay: 0
  )


<IPython.core.display.Math object>

In [10]:
def train(epoch):
    train_loss = 0
    train_d_loss = 0
    for x, _ in tqdm(train_loader):
        x = x.to(device)
        loss, d_loss = model.train({"x": x})
        train_loss += loss
        train_d_loss += d_loss
 
    train_loss = train_loss * train_loader.batch_size / len(train_loader.dataset)
    train_d_loss = train_d_loss * train_loader.batch_size / len(train_loader.dataset)
    print('Epoch: {} Train loss: {:.4f}, {:.4f}'.format(epoch, train_loss.item(), train_d_loss.item()))
    return train_loss

In [11]:
def test(epoch):
    test_loss = 0
    test_d_loss = 0
    for x, _ in test_loader:
        x = x.to(device)
        loss, d_loss = model.test({"x": x})
        test_loss += loss
        test_d_loss += d_loss

    test_loss = test_loss * test_loader.batch_size / len(test_loader.dataset)
    test_d_loss = test_d_loss * test_loader.batch_size / len(test_loader.dataset)
    
    print('Test loss: {:.4f}, {:.4f}'.format(test_loss, test_d_loss.item()))
    return test_loss

In [12]:
def plot_image_from_latent(z_sample):
    with torch.no_grad():
        sample = p_g.sample({"z": z_sample})["x"].view(-1, 1, 28, 28).cpu()
        return sample

In [13]:
writer = SummaryWriter()

z_sample = torch.randn(64, z_dim).to(device)
_x, _y = iter(test_loader).next()
_x = _x.to(device)
_y = _y.to(device)

for epoch in range(1, epochs + 1):
    train_loss = train(epoch)
    test_loss = test(epoch)
    
    sample = plot_image_from_latent(z_sample)

    writer.add_scalar('train_loss', train_loss.item(), epoch)
    writer.add_scalar('test_loss', test_loss.item(), epoch)      
    
    writer.add_images('Image_from_latent', sample, epoch)
    
writer.close()

100%|██████████| 938/938 [00:09<00:00, 95.84it/s]


Epoch: 1 Train loss: 5.4801, 0.1775


  0%|          | 0/938 [00:00<?, ?it/s]

Test loss: 16.9259, 5.9919


100%|██████████| 938/938 [00:10<00:00, 93.51it/s]

Epoch: 2 Train loss: 6.1298, 0.0840



  0%|          | 0/938 [00:00<?, ?it/s]

Test loss: 19.2121, 4.8468


100%|██████████| 938/938 [00:09<00:00, 94.13it/s]

Epoch: 3 Train loss: 6.6186, 0.0875



  0%|          | 0/938 [00:00<?, ?it/s]

Test loss: 19.1694, 5.4619


100%|██████████| 938/938 [00:09<00:00, 95.84it/s]

Epoch: 4 Train loss: 6.2090, 0.0978



  0%|          | 0/938 [00:00<?, ?it/s]

Test loss: 19.2751, 5.6214


100%|██████████| 938/938 [00:10<00:00, 92.62it/s]


Epoch: 5 Train loss: 6.3099, 0.1051


  0%|          | 0/938 [00:00<?, ?it/s]

Test loss: 20.0078, 3.5164


100%|██████████| 938/938 [00:10<00:00, 93.10it/s] 


Epoch: 6 Train loss: 6.1372, 0.1177


  0%|          | 0/938 [00:00<?, ?it/s]

Test loss: 18.1406, 4.0445


100%|██████████| 938/938 [00:10<00:00, 92.93it/s]

Epoch: 7 Train loss: 5.9906, 0.1203



  0%|          | 0/938 [00:00<?, ?it/s]

Test loss: 21.6356, 4.3760


100%|██████████| 938/938 [00:10<00:00, 91.73it/s]

Epoch: 8 Train loss: 5.8850, 0.1263



  0%|          | 0/938 [00:00<?, ?it/s]

Test loss: 21.3998, 3.8537


100%|██████████| 938/938 [00:09<00:00, 94.22it/s]

Epoch: 9 Train loss: 6.1321, 0.1136



  0%|          | 0/938 [00:00<?, ?it/s]

Test loss: 24.5080, 4.3141


100%|██████████| 938/938 [00:09<00:00, 94.41it/s]


Epoch: 10 Train loss: 6.0489, 0.1069
Test loss: 23.6262, 5.2636
