In [2]:
# prerequisites
# https://github.com/lyeoni/pytorch-mnist-GAN
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.utils import save_image

# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [3]:
# Batch size, and how many examples are drawn from each MNIST split.
bs = 10000
n_samples = 10000
tr_split_len = te_split_len = n_samples

# MNIST Dataset: convert images to tensors, then normalise with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

train_dataset = datasets.MNIST(root='./mnist_data/', train=True,
                               transform=transform, download=True)
test_dataset = datasets.MNIST(root='./mnist_data/', train=False,
                              transform=transform, download=False)

# Keep only the first part of a random split of each dataset (n_samples items).
part_tr = torch.utils.data.random_split(
    dataset=train_dataset,
    lengths=[tr_split_len, len(train_dataset) - tr_split_len],
)[0]
part_te = torch.utils.data.random_split(
    dataset=test_dataset,
    lengths=[te_split_len, len(test_dataset) - te_split_len],
)[0]

# Data Loader (Input Pipeline): only the training subset is shuffled.
train_loader = torch.utils.data.DataLoader(dataset=part_tr, batch_size=bs, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=part_te, batch_size=bs, shuffle=False)

In [4]:
# Show the type and repr of the full training dataset, then of its loader.
for obj in (train_dataset, train_loader):
    print(type(obj))
    print(obj)

# Number of examples the loader actually iterates over (the random subset).
print(len(train_loader.dataset))

<class 'torchvision.datasets.mnist.MNIST'>
Dataset MNIST
    Number of datapoints: 60000
    Root location: ./mnist_data/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )
<class 'torch.utils.data.dataloader.DataLoader'>
<torch.utils.data.dataloader.DataLoader object at 0x000002D74782F0C8>
10000


In [5]:
class Generator(nn.Module):
    """Fully connected generator: latent vector -> flattened image.

    Three hidden layers of width 256, 512 and 1024 (each twice the previous)
    with LeakyReLU(0.2), followed by a tanh output layer.

    Args:
        g_input_dim: Size of the latent input vector.
        g_output_dim: Size of the generated (flattened) output.
    """

    def __init__(self, g_input_dim, g_output_dim):
        super().__init__()
        width1 = 256
        width2 = 2 * width1
        width3 = 2 * width2
        self.fc1 = nn.Linear(g_input_dim, width1)
        self.fc2 = nn.Linear(width1, width2)
        self.fc3 = nn.Linear(width2, width3)
        self.fc4 = nn.Linear(width3, g_output_dim)

    def forward(self, x):
        """Return tanh-activated samples for the latent batch ``x``."""
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = F.leaky_relu(hidden(x), negative_slope=0.2)
        return torch.tanh(self.fc4(x))
    
class Discriminator(nn.Module):
    """Fully connected discriminator with a sigmoid (probability) output.

    Three hidden layers of width 1024, 512 and 256 with LeakyReLU(0.2) and
    dropout (p=0.3), followed by a single sigmoid unit.

    Args:
        d_input_dim: Size of the flattened input.
    """

    def __init__(self, d_input_dim):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(d_input_dim, 1024)
        self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features//2)
        self.fc3 = nn.Linear(self.fc2.out_features, self.fc2.out_features//2)
        self.fc4 = nn.Linear(self.fc3.out_features, 1)

    # forward method
    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of values in [0, 1] for the batch ``x``."""
        # F.dropout defaults to training=True, which would keep dropout active
        # even after .eval(); tie it to the module's mode instead.
        x = F.leaky_relu(self.fc1(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        x = F.leaky_relu(self.fc2(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        x = F.leaky_relu(self.fc3(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        return torch.sigmoid(self.fc4(x))
    
class RelDiscriminator(nn.Module):
    """Fully connected critic with a linear (unbounded) output.

    Same layout as ``Discriminator`` (1024 -> 512 -> 256 -> 1 with
    LeakyReLU(0.2) and dropout p=0.3) but without the final sigmoid, so the
    raw scores of two inputs can be subtracted before a sigmoid is applied.

    Args:
        d_input_dim: Size of the flattened input.
    """

    def __init__(self, d_input_dim):
        super(RelDiscriminator, self).__init__()
        self.fc1 = nn.Linear(d_input_dim, 1024)
        self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features//2)
        self.fc3 = nn.Linear(self.fc2.out_features, self.fc2.out_features//2)
        self.fc4 = nn.Linear(self.fc3.out_features, 1)

    # forward method
    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of raw scores for the batch ``x``."""
        # F.dropout defaults to training=True, which would keep dropout active
        # even after .eval(); tie it to the module's mode instead.
        x = F.leaky_relu(self.fc1(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        x = F.leaky_relu(self.fc2(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        x = F.leaky_relu(self.fc3(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        return self.fc4(x) # needs linear output

In [6]:
# build network
z_dim = 100  # size of the generator's latent input
# Flattened image size (height * width). `.data` replaces the deprecated
# `train_data` alias, which emits a warning on access.
mnist_dim = train_dataset.data.size(1) * train_dataset.data.size(2)

G = Generator(g_input_dim = z_dim, g_output_dim = mnist_dim).to(device)
D = Discriminator(mnist_dim).to(device)



In [7]:
# Display the generator's layer summary.
G

Generator(
  (fc1): Linear(in_features=100, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=784, bias=True)
)

In [8]:
# Display the discriminator's layer summary.
D

Discriminator(
  (fc1): Linear(in_features=784, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=1, bias=True)
)

In [9]:
# loss: binary cross-entropy on the discriminator's sigmoid output
criterion = nn.BCELoss()

# optimizer: one Adam instance per network, sharing the same learning rate
lr = 2e-4
G_optimizer, D_optimizer = (
    optim.Adam(net.parameters(), lr=lr) for net in (G, D)
)

In [10]:
def D_train(x):
    """Run one optimisation step of the discriminator ``D``.

    Args:
        x: Batch of real images; reshaped to ``(-1, mnist_dim)`` before being
            passed to ``D``.

    Returns:
        float: BCE loss on the real batch (target 1) plus BCE loss on an
        equally sized batch produced by ``G`` (target 0).
    """
    #=======================Train the discriminator=======================#
    D.zero_grad()

    # train discriminator on real
    x_real = x.reshape(-1, mnist_dim).to(device)
    # Size labels and noise from the actual batch rather than the global `bs`:
    # a batch smaller than `bs` would otherwise fail with a shape mismatch.
    n = x_real.size(0)
    y_real = torch.ones(n, 1, device=device)
    D_real_loss = criterion(D(x_real), y_real)

    # train discriminator on fake
    z = torch.randn(n, z_dim, device=device)
    with torch.no_grad():
        # Only D is updated here, so no graph through G is needed.
        x_fake = G(z)
    y_fake = torch.zeros(n, 1, device=device)
    D_fake_loss = criterion(D(x_fake), y_fake)

    # gradient backprop & optimize ONLY D's parameters
    D_loss = D_real_loss + D_fake_loss
    D_loss.backward()
    D_optimizer.step()

    return D_loss.item()

In [11]:
def G_train(x):
    """Run one optimisation step of the generator ``G``.

    Args:
        x: The current batch of real images. Only its first dimension is
            used, to generate the same number of fake samples; if ``x`` is
            not a tensor the global ``bs`` is used instead.

    Returns:
        float: BCE loss of ``D``'s output on generated samples against a
        target of 1.
    """
    #=======================Train the generator=======================#
    G.zero_grad()

    n = x.size(0) if torch.is_tensor(x) else bs
    z = torch.randn(n, z_dim, device=device)
    y = torch.ones(n, 1, device=device)

    G_output = G(z)
    D_output = D(G_output)
    G_loss = criterion(D_output, y)

    # gradient backprop & optimize ONLY G's parameters
    G_loss.backward()
    G_optimizer.step()

    return G_loss.item()

In [12]:
# The commented-out settings suggest a reference run of 200 epochs over all
# 60000 images, i.e. 200*(60000//n_samples) epochs on a subset; the factor 1
# below keeps this run short.
n_epoch = 1*(60000//n_samples)

# Plot samples and print the mean losses every `print_stride` epochs.
print_stride = 1000

for epoch in range(1, n_epoch+1):
    D_losses, G_losses = [], []
    for x, _ in train_loader:
        D_losses.append(D_train(x))
        G_losses.append(G_train(x))

    # Also report on the final epoch; otherwise a run shorter than
    # `print_stride` epochs would finish without any feedback.
    if epoch % print_stride == 0 or epoch == n_epoch:
        with torch.no_grad():
            test_z = torch.randn(9, z_dim, device=device)
            # Call the module rather than .forward() so registered hooks run.
            generated = G(test_z).cpu().numpy()

        # 3x3 grid of generated digits.
        fig, axes = plt.subplots(3, 3)
        for ax, sample in zip(axes.flat, generated):
            ax.imshow(sample.reshape(28, 28), cmap='gray')
        plt.show()
        print('[%d/%d]: loss_d: %.3f, loss_g: %.3f' % ((epoch), n_epoch, torch.mean(torch.FloatTensor(D_losses)), torch.mean(torch.FloatTensor(G_losses))))

RuntimeError: CUDA out of memory. Tried to allocate 30.00 MiB (GPU 0; 8.00 GiB total capacity; 372.33 MiB already allocated; 5.89 GiB free; 382.00 MiB reserved in total by PyTorch)

In [None]:
# save_image writes to an existing path only, so create the folder first.
os.makedirs('./samples', exist_ok=True)

with torch.no_grad():
    test_z = torch.randn(bs, z_dim, device=device)
    generated = G(test_z)

# Reshape the flat outputs to (N, 1, 28, 28) images and write them as one grid.
save_image(generated.view(generated.size(0), 1, 28, 28), f'./samples/sample_{n_samples}.png')

## Relative Discriminator

In [None]:
# TAKEN FROM ANOTHER SOURCE

def RD_train(x_r):
    """Run one optimisation step of the relative discriminator ``RD``.

    The loss is ``-mean(log(sigmoid(RD(x_r) - RD(x_f))))`` where ``x_f`` is a
    batch generated by ``G``, so ``RD`` must have a LINEAR output.
    ``RD`` and ``RD_optimizer`` are not defined in the visible part of the
    notebook; presumably ``RD = RelDiscriminator(mnist_dim)`` -- TODO confirm.

    Args:
        x_r: Batch of real images; reshaped to ``(-1, mnist_dim)``.

    Returns:
        float: The discriminator loss for this step.
    """
    RD.zero_grad()

    x_r = x_r.reshape(-1, mnist_dim).to(device)

    # Noise matches the real batch in size and G's latent size (z_dim).
    # SHOULD BE DOING UNIFORM (rand()) FOR TRANSFER TO DACCA!!!!
    latent_samples = torch.randn(x_r.size(0), z_dim, device=device)
    with torch.no_grad():
        # Only RD is updated here, so no graph through G is needed.
        x_f = G(latent_samples)

    # logsigmoid is the numerically stable form of log(sigmoid(.)).
    RD_loss = -torch.mean(F.logsigmoid(RD(x_r) - RD(x_f)))

    RD_loss.backward() #retain_graph=True
    RD_optimizer.step()

    return RD_loss.item()

# For paper recreation
def RD_G_train(x_r):
    """Run one optimisation step of ``G`` against the relative discriminator.

    The loss is ``-mean(log(sigmoid(RD(x_f) - RD(x_r))))`` where ``x_f`` is a
    batch generated by ``G``. ``RD`` is not defined in the visible part of
    the notebook; presumably a ``RelDiscriminator`` -- TODO confirm.

    Args:
        x_r: Batch of real images; reshaped to ``(-1, mnist_dim)``.

    Returns:
        float: The generator loss for this step.
    """
    G.zero_grad()
    RD.zero_grad()

    x_r = x_r.reshape(-1, mnist_dim).to(device)

    # Noise matches the real batch in size and G's latent size (z_dim).
    latent_samples = torch.randn(x_r.size(0), z_dim, device=device)
    x_f = G(latent_samples)

    # logsigmoid is the numerically stable form of log(sigmoid(.)).
    G_loss = -torch.mean(F.logsigmoid(RD(x_f) - RD(x_r)))

    # gradient backprop & optimize ONLY G's parameters
    G_loss.backward() #retain_graph=True
    G_optimizer.step()

    return G_loss.item()