## Things to Try

In [13]:
import os
import torch
import torchvision
import torch.nn as nn
from torchvision import transforms
from torchvision.utils import save_image
import torchvision.datasets as dset
import torch.optim as optim
from torch.autograd import Variable
import torchvision.utils as vutils
import torch.nn.functional as F
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [14]:
image_size = 64
num_epochs = 250
batch_size = 128

In [15]:
transform = transforms.Compose([
                                transforms.ToTensor(), 
                                transforms.Normalize((0.5, 0.5, 0.5), 
                                                     (0.5, 0.5, 0.5)),])
dataset = dset.CIFAR10(root = '/home/tyler/data/image', 
                        download = True, transform = transform)
dataloader = torch.utils.data.DataLoader(dataset, 
                                         batch_size = batch_size, 
                                         shuffle = True, 
                                         num_workers = 0)

Files already downloaded and verified


In [16]:
## weight initalization for network
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

### Generator

Produces an image

Q: why the convtranspose2d numbers? A: they are the inverse of discriminator

Q: Why first convtranspose2d in channels is 100? This is just a hyper-parameter. You can choose how many channels you want to randomly generate for the input to your generator.

In [17]:
class Generator(nn.Module):
    # initializers
    def __init__(self, d=128):
        super(Generator, self).__init__()
        self.deconv1 = nn.ConvTranspose2d(100, d*8, 4, 1, 0, bias=False)
        self.deconv1_bn = nn.BatchNorm2d(d*8)
        self.deconv2 = nn.ConvTranspose2d(d*8, d*4, 4, 2, 1, bias=False)
        self.deconv2_bn = nn.BatchNorm2d(d*4)
        self.deconv3 = nn.ConvTranspose2d(d*4, d*2, 4, 2, 1, bias=False)
        self.deconv3_bn = nn.BatchNorm2d(d*2)
        self.deconv4 = nn.ConvTranspose2d(d*2, d, 4, 2, 1, bias=False)
        self.deconv4_bn = nn.BatchNorm2d(d)
        self.deconv5 = nn.ConvTranspose2d(d, 3, 4, 2, 1, bias=False)

    # forward method
    def forward(self, input):
        # x = F.relu(self.deconv1(input))
        x = F.leaky_relu(self.deconv1_bn(self.deconv1(input)), 0.2)
        x = F.leaky_relu(self.deconv2_bn(self.deconv2(x)), 0.2)
        x = F.leaky_relu(self.deconv3_bn(self.deconv3(x)), 0.2)
        x = F.leaky_relu(self.deconv4_bn(self.deconv4(x)), 0.2)
        x = F.tanh(self.deconv5(x))

        return x

## Discriminator

Just a normal conv net to tell if an image is fake or not.

In [102]:
class Discriminator(nn.Module):
    # initializers
    def __init__(self, d=128):
        super(Discriminator, self).__init__()
        self.d = d
        self.conv1 = nn.Conv2d(3, d, 4, 2, 1, bias=False)
        self.conv2 = nn.Conv2d(d, d*2, 4, 2, 1, bias=False)
        self.conv2_bn = nn.BatchNorm2d(d*2)
        self.conv3 = nn.Conv2d(d*2, d*4, 4, 2, 1, bias=False)
        self.conv3_bn = nn.BatchNorm2d(d*4)
        self.conv4 = nn.Conv2d(d*4, d*8, 4, 2, 1, bias=False)
        self.conv4_bn = nn.BatchNorm2d(d*8)
        self.conv5 = nn.Conv2d(d*8, 1, 4, 1, 0, bias=False)
        
        self.conv11_f = nn.Conv2d(d*4, 64, kernel_size=1)
        self.conv11_g = nn.Conv2d(d*4, 64, kernel_size=1)
        self.conv11_h = nn.Conv2d(d*4, d*4, kernel_size=1)

    # forward method
    def forward(self, input):
        x = F.leaky_relu(self.conv1(input), 0.2)
        x = F.leaky_relu(self.conv2_bn(self.conv2(x)), 0.2)
        x = F.leaky_relu(self.conv3_bn(self.conv3(x)), 0.2)
        
#         b, f, l, w = x.shape
#         f_out = self.conv11_f(x).view(b, 64, -1).transpose(1, 2)
#         g_out = self.conv11_g(x).view(b, 64, -1)
#         h_out = self.conv11_h(x).view(b, f, -1)
#         attention_map = F.softmax(torch.bmm(f_out, g_out), dim=0)
        
#         self_att_feature_maps = torch.bmm(h_out, attention_map).view(b, f, l, w)
        
        
        x = F.leaky_relu(self.conv4_bn(self.conv4(x)), 0.2)
        print(x.shape)
        x = F.sigmoid(self.conv5(x))

        return x.view(-1)

In [103]:
netG = Generator(128).to(device)
netG.apply(weights_init)
netD = Discriminator(128).to(device)
netD.apply(weights_init)

Discriminator(
  (conv1): Conv2d(3, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (conv2): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (conv2_bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (conv3_bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
  (conv4_bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(1024, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
  (conv11_f): Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1))
  (conv11_g): Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1))
  (conv11_h): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1))
)

In [104]:
criterion = nn.BCELoss()
optimizerD = optim.Adam(netD.parameters(), lr = 0.00005, betas = (0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = 0.0002, betas = (0.5, 0.999))

In [105]:
total_step = len(dataloader)
for epoch in range(num_epochs):
    for i, data in enumerate(dataloader):
        
        ## train discriminator
        
        netD.zero_grad()
        
        ## calculate error using real image 
        real, _ = data
        input = Variable(real).to(device)
        # target is 1 b/c real image
        target = Variable(torch.empty(input.size()[0]).uniform_(0.7, 1.2) ).to(device)
        output = netD(input)
        real_score = output
        errD_real = criterion(output, target)
        
        ## calculate error using fake image
        ## first generate an image using generator then discriminate
        ## this is 100 channels, 1x1 random noise that the generate will use
        noise = Variable(torch.randn(input.size()[0], 100, 1, 1)).to(device)
        fake = netG(noise)
        target = Variable(torch.empty(input.size()[0]).uniform_(0.0, 0.3) ).to(device)
        output = netD(fake.detach())
        fake_score = output
        errD_fake = criterion(output, target)
        
        
        errD = errD_real + errD_fake
        errD.backward()
        optimizerD.step()
        
        
        ## train generator
        
        
        ## we want the generator to learn to create realistic images
        ## and thus to produce a 1 from the discriminator
        netG.zero_grad()
        target = Variable(torch.ones(input.size()[0])).to(device)
        output = netD(fake)
        errG = criterion(output, target)
        errG.backward()
        optimizerG.step()
        
        if (i+1) % 50 == 0:
            print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}' 
                  .format(epoch, num_epochs, i+1, total_step, errD.item(), errG.item(), 
                    real_score.mean().item(), fake_score.mean().item()))
            vutils.save_image(real, '%s/real_samples.png' % "./results", normalize = True)
            fake = netG(noise)
            vutils.save_image(fake.data, '%s/fake_samples_epoch_%03d.png' % ("./results", epoch), normalize = True)

torch.Size([128, 1024, 2, 2])


RuntimeError: Expected tensor for argument #1 'input' to have the same dimension as tensor for 'result'; but 4 does not equal 2 (while checking arguments for cudnn_convolution)