This notebook is based on the IE 534 GAN assignment: https://courses.engr.illinois.edu/ie534/fa2018/secure/GAN.html#part-1-training-a-gan-on-cifar10.

In [11]:
import numpy as np
import pandas as pd
import datetime

import torch
import torch.nn as nn
import torch.utils.data as Data

import torchvision
import torchvision.transforms as transforms

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os

In [12]:
# Load the checkpoint file and pull out the two accuracy sequences it stores.
# NOTE(review): the two keys use different separators ("_" vs " ") — confirm
# against the code that wrote the checkpoint.
GD_ckpt = torch.load("GD_checkpoint.pth.tar")
test_acc, train_acc = GD_ckpt["Test_Accuracy"], GD_ckpt["Train Accuracy"]

In [14]:
# Display the highest test accuracy and the index at which it occurs
# (presumably the epoch number — confirm against the training script).
np.max(test_acc), np.argmax(test_acc)

(0.8672, 147)

In [15]:
# Display the highest train accuracy and the index at which it occurs
# (presumably the epoch number — confirm against the training script).
np.max(train_acc), np.argmax(train_acc)

(0.94122, 195)

In [9]:
# Reload the stored accuracy sequences and plot both against their index.
GD_ckpt = torch.load("GD_checkpoint.pth.tar")
test_acc = GD_ckpt["Test_Accuracy"]
train_acc = GD_ckpt["Train Accuracy"]

# Draw on an explicit, fresh figure so a figure left open by an earlier cell
# cannot leak into this plot.
fig, ax = plt.subplots()
ax.plot(train_acc, color="blue", linestyle="-", label="Train")
ax.plot(test_acc, color="red", linestyle="-", label="Test")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
# Both curves are shown, so the title must not claim "Train" only.
ax.set_title("Train and Test Accuracy of Generator-Discriminator")
ax.legend()
fig.savefig("Gen_Disc_Accuracy_Plot.png")
plt.close(fig)

In [3]:
# Save function
def save_checkpoint(obj, is_best, filename="checkpoint.pth.tar"):
    """Serialize ``obj`` to ``filename`` and optionally keep a "best" copy.

    Args:
        obj: Any object accepted by ``torch.save``.
        is_best: When truthy, the file just written is also copied to a
            sibling file whose name is prefixed with ``best_``.
        filename: Destination path of the checkpoint.
    """
    # Imported locally: the notebook's import cell does not import shutil,
    # which previously made the ``is_best`` branch raise NameError.
    import os
    import shutil

    torch.save(obj, filename)
    if is_best:
        # Prefix only the file name, so "ckpt/model.pth.tar" becomes
        # "ckpt/best_model.pth.tar" rather than "best_ckpt/model.pth.tar".
        directory, basename = os.path.split(filename)
        shutil.copyfile(filename, os.path.join(directory, "best_" + basename))

In [2]:
# Plot images
def custom_plot(samples):
    """Draw ``samples`` as tiles of a tightly spaced 10x10 grid.

    Each element of ``samples`` is shown with ``imshow`` in its own grid
    cell, in iteration order, with the axes hidden.  Returns the figure.
    """
    figure = plt.figure(figsize=(10, 10))
    layout = gridspec.GridSpec(10, 10)
    layout.update(wspace=0.02, hspace=0.02)

    for cell, image in enumerate(samples):
        tile = plt.subplot(layout[cell])
        # The new subplot is the current axes, so the pyplot calls below
        # act on ``tile``.
        tile.set_xticklabels([])
        tile.set_yticklabels([])
        plt.axis("off")
        plt.imshow(image)
    return figure

In [4]:
# Data augmentation
# Training pipeline: random square crop resized to 32x32, colour jitter,
# random horizontal flip, tensor conversion, then (x - 0.5) / 0.5 per channel.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(32, scale=(0.7, 1.0), ratio=(1.0, 1.0)),
    # ColorJitter takes non-negative jitter magnitudes (plain floats or
    # (min, max) pairs) and samples a new factor for every image.  The
    # previous arguments were one-element tensors drawn once from
    # 0.1 * randn, which can be negative and are not a supported input type.
    transforms.ColorJitter(
            brightness=0.1,
            contrast=0.1,
            saturation=0.1,
            hue=0.1),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation pipeline: deterministic centre crop, tensor conversion, and the
# same per-channel normalisation constants as the training pipeline.
transform_test = transforms.Compose([
    transforms.CenterCrop(size=32),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [4]:
# Hyperparameters
# Batch size, number of class labels, and number of training epochs.
BatchSize, num_classes, num_epochs = 128, 10, 200
# Learning rate handed to both Adam optimizers.
learning_rate = 0.0001

In [None]:
# Load data
# CIFAR-10 train/test splits stored under ./data (downloaded when requested
# via download=True), each with its own transform pipeline.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = Data.DataLoader(
    trainset, batch_size=BatchSize, shuffle=True, num_workers=8)
test_loader = Data.DataLoader(
    testset, batch_size=BatchSize, shuffle=False, num_workers=8)

In [5]:
# Device configuration
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [6]:
# Create Generator model with structure given in reference
# Generator stack: a 100-dim latent vector is projected to 196x4x4 feature
# maps and upsampled 4 -> 8 -> 16 -> 32 by the three transposed convolutions,
# ending in a 3-channel 32x32 image.
LongGenConv = nn.Sequential(nn.Linear(in_features=100, out_features=196*4*4, bias=True),
                         # The linear layer emits a flat (N, 196*4*4) tensor; the
                         # convolutions need (N, 196, 4, 4), so reshape here.
                         nn.Unflatten(1, (196, 4, 4)),
                         nn.ConvTranspose2d(in_channels=196,out_channels=196,kernel_size=4,stride=2,padding=1),
                         nn.BatchNorm2d(num_features=196),
                         nn.ReLU(inplace=True), # Conv layer 1
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=1,padding=1),
                         nn.BatchNorm2d(num_features=196),
                         nn.ReLU(inplace=True), # Conv layer 2
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=1,padding=1),
                         nn.BatchNorm2d(num_features=196),
                         nn.ReLU(inplace=True), # Conv layer 3
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=1,padding=1),
                         nn.BatchNorm2d(num_features=196),
                         nn.ReLU(inplace=True), # Conv layer 4
                         nn.ConvTranspose2d(in_channels=196,out_channels=196,kernel_size=4,stride=2,padding=1),
                         nn.BatchNorm2d(num_features=196),
                         nn.ReLU(inplace=True), # Conv layer 5
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=1,padding=1),
                         nn.BatchNorm2d(num_features=196),
                         nn.ReLU(inplace=True), # Conv layer 6
                         nn.ConvTranspose2d(in_channels=196,out_channels=196,kernel_size=4,stride=2,padding=1),
                         nn.BatchNorm2d(num_features=196),
                         nn.ReLU(inplace=True), # Conv layer 7
                         nn.Conv2d(in_channels=196,out_channels=3,kernel_size=3,stride=1,padding=1), # Conv layer 8
                         # Bound the output to [-1, 1]: real images are normalised
                         # with mean 0.5 / std 0.5, and the plotting code maps
                         # samples back with (x + 1) / 2.
                         nn.Tanh())

In [8]:
# Create Discriminator model with structure given in reference
# Discriminator trunk for 3x32x32 inputs.  Conv layers 2, 4 and 8 halve the
# spatial size (32 -> 16 -> 8 -> 4) with stride 2 / padding 1, which is what
# the LayerNorm shapes that follow them require; the final 4x4 max-pool
# leaves a 196x1x1 feature map.
LongConv = nn.Sequential(nn.Conv2d(in_channels=3,out_channels=196,kernel_size=3,stride=1,padding=1),
                         nn.LayerNorm(normalized_shape=(196,32,32)),
                         nn.LeakyReLU(inplace=True), # Conv layer 1
                         # stride=2/padding=1 (was stride=1/padding=2, which yields 34x34)
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=2,padding=1),
                         nn.LayerNorm(normalized_shape=(196,16,16)),
                         nn.LeakyReLU(inplace=True), # Conv layer 2
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=1,padding=1),
                         nn.LayerNorm(normalized_shape=(196,16,16)),
                         nn.LeakyReLU(inplace=True), # Conv layer 3
                         # stride=2/padding=1: 16x16 -> 8x8
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=2,padding=1),
                         nn.LayerNorm(normalized_shape=(196,8,8)),
                         nn.LeakyReLU(inplace=True), # Conv layer 4
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=1,padding=1),
                         nn.LayerNorm(normalized_shape=(196,8,8)),
                         nn.LeakyReLU(inplace=True), # Conv layer 5
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=1,padding=1),
                         nn.LayerNorm(normalized_shape=(196,8,8)),
                         nn.LeakyReLU(inplace=True), # Conv layer 6
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=1,padding=1),
                         nn.LayerNorm(normalized_shape=(196,8,8)),
                         nn.LeakyReLU(inplace=True), # Conv layer 7
                         # stride=2/padding=1: 8x8 -> 4x4
                         nn.Conv2d(in_channels=196,out_channels=196,kernel_size=3,stride=2,padding=1),
                         nn.LayerNorm(normalized_shape=(196,4,4)),
                         nn.LeakyReLU(inplace=True), # Conv layer 8
                         nn.MaxPool2d(kernel_size=4,stride=4)) # Max Pooling
# Two linear heads over 196 input features: a single score and 10 class logits.
Scorer = nn.Linear(196, 1, bias=True)
Classifier = nn.Linear(196, 10, bias=True)

In [7]:
# Define a Generator class
class GenConvNet(nn.Module):
    """Generator module wrapping the module-level ``LongGenConv`` stack.

    The stack is referenced, not copied, so every instance shares the same
    layers and parameters.
    """

    def __init__(self):
        super().__init__()
        self.GenLayer = LongGenConv

    def forward(self, x):
        """Pass ``x`` through the generator stack and return the result."""
        generated = self.GenLayer(x)
        return generated

In [10]:
# Define a Discriminator class
class DiscConvNet(nn.Module):
    """Discriminator: one convolutional trunk feeding a score head and a class head.

    The trunk and both heads are the module-level ``LongConv``, ``Scorer``
    and ``Classifier`` objects, referenced rather than copied.
    """

    def __init__(self):
        super().__init__()
        self.ConvLayer = LongConv
        self.Class = Classifier
        self.Score = Scorer

    def forward(self, x):
        """Return ``(score, class_output)`` for the batch ``x``."""
        features = self.ConvLayer(x)
        # Collapse everything after the batch dimension for the linear heads.
        flat = features.reshape(features.shape[0], -1)
        return self.Score(flat), self.Class(flat)

In [12]:
# Define model
# Build both networks and move them to the selected device.
genmodel = GenConvNet().to(device)
discmodel = DiscConvNet().to(device)

# Cross-entropy for the class head; one Adam optimizer per network, both with
# the same learning rate and betas=(0, 0.9).
criterion = nn.CrossEntropyLoss()
optimizer_g = torch.optim.Adam(
    genmodel.parameters(), lr=learning_rate, betas=(0, 0.9))
optimizer_d = torch.optim.Adam(
    discmodel.parameters(), lr=learning_rate, betas=(0, 0.9))

In [13]:
# Define a way to compute gradient
def calc_gradient_penalty(netD, real_data, fake_data, penalty_weight=10):
    """Return the gradient penalty for one real/fake batch pair.

    A random point is drawn on the straight line between each paired real
    and fake sample and fed to ``netD``.  The penalty is the mean over the
    batch of ``(||d score / d input||_2 - 1) ** 2``, multiplied by
    ``penalty_weight``.

    Args:
        netD: Callable returning a ``(score, other)`` pair; only the score
            is used.
        real_data: Batch of real samples, first dimension is the batch.
        fake_data: Batch of generated samples with the same number of
            values per sample as ``real_data``.  If the two batches differ
            in length, only the first ``min(len(real), len(fake))`` samples
            of each are used.
        penalty_weight: Multiplier applied to the penalty (default 10).

    Returns:
        A scalar tensor.  The gradient is computed with
        ``create_graph=True`` so the penalty can itself be backpropagated.
    """
    # Derive sizes and device from the inputs instead of notebook globals, so
    # a short final batch or a different image size does not break the call.
    batch_size = min(real_data.size(0), fake_data.size(0))
    real = real_data[:batch_size].detach()
    fake = fake_data[:batch_size].detach().to(real.device).reshape(real.shape)

    # One mixing coefficient per sample, broadcast over the remaining dims.
    alpha = torch.rand(batch_size, *([1] * (real.dim() - 1)), device=real.device)
    interpolates = (alpha * real + (1 - alpha) * fake).requires_grad_(True)

    disc_interpolates, _ = netD(interpolates)

    gradients = torch.autograd.grad(
        outputs=disc_interpolates, inputs=interpolates,
        grad_outputs=torch.ones_like(disc_interpolates),
        create_graph=True, retain_graph=True)[0]

    gradients = gradients.reshape(batch_size, -1)
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * penalty_weight
    return gradient_penalty

In [None]:
# Training process of Generator
# Freeze the discriminator so this step only produces generator gradients.
for param in discmodel.parameters():
    param.requires_grad = False

genmodel.zero_grad()

# Build the latent batch: Gaussian noise whose first num_classes entries are
# overwritten with the one-hot encoding of a randomly drawn label.
rand_label = np.random.randint(0, num_classes, BatchSize)
noise = np.random.normal(0, 1, (BatchSize, 100))
rand_label_onehot = np.zeros((BatchSize, num_classes))
rand_label_onehot[np.arange(BatchSize), rand_label] = 1
noise[:, :num_classes] = rand_label_onehot
noise = noise.astype(np.float32)
ts_noise = torch.from_numpy(noise).to(device)
fake_label = torch.from_numpy(rand_label).to(device)

# Generate images and let the discriminator score and classify them.
fake_data = genmodel(ts_noise)
gen_score, gen_class = discmodel(fake_data)
gen_loss = criterion(gen_class, fake_label)
# Cost = classification loss on the requested labels minus the mean score.
gen_cost = gen_loss - gen_score.mean()
gen_cost.backward()

optimizer_g.step()

In [None]:
# Training process of Discriminator
# Re-enable discriminator gradients (the generator step switches them off).
for param in discmodel.parameters():
    param.requires_grad = True

discmodel.zero_grad()

# Generate the input noise: Gaussian noise whose first num_classes entries
# are replaced by the one-hot encoding of a random label.
rand_label = np.random.randint(0, num_classes, BatchSize)
noise = np.random.normal(0,1,(BatchSize,100))
rand_label_onehot = np.zeros((BatchSize,num_classes))
rand_label_onehot[np.arange(BatchSize), rand_label] = 1
noise[np.arange(BatchSize), :num_classes] = rand_label_onehot[np.arange(BatchSize)]
noise = noise.astype(np.float32)
ts_noise = torch.from_numpy(noise).to(device) # Create noise as a Tensor
fake_label = torch.from_numpy(rand_label).to(device) # Create fake label as a Tensor

# Generate fake images without tracking generator gradients, then evaluate
with torch.no_grad():
    fake_data = genmodel(ts_noise)
disc_fake_score, disc_fake_class = discmodel(fake_data)
disc_fake_loss = criterion(disc_fake_class, fake_label)

# Train discriminator with real data.
# `images` and `labels` must hold the current batch from the data loader.
real_data = images.to(device)
real_label = labels.to(device).long()  # was misspelled `read_label`, leaving `real_label` undefined
disc_real_score, disc_real_class = discmodel(real_data)
disc_real_loss = criterion(disc_real_class, real_label)

prediction = disc_real_class.data.max(1)[1]
# Divide by the actual batch length so a short batch is not under-counted.
accuracy = float(prediction.eq(real_label.data).sum()) / float(real_label.size(0)) * 100
grad_penalty = calc_gradient_penalty(discmodel, real_data, fake_data)

# Cost: score gap (fake minus real), both classification losses, and the gradient penalty.
disc_cost = disc_fake_score.mean() - disc_real_score.mean() + disc_real_loss + disc_fake_loss + grad_penalty
disc_cost.backward()

optimizer_d.step()

In [None]:
# Test model
# Evaluate the discriminator's class head on the test set.
discmodel.eval()
test_acc = []      # per-batch accuracy, in percent
num_correct = 0
num_seen = 0
with torch.no_grad():
    for idx, (images, labels) in enumerate(test_loader):
        images = images.to(device)
        labels = labels.to(device).long()

        _, output = discmodel(images)

        prediction = output.data.max(1)[1]
        batch_correct = int(prediction.eq(labels).sum())
        # Use the real batch length: the last batch can be shorter than
        # BatchSize, which previously deflated its accuracy.
        accuracy = float(batch_correct) / float(labels.size(0)) * 100
        test_acc.append(accuracy)
        num_correct += batch_correct
        num_seen += labels.size(0)
# Overall accuracy, every sample weighted equally (the old code referenced the
# undefined name `test_accu` and recomputed the mean on every iteration).
accuracy_test = float(num_correct) / num_seen * 100 if num_seen else float("nan")
now_time = datetime.datetime.now()
# `start_time` is set by the training cell.
print("Testing", accuracy_test, now_time-start_time)

In [None]:
# Render the current samples and save them under a zero-padded epoch name.
fig = custom_plot(samples)
epoch_tag = str(epoch).zfill(3)
plt.savefig("output_%s.png" % epoch_tag, bbox_inches="tight")
plt.close(fig)

In [None]:
# Plot fake images
# Run the generator in eval mode on the prepared noise, then switch it back
# to train mode.
with torch.no_grad():
    genmodel.eval()
    samples = genmodel(ts_noise).cpu().numpy()
    genmodel.train()

# Apply (x + 1) / 2 and move axis 1 to the end before plotting.
samples = np.transpose((samples + 1) / 2, (0, 2, 3, 1))

fig = custom_plot(samples)
plt.savefig("output_%s.png" % str(epoch).zfill(3), bbox_inches="tight")
plt.close(fig)

# Save both whole model objects (not only their state dicts).
torch.save(genmodel, "tempGenModel.ckpt")
torch.save(discmodel, "tempDiscModel.ckpt")

In [None]:
# Complete process of training
start_time = datetime.datetime.now()
# NOTE(review): these two lists are created but never appended to in this
# cell — confirm where per-epoch accuracies are supposed to be recorded.
train_acc_seq = []
test_acc_seq = []

for epoch in range(num_epochs):
    # this avoids overflow: cap Adam's per-parameter step counter.
    # The original looked the state up on an undefined name `optimizer`;
    # each optimizer's own state is used here.
    if epoch > 5:
        for opt in (optimizer_g, optimizer_d):
            for group in opt.param_groups:
                for p in group['params']:
                    state = opt.state[p]
                    if 'step' in state and state['step'] >= 1024:
                        if torch.is_tensor(state['step']):
                            # Some PyTorch versions keep `step` as a tensor;
                            # update it in place to preserve its type.
                            state['step'].fill_(1000)
                        else:
                            state['step'] = 1000
    genmodel = genmodel.train()
    discmodel = discmodel.train()
    for i, (images, labels) in enumerate(train_loader):
        # The fake batch always has BatchSize samples, so skip a short final
        # batch to keep real and fake batches the same length.
        if images.size(0) < BatchSize:
            continue

        # train the generator
        for param in discmodel.parameters():
            param.requires_grad = False
        genmodel.zero_grad()
        ### Generate the input noise
        rand_label = np.random.randint(0, num_classes, BatchSize)
        noise = np.random.normal(0,1,(BatchSize,100))
        rand_label_onehot = np.zeros((BatchSize,num_classes))
        rand_label_onehot[np.arange(BatchSize), rand_label] = 1
        noise[np.arange(BatchSize), :num_classes] = rand_label_onehot[np.arange(BatchSize)]
        noise = noise.astype(np.float32)
        ts_noise = torch.from_numpy(noise).to(device) # Create noise as a Tensor
        fake_label = torch.from_numpy(rand_label).to(device) # Create fake label as a Tensor
        ### Generate fake images, evaluate it by discriminator
        fake_data = genmodel(ts_noise)
        gen_score, gen_class = discmodel(fake_data)
        gen_loss = criterion(gen_class, fake_label)
        gen_cost = -gen_score.mean() + gen_loss
        gen_cost.backward()
        optimizer_g.step()

        # train the discriminator with input from generator
        for param in discmodel.parameters():
            param.requires_grad = True
        discmodel.zero_grad()
        ### Generate fake images (same noise as the generator step) and evaluate
        with torch.no_grad():
            fake_data = genmodel(ts_noise)
        disc_fake_score, disc_fake_class = discmodel(fake_data)
        disc_fake_loss = criterion(disc_fake_class, fake_label)
        ### Train discriminator on the real batch
        real_data = images.to(device)
        real_label = labels.to(device).long()  # was misspelled `read_label`
        disc_real_score, disc_real_class = discmodel(real_data)
        disc_real_loss = criterion(disc_real_class, real_label)

        prediction = disc_real_class.data.max(1)[1]
        accuracy = float(prediction.eq(real_label.data).sum()) / float(BatchSize) * 100
        grad_penalty = calc_gradient_penalty(discmodel, real_data, fake_data)

        disc_cost = disc_fake_score.mean() - disc_real_score.mean() + disc_real_loss + disc_fake_loss + grad_penalty
        disc_cost.backward()

        optimizer_d.step()

In [None]:
# Generate the input noise
# 100 latent vectors with labels 0..9 repeated ten times: Gaussian noise whose
# first 10 entries are replaced by the one-hot encoding of the label.
label = np.tile(np.arange(10), 10)
noise = np.random.normal(0, 1, (100, 100))
label_onehot = np.zeros((100, 10))
label_onehot[np.arange(100), label] = 1
noise[:, :10] = label_onehot
noise = noise.astype(np.float32)

# Tensor copy of the noise on the selected device.
ts_noise = torch.from_numpy(noise).to(device)

In [None]:
import types
# Scratch cell: a bare expression, so it only displays the value in the
# notebook and changes no state.
types.MethodType