In [None]:
import os

# Working directories: `gen_dog_imgs` receives the extracted competition
# archives, `dogs_dir` receives the cropped dog images written later on.
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
gen_dog_imgs = '/kaggle/working/generative-dog-images'
os.makedirs(gen_dog_imgs, exist_ok=True)

dogs_dir = '/kaggle/working/dogs'
os.makedirs(dogs_dir, exist_ok=True)

# Extract the image archive and the annotation archive into the working
# directory created above (so the former `!mkdir` call is no longer needed).
# stdout and stderr of unzip are discarded to keep the cell output short.
!unzip /kaggle/input/generative-dog-images/all-dogs.zip -d /kaggle/working/generative-dog-images > /dev/null 2>&1
!unzip /kaggle/input/generative-dog-images/Annotation.zip -d /kaggle/working/generative-dog-images > /dev/null 2>&1

# Generative Adversarial Network

# A Generative Adversarial Network (GAN) is a class of machine learning frameworks and a prominent approach to generative AI. In a GAN, two neural networks, a generator and a discriminator, compete with each other in a zero-sum game, where one agent's gain is the other agent's loss. The core idea of a GAN is to train the generator to "fool" the discriminator rather than to directly minimize the distance to individual images, while the discriminator is trained to tell how realistic the generated images appear. This way, the generator and the discriminator are updated dynamically against each other until the generator produces realistic imitations of the original images.

# A GAN aims to learn to generate new data with the same statistics as the provided training set. A GAN trained on photographs can generate new images that look superficially authentic to human observers. While GANs were originally intended for unsupervised learning, variations of GANs have been developed into models suitable for semi-supervised and supervised learning.

# Dataset Overview

# The Stanford Dogs dataset contains images of 120 breeds of dogs from around the world. This dataset has been built using images and annotations from ImageNet for the task of fine-grained image categorization. There are 20,580 images, of which 12,000 are used for training and 8,580 for testing. Class labels and bounding box annotations are provided for all the 12,000 images. In this study, training and testing images are not distinguished, as they are all repurposed for image generation.

In [None]:
#%matplotlib inline
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.utils import make_grid
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import random
from PIL import Image
import xml.etree.ElementTree as ET 

In [None]:
# Set random seed for reproducibility
manualSeed = 999
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

# cuBLAS needs a fixed workspace configuration to behave deterministically on
# recent CUDA versions; setdefault keeps any value the user already exported.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")

# Needed for reproducible results. `torch.set_deterministic` was deprecated in
# favour of `torch.use_deterministic_algorithms` and is missing from current
# PyTorch releases, so prefer the new name and fall back only on old versions.
if hasattr(torch, "use_deterministic_algorithms"):
    torch.use_deterministic_algorithms(True)
else:
    torch.set_deterministic(True)

In [None]:
# Locations of the extracted annotation files and of the extracted dog images
annotation_dir, all_dogs_dir = (
    '/kaggle/working/generative-dog-images/Annotation',
    '/kaggle/working/generative-dog-images/all-dogs',
)

In [None]:
def get_bndbox(filename, square=False):
    """Read the bounding box of the first ``object`` in an annotation XML file.

    Args:
        filename: Path (or file object) of the XML annotation, passed to
            ``ET.parse``.
        square: When true, replace the box by a square whose side is the
            longer of the two box sides and which shares (up to integer
            rounding) the centre of the original box. The square is not
            clipped, so its coordinates may be negative.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` of ints.
    """
    bndbox = ET.parse(filename).getroot().find('object').find('bndbox')
    xmin, ymin, xmax, ymax = (
        int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )

    if not square:
        return xmin, ymin, xmax, ymax

    # Side of the square is the longer edge; anchor it around the box centre.
    side = max(xmax - xmin, ymax - ymin)
    left = (xmin + xmax) // 2 - side // 2
    top = (ymin + ymax) // 2 - side // 2
    return left, top, left + side, top + side

In [None]:
# Source of this list: https://www.kaggle.com/korovai/dogs-images-intruders-extraction
# File names of cropped images that are excluded from the training data.
# Every entry has the form "<image id>_<index>.jpg"; the trailing index is
# presumably the object number inside the annotation file (TODO confirm against
# the linked kernel). Entries are grouped by breed folder.
intruders = [
    #n02088238-basset
    'n02088238_10870_0.jpg',
    
    #n02088466-bloodhound
    'n02088466_6901_1.jpg',
    'n02088466_6963_0.jpg',
    'n02088466_9167_0.jpg',
    'n02088466_9167_1.jpg',
    'n02088466_9167_2.jpg',
    
    #n02089867-Walker_hound
    'n02089867_2221_0.jpg',
    'n02089867_2227_1.jpg',
    
    #n02089973-English_foxhound # No details
    'n02089973_1132_3.jpg',
    'n02089973_1352_3.jpg',
    'n02089973_1458_1.jpg',
    'n02089973_1799_2.jpg',
    'n02089973_2791_3.jpg',
    'n02089973_4055_0.jpg',
    'n02089973_4185_1.jpg',
    'n02089973_4185_2.jpg',
    
    #n02090379-redbone
    'n02090379_4673_1.jpg',
    'n02090379_4875_1.jpg',
    
    #n02090622-borzoi # Confusing
    'n02090622_7705_1.jpg',
    'n02090622_9358_1.jpg',
    'n02090622_9883_1.jpg',
    
    #n02090721-Irish_wolfhound # very small
    'n02090721_209_1.jpg',
    'n02090721_1222_1.jpg',
    'n02090721_1534_1.jpg',
    'n02090721_1835_1.jpg',
    'n02090721_3999_1.jpg',
    'n02090721_4089_1.jpg',
    'n02090721_4276_2.jpg',
    
    #n02091032-Italian_greyhound
    'n02091032_722_1.jpg',
    'n02091032_745_1.jpg',
    'n02091032_1773_0.jpg',
    'n02091032_9592_0.jpg',
    
    #n02091134-whippet
    'n02091134_2349_1.jpg',
    'n02091134_14246_2.jpg',
    
    #n02091244-Ibizan_hound
    'n02091244_583_1.jpg',
    'n02091244_2407_0.jpg',
    'n02091244_3438_1.jpg',
    'n02091244_5639_1.jpg',
    'n02091244_5639_2.jpg',
    
    #n02091467-Norwegian_elkhound
    'n02091467_473_0.jpg',
    'n02091467_4386_1.jpg',
    'n02091467_4427_1.jpg',
    'n02091467_4558_1.jpg',
    'n02091467_4560_1.jpg',
    
    #n02091635-otterhound
    'n02091635_1192_1.jpg',
    'n02091635_4422_0.jpg',
    
    #n02091831-Saluki
    'n02091831_1594_1.jpg',
    'n02091831_2880_0.jpg',
    'n02091831_7237_1.jpg',
    
    #n02092002-Scottish_deerhound
    'n02092002_1551_1.jpg',
    'n02092002_1937_1.jpg',
    'n02092002_4218_0.jpg',
    'n02092002_4596_0.jpg',
    'n02092002_5246_1.jpg',
    'n02092002_6518_0.jpg',
    
    #n02093256-Staffordshire_bullterrier
    'n02093256_1826_1.jpg',
    'n02093256_4997_0.jpg',
    'n02093256_14914_0.jpg',
    
    #n02093428-American_Staffordshire_terrier
    'n02093428_5662_0.jpg',
    'n02093428_6949_1.jpg'
            ]

# Show the number of intruder entries as the cell output
len(intruders)

In [None]:


# Crop every annotated dog to a square around its bounding box and store the
# result in `dogs_dir`, skipping the images listed in `intruders`.

# Intruder entries look like "<image id>_<index>.jpg" while annotation files
# are named "<image id>". Strip the trailing "_<index>.jpg" and compare ids
# exactly: a substring test against the joined list would also drop images
# whose id is merely a prefix of an intruder id (e.g. "n02090721_1" vs
# "n02090721_1222_1.jpg").
intruder_ids = {name.rsplit('_', 1)[0] for name in intruders}

dogs_count = 0    # images actually written to dogs_dir
total_count = 0   # annotated images that exist on disk, intruders included
for breed in os.listdir(annotation_dir):
    breed_dir = os.path.join(annotation_dir, breed)
    for dog in os.listdir(breed_dir):
        jpg_name = os.path.join(all_dogs_dir, dog+'.jpg')
        if not os.path.exists(jpg_name):
            continue
        total_count += 1
        if dog in intruder_ids:
            continue
        bndbox = get_bndbox(os.path.join(breed_dir, dog), square=True)
        # The context manager closes the source file; converting to RGB keeps
        # every saved crop three-channel and writable as JPEG.
        with Image.open(jpg_name) as img:
            img.convert('RGB').crop(bndbox).save(os.path.join(dogs_dir, dog+'.jpg'))
        dogs_count += 1
print('number of dogs in the original dataset:', total_count)
print('number of dogs in the dataset excluding intruders:',dogs_count)

In [None]:

class DogDataset(Dataset):
    """Dataset that serves every file of one directory as an RGB image.

    Args:
        data_dir: Directory whose entries are all treated as image files.
        transforms: Optional callable applied to each loaded PIL image; its
            return value is what ``__getitem__`` returns.
    """

    def __init__(self, data_dir, transforms=None):
        # os.listdir returns entries in arbitrary order; sorting gives a
        # stable index -> file mapping, which seeded shuffling relies on.
        self.files = [os.path.join(data_dir, file) for file in sorted(os.listdir(data_dir))]
        self.transforms = transforms

    def __len__(self):
        """Return the number of image files found in the directory."""
        return len(self.files)

    def __getitem__(self, index):
        """Load image ``index``, force it to RGB and apply the transforms."""
        # convert() loads the pixel data and returns a new image, so the file
        # handle can be closed right away. Forcing RGB guarantees three
        # channels even for grayscale or RGBA files.
        with Image.open(self.files[index]) as raw:
            img = raw.convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)
        return img

In [None]:
img_size = (64, 64)
batch_size = 128

# Per-image preprocessing: PIL image -> tensor, resize to `img_size`, then
# shift/scale each of the three channels with mean 0.5 and std 0.5.
image_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(img_size),
    transforms.Normalize((0.5,) * 3, (0.5,) * 3),
])

# Shuffled mini-batches of cropped dog images, loaded by three worker processes
trainloader = DataLoader(
    dataset=DogDataset(data_dir=dogs_dir, transforms=image_transforms),
    batch_size=batch_size,
    shuffle=True,
    num_workers=3,
)

# DCGAN

# DCGAN (Deep Convolutional GAN) is a generative adversarial network architecture built from deep convolutional neural networks. It is specialized for generating realistic images, mainly square images, in the field of computer vision. A DCGAN can learn and capture detailed features of the images in the original training dataset in order to generate realistic fake images that are hardly distinguishable by human eyes.

# A great DCGAN tutorial can be found at: https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html

In [None]:
class Generator(nn.Module):
    """Transposed-convolution generator mapping latent vectors to images.

    Six ``ConvTranspose2d`` layers with kernel 2 and stride 2 each double the
    spatial size, so a ``(N, z_channels, 1, 1)`` input becomes a
    ``(N, out_channels, 64, 64)`` output. The five hidden layers are followed
    by batch normalisation and LeakyReLU(0.1); the output layer by Tanh.

    Args:
        z_channels: Number of channels of the latent input.
        out_channels: Number of channels of the generated image.
    """

    def __init__(self, z_channels, out_channels=3):
        super().__init__()

        widths = (z_channels, 1024, 512, 256, 128, 64)
        layers = []
        # Hidden up-sampling stages: (in, out) pairs of consecutive widths
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers += [
                nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.LeakyReLU(0.1, inplace=True),
            ]
        # Output stage: project to image channels and squash into [-1, 1]
        layers += [
            nn.ConvTranspose2d(widths[-1], out_channels, 2, stride=2, bias=False),
            nn.Tanh(),
        ]

        self.convs = nn.Sequential(*layers)

    def forward(self, x):
        """Run the latent batch ``x`` through the layer stack."""
        return self.convs(x)

In [None]:
class Discriminator(nn.Module):
    """Convolutional discriminator producing one probability per image.

    Four stride-2 convolutions (kernel 3, padding 1) halve the spatial size
    each time; every stage ends in LeakyReLU(0.2) and all but the first use
    batch normalisation. A final kernel-4 convolution followed by a sigmoid
    reduces the remaining feature map to a single value per image.
    """

    def __init__(self):
        super().__init__()
        widths = (3, 64, 128, 256, 512)
        layers = []
        for stage, (in_ch, out_ch) in enumerate(zip(widths[:-1], widths[1:])):
            layers.append(nn.Conv2d(in_ch, out_ch, 3, padding=1, stride=2, bias=False))
            # The first stage works on raw pixels and is left un-normalised
            if stage > 0:
                layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.LeakyReLU(0.2, inplace=True))

        layers.extend([nn.Conv2d(widths[-1], 1, 4, bias=False), nn.Sigmoid()])

        self.convs = nn.Sequential(*layers)

    def forward(self, x):
        """Return the flattened sigmoid outputs for the image batch ``x``."""
        scores = self.convs(x)
        return scores.view(-1)

In [None]:
#def weights_init(m):
#    classname = m.__class__.__name__
#    if classname.find('Conv') != -1:
#        m.weight.data.normal_(0.0, 0.02)
#    elif classname.find('BatchNorm') != -1:
#        m.weight.data.normal_(1.0, 0.02)
#        m.bias.data.fill_(0)

In [None]:
# Custom weight initialisation, applied to ``netG`` and ``netD`` via ``.apply``
def weights_init(m):
    """Re-initialise ``m`` in place according to its class name.

    Modules whose class name contains ``Conv`` get weights drawn from
    N(0, 0.02); otherwise modules whose class name contains ``BatchNorm`` get
    weights drawn from N(1, 0.02) and a zero bias. Other modules are untouched.
    """
    kind = type(m).__name__
    if 'Conv' in kind:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if 'BatchNorm' in kind:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)

In [None]:
# Build both networks with the custom initialisation and the BCE criterion.
# ``Module.apply`` returns the module itself, so construction and
# initialisation can be chained.
z_channels = 100
G = Generator(z_channels, 3).apply(weights_init)
D = Discriminator().apply(weights_init)
criterion = nn.BCELoss()

# Move everything to the GPU when one is available
cuda = torch.cuda.is_available()
print('Use GPU' if cuda else 'No GPU')
if cuda:
    G, D, criterion = G.cuda(), D.cuda(), criterion.cuda()

In [None]:
# Earlier settings, kept for reference: lr = 0.0002 and (b1, b2) = (0.5, 0.999)
lr = 0.0004
b1, b2 = 0.6, 0.999

# One AdamW optimizer per network, both with identical hyper-parameters
optimizerG, optimizerD = (
    torch.optim.AdamW(net.parameters(), lr=lr, betas=(b1, b2))
    for net in (G, D)
)

In [None]:
# Fixed latent batch that is re-used after every epoch, so the stored
# snapshots show how the same inputs evolve during training.
fixed_noise = torch.normal(0, 0.1, size=(64, z_channels, 1, 1))
if cuda:
    fixed_noise = fixed_noise.cuda()

#epoches = 100
epoches = 20
generate_imgs = []            # one CPU batch of generated images per epoch
G_losses, D_losses = [],[]    # loss of the last mini-batch of every epoch
for epoch in range(epoches):
    for img in trainloader:
        batch_len = img.size(0)
        # Latent noise is sampled on the CPU and moved afterwards, so the
        # random stream does not depend on the device in use.
        z = torch.normal(0, 0.1, size=(batch_len, z_channels, 1, 1))
        if cuda:
            img, z = img.cuda(), z.cuda()
        # Target labels, created directly on the device of the batch
        real = torch.ones(batch_len, device=img.device)
        fake = torch.zeros(batch_len, device=img.device)

        # train D: real images should score 1, generated images 0
        D.zero_grad()
        loss_real = criterion(D(img), real)
        loss_real.backward()

        fake_img = G(z)
        # detach() keeps the discriminator update from back-propagating into G
        loss_fake = criterion(D(fake_img.detach()), fake)
        loss_fake.backward()

        # Only used for reporting; the gradients were already accumulated above
        loss_D = (loss_real + loss_fake) / 2

        optimizerD.step()

        # train G: the generator wants its images to be classified as real
        G.zero_grad()
        loss_G = criterion(D(fake_img), real)
        loss_G.backward()
        optimizerG.step()

    with torch.no_grad():
        # Keep the snapshot on the CPU so finished epochs do not hold on to
        # GPU memory for the rest of the run.
        noise_img = G(fixed_noise).cpu()
        generate_imgs.append(noise_img)
        print(f'[Epoch {epoch+1}/{epoches}] [G loss: {loss_G.item()}] [D loss: {loss_D.item()} | loss_real: {loss_real.item()} loss_fake: {loss_fake.item()}]')
        G_losses.append(loss_G.item()) 
        D_losses.append(loss_D.item())

In [None]:
# Plot the recorded generator and discriminator losses against the epoch number
fig, ax = plt.subplots(figsize=(15, 10))
x_epoches = list(range(1, epoches + 1))
ax.plot(x_epoches, G_losses, 'b-o', label='Generator Loss')
ax.plot(x_epoches, D_losses, 'r-x', label='Discriminator Loss')
ax.legend()
ax.set_title('Optimizer Loss vs Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')

In [None]:
import matplotlib.animation as animation
from IPython.display import HTML

# Animate the per-epoch snapshots: one image grid per frame, one second each
fig = plt.figure(figsize=(15,15))
plt.axis("off")

# Every frame is a single-artist list holding the grid of up to 64 images
imgs = [
    [plt.imshow(make_grid(batch_images[:64], padding=2, normalize=True).cpu().permute(1,2,0))]
    for batch_images in generate_imgs
]

ani = animation.ArtistAnimation(fig, imgs, interval=1000, repeat_delay=1000, blit=True)
HTML(ani.to_jshtml())