In [None]:
!mkdir /kaggle/working/generative-dog-images
!unzip /kaggle/input/generative-dog-images/all-dogs.zip -d /kaggle/working/generative-dog-images > /dev/null 2>&1
!unzip /kaggle/input/generative-dog-images/Annotation.zip -d /kaggle/working/generative-dog-images > /dev/null 2>&1

In [None]:
import os
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
from PIL import Image
import xml.etree.ElementTree as ET 

In [None]:
# Locations of the extracted dog images and of their annotation files.
data_root = '/kaggle/working/generative-dog-images'
all_dogs_dir = os.path.join(data_root, 'all-dogs')
annotation_dir = os.path.join(data_root, 'Annotation')

In [None]:
def get_bndbox(filename, square=False):
    """Read the bounding box of the first ``object`` in an annotation XML file.

    Args:
        filename: Source accepted by ``ET.parse`` (path or file object).
        square: When True, expand the box to a square whose side is the
            longer edge of the original box, positioned around the box's
            (integer) centre. No clamping is applied, so the returned
            coordinates may be negative or exceed the image size.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` of ints.
    """
    bndbox = ET.parse(filename).getroot().find('object').find('bndbox')
    left, top, right, bottom = (
        int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )

    if not square:
        return left, top, right, bottom

    # Side of the square is the longer edge; anchor it on the integer centre.
    side = max(right - left, bottom - top)
    mid_x = (left + right) // 2
    mid_y = (top + bottom) // 2
    sq_left = mid_x - side // 2
    sq_top = mid_y - side // 2
    return sq_left, sq_top, sq_left + side, sq_top + side

In [None]:
# Crop every annotated dog to its squared bounding box and cache the crops on disk.
dogs_dir = '/kaggle/working/dogs'
os.makedirs(dogs_dir, exist_ok=True)  # safe to re-run the cell

for breed in os.listdir(annotation_dir):
    breed_dir = os.path.join(annotation_dir, breed)
    for dog in os.listdir(breed_dir):
        jpg_name = os.path.join(all_dogs_dir, dog + '.jpg')
        if not os.path.exists(jpg_name):
            # Annotation without a matching image: skip before parsing the XML.
            continue
        bndbox = get_bndbox(os.path.join(breed_dir, dog), square=True)
        # The context manager closes the source file; converting to RGB first
        # keeps the JPEG save from failing on modes JPEG cannot store (e.g. RGBA, P).
        with Image.open(jpg_name) as src:
            src.convert('RGB').crop(bndbox).save(os.path.join(dogs_dir, dog + '.jpg'))
print('number of dogs:', len(os.listdir(dogs_dir)))

In [None]:
from torch.utils.data import Dataset, DataLoader

class DogDataset(Dataset):
    """Unlabelled image dataset over every entry of a directory.

    Args:
        data_dir: Directory whose entries are all treated as image files.
        transforms: Optional callable applied to each loaded PIL image.

    Each item is the (optionally transformed) image converted to RGB.
    """

    def __init__(self, data_dir, transforms=None):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping stable across runs and machines.
        self.files = [os.path.join(data_dir, name) for name in sorted(os.listdir(data_dir))]
        self.transforms = transforms

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        # Force three channels so channel-wise transforms (e.g. a 3-channel
        # Normalize) do not fail on grayscale/CMYK files; `with` closes the
        # file handle once the pixel data has been copied by convert().
        with Image.open(self.files[index]) as src:
            img = src.convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)
        return img

In [None]:
# Preprocessing: resize to 64x64, convert to a tensor, scale to [-1, 1]
# (matching the generator's Tanh output range).
img_size = (64, 64)
image_transforms = transforms.Compose([
    # Resize the PIL image *before* ToTensor: PIL downsampling is antialiased,
    # whereas resizing tensors depends on the torchvision version's `antialias`
    # default. It also converts a much smaller image to a tensor.
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

batch_size = 128
trainloader = DataLoader(
    DogDataset(data_dir=dogs_dir, transforms=image_transforms),
    batch_size = batch_size,
    shuffle = True,
    num_workers = 3,
)

# DCGAN

In [None]:
class Generator(nn.Module):
    """Transposed-convolution generator.

    Six ``ConvTranspose2d`` layers with kernel 2 / stride 2 each double the
    spatial size (e.g. a 1x1 latent becomes 64x64). All but the last are
    followed by BatchNorm + ReLU; the last is followed by Tanh.

    Args:
        z_channels: Number of channels of the latent input.
        out_channels: Number of channels of the generated image.
    """

    def __init__(self, z_channels, out_channels=3):
        super().__init__()

        widths = [z_channels, 1024, 512, 256, 128, 64]
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers += [
                nn.ConvTranspose2d(c_in, c_out, 2, stride=2, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]
        # Output head: final upsampling step squashed by Tanh.
        layers += [
            nn.ConvTranspose2d(widths[-1], out_channels, 2, stride=2, bias=False),
            nn.Tanh(),
        ]

        self.convs = nn.Sequential(*layers)

    def forward(self, x):
        return self.convs(x)

In [None]:
class Discriminator(nn.Module):
    """Strided-convolution discriminator producing one probability per image.

    Four 3x3 stride-2 convolutions halve the spatial size each time, followed
    by a 4x4 convolution to a single channel and a Sigmoid. BatchNorm is used
    after every strided convolution except the first.
    """

    def __init__(self):
        super().__init__()
        widths = [3, 64, 128, 256, 512]  # size: 64->32->16->8->4
        layers = []
        for idx, (c_in, c_out) in enumerate(zip(widths, widths[1:])):
            layers.append(nn.Conv2d(c_in, c_out, 3, padding=1, stride=2, bias=False))
            if idx > 0:
                # The first conv block is left without batch normalisation.
                layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.LeakyReLU(0.2, inplace=True))

        layers.extend([
            nn.Conv2d(widths[-1], 1, 4, bias=False),
            nn.Sigmoid(),
        ])

        self.convs = nn.Sequential(*layers)

    def forward(self, x):
        # Flatten the conv output into a 1-D tensor of scores.
        return self.convs(x).view(-1)

In [None]:
def weights_init(m):
    """Initialise a module in place, selected by its class name.

    Modules whose class name contains ``Conv`` get weights drawn from
    N(0, 0.02); those containing ``BatchNorm`` get weights from N(1, 0.02)
    and zero bias. Any other module is left untouched. Intended to be passed
    to ``nn.Module.apply``.
    """
    name = type(m).__name__
    if 'Conv' in name:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in name:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

In [None]:
# Number of channels of the latent vector fed to the generator.
z_channels = 100

# Build both networks, re-initialising each one right after construction.
G = Generator(z_channels, 3)
G.apply(weights_init)
D = Discriminator()
D.apply(weights_init)
criterion = nn.BCELoss()

# Move everything to the GPU when one is available.
cuda = torch.cuda.is_available()
print('Use GPU' if cuda else 'No GPU')
if cuda:
    G, D, criterion = G.cuda(), D.cuda(), criterion.cuda()

In [None]:
# Adam settings shared by both networks: learning rate and first-moment decay.
lr = 0.0002
b = 0.5
optimizer_G, optimizer_D = (
    torch.optim.Adam(net.parameters(), lr=lr, betas=(b, 0.999)) for net in (G, D)
)

In [None]:
def range_rand(low, high, shape):
    """Return a tensor of the given shape with uniform random values in [low, high).

    ``torch.rand`` already samples from [0, 1), so the values are only scaled
    and shifted. The previous min-max normalisation of the sample divided by
    zero (yielding NaN) whenever the tensor held a single element.

    Args:
        low: Lower bound of the range.
        high: Upper bound of the range (reached only through float rounding).
        shape: Shape of the returned tensor.

    Returns:
        A float tensor of shape ``shape``.
    """
    return torch.rand(shape) * (high - low) + low

In [None]:
# Fixed latent batch, re-used after every epoch so the snapshots are comparable.
fixed_noise = torch.normal(0, 0.1, size=(64, z_channels, 1, 1))
if cuda:
    fixed_noise = fixed_noise.cuda()

epoches = 100
generate_imgs = []    
for epoch in range(epoches):
    for i, img in enumerate(trainloader):
        # One latent vector per real image in this batch (the last batch may be smaller).
        z = torch.normal(0, 0.1, size=(img.size(0), z_channels, 1, 1))
        # real = range_rand(0.9, 1, (img.size(0), 1))
        # fake = range_rand(0, 0.1, (img.size(0), 1))
        real = torch.ones(img.size(0))
        fake = torch.zeros(img.size(0))
        if cuda:
            img, z = img.cuda(), z.cuda()
            real, fake = real.cuda(), fake.cuda()

        # train D: real images should score 1, generated images 0
        D.zero_grad()
        loss_real = criterion(D(img), real)
        loss_real.backward()

        fake_img = G(z)
        # detach() keeps the discriminator update from back-propagating into G
        loss_fake = criterion(D(fake_img.detach()), fake)
        loss_fake.backward()

        # Only used for logging; gradients were already accumulated above.
        loss_D = (loss_real + loss_fake) / 2

        optimizer_D.step()

        # train G: it is rewarded when D labels its images as real
        G.zero_grad()
        loss_G = criterion(D(fake_img), real)
        loss_G.backward()
        optimizer_G.step()

    with torch.no_grad():
        noise_img = G(fixed_noise)
        # Store the snapshot in host memory; appending the device tensor would
        # keep one extra batch on the GPU for every epoch of the run.
        generate_imgs.append(noise_img.cpu())
        print(f'[Epoch {epoch+1}/{epoches}] [G loss: {loss_G.item()}] [D loss: {loss_D.item()} | loss_real: {loss_real.item()} loss_fake: {loss_fake.item()}]')

In [None]:
import matplotlib.animation as animation
from IPython.display import HTML

# Animate the per-epoch fixed-noise samples: one image grid per frame.
fig = plt.figure(figsize=(10,10))
plt.axis("off")

# ArtistAnimation expects a list of frames, each frame being a list of artists.
imgs = [
    [plt.imshow(make_grid(batch_images[:64], padding=2, normalize=True).cpu().permute(1,2,0))]
    for batch_images in generate_imgs
]

ani = animation.ArtistAnimation(fig, imgs, interval=1000, repeat_delay=1000, blit=True)
HTML(ani.to_jshtml())