 # Тренировка нейронных сетей на реальных данных

## Классификация котов и собак

In [None]:
# Запускать, только если вы работаете в Google Colab
from google.colab import drive

drive.mount('/content/drive')

In [None]:
# !wget -nc https://www.dropbox.com/s/gqdo90vhli893e0/data.zip

In [None]:
# !unzip -n data.zip -d /content/drive/MyDrive/hw_01

In [None]:
import os

from matplotlib import pyplot as plt
import numpy as np
import time
# from tqdm import tqdm_notebook as tqdm

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torchsummary import summary

Определим в одном месте все константы, которые понадобятся нам в дальнейшем. Их смысл будет прояснён по мере использования

In [None]:
### Global hyperparameters shared by the CNN cells of this notebook

# Root folder of the image dataset; the training, validation and test
# subfolders are resolved relative to it
DATA_PATH = '/content/drive/MyDrive/hw_01/'

# Value handed to the data loaders as num_workers
NUM_WORKERS = 4

# Every image is resized to SIZE_H x SIZE_W; the originals are larger, the
# small size is chosen to speed up training
SIZE_H = 96
SIZE_W = SIZE_H
N_CHANNELS = 3

# Number of classes in the dataset
NUM_CLASSES = 2

# Number of passes over the training data
EPOCH_NUM = 30

# Mini-batch size (usually picked as a power of two)
BATCH_SIZE = 128

# Per-channel mean and standard deviation used to normalize the images
image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]

# Width of the last hidden (embedding) layer of the CNN models
EMBEDDING_SIZE = 256

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
transformer = transforms.Compose([
    transforms.Resize((SIZE_H, SIZE_W)),        # scaling images to fixed size
    transforms.ToTensor(),                      # converting to tensors
    transforms.Normalize(image_mean, image_std) # normalize image data per-channel
])

In [None]:
train_dataset = torchvision.datasets.ImageFolder(os.path.join(DATA_PATH, 'train_11k'), transform=transformer)
val_dataset   = torchvision.datasets.ImageFolder(os.path.join(DATA_PATH, 'val'), transform=transformer)
test_dataset  = torchvision.datasets.ImageFolder(os.path.join(DATA_PATH, 'test_labeled'), transform=transformer)

In [None]:
n_train, n_val, n_test = len(train_dataset), len(val_dataset), len(test_dataset)

In [None]:
n_train, n_val, n_test

In [None]:
train_loader = torch.utils.data.DataLoader(
    train_dataset, 
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS
)

In [None]:
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS
)

In [None]:
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS
)

In [None]:
class Flatten(nn.Module):
    """Collapse every dimension after the first one into a single dimension."""

    def forward(self, input):
        # Keep dim 0 as is and let view() infer the merged size of the rest
        leading = input.size(0)
        return input.view(leading, -1)

In [None]:
from tqdm import tqdm

def train_model(model, train_loader, val_loader, loss_fn, opt, n_epochs):
    '''
    Train ``model`` and measure validation accuracy after every epoch.

    model: neural network to train
    train_loader, val_loader: iterables yielding (inputs, labels) batches
    loss_fn: objective to minimise, called as loss_fn(predictions, labels)
    opt: optimizer that updates the network weights
    n_epochs: number of full passes over train_loader

    Returns (train_loss, val_accuracy): the per-batch training losses and the
    per-batch validation accuracies (fractions in [0, 1]) of all epochs, in
    the order they were computed.

    Batches are moved to the module-level ``device``.
    '''
    train_loss = []
    val_accuracy = []

    for epoch in range(n_epochs):
        start_time = time.time()
        # Remember where this epoch's entries start so that the summary below
        # covers exactly the batches of the loaders that were passed in.
        loss_start = len(train_loss)
        acc_start = len(val_accuracy)

        model.train(True)  # enable dropout / batch_norm training behavior
        for X_batch, y_batch in tqdm(train_loader):
            # move data to target device
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            # train on batch: zero the grads, compute loss, backprop, optimizer step
            opt.zero_grad()
            predictions = model(X_batch)
            loss = loss_fn(predictions, y_batch)
            loss.backward()
            opt.step()
            train_loss.append(loss.item())

        model.train(False)  # disable dropout / use averages for batch_norm
        # No gradients are needed for validation; skipping them saves memory
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                # move data to target device
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                # predicted class = index of the largest logit
                logits = model(X_batch)
                y_pred = logits.max(1)[1]
                val_accuracy.append(np.mean((y_batch.cpu() == y_pred.cpu()).numpy()))

        # print the results for this epoch:
        print(f'Epoch {epoch + 1} of {n_epochs} took {time.time() - start_time:.3f}s')

        train_loss_value = np.mean(train_loss[loss_start:])
        val_accuracy_value = np.mean(val_accuracy[acc_start:]) * 100

        print(f"  training loss (in-iteration): \t{train_loss_value:.6f}")
        print(f"  validation accuracy: \t\t\t{val_accuracy_value:.2f} %")

    return train_loss, val_accuracy

## Задание 1. Реализовать сверточную нейросеть для классификации котов и собак (0.4 балла)

### First step

**conv-pool-conv-pool-dense-dense!**

Создайте мини-сверточную нейронную сеть со следующей структурой:
* Входной слой
* 3 классических сверточных блока `convolution->relu->pool`:
  * свертка 3x3 с 128 фильтрами и функцией активации _ReLU_
  * 2x2 пулинг (или поставьте для предыдущей свертки страйд = 2)
* Flatten
* 30% Dropout 
* Линейный слой с 256 нейронами и функцией активации _ReLU_
* 30% dropout
* Выходной линейный слой.

__Convolutional layers__ в торче создаются как любой другой слой, но у него есть особые параметры:

__`...`__

__`model.add_module('conv1', nn.Conv2d(in_channels=3, out_channels=128, kernel_size=3)) # свертка`__

__`model.add_module('pool1', nn.MaxPool2d(2)) # max pooling 2 на 2`__

__`...`__


Когда вы закончите создание нейросети (когда функция compute_loss не будет поднимать ошибки), обучите её с оптимайзером __Adam__ с LR = 3e-4 (Константа Карпатого)

Если всё верно, вы должны получить минимум __75%__ точности на валидации.

__ХАК_ДНЯ__ : количество каналов должно быть в порядке количества class_labels

__ХАК_ДНЯ_2__ : вы можете поставить stride=2 для Conv2d слоя чтобы увеличить скорость обучения, но помните про размерности

In [None]:
model_cnn = nn.Sequential()

# Your code here: CONV->POOL->CONV-POOL->... as many as you wish
# Three conv -> ReLU -> 2x2 max-pool stages (3x3 kernels, 128 filters each)
# 1
model_cnn.add_module('conv1', nn.Conv2d(in_channels=3, out_channels=128, kernel_size=3))
model_cnn.add_module('relu1', nn.ReLU())
model_cnn.add_module('pool1', nn.MaxPool2d(2))
# 2
model_cnn.add_module('conv2', nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3))
model_cnn.add_module('relu2', nn.ReLU())
model_cnn.add_module('pool2', nn.MaxPool2d(2))
# 3
model_cnn.add_module('conv3', nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3))
model_cnn.add_module('relu3', nn.ReLU())
model_cnn.add_module('pool3', nn.MaxPool2d(2))
# End of your code here

# global average pooling: one value per channel, whatever the size of the
# incoming feature map is
model_cnn.add_module('gap_5', nn.AdaptiveAvgPool2d(1))
# dropout for regularization
model_cnn.add_module('dropout_5', nn.Dropout(0.3))
# "flatten" the data
model_cnn.add_module('flat', nn.Flatten())
# last fully-connected layer, used to create embedding vectors
model_cnn.add_module('fc_6', nn.Linear(128, EMBEDDING_SIZE))
model_cnn.add_module('relu_6', nn.ReLU())

model_cnn.add_module('dropout_6', nn.Dropout(0.3))

# Raw logits for the NUM_CLASSES classes. No activation follows them:
# nn.CrossEntropyLoss applies log-softmax itself, so squashing the logits
# first would distort the loss. The argmax used for predictions is unaffected.
model_cnn.add_module('fc_logits', nn.Linear(EMBEDDING_SIZE, NUM_CLASSES))

# move model to computing device
model_cnn = model_cnn.to(device)


__Подсказка:__ Можно не считать размерности слоев руками: просто вставьте любую размерность (например, 1 юнит) и запустите compute_loss. Вы увидите что-то в духе:

__`RuntimeError: size mismatch, m1: [5 x 1960], m2: [1 x 64] at /some/long/path/to/torch/operation`__

Видите __1960__? Это та размерность, которую вам нужно выставить.

In [None]:
summary(model_cnn, train_dataset[0][0].shape)

In [None]:
# Оптимайзер
opt = torch.optim.Adam(params=model_cnn.parameters(), lr=3e-4)

# Функция потерь (Лосс функция)
loss_fn =  nn.CrossEntropyLoss()

# Число эпох
n_epochs = 20

In [None]:
opt.zero_grad()
train_loss, val_accuracy = train_model(model_cnn,
                                         train_loader,
                                         val_loader,
                                         loss_fn,
                                         opt,
                                         n_epochs)

In [None]:
def test_model(model, test_loader, subset='test'):
    """Evaluate ``model`` on ``test_loader`` and print its accuracy.

    model: network to evaluate; it is switched to inference mode
    test_loader: iterable yielding (inputs, labels) batches
    subset: name of the data split, used only in the printed report

    Returns the fraction of correctly classified samples over the whole
    loader (NaN when the loader yields nothing).

    Inputs are moved to the module-level ``device``.
    """
    model.train(False)  # disable dropout / use averages for batch_norm
    n_correct = 0
    n_seen = 0
    # Inference only: gradients are not needed
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            logits = model(X_batch.to(device))
            y_pred = logits.max(1)[1]
            # Count samples rather than averaging per-batch accuracies, so a
            # smaller last batch does not get extra weight
            n_correct += int((y_batch.cpu() == y_pred.cpu()).sum())
            n_seen += len(y_batch)

    test_accuracy = np.float64(n_correct) / n_seen if n_seen else np.float64('nan')

    print("Results:")
    print(f"  {subset} accuracy:\t\t{test_accuracy * 100:.2f} %")
    if test_accuracy > 0.9:
        print("Amazing!")
    elif test_accuracy > 0.7:
        print("Good!")
    else:
        print("We need more magic! Follow instructons below")
    return test_accuracy

In [None]:
best_model_cnn = model_cnn

val_accuracy = test_model(best_model_cnn, val_loader, subset='val')
test_accuracy = test_model(best_model_cnn, test_loader, subset='test')

__Конец первой части__

# CVAE  (0.3 балла)

Теперь построим CVAE модель. Её отличие в том, что на вход энкодеру и декодеру подаётся значение цифры. Таким образом, модели уже не нужно запоминать значение цифры в латентном коде, т.к. оно добавляется нами вручную.

**Упражнение:** Реализуйте CVAE, модифицировав VAE с использованием полносвязных слоёв, который был разобран в начале семинара. 

Значения лэйблов (y) в виде one-hot векторов нужно присоединить (конкатенировать) к векторам, которые подаются на вход энкодеру и декодеру.

Для получения one-hot векторов используйте функцию F.one_hot(input, n_classes)

In [None]:
import torch
import torchvision
from torchvision import transforms, datasets
from ipywidgets import interact
from collections import Counter
import matplotlib.pyplot as plt
%matplotlib inline
import time
import pylab as pl
from IPython import display
import numpy as np
from functools import reduce

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torchvision.utils import save_image

In [None]:
# MNIST dataset
dataset = torchvision.datasets.MNIST(root='/content/drive/MyDrive/hw_01_2/',
                                     train=True,
                                     transform=transforms.ToTensor(),
                                     download=True)

# Data loader
batch_size = 32
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=batch_size, 
                                          shuffle=True)

In [None]:
@interact(i=(0, len(dataset)-1))
def f(i):
    print(dataset[i][1])
    plt.imshow(dataset[i][0].numpy()[0], cmap='gray')

#Создаем модель
#Your code goes here:

In [None]:
# CVAE model
class CVAE(nn.Module):
    """Conditional VAE built from fully connected layers.

    The class label is one-hot encoded and concatenated to the input of both
    the encoder and the decoder.

    image_size: number of pixels of a flattened image
    z_size: dimensionality of the latent code
    n_classes: number of distinct labels (length of the one-hot vector)
    """

    def __init__(self, image_size=28*28, z_size=20, n_classes=10):
        super().__init__()
        self.image_size = image_size
        self.n_classes = n_classes
        # encoder: (image + label) -> 400 -> 128 -> (mu, log_var)
        self.fc1 = nn.Linear(image_size + n_classes, 400)
        self.fc2 = nn.Linear(400, 128)
        self.fc3 = nn.Linear(128, z_size)  # mean of q(z | x, y)
        self.fc4 = nn.Linear(128, z_size)  # log-variance of q(z | x, y)
        # decoder: (z + label) -> 400 -> image
        self.fc5 = nn.Linear(z_size + n_classes, 400)
        self.fc6 = nn.Linear(400, image_size)

    def _one_hot(self, y, like):
        """One-hot encode integer labels ``y`` with the dtype and device of ``like``."""
        # F.one_hot returns int64 on y's device; cast explicitly so that the
        # concatenation never mixes dtypes or devices.
        return F.one_hot(y, self.n_classes).to(like)

    def encode(self, x, y):
        """Return (mu, log_var) of the approximate posterior for images ``x`` with labels ``y``."""
        flat = x.flatten(1)
        h = torch.cat((flat, self._one_hot(y, flat)), dim=1)
        h = F.relu(self.fc1(h))
        h = F.relu(self.fc2(h))
        return self.fc3(h), self.fc4(h)

    def reparameterize(self, mu, log_var):
        """Sample z = mu + eps * std with eps ~ N(0, I) (reparameterization trick)."""
        std = torch.exp(log_var / 2)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z, y):
        """Decode latent codes ``z`` conditioned on labels ``y`` into flat images in [0, 1]."""
        h = torch.cat((z, self._one_hot(y, z)), dim=1)
        h = F.relu(self.fc5(h))
        return torch.sigmoid(self.fc6(h))

    def forward(self, x, y):
        """Return (reconstruction shaped (-1, 1, side, side), mu, log_var)."""
        mu, log_var = self.encode(x, y)
        z = self.reparameterize(mu, log_var)
        out = self.decode(z, y)
        # Images are treated as square: side = sqrt(image_size)
        side = int(self.image_size ** 0.5)
        x_reconst = torch.reshape(out, (-1, 1, side, side))
        return x_reconst, mu, log_var

In [None]:
def train(model, data_reader, optimizer, num_epochs=2):
    """Train a CVAE by minimising reconstruction loss plus KL divergence.

    model: network called as model(x, y) and returning (x_reconst, mu, log_var)
    data_reader: sized iterable yielding (images, labels) batches
    optimizer: optimizer that updates the model parameters
    num_epochs: number of passes over data_reader

    Every 100 steps the current loss is appended to a running history, the
    history is re-plotted in the notebook output and a status line is printed.
    Batches are moved to the module-level ``device``.
    """
    loss_history = []
    for epoch in range(num_epochs):
        # Iterate over the loader that was passed in, not a global one
        for i, (x, y) in enumerate(data_reader):
            # Forward pass
            x = x.to(device)
            y = y.to(device)
            x_reconst, mu, log_var = model(x, y)

            # Reconstruction loss summed over pixels and batch, plus the
            # closed-form KL divergence between N(mu, sigma^2) and N(0, I)
            reconst_loss = F.binary_cross_entropy(x_reconst, x, reduction='sum')
            kl_div = - 0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())

            # Backprop and optimize
            loss = reconst_loss + kl_div
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                loss_history.append(loss.item())
                pl.plot(loss_history, color='red')
                display.clear_output(wait=True)
                display.display(pl.gcf())
                print ("Epoch[{}/{}], Step [{}/{}], Reconst Loss: {:.4f}, KL Div: {:.4f}" 
                      .format(epoch+1, num_epochs, i+1, len(data_reader), reconst_loss.item(), kl_div.item()))

In [None]:
model = CVAE().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
train(model, data_loader, optimizer, 10)

In [None]:
n = 10
im_size = 28
from scipy.stats import norm
# Так как сэмплируем из N(0, I), то сетку узлов, в которых генерируем цифры, берем из обратной функции распределения
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))

def draw_manifold(model, lbl, show=True, z_size=20):
    """Decode a grid of latent points for one label and tile the results.

    model: object exposing decode(z, y)
    lbl: integer class label to condition the decoder on
    show: when True, display the tiled image with matplotlib
    z_size: dimensionality of the latent code expected by the decoder

    Only the first two latent coordinates vary (taken from the module-level
    ``grid_x`` / ``grid_y``); the remaining ones stay at zero. Uses the
    module-level ``n``, ``im_size`` and ``device``.

    Returns the (im_size * n, im_size * n) numpy array with all decoded images.
    """
    with torch.no_grad():
        figure = np.zeros((im_size * n, im_size * n))
        # Build the label once, on the same device as the latent codes
        label = torch.tensor([lbl], dtype=torch.long, device=device)
        for i, yi in enumerate(grid_y):
            for j, xi in enumerate(grid_x):
                z_sample = torch.zeros(1, z_size, dtype=torch.float, device=device)
                z_sample[0, 0] = float(xi)
                z_sample[0, 1] = float(yi)
                x_decoded = model.decode(z_sample, label).cpu().numpy()
                digit = x_decoded.reshape((im_size, im_size))
                figure[i * im_size: (i + 1) * im_size,
                       j * im_size: (j + 1) * im_size] = digit
        if show:
            # Display the tiled image without axes or grid lines
            plt.figure(figsize=(10, 10))
            plt.imshow(figure, cmap='gray')
            plt.grid(False)
            ax = plt.gca()
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            plt.show()
    return figure

In [None]:
for i in range(10):
    draw_manifold(model, i)

# GAN 0.3 балла

In [None]:
import os
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as tt
import torch
import torch.nn as nn
import cv2
from tqdm.notebook import tqdm
import torch.nn.functional as F
from torchvision.utils import save_image
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
!wget https://www.dropbox.com/s/329oy3cprlvn5vb/archive.zip

In [None]:
from zipfile import ZipFile

with ZipFile('archive.zip', 'r') as zipObj:
   # Extract all the contents of zip file in current directory
   zipObj.extractall()

In [None]:
DATA_DIR = './cats/'

In [None]:
# set parameters of the transformed data
image_size = 64
batch_size = 128
stats = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)

In [None]:
# As dataset is stored in the directory, we can create dataset
# as ImageFolder PyTorch object and set all the transformations here
train_ds = ImageFolder(DATA_DIR, transform=tt.Compose([
    tt.ToTensor(),
    tt.Normalize(*stats)]))

In [None]:
# Create PyTorch DataLoader object to produce batches
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=2, pin_memory=True)

In [None]:
# for the nicer images visualization 
# we make inverse transformation for normalization
def denorm(img_tensors):
    """Invert the normalization using the first-channel mean and std from ``stats``."""
    mean = stats[0][0]
    std = stats[1][0]
    return img_tensors * std + mean

In [None]:
# functions to plot images
def show_images(images, nmax=64):
    """Plot up to ``nmax`` images of a batch as a grid with 8 images per row.

    images: batch tensor of normalized images (any device)
    nmax: maximum number of images taken from the start of the batch
    """
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.set_xticks([]); ax.set_yticks([])
    # matplotlib cannot read CUDA tensors, so bring the selected images to the CPU
    shown = images.detach()[:nmax].cpu()
    # make_grid returns (C, H, W); imshow expects (H, W, C)
    ax.imshow(make_grid(denorm(shown), nrow=8).permute(1, 2, 0))

def show_batch(dl, nmax=64):
    """Plot the first batch produced by ``dl``; do nothing when it is empty."""
    first = next(iter(dl), None)
    if first is not None:
        images, _ = first
        show_images(images, nmax)

In [None]:
show_batch(train_dl)

In [None]:
# Utils functions for GPU usage of neural networks
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)
    
def to_device(data, device):
    """Move ``data`` to ``device``; lists and tuples are handled recursively and come back as lists."""
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Iterable wrapper that moves every batch of a data loader to a device."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __iter__(self):
        """Lazily yield the batches of the wrapped loader after moving them to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

    def __len__(self):
        """Return the number of batches of the wrapped loader."""
        return len(self.dl)

In [None]:
device = get_default_device()
device

In [None]:
train_dl = DeviceDataLoader(train_dl, device)

In [None]:
# DCGAN-style discriminator for 3 x 64 x 64 images.
# Each strided stage halves the resolution:
# 64 x 32 x 32 -> 128 x 16 x 16 -> 256 x 8 x 8 -> 512 x 4 x 4
discriminator = nn.Sequential(
    *[
        layer
        for c_in, c_out in ((3, 64), (64, 128), (128, 256), (256, 512))
        for layer in (
            nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.LeakyReLU(0.2, inplace=True),
        )
    ],
    # 512 x 4 x 4 -> 1 x 1 x 1
    nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False),
    # one probability per image
    nn.Flatten(),
    nn.Sigmoid(),
)

In [None]:
discriminator = to_device(discriminator, device)

In [None]:
latent_size = 128

In [None]:
# DCGAN-style generator: latent_size x 1 x 1 -> 3 x 64 x 64 image in [-1, 1]
generator = nn.Sequential(
    # latent_size x 1 x 1 -> 512 x 4 x 4
    nn.ConvTranspose2d(latent_size, 512, kernel_size=4, stride=1, padding=0, bias=False),
    nn.BatchNorm2d(512),
    nn.ReLU(True),
    # each stage doubles the resolution:
    # 256 x 8 x 8 -> 128 x 16 x 16 -> 64 x 32 x 32
    *[
        layer
        for c_in, c_out in ((512, 256), (256, 128), (128, 64))
        for layer in (
            nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.ReLU(True),
        )
    ],
    # 64 x 32 x 32 -> 3 x 64 x 64
    nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1, bias=False),
    nn.Tanh(),
)

In [None]:
xb = torch.randn(batch_size, latent_size, 1, 1) # random latent tensors
fake_images = generator(xb)
print(fake_images.shape)
show_images(fake_images)

In [None]:
generator = to_device(generator, device)

In [None]:
def train_discriminator(real_images, opt_d):
    """Run one discriminator update on a real batch and a freshly generated fake batch.

    real_images: batch of real images, already on ``device``
    opt_d: optimizer of the discriminator

    Returns (loss, real_score, fake_score): the summed BCE loss and the mean
    discriminator output on the real and on the fake batch.

    Uses the module-level ``discriminator``, ``generator``, ``batch_size``,
    ``latent_size`` and ``device``.
    """
    # Clear discriminator gradients
    opt_d.zero_grad()

    # Real images should be scored as 1
    real_preds = discriminator(real_images)
    real_targets = torch.ones(real_images.size(0), 1, device=device)
    real_loss = F.binary_cross_entropy(real_preds, real_targets)
    real_score = torch.mean(real_preds).item()

    # Generate fake images. The generator is not updated in this step, so no
    # graph is built through it and backward() stops at the discriminator.
    latent = torch.randn(batch_size, latent_size, 1, 1, device=device)
    with torch.no_grad():
        fake_images = generator(latent)

    # Fake images should be scored as 0
    fake_targets = torch.zeros(fake_images.size(0), 1, device=device)
    fake_preds = discriminator(fake_images)
    fake_loss = F.binary_cross_entropy(fake_preds, fake_targets)
    fake_score = torch.mean(fake_preds).item()

    # Update discriminator weights
    loss = real_loss + fake_loss
    loss.backward()
    opt_d.step()
    return loss.item(), real_score, fake_score

In [None]:
def train_generator(opt_g):
    """Run one generator update and return its loss as a float.

    The generator is rewarded when the discriminator scores its images as
    real. Uses the module-level ``generator``, ``discriminator``,
    ``batch_size``, ``latent_size`` and ``device``.
    """
    opt_g.zero_grad()

    # Sample noise and score the generated images with the discriminator
    noise = torch.randn(batch_size, latent_size, 1, 1, device=device)
    scores = discriminator(generator(noise))

    # Target "real" (1) for every fake image: that is what fooling means
    wanted = torch.ones(batch_size, 1, device=device)
    g_loss = F.binary_cross_entropy(scores, wanted)

    g_loss.backward()
    opt_g.step()

    return g_loss.item()

In [None]:
sample_dir = 'generated'
os.makedirs(sample_dir, exist_ok=True)

In [None]:
def save_samples(index, latent_tensors, show=True):
    """Generate images from ``latent_tensors``, save them as one grid and optionally show it.

    index: number embedded (zero-padded to 4 digits) in the output file name
    latent_tensors: batch of latent vectors fed to the module-level ``generator``
    show: when True, also display the grid with matplotlib

    The file is written into the module-level ``sample_dir``.
    """
    # Sampling only: no gradients are needed. Denormalize once and reuse the
    # CPU copy for both saving and plotting.
    with torch.no_grad():
        fake_images = denorm(generator(latent_tensors)).cpu()
    fake_fname = 'generated-images-{0:0=4d}.png'.format(index)
    save_image(fake_images, os.path.join(sample_dir, fake_fname), nrow=8)
    print('Saving', fake_fname)
    if show:
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.set_xticks([]); ax.set_yticks([])
        # make_grid returns (C, H, W); imshow expects (H, W, C)
        ax.imshow(make_grid(fake_images, nrow=8).permute(1, 2, 0))
        plt.show()

In [None]:
fixed_latent = torch.randn(64, latent_size, 1, 1, device=device)

In [None]:
save_samples(0, fixed_latent)

In [None]:
def fit(epochs, lr, start_idx=1):
    """Train the module-level GAN for ``epochs`` epochs.

    epochs: number of passes over the module-level ``train_dl``
    lr: learning rate of both Adam optimizers
    start_idx: offset added to the epoch number in the saved sample file names

    Returns (losses_g, losses_d, real_scores, fake_scores); each list holds
    one value per epoch, taken from the last batch of that epoch.
    """
    torch.cuda.empty_cache()

    # One Adam optimizer per network, same settings for both
    adam_settings = dict(lr=lr, betas=(0.5, 0.999))
    opt_d = torch.optim.Adam(discriminator.parameters(), **adam_settings)
    opt_g = torch.optim.Adam(generator.parameters(), **adam_settings)

    # Per-epoch history
    losses_g, losses_d, real_scores, fake_scores = [], [], [], []

    for epoch in range(epochs):
        # Alternate one discriminator step and one generator step per batch
        for real_images, _ in tqdm(train_dl):
            loss_d, real_score, fake_score = train_discriminator(real_images, opt_d)
            loss_g = train_generator(opt_g)

        # Keep the values of the last batch of the epoch
        for series, value in ((losses_g, loss_g), (losses_d, loss_d),
                              (real_scores, real_score), (fake_scores, fake_score)):
            series.append(value)

        report = "Epoch [{}/{}], loss_g: {:.4f}, loss_d: {:.4f}, real_score: {:.4f}, fake_score: {:.4f}"
        print(report.format(epoch+1, epochs, loss_g, loss_d, real_score, fake_score))

        # Save (and show) images generated from the fixed latent vectors
        save_samples(epoch+start_idx, fixed_latent, show=True)

    return losses_g, losses_d, real_scores, fake_scores

In [None]:
lr = 0.0002
epochs = 20

In [None]:
history = fit(epochs, lr)

In [None]:
losses_g, losses_d, real_scores, fake_scores = history

In [None]:
from IPython.display import Image

In [None]:
# Show the grid saved after the last epoch. save_samples() names files by the
# zero-padded index and fit() numbers epochs from 1 by default, so the last
# file carries the index `epochs`.
Image(os.path.join(sample_dir, 'generated-images-{0:0=4d}.png'.format(epochs)))

In [None]:
vid_fname = 'gans_training.mp4'

# Sample grids in file-name order (the zero-padded index keeps them chronological)
files = sorted(os.path.join(sample_dir, f) for f in os.listdir(sample_dir) if 'generated' in f)

# One frame per second. NOTE(review): (530, 530) presumably matches the saved
# grids (8 x 8 images of 64 x 64 with 2-pixel padding: 8 * 66 + 2 = 530) —
# confirm if the image size or grid layout changes.
out = cv2.VideoWriter(vid_fname, cv2.VideoWriter_fourcc(*'MP4V'), 1, (530, 530))
try:
    for fname in files:
        frame = cv2.imread(fname)
        # imread returns None for files it cannot decode; skip those
        if frame is None:
            continue
        out.write(frame)
finally:
    # Always finalize the video file, even if a frame fails to be written
    out.release()

In [None]:
plt.plot(losses_d, '-')
plt.plot(losses_g, '-')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['Discriminator', 'Generator'])
plt.title('Losses');

In [None]:
plt.plot(real_scores, '-')
plt.plot(fake_scores, '-')
plt.xlabel('epoch')
plt.ylabel('score')
plt.legend(['Real', 'Fake'])
plt.title('Scores');

## Улучшаем наш ГАН.


1. Попробуйте добавить больше Conv-BN блоков в Дискриминатор
2. Попробуйте добавить Pooling в Дискриминатор
3. Попробуйте добавить больше Conv-BN блоков в Генератор
4. Увеличьте `latent_size`
5. Попробуйте использовать функцию активации ELU или LeakyReLU

Используйте, чтобы получить дополнительные подсказки [source](https://machinelearningmastery.com/how-to-train-stable-generative-adversarial-networks/)


In [None]:
# Deeper discriminator: six 3x3 conv -> BatchNorm -> LeakyReLU stages; the
# last four are followed by 2x2 max-pooling, which halves the resolution.
# For 3 x 64 x 64 inputs the pooled stages give
# 64 x 32 x 32 -> 128 x 16 x 16 -> 256 x 8 x 8 -> 512 x 4 x 4
discriminator = nn.Sequential(
    *[
        layer
        for c_in, c_out, pooled in (
            (3, 16, False),
            (16, 32, False),
            (32, 64, True),
            (64, 128, True),
            (128, 256, True),
            (256, 512, True),
        )
        for layer in (
            nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.LeakyReLU(0.2, inplace=True),
            *([nn.MaxPool2d(2)] if pooled else []),
        )
    ],
    # 512 x 4 x 4 -> 1 x 1 x 1
    nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False),
    # one probability per image
    nn.Flatten(),
    nn.Sigmoid(),
)

In [None]:
discriminator = to_device(discriminator, device)

In [None]:
latent_size = 256

In [None]:
# Deeper generator: two stride-1 transposed convolutions grow the
# latent_size x 1 x 1 input, then four stride-2 ones each double the
# resolution; Tanh keeps the 3-channel output in [-1, 1].
generator = nn.Sequential(
    nn.ConvTranspose2d(latent_size, 1024, kernel_size=2, bias=False),
    nn.BatchNorm2d(1024),
    nn.ReLU(True),

    nn.ConvTranspose2d(1024, 512, kernel_size=3, bias=False),
    nn.BatchNorm2d(512),
    nn.ReLU(True),

    *[
        layer
        for c_in, c_out in ((512, 256), (256, 128), (128, 64))
        for layer in (
            nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.ReLU(True),
        )
    ],

    nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1, bias=False),
    nn.Tanh(),
)

In [None]:
generator = to_device(generator, device)

In [None]:
lr = 3e-4
epochs = 20

In [None]:
history = fit(epochs, lr)

## Генерируем лица!


1. Добавьте CenterCrop трансформацию к изображениям и уменьшите их размер
2. Используйте более глубокую GAN модель
3. Получите модель, которая выдает приемлемый аутпут с достаточно хорошими лицами (Хорошие - субъективный критерий, сделайте визуализацию и обоснуйте "хорошесть" модели)


In [None]:
!wget http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz -O tmp.tgz

In [None]:
!tar xvzf tmp.tgz && rm tmp.tgz

In [None]:
DATA_DIR = './lfw-deepfunneled/'
image_size = 250
stats = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)

In [None]:
train_ds = ImageFolder(DATA_DIR, transform=tt.Compose([
    tt.CenterCrop(128),
    tt.Resize(64),
    tt.ToTensor(),
    tt.Normalize(*stats)]))

In [None]:
# Create PyTorch DataLoader object to produce batches
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=2, pin_memory=True)

In [None]:
train_dl = DeviceDataLoader(train_dl, device)

In [None]:
# DCGAN-style discriminator for 3 x 64 x 64 face crops.
# Each strided stage halves the resolution:
# 64 x 32 x 32 -> 128 x 16 x 16 -> 256 x 8 x 8 -> 512 x 4 x 4
discriminator = nn.Sequential(
    *[
        layer
        for c_in, c_out in ((3, 64), (64, 128), (128, 256), (256, 512))
        for layer in (
            nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.LeakyReLU(0.2, inplace=True),
        )
    ],
    # 512 x 4 x 4 -> 1 x 1 x 1
    nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False),
    # one probability per image
    nn.Flatten(),
    nn.Sigmoid(),
)

In [None]:
discriminator = to_device(discriminator, device)

In [None]:
latent_size = 128

In [None]:
# DCGAN-style generator for faces: latent_size x 1 x 1 -> 3 x 64 x 64 in [-1, 1]
generator = nn.Sequential(
    # latent_size x 1 x 1 -> 512 x 4 x 4
    nn.ConvTranspose2d(latent_size, 512, kernel_size=4, stride=1, padding=0, bias=False),
    nn.BatchNorm2d(512),
    nn.ReLU(True),
    # each stage doubles the resolution:
    # 256 x 8 x 8 -> 128 x 16 x 16 -> 64 x 32 x 32
    *[
        layer
        for c_in, c_out in ((512, 256), (256, 128), (128, 64))
        for layer in (
            nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(c_out),
            nn.ReLU(True),
        )
    ],
    # 64 x 32 x 32 -> 3 x 64 x 64
    nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1, bias=False),
    nn.Tanh(),
)

In [None]:
generator = to_device(generator, device)

In [None]:
latent_size = 128

In [None]:
fixed_latent = torch.randn(64, latent_size, 1, 1, device=device)

In [None]:
lr = 2e-4
epochs = 20

In [None]:
history = fit(epochs, lr)

# Дополнительное задание на +0.5 балла

Overfit it
Будем работать с датасетом Fashion-MNIST (hint: он доступен в torchvision) https://github.com/zalandoresearch/fashion-mnist.

Ваша задача состоит в следующем:

1. Обучить сеть, которая покажет >= 0.92 test accuracy.
2. Пронаблюдать и продемонстрировать процесс переобучения сети с увеличением числа параметров (==нейронов) и/или числа слоев и продемонстрировать это наглядно (например, на графиках).
3. Попробовать частично справиться с переобучением с помощью подходящих приемов (Dropout/batchnorm/augmentation etc.)

Примечание: Пункты 2 и 3 взаимосвязаны, в п. 3 Вам предлагается сделать полученную в п. 2 сеть менее склонной к переобучению. Пункт 1 является независимым от пунктов 2 и 3.

# 4.1 Finetuning: >= 0.92 test accuracy

In [None]:
!pip install torchvision

In [None]:
import torchvision
import torchvision.transforms as transforms

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [None]:
from sklearn.metrics import accuracy_score
from tqdm import tqdm

In [None]:
from PIL import Image

In [None]:
# Train dataset
train_set = torchvision.datasets.FashionMNIST(
    root = '/content/data',
    train = True,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()                                 
    ])
)

In [None]:
# Test dataset
test_set = torchvision.datasets.FashionMNIST(
    root = '/content/data',
    train = False,
    download = False,
    transform = transforms.Compose([
        transforms.ToTensor()                                 
    ])
)

In [None]:
class CustomDataset(Dataset):
    """Adapt an image dataset to the input expected by the fine-tuned network.

    Each sample's image tensor is stacked three times along dim 0 (assumes the
    wrapped dataset yields single-channel tensors), resized to 224x224 through
    PIL and normalised with mean 0.5 / std 0.5 per channel. Labels are passed
    through unchanged.
    """

    def __init__(self, dataset):
        """Store the wrapped dataset and build the transform objects once.

        Args:
            dataset: indexable dataset whose items start with (image, label).
        """
        self.dataset = dataset
        # Built once here instead of on every __getitem__ call.
        self._to_pil = transforms.ToPILImage()
        self._to_tensor = transforms.ToTensor()
        self._normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    def __getitem__(self, index):
        """Return the (image, label) pair at ``index`` after preprocessing."""
        # Fetch the sample once: every access to the wrapped dataset re-runs
        # its own transform, so indexing twice doubles that work.
        sample = self.dataset[index]
        image, label = sample[0], sample[1]
        image = torch.cat([image, image, image], 0)
        image = self._to_pil(image).resize((224, 224))
        image = self._normalize(self._to_tensor(image))
        return image, label

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

In [None]:
# Wrap both splits so samples are served in the 3-channel, 224x224,
# normalised form produced by CustomDataset.
custom_train_set = CustomDataset(train_set)
custom_test_set = CustomDataset(test_set)

In [None]:
# Hyperparameters for the fine-tuning run in section 4.1
BATCH_SIZE = 128  # samples per DataLoader batch
CLASSES = 10      # output size of the replaced classification head
EPOCHS = 2        # passes over the training set
LR = 3e-4         # learning rate given to AdamW

# Loss passed to train() and eval()
loss_fn =  nn.CrossEntropyLoss()

In [None]:
# Use the first CUDA device when available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# ResNet-18 with pretrained weights. `torchvision.models` is used explicitly
# because no visible cell imports a bare `models` name.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument - switch once the installed version is known.
model = torchvision.models.resnet18(pretrained=True)

# Replace the final fully connected layer with a fresh head of CLASSES outputs.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, CLASSES)
model = model.to(device)

# All parameters (backbone and new head) are optimised. train() reads this
# global `optimizer`, so it must be created before the training loop.
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)

In [None]:
# Shuffled mini-batches over the preprocessed training split
train_data_loader = torch.utils.data.DataLoader(
    dataset=custom_train_set, batch_size=BATCH_SIZE, shuffle=True
)

In [None]:
# Evaluation loader over the preprocessed test split. Shuffling is disabled:
# accuracy does not depend on sample order, and a fixed order keeps
# evaluation deterministic.
test_data_loader = torch.utils.data.DataLoader(
    custom_test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
def train(model, data_loader, loss_fn, device):
    """Run one optimisation epoch over ``data_loader`` and return the summed loss.

    The optimiser is not a parameter: the function uses the module-level
    ``optimizer`` variable, which must be bound to an optimiser built over
    ``model``'s parameters before this is called.

    Args:
        model: network to train; it is switched to training mode.
        data_loader: iterable yielding ``(inputs, labels)`` batches.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        device: device the batches are moved to.

    Returns:
        The sum of the per-batch losses as a tensor detached from the autograd
        graph (the int ``0`` if the loader yields no batches).
    """
    total_loss = 0
    model.train()
    for inputs, labels in tqdm(data_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = loss_fn(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Accumulate a detached copy: summing `loss` itself would make the
        # running total part of every batch's autograd history.
        total_loss += loss.detach()
    return total_loss

In [None]:
def eval(model, data_loader, loss_fn, device):
    """Compute classification accuracy of ``model`` over ``data_loader``.

    NOTE: the name shadows the builtin ``eval``; it is kept because other
    cells call the function under this name.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        data_loader: iterable yielding ``(inputs, labels)`` batches.
        loss_fn: unused; kept so the signature mirrors ``train``.
        device: device the inputs are moved to.

    Returns:
        The value of ``accuracy_score(true_labels, predicted_labels)``.
    """
    predicted = []
    labels = []
    model.eval()
    with torch.no_grad():
        for x, y in tqdm(data_loader):
            outputs = model(x.to(device))
            # Index of the largest output per row is the predicted class.
            _, predict = torch.max(outputs, 1)
            predicted.extend(predict.cpu().tolist())
            # Convert to plain ints; `labels += y` would fill the list with
            # 0-dim tensors instead.
            labels.extend(y.tolist())
    return accuracy_score(labels, predicted)

In [None]:
# Fine-tune for EPOCHS epochs; after each one print the summed training loss
# and the test accuracy. The training function defined in this notebook is
# `train` (there is no `train_nn`).
for epoch in range(EPOCHS):
    print(train(model, train_data_loader, loss_fn, device))
    print(eval(model, test_data_loader, loss_fn, device))

### 4.2 Продемонстрировать процесс переобучения сети

In [None]:
# Settings for the overfitting experiments: same batch size and learning
# rate as in 4.1, but EPOCHS is raised from 2 to 100.
BATCH_SIZE = 128
CLASSES = 10
EPOCHS = 100
LR = 3e-4

# Loss passed to train() and eval()
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Shuffled mini-batches over the raw training split (no CustomDataset wrapper)
train_data_loader = torch.utils.data.DataLoader(
    dataset=train_set, batch_size=BATCH_SIZE, shuffle=True
)

In [None]:
# Evaluation loader over the raw test split. Shuffling is disabled: accuracy
# does not depend on sample order, and a fixed order keeps evaluation
# deterministic.
test_data_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
from collections import OrderedDict

# Baseline CNN with no regularisation (no batchnorm, no dropout), used to
# demonstrate overfitting.
# Assumes 1x28x28 inputs: 28 -> conv 26 -> pool 13 -> conv 11 -> pool 5
# -> conv 3 -> pool 1, so Flatten yields the 256 features fc_6 expects.
# The network ends at `fc_logits`: nn.CrossEntropyLoss expects raw logits,
# so no Sigmoid (or Softmax) is applied on top.
model_cnn = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3)),
    ('relu1', nn.ReLU()),
    ('pool1', nn.MaxPool2d(2)),

    ('conv2', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)),
    ('relu2', nn.ReLU()),
    ('pool2', nn.MaxPool2d(2)),

    ('conv3', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3)),
    ('relu3', nn.ReLU()),
    ('pool3', nn.MaxPool2d(2)),

    ('flat', nn.Flatten()),

    ('fc_6', nn.Linear(256, 512)),
    ('relu_6', nn.ReLU()),

    ('fc_7', nn.Linear(512, 256)),
    ('relu_7', nn.ReLU()),

    ('fc_logits', nn.Linear(256, CLASSES)),
]))

model_cnn = model_cnn.to(device)

In [None]:
# Fresh AdamW over the new model's parameters. train() reads the global name
# `optimizer`, so it has to be rebound whenever `model_cnn` is rebuilt.
optimizer = torch.optim.AdamW(model_cnn.parameters(), lr=LR)

In [None]:
# Per-epoch history for the unregularised baseline
loss_0, train_acc_0, test_acc_0 = [], [], []

for epoch in range(EPOCHS):
    # One optimisation pass, then accuracy on both splits
    loss_0.append(
        train(model_cnn, train_data_loader, loss_fn, device)
    )
    train_acc_0.append(
        eval(model_cnn, train_data_loader, loss_fn, device)
    )
    test_acc_0.append(
        eval(model_cnn, test_data_loader, loss_fn, device)
    )

In [None]:
import matplotlib.pyplot as plt

# Train vs. test accuracy per epoch for the baseline; a widening gap between
# the two curves is the overfitting signal. Labels and a legend make the
# curves distinguishable.
plt.plot(train_acc_0, label='train accuracy')
plt.plot(test_acc_0, label='test accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Baseline CNN (no regularisation)')
plt.legend()
plt.show()

### 4.3 Cправиться с переобучением: dropout/batchnorm/augmentation

### Dropout

In [None]:
from collections import OrderedDict

# Same architecture as the baseline plus Dropout(0.3) after the convolutional
# stack and after each hidden fully connected layer.
# Assumes 1x28x28 inputs: 28 -> conv 26 -> pool 13 -> conv 11 -> pool 5
# -> conv 3 -> pool 1, so Flatten yields the 256 features fc_6 expects.
# The network ends at `fc_logits`: nn.CrossEntropyLoss expects raw logits,
# so no Sigmoid (or Softmax) is applied on top.
model_cnn = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3)),
    ('relu1', nn.ReLU()),
    ('pool1', nn.MaxPool2d(2)),

    ('conv2', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)),
    ('relu2', nn.ReLU()),
    ('pool2', nn.MaxPool2d(2)),

    ('conv3', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3)),
    ('relu3', nn.ReLU()),
    ('pool3', nn.MaxPool2d(2)),

    ('dropout_5', nn.Dropout(0.3)),

    ('flat', nn.Flatten()),

    ('fc_6', nn.Linear(256, 512)),
    ('relu_6', nn.ReLU()),
    ('dropout_6', nn.Dropout(0.3)),

    ('fc_7', nn.Linear(512, 256)),
    ('relu_7', nn.ReLU()),
    ('dropout_7', nn.Dropout(0.3)),

    ('fc_logits', nn.Linear(256, CLASSES)),
]))

model_cnn = model_cnn.to(device)

In [None]:
# Fresh AdamW for the dropout model. train() reads the global name
# `optimizer`, so it has to be rebound whenever `model_cnn` is rebuilt.
optimizer = torch.optim.AdamW(model_cnn.parameters(), lr=LR)

In [None]:
# Per-epoch history for the dropout model
loss_1, train_acc_1, test_acc_1 = [], [], []

for epoch in range(EPOCHS):
    # One optimisation pass, then accuracy on both splits
    loss_1.append(
        train(model_cnn, train_data_loader, loss_fn, device)
    )
    train_acc_1.append(
        eval(model_cnn, train_data_loader, loss_fn, device)
    )
    test_acc_1.append(
        eval(model_cnn, test_data_loader, loss_fn, device)
    )

In [None]:
# Train vs. test accuracy per epoch for the dropout model; labels and a
# legend make the curves distinguishable.
plt.plot(train_acc_1, label='train accuracy')
plt.plot(test_acc_1, label='test accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('CNN with dropout')
plt.legend()
plt.show()

### Batchnorm

In [None]:
from collections import OrderedDict

# Dropout model with BatchNorm2d added after every convolution.
# Assumes 1x28x28 inputs: 28 -> conv 26 -> pool 13 -> conv 11 -> pool 5
# -> conv 3 -> pool 1, so Flatten yields the 256 features fc_6 expects.
# The network ends at `fc_logits`: nn.CrossEntropyLoss expects raw logits,
# so no Sigmoid (or Softmax) is applied on top.
model_cnn = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3)),
    ('norm1', nn.BatchNorm2d(64)),
    ('relu1', nn.ReLU()),
    ('pool1', nn.MaxPool2d(2)),

    ('conv2', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)),
    ('norm2', nn.BatchNorm2d(128)),
    ('relu2', nn.ReLU()),
    ('pool2', nn.MaxPool2d(2)),

    ('conv3', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3)),
    ('norm3', nn.BatchNorm2d(256)),
    ('relu3', nn.ReLU()),
    ('pool3', nn.MaxPool2d(2)),

    ('dropout_5', nn.Dropout(0.3)),

    ('flat', nn.Flatten()),

    ('fc_6', nn.Linear(256, 512)),
    ('relu_6', nn.ReLU()),
    ('dropout_6', nn.Dropout(0.3)),

    ('fc_7', nn.Linear(512, 256)),
    ('relu_7', nn.ReLU()),
    ('dropout_7', nn.Dropout(0.3)),

    ('fc_logits', nn.Linear(256, CLASSES)),
]))

model_cnn = model_cnn.to(device)

In [None]:
# Fresh AdamW for the batchnorm model. train() reads the global name
# `optimizer`, so it has to be rebound whenever `model_cnn` is rebuilt.
optimizer = torch.optim.AdamW(model_cnn.parameters(), lr=LR)

In [None]:
# Per-epoch history for the batchnorm + dropout model
loss_2, train_acc_2, test_acc_2 = [], [], []

for epoch in range(EPOCHS):
    # One optimisation pass, then accuracy on both splits
    loss_2.append(
        train(model_cnn, train_data_loader, loss_fn, device)
    )
    train_acc_2.append(
        eval(model_cnn, train_data_loader, loss_fn, device)
    )
    test_acc_2.append(
        eval(model_cnn, test_data_loader, loss_fn, device)
    )

In [None]:
# Train vs. test accuracy per epoch for the batchnorm + dropout model; labels
# and a legend make the curves distinguishable.
plt.plot(train_acc_2, label='train accuracy')
plt.plot(test_acc_2, label='test accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('CNN with batchnorm and dropout')
plt.legend()
plt.show()

### Augmentation

Pretrain

In [None]:
# Settings for the pretraining stage on un-augmented data (20 epochs)
BATCH_SIZE = 128
CLASSES = 10
EPOCHS = 20
LR = 3e-4

# Loss passed to train() and eval()
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Shuffled mini-batches over the raw (un-augmented) training split
train_data_loader = torch.utils.data.DataLoader(
    dataset=train_set, batch_size=BATCH_SIZE, shuffle=True
)

In [None]:
# Evaluation loader over the raw test split. Shuffling is disabled: accuracy
# does not depend on sample order, and a fixed order keeps evaluation
# deterministic.
test_data_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
from collections import OrderedDict

# Batchnorm + dropout CNN that is first pretrained on un-augmented data and
# then trained further on augmented data.
# Assumes 1x28x28 inputs: 28 -> conv 26 -> pool 13 -> conv 11 -> pool 5
# -> conv 3 -> pool 1, so Flatten yields the 256 features fc_6 expects.
# The network ends at `fc_logits`: nn.CrossEntropyLoss expects raw logits,
# so no Sigmoid (or Softmax) is applied on top.
model_cnn = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3)),
    ('norm1', nn.BatchNorm2d(64)),
    ('relu1', nn.ReLU()),
    ('pool1', nn.MaxPool2d(2)),

    ('conv2', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)),
    ('norm2', nn.BatchNorm2d(128)),
    ('relu2', nn.ReLU()),
    ('pool2', nn.MaxPool2d(2)),

    ('conv3', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3)),
    ('norm3', nn.BatchNorm2d(256)),
    ('relu3', nn.ReLU()),
    ('pool3', nn.MaxPool2d(2)),

    ('dropout_5', nn.Dropout(0.3)),

    ('flat', nn.Flatten()),

    ('fc_6', nn.Linear(256, 512)),
    ('relu_6', nn.ReLU()),
    ('dropout_6', nn.Dropout(0.3)),

    ('fc_7', nn.Linear(512, 256)),
    ('relu_7', nn.ReLU()),
    ('dropout_7', nn.Dropout(0.3)),

    ('fc_logits', nn.Linear(256, CLASSES)),
]))

model_cnn = model_cnn.to(device)

In [None]:
# Fresh AdamW for the model to be pretrained. train() reads the global name
# `optimizer`; the same instance is also used by the later augmentation run,
# since no cell rebinds it in between.
optimizer = torch.optim.AdamW(model_cnn.parameters(), lr=LR)

In [None]:
# Per-epoch history for pretraining on un-augmented data
loss_3, train_acc_3, test_acc_3 = [], [], []

for epoch in range(EPOCHS):
    # One optimisation pass, then accuracy on both splits
    loss = train(model_cnn, train_data_loader, loss_fn, device)
    train_acc = eval(model_cnn, train_data_loader, loss_fn, device)
    test_acc = eval(model_cnn, test_data_loader, loss_fn, device)

    # Progress line: epoch index, summed loss, train accuracy, test accuracy
    print(epoch, loss, train_acc, test_acc)

    loss_3.append(loss)
    train_acc_3.append(train_acc)
    test_acc_3.append(test_acc)

In [None]:
# Train vs. test accuracy per epoch for the pretraining stage; labels and a
# legend make the curves distinguishable.
plt.plot(train_acc_3, label='train accuracy')
plt.plot(test_acc_3, label='test accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Pretraining without augmentation')
plt.legend()
plt.show()

Augmentation

In [None]:
# Settings for the augmentation stage (100 epochs). `loss_fn` is not
# redefined here; the loop below keeps using the existing one.
BATCH_SIZE = 128
CLASSES = 10
EPOCHS = 100
LR = 3e-4

In [None]:
class CustomDatasetAug(Dataset):
    """Wrap an image-tensor dataset and apply random augmentation per access.

    Every image is converted to PIL, randomly flipped horizontally, rotated by
    a random angle within +/-15 degrees and converted back to a tensor. Labels
    are passed through unchanged.
    """

    def __init__(self, dataset):
        """Store the wrapped dataset and build the augmentation pipeline once.

        Args:
            dataset: indexable dataset whose items start with (image, label).
        """
        self.dataset = dataset
        # Built once here instead of on every __getitem__ call; the random
        # transforms still draw new parameters each time they are applied.
        self._augment = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ToTensor(),
        ])

    def __getitem__(self, index):
        """Return a freshly augmented (image, label) pair for ``index``."""
        # Fetch the sample once: every access to the wrapped dataset re-runs
        # its own transform, so indexing twice doubles that work.
        sample = self.dataset[index]
        image, label = sample[0], sample[1]
        return self._augment(image), label

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

In [None]:
# Rebind custom_train_set (previously the CustomDataset wrapper from 4.1) to
# the augmenting wrapper; only the training split is augmented.
custom_train_set = CustomDatasetAug(train_set)

In [None]:
# Shuffled mini-batches over the augmented training split
train_data_loader = torch.utils.data.DataLoader(
    dataset=custom_train_set, batch_size=BATCH_SIZE, shuffle=True
)

In [None]:
# Evaluation loader over the raw (un-augmented) test split. Shuffling is
# disabled: accuracy does not depend on sample order, and a fixed order keeps
# evaluation deterministic.
test_data_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Per-epoch history for training on augmented data. `model_cnn` and
# `optimizer` are not re-created before this loop, so training continues
# from the pretrained state.
loss_aug, train_acc_aug, test_acc_aug = [], [], []

for epoch in range(EPOCHS):
    # One optimisation pass, then accuracy on both loaders; the train loader
    # serves augmented samples.
    loss = train(model_cnn, train_data_loader, loss_fn, device)
    train_acc = eval(model_cnn, train_data_loader, loss_fn, device)
    test_acc = eval(model_cnn, test_data_loader, loss_fn, device)

    # Progress line: epoch index, summed loss, train accuracy, test accuracy
    print(epoch, loss, train_acc, test_acc)

    loss_aug.append(loss)
    train_acc_aug.append(train_acc)
    test_acc_aug.append(test_acc)

In [None]:
# Train vs. test accuracy per epoch for the augmentation stage. Train accuracy
# is measured on augmented samples; labels and a legend make the curves
# distinguishable.
plt.plot(train_acc_aug, label='train accuracy (augmented data)')
plt.plot(test_acc_aug, label='test accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Training with augmentation')
plt.legend()
plt.show()