## Importing dependencies

### First Party

Importing custom classes from the codebase:
- **dataset**: helper _torch_ wrapper around _facades_ dataset.
- **cyclegan**: implementation of the _CycleGAN_ architecture, providing both the generator and the discriminator (the loss functions used in this notebook come from _torch_).

### Third Party

- **os**: to create directories, save weights.
- **opencv**: image processing library, helps to load/save images.
- **numpy**: nd arrays utilities, includes math and arithmetics.
- **torch**: ML framework, extremely GPU-friendly; the network architecture is implemented specifically in torch.
- **torchvision**: set of image transformations, like crop and flip. It is important to use the _v2_ transforms, because they can apply the same transformation to an image and its corresponding segmentation labels even when stochastic operations are used.
- **matplotlib**: library to visualise data.
- **tqdm**: visualises progress.

In [1]:
# 3rd Party dependencies.
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.transforms.v2 as transforms

from torch.utils.data import DataLoader
from tqdm import tqdm

# 1st Party dependencies.
from dataset.facades_dataset import FacadesDataset
from cyclegan.generator import Generator
from cyclegan.discriminator import Discriminator

%matplotlib inline

## Pre-processing data

I decided to use the same pre-processing as was used for _Pix2Pix_ training. The pre-processing does not create too much disturbance in the data, so the model can learn something from it and at the same time it enriches the train dataset.

In [2]:
def _tensor_conversion_steps():
    """Return fresh transforms converting an input to a scaled float32 image tensor."""
    return [
        transforms.ToImage(),
        transforms.ToDtype(dtype=torch.float32, scale=True),
    ]


# Train-time augmentation, the same recipe that was applied to the Pix2Pix
# train dataset: convert, upscale, random crop, random mirror.
train_transforms = transforms.Compose(
    _tensor_conversion_steps()
    + [
        # Upscale to 286×286 so the random crop below has room to jitter.
        transforms.Resize(size=(286, 286)),
        # Randomly crop back down to 256×256.
        transforms.RandomCrop(size=256),
        # Random mirroring.
        transforms.RandomHorizontalFlip(),
    ]
)

# No augmentation: only the tensor conversion.
default_transforms = transforms.Compose(_tensor_conversion_steps())

In [3]:
# Both splits are read from the same dataset root.
_FACADES_ROOT = 'dataset/facades'

# The train split is augmented; the validation split only gets tensor conversion.
facades_train_dataset = FacadesDataset(
    root_dir=_FACADES_ROOT,
    split='train',
    transformations=train_transforms,
)
facades_val_dataset = FacadesDataset(
    root_dir=_FACADES_ROOT,
    split='val',
    transformations=default_transforms,
)

# One sample per batch, shuffled, for both loaders; only the train loader
# uses background worker processes.
train_dataloader = DataLoader(
    facades_train_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=4,
)
val_dataloader = DataLoader(
    facades_val_dataset,
    batch_size=1,
    shuffle=True,
)

## Training cycle

- **learning rate**: 0.0003
- **lambda for cycle loss**: 10, a fairly large weight, so that the generators pay a lot of attention to the _x -> G(x) -> F(G(x))_ cycle.
- **epochs**: 100, which seems to be enough to train the model.

In [4]:
def train_one_epoch(
    data_loader,
    generator_x,
    discriminator_x,
    generator_y,
    discriminator_y,
    optimiser_generator,
    optimiser_discriminator,
    l1_loss_func,
    mse_loss_func,
    lambda_factor,
    device,
):
    """Run one optimisation pass over ``data_loader`` for both translation directions.

    For every ``(y, x)`` batch the discriminators are updated first, then the
    generators:

    * Discriminators: each one is scored with ``mse_loss_func`` against an
      all-ones target on the real batch and an all-zeros target on the
      detached generated batch; the two discriminator losses are averaged.
    * Generators: adversarial loss (``mse_loss_func`` against all-ones on the
      generated batches) plus the ``l1_loss_func`` cycle losses, each scaled
      by ``lambda_factor``.

    Args:
        data_loader: iterable yielding ``(y, x)`` batch pairs.
        generator_x: module producing a fake ``x`` batch from a ``y`` batch.
        discriminator_x: module scoring ``x``-domain batches.
        generator_y: module producing a fake ``y`` batch from an ``x`` batch.
        discriminator_y: module scoring ``y``-domain batches.
        optimiser_generator: optimiser stepped after the generators' backward
            pass (expected to hold the parameters of both generators).
        optimiser_discriminator: optimiser stepped after the discriminators'
            backward pass (expected to hold the parameters of both
            discriminators).
        l1_loss_func: loss callable used for the cycle terms.
        mse_loss_func: loss callable used for the adversarial terms.
        lambda_factor: multiplier applied to each cycle loss.
        device: target passed to ``.to()`` for every batch.

    Returns:
        Tuple ``(mean generators loss, mean discriminators loss)`` over all
        batches of the epoch.
    """

    def real_vs_fake_loss(discriminator, real_batch, fake_batch):
        # The fake batch is detached so this loss never reaches the generator.
        real_score = discriminator(real_batch)
        fake_score = discriminator(fake_batch.detach())
        on_real = mse_loss_func(real_score, torch.ones_like(real_score))
        on_fake = mse_loss_func(fake_score, torch.zeros_like(fake_score))
        return on_real + on_fake

    def adversarial_loss(discriminator, fake_batch):
        # The generator is rewarded when its output is scored as real (ones).
        fake_score = discriminator(fake_batch)
        return mse_loss_func(fake_score, torch.ones_like(fake_score))

    generator_history = []
    discriminator_history = []

    for real_y, real_x in tqdm(data_loader):
        real_y = real_y.to(device)
        real_x = real_x.to(device)

        # ---- Discriminators update ----
        generated_x = generator_x(real_y)
        loss_d_x = real_vs_fake_loss(discriminator_x, real_x, generated_x)

        generated_y = generator_y(real_x)
        loss_d_y = real_vs_fake_loss(discriminator_y, real_y, generated_y)

        loss_d = (loss_d_x + loss_d_y) / 2

        optimiser_discriminator.zero_grad()
        loss_d.backward()
        optimiser_discriminator.step()

        # ---- Generators update ----
        # Adversarial terms, scored by the freshly updated discriminators.
        adv_x = adversarial_loss(discriminator_x, generated_x)
        adv_y = adversarial_loss(discriminator_y, generated_y)

        # Cycle terms: translate the generated batches back and compare
        # them with the batches they originated from.
        rebuilt_y = generator_y(generated_x)
        rebuilt_x = generator_x(generated_y)
        cycle_y = l1_loss_func(real_y, rebuilt_y)
        cycle_x = l1_loss_func(real_x, rebuilt_x)

        loss_g = (
            adv_y
            + adv_x
            + cycle_y * lambda_factor
            + cycle_x * lambda_factor
        )

        optimiser_generator.zero_grad()
        loss_g.backward()
        optimiser_generator.step()

        # Keep plain Python floats so no autograd graph outlives the batch.
        discriminator_history.append(loss_d.item())
        generator_history.append(loss_g.item())

    return np.mean(generator_history), np.mean(discriminator_history)

In [None]:
# Setup: hyper-parameters and the device everything is moved to.
device = ('cuda' if torch.cuda.is_available() else 'cpu')
learning_rate = 3 * 1e-4
lambda_cycle = 10
epochs = 100

print('Starting training:', device, 'was selected for training')

# The checkpoint directory does not depend on the epoch, so it is created
# once up front; an unwritable output path then fails before any training
# time is spent instead of after the first epoch.
weights_dir = os.path.join('out', 'weights', 'cyclegan')
os.makedirs(weights_dir, exist_ok=True)

# X -> Facade Segmentation.
# Y -> Real facade image.
generator_x = Generator(img_channels=3, num_residuals=9).to(device)
discriminator_x = Discriminator(in_channels=3).to(device)
generator_y = Generator(img_channels=3, num_residuals=9).to(device)
discriminator_y = Discriminator(in_channels=3).to(device)

# One optimiser drives both generators, another drives both discriminators.
optimiser_generator = torch.optim.Adam(
    list(generator_x.parameters()) + list(generator_y.parameters()),
    lr=learning_rate,
    betas=(0.5, 0.999),
)

optimiser_discriminator = torch.optim.Adam(
    list(discriminator_x.parameters()) + list(discriminator_y.parameters()),
    lr=learning_rate,
    betas=(0.5, 0.999),
)

# L1 is passed in as the cycle loss, MSE as the adversarial loss.
l1_loss_function = nn.L1Loss()
mse_loss_function = nn.MSELoss()

# Per-epoch mean losses, collected for the training-history plot.
generators_history = []
discriminators_history = []

for epoch in range(epochs):
    g_loss, d_loss = train_one_epoch(
        train_dataloader,
        generator_x,
        discriminator_x,
        generator_y,
        discriminator_y,
        optimiser_generator,
        optimiser_discriminator,
        l1_loss_function,
        mse_loss_function,
        lambda_cycle,
        device)

    generators_history.append(g_loss)
    discriminators_history.append(d_loss)

    print('Epoch:', epoch, 'generators loss:', g_loss, 'discriminators loss:', d_loss)

    # Checkpoint all four networks after every epoch. The file name carries
    # the epoch index and the epoch's combined generators/discriminators loss
    # (the same value for both networks of a kind).
    torch.save(generator_x.state_dict(), os.path.join(weights_dir, f"generator-x-{epoch:03d}-{g_loss:.3f}.pt"))
    torch.save(discriminator_x.state_dict(), os.path.join(weights_dir, f"discriminator-x-{epoch:03d}-{d_loss:.3f}.pt"))
    torch.save(generator_y.state_dict(), os.path.join(weights_dir, f"generator-y-{epoch:03d}-{g_loss:.3f}.pt"))
    torch.save(discriminator_y.state_dict(), os.path.join(weights_dir, f"discriminator-y-{epoch:03d}-{d_loss:.3f}.pt"))

Starting training: cuda was selected for training


## Training history



In [None]:
# Two side-by-side panels: generators' loss on the left, discriminators' on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))

panels = (
    ("Generators loss history", generators_history),
    ("Discriminators loss history", discriminators_history),
)
for axis, (title, history) in zip(ax, panels):
    axis.set_title(title)
    axis.plot(history)

plt.tight_layout()
plt.show()

The results seem plausible and quite realistic, though _Pix2Pix_ produces better results.
At the same time, we obtained two generators, one mapping _segmented facades_ to _real images_ and one mapping the other way round, while _Pix2Pix_ learns the mapping in one direction only.