In [1]:
! pip install --quiet torch torchvision matplotlib pytorch-lightning
! pip install --quiet ipywidgets IProgress
! jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [2]:
import time
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as data
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from IPython.display import set_matplotlib_formats
from matplotlib.colors import to_rgba
from tqdm.notebook import tqdm
from torch.nn.modules.conv import Conv2d, ConvTranspose2d
from torch.nn.modules.activation import ReLU, Sigmoid
import torchvision
%matplotlib inline

print("Using torch", torch.__version__)
torch.manual_seed(42)
gpu_avail = torch.cuda.is_available()
print(f"Is the GPU available? {gpu_avail}")
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device", device)


Using torch 1.11.0a0+17540c5
Is the GPU available? True
Device cuda


In [3]:
BATCH_SIZE = 32

transform = transforms.Compose([
      transforms.ToTensor(),                 
])
lfw_data = datasets.LFWPeople(root="./data", split="train", download=True, transform=transform)
data_loader = DataLoader(dataset=lfw_data, batch_size=BATCH_SIZE, shuffle=True)

Files already downloaded and verified


In [7]:
class AutoEncoderVGGBlock1(nn.Module):
    def __init__(self):
        super().__init__()
        self.LAMBDA = 1
        vgg19 = torchvision.models.vgg19(pretrained=True)
        vgg_layers = list(list(vgg19.children())[0].children())[:-33]
        vgg = nn.Sequential(*vgg_layers)
        for p in vgg.parameters():
            p.requires_grad = False
        print(vgg_layers)

        self.encoder = vgg

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 64, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, 3, stride=1, padding=1),
            nn.Sigmoid())

    def loss_func(self, original, reconstructed):
        latent_original = self.encoder(original)
        latent_reconstructed = self.encoder(reconstructed)
        second = torch.sum(torch.square(latent_reconstructed - latent_original))
        first = torch.sum(torch.square(reconstructed - original))
        out = first + self.LAMBDA * second
        return out
    
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True), Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True)]


In [8]:
model = AutoEncoderVGGBlock1()

# Setup hyperparameters
# Configure lr and optimizer, may want to add weight decay
learning_rate = 1e-3
optim = torch.optim.Adam(model.parameters(), lr=learning_rate)

# We calculate the reconstruciton loss using l2 Norm
loss_func = torch.nn.MSELoss()

# Number of epochs with batch size of 48 (may change this) on ~13k images per epoch
num_epochs = 10

train_vgg_block_1_ae = False
block_1_train_outputs = []
block_1_losses = []

if train_vgg_block_1_ae == True:
    # Move the model to GPU if exists for training
    model.to(device)
    
    for epoch in range(num_epochs):
        for img, _ in tqdm(data_loader):
            # Move batch to GPU if available
            if device.type == "cuda": img = img.cuda()
            # Autoencode input batch
            recon = model(img)
            # Calculate reconstruction loss
            loss = loss_func(img, recon)
            # Reset optimizer gradients from previous batch
            optim.zero_grad()
            # Calculate gradient loss
            loss.backward()
            # Update parameters based on current gradient
            optim.step()

        # If the current epoch is the final epoch then we want to save the reconstructions and the input just so we can
        # check how things are learning
        # We should remove this later
        if epoch == num_epochs - 1: outputs.append((epoch, img.detach().cpu().numpy(), recon.detach().cpu().numpy()))
        block_1_train_outputs.append((epoch, img.detach().cpu().numpy(), recon.detach().cpu().numpy()))
        block_3_losses.append(loss.item())
        print(f'Epoch: {epoch + 1}, Loss:{loss.item():.4f}')
    # Move the model back to CPU to free up GPU memory
    model.cpu()
    
else:
    model.load_state_dict(torch.load("models/vgg_ae_block_1.pth"))
    block_1_losses = np.loadtxt("models/vgg_ae_block_1_losses.txt").reshape(num_epochs, 1)
    print("Loaded AutoEncoderVGGBlock1 from disk")

[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True), Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(inplace=True)]


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 1, Loss:0.0026


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 2, Loss:0.0016


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 3, Loss:0.0018


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 4, Loss:0.0012


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 5, Loss:0.0015


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 6, Loss:0.0012


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 7, Loss:0.0010


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 8, Loss:0.0010


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 9, Loss:0.0016


  0%|          | 0/199 [00:00<?, ?it/s]

Epoch: 10, Loss:0.0009


AutoEncoderVGGBlock1(
  (encoder): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
  )
  (decoder): Sequential(
    (0): ConvTranspose2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): ConvTranspose2d(64, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): Sigmoid()
  )
)

In [33]:
save_vgg_block_1_ae = False
if save_vgg_block_1_ae: 
    torch.save(model.state_dict(), 'models/vgg_ae_block_1.pth')
    np.savetxt("models/vgg_ae_block_1.txt", np.array(block_1_losses)

In [7]:
from torch.nn.modules.conv import Conv2d, ConvTranspose2d
from torch.nn.modules.activation import ReLU, Sigmoid
import torchvision

class AutoEncoderVGGBlock3(nn.Module):
    def __init__(self):
        super().__init__()
        self.LAMBDA = 1
        vgg19 = torchvision.models.vgg19(pretrained=True)
        vgg_layers = list(list(vgg19.children())[0].children())[:-19]
        vgg = nn.Sequential(*vgg_layers)
        for p in vgg.parameters():
            p.requires_grad = False

        self.encoder = vgg
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 256, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 256, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 256, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.Upsample(scale_factor=2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 128, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.Upsample(scale_factor=2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 64, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, 3, stride=1, padding=0),
            nn.Sigmoid())

    def loss_func(self, original, reconstructed):
        latent_original = self.encoder(original)
        latent_reconstructed = self.encoder(reconstructed)
        second = torch.sum(torch.square(latent_reconstructed - latent_original))
        first = torch.sum(torch.square(reconstructed - original))
        out = first + self.LAMBDA * second
        return out
    
    def encode(self, x):
        return self.encoder(x)
    
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [9]:
model = AutoEncoderVGGBlock3()

# Setup hyperparameters
# Configure lr and optimizer, may want to add weight decay
learning_rate = 1e-3
optim = torch.optim.Adam(model.parameters(), lr=learning_rate)

# We calculate the reconstruciton loss using l2 Norm
recon_loss_func = torch.nn.MSELoss()
feature_loss_func = torch.nn.MSELoss()

# Number of epochs with batch size of 48 (may change this) on ~13k images per epoch
num_epochs = 10

train_vgg_block_3_ae = False
block_3_train_outputs = []
block_3_losses = []

if train_vgg_block_3_ae == True:
    # Move the model to GPU if exists for training
    model = model.to(device)
    
    for epoch in range(num_epochs):
        for img, _ in tqdm(data_loader):
            # Move batch to GPU if available
            img = img.to(device)
            # Autoencode input batch
            recon = model(img)
            # Calculate reconstruction loss
            recon_loss = recon_loss_func(img, recon)
            # Calculate feature loss
            feature_loss = feature_loss_func(model.encode(img), model.encode(recon))
            # Calculate combined l2 norm
            total_loss = recon_loss + feature_loss
            # Reset optimizer gradients from previous batch
            optim.zero_grad()
            # Calculate gradient loss
            total_loss.backward()
            # Update parameters based on current gradient
            optim.step()

        # If the current epoch is the final epoch then we want to save the reconstructions and the input just so we can
        # check how things are learning
        # We should remove this later
        if epoch == num_epochs - 1: outputs.append((epoch, img.detach().cpu().numpy(), recon.detach().cpu().numpy()))
        block_3_train_outputs.append((epoch, img.detach().cpu().numpy(), recon.detach().cpu().numpy()))
        block_3_losses.append(loss.item())
        print(f'Epoch: {epoch + 1}, Loss:{loss.item():.4f}')
    # Move the model back to CPU to free up GPU memory
    model = model.cpu()
    
else:
    model.load_state_dict(torch.load("models/vgg_ae_block_3.pth"))
    block_3_losses = np.loadtxt("models/vgg_ae_block_3_losses.txt").reshape(num_epochs, 1)
    print("Loaded AutoEncoderVGGBlock3 from disk")

Loaded AutoEncoderVGGBlock3 from disk


In [None]:
save_vgg_block_3_ae = False
if save_vgg_block_3_ae: 
    torch.save(model.state_dict(), 'models/vgg_ae_block_3.pth')
    np.savetxt("models/vgg_ae_block_3.txt", np.array(block_3_losses)

### 3 Choices for Style Transfer
- [] Image Masking for multi style transfer
- [] Blending multiple styled images
- [] HSV Color Preserving Style Transfer

### Things for Nick to Do:
- [] Lightning Module
- [] Embedding loss for block 1 (did not do because I am not sure how that worked for the block 3 model, needs training to see how it worked out)
- [] Git repo
- [-] Display evaluations
- [-] Clean up model save/loading
- [-] Save losses in the training loop