In [1]:
import torch

In [2]:
# Sanity check: report whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

# Data

In [3]:
import data_handling
from torch.utils.data import DataLoader
import torchvision

In [4]:
# --- Paths and run configuration ---------------------------------------
DATASET_DIRECTORY = "./CelebDataProcessed"
ANNOTATIONS_DIRECTORY = "./annotations.csv"  # path to the annotations .csv file
NAME = ""
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
BATCH_SIZE = 64
SPLIT_SEED = 0  # fixes the train/test partition so a kernel restart reproduces it
TRANSFORM = torchvision.transforms.Compose([
    torchvision.transforms.ToPILImage(),
    torchvision.transforms.ToTensor(),
])

# Get the dataset
pubfig = data_handling.PublicFigureDataset(ANNOTATIONS_DIRECTORY, DATASET_DIRECTORY, NAME, transform=TRANSFORM)

# 80-20 train test split (seeded, so the held-out set is the same on every run)
train_size = int(0.8 * len(pubfig))
test_size = len(pubfig) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    pubfig,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create the dataloaders. Only the training loader needs shuffling; the
# evaluation loss is averaged over the whole test set, so order is irrelevant.
train_dl = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dl = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

### Focused Datasets

In [5]:
# One dataset per decoder: dataset_a is used with decoder "a" and dataset_b with
# decoder "b" in the training code further down.
# NOTE(review): both are constructed with "" as the third argument, the same
# value as NAME in the configuration cell, so A and B are built identically
# here. Presumably two different names belong in these calls - TODO confirm.
dataset_a = data_handling.PublicFigureDataset(ANNOTATIONS_DIRECTORY, DATASET_DIRECTORY, "", transform=TRANSFORM)
dataset_b = data_handling.PublicFigureDataset(ANNOTATIONS_DIRECTORY, DATASET_DIRECTORY, "", transform=TRANSFORM)

In [6]:
# 80/20 train/test split of each focused dataset. The generators are seeded so
# that restarting the kernel reproduces exactly the same partitions.
FOCUSED_SPLIT_SEED = 0

train_size_a = int(0.8 * len(dataset_a))
test_size_a = len(dataset_a) - train_size_a
train_dataset_a, test_dataset_a = torch.utils.data.random_split(
    dataset_a,
    [train_size_a, test_size_a],
    generator=torch.Generator().manual_seed(FOCUSED_SPLIT_SEED),
)

train_size_b = int(0.8 * len(dataset_b))
test_size_b = len(dataset_b) - train_size_b
train_dataset_b, test_dataset_b = torch.utils.data.random_split(
    dataset_b,
    [train_size_b, test_size_b],
    generator=torch.Generator().manual_seed(FOCUSED_SPLIT_SEED + 1),
)

In [7]:
# Training loaders are shuffled every epoch; evaluation loaders are not, since
# the test loss is computed over the whole split and does not depend on order.
train_dl_a = DataLoader(train_dataset_a, batch_size=BATCH_SIZE, shuffle=True)
test_dl_a = DataLoader(test_dataset_a, batch_size=BATCH_SIZE, shuffle=False)

train_dl_b = DataLoader(train_dataset_b, batch_size=BATCH_SIZE, shuffle=True)
test_dl_b = DataLoader(test_dataset_b, batch_size=BATCH_SIZE, shuffle=False)

# Model and Parameters

In [8]:
import models
import os

In [9]:
MODEL_NAME = "Focused"
LATENT_DIM = 100
EPOCHS = 10_000
DISCRIM = True

# Snapshot frames are written to VIDEO_PATH/a/ and VIDEO_PATH/b/, one folder
# per decoder. exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
VIDEO_PATH = "./Outputs/Video/" + MODEL_NAME
for newpath in (VIDEO_PATH, VIDEO_PATH + "/a/", VIDEO_PATH + "/b/"):
    os.makedirs(newpath, exist_ok=True)

In [10]:
# Instantiate the network and move it to the selected device.
# The training functions below call model.encode(x), model.decode(z, "a"/"b")
# and, when DISCRIM is set, model.discriminator(x, "a"/"b").
model = models.SingleEnc(LATENT_DIM, discriminator=DISCRIM).to(device)
# model = models.ModifiedSingleEnc(LATENT_DIM, discriminator=DISCRIM).to(device)

In [11]:
# Pixel-wise reconstruction loss.
loss_fn = torch.nn.MSELoss()
lr= 0.001
# Optimizer used for the autoencoder (reconstruction) update in trainStep.
optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-05)
# Optimizer used for the discriminator update in trainStep (10x the learning rate above).
# NOTE(review): this is also built over *all* of model.parameters(), not only
# the discriminator's weights. Presumably it should be restricted to the
# discriminator parameters - TODO confirm against the `models` module.
optimDis = torch.optim.Adam(model.parameters(), lr=.01, weight_decay=1e-05)
# Multiplies the learning rate of `optim` by 0.1 at scheduler steps 5 and 10.
# train_epoch steps the scheduler once per epoch.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optim, [5, 10], 0.1)

# Misc.

In [12]:
from csv import writer
import pandas as pd

def initializeCSV(model_name, col_names=['Epoch', 'Avg_Train_lossA', 'Avg_Train_lossB', 'TestLossA', 'TestLossB']):
    """Create (or overwrite) the loss log for a run, containing only the header row.

    Args:
        model_name: Run name; the log is written to ./Outputs/CSV/<model_name>.csv.
        col_names: Column headers, in the order rows will later be appended.
    """
    import os  # local import keeps this helper usable on its own

    out_dir = './Outputs/CSV/'
    # The notebook never creates this folder elsewhere; without it to_csv fails.
    os.makedirs(out_dir, exist_ok=True)

    header_only = pd.DataFrame(columns=list(col_names))
    header_only.to_csv(out_dir + model_name + '.csv', index=False)

def writeToCSV(model_name, data):
    """Append one row to ./Outputs/CSV/<model_name>.csv.

    Args:
        model_name: Run name used to build the log file path.
        data: Iterable of values written as a single comma-separated row.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open('./Outputs/CSV/' + model_name + '.csv', 'a') as file:
        writer(file, lineterminator='\n').writerow(data)

### Perceptual Loss (Unused)

Loading an additional pretrained classifier to compute a perceptual loss causes `OutOfMemoryError`s, so the perceptual-loss term is left commented out in `trainStep`.

In [13]:
from collections import namedtuple

LossOutput = namedtuple("LossOutput", ["conv2d_2b", "conv2d_4b", "last_linear", "logits"])

# https://discuss.pytorch.org/t/how-to-extract-features-of-an-image-from-a-trained-model/119/3
class LossNetwork(torch.nn.Module):
    """Runs a wrapped model child-by-child and collects selected intermediate outputs.

    The input is passed through the wrapped model's direct children in
    registration order. The output of every child whose name is a key of
    ``layer_name_mapping`` is stored under the mapped ``LossOutput`` field.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model
        # Child-module name -> LossOutput field.
        # NOTE(review): these keys look like nn.Sequential-style indices; verify
        # that they match the child names of the model actually wrapped.
        self.layer_name_mapping = {
            '3': "conv2d_2b",
            '8': "conv2d_4b",
            #'15': "last_linear",
            #'22': "logits"
        }

    def forward(self, x):
        """Return a LossOutput; fields with no mapped layer are left as None."""
        # Pre-fill every field so that LossOutput(**...) does not raise TypeError
        # when only a subset of the four fields is mapped (as is the case above).
        collected = dict.fromkeys(LossOutput._fields)
        for child_name, child in self.model.named_children():
            x = child(x)
            field = self.layer_name_mapping.get(child_name)
            if field is not None:
                collected[field] = x
        return LossOutput(**collected)

In [14]:
from facenet_pytorch import InceptionResnetV1

# Model pretrained on VGGFace2
r = InceptionResnetV1(pretrained='vggface2').to(device).eval()
# Wrap the classifier so the outputs of selected child modules can be read out.
loss_network = LossNetwork(r)
loss_network.eval()
# Removes only the name `r`; loss_network.model still references the same
# network, so this does not release its memory.
del r
# Takes images of size 160px by 160px as input

In [15]:
# Discard the feature-extraction wrapper created in the previous cell.
del loss_network
# Load the VGGFace2-pretrained classifier again, in eval mode, on the training device.
# NOTE(review): the only references to `resnet` in the training functions below
# are commented out (perceptual loss in trainStep), so this model appears to
# occupy device memory without being used - confirm before keeping this cell.
resnet = InceptionResnetV1(pretrained='vggface2').to(device).eval()

# Training Functions

In [16]:
from torchvision.utils import save_image
from numpy.random import randint
import time
import gc
import helper as h
import numpy as np

In [17]:
def _record_frame(model, image, decoder, path):
    """Decode the fixed control image with `decoder` and save the result to `path`."""
    model.eval()
    with torch.no_grad():  # a snapshot only; no autograd graph is needed
        frame = model.decode(model.encode(image), decoder)
    save_image(frame, path)
    model.train()


def _train_decoder_pass(model, decoder, ep, trainloader, control_image, loss_fn, optimizer,
                        device, default_dtype, video_path, discriminator, discriminatorOpt, start_time):
    """Run one pass over `trainloader`, training decoder `decoder` ("a" or "b").

    Returns three lists of per-batch losses: (reconstruction, discriminator,
    generator). The last two stay empty when `discriminator` is falsy.
    """
    recon_losses, dis_losses, gen_losses = [], [], []
    tag = decoder.upper()

    # The labels (second element of each loader tuple) are ignored.
    for i, (image_batch, _) in enumerate(trainloader):
        # Record how the control image is reconstructed before this update.
        if len(video_path) > 0:
            _record_frame(model, control_image, decoder,
                          video_path + "/{}/{}_{}.png".format(decoder, ep, i))

        # Move tensor to the proper device
        image_batch = image_batch.type(default_dtype).to(device)

        if discriminator:
            loss, dis_loss, gen_loss = trainStep(model, image_batch, loss_fn, optimizer, decoder, discriminator, discriminatorOpt)
            gen_losses.append(gen_loss.detach().cpu().numpy())
            dis_losses.append(dis_loss.detach().cpu().numpy())
        else:
            loss = trainStep(model, image_batch, loss_fn, optimizer, decoder, discriminator, discriminatorOpt)

        if i % 20 == 0:
            time_lapse = time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time))
            print('Epoch:{:2d}{} | Iter:{:5d} | Time: {} | Train_{} Loss: {:.4f}'.format(ep + 1, tag, i, time_lapse, tag, loss.item()))

        recon_losses.append(loss.detach().cpu().numpy())

    return recon_losses, dis_losses, gen_losses


def train_epoch(model, model_name, device, trainloader_a, trainloader_b, loss_fn, optimizer, testloader_a, testloader_b, dataset_a, dataset_b, epochs=5, default_dtype=torch.FloatTensor, video_path="", scheduler=None, discriminator=False, discriminatorOpt=None):
    """Train the model for `epochs` epochs, alternating between decoder A and decoder B.

    Each epoch makes one full pass over `trainloader_a` (decoder "a"), then one
    over `trainloader_b` (decoder "b"), evaluates both decoders on their test
    loaders, steps the scheduler if one was given, and appends a row of mean
    losses to the run's CSV log.

    Args:
        model: Network exposing encode(), decode(z, type) and, when
            `discriminator` is set, discriminator(x, type).
        model_name: Run name; selects the CSV log file.
        device: Device that batches are moved to.
        trainloader_a, trainloader_b: Loaders yielding (image_batch, label) tuples.
        loss_fn: Reconstruction loss.
        optimizer: Optimizer for the autoencoder update.
        testloader_a, testloader_b: Evaluation loaders for decoders "a" and "b".
        dataset_a, dataset_b: Datasets a random control image is drawn from
            when `video_path` is non-empty.
        epochs: Number of epochs to run.
        default_dtype: Tensor type that batches are cast to.
        video_path: If non-empty, one reconstruction of the control image is
            saved under <video_path>/a/ or <video_path>/b/ before every
            training step; both folders must already exist.
        scheduler: Optional LR scheduler, stepped once per epoch.
        discriminator: Whether to also run the discriminator update and log
            discriminator/generator losses.
        discriminatorOpt: Optimizer for the discriminator update.
    """
    if discriminator:
        initializeCSV(model_name, col_names=['Epoch', 'Avg_Train_lossA', 'Avg_Train_lossB', 'TestLossA', 'TestLossB', 'DiscrimA', 'DiscrimB', 'GenA', 'GenB'])
    else:
        initializeCSV(model_name)
    start_time = time.time()

    image_a = image_b = None
    if len(video_path) > 0:
        # Get an image from dataset A
        index = randint(len(dataset_a))  # random control image, TODO: Feed it a specific control image
        image_a, name = h.getImage(index, dataset_a)
        image_a = image_a.unsqueeze(0).to(device)
        save_image(image_a, video_path + "/{}a.png".format(name))

        # Get an image from dataset B
        index = randint(len(dataset_b))
        image_b, name = h.getImage(index, dataset_b)
        image_b = image_b.unsqueeze(0).to(device)
        save_image(image_b, video_path + "/{}b.png".format(name))

    for ep in range(epochs):
        # test_epoch() leaves the model in eval mode, so training mode has to be
        # restored at the start of every epoch, not only before the first one.
        model.train()

        # Use the first trainloader to train decoder A
        train_loss_a, dis_loss_a, gen_loss_a = _train_decoder_pass(
            model, "a", ep, trainloader_a, image_a, loss_fn, optimizer,
            device, default_dtype, video_path, discriminator, discriminatorOpt, start_time)

        # Use trainloader B to train decoder B
        train_loss_b, dis_loss_b, gen_loss_b = _train_decoder_pass(
            model, "b", ep, trainloader_b, image_b, loss_fn, optimizer,
            device, default_dtype, video_path, discriminator, discriminatorOpt, start_time)

        gc.collect()
        test_loss_a = test_epoch(model, device, testloader_a, loss_fn, default_dtype=default_dtype, type="a")
        test_loss_b = test_epoch(model, device, testloader_b, loss_fn, default_dtype=default_dtype, type="b")
        print('\n EPOCH {}/{} \t Avg. Train_A loss this Epoch {} \t Avg. Train_B loss this Epoch {} \t Test loss A {} \t Test loss B {}'.format(ep + 1, epochs, np.mean(train_loss_a),np.mean(train_loss_b), test_loss_a, test_loss_b))

        # The scheduler is optional (its default is None).
        if scheduler is not None:
            scheduler.step()

        # Write the losses to a .csv file
        data = [ep + 1, np.mean(train_loss_a), np.mean(train_loss_b), test_loss_a.item(), test_loss_b.item()]
        if discriminator:
            data += [np.mean(dis_loss_a), np.mean(dis_loss_b), np.mean(gen_loss_a), np.mean(gen_loss_b)]
        writeToCSV(model_name, data)

    return

def test_epoch(model, device, testloader, loss_fn, default_dtype=torch.FloatTensor, type="a"):
    """Evaluate decoder `type` on `testloader` and return the resulting loss tensor.

    The model is switched to evaluation mode and is left in that mode on return.
    """
    model.eval()

    # Evaluation only: gradient tracking is disabled for the whole pass.
    with torch.no_grad():
        loss = testStep(model, testloader, loss_fn, device, default_dtype, type=type)

    return loss.data

def testStep(model, testloader, lossFunction, device, default_dtype, type, discriminator=False):

    conc_out = []
    conc_label = []
    for image_batch, _ in testloader:
        # Move tensor to the proper device
        image_batch = image_batch.type(default_dtype).to(device)
        reconstruction = model.decode(model.encode(image_batch), type)

        # Append the network output and the original image to the lists
        conc_out.append(reconstruction.cpu())
        conc_label.append(image_batch.cpu())

    # Create a single tensor with all the values in the lists
    conc_out = torch.cat(conc_out)
    conc_label = torch.cat(conc_label) 

    # Evaluate global loss
    val_loss = lossFunction(conc_out, conc_label)
    if discriminator:
        with torch.no_grad():
            fake_labels = model.discriminator(reconstruction, type)
        generator_loss = torch.mean(-torch.log(fake_labels))
        val_loss = val_loss + generator_loss
    return val_loss

def trainStep(model, batch, loss_fn, optimizer, type, discrim, discriminatorOptimizer):
    """Run one optimisation step for decoder `type` on a single batch.

    First the autoencoder is updated through `optimizer`. When `discrim` is
    truthy, the discriminator is then updated through `discriminatorOptimizer`,
    using the detached reconstruction as the fake sample and the batch as the
    real one.

    Args:
        model: Network exposing encode(), decode(z, type) and, if `discrim` is
            set, discriminator(x, type).
        batch: Input images, already on the target device.
        loss_fn: Reconstruction loss taking (reconstruction, batch).
        optimizer: Optimizer for the autoencoder update.
        type: Decoder selector passed to decode() / discriminator().
        discrim: Whether to run the adversarial part.
        discriminatorOptimizer: Optimizer for the discriminator update; only
            used when `discrim` is truthy.

    Returns:
        `reconstruction_loss` when `discrim` is falsy, otherwise the tuple
        `(reconstruction_loss, discriminator_loss, generator_loss)`.
    """
    reconstruction = model.decode(model.encode(batch), type)
    reconstruction_loss = loss_fn(reconstruction, batch)

    if discrim:
        # NOTE(review): the discriminator output is computed under no_grad, so
        # generator_loss carries no gradient and only shifts the logged value of
        # `loss`; it does not influence the autoencoder update. Presumably an
        # adversarial gradient was intended - confirm before changing, because
        # `optimizer` may also own the discriminator's weights.
        with torch.no_grad():
            fake_labels = model.discriminator(reconstruction, type)
        generator_loss = torch.mean(-torch.log(fake_labels + 1e-12))

        # Evaluate loss
        loss = reconstruction_loss + generator_loss
    else:
        loss = reconstruction_loss

    ###############Perceptual Loss###############
    #transform = torchvision.transforms.Resize(160)
    #resizedBatch = transform(batch)
    #resizedReconstruction = transform(reconstruction)

    #features_y = resnet(resizedBatch)
    #features_xc = resnet(resizedReconstruction)

    #f_xc_c = Variable(features_xc[1].data, requires_grad=False)

    #content_loss = loss_fn(features_y, features_xc)
    #loss = loss + content_loss
    ##############################################

    # Backward pass for autoencoder. Nothing below backpropagates through this
    # graph again (the discriminator step uses detached inputs), so it is not
    # retained; keeping it alive only costs memory.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if discrim:
        fake_labels = model.discriminator(reconstruction.detach(), type)
        real_labels = model.discriminator(batch.detach(), type)

        discriminator_loss = torch.mean(-(torch.log(real_labels + 1e-12) + torch.log(1 - fake_labels + 1e-12)))

        discriminatorOptimizer.zero_grad()
        discriminator_loss.backward()
        discriminatorOptimizer.step()
        return reconstruction_loss, discriminator_loss, generator_loss

    return reconstruction_loss


# Train the Model

In [18]:
# Decoder A trains on loader A and decoder B on loader B, mirroring the test
# loaders. Previously train_dl_a was passed for both, so decoder B never saw
# dataset B during training.
train_epoch(model, MODEL_NAME, device, train_dl_a, train_dl_b, loss_fn, optim, test_dl_a, test_dl_b, dataset_a, dataset_b, epochs=EPOCHS, video_path=VIDEO_PATH, scheduler=scheduler, discriminator=DISCRIM, discriminatorOpt=optimDis)



KeyboardInterrupt: 

In [None]:
#h.saveWeights(model, "")

In [None]:
# Show the layer-by-layer structure of the network.
print(model)

SingleEnc(
  (encoder): Sequential(
    (0): Conv2d(3, 64, kernel_size=(9, 9), stride=(4, 4))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): Dropout2d(p=0.5, inplace=False)
    (4): Conv2d(64, 128, kernel_size=(5, 5), stride=(2, 2), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): LeakyReLU(negative_slope=0.01)
    (7): Dropout2d(p=0.5, inplace=False)
    (8): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): Dropout2d(p=0.5, inplace=False)
    (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (13): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): LeakyReLU(negative_slope=0.01)
    (15): Dropout2d(p=0.5, in