In [1]:
import numpy as np
import pandas as pd
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms, utils

from skimage import io, transform

import os

  from .collection import imread_collection_wrapper


In [2]:
# Setup tensorboard
# Starts the TensorBoard notebook extension on the `runs` directory, removes
# logs left over from earlier sessions and creates the module-level `writer`.
import tensorboard

%load_ext tensorboard
%tensorboard --logdir runs

# If you run this notebook locally, you can also access Tensorboard at 127.0.0.1:6006 now.

# Clean up old logs
# NOTE(review): this runs after TensorBoard was started on `runs` above, so the
# directory is deleted while the dashboard is already pointing at it.
if os.path.isdir('./runs/'):
    import shutil
    shutil.rmtree('runs/')

from torch.utils.tensorboard import SummaryWriter

# default `log_dir` is "runs"
# The writer is created explicitly on 'runs', i.e. the directory that was just
# cleaned and that the dashboard above reads from.
writer = SummaryWriter('runs')

Reusing TensorBoard on port 6006 (pid 4699), started 0:02:53 ago. (Use '!kill 4699' to kill it.)

In [9]:
# Preprocessing.
# One-off preparation of the CSV files that describe the train/validation/test
# splits. It is disabled by default; set RUN_PREPROCESSING to True to regenerate
# trainSet.csv, valSet.csv and testSet.csv.
RUN_PREPROCESSING = False


def split_train_val_test(attributes, test_frac=0.15, val_frac=0.05,
                         test_seed=3072021, val_seed=14072021):
    """Split a DataFrame into disjoint train, validation and test parts.

    Args:
        attributes (pd.DataFrame): Frame to split; rows are selected by index
            label, so the index is expected to be unique.
        test_frac (float): Fraction of all rows that goes to the test set.
        val_frac (float): Fraction of all rows that goes to the validation set.
        test_seed (int): Random state used when splitting off the test set.
        val_seed (int): Random state used when splitting off the validation set.

    Returns:
        tuple: (train_set, val_set, test_set) DataFrames.

    Raises:
        ValueError: If a fraction is negative or the two fractions together
            leave no room for a training set.
    """
    if test_frac < 0 or val_frac < 0 or test_frac + val_frac >= 1:
        raise ValueError("test_frac and val_frac must be non-negative and sum to less than 1")

    # Fraction of the data that is left after removing the test set.
    frac = 1 - test_frac
    # The validation fraction is given relative to the whole data set, so it
    # has to be rescaled to the remaining (train + validation) part.
    relative_val_frac = val_frac / frac

    # Split test off the training set.
    temp_train_set = attributes.sample(frac=frac, random_state=test_seed)
    test_set = attributes.drop(temp_train_set.index)

    # Split validation off the training set.
    train_set = temp_train_set.sample(frac=1 - relative_val_frac, random_state=val_seed)
    val_set = temp_train_set.drop(train_set.index)

    return train_set, val_set, test_set


# Step 1 (kept for reference, not executed): separate the persons whose image
# has 4 channels (treated as "no picture" by this code) from all the others
# and store both groups.
#
#    attributes = pd.read_csv("person.csv", sep=';')
#    original_attributes = attributes.copy()
#
#    df_copy = pd.DataFrame().reindex_like(attributes)
#    df_copy = df_copy.iloc[0:0]
#    counter_no_img = 0
#
#    for index, row in attributes.iterrows():
#        id_nr = row['id'] + '.jpg'
#        img_path = 'front/front/'
#        img = mpimg.imread(img_path + id_nr)
#
#        # histogram, bin_edges = np.histogram(img[:, :, 0], bins=256, range=(0, 256))
#        # occurrences = np.count_nonzero(histogram == 0)
#        # print(occurrences)
#        if index % 2500 == 0:
#            print(index, counter_no_img)
#        if img.shape[2] == 4: # occurrences>200:
#            counter_no_img += 1
#            df_copy = df_copy.append(original_attributes.iloc[index])
#            attributes = attributes.drop(original_attributes.index[index])
#
#    print(len(attributes))
#
#    df_copy.to_csv("attributesPersonsNoImages.csv", index=False)
#    attributes.to_csv("attributesPersonsWithImages.csv", index=False)

if RUN_PREPROCESSING:
    # Step 2: use this when the people without images are already filtered out.
    # The frame is read before anything else touches it, so the cell also
    # works on a fresh kernel.
    attributes = pd.read_csv("attributesPersonsWithImages.csv")

    train_set, val_set, test_set = split_train_val_test(attributes)

    train_set.to_csv("trainSet.csv", index=False)
    val_set.to_csv('valSet.csv', index=False)
    test_set.to_csv("testSet.csv", index=False)

#persons_with_no_images = pd.read_csv("attributesPersonsNoImages.csv")
#persons_attributes = pd.read_csv("attributesPersonsWithImages.csv")

#train_set = pd.read_csv("trainSet.csv")
#test_set = pd.read_csv("testSet.csv")

In [4]:
class FaceDataset(Dataset):
    """Face dataset.

    Every sample is a dictionary holding all columns of one row of the
    annotation file plus the corresponding picture under the key 'image'.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
                It must contain an 'id' column; '<id>.jpg' is the file name
                of the picture inside `root_dir`.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on the image of a sample.
        """
        self.attributes = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotated persons."""
        return len(self.attributes)

    def __getitem__(self, idx):
        """Return the sample at position `idx`.

        Args:
            idx (int or torch.Tensor): Position of the sample, 0 <= idx < len(self).

        Returns:
            dict: The attributes of the requested row and, under 'image',
            the (optionally transformed) picture.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Look the row up by position once, so the picture and the attributes
        # always belong to the same person. (The attributes used to be taken
        # from row 0 for every sample.)
        row = self.attributes.iloc[idx]

        img_name = os.path.join(self.root_dir, row['id'] + '.jpg')
        image = Image.open(img_name)

        # Create a dictionary with the properties of the image
        # and the image itself.
        sample = row.to_dict()
        sample['image'] = image

        if self.transform:
            sample['image'] = self.transform(sample['image'])

        return sample

In [5]:
# Define more abstract U-Net classes
# Source: https://github.com/milesial/Pytorch-UNet/blob/6aa14cbbc445672d97190fec06d5568a0a004740/unet/unet_parts.py#L28

# Constructed according to typical behaviour in the U-Net
# where two convolutions with kernel=3 are stacked.
class DoubleConv(nn.Module):
    """Two stacked 3x3 convolutions, each followed by batch norm and ReLU.

    Args:
        in_channels (int): Channels of the incoming feature map.
        out_channels (int): Channels of the produced feature map.
        mid_channels (int, optional): Channels between the two convolutions;
            falls back to `out_channels` when not given.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        # In U-net both convolutions normally produce the same number of
        # channels, hence the fallback to out_channels.
        hidden_channels = mid_channels if mid_channels else out_channels

        def conv_bn_relu(n_in, n_out):
            # padding=1 keeps height and width unchanged for a 3x3 kernel.
            return [
                nn.Conv2d(n_in, n_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(n_out),
                nn.ReLU(),
            ]

        self.double_conv = nn.Sequential(
            *conv_bn_relu(in_channels, hidden_channels),
            *conv_bn_relu(hidden_channels, out_channels),
        )

    def forward(self, x):
        """Apply both convolution stages to `x`."""
        return self.double_conv(x)

# Constructed according to typical behaviour in the U-Net
# where each downsampling step is immediately followed by
# a double convolution that (typically) doubles the number of channels.
class Down(nn.Module):
    """Encoder step: 2x2 max pooling followed by a `DoubleConv`.

    Args:
        in_channels (int): Channels of the incoming feature map.
        out_channels (int): Channels produced by the double convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pooling = nn.MaxPool2d(2)
        convolutions = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pooling, convolutions)

    def forward(self, x):
        """Downsample `x` and run the double convolution on the result."""
        return self.maxpool_conv(x)

# Does the reverse of the Down class, since we want
# to reconstruct the image.
class Up(nn.Module):
    """Decoder step: upsample, concatenate the skip connection, `DoubleConv`.

    Args:
        in_channels (int): Channels after concatenating the upsampled feature
            map with the skip connection.
        out_channels (int): Channels produced by the double convolution.
        bilinear (bool): Upsample with bilinear interpolation when True,
            otherwise with a transposed convolution.
    """

    def __init__(self, in_channels, out_channels, bilinear):
        super().__init__()

        # If bilinear, use the normal convolution to reduce the number of channels.
        if bilinear:
            # align_corners is passed explicitly: leaving it out makes PyTorch
            # warn on every forward pass and fall back to False anyway, so the
            # numerical behaviour is unchanged.
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
            # Note that here we don't have to halve the in_channels to account for the concatenation.
            # This is because this is already compensated for in the UNet itself. You could see it as
            # the previous layer taking this already into account and therefore outputting half the
            # channel size for the upsampling method.
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)

            # Note that switching to in_channels again (as opposed to in_channels // 2),
            # makes sense because this includes the added features from the encoding part,
            # which doubles the in_channels size.
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        """Upsample `x1`, merge it with the skip connection `x2` and convolve.

        Args:
            x1 (torch.Tensor): Feature map from the previous (deeper) layer, BCHW.
            x2 (torch.Tensor): Skip connection from the encoder, BCHW.

        Returns:
            torch.Tensor: Result of the double convolution, with the height
            and width of `x2`.
        """
        x1 = self.up(x1)

        # Input is BCHW (batch, channel, height, width)
        # In order to account for the correct height and width size
        # when concatenating, we have to pad x1 to match x2.
        diff_y = x2.size(2) - x1.size(2)
        diff_x = x2.size(3) - x1.size(3)

        # F.pad expects (left, right, top, bottom) for the last two dimensions;
        # an odd difference puts the extra pixel on the right/bottom.
        x1 = F.pad(x1, [diff_x // 2, diff_x - diff_x // 2,
                        diff_y // 2, diff_y - diff_y // 2])

        x = torch.cat([x2, x1], dim=1)

        return self.conv(x)

In [6]:
# Define a convolutional neural network
class UNet(nn.Module):
    """U-Net that maps a 3-channel image to a 3-channel image.

    Args:
        bilinear (bool): Forwarded to every `Up` block; selects bilinear
            upsampling instead of transposed convolutions.
    """

    def __init__(self, bilinear):
        super().__init__()

        self.n_channels = 3
        self.bilinear = bilinear

        # With bilinear upsampling the upsampling itself does not reduce the
        # number of channels, so the surrounding layers output half as many.
        factor = 2 if bilinear else 1
        c1, c2, c3, c4, c5 = 64, 128, 256, 512, 1024

        # Encoder.
        self.inc = DoubleConv(self.n_channels, c1)
        self.down1 = Down(c1, c2)
        self.down2 = Down(c2, c3)
        self.down3 = Down(c3, c4)
        self.down4 = Down(c4, c5 // factor)
        # Decoder.
        self.up1 = Up(c5, c4 // factor, bilinear)
        self.up2 = Up(c4, c3 // factor, bilinear)
        self.up3 = Up(c3, c2 // factor, bilinear)
        self.up4 = Up(c2, c1, bilinear)
        # 1x1 convolution back to the number of image channels.
        self.outc = nn.Conv2d(c1, self.n_channels, kernel_size=1)

    def forward(self, x):
        """Run the encoder, then the decoder with skip connections."""
        # Keep every encoder output; the decoder consumes them in reverse order.
        encoder_outputs = [self.inc(x)]
        for down in (self.down1, self.down2, self.down3, self.down4):
            encoder_outputs.append(down(encoder_outputs[-1]))

        decoded = encoder_outputs.pop()
        for up in (self.up1, self.up2, self.up3, self.up4):
            decoded = up(decoded, encoder_outputs.pop())

        return self.outc(decoded)

In [7]:
# Load the data.
# Both splits use the same preprocessing: resize every picture to 48x48 and
# convert it to a tensor.
image_transform = transforms.Compose([
    transforms.Resize((48, 48)),
    transforms.ToTensor()
])

train_set = FaceDataset(csv_file="trainSet.csv",
                        root_dir='data/front/front',
                        transform=image_transform)

test_set = FaceDataset(csv_file="testSet.csv",
                       root_dir='data/front/front',
                       transform=image_transform)

trainloader = DataLoader(train_set, batch_size=4, shuffle=True, num_workers=0)

# Show the data in Tensorboard
dataiter = iter(trainloader)
# Use the built-in next(): DataLoader iterators follow the regular iterator
# protocol and are not guaranteed to offer a `.next()` method.
data = next(dataiter)
img_grid = utils.make_grid(data['image'])
writer.add_image('dataset_images', img_grid)

# Keeps track of how often we train the model,
# this way we will see the loss logs in different plots.
n_runs = 0

In [8]:
net = UNet(bilinear=True)

criterion = nn.MSELoss() # average loss over the whole reconstructed image.
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Add a scheme of our network to Tensorboard.
# `data` is the batch that was fetched when the data was loaded.
writer.add_graph(net.cpu(), data['image'])
writer.close()

# Every training run logs to its own sub-directory of `runs`,
# so the loss curves of different runs end up in different plots.
writer = SummaryWriter('runs/{}'.format(n_runs))

# Train the network as an autoencoder: the input image is also the target.
for epoch in range(1):
    for i, data in enumerate(trainloader, start=0):
        inputs = data['image']
        labels = data['image']

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Log the loss to Tensorboard,
        # giving a nice loss over time.
        global_step = epoch * len(trainloader) + i
        writer.add_scalar('Loss/train', loss.item(), global_step)

        if i % 10 == 0:
            # Reorder the first image of the batch from CHW to HWC for matplotlib.
            image_input = labels[0].detach().permute(1, 2, 0).numpy()
            image_output = outputs[0].detach().permute(1, 2, 0).numpy()

            # The last layer of the network is a plain convolution, so its
            # output is not bounded. Clip before the uint8 cast; otherwise
            # out-of-range values wrap around and show up as bright/dark speckles.
            image_output = np.clip(image_output, 0.0, 1.0)

            # Add a comparison between input and output image to Tensorboard.
            fig, (ax1, ax2) = plt.subplots(1, 2)
            ax1.imshow(image_input)
            ax2.imshow((image_output * 255).astype(np.uint8))

            writer.add_figure('input vs. output',
                              fig,
                              global_step=global_step)

        # Only a short demonstration run: stop after batch index 100.
        if i == 100:
            break

n_runs = n_runs + 1
print('Finished training')

  "See the documentation of nn.Upsample for details.".format(mode)


Finished training


In [9]:
# Visualise the weights of the second convolution inside `down4`, the deepest
# encoder block. These are convolution kernels rather than feature maps, so
# probably either the latent space itself or this visualisation should change.

# `.data` gives the weight tensor without autograd tracking.
latent_conv_weights = net.down4.maxpool_conv[1].double_conv[3].weight.data

# Only the kernels of the first output channel are logged: one figure per
# input channel, with the input-channel index as the TensorBoard step.
first_filter = latent_conv_weights[0]
for channel_idx, kernel in enumerate(first_filter):
    fig, ax = plt.subplots()
    ax.imshow(kernel.numpy())
    writer.add_figure('Latent space weights', fig, global_step=channel_idx)

In [10]:
# Write all pending events to disk first, then release the writer.
# (Flushing after close() has no effect.)
writer.flush()
writer.close()