In [0]:
import os
import torch
import pandas as pd
from skimage import io, transform
import skimage
import numpy as np
import matplotlib.pyplot as plt
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

import warnings
# Silence every warning category for the rest of the session so that library
# notices do not clutter the cell outputs.
warnings.filterwarnings('ignore')

# Switch matplotlib to interactive mode.
plt.ion()

In [0]:
from google.colab import files

# Opens Colab's file picker; choose faces.zip, which is unpacked further down.
# The result is bound to a name so the notebook does not echo the returned
# value (the uploaded file contents) as this cell's output.
uploaded = files.upload()

In [0]:
from shutil import unpack_archive

# os.makedirs(..., exist_ok=True) keeps this cell re-runnable: a plain
# `mkdir data` reports an error once the directory already exists.
os.makedirs('data', exist_ok=True)
unpack_archive('faces.zip', 'data/')

In [0]:
landmarks_frame = pd.read_csv('data/faces/face_landmarks.csv')

# Inspect one annotation row: column 0 is the image file name, the remaining
# columns are reshaped into one (x, y) pair per landmark.
n = 50
img_name = landmarks_frame.iloc[n, 0]
# Series.as_matrix() no longer exists in current pandas releases; to_numpy()
# is the supported way to get the underlying values as an ndarray.
landmarks = landmarks_frame.iloc[n, 1:].to_numpy(dtype=float).reshape(-1, 2)

print('Image name: {}'.format(img_name))
print('Landmarks shape: {}'.format(landmarks.shape))
print('First 4 Landmarks: {}'.format(landmarks[:4]))

In [0]:
def show_landmarks(image, landmarks):
  """Draw `image` and overlay its landmark points as small red dots.

  Args:
    image: Image array handed to `plt.imshow`.
    landmarks: 2-D array read as `[:, 0]` for x and `[:, 1]` for y.
  """
  plt.imshow(image)
  xs = landmarks[:, 0]
  ys = landmarks[:, 1]
  plt.scatter(xs, ys, s=10, marker='.', c='r')
  # Short pause so the plot gets a chance to refresh.
  plt.pause(0.001)

# Plot the image selected above together with its landmarks.
plt.figure()
image_path = os.path.join('data/faces/', img_name)
show_landmarks(io.imread(image_path), landmarks)
plt.show()

### Dataset class
`torch.utils.data.Dataset` is an abstract class representing a dataset. Your custom dataset should inherit `Dataset` and override the following methods:
- `__len__` so that `len(dataset)` returns the size of the dataset.
- `__getitem__` to support indexing, so that `dataset[i]` returns the i-th sample.

In [0]:
class FaceLandmarksDataset(torch.utils.data.Dataset):
  """Dataset of face images paired with their landmark coordinates."""

  def __init__(self, csv_file, root_dir, transform=None):
    """Read the annotation table and remember where the images live.

    Args:
      csv_file (string): Path to the csv file with annotations.
      root_dir (string): Directory with all the images.
      transform (callable, optional): Optional transform to be applied
        on a sample.
    """
    self.landmarks_frame = pd.read_csv(csv_file)
    self.root_dir = root_dir
    self.transform = transform

  def __len__(self):
    """Return the number of rows in the annotation table."""
    return len(self.landmarks_frame)

  def __getitem__(self, idx):
    """Load the sample stored in row `idx` of the annotation table.

    Args:
      idx: Row position; a tensor is converted with `tolist()` first.

    Returns:
      A dict with keys 'image' (as returned by `io.imread`) and 'landmarks'
      (float array with two columns), passed through `self.transform` when
      one was given.
    """
    row = idx.tolist() if torch.is_tensor(idx) else idx

    # Column 0 is the file name, every other column is a coordinate.
    file_name = self.landmarks_frame.iloc[row, 0]
    image = io.imread(os.path.join(self.root_dir, file_name))

    coordinates = self.landmarks_frame.iloc[row, 1:]
    points = np.array([coordinates]).astype('float').reshape(-1, 2)

    sample = {'image': image, 'landmarks': points}
    if self.transform is None:
      return sample
    return self.transform(sample)

In [0]:
face_dataset = FaceLandmarksDataset(csv_file='data/faces/face_landmarks.csv',
                                    root_dir='data/faces/')

# Preview at most the first four samples side by side. The loop is bounded up
# front so the figure is shown even when the dataset holds fewer than four
# samples (no reliance on reaching a `break` inside the loop).
num_preview = min(4, len(face_dataset))

fig = plt.figure()

for i in range(num_preview):
  sample = face_dataset[i]

  print(i, sample['image'].shape, sample['landmarks'].shape)

  ax = plt.subplot(1, 4, i + 1)
  plt.tight_layout()
  ax.set_title('Sample #{}'.format(i))
  show_landmarks(**sample)

plt.show()

### Transforms
One issue we can see from the above is that the samples are not of the same size. Most neural networks expect images of a fixed size. Therefore, we will need to write some preprocessing code.
- `Rescale`: to scale the image
- `RandomCrop`: to crop from image randomly. This is data augmentation.
- `ToTensor`: to convert the numpy images to torch images (we need to swap axes).

We will write them as callable classes instead of simple functions so that the parameters of the transform need not be passed every time it is called. For this, we just need to implement the `__call__` method and, if required, the `__init__` method.
We can then use a transform like this:

```python
tsfm = Transform(params)
transformed_sample = tsfm(sample)
```

In [0]:
class Rescale(object):
  """Resize the image of a sample and scale its landmarks to match.

  Args:
    output_size (tuple or int): Target size. A tuple is used directly as the
      (height, width) of the output. An int is the length given to the
      shorter image edge; the other edge is scaled by the same ratio.
  """

  def __init__(self, output_size):
    assert isinstance(output_size, (int, tuple))
    self.output_size = output_size

  def _target_shape(self, h, w):
    """Return the integer (new_h, new_w) for an input of height h, width w."""
    size = self.output_size
    if not isinstance(size, int):
      new_h, new_w = size
    elif h > w:
      new_h, new_w = size * h / w, size
    else:
      new_h, new_w = size, size * w / h
    return int(new_h), int(new_w)

  def __call__(self, sample):
    """Return a new sample dict holding the resized image and landmarks."""
    image = sample['image']
    landmarks = sample['landmarks']

    h, w = image.shape[:2]
    new_h, new_w = self._target_shape(h, w)
    resized = skimage.transform.resize(image, (new_h, new_w))

    # Landmarks are stored as (x, y) while image axes are (row, column),
    # so x is scaled by the width ratio and y by the height ratio.
    ratio = [new_w / w, new_h / h]
    return {'image': resized, 'landmarks': landmarks * ratio}

In [0]:
class RandomCrop(object):
  """Crop a randomly placed window out of the image in a sample.

  Args:
    output_size (tuple or int): Desired output size. If int, square crop is made.
  """

  def __init__(self, output_size):
    assert isinstance(output_size, (int, tuple))
    if isinstance(output_size, int):
      self.output_size = (output_size, output_size)
    else:
      assert len(output_size) == 2
      self.output_size = output_size

  def __call__(self, sample):
    """Return a new sample with the cropped image and shifted landmarks.

    Raises:
      ValueError: If the requested crop is larger than the image.
    """
    image, landmarks = sample['image'], sample['landmarks']

    h, w = image.shape[:2]
    new_h, new_w = self.output_size
    if new_h > h or new_w > w:
      raise ValueError(
          'Crop size {} is larger than the image size {}'.format(
              (new_h, new_w), (h, w)))

    # randint's upper bound is exclusive; the +1 makes the last valid offset
    # reachable and lets a crop of exactly the image size through.
    top = np.random.randint(0, h - new_h + 1)
    left = np.random.randint(0, w - new_w + 1)

    image = image[top: top + new_h,
                  left: left + new_w]

    # Landmarks are (x, y), so they shift by (left, top).
    landmarks = landmarks - [left, top]

    return {'image': image, 'landmarks': landmarks}


In [0]:
class ToTensor(object):
  """Turn the numpy arrays of a sample into torch tensors."""

  def __call__(self, sample):
    """Return a new sample dict whose 'image' and 'landmarks' are tensors."""
    landmarks = sample['landmarks']
    # numpy images are H x W x C while torch images are C x H x W,
    # so the colour axis is moved to the front.
    chw_image = sample['image'].transpose(2, 0, 1)
    return {
        'image': torch.from_numpy(chw_image),
        'landmarks': torch.from_numpy(landmarks),
    }

### Compose transforms

We want to rescale the shorter side of the image to 256 and then randomly crop a square of size 224 from it.

In [0]:
scale = Rescale(256)
crop = RandomCrop(128)
composed = torchvision.transforms.Compose([Rescale(256), RandomCrop(224)])

# Apply each transform to the same sample and plot the results side by side,
# titling every panel with the transform's class name.
fig = plt.figure()
sample = face_dataset[65]
demo_transforms = (scale, crop, composed)
for position, tsfrm in enumerate(demo_transforms, start=1):
  transformed_sample = tsfrm(sample)

  ax = plt.subplot(1, 3, position)
  plt.tight_layout()
  ax.set_title(type(tsfrm).__name__)
  show_landmarks(**transformed_sample)
plt.show()

In [0]:
# Preprocessing applied to every sample: rescale, random crop, then
# conversion to tensors.
preprocessing = torchvision.transforms.Compose([
    Rescale(256),
    RandomCrop(224),
    ToTensor(),
])

transformed_dataset = FaceLandmarksDataset(
    csv_file='data/faces/face_landmarks.csv',
    root_dir='data/faces/',
    transform=preprocessing)

In [0]:
# Print the tensor sizes of the first four transformed samples (or of all
# samples when the dataset holds fewer than four).
for i in range(min(4, len(transformed_dataset))):
  sample = transformed_dataset[i]
  print(i, sample['image'].size(), sample['landmarks'].size())

In [0]:
# `torch.utils.data.DataLoader` is an iterator that takes care of batching,
# shuffling and loading the data in parallel.
dataloader = DataLoader(dataset=transformed_dataset,
                        batch_size=4,
                        shuffle=True,
                        num_workers=4)

In [0]:
# Helper function to show a batch
def show_landmarks_batch(sample_batched):
    """Show the images of a batch in one grid with their landmarks overlaid.

    Args:
        sample_batched: Dict with an 'image' tensor indexed as
            (batch, channel, height, width) and a 'landmarks' tensor indexed
            as (batch, point, xy).

    Note:
        The landmark offsets assume all tiles sit in a single grid row, i.e.
        the batch is not larger than `make_grid`'s images-per-row setting.
    """
    images_batch, landmarks_batch = \
            sample_batched['image'], sample_batched['landmarks']
    batch_size = len(images_batch)
    # Tiles are laid out left to right, so the horizontal step between them
    # is the image WIDTH (dim 3), not the height (dim 2).
    im_width = images_batch.size(3)
    grid_border_size = 2

    # Pass the padding explicitly so the offsets below cannot drift away from
    # the border that make_grid actually draws.
    grid = utils.make_grid(images_batch, padding=grid_border_size)
    plt.imshow(grid.numpy().transpose((1, 2, 0)))

    for i in range(batch_size):
        # Tile i starts after i images and (i + 1) borders.
        x_offset = i * im_width + (i + 1) * grid_border_size
        plt.scatter(landmarks_batch[i, :, 0].numpy() + x_offset,
                    landmarks_batch[i, :, 1].numpy() + grid_border_size,
                    s=10, marker='.', c='r')

    # The title only needs to be set once for the whole figure.
    plt.title('Batch from dataloader')

In [0]:
# Print the sizes of the first four batches, then plot the fourth and stop.
for i_batch, sample_batched in enumerate(dataloader):
  image_size = sample_batched['image'].size()
  landmarks_size = sample_batched['landmarks'].size()
  print(i_batch, image_size, landmarks_size)

  if i_batch != 3:
    continue

  plt.figure()
  show_landmarks_batch(sample_batched)
  plt.axis('off')
  plt.ioff()
  plt.show()
  break

In [0]:
import torch
from torchvision import transforms, datasets

# Training-time preprocessing for an image-folder dataset.
# - RandomResizedCrop is the current name of the transform; the older
#   RandomSizedCrop spelling is a deprecated alias that newer torchvision
#   releases no longer provide.
# - The Normalize statistics are the per-channel ImageNet mean/std used in
#   the torchvision documentation (blue-channel mean is 0.406).
data_transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                          std=[0.229, 0.224, 0.225])])

# ImageFolder reads the images found under the 'train' directory and applies
# data_transform to each of them.
dataset = datasets.ImageFolder(root='train', transform=data_transform)
dataset_loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=4,
                                             shuffle=True,
                                             num_workers=4)