In [1]:
%load_ext autoreload
%autoreload 2

# Read train and Validation data

In [2]:
# import resources for handling dataset
from utils.LIPDataset import LIPDataset, RandomCrop, RandomHorizontalFlip, Resize, plot_data, ToTensor
from torchvision import transforms
import torch




In [3]:
# read dataset and provide augmentation
# TRAIN DATA
train_transform = transforms.Compose([
                Resize(256),
                RandomCrop(224),
                RandomHorizontalFlip(),
                ToTensor()
            ])

train_data = LIPDataset('dataset/train_data.ftr', 'dataset/train_set', train_transform)

In [4]:
# resource to display from dataset
from utils.LIPDataset import untransform_n_display

In [5]:
# visualize a sample
untransform_n_display(train_data, 5000)

In [6]:
# VALIDATION DATA
val_transform = transforms.Compose([
                        Resize(224),
                        ToTensor()
                        ])

val_data = LIPDataset('dataset/val_data.ftr', 'dataset/val_set', val_transform)

In [7]:
# visualize few sample
untransform_n_display(val_data, 6001)

In [8]:
# length of both
len(train_data), len(val_data), train_data[5]['image'].size(), train_data[6]['keypoints'].size()

(30462, 10000, torch.Size([3, 224, 224]), torch.Size([32]))

## Dataloader and batch size

In [9]:
from torch.utils.data import DataLoader

In [10]:
batch_size = 8

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)

valid_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=4)

In [11]:
samples = next(iter(train_loader))

In [12]:
samples['keypoints'].size() # batches are formed

torch.Size([8, 32])

# Load model. Define lose function and optimizer

In [15]:
from utils.TrainModel import initialize_model

num_keypoints = 16
model = initialize_model(2*num_keypoints) 

In [16]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), strid

In [18]:
# define lose function 
from torch import nn
criterion = nn.MSELoss()


In [20]:
# define optimizer
from torch import optim
optimizer = optim.Adam(model.fc.parameters(), lr=0.0003)

# Training and tracking loss (train + valid)

In [30]:
epoch = 3

# storing loss over whole training
total_loss = list()
# display results 10 times per epoch
prt_freq = 20
num_batches = len(train_loader)
prt_idx = num_batches // prt_freq

# prepare the net for training (autograd, dropout -> on)
model = model.cuda()
model.train()

for i in range(epoch): # looping over whole dataset
    running_loss = 0.0
    # training on batches of data
    for batch_i, samples in enumerate(train_loader):
        # get images and keypoints
        images, keypts = samples['image'].cuda(), samples['keypoints'].cuda()
        # forward pass
        pred = model.forward(images)
        # compute loss
        loss = criterion(pred, keypts)
        # zero the accumulate weight gradients
        optimizer.zero_grad()
        # backward pass (calculate current weight gradients)
        loss.backward()
        # update the weights
        optimizer.step()

        running_loss += loss.item() # storing loss

        # print the loss (frequency  is based on prt_freq value)
        if batch_i % prt_idx == 0: 
            avg_loss = running_loss/prt_idx
            print(f'Epoch: {i}, Batch: {batch_i}, Avg. Loss: {avg_loss}')
            total_loss.append(avg_loss) # storing avg loss for analysis later
            running_loss = 0.0

print("Training complete")

Epoch: 0, Batch: 0, Avg. Loss: 90.04936266447369
Epoch: 0, Batch: 190, Avg. Loss: 21431.451089638158
Epoch: 0, Batch: 380, Avg. Loss: 20144.529869962993
Epoch: 0, Batch: 570, Avg. Loss: 19693.017398231907
Epoch: 0, Batch: 760, Avg. Loss: 19532.474753289473
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


KeyboardInterrupt: 