In [1]:
%load_ext autoreload
%autoreload 2

# Read train and Validation data

In [2]:
# import resources for handling dataset
from utils.LIPDataset import LIPDataset, RandomCrop, RandomHorizontalFlip, Resize, plot_data, ToTensor, Normalize
from torchvision import transforms
import torch




In [3]:
# param to normalize image (expected by pre-trained pytorch model)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

In [4]:
# read dataset and provide augmentation
# TRAIN DATA
train_transform = transforms.Compose([
                Resize((256, 256)),
                RandomCrop((224, 224)),
                RandomHorizontalFlip(),
                ToTensor(),
                Normalize(mean, std, False)
            ])

train_data = LIPDataset('dataset/train_data.ftr', 'dataset/train_set', train_transform)

In [5]:
# resource to display from dataset
from utils.LIPDataset import untransform_n_display

In [6]:
# visualize a sample
untransform_n_display(train_data, 5000 , mean, std)

In [7]:
# VALIDATION DATA
val_transform = transforms.Compose([
                        Resize((224, 224)),
                        ToTensor(),
                        Normalize(mean, std)
                        ])

val_data = LIPDataset('dataset/val_data.ftr', 'dataset/val_set', val_transform)

In [10]:
# visualize few sample
untransform_n_display(val_data, 7006, mean, std)

In [10]:
# length of both
len(train_data), len(val_data), train_data[5]['image'].size(), train_data[6]['keypoints'].size()

(30462, 10000, torch.Size([3, 224, 224]), torch.Size([32]))

## Dataloader and batch size

In [11]:
from torch.utils.data import DataLoader

In [12]:
batch_size = 8

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)

valid_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=4)

In [13]:
samples = next(iter(train_loader))

In [14]:
samples['keypoints'].size() # batches are formed

torch.Size([8, 32])

# Load model. Define lose function and optimizer

In [15]:
from utils.TrainModel import initialize_model

num_keypoints = 16
model = initialize_model(2*num_keypoints) 

In [16]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), strid

In [17]:
# define lose function 
from torch import nn
criterion = nn.MSELoss()


In [18]:
# define optimizer
from torch import optim
optimizer = optim.Adam(model.fc.parameters(), lr=0.0003)

# Training and tracking loss (train + valid)

In [19]:
epoch = 1

# storing loss over whole training
total_loss = list()
# display results 10 times per epoch
prt_freq = 20
num_batches = len(train_loader)
prt_idx = num_batches // prt_freq

# prepare the net for training (autograd, dropout -> on)
model = model.cuda()
model.train()

for i in range(epoch): # looping over whole dataset
    running_loss = 0.0
    # training on batches of data
    for batch_i, samples in enumerate(train_loader):
        # get images and keypoints
        images, keypts = samples['image'].cuda(), samples['keypoints'].cuda()
        # forward pass
        pred = model.forward(images)
        # compute loss
        loss = criterion(pred, keypts)
        # zero the accumulate weight gradients
        optimizer.zero_grad()
        # backward pass (calculate current weight gradients)
        loss.backward()
        # update the weights
        optimizer.step()

        running_loss += loss.item() # storing loss

        # print the loss (frequency  is based on prt_freq value)
        if batch_i % prt_idx == 0: 
            avg_loss = running_loss/prt_idx
            print(f'Epoch: {i}, Batch: {batch_i}, Avg. Loss: {avg_loss}')
            total_loss.append(avg_loss) # storing avg loss for analysis later
            running_loss = 0.0

print("Training complete")

Epoch: 0, Batch: 0, Avg. Loss: 86.56196546052631
Epoch: 0, Batch: 190, Avg. Loss: 20751.387808388157
Epoch: 0, Batch: 380, Avg. Loss: 21973.666069592928
Epoch: 0, Batch: 570, Avg. Loss: 18089.654263466284
Epoch: 0, Batch: 760, Avg. Loss: 18697.83802682977
Epoch: 0, Batch: 950, Avg. Loss: 16569.53786749589
Epoch: 0, Batch: 1140, Avg. Loss: 17954.466200657895
Epoch: 0, Batch: 1330, Avg. Loss: 16434.89839895148
Epoch: 0, Batch: 1520, Avg. Loss: 17783.917722039474
Epoch: 0, Batch: 1710, Avg. Loss: 16287.275940583882
Epoch: 0, Batch: 1900, Avg. Loss: 16859.090326891448
Epoch: 0, Batch: 2090, Avg. Loss: 17815.22378957648
Epoch: 0, Batch: 2280, Avg. Loss: 17476.997412109376
Epoch: 0, Batch: 2470, Avg. Loss: 16415.324406352796
Epoch: 0, Batch: 2660, Avg. Loss: 15093.656589226974
Epoch: 0, Batch: 2850, Avg. Loss: 17849.136790707238
Epoch: 0, Batch: 3040, Avg. Loss: 16621.934107730263
Epoch: 0, Batch: 3230, Avg. Loss: 16482.134025493422
Epoch: 0, Batch: 3420, Avg. Loss: 16465.39577765214
Epoch: 

In [22]:
torch.save(model.state_dict(), 'models/state_dict.pt')

## Testing on images

In [19]:
import numpy as np

In [31]:
# selecting random data element
idx = np.random.randint(len(val_data))
sample = val_data[idx]
image = sample['image'].unsqueeze(0)

# inference
model = model.cuda()
model.eval()

with torch.no_grad(): # not storing previous computational graph    
    pred = model.forward(image.cuda())

model = model.cpu()
torch.cuda.empty_cache()




In [34]:
pred = pred.to('cpu')

In [36]:
pred, sample['keypoints']

(tensor([[ 0.2113,  0.4996, -0.0052, -0.3993,  0.1494,  0.1929,  0.1166,  0.0252,
           0.1794, -0.0034, -0.0072, -0.0643,  0.1458, -0.2759,  0.0468,  0.0651,
          -0.0216,  0.2557,  0.0238, -0.0361, -0.2516, -0.0501,  0.4746,  0.1285,
           0.1717,  0.1699, -0.0363,  0.0034, -0.2932,  0.2402, -0.0329, -0.0056]]),
 tensor([119.6018, 246.5417,  36.4887, 161.6667,  90.2081, 133.3750, 137.8462,
         124.2812, 174.3348, 128.3229, 186.4977, 242.5000,  83.1131,  93.9688,
          68.9231,  65.6771,  56.7602,  47.4896,  36.4887,  43.4479,  63.8552,
         117.2083,  22.2986, 135.3958,  13.1765,  80.8333, 122.6425,  34.3542,
         159.1312,  75.7812, 122.6425, 100.0312]))