In [1]:
from importlib import reload
from os.path import join
from collections import defaultdict
import sys
import numpy as np
import cv2
cv2.setNumThreads(1)
import os
import itertools

In [2]:
import ipywidgets
from IPython.display import display, clear_output
from matplotlib import pyplot

In [3]:
from datasets.dshdf5pose import Hdf5PoseDataset
import datatransformation
import neuralnets.models
import neuralnets.modelcomponents
import vis
import utils
import train

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as tf
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split, Subset, ConcatDataset
from torchvision import transforms

In [5]:
# Restore the trained point-head network from its checkpoint and prepare it for GPU inference.
checkpoint_path = join('..', 'model_files', 'best_NetworkWithPointHead.ckpt')

net = neuralnets.models.NetworkWithPointHead()
state_dict = torch.load(checkpoint_path)
# Modifies the loaded tensors in place and prints the "Denormals or zeros" report shown in the cell output.
neuralnets.modelcomponents.clear_denormals_inplace(state_dict)
net.load_state_dict(state_dict)

net.cuda()
# Inference mode; as the last expression of the cell this also displays the module structure.
net.eval()

Denormals or zeros:
convnet.backbone.4.running_mean         :          6 (18.75%)
convnet.backbone.4.running_var          :         10 (31.25%)
convnet.backbone.8.1.layers.4.running_mean:          2 (2.777777910232544%)
convnet.backbone.8.1.layers.4.running_var:          2 (2.777777910232544%)
convnet.backbone.8.2.layers.4.running_mean:          1 (1.388888955116272%)
convnet.backbone.8.2.layers.4.running_var:          1 (1.388888955116272%)
convnet.backbone.9.1.layers.4.running_mean:          1 (0.8333333730697632%)
convnet.backbone.9.1.layers.4.running_var:          1 (0.8333333730697632%)
convnet.backbone.9.2.layers.4.running_mean:          7 (5.833333969116211%)
convnet.backbone.9.2.layers.4.running_var:          7 (5.833333969116211%)
convnet.backbone.10.1.layers.4.running_mean:         79 (16.45833396911621%)
convnet.backbone.10.1.layers.4.running_var:         79 (16.45833396911621%)
convnet.backbone.10.2.layers.4.running_mean:        141 (29.375001907348633%)
convnet.backbone.10

NetworkWithPointHead(
  (convnet): PretrainedNetwork(
    (backbone): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (4): BatchNorm2d(32, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (7): BatchNorm2d(16, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)
      (8): Sequential(
        (0): _InvertedResidual(
          (layers): Sequential(
            (0): Conv2d(16, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(48, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
            (3): Conv

In [7]:
inputsize = net.input_resolution
datadir = os.environ['DATADIR']

# Test-time preprocessing shared by both datasets: apply the ROI, rescale with
# `inputsize + 1`, then center-crop with `inputsize`.
testpreprocess = [
    datatransformation.ApplyRoi(),
    datatransformation.Rescale(inputsize+1),
    datatransformation.CenterCrop(inputsize),
]
# Final conversion steps appended to every pipeline.
normalize_and_tensor = [
    datatransformation.Normalize(monochrome=True),
    datatransformation.ToTensor(),
]

# BIWI: the whole file is used (subset=None).
biwi_pipeline = transforms.Compose([
    datatransformation.InjectZeroKeypoints3d(),
    datatransformation.InjectPoseEnable(),
    *testpreprocess,
    *normalize_and_tensor,
])
ds_test_biwi = Hdf5PoseDataset(
    join(datadir, 'biwi.h5'),
    shuffle=True,
    subset=None,
    transform=biwi_pipeline,
)

# Maybe not the best way to pick a test set. However, the h5 dataset is shuffled, so taking the
# first 400 items still gives an independent sample from the overall dataset. It should also be
# deterministically reproducible, since the h5 is generated using a fixed seed for the shuffling.
aflw_pipeline = transforms.Compose([
    datatransformation.InjectPt3d68Enable(),
    datatransformation.InjectPoseEnable(),
    *testpreprocess,
    *normalize_and_tensor,
])
ds_test_aflw = Hdf5PoseDataset(
    join(datadir, 'aflw2k.h5'),
    shuffle=False,
    subset=slice(400),
    transform=aflw_pipeline,
)

In [8]:
# Both evaluation loaders use the same settings; the order is kept fixed (shuffle=False).
loader_options = dict(batch_size=32, shuffle=False, num_workers=5)

aflw_loader = DataLoader(ds_test_aflw, **loader_options)
biwi_loader = DataLoader(ds_test_biwi, **loader_options)

In [9]:
def iterate_predictions(loader, net):
    """Lazily run `net` over `loader` and yield one item per sample.

    For every batch the 'image' entry is moved to the GPU, passed through
    `net.inference` without gradient tracking, and the resulting dict of
    outputs is moved back to the CPU. Batch and predictions are then split
    into individual samples with `utils.undo_collate`, and each pair is
    converted by `vis.unnormalize_sample_to_numpy`.

    Args:
        loader: iterable of batches; each batch is a mapping with an 'image' entry.
        net: model exposing `inference(images)` that returns a mapping of outputs.

    Yields:
        The result of `vis.unnormalize_sample_to_numpy(sample, pred)` for each sample.
    """
    for batch in loader:
        with torch.no_grad():
            device_outputs = net.inference(batch['image'].cuda())
            host_outputs = {name: value.cpu() for name, value in device_outputs.items()}
        samples = utils.undo_collate(batch)
        predictions = utils.undo_collate(host_outputs)
        for sample, pred in zip(samples, predictions):
            yield vis.unnormalize_sample_to_numpy(sample, pred)

### Visualize AFLW

In [10]:
# Select the interactive notebook backend, then browse the AFLW predictions.
# The call returns a (Figure, Button) pair, which is what the cell displays.
%matplotlib notebook
vis.matplotlib_plot_iterable(iterate_predictions(aflw_loader, net), vis.draw_prediction)

<IPython.core.display.Javascript object>

(<Figure size 1000x1000 with 10 Axes>,
 <matplotlib.widgets.Button at 0x7f6cad671090>)

### Visualize BIWI
Here we can only check whether the keypoints look all right. Nothing else can be evaluated at the moment because of the different coordinate frame.

In [11]:
# Same viewer as for AFLW, fed with predictions on the BIWI loader.
vis.matplotlib_plot_iterable(iterate_predictions(biwi_loader, net), vis.draw_prediction)

<IPython.core.display.Javascript object>

(<Figure size 1000x1000 with 10 Axes>,
 <matplotlib.widgets.Button at 0x7f6cad097210>)

In [12]:
def report(net, loader):
    """Print summary pose-error statistics of `net` over `loader`.

    Evaluates `train.PoseErr` on the whole loader via
    `train.metrics_over_full_dataset` and prints three lines:
    the mean rotation error (converted with 180/pi, i.e. treated as radians),
    the RMSE of the combined x/y position error, and the RMSE of the size
    error, the latter two scaled by 100 and shown as percentages.

    NOTE(review): assumes the metric result unpacks, after transposition, into
    four components (rotation, position x, position y, size), each of which is
    2-D so that `hstack` followed by `sum(axis=1)` is valid -- confirm against
    `train.metrics_over_full_dataset`.

    Args:
        net: model to evaluate.
        loader: data loader to evaluate on.
    """
    per_sample_errors = train.metrics_over_full_dataset(net, [train.PoseErr()], loader)
    # Transposing splits the stacked result into its four error components.
    e_rot, e_posx, e_posy, e_size = np.array(per_sample_errors).T

    # Per-sample squared position error (x and y combined), then root of its mean.
    squared_offsets = np.square(np.hstack([e_posx, e_posy]))
    rmse_pos = np.sqrt(np.average(np.sum(squared_offsets, axis=1), axis=0))
    rmse_size = np.sqrt(np.average(np.square(e_size)))
    mean_rot_deg = np.average(e_rot)*180/np.pi

    print (f"Average angular error: {mean_rot_deg:.03f}°")
    print (f"Position RMSE: {rmse_pos*100:.03f}%")
    print (f"Size RMSE: {rmse_size*100:.03f}%")
    

def rot_err(preds, batch):
    """Per-sample rotation error: column 0 of the `train.PoseErr` result.

    Args:
        preds: network predictions for the batch.
        batch: the corresponding ground-truth batch.

    Returns:
        The first column of the error array returned by `train.PoseErr()`.
    """
    metric = train.PoseErr()
    return metric(preds, batch)[:, 0]  # column 0 holds the rotation error

def pose_err(preds, batch):
    """Per-sample maximum over all non-rotation columns of `train.PoseErr`.

    Args:
        preds: network predictions for the batch.
        batch: the corresponding ground-truth batch.

    Returns:
        For each row, the largest value among columns 1 and onwards of the
        error array (the raw maximum; no absolute value is taken).
    """
    metric = train.PoseErr()
    non_rotation = metric(preds, batch)[:, 1:]
    return np.amax(non_rotation, axis=1)

def display_worst(items):
    """Show the given worst-case samples in the interactive prediction viewer.

    Args:
        items: iterable of 3-tuples `(value, sample, pred)`; the first element
            is ignored here.

    Returns:
        Whatever `vis.matplotlib_plot_iterable` returns.
    """
    # Lazily convert each (sample, pred) pair; the leading value is not needed for drawing.
    drawable = (
        vis.unnormalize_sample_to_numpy(sample, pred)
        for _value, sample, pred in items
    )
    return vis.matplotlib_plot_iterable(drawable, vis.draw_prediction)

In [13]:
# eval() was already called when the model was loaded; it is called again here,
# presumably so that this cell can be re-run on its own.
net.eval()
# Print the pose-error statistics on the AFLW test split.
report(net, aflw_loader)

Average angular error: 6.292°
Position RMSE: 4.458%
Size RMSE: 5.093%


Previously:
```
Average angular error: 5.965°
Position RMSE: 3.941%
Size RMSE: 5.290%
```
Unfortunately, the results depend quite strongly on the choice of test set: the overall dataset contains some pretty hard cases, which can spoil the statistics if they end up in the test set.

In [14]:
# Collect 9 samples from the AFLW loader ranked by rot_err (presumably the ones
# with the largest rotation error -- see train.k_worst_over_dataset) and show them.
worst = train.k_worst_over_dataset(net, aflw_loader, rot_err, 9)
display_worst(worst)

<IPython.core.display.Javascript object>

(<Figure size 1000x1000 with 10 Axes>,
 <matplotlib.widgets.Button at 0x7f6ca13c6450>)

In [15]:
# Same as the rotation ranking, but ranked by pose_err (max over the non-rotation
# error columns). Note that this overwrites `worst` from the previous cell.
worst = train.k_worst_over_dataset(net, aflw_loader, pose_err, 9)
display_worst(worst)

<IPython.core.display.Javascript object>

(<Figure size 1000x1000 with 10 Axes>,
 <matplotlib.widgets.Button at 0x7f6c8d7e7050>)