# [Dense Pose Object Detector](https://arxiv.org/abs/1902.11020)

#### Download and unzip the Linemod Dataset 


In [1]:
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/ape.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/benchviseblue.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/bowl.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/can.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/cat.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/cup.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/driller.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/duck.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/glue.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/holepuncher.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/iron.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/lamp.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/phone.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/cam.zip
# !wget http://campar.in.tum.de/personal/hinterst/index/downloads!09384230443!/eggbox.zip

# !unzip /home/jovyan/work/ape.zip
# !unzip /home/jovyan/work/benchviseblue.zip
# !unzip /home/jovyan/work/bowl.zip
# !unzip /home/jovyan/work/can.zip
# !unzip /home/jovyan/work/cat.zip
# !unzip /home/jovyan/work/cup.zip
# !unzip /home/jovyan/work/driller.zip
# !unzip /home/jovyan/work/duck.zip
# !unzip /home/jovyan/work/glue.zip
# !unzip /home/jovyan/work/holepuncher.zip
# !unzip /home/jovyan/work/iron.zip
# !unzip /home/jovyan/work/lamp.zip
# !unzip /home/jovyan/work/phone.zip
# !unzip /home/jovyan/work/cam.zip
# !unzip /home/jovyan/work/eggbox.zip

In [2]:
!pip install opencv-python

Collecting opencv-python
[?25l  Downloading https://files.pythonhosted.org/packages/d0/f0/cfe88d262c67825b20d396c778beca21829da061717c7aaa8b421ae5132e/opencv_python-4.2.0.34-cp37-cp37m-manylinux1_x86_64.whl (28.2MB)
[K     |████████████████████████████████| 28.2MB 3.3MB/s eta 0:00:01
Installing collected packages: opencv-python
Successfully installed opencv-python-4.2.0.34
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [3]:
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import unet_model as UNET
from torch.utils.data.sampler import SubsetRandomSampler
from create_ground_truth_helper import *
from helper import load_obj
%load_ext autoreload
%autoreload 2

Custom Train Dataset 

In [4]:
class LineMODDataset(Dataset):

    """
    Args:
        root_dir (str): path to the dataset
        classes (dictionary): values of classes to extract from segmentation mask 
        transform : Transforms for input image
            """

    def __init__(self,root_dir,classes=None,transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = classes
        self.list_all_images = load_obj(root_dir + "all_images_adr")
        self.training_images_idx = load_obj(root_dir + "train_images_indices")

    def __len__(self):
        return len(self.training_images_idx)
    def __getitem__(self, i):
        img_adr = self.list_all_images[self.training_images_idx[i]]
        label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
        regex = re.compile(r'\d+')
        idx = regex.findall(os.path.split(img_adr)[1])[0]
        image = cv2.imread(img_adr) 
        IDmask = cv2.imread(self.root_dir + label + "/ground_truth/IDmasks/color" + str(idx) + ".png"
                                ,cv2.IMREAD_GRAYSCALE)
        Umask = cv2.imread(self.root_dir + label + "/ground_truth/Umasks/color" + str(idx) + ".png"
                                ,cv2.IMREAD_GRAYSCALE)
        Vmask = cv2.imread(self.root_dir + label + "/ground_truth/Vmasks/color" + str(idx) + ".png"
                                ,cv2.IMREAD_GRAYSCALE)
        # resize the masks
        IDmask = (IDmask/255)*self.classes[label] 
        image = cv2.resize(image,(image.shape[1]//2, image.shape[0]//2), interpolation=cv2.INTER_AREA)
        IDmask = cv2.resize(IDmask,(IDmask.shape[1]//2, IDmask.shape[0]//2), interpolation=cv2.INTER_AREA)
        Umask = cv2.resize(Umask,(Umask.shape[1]//2, Umask.shape[0]//2), interpolation=cv2.INTER_AREA)
        Vmask = cv2.resize(Vmask,(Vmask.shape[1]//2, Vmask.shape[0]//2), interpolation=cv2.INTER_AREA)
        if self.transform:
            image = self.transform(image)
        IDmask = (torch.from_numpy(IDmask)).type(torch.int64)
        Umask = (torch.from_numpy(Umask)).type(torch.int64)
        Vmask = (torch.from_numpy(Vmask)).type(torch.int64)
        return img_adr,image,IDmask,Umask,Vmask

In [5]:
classes = {'ape':1, 'benchviseblue':2, 'bowl':3, 'can':4, 'cat':5, 'cup':6, 'driller':7, 
            'duck':8, 'glue':9, 'holepuncher':10, 'iron':11, 'lamp':12, 'phone':13, 'cam':14,'eggbox':15}

In [6]:
root_dir = "/home/jovyan/work/LineMOD_Dataset/"
train_data = LineMODDataset(
    root_dir, 
    classes = classes, 
    transform = transforms.Compose([transforms.ToTensor()])
)

Train and Valid DataLoader

In [7]:
batch_size = 4
num_workers = 0
valid_size = 0.2
# obtain training indices that will be used for validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, 
    sampler=valid_sampler, num_workers=num_workers)

Architecture for Correspondence block

In [8]:
correspondence_block = UNET.UNet(n_channels = 3, out_channels_id = 16, out_channels_uv = 256, bilinear=True)
correspondence_block.cuda()

UNet(
  (inc): DoubleConv(
    (double_conv): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (down1): Down(
    (maxpool_conv): Sequential(
      (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (1): DoubleConv(
        (double_conv): Sequential(
          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (4): BatchNorm2d(128, eps=1e-05, moment

Custom Loss Function and Optimizer

In [9]:
criterion_id = nn.CrossEntropyLoss()
criterion_u = nn.CrossEntropyLoss()
criterion_v = nn.CrossEntropyLoss()

# specify optimizer
optimizer = optim.Adam(correspondence_block.parameters(), lr=3e-4,weight_decay=3e-5)

Train the Model

In [10]:
# number of epochs to train the model
n_epochs = 10

valid_loss_min = np.Inf # track change in validation loss

for epoch in range(1, n_epochs+1):
    # keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    print("------ Epoch ",epoch," ---------")
    
    ###################
    # train the model #
    ###################
    correspondence_block.train()
    for _,image, idmask,umask,vmask in train_loader:
        # move tensors to GPU if CUDA is available
        image, idmask,umask,vmask = image.cuda(), idmask.cuda(), umask.cuda(), vmask.cuda()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        idmask_pred,umask_pred,vmask_pred = correspondence_block(image)       
        # calculate the batch loss
        loss_id = criterion_id(idmask_pred, idmask)
        loss_u = criterion_u(umask_pred, umask)
        loss_v = criterion_v(vmask_pred, vmask)
        loss = loss_id + loss_u + loss_v
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item()


    ######################    
    # validate the model #
    ######################
    correspondence_block.eval()
    for _,image, idmask,umask,vmask in valid_loader:       
        # move tensors to GPU if CUDA is available
        image, idmask,umask,vmask = image.cuda(), idmask.cuda(), umask.cuda(), vmask.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        idmask_pred,umask_pred,vmask_pred = correspondence_block(image)
        # calculate the batch loss
        loss_id = criterion_id(idmask_pred, idmask)
        loss_u = criterion_u(umask_pred, umask)
        loss_v = criterion_v(vmask_pred, vmask)
        loss = loss_id + loss_u + loss_v
        # update average validation loss 
        valid_loss += loss.item()
    
    # calculate average losses
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)
        
    # print training/validation statistics 
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))
    
    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(correspondence_block.state_dict(), 'correspondence_block.pt')
        valid_loss_min = valid_loss
        

------ Epoch  1  ---------
Epoch: 1 	Training Loss: 0.114288 	Validation Loss: 0.074979
Validation loss decreased (inf --> 0.074979).  Saving model ...
------ Epoch  2  ---------
Epoch: 2 	Training Loss: 0.071020 	Validation Loss: 0.068038
Validation loss decreased (0.074979 --> 0.068038).  Saving model ...
------ Epoch  3  ---------
Epoch: 3 	Training Loss: 0.065894 	Validation Loss: 0.073685
------ Epoch  4  ---------
Epoch: 4 	Training Loss: 0.063808 	Validation Loss: 0.066853
Validation loss decreased (0.068038 --> 0.066853).  Saving model ...
------ Epoch  5  ---------
Epoch: 5 	Training Loss: 0.061883 	Validation Loss: 0.061034
Validation loss decreased (0.066853 --> 0.061034).  Saving model ...
------ Epoch  6  ---------
Epoch: 6 	Training Loss: 0.060640 	Validation Loss: 0.066362
------ Epoch  7  ---------
Epoch: 7 	Training Loss: 0.059254 	Validation Loss: 0.066238
------ Epoch  8  ---------
Epoch: 8 	Training Loss: 0.057956 	Validation Loss: 0.061009
Validation loss decreased

Load the model with the lowest validation Loss

In [11]:
correspondence_block.load_state_dict(torch.load('correspondence_block.pt',map_location=torch.device('cpu')))

<All keys matched successfully>

In [12]:
fx=572.41140; px=325.26110; fy=573.57043; py=242.04899 # Intrinsic Parameters of the Camera
intrinsic_matrix =  np.array([[fx, 0, px], [0, fy, py], [0, 0, 1]],dtype = "double")

## Pose Block

In [13]:
regex = re.compile(r'\d+')
root_dir = "/home/jovyan/work/LineMOD_Dataset/"
for i in range(len(train_data)):
    if i % 1000 == 0:
        print(str(i) + "/" + str(len(train_data)) + " finished!")
    img_adr,img,idmask, _ , _ = train_data[i]
    label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
    idx = regex.findall(os.path.split(img_adr)[1])[0]
    img = img.view(1,img.shape[0],img.shape[1],img.shape[2])
    idmask_pred,umask_pred,vmask_pred = correspondence_block(img.cuda())
    # convert the masks to 240,320 shape
    temp = torch.argmax(idmask_pred,dim=1).squeeze().cpu() 
    upred = torch.argmax(umask_pred,dim=1).squeeze().cpu()
    vpred = torch.argmax(vmask_pred,dim=1).squeeze().cpu()
    coord_2d = (temp == classes[label]).nonzero(as_tuple=True)

    adr = root_dir + label + "/predicted_pose/" + "info_" + str(idx) + ".txt"

    coord_2d = torch.cat((coord_2d[0].view(coord_2d[0].shape[0],1),coord_2d[1].view(coord_2d[1].shape[0],1)),1)
    uvalues = upred[coord_2d[:,0],coord_2d[:,1]]
    vvalues = vpred[coord_2d[:,0],coord_2d[:,1]]
    dct_keys = torch.cat((uvalues.view(-1,1),vvalues.view(-1,1)),1)
    dct_keys = tuple(dct_keys.numpy())
    dct = load_obj(root_dir + label + "/UV-XYZ_mapping")
    mapping_2d = []
    mapping_3d = []
    for count,(u, v) in enumerate(dct_keys):
        if (u, v) in dct:
            mapping_2d.append(np.array(coord_2d[count]))
            mapping_3d.append(dct[(u,v)])
    # Get the 6D pose from rotation and translation matrices
    
    if len(mapping_2d) >= 6 or len(mapping_3d) >= 6: # PnP needs atleast 6 unique 2D-3D correspondences to run
        _,rvecs, tvecs, inliers = cv2.solvePnPRansac(np.array(mapping_3d,dtype=np.float32),
            np.array(mapping_2d,dtype = np.float32),intrinsic_matrix,distCoeffs = None,
                iterationsCount = 150, reprojectionError = 1.0,flags = cv2.SOLVEPNP_P3P)
        rot, _ = cv2.Rodrigues(rvecs,jacobian = None)
        rot_tra = np.append(rot,tvecs,axis = 1)
        # save the predicted pose
        np.savetxt(adr,rot_tra)
    else: # save an empty file
        np.savetxt(adr,np.zeros((3,4)))

0/14620 finished!
1000/14620 finished!
2000/14620 finished!
3000/14620 finished!
4000/14620 finished!
5000/14620 finished!
6000/14620 finished!
7000/14620 finished!
8000/14620 finished!
9000/14620 finished!
10000/14620 finished!
11000/14620 finished!
12000/14620 finished!
13000/14620 finished!
14000/14620 finished!
