In [3]:
#########LOAD LIBRARIES###############               
import torch                                         
import os                                            
import cv2                                           
import torch.nn as nn                                
import random                                        
import torch.nn.functional as F                      
import torchvision.datasets as dset                  
import torchvision.transforms as transforms           
import torchvision.utils as utils                    
import numpy as np                                   
from torch.autograd import Variable                  
from torch import optim                              
from torch.utils.data import Dataset, DataLoader
from PIL import Image

In [7]:
#########DATA LOADER##################
class BodyPoseSet(Dataset):
    """Body pose dataset pairing rendered depth images with ground-truth images.

    File names are generated rather than discovered: ``parse_files`` builds
    relative paths of the form
    ``<pose>/<mode>/<sequence>/images/depthRender/<camera>/mayaProject.00NNNN.png``
    together with the matching ``.../groundtruth/...`` path for every requested
    frame. Nothing is read from disk until an item is fetched.

    Args:
        root_dir: directory the generated relative paths are joined onto.
        mode: directory name inserted right after the pose directory.
        transform: optional callable applied to the grayscale input image.
        label_transform: optional callable applied to the label image. When it
            is None the label goes through ``transform`` as well, which is the
            behaviour this class always had.
        poses: pose directory names to enumerate.
        sequences: integer sequence directory names to enumerate.
        cameras: camera directory names to enumerate.
        num_frames: frames per camera; frames are numbered 1..num_frames.

    Each item is a dict ``{'img': ..., 'label': ...}``.
    """

    # Arguments, in order: pose, mode, sequence, kind (depthRender/groundtruth),
    # camera, frame number zero-padded to four digits.
    _PATH_TEMPLATE = "%s/%s/%d/images/%s/%s/mayaProject.00%04d.png"

    def __init__(self, root_dir='./', mode='train', transform=None,
                 label_transform=None, poses=('easy-pose',), sequences=(1,),
                 cameras=('Cam1', 'Cam2', 'Cam3'), num_frames=1001):
        self.root_dir = root_dir
        self.mode = mode
        self.transform = transform
        self.label_transform = label_transform
        # The layout attributes must exist before parse_files() runs.
        self.poses = tuple(poses)
        self.sequences = tuple(sequences)
        self.cameras = tuple(cameras)
        self.num_frames = num_frames
        self.all_imgs, self.all_labels = self.parse_files()

    def __len__(self):
        """Number of (image, label) pairs enumerated by ``parse_files``."""
        return len(self.all_imgs)

    def __getitem__(self, idx):
        """Load the ``idx``-th pair and return ``{'img': ..., 'label': ...}``.

        The input image is converted to single-channel ('L'); the label image
        is opened as stored on disk.
        """
        img_path = os.path.join(self.root_dir, self.all_imgs[idx])
        label_path = os.path.join(self.root_dir, self.all_labels[idx])
        img = Image.open(img_path).convert('L')
        label = Image.open(label_path)

        # Fall back to the image transform so existing callers see no change.
        # NOTE(review): if `transform` resizes with an interpolating filter or
        # contains random augmentation, applying it separately to the label may
        # blend label values or misalign the pair -- confirm against the label
        # encoding and pass a dedicated `label_transform` if needed.
        label_transform = self.label_transform
        if label_transform is None:
            label_transform = self.transform

        if self.transform is not None:
            img = self.transform(img)
        if label_transform is not None:
            label = label_transform(label)

        return {'img': img, 'label': label}

    def parse_files(self):
        """Build the parallel lists of relative image and label paths.

        Returns:
            (all_imgs, all_labels): two equally long lists of relative paths,
            ordered by pose, then sequence, then camera, then frame number.
        """
        all_imgs = []
        all_labels = []
        for pose in self.poses:
            for sequence in self.sequences:
                for camera in self.cameras:
                    for frame in range(1, self.num_frames + 1):
                        all_imgs.append(self._PATH_TEMPLATE % (
                            pose, self.mode, sequence, 'depthRender', camera, frame))
                        all_labels.append(self._PATH_TEMPLATE % (
                            pose, self.mode, sequence, 'groundtruth', camera, frame))
        return all_imgs, all_labels
#####################################
# Shared preprocessing pipeline: rescale to 250x250 pixels, then convert to a tensor.
trans = transforms.Compose(
    [transforms.Resize((250, 250)), transforms.ToTensor()]
)

In [8]:
#########NETWORK STRUCTURE###########
class Body_Net(nn.Module):
    """Fully convolutional network with two skip connections.

    Three convolution stages reduce the single-channel input to a coarse
    per-class score map. The map is upsampled in three transposed-convolution
    steps; before the second and third steps it is summed with 1x1-convolved,
    cropped copies of the two pooling outputs.

    Args:
        num_classes: number of channels in every score map and in the output
            (defaults to 44, the value previously hard-coded).

    Shapes:
        A ``(N, 1, 250, 250)`` input yields a ``(N, num_classes, 250, 250)``
        output. Other input sizes are accepted as long as every layer still
        produces a non-empty map, but the output's spatial size then generally
        differs from the input's.
    """

    def __init__(self, num_classes=44):
        super(Body_Net, self).__init__()
        # Layer creation order is kept stable so seeded initialisation and
        # state_dict keys do not change.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=2, padding=5)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=3)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=5, stride=1)
        self.relu3 = nn.ReLU()
        self.conv4_class = nn.Conv2d(256, num_classes, kernel_size=3, stride=1)
        self.upscore1 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=3, stride=1, bias=False)
        self.score_pool2 = nn.Conv2d(128, num_classes, kernel_size=1, stride=1)
        self.dropout = nn.Dropout2d()  # default p = 0.5, which is used in paper
        self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, bias=False)
        self.score_pool1 = nn.Conv2d(64, num_classes, kernel_size=1, stride=1)
        self.upscore3 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=19, stride=7, bias=False)

    @staticmethod
    def _crop_like(score, reference, offset):
        """Crop ``score`` spatially to ``reference``'s height/width, starting at ``offset``."""
        rows = reference.size(2)
        cols = reference.size(3)
        return score[:, :, offset:offset + rows, offset:offset + cols]

    def forward(self, data):
        """Compute per-class score maps for a batch of single-channel images.

        Args:
            data: tensor of shape ``(N, 1, H, W)``.

        Returns:
            Tensor of shape ``(N, num_classes, H_out, W_out)``.
        """
        # Encoder; both pooling outputs are kept for the skip connections.
        pool1 = self.pool1(self.relu1(self.conv1(data)))
        pool2 = self.pool2(self.relu2(self.conv2(pool1)))
        h = self.relu3(self.conv3(pool2))
        h = self.conv4_class(h)

        # First upsampling step, fused with the cropped pool2 scores. The crop
        # extent follows upscore1 (16x16 for a 250x250 input, i.e. the former
        # fixed 1:17 slice) so other input sizes no longer break the addition.
        upscore1 = self.upscore1(h)
        score_pool2 = self._crop_like(self.score_pool2(pool2), upscore1, 1)
        h = self.dropout(upscore1 + score_pool2)

        # Second upsampling step, fused with the cropped pool1 scores
        # (34x34 for a 250x250 input, i.e. the former fixed 3:37 slice).
        upscore2 = self.upscore2(h)
        score_pool1 = self._crop_like(self.score_pool1(pool1), upscore2, 3)
        h = self.dropout(upscore2 + score_pool1)

        # Final upsampling to the output resolution.
        return self.upscore3(h)

In [9]:
# Smoke test: run the first dataset sample through a freshly initialised network.
data = BodyPoseSet(transform = trans)
sample_tuple = data[0]  # a dict with 'img' and 'label' entries
input_test = sample_tuple['img']
# Add a leading batch dimension; the network expects (N, C, H, W).
input_test = input_test.unsqueeze(0)
# A new module starts in training mode, so Dropout2d is active in this pass.
net = Body_Net()
output_test = net(Variable(input_test))
# Function-call form prints the same thing on Python 2 and also parses on Python 3.
print(output_test)

Variable containing:
( 0 , 0 ,.,.) = 
 -1.2335e-03 -5.0216e-03  2.5610e-03  ...  -7.2880e-04 -1.3368e-03  3.6729e-03
  3.5085e-03 -1.9488e-03  9.5432e-04  ...  -7.1405e-03  2.0539e-03 -2.1124e-04
  9.5580e-04  5.2209e-04 -1.3286e-03  ...   3.4859e-05  5.0884e-03  3.5569e-03
                 ...                   ⋱                   ...                
 -2.8317e-04 -1.6938e-03 -8.9433e-04  ...   2.2827e-03  2.3156e-03 -2.9752e-03
  1.4169e-05  1.8803e-03  3.9341e-03  ...   2.4227e-03 -1.2194e-03  3.0162e-03
  1.3943e-03  1.4991e-03  3.0403e-03  ...  -2.7857e-03  4.5763e-03 -2.8562e-03

( 0 , 1 ,.,.) = 
  3.7033e-03 -2.3728e-03 -7.6757e-04  ...   4.1992e-04  4.4801e-03  4.1916e-03
 -8.8810e-04  9.1069e-04 -5.4224e-03  ...   4.8534e-04  5.6864e-04 -1.9924e-04
  1.9660e-03  8.1047e-06 -2.7258e-03  ...   3.2036e-03 -1.7521e-03 -3.7981e-03
                 ...                   ⋱                   ...                
  2.5875e-03 -1.5877e-04  2.9897e-03  ...  -3.7246e-04  1.3231e-04  7.6758e