In [1]:
from __future__ import division, print_function, unicode_literals
import numpy as np
import argparse
import os
import os.path as osp
import tqdm
import torch
import math
import torch.utils.data
import torchvision.models as models
import torchvision.transforms as transforms
import glob
import torchvision
import PIL.Image
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import scipy.misc
import scipy.io as sio
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# HYPERPARAMS
# Notebook-wide training settings; the cells below read these names as globals.
max_iteration=100000  # upper bound on optimizer steps; train_model() derives its epoch count from it
lr=1.0e-14  # SGD learning rate. NOTE(review): extremely small for nn.MSELoss, which averages over elements by default -- confirm this is intended
momentum=0.99  # SGD momentum
weight_decay=0.0005  # L2 penalty passed to SGD
interval_validate=4000  # only referenced by the commented-out validation hook in train_model()
batch_size = 1  # samples per batch for both DataLoaders

In [3]:
# Detect CUDA first and only then pin GPU 0: calling set_device() on a machine
# without a usable GPU raises before the availability check is ever reached.
cuda = torch.cuda.is_available()
if cuda:
    torch.cuda.set_device(0)

# to reproduce same results
torch.manual_seed(1337)
if cuda:
    torch.cuda.manual_seed(1337)

In [4]:
def genmat(path,segment):
    """Build joint heatmaps and part-affinity fields (PAFs) on a 1/32-resolution grid.

    Parameters
    ----------
    path : str
        ``.mat`` file read with ``scipy.io.loadmat``. ``mat['joints'][0]`` is
        iterated as one entry per person; each entry is indexed as 14 rows of
        three columns, the last column being a visibility flag.
        NOTE(review): columns 0 and 1 are treated as (x, y) pixel coordinates
        throughout this function -- confirm against the annotation format.
    segment : numpy.ndarray
        Array whose first two dimensions are the image height and width.
        Only its shape is used; 2-D (grayscale) arrays are accepted too.

    Returns
    -------
    out : numpy.ndarray, shape (14, Hn, Wn)
        One Gaussian heatmap per joint, merged across people with ``max``.
    paf : numpy.ndarray, shape (26, Hn, Wn)
        For each of the 13 limbs, channel ``2*i`` holds the x component and
        channel ``2*i+1`` the y component of the limb's unit direction inside
        the limb region, and 0 elsewhere.

    ``Hn = ceil(H/32)`` and ``Wn = ceil(W/32)``.
    """
    H, W = segment.shape[:2]
    Hn = int(np.ceil(H/32.0))
    Wn = int(np.ceil(W/32.0))
    mat = sio.loadmat(path)
    #ADD A POSE FOR HIP CENTER? OR HEURISTIC ON SEGMENT BOUNDS
    limbs = [[0,1],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],[1,11],[11,12],[12,13]]
    out = np.zeros((14,Hn,Wn))
    paf = np.zeros((26,Hn,Wn))
    # x holds the column index and y the row index of every grid cell.
    x, y = np.meshgrid(np.arange(Wn), np.arange(Hn))
    for human in mat['joints'][0]:
        poselist = np.around(human[:,:-1]).astype(np.int64)
        # Column 0 is compared against x (the width axis) below, so it must be
        # scaled by the width ratio, and column 1 by the height ratio. The
        # float result is truncated when stored back into the int64 array.
        poselist[:,0] = poselist[:,0]*Wn/float(W)
        poselist[:,1] = poselist[:,1]*Hn/float(H)
        vis = human[:,2]
        #PAF GT
        for (i,limb) in enumerate(limbs):
            # A limb is only drawn when both of its end joints are visible.
            if vis[limb[0]]==0 or vis[limb[1]]==0:
                continue
            p1 = poselist[limb[0],:]
            p2 = poselist[limb[1],:]
            length = np.linalg.norm(p2-p1)
            if length == 0:
                # Both joints fall into the same grid cell: the direction is
                # undefined (0/0), so there is nothing to draw for this limb.
                continue
            dvec = (p2-p1)/length
            #APPROX RECON
            vecx = x - p1[0]
            vecy = y - p1[1]
            # Projection of each cell onto the limb axis, and the squared
            # distance of each cell from that axis.
            dot = vecx*dvec[0] + vecy*dvec[1]
            perp2 = vecx**2+vecy**2-dot**2
            boolmat = (dot>0) & (dot<length) & (perp2<length*0.3) #sigma^2
            paf[2*i][boolmat] = dvec[0]
            paf[2*i+1][boolmat] = dvec[1]
        #POSE GT
        # NOTE(review): unlike the PAF loop, joints flagged invisible still get
        # a heatmap peak here -- confirm that this is intended.
        for (i,pose) in enumerate(poselist):
            tmp = np.exp(-((x-pose[0])**2 + (y-pose[1])**2)/(2.0*1.0))
            out[i] = np.maximum(out[i],tmp)
    return out,paf
    

In [5]:
# DEMO
# Sanity check: run genmat() on one image/annotation pair and print the shapes.
# NOTE(review): the variable is named `segment` but holds the JPEG image itself;
# genmat() only reads its shape.
segment = PIL.Image.open('/extra_data/ayushya/VOCdevkit/VOC2010/JPEGImages/2009_000544.jpg')
print(segment.size)  # PIL reports (width, height)
segment = np.array(segment,dtype=np.uint8)
print(segment.shape)  # numpy reports (height, width, channels)
out,paf = genmat('/extra_data/ayushya/pascal_data/pascal_data/PersonJoints/2009_000544.mat',segment)
print(paf.shape)  # (26, ceil(H/32), ceil(W/32))
print(out.shape)  # (14, ceil(H/32), ceil(W/32))

(375, 500)
(500, 375, 3)
(26, 16, 12)
(14, 16, 12)




In [6]:
class CDATA(torch.utils.data.Dataset): # Extend PyTorch's Dataset class
    """Dataset yielding ``(image, joint heatmaps, part-affinity fields)``.

    Samples are discovered from the ``.mat`` files under
    ``<root_dir>/pascal_data/pascal_data/PersonJoints/``: file names starting
    with ``200`` form the training split and names starting with ``201`` the
    test split. Image and segmentation paths are derived from the same stem.

    Parameters
    ----------
    root_dir : str
        Dataset root; a trailing slash is optional.
    train : bool
        Selects the ``200*`` (True) or ``201*`` (False) files.
    transform : object, optional
        Accepted for interface compatibility but not used: every image goes
        through the ``transform`` method defined on this class.
    """

    # Per-channel mean, in BGR order, subtracted from every image by transform().
    mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])

    def __init__(self, root_dir, train, transform=None):
        rfile = '200' if train else '201'
        # osp.join keeps the original paths when root_dir ends with '/' and
        # also works when it does not.
        ldir = osp.join(root_dir, 'VOCdevkit/VOC2010/JPEGImages/')
        sdir = osp.join(root_dir, 'pascal_data/pascal_data/SegmentationPart/')
        pdir = osp.join(root_dir, 'pascal_data/pascal_data/PersonJoints/')
        self.img = []
        self.seg = []
        self.mat = []

        # glob returns files in arbitrary, OS-dependent order; sorting makes
        # the sample index (and therefore seeded shuffling) reproducible.
        for mat_path in sorted(glob.glob(pdir+rfile+'*.mat')):
            stem = osp.splitext(osp.basename(mat_path))[0]
            self.img.append(ldir+stem+'.jpg')
            self.seg.append(sdir+stem+'.png')
            self.mat.append(pdir+stem+'.mat')

    def __len__(self):
        """Number of samples (one per discovered ``.mat`` file)."""
        return len(self.img)

    def transform(self, img):
        """Convert an HxWx3 RGB array into a mean-subtracted BGR float tensor (3xHxW)."""
        img = img[:, :, ::-1]  # RGB -> BGR
        img = img.astype(np.float64)
        img -= self.mean_bgr
        img = img.transpose(2, 0, 1)  # HWC -> CHW
        img = torch.from_numpy(img).float()
        return img

    def __getitem__(self, idx):
        """Return ``(image_tensor, poset, paft)`` for sample ``idx``.

        ``poset`` and ``paft`` are float tensors built by ``genmat``; both are
        ``None`` when the annotation file no longer exists on disk.
        """
        # Force three channels so grayscale/palette/RGBA files cannot break
        # the channel flip in transform().
        image = PIL.Image.open(self.img[idx]).convert('RGB')
        image = np.array(image,dtype=np.uint8)
        # The segmentation PNG (self.seg[idx]) is not part of the returned
        # sample, so it is not decoded here.
        if os.path.exists(self.mat[idx]):
            poset,paft = genmat(self.mat[idx],image)
            poset = torch.from_numpy(poset).float()
            paft = torch.from_numpy(paft).float()
        else:
            poset = None
            paft = None
        return (self.transform(image),poset,paft)


In [7]:
# NOTE(review): CDATA.__init__ does not store its `transform` argument, so
# composed_transform has no effect on the samples; images are preprocessed by
# CDATA's own transform() method instead.
composed_transform = transforms.Compose([transforms.Scale((224,224)),transforms.ToTensor()])
train_dataset = CDATA(root_dir='/extra_data/ayushya/', train=True, transform=composed_transform) # Supply proper root_dir
test_dataset = CDATA(root_dir='/extra_data/ayushya/', train=False, transform=composed_transform) # Supply proper root_dir

print('Size of train dataset: %d' % len(train_dataset))
print('Size of test dataset: %d' % len(test_dataset))


# Worker processes and pinned host memory are only requested when CUDA is available.
kwargs = {'num_workers': 4, 'pin_memory': True} if cuda else {}

# Create loaders for the dataset
# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, **kwargs)


Size of train dataset: 2779
Size of test dataset: 769


In [8]:
class suhpos(nn.Module):
    """VGG16-style trunk with two 1/32-resolution heads: joint heatmaps and PAFs.

    The trunk (conv1_1 .. pool5) and the fc6/fc7/score_fr/upscore layers use
    the same layer shapes as an FCN-32s network so that VGG16 weights can be
    copied in with ``copy_params_from_vgg16``. ``forward`` returns only the
    pose (14-channel) and PAF (26-channel) outputs.

    Parameters
    ----------
    n_class : int
        Number of channels of the ``score_fr``/``upscore`` layers. These
        layers are constructed but do not contribute to ``forward``'s outputs.
    """

    def __init__(self, n_class=7):
        super(suhpos, self).__init__()
        # conv1
        # padding=100 enlarges the feature maps; forward() crops the head
        # outputs back to ceil(H/32) x ceil(W/32).
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=100)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/2

        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/4

        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/8

        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/16

        # conv5
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/32

        #pose: 14 output channels, one per joint heatmap
        self.poselayer = nn.Sequential(
            nn.Conv2d(512, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(512, 14, kernel_size=(1, 1), stride=(1, 1)),
        )

        #paf: 26 output channels, an (x, y) pair for each of 13 limbs
        self.paflayer = nn.Sequential(
            nn.Conv2d(512, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1)),
            nn.ReLU(True),
            nn.Conv2d(512, 26, kernel_size=(1, 1), stride=(1, 1)),
        )

        # The layers below are kept so the parameter set (and the VGG16
        # weight copy) is unchanged; forward() does not evaluate them.
        # fc6
        self.fc6 = nn.Conv2d(512, 4096, 7)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()

        # fc7
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()

        self.score_fr = nn.Conv2d(4096, n_class, 1)
        self.upscore = nn.ConvTranspose2d(n_class, n_class, 64, stride=32,
                                          bias=False)

    def forward(self, x):
        """Return ``(poseo, pafo)`` for an input batch ``x`` of shape (N, 3, H, W).

        Both outputs have spatial size ``ceil(H/32) x ceil(W/32)``; ``poseo``
        has 14 channels and ``pafo`` has 26.
        """
        h = x
        h = self.relu1_1(self.conv1_1(h))
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)

        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)

        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        h = self.pool3(h)

        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        h = self.pool4(h)

        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)

        # Integer ceil-division gives plain Python ints for the slice bounds.
        out_h = -(-x.size()[2] // 32)
        out_w = -(-x.size()[3] // 32)
        # Crop away the border introduced by conv1_1's padding.
        # NOTE(review): the offset 5 is kept from the original code; 100 px of
        # padding is about 3 cells at stride 32 -- confirm the alignment.
        poseo = self.poselayer(h)
        poseo = poseo[:, :, 5:5 + out_h, 5:5 + out_w].contiguous()
        pafo = self.paflayer(h)
        pafo = pafo[:, :, 5:5 + out_h, 5:5 + out_w].contiguous()

        # The fc6/fc7/score_fr/upscore branch used to be evaluated here and its
        # result discarded; it is skipped because it never reaches the outputs.
        return poseo,pafo

    def copy_params_from_vgg16(self, vgg16):
        """Initialise the trunk and fc6/fc7 from a torchvision-style VGG16.

        ``vgg16.features`` is walked in parallel with this model's trunk and
        every Conv2d pair must have matching weight/bias sizes.
        ``vgg16.classifier[0]`` and ``[3]`` are reshaped into the fc6/fc7
        convolution kernels. The assigned tensors are the VGG16 tensors
        themselves (no copy is made).
        """
        features = [
            self.conv1_1, self.relu1_1,
            self.conv1_2, self.relu1_2,
            self.pool1,
            self.conv2_1, self.relu2_1,
            self.conv2_2, self.relu2_2,
            self.pool2,
            self.conv3_1, self.relu3_1,
            self.conv3_2, self.relu3_2,
            self.conv3_3, self.relu3_3,
            self.pool3,
            self.conv4_1, self.relu4_1,
            self.conv4_2, self.relu4_2,
            self.conv4_3, self.relu4_3,
            self.pool4,
            self.conv5_1, self.relu5_1,
            self.conv5_2, self.relu5_2,
            self.conv5_3, self.relu5_3,
            self.pool5,
        ]
        for l1, l2 in zip(vgg16.features, features):
            if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
                assert l1.weight.size() == l2.weight.size()
                assert l1.bias.size() == l2.bias.size()
                l2.weight.data = l1.weight.data
                l2.bias.data = l1.bias.data
        for i, name in zip([0, 3], ['fc6', 'fc7']):
            l1 = vgg16.classifier[i]
            l2 = getattr(self, name)
            l2.weight.data = l1.weight.data.view(l2.weight.size())
            l2.bias.data = l1.bias.data.view(l2.bias.size())

In [9]:
# MODEL
# DEFINE MODEL
# model = torchfcn.models.FCN8s(n_class=21)
# Load ImageNet-pretrained VGG16 and copy its conv/fc weights into suhpos.
vgg16 = models.vgg16(pretrained=True)
model = suhpos()
model.copy_params_from_vgg16(vgg16)
print(model)

# NOTE(review): resume / start_epoch / start_iteration are not read by any of
# the cells shown here -- presumably leftovers from a checkpoint-resume workflow.
resume = 0

start_epoch = 0
start_iteration = 0
if cuda:
    model = model.cuda()

suhpos (
  (conv1_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(100, 100))
  (relu1_1): ReLU (inplace)
  (conv1_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1_2): ReLU (inplace)
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2_1): ReLU (inplace)
  (conv2_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2_2): ReLU (inplace)
  (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv3_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3_1): ReLU (inplace)
  (conv3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3_2): ReLU (inplace)
  (conv3_3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3_3): ReLU (inplace)
  (pool3): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv4_1): Conv2d(256, 512,

In [10]:
# LOSS
# Two mean-squared-error criteria: train_model() applies loss1 to the joint
# heatmaps and loss2 to the part-affinity fields.
loss1 = nn.MSELoss()
loss2 = nn.MSELoss()

def cross_entropy2d(input, target, weight=None, size_average=True):
    """Pixel-wise cross entropy that ignores pixels with a negative label.

    Parameters
    ----------
    input : tensor of shape (n, c, h, w)
        Unnormalised class scores.
    target : integer tensor of shape (n, h, w)
        Class index per pixel; entries ``< 0`` are excluded from the loss.
    weight : tensor, optional
        Per-class weights forwarded to ``F.nll_loss``.
    size_average : bool
        When true, the summed loss is divided by the number of valid pixels.

    Returns
    -------
    The summed (or valid-pixel-averaged) negative log-likelihood.
    """
    # input: (n, c, h, w), target: (n, h, w)
    n, c, h, w = input.size()
    # log_p: (n, c, h, w)
    # No explicit `dim` is passed, matching the original call; for 4-D input
    # the implicit softmax dimension is the channel dimension.
    log_p = F.log_softmax(input)
    # log_p: (n*h*w, c)
    log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    # The row mask must have the same (n*h*w, c) shape as the flattened
    # log_p; an (n, h, w, c) mask no longer matches after the view above.
    valid_rows = target.view(-1, 1).repeat(1, c) >= 0
    log_p = log_p[valid_rows]
    log_p = log_p.view(-1, c)
    # target: (number of valid pixels,)
    mask = target >= 0
    target = target[mask]
    loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
    if size_average:
        loss /= mask.data.sum()
    return loss


In [11]:
# OPTIMIZER
# Plain SGD over every parameter of `model`, configured from the
# hyperparameter cell (lr, momentum, weight_decay).
optim = torch.optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay)

In [12]:
# TRAINING
def train_model():
    """Train the pose and PAF heads with SGD until ``max_iteration`` is reached.

    Reads the notebook globals ``train_loader``, ``model``, ``optim``,
    ``loss1``, ``loss2``, ``cuda`` and ``max_iteration``. Each step runs one
    batch through the model, sums the two MSE losses, backpropagates once and
    applies an optimizer update. Both loss values are printed after every step.

    Raises
    ------
    ValueError
        If either loss evaluates to NaN.
    """
    steps_per_epoch = len(train_loader)
    max_epoch = int(math.ceil(1. * max_iteration / steps_per_epoch))

    for epoch in tqdm.tnrange(0, max_epoch, desc='Train', ncols=80):
        batches = tqdm.tqdm(
            enumerate(train_loader),
            total=steps_per_epoch,
            desc='Train epoch=%d' % epoch,
            ncols=80,
            leave=False)

        for batch_idx, (data, poset, paft) in batches:
            # Global step counter across epochs.
            iteration = epoch * steps_per_epoch + batch_idx

            if cuda:
                data = data.cuda()
                poset = poset.cuda()
                paft = paft.cuda()
            data = Variable(data)
            poset = Variable(poset)
            paft = Variable(paft)

            optim.zero_grad()
            pose, paf = model(data)

            l1 = loss1(pose, poset)
            l2 = loss2(paf, paft)
            if np.isnan(float(l1.data[0])):
                raise ValueError('loss1 is nan while training')
            if np.isnan(float(l2.data[0])):
                raise ValueError('loss2 is nan while training')

            # Both heads share the trunk, so the losses are summed and
            # backpropagated in a single pass.
            l = l1 + l2
            l.backward()
            optim.step()
            print("loss1: ", l1.data[0], "loss2: ", l2.data[0])

            # Stops the current epoch once the step budget is used up.
            if iteration >= max_iteration:
                break



In [13]:
# Start training.
# NOTE(review): train_model() calls backward() once per step on the summed loss,
# so the "backward through the graph a second time" error recorded below
# presumably comes from an earlier revision of the cell -- re-run to refresh the output.
train_model()

A Jupyter Widget

                                                                                




RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.