In [5]:
from __future__ import division, print_function, unicode_literals

import argparse
import collections
import glob
import math
import os
import os.path as osp

import numpy as np
import PIL.Image
import scipy.io as sio
import scipy.misc
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import tqdm
from torch.autograd import Variable

import fcn
# %matplotlib inline
import matplotlib.pyplot as plt

In [6]:
# HYPERPARAMS -- shared by the optimizer cell and the training loop.
max_iteration = 100000      # upper bound on the number of optimisation steps
lr = 1.0e-14                # base learning rate; the bias group uses lr * 2
momentum = 0.99
weight_decay = 0.0005       # weight group only; the bias group overrides it with 0
interval_validate = 4000    # only referenced by the commented-out validation hook
batch_size = 1

In [8]:
# Query CUDA availability *before* touching the device API: selecting a
# device unconditionally raises on machines without a usable GPU.
cuda = torch.cuda.is_available()
if cuda:
    # GPU 1 is the device this notebook was originally pinned to; fall back
    # to GPU 0 when only a single device is visible.
    torch.cuda.set_device(1 if torch.cuda.device_count() > 1 else 0)

# to reproduce same results
torch.manual_seed(1337)
if cuda:
    torch.cuda.manual_seed(1337)

In [9]:
class CDATA(torch.utils.data.Dataset):
    """Paired JPEG images and PNG label maps discovered from a label directory.

    Samples are found by globbing ``<root_dir>/pascal_data/pascal_data/
    SegmentationPart/<prefix>*.png`` where ``prefix`` is ``'200'`` for the
    training split and ``'201'`` otherwise; the matching image is expected at
    ``<root_dir>/VOCdevkit/VOC2010/JPEGImages/<stem>.jpg``.

    Args:
        root_dir: dataset root (with or without a trailing slash).
        train: truthy selects the ``'200*'`` files, falsy the ``'201*'`` files.
        transform: accepted for call-site compatibility but NOT stored; every
            sample goes through the :meth:`transform` method below instead.
    """

    # Per-channel means, in BGR order, subtracted from every image.
    mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])

    def __init__(self, root_dir, train, transform=None):
        prefix = '200' if train else '201'
        img_dir = osp.join(root_dir, 'VOCdevkit/VOC2010/JPEGImages/')
        seg_dir = osp.join(root_dir, 'pascal_data/pascal_data/SegmentationPart/')

        self.img = []
        self.seg = []
        # glob() returns files in arbitrary filesystem order; sorting keeps the
        # index -> sample mapping stable across runs and machines.
        for seg_path in sorted(glob.glob(osp.join(seg_dir, prefix + '*.png'))):
            stem = osp.splitext(osp.basename(seg_path))[0]
            self.img.append(osp.join(img_dir, stem + '.jpg'))
            self.seg.append(osp.join(seg_dir, stem + '.png'))

    def __len__(self):
        """Number of (image, label) pairs found at construction time."""
        return len(self.img)

    def transform(self, img, lbl):
        """Convert an HxWx3 RGB uint8 image and an HxW label map to tensors.

        The image is flipped to BGR, mean-subtracted, moved to channel-first
        layout and returned as a float tensor; the label becomes a long tensor.
        """
        img = img[:, :, ::-1]  # RGB -> BGR
        img = img.astype(np.float64)
        img -= self.mean_bgr
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).float()
        lbl = torch.from_numpy(lbl).long()
        return img, lbl

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image_tensor, label_tensor)``."""
        # Force three channels so the BGR flip in `transform` cannot hit a
        # 2-D (grayscale / palette) array.
        image = np.array(PIL.Image.open(self.img[idx]).convert('RGB'),
                         dtype=np.uint8)
        segment = np.array(PIL.Image.open(self.seg[idx]), dtype=np.uint8)
        # `transform` is a method, so this guard only fires if an instance or
        # subclass explicitly overrides the attribute with None.
        if self.transform is None:
            return (image, segment)
        return self.transform(image, segment)



class Seg_test(VOCClassSegBase):
    """Random 10% subset of the train/val part-segmentation id lists.

    For each of the ``train`` and ``val`` splits the ids in
    ``<root>/pascal_data/pascal_data/<split>_seg.txt`` are shuffled with
    NumPy's global RNG and the first tenth is kept.

    NOTE(review): ``VOCClassSegBase`` is not defined or imported in the visible
    part of this notebook; ``self.transform`` (and presumably ``__len__``) are
    expected to come from it -- confirm where it is meant to be imported from.

    Args:
        root: dataset root directory.
        split: which split ``__getitem__`` serves (``'train'`` or ``'val'``).
        transform: when truthy, samples are passed through ``self.transform``.
    """

    # XXX: It must be renamed to benchmark.tar to be extracted.
    url = 'http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz'  # NOQA

    def __init__(self, root, split='train', transform=False):
        self.root = root
        self.split = split
        self._transform = transform

        self.files = collections.defaultdict(list)
        # Distinct loop name so the `split` argument is not shadowed.
        for split_name in ['train', 'val']:
            imgsets_file = osp.join(self.root, 'pascal_data/pascal_data/%s_seg.txt' % split_name)
            with open(imgsets_file) as id_file:
                img_id_list = [did.strip() for did in id_file if did.strip()]
            np.random.shuffle(img_id_list)
            # Slice bounds must be integers; keep the first 10% after shuffling.
            img_id_list = img_id_list[:int(0.1 * len(img_id_list))]
            for did in img_id_list:
                img_file = osp.join(self.root, 'VOCdevkit/VOC2010/JPEGImages/%s.jpg' % did)
                lbl_file = osp.join(self.root, 'pascal_data/pascal_data/SegmentationPart/%s.png' % did)
                self.files[split_name].append({
                    'img': img_file,
                    'lbl': lbl_file,
                })

    def __getitem__(self, index):
        """Return the ``index``-th (image, label) pair of the active split."""
        data_file = self.files[self.split][index]
        # load image
        img_file = data_file['img']
        img = PIL.Image.open(img_file)
        img = np.array(img, dtype=np.uint8)
        # load label
        lbl_file = data_file['lbl']
        mat = PIL.Image.open(lbl_file)
        lbl = np.array(mat, dtype=np.uint8)
        if self._transform:
            return self.transform(img, lbl)
        else:
            return img, lbl
        
class PAFloader(VOCClassSegBase):
    """Images paired with joint heat-maps and part-affinity fields (PAFs).

    Ids are read from ``<root>/pascal_data/pascal_data/<split>_idnew.txt``;
    each id maps to a JPEG image and a ``PersonJoints/<id>.mat`` annotation
    from which :meth:`genmat` rasterises the targets.

    NOTE(review): ``VOCClassSegBase`` is not defined or imported in the visible
    part of this notebook; ``self.mean_bgr`` (and presumably ``__len__``) are
    expected to come from it -- confirm where it is meant to be imported from.

    Args:
        root: dataset root directory.
        split: which split ``__getitem__`` serves (``'train'`` or ``'val'``).
        transform: when truthy, samples are passed through :meth:`transform`.
    """

    # XXX: It must be renamed to benchmark.tar to be extracted.
    url = 'http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz'  # NOQA

    def __init__(self, root, split='train', transform=False):
        self.root = root
        self.split = split
        self._transform = transform

        self.files = collections.defaultdict(list)
        # Distinct loop name so the `split` argument is not shadowed.
        for split_name in ['train', 'val']:
            imgsets_file = osp.join(self.root, 'pascal_data/pascal_data/%s_idnew.txt' % split_name)
            with open(imgsets_file) as id_file:
                for did in id_file:
                    did = did.strip()
                    img_file = osp.join(self.root, 'VOCdevkit/VOC2010/JPEGImages/%s.jpg' % did)
                    lbl_file = osp.join(self.root, 'pascal_data/pascal_data/PersonJoints/%s.mat' % did)
                    self.files[split_name].append({
                        'img': img_file,
                        'lbl': lbl_file,
                    })

    @staticmethod
    def genmat(path, image):
        """Rasterise joint annotations into heat-maps and PAFs at 1/32 scale.

        Args:
            path: ``.mat`` file whose ``'joints'`` entry holds one array per
                person; the code reads the last column as a visibility flag
                and the preceding columns as coordinates.
            image: HxWxC array; only its height and width are used.

        Returns:
            ``(out, paf)`` where ``out`` has shape ``(14, Hn, Wn)`` (one
            Gaussian heat-map per joint, max-combined over people) and ``paf``
            has shape ``(26, Hn, Wn)`` (two direction channels per limb), with
            ``Hn = ceil(H / 32)`` and ``Wn = ceil(W / 32)``.
        """
        H, W, _ = image.shape
        Hn = np.ceil(H / 32.0).astype(np.int64)
        Wn = np.ceil(W / 32.0).astype(np.int64)
        mat = sio.loadmat(path)
        limbs = [[0,1],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],[1,11],[11,12],[12,13]]
        out = np.zeros((14,Hn,Wn))
        paf = np.zeros((26,Hn,Wn))
        x, y = np.meshgrid(np.arange(Wn), np.arange(Hn))
        for human in mat['joints'][0]:
            poselist = np.around(human[:,:-1]).astype(np.int64)
            # NOTE(review): column 0 is rescaled with the height ratio but is
            # later compared against the column grid `x` (and column 1 against
            # `y`) -- confirm the (x, y) order stored in the .mat file.
            poselist[:,0] = poselist[:,0]*Hn/H
            poselist[:,1] = poselist[:,1]*Wn/W
            vis = human[:,2]
            # PAF GT
            for (i,limb) in enumerate(limbs):
                if vis[limb[0]]==0 or vis[limb[1]]==0:
                    continue
                p1 = poselist[limb[0],:]
                p2 = poselist[limb[1],:]
                length = np.linalg.norm(p2-p1)
                if length == 0:
                    # Both joints fall in the same cell: the direction is
                    # undefined (0/0), so no field is written for this limb.
                    continue
                dvec = (p2-p1)/length
                # APPROX RECON
                vecx = x - p1[0]
                vecy = y - p1[1]
                dot = vecx*dvec[0] + vecy*dvec[1]
                perp2 = vecx**2+vecy**2-dot**2
                boolmat = (dot>0) & (dot<length) & (perp2<length*0.3) #sigma^2
                paf[2*i][boolmat] = dvec[0]
                paf[2*i+1][boolmat] = dvec[1]
            # POSE GT
            for (i,pose) in enumerate(poselist):
                tmp = np.exp(-((x-pose[0])**2 + (y-pose[1])**2)/(2.0*1.0))
                out[i] = np.maximum(out[i],tmp)
        return out,paf

    def transform(self, img, pose, paf):
        """Convert an RGB image and its two target arrays to float tensors."""
        img = img[:, :, ::-1]  # RGB -> BGR
        img = img.astype(np.float64)
        img -= self.mean_bgr
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).float()
        pose = torch.from_numpy(pose).float()
        paf = torch.from_numpy(paf).float()
        return img, pose, paf

    def __getitem__(self, index):
        """Return ``(image, pose, paf)`` for the ``index``-th sample."""
        data_file = self.files[self.split][index]
        # load image
        img_file = data_file['img']
        img = PIL.Image.open(img_file)
        img = np.array(img, dtype=np.uint8)
        # load label
        lbl_file = data_file['lbl']
        # `genmat` lives on the class; a bare-name call would raise NameError.
        pose, paf = self.genmat(lbl_file, img)
        if self._transform:
            return self.transform(img, pose, paf)
        else:
            return img, pose, paf

In [10]:
# Image pipeline handed to the datasets. CDATA accepts the `transform`
# argument but its assignment is commented out in the class, so samples go
# through CDATA.transform rather than this pipeline.
composed_transform = transforms.Compose([
    transforms.Scale((224,224)),
    transforms.ToTensor(),
])
train_dataset = CDATA(root_dir='/extra_data/ayushya/',  # Supply proper root_dir
                      train=True,
                      transform=composed_transform)
test_dataset = CDATA(root_dir='/extra_data/ayushya/',  # Supply proper root_dir
                     train=False,
                     transform=composed_transform)

print('Size of train dataset: %d' % len(train_dataset))
print('Size of test dataset: %d' % len(test_dataset))

# Worker processes and pinned host memory are only requested on CUDA runs.
if cuda:
    kwargs = {'num_workers': 4, 'pin_memory': True}
else:
    kwargs = {}

# Create loaders for the dataset
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False, **kwargs)


Size of train dataset: 4871
Size of test dataset: 1889


In [11]:
class suhpos(nn.Module):
    """VGG16-style fully convolutional net with one stride-32 upsampling layer.

    Five conv/pool stages are followed by convolutional ``fc6``/``fc7``, a 1x1
    scoring layer and a transposed convolution (``upscore``) whose output is
    cropped back to the input's spatial size.

    The ``upscore`` kernel is initialised to bilinear interpolation at
    construction time. ``get_parameters`` never hands transposed-convolution
    weights to the optimizer, so without this initialisation the upsampling
    would stay at its random starting values for the whole run.

    Args:
        n_class: number of output score channels.
    """

    def __init__(self, n_class=7):
        super(suhpos, self).__init__()
        # conv1 (padding=100 enlarges the map so the final crop at offset 19
        # in `forward` can recover the input size)
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=100)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/2

        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/4

        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/8

        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/16

        # conv5
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/32

        # fc6
        self.fc6 = nn.Conv2d(512, 4096, 7)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()

        # fc7
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()

        self.score_fr = nn.Conv2d(4096, n_class, 1)
        self.upscore = nn.ConvTranspose2d(n_class, n_class, 64, stride=32,
                                          bias=False)

        self._initialize_weights()

    @staticmethod
    def bilinear_kernel(in_channels, out_channels, kernel_size):
        """Build a transposed-convolution weight that does bilinear upsampling.

        Returns a float tensor of shape ``(in_channels, out_channels,
        kernel_size, kernel_size)``. Channel ``c`` maps only to channel ``c``
        through a separable triangular filter; every cross-channel kernel is
        zero.
        """
        factor = (kernel_size + 1) // 2
        if kernel_size % 2 == 1:
            center = factor - 1.0
        else:
            center = factor - 0.5
        rows, cols = np.ogrid[:kernel_size, :kernel_size]
        filt = ((1 - np.abs(rows - center) / float(factor)) *
                (1 - np.abs(cols - center) / float(factor)))
        weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size),
                          dtype=np.float64)
        for c in range(min(in_channels, out_channels)):
            weight[c, c, :, :] = filt
        return torch.from_numpy(weight).float()

    def _initialize_weights(self):
        """Overwrite every transposed-convolution kernel with a bilinear one.

        Ordinary convolutions keep the framework's default initialisation
        (the VGG layers are expected to be filled by
        ``copy_params_from_vgg16``).
        """
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                assert m.kernel_size[0] == m.kernel_size[1]
                m.weight.data.copy_(self.bilinear_kernel(
                    m.in_channels, m.out_channels, m.kernel_size[0]))

    def forward(self, x):
        """Return per-class scores with the same height and width as ``x``."""
        h = x
        h = self.relu1_1(self.conv1_1(h))
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)

        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)

        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        h = self.pool3(h)

        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        h = self.pool4(h)

        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)

        h = self.relu6(self.fc6(h))
        h = self.drop6(h)

        h = self.relu7(self.fc7(h))
        h = self.drop7(h)

        h = self.score_fr(h)

        h = self.upscore(h)
        # Crop the upsampled map back to the input's height and width.
        h = h[:, :, 19:19 + x.size()[2], 19:19 + x.size()[3]].contiguous()

        return h

    def copy_params_from_vgg16(self, vgg16):
        """Copy VGG16 weights into the conv stages and the fc6/fc7 layers.

        Args:
            vgg16: a model exposing ``features`` (iterable of layers in the
                same order as the list below) and ``classifier`` (indexable,
                with the two fully-connected layers at positions 0 and 3).

        Values are copied rather than aliased, so later updates to this
        network do not modify ``vgg16``.
        """
        features = [
            self.conv1_1, self.relu1_1,
            self.conv1_2, self.relu1_2,
            self.pool1,
            self.conv2_1, self.relu2_1,
            self.conv2_2, self.relu2_2,
            self.pool2,
            self.conv3_1, self.relu3_1,
            self.conv3_2, self.relu3_2,
            self.conv3_3, self.relu3_3,
            self.pool3,
            self.conv4_1, self.relu4_1,
            self.conv4_2, self.relu4_2,
            self.conv4_3, self.relu4_3,
            self.pool4,
            self.conv5_1, self.relu5_1,
            self.conv5_2, self.relu5_2,
            self.conv5_3, self.relu5_3,
            self.pool5,
        ]
        for l1, l2 in zip(vgg16.features, features):
            if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
                assert l1.weight.size() == l2.weight.size()
                assert l1.bias.size() == l2.bias.size()
                l2.weight.data.copy_(l1.weight.data)
                l2.bias.data.copy_(l1.bias.data)
        for i, name in zip([0, 3], ['fc6', 'fc7']):
            l1 = vgg16.classifier[i]
            l2 = getattr(self, name)
            # Fully-connected weights are reshaped to the conv kernel layout.
            l2.weight.data.copy_(l1.weight.data.view(l2.weight.size()))
            l2.bias.data.copy_(l1.bias.data.view(l2.bias.size()))

In [12]:

def VGG16(pretrained=False):
    """Build a torchvision VGG16, optionally loading the cached Caffe weights.

    Args:
        pretrained: when truthy, the state dict located by
            ``_get_vgg16_pretrained_model`` is loaded into the network.

    Returns:
        The VGG16 model.
    """
    net = torchvision.models.vgg16(pretrained=False)
    if pretrained:
        weights_path = _get_vgg16_pretrained_model()
        net.load_state_dict(torch.load(weights_path))
    return net

def _get_vgg16_pretrained_model():
    """Return the result of ``fcn.data.cached_download`` for the VGG16 weights.

    The file is requested with a fixed URL, target path under the user's home
    directory, and MD5 checksum.
    """
    # Alternative link kept from the original cell:
    # https://drive.google.com/uc?export=download&confirm=pNaP&id=0B9P1L--7Wd2vLTJZMXpIRkVVRFk
    download_args = dict(
        url='http://drive.google.com/uc?id=0B9P1L--7Wd2vLTJZMXpIRkVVRFk',
        path=osp.expanduser('~/data/models/pytorch/vgg16_from_caffe.pth'),
        md5='aa75b158f4181e7f6230029eb96c1b13',
    )
    return fcn.data.cached_download(**download_args)

In [13]:
# MODEL
# Build the pretrained VGG16 and transplant its weights into the
# segmentation network.
vgg16 = VGG16(pretrained=True)
model = suhpos()
model.copy_params_from_vgg16(vgg16)

# Resume bookkeeping (not read anywhere else in the visible cells).
resume = 0
start_epoch, start_iteration = 0, 0

if cuda:
    model = model.cuda()


Permission denied: http://drive.google.com/uc?id=0B9P1L--7Wd2vLTJZMXpIRkVVRFk


ValueError: nothing to open

In [None]:
# LOSS
# Two independent mean-squared-error criteria.
loss1, loss2 = nn.MSELoss(), nn.MSELoss()

def cross_entropy2d(input, target, weight=None, size_average=False):
    # input: (n, c, h, w), target: (n, h, w)
    n, c, h, w = input.size()
    # log_p: (n, c, h, w)
    log_p = F.log_softmax(input)
    # log_p: (n*h*w, c)
    log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
    log_p = log_p.view(-1, c)
    # target: (n*h*w,)
    mask = target >= 0
    target = target[mask]
    loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
    if size_average:
        loss /= mask.data.sum()
    return loss


In [9]:
def get_parameters(model, bias=False):
    """Yield the weights (or biases) of every ``nn.Conv2d`` inside ``model``.

    Args:
        model: module whose sub-modules are walked via ``model.modules()``.
        bias: yield ``bias`` tensors when truthy, ``weight`` tensors otherwise.

    Raises:
        ValueError: on a sub-module that is neither a convolution nor one of
            the explicitly ignored layer types.
    """
    import torch.nn as nn
    ignored_types = (nn.ReLU, nn.MaxPool2d, nn.Dropout2d, nn.Sequential, suhpos)
    for module in model.modules():
        if isinstance(module, nn.Conv2d):
            yield module.bias if bias else module.weight
            continue
        if isinstance(module, nn.ConvTranspose2d):
            # weight is frozen because it is just a bilinear upsampling
            if bias:
                assert module.bias is None
            continue
        if not isinstance(module, ignored_types):
            raise ValueError('Unexpected module: %s' % str(module))


In [10]:
# OPTIMIZER
# Two parameter groups: conv weights use the defaults given below, conv
# biases use twice the learning rate and no weight decay.
optim = torch.optim.SGD(
    [
        dict(params=get_parameters(model, bias=False)),
        dict(params=get_parameters(model, bias=True), lr=lr * 2, weight_decay=0),
    ],
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)

In [11]:
n_class = 7


def _fast_hist(label_true, label_pred, n_class):
    """Confusion matrix (rows: true class, columns: predicted class).

    Only positions whose true label lies in ``[0, n_class)`` are counted.
    Both inputs are expected to be flat integer arrays of equal length.
    """
    mask = (label_true >= 0) & (label_true < n_class)
    hist = np.bincount(
        n_class * label_true[mask].astype(int) +
        label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class)
    return hist


def label_accuracy_score(label_trues, label_preds, n_class):
    """Returns accuracy score evaluation result.
      - overall accuracy
      - mean accuracy
      - mean IU
      - fwavacc

    Args:
        label_trues: iterable of ground-truth label arrays.
        label_preds: iterable of predicted label arrays, paired with
            ``label_trues``.
        n_class: number of classes.

    Classes that never occur give 0/0 in the per-class ratios; those entries
    become NaN and are skipped by ``np.nanmean``. The divisions run under
    ``np.errstate`` so this expected case does not emit a RuntimeWarning on
    every call.
    """
    hist = np.zeros((n_class, n_class))
    for lt, lp in zip(label_trues, label_preds):
        hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
    with np.errstate(divide='ignore', invalid='ignore'):
        acc = np.diag(hist).sum() / hist.sum()
        acc_cls = np.diag(hist) / hist.sum(axis=1)
        iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
        freq = hist.sum(axis=1) / hist.sum()
    acc_cls = np.nanmean(acc_cls)
    mean_iu = np.nanmean(iu)
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
    return acc, acc_cls, mean_iu, fwavacc

In [33]:
# Sanity check for label_accuracy_score. Channel i of the fake score map holds
# 1 + i**2, so the arg-max over channels is 6 at every pixel.
ones = np.ones((1, 7, 50, 50))
for i in range(7):  # `range` works on both Python 2 and 3 (`xrange` does not)
    ones[0, i, :, :] += i ** 2
ones = torch.from_numpy(ones).long()
out = Variable(ones).data.max(1)[1].cpu().numpy()[:, :, :]

# Compared against an all-6 map: every pixel agrees.
onez_ = np.ones((1, 50, 50)).astype(np.int64) * 6

label_accuracy_score(out, onez_, 7)

# Compared against an all-0 map: no pixel agrees.
onez_ = np.zeros((1, 50, 50)).astype(np.int64)

label_accuracy_score(out, onez_, 7)

In [12]:
# TRAINING
def train_model():
    """Run SGD over ``train_loader`` until ``max_iteration`` steps are done.

    Uses the module-level ``model``, ``optim``, ``train_loader``, ``cuda``,
    ``max_iteration`` and ``n_class``. Every 100th iteration the loss and the
    segmentation metrics of the current batch are printed.

    Raises:
        ValueError: if the loss becomes NaN.
    """
    log_interval = 100
    max_epoch = int(math.ceil(1. * max_iteration / len(train_loader)))
    for epoch in tqdm.trange(0, max_epoch, desc='Train', ncols=80):
        for batch_idx, (data, target) in tqdm.tqdm(enumerate(train_loader),
                                                   total=len(train_loader),
                                                   desc="Train epoch: "+str(epoch),
                                                   ncols=80,
                                                   leave=False):
            iteration = batch_idx + epoch * len(train_loader)
            if iteration >= max_iteration:
                break

            # TODO: validation hook -- the original cell had a commented-out
            # `validate(iteration)` call every `interval_validate` iterations.

            if cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            optim.zero_grad()
            seg = model(data)

            loss = cross_entropy2d(seg, target)
            loss /= len(data)
            # NOTE: `loss.data[0]` is the pre-0.4 PyTorch way to read a scalar
            # loss; newer releases need `loss.item()` instead.
            if np.isnan(float(loss.data[0])):
                raise ValueError('loss is nan while training')
            loss.backward()
            optim.step()

            # Metrics are only ever printed, so skip the device-to-host copy
            # and the confusion-matrix work on iterations that do not log.
            if iteration % log_interval != 0:
                continue

            metrics = []
            lbl_pred = seg.data.max(1)[1].cpu().numpy()[:, :, :]
            lbl_true = target.data.cpu().numpy()
            for lt, lp in zip(lbl_true, lbl_pred):
                acc, acc_cls, mean_iu, fwavacc = \
                    label_accuracy_score(
                        [lt], [lp], n_class=n_class)
                metrics.append((acc, acc_cls, mean_iu, fwavacc))
            metrics = np.mean(metrics, axis=0)

            print("loss :", loss.data[0], "Metrics : ", metrics.tolist())




In [13]:
# Start training; the loop stops once `max_iteration` steps have been taken.
train_model()

Train:   0%|                                             | 0/21 [00:00<?, ?it/s]
Train epoch: 0:   0%|                                  | 0/4871 [00:00<?, ?it/s][A


RuntimeError: Traceback (most recent call last):
  File "/users/gpu/ayushya/miniconda2/envs/ayushya/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 62, in _pin_memory_loop
    batch = pin_memory_batch(batch)
  File "/users/gpu/ayushya/miniconda2/envs/ayushya/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 123, in pin_memory_batch
    return [pin_memory_batch(sample) for sample in batch]
  File "/users/gpu/ayushya/miniconda2/envs/ayushya/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 117, in pin_memory_batch
    return batch.pin_memory()
  File "/users/gpu/ayushya/miniconda2/envs/ayushya/lib/python2.7/site-packages/torch/tensor.py", line 82, in pin_memory
    return type(self)().set_(storage.pin_memory()).view_as(self)
  File "/users/gpu/ayushya/miniconda2/envs/ayushya/lib/python2.7/site-packages/torch/storage.py", line 84, in pin_memory
    return type(self)(self.size(), allocator=allocator).copy_(self)
RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1503966894950/work/torch/lib/THC/THCCachingHostAllocator.cpp:258
