In [1]:
from __future__ import division, print_function, unicode_literals
import numpy as np
import argparse
import os
import os.path as osp
import tqdm
import torch
import math
import torch.utils.data
import torchvision.models as models
import torchvision.transforms as transforms
import glob
import torchvision
import PIL.Image
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import scipy.misc
import scipy.io as sio
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# HYPERPARAMS
max_iteration=100000
lr=1.0e-14
momentum=0.99
weight_decay=0.0005
interval_validate=4000
batch_size = 1

In [3]:
torch.cuda.set_device(1)
cuda = torch.cuda.is_available()

# to reproduce same results
torch.manual_seed(1337)
if cuda:
    torch.cuda.manual_seed(1337)

In [4]:
class CDATA(torch.utils.data.Dataset): # Extend PyTorch's Dataset class
    def __init__(self, root_dir, train, transform=None):
        if(train):
            rfile = '200'
        else :
            rfile = '201'
        ldir = root_dir + 'VOCdevkit/VOC2010/JPEGImages/'
        sdir = root_dir + 'pascal_data/pascal_data/SegmentationPart/'
#         pdir = root_dir + 'pascal_data/pascal_data/PersonJoints/'
#         self.transform = transform
        self.img = []
        self.seg = []
#         self.mat = []
        
        for line in glob.glob(sdir+rfile+'*.png'):
            line = line.split("/")[-1].split(".")[-2]
#                     print(line)
            self.img.append(ldir+line+'.jpg')
            self.seg.append(sdir+line+'.png')
#                     self.mat.append(pdir+line+'.mat')
#             PUT DATA IN CORRESPONDING VARS
            
            
#             self.label.append(ord(file_path.split('/')[-2]) - ord('A')) #ord makes A,B,C.. to 0,1,2,.. respectively

            
    def __len__(self):
        # return the size of the dataset (total number of images) as an integer
        # this should be rather easy if you created a mapping in __init__
        return len(self.img)
       
    mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])
    def transform(self, img, lbl):
        img = img[:, :, ::-1]  # RGB -> BGR
        img = img.astype(np.float64)
        img -= self.mean_bgr
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).float()
        lbl = torch.from_numpy(lbl).long()
        return img, lbl
    
    def __getitem__(self, idx):
        # idx - the index of the sample requested
        #
        # Open the image correspoding to idx, apply transforms on it and return a tuple (image, label)
        # where label is an integer from 0-9 (since notMNIST has 10 classes)
#         print(idx)
        image = PIL.Image.open(self.img[idx])
        image = np.array(image,dtype=np.uint8)
        segment = PIL.Image.open(self.seg[idx])
        segment = np.array(segment,dtype=np.uint8)
        print(image.shape,segment.shape)
#         poset,paft = genmat(self.mat[idx],segment)
        if self.transform is None:
            return (image,segment)
        else:
            img_transformed,segment = self.transform(image,segment)
#             RETURN VARS
            return (img_transformed,segment)


In [5]:
composed_transform = transforms.Compose([transforms.Scale((224,224)),transforms.ToTensor()])
train_dataset = CDATA(root_dir='/extra_data/ayushya/', train=True, transform=composed_transform) # Supply proper root_dir
test_dataset = CDATA(root_dir='/extra_data/ayushya/', train=False, transform=composed_transform) # Supply proper root_dir

print('Size of train dataset: %d' % len(train_dataset))
print('Size of test dataset: %d' % len(test_dataset))


kwargs = {'num_workers': 4, 'pin_memory': True} if cuda else {}

# Create loaders for the dataset
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, **kwargs)


Size of train dataset: 4871
Size of test dataset: 1889


In [6]:
class suhpos(nn.Module):
    def __init__(self, n_class=7):
        super(suhpos, self).__init__()
        # conv1
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=100)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/2

        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/4

        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/8

        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu4_3 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/16

        # conv5
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_1 = nn.ReLU(inplace=True)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_2 = nn.ReLU(inplace=True)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.relu5_3 = nn.ReLU(inplace=True)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/32

        # fc6
        self.fc6 = nn.Conv2d(512, 4096, 7)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop6 = nn.Dropout2d()

        # fc7
        self.fc7 = nn.Conv2d(4096, 4096, 1)
        self.relu7 = nn.ReLU(inplace=True)
        self.drop7 = nn.Dropout2d()

        self.score_fr = nn.Conv2d(4096, n_class, 1)
        self.upscore = nn.ConvTranspose2d(n_class, n_class, 64, stride=32,
                                          bias=False)

    def forward(self, x):
        h = x
        h = self.relu1_1(self.conv1_1(h))
        h = self.relu1_2(self.conv1_2(h))
        h = self.pool1(h)

        h = self.relu2_1(self.conv2_1(h))
        h = self.relu2_2(self.conv2_2(h))
        h = self.pool2(h)

        h = self.relu3_1(self.conv3_1(h))
        h = self.relu3_2(self.conv3_2(h))
        h = self.relu3_3(self.conv3_3(h))
        h = self.pool3(h)

        h = self.relu4_1(self.conv4_1(h))
        h = self.relu4_2(self.conv4_2(h))
        h = self.relu4_3(self.conv4_3(h))
        h = self.pool4(h)

        h = self.relu5_1(self.conv5_1(h))
        h = self.relu5_2(self.conv5_2(h))
        h = self.relu5_3(self.conv5_3(h))
        h = self.pool5(h)

        h = self.relu6(self.fc6(h))
        h = self.drop6(h)

        h = self.relu7(self.fc7(h))
        h = self.drop7(h)

        h = self.score_fr(h)

        h = self.upscore(h)
        h = h[:, :, 19:19 + x.size()[2], 19:19 + x.size()[3]].contiguous()

        return h
    
    def copy_params_from_vgg16(self, vgg16):
        features = [
            self.conv1_1, self.relu1_1,
            self.conv1_2, self.relu1_2,
            self.pool1,
            self.conv2_1, self.relu2_1,
            self.conv2_2, self.relu2_2,
            self.pool2,
            self.conv3_1, self.relu3_1,
            self.conv3_2, self.relu3_2,
            self.conv3_3, self.relu3_3,
            self.pool3,
            self.conv4_1, self.relu4_1,
            self.conv4_2, self.relu4_2,
            self.conv4_3, self.relu4_3,
            self.pool4,
            self.conv5_1, self.relu5_1,
            self.conv5_2, self.relu5_2,
            self.conv5_3, self.relu5_3,
            self.pool5,
        ]
        for l1, l2 in zip(vgg16.features, features):
            if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
                assert l1.weight.size() == l2.weight.size()
                assert l1.bias.size() == l2.bias.size()
                l2.weight.data = l1.weight.data
                l2.bias.data = l1.bias.data
        for i, name in zip([0, 3], ['fc6', 'fc7']):
            l1 = vgg16.classifier[i]
            l2 = getattr(self, name)
            l2.weight.data = l1.weight.data.view(l2.weight.size())
            l2.bias.data = l1.bias.data.view(l2.bias.size())

In [7]:
# MODEL
# DEFINE MODEL
# model = torchfcn.models.FCN8s(n_class=21)
vgg16 = models.vgg16(pretrained=True)
model = suhpos()
model.copy_params_from_vgg16(vgg16)
print(model)

resume = 0

start_epoch = 0
start_iteration = 0
if cuda:
    model = model.cuda()


suhpos (
  (conv1_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(100, 100))
  (relu1_1): ReLU (inplace)
  (conv1_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1_2): ReLU (inplace)
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2_1): ReLU (inplace)
  (conv2_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2_2): ReLU (inplace)
  (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv3_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3_1): ReLU (inplace)
  (conv3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3_2): ReLU (inplace)
  (conv3_3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3_3): ReLU (inplace)
  (pool3): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv4_1): Conv2d(256, 512,

In [8]:
# LOSS
loss1 = nn.MSELoss()
loss2 = nn.MSELoss()

def cross_entropy2d(input, target, weight=None, size_average=True):
    # input: (n, c, h, w), target: (n, h, w)
    n, c, h, w = input.size()
    # log_p: (n, c, h, w)
    log_p = F.log_softmax(input)
    # log_p: (n*h*w, c)
    log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    log_p = log_p[target.view(n, h, w, 1).repeat(1, 1, 1, c) >= 0]
    log_p = log_p.view(-1, c)
    # target: (n*h*w,)
    mask = target >= 0
    target = target[mask]
    loss = F.nll_loss(log_p, target, weight=weight, size_average=False)
    if size_average:
        loss /= mask.data.sum()
    return loss


In [9]:
# OPTIMIZER
optim = torch.optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay)

In [10]:
# TRAINING
def train_model():
    max_epoch = int(math.ceil(1. * max_iteration / len(train_loader)))
#     print(max_epoch)
    for epoch in tqdm.trange(0, max_epoch, desc='Train', ncols=80):
#         INSERT TARGETS
        for batch_idx, (data, target) in tqdm.tqdm( enumerate(train_loader), total=len(train_loader),desc='Train epoch=%d' % epoch, ncols=80, leave=False):
#         for batch_idx, (data, target) in enumerate(train_loader):  
            iteration = batch_idx + epoch * len(train_loader)
#             print(iteration)
#             VALIDATE
#             if iteration % interval_validate == 0:
#                 validate(iteration)

#             MODIFY FOR TARGETS
            if cuda:
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            optim.zero_grad()
#             MODIFY FOR OUTPUTS
            seg = model(data)

            loss = cross_entropy2d(seg, target)
            loss /= len(data)
            if np.isnan(float(loss.data[0])):
                raise ValueError('loss is nan while training')
            loss.backward()
            optim.step()

#             METRICS
#             metrics = []
#             lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
#             lbl_true = target.data.cpu().numpy()
#             for lt, lp in zip(lbl_true, lbl_pred):
#                 acc, acc_cls, mean_iu, fwavacc = \
#                     torchfcn.utils.label_accuracy_score(
#                         [lt], [lp], n_class=n_class)
#                 metrics.append((acc, acc_cls, mean_iu, fwavacc))
#             metrics = np.mean(metrics, axis=0)

            if iteration >= max_iter:
                break



In [11]:
train_model()

Train:   0%|                                             | 0/21 [00:00<?, ?it/s]
Train epoch=0:   0%|                                   | 0/4871 [00:00<?, ?it/s]

(375, 500, 3) (375, 500)
(375, 500, 3) (375, 500)
(500, 375, 3) (500, 375)
(387, 500, 3) (387, 500)
(375, 500, 3) (375, 500)


[A

(370, 500, 3) (370, 500)
(331, 500, 3) (331, 500)
(375, 500, 3) (375, 500)



                                                                                

(333, 500, 3) (333, 500)


[A


NameError: global name 'self' is not defined