In [1]:
import argparse
import os
import shutil
import time
import sys
sys.path.insert(0,'/home/ubuntu/code/visual_learning_and_recognition/hw2/code/faster_rcnn')
import sklearn
import sklearn.metrics

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim as optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

from datasets.factory import get_imdb
from custom import *

# Sorted names of every entry in ``models.__dict__`` that is callable, is
# all lowercase and does not start with a double underscore.
model_names = sorted(
    name for name, member in models.__dict__.items()
    if callable(member) and name.islower() and not name.startswith("__"))

# Command-line options for the training script. Every option has a default,
# so parsing an empty argument list yields a fully populated namespace.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--arch', default='localizer_alexnet')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=2, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
# The help text used to be a copy of the --print-freq one.
parser.add_argument('--eval-freq', default=10, type=int,
                    metavar='N', help='evaluation frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=1, type=int,
                    help='number of distributed processes')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='gloo', type=str,
                    help='distributed backend')
parser.add_argument('--vis',action='store_true')

# Module-level best-score tracker, initialised to 0; none of the cells shown
# here update it.
best_prec1 = 0


In [2]:
def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    avg_m1 = AverageMeter()
    avg_m2 = AverageMeter()


    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.type(torch.FloatTensor).cuda(async=True)
        input_var = torch.autograd.Variable(input, volatile=True)
        target_var = torch.autograd.Variable(target, volatile=True)

        # TODO: Get output from model
        # TODO: Perform any necessary functions on the output
        # TODO: Compute loss using ``criterion``
        # compute output




        # measure metrics and record loss
        m1 = metric1(imoutput.data, target)
        m2 = metric2(imoutput.data, target)
        losses.update(loss.data[0], input.size(0))
        avg_m1.update(m1[0], input.size(0))
        avg_m2.update(m2[0], input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Metric1 {avg_m1.val:.3f} ({avg_m1.avg:.3f})\t'
                  'Metric2 {avg_m2.val:.3f} ({avg_m2.avg:.3f})'.format(
                   i, len(val_loader), batch_time=batch_time, loss=losses,
                   avg_m1=avg_m1, avg_m2=avg_m2))

        #TODO: Visualize things as mentioned in handout
        #TODO: Visualize at appropriate intervals





    print(' * Metric1 {avg_m1.avg:.3f} Metric2 {avg_m2.avg:.3f}'
          .format(avg_m1=avg_m1, avg_m2=avg_m2))

    return avg_m1.avg, avg_m2.avg


# TODO: You can make changes to this function if you wish (not necessary)
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Write ``state`` to ``filename`` and optionally keep it as the best model.

    Args:
        state: object handed to ``torch.save``.
        is_best: when true, the saved file is also copied to
            ``'model_best.pth.tar'`` (a relative path).
        filename: destination of the checkpoint.
    """
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')


class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes:
        val: the value passed to the most recent ``update`` call.
        sum: running total of ``val * n`` over all updates.
        count: running total of ``n`` over all updates.
        avg: ``sum / count``, or 0 while ``count`` is still 0.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Reset every statistic to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average.

        Args:
            val: the new observation (for example a per-batch mean).
            n: weight of the observation (for example the batch size).
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # Leave ``avg`` untouched while nothing has been counted, instead of
        # dividing by zero.
        if self.count:
            # float() forces true division so integer values are not floored
            # on interpreters where ``/`` on two ints is integer division.
            self.avg = self.sum / float(self.count)


def adjust_learning_rate(optimizer, epoch, base_lr=0.001, decay=0.1, step=30):
    """Set a step-decayed learning rate on every parameter group.

    The rate is ``base_lr * decay ** (epoch // step)``. With the defaults
    this is 0.001 decayed by 10 every 30 epochs. The learning rate the
    optimizer was created with is overwritten, not used as the starting
    point.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts whose
            ``'lr'`` entry is overwritten.
        epoch: current epoch number.
        base_lr: learning rate used before any decay is applied.
        decay: multiplicative factor applied once per ``step`` epochs.
        step: number of epochs between two decays.
    """
    lr = base_lr * (decay ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def metric1(output, target):
    """Placeholder metric: ignores both arguments and yields a one-item list holding 0."""
    # TODO: Ignore for now - proceed till instructed
    placeholder = [0]
    return placeholder

def metric2(output, target):
    """Placeholder metric: ignores both arguments and yields a one-item list holding 0."""
    # TODO: Ignore for now - proceed till instructed
    placeholder = [0]
    return placeholder

In [3]:
# ``validate`` reads ``args.print_freq``, so ``args`` has to exist. An empty
# argument list is parsed explicitly so every option takes its default; a
# bare ``parse_args()`` would read the notebook kernel's own command line.
args = parser.parse_args([])
args.distributed = args.world_size > 1

model = localizer_alexnet(pretrained=True)
print(model)


LocalizerAlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
  )
  (classifier): Sequential(
    (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU(inplace)
    (4): Conv2d(256, 20, kernel_size=(1, 1), stride=(1, 1))
  )
)


In [4]:
# Wrap only the ``features`` sub-module in DataParallel, then move the whole
# model to the GPU before the optimizer collects its parameters.
model.features = nn.DataParallel(model.features)
model.cuda()

# TODO:
# define loss function (criterion) and optimizer
criterion = nn.MultiLabelSoftMarginLoss()
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)


In [5]:
cudnn.benchmark = True

# Data loading code
# TODO: Write code for IMDBDataset in custom.py
trainval_imdb = get_imdb('voc_2007_trainval')
test_imdb = get_imdb('voc_2007_test')

# Per-channel normalisation shared by the training and validation pipelines.
normalize = transforms.Normalize(std=[0.229, 0.224, 0.225],
                                 mean=[0.485, 0.456, 0.406])

# Training pipeline: fixed 512x512 resize plus random horizontal flips.
train_transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
train_dataset = IMDBDataset(trainval_imdb, train_transform)
train_sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=(train_sampler is None),  # shuffle only when no sampler is given
    num_workers=4,
    pin_memory=True,
    sampler=train_sampler)

# Validation pipeline: fixed 384x384 resize, no augmentation, fixed order.
val_transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.ToTensor(),
    normalize,
])
val_dataset = IMDBDataset(test_imdb, val_transform)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=4,
    pin_memory=True)


voc_2007_trainval gt roidb loaded from /home/ubuntu/code/visual_learning_and_recognition/hw2/code/data/cache/voc_2007_trainval_gt_roidb.pkl
voc_2007_test gt roidb loaded from /home/ubuntu/code/visual_learning_and_recognition/hw2/code/data/cache/voc_2007_test_gt_roidb.pkl


In [6]:
# Apply the epoch-0 learning rate to every parameter group of the optimizer.
adjust_learning_rate(optimizer, epoch=0)


In [7]:
# Put the model in training mode and start every running statistic afresh.
model.train()
batch_time, data_time, losses, avg_m1, avg_m2 = (
    AverageMeter() for _unused in range(5))
# Reference timestamp for the timing measurements that follow.
end = time.time()

In [9]:
for i, (input, target) in enumerate(train_loader):
    #print(target)
    data_time.update(time.time() - end)

    target = target.type(torch.FloatTensor).cuda(async=True)
    input_var = torch.autograd.Variable(input, requires_grad=True)
    target_var = torch.autograd.Variable(target)
    
    output = model(input_var)
    
    #print('model output',output.size())
    max_out = F.max_pool2d(output, kernel_size=output.size()[-1])
    
    #print('after maxpool', max_out.size())

#    out = max_out.squeeze(2)
    imoutput = max_out.squeeze()
    #print('after squeeze', imoutput.size())

    #imoutput = out.transpose(1, 2)
    #print('after transpose', imoutput.size())
    #print('target size', target_var.size())
    
    loss = criterion(imoutput, target_var)
    
    # measure metrics and record loss
    m1 = metric1(imoutput.data, target)
    m2 = metric2(imoutput.data, target)
    losses.update(loss.data[0], input.size(0))
    avg_m1.update(m1[0], input.size(0))
    avg_m2.update(m2[0], input.size(0))
    
    # TODO:
    # compute gradient and do SGD step
    
    optimizer.zero_grad()  # zeros out all buffer for gradients from optimizer
    loss.backward()
    optimizer.step()


('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target siz

('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target siz

('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target siz

('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target siz

('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target siz

('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target siz

('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target size', torch.Size([16, 20]))
('model output', torch.Size([16, 20, 29, 29]))
('after maxpool', torch.Size([16, 20, 1, 1]))
('after squeeze', torch.Size([16, 20]))
('target siz

In [10]:
# Runs once, outside the training loop: records the time elapsed since ``end``
# was last assigned as a single ``batch_time`` sample, then restarts the clock.
batch_time.update(time.time() - end)
end = time.time()