In [1]:
import argparse
import os
import shutil
import time

import sklearn
import sklearn.metrics
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torchmetrics import JaccardIndex
from sklearn.preprocessing import minmax_scale
import wandb

import matplotlib.pyplot as plt
from AlexNet import localizer_alexnet, localizer_alexnet_robust
from voc_dataset import *
from utils import *
from task_1 import AverageMeter
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
USE_WANDB = True  # use flags, wandb is not convenient for debugging
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--arch', default='localizer_alexnet')
parser.add_argument(
    '-j',
    '--workers',
    default=2,
    type=int,
    metavar='N',
    help='number of data loading workers (default: 4)')
parser.add_argument(
    '--epochs',
    default=2,
    type=int,
    metavar='N',
    help='number of total epochs to run')
parser.add_argument(
    '--start-epoch',
    default=0,
    type=int,
    metavar='N',
    help='manual epoch number (useful on restarts)')
parser.add_argument(
    '-b',
    '--batch-size',
    default=32,
    type=int,
    metavar='N',
    help='mini-batch size (default: 256)')
parser.add_argument(
    '--lr',
    '--learning-rate',
    default=1e-2,
    type=float,
    metavar='LR',
    help='initial learning rate')
parser.add_argument(
    '--momentum', default=0.9, type=float, metavar='M', help='momentum')
parser.add_argument(
    '--weight-decay',
    '--wd',
    default=1e-4,
    type=float,
    metavar='W',
    help='weight decay (default: 1e-4)')
parser.add_argument(
    '--print-freq',
    '-p',
    default=10,
    type=int,
    metavar='N',
    help='print frequency (default: 10)')
parser.add_argument(
    '--eval-freq',
    default=2,
    type=int,
    metavar='N',
    help='print frequency (default: 10)')
parser.add_argument(
    '--resume',
    default='',
    type=str,
    metavar='PATH',
    help='path to latest checkpoint (default: none)')
parser.add_argument(
    '-e',
    '--evaluate',
    dest='evaluate',
    action='store_true',
    help='evaluate model on validation set')
parser.add_argument(
    '--pretrained',
    dest='pretrained',
    action='store_false',
    help='use pre-trained model')
parser.add_argument(
    '--world-size',
    default=1,
    type=int,
    help='number of distributed processes')
parser.add_argument(
    '--dist-url',
    default='tcp://224.66.41.62:23456',
    type=str,
    help='url used to set up distributed training')
parser.add_argument(
    '--dist-backend', default='gloo', type=str, help='distributed backend')
parser.add_argument('--vis', action='store_true')

best_prec1 = 0
cudnn.benchmark = True


In [3]:
class args:
    parsed_args = parser.parse_known_args()[0]
    batch_size = parsed_args.batch_size
    workers = parsed_args.workers
    print_freq = parsed_args.print_freq
    eval_freq= parsed_args.eval_freq
    epochs = parsed_args.epochs
    lr= parsed_args.lr
    arch = parsed_args.arch
    pretrained = True
    momentum = parsed_args.momentum
    weight_decay = parsed_args.weight_decay
    start_epoch = parsed_args.start_epoch
print(args.__dict__)

{'__module__': '__main__', 'parsed_args': Namespace(arch='localizer_alexnet', batch_size=32, dist_backend='gloo', dist_url='tcp://224.66.41.62:23456', epochs=2, eval_freq=2, evaluate=False, lr=0.01, momentum=0.9, pretrained=True, print_freq=10, resume='', start_epoch=0, vis=False, weight_decay=0.0001, workers=2, world_size=1), 'batch_size': 32, 'workers': 2, 'print_freq': 10, 'eval_freq': 2, 'epochs': 2, 'lr': 0.01, 'arch': 'localizer_alexnet', 'pretrained': True, 'momentum': 0.9, 'weight_decay': 0.0001, 'start_epoch': 0, '__dict__': <attribute '__dict__' of 'args' objects>, '__weakref__': <attribute '__weakref__' of 'args' objects>, '__doc__': None}


In [4]:
# create model
print("=> creating model '{}'".format(args.arch))
if args.arch == 'localizer_alexnet':
    model = localizer_alexnet(pretrained=args.pretrained)
elif args.arch == 'localizer_alexnet_robust':
    model = localizer_alexnet_robust(pretrained=args.pretrained)
#print(model)

model.features = torch.nn.DataParallel(model.features)
model.cuda()

# TODO (Q1.1): define loss function (criterion) and optimizer from [1]
# also use an LR scheduler to decay LR by 10 every 30 epochs
#criterion = nn.MultiLabelSoftMarginLoss().cuda()   #
criterion = nn.BCELoss().cuda()

optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay)
# optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
#                             weight_decay=args.weight_decay)

"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

=> creating model 'localizer_alexnet'


  nn.init.xavier_uniform(layer.weight)


In [5]:
# test output size
model(torch.randn((3, 512,512)).unsqueeze(dim=0)).size()

torch.Size([1, 20, 29, 29])

In [6]:
dataset = VOCDataset('trainval', top_n=10, image_size=512, data_dir='../data/VOCdevkit/VOC2007/')
# TODO (Q1.1): Create Datasets and Dataloaders using VOCDataset
# Ensure that the sizes are 512x512
# Also ensure that data directories are correct
# The ones use for testing by TAs might be different
n = len(dataset)
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [int(np.floor(n*0.8)), n-int(np.floor(n*0.8))])
train_sampler = torch.utils.data.SubsetRandomSampler(range(len(train_dataset)))

Path:/home/mo/16824-VLR/hw1-object_localization/../data/VOCdevkit/VOC2007


In [7]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.workers,
    pin_memory=True,
    sampler=train_sampler,
    collate_fn=custom_collate_fn_VOC,
    drop_last=True)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.workers,
    pin_memory=True,
    collate_fn=custom_collate_fn_VOC,
    drop_last=True)

# TODO (Q1.3): Create loggers for wandb.
if USE_WANDB:
    wandb.init(project="vlr-hw1", reinit=True)
#Ideally, use flags since wandb makes it harder to debug code.

[34m[1mwandb[0m: Currently logged in as: [33m3m-m[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
data = train_dataset[2020]
for k, v in data.items():
    print(k, type(v)==np.ndarray)
for k, v in data.items():
    print(k, type(v))
    if type(v) != list: print(v.shape)
    else: print(np.array(v).shape)

image False
label False
wgt False
rois True
gt_boxes False
gt_classes False
image <class 'torch.Tensor'>
torch.Size([3, 512, 512])
label <class 'torch.Tensor'>
torch.Size([20])
wgt <class 'torch.Tensor'>
torch.Size([20])
rois <class 'numpy.ndarray'>
(10, 4)
gt_boxes <class 'list'>
(1, 4)
gt_classes <class 'list'>
(1,)


In [9]:
class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def metric1(output, target):
    # TODO (Q1.5): compute metric1
    target = target.detach().numpy()
    output = output.detach().numpy()
    mean_ap = sklearn.metrics.average_precision_score(target, output, average='micro')
    #print(f"{type(target)}\n{output}")
    #print(f"Metric 1: {mean_ap}")
    return mean_ap    #[0]
        
def metric2(output, target, thres=0.5):
    # TODO (Q1.5): compute metric2
    target = target.detach().numpy().astype(int)
    output = output.detach().numpy().astype(int)
    recall = sklearn.metrics.recall_score(target, output, average='samples')
    #print(f"Metric 2:{recall}")
    return recall  #[0]
        
def metric3(output, target):
    target = target.detach().numpy().astype(int)
    output = output.detach().numpy().astype(int)
    #print(f"{type(target)}{type(output)}")
    precision = JaccardIndex(num_classes=20, average='micro', multilabel=True)(target, output)
    print(f"METRIC 3:{precision}")
    return precision    #[0]

In [10]:
# TODO: You can add input arguments if you wish
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    avg_m1 = AverageMeter()
    avg_m2 = AverageMeter()

    # switch to train mode
    model.train()
    class_id_to_label = dict(enumerate(dataset.CLASS_NAMES))
    end = time.time()
    for i, (data) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # TODO (Q1.1): Get inputs from the data dict
        # Convert inputs to cuda if training on GPU
        input_im = data['image'].to('cuda')
        target = data['label']
                            
        # TODO (Q1.1): Get output from model
        if i==0: print("Forward pass")
        conv_out = model(input_im)
        
        # TODO (Q1.1): Perform any necessary functions on the output
        imoutput = (nn.MaxPool2d(kernel_size=(conv_out.size(2), conv_out.size(3)))(conv_out)).squeeze()
        imoutput = torch.sigmoid(imoutput)
         
        if i==0: print(f"Output size:{imoutput.size()}")
        vis_heatmap = F.interpolate(conv_out, size=(input_im.shape[2],input_im.shape[3]), mode='nearest')
        if i==0: print(f"Heatmap output size:{vis_heatmap.shape}")
        
        # TODO (Q1.1): Compute loss using ``criterion``
        loss = criterion(imoutput.to('cpu'), target)
        
        # measure metrics and record loss
        m1 = metric1(imoutput.to('cpu'), target)
        m2 = metric2(imoutput.to('cpu'), target)
        #m3 = metric3(imoutput.to('cpu'), target)
        losses.update(loss.item(), len(data))
        avg_m1.update(m1)
        avg_m2.update(m2)

        # TODO (Q1.1): compute gradient and perform optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Metric1 {avg_m1.val:.3f} ({avg_m1.avg:.3f})\t'
                  'Metric2 {avg_m2.val:.3f} ({avg_m2.avg:.3f})'.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      avg_m1=avg_m1,
                      avg_m2=avg_m2)
                 )
        # TODO (Q1.3): Visualize/log things as mentioned in handout at appropriate intervals
        c_map = plt.get_cmap('jet')
        table = wandb.Table(columns=["id", "image", "heatmap"])
        if (epoch==0 or epoch==1 ) and i==0:
            table = wandb.Table(columns=["id", "image", "heatmap"])
            for n, (im, out_heatmap) in enumerate(zip(input_im, vis_heatmap)):
                input_img = wandb.Image(im, boxes={
                    "predictions": {
                        "box_data": get_box_data(data['gt_classes'][n], data['gt_boxes'][n]),
                        "class_labels": class_id_to_label,       
                    },
                })
                # Log selective masks
                att_map = vis_heatmap[n][data['gt_classes'][n][0]].cpu().detach().numpy()
                att_map = minmax_scale(att_map.ravel(), feature_range=(0,255)).reshape(att_map.shape)
                table.add_data(n, input_img, c_map(wandb.Image(att_map))
                if n==1: 
                    wandb.log({"Visuals": table})
                    break
        wandb.log(
            {'train/loss':loss, 'train/metric1': m1,  'train/metric2': m2,}
        )
        # End of train()

In [11]:
def validate(val_loader, model, criterion, epoch=0):
    batch_time = AverageMeter()
    losses = AverageMeter()
    avg_m1 = AverageMeter()
    avg_m2 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    class_id_to_label = dict(enumerate(dataset.CLASS_NAMES))
    end = time.time()
    for i, (data) in enumerate(val_loader):
        # TODO (Q1.1): Get inputs from the data dict
        # Convert inputs to cuda if training on GPU
        input_im = data['image'].to('cuda')
        target_class = data['label']
                            
        # TODO (Q1.1): Get output from model
        if i==0: print("Forward pass")
        conv_out = model(input_im)
        
        # TODO (Q1.1): Perform any necessary functions on the output
        imoutput = (nn.MaxPool2d(kernel_size=(conv_out.size(2), conv_out.size(3)))(conv_out)).squeeze()
        imoutput = torch.sigmoid(imoutput)
        if i==0: print(f"Output size:{imoutput.size()}")
        
        vis_heatmap = F.interpolate(conv_out, size=(input_im.shape[2],input_im.shape[3]), mode='nearest')
        if i==0: print(f"Heatmap output size:{vis_heatmap.shape}")
        
        # TODO (Q1.1): Compute loss using ``criterion``
        loss = criterion(imoutput.to('cpu'), target_class)
        
        # measure metrics and record loss
        m1 = metric1(imoutput.to('cpu'), target_class)
        m2 = metric2(imoutput.to('cpu'), target_class)
        #m3 = metric3(imoutput.to('cpu'), target_class)
        avg_m1.update(m1)
        avg_m2.update(m2)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Metric1 {avg_m1.val:.3f} ({avg_m1.avg:.3f})\t'
                  'Metric2 {avg_m2.val:.3f} ({avg_m2.avg:.3f})'.format(
                      i,
                      len(val_loader),
                      batch_time=batch_time,
                      loss=losses,
                      avg_m1=avg_m1,
                      avg_m2=avg_m2))

        # TODO (Q1.3): Visualize things as mentioned in handout
        c_map = plt.get_cmap('jet')
        
        # TODO (Q1.3): Visualize at appropriate intervals
        if (epoch==0 or epoch==1 ) and i==0:
            table = wandb.Table(columns=["id", "image", "heatmap"])
            for n, (im, out_heatmap) in enumerate(zip(input_im, vis_heatmap)):
                input_img = wandb.Image(im, boxes={
                    "predictions": {
                        "box_data": get_box_data(data['gt_classes'][n], data['gt_boxes'][n]),
                        "class_labels": class_id_to_label,       
                    },
                })
                # Log selective masks
                att_map = vis_heatmap[n][data['gt_classes'][n][0]].cpu().detach().numpy()
                att_map = minmax_scale(att_map.ravel(), feature_range=(0,255)).reshape(att_map.shape)
                table.add_data(n, input_img, wandb.Image(c_map(att_map)))
                if n==1: 
                    wandb.log({"Visuals": table})
                    break
                
        wandb.log(
            {'train/loss':loss, 'train/metric1': m1,  'train/metric2': m2,}
        )

        
    print(' * Metric1 {avg_m1.avg:.3f} Metric2 {avg_m2.avg:.3f}'.format(
        avg_m1=avg_m1, avg_m2=avg_m2))

    return avg_m1.avg, avg_m2.avg


# TODO: You can make changes to this function if you wish (not necessary)
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')


In [12]:
for epoch in range(args.start_epoch, args.epochs):
    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)
    
    # evaluate on validation set
    if epoch % args.eval_freq == 0:
        m1, m2 = validate(val_loader, model, criterion, epoch)
        score = m1 * m2
        # remember best prec@1 and save checkpoint
        is_best = score > best_prec1
        best_prec1 = max(score, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best)

Forward pass
Output size:torch.Size([32, 20])
Heatmap output size:torch.Size([32, 20, 512, 512])
Epoch: [0][0/125]	Time 3.364 (3.364)	Data 0.999 (0.999)	Loss 0.7034 (0.7034)	Metric1 0.080 (0.080)	Metric2 0.000 (0.000)
Epoch: [0][10/125]	Time 0.206 (0.577)	Data 0.144 (0.296)	Loss 0.6867 (0.6960)	Metric1 0.097 (0.099)	Metric2 0.000 (0.000)
Epoch: [0][20/125]	Time 0.118 (0.465)	Data 0.060 (0.289)	Loss 0.6674 (0.6865)	Metric1 0.099 (0.100)	Metric2 0.000 (0.000)
Epoch: [0][30/125]	Time 0.119 (0.436)	Data 0.003 (0.289)	Loss 0.6449 (0.6763)	Metric1 0.174 (0.138)	Metric2 0.000 (0.000)
Epoch: [0][40/125]	Time 0.124 (0.420)	Data 0.002 (0.288)	Loss 0.6091 (0.6644)	Metric1 0.223 (0.156)	Metric2 0.000 (0.000)
Epoch: [0][50/125]	Time 0.117 (0.426)	Data 0.006 (0.303)	Loss 0.4711 (0.6425)	Metric1 0.214 (0.176)	Metric2 0.000 (0.000)
Epoch: [0][60/125]	Time 0.104 (0.417)	Data 0.001 (0.300)	Loss 0.3790 (0.5946)	Metric1 0.125 (0.179)	Metric2 0.000 (0.000)
Epoch: [0][70/125]	Time 0.116 (0.408)	Data 0.003 (

In [13]:
torch.cuda.empty_cache()
print("Done.")

Done.


<b> Task 2

In [14]:
from torchvision.ops import roi_pool, roi_align, RoIPool