In [2]:
!pip install tensorboard

Collecting tensorboard
  Downloading tensorboard-2.4.0-py3-none-any.whl (10.6 MB)
[K     |████████████████████████████████| 10.6 MB 8.6 MB/s eta 0:00:01
Collecting tensorboard-plugin-wit>=1.6.0
  Downloading tensorboard_plugin_wit-1.7.0-py3-none-any.whl (779 kB)
[K     |████████████████████████████████| 779 kB 31.5 MB/s eta 0:00:01
Collecting absl-py>=0.4
  Downloading absl_py-0.11.0-py3-none-any.whl (127 kB)
[K     |████████████████████████████████| 127 kB 28.9 MB/s eta 0:00:01
Installing collected packages: tensorboard-plugin-wit, absl-py, tensorboard
Successfully installed absl-py-0.11.0 tensorboard-2.4.0 tensorboard-plugin-wit-1.7.0


In [4]:
!pip install --upgrade git+https://github.com/bes-dev/mean_average_precision.git

Collecting git+https://github.com/bes-dev/mean_average_precision.git
  Cloning https://github.com/bes-dev/mean_average_precision.git to /tmp/pip-req-build-fab5gfy9
Building wheels for collected packages: mean-average-precision
  Building wheel for mean-average-precision (setup.py) ... [?25ldone
[?25h  Created wheel for mean-average-precision: filename=mean_average_precision-0.0.2.1-py3-none-any.whl size=14137 sha256=f9bfbf87b8f7d2faf4c68e4e235a4cf32ad8489173c284f66ffec4629f95ee42
  Stored in directory: /tmp/pip-ephem-wheel-cache-gf39v54a/wheels/32/c7/c8/fbe474b16ebb6737f48a026fd3cf0048af9ffbdb0e9be48ea5
Successfully built mean-average-precision
Installing collected packages: mean-average-precision
Successfully installed mean-average-precision-0.0.2.1


In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torchvision
import torchvision.utils as utils
import torchvision.transforms as transforms

from torch.utils.tensorboard import SummaryWriter
from mean_average_precision import MetricBuilder


from wsddn import *

In [2]:
# Display the installed PyTorch version as the cell output (provenance record).
torch.__version__

'1.4.0'

In [3]:
# Per-channel mean / std handed to Normalize. These are the statistics commonly
# used with ImageNet-pretrained torchvision backbones.
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

# Arguments shared by the train and validation datasets.
EDGE_BOXES_MODEL = '../edge_boxes_model/model.yml.gz'  # first positional argument of CustomVOC
VOC_ROOT = '../../../data'


def _to_normalized_tensor():
    """Return a fresh ``ToTensor -> Normalize`` pipeline using the constants above."""
    return transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
    ])


# Train and test use the same preprocessing; separate objects are kept so either
# one can be changed independently later.
transform_train = _to_normalized_tensor()
transform_test = _to_normalized_tensor()

# Random horizontal flip with probability 0.5; vertical flips are disabled.
augmentation = BoxAndImageFlip(p_horiz=0.5, p_vert=0.0)

train_set = CustomVOC(
    EDGE_BOXES_MODEL,
    root=VOC_ROOT,
    image_set='train',
    download=False,
    post_transform=transform_train,
    augmentation=augmentation,
)

# NOTE(review): the training set receives its pipeline through `post_transform`
# while this one uses `transform` -- confirm against CustomVOC that the
# difference is intended.
test_set = CustomVOC(
    EDGE_BOXES_MODEL,
    eval_mode=True,
    root=VOC_ROOT,
    image_set='val',
    download=False,
    transform=transform_test,
)

def my_collate(batch):
    """Collate samples into per-field Python lists instead of stacked tensors.

    Every sample is indexed positionally: 0 -> data, 1 -> label, 2 -> boxes.
    When the first sample has exactly four fields, field 3 (ground-truth
    boxes) is collected as well.

    Args:
        batch: non-empty sequence of indexable samples.

    Returns:
        A tuple of three lists ``(data, label, boxes)``, or of four lists
        ``(data, label, boxes, gt_boxes)``, each in sample order.
    """
    fields = tuple([sample[pos] for sample in batch] for pos in range(3))
    # Only the first sample decides whether ground-truth boxes are present.
    if len(batch[0]) != 4:
        return fields
    return fields + ([sample[3] for sample in batch],)

# Settings common to both loaders: one sample per batch, loading in the main
# process, and list-based collation through my_collate.
_loader_kwargs = dict(batch_size=1, num_workers=0, collate_fn=my_collate)

# Training samples are reshuffled every epoch; evaluation keeps dataset order.
train_loader = torch.utils.data.DataLoader(train_set, shuffle=True, **_loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_set, shuffle=False, **_loader_kwargs)

In [4]:
# Sanity check: the cells below call .cuda() unconditionally, so this must be True.
torch.cuda.is_available()

True

In [5]:
# Build the detector and move it to the GPU (.cuda() returns the module itself).
# NOTE(review): 21 is presumably 20 VOC classes + background -- confirm against WSDDN.
model = WSDDN(21).cuda()

# SGD with momentum and weight decay over every model parameter.
optimizer = optim.SGD(
    model.parameters(),
    lr=1e-5,
    momentum=0.9,
    weight_decay=5e-4,
)

# Multiply the learning rate by 0.25 once the scheduler has been stepped
# 5, 10, 15 and 20 times.
scheduler = lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[5, 10, 15, 20],
    gamma=0.25,
)

lam = 0.25  # spatial Regularizer weight

In [6]:
# Run index. The training cell interpolates it into `run_name`, which names both
# the TensorBoard log directory and the checkpoint folder.
attempt = 2

In [7]:
# Train WSDDN and, after every epoch, report mAP on a slice of the validation set.
#
# `run_name` is derived from `attempt` (set in the previous cell): bump `attempt`
# before re-running so a new run does not write into the same TensorBoard log
# directory and checkpoint folder.
#
# This loop deliberately stays at cell (global) scope: a later cell reads `pred`,
# `x` and `boxes` from the kernel namespace.
step = 0              # global step: number of training batches seen, used as the TensorBoard x-axis
log_freq = 60         # log the training loss every `log_freq` batches
epochs = 30
save_freq = 10        # write a checkpoint every `save_freq` epochs (never at epoch 0)
num_eval_images = 50  # Only run eval on this many test batches (batch_size is 1, so images)
run_name = 'wsddn_reg_attempt_%s'%attempt
checkpoints_folder = './checkpoints/%s/'%run_name
# makedirs also creates the parent './checkpoints' directory; os.mkdir raised
# FileNotFoundError when it did not exist yet.
os.makedirs(checkpoints_folder, exist_ok=True)
writer = SummaryWriter('./runs/' + run_name)
try:
    # Note: iterates epoch = 0..epochs inclusive, i.e. epochs + 1 passes over the data.
    for epoch in range(epochs + 1):
        print("\nepoch %d learning rate %f\n" % (epoch, optimizer.param_groups[0]['lr']))

        # The evaluation phase below switches to eval mode, so re-enable
        # training mode at the start of every epoch.
        model.train()
        for i, stacked_data in enumerate(train_loader):
            # my_collate yields parallel lists; walk them one sample at a time.
            for data in zip(stacked_data[0], stacked_data[1], stacked_data[2]):
                # The optimizer was built over model.parameters(), so this clears
                # every gradient (a separate model.zero_grad() is redundant).
                optimizer.zero_grad()

                image, label, boxes = data
                image = image.unsqueeze(0)  # add the leading batch dimension

                image, label, boxes = image.cuda(), label.cuda(), boxes.cuda()

                # forward -- call the module instead of .forward() so that
                # nn.Module hooks are honoured
                pred, x = model(image, boxes)

                # backward: classification loss plus weighted spatial regularizer
                loss = model.loss(pred, label) + lam*model.reg(pred, x, boxes)

                loss.backward()
                optimizer.step()

            # display results (loss of the last sample in this batch)
            if i % log_freq == 0:
                writer.add_scalar('train/loss', loss.item(), step)
                print("[epoch %d][%d/%d] loss %.4f"%(epoch, i, len(train_loader)-1, loss.item()))

            # global step
            step += 1

        if epoch % save_freq == 0 and epoch > 0:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                },
                # os.path.join avoids the doubled '/' the old format string produced.
                # The 'wsdnn' spelling is kept so existing checkpoints keep their names.
                os.path.join(checkpoints_folder, 'wsdnn_reg_epoch_%s.pth' % epoch)
            )

        print('-'*40)
        model.eval()
        with torch.no_grad():
            # A fresh metric accumulator per epoch.
            metric_fn = MetricBuilder.build_evaluation_metric("map_2d", num_classes=21)
            for i, stacked_data in enumerate(test_loader):
                for data in zip(stacked_data[0], stacked_data[1], stacked_data[2], stacked_data[3]):
                    image_test, _, boxes_test, gt_boxes = data
                    image_test = image_test.unsqueeze(0)  # add the leading batch dimension

                    image_test, boxes_test = image_test.cuda(), boxes_test.cuda()

                    detections = model.detect(image_test, boxes_test)

                    formatted_detections = test_set.format_pred(detections)

                    metric_fn.add(np.array(formatted_detections), np.array(gt_boxes))

                # Stop after exactly `num_eval_images` batches; the previous
                # `i == num_eval_images` check evaluated one batch too many.
                if i + 1 >= num_eval_images:
                    break

            mAP = metric_fn.value(iou_thresholds=0.5, recall_thresholds=np.arange(0., 1.1, 0.1))['mAP']
            # TensorBoard keeps the raw value; the console line is labelled '%',
            # so scale the [0, 1] fraction by 100 there.
            writer.add_scalar('test/mAP', mAP, epoch)
            print("\n[epoch %d] mAP on test data: %.2f%%\n" % (epoch, 100 * mAP))

        scheduler.step()

        print('-'*40)
finally:
    # Flush pending TensorBoard events even if the run is interrupted
    # (e.g. KeyboardInterrupt from stopping the cell).
    writer.close()


epoch 0 learning rate 0.000010

[epoch 0][0/5716] loss 4.1411
[epoch 0][60/5716] loss 12.5677
[epoch 0][120/5716] loss 6.8948
[epoch 0][180/5716] loss 5.8703
[epoch 0][240/5716] loss 7.5008
[epoch 0][300/5716] loss 3.6120
[epoch 0][360/5716] loss 3.4687
[epoch 0][420/5716] loss 6.7953
[epoch 0][480/5716] loss 4.2732
[epoch 0][540/5716] loss 12.2147
[epoch 0][600/5716] loss 3.2100
[epoch 0][660/5716] loss 4.7471
[epoch 0][720/5716] loss 8.0117
[epoch 0][780/5716] loss 2.1110
[epoch 0][840/5716] loss 3.6560
[epoch 0][900/5716] loss 1.0991
[epoch 0][960/5716] loss 3.2254
[epoch 0][1020/5716] loss 6.9385
[epoch 0][1080/5716] loss 4.8526
[epoch 0][1140/5716] loss 1.5423
[epoch 0][1200/5716] loss 3.8010
[epoch 0][1260/5716] loss 4.0282
[epoch 0][1320/5716] loss 2.0105
[epoch 0][1380/5716] loss 3.2916
[epoch 0][1440/5716] loss 2.7444
[epoch 0][1500/5716] loss 4.9826
[epoch 0][1560/5716] loss 7.9863
[epoch 0][1620/5716] loss 1.9654
[epoch 0][1680/5716] loss 2.1779
[epoch 0][1740/5716] loss 4.

KeyboardInterrupt: 

In [8]:
# Second view of the validation split for visualization: same arguments as the
# evaluation set, but without the normalizing transform.
test_vis_set = CustomVOC(
    '../edge_boxes_model/model.yml.gz',
    eval_mode=True,
    root='../../../data',
    image_set='val',
    download=False,
)

In [12]:
# Draw detections against ground truth for five randomly chosen validation images.
#
# NOTE(review): the saved output of this cell is
#   TypeError: Expected Ptr<cv::UMat> for argument 'img'
# which presumably comes from an OpenCV call inside draw_boxes receiving an
# image it cannot use -- confirm which image type/layout draw_boxes expects
# before relying on this cell.

# The training cell may have been interrupted mid-epoch, leaving the model in
# train mode; switch to eval mode and disable autograd, as the evaluation code
# in the training cell does.
model.eval()
with torch.no_grad():
    for _ in range(5):
        idx = np.random.randint(len(test_set))
        # Un-normalized image for drawing, normalized tensors for the model;
        # both are read at the same index.
        img, _, _, _ = test_vis_set[idx]
        image, label, boxes, gt_boxes = test_set[idx]
        image = image.unsqueeze(0)  # add the leading batch dimension
        # `label` is not used for detection, so it is left where it is.
        image, boxes = image.cuda(), boxes.cuda()
        detections = test_set.format_pred(model.detect(image, boxes))
        display(test_set.draw_boxes(transforms.ToTensor()(img), detections, gt_boxes))

TypeError: Expected Ptr<cv::UMat> for argument 'img'

In [41]:
# Ad-hoc inspection of the spatial regularizer term. This cell cannot run on its
# own: `pred` and `x` are left in the kernel namespace by the training loop, and
# `boxes` by the training loop or the visualization cell, whichever ran last.
model.reg(pred, x, boxes)

tensor(1.5385e-07, device='cuda:0', grad_fn=<DivBackward0>)