# load model

In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch

import yolo.config as cfg

In [2]:
class YoloHand(nn.Module):
    """Compact YOLO-style detector built from depthwise-separable convolutions.

    The network is three stages applied in sequence:

    * ``feature``    -- one 3x3 conv followed by ten depthwise-separable blocks;
      four of those blocks use stride 2, so the spatial size is reduced 16x.
    * ``transfer``   -- two 3x3 conv + BatchNorm + LeakyReLU layers.
    * ``final_conv`` -- 1x1 conv producing
      ``cfg.num_anchors * (cfg.num_classes + 5)`` channels per grid cell.

    Args:
        width_mul: multiplier applied to the nominal channel counts
            (64/128/256/512); the product is truncated with ``int``.

    Sub-module names and ordering are part of the checkpoint format
    (``state_dict`` keys), so they must not be changed.
    """

    def __init__(self, width_mul=0.125):
        super(YoloHand, self).__init__()

        self.width_mul = width_mul

        def scaled(channels):
            # Nominal channel count scaled by the width multiplier (truncated).
            return int(self.width_mul * channels)

        def conv_bn(inp, oup, stride):
            # Plain 3x3 convolution -> BatchNorm -> LeakyReLU.
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.LeakyReLU(inplace=True),
            )

        def conv_dw(inp, oup, stride):
            # Depthwise 3x3 (groups=inp) followed by a pointwise 1x1 convolution,
            # each with its own BatchNorm + LeakyReLU.
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.LeakyReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.LeakyReLU(inplace=True),
            )

        # Trailing numbers are kept from the original author's notes
        # (they look like receptive-field sizes -- TODO confirm).
        self.feature = nn.Sequential(  # feature of hand
            conv_bn(3, 10, 1),                     # 3 low level preserve high res
            conv_dw(10, scaled(64), 2),            # 7
            conv_dw(scaled(64), scaled(64), 1),    # 11
            conv_dw(scaled(64), scaled(128), 2),   # 19
            conv_dw(scaled(128), scaled(128), 1),  # 27
            conv_dw(scaled(128), scaled(256), 2),  # 43
            conv_dw(scaled(256), scaled(256), 1),  # 59
            conv_dw(scaled(256), scaled(512), 2),  # 91

            conv_dw(scaled(512), scaled(512), 1),  # 133
            conv_dw(scaled(512), scaled(512), 1),  # 133
            conv_dw(scaled(512), scaled(512), 1),  # 133
        )

        # transfer
        inp = scaled(512)
        oup = scaled(512)
        self.transfer = nn.Sequential(
            nn.Conv2d(inp, oup, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(oup),
            nn.LeakyReLU(inplace=True),

            nn.Conv2d(oup, oup, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(oup),
            nn.LeakyReLU(inplace=True),
        )

        # linear
        out_channels = cfg.num_anchors * (cfg.num_classes + 5)
        self.final_conv = nn.Conv2d(oup, out_channels, 1, 1, padding=0, bias=True)

    def forward(self, im_data):
        """Return the raw prediction map for a batch of images.

        Args:
            im_data: image batch with 3 channels (the first conv expects 3).

        Returns:
            Tensor with ``cfg.num_anchors * (cfg.num_classes + 5)`` channels.
            The four stride-2 blocks shrink each spatial dimension 16x,
            e.g. a 192x144 input yields a 12x9 grid.
        """
        feature_map = self.feature(im_data)
        h = self.transfer(feature_map)
        y = self.final_conv(h)

        return y

    def post_process(self, y):
        """Turn the raw output of ``forward`` into detection quantities.

        Args:
            y: 4-D tensor ``(bsize, c, h, w)`` with
               ``c == cfg.num_anchors * (cfg.num_classes + 5)``.

        Returns:
            Tuple ``(bbox_pred, iou_pred, prob_pred)``:

            * ``bbox_pred`` -- ``(bsize, h*w, num_anchors, 4)`` holding
              ``[sigmoid(tx), sigmoid(ty), exp(tw), exp(th)]``.
            * ``iou_pred``  -- ``(bsize, h*w, num_anchors, 1)``, sigmoid of channel 4.
            * ``prob_pred`` -- ``(bsize, h*w, num_anchors, num_classes)``,
              softmax over the class scores.
        """
        bsize = y.size(0)
        # Move channels last, then split them into (anchor, per-anchor values).
        y_reshaped = y.permute(0, 2, 3, 1).contiguous().view(
            bsize, -1, cfg.num_anchors, cfg.num_classes + 5)

        # bbox related 0~4
        xy_pred = torch.sigmoid(y_reshaped[:, :, :, 0:2])
        wh_pred = torch.exp(y_reshaped[:, :, :, 2:4])
        bbox_pred = torch.cat([xy_pred, wh_pred], 3)

        iou_pred = torch.sigmoid(y_reshaped[:, :, :, 4:5])

        # cls related 5~end
        score_pred = y_reshaped[:, :, :, 5:].contiguous()
        # Softmax over the class axis. Passing dim explicitly gives the same
        # result as flattening to 2-D first, without the implicit-dim warning.
        prob_pred = F.softmax(score_pred, dim=-1)

        return bbox_pred, iou_pred, prob_pred

In [3]:
# Build the detector. The width multiplier determines every layer's channel
# count, so it has to match the shapes stored in the checkpoint or
# load_state_dict will raise.
net = YoloHand(width_mul=0.158)
# Always deserialize onto the CPU so the checkpoint loads on machines without
# CUDA; the inference cell moves the model to the GPU when use_gpu is set.
state_dict = torch.load(
    'models/yolohanddetect-crop-5-face-lowres-deeper-leaky-finetue',
    map_location=lambda storage, loc: storage,
)
net.load_state_dict(state_dict)
# Switch BatchNorm layers to inference behaviour; the returned module is the
# cell's displayed value.
net.eval()

YoloHand (
  (feature): Sequential (
    (0): Sequential (
      (0): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True)
      (2): LeakyReLU (0.01, inplace)
    )
    (1): Sequential (
      (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=10, bias=False)
      (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True)
      (2): LeakyReLU (0.01, inplace)
      (3): Conv2d(10, 10, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (4): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True)
      (5): LeakyReLU (0.01, inplace)
    )
    (2): Sequential (
      (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=10, bias=False)
      (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True)
      (2): LeakyReLU (0.01, inplace)
      (3): Conv2d(10, 10, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (4): BatchNorm2d(10, eps=1e-05, momen

# load test COCO detection dataset
## Only the five gesture classes are annotated; faces are not, because no face class id is produced

In [4]:
from torchvision import transforms, datasets
import torch

In [5]:
# Image folder and COCO-format annotation file of the test split.
root = 'Datasets/coco_test/images/'
anno_file = 'Datasets/coco_test/annotations/test.json'

# Per-channel statistics handed to transforms.Normalize.
mean = [0.5, 0.5, 0.5]
std = [0.25, 0.25, 0.25]

# Convert each image to a tensor, then normalize it channel-wise.
data_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)]
)

coco_dataset = datasets.CocoDetection(
    root=root,
    annFile=anno_file,
    transform=data_transform,
)

dataset_size = len(coco_dataset)
# Despite the name this is the full category mapping (id -> category record),
# as the printed output shows.
class_names = coco_dataset.coco.cats

print('dataset_sizes :', dataset_size, 'class names :', class_names)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
('dataset_sizes :', 500, 'class names :', {0: {u'supercategory': u'none', u'id': 0, u'name': u'five'}, 1: {u'supercategory': u'none', u'id': 1, u'name': u'l'}, 2: {u'supercategory': u'none', u'id': 2, u'name': u'one'}, 3: {u'supercategory': u'none', u'id': 3, u'name': u'seeyou'}, 4: {u'supercategory': u'none', u'id': 4, u'name': u'zero'}})


# Test and store result on small crop

In [6]:
def result_temp(image_id, category_id, bbox, score):
    """Bundle one detection into a record with the four keys below.

    The arguments are stored as given (no copying or conversion) under the
    keys ``image_id``, ``category_id``, ``bbox`` and ``score``.
    """
    record = {
        'image_id': image_id,
        'category_id': category_id,
        'bbox': bbox,
        'score': score,
    }
    return record

In [7]:
import time
import os
import json

In [15]:
from torch.autograd import Variable
from utils import postprocess

# Run the detector over every test image and write the detections to a JSON
# file as a list of {image_id, category_id, bbox, score} records.
use_gpu = True
if use_gpu:
    net.cuda()
else:
    net.cpu()

# Override the inference sizes on the shared config. (192, 144) -> (12, 9) is
# the 16x reduction of the network; presumably read by postprocess -- verify.
cfg.infer_inp_size = (192, 144)
cfg.infer_out_size = (12, 9)

result_file = []
thresh = 0.75
for idx in range(len(coco_dataset)):
    img, _ = coco_dataset[idx]
    # Take the image id from the dataset index rather than from the first
    # annotation, so detections on images with no annotations do not raise
    # IndexError.
    image_id = coco_dataset.ids[idx]

    # Add the batch dimension and move to the target device.
    img = img.unsqueeze(0)
    if use_gpu:
        img = img.cuda()
    img = Variable(img)

    net_output = net.post_process(net(img))

    # .cpu() is a no-op for tensors already on the CPU, so one path serves both modes.
    bbox_pred, iou_pred, prob_pred = [x.data.cpu().numpy() for x in net_output]
    bboxes, scores, cls_inds = postprocess(bbox_pred, iou_pred, prob_pred, cfg, thresh)

    for i in range(len(bboxes)):
        x0, y0, x1, y1 = bboxes[i]
        # Corner format -> [x, y, width, height]. Values are cast to builtin
        # types because numpy scalars are not reliably JSON-serializable.
        result_file.append(result_temp(
            image_id=image_id,
            category_id=int(cls_inds[i]),
            bbox=[float(x0), float(y0), float(x1 - x0), float(y1 - y0)],
            score=float(scores[i]),
        ))

result_dir = 'Datasets/coco_test/result/'
if not os.path.isdir(result_dir):
    os.makedirs(result_dir)
# Zero-padded, portable timestamp ('%-M' is a platform-specific extension).
fn = str(thresh) + '_' + time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
result_fn = os.path.join(result_dir, fn)
print('write result to {}'.format(result_fn))
# Context manager guarantees the file is flushed and closed before it is read back.
with open(result_fn, 'w') as result_fp:
    json.dump(result_file, result_fp)

write result to Datasets/coco_test/result/0.75_2017-12-14-09-5-43


# Eval

In [10]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np

In [16]:
# Score the detections written with thresh=0.75 against the ground truth.

# Ground truth: load the test annotations through the COCO API.
dataDir='Datasets/coco_test/'
dataType='test'
# dataDir already ends with '/', so this expands to
# 'Datasets/coco_test//annotations/test.json' (doubled slash).
annFile = '%s/annotations/%s.json'%(dataDir,dataType)
cocoGt=COCO(annFile)

# Detections: this is the file name printed by the inference cell above
# (prefix = score threshold, suffix = timestamp).
resFile='Datasets/coco_test/result/0.75_2017-12-14-09-5-43'
cocoDt=cocoGt.loadRes(resFile)

# Evaluate on every image id present in the ground truth, in sorted order.
imgIds=sorted(cocoGt.getImgIds())

# Bounding-box evaluation; summarize() prints the AP/AR table shown below.
cocoEval = COCOeval(cocoGt,cocoDt,'bbox')
cocoEval.params.imgIds  = imgIds
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.16s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.294
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.479
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.322
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.310
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.260
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.326
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.326
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet

In [17]:
# Score a stored detection file against the ground truth (file prefix 0.5).

# Ground truth: load the test annotations through the COCO API.
dataDir='Datasets/coco_test/'
dataType='test'
# dataDir already ends with '/', so this expands to
# 'Datasets/coco_test//annotations/test.json' (doubled slash).
annFile = '%s/annotations/%s.json'%(dataDir,dataType)
cocoGt=COCO(annFile)

# Detections: the name follows the '<thresh>_<timestamp>' pattern used by the
# inference cell, so this was presumably produced with thresh=0.5 -- verify.
resFile='Datasets/coco_test/result/0.5_2017-12-14-09-5-38'
cocoDt=cocoGt.loadRes(resFile)

# Evaluate on every image id present in the ground truth, in sorted order.
imgIds=sorted(cocoGt.getImgIds())

# Bounding-box evaluation; summarize() prints the AP/AR table shown below.
cocoEval = COCOeval(cocoGt,cocoDt,'bbox')
cocoEval.params.imgIds  = imgIds
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.13s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.334
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.581
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.342
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.357
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.306
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.383
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.383
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet

In [18]:
# Score a stored detection file against the ground truth (file prefix 0.25).

# Ground truth: load the test annotations through the COCO API.
dataDir='Datasets/coco_test/'
dataType='test'
# dataDir already ends with '/', so this expands to
# 'Datasets/coco_test//annotations/test.json' (doubled slash).
annFile = '%s/annotations/%s.json'%(dataDir,dataType)
cocoGt=COCO(annFile)

# Detections: the name follows the '<thresh>_<timestamp>' pattern used by the
# inference cell, so this was presumably produced with thresh=0.25 -- verify.
resFile='Datasets/coco_test/result/0.25_2017-12-14-09-5-34'
cocoDt=cocoGt.loadRes(resFile)

# Evaluate on every image id present in the ground truth, in sorted order.
imgIds=sorted(cocoGt.getImgIds())

# Bounding-box evaluation; summarize() prints the AP/AR table shown below.
cocoEval = COCOeval(cocoGt,cocoDt,'bbox')
cocoEval.params.imgIds  = imgIds
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.14s).
Accumulating evaluation results...
DONE (t=0.04s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.353
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.644
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.348
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.371
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.335
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.414
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.416
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet

In [12]:
# Score a stored detection file against the ground truth (file prefix 0.1).

# Ground truth: load the test annotations through the COCO API.
dataDir='Datasets/coco_test/'
dataType='test'
# dataDir already ends with '/', so this expands to
# 'Datasets/coco_test//annotations/test.json' (doubled slash).
annFile = '%s/annotations/%s.json'%(dataDir,dataType)
cocoGt=COCO(annFile)

# Detections: the name follows the '<thresh>_<timestamp>' pattern used by the
# inference cell, so this was presumably produced with thresh=0.1 -- verify.
resFile='Datasets/coco_test/result/0.1_2017-12-14-09-4-14'
cocoDt=cocoGt.loadRes(resFile)

# Evaluate on every image id present in the ground truth, in sorted order.
imgIds=sorted(cocoGt.getImgIds())

# Bounding-box evaluation; summarize() prints the AP/AR table shown below.
cocoEval = COCOeval(cocoGt,cocoDt,'bbox')
cocoEval.params.imgIds  = imgIds
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.16s).
Accumulating evaluation results...
DONE (t=0.04s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.358
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.663
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.349
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.376
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.341
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.423
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.427
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet