rbgirshick · asbroad · Jun 11, 2015 · Jun 11, 2015 · Jun 11, 2015
diff --git a/README.md b/README.md
@@ -114,6 +114,13 @@ The demo performs detection using a VGG16 network trained for detection on PASCA
 
 **Note:** If the demo crashes Caffe because your GPU doesn't have enough memory, try running the demo with a small network, e.g., `./tools/demo.py --net caffenet` or with `--net vgg_cnn_m_1024`. Or run in CPU mode `./tools/demo.py --cpu`. Type `./tools/demo.py -h` for usage.
 
+To run the webcam demo
+```Shell
+cd $FRCN_ROOT
+./tools/webcam.py
+```
+The webcam demo runs the same VGG16 network as the original demo by default and displays a live 'person' detector on the webcam feed. It requires [Dlib](http://www.dlib.net) (including its Python bindings), which is used to perform selective search for object proposals. You also need a webcam (either built in or attached) to run this demo.
+
 **MATLAB**
 
 There's also a *basic* MATLAB demo, though it's missing some minor bells and whistles compared to the Python version.

diff --git a/tools/webcam.py b/tools/webcam.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+"""
+Demo script showing live 'person' detections on a webcam feed.
+
+See README.md for installation instructions before running.
+"""
+
+import _init_paths
+from fast_rcnn.config import cfg
+from fast_rcnn.test import im_detect
+from utils.cython_nms import nms
+from utils.timer import Timer
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy.io as sio
+import caffe, os, cv2
+import argparse
+import dlib
+
+CLASSES = ('__background__',
+           'aeroplane', 'bicycle', 'bird', 'boat',
+           'bottle', 'bus', 'car', 'cat', 'chair',
+           'cow', 'diningtable', 'dog', 'horse',
+           'motorbike', 'person', 'pottedplant',
+           'sheep', 'sofa', 'train', 'tvmonitor')
+
+NETS = {'vgg16': ('VGG16',
+                  'vgg16_fast_rcnn_iter_40000.caffemodel'),
+        'vgg_cnn_m_1024': ('VGG_CNN_M_1024',
+                           'vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel'),
+        'caffenet': ('CaffeNet',
+                     'caffenet_fast_rcnn_iter_40000.caffemodel')}
+
+
+def vis_detections(im, class_name, dets, thresh=0.5):  # NOTE(review): not called anywhere in this script
+    """Plot im with boxes for rows of dets (x1, y1, x2, y2 first, score last) scoring >= thresh."""
+    inds = np.where(dets[:, -1] >= thresh)[0]  # row indices whose last column (score) passes thresh
+    if len(inds) == 0:
+        return  # nothing above threshold: no figure is created
+
+    im = im[:, :, (2, 1, 0)]  # reverse channel order (presumably BGR -> RGB for matplotlib)
+    fig, ax = plt.subplots(figsize=(12, 12))
+    ax.imshow(im, aspect='equal')
+    for i in inds:
+        bbox = dets[i, :4]  # corner coordinates: x1, y1, x2, y2
+        score = dets[i, -1]
+
+        ax.add_patch(
+            plt.Rectangle((bbox[0], bbox[1]),
+                          bbox[2] - bbox[0],
+                          bbox[3] - bbox[1], fill=False,
+                          edgecolor='red', linewidth=3.5)
+            )
+        ax.text(bbox[0], bbox[1] - 2,
+                '{:s} {:.3f}'.format(class_name, score),
+                bbox=dict(facecolor='blue', alpha=0.5),
+                fontsize=14, color='white')
+
+    ax.set_title(('{} detections with '
+                  'p({} | box) >= {:.1f}').format(class_name, class_name,
+                                                  thresh),
+                  fontsize=14)
+    plt.axis('off')
+    plt.tight_layout()
+    plt.draw()  # redraws the current figure; plt.show() is not called here
+
+def demo(net, im, scale_factor, classes):
+    """Detect object classes in a downscaled copy of im using object proposals generated by dlib."""
+
+    im2 = cv2.resize(im, (0,0), fx=1.0/scale_factor, fy=1.0/scale_factor)  # shrink both axes by scale_factor
+
+    obj_proposals_in = []
+    dlib.find_candidate_object_locations(im2, obj_proposals_in, min_size=70)  # dlib fills the list with candidate rectangles
+
+    obj_proposals = np.empty((len(obj_proposals_in),4))  # one row per proposal
+    for idx in range(len(obj_proposals_in)):
+        obj_proposals[idx] = [obj_proposals_in[idx].left(), obj_proposals_in[idx].top(), obj_proposals_in[idx].right(), obj_proposals_in[idx].bottom()]
+
+    # Detect all object classes and regress object bounds
+    scores, boxes = im_detect(net, im2, obj_proposals)
+
+    # Per-class NMS. NOTE(review): dets is overwritten on every pass, so only the last class in `classes` is returned
+    CONF_THRESH = 0.8
+    NMS_THRESH = 0.3
+    for cls in classes:  # NOTE(review): an empty `classes` leaves cls/dets unbound and the return below raises NameError
+        cls_ind = CLASSES.index(cls)
+        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]  # the 4 box columns belonging to this class
+        cls_scores = scores[:, cls_ind]
+        dets = np.hstack((cls_boxes,
+                          cls_scores[:, np.newaxis])).astype(np.float32)
+        keep = nms(dets, NMS_THRESH)
+        dets = dets[keep, :]
+
+    return [im2, cls, dets, CONF_THRESH]  # detections were computed on im2 (the downscaled frame); the caller rescales them
+
+
+def parse_args():
+    """Parse the webcam demo's command-line options (--gpu, --cpu, --net) and return the namespace."""
+    parser = argparse.ArgumentParser(description='Fast R-CNN webcam demo')
+    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
+                        default=0, type=int)
+    parser.add_argument('--cpu', dest='cpu_mode',
+                        help='Use CPU mode (overrides --gpu)',
+                        action='store_true')
+    parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
+                        choices=NETS.keys(), default='vgg16')
+
+    args = parser.parse_args()
+
+    return args
+
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    prototxt = os.path.join('models', NETS[args.demo_net][0], 'test.prototxt')
+    caffemodel = os.path.join('data', 'fast_rcnn_models',
+                              NETS[args.demo_net][1])
+
+    if not os.path.isfile(caffemodel):
+        raise IOError(('{:s} not found.\nDid you run ./data/script/'
+                       'fetch_fast_rcnn_models.sh?').format(caffemodel))
+
+    if args.cpu_mode:
+        caffe.set_mode_cpu()
+    else:
+        caffe.set_mode_gpu()
+        caffe.set_device(args.gpu_id)  # select a GPU only in GPU mode, so --cpu never touches a CUDA device
+    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
+
+    print '\n\nLoaded network {:s}'.format(caffemodel)
+
+    cap = cv2.VideoCapture(0)
+
+    while(True):
+        # Capture frame-by-frame; leave the loop (and release the camera) if no frame could be read
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        # Scaling the video feed can help the system run faster (and run on GPUs with less memory)
+        # e.g. with a standard video stream of size 640x480, a scale_factor = 4 will allow the system to run at < 1 sec/frame
+        scale_factor = 4
+        [im2, cls, dets, CONF_THRESH] = demo(net, frame, scale_factor, ('person',))
+        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]  # keep detections whose score passes the threshold
+        for i in inds:
+            bbox = dets[i, :4]  # box in downscaled-frame coordinates; scaled back up to the full frame below
+            cv2.rectangle(frame,(int(bbox[0]*scale_factor),int(bbox[1]*scale_factor)),(int(bbox[2]*scale_factor),int(bbox[3]*scale_factor)),(0,255,0),2)
+
+        # Display the resulting frame
+        cv2.imshow('frame',frame)
+
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    # When everything done, release the capture
+    cap.release()
+    cv2.destroyAllWindows()