#!/usr/bin/env python

# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""
Webcam demo: detect people in a live video feed with Fast R-CNN.

Object proposals for each frame are generated with Dlib's selective
search (find_candidate_object_locations), so the Dlib library is
required. Press 'q' in the display window to quit.

See README.md for installation instructions before running.
"""

import _init_paths
from fast_rcnn.config import cfg
from fast_rcnn.test import im_detect
from utils.cython_nms import nms
from utils.timer import Timer
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import caffe, os, cv2
import argparse
import dlib

# PASCAL VOC class list; index in this tuple == class index in the net output.
CLASSES = ('__background__',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat', 'chair',
           'cow', 'diningtable', 'dog', 'horse',
           'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor')

# Mapping of --net choice -> (model directory name, caffemodel filename).
NETS = {'vgg16': ('VGG16',
                  'vgg16_fast_rcnn_iter_40000.caffemodel'),
        'vgg_cnn_m_1024': ('VGG_CNN_M_1024',
                           'vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel'),
        'caffenet': ('CaffeNet',
                     'caffenet_fast_rcnn_iter_40000.caffemodel')}


def vis_detections(im, class_name, dets, thresh=0.5):
    """Draw detected bounding boxes with matplotlib.

    Args:
        im: BGR image (OpenCV convention); converted to RGB for display.
        class_name: label drawn next to each box.
        dets: (N, 5) array of [x1, y1, x2, y2, score] rows.
        thresh: only rows with score >= thresh are drawn.

    NOTE(review): unused by the webcam loop below (it draws with
    cv2.rectangle instead); kept for parity with tools/demo.py.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return

    im = im[:, :, (2, 1, 0)]  # BGR -> RGB for matplotlib
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]

        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1], fill=False,
                          edgecolor='red', linewidth=3.5)
            )
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

    ax.set_title(('{} detections with '
                  'p({} | box) >= {:.1f}').format(class_name, class_name,
                                                  thresh),
                 fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()


def demo(net, im, scale_factor, classes):
    """Detect objects of classes[0] in an image using Dlib proposals.

    Args:
        net: a loaded caffe.Net in TEST mode.
        im: full-resolution BGR frame.
        scale_factor: frame is shrunk by 1/scale_factor before detection;
            returned boxes are in the SHRUNK coordinate frame.
        classes: tuple of class names; only the first entry is detected.

    Returns:
        [im2, cls, dets, CONF_THRESH] where im2 is the resized image,
        cls is the detected class name, dets is an (N, 5) float32 array
        of [x1, y1, x2, y2, score] after NMS, and CONF_THRESH is the
        suggested display threshold.
    """
    # Shrink the frame: selective search + the net are much faster on
    # smaller inputs (and fit on GPUs with less memory).
    im2 = cv2.resize(im, (0, 0), fx=1.0 / scale_factor, fy=1.0 / scale_factor)

    # Region proposals via Dlib's selective search.
    rects = []
    dlib.find_candidate_object_locations(im2, rects, min_size=70)
    # reshape(-1, 4) keeps the (0, 4) shape im_detect expects even when
    # Dlib returns no candidates.
    obj_proposals = np.array(
        [[r.left(), r.top(), r.right(), r.bottom()] for r in rects],
        dtype=np.float64).reshape(-1, 4)

    # Detect all object classes and regress object bounds.
    scores, boxes = im_detect(net, im2, obj_proposals)

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    # BUG FIX: the original looped `for cls in classes:` but returned
    # inside the loop, so every class after the first was silently
    # ignored. Make the single-class contract explicit instead.
    cls = classes[0]
    cls_ind = CLASSES.index(cls)
    cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes,
                      cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    dets = dets[keep, :]

    return [im2, cls, dets, CONF_THRESH]


def parse_args():
    """Parse input arguments."""
    # Description fixed: this is the webcam demo, not training.
    parser = argparse.ArgumentParser(description='Fast R-CNN webcam demo')
    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
                        default=0, type=int)
    parser.add_argument('--cpu', dest='cpu_mode',
                        help='Use CPU mode (overrides --gpu)',
                        action='store_true')
    parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
                        choices=NETS.keys(), default='vgg16')

    args = parser.parse_args()

    return args


if __name__ == '__main__':
    args = parse_args()

    prototxt = os.path.join('models', NETS[args.demo_net][0], 'test.prototxt')
    caffemodel = os.path.join('data', 'fast_rcnn_models',
                              NETS[args.demo_net][1])

    if not os.path.isfile(caffemodel):
        raise IOError(('{:s} not found.\nDid you run ./data/script/'
                       'fetch_fast_rcnn_models.sh?').format(caffemodel))

    if args.cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    print('\n\nLoaded network {:s}'.format(caffemodel))

    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Capture frame-by-frame.
            ret, frame = cap.read()
            # BUG FIX: a failed read (no camera / camera unplugged) returns
            # ret=False and frame=None; previously this crashed cv2.resize.
            if not ret:
                break

            # Scaling the video feed helps the system run faster (and run on
            # GPUs with less memory); e.g. with a standard 640x480 stream,
            # scale_factor = 4 allows roughly < 1 sec/frame.
            scale_factor = 4
            im2, cls, dets, conf_thresh = demo(net, frame, scale_factor,
                                               ('person',))

            # Draw confident detections, scaled back to full-frame coords.
            for i in np.where(dets[:, -1] >= conf_thresh)[0]:
                bbox = dets[i, :4] * scale_factor
                cv2.rectangle(frame,
                              (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),
                              (0, 255, 0), 2)

            # Display the resulting frame.
            cv2.imshow('frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # BUG FIX: release the camera and windows even if the loop raises.
        cap.release()
        cv2.destroyAllWindows()
demo crashes Caffe because your GPU doesn't have enough memory, try running the demo with a small network, e.g., `./tools/demo.py --net caffenet` or with `--net vgg_cnn_m_1024`. Or run in CPU mode `./tools/demo.py --cpu`. Type `./tools/demo.py -h` for usage. +To run the webcam demo +```Shell +cd $FRCN_ROOT +./tools/webcam.py +``` +The webcam demo runs the same VGG16 network as the original demo. This demo requires the [Dlib](www.dlib.net) as it is used to perform selective search. This demo will display a live 'person' detector in a webcam feed. You also need to have a webcam to run this demo (either built in or attached). + **MATLAB** There's also a *basic* MATLAB demo, though it's missing some minor bells and whistles compared to the Python version. From c5b0b9a6b6ae93a8a3b853334d4f6149860b48ef Mon Sep 17 00:00:00 2001 From: Alexander Broad Date: Thu, 11 Jun 2015 16:55:02 -0500 Subject: [PATCH 3/3] Fixed link in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 000704c6f..f0008d03f 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ To run the webcam demo cd $FRCN_ROOT ./tools/webcam.py ``` -The webcam demo runs the same VGG16 network as the original demo. This demo requires the [Dlib](www.dlib.net) as it is used to perform selective search. This demo will display a live 'person' detector in a webcam feed. You also need to have a webcam to run this demo (either built in or attached). +The webcam demo runs the same VGG16 network as the original demo. This demo requires the [Dlib](http://www.dlib.net) library, as it is used to perform selective search. This demo will display a live 'person' detector in a webcam feed. You also need to have a webcam to run this demo (either built in or attached). **MATLAB**