Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Webcam Demo #29

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Expand Up @@ -114,6 +114,13 @@ The demo performs detection using a VGG16 network trained for detection on PASCA

**Note:** If the demo crashes Caffe because your GPU doesn't have enough memory, try running the demo with a small network, e.g., `./tools/demo.py --net caffenet` or with `--net vgg_cnn_m_1024`. Or run in CPU mode `./tools/demo.py --cpu`. Type `./tools/demo.py -h` for usage.

To run the webcam demo
```Shell
cd $FRCN_ROOT
./tools/webcam.py
```
The webcam demo runs the same VGG16 network as the original demo. This demo requires the [Dlib](http://www.dlib.net) as it is used to perform selective search. This demo will display a live 'person' detector in a webcam feed. You also need to have a webcam to run this demo (either built in or attached).

**MATLAB**

There's also a *basic* MATLAB demo, though it's missing some minor bells and whistles compared to the Python version.
Expand Down
166 changes: 166 additions & 0 deletions tools/webcam.py
@@ -0,0 +1,166 @@
#!/usr/bin/env python

# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""
Demo script showing detections in sample images.

See README.md for installation instructions before running.
"""

import _init_paths
from fast_rcnn.config import cfg
from fast_rcnn.test import im_detect
from utils.cython_nms import nms
from utils.timer import Timer
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import caffe, os, cv2
import argparse
import dlib

CLASSES = ('__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor')

NETS = {'vgg16': ('VGG16',
'vgg16_fast_rcnn_iter_40000.caffemodel'),
'vgg_cnn_m_1024': ('VGG_CNN_M_1024',
'vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel'),
'caffenet': ('CaffeNet',
'caffenet_fast_rcnn_iter_40000.caffemodel')}


def vis_detections(im, class_name, dets, thresh=0.5):
"""Draw detected bounding boxes."""
inds = np.where(dets[:, -1] >= thresh)[0]
if len(inds) == 0:
return

im = im[:, :, (2, 1, 0)]
fig, ax = plt.subplots(figsize=(12, 12))
ax.imshow(im, aspect='equal')
for i in inds:
bbox = dets[i, :4]
score = dets[i, -1]

ax.add_patch(
plt.Rectangle((bbox[0], bbox[1]),
bbox[2] - bbox[0],
bbox[3] - bbox[1], fill=False,
edgecolor='red', linewidth=3.5)
)
ax.text(bbox[0], bbox[1] - 2,
'{:s} {:.3f}'.format(class_name, score),
bbox=dict(facecolor='blue', alpha=0.5),
fontsize=14, color='white')

ax.set_title(('{} detections with '
'p({} | box) >= {:.1f}').format(class_name, class_name,
thresh),
fontsize=14)
plt.axis('off')
plt.tight_layout()
plt.draw()

def demo(net, im, scale_factor, classes):
"""Detect object classes in an image using pre-computed object proposals."""

im2 = cv2.resize(im, (0,0), fx=1.0/scale_factor, fy=1.0/scale_factor)

obj_proposals_in = []
dlib.find_candidate_object_locations(im2, obj_proposals_in, min_size=70)

obj_proposals = np.empty((len(obj_proposals_in),4))
for idx in range(len(obj_proposals_in)):
obj_proposals[idx] = [obj_proposals_in[idx].left(), obj_proposals_in[idx].top(), obj_proposals_in[idx].right(), obj_proposals_in[idx].bottom()]

# Detect all object classes and regress object bounds
scores, boxes = im_detect(net, im2, obj_proposals)

# Visualize detections for each class
CONF_THRESH = 0.8
NMS_THRESH = 0.3
for cls in classes:
cls_ind = CLASSES.index(cls)
cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
cls_scores = scores[:, cls_ind]
dets = np.hstack((cls_boxes,
cls_scores[:, np.newaxis])).astype(np.float32)
keep = nms(dets, NMS_THRESH)
dets = dets[keep, :]

return [im2, cls, dets, CONF_THRESH]


def parse_args():
"""Parse input arguments."""
parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
default=0, type=int)
parser.add_argument('--cpu', dest='cpu_mode',
help='Use CPU mode (overrides --gpu)',
action='store_true')
parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
choices=NETS.keys(), default='vgg16')

args = parser.parse_args()

return args


if __name__ == '__main__':
args = parse_args()

prototxt = os.path.join('models', NETS[args.demo_net][0], 'test.prototxt')
caffemodel = os.path.join('data', 'fast_rcnn_models',
NETS[args.demo_net][1])

if not os.path.isfile(caffemodel):
raise IOError(('{:s} not found.\nDid you run ./data/script/'
'fetch_fast_rcnn_models.sh?').format(caffemodel))

if args.cpu_mode:
caffe.set_mode_cpu()
else:
caffe.set_mode_gpu()
caffe.set_device(args.gpu_id)
net = caffe.Net(prototxt, caffemodel, caffe.TEST)

print '\n\nLoaded network {:s}'.format(caffemodel)

cap = cv2.VideoCapture(0)

while(True):
# Capture frame-by-frame
ret, frame = cap.read()

# Scaling the video feed can help the system run faster (and run on GPUs with less memory)
# e.g. with a standard video stream of size 640x480, a scale_factor = 4 will allow the system to run a < 1 sec/frame
scale_factor = 4
[im2, cls, dets, CONF_THRESH] = demo(net, frame, scale_factor, ('person',))

inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
if len(inds) != 0:
for i in inds:
bbox = dets[i, :4]
cv2.rectangle(frame,(int(bbox[0]*scale_factor),int(bbox[1]*scale_factor)),(int(bbox[2]*scale_factor),int(bbox[3]*scale_factor)),(0,255,0),2)

# Display the resulting frame
cv2.imshow('frame',frame)

if cv2.waitKey(1) & 0xFF == ord('q'):
break

# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()