Skip to content
Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
260 lines (225 sloc) 9.69 KB
#include <fstream>
#include <sstream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "common.hpp"
std::string keys =
"{ help h | | Print help message. }"
"{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }"
"{ zoo | models.yml | An optional path to file with preprocessing parameters }"
"{ device | 0 | camera device number. }"
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }"
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
"{ classes | | Optional path to a text file with names of classes to label detected objects. }"
"{ thr | .5 | Confidence threshold. }"
"{ nms | .4 | Non-maximum suppression threshold. }"
"{ backend | 0 | Choose one of computation backends: "
"0: automatically (by default), "
"1: Halide language (http://halide-lang.org/), "
"2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
"3: OpenCV implementation }"
"{ target | 0 | Choose one of target computation devices: "
"0: CPU target (by default), "
"1: OpenCL, "
"2: OpenCL fp16 (half-float precision), "
"3: VPU }";
using namespace cv;
using namespace dnn;
float confThreshold, nmsThreshold;
std::vector<std::string> classes;
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
void callback(int pos, void* userdata);
std::vector<String> getOutputsNames(const Net& net);
int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv, keys);
const std::string modelName = parser.get<String>("@alias");
const std::string zooFile = parser.get<String>("zoo");
keys += genPreprocArguments(modelName, zooFile);
parser = CommandLineParser(argc, argv, keys);
parser.about("Use this script to run object detection deep learning networks using OpenCV.");
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
confThreshold = parser.get<float>("thr");
nmsThreshold = parser.get<float>("nms");
float scale = parser.get<float>("scale");
Scalar mean = parser.get<Scalar>("mean");
bool swapRB = parser.get<bool>("rgb");
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
CV_Assert(parser.has("model"));
std::string modelPath = findFile(parser.get<String>("model"));
std::string configPath = findFile(parser.get<String>("config"));
// Open file with classes names.
if (parser.has("classes"))
{
std::string file = parser.get<String>("classes");
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
{
classes.push_back(line);
}
}
// Load a model.
Net net = readNet(modelPath, configPath, parser.get<String>("framework"));
net.setPreferableBackend(parser.get<int>("backend"));
net.setPreferableTarget(parser.get<int>("target"));
std::vector<String> outNames = net.getUnconnectedOutLayersNames();
// Create a window
static const std::string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
int initialConf = (int)(confThreshold * 100);
createTrackbar("Confidence threshold, %", kWinName, &initialConf, 99, callback);
// Open a video file or an image file or a camera stream.
VideoCapture cap;
if (parser.has("input"))
cap.open(parser.get<String>("input"));
else
cap.open(parser.get<int>("device"));
// Process frames.
Mat frame, blob;
while (waitKey(1) < 0)
{
cap >> frame;
if (frame.empty())
{
waitKey();
break;
}
// Create a 4D blob from a frame.
Size inpSize(inpWidth > 0 ? inpWidth : frame.cols,
inpHeight > 0 ? inpHeight : frame.rows);
blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false);
// Run a model.
net.setInput(blob);
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
{
resize(frame, frame, inpSize);
Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f);
net.setInput(imInfo, "im_info");
}
std::vector<Mat> outs;
net.forward(outs, outNames);
postprocess(frame, outs, net);
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow(kWinName, frame);
}
return 0;
}
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
{
static std::vector<int> outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
std::vector<int> classIds;
std::vector<float> confidences;
std::vector<Rect> boxes;
if (outLayerType == "DetectionOutput")
{
// Network produces output blob with a shape 1x1xNx7 where N is a number of
// detections and an every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
CV_Assert(outs.size() > 0);
for (size_t k = 0; k < outs.size(); k++)
{
float* data = (float*)outs[k].data;
for (size_t i = 0; i < outs[k].total(); i += 7)
{
float confidence = data[i + 2];
if (confidence > confThreshold)
{
int left = (int)data[i + 3];
int top = (int)data[i + 4];
int right = (int)data[i + 5];
int bottom = (int)data[i + 6];
int width = right - left + 1;
int height = bottom - top + 1;
if (width * height <= 1)
{
left = (int)(data[i + 3] * frame.cols);
top = (int)(data[i + 4] * frame.rows);
right = (int)(data[i + 5] * frame.cols);
bottom = (int)(data[i + 6] * frame.rows);
width = right - left + 1;
height = bottom - top + 1;
}
classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
boxes.push_back(Rect(left, top, width, height));
confidences.push_back(confidence);
}
}
}
}
else if (outLayerType == "Region")
{
for (size_t i = 0; i < outs.size(); ++i)
{
// Network produces output blob with a shape NxC where N is a number of
// detected objects and C is a number of classes + 4 where the first 4
// numbers are [center_x, center_y, width, height]
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
{
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > confThreshold)
{
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}
}
else
CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
std::vector<int> indices;
NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
drawPred(classIds[idx], confidences[idx], box.x, box.y,
box.x + box.width, box.y + box.height, frame);
}
}
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0));
std::string label = format("%.2f", conf);
if (!classes.empty())
{
CV_Assert(classId < (int)classes.size());
label = classes[classId] + ": " + label;
}
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(frame, Point(left, top - labelSize.height),
Point(left + labelSize.width, top + baseLine), Scalar::all(255), FILLED);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar());
}
void callback(int pos, void*)
{
confThreshold = pos * 0.01f;
}
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.