diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt
index 415ced70b47..8a38a824699 100644
--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@@ -9,7 +9,7 @@ endif()
 
 set(the_description "Deep neural network module. It allows to load models from different frameworks and to make forward pass")
 
-ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab)
+ocv_add_module(dnn opencv_core opencv_imgproc)
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo
                      -Wmissing-declarations -Wmissing-prototypes
 )
diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 42bd2811f32..70d8687fe31 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -72,12 +72,18 @@ namespace dnn
      - Dropout (since it does nothing on forward pass -))
     */
+    class CV_EXPORTS BlankLayer : public Layer
+    {
+    public:
+        static Ptr<BlankLayer> create(const LayerParams &params);
+    };
+
     //! LSTM recurrent layer
-    class CV_EXPORTS_W LSTMLayer : public Layer
+    class CV_EXPORTS LSTMLayer : public Layer
     {
     public:
         /** Creates instance of LSTM layer */
-        static CV_WRAP Ptr<LSTMLayer> create();
+        static Ptr<LSTMLayer> create(const LayerParams& params);
 
         /** Set trained weights for LSTM layer.
         LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
 
@@ -109,27 +115,27 @@ namespace dnn
         @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to the above-mentioned notation it is @f$ W_x @f$)
         @param b  is bias vector (i.e. according to the above-mentioned notation it is @f$ b @f$)
         */
-        CV_WRAP virtual void setWeights(const Blob &Wh, const Blob &Wx, const Blob &b) = 0;
+        virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
 
         /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
          * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
          * where `Wh` is parameter from setWeights().
          */
-        CV_WRAP virtual void setOutShape(const BlobShape &outTailShape = BlobShape::empty()) = 0;
+        virtual void setOutShape(const std::vector<int> &outTailShape = std::vector<int>()) = 0;
 
         /** @brief Set @f$ h_{t-1} @f$ value that will be used in next forward() calls.
          * @details By default @f$ h_{t-1} @f$ is initialized with zeros and updated after each forward() call.
          */
-        CV_WRAP virtual void setH(const Blob &H) = 0;
+        virtual void setH(const Mat &H) = 0;
         /** @brief Returns current @f$ h_{t-1} @f$ value (deep copy). */
-        CV_WRAP virtual Blob getH() const = 0;
+        virtual Mat getH() const = 0;
 
         /** @brief Set @f$ c_{t-1} @f$ value that will be used in next forward() calls.
          * @details By default @f$ c_{t-1} @f$ is initialized with zeros and updated after each forward() call.
         */
-        CV_WRAP virtual void setC(const Blob &C) = 0;
+        virtual void setC(const Mat &C) = 0;
         /** @brief Returns current @f$ c_{t-1} @f$ value (deep copy). */
-        CV_WRAP virtual Blob getC() const = 0;
+        virtual Mat getC() const = 0;
 
         /** @brief Specifies whether the first dimension of the input blob should be interpreted as the timestamp dimension or as the sample dimension.
          *
@@ -139,14 +145,14 @@ namespace dnn
          * If the flag is set to false then the shape of the input blob will be interpreted as [`N`, `[data dims]`].
          * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
         */
-        CV_WRAP virtual void setUseTimstampsDim(bool use = true) = 0;
+        virtual void setUseTimstampsDim(bool use = true) = 0;
 
         /** @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
         * @details Shape of the second output is the same as first output.
         */
-        CV_WRAP virtual void setProduceCellOutput(bool produce = false) = 0;
+        virtual void setProduceCellOutput(bool produce = false) = 0;
 
-        /** In common case it use single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
+        /* In the common case it uses a single input with @f$x_t@f$ values to compute the output(s) @f$h_t@f$ (and @f$c_t@f$).
         * @param input should contain packed values @f$x_t@f$
        * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
        *
@@ -156,19 +162,17 @@ namespace dnn
        * If setUseTimstampsDim() is set to false then @p input[0] should contain a single timestamp, and its shape should have the form [`N`, `[data dims]`] with at least one dimension.
        * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
        */
-        void forward(std::vector<Blob*> &input, std::vector<Blob> &output);
 
        int inputNameToIndex(String inputName);
-        int outputNameToIndex(String outputName);
    };
 
     //! Classical recurrent layer
-    class CV_EXPORTS_W RNNLayer : public Layer
+    class CV_EXPORTS RNNLayer : public Layer
     {
     public:
         /** Creates instance of RNNLayer */
-        static CV_WRAP Ptr<RNNLayer> create();
+        static Ptr<RNNLayer> create(const LayerParams& params);
 
         /** Setups learned weights.
 
@@ -184,12 +188,12 @@ namespace dnn
        @param Who is @f$ W_{xo} @f$ matrix
        @param bo  is @f$ b_{o} @f$ vector
        */
-        CV_WRAP virtual void setWeights(const Blob &Wxh, const Blob &bh, const Blob &Whh, const Blob &Who, const Blob &bo) = 0;
+        virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
 
         /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
         * @details Shape of the second output is the same as first output.
         */
-        CV_WRAP virtual void setProduceHiddenOutput(bool produce = false) = 0;
+        virtual void setProduceHiddenOutput(bool produce = false) = 0;
 
         /** Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and computes two outputs @f$o_t@f$ and @f$h_t@f$.
 
@@ -200,57 +204,49 @@ namespace dnn
 
        @p output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix.
 
-        If setProduceHiddenOutput() is set to true then @p output[1] will contain a Blob with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
+        If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
        */
-        void forward(std::vector<Blob*> &input, std::vector<Blob> &output);
    };
 
-    class CV_EXPORTS_W BaseConvolutionLayer : public Layer
+    class CV_EXPORTS BaseConvolutionLayer : public Layer
     {
     public:
-
-        CV_PROP_RW Size kernel, stride, pad, dilation, adjustPad;
-        CV_PROP_RW String padMode;
+        Size kernel, stride, pad, dilation, adjustPad;
+        String padMode;
     };
 
-    class CV_EXPORTS_W ConvolutionLayer : public BaseConvolutionLayer
+    class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
     {
     public:
-
-        static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1));
+        static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
     };
 
-    class CV_EXPORTS_W DeconvolutionLayer : public BaseConvolutionLayer
+    class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
     {
     public:
-
-        static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1), Size adjustPad = Size());
+        static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
     };
 
-    class CV_EXPORTS_W LRNLayer : public Layer
+    class CV_EXPORTS LRNLayer : public Layer
     {
     public:
-
         enum Type
         {
             CHANNEL_NRM,
             SPATIAL_NRM
         };
-        CV_PROP_RW int type;
+        int type;
 
-        CV_PROP_RW int size;
-        CV_PROP_RW double alpha, beta, bias;
-        CV_PROP_RW bool normBySize;
+        int size;
+        float alpha, beta, bias;
+        bool normBySize;
 
-        static CV_WRAP Ptr<LRNLayer> create(int type = LRNLayer::CHANNEL_NRM, int size = 5,
-                                            double alpha = 1, double beta = 0.75, double bias = 1,
-                                            bool normBySize = true);
+        static Ptr<LRNLayer> create(const LayerParams& params);
     };
 
-    class CV_EXPORTS_W PoolingLayer : public Layer
+    class CV_EXPORTS PoolingLayer : public Layer
     {
     public:
-
         enum Type
         {
             MAX,
@@ -258,139 +254,146 @@ namespace dnn
             STOCHASTIC
         };
 
-        CV_PROP_RW int type;
-        CV_PROP_RW Size kernel, stride, pad;
-        CV_PROP_RW bool globalPooling;
-        CV_PROP_RW String padMode;
+        int type;
+        Size kernel, stride, pad;
+        bool globalPooling;
+        String padMode;
 
-        static CV_WRAP Ptr<PoolingLayer> create(int type = PoolingLayer::MAX, Size kernel = Size(2, 2),
-                                                Size stride = Size(1, 1), Size pad = Size(0, 0),
-                                                const cv::String& padMode = "");
-        static CV_WRAP Ptr<PoolingLayer> createGlobal(int type = PoolingLayer::MAX);
+        static Ptr<PoolingLayer> create(const LayerParams& params);
     };
 
-    class CV_EXPORTS_W SoftmaxLayer : public Layer
+    class CV_EXPORTS SoftmaxLayer : public Layer
     {
     public:
-
-        static CV_WRAP Ptr<SoftmaxLayer> create(int axis = 1);
+        static Ptr<SoftmaxLayer> create(const LayerParams& params);
     };
 
-    class CV_EXPORTS_W InnerProductLayer : public Layer
+    class CV_EXPORTS InnerProductLayer : public Layer
     {
     public:
-        CV_PROP_RW int axis;
-
-        static CV_WRAP Ptr<InnerProductLayer> create(int axis = 1);
+        int axis;
+        static Ptr<InnerProductLayer> create(const LayerParams& params);
     };
 
-    class CV_EXPORTS_W MVNLayer : public Layer
+    class CV_EXPORTS MVNLayer : public Layer
     {
     public:
-        CV_PROP_RW double eps;
-        CV_PROP_RW bool normVariance, acrossChannels;
+        float eps;
+        bool normVariance, acrossChannels;
 
-        static CV_WRAP Ptr<MVNLayer> create(bool normVariance = true, bool acrossChannels = false, double eps = 1e-9);
+        static Ptr<MVNLayer> create(const LayerParams& params);
     };
 
     /* Reshaping */
 
-    class CV_EXPORTS_W ReshapeLayer : public Layer
+    class CV_EXPORTS ReshapeLayer : public Layer
     {
     public:
-        CV_PROP_RW BlobShape newShapeDesc;
-        CV_PROP_RW Range newShapeRange;
+        std::vector<int> newShapeDesc;
+        Range newShapeRange;
+
+        static Ptr<ReshapeLayer> create(const LayerParams& params);
+    };
 
-        static CV_WRAP Ptr<ReshapeLayer> create(const BlobShape &newShape, Range applyingRange = Range::all(),
-                                                bool enableReordering = false);
+    class CV_EXPORTS FlattenLayer : public Layer
+    {
+    public:
+        static Ptr<FlattenLayer> create(const LayerParams &params);
    };
-    class CV_EXPORTS_W ConcatLayer : public Layer
+    class CV_EXPORTS ConcatLayer : public Layer
     {
     public:
         int axis;
 
-        static CV_WRAP Ptr<ConcatLayer> create(int axis = 1);
+        static Ptr<ConcatLayer> create(const LayerParams &params);
     };
 
-    class CV_EXPORTS_W SplitLayer : public Layer
+    class CV_EXPORTS SplitLayer : public Layer
     {
     public:
         int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
 
-        static CV_WRAP Ptr<SplitLayer> create(int outputsCount = -1);
+        static Ptr<SplitLayer> create(const LayerParams &params);
     };
 
-    class CV_EXPORTS_W SliceLayer : public Layer
+    class CV_EXPORTS SliceLayer : public Layer
     {
     public:
-        CV_PROP_RW int axis;
-        CV_PROP std::vector<int> sliceIndices;
+        int axis;
+        std::vector<int> sliceIndices;
 
-        static CV_WRAP Ptr<SliceLayer> create(int axis);
-        static CV_WRAP Ptr<SliceLayer> create(int axis, const std::vector<int> &sliceIndices);
+        static Ptr<SliceLayer> create(const LayerParams &params);
     };
 
-    /* Activations */
-
-    class CV_EXPORTS_W ReLULayer : public Layer
+    class CV_EXPORTS PermuteLayer : public Layer
     {
     public:
-        CV_PROP_RW double negativeSlope;
+        static Ptr<PermuteLayer> create(const LayerParams& params);
+    };
 
-        static CV_WRAP Ptr<ReLULayer> create(double negativeSlope = 0);
+    class CV_EXPORTS PaddingLayer : public Layer
+    {
+    public:
+        static Ptr<PaddingLayer> create(const LayerParams& params);
     };
 
-    class CV_EXPORTS_W ChannelsPReLULayer : public Layer
+    /* Activations */
+
+    class CV_EXPORTS ReLULayer : public Layer
     {
     public:
-        static CV_WRAP Ptr<ChannelsPReLULayer> create();
+        static Ptr<ReLULayer> create(const LayerParams &params);
     };
 
-    class CV_EXPORTS_W TanHLayer : public Layer
+    class CV_EXPORTS ChannelsPReLULayer : public Layer
     {
     public:
-        static CV_WRAP Ptr<TanHLayer> create();
+        static Ptr<ChannelsPReLULayer> create(const LayerParams& params);
     };
 
-    class CV_EXPORTS_W SigmoidLayer : public Layer
+    class CV_EXPORTS TanHLayer : public Layer
     {
     public:
-        static CV_WRAP Ptr<SigmoidLayer> create();
+        static Ptr<TanHLayer> create(const LayerParams &params);
     };
 
-    class CV_EXPORTS_W BNLLLayer : public Layer
+    class CV_EXPORTS SigmoidLayer : public Layer
     {
     public:
-        static CV_WRAP Ptr<BNLLLayer> create();
+        static Ptr<SigmoidLayer> create(const LayerParams &params);
     };
 
-    class CV_EXPORTS_W AbsLayer : public Layer
+    class CV_EXPORTS BNLLLayer : public Layer
     {
     public:
-        static CV_WRAP Ptr<AbsLayer> create();
+        static Ptr<BNLLLayer> create(const LayerParams &params);
     };
 
-    class CV_EXPORTS_W PowerLayer : public Layer
+    class CV_EXPORTS AbsLayer : public Layer
     {
     public:
-        CV_PROP_RW double power, scale, shift;
+        static Ptr<AbsLayer> create(const LayerParams &params);
+    };
 
-        static CV_WRAP Ptr<PowerLayer> create(double power = 1, double scale = 1, double shift = 0);
+    class CV_EXPORTS PowerLayer : public Layer
+    {
+    public:
+        static Ptr<PowerLayer> create(const LayerParams &params);
     };
 
     /* Layers used in semantic segmentation */
 
-    class CV_EXPORTS_W CropLayer : public Layer
+    class CV_EXPORTS CropLayer : public Layer
     {
     public:
-        CV_PROP int startAxis;
-        CV_PROP std::vector<int> offset;
+        int startAxis;
+        std::vector<int> offset;
 
-        static Ptr<CropLayer> create(int start_axis, const std::vector<int> &offset);
+        static Ptr<CropLayer> create(const LayerParams &params);
     };
 
-    class CV_EXPORTS_W EltwiseLayer : public Layer
+    class CV_EXPORTS EltwiseLayer : public Layer
     {
     public:
         enum EltwiseOp
         {
@@ -400,25 +403,49 @@ namespace dnn
             MAX = 2,
         };
 
-        static Ptr<EltwiseLayer> create(EltwiseOp op, const std::vector<int> &coeffs);
+        static Ptr<EltwiseLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS BatchNormLayer : public Layer
+    {
+    public:
+        static Ptr<BatchNormLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS MaxUnpoolLayer : public Layer
+    {
+    public:
+        static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
+    };
+
+    class CV_EXPORTS ScaleLayer : public Layer
+    {
+    public:
+        static Ptr<ScaleLayer> create(const LayerParams& params);
params); + }; + + class CV_EXPORTS ShiftLayer : public Layer + { + public: + static Ptr create(const LayerParams& params); }; - class CV_EXPORTS_W BatchNormLayer : public Layer + class CV_EXPORTS PriorBoxLayer : public Layer { public: - static CV_WRAP Ptr create(bool hasWeights, bool hasBias, float epsilon); + static Ptr create(const LayerParams& params); }; - class CV_EXPORTS_W MaxUnpoolLayer : public Layer + class CV_EXPORTS DetectionOutputLayer : public Layer { public: - static CV_WRAP Ptr create(Size poolKernel, Size poolPad, Size poolStride); + static Ptr create(const LayerParams& params); }; - class CV_EXPORTS_W ScaleLayer : public Layer + class NormalizeBBoxLayer : public Layer { public: - static CV_WRAP Ptr create(bool hasBias); + static Ptr create(const LayerParams& params); }; //! @} diff --git a/modules/dnn/include/opencv2/dnn/blob.hpp b/modules/dnn/include/opencv2/dnn/blob.hpp deleted file mode 100644 index 71e929de353..00000000000 --- a/modules/dnn/include/opencv2/dnn/blob.hpp +++ /dev/null @@ -1,341 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_DNN_BLOB_HPP__ -#define __OPENCV_DNN_DNN_BLOB_HPP__ -#include -#include -#include -#include - -namespace cv -{ -namespace dnn -{ -//! @addtogroup dnn -//! @{ - - /** @brief Lightweight class for storing and processing a shape of blob (or anything else). */ - struct CV_EXPORTS_W BlobShape - { - BlobShape(); //!< Creates [1, 1, 1, 1] shape @todo Make more clearer behavior. 
diff --git a/modules/dnn/include/opencv2/dnn/blob.hpp b/modules/dnn/include/opencv2/dnn/blob.hpp
deleted file mode 100644
index 71e929de353..00000000000
--- a/modules/dnn/include/opencv2/dnn/blob.hpp
+++ /dev/null
@@ -1,341 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_DNN_DNN_BLOB_HPP__
-#define __OPENCV_DNN_DNN_BLOB_HPP__
-#include <opencv2/core.hpp>
-#include <vector>
-#include <ostream>
-#include <iostream>
-
-namespace cv
-{
-namespace dnn
-{
-//! @addtogroup dnn
-//! @{
-
-    /** @brief Lightweight class for storing and processing a shape of blob (or anything else). */
-    struct CV_EXPORTS_W BlobShape
-    {
-        BlobShape();                                        //!< Creates [1, 1, 1, 1] shape @todo Make more clearer behavior.
-        explicit BlobShape(int s0);                         //!< Creates 1-dim shape [@p s0]
-        BlobShape(int s0, int s1);                          //!< @overload
-        BlobShape(int s0, int s1, int s2);                  //!< @overload
-        BlobShape(int num, int cn, int rows, int cols);     //!< Creates 4-dim shape [@p num, @p cn, @p rows, @p cols]
-
-        //! Creates n-dim shape from the @p sizes array; if @p sizes is NULL then shape will contain unspecified data
-        BlobShape(int ndims, const int *sizes);
-        BlobShape(const std::vector<int> &sizes);           //!< Creates n-dim shape from the @p sizes vector
-        template<int n>
-        BlobShape(const Vec<int, n> &shape);                //!< Creates n-dim shape from @ref cv::Vec
-
-        //! Creates n-dim shape and fill its by @p fill
-        static BlobShape all(int ndims, int fill = 1);
-
-        /** @brief Returns number of dimensions. */
-        int dims() const;
-
-        /** @brief Returns reference to the size of the specified @p axis.
-         *
-         * Negative @p axis is supported, in this case a counting starts from the last axis,
-         * i. e. -1 corresponds to last axis.
-         * If non-existing axis was passed then an error will be generated.
-         */
-        int &size(int axis);
-
-        /** @brief Returns the size of the specified @p axis.
-         *  @see size()
-         */
-        int size(int axis) const;
-
-        int operator[](int axis) const; //!< Does the same thing as size(axis).
-        int &operator[](int axis);      //!< Does the same thing as size(int) const.
-
-        /** @brief Returns the size of the specified @p axis.
-         *
-         * Does the same thing as size(int) const, but if non-existing axis will be passed then 1 will be returned,
-         * therefore this function always finishes successfully.
-         */
-        int xsize(int axis) const;
-
-        /** @brief Converts @p axis index to canonical format (where 0 <= @p axis < dims()). */
-        int canonicalAxis(int axis) const;
-
-        /** @brief Returns the product of all sizes of axes. */
-        ptrdiff_t total() const;
-
-        /** @brief Computes the product of sizes of axes among the specified axes range [@p startAxis; @p endAxis).
-         * @details Negative axis indexing can be used. @sa Blob::total(int,int)
-         */
-        ptrdiff_t total(int startAxis, int endAxis = INT_MAX) const;
-
-        /** @brief Constructs new shape from axes in range [@p startAxis; @p endAxis).
-         * @details Negative axis indexing can be used. @sa Blob::total(int,int)
-         */
-        BlobShape slice(int startAxis, int endAxis = INT_MAX) const;
-
-        /** @brief Returns pointer to the first element of continuous size array. */
-        const int *ptr() const;
-        /** @overload */
-        int *ptr();
-
-        bool equal(const BlobShape &other) const;       //!< Checks equality of two shapes.
-        bool operator== (const BlobShape &r) const;     //!< @sa equal()
-
-        BlobShape operator+ (const BlobShape &r) const; //!< Contacenates two shapes.
-
-        static BlobShape like(const Mat &m);    //!< Returns shape of passed Mat.
-        static BlobShape like(const UMat &m);   //!< Returns shape of passed UMat.
-
-        static BlobShape empty();               //!< Returns empty shape [].
-        bool isEmpty() const;                   //!< Returns true if shape is empty (i.e []).
-
-#ifdef CV_CXX_MOVE_SEMANTICS
-        //TBD
-#endif
-
-    private:
-        cv::AutoBuffer<int, 4> sz;
-    };
-
-
-    /** @brief This class provides methods for continuous n-dimensional CPU and GPU array processing.
-     *
-     * The class is realized as a wrapper over @ref cv::Mat and @ref cv::UMat.
-     * It will support methods for switching and logical synchronization between CPU and GPU.
-     */
-    class CV_EXPORTS_W Blob
-    {
-    public:
-        Blob();
-
-        /** @brief Constructs blob with specified @p shape and @p type.
-         */
-        explicit Blob(const BlobShape &shape, int type = CV_32F, int allocFlags = ALLOC_MAT);
-
-        /** @brief Constructs Blob from existing Mat or UMat. */
-        Blob(InputArray data);
-
-        /** @brief Constructs 4-dimensional blob (so-called batch) from image or array of images.
-         * @param image 2-dimensional multi-channel or 3-dimensional single-channel image (or array of such images)
-         * @param dstCn specifies size of second axis of ouptut blob
-         */
-        static Blob fromImages(InputArray image, int dstCn = -1);
-
-        /** @brief Works like Blob::fromImages() but in-place. */
-        void batchFromImages(InputArray image, int dstCn = -1);
-
-        /** @brief Creates blob with specified @p shape and @p type. */
-        void create(const BlobShape &shape, int type = CV_32F, int allocFlags = ALLOC_MAT);
-
-        /** @brief Creates blob from Mat or UMat without copying the data.
-         * @details If in is Mat then Mat data is populated, otherwise - UMat.
-         */
-        void fill(InputArray in);
-
-        /** @brief Creates blob from user data.
-         * @details If @p deepCopy is false then CPU data will not be allocated.
-         */
-        void fill(const BlobShape &shape, int type, void *data, bool deepCopy = true);
-
-        /** @brief Sets @p value to the last used data (if @p allocFlags = -1).
-         * @details If @p allocFlags != -1 then destination data (Mat or UMat) is determined by flags from AllocFlag enum like in create().
-         */
-        void setTo(InputArray value, int allocFlags = -1);
-
-        Mat& matRef(bool writeOnly = true);     //!< Returns reference to cv::Mat, containing blob data.
-        const Mat& matRefConst() const;         //!< Returns reference to cv::Mat, containing blob data, for read-only purposes.
-        UMat &umatRef(bool writeOnly = true);   //!< Returns reference to cv::UMat, containing blob data.
-        const UMat &umatRefConst() const;       //!< Returns reference to cv::UMat, containing blob data, for read-only purposes.
-
-        template<typename XMat>
-        XMat &getRef(bool writeOnly = true);
-        template<typename XMat>
-        const XMat &getRefConst() const;
-
-        void updateMat(bool syncData = true) const;     //!< Actualizes data stored inside Mat of Blob; if @p syncData is false then only shape will be actualized.
-        void updateUMat(bool syncData = true) const;    //!< Actualizes data stored inside Mat of Blob; if @p syncData is false then only shape will be actualized.
-        void sync() const;                              //!< Updates Mat and UMat of Blob.
-
-        /** @brief Returns number of blob dimensions. */
-        int dims() const;
-
-        /** @brief Returns the size of the specified @p axis.
-         *
-         * Negative @p axis is supported, in this case a counting starts from the last axis,
-         * i. e. -1 corresponds to last axis.
-         * If non-existing axis was passed then an error will be generated.
-         */
-        int size(int axis) const;
-
-        /** @brief Returns the size of the specified @p axis.
-         *
-         * Does the same thing as size(int) const, but if non-existing axis will be passed then 1 will be returned,
-         * therefore this function always finishes successfully.
-         */
-        int xsize(int axis) const;
-
-        /** @brief Computes the product of sizes of axes among the specified axes range [@p startAxis; @p endAxis).
-         * @param startAxis the first axis to include in the range.
-         * @param endAxis   the first axis to exclude from the range.
-         * @details Negative axis indexing can be used.
-         */
-        size_t total(int startAxis = 0, int endAxis = INT_MAX) const;
-
-        /** @brief Converts @p axis index to canonical format (where 0 <= @p axis < dims()). */
-        int canonicalAxis(int axis) const;
-
-        /** @brief Returns shape of the blob. */
-        BlobShape shape() const;
-
-        /** @brief Checks equality of two blobs shapes.
-         */
-        bool equalShape(const Blob &other) const;
-
-        /** @brief Returns slice of first two dimensions.
-         * @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
-         */
-        Mat getPlane(int n, int cn);
-
-        /** @brief Returns slice of first dimension.
-         * @details The behaviour is similar to getPlane(), but returns all
-         * channels * rows * cols values, corresponding to the n-th value
-         * of the first dimension.
-         */
-        Mat getPlanes(int n);
-
-        /* Shape getters of 4-dimensional blobs. */
-        int cols() const;       //!< Returns size of the fourth axis blob.
-        int rows() const;       //!< Returns size of the thrid axis blob.
-        int channels() const;   //!< Returns size of the second axis blob.
-        int num() const;        //!< Returns size of the first axis blob.
-        Size size2() const;     //!< Returns cv::Size(cols(), rows())
-        Vec4i shape4() const;   //!< Returns shape of first four blob axes.
-
-        /** @brief Returns linear index of the element with specified coordinates in the blob.
-         *
-         * If @p n < dims() then unspecified coordinates will be filled by zeros.
-         * If @p n > dims() then extra coordinates will be ignored.
-         */
-        template<int n>
-        size_t offset(const Vec<int, n> &pos) const;
-        /** @overload */
-        size_t offset(int n = 0, int cn = 0, int row = 0, int col = 0) const;
-
-        /* CPU pointer getters */
-        /** @brief Returns pointer to the blob element with the specified position, stored in CPU memory.
-         *
-         * @p n correspond to the first axis, @p cn - to the second, etc.
-         * If dims() > 4 then unspecified coordinates will be filled by zeros.
-         * If dims() < 4 then extra coordinates will be ignored.
-         */
-        uchar *ptr(int n = 0, int cn = 0, int row = 0, int col = 0);
-        /** @overload */
-        template<typename Type>
-        Type *ptr(int n = 0, int cn = 0, int row = 0, int col = 0);
-        /** @overload ptr() */
-        float *ptrf(int n = 0, int cn = 0, int row = 0, int col = 0);
-        //TODO: add const ptr methods
-
-        /** @brief Shares data from other @p blob.
-         * @returns *this
-         */
-        Blob &shareFrom(const Blob &blob);
-
-        /** @brief Changes shape of the blob without copying the data.
-         * @returns *this
-         */
-        Blob &reshape(const BlobShape &shape);
-
-        /** @brief Changes shape of the blob without copying the data.
-         * @returns shallow copy of original blob with new shape.
-         */
-        Blob reshaped(const BlobShape &newShape) const;
-
-        int type() const;       //!< Returns type of the blob.
-        int elemSize() const;   //!< Returns size of single element in bytes.
-        int getState() const;   //!< Returns current state of the blob, @see DataState.
-
-    private:
-        const int *sizes() const;
-
-# define CV_DNN_UMAT //DBG
-#ifdef HAVE_OPENCL
-# define CV_DNN_UMAT
-#endif
-
-#ifdef CV_DNN_UMAT
-# define CV_DNN_UMAT_ONLY(expr) (expr)
-#else
-# define CV_DNN_UMAT_ONLY(expr)
-#endif
-
-#ifndef CV_DNN_UMAT
-    Mat m;
-#else
-    mutable Mat m;
-    mutable UMat um;
-    mutable uchar state;
-#endif
-
-public:
-    enum DataState
-    {
-        UNINITIALIZED = 0,
-        HEAD_AT_MAT = 1 << 0,
-        HEAD_AT_UMAT = 1 << 1,
-        SYNCED = HEAD_AT_MAT | HEAD_AT_UMAT
-    };
-
-    enum AllocFlag
-    {
-        ALLOC_MAT = HEAD_AT_MAT,
-        ALLOC_UMAT = HEAD_AT_UMAT,
-        ALLOC_BOTH = SYNCED
-    };
-    };
-
-//! @}
-}
-}
-
-#include "blob.inl.hpp"
-
-#endif
diff --git a/modules/dnn/include/opencv2/dnn/blob.inl.hpp b/modules/dnn/include/opencv2/dnn/blob.inl.hpp
deleted file mode 100644
index b7f741e3acb..00000000000
--- a/modules/dnn/include/opencv2/dnn/blob.inl.hpp
+++ /dev/null
@@ -1,533 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_DNN_DNN_BLOB_INL_HPP__
-#define __OPENCV_DNN_DNN_BLOB_INL_HPP__
-#include "blob.hpp"
-
-namespace cv
-{
-namespace dnn
-{
-
-inline BlobShape::BlobShape()
-{
-    sz.allocate(4);
-    for (size_t i = 0; i < sz.size(); i++)
-        sz[i] = 1;
-}
-
-inline BlobShape BlobShape::all(int ndims, int fill)
-{
-    CV_Assert(ndims >= 0);
-    BlobShape res;
-    res.sz.allocate(ndims);
-    for (int i = 0; i < ndims; i++)
-        res.sz[i] = fill;
-    return res;
-}
-
-inline BlobShape::BlobShape(int ndims, const int *sizes) : sz( (size_t)std::max(ndims, 0) )
-{
-    CV_Assert(ndims >= 0);
-    if (!sizes)
-        return;
-    for (int i = 0; i < ndims; i++)
-        sz[i] = sizes[i];
-}
-
-inline BlobShape::BlobShape(int s0) : sz(1)
-{
-    sz[0] = s0;
-}
-
-inline BlobShape::BlobShape(int s0, int s1) : sz(2)
-{
-    sz[0] = s0;
-    sz[1] = s1;
-}
-
-inline BlobShape::BlobShape(int s0, int s1, int s2) : sz(3)
-{
-    sz[0] = s0;
-    sz[1] = s1;
-    sz[2] = s2;
-}
-
-inline BlobShape::BlobShape(int num, int cn, int rows, int cols) : sz(4)
-{
-    sz[0] = num;
-    sz[1] = cn;
-    sz[2] = rows;
-    sz[3] = cols;
-}
-
-inline BlobShape::BlobShape(const std::vector<int> &sizes) : sz( sizes.size() )
-{
-    for (int i = 0; i < (int)sizes.size(); i++)
-        sz[i] = sizes[i];
-}
-
-template<int n>
-inline BlobShape::BlobShape(const Vec<int, n> &shape) : sz(n)
-{
-    for (int i = 0; i < n; i++)
-        sz[i] = shape[i];
-}
-
-inline int BlobShape::dims() const
-{
-    return (int)sz.size();
-}
-
-inline int BlobShape::xsize(int axis) const
-{
-    if (axis < -dims() || axis >= dims())
-        return 1;
-
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int BlobShape::size(int axis) const
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int &BlobShape::size(int axis)
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int BlobShape::operator[] (int axis) const
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int &BlobShape::operator[] (int axis)
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sz[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int BlobShape::canonicalAxis(int axis) const
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return (axis < 0) ? axis + dims() : axis;
-}
-
-inline ptrdiff_t BlobShape::total() const
-{
-    if (dims() == 0)
-        return 0;
-
-    ptrdiff_t res = 1;
-    for (int i = 0; i < dims(); i++)
-        res *= sz[i];
-    return res;
-}
-
-inline ptrdiff_t BlobShape::total(int startAxis, int endAxis) const
-{
-    if (isEmpty())
-        return 0;
-
-    if (endAxis == INT_MAX)
-        endAxis = dims();
-    else if (endAxis < 0)
-        endAxis += dims();
-    startAxis = (startAxis < 0) ? startAxis + dims() : startAxis;
-    CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
-
-    ptrdiff_t res = 1;
-    for (int i = startAxis; i < endAxis; i++)
-        res *= sz[i];
-    return res;
-}
-
-inline BlobShape BlobShape::slice(int startAxis, int endAxis) const
-{
-    if (isEmpty())
-        return BlobShape::empty();
-
-    if (endAxis == INT_MAX)
-        endAxis = dims();
-    else if (endAxis < 0)
-        endAxis += dims();
-    startAxis = (startAxis < 0) ? startAxis + dims() : startAxis;
-    CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
-
-    BlobShape res(endAxis - startAxis, (const int*)NULL);
-    for (int i = startAxis; i < endAxis; i++)
-        res[i - startAxis] = sz[i];
-    return res;
-}
-
-inline const int *BlobShape::ptr() const
-{
-    return sz;
-}
-
-inline int *BlobShape::ptr()
-{
-    return sz;
-}
-
-inline bool BlobShape::equal(const BlobShape &other) const
-{
-    if (this->dims() != other.dims())
-        return false;
-
-    for (int i = 0; i < other.dims(); i++)
-    {
-        if (sz[i] != other.sz[i])
-            return false;
-    }
-
-    return true;
-}
-
-inline bool BlobShape::operator==(const BlobShape &r) const
-{
-    return this->equal(r);
-}
-
-inline BlobShape BlobShape::like(const Mat &m)
-{
-    return BlobShape(m.dims, (const int*)m.size);
-}
-
-inline BlobShape BlobShape::like(const UMat &m)
-{
-    return BlobShape(m.dims, (const int*)m.size);
-}
-
-inline BlobShape BlobShape::empty()
-{
-    return BlobShape(0, (const int*)NULL);
-}
-
-inline bool BlobShape::isEmpty() const
-{
-    return dims() == 0;
-}
-
-inline BlobShape BlobShape::operator+(const BlobShape &r) const
-{
-    BlobShape newShape(this->dims() + r.dims(), (int*)NULL);
-    for (int i = 0; i < this->dims(); i++)
-        newShape[i] = (*this)[i];
-    for (int i = 0; i < r.dims(); i++)
-        newShape[this->dims() + i] = r[i];
-    return newShape;
-}
-
-CV_EXPORTS std::ostream &operator<< (std::ostream &stream, const BlobShape &shape);
-
-/////////////////////////////////////////////////////////////////////
-
-#ifndef CV_DNN_UMAT
-# define CV_DNN_SWITCH_MU(cpu_expr, gpu_expr) (cpu_expr)
-#else
-# define CV_DNN_SWITCH_MU(cpu_expr, gpu_expr) ((state == HEAD_AT_UMAT) ? (gpu_expr) : (cpu_expr))
-#endif
-
-
-inline int Blob::dims() const
-{
-    return CV_DNN_SWITCH_MU(m.dims, um.dims);
-}
-
-inline const int * Blob::sizes() const
-{
-    return CV_DNN_SWITCH_MU((const int*)m.size, (const int*)um.size);
-}
-
-inline int Blob::type() const
-{
-    return CV_DNN_SWITCH_MU(m.type(), um.type());
-}
-
-template<int n>
-inline size_t Blob::offset(const Vec<int, n> &pos) const
-{
-    const MatStep &step = CV_DNN_SWITCH_MU(m.step, um.step);
-    size_t ofs = 0;
-    int i;
-    for (i = 0; i < std::min(n, dims()); i++)
-    {
-        CV_DbgAssert(pos[i] >= 0 && pos[i] < size(i));
-        ofs += step[i] * pos[i];
-    }
-    for (; i < dims(); i++)
-        CV_DbgAssert(pos[i] == 0);
-    CV_DbgAssert(ofs % elemSize() == 0);
-    return ofs / elemSize();
-}
-
-inline int Blob::canonicalAxis(int axis) const
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return (axis < 0) ? axis + dims() : axis;
-}
-
-inline int Blob::xsize(int axis) const
-{
-    if (axis < -dims() || axis >= dims())
-        return 1;
-
-    return sizes()[(axis < 0) ? axis + dims() : axis];
-}
-
-inline int Blob::size(int axis) const
-{
-    CV_Assert(-dims() <= axis && axis < dims());
-    return sizes()[(axis < 0) ? axis + dims() : axis];
axis + dims() : axis]; -} - -inline size_t Blob::total(int startAxis, int endAxis) const -{ - if (startAxis < 0) - startAxis += dims(); - - if (endAxis == INT_MAX) - endAxis = dims(); - else if (endAxis < 0) - endAxis += dims(); - - CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims()); - - size_t cnt = 1; //fix: assume that slice isn't empty - for (int i = startAxis; i < endAxis; i++) - cnt *= (size_t)sizes()[i]; - - return cnt; -} - -inline size_t Blob::offset(int n, int cn, int row, int col) const -{ - return offset(Vec4i(n, cn, row, col)); -} - -inline float *Blob::ptrf(int n, int cn, int row, int col) -{ - return matRef(false).ptr() + offset(n, cn, row, col); -} - -inline uchar *Blob::ptr(int n, int cn, int row, int col) -{ - Mat &mat = matRef(false); - return mat.ptr() + mat.elemSize() * offset(n, cn, row, col); -} - -template -inline Dtype* Blob::ptr(int n, int cn, int row, int col) -{ - CV_Assert(type() == cv::DataDepth::value); - return (Dtype*) ptr(n, cn, row, col); -} - -inline BlobShape Blob::shape() const -{ - return BlobShape(dims(), sizes()); -} - -inline bool Blob::equalShape(const Blob &other) const -{ - if (this->dims() != other.dims()) - return false; - - for (int i = 0; i < dims(); i++) - { - if (this->sizes()[i] != other.sizes()[i]) - return false; - } - return true; -} - -inline Mat& Blob::matRef(bool writeOnly) -{ -#ifdef CV_DNN_UMAT - updateMat(!writeOnly); - state = HEAD_AT_MAT; -#else - (void)writeOnly; -#endif - return m; -} - -inline const Mat& Blob::matRefConst() const -{ - CV_DNN_UMAT_ONLY( updateMat() ); - return m; -} - -inline UMat &Blob::umatRef(bool writeOnly) -{ -#ifndef CV_DNN_UMAT - CV_Error(Error::GpuNotSupported, ""); - (void)writeOnly; - return *(new UMat()); -#else - updateUMat(!writeOnly); - state = HEAD_AT_UMAT; - return um; -#endif -} - -inline const UMat &Blob::umatRefConst() const -{ -#ifndef CV_DNN_UMAT - CV_Error(Error::GpuNotSupported, ""); - return *(new UMat()); -#else - updateUMat(); - return um; -#endif -} - -template<> -inline Mat &Blob::getRef(bool writeOnly) -{ - return matRef(writeOnly); -} - -template<> -inline UMat &Blob::getRef(bool writeOnly) -{ - return umatRef(writeOnly); -} - -template<> -inline const Mat &Blob::getRefConst() const -{ - return matRefConst(); -} - -template<> -inline const UMat &Blob::getRefConst() const -{ - return umatRefConst(); -} - -inline Mat Blob::getPlane(int n, int cn) -{ - CV_Assert(dims() > 2); - return Mat(dims() - 2, sizes() + 2, type(), ptr(n, cn)); -} - -inline Mat Blob::getPlanes(int n) -{ - CV_Assert(dims() > 3); - return Mat(dims() - 1, sizes() + 1, type(), ptr(n)); -} - -inline int Blob::cols() const -{ - return xsize(3); -} - -inline int Blob::rows() const -{ - return xsize(2); -} - -inline int Blob::channels() const -{ - return xsize(1); -} - -inline int Blob::num() const -{ - return xsize(0); -} - -inline Size Blob::size2() const -{ - return Size(cols(), rows()); -} - -inline Blob &Blob::shareFrom(const Blob &blob) -{ - this->m = blob.m; -#ifdef CV_DNN_UMAT - this->um = blob.um; - this->state = blob.state; -#endif - return *this; -} - -inline Blob &Blob::reshape(const BlobShape &newShape) -{ - if (!m.empty()) m = m.reshape(1, newShape.dims(), newShape.ptr()); -#ifdef CV_DNN_UMAT - if (!um.empty()) um = um.reshape(1, newShape.dims(), newShape.ptr()); -#endif - return *this; -} - -inline Blob Blob::reshaped(const BlobShape &newShape) const -{ - Blob res(*this); //also, res.shareFrom(*this) could be used - res.reshape(newShape); - return res; -} - -inline int 
-{
-    return CV_ELEM_SIZE(type());
-}
-
-inline int Blob::getState() const
-{
-#ifdef CV_DNN_UMAT
-    return this->state;
-#else
-    return m.empty() ? UNINITIALIZED : HEAD_AT_MAT;
-#endif
-}
-
-}
-}
-
-#endif
diff --git a/modules/dnn/include/opencv2/dnn/dict.hpp b/modules/dnn/include/opencv2/dnn/dict.hpp
index f7cd0f21150..1096cc0ff26 100644
--- a/modules/dnn/include/opencv2/dnn/dict.hpp
+++ b/modules/dnn/include/opencv2/dnn/dict.hpp
@@ -118,6 +118,9 @@ class CV_EXPORTS Dict
     //! If the @p key in the dictionary then returns pointer to its value, else returns NULL.
     DictValue *ptr(const String &key);
 
+    /** @overload */
+    const DictValue *ptr(const String &key) const;
+
     //! If the @p key in the dictionary then returns its value, else an error will be generated.
     const DictValue &get(const String &key) const;
 
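// --- Illustrative sketch, not part of the patch -----------------------------
// Why the const overload of Dict::ptr() is needed: the new create() factories
// receive a const LayerParams& (LayerParams derives from Dict), so probing an
// optional key requires a const-qualified ptr(). "dilation_w" is an assumed,
// Caffe-style key used purely for illustration.
#include <opencv2/dnn.hpp>

static int dilationOrDefault(const cv::dnn::LayerParams &params)
{
    const cv::dnn::DictValue *v = params.ptr("dilation_w"); // resolves to the const overload
    return v ? v->get<int>() : 1;                           // fall back to a default
}
// -----------------------------------------------------------------------------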
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index cdfdfe96627..d2440d5c6f6 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -45,7 +45,6 @@
 #include <vector>
 #include <opencv2/core.hpp>
 #include <opencv2/dnn/dict.hpp>
-#include <opencv2/dnn/blob.hpp>
 
 namespace cv
 {
@@ -70,7 +69,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
     {
     public:
         //TODO: Add ability to name blob params
-        std::vector<Blob> blobs;    //!< List of learned parameters stored as blobs.
+        std::vector<Mat> blobs;     //!< List of learned parameters stored as blobs.
 
         String name; //!< Name of the layer instance (optional, can be used for internal purposes).
         String type; //!< Type name which was used for creating layer by layer factory (optional).
@@ -86,7 +85,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
     public:
 
         //! List of learned parameters must be stored here to allow reading them by using Net::getParam().
-        CV_PROP_RW std::vector<Blob> blobs;
+        CV_PROP_RW std::vector<Mat> blobs;
 
         /** @brief Allocates internal buffers and output blobs with respect to the shape of inputs.
          *  @param[in]  input  vector of already allocated input blobs
@@ -96,25 +95,25 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
         * If this method is called the first time then the @p output vector consists of empty blobs and its size is determined by the number of output connections.
         * This method can be called multiple times if size of any @p input blob was changed.
        */
-        virtual void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output) = 0;
+        virtual void allocate(const std::vector<Mat*> &input, std::vector<Mat> &output) = 0;
 
         /** @brief Given the @p input blobs, computes the output @p blobs.
         *  @param[in]  input  the input blobs.
         *  @param[out] output allocated output blobs, which will store results of the computation.
        */
-        virtual void forward(std::vector<Blob*> &input, std::vector<Blob> &output) = 0;
+        virtual void forward(std::vector<Mat*> &input, std::vector<Mat> &output) = 0;
 
         /** @brief @overload */
-        CV_WRAP void allocate(const std::vector<Blob> &inputs, CV_OUT std::vector<Blob> &outputs);
+        CV_WRAP void allocate(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);
 
         /** @brief @overload */
-        CV_WRAP std::vector<Blob> allocate(const std::vector<Blob> &inputs);
+        CV_WRAP std::vector<Mat> allocate(const std::vector<Mat> &inputs);
 
         /** @brief @overload */
-        CV_WRAP void forward(const std::vector<Blob> &inputs, CV_IN_OUT std::vector<Blob> &outputs);
+        CV_WRAP void forward(const std::vector<Mat> &inputs, CV_IN_OUT std::vector<Mat> &outputs);
 
         /** @brief Allocates layer and computes output.
        */
-        CV_WRAP void run(const std::vector<Blob> &inputs, CV_OUT std::vector<Blob> &outputs);
+        CV_WRAP void run(const std::vector<Mat> &inputs, CV_OUT std::vector<Mat> &outputs);
 
         /** @brief Returns index of input blob into the input array.
         *  @param inputName label of input blob
@@ -248,13 +247,13 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
         * @note If updating blob is not empty then @p blob must have the same shape,
         * because network reshaping is not implemented yet.
        */
-        CV_WRAP void setBlob(String outputName, const Blob &blob);
+        CV_WRAP void setBlob(String outputName, const Mat &blob);
 
         /** @brief Returns the layer output blob.
         *  @param outputName the descriptor of the returning layer output blob.
         *  @see connect(String, String)
        */
-        CV_WRAP Blob getBlob(String outputName);
+        CV_WRAP Mat getBlob(String outputName);
 
         /** @brief Sets the new value for the learned param of the layer.
         *  @param layer name or id of the layer.
@@ -264,14 +263,14 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
         * @note If shape of the new blob differs from the previous shape,
         * then the following forward pass may fail.
        */
-        CV_WRAP void setParam(LayerId layer, int numParam, const Blob &blob);
+        CV_WRAP void setParam(LayerId layer, int numParam, const Mat &blob);
 
         /** @brief Returns parameter blob of the layer.
         *  @param layer name or id of the layer.
         *  @param numParam index of the layer parameter in the Layer::blobs array.
         *  @see Layer::blobs
        */
-        CV_WRAP Blob getParam(LayerId layer, int numParam = 0);
+        CV_WRAP Mat getParam(LayerId layer, int numParam = 0);
 
         /** @brief Returns indexes of layers with unconnected outputs.
         */
@@ -341,7 +340,10 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
     /** @brief Loads blob which was serialized as torch.Tensor object of Torch7 framework.
     *  @warning This function has the same limitations as createTorchImporter().
     */
-    CV_EXPORTS_W Blob readTorchBlob(const String &filename, bool isBinary = true);
+    CV_EXPORTS_W Mat readTorchBlob(const String &filename, bool isBinary = true);
+
+    CV_EXPORTS Mat blobFromImage(const Mat& image, double scalefactor=1.0, bool swapRB=true);
+    CV_EXPORTS Mat blobFromImages(const std::vector<Mat>& image, double scalefactor=1.0, bool swapRB=true);
 
 //! @}
 }
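// --- Illustrative sketch, not part of the patch -----------------------------
// What blobFromImage() produces: a 4-D, CV_32F Mat in NCHW order (the layout
// the Mat-based layers consume). A hand-rolled equivalent for one 3-channel
// image, ignoring `scalefactor` and `swapRB`, might look like the code below;
// this is an assumption about the helper's behavior, not its implementation.
#include <opencv2/core.hpp>

static cv::Mat imageToNCHW(const cv::Mat& img)   // img: H x W, 3 channels
{
    CV_Assert(img.channels() == 3);
    cv::Mat imgF;
    img.convertTo(imgF, CV_32F);

    int sz[] = { 1, 3, img.rows, img.cols };
    cv::Mat blob(4, sz, CV_32F);

    // 2-D headers over the three channel planes stored inside the blob
    cv::Mat planes[] = {
        cv::Mat(img.rows, img.cols, CV_32F, blob.ptr<float>(0, 0)),
        cv::Mat(img.rows, img.cols, CV_32F, blob.ptr<float>(0, 1)),
        cv::Mat(img.rows, img.cols, CV_32F, blob.ptr<float>(0, 2))
    };
    int fromTo[] = { 0,0, 1,1, 2,2 };            // interleaved -> planar, no swap
    cv::mixChannels(&imgF, 1, planes, 3, fromTo, 3);
    return blob;
}
// -----------------------------------------------------------------------------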
diff --git a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
index a272044025e..8a3c72ee983 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
@@ -298,6 +298,12 @@ inline DictValue *Dict::ptr(const String &key)
     return (i == dict.end()) ? NULL : &i->second;
 }
 
+inline const DictValue *Dict::ptr(const String &key) const
+{
+    _Dict::const_iterator i = dict.find(key);
+    return (i == dict.end()) ? NULL : &i->second;
+}
+
 inline const DictValue &Dict::get(const String &key) const
 {
     _Dict::const_iterator i = dict.find(key);
diff --git a/modules/dnn/include/opencv2/dnn/layer.hpp b/modules/dnn/include/opencv2/dnn/layer.hpp
index e0510411b11..af663dd3e1e 100644
--- a/modules/dnn/include/opencv2/dnn/layer.hpp
+++ b/modules/dnn/include/opencv2/dnn/layer.hpp
@@ -122,7 +122,7 @@ static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, __LayerStati
 template<typename LayerClass>
 Ptr<Layer> _layerDynamicRegisterer(LayerParams &params)
 {
-    return Ptr<Layer>(new LayerClass(params));
+    return Ptr<Layer>(LayerClass::create(params));
 }
 
 //allows automatically register created layer on module load time
diff --git a/modules/dnn/include/opencv2/dnn/shape_utils.hpp b/modules/dnn/include/opencv2/dnn/shape_utils.hpp
index f52e5b9cbc1..6d2d7d65562 100644
--- a/modules/dnn/include/opencv2/dnn/shape_utils.hpp
+++ b/modules/dnn/include/opencv2/dnn/shape_utils.hpp
@@ -43,14 +43,13 @@
 #define __OPENCV_DNN_DNN_SHAPE_UTILS_HPP__
 
 #include <opencv2/core.hpp>
+#include <opencv2/core/types_c.h>
 #include <ostream>
 
 namespace cv {
 namespace dnn {
 
 //Useful shortcut
-typedef BlobShape Shape;
-
 inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
 {
     return s << "[" << r.start << ", " << r.end << ")";
@@ -59,7 +58,7 @@ inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
 
 //Reshaping
 //TODO: add -1 specifier for automatic size inferring
-template<typename Mat>
+/*template<typename Mat>
 void reshape(Mat &m, const BlobShape &shape)
 {
     m = m.reshape(1, shape.dims(), shape.ptr());
@@ -69,7 +68,7 @@ template<typename Mat>
 Mat reshaped(const Mat &m, const BlobShape &shape)
 {
     return m.reshape(1, shape.dims(), shape.ptr());
-}
+}*/
 
 //Slicing
 
@@ -80,22 +79,19 @@ struct _Range : public cv::Range
     _Range(int start, int size = 1) : cv::Range(start, start + size) {}
 };
 
-template<typename Mat>
-Mat slice(const Mat &m, const _Range &r0)
+static inline Mat slice(const Mat &m, const _Range &r0)
 {
-    //CV_Assert(m.dims >= 1);
-    cv::AutoBuffer<Range, 4> ranges(m.dims);
+    Range ranges[CV_MAX_DIM];
     for (int i = 1; i < m.dims; i++)
         ranges[i] = Range::all();
     ranges[0] = r0;
    return m(&ranges[0]);
 }
 
-template<typename Mat>
-Mat slice(const Mat &m, const _Range &r0, const _Range &r1)
+static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1)
 {
     CV_Assert(m.dims >= 2);
-    cv::AutoBuffer<Range, 4> ranges(m.dims);
+    Range ranges[CV_MAX_DIM];
     for (int i = 2; i < m.dims; i++)
         ranges[i] = Range::all();
     ranges[0] = r0;
@@ -103,11 +99,10 @@ Mat slice(const Mat &m, const _Range &r0, const _Range &r1)
     ranges[1] = r1;
     return m(&ranges[0]);
 }
 
-template<typename Mat>
-Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2)
+static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2)
 {
-    CV_Assert(m.dims <= 3);
-    cv::AutoBuffer<Range, 4> ranges(m.dims);
+    CV_Assert(m.dims >= 3);
+    Range ranges[CV_MAX_DIM];
     for (int i = 3; i < m.dims; i++)
         ranges[i] = Range::all();
     ranges[0] = r0;
@@ -116,11 +111,10 @@ Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2)
-template<typename Mat>
-Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, const _Range &r3)
+static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, const _Range &r3)
 {
-    CV_Assert(m.dims <= 4);
-    cv::AutoBuffer<Range, 4> ranges(m.dims);
+    CV_Assert(m.dims >= 4);
+    Range ranges[CV_MAX_DIM];
     for (int i = 4; i < m.dims; i++)
         ranges[i] = Range::all();
     ranges[0] = r0;
@@ -130,7 +124,28 @@ Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, co
     ranges[3] = r3;
     return m(&ranges[0]);
 }
 
-BlobShape computeShapeByReshapeMask(const BlobShape &srcShape, const BlobShape &maskShape, Range srcRange = Range::all());
+static inline Mat getPlane(const Mat &m, int n, int cn)
+{
+    CV_Assert(m.dims > 2);
+    Range range[CV_MAX_DIM];
+    int sz[CV_MAX_DIM];
+    for(int i = 2; i < m.dims; i++)
+    {
+        sz[i-2] = m.size.p[i];
+        range[i] = Range::all();
+    }
+    range[0] = Range(n, n+1);
+    range[1] = Range(cn, cn+1);
+    return m(range).reshape(1, m.dims-2, sz);
+}
+
+static inline size_t shapeTotal(const std::vector<int>& shape)
+{
+    size_t i, n = shape.size(), p = 1;
+    for( i = 0; i < n; i++ ) p *= shape[i];
+
+    return p;
+}
 
 }
 }
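// --- Illustrative sketch, not part of the patch -----------------------------
// The rewritten helpers operate on plain n-dimensional Mats: slice() takes
// per-axis ranges, getPlane() returns a 2-D header over one channel plane of
// an NCHW blob (no copy), and shapeTotal() multiplies the extents.
#include <opencv2/dnn/shape_utils.hpp>

static void shapeUtilsDemo(const cv::Mat& blob)  // blob: 4-D, e.g. [N, C, H, W]
{
    using namespace cv::dnn;
    cv::Mat firstSample = slice(blob, _Range(0));   // -> [1, C, H, W]
    cv::Mat plane = getPlane(blob, 0, 1);           // -> H x W view of channel 1
    std::vector<int> shape(blob.size.p, blob.size.p + blob.dims);
    size_t elems = shapeTotal(shape);               // N * C * H * W
    (void)firstSample; (void)plane; (void)elems;
}
// -----------------------------------------------------------------------------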
diff --git a/modules/dnn/misc/python/pyopencv_dnn.hpp b/modules/dnn/misc/python/pyopencv_dnn.hpp
index 06661de123a..40ac504f241 100644
--- a/modules/dnn/misc/python/pyopencv_dnn.hpp
+++ b/modules/dnn/misc/python/pyopencv_dnn.hpp
@@ -1,66 +1,5 @@
 #ifdef HAVE_OPENCV_DNN
 typedef dnn::DictValue LayerId;
-typedef std::vector<dnn::Blob> vector_Blob;
-
-template<>
-bool pyopencv_to(PyObject *o, dnn::Blob &blob, const char *name);
-
-template<> struct pyopencvVecConverter<dnn::Blob>
-{
-    static bool to(PyObject* obj, std::vector<dnn::Blob>& value, const ArgInfo info)
-    {
-        if (PyArray_Check(obj))
-        {
-            value.resize(1);
-            return pyopencv_to(obj, value[0], info.name);
-        }
-
-        return pyopencv_to_generic_vec(obj, value, info);
-    }
-
-    static PyObject* from(const std::vector<dnn::Blob>& value)
-    {
-        return pyopencv_from_generic_vec(value);
-    }
-};
-
-template<>
-bool pyopencv_to(PyObject *o, std::vector<dnn::Blob> &blobs, const char *name) //required for Layer::blobs RW
-{
-    return pyopencvVecConverter<dnn::Blob>::to(o, blobs, ArgInfo(name, false));
-}
-
-template<>
-bool pyopencv_to(PyObject *o, dnn::Blob &blob, const char *name)
-{
-    Mat &dst = blob.matRef();
-    if (!pyopencv_to(o, dst, name))
-        return false;
-
-    if (PyArray_Check(o)) //try fix channels
-    {
-        PyArrayObject* oarr = (PyArrayObject*) o;
-
-        if (PyArray_NDIM(oarr) == dst.dims)
-            return true;
-
-        int ndims = PyArray_NDIM(oarr);
-        std::vector<int> shape(ndims);
-        const npy_intp* _sizes = PyArray_DIMS(oarr);
-        for (int i = 0; i < ndims; i++)
-            shape[i] = (int)_sizes[i];
-
-        dst = dst.reshape(1, ndims, &shape[0]);
-    }
-
-    return true;
-}
-
-template<>
-PyObject *pyopencv_from(const dnn::Blob &blob)
-{
-    return pyopencv_from(blob.matRefConst());
-}
 
 template<>
 bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name)
@@ -87,22 +26,4 @@ bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name)
     return false;
 }
 
-template<>
-bool pyopencv_to(PyObject *o, dnn::BlobShape &shape, const char *name)
-{
-    std::vector<int> data;
-    if (!pyopencv_to_generic_vec(o, data, ArgInfo(name, false)))
-        return false;
-
-    shape = data.size() ? dnn::BlobShape((int)data.size(), &data[0]) : dnn::BlobShape::empty();
-    return true;
-}
-
-template<>
-PyObject *pyopencv_from(const dnn::BlobShape &shape)
-{
-    std::vector<int> data(shape.ptr(), shape.ptr() + shape.dims());
-    return pyopencv_from_generic_vec(data);
-}
-
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/dnn/perf/perf_convolution.cpp b/modules/dnn/perf/perf_convolution.cpp
index 17fda01592c..af37134300e 100644
--- a/modules/dnn/perf/perf_convolution.cpp
+++ b/modules/dnn/perf/perf_convolution.cpp
@@ -21,15 +21,21 @@ CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
 //Squared Size
 #define SSZ(n) cv::Size(n, n)
 
-typedef std::pair<BlobShape, int> InpShapeNumOut;
+typedef std::pair<std::vector<int>, int> InpShapeNumOut;
 typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
 typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
 
+static inline std::vector<int> blobShape(int count, int nplanes, int height, int width)
+{
+    int data[] = {count, nplanes, height, width};
+    return std::vector<int>(data, data+4);
+}
+
 PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
     Values(Size(1, 1), Size(3, 3), Size(5, 5), Size(11, 11)),
-    Values(make_pair(BlobShape(1, 4, 224, 224), 64),
-           make_pair(BlobShape(1, 64, 112, 122), 128),
-           make_pair(BlobShape(1, 256, 28, 28), 512)),
+    Values(make_pair(blobShape(1, 4, 224, 224), 64),
+           make_pair(blobShape(1, 64, 112, 122), 128),
+           make_pair(blobShape(1, 256, 28, 28), 512)),
     GroupSize::all(),
     StrideSize::all())
 )
@@ -38,17 +44,20 @@ PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
     ConvParam params = GetParam();
     int ksz     = get<0>(params).width;
-    BlobShape inpShape = get<1>(params).first;
+    std::vector<int> inpShape = get<1>(params).first;
     int outCn   = get<1>(params).second;
     int groups  = get<2>(params);
     int stride  = (ksz >= 11) ? 4 : (int)get<3>(params);
     int inpCn = inpShape[1];
-    Blob wgtBlob(BlobShape(outCn, inpCn/groups, ksz, ksz)), biasBlob(BlobShape(outCn, 1, 1, 1));
-    Blob inpBlob(inpShape);
-    rng.fill(biasBlob.matRef(), RNG::UNIFORM, -1, +1);
-    rng.fill(wgtBlob.matRef(), RNG::UNIFORM, -1, +1);
-    rng.fill(inpBlob.matRef(), RNG::UNIFORM, -1, +1);
+    int wgtSize[] = { outCn, inpCn/groups, ksz, ksz };
+    int biasSize[] = { outCn, 1, 1, 1 };
+    const int wtype = CV_32F;
+    Mat wgtBlob(4, wgtSize, wtype), biasBlob(4, biasSize, wtype);
+    Mat inpBlob(4, &inpShape[0], wtype);
+    rng.fill(biasBlob, RNG::UNIFORM, -1, +1);
+    rng.fill(wgtBlob, RNG::UNIFORM, -1, +1);
+    rng.fill(inpBlob, RNG::UNIFORM, -1, +1);
 
     LayerParams lp;
     lp.set("num_output", outCn);
@@ -59,15 +68,18 @@ PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
     lp.blobs.push_back(wgtBlob);
     lp.blobs.push_back(biasBlob);
 
-    std::vector<Blob*> inpBlobs(1, &inpBlob);
-    std::vector<Blob> outBlobs;
+    std::vector<Mat*> inpBlobs(1, &inpBlob);
+    std::vector<Mat> outBlobs;
 
     cv::setNumThreads(cv::getNumberOfCPUs());
 
     Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);
     layer->allocate(inpBlobs, outBlobs);
 
-    declare.in(inpBlob.matRef(), wgtBlob.matRef(), WARMUP_RNG).out(outBlobs[0].matRef()).tbb_threads(cv::getNumThreads());
+    Mat inpBlob2D = inpBlob.reshape(1, outCn);
+    Mat wgtBlob2D = wgtBlob.reshape(1, outCn*(inpCn/groups));
+    Mat outBlob2D = outBlobs[0].reshape(1, outBlobs[0].size[0]);
+    declare.in(inpBlob2D, wgtBlob2D, WARMUP_RNG).out(outBlob2D).tbb_threads(cv::getNumThreads());
 
     TEST_CYCLE_N(10)
     {
@@ -77,4 +89,4 @@ PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
     }
 
     SANITY_CHECK_NOTHING();
-}
\ No newline at end of file
+}
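// --- Illustrative sketch, not part of the patch -----------------------------
// Running a single layer outside a Net, as the perf test above does: create it
// through the factory, then allocate() the output blobs and forward().
#include <opencv2/dnn.hpp>

static cv::Mat runReLU(cv::Mat& input)              // input: any CV_32F blob
{
    using namespace cv::dnn;
    LayerParams lp;                                 // ReLU needs no parameters here
    cv::Ptr<Layer> relu = LayerFactory::createLayerInstance("ReLU", lp);
    std::vector<cv::Mat*> inputs(1, &input);        // layers take pointers to inputs
    std::vector<cv::Mat> outputs;
    relu->allocate(inputs, outputs);                // shapes the output blobs
    relu->forward(inputs, outputs);                 // fills them
    return outputs[0];
}
// -----------------------------------------------------------------------------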
diff --git a/modules/dnn/samples/caffe_googlenet.cpp b/modules/dnn/samples/caffe_googlenet.cpp
index 0655a817f0f..73d5c2c6731 100644
--- a/modules/dnn/samples/caffe_googlenet.cpp
+++ b/modules/dnn/samples/caffe_googlenet.cpp
@@ -50,9 +50,9 @@ using namespace cv::dnn;
 using namespace std;
 
 /* Find best class for the blob (i. e. class with maximal probability) */
-void getMaxClass(dnn::Blob &probBlob, int *classId, double *classProb)
+void getMaxClass(const Mat &probBlob, int *classId, double *classProb)
 {
-    Mat probMat = probBlob.matRefConst().reshape(1, 1); //reshape the blob to 1x1000 matrix
+    Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix
     Point classNumber;
 
     minMaxLoc(probMat, NULL, classProb, NULL, &classNumber);
@@ -115,8 +115,7 @@ int main(int argc, char **argv)
     }
 
     resize(img, img, Size(224, 224));       //GoogLeNet accepts only 224x224 RGB-images
-    cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
-    dnn::Blob inputBlob = dnn::Blob::fromImages(img);   //Convert Mat to dnn::Blob batch of images
+    Mat inputBlob = blobFromImage(img);   //Convert Mat to batch of images
     //! [Prepare blob]
 
     //! [Set input blob]
@@ -128,7 +127,7 @@ int main(int argc, char **argv)
     //! [Make forward pass]
 
     //! [Gather output]
-    dnn::Blob prob = net.getBlob("prob");   //gather output of "prob" layer
+    Mat prob = net.getBlob("prob");   //gather output of "prob" layer
 
     int classId;
     double classProb;
diff --git a/modules/dnn/samples/fcn_semsegm.cpp b/modules/dnn/samples/fcn_semsegm.cpp
index bdeb75cd2f7..58d38d65321 100755
--- a/modules/dnn/samples/fcn_semsegm.cpp
+++ b/modules/dnn/samples/fcn_semsegm.cpp
@@ -1,7 +1,6 @@
 #include <opencv2/dnn.hpp>
 #include <opencv2/imgproc.hpp>
 #include <opencv2/highgui.hpp>
-#include <opencv2/core/ocl.hpp>
 
 using namespace cv;
 using namespace cv::dnn;
@@ -45,11 +44,11 @@ static vector<Vec3b> readColors(const string &filename = "pascal-classes.txt
     return colors;
 }
 
-static void colorizeSegmentation(dnn::Blob &score, const vector<Vec3b> &colors, cv::Mat &segm)
+static void colorizeSegmentation(const Mat &score, const vector<Vec3b> &colors, cv::Mat &segm)
 {
-    const int rows = score.rows();
-    const int cols = score.cols();
-    const int chns = score.channels();
+    const int rows = score.size[2];
+    const int cols = score.size[3];
+    const int chns = score.size[1];
 
     cv::Mat maxCl(rows, cols, CV_8UC1);
     cv::Mat maxVal(rows, cols, CV_32FC1);
@@ -57,7 +56,7 @@ static void colorizeSegmentation(dnn::Blob &score, const vector<Vec3b> &colo
     {
         for (int row = 0; row < rows; row++)
         {
-            const float *ptrScore = score.ptrf(0, ch, row);
+            const float *ptrScore = score.ptr<float>(0, ch, row);
             uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
             float *ptrMaxVal = maxVal.ptr<float>(row);
             for (int col = 0; col < cols; col++)
@@ -87,7 +86,6 @@ static void colorizeSegmentation(dnn::Blob &score, const vector<Vec3b> &colo
 int main(int argc, char **argv)
 {
     cv::dnn::initModule();          //Required if OpenCV is built as static libs
-    cv::ocl::setUseOpenCL(false);   //OpenCL switcher
 
     String modelTxt = fcnType + "-heavy-pascal.prototxt";
     String modelBin = fcnType + "-heavy-pascal.caffemodel";
@@ -132,7 +130,7 @@ int main(int argc, char **argv)
     }
 
     resize(img, img, Size(500, 500));       //FCN accepts 500x500 RGB-images
-    dnn::Blob inputBlob = dnn::Blob::fromImages(img);   //Convert Mat to dnn::Blob batch of images
+    Mat inputBlob = blobFromImage(img);   //Convert Mat to batch of images
     //! [Prepare blob]
 
     //! [Set input blob]
@@ -147,13 +145,13 @@ int main(int argc, char **argv)
     //! [Make forward pass]
 
     //! [Gather output]
[Gather output] - dnn::Blob score = net.getBlob("score"); + Mat score = net.getBlob("score"); - cv::Mat colorize; + Mat colorize; colorizeSegmentation(score, colors, colorize); - cv::Mat show; - cv::addWeighted(img, 0.4, colorize, 0.6, 0.0, show); - cv::imshow("show", show); - cv::waitKey(0); + Mat show; + addWeighted(img, 0.4, colorize, 0.6, 0.0, show); + imshow("show", show); + waitKey(0); return 0; } //main diff --git a/modules/dnn/samples/ssd_object_detection.cpp b/modules/dnn/samples/ssd_object_detection.cpp index ec01d8f6cf6..4f9e6df732d 100644 --- a/modules/dnn/samples/ssd_object_detection.cpp +++ b/modules/dnn/samples/ssd_object_detection.cpp @@ -101,7 +101,7 @@ int main(int argc, char** argv) //! [Prepare blob] Mat preprocessedFrame = preprocess(frame); - dnn::Blob inputBlob = dnn::Blob::fromImages(preprocessedFrame); //Convert Mat to dnn::Blob image + Mat inputBlob = blobFromImage(preprocessedFrame); //Convert Mat to batch of images //! [Prepare blob] //! [Set input blob] @@ -113,8 +113,8 @@ int main(int argc, char** argv) //! [Make forward pass] //! [Gather output] - dnn::Blob detection = net.getBlob("detection_out"); - Mat detectionMat(detection.rows(), detection.cols(), CV_32F, detection.ptrf()); + Mat detection = net.getBlob("detection_out"); + Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr()); float confidenceThreshold = parser.get("min_confidence"); for(int i = 0; i < detectionMat.rows; i++) diff --git a/modules/dnn/samples/tf_inception.cpp b/modules/dnn/samples/tf_inception.cpp index e3b6e9cb38c..b6c03170771 100644 --- a/modules/dnn/samples/tf_inception.cpp +++ b/modules/dnn/samples/tf_inception.cpp @@ -32,7 +32,7 @@ const String keys = "{result r || path to save output blob (optional, binary format, NCHW order) }" ; -void getMaxClass(dnn::Blob &probBlob, int *classId, double *classProb); +void getMaxClass(const Mat &probBlob, int *classId, double *classProb); std::vector readClassNames(const char *filename); int main(int argc, char **argv) @@ -97,9 +97,7 @@ int main(int argc, char **argv) if (inputImgSize != img.size()) resize(img, img, inputImgSize); //Resize image to input size - cv::cvtColor(img, img, cv::COLOR_BGR2RGB); - - dnn::Blob inputBlob = dnn::Blob::fromImages(img); //Convert Mat to dnn::Blob image batch + Mat inputBlob = blobFromImage(img); //Convert Mat to image batch //! [Prepare blob] //! [Set input blob] @@ -116,11 +114,7 @@ int main(int argc, char **argv) tm.stop(); //! [Gather output] - dnn::Blob prob = net.getBlob(outBlobName); //gather output of "prob" layer - - Mat& result = prob.matRef(); - - BlobShape shape = prob.shape(); + Mat result = net.getBlob(outBlobName); //gather output of "prob" layer if (!resultFile.empty()) { CV_Assert(result.isContinuous()); @@ -130,7 +124,7 @@ int main(int argc, char **argv) fout.close(); } - std::cout << "Output blob shape " << shape << std::endl; + std::cout << "Output blob shape " << result.size[0] << " x " << result.size[1] << " x " << result.size[2] << " x " << result.size[3] << std::endl; std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl; if (!classNamesFile.empty()) { @@ -138,7 +132,7 @@ int main(int argc, char **argv) int classId; double classProb; - getMaxClass(prob, &classId, &classProb);//find the best class + getMaxClass(result, &classId, &classProb);//find the best class //! 
[Print results] std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl; @@ -149,9 +143,9 @@ int main(int argc, char **argv) /* Find best class for the blob (i. e. class with maximal probability) */ -void getMaxClass(dnn::Blob &probBlob, int *classId, double *classProb) +void getMaxClass(const Mat &probBlob, int *classId, double *classProb) { - Mat probMat = probBlob.matRefConst().reshape(1, 1); //reshape the blob to 1x1000 matrix + Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix Point classNumber; minMaxLoc(probMat, NULL, classProb, NULL, &classNumber); diff --git a/modules/dnn/samples/torch_enet.cpp b/modules/dnn/samples/torch_enet.cpp index bf16eff7308..feb276cb43c 100644 --- a/modules/dnn/samples/torch_enet.cpp +++ b/modules/dnn/samples/torch_enet.cpp @@ -27,12 +27,12 @@ const String keys = ; std::vector readClassNames(const char *filename); -static void colorizeSegmentation(Blob &score, Mat &segm, +static void colorizeSegmentation(const Mat &score, Mat &segm, Mat &legend, vector &classNames); int main(int argc, char **argv) { - cv::CommandLineParser parser(argc, argv, keys); + CommandLineParser parser(argc, argv, keys); if (parser.has("help")) { @@ -78,31 +78,27 @@ int main(int argc, char **argv) //! [Initialize network] //! [Prepare blob] - Mat img = imread(imageFile), input; + Mat img = imread(imageFile, 1); + if (img.empty()) { std::cerr << "Can't read image from the file: " << imageFile << std::endl; exit(-1); } - cv::Size inputImgSize = cv::Size(512, 512); + Size inputImgSize(512, 512); if (inputImgSize != img.size()) resize(img, img, inputImgSize); //Resize image to input size - if(img.channels() == 3) - cv::cvtColor(img, input, cv::COLOR_BGR2RGB); - - input.convertTo(input, CV_32F, 1/255.0); - - dnn::Blob inputBlob = dnn::Blob::fromImages(input); //Convert Mat to dnn::Blob image batch + Mat inputBlob = blobFromImage(img, 1./255, true); //Convert Mat to image batch //! [Prepare blob] //! [Set input blob] net.setBlob("", inputBlob); //set the network input //! [Set input blob] - cv::TickMeter tm; + TickMeter tm; tm.start(); //! 
[Make forward pass] @@ -119,11 +115,7 @@ int main(int argc, char **argv) oBlob = parser.get("o_blob"); } - dnn::Blob prob = net.getBlob(oBlob); //gather output of "prob" layer - - Mat& result = prob.matRef(); - - BlobShape shape = prob.shape(); + Mat result = net.getBlob(oBlob); //gather output of "prob" layer if (!resultFile.empty()) { CV_Assert(result.isContinuous()); @@ -133,20 +125,21 @@ int main(int argc, char **argv) fout.close(); } - std::cout << "Output blob shape " << shape << std::endl; + std::cout << "Output blob: " << result.size[0] << " x " << result.size[1] << " x " << result.size[2] << " x " << result.size[3] << "\n"; std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl; if (parser.has("show")) { + size_t nclasses = result.size[1]; std::vector classNames; if(!classNamesFile.empty()) { classNames = readClassNames(classNamesFile.c_str()); - if (classNames.size() > prob.channels()) - classNames = std::vector(classNames.begin() + classNames.size() - prob.channels(), + if (classNames.size() > nclasses) + classNames = std::vector(classNames.begin() + classNames.size() - nclasses, classNames.end()); } Mat segm, legend; - colorizeSegmentation(prob, segm, legend, classNames); + colorizeSegmentation(result, segm, legend, classNames); Mat show; addWeighted(img, 0.2, segm, 0.8, 0.0, show); @@ -184,11 +177,11 @@ std::vector readClassNames(const char *filename) return classNames; } -static void colorizeSegmentation(Blob &score, Mat &segm, Mat &legend, vector &classNames) +static void colorizeSegmentation(const Mat &score, Mat &segm, Mat &legend, vector &classNames) { - const int rows = score.rows(); - const int cols = score.cols(); - const int chns = score.channels(); + const int rows = score.size[2]; + const int cols = score.size[3]; + const int chns = score.size[1]; vector colors; RNG rng(12345678); @@ -200,7 +193,7 @@ static void colorizeSegmentation(Blob &score, Mat &segm, Mat &legend, vector(0, ch, row); uchar *ptrMaxCl = maxCl.ptr(row); float *ptrMaxVal = maxVal.ptr(row); for (int col = 0; col < cols; col++) diff --git a/modules/dnn/src/blob.cpp b/modules/dnn/src/blob.cpp deleted file mode 100644 index 9dc0d970407..00000000000 --- a/modules/dnn/src/blob.cpp +++ /dev/null @@ -1,421 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
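For reference, the Mat-only sample flow that replaces the old Blob pipeline looks roughly like this (a minimal sketch assuming a loaded net, a 3-channel BGR image img, and the output name "score" used in the samples above; NCHW indexing goes through Mat::size and Mat::ptr):

// Sketch: build the input blob, run the net, and walk the 4D NCHW output.
Mat inputBlob = blobFromImage(img, 1.0, false);  // 1 x C x H x W, CV_32F
net.setBlob("", inputBlob);                      // bind the network input
net.forward();                                   // run inference
Mat score = net.getBlob("score");                // 4D output blob
const int chns = score.size[1], rows = score.size[2], cols = score.size[3];
for (int ch = 0; ch < chns; ch++)
    for (int row = 0; row < rows; row++)
    {
        const float *p = score.ptr<float>(0, ch, row); // one row of one plane
        float s = 0;
        for (int col = 0; col < cols; col++)
            s += p[col];
    }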
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -Blob::Blob() -{ - CV_DNN_UMAT_ONLY(state = UNINITIALIZED); -} - -Blob::Blob(const BlobShape &shape, int type, int allocFlags) -{ - CV_DNN_UMAT_ONLY(state = UNINITIALIZED); - this->create(shape, type, allocFlags); -} - -Blob::Blob(InputArray data) -{ -#ifndef CV_DNN_UMAT - m = data.getMat(); -#else - if (data.isUMat()) - { - um = data.getUMat(); - state = HEAD_AT_UMAT; - } - else - { - m = data.getMat(); - state = HEAD_AT_MAT; - } -#endif -} - -void Blob::create(const BlobShape &shape, int type, int allocFlags) -{ -#ifndef CV_DNN_UMAT - CV_Assert(allocFlags & ALLOC_MAT); - m.create(shape.dims(), shape.ptr(), type); -#else - CV_Assert(allocFlags & ALLOC_MAT || allocFlags & ALLOC_UMAT); - - if (allocFlags & ALLOC_MAT) - m.create(shape.dims(), shape.ptr(), type); - if (allocFlags & ALLOC_UMAT) - um.create(shape.dims(), shape.ptr(), type); - - if (state == UNINITIALIZED) - { - if (allocFlags & ALLOC_MAT && allocFlags & ALLOC_UMAT) - state = SYNCED; - else if (allocFlags & ALLOC_MAT) - state = HEAD_AT_MAT; - else - state = HEAD_AT_UMAT; - } -#endif -} - -void Blob::fill(InputArray in) -{ -#ifdef CV_DNN_UMAT - CV_Assert(in.isMat() || in.isUMat()); - if (in.isMat()) - { - m = in.getMat(); - state = HEAD_AT_MAT; - } - else - { - um = in.getUMat(); - state = HEAD_AT_UMAT; - } -#else - CV_Assert(in.isMat()); - m = in.getMat(); -#endif -} - -static inline int getMatChannels(const Mat &mat) -{ - return (mat.dims <= 2) ? mat.channels() : mat.size[0]; -} - -static BlobShape getBlobShape(std::vector &vmat, int requestedCn = -1) -{ - BlobShape shape(BlobShape::all(4)); - int cnSum = 0, matCn; - - CV_Assert(vmat.size() > 0); - - for (size_t i = 0; i < vmat.size(); i++) - { - Mat &mat = vmat[i]; - CV_Assert(!mat.empty()); - CV_Assert((mat.dims == 3 && mat.channels() == 1) || mat.dims <= 2); - - matCn = getMatChannels(mat); - cnSum += getMatChannels(mat); - - if (i == 0) - { - shape[-1] = mat.cols; - shape[-2] = mat.rows; - shape[-3] = (requestedCn <= 0) ? 
matCn : requestedCn; - } - else - { - if (mat.cols != shape[-1] || mat.rows != shape[-2]) - CV_Error(Error::StsError, "Each Mat.size() must be equal"); - - if (requestedCn <= 0 && matCn != shape[-3]) - CV_Error(Error::StsError, "Each Mat.chnannels() (or number of planes) must be equal"); - } - } - - if (cnSum % shape[-3] != 0) - CV_Error(Error::StsError, "Total number of channels in vector is not a multiple of requsted channel number"); - - shape[0] = cnSum / shape[-3]; - return shape; -} - -static std::vector extractMatVector(InputArray in) -{ - if (in.isMat() || in.isUMat()) - { - return std::vector(1, in.getMat()); - } - else if (in.isMatVector()) - { - return *static_cast*>(in.getObj()); - } - else if (in.isUMatVector()) - { - std::vector vmat; - in.getMatVector(vmat); - return vmat; - } - else - { - CV_Assert(in.isMat() || in.isMatVector() || in.isUMat() || in.isUMatVector()); - return std::vector(); - } -} - -void Blob::batchFromImages(InputArray image, int dstCn) -{ - CV_Assert(dstCn == -1 || dstCn > 0); - std::vector inMats = extractMatVector(image); - BlobShape dstShape = getBlobShape(inMats, dstCn); - - int dtype = CV_32F; - this->create(dstShape, dtype, ALLOC_MAT); - uchar *dstPtr = this->matRef().ptr(); - int elemSize = CV_ELEM_SIZE(dtype); - - std::vector wrapBuf(dstShape[-3]); - for (size_t i = 0; i < inMats.size(); i++) - { - Mat inMat = inMats[i]; - - if (inMat.dims <= 2) - { - inMat.convertTo(inMat, dtype); - - wrapBuf.resize(0); - for (int cn = 0; cn < inMat.channels(); cn++) - { - wrapBuf.push_back(Mat(inMat.rows, inMat.cols, dtype, dstPtr)); - dstPtr += elemSize * inMat.total(); - } - - cv::split(inMat, wrapBuf); - } - else - { - inMat.convertTo(Mat(inMat.dims, inMat.size, dtype, dstPtr), dtype); - dstPtr += elemSize * inMat.total(); - } - } -} - -Blob Blob::fromImages(InputArray image, int dstCn) -{ - Blob res; - res.batchFromImages(image, dstCn); - return res; -} - -void Blob::fill(const BlobShape &shape, int type, void *data, bool deepCopy) -{ - if (deepCopy) - { - create(shape, type); - memcpy(ptr(), data, this->total() * CV_ELEM_SIZE(type)); - } - else - { - m = Mat(shape.dims(), shape.ptr(), type, data); - } - CV_DNN_UMAT_ONLY(state = HEAD_AT_MAT); -} - -void Blob::setTo(InputArray value, int allocFlags) -{ -#ifdef CV_DNN_UMAT - if (allocFlags == -1) - { - if (state == HEAD_AT_UMAT) - um.setTo(value); - else if (state == HEAD_AT_MAT) - m.setTo(value); - else //SYNCED or UNINITIALIZED - { - um.setTo(value); - m.setTo(value); - - if (state == UNINITIALIZED) - state = SYNCED; - } - } - else if (allocFlags == ALLOC_BOTH) - { - m.setTo(value); - um.setTo(value); - state = SYNCED; - } - else if (allocFlags == ALLOC_MAT) - { - matRef().setTo(value); - } - else if (allocFlags == ALLOC_UMAT) - { - umatRef().setTo(value); - } - else - { - CV_Error(Error::StsBadArg, "allocFlags sholud be -1 or one of Blob::AllocFlag values"); - } -#else - m.setTo(value); -#endif -} - -void Blob::updateMat(bool syncData) const -{ -#ifdef CV_DNN_UMAT - if (state == UNINITIALIZED || state == SYNCED || state == HEAD_AT_MAT) - { - return; - } - else if (state == HEAD_AT_UMAT) - { - if (syncData) - um.copyTo(m); - else - m.create(dims(), sizes(), type()); - state = SYNCED; - } - else - { - CV_Error(Error::StsInternal, ""); - } -#else - (void)syncData; -#endif -} - -void Blob::updateUMat(bool syncData) const -{ -#ifdef CV_DNN_UMAT - if (state == UNINITIALIZED || state == SYNCED || state == HEAD_AT_UMAT) - { - return; - } - else if (state == HEAD_AT_MAT) - { - if (syncData) - m.copyTo(um); - else - 
um.create(dims(), sizes(), type()); - } - else - { - CV_Error(Error::StsInternal, ""); - } -#else - (void)syncData; -#endif -} - -void Blob::sync() const -{ - updateMat(); - updateUMat(); -} - -Vec4i Blob::shape4() const -{ - return Vec4i(num(), channels(), rows(), cols()); -} - -//BlobShape - -std::ostream &operator<< (std::ostream &stream, const BlobShape &shape) -{ - stream << "["; - - for (int i = 0; i < shape.dims() - 1; i++) - stream << shape[i] << ", "; - if (shape.dims() > 0) - stream << shape[-1]; - - return stream << "]"; -} - -BlobShape computeShapeByReshapeMask(const BlobShape &srcShape, const BlobShape &maskShape, Range srcRange /*= Range::all()*/) -{ - if (srcRange == Range::all()) - srcRange = Range(0, srcShape.dims()); - else - { - int sz = srcRange.size(); - srcRange.start = srcShape.canonicalAxis(srcRange.start); - srcRange.end = (srcRange.end == INT_MAX) ? srcShape.dims() : srcRange.start + sz; - } - - CV_Assert(0 <= srcRange.start && srcRange.start <= srcRange.end && srcRange.end <= srcShape.dims()); - BlobShape dstShape(srcShape.dims() - srcRange.size() + maskShape.dims(), (const int*)NULL); - - std::copy(srcShape.ptr(), srcShape.ptr() + srcRange.start, dstShape.ptr()); - std::copy(srcShape.ptr() + srcRange.end, srcShape.ptr() + srcShape.dims(), dstShape.ptr() + srcRange.start + maskShape.dims()); - - int inferDim = -1; - for (int i = 0; i < maskShape.dims(); i++) - { - if (maskShape[i] > 0) - { - dstShape[srcRange.start + i] = maskShape[i]; - } - else if (maskShape[i] == 0) - { - if (srcRange.start + i >= srcShape.dims()) - CV_Error(Error::StsBadArg, format("Copy dim[%d] (which has zero size) is out of the source shape bounds", srcRange.start + i)); - dstShape[srcRange.start + i] = srcShape[srcRange.start + i]; - } - else if (maskShape[i] == -1) - { - if (inferDim != -1) - CV_Error(Error::StsAssert, "Duplicate of inferred dim (which is denoted by -1)"); - inferDim = srcRange.start + i; - dstShape[inferDim] = 1; - } - else - CV_Error(Error::StsBadArg, "maskShape[i] >= -1"); - } - - if (inferDim != -1) - { - ptrdiff_t srcTotal = srcShape.total(); - ptrdiff_t dstTotal = dstShape.total(); - if (srcTotal % dstTotal != 0) - CV_Error(Error::StsBackTrace, "Can't infer a dim denoted by -1"); - - dstShape[inferDim] = (int)(srcTotal / dstTotal); - } - else - { - CV_Assert(srcShape.total() == dstShape.total()); - } - - return dstShape; -} - -} -} diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp index 312671e3eab..c41f0925730 100644 --- a/modules/dnn/src/caffe/caffe_importer.cpp +++ b/modules/dnn/src/caffe/caffe_importer.cpp @@ -192,38 +192,37 @@ class CaffeImporter : public Importer } } - BlobShape blobShapeFromProto(const caffe::BlobProto &pbBlob) + void blobShapeFromProto(const caffe::BlobProto &pbBlob, std::vector& shape) { + shape.clear(); if (pbBlob.has_num() || pbBlob.has_channels() || pbBlob.has_height() || pbBlob.has_width()) { - return BlobShape(pbBlob.num(), pbBlob.channels(), pbBlob.height(), pbBlob.width()); + shape.push_back(pbBlob.num()); + shape.push_back(pbBlob.channels()); + shape.push_back(pbBlob.height()); + shape.push_back(pbBlob.width()); } else if (pbBlob.has_shape()) { const caffe::BlobShape &_shape = pbBlob.shape(); - BlobShape shape = BlobShape::all(_shape.dim_size()); for (int i = 0; i < _shape.dim_size(); i++) - shape[i] = (int)_shape.dim(i); - - return shape; + shape.push_back((int)_shape.dim(i)); } else - { CV_Error(Error::StsError, "Unknown shape of input blob"); - return BlobShape(); - } } - void 
blobFromProto(const caffe::BlobProto &pbBlob, cv::dnn::Blob &dstBlob) + void blobFromProto(const caffe::BlobProto &pbBlob, cv::Mat &dstBlob) { - BlobShape shape = blobShapeFromProto(pbBlob); + std::vector shape; + blobShapeFromProto(pbBlob, shape); - dstBlob.create(shape, CV_32F); - CV_Assert(pbBlob.data_size() == (int)dstBlob.matRefConst().total()); + dstBlob.create((int)shape.size(), &shape[0], CV_32F); + CV_Assert(pbBlob.data_size() == (int)dstBlob.total()); CV_DbgAssert(pbBlob.GetDescriptor()->FindFieldByLowercaseName("data")->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT); - float *dstData = dstBlob.matRef().ptr(); + float *dstData = dstBlob.ptr(); for (int i = 0; i < pbBlob.data_size(); i++) dstData[i] = pbBlob.data(i); diff --git a/modules/dnn/src/caffe/layer_loaders.cpp b/modules/dnn/src/caffe/layer_loaders.cpp deleted file mode 100644 index 571d27a5bda..00000000000 --- a/modules/dnn/src/caffe/layer_loaders.cpp +++ /dev/null @@ -1,383 +0,0 @@ -#include "../precomp.hpp" -#include "layer_loaders.hpp" -#include -#include -#include "layers/layers_common.hpp" - -namespace cv -{ -namespace dnn -{ - -//Layers - -//Convolution and Deconvolution -static void initConvDeconvLayerFromCaffe(Ptr l, LayerParams ¶ms) -{ - l->setParamsFrom(params); - getConvolutionKernelParams(params, l->kernel.height, l->kernel.width, l->pad.height, - l->pad.width, l->stride.height, l->stride.width, l->dilation.height, - l->dilation.width, l->padMode); - - bool bias = params.get("bias_term", true); - int numOutput = params.get("num_output"); - int group = params.get("group", 1); - - l->adjustPad.height = params.get("adj_h", 0); - l->adjustPad.width = params.get("adj_w", 0); - - CV_Assert(numOutput % group == 0); - CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1)); -} - -template<> -Ptr createLayerFromCaffe(LayerParams ¶ms) -{ - Ptr l = ConvolutionLayer::create(); - initConvDeconvLayerFromCaffe(l, params); - return Ptr(l); -} - -template<> -Ptr createLayerFromCaffe(LayerParams ¶ms) -{ - Ptr l = DeconvolutionLayer::create(); - initConvDeconvLayerFromCaffe(l, params); - - return Ptr(l); -} - -template<> -Ptr createLayerFromCaffe(LayerParams ¶ms) -{ - int type = PoolingLayer::MAX; - Size kernel, stride, pad; - bool globalPooling; - cv::String padMode; - - if (params.has("pool")) - { - String pool = params.get("pool").toLowerCase(); - if (pool == "max") - type = PoolingLayer::MAX; - else if (pool == "ave") - type = PoolingLayer::AVE; - else if (pool == "stochastic") - type = PoolingLayer::STOCHASTIC; - else - CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); - } - - getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, - pad.height, pad.width, stride.height, stride.width, padMode); - //getCaffeConvParams(params, kernel, pad, stride); - - Ptr l; - if (!globalPooling) - l = PoolingLayer::create(type, kernel, stride, pad, padMode); - else - l = PoolingLayer::createGlobal(type); - l->setParamsFrom(params); - return l; -} - -template<> -Ptr createLayerFromCaffe(LayerParams ¶ms) -{ - int axis = params.get("axis", 1); - Ptr l(SoftmaxLayer::create(axis)); - l->setParamsFrom(params); - return l; -} - -template<> //InnerProduct specialization -Ptr createLayerFromCaffe(LayerParams ¶ms) -{ - const std::vector &blobs = params.blobs; - CV_Assert(1 <= blobs.size() && blobs.size() <= 2); - - int numOutputs = params.get("num_output"); - int innerSize = (int)blobs[0].total() / numOutputs; - bool bias = params.get("bias_term", true); - int axis = params.get("axis", 1); - - 
CV_Assert(blobs[0].dims() >= 2 && (size_t)(innerSize * numOutputs) == blobs[0].total()); - CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutputs == blobs[1].total())); - - Ptr l = InnerProductLayer::create(axis); - l->setParamsFrom(params); - l->blobs[0].reshape(Shape(numOutputs, innerSize)); - if (bias) - l->blobs[1].reshape(Shape(1, numOutputs)); - - return Ptr(l); -} - -template<> //LRNLayer specialization -Ptr createLayerFromCaffe(LayerParams& params) -{ - int type = -1; - String nrmType = params.get("norm_region", "ACROSS_CHANNELS"); - if (nrmType == "ACROSS_CHANNELS") - type = LRNLayer::CHANNEL_NRM; - else if (nrmType == "WITHIN_CHANNEL") - type = LRNLayer::SPATIAL_NRM; - else - CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\""); - - int size = params.get("local_size", 5); - if (size % 2 != 1 || size <= 0) - CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size"); - - double alpha = params.get("alpha", 1); - double beta = params.get("beta", 0.75); - double bias = params.get("bias", 1); - bool normBySize = params.get("norm_by_size", true); - - Ptr l(LRNLayer::create(type, size, alpha, beta, bias, normBySize)); - l->setParamsFrom(params); - return l; -} - -template<> -Ptr createLayerFromCaffe(LayerParams ¶ms) -{ - Ptr l(MVNLayer::create( - params.get("normalize_variance", true), - params.get("across_channels", false), - params.get("eps", 1e-9) - )); - l->setParamsFrom(params); - return l; -} - -/* Reshape layers */ - -template<> -Ptr createLayerFromCaffe(LayerParams ¶ms) -{ - int axis = params.get("axis", 0); - int numAxes = params.get("num_axes", -1); - bool enableReordering = params.get("reorder_dims", false); - CV_Assert(numAxes >= -1); - Range applyingRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes); - - Shape newShape; - if (params.has("dim")) - { - const DictValue ¶mShape = params.get("dim"); - newShape = Shape::all(paramShape.size()); - for (int i = 0; i < paramShape.size(); i++) - newShape[i] = paramShape.get(i); - } - else - newShape = Shape::all(0); - - Ptr l(ReshapeLayer::create(newShape, applyingRange, enableReordering)); - l->setParamsFrom(params); - return l; -} - -template<> -Ptr createLayerFromCaffe(LayerParams& params) -{ - Ptr l(ConcatLayer::create(params.get("axis", 1))); - l->setParamsFrom(params); - return l; -} - -template<> -Ptr createLayerFromCaffe(LayerParams ¶ms) -{ - int outputsCount; - - //TODO: maybe "top_count" param is useless because it can be determined by output connections number - if (params.has("top_count")) - { - outputsCount = params.get("top_count"); - CV_Assert(outputsCount >= 0); - } - else - { - outputsCount = -1; - } - - Ptr l(SplitLayer::create(outputsCount)); - l->setParamsFrom(params); - return l; -} - -template<> -Ptr createLayerFromCaffe(LayerParams& params) -{ - int axis = params.get("axis", 1); - - Ptr l; - if (!params.has("slice_point")) - { - l = SliceLayer::create(axis); - } - else - { - const DictValue &indicesValue = params.get("slice_point"); - std::vector sliceIndices(indicesValue.size()); - for (int i = 0; i < indicesValue.size(); i++) - sliceIndices[i] = indicesValue.get(i); - - l = SliceLayer::create(axis, sliceIndices); - } - l->setParamsFrom(params); - return l; -} - -/* Activation layers */ - -template //Intended for parameters-free activations -Ptr createLayerFromCaffe(LayerParams&) -{ - return Ptr(ActivationLayer::create()); -} - -template<> //ReLU specialization -Ptr createLayerFromCaffe(LayerParams& params) -{ - float negative_slope = 
params.get("negative_slope", 0.f); - Ptr l(ReLULayer::create(negative_slope)); - l->setParamsFrom(params); - return l; -} - -template<> //Power specialization -Ptr createLayerFromCaffe(LayerParams& params) -{ - float power = params.get("power", 1.0f); - float scale = params.get("scale", 1.0f); - float shift = params.get("shift", 0.0f); - Ptr l(PowerLayer::create(power, scale, shift)); - l->setParamsFrom(params); - return l; -} - -template<> //CropLayer specialization -Ptr createLayerFromCaffe(LayerParams& params) -{ - int start_axis = params.get("axis", 2); - DictValue *paramOffset = params.ptr("offset"); - - std::vector offset; - if (paramOffset) - { - for (int i = 0; i < paramOffset->size(); i++) - offset.push_back(paramOffset->get(i)); - } - - Ptr l(CropLayer::create(start_axis, offset)); - l->setParamsFrom(params); - return l; -} - -template<> //Eltwise specialization -Ptr createLayerFromCaffe(LayerParams& params) -{ - EltwiseLayer::EltwiseOp op = EltwiseLayer::SUM; - if (params.has("operation")) - { - String operation = params.get("operation").toLowerCase(); - if (operation == "prod") - op = EltwiseLayer::PROD; - else if (operation == "sum") - op = EltwiseLayer::SUM; - else if (operation == "max") - op = EltwiseLayer::MAX; - else - CV_Error(cv::Error::StsBadArg, "Unknown operaticon type \"" + operation + "\""); - } - - std::vector coeffs; - if (params.has("coeff")) - { - DictValue paramCoeff = params.get("coeff"); - coeffs.resize(paramCoeff.size(), 1); - for (int i = 0; i < paramCoeff.size(); i++) - { - coeffs[i] = paramCoeff.get(i); - } - } - Ptr l(EltwiseLayer::create(op, coeffs)); - l->setParamsFrom(params); - return l; -} - -template<> //BatchNormLayer specialization -Ptr createLayerFromCaffe(LayerParams& params) -{ - const std::vector &blobs = params.blobs; - CV_Assert(blobs.size() >= 3); - - bool hasWeights = params.get("has_weight", false); - bool hasBias = params.get("has_bias", false); - float epsilon = params.get("eps", 1E-5); - Ptr l = BatchNormLayer::create(hasWeights, hasBias, epsilon); - l->setParamsFrom(params); - - return Ptr(l); -} - -template<> //ChannelsPReLULayer specialization -Ptr createLayerFromCaffe(LayerParams& params) -{ - CV_Assert(params.blobs.size() == 1); - Ptr l = ChannelsPReLULayer::create(); - l->setParamsFrom(params); - - return Ptr(l); -} - -template<> //MaxUnpoolLayer specialization -Ptr createLayerFromCaffe(LayerParams& params) -{ - Size poolKernel(params.get("pool_k_w"), params.get("pool_k_h")), - poolPad(params.get("pool_pad_w"), params.get("pool_pad_h")), - poolStride(params.get("pool_stride_w"), params.get("pool_stride_h")); - Ptr l = MaxUnpoolLayer::create(poolKernel, poolPad, poolStride); - l->setParamsFrom(params); - - return Ptr(l); -} - -template<> //ScaleLayer specialization -Ptr createLayerFromCaffe(LayerParams& params) -{ - Ptr l = ScaleLayer::create(params.get("bias_term", false)); - l->setParamsFrom(params); - - return Ptr(l); -} - -//Explicit instantiation -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); - -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); - -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr 
createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); - -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -template Ptr createLayerFromCaffe(LayerParams&); -} -} diff --git a/modules/dnn/src/caffe/layer_loaders.hpp b/modules/dnn/src/caffe/layer_loaders.hpp deleted file mode 100644 index 617691cb467..00000000000 --- a/modules/dnn/src/caffe/layer_loaders.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
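The deleted loaders above all follow one pattern that the layer implementations now absorb: create() receives the LayerParams directly and the constructor parses them. A schematic of what each registered class now does (the "scale" parameter is hypothetical, for illustration only):

// Sketch of the LayerParams-driven pattern that replaces layer_loaders.cpp.
class MyLayerImpl : public Layer
{
public:
    MyLayerImpl(const LayerParams &params)
    {
        setParamsFrom(params);                    // copy name, type and blobs
        scale = params.get<float>("scale", 1.f);  // hypothetical parameter
    }
    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs) { /* ... */ }
    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs) { /* ... */ }
    float scale;
};
// static Ptr<MyLayer> MyLayer::create(const LayerParams &params)
// { return Ptr<MyLayer>(new MyLayerImpl(params)); }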
-// -//M*/ - -#ifndef __OPENCV_DNN_CAFFE_LAYER_LOADERS_HPP__ -#define __OPENCV_DNN_CAFFE_LAYER_LOADERS_HPP__ - -#include - -namespace cv -{ -namespace dnn -{ - -//Common template for Caffe layer loaders -template -Ptr createLayerFromCaffe(LayerParams&); - -Ptr createFlattenLayerFromCaffe(LayerParams&); - -} -} -#endif \ No newline at end of file diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 26d21a31128..4be9138f936 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -67,6 +67,65 @@ static String toString(const T &v) return ss.str(); } +Mat blobFromImage(const Mat& image_, double scalefactor, bool swapRB) +{ + Mat image; + if(image_.depth() == CV_8U) + { + image_.convertTo(image, CV_32F, scalefactor); + } + else + image = image_; + CV_Assert(image.dims == 2 && image.depth() == CV_32F); + int nch = image.channels(); + CV_Assert(nch == 3 || nch == 4); + int sz[] = { 1, 3, image.rows, image.cols }; + Mat blob(4, sz, CV_32F); + Mat ch[4]; + for( int j = 0; j < 3; j++ ) + ch[j] = Mat(image.rows, image.cols, CV_32F, blob.ptr(0, j)); + if(swapRB) + std::swap(ch[0], ch[2]); + split(image, ch); + return blob; +} + +Mat blobFromImages(const std::vector& images, double scalefactor, bool swapRB) +{ + size_t i, nimages = images.size(); + if(nimages == 0) + return Mat(); + Mat image0 = images[0]; + int nch = image0.channels(); + CV_Assert(image0.dims == 2 && (nch == 3 || nch == 4)); + int sz[] = { (int)nimages, 3, image0.rows, image0.cols }; + Mat blob(4, sz, CV_32F), image; + Mat ch[4]; + + for( i = 0; i < nimages; i++ ) + { + Mat image_ = images[i]; + if(image_.depth() == CV_8U) + { + image_.convertTo(image, CV_32F, scalefactor); + } + else + image = image_; + CV_Assert(image.depth() == CV_32F); + nch = image.channels(); + CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); + CV_Assert(image.size() == image0.size()); + + for( int j = 0; j < 3; j++ ) + ch[j] = Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, j)); + if(swapRB) + std::swap(ch[0], ch[2]); + split(image, ch); + } + return blob; +} + + struct LayerPin { int lid; @@ -107,8 +166,8 @@ struct LayerData std::set requiredOutputs; Ptr layerInstance; - std::vector outputBlobs; - std::vector inputBlobs; + std::vector outputBlobs; + std::vector inputBlobs; int flag; @@ -130,8 +189,8 @@ struct LayerData //fake layer containing network input blobs struct DataLayer : public Layer { - void allocate(const std::vector&, std::vector&) {} - void forward(std::vector&, std::vector&) {} + void allocate(const std::vector&, std::vector&) {} + void forward(std::vector&, std::vector&) {} int outputNameToIndex(String tgtName) { @@ -348,8 +407,27 @@ struct Net::Impl if (ld.flag) return; + size_t ninputs = ld.inputBlobsId.size(); +#if 0 + printf("layer %s:", ld.name.c_str()); + for (size_t i = 0; i < ninputs; i++) + { + int inp_lid = ld.inputBlobsId[i].lid; + LayerData &inp_ld = layers[inp_lid]; + int inp_outputs = (int)inp_ld.outputBlobs.size(); + std::cout << " " << inp_ld.name << "(" << inp_outputs; + + for( int j = 0; j < inp_outputs; j++ ) + { + std::cout << (j == 0 ? 
": " : ", ") << inp_ld.outputBlobs[j].size; + } + std::cout << ")"; + } + printf("\n"); +#endif + //determine parent layers - for (size_t i = 0; i < ld.inputBlobsId.size(); i++) + for (size_t i = 0; i < ninputs; i++) ld.inputLayersId.insert(ld.inputBlobsId[i].lid); //allocate parents @@ -357,8 +435,8 @@ struct Net::Impl allocateLayer(*i); //bind inputs - ld.inputBlobs.resize(ld.inputBlobsId.size()); - for (size_t i = 0; i < ld.inputBlobsId.size(); i++) + ld.inputBlobs.resize(ninputs); + for (size_t i = 0; i < ninputs; i++) { LayerPin from = ld.inputBlobsId[i]; CV_Assert(from.valid()); @@ -368,15 +446,24 @@ struct Net::Impl //allocate layer ld.outputBlobs.resize(std::max((size_t)1, ld.requiredOutputs.size())); //layer produce at least one output blob - try + //try { Ptr layerPtr = ld.getLayerInstance(); layerPtr->allocate(ld.inputBlobs, ld.outputBlobs); +#if 0 + std::cout << "\toutputs:"; + size_t noutputs = ld.outputBlobs.size(); + for (size_t j = 0; j < noutputs; j++) + { + std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size; + } + std::cout << "\n"; +#endif } - catch (const cv::Exception &err) + /*catch (const cv::Exception &err) { CV_RETHROW_ERROR(err, format("The following error occured while making allocate() for layer \"%s\": %s", ld.name.c_str(), err.err.c_str())); - } + }*/ ld.flag = 1; } @@ -414,14 +501,14 @@ struct Net::Impl } //forward itself - try + //try { ld.layerInstance->forward(ld.inputBlobs, ld.outputBlobs); } - catch (const cv::Exception &err) + /*catch (const cv::Exception &err) { CV_RETHROW_ERROR(err, format("The following error occured while making forward() for layer \"%s\": %s", ld.name.c_str(), err.err.c_str())); - } + }*/ ld.flag = 1; } @@ -509,7 +596,7 @@ void Net::setNetInputs(const std::vector &inputBlobNames) impl->netInputLayer->setNames(inputBlobNames); } -void Net::setBlob(String outputName, const Blob &blob) +void Net::setBlob(String outputName, const Mat &blob_) { LayerPin pin = impl->getPinByAlias(outputName); if (!pin.valid()) @@ -517,10 +604,10 @@ void Net::setBlob(String outputName, const Blob &blob) LayerData &ld = impl->layers[pin.lid]; ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) ); - ld.outputBlobs[pin.oid] = blob; + ld.outputBlobs[pin.oid] = blob_.clone(); } -Blob Net::getBlob(String outputName) +Mat Net::getBlob(String outputName) { LayerPin pin = impl->getPinByAlias(outputName); if (!pin.valid()) @@ -535,20 +622,20 @@ Blob Net::getBlob(String outputName) return ld.outputBlobs[pin.oid]; } -Blob Net::getParam(LayerId layer, int numParam) +Mat Net::getParam(LayerId layer, int numParam) { LayerData &ld = impl->getLayerData(layer); - std::vector &layerBlobs = ld.layerInstance->blobs; + std::vector &layerBlobs = ld.layerInstance->blobs; CV_Assert(numParam < (int)layerBlobs.size()); return layerBlobs[numParam]; } -void Net::setParam(LayerId layer, int numParam, const Blob &blob) +void Net::setParam(LayerId layer, int numParam, const Mat &blob) { LayerData &ld = impl->getLayerData(layer); - std::vector &layerBlobs = ld.layerInstance->blobs; + std::vector &layerBlobs = ld.layerInstance->blobs; CV_Assert(numParam < (int)layerBlobs.size()); //we don't make strong checks, use this function carefully layerBlobs[numParam] = blob; @@ -662,30 +749,30 @@ static void vecToPVec(const std::vector &v, std::vector &pv) pv[i] = const_cast(&v[i]); } -void Layer::allocate(const std::vector &inputs, std::vector &outputs) +void Layer::allocate(const std::vector &inputs, std::vector &outputs) { - std::vector inputsp; + std::vector 
inputsp; vecToPVec(inputs, inputsp); this->allocate(inputsp, outputs); } -std::vector Layer::allocate(const std::vector &inputs) +std::vector Layer::allocate(const std::vector &inputs) { - std::vector outputs; + std::vector outputs; this->allocate(inputs, outputs); return outputs; } -void Layer::forward(const std::vector &inputs, std::vector &outputs) +void Layer::forward(const std::vector &inputs, std::vector &outputs) { - std::vector inputsp; + std::vector inputsp; vecToPVec(inputs, inputsp); this->forward(inputsp, outputs); } -void Layer::run(const std::vector &inputs, std::vector &outputs) +void Layer::run(const std::vector &inputs, std::vector &outputs) { - std::vector inputsp; + std::vector inputsp; vecToPVec(inputs, inputsp); this->allocate(inputsp, outputs); this->forward(inputsp, outputs); diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 8d92d61aa34..e9b03464e76 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -40,19 +40,6 @@ //M*/ #include "precomp.hpp" -#include "caffe/layer_loaders.hpp" -#include "layers/blank_layer.hpp" - -#include "layers/crop_layer.hpp" -#include "layers/eltwise_layer.hpp" -#include "layers/flatten_layer.hpp" -#include "layers/permute_layer.hpp" -#include "layers/prior_box_layer.hpp" -#include "layers/detection_output_layer.hpp" -#include "layers/normalize_bbox_layer.hpp" -#include "layers/shift_layer.hpp" -#include "layers/padding_layer.hpp" -#include "layers/scale_layer.hpp" namespace cv { @@ -65,7 +52,7 @@ struct AutoInitializer AutoInitializer() : status(false) { - cv::dnn::initModule(); + initModule(); } }; @@ -76,41 +63,41 @@ void initModule() if (init.status) return; - REG_RUNTIME_LAYER_FUNC(Slice, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(Split, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(Concat, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(Reshape, createLayerFromCaffe); + REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer); + REG_RUNTIME_LAYER_CLASS(Split, SplitLayer); + REG_RUNTIME_LAYER_CLASS(Concat, ConcatLayer); + REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer); REG_RUNTIME_LAYER_CLASS(Flatten, FlattenLayer); - REG_RUNTIME_LAYER_FUNC(Convolution, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(Deconvolution, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(Pooling, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(LRN, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(InnerProduct, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(Softmax, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(MVN, createLayerFromCaffe); + REG_RUNTIME_LAYER_CLASS(Convolution, ConvolutionLayer); + REG_RUNTIME_LAYER_CLASS(Deconvolution, DeconvolutionLayer); + REG_RUNTIME_LAYER_CLASS(Pooling, PoolingLayer); + REG_RUNTIME_LAYER_CLASS(LRN, LRNLayer); + REG_RUNTIME_LAYER_CLASS(InnerProduct, InnerProductLayer); + REG_RUNTIME_LAYER_CLASS(Softmax, SoftmaxLayer); + REG_RUNTIME_LAYER_CLASS(MVN, MVNLayer); - REG_RUNTIME_LAYER_FUNC(ReLU, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(ChannelsPReLU, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(Sigmoid, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(TanH, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(BNLL, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(AbsVal, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(BatchNorm, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(MaxUnpool, createLayerFromCaffe); + REG_RUNTIME_LAYER_CLASS(ReLU, ReLULayer); + REG_RUNTIME_LAYER_CLASS(ChannelsPReLU, ChannelsPReLULayer); + 
REG_RUNTIME_LAYER_CLASS(Sigmoid, SigmoidLayer); + REG_RUNTIME_LAYER_CLASS(TanH, TanHLayer); + REG_RUNTIME_LAYER_CLASS(BNLL, BNLLLayer); + REG_RUNTIME_LAYER_CLASS(AbsVal, AbsLayer); + REG_RUNTIME_LAYER_CLASS(Power, PowerLayer); + REG_RUNTIME_LAYER_CLASS(BatchNorm, BatchNormLayer); + REG_RUNTIME_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer); REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer); REG_RUNTIME_LAYER_CLASS(Identity, BlankLayer); - REG_RUNTIME_LAYER_FUNC(Crop, createLayerFromCaffe); - REG_RUNTIME_LAYER_FUNC(Eltwise, createLayerFromCaffe); + REG_RUNTIME_LAYER_CLASS(Crop, CropLayer); + REG_RUNTIME_LAYER_CLASS(Eltwise, EltwiseLayer); REG_RUNTIME_LAYER_CLASS(Permute, PermuteLayer); REG_RUNTIME_LAYER_CLASS(PriorBox, PriorBoxLayer); REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer); REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer); REG_RUNTIME_LAYER_CLASS(Shift, ShiftLayer); REG_RUNTIME_LAYER_CLASS(Padding, PaddingLayer); - REG_RUNTIME_LAYER_FUNC(Scale, createLayerFromCaffe); + REG_RUNTIME_LAYER_CLASS(Scale, ScaleLayer); init.status = true; } diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index 7f9109e3afa..04ef3c234ca 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -9,78 +9,95 @@ Implementation of Batch Normalization layer. */ -#include "batch_norm_layer.hpp" +#include "../precomp.hpp" namespace cv { namespace dnn { -BatchNormLayerImpl::BatchNormLayerImpl(bool hasWeights_, bool hasBias_, float epsilon_): - hasWeights(hasWeights_), - hasBias(hasBias_), - epsilon(epsilon_) -{} - -void BatchNormLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +class BatchNormLayerImpl : public BatchNormLayer { - CV_Assert(blobs.size() >= 2); - - outputs.resize(inputs.size()); - for (size_t i = 0; i < inputs.size(); i++) +public: + BatchNormLayerImpl(const LayerParams& params) { - CV_Assert(blobs[0].total() == inputs[i]->channels()); - CV_Assert(blobs[1].total() == inputs[i]->channels()); - outputs[i].create(inputs[i]->shape()); - } -} - -void BatchNormLayerImpl::forward(std::vector &inputs, std::vector &outputs) -{ - CV_Assert(inputs.size() == 1); + setParamsFrom(params); + CV_Assert(blobs.size() >= 3); - Blob &inpBlob = *inputs[0]; - - int weightsBlobIndex = 2; - int biasBlobIndex = weightsBlobIndex + hasWeights; - - float varMeanScale = 1; - if (!hasWeights && !hasBias) { - varMeanScale = *blobs[2].ptrf(); - if (varMeanScale != 0) - varMeanScale = 1/varMeanScale; + hasWeights = params.get("has_weight", false); + hasBias = params.get("has_bias", false); + epsilon = params.get("eps", 1E-5); } - Mat invStdMat; - cv::pow(blobs[1].matRefConst()*varMeanScale + epsilon, -0.5, invStdMat); + void allocate(const std::vector &inputs, std::vector &outputs) + { + CV_Assert(blobs.size() >= 2); + + outputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); i++) + { + CV_Assert(blobs[0].total() == inputs[i]->size[1]); + CV_Assert(blobs[1].total() == inputs[i]->size[1]); + Mat* inp = inputs[i]; + outputs[i].create(inp->dims, &inp->size.p[0], inp->type()); + } + } - for (size_t ii = 0; ii < outputs.size(); ii++) + void forward(std::vector &inputs, std::vector &outputs) { - Blob &outBlob = outputs[ii]; - - if (hasWeights) - CV_Assert(inpBlob.channels() == blobs[weightsBlobIndex].total()); - - if (hasBias) - CV_Assert(inpBlob.channels() == blobs[biasBlobIndex].total()); - - for(int num = 0; num < outBlob.num(); num++) - { - for (int n = 0; n < outBlob.channels(); n++) 
- { - float mean = blobs[0].matRefConst().at(n)*varMeanScale; - double invstd = invStdMat.at(n); - float w = hasWeights ? blobs[weightsBlobIndex].matRefConst().at(n) : 1; - float b = hasBias ? blobs[biasBlobIndex].matRefConst().at(n) : 0; - outBlob.getPlane(num, n) = (inpBlob.getPlane(num, n) - mean)*w*invstd + b; - } - } + CV_Assert(inputs.size() == 1); + + Mat &inpBlob = *inputs[0]; + + int weightsBlobIndex = 2; + int biasBlobIndex = weightsBlobIndex + hasWeights; + + float varMeanScale = 1; + if (!hasWeights && !hasBias) { + varMeanScale = *blobs[2].ptr(); + if (varMeanScale != 0) + varMeanScale = 1/varMeanScale; + } + + Mat invStdMat; + cv::pow(blobs[1]*varMeanScale + epsilon, -0.5, invStdMat); + + int rows = inpBlob.size[2]; + int cols = inpBlob.size[3]; + + for (size_t ii = 0; ii < outputs.size(); ii++) + { + Mat &outBlob = outputs[ii]; + + if (hasWeights) + CV_Assert(inpBlob.size[1] == blobs[weightsBlobIndex].total()); + + if (hasBias) + CV_Assert(inpBlob.size[1] == blobs[biasBlobIndex].total()); + + for(int num = 0; num < outBlob.size[0]; num++) + { + for (int n = 0; n < outBlob.size[1]; n++) + { + float mean = blobs[0].at(n)*varMeanScale; + double invstd = invStdMat.at(n); + float w = hasWeights ? blobs[weightsBlobIndex].at(n) : 1; + float b = hasBias ? blobs[biasBlobIndex].at(n) : 0; + Mat inpBlobPlane(rows, cols, CV_32F, inpBlob.ptr(num, n)); + Mat outBlobPlane(rows, cols, CV_32F, outBlob.ptr(num, n)); + inpBlobPlane.convertTo(outBlobPlane, CV_32F, w*invstd, b - mean*w*invstd); + } + } + } } -} -Ptr BatchNormLayer::create(bool hasWeights, bool hasBias, float epsilon) + bool hasWeights, hasBias; + float epsilon; +}; + +Ptr BatchNormLayer::create(const LayerParams& params) { - return Ptr(new BatchNormLayerImpl(hasWeights, hasBias, epsilon)); + return Ptr(new BatchNormLayerImpl(params)); } } // namespace dnn diff --git a/modules/dnn/src/layers/batch_norm_layer.hpp b/modules/dnn/src/layers/batch_norm_layer.hpp deleted file mode 100644 index 1afa01476ef..00000000000 --- a/modules/dnn/src/layers/batch_norm_layer.hpp +++ /dev/null @@ -1,37 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -// Copyright (C) 2016, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. - -/* -Declaration of Batch Normalization layer. -*/ - -#ifndef __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__ -#include - -namespace cv -{ -namespace dnn -{ - -class BatchNormLayerImpl : public BatchNormLayer -{ -public: - BatchNormLayerImpl(bool hasWeights_, bool hasBias_, float epsilon_); - - void allocate(const std::vector &inputs, std::vector &outputs); - - void forward(std::vector &inputs, std::vector &outputs); - -private: - bool hasWeights, hasBias; - float epsilon; -}; - -} -} -#endif // __OPENCV_DNN_LAYERS_BATCH_NORM_LAYER_HPP__ diff --git a/modules/dnn/src/layers/flatten_layer.hpp b/modules/dnn/src/layers/blank_layer.cpp similarity index 78% rename from modules/dnn/src/layers/flatten_layer.hpp rename to modules/dnn/src/layers/blank_layer.cpp index 1aab0eb1a82..581b55d2adc 100644 --- a/modules/dnn/src/layers/flatten_layer.hpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -38,30 +38,35 @@ // the use of this software, even if advised of the possibility of such damage. 
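The rewritten BatchNorm forward above folds the whole normalization into a single convertTo() per channel plane. Spelled out, the algebra is (illustrative per-channel scalars var, w, b, mean taken from the blobs as in the code above):

//   y = (x - mean) * w * invstd + b,  invstd = (var*varMeanScale + eps)^(-1/2)
// is the affine map y = x * alpha + beta with
//   alpha = w * invstd,  beta = b - mean * alpha,
// so one Mat::convertTo applies it to an entire plane:
float invstd = 1.f / std::sqrt(var * varMeanScale + epsilon);
float alpha = w * invstd, beta = b - mean * alpha;
inpBlobPlane.convertTo(outBlobPlane, CV_32F, alpha, beta);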
// //M*/ - -#ifndef __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__ #include "../precomp.hpp" namespace cv { namespace dnn { -class FlattenLayer : public Layer +class BlankLayerImpl : public BlankLayer { - int _startAxis; - int _endAxis; - size_t _numAxes; - - BlobShape resultShape; - public: - FlattenLayer(LayerParams ¶ms); - void allocate(const std::vector &inputs, std::vector &outputs); - void forward(std::vector &inputs, std::vector &outputs); + BlankLayerImpl(const LayerParams&) {} + + void allocate(const std::vector &inputs, std::vector &outputs) + { + outputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); i++) + outputs[i] = *inputs[i]; + } - void checkInputs(const std::vector &inputs); + void forward(std::vector &inputs, std::vector &outputs) + { + for (size_t i = 0; i < inputs.size(); i++) + outputs[i] = *inputs[i]; + } }; + +Ptr BlankLayer::create(const LayerParams& params) +{ + return Ptr(new BlankLayerImpl(params)); +} + } } -#endif diff --git a/modules/dnn/src/layers/blank_layer.hpp b/modules/dnn/src/layers/blank_layer.hpp deleted file mode 100644 index 6d93f278a87..00000000000 --- a/modules/dnn/src/layers/blank_layer.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
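BlankLayerImpl above is the inference-time no-op that Dropout and Identity resolve to; its forward() re-publishes the input Mat headers, so the pass-through costs no copy (Mat assignment shares data). The wiring is done in init.cpp earlier in this patch:

REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer);
REG_RUNTIME_LAYER_CLASS(Identity, BlankLayer);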
-// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_BLANK_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_BLANK_LAYER_HPP__ -#include "../precomp.hpp" - -namespace cv -{ -namespace dnn -{ - class BlankLayer : public Layer - { - public: - - BlankLayer(LayerParams&) - { - - } - - void allocate(const std::vector &inputs, std::vector &outputs) - { - outputs.resize(inputs.size()); - for (size_t i = 0; i < inputs.size(); i++) - outputs[i].shareFrom(*inputs[i]); - } - - void forward(std::vector &inputs, std::vector &outputs) - { - for (size_t i = 0; i < inputs.size(); i++) - outputs[i] = *inputs[i]; - } - }; -} -} -#endif diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 61341fefa4b..61bd83c64e0 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -41,80 +41,69 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include "concat_layer.hpp" -#include namespace cv { namespace dnn { -ConcatLayerImpl::ConcatLayerImpl(int axis_ /*= 1*/) +class ConcatLayerImpl : public ConcatLayer { - axis = axis_; -} - -void ConcatLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) -{ - CV_Assert(inputs.size() > 0); - - BlobShape refShape = inputs[0]->shape(); - axisIdx = inputs[0]->canonicalAxis(axis); +public: + ConcatLayerImpl(const LayerParams& params) + { + setParamsFrom(params); + axis = params.get("axis", 1); + } - int axisSum = 0; - useOpenCL = false; - for (size_t i = 0; i < inputs.size(); i++) + void allocate(const std::vector &inputs, std::vector &outputs) { - BlobShape curShape = inputs[i]->shape(); + CV_Assert(inputs.size() > 0); - CV_Assert(curShape.dims() == refShape.dims() && inputs[i]->type() == inputs[0]->type()); - for (int curAxis = 0; curAxis < refShape.dims(); curAxis++) + int dims = inputs[0]->dims, dtype = inputs[0]->type(); + std::vector refShape(inputs[0]->size.p, inputs[0]->size.p + dims); + axisIdx = axis < 0 ? axis + dims : axis; + + int axisSum = 0; + for (size_t i = 0; i < inputs.size(); i++) { - if (curAxis != axisIdx && refShape[curAxis] != curShape[curAxis]) - CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer"); + CV_Assert(inputs[i]->type() == dtype); + for (int curAxis = 0; curAxis < dims; curAxis++) + { + if (curAxis != axisIdx && inputs[0]->size[curAxis] != inputs[i]->size[curAxis]) + CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer"); + } + + axisSum += inputs[i]->size[axisIdx]; } - axisSum += curShape[axisIdx]; - useOpenCL |= inputs[i]->getState() == Blob::HEAD_AT_MAT; - } - - refShape[axisIdx] = axisSum; - useOpenCL &= ocl::useOpenCL(); - int allocFlags = (useOpenCL) ? 
Blob::ALLOC_UMAT : Blob::ALLOC_MAT; - - outputs.resize(1); - outputs[0].create(refShape, inputs[0]->type(), allocFlags); -} + refShape[axisIdx] = axisSum; + outputs.resize(1); + outputs[0].create(dims, &refShape[0], dtype); + } -void ConcatLayerImpl::forward(std::vector &inputs, std::vector &outputs) -{ - #ifdef HAVE_OPENCL - if (useOpenCL) - forward_(inputs, outputs); - else - #endif - forward_(inputs, outputs); -} - -template -void ConcatLayerImpl::forward_(std::vector &inputs, std::vector &outputs) -{ - XMat& outMat = outputs[0].getRef(); - std::vector ranges(outputs[0].dims(), Range::all()); - ranges[axisIdx].start = 0; - for (size_t i = 0; i < inputs.size(); i++) + void forward(std::vector &inputs, std::vector &outputs) { - ranges[axisIdx].end = ranges[axisIdx].start + inputs[i]->size(axisIdx); - inputs[i]->getRefConst().copyTo(outMat(&ranges[0])); - ranges[axisIdx].start = ranges[axisIdx].end; + Mat& outMat = outputs[0]; + std::vector ranges(outputs[0].dims, Range::all()); + + ranges[axisIdx].start = 0; + for (size_t i = 0; i < inputs.size(); i++) + { + ranges[axisIdx].end = ranges[axisIdx].start + inputs[i]->size[axisIdx]; + inputs[i]->copyTo(outMat(&ranges[0])); + ranges[axisIdx].start = ranges[axisIdx].end; + } } -} -Ptr ConcatLayer::create(int axis) + int axisIdx; +}; + +Ptr ConcatLayer::create(const LayerParams& params) { - return Ptr(new ConcatLayerImpl(axis)); + return Ptr(new ConcatLayerImpl(params)); } } diff --git a/modules/dnn/src/layers/concat_layer.hpp b/modules/dnn/src/layers/concat_layer.hpp deleted file mode 100644 index 86f2083b942..00000000000 --- a/modules/dnn/src/layers/concat_layer.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
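The new ConcatLayerImpl copies each input into a Range-bounded view of the output along axisIdx. A standalone sketch of that copy, for two hypothetical 4D blobs a and b (equal in every dimension but the channel axis) concatenated over channels:

// Concatenate a and b along axis 1 using Range-sliced views of the output.
int sz[] = { 1, a.size[1] + b.size[1], a.size[2], a.size[3] };
Mat out(4, sz, a.type());
std::vector<Range> ranges(4, Range::all());
ranges[1] = Range(0, a.size[1]);
a.copyTo(out(&ranges[0]));             // first slab
ranges[1] = Range(a.size[1], sz[1]);
b.copyTo(out(&ranges[0]));             // second slab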
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -class ConcatLayerImpl : public ConcatLayer -{ - bool useOpenCL; - int axisIdx; - - template - void forward_(std::vector &inputs, std::vector &outputs); - -public: - ConcatLayerImpl(int axis_ = 1); - - void allocate(const std::vector &inputs, std::vector &outputs); - - void forward(std::vector &inputs, std::vector &outputs); -}; - -} -} -#endif diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 48d12a35410..5ce08646ab8 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -40,9 +40,7 @@ //M*/ #include "../precomp.hpp" -#include #include "layers_common.hpp" -#include "convolution_layer.hpp" #include "op_im2col.hpp" #include "op_blas.hpp" #include @@ -53,12 +51,54 @@ namespace cv namespace dnn { +class BaseConvolutionLayerImpl : public ConvolutionLayer +{ +public: + BaseConvolutionLayerImpl(); + virtual void allocate(const std::vector &inputs, std::vector &outputs); + + void init(); + virtual void computeInpOutShape(const Mat &inpBlob) = 0; + bool is1x1() const; + + int numOutput, group; + int inpH, inpW, inpCn; + int outH, outW, outCn; + int inpGroupCn, outGroupCn; + int ksize; + std::vector colRowBlobShape; + + bool bias; + Mat colRowBlob, biasOnesBlob; +}; + +//TODO: simultaneously convolution and bias addition for cache optimization +class ConvolutionLayerImpl : public BaseConvolutionLayerImpl +{ +public: + virtual void forward(std::vector &inputs, std::vector &outputs); + virtual void computeInpOutShape(const Mat &inpBlob); + + void im2col(const Mat &srcImg, Mat &dstCol); + void im2row(const Mat &srcImg, Mat &dstRow); +}; + +class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl +{ +public: + virtual void forward(std::vector &inputs, std::vector &outputs); + + virtual void computeInpOutShape(const Mat &inpBlob); + void col2im(const Mat &colMat, Mat &dstImg); +}; + + BaseConvolutionLayerImpl::BaseConvolutionLayerImpl(): numOutput(-1), group(-1), inpH(0), inpW(0), inpCn(0), outH(0), outW(0), outCn(0), inpGroupCn(0), outGroupCn(0), - ksize(0), bias(false), tryUseOpenCL(false) + ksize(0), bias(false) { #ifdef HAVE_LAPACK if (getBlasThreads() != cv::getThreadNum()) @@ -71,46 +111,44 @@ BaseConvolutionLayerImpl::BaseConvolutionLayerImpl(): void BaseConvolutionLayerImpl::init() { CV_Assert(blobs.size() >= 1 && blobs.size() <= 2); - CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height); + CV_Assert(blobs[0].dims == 4 && blobs[0].size[3] == kernel.width && blobs[0].size[2] == kernel.height); bias = (blobs.size() >= 2); - useOpenCL = ocl::useOpenCL() && tryUseOpenCL && dilation == Size(1, 1); } -void BaseConvolutionLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +void 
BaseConvolutionLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) { CV_Assert(inputs.size() > 0); init(); - const Blob &input = *inputs[0]; - CV_Assert(input.dims() == 4 && (input.type() == CV_32F || input.type() == CV_64F)); + const Mat &input = *inputs[0]; + CV_Assert(input.dims == 4 && (input.type() == CV_32F || input.type() == CV_64F)); for (size_t i = 0; i < inputs.size(); i++) { CV_Assert(inputs[i]->type() == input.type()); - CV_Assert(inputs[i]->dims() == 4 && inputs[i]->channels() == input.channels()); - CV_Assert(inputs[i]->rows() == input.rows() && inputs[i]->cols() == input.cols()); + CV_Assert(inputs[i]->dims == 4 && inputs[i]->size[1] == input.size[1]); + CV_Assert(inputs[i]->size[2] == input.size[2] && inputs[i]->size[3] == input.size[3]); } computeInpOutShape(input); - int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT; - if (bias) { - biasOnesBlob.create(Shape(1, outH * outW), input.type(), allocFlags); + biasOnesBlob.create(1, outH * outW, input.type()); biasOnesBlob.setTo(1); } outputs.resize(inputs.size()); for (size_t i = 0; i < inputs.size(); i++) { - outputs[i].create(Shape(inputs[i]->num(), outCn, outH, outW), input.type(), allocFlags); + int sz[] = { inputs[i]->size[0], outCn, outH, outW }; + outputs[i].create(4, sz, input.type()); } if (!is1x1()) { - colRowBlob.create(colRowBlobShape, input.type(), allocFlags); + colRowBlob.create((int)colRowBlobShape.size(), &colRowBlobShape[0], input.type()); colRowBlob.setTo(0); } } @@ -122,15 +160,15 @@ bool BaseConvolutionLayerImpl::is1x1() const (dilation.height == 1 && dilation.width == 1); } -void ConvolutionLayerImpl::computeInpOutShape(const Blob &input) +void ConvolutionLayerImpl::computeInpOutShape(const Mat &input) { - CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num()); + CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].size[0]); - numOutput = blobs[0].num(); + numOutput = blobs[0].size[0]; - inpH = input.rows(); - inpW = input.cols(); - inpCn = input.channels(); + inpH = input.size[2]; + inpW = input.size[3]; + inpCn = input.size[1]; outCn = numOutput; if (padMode.empty()) @@ -143,90 +181,67 @@ void ConvolutionLayerImpl::computeInpOutShape(const Blob &input) getConvPoolOutParams(inpH, inpW, kernel, stride, pad, padMode, outH, outW); } - group = inpCn / blobs[0].channels(); + group = inpCn / blobs[0].size[1]; CV_Assert(inpCn % group == 0 && outCn % group == 0); - CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group); + CV_Assert(blobs[0].size[0] == outCn && blobs[0].size[1] == inpCn / group); outGroupCn = outCn / group; inpGroupCn = inpCn / group; ksize = inpGroupCn * kernel.height * kernel.width; - colRowBlobShape = BlobShape(outH * outW, ksize); + colRowBlobShape.clear(); + colRowBlobShape.push_back(outH*outW); + colRowBlobShape.push_back(ksize); } -template -void ConvolutionLayerImpl::forward_(std::vector &inputs, std::vector &outputs) +void ConvolutionLayerImpl::forward(std::vector &inputs, std::vector &outputs) { CV_Assert(inputs.size() > 0); - XMat weightsMat = reshaped(blobs[0].getRefConst(), Shape(outCn, ksize)); - XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst(), Shape(outCn, 1)) : XMat(); + Mat weightsMat = blobs[0].reshape(1, outCn); + Mat biasesMat = bias ? 
blobs[1].reshape(1, outCn) : Mat(); for (size_t ii = 0; ii < outputs.size(); ii++) { - int numImg = inputs[ii]->size(0); - XMat inpMat = inputs[ii]->getRefConst(); - XMat outMat = reshaped(outputs[ii].getRef(), Shape(numImg*group*outGroupCn, outH*outW)); + int numImg = inputs[ii]->size[0]; + Mat inpMat = *inputs[ii]; + Mat outMat = outputs[ii].reshape(1, numImg*group*outGroupCn); for (int n = 0; n < numImg; n++) { for (int g = 0; g < group; g++) { - XMat colMat, curInp = slice(inpMat, n, _Range(g * inpGroupCn, inpGroupCn)); + Mat colMat, curInp = slice(inpMat, n, _Range(g * inpGroupCn, inpGroupCn)); im2row(curInp, colMat); _Range kerRange(g * outGroupCn, outGroupCn); - XMat kerMat = weightsMat.rowRange(kerRange); + Mat kerMat = weightsMat.rowRange(kerRange); _Range outRange((g + n * group) * outGroupCn, outGroupCn); - XMat dstMat = outMat.rowRange(outRange); + Mat dstMat = outMat.rowRange(outRange); dnn::gemm(kerMat, colMat, 1, dstMat, 0, GEMM_2_T); if (bias) { - dnn::gemm(biasesMat.rowRange(kerRange), biasOnesBlob.getRefConst(), 1, dstMat, 1); + dnn::gemm(biasesMat.rowRange(kerRange), biasOnesBlob, 1, dstMat, 1); } } } } } -void ConvolutionLayerImpl::forward(std::vector &inputs, std::vector &outputs) -{ - if (!useOpenCL) - forward_(inputs, outputs); - else - forward_(inputs, outputs); -} - -void ConvolutionLayerImpl::im2col(const UMat &srcImg, UMat &dstCol) -{ - if (is1x1()) - { - dstCol = reshaped(srcImg, Shape(ksize, outH*outW)); - return; - } -#ifdef HAVE_OPENCL - CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, this->colRowBlob.umatRef())); - dstCol = this->colRowBlob.umatRefConst(); -#else - CV_Error(Error::StsInternal, ""); - dstCol = srcImg; //supress warning -#endif -} - void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol) { if (is1x1()) { - dstCol = reshaped(srcImg, Shape(ksize, outH*outW)); + dstCol = srcImg.reshape(1, ksize); return; } - Mat &colMat = colRowBlob.matRef(); + Mat &colMat = colRowBlob; if (srcImg.type() == CV_32F) im2col_CpuPBody::run(srcImg.ptr(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, @@ -243,11 +258,11 @@ void ConvolutionLayerImpl::im2row(const Mat &srcImg, Mat &dstRow) { if (is1x1()) { - dstRow = reshaped(srcImg, Shape(ksize, outH*outW)).t(); + dstRow = srcImg.reshape(1, ksize).t(); return; } - Mat &colMat = colRowBlob.matRef(); + Mat &colMat = colRowBlob; if (srcImg.type() == CV_32F) im2row_CpuPBody::run(srcImg.ptr(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, @@ -260,67 +275,55 @@ void ConvolutionLayerImpl::im2row(const Mat &srcImg, Mat &dstRow) dstRow = colMat; } -void ConvolutionLayerImpl::im2row(const UMat &srcImg, UMat &dstCol) -{ - CV_Error(cv::Error::StsNotImplemented, ""); -} - //Deconvolution -void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob) +void DeConvolutionLayerImpl::computeInpOutShape(const Mat &inpBlob) { - CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num()); + CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].size[0]); - numOutput = blobs[0].num(); + numOutput = blobs[0].size[0]; - inpH = inpBlob.rows(); - inpW = inpBlob.cols(); - inpCn = inpBlob.channels(); + inpH = inpBlob.size[2]; + inpW = inpBlob.size[3]; + inpCn = inpBlob.size[1]; outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height; outW = stride.width * (inpW - 1) 
+ kernel.width - 2 * pad.width + adjustPad.width; outCn = numOutput; - group = inpCn / blobs[0].channels(); + group = inpCn / blobs[0].size[1]; outGroupCn = outCn / group; inpGroupCn = inpCn / group; ksize = outGroupCn * kernel.height * kernel.width; CV_Assert(inpCn % group == 0 && outCn % group == 0); - CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group); + CV_Assert(blobs[0].size[0] == outCn && blobs[0].size[1] == inpCn / group); - colRowBlobShape = BlobShape(ksize, inpH * inpW); + colRowBlobShape.clear(); + colRowBlobShape.push_back(ksize); + colRowBlobShape.push_back(inpH * inpW); } -void DeConvolutionLayerImpl::forward(std::vector &inputs, std::vector &outputs) +void DeConvolutionLayerImpl::forward(std::vector &inputs, std::vector &outputs) { - if (!useOpenCL) - forward_(inputs, outputs); - else - forward_(inputs, outputs); -} - -template -void DeConvolutionLayerImpl::forward_(std::vector &inputs, std::vector &outputs) -{ - XMat weightsMat = reshaped(blobs[0].getRefConst(), Shape(inpCn, ksize)); - XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst(), Shape(outCn, 1)) : XMat(); + Mat weightsMat = blobs[0].reshape(1, inpCn); + Mat biasesMat = bias ? blobs[1].reshape(1, outCn) : Mat(); for (size_t ii = 0; ii < outputs.size(); ii++) { - int numImg = inputs[ii]->size(0); - XMat convBlob = reshaped(inputs[ii]->getRefConst(), Shape(numImg*inpCn, inpH*inpW)); - XMat decnBlob = reshaped(outputs[ii].getRef(), Shape(numImg*outCn, outH*outW)); + int numImg = inputs[ii]->size[0]; + Mat convBlob = inputs[ii]->reshape(1, numImg*inpCn); + Mat decnBlob = outputs[ii].reshape(1, numImg*outCn); for (int n = 0; n < numImg; n++) { for (int g = 0; g < group; g++) { - XMat dstMat = decnBlob.rowRange(_Range((g + n * group) * outGroupCn, outGroupCn)); - XMat &colMat = (is1x1()) ? dstMat : colRowBlob.getRef(); + Mat dstMat = decnBlob.rowRange(_Range((g + n * group) * outGroupCn, outGroupCn)); + Mat &colMat = (is1x1()) ? 
dstMat : colRowBlob; - XMat convMat = convBlob.rowRange(_Range((g + n * group) * inpGroupCn, inpGroupCn)); - XMat wghtMat = weightsMat.rowRange(_Range(g * inpGroupCn, inpGroupCn)); + Mat convMat = convBlob.rowRange(_Range((g + n * group) * inpGroupCn, inpGroupCn)); + Mat wghtMat = weightsMat.rowRange(_Range(g * inpGroupCn, inpGroupCn)); dnn::gemm(wghtMat, convMat, 1, colMat, 0, GEMM_1_T); @@ -329,8 +332,8 @@ void DeConvolutionLayerImpl::forward_(std::vector &inputs, std::vector(), 1, dstMat, 1); + Mat curBiasMat = biasesMat.rowRange(_Range(g * outGroupCn, outGroupCn)); + dnn::gemm(curBiasMat, biasOnesBlob, 1, dstMat, 1); } } } @@ -350,24 +353,9 @@ void DeConvolutionLayerImpl::col2im(const Mat &colMat, Mat &dstImg) col2im_CpuPBody::run(colMat.ptr(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr()); } -void DeConvolutionLayerImpl::col2im(const UMat &colMat, UMat &dstImg) -{ - if (is1x1()) - { - dstImg = colMat; - return; - } -#ifdef HAVE_OPENCL - CV_Assert(col2im_ocl(colMat, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg)); -#else - CV_Error(Error::StsInternal, ""); - dstImg = colMat; -#endif -} - //Initializers -Ptr ConvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation) +/*Ptr ConvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation) { ConvolutionLayerImpl *l = new ConvolutionLayerImpl(); l->kernel = kernel; @@ -387,6 +375,40 @@ Ptr DeconvolutionLayer::create(Size kernel, Size stride, S l->adjustPad = adjustPad; return Ptr(l); +}*/ + +//Convolution and Deconvolution +static void initConvDeconvLayerFromCaffe(Ptr l, const LayerParams ¶ms) +{ + l->setParamsFrom(params); + getConvolutionKernelParams(params, l->kernel.height, l->kernel.width, l->pad.height, + l->pad.width, l->stride.height, l->stride.width, l->dilation.height, + l->dilation.width, l->padMode); + + bool bias = params.get("bias_term", true); + int numOutput = params.get("num_output"); + int group = params.get("group", 1); + + l->adjustPad.height = params.get("adj_h", 0); + l->adjustPad.width = params.get("adj_w", 0); + + CV_Assert(numOutput % group == 0); + CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1)); +} + +Ptr ConvolutionLayer::create(const LayerParams ¶ms) +{ + Ptr l(new ConvolutionLayerImpl); + initConvDeconvLayerFromCaffe(l, params); + return l; +} + +Ptr DeconvolutionLayer::create(const LayerParams ¶ms) +{ + Ptr l(new DeConvolutionLayerImpl); + initConvDeconvLayerFromCaffe(l, params); + + return l; } } diff --git a/modules/dnn/src/layers/convolution_layer.hpp b/modules/dnn/src/layers/convolution_layer.hpp deleted file mode 100644 index de2b0ab7811..00000000000 --- a/modules/dnn/src/layers/convolution_layer.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -class BaseConvolutionLayerImpl : public ConvolutionLayer -{ -public: - BaseConvolutionLayerImpl(); - virtual void allocate(const std::vector &inputs, std::vector &outputs); - -protected: - void init(); - virtual void computeInpOutShape(const Blob &inpBlob) = 0; - bool is1x1() const; - - int numOutput, group; - int inpH, inpW, inpCn; - int outH, outW, outCn; - int inpGroupCn, outGroupCn; - int ksize; - BlobShape colRowBlobShape; - - bool bias; - bool tryUseOpenCL, useOpenCL; - - Blob colRowBlob, biasOnesBlob; - -}; - -//TODO: simultaneously convolution and bias addition for cache optimization -class ConvolutionLayerImpl : public BaseConvolutionLayerImpl -{ -public: - virtual void forward(std::vector &inputs, std::vector &outputs); - -protected: - virtual void computeInpOutShape(const Blob &inpBlob); - - template - void forward_(std::vector &inputs, std::vector &outputs); - void im2col(const Mat &srcImg, Mat &dstCol); - void im2row(const Mat &srcImg, Mat &dstRow); - void im2col(const UMat &srcImg, UMat &dstCol); - void im2row(const UMat &srcImg, UMat &dstCol); -}; - -class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl -{ -public: - virtual void forward(std::vector &inputs, std::vector &outputs); - -protected: - - virtual void computeInpOutShape(const Blob &inpBlob); - - template - void forward_(std::vector &inputs, std::vector &outputs); - void col2im(const Mat &colMat, Mat &dstImg); - void col2im(const UMat &colMat, UMat &dstImg); -}; - -//Importers -Ptr createConvolutionLayerFromCaffe(LayerParams ¶ms); -Ptr createDeconvolutionLayerFromCaffe(LayerParams ¶ms); - -} -} - -#endif diff --git a/modules/dnn/src/layers/crop_layer.cpp b/modules/dnn/src/layers/crop_layer.cpp index 06f6f75b3a5..422144c6c66 100755 --- a/modules/dnn/src/layers/crop_layer.cpp +++ b/modules/dnn/src/layers/crop_layer.cpp @@ -41,87 +41,97 @@ #include 
"../precomp.hpp" #include "layers_common.hpp" -#include "crop_layer.hpp" namespace cv { namespace dnn { -CropLayerImpl::CropLayerImpl(int start_axis_, const std::vector &offset_) +class CropLayerImpl : public CropLayer { - startAxis = start_axis_; - offset = offset_; -} - -void CropLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) -{ - CV_Assert(2 == inputs.size()); - - const Blob &inpBlob = *inputs[0]; - const Blob &inpSzBlob = *inputs[1]; - - int start_axis = inpBlob.canonicalAxis(startAxis); - int dims = inpBlob.dims(); - - std::vector offset_final(dims, 0); - if (offset.size() == 1) +public: + CropLayerImpl(const LayerParams& params) { - for (int i = start_axis; i < dims; i++) - offset_final[i] = offset[0]; - } - else if (offset.size() > 1) - { - if ((int)offset.size() != dims - start_axis) - CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis."); + setParamsFrom(params); + startAxis = params.get("axis", 2); + const DictValue *paramOffset = params.ptr("offset"); - for (int i = start_axis; i < dims; i++) - offset_final[i] = offset[i - start_axis]; + if (paramOffset) + { + for (int i = 0; i < paramOffset->size(); i++) + offset.push_back(paramOffset->get(i)); + } } - BlobShape dstShape = inpBlob.shape(); - crop_ranges.resize(dims, Range::all()); - for (int i = start_axis; i < dims; i++) + void allocate(const std::vector &inputs, std::vector &outputs) { - dstShape[i] = inpSzBlob.size(i); + CV_Assert(2 == inputs.size()); - if (!offset.empty()) //normal case - { - if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size(i) > inpBlob.size(i)) - CV_Error(Error::StsBadArg, "invalid crop parameters"); + const Mat &inpBlob = *inputs[0]; + const Mat &inpSzBlob = *inputs[1]; + + int dims = inpBlob.dims; + int start_axis = startAxis < 0 ? 
startAxis + dims : startAxis; - crop_ranges[i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size(i)); + std::vector offset_final(dims, 0); + if (offset.size() == 1) + { + for (int i = start_axis; i < dims; i++) + offset_final[i] = offset[0]; } - else //detect offset automatically so that cropped image is center of original one + else if (offset.size() > 1) { - if (inpSzBlob.size(i) > inpBlob.size(i)) - CV_Error(Error::StsBadArg, "invalid output blob size"); + if ((int)offset.size() != dims - start_axis) + CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis."); + + for (int i = start_axis; i < dims; i++) + offset_final[i] = offset[i - start_axis]; + } - int cur_crop = (inpBlob.size(i) - inpSzBlob.size(i)) / 2; - crop_ranges[i] = Range(cur_crop, cur_crop + inpSzBlob.size(i)); + std::vector dstShape(dims); + crop_ranges.resize(dims, Range::all()); + for (int i = 0; i < dims; i++) + { + dstShape[i] = inpSzBlob.size[i]; + if( i < start_axis ) + continue; + + if (!offset.empty()) //normal case + { + if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size[i] > inpBlob.size[i]) + CV_Error(Error::StsBadArg, "invalid crop parameters"); + + crop_ranges[i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size[i]); + } + else //detect offset automatically so that cropped image is center of original one + { + if (inpSzBlob.size[i] > inpBlob.size[i]) + CV_Error(Error::StsBadArg, "invalid output blob size"); + + int cur_crop = (inpBlob.size[i] - inpSzBlob.size[i]) / 2; + crop_ranges[i] = Range(cur_crop, cur_crop + inpSzBlob.size[i]); + } } + + outputs.resize(1); + outputs[0].create(dims, &dstShape[0], inpBlob.type()); } - outputs.resize(1); - outputs[0].create(dstShape); -} + void forward(std::vector &inputs, std::vector &outputs) + { + Mat &input = *inputs[0]; + Mat &output = outputs[0]; + + input(&crop_ranges[0]).copyTo(output); + } + + std::vector crop_ranges; +}; -void CropLayerImpl::forward(std::vector &inputs, std::vector &outputs) -{ - Blob &input = *inputs[0]; - Blob &output = outputs[0]; - - #ifdef HAVE_OPENCL - if (input.getState() == Blob::HEAD_AT_UMAT) - input.umatRefConst()(&crop_ranges[0]).copyTo(output.umatRef()); - else - #endif - input.matRefConst()(&crop_ranges[0]).copyTo(output.matRef()); -} -Ptr CropLayer::create(int start_axis, const std::vector &offset) +Ptr CropLayer::create(const LayerParams& params) { - return Ptr(new CropLayerImpl(start_axis, offset)); + return Ptr(new CropLayerImpl(params)); } } diff --git a/modules/dnn/src/layers/crop_layer.hpp b/modules/dnn/src/layers/crop_layer.hpp deleted file mode 100755 index bc8789b9054..00000000000 --- a/modules/dnn/src/layers/crop_layer.hpp +++ /dev/null @@ -1,62 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - class CropLayerImpl : public CropLayer - { - std::vector crop_ranges; - - public: - CropLayerImpl(int start_axis, const std::vector &offset); - void allocate(const std::vector &inputs, std::vector &outputs); - void forward(std::vector &inputs, std::vector &outputs); - }; -} -} -#endif diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 00002dbb4bc..bcffda41180 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -41,9 +41,9 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include "detection_output_layer.hpp" #include #include +#include namespace cv { @@ -52,6 +52,7 @@ namespace dnn namespace util { + template std::string to_string(T value) { @@ -74,676 +75,742 @@ bool SortScorePairDescend(const std::pair& pair1, { return pair1.first > pair2.first; } -} -const std::string DetectionOutputLayer::_layerName = std::string("DetectionOutput"); +} -bool DetectionOutputLayer::getParameterDict(const LayerParams ¶ms, - const std::string ¶meterName, - DictValue& result) +class DetectionOutputLayerImpl : public DetectionOutputLayer { - if (!params.has(parameterName)) - { - return false; - } +public: + unsigned _numClasses; + bool _shareLocation; + int _numLocClasses; - result = params.get(parameterName); - return true; -} + int _backgroundLabelId; -template -T DetectionOutputLayer::getParameter(const LayerParams ¶ms, - const std::string ¶meterName, - const size_t &idx, - const bool required, - const T& defaultValue) -{ - DictValue dictValue; - bool success = getParameterDict(params, parameterName, dictValue); - if(!success) + typedef caffe::PriorBoxParameter_CodeType CodeType; + CodeType _codeType; + + bool _varianceEncodedInTarget; + int _keepTopK; + float _confidenceThreshold; + + int _num; + int _numPriors; + + float 
_nmsThreshold; + int _topK; + + enum { _numAxes = 4 }; + static const std::string _layerName; + + typedef std::map > LabelBBox; + + bool getParameterDict(const LayerParams ¶ms, + const std::string ¶meterName, + DictValue& result) { - if(required) + if (!params.has(parameterName)) { - std::string message = _layerName; - message += " layer parameter does not contain "; - message += parameterName; - message += " parameter."; - CV_Error(Error::StsBadArg, message); + return false; } - else + + result = params.get(parameterName); + return true; + } + + template + T getParameter(const LayerParams ¶ms, + const std::string ¶meterName, + const size_t &idx=0, + const bool required=true, + const T& defaultValue=T()) + { + DictValue dictValue; + bool success = getParameterDict(params, parameterName, dictValue); + if(!success) { - return defaultValue; + if(required) + { + std::string message = _layerName; + message += " layer parameter does not contain "; + message += parameterName; + message += " parameter."; + CV_Error(Error::StsBadArg, message); + } + else + { + return defaultValue; + } } + return dictValue.get(idx); } - return dictValue.get(idx); -} -void DetectionOutputLayer::getCodeType(LayerParams ¶ms) -{ - String codeTypeString = params.get("code_type").toLowerCase(); - if (codeTypeString == "corner") - _codeType = caffe::PriorBoxParameter_CodeType_CORNER; - else if (codeTypeString == "center_size") - _codeType = caffe::PriorBoxParameter_CodeType_CENTER_SIZE; - else - _codeType = caffe::PriorBoxParameter_CodeType_CORNER; -} + void getCodeType(const LayerParams ¶ms) + { + String codeTypeString = params.get("code_type").toLowerCase(); + if (codeTypeString == "corner") + _codeType = caffe::PriorBoxParameter_CodeType_CORNER; + else if (codeTypeString == "center_size") + _codeType = caffe::PriorBoxParameter_CodeType_CENTER_SIZE; + else + _codeType = caffe::PriorBoxParameter_CodeType_CORNER; + } -DetectionOutputLayer::DetectionOutputLayer(LayerParams ¶ms) : Layer(params) -{ - _numClasses = getParameter(params, "num_classes"); - _shareLocation = getParameter(params, "share_location"); - _numLocClasses = _shareLocation ? 1 : _numClasses; - _backgroundLabelId = getParameter(params, "background_label_id"); - _varianceEncodedInTarget = getParameter(params, "variance_encoded_in_target", 0, false, false); - _keepTopK = getParameter(params, "keep_top_k"); - _confidenceThreshold = getParameter(params, "confidence_threshold", 0, false, -FLT_MAX); - _topK = getParameter(params, "top_k", 0, false, -1); - - getCodeType(params); - - // Parameters used in nms. - _nmsThreshold = getParameter(params, "nms_threshold"); - CV_Assert(_nmsThreshold > 0.); -} + DetectionOutputLayerImpl(const LayerParams ¶ms) + { + _numClasses = getParameter(params, "num_classes"); + _shareLocation = getParameter(params, "share_location"); + _numLocClasses = _shareLocation ? 1 : _numClasses; + _backgroundLabelId = getParameter(params, "background_label_id"); + _varianceEncodedInTarget = getParameter(params, "variance_encoded_in_target", 0, false, false); + _keepTopK = getParameter(params, "keep_top_k"); + _confidenceThreshold = getParameter(params, "confidence_threshold", 0, false, -FLT_MAX); + _topK = getParameter(params, "top_k", 0, false, -1); + + getCodeType(params); + + // Parameters used in nms. 
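The getParameter() helper above treats a key as required unless required=false is passed, raising StsBadArg when a required key is missing. A minimal sketch of feeding this constructor by hand, assuming the DetectionOutputLayer::create(params) factory this patch introduces; every value below is an illustrative assumption, not taken from the patch:

    // Hedged sketch: hand-built LayerParams for the constructor above.
    // All values are illustrative assumptions.
    LayerParams lp;
    lp.set("num_classes", 21);                  // required (no default)
    lp.set("share_location", true);             // required
    lp.set("background_label_id", 0);           // required
    lp.set("keep_top_k", 200);                  // required
    lp.set("code_type", String("CENTER_SIZE")); // anything else decodes as CORNER
    lp.set("nms_threshold", 0.45f);             // must be > 0 (asserted just below)
    Ptr<DetectionOutputLayer> det = DetectionOutputLayer::create(lp);

The optional keys (variance_encoded_in_target, confidence_threshold, top_k) simply fall back to their defaults when absent.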
+ _nmsThreshold = getParameter(params, "nms_threshold"); + CV_Assert(_nmsThreshold > 0.); + + setParamsFrom(params); + } -void DetectionOutputLayer::checkInputs(const std::vector &inputs) -{ - for (size_t i = 1; i < inputs.size(); i++) + void checkInputs(const std::vector &inputs) { - for (size_t j = 0; j < _numAxes; j++) + for (size_t i = 1; i < inputs.size(); i++) { - CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]); + CV_Assert(inputs[i]->size == inputs[0]->size); } } -} -void DetectionOutputLayer::allocate(const std::vector &inputs, - std::vector &outputs) -{ - CV_Assert(inputs.size() > 0); - CV_Assert(inputs[0]->num() == inputs[1]->num()); - _num = inputs[0]->num(); - - _numPriors = inputs[2]->rows() / 4; - CV_Assert((_numPriors * _numLocClasses * 4) == inputs[0]->channels()); - CV_Assert(int(_numPriors * _numClasses) == inputs[1]->channels()); - - // num() and channels() are 1. - // Since the number of bboxes to be kept is unknown before nms, we manually - // set it to (fake) 1. - // Each row is a 7 dimension std::vector, which stores - // [image_id, label, confidence, xmin, ymin, xmax, ymax] - BlobShape outputShape = BlobShape(1, 1, 1, 7); - outputs[0].create(BlobShape(outputShape)); -} + void allocate(const std::vector &inputs, + std::vector &outputs) + { + CV_Assert(inputs.size() > 0); + CV_Assert(inputs[0]->size[0] == inputs[1]->size[0]); + _num = inputs[0]->size[0]; + + _numPriors = inputs[2]->size[2] / 4; + CV_Assert((_numPriors * _numLocClasses * 4) == inputs[0]->size[1]); + CV_Assert(int(_numPriors * _numClasses) == inputs[1]->size[1]); + + // num() and channels() are 1. + // Since the number of bboxes to be kept is unknown before nms, we manually + // set it to (fake) 1. + // Each row is a 7 dimension std::vector, which stores + // [image_id, label, confidence, xmin, ymin, xmax, ymax] + int outputShape[] = {1, 1, 1, 7}; + outputs[0].create(4, outputShape, CV_32F); + } -void DetectionOutputLayer::forward(std::vector &inputs, - std::vector &outputs) -{ - const float* locationData = inputs[0]->ptrf(); - const float* confidenceData = inputs[1]->ptrf(); - const float* priorData = inputs[2]->ptrf(); - - // Retrieve all location predictions. - std::vector allLocationPredictions; - GetLocPredictions(locationData, _num, _numPriors, _numLocClasses, - _shareLocation, &allLocationPredictions); - - // Retrieve all confidences. - std::vector > > allConfidenceScores; - GetConfidenceScores(confidenceData, _num, _numPriors, _numClasses, - &allConfidenceScores); - - // Retrieve all prior bboxes. It is same within a batch since we assume all - // images in a batch are of same dimension. - std::vector priorBBoxes; - std::vector > priorVariances; - GetPriorBBoxes(priorData, _numPriors, &priorBBoxes, &priorVariances); - - // Decode all loc predictions to bboxes. 
- std::vector allDecodedBBoxes; - DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, _num, - _shareLocation, _numLocClasses, _backgroundLabelId, - _codeType, _varianceEncodedInTarget, &allDecodedBBoxes); - - int numKept = 0; - std::vector > > allIndices; - for (int i = 0; i < _num; ++i) + void forward(std::vector &inputs, + std::vector &outputs) { - const LabelBBox& decodeBBoxes = allDecodedBBoxes[i]; - const std::map >& confidenceScores = - allConfidenceScores[i]; - std::map > indices; - int numDetections = 0; - for (int c = 0; c < (int)_numClasses; ++c) + const float* locationData = inputs[0]->ptr(); + const float* confidenceData = inputs[1]->ptr(); + const float* priorData = inputs[2]->ptr(); + + // Retrieve all location predictions. + std::vector allLocationPredictions; + GetLocPredictions(locationData, _num, _numPriors, _numLocClasses, + _shareLocation, &allLocationPredictions); + + // Retrieve all confidences. + std::vector > > allConfidenceScores; + GetConfidenceScores(confidenceData, _num, _numPriors, _numClasses, + &allConfidenceScores); + + // Retrieve all prior bboxes. It is same within a batch since we assume all + // images in a batch are of same dimension. + std::vector priorBBoxes; + std::vector > priorVariances; + GetPriorBBoxes(priorData, _numPriors, &priorBBoxes, &priorVariances); + + // Decode all loc predictions to bboxes. + std::vector allDecodedBBoxes; + DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, _num, + _shareLocation, _numLocClasses, _backgroundLabelId, + _codeType, _varianceEncodedInTarget, &allDecodedBBoxes); + + int numKept = 0; + std::vector > > allIndices; + for (int i = 0; i < _num; ++i) { - if (c == _backgroundLabelId) - { - // Ignore background class. - continue; - } - if (confidenceScores.find(c) == confidenceScores.end()) + const LabelBBox& decodeBBoxes = allDecodedBBoxes[i]; + const std::map >& confidenceScores = + allConfidenceScores[i]; + std::map > indices; + int numDetections = 0; + for (int c = 0; c < (int)_numClasses; ++c) { - // Something bad happened if there are no predictions for current label. - util::make_error("Could not find confidence predictions for label ", c); - } + if (c == _backgroundLabelId) + { + // Ignore background class. + continue; + } + if (confidenceScores.find(c) == confidenceScores.end()) + { + // Something bad happened if there are no predictions for current label. + util::make_error("Could not find confidence predictions for label ", c); + } - const std::vector& scores = confidenceScores.find(c)->second; - int label = _shareLocation ? -1 : c; - if (decodeBBoxes.find(label) == decodeBBoxes.end()) - { - // Something bad happened if there are no predictions for current label. - util::make_error("Could not find location predictions for label ", label); - continue; - } - const std::vector& bboxes = - decodeBBoxes.find(label)->second; - ApplyNMSFast(bboxes, scores, _confidenceThreshold, _nmsThreshold, - _topK, &(indices[c])); - numDetections += indices[c].size(); - } - if (_keepTopK > -1 && numDetections > _keepTopK) - { - std::vector > > scoreIndexPairs; - for (std::map >::iterator it = indices.begin(); - it != indices.end(); ++it) - { - int label = it->first; - const std::vector& labelIndices = it->second; - if (confidenceScores.find(label) == confidenceScores.end()) + const std::vector& scores = confidenceScores.find(c)->second; + int label = _shareLocation ? -1 : c; + if (decodeBBoxes.find(label) == decodeBBoxes.end()) { - // Something bad happened for current label. 
+ // Something bad happened if there are no predictions for current label. util::make_error("Could not find location predictions for label ", label); continue; } - const std::vector& scores = confidenceScores.find(label)->second; - for (size_t j = 0; j < labelIndices.size(); ++j) + const std::vector& bboxes = + decodeBBoxes.find(label)->second; + ApplyNMSFast(bboxes, scores, _confidenceThreshold, _nmsThreshold, + _topK, &(indices[c])); + numDetections += indices[c].size(); + } + if (_keepTopK > -1 && numDetections > _keepTopK) + { + std::vector > > scoreIndexPairs; + for (std::map >::iterator it = indices.begin(); + it != indices.end(); ++it) + { + int label = it->first; + const std::vector& labelIndices = it->second; + if (confidenceScores.find(label) == confidenceScores.end()) + { + // Something bad happened for current label. + util::make_error("Could not find location predictions for label ", label); + continue; + } + const std::vector& scores = confidenceScores.find(label)->second; + for (size_t j = 0; j < labelIndices.size(); ++j) + { + size_t idx = labelIndices[j]; + CV_Assert(idx < scores.size()); + scoreIndexPairs.push_back( + std::make_pair(scores[idx], std::make_pair(label, idx))); + } + } + // Keep outputs k results per image. + std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(), + util::SortScorePairDescend >); + scoreIndexPairs.resize(_keepTopK); + // Store the new indices. + std::map > newIndices; + for (size_t j = 0; j < scoreIndexPairs.size(); ++j) { - size_t idx = labelIndices[j]; - CV_Assert(idx < scores.size()); - scoreIndexPairs.push_back( - std::make_pair(scores[idx], std::make_pair(label, idx))); + int label = scoreIndexPairs[j].second.first; + int idx = scoreIndexPairs[j].second.second; + newIndices[label].push_back(idx); } + allIndices.push_back(newIndices); + numKept += _keepTopK; } - // Keep outputs k results per image. - std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(), - util::SortScorePairDescend >); - scoreIndexPairs.resize(_keepTopK); - // Store the new indices. - std::map > newIndices; - for (size_t j = 0; j < scoreIndexPairs.size(); ++j) + else { - int label = scoreIndexPairs[j].second.first; - int idx = scoreIndexPairs[j].second.second; - newIndices[label].push_back(idx); + allIndices.push_back(indices); + numKept += numDetections; } - allIndices.push_back(newIndices); - numKept += _keepTopK; } - else + + if (numKept == 0) { - allIndices.push_back(indices); - numKept += numDetections; + CV_ErrorNoReturn(Error::StsError, "Couldn't find any detections"); + return; } - } + int outputShape[] = {1, 1, numKept, 7}; + outputs[0].create(4, outputShape, CV_32F); + float* outputsData = outputs[0].ptr(); - if (numKept == 0) - { - CV_ErrorNoReturn(Error::StsError, "Couldn't find any detections"); - return; - } - std::vector outputsShape(2, 1); - outputsShape.push_back(numKept); - outputsShape.push_back(7); - outputs[0].create(outputsShape); - float* outputsData = outputs[0].ptrf(); - - int count = 0; - for (int i = 0; i < _num; ++i) - { - const std::map >& confidenceScores = - allConfidenceScores[i]; - const LabelBBox& decodeBBoxes = allDecodedBBoxes[i]; - for (std::map >::iterator it = allIndices[i].begin(); - it != allIndices[i].end(); ++it) + int count = 0; + for (int i = 0; i < _num; ++i) { - int label = it->first; - if (confidenceScores.find(label) == confidenceScores.end()) - { - // Something bad happened if there are no predictions for current label. 
- util::make_error("Could not find confidence predictions for label ", label); - continue; - } - const std::vector& scores = confidenceScores.find(label)->second; - int locLabel = _shareLocation ? -1 : label; - if (decodeBBoxes.find(locLabel) == decodeBBoxes.end()) + const std::map >& confidenceScores = + allConfidenceScores[i]; + const LabelBBox& decodeBBoxes = allDecodedBBoxes[i]; + for (std::map >::iterator it = allIndices[i].begin(); + it != allIndices[i].end(); ++it) { - // Something bad happened if there are no predictions for current label. - util::make_error("Could not find location predictions for label ", locLabel); - continue; - } - const std::vector& bboxes = + int label = it->first; + if (confidenceScores.find(label) == confidenceScores.end()) + { + // Something bad happened if there are no predictions for current label. + util::make_error("Could not find confidence predictions for label ", label); + continue; + } + const std::vector& scores = confidenceScores.find(label)->second; + int locLabel = _shareLocation ? -1 : label; + if (decodeBBoxes.find(locLabel) == decodeBBoxes.end()) + { + // Something bad happened if there are no predictions for current label. + util::make_error("Could not find location predictions for label ", locLabel); + continue; + } + const std::vector& bboxes = decodeBBoxes.find(locLabel)->second; - std::vector& indices = it->second; + std::vector& indices = it->second; - for (size_t j = 0; j < indices.size(); ++j) - { - int idx = indices[j]; - outputsData[count * 7] = i; - outputsData[count * 7 + 1] = label; - outputsData[count * 7 + 2] = scores[idx]; - caffe::NormalizedBBox clipBBox; - ClipBBox(bboxes[idx], &clipBBox); - outputsData[count * 7 + 3] = clipBBox.xmin(); - outputsData[count * 7 + 4] = clipBBox.ymin(); - outputsData[count * 7 + 5] = clipBBox.xmax(); - outputsData[count * 7 + 6] = clipBBox.ymax(); - - ++count; + for (size_t j = 0; j < indices.size(); ++j) + { + int idx = indices[j]; + outputsData[count * 7] = i; + outputsData[count * 7 + 1] = label; + outputsData[count * 7 + 2] = scores[idx]; + caffe::NormalizedBBox clipBBox; + ClipBBox(bboxes[idx], &clipBBox); + outputsData[count * 7 + 3] = clipBBox.xmin(); + outputsData[count * 7 + 4] = clipBBox.ymin(); + outputsData[count * 7 + 5] = clipBBox.xmax(); + outputsData[count * 7 + 6] = clipBBox.ymax(); + + ++count; + } } } } -} -float DetectionOutputLayer::BBoxSize(const caffe::NormalizedBBox& bbox, - const bool normalized) -{ - if (bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin()) - { - // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0. - return 0; - } - else + // Compute bbox size. + float BBoxSize(const caffe::NormalizedBBox& bbox, + const bool normalized=true) { - if (bbox.has_size()) + if (bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin()) { - return bbox.size(); + // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0. + return 0; } else { - float width = bbox.xmax() - bbox.xmin(); - float height = bbox.ymax() - bbox.ymin(); - if (normalized) + if (bbox.has_size()) { - return width * height; + return bbox.size(); } else { - // If bbox is not within range [0, 1]. - return (width + 1) * (height + 1); + float width = bbox.xmax() - bbox.xmin(); + float height = bbox.ymax() - bbox.ymin(); + if (normalized) + { + return width * height; + } + else + { + // If bbox is not within range [0, 1]. 
+ return (width + 1) * (height + 1); + } } } } -} -void DetectionOutputLayer::ClipBBox(const caffe::NormalizedBBox& bbox, - caffe::NormalizedBBox* clipBBox) -{ - clipBBox->set_xmin(std::max(std::min(bbox.xmin(), 1.f), 0.f)); - clipBBox->set_ymin(std::max(std::min(bbox.ymin(), 1.f), 0.f)); - clipBBox->set_xmax(std::max(std::min(bbox.xmax(), 1.f), 0.f)); - clipBBox->set_ymax(std::max(std::min(bbox.ymax(), 1.f), 0.f)); - clipBBox->clear_size(); - clipBBox->set_size(BBoxSize(*clipBBox)); - clipBBox->set_difficult(bbox.difficult()); -} + // Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1]. + void ClipBBox(const caffe::NormalizedBBox& bbox, + caffe::NormalizedBBox* clipBBox) + { + clipBBox->set_xmin(std::max(std::min(bbox.xmin(), 1.f), 0.f)); + clipBBox->set_ymin(std::max(std::min(bbox.ymin(), 1.f), 0.f)); + clipBBox->set_xmax(std::max(std::min(bbox.xmax(), 1.f), 0.f)); + clipBBox->set_ymax(std::max(std::min(bbox.ymax(), 1.f), 0.f)); + clipBBox->clear_size(); + clipBBox->set_size(BBoxSize(*clipBBox)); + clipBBox->set_difficult(bbox.difficult()); + } -void DetectionOutputLayer::DecodeBBox( - const caffe::NormalizedBBox& priorBBox, const std::vector& priorVariance, - const CodeType codeType, const bool varianceEncodedInTarget, - const caffe::NormalizedBBox& bbox, caffe::NormalizedBBox* decodeBBox) -{ - if (codeType == caffe::PriorBoxParameter_CodeType_CORNER) + // Decode a bbox according to a prior bbox. + void DecodeBBox(const caffe::NormalizedBBox& priorBBox, const std::vector& priorVariance, + const CodeType codeType, const bool varianceEncodedInTarget, + const caffe::NormalizedBBox& bbox, caffe::NormalizedBBox* decodeBBox) { - if (varianceEncodedInTarget) + if (codeType == caffe::PriorBoxParameter_CodeType_CORNER) { - // variance is encoded in target, we simply need to add the offset - // predictions. - decodeBBox->set_xmin(priorBBox.xmin() + bbox.xmin()); - decodeBBox->set_ymin(priorBBox.ymin() + bbox.ymin()); - decodeBBox->set_xmax(priorBBox.xmax() + bbox.xmax()); - decodeBBox->set_ymax(priorBBox.ymax() + bbox.ymax()); + if (varianceEncodedInTarget) + { + // variance is encoded in target, we simply need to add the offset + // predictions. + decodeBBox->set_xmin(priorBBox.xmin() + bbox.xmin()); + decodeBBox->set_ymin(priorBBox.ymin() + bbox.ymin()); + decodeBBox->set_xmax(priorBBox.xmax() + bbox.xmax()); + decodeBBox->set_ymax(priorBBox.ymax() + bbox.ymax()); + } + else + { + // variance is encoded in bbox, we need to scale the offset accordingly. + decodeBBox->set_xmin( + priorBBox.xmin() + priorVariance[0] * bbox.xmin()); + decodeBBox->set_ymin( + priorBBox.ymin() + priorVariance[1] * bbox.ymin()); + decodeBBox->set_xmax( + priorBBox.xmax() + priorVariance[2] * bbox.xmax()); + decodeBBox->set_ymax( + priorBBox.ymax() + priorVariance[3] * bbox.ymax()); + } } - else + else if (codeType == caffe::PriorBoxParameter_CodeType_CENTER_SIZE) { - // variance is encoded in bbox, we need to scale the offset accordingly. 
- decodeBBox->set_xmin( - priorBBox.xmin() + priorVariance[0] * bbox.xmin()); - decodeBBox->set_ymin( - priorBBox.ymin() + priorVariance[1] * bbox.ymin()); - decodeBBox->set_xmax( - priorBBox.xmax() + priorVariance[2] * bbox.xmax()); - decodeBBox->set_ymax( - priorBBox.ymax() + priorVariance[3] * bbox.ymax()); - } - } - else - if (codeType == caffe::PriorBoxParameter_CodeType_CENTER_SIZE) - { - float priorWidth = priorBBox.xmax() - priorBBox.xmin(); - CV_Assert(priorWidth > 0); + float priorWidth = priorBBox.xmax() - priorBBox.xmin(); + CV_Assert(priorWidth > 0); - float priorHeight = priorBBox.ymax() - priorBBox.ymin(); - CV_Assert(priorHeight > 0); + float priorHeight = priorBBox.ymax() - priorBBox.ymin(); + CV_Assert(priorHeight > 0); - float priorCenterX = (priorBBox.xmin() + priorBBox.xmax()) / 2.; - float priorCenterY = (priorBBox.ymin() + priorBBox.ymax()) / 2.; + float priorCenterX = (priorBBox.xmin() + priorBBox.xmax()) / 2.; + float priorCenterY = (priorBBox.ymin() + priorBBox.ymax()) / 2.; - float decodeBBoxCenterX, decodeBBoxCenterY; - float decodeBBoxWidth, decodeBBoxHeight; - if (varianceEncodedInTarget) - { - // variance is encoded in target, we simply need to retore the offset - // predictions. - decodeBBoxCenterX = bbox.xmin() * priorWidth + priorCenterX; - decodeBBoxCenterY = bbox.ymin() * priorHeight + priorCenterY; - decodeBBoxWidth = exp(bbox.xmax()) * priorWidth; - decodeBBoxHeight = exp(bbox.ymax()) * priorHeight; - } - else - { - // variance is encoded in bbox, we need to scale the offset accordingly. - decodeBBoxCenterX = + float decodeBBoxCenterX, decodeBBoxCenterY; + float decodeBBoxWidth, decodeBBoxHeight; + if (varianceEncodedInTarget) + { + // variance is encoded in target, we simply need to restore the offset + // predictions. + decodeBBoxCenterX = bbox.xmin() * priorWidth + priorCenterX; + decodeBBoxCenterY = bbox.ymin() * priorHeight + priorCenterY; + decodeBBoxWidth = exp(bbox.xmax()) * priorWidth; + decodeBBoxHeight = exp(bbox.ymax()) * priorHeight; + } + else + { + // variance is encoded in bbox, we need to scale the offset accordingly. 
+ decodeBBoxCenterX = priorVariance[0] * bbox.xmin() * priorWidth + priorCenterX; - decodeBBoxCenterY = + decodeBBoxCenterY = priorVariance[1] * bbox.ymin() * priorHeight + priorCenterY; - decodeBBoxWidth = + decodeBBoxWidth = exp(priorVariance[2] * bbox.xmax()) * priorWidth; - decodeBBoxHeight = + decodeBBoxHeight = exp(priorVariance[3] * bbox.ymax()) * priorHeight; - } + } - decodeBBox->set_xmin(decodeBBoxCenterX - decodeBBoxWidth / 2.); - decodeBBox->set_ymin(decodeBBoxCenterY - decodeBBoxHeight / 2.); - decodeBBox->set_xmax(decodeBBoxCenterX + decodeBBoxWidth / 2.); - decodeBBox->set_ymax(decodeBBoxCenterY + decodeBBoxHeight / 2.); - } - else - { - CV_Error(Error::StsBadArg, "Unknown LocLossType."); + decodeBBox->set_xmin(decodeBBoxCenterX - decodeBBoxWidth / 2.); + decodeBBox->set_ymin(decodeBBoxCenterY - decodeBBoxHeight / 2.); + decodeBBox->set_xmax(decodeBBoxCenterX + decodeBBoxWidth / 2.); + decodeBBox->set_ymax(decodeBBoxCenterY + decodeBBoxHeight / 2.); + } + else + { + CV_Error(Error::StsBadArg, "Unknown LocLossType."); + } + float bboxSize = BBoxSize(*decodeBBox); + decodeBBox->set_size(bboxSize); } - float bboxSize = BBoxSize(*decodeBBox); - decodeBBox->set_size(bboxSize); -} -void DetectionOutputLayer::DecodeBBoxes( - const std::vector& priorBBoxes, - const std::vector >& priorVariances, - const CodeType codeType, const bool varianceEncodedInTarget, - const std::vector& bboxes, - std::vector* decodeBBoxes) -{ - CV_Assert(priorBBoxes.size() == priorVariances.size()); - CV_Assert(priorBBoxes.size() == bboxes.size()); - int numBBoxes = priorBBoxes.size(); - if (numBBoxes >= 1) + // Decode a set of bboxes according to a set of prior bboxes. + void DecodeBBoxes(const std::vector& priorBBoxes, + const std::vector >& priorVariances, + const CodeType codeType, const bool varianceEncodedInTarget, + const std::vector& bboxes, + std::vector* decodeBBoxes) { - CV_Assert(priorVariances[0].size() == 4); - } - decodeBBoxes->clear(); - for (int i = 0; i < numBBoxes; ++i) - { - caffe::NormalizedBBox decodeBBox; - DecodeBBox(priorBBoxes[i], priorVariances[i], codeType, - varianceEncodedInTarget, bboxes[i], &decodeBBox); - decodeBBoxes->push_back(decodeBBox); + CV_Assert(priorBBoxes.size() == priorVariances.size()); + CV_Assert(priorBBoxes.size() == bboxes.size()); + int numBBoxes = priorBBoxes.size(); + if (numBBoxes >= 1) + { + CV_Assert(priorVariances[0].size() == 4); + } + decodeBBoxes->clear(); + for (int i = 0; i < numBBoxes; ++i) + { + caffe::NormalizedBBox decodeBBox; + DecodeBBox(priorBBoxes[i], priorVariances[i], codeType, + varianceEncodedInTarget, bboxes[i], &decodeBBox); + decodeBBoxes->push_back(decodeBBox); + } } -} -void DetectionOutputLayer::DecodeBBoxesAll( - const std::vector& allLocPreds, - const std::vector& priorBBoxes, - const std::vector >& priorVariances, - const size_t num, const bool shareLocation, - const int numLocClasses, const int backgroundLabelId, - const CodeType codeType, const bool varianceEncodedInTarget, - std::vector* allDecodeBBoxes) -{ - CV_Assert(allLocPreds.size() == num); - allDecodeBBoxes->clear(); - allDecodeBBoxes->resize(num); - for (size_t i = 0; i < num; ++i) + // Decode all bboxes in a batch. 
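Before the batch-level driver below, a quick numeric check of the CENTER_SIZE branch above may help; every number here is invented for illustration, not taken from the patch:

    #include <cmath>
    #include <cstdio>

    // Hedged numeric sketch of the CENTER_SIZE decoding; all values assumed.
    int main()
    {
        // Prior box [0.2, 0.2, 0.6, 0.6]: center (0.4, 0.4), size 0.4 x 0.4.
        float priorW = 0.4f, priorH = 0.4f, priorCx = 0.4f, priorCy = 0.4f;
        // Encoded offsets (tx, ty, tw, th) and prior variances:
        float tx = 0.5f, ty = 0.0f, tw = 0.0f, th = 0.0f;
        float var[4] = { 0.1f, 0.1f, 0.2f, 0.2f };
        float cx = var[0] * tx * priorW + priorCx;  // 0.42
        float cy = var[1] * ty * priorH + priorCy;  // 0.40
        float w  = std::exp(var[2] * tw) * priorW;  // 0.40
        float h  = std::exp(var[3] * th) * priorH;  // 0.40
        // Decoded corners: [0.22, 0.20, 0.62, 0.60] -- the prior nudged right.
        std::printf("[%g, %g, %g, %g]\n", cx - w/2, cy - h/2, cx + w/2, cy + h/2);
        return 0;
    }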
+ void DecodeBBoxesAll(const std::vector& allLocPreds, + const std::vector& priorBBoxes, + const std::vector >& priorVariances, + const size_t num, const bool shareLocation, + const int numLocClasses, const int backgroundLabelId, + const CodeType codeType, const bool varianceEncodedInTarget, + std::vector* allDecodeBBoxes) { - // Decode predictions into bboxes. - LabelBBox& decodeBBoxes = (*allDecodeBBoxes)[i]; - for (int c = 0; c < numLocClasses; ++c) + CV_Assert(allLocPreds.size() == num); + allDecodeBBoxes->clear(); + allDecodeBBoxes->resize(num); + for (size_t i = 0; i < num; ++i) { - int label = shareLocation ? -1 : c; - if (label == backgroundLabelId) - { - // Ignore background class. - continue; - } - if (allLocPreds[i].find(label) == allLocPreds[i].end()) + // Decode predictions into bboxes. + LabelBBox& decodeBBoxes = (*allDecodeBBoxes)[i]; + for (int c = 0; c < numLocClasses; ++c) { - // Something bad happened if there are no predictions for current label. - util::make_error("Could not find location predictions for label ", label); - } - const std::vector& labelLocPreds = + int label = shareLocation ? -1 : c; + if (label == backgroundLabelId) + { + // Ignore background class. + continue; + } + if (allLocPreds[i].find(label) == allLocPreds[i].end()) + { + // Something bad happened if there are no predictions for current label. + util::make_error("Could not find location predictions for label ", label); + } + const std::vector& labelLocPreds = allLocPreds[i].find(label)->second; - DecodeBBoxes(priorBBoxes, priorVariances, - codeType, varianceEncodedInTarget, - labelLocPreds, &(decodeBBoxes[label])); + DecodeBBoxes(priorBBoxes, priorVariances, + codeType, varianceEncodedInTarget, + labelLocPreds, &(decodeBBoxes[label])); + } } } -} -void DetectionOutputLayer::GetPriorBBoxes(const float* priorData, const int& numPriors, - std::vector* priorBBoxes, - std::vector >* priorVariances) -{ - priorBBoxes->clear(); - priorVariances->clear(); - for (int i = 0; i < numPriors; ++i) + // Get prior bounding boxes from prior_data. + // prior_data: 1 x 2 x num_priors * 4 x 1 blob. + // num_priors: number of priors. + // prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox. + // prior_variances: stores all the variances needed by prior bboxes. 
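The comment above describes the prior blob as two back-to-back planes: num_priors boxes of four corners each, then num_priors variance quadruples. A hedged sketch of that memory layout for numPriors == 2 (values invented), matching the indexing GetPriorBBoxes uses below:

    // Box i lives at priorData[i * 4 .. i * 4 + 3]; its variances at
    // priorData[(numPriors + i) * 4 .. (numPriors + i) * 4 + 3].
    const int numPriors = 2;
    const float priorData[2 * numPriors * 4] = {
        0.1f, 0.1f, 0.3f, 0.3f,    // prior 0: xmin, ymin, xmax, ymax
        0.5f, 0.5f, 0.9f, 0.9f,    // prior 1
        0.1f, 0.1f, 0.2f, 0.2f,    // variances of prior 0
        0.1f, 0.1f, 0.2f, 0.2f     // variances of prior 1
    };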
+ void GetPriorBBoxes(const float* priorData, const int& numPriors, + std::vector* priorBBoxes, + std::vector >* priorVariances) { - int startIdx = i * 4; - caffe::NormalizedBBox bbox; - bbox.set_xmin(priorData[startIdx]); - bbox.set_ymin(priorData[startIdx + 1]); - bbox.set_xmax(priorData[startIdx + 2]); - bbox.set_ymax(priorData[startIdx + 3]); - float bboxSize = BBoxSize(bbox); - bbox.set_size(bboxSize); - priorBBoxes->push_back(bbox); - } + priorBBoxes->clear(); + priorVariances->clear(); + for (int i = 0; i < numPriors; ++i) + { + int startIdx = i * 4; + caffe::NormalizedBBox bbox; + bbox.set_xmin(priorData[startIdx]); + bbox.set_ymin(priorData[startIdx + 1]); + bbox.set_xmax(priorData[startIdx + 2]); + bbox.set_ymax(priorData[startIdx + 3]); + float bboxSize = BBoxSize(bbox); + bbox.set_size(bboxSize); + priorBBoxes->push_back(bbox); + } - for (int i = 0; i < numPriors; ++i) - { - int startIdx = (numPriors + i) * 4; - std::vector var; - for (int j = 0; j < 4; ++j) + for (int i = 0; i < numPriors; ++i) { - var.push_back(priorData[startIdx + j]); + int startIdx = (numPriors + i) * 4; + std::vector var; + for (int j = 0; j < 4; ++j) + { + var.push_back(priorData[startIdx + j]); + } + priorVariances->push_back(var); } - priorVariances->push_back(var); } -} - -void DetectionOutputLayer::ScaleBBox(const caffe::NormalizedBBox& bbox, - const int height, const int width, - caffe::NormalizedBBox* scaleBBox) -{ - scaleBBox->set_xmin(bbox.xmin() * width); - scaleBBox->set_ymin(bbox.ymin() * height); - scaleBBox->set_xmax(bbox.xmax() * width); - scaleBBox->set_ymax(bbox.ymax() * height); - scaleBBox->clear_size(); - bool normalized = !(width > 1 || height > 1); - scaleBBox->set_size(BBoxSize(*scaleBBox, normalized)); - scaleBBox->set_difficult(bbox.difficult()); -} - -void DetectionOutputLayer::GetLocPredictions( - const float* locData, const int num, - const int numPredsPerClass, const int numLocClasses, - const bool shareLocation, std::vector* locPreds) -{ - locPreds->clear(); - if (shareLocation) + // Scale the caffe::NormalizedBBox w.r.t. height and width. + void ScaleBBox(const caffe::NormalizedBBox& bbox, + const int height, const int width, + caffe::NormalizedBBox* scaleBBox) { - CV_Assert(numLocClasses == 1); + scaleBBox->set_xmin(bbox.xmin() * width); + scaleBBox->set_ymin(bbox.ymin() * height); + scaleBBox->set_xmax(bbox.xmax() * width); + scaleBBox->set_ymax(bbox.ymax() * height); + scaleBBox->clear_size(); + bool normalized = !(width > 1 || height > 1); + scaleBBox->set_size(BBoxSize(*scaleBBox, normalized)); + scaleBBox->set_difficult(bbox.difficult()); } - locPreds->resize(num); - for (int i = 0; i < num; ++i) + + // Get location predictions from loc_data. + // loc_data: num x num_preds_per_class * num_loc_classes * 4 blob. + // num: the number of images. + // num_preds_per_class: number of predictions per class. + // num_loc_classes: number of location classes. It is 1 if share_location is + // true; and is equal to number of classes needed to predict otherwise. + // share_location: if true, all classes share the same location prediction. + // loc_preds: stores the location prediction, where each item contains + // location prediction for an image. 
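To make the loc_data layout described above concrete, a hedged indexing sketch; the SSD300-like prior count is an assumption, not taken from the patch:

    // With shareLocation == true there is one location class (map key -1).
    const int numPredsPerClass = 8732, numLocClasses = 1;   // assumed sizes
    int i = 1, p = 10, c = 0;                               // image, prior, class
    int offset = (i * numPredsPerClass * numLocClasses
                  + p * numLocClasses + c) * 4;
    // locData[offset + 0..3] hold the (xmin, ymin, xmax, ymax) regression for
    // prior p of image i; the per-image pointer advance in the function below
    // is exactly numPredsPerClass * numLocClasses * 4 floats.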
+    void GetLocPredictions(const float* locData, const int num,
+                           const int numPredsPerClass, const int numLocClasses,
+                           const bool shareLocation, std::vector<LabelBBox>* locPreds)
     {
-        LabelBBox& labelBBox = (*locPreds)[i];
-        for (int p = 0; p < numPredsPerClass; ++p)
+        locPreds->clear();
+        if (shareLocation)
         {
-            int startIdx = p * numLocClasses * 4;
-            for (int c = 0; c < numLocClasses; ++c)
+            CV_Assert(numLocClasses == 1);
+        }
+        locPreds->resize(num);
+        for (int i = 0; i < num; ++i)
+        {
+            LabelBBox& labelBBox = (*locPreds)[i];
+            for (int p = 0; p < numPredsPerClass; ++p)
             {
-                int label = shareLocation ? -1 : c;
-                if (labelBBox.find(label) == labelBBox.end())
+                int startIdx = p * numLocClasses * 4;
+                for (int c = 0; c < numLocClasses; ++c)
                 {
-                    labelBBox[label].resize(numPredsPerClass);
+                    int label = shareLocation ? -1 : c;
+                    if (labelBBox.find(label) == labelBBox.end())
+                    {
+                        labelBBox[label].resize(numPredsPerClass);
+                    }
+                    labelBBox[label][p].set_xmin(locData[startIdx + c * 4]);
+                    labelBBox[label][p].set_ymin(locData[startIdx + c * 4 + 1]);
+                    labelBBox[label][p].set_xmax(locData[startIdx + c * 4 + 2]);
+                    labelBBox[label][p].set_ymax(locData[startIdx + c * 4 + 3]);
                 }
-                labelBBox[label][p].set_xmin(locData[startIdx + c * 4]);
-                labelBBox[label][p].set_ymin(locData[startIdx + c * 4 + 1]);
-                labelBBox[label][p].set_xmax(locData[startIdx + c * 4 + 2]);
-                labelBBox[label][p].set_ymax(locData[startIdx + c * 4 + 3]);
             }
+            locData += numPredsPerClass * numLocClasses * 4;
         }
-        locData += numPredsPerClass * numLocClasses * 4;
     }
-}

-void DetectionOutputLayer::GetConfidenceScores(
-        const float* confData, const int num,
-        const int numPredsPerClass, const int numClasses,
-        std::vector<std::map<int, std::vector<float> > >* confPreds)
-{
-    confPreds->clear();
-    confPreds->resize(num);
-    for (int i = 0; i < num; ++i)
+    // Get confidence predictions from conf_data.
+    // conf_data: num x num_preds_per_class * num_classes blob.
+    // num: the number of images.
+    // num_preds_per_class: number of predictions per class.
+    // num_classes: number of classes.
+    // conf_preds: stores the confidence prediction, where each item contains
+    //   confidence prediction for an image.
+    void GetConfidenceScores(const float* confData, const int num,
+                             const int numPredsPerClass, const int numClasses,
+                             std::vector<std::map<int, std::vector<float> > >* confPreds)
     {
-        std::map<int, std::vector<float> >& labelScores = (*confPreds)[i];
-        for (int p = 0; p < numPredsPerClass; ++p)
+        confPreds->clear();
+        confPreds->resize(num);
+        for (int i = 0; i < num; ++i)
         {
-            int startIdx = p * numClasses;
-            for (int c = 0; c < numClasses; ++c)
+            std::map<int, std::vector<float> >& labelScores = (*confPreds)[i];
+            for (int p = 0; p < numPredsPerClass; ++p)
             {
-                labelScores[c].push_back(confData[startIdx + c]);
+                int startIdx = p * numClasses;
+                for (int c = 0; c < numClasses; ++c)
+                {
+                    labelScores[c].push_back(confData[startIdx + c]);
+                }
             }
+            confData += numPredsPerClass * numClasses;
         }
-        confData += numPredsPerClass * numClasses;
     }
-}
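    // Illustrative sketch: GetConfidenceScores() above effectively transposes
    // conf_data from prediction-major to class-major order,
    //
    //   labelScores[c][p] == confData[i * numPredsPerClass * numClasses + p * numClasses + c]
    //
    // so the per-class NMS stage can consume one contiguous score vector per class.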
-void DetectionOutputLayer::ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes,
-                                        const std::vector<float>& scores,
-                                        const float score_threshold,
-                                        const float nms_threshold, const int top_k,
-                                        std::vector<int>* indices)
-{
-    // Sanity check.
-    CV_Assert(bboxes.size() == scores.size());
+    // Do non maximum suppression given bboxes and scores.
+    // Inspired by Piotr Dollar's NMS implementation in EdgeBox.
+    // https://goo.gl/jV3JYS
+    // bboxes: a set of bounding boxes.
+    // scores: a set of corresponding confidences.
+    // score_threshold: a threshold used to filter detection results.
+    // nms_threshold: a threshold used in non maximum suppression.
+    // top_k: if not -1, keep at most top_k picked indices.
+    // indices: the kept indices of bboxes after nms.
+    void ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes,
+                      const std::vector<float>& scores,
+                      const float score_threshold,
+                      const float nms_threshold, const int top_k,
+                      std::vector<int>* indices)
+    {
+        // Sanity check.
+        CV_Assert(bboxes.size() == scores.size());

-    // Get top_k scores (with corresponding indices).
-    std::vector<std::pair<float, int> > score_index_vec;
-    GetMaxScoreIndex(scores, score_threshold, top_k, &score_index_vec);
+        // Get top_k scores (with corresponding indices).
+        std::vector<std::pair<float, int> > score_index_vec;
+        GetMaxScoreIndex(scores, score_threshold, top_k, &score_index_vec);

-    // Do nms.
-    indices->clear();
-    while (score_index_vec.size() != 0)
-    {
-        const int idx = score_index_vec.front().second;
-        bool keep = true;
-        for (size_t k = 0; k < indices->size(); ++k)
+        // Do nms.
+        indices->clear();
+        while (score_index_vec.size() != 0)
         {
-            if (keep)
+            const int idx = score_index_vec.front().second;
+            bool keep = true;
+            for (size_t k = 0; k < indices->size(); ++k)
             {
-                const int kept_idx = (*indices)[k];
-                float overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]);
-                keep = overlap <= nms_threshold;
+                if (keep)
+                {
+                    const int kept_idx = (*indices)[k];
+                    float overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]);
+                    keep = overlap <= nms_threshold;
+                }
+                else
+                {
+                    break;
+                }
             }
-            else
+            if (keep)
             {
-                break;
+                indices->push_back(idx);
             }
+            score_index_vec.erase(score_index_vec.begin());
         }
-        if (keep)
-        {
-            indices->push_back(idx);
-        }
-        score_index_vec.erase(score_index_vec.begin());
     }
-}
-
-void DetectionOutputLayer::GetMaxScoreIndex(
-        const std::vector<float>& scores, const float threshold,const int top_k,
-        std::vector<std::pair<float, int> >* score_index_vec)
-{
-    // Generate index score pairs.
-    for (size_t i = 0; i < scores.size(); ++i)
+    // Get max scores with corresponding indices.
+    // scores: a set of scores.
+    // threshold: only consider scores higher than the threshold.
+    // top_k: if -1, keep all; otherwise, keep at most top_k.
+    // score_index_vec: store the sorted (score, index) pair.
+    void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold, const int top_k,
+                          std::vector<std::pair<float, int> >* score_index_vec)
     {
-        if (scores[i] > threshold)
+        // Generate index score pairs.
+        for (size_t i = 0; i < scores.size(); ++i)
         {
-            score_index_vec->push_back(std::make_pair(scores[i], i));
+            if (scores[i] > threshold)
+            {
+                score_index_vec->push_back(std::make_pair(scores[i], i));
+            }
         }
-    }
-    // Sort the score pair according to the scores in descending order
-    std::stable_sort(score_index_vec->begin(), score_index_vec->end(),
-                     util::SortScorePairDescend);
+        // Sort the score pairs according to the scores in descending order.
+        std::stable_sort(score_index_vec->begin(), score_index_vec->end(),
+                         util::SortScorePairDescend);

-    // Keep top_k scores if needed.
-    if (top_k > -1 && top_k < (int)score_index_vec->size())
-    {
-        score_index_vec->resize(top_k);
+        // Keep top_k scores if needed.
+        if (top_k > -1 && top_k < (int)score_index_vec->size())
+        {
+            score_index_vec->resize(top_k);
+        }
     }
-}
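    // Illustrative sketch: ApplyNMSFast() above is the classic greedy scheme --
    // visit boxes in descending score order and keep a box only if its overlap
    // with every already-kept box stays at or below nms_threshold. A hand-run
    // trace with nms_threshold = 0.5 and three boxes A, B, C sorted by score:
    //
    //   keep A                          // nothing kept yet
    //   IoU(B, A) = 0.7  > 0.5  -> drop B
    //   IoU(C, A) = 0.2 <= 0.5  -> keep C
    //
    // The erase-from-front loop is O(n^2) in the worst case, which is acceptable
    // here because GetMaxScoreIndex() already caps the candidate count at top_k.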
-    intersect_bbox->set_xmin(0);
-    intersect_bbox->set_ymin(0);
-    intersect_bbox->set_xmax(0);
-    intersect_bbox->set_ymax(0);
-  }
-  else
-  {
-    intersect_bbox->set_xmin(std::max(bbox1.xmin(), bbox2.xmin()));
-    intersect_bbox->set_ymin(std::max(bbox1.ymin(), bbox2.ymin()));
-    intersect_bbox->set_xmax(std::min(bbox1.xmax(), bbox2.xmax()));
-    intersect_bbox->set_ymax(std::min(bbox1.ymax(), bbox2.ymax()));
+    // Compute the intersection between two bboxes.
+    void IntersectBBox(const caffe::NormalizedBBox& bbox1,
+                       const caffe::NormalizedBBox& bbox2,
+                       caffe::NormalizedBBox* intersect_bbox) {
+        if (bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin() ||
+            bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin())
+        {
+            // Return [0, 0, 0, 0] if there is no intersection.
+            intersect_bbox->set_xmin(0);
+            intersect_bbox->set_ymin(0);
+            intersect_bbox->set_xmax(0);
+            intersect_bbox->set_ymax(0);
+        }
+        else
+        {
+            intersect_bbox->set_xmin(std::max(bbox1.xmin(), bbox2.xmin()));
+            intersect_bbox->set_ymin(std::max(bbox1.ymin(), bbox2.ymin()));
+            intersect_bbox->set_xmax(std::min(bbox1.xmax(), bbox2.xmax()));
+            intersect_bbox->set_ymax(std::min(bbox1.ymax(), bbox2.ymax()));
+        }
     }
-}
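    // Illustrative sketch: a worked example of the overlap computed by
    // JaccardOverlap() below, in the normalized case. For
    //
    //   bbox1 = [0.00, 0.00, 0.50, 0.50]   // area 0.25
    //   bbox2 = [0.25, 0.25, 0.75, 0.75]   // area 0.25
    //
    // IntersectBBox() yields [0.25, 0.25, 0.50, 0.50], i.e. 0.25 * 0.25 = 0.0625, so
    //
    //   IoU = 0.0625 / (0.25 + 0.25 - 0.0625) = 0.0625 / 0.4375 ~= 0.143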
-float DetectionOutputLayer::JaccardOverlap(const caffe::NormalizedBBox& bbox1,
-                                           const caffe::NormalizedBBox& bbox2,
-                                           const bool normalized) {
-    caffe::NormalizedBBox intersect_bbox;
-    IntersectBBox(bbox1, bbox2, &intersect_bbox);
-    float intersect_width, intersect_height;
-    if (normalized)
-    {
-        intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin();
-        intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin();
-    }
-    else
-    {
-        intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin() + 1;
-        intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin() + 1;
-    }
-    if (intersect_width > 0 && intersect_height > 0)
-    {
-        float intersect_size = intersect_width * intersect_height;
-        float bbox1_size = BBoxSize(bbox1);
-        float bbox2_size = BBoxSize(bbox2);
-        return intersect_size / (bbox1_size + bbox2_size - intersect_size);
-    }
-    else
+    // Compute the Jaccard (intersection over union, IoU) overlap between two bboxes.
+    float JaccardOverlap(const caffe::NormalizedBBox& bbox1,
+                         const caffe::NormalizedBBox& bbox2,
+                         const bool normalized = true)
     {
-        return 0.;
+        caffe::NormalizedBBox intersect_bbox;
+        IntersectBBox(bbox1, bbox2, &intersect_bbox);
+        float intersect_width, intersect_height;
+        if (normalized)
+        {
+            intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin();
+            intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin();
+        }
+        else
+        {
+            intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin() + 1;
+            intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin() + 1;
+        }
+        if (intersect_width > 0 && intersect_height > 0)
+        {
+            float intersect_size = intersect_width * intersect_height;
+            float bbox1_size = BBoxSize(bbox1);
+            float bbox2_size = BBoxSize(bbox2);
+            return intersect_size / (bbox1_size + bbox2_size - intersect_size);
+        }
+        else
+        {
+            return 0.;
+        }
     }
+};
+
+const std::string DetectionOutputLayerImpl::_layerName = std::string("DetectionOutput");
+
+Ptr<DetectionOutputLayer> DetectionOutputLayer::create(const LayerParams &params)
+{
+    return Ptr<DetectionOutputLayer>(new DetectionOutputLayerImpl(params));
+}

 }
 }
diff --git a/modules/dnn/src/layers/detection_output_layer.hpp b/modules/dnn/src/layers/detection_output_layer.hpp
deleted file mode 100644
index 0b28d69a8ff..00000000000
--- a/modules/dnn/src/layers/detection_output_layer.hpp
+++ /dev/null
@@ -1,226 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__ - -#include "../precomp.hpp" -#include "caffe.pb.h" - -namespace cv -{ -namespace dnn -{ -class DetectionOutputLayer : public Layer -{ - unsigned _numClasses; - bool _shareLocation; - int _numLocClasses; - - int _backgroundLabelId; - - typedef caffe::PriorBoxParameter_CodeType CodeType; - CodeType _codeType; - - bool _varianceEncodedInTarget; - int _keepTopK; - float _confidenceThreshold; - - int _num; - int _numPriors; - - float _nmsThreshold; - int _topK; - - static const size_t _numAxes = 4; - static const std::string _layerName; - -public: - DetectionOutputLayer(LayerParams ¶ms); - void allocate(const std::vector &inputs, std::vector &outputs); - void forward(std::vector &inputs, std::vector &outputs); - - void checkInputs(const std::vector &inputs); - void getCodeType(LayerParams ¶ms); - - template - T getParameter(const LayerParams ¶ms, - const std::string ¶meterName, - const size_t &idx = 0, - const bool required = true, - const T& defaultValue = T()); - - bool getParameterDict(const LayerParams ¶ms, - const std::string ¶meterName, - DictValue& result); - - typedef std::map > LabelBBox; - - // Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1]. - void ClipBBox(const caffe::NormalizedBBox& bbox, caffe::NormalizedBBox* clip_bbox); - - // Decode a bbox according to a prior bbox. - void DecodeBBox(const caffe::NormalizedBBox& prior_bbox, - const std::vector& prior_variance, const CodeType code_type, - const bool variance_encoded_in_target, const caffe::NormalizedBBox& bbox, - caffe::NormalizedBBox* decode_bbox); - - // Decode a set of bboxes according to a set of prior bboxes. - void DecodeBBoxes(const std::vector& prior_bboxes, - const std::vector >& prior_variances, - const CodeType code_type, const bool variance_encoded_in_target, - const std::vector& bboxes, - std::vector* decode_bboxes); - - // Decode all bboxes in a batch. - void DecodeBBoxesAll(const std::vector& all_loc_pred, - const std::vector& prior_bboxes, - const std::vector >& prior_variances, - const size_t num, const bool share_location, - const int num_loc_classes, const int background_label_id, - const CodeType code_type, const bool variance_encoded_in_target, - std::vector* all_decode_bboxes); - - // Get prior bounding boxes from prior_data. - // prior_data: 1 x 2 x num_priors * 4 x 1 blob. - // num_priors: number of priors. - // prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox. - // prior_variances: stores all the variances needed by prior bboxes. - void GetPriorBBoxes(const float* priorData, const int& numPriors, - std::vector* priorBBoxes, - std::vector >* priorVariances); - - // Scale the caffe::NormalizedBBox w.r.t. height and width. - void ScaleBBox(const caffe::NormalizedBBox& bbox, const int height, const int width, - caffe::NormalizedBBox* scale_bbox); - - // Do non maximum suppression given bboxes and scores. - // Inspired by Piotr Dollar's NMS implementation in EdgeBox. - // https://goo.gl/jV3JYS - // bboxes: a set of bounding boxes. - // scores: a set of corresponding confidences. - // score_threshold: a threshold used to filter detection results. - // nms_threshold: a threshold used in non maximum suppression. - // top_k: if not -1, keep at most top_k picked indices. - // indices: the kept indices of bboxes after nms. 
- void ApplyNMSFast(const std::vector& bboxes, - const std::vector& scores, const float score_threshold, - const float nms_threshold, const int top_k, std::vector* indices); - - - // Do non maximum suppression given bboxes and scores. - // bboxes: a set of bounding boxes. - // scores: a set of corresponding confidences. - // threshold: the threshold used in non maximu suppression. - // top_k: if not -1, keep at most top_k picked indices. - // reuse_overlaps: if true, use and update overlaps; otherwise, always - // compute overlap. - // overlaps: a temp place to optionally store the overlaps between pairs of - // bboxes if reuse_overlaps is true. - // indices: the kept indices of bboxes after nms. - void ApplyNMS(const std::vector& bboxes, - const std::vector& scores, - const float threshold, const int top_k, const bool reuse_overlaps, - std::map >* overlaps, std::vector* indices); - - void ApplyNMS(const bool* overlapped, const int num, std::vector* indices); - - // Get confidence predictions from conf_data. - // conf_data: num x num_preds_per_class * num_classes blob. - // num: the number of images. - // num_preds_per_class: number of predictions per class. - // num_classes: number of classes. - // conf_preds: stores the confidence prediction, where each item contains - // confidence prediction for an image. - void GetConfidenceScores(const float* conf_data, const int num, - const int num_preds_per_class, const int num_classes, - std::vector > >* conf_scores); - - // Get confidence predictions from conf_data. - // conf_data: num x num_preds_per_class * num_classes blob. - // num: the number of images. - // num_preds_per_class: number of predictions per class. - // num_classes: number of classes. - // class_major: if true, data layout is - // num x num_classes x num_preds_per_class; otherwise, data layerout is - // num x num_preds_per_class * num_classes. - // conf_preds: stores the confidence prediction, where each item contains - // confidence prediction for an image. - void GetConfidenceScores(const float* conf_data, const int num, - const int num_preds_per_class, const int num_classes, - const bool class_major, - std::vector > >* conf_scores); - - // Get location predictions from loc_data. - // loc_data: num x num_preds_per_class * num_loc_classes * 4 blob. - // num: the number of images. - // num_preds_per_class: number of predictions per class. - // num_loc_classes: number of location classes. It is 1 if share_location is - // true; and is equal to number of classes needed to predict otherwise. - // share_location: if true, all classes share the same location prediction. - // loc_preds: stores the location prediction, where each item contains - // location prediction for an image. - void GetLocPredictions(const float* loc_data, const int num, - const int num_preds_per_class, const int num_loc_classes, - const bool share_location, std::vector* loc_preds); - - // Get max scores with corresponding indices. - // scores: a set of scores. - // threshold: only consider scores higher than the threshold. - // top_k: if -1, keep all; otherwise, keep at most top_k. - // score_index_vec: store the sorted (score, index) pair. - void GetMaxScoreIndex(const std::vector& scores, const float threshold, - const int top_k, std::vector >* score_index_vec); - - // Compute the jaccard (intersection over union IoU) overlap between two bboxes. 
- float JaccardOverlap(const caffe::NormalizedBBox& bbox1, const caffe::NormalizedBBox& bbox2, - const bool normalized = true); - - // Compute the intersection between two bboxes. - void IntersectBBox(const caffe::NormalizedBBox& bbox1, const caffe::NormalizedBBox& bbox2, - caffe::NormalizedBBox* intersect_bbox); - - // Compute bbox size. - float BBoxSize(const caffe::NormalizedBBox& bbox, const bool normalized = true); -}; -} -} -#endif diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 914e9c2fe84..74e5ab4ac71 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -1,92 +1,254 @@ #include "../precomp.hpp" -#include "elementwise_layers.hpp" #include "opencv2/imgproc.hpp" +#include namespace cv { namespace dnn { -#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \ -Ptr<_Layer> _Layer::create() { \ - return return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); } +using std::abs; +using std::exp; +using std::tanh; +using std::pow; - -Ptr ReLULayer::create(double negativeSlope) +template +class ElementWiseLayer : public Func::Layer { - Ptr layer(new ElementWiseLayer(ReLUFunctor(negativeSlope))); - layer->negativeSlope = negativeSlope; - return layer; -} + Func func; + + template + class PBody : public cv::ParallelLoopBody + { + Func &func; + Dtype *data; + public: + + PBody(Mat &mat, Func &func_) : + func(func_), data(mat.ptr()) + {} + + void operator()(const Range &r) const + { + for (int i = r.start; i < r.end; i++) + data[i] = func(data[i]); + } + }; + +public: + + ElementWiseLayer(const Func &f=Func()) : func(f) {} + + void allocate(const std::vector &inputs, std::vector &outputs) + { + outputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); i++) + { + outputs[i] = *inputs[i]; + } + } + + void forward(std::vector &inputs, std::vector &outputs) + { + for (size_t i = 0; i < inputs.size(); i++) + { + const Mat &src = *inputs[i]; + Mat &dst = outputs[i]; + CV_Assert(src.ptr() == dst.ptr() && src.isContinuous()); + + Range sizeRange = Range(0, dst.total()); + CV_Assert(src.type() == CV_32F); + cv::parallel_for_(sizeRange, PBody(dst, func)); + } + } +}; -Ptr TanHLayer::create() +struct ReLUFunctor { - return Ptr(new ElementWiseLayer()); -} + typedef ReLULayer Layer; + float slope; -Ptr SigmoidLayer::create() + ReLUFunctor(float slope_) : slope(slope_) {} + + template + inline TFloat operator()(TFloat x) const + { + return (x >= (TFloat)0) ? 
x : (TFloat)slope * x; + } +}; + +struct TanHFunctor { - return Ptr(new ElementWiseLayer()); -} + typedef TanHLayer Layer; + + template + inline TFloat operator()(TFloat x) const + { + return tanh(x); + } +}; -Ptr AbsLayer::create() +struct SigmoidFunctor { - return Ptr(new ElementWiseLayer()); -} + typedef SigmoidLayer Layer; + + template + inline TFloat operator()(TFloat x) const + { + return (TFloat)1 / ((TFloat)1 + exp(-x)); + } +}; -Ptr BNLLLayer::create() +struct AbsValFunctor { - return Ptr(new ElementWiseLayer()); -} + typedef AbsLayer Layer; -Ptr PowerLayer::create(double power /*= 1*/, double scale /*= 1*/, double shift /*= 0*/) + template + inline TFloat operator()(TFloat x) const + { + return abs(x); + } +}; + +struct BNLLFunctor { - const PowerFunctor f(power, scale, shift); - Ptr layer(new ElementWiseLayer(f)); - layer->power = power; - layer->scale = scale; - layer->shift = shift; - return layer; -} + typedef BNLLLayer Layer; -//////////////////////////////////////////////////////////////////////////// + template + inline TFloat operator()(TFloat x) const + { + return log((TFloat)1 + exp(-abs(x))); + } +}; -void ChannelsPReLULayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +struct PowerFunctor { - CV_Assert(blobs.size() == 1); + typedef PowerLayer Layer; - outputs.resize(inputs.size()); - for (size_t i = 0; i < inputs.size(); i++) + const float power; + const float scale; + const float shift; + + PowerFunctor(float power_, float scale_ = 1.f, float shift_ = 0) + : power(power_), scale(scale_), shift(shift_) {} + + template + inline TFloat operator()(TFloat x) const { - outputs[i].create(inputs[i]->shape()); + return power == 1.0f ? (TFloat)shift + (TFloat)scale * x : + pow((TFloat)shift + (TFloat)scale * x, (TFloat)power); } -} +}; -void ChannelsPReLULayerImpl::forward(std::vector &inputs, std::vector &outputs) +class ChannelsPReLULayerImpl : public ChannelsPReLULayer { - CV_Assert(inputs.size() == 1); +public: + ChannelsPReLULayerImpl(const LayerParams& params) + { + CV_Assert(params.blobs.size() == 1); + setParamsFrom(params); + } + + //////////////////////////////////////////////////////////////////////////// - Blob &inpBlob = *inputs[0]; + void allocate(const std::vector &inputs, std::vector &outputs) + { + CV_Assert(blobs.size() == 1); + + outputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); i++) + { + outputs[i].create(inputs[i]->dims, inputs[i]->size.p, inputs[i]->type()); + } + } - for (size_t ii = 0; ii < outputs.size(); ii++) + void forward(std::vector &inputs, std::vector &outputs) { - Blob &outBlob = outputs[ii]; + CV_Assert(inputs.size() == 1); - CV_Assert(blobs[0].total() == inpBlob.channels()); + Mat &inpBlob = *inputs[0]; - for (int n = 0; n < inpBlob.channels(); n++) + for (size_t ii = 0; ii < outputs.size(); ii++) { - float slopeWeight = blobs[0].matRefConst().at(n); + Mat &outBlob = outputs[ii]; - cv::threshold(inpBlob.getPlane(0, n), outBlob.getPlane(0, n), 0, 0, cv::THRESH_TOZERO_INV); - outBlob.getPlane(0, n) = inpBlob.getPlane(0, n) + (slopeWeight - 1)*outBlob.getPlane(0, n); + CV_Assert(blobs[0].total() == inpBlob.size[1]); + + for (int n = 0; n < inpBlob.size[1]; n++) + { + float slopeWeight = blobs[0].at(n); + + Mat inpBlobPlane = getPlane(inpBlob, 0, n); + Mat outBlobPlane = getPlane(outBlob, 0, n); + + threshold(inpBlobPlane, outBlobPlane, 0, 0, cv::THRESH_TOZERO_INV); + scaleAdd(outBlobPlane, slopeWeight-1, inpBlobPlane, outBlobPlane); + } } } +}; + +#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) 
\
+Ptr<_Layer> _Layer::create() { \
+    return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }


+Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
+{
+    float negativeSlope = params.get<float>("negative_slope", 0.f);
+    Ptr<ReLULayer> l(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));
+    l->setParamsFrom(params);
+
+    return l;
 }

-Ptr<ChannelsPReLULayer> ChannelsPReLULayer::create()
+Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
+{
+    Ptr<TanHLayer> l(new ElementWiseLayer<TanHFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+Ptr<SigmoidLayer> SigmoidLayer::create(const LayerParams& params)
+{
+    Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+Ptr<AbsLayer> AbsLayer::create(const LayerParams& params)
+{
+    Ptr<AbsLayer> l(new ElementWiseLayer<AbsValFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+Ptr<BNLLLayer> BNLLLayer::create(const LayerParams& params)
+{
+    Ptr<BNLLLayer> l(new ElementWiseLayer<BNLLFunctor>());
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
+{
+    float power = params.get<float>("power", 1.0f);
+    float scale = params.get<float>("scale", 1.0f);
+    float shift = params.get<float>("shift", 0.0f);
+    Ptr<PowerLayer> l(new ElementWiseLayer<PowerFunctor>(PowerFunctor(power, scale, shift)));
+    l->setParamsFrom(params);
+
+    return l;
+}
+
+
+Ptr<ChannelsPReLULayer> ChannelsPReLULayer::create(const LayerParams& params)
 {
-    return Ptr<ChannelsPReLULayer>(new ChannelsPReLULayerImpl());
+    return Ptr<ChannelsPReLULayer>(new ChannelsPReLULayerImpl(params));
 }
 }
diff --git a/modules/dnn/src/layers/elementwise_layers.hpp b/modules/dnn/src/layers/elementwise_layers.hpp
deleted file mode 100644
index 0c01812bf36..00000000000
--- a/modules/dnn/src/layers/elementwise_layers.hpp
+++ /dev/null
@@ -1,328 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_ELEMENTWISE_LAYERS_HPP__ -#define __OPENCV_DNN_LAYERS_ELEMENTWISE_LAYERS_HPP__ -#include "../precomp.hpp" -#include "layers_common.hpp" -#include -#include -#include -#include "opencl_kernels_dnn.hpp" - -namespace cv -{ -namespace dnn -{ - -using std::abs; -using std::exp; -using std::tanh; -using std::pow; - -template -class ElementWiseLayer : public Func::Layer -{ - bool useOpenCL; - Func func; - - template - class PBody : public cv::ParallelLoopBody - { - Func &func; - Dtype *data; - public: - - PBody(Mat &mat, Func &func_) : - func(func_), data(mat.ptr()) - {} - - void operator()(const Range &r) const - { - for (int i = r.start; i < r.end; i++) - data[i] = func(data[i]); - } - }; - -public: - - ElementWiseLayer() {} - ElementWiseLayer(const Func &f) : func(f) {} - - void allocate(const std::vector &inputs, std::vector &outputs) - { - useOpenCL = ocl::useOpenCL(); - - outputs.resize(inputs.size()); - for (size_t i = 0; i < inputs.size(); i++) - { - outputs[i].shareFrom(*inputs[i]); //no data copy - - //hotfix: shareFrom doesn't provide properly Mat/UMat switching - if (useOpenCL) - outputs[i].umatRef() = inputs[i]->umatRefConst(); - else - outputs[i].matRef() = inputs[i]->matRefConst(); - } - } - - void forward(std::vector &inputs, std::vector &outputs) - { - #ifdef HAVE_OPENCL - if (useOpenCL) - forwardOCL(inputs, outputs); - else - #endif - forwardCPU(inputs, outputs); - } - - #ifdef HAVE_OPENCL - void forwardOCL(std::vector &inputs, std::vector &outputs) - { - size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize(); - - for (size_t i = 0; i < inputs.size(); i++) - { - const UMat &src = inputs[i]->umatRefConst(); - UMat &dst = outputs[i].umatRef(); - CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset); - - ocl::Kernel ker; - CV_Assert(func.initKernel(ker, src)); - ker.set(0, (int)src.total()); - ker.set(1, ocl::KernelArg::PtrReadOnly(src)); - ker.set(2, ocl::KernelArg::PtrWriteOnly(dst)); - - size_t gSize = src.total(); - CV_Assert(ker.run(1, &gSize, &wgSize, true)); - } - } - #endif - - void forwardCPU(std::vector &inputs, std::vector &outputs) - { - for (size_t i = 0; i < inputs.size(); i++) - { - const Mat &src = inputs[i]->matRefConst(); - Mat &dst = outputs[i].matRef(); - CV_Assert(src.ptr() == dst.ptr() && src.isContinuous()); - - Range sizeRange = Range(0, dst.total()); - if (dst.type() == CV_32F) - { - cv::parallel_for_(sizeRange, PBody(dst, func)); - } - else if (dst.type() == CV_64F) - { - cv::parallel_for_(sizeRange, PBody(dst, func)); - } - else - { - CV_Error(Error::StsNotImplemented, "Only CV_32F and CV_64F blobs are supported"); - } - } - } -}; - -#ifdef HAVE_OPENCL -static String oclGetTMacro(const UMat &m) -{ - return String("-DT=") + ocl::typeToStr(m.type()) + String(" "); -} -#endif - -struct ReLUFunctor -{ - typedef ReLULayer Layer; - - double slope; - - ReLUFunctor(double slope_) - : slope(slope_) {} - - template - inline TFloat operator()(TFloat x) const 
- { - return (x >= (TFloat)0) ? x : (TFloat)slope * x; - } - - #ifdef HAVE_OPENCL - bool initKernel(ocl::Kernel &ker, const UMat &src) const - { - const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : ""; - String buildopt = oclGetTMacro(src) + buildoptSlope; - - if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt)) - return false; - - if (slope != 0) - ker.set(3, (float)slope); - - return true; - } - #endif -}; - -struct TanHFunctor -{ - typedef TanHLayer Layer; - - template - inline TFloat operator()(TFloat x) const - { - return tanh(x); - } - - #ifdef HAVE_OPENCL - bool initKernel(ocl::Kernel &ker, const UMat &src) const - { - if (!ker.create("TanHForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src))) - return false; - return true; - } - #endif -}; - -struct SigmoidFunctor -{ - typedef SigmoidLayer Layer; - - template - inline TFloat operator()(TFloat x) const - { - return (TFloat)1 / ((TFloat)1 + exp(-x)); - } - - #ifdef HAVE_OPENCL - bool initKernel(ocl::Kernel &ker, const UMat &src) const - { - if (!ker.create("SigmoidForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src))) - return false; - return true; - } - #endif -}; - -struct AbsValFunctor -{ - typedef AbsLayer Layer; - - template - inline TFloat operator()(TFloat x) const - { - return abs(x); - } - - #ifdef HAVE_OPENCL - bool initKernel(ocl::Kernel &ker, const UMat &src) const - { - if (!ker.create("AbsValForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src))) - return false; - return true; - } - #endif -}; - -struct BNLLFunctor -{ - typedef BNLLLayer Layer; - - template - inline TFloat operator()(TFloat x) const - { - return log((TFloat)1 + exp(-abs(x))); - } - - #ifdef HAVE_OPENCL - bool initKernel(ocl::Kernel &ker, const UMat &src) const - { - if (!ker.create("BNLLForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src))) - return false; - return true; - } - #endif -}; - -struct PowerFunctor -{ - typedef PowerLayer Layer; - - const double power; - const double scale; - const double shift; - - PowerFunctor(double power_, double scale_ = 1, double shift_ = 0) - : power(power_), scale(scale_), shift(shift_) {} - - template - inline TFloat operator()(TFloat x) const - { - return power == 1.0 ? 
(TFloat)shift + (TFloat)scale * x : pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
-    }
-
-    #ifdef HAVE_OPENCL
-    bool initKernel(ocl::Kernel &ker, const UMat &src) const
-    {
-        if (!ker.create("PowForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
-            return false;
-
-        ker.set(3, (float)power);
-        ker.set(4, (float)scale);
-        ker.set(5, (float)shift);
-
-        return true;
-    }
-    #endif
-};
-
-class ChannelsPReLULayerImpl : public ChannelsPReLULayer
-{
-public:
-    ChannelsPReLULayerImpl() {}
-
-    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-
-    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-};
-
-}
-}
-#endif
diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp
index 3568ccdd4ad..2e88bbbe500 100755
--- a/modules/dnn/src/layers/eltwise_layer.cpp
+++ b/modules/dnn/src/layers/eltwise_layer.cpp
@@ -41,88 +41,117 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
-#include "eltwise_layer.hpp"

 namespace cv
 {
 namespace dnn
 {
-    EltwiseLayerImpl::EltwiseLayerImpl(EltwiseOp op_, const std::vector<int> &coeffs_)
+
+class EltwiseLayerImpl : public EltwiseLayer
+{
+public:
+    EltwiseOp op;
+    std::vector<float> coeffs;
+
+    EltwiseLayerImpl(const LayerParams& params)
     {
-        op = op_;
-        coeffs = coeffs_;
+        setParamsFrom(params);
+        op = EltwiseLayer::SUM;
+        if (params.has("operation"))
+        {
+            String operation = params.get<String>("operation").toLowerCase();
+            if (operation == "prod")
+                op = EltwiseLayer::PROD;
+            else if (operation == "sum")
+                op = EltwiseLayer::SUM;
+            else if (operation == "max")
+                op = EltwiseLayer::MAX;
+            else
+                CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\"");
+        }
+
+        if (params.has("coeff"))
+        {
+            DictValue paramCoeff = params.get("coeff");
+            int i, n = paramCoeff.size();
+            coeffs.resize(n);
+            for (i = 0; i < n; i++)
+            {
+                coeffs[i] = paramCoeff.get<float>(i);
+            }
+        }
     }

-    void EltwiseLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
         CV_Assert(2 <= inputs.size());
         CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
         CV_Assert(op == SUM || coeffs.size() == 0);

-        const BlobShape &shape0 = inputs[0]->shape();
         for (size_t i = 1; i < inputs.size(); ++i)
         {
-            BlobShape iShape = inputs[i]->shape();
-            CV_Assert(shape0 == iShape);
+            CV_Assert(inputs[i]->size == inputs[0]->size);
         }

         outputs.resize(1);
-        outputs[0].create(shape0);
+        outputs[0].create(inputs[0]->dims, inputs[0]->size.p, inputs[0]->type());
     }

-    void EltwiseLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
         switch (op)
         {
-        case SUM:
+            case SUM:
             {
                 CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
-                Mat& output = outputs[0].matRef();
+                Mat& output = outputs[0];
                 output.setTo(0.);
                 if (0 < coeffs.size())
                 {
                     for (size_t i = 0; i < inputs.size(); i++)
                     {
-                        output += inputs[i]->matRefConst() * coeffs[i];
+                        output += *inputs[i] * coeffs[i];
                     }
                 }
                 else
                 {
                     for (size_t i = 0; i < inputs.size(); i++)
                     {
-                        output += inputs[i]->matRefConst();
+                        output += *inputs[i];
                     }
                 }
             }
-        break;
-        case PROD:
+            break;
+            case PROD:
             {
-                Mat& output = outputs[0].matRef();
+                Mat& output = outputs[0];
                 output.setTo(1.);
                 for (size_t i = 0; i < inputs.size(); i++)
                 {
-                    output = output.mul(inputs[i]->matRefConst());
+                    output = output.mul(*inputs[i]);
                 }
             }
-        break;
-        case MAX:
+            break;
+            case MAX:
             {
-                Mat& output = outputs[0].matRef();
-                cv::max(inputs[0]->matRefConst(), inputs[1]->matRefConst(), output);
+                Mat& output = outputs[0];
+                cv::max(*inputs[0], *inputs[1], output);
                 for (size_t i = 2; i < inputs.size(); i++)
                 {
-                    cv::max(output, inputs[i]->matRefConst(), output);
+                    cv::max(output, *inputs[i], output);
                 }
             }
-        break;
-        default:
-            CV_Assert(0);
-            break;
-        };
+            break;
+            default:
+                CV_Assert(0);
+                break;
+        }
     }
+};
+
+Ptr<EltwiseLayer> EltwiseLayer::create(const LayerParams& params)
+{
+    return Ptr<EltwiseLayer>(new EltwiseLayerImpl(params));
+}

-    Ptr<EltwiseLayer> EltwiseLayer::create(EltwiseOp op, const std::vector<int> &coeffs)
-    {
-        return Ptr<EltwiseLayer>(new EltwiseLayerImpl(op, coeffs));
-    }
 }
 }
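// Illustrative sketch: with the LayerParams-based factory above, an eltwise layer
// is configured through dictionary entries rather than constructor arguments. A
// minimal usage sketch for a hypothetical two-input setup (DictValue::arrayReal is
// an assumption based on the dnn Dict API of the same era):
//
//   double coeffVals[] = {0.5, 0.5};          // optional per-input weights (SUM only)
//   LayerParams lp;
//   lp.set("operation", "sum");               // "sum", "prod" or "max"
//   lp.set("coeff", DictValue::arrayReal<double*>(coeffVals, 2));
//   Ptr<EltwiseLayer> layer = EltwiseLayer::create(lp);
//
// Omitting "operation" falls back to SUM, and omitting "coeff" gives an unweighted sum.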
diff --git a/modules/dnn/src/layers/eltwise_layer.hpp b/modules/dnn/src/layers/eltwise_layer.hpp
deleted file mode 100755
index c67575cde2a..00000000000
--- a/modules/dnn/src/layers/eltwise_layer.hpp
+++ /dev/null
@@ -1,62 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_ELTWISE_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_ELTWISE_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - class EltwiseLayerImpl : public EltwiseLayer - { - EltwiseOp op; - std::vector coeffs; - public: - EltwiseLayerImpl(EltwiseOp op, const std::vector &coeffs); - void allocate(const std::vector &inputs, std::vector &outputs); - void forward(std::vector &inputs, std::vector &outputs); - }; -} -} -#endif diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index dc8070393ff..05700f90c05 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -41,7 +41,6 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include "flatten_layer.hpp" #include #include @@ -50,68 +49,72 @@ namespace cv namespace dnn { -FlattenLayer::FlattenLayer(LayerParams ¶ms) : Layer(params) +class FlattenLayerImpl : public FlattenLayer { - _startAxis = params.get("axis", 1); - _endAxis = params.get("end_axis", -1); -} +public: + FlattenLayerImpl(const LayerParams ¶ms) + { + _startAxis = params.get("axis", 1); + _endAxis = params.get("end_axis", -1); + setParamsFrom(params); + } -void FlattenLayer::checkInputs(const std::vector &inputs) -{ - CV_Assert(inputs.size() > 0); - for (size_t i = 1; i < inputs.size(); i++) + void allocate(const std::vector &inputs, std::vector &outputs) { - for (size_t j = 0; j < _numAxes; j++) + size_t i, ninputs = inputs.size(); + CV_Assert(ninputs > 0); + const Mat& inp0 = *inputs[0]; + + for (i = 1; i < ninputs; i++) { - CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]); + CV_Assert(inputs[i]->size == inp0.size); } - } -} -void FlattenLayer::allocate(const std::vector &inputs, std::vector &outputs) -{ - checkInputs(inputs); + _numAxes = inp0.dims; + _endAxis = _endAxis < 0 ? 
_endAxis + _numAxes : _endAxis;
+        CV_Assert(_startAxis >= 0);
+        CV_Assert(_endAxis >= _startAxis && _endAxis < (int)_numAxes);
-    _numAxes = inputs[0]->dims();
-    _endAxis = inputs[0]->canonicalAxis(_endAxis);
-    CV_Assert(_startAxis >= 0);
-    CV_Assert(_endAxis >= _startAxis && _endAxis < (int)_numAxes);
+        size_t flattenedDimensionSize = inp0.total(_startAxis, _endAxis+1);

-    size_t flattenedDimensionSize = 1;
-    for (int i = _startAxis; i <= _endAxis; i++)
-    {
-        flattenedDimensionSize *= inputs[0]->size(i);
-    }
+        resultShape.clear();
+        for (int j = 0; j < _startAxis; j++)
+        {
+            resultShape.push_back(inp0.size[j]);
+        }
+        resultShape.push_back(flattenedDimensionSize);
+        for (int j = _endAxis + 1; j < _numAxes; j++)
+        {
+            resultShape.push_back(inp0.size[j]);
+        }
+        CV_Assert(resultShape.size() <= 4);

-    std::vector outputShapeVec;
-    for (int i = 0; i < _startAxis; i++)
-    {
-        outputShapeVec.push_back(inputs[0]->size(i));
+        outputs.resize(ninputs);
+        for (i = 0; i < ninputs; i++)
+        {
+            //in-place
+            outputs[i] = inputs[i]->reshape(1, (int)resultShape.size(), &resultShape[0]);
+        }
     }
-    outputShapeVec.push_back(flattenedDimensionSize);
-    for (size_t i = _endAxis + 1; i < _numAxes; i++)
+
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-        outputShapeVec.push_back(inputs[0]->size(i));
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            outputs[i] = inputs[i]->reshape(1, (int)resultShape.size(), &resultShape[0]);
+        }
     }
-    CV_Assert(outputShapeVec.size() <= 4);
-    resultShape = BlobShape(outputShapeVec);

+    int _startAxis;
+    int _endAxis;
+    size_t _numAxes;

-    for (size_t i = 0; i < inputs.size(); i++)
-    {
-        //in-place
-        outputs[i].shareFrom(*inputs[i]);
-        outputs[i].reshape(resultShape);
-    }
-}
+    std::vector<int> resultShape;
+};

-void FlattenLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+Ptr<FlattenLayer> FlattenLayer::create(const LayerParams& params)
 {
-    for (size_t j = 0; j < inputs.size(); j++)
-    {
-        outputs[j].shareFrom(*inputs[j]);
-        outputs[j].reshape(resultShape);
-    }
+    return Ptr<FlattenLayer>(new FlattenLayerImpl(params));
 }

 }
 }
diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index bb5802743cb..bb296057aa7 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -41,88 +41,95 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
-#include "fully_connected_layer.hpp"
 #include "op_blas.hpp"
 #include
-#include

 namespace cv
 {
 namespace dnn
 {

-FullyConnectedLayerImpl::FullyConnectedLayerImpl(int axis_)
+class FullyConnectedLayerImpl : public InnerProductLayer
 {
-    axis = axis_;
-}
-
-void FullyConnectedLayerImpl::allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
-{
-    CV_Assert(input.size() > 0);
-    CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
-    CV_Assert(blobs[0].dims() == 2);
-
-    bias = (blobs.size() >= 1);
-    axisCan = input[0]->canonicalAxis(axis);
-    dtype = input[0]->type();
-    numOutput = blobs[0].size(0);
-    innerSize = blobs[0].size(1);
-    outerSize = input[0]->total(0, axisCan);
-
-    CV_Assert((size_t)innerSize == input[0]->total(axisCan));
-    CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
+public:
+    FullyConnectedLayerImpl(const LayerParams& params)
+    {
+        setParamsFrom(params);
+        CV_Assert(1 <= blobs.size() && blobs.size() <= 2);

-    useOpenCL = ocl::useOpenCL();
-    int allocFlags = useOpenCL ?
Blob::ALLOC_UMAT : Blob::ALLOC_UMAT; + numOutput = params.get("num_output"); + innerSize = (int)blobs[0].total() / numOutput; + bias = params.get("bias_term", true); + axis = params.get("axis", 1); - biasOnesBlob.create(Shape(outerSize, 1), dtype, allocFlags); - biasOnesBlob.setTo(1); + CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total()); + CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutput == blobs[1].total())); - output.resize(input.size()); - for (size_t i = 0; i < input.size(); i++) - { - CV_Assert(i == 0 || (input[i]->equalShape(*input[0]) && input[i]->type() == dtype)); - Shape outShape = Shape(outerSize, numOutput); - output[i].create(outShape, dtype, allocFlags); + blobs[0] = blobs[0].reshape(1, numOutput); + if (bias) + blobs[1] = blobs[1].reshape(1, 1); } -} - -void FullyConnectedLayerImpl::forward(std::vector &input, std::vector &output) -{ - #ifdef HAVE_OPENCL - if (useOpenCL) - forward_(input, output); - else - #endif - forward_(input, output); -} -template -void FullyConnectedLayerImpl::forward_(std::vector &input, std::vector &output) -{ - const XMat &weight = blobs[0].getRefConst(); - const XMat *biasMat = NULL, *biasOnesMat = NULL; - if (bias) + void allocate(const std::vector &input, std::vector &output) { - biasOnesMat = &biasOnesBlob.getRefConst(); - biasMat = &blobs[1].getRefConst(); + CV_Assert(input.size() > 0); + const Mat& inp0 = *input[0]; + + CV_Assert(1 <= blobs.size() && blobs.size() <= 2); + CV_Assert(blobs[0].dims == 2); + + bias = (blobs.size() >= 1); + axisCan = axis < 0 ? axis + inp0.dims : axis; + dtype = inp0.type(); + numOutput = blobs[0].size[0]; + innerSize = blobs[0].size[1]; + outerSize = inp0.total(0, axisCan); + size_t innerSize0 = inp0.total(axisCan); + + CV_Assert((size_t)innerSize == innerSize0); + CV_Assert(!bias || (size_t)numOutput == blobs[1].total()); + + biasOnesBlob.create(outerSize, 1, dtype); + biasOnesBlob.setTo(1.); + + output.resize(input.size()); + for (size_t i = 0; i < input.size(); i++) + { + CV_Assert(i == 0 || (input[i]->size == input[0]->size && input[i]->type() == dtype)); + output[i].create(outerSize, numOutput, dtype); + } } - for (size_t i = 0; i < input.size(); i++) + void forward(std::vector &input, std::vector &output) { - const XMat srcMat = reshaped(input[i]->getRefConst(), Shape(outerSize, innerSize)); - XMat dstMat = reshaped(output[i].getRef(), Shape(outerSize, numOutput)); - dnn::gemm(srcMat, weight, 1, dstMat, 0, GEMM_2_T); - + const Mat &weight = blobs[0]; + const Mat *biasMat = NULL, *biasOnesMat = NULL; if (bias) - dnn::gemm(*biasOnesMat, *biasMat, 1, dstMat, 1); + { + biasOnesMat = &biasOnesBlob; + biasMat = &blobs[1]; + } + + for (size_t i = 0; i < input.size(); i++) + { + Mat srcMat = input[i]->reshape(1, outerSize); + Mat dstMat = output[i].reshape(1, outerSize); + dnn::gemm(srcMat, weight, 1, dstMat, 0, GEMM_2_T); + + if (bias) + dnn::gemm(*biasOnesMat, *biasMat, 1, dstMat, 1); + } } -} + int axisCan, dtype; + int numOutput, innerSize, outerSize; + bool bias; + Mat biasOnesBlob; +}; -Ptr InnerProductLayer::create(int axis) +Ptr InnerProductLayer::create(const LayerParams& params) { - return Ptr(new FullyConnectedLayerImpl(axis)); + return Ptr(new FullyConnectedLayerImpl(params)); } } diff --git a/modules/dnn/src/layers/fully_connected_layer.hpp b/modules/dnn/src/layers/fully_connected_layer.hpp deleted file mode 100644 index 0cf59401139..00000000000 --- a/modules/dnn/src/layers/fully_connected_layer.hpp +++ /dev/null @@ -1,71 +0,0 @@ 
-/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -class FullyConnectedLayerImpl : public InnerProductLayer -{ - int axisCan, dtype; - int numOutput, innerSize, outerSize; - bool bias, useOpenCL; - Blob biasOnesBlob; - - template - void forward_(std::vector &input, std::vector &output); - -public: - - FullyConnectedLayerImpl(int axisCan = 1); - void allocate(const std::vector &input, std::vector &output); - void forward(std::vector &inputs, std::vector &outputs); -}; - -} -} -#endif diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index c1f586a269b..cc40ab271da 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -54,7 +54,8 @@ std::string makeName(const std::string& str1, const std::string& str2) return str1 + str2; } -bool getParameter(LayerParams ¶ms, const std::string& nameBase, const std::string& nameAll, int ¶meterH, int ¶meterW, bool hasDefault = false, const int& defaultValue = 0) +bool getParameter(const LayerParams ¶ms, const std::string& nameBase, const std::string& nameAll, + int ¶meterH, int ¶meterW, bool hasDefault = false, const int& defaultValue = 0) { std::string nameH = makeName(nameBase, std::string("_h")); std::string nameW = makeName(nameBase, std::string("_w")); @@ -92,7 +93,7 @@ bool getParameter(LayerParams ¶ms, const std::string& nameBase, const std::s } } -void getKernelSize(LayerParams ¶ms, int &kernelH, int &kernelW) +void getKernelSize(const LayerParams ¶ms, int &kernelH, int &kernelW) { if(!util::getParameter(params, "kernel", "kernel_size", kernelH, kernelW)) { @@ -102,7 +103,7 @@ void getKernelSize(LayerParams ¶ms, int &kernelH, int &kernelW) CV_Assert(kernelH > 0 && kernelW > 0); } -void getStrideAndPadding(LayerParams ¶ms, int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode) +void getStrideAndPadding(const LayerParams ¶ms, int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode) { util::getParameter(params, "pad", "pad", padH, padW, true, 0); util::getParameter(params, "stride", "stride", strideH, strideW, true, 1); @@ -118,7 +119,7 @@ void getStrideAndPadding(LayerParams ¶ms, int &padH, int &padW, int &strideH } -void getPoolingKernelParams(LayerParams ¶ms, int &kernelH, int &kernelW, bool &globalPooling, +void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, bool &globalPooling, int &padH, int &padW, int &strideH, int &strideW, cv::String &padMode) { util::getStrideAndPadding(params, padH, padW, strideH, strideW, padMode); @@ -142,7 +143,7 @@ void getPoolingKernelParams(LayerParams ¶ms, int &kernelH, int &kernelW, boo } } -void getConvolutionKernelParams(LayerParams ¶ms, int &kernelH, int &kernelW, int &padH, int &padW, +void getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String &padMode) { util::getKernelSize(params, kernelH, kernelW); diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp index b27afafe7ba..78e6ace3e78 100644 --- a/modules/dnn/src/layers/layers_common.hpp +++ b/modules/dnn/src/layers/layers_common.hpp @@ -50,15 +50,16 @@ namespace cv namespace dnn { -void getConvolutionKernelParams(LayerParams ¶ms, int &kernelH, int &kernelW, int &padH, int &padW, +void 
getConvolutionKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW, cv::String& padMode); -void getPoolingKernelParams(LayerParams ¶ms, int &kernelH, int &kernelW, bool &globalPooling, +void getPoolingKernelParams(const LayerParams ¶ms, int &kernelH, int &kernelW, bool &globalPooling, int &padH, int &padW, int &strideH, int &strideW, cv::String& padMode); void getConvPoolOutParams(const int inputH, const int inputW, const cv::Size& kernel, const cv::Size& stride, cv::Size &pad, const cv::String& padMode, int &outH, int &outW); + } } diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 9ecb58a4e07..4f8f06aee0f 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -41,10 +41,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include "lrn_layer.hpp" -#include "opencl_kernels_dnn.hpp" #include -#include #include #include @@ -53,207 +50,142 @@ namespace cv namespace dnn { -LRNLayerImpl::LRNLayerImpl(int type_, int size_, double alpha_, double beta_, double bias_, bool normBySize_) +class LRNLayerImpl : public LRNLayer { - type = type_; - size = size_; - alpha = alpha_; - beta = beta_; - bias = bias_; - normBySize = normBySize_; -} +public: + LRNLayerImpl(const LayerParams& params) + { + setParamsFrom(params); + type = -1; + String nrmType = params.get("norm_region", "ACROSS_CHANNELS"); + if (nrmType == "ACROSS_CHANNELS") + type = LRNLayer::CHANNEL_NRM; + else if (nrmType == "WITHIN_CHANNEL") + type = LRNLayer::SPATIAL_NRM; + else + CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\""); + + size = params.get("local_size", 5); + if (size % 2 != 1 || size <= 0) + CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size"); + + alpha = params.get("alpha", 1); + beta = params.get("beta", 0.75); + bias = params.get("bias", 1); + normBySize = params.get("norm_by_size", true); + } -void LRNLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) -{ - CV_Assert(inputs.size() == 1 && inputs[0]->dims() == 4); - CV_Assert(type == CHANNEL_NRM || type == SPATIAL_NRM); - useOpenCL = cv::ocl::useOpenCL(); + void allocate(const std::vector &inputs, std::vector &outputs) + { + CV_Assert(inputs.size() == 1 && inputs[0]->dims == 4); + CV_Assert(type == CHANNEL_NRM || type == SPATIAL_NRM); - if (type == SPATIAL_NRM && !useOpenCL) - buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_MAT); - if (type == CHANNEL_NRM && useOpenCL) - buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_UMAT); + const Mat& inp0 = *inputs[0]; - outputs.resize(1); - outputs[0].create(inputs[0]->shape(), inputs[0]->type()); -} + if (type == SPATIAL_NRM) + buf.create(inp0.size[2], inp0.size[3], inp0.type()); -void LRNLayerImpl::forward(std::vector &inputs, std::vector &outputs) -{ - Blob &src = *inputs[0]; - Blob &dst = outputs[0]; - - switch (type) - { - case CHANNEL_NRM: - channelNoramlization(src, dst); - break; - case SPATIAL_NRM: - spatialNormalization(src, dst); - break; - default: - CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN layer"); - break; + outputs.resize(1); + outputs[0].create(inp0.dims, inp0.size.p, inp0.type()); } -} - -template -static XMat getPlane(XMat &m, int n, int cn) -{ - return reshaped(slice(m, n, cn), BlobShape::like(m).slice(2)); -} -void LRNLayerImpl::channelNoramlization(Blob &src, Blob &dst) -{ - if (!useOpenCL) - 
channelNormalization_<Mat>(src, dst);
-    else
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-        //channelNoramlization_ocl(src.getRefConst<UMat>(), dst.getRef<UMat>()); //consumes a lot of memory
-        channelNormalization_<UMat>(src, dst);
-    }
-}
+        Mat &src = *inputs[0];
+        Mat &dst = outputs[0];
-template<typename XMat>
-void LRNLayerImpl::channelNormalization_(Blob &srcBlob, Blob &dstBlob)
-{
-    int num = srcBlob.num();
-    int channels = srcBlob.channels();
-    int ksize = (size - 1) / 2;
-    int sizeNormFactor = normBySize ? size : 1;
-
-    XMat srcMat = srcBlob.getRefConst<XMat>().clone();
-    XMat dstMat = dstBlob.getRef<XMat>();
+        switch (type)
+        {
+        case CHANNEL_NRM:
+            channelNormalization(src, dst);
+            break;
+        case SPATIAL_NRM:
+            spatialNormalization(src, dst);
+            break;
+        default:
+            CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN layer");
+            break;
+        }
+    }
-    for (int n = 0; n < num; n++)
+    void channelNormalization(Mat &srcBlob, Mat &dstBlob)
     {
-        XMat accum = getPlane(dstMat, n, channels-1); //trick for memory saving
-        accum.setTo(0);
+        int num = srcBlob.size[0];
+        int channels = srcBlob.size[1];
+        int ksize = (size - 1) / 2;
+        int sizeNormFactor = normBySize ? size : 1;
-        for (int cn = 0; cn < std::min(ksize, channels); cn++)
-            cv::accumulateSquare(getPlane(srcMat, n, cn), accum);
+        Mat srcMat = srcBlob.clone();
+        Mat dstMat = dstBlob;
-        for (int cn = 0; cn < channels; cn++)
+        for (int n = 0; n < num; n++)
         {
-            if (cn + ksize < channels)
-            {
-                cv::accumulateSquare(getPlane(srcMat, n, cn + ksize), accum);
-            }
+            Mat accum = getPlane(dstMat, n, channels-1); //trick for memory saving
+            accum.setTo(0);
-            if (cn - ksize - 1 >= 0)
+            for (int cn = 0; cn < std::min(ksize, channels); cn++)
+                cv::accumulateSquare(getPlane(srcMat, n, cn), accum);
+
+            for (int cn = 0; cn < channels; cn++)
             {
-                //subtractSquare
-                XMat left = getPlane(srcMat, n, cn - ksize - 1);
-                cv::pow(left, 2, left);
-                cv::subtract(accum, left, accum);
+                if (cn + ksize < channels)
+                {
+                    cv::accumulateSquare(getPlane(srcMat, n, cn + ksize), accum);
+                }
+
+                if (cn - ksize - 1 >= 0)
+                {
+                    //subtractSquare
+                    Mat left = getPlane(srcMat, n, cn - ksize - 1);
+                    cv::pow(left, 2, left);
+                    cv::subtract(accum, left, accum);
+                }
+
+                Mat dst = getPlane(dstMat, n, cn);
+                accum.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);
+                cv::pow(dst, beta, dst);
+                cv::divide(getPlane(srcMat, n, cn), dst, dst);
             }
-
-            XMat dst = getPlane(dstMat, n, cn);
-            accum.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);
-            cv::pow(dst, beta, dst);
-            cv::divide(getPlane(srcMat, n, cn), dst, dst);
         }
     }
-}
-bool LRNLayerImpl::channelNormalization_ocl(const UMat &src, UMat &dst)
-{
-#ifdef HAVE_OPENCL
-    if (src.offset != 0 || dst.offset != 0) //TODO: add offset
-        return false;
-
-    String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
-
-    ocl::Kernel kerScale("LRNFillScale", ocl::dnn::lrn_oclsrc, buildOpts);
-    if (kerScale.empty())
-        return false;
-
-    ocl::Kernel kerOutput("LRNComputeOutput", ocl::dnn::lrn_oclsrc, buildOpts);
-    if (kerOutput.empty())
-        return false;
-
-    Shape shape = Shape::like(src);
-    int ksize = (size - 1) / 2;
-    int sizeNormFactor = normBySize ?
size : 1;
-    // TODO: add bias
-    size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
-    UMat &scaleBuf = buf.umatRef();
-
-    size_t nthreads = (size_t)(shape.total() / shape[1]);
-    kerScale.args((int)nthreads,
-                  ocl::KernelArg::PtrReadOnly(src), shape[0], shape[1], shape[2], shape[3],
-                  size, (float)(alpha/sizeNormFactor), (float)ksize, ocl::KernelArg::PtrWriteOnly(scaleBuf));
-    if (!kerScale.run(1, &nthreads, &wgSize, true))
-        return false;
-
-    nthreads = (size_t)shape.total();
-    kerOutput.args((int)nthreads,
-                   ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadOnly(scaleBuf),
-                   -beta, ocl::KernelArg::PtrWriteOnly(dst) );
-    if (!kerOutput.run(1, &nthreads, &wgSize, true))
-        return false;
-
-    return true;
-#else
-    (void)src;
-    (void)dst;
-    return false;
-#endif
-}
-
-void LRNLayerImpl::spatialNormalization(Blob &src, Blob &dst)
-{
-    if (!useOpenCL)
-        spatialNormalization_<Mat>(src, dst);
-    else
-        spatialNormalization_<UMat>(src, dst);
-}
-
-//TODO: fix cv::boxFilter with BORDER_ISOLATED flag in CPU mode
-template<>
-void LRNLayerImpl::sqrBoxFilter_<Mat>(const Mat &src, Mat &dst)
-{
-    Mat srcRawWrapper(src.rows, src.cols, src.type(), src.data, src.step[0]);
-    cv::sqrBoxFilter(srcRawWrapper, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);
-}
-
-template<>
-void LRNLayerImpl::sqrBoxFilter_<UMat>(const UMat &src, UMat &dst)
-{
-    cv::sqrBoxFilter(src, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT | BORDER_ISOLATED);
-}
+    void sqrBoxFilter_(const Mat &src, Mat &dst)
+    {
+        Mat srcRawWrapper(src.rows, src.cols, src.type(), src.data, src.step[0]);
+        cv::sqrBoxFilter(srcRawWrapper, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);
+    }
-template<typename XMat>
-void LRNLayerImpl::spatialNormalization_(Blob &srcBlob, Blob &dstBlob)
-{
-    int num = srcBlob.num();
-    int channels = srcBlob.channels();
-    int sizeNormFactor = normBySize ? size*size : 1;
+    void spatialNormalization(Mat &srcBlob, Mat &dstBlob)
+    {
+        int num = srcBlob.size[0];
+        int channels = srcBlob.size[1];
+        int sizeNormFactor = normBySize ? size*size : 1;
-    XMat srcMat = srcBlob.getRefConst<XMat>();
-    XMat dstMat = dstBlob.getRef<XMat>();
+        Mat srcMat = srcBlob;
+        Mat dstMat = dstBlob;
-    for (int n = 0; n < num; n++)
-    {
-        for (int cn = 0; cn < channels; cn++)
+        for (int n = 0; n < num; n++)
         {
-            XMat src = getPlane(srcMat, n, cn);
-            XMat dst = getPlane(dstMat, n, cn);
+            for (int cn = 0; cn < channels; cn++)
+            {
+                Mat src = getPlane(srcMat, n, cn);
+                Mat dst = getPlane(dstMat, n, cn);
-            sqrBoxFilter_(src, dst);
+                sqrBoxFilter_(src, dst);
-            dst.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);
-            cv::pow(dst, beta, dst);
-            cv::divide(src, dst, dst);
+                dst.convertTo(dst, dst.type(), alpha/sizeNormFactor, bias);
+                cv::pow(dst, beta, dst);
+                cv::divide(src, dst, dst);
+            }
         }
     }
-}
+    Mat buf;
+};
-Ptr<LRNLayer> LRNLayer::create(int type, int size, double alpha, double beta, double bias,
-                               bool normBySize)
+Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)
 {
-    return Ptr<LRNLayer>(new LRNLayerImpl(type, size, alpha, beta, bias, normBySize));
+    return Ptr<LRNLayer>(new LRNLayerImpl(params));
 }
 }
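The hunk above shows the pattern this patch applies module-wide: a layer reads its configuration from a LayerParams dictionary inside a static create() factory instead of taking typed constructor arguments. A hedged sketch of driving the new LRN factory directly — this assumes the headers and API of this branch, and the parameter values are purely illustrative:

#include <opencv2/dnn.hpp>
using namespace cv;
using namespace cv::dnn;

int main()
{
    // Build the same dictionary a Caffe importer would hand to the factory.
    LayerParams lp;
    lp.set("norm_region", String("ACROSS_CHANNELS")); // or "WITHIN_CHANNEL"
    lp.set("local_size", 5);                          // must be positive and odd
    lp.set("alpha", 1e-4);
    lp.set("beta", 0.75);

    Ptr<LRNLayer> lrn = LRNLayer::create(lp);
    // allocate()/forward() are then called with 4-D CV_32F blobs (N x C x H x W).
    return 0;
}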
diff --git a/modules/dnn/src/layers/lrn_layer.hpp b/modules/dnn/src/layers/lrn_layer.hpp
deleted file mode 100644
index cbdebb88eae..00000000000
--- a/modules/dnn/src/layers/lrn_layer.hpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__
-#define __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__
-#include "../precomp.hpp"
-#include
-
-namespace cv
-{
-namespace dnn
-{
-
-class LRNLayerImpl : public LRNLayer
-{
-    bool useOpenCL;
-    Blob buf;
-
-    void channelNoramlization(Blob &src, Blob &dst);
-    template<typename XMat>
-    void channelNormalization_(Blob &src, Blob &dst);
-    bool channelNormalization_ocl(const UMat &src, UMat &dst);
-
-    void spatialNormalization(Blob &src, Blob &dst);
-    template<typename XMat>
-    void spatialNormalization_(Blob &src, Blob &dst);
-    template<typename XMat>
-    void sqrBoxFilter_(const XMat &src, XMat &dst);
-
-public:
-
-    LRNLayerImpl(int type = CHANNEL_NRM, int size = 5, double alpha = 1,
-                 double beta = 0.75, double bias = 1, bool normBySize = true);
-    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-};
-
-}
-}
-
-#endif
diff --git a/modules/dnn/src/layers/max_unpooling_layer.cpp b/modules/dnn/src/layers/max_unpooling_layer.cpp
index 71283fc8afd..c42e110a456 100644
--- a/modules/dnn/src/layers/max_unpooling_layer.cpp
+++ b/modules/dnn/src/layers/max_unpooling_layer.cpp
@@ -9,64 +9,84 @@ Implementation of Batch Normalization layer.
 */
-#include "max_unpooling_layer.hpp"
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+#include
 namespace cv
 {
 namespace dnn
 {
-MaxUnpoolLayerImpl::MaxUnpoolLayerImpl(Size poolKernel_, Size poolPad_, Size poolStride_):
-    poolKernel(poolKernel_),
-    poolPad(poolPad_),
-    poolStride(poolStride_)
-{}
-
-void MaxUnpoolLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+class MaxUnpoolLayerImpl : public MaxUnpoolLayer
 {
-    CV_Assert(inputs.size() == 2);
-    CV_Assert(inputs[0]->total() == inputs[1]->total());
-
-    BlobShape outShape = inputs[0]->shape();
-    outShape[2] = (outShape[2] - 1) * poolStride.height + poolKernel.height - 2 * poolPad.height;
-    outShape[3] = (outShape[3] - 1) * poolStride.width + poolKernel.width - 2 * poolPad.width;
+public:
+    MaxUnpoolLayerImpl(const LayerParams& params)
+    {
+        setParamsFrom(params);
+        poolKernel = Size(params.get<int>("pool_k_w"), params.get<int>("pool_k_h"));
+        poolPad = Size(params.get<int>("pool_pad_w"), params.get<int>("pool_pad_h"));
+        poolStride = Size(params.get<int>("pool_stride_w"), params.get<int>("pool_stride_h"));
+    }
-    outputs.resize(1);
-    outputs[0].create(outShape);
-}
+    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        CV_Assert(inputs.size() == 2);
+        const Mat& inp0 = *inputs[0];
+        CV_Assert(inp0.total() == inputs[1]->total());
+        CV_Assert(inp0.dims == 4);
-void MaxUnpoolLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    CV_Assert(inputs.size() == 2);
-    Blob& input = *inputs[0];
-    Blob& indices = *inputs[1];
+        int outShape[] = { inp0.size[0], inp0.size[1], inp0.size[2], inp0.size[3] };
+        outShape[2] = (outShape[2] - 1) * poolStride.height + poolKernel.height - 2 * poolPad.height;
+        outShape[3] = (outShape[3] - 1) * poolStride.width + poolKernel.width - 2 * poolPad.width;
-    CV_Assert(input.total() == indices.total());
-    CV_Assert(input.num() == 1);
+        outputs.resize(1);
+        outputs[0].create(4, outShape, inp0.type());
+    }
-    for(int i_n = 0; i_n < outputs.size(); i_n++)
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-        Blob& outBlob = outputs[i_n];
-        outBlob.setTo(0);
-        CV_Assert(input.channels() == outBlob.channels());
+        CV_Assert(inputs.size() == 2);
+        Mat& input = *inputs[0];
+        Mat& indices = *inputs[1];
+
+        CV_Assert(input.total() == indices.total());
+        CV_Assert(input.size[0] == 1);
+        CV_Assert(input.isContinuous());
-        for (int i_c = 0; i_c < input.channels(); i_c++)
+        for(int i_n = 0; i_n < outputs.size(); i_n++)
         {
-            Mat outPlane = outBlob.getPlane(0, i_c);
-            for(int i_wh = 0; i_wh < input.size2().area(); i_wh++)
+            Mat& outBlob = outputs[i_n];
+            outBlob.setTo(0);
+            CV_Assert(input.size[1] == outBlob.size[1]);
+            int outPlaneTotal = outBlob.size[2]*outBlob.size[3];
+
+            for (int i_c = 0; i_c < input.size[1]; i_c++)
             {
-                int index = indices.getPlane(0, i_c).at<float>(i_wh);
+                Mat outPlane = getPlane(outBlob, 0, i_c);
+                int wh_area = input.size[2]*input.size[3];
+                const float* inptr = input.ptr<float>(0, i_c);
+                const float* idxptr = indices.ptr<float>(0, i_c);
+                float* outptr = outPlane.ptr<float>();
-                CV_Assert(index < outPlane.total());
-                outPlane.at<float>(index) = input.getPlane(0, i_c).at<float>(i_wh);
+                for(int i_wh = 0; i_wh < wh_area; i_wh++)
+                {
+                    int index = idxptr[i_wh];
+                    CV_Assert(0 <= index && index < outPlaneTotal);
+                    outptr[index] = inptr[i_wh];
+                }
             }
         }
    }
-}
-Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(Size poolKernel, Size poolPad, Size poolStride)
+    Size poolKernel;
+    Size poolPad;
+    Size poolStride;
+};
+
+Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)
 {
-    return Ptr<MaxUnpoolLayer>(new MaxUnpoolLayerImpl(poolKernel, poolPad, poolStride));
+    return
Ptr<MaxUnpoolLayer>(new MaxUnpoolLayerImpl(params));
 }
 }
diff --git a/modules/dnn/src/layers/max_unpooling_layer.hpp b/modules/dnn/src/layers/max_unpooling_layer.hpp
deleted file mode 100644
index a73f460cee8..00000000000
--- a/modules/dnn/src/layers/max_unpooling_layer.hpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-// Copyright (C) 2016, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-
-/*
-Declaration of MaxUnpooling layer.
-*/
-
-#ifndef __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__
-#define __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__
-#include "../precomp.hpp"
-#include
-
-namespace cv
-{
-namespace dnn
-{
-
-class MaxUnpoolLayerImpl : public MaxUnpoolLayer
-{
-public:
-    MaxUnpoolLayerImpl(Size poolKernel_, Size poolPad_, Size poolStride_);
-
-    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-
-    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-
-private:
-    Size poolKernel;
-    Size poolPad;
-    Size poolStride;
-};
-
-}
-}
-#endif // __OPENCV_DNN_LAYERS_MAX_UNPOOLING_LAYER_HPP__
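The allocate() above sizes the unpooled blob by inverting the pooling formula along each spatial axis. A small self-contained check of that arithmetic (the concrete numbers are made up):

#include <cassert>

// Unpooled size along one axis: inverse of the pooled-size formula.
static int unpoolSize(int pooled, int stride, int kernel, int pad)
{
    return (pooled - 1) * stride + kernel - 2 * pad;
}

int main()
{
    // A 2x2/stride-2 max pool maps 8 -> 4, so unpooling maps 4 -> 8.
    assert(unpoolSize(4, 2, 2, 0) == 8);
    // With kernel 3, stride 2, pad 1: pooling maps 25 -> 13, unpooling 13 -> 25.
    assert(unpoolSize(13, 2, 3, 1) == 25);
    return 0;
}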
diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp
index 36c48c40bf8..b658d9548ac 100644
--- a/modules/dnn/src/layers/mvn_layer.cpp
+++ b/modules/dnn/src/layers/mvn_layer.cpp
@@ -41,7 +41,6 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
-#include "mvn_layer.hpp"
 #include
 namespace cv
@@ -49,52 +48,59 @@ namespace cv
 namespace dnn
 {
-MVNLayerImpl::MVNLayerImpl(bool normVariance_, bool acrossChannels_, double eps_)
+class MVNLayerImpl : public MVNLayer
 {
-    normVariance = normVariance_;
-    acrossChannels = acrossChannels_;
-    eps = eps_;
-}
+public:
+    MVNLayerImpl(const LayerParams& params)
+    {
+        setParamsFrom(params);
+        normVariance = params.get<bool>("normalize_variance", true);
+        acrossChannels = params.get<bool>("across_channels", false);
+        eps = params.get<double>("eps", 1e-9);
+    }
-void MVNLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    outputs.resize(inputs.size());
-    for (size_t i = 0; i < inputs.size(); i++)
+    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-        CV_Assert(!acrossChannels || inputs[i]->dims() >= 2);
-        outputs[i].create(inputs[i]->shape(), inputs[i]->type());
+        outputs.resize(inputs.size());
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            int dims = inputs[i]->dims;
+            CV_Assert(!acrossChannels || dims >= 2);
+            outputs[i].create(dims, inputs[i]->size.p, inputs[i]->type());
+        }
     }
-}
-void MVNLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-        Blob &inpBlob = *inputs[inpIdx];
-        Blob &outBlob = outputs[inpIdx];
+        for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
+        {
+            Mat &inpBlob = *inputs[inpIdx];
+            Mat &outBlob = outputs[inpIdx];
-        int splitDim = (acrossChannels) ? 1 : 2;
-        Shape workSize((int)inpBlob.total(0, splitDim), (int)inpBlob.total(splitDim));
-        Mat inpMat = reshaped(inpBlob.matRefConst(), workSize);
-        Mat outMat = reshaped(outBlob.matRef(), workSize);
+            int splitDim = (acrossChannels) ? 1 : 2;
+            int i, newRows = 1;
+            for( i = 0; i < splitDim; i++ )
+                newRows *= inpBlob.size[i];
+            Mat inpMat = inpBlob.reshape(1, newRows);
+            Mat outMat = outBlob.reshape(1, newRows);
-        Scalar mean, dev;
-        for (int i = 0; i < workSize[0]; i++)
-        {
-            Mat inpRow = inpMat.row(i);
-            Mat outRow = outMat.row(i);
+            Scalar mean, dev;
+            for ( i = 0; i < newRows; i++)
+            {
+                Mat inpRow = inpMat.row(i);
+                Mat outRow = outMat.row(i);
-            cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
-            double alpha = (normVariance) ? 1/(eps + dev[0]) : 1;
-            inpRow.convertTo(outRow, outRow.type(), alpha, -mean[0] * alpha);
+                cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
+                double alpha = (normVariance) ? 1/(eps + dev[0]) : 1;
+                inpRow.convertTo(outRow, outRow.type(), alpha, -mean[0] * alpha);
+            }
         }
     }
-}
-
+};
-Ptr<MVNLayer> MVNLayer::create(bool normVariance, bool acrossChannels, double eps)
+Ptr<MVNLayer> MVNLayer::create(const LayerParams& params)
 {
-    return Ptr<MVNLayer>(new MVNLayerImpl(normVariance, acrossChannels, eps));
+    return Ptr<MVNLayer>(new MVNLayerImpl(params));
 }
 }
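Per row of the reshaped blob, MVN subtracts the mean and optionally divides by (eps + stddev); that is exactly what the convertTo() call in the loop above computes. A core-OpenCV-only sketch of the same math on one row, with toy data:

#include <opencv2/core.hpp>
using namespace cv;

int main()
{
    Mat row = (Mat_<float>(1, 4) << 1.f, 2.f, 3.f, 4.f);
    Scalar mean, dev;
    meanStdDev(row, mean, dev);

    double eps = 1e-9;                                   // layer default
    double alpha = 1.0 / (eps + dev[0]);                 // 1/sigma when normalizing variance
    Mat out;
    row.convertTo(out, CV_32F, alpha, -mean[0] * alpha); // (x - mu) / sigma
    return 0;
}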
diff --git a/modules/dnn/src/layers/mvn_layer.hpp b/modules/dnn/src/layers/mvn_layer.hpp
deleted file mode 100644
index 80b89544b63..00000000000
--- a/modules/dnn/src/layers/mvn_layer.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__
-#define __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__
-#include "../precomp.hpp"
-#include
-
-namespace cv
-{
-namespace dnn
-{
-
-class MVNLayerImpl : public MVNLayer
-{
-public:
-
-    MVNLayerImpl(bool normVariance_ = true, bool acrossChannels_ = false, double eps_ = 1e-9);
-    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-};
-
-}
-}
-#endif
diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp
index 926465b77fd..a14648785af 100644
--- a/modules/dnn/src/layers/normalize_bbox_layer.cpp
+++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp
@@ -41,7 +41,6 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
-#include "normalize_bbox_layer.hpp"
 #include "op_blas.hpp"
 #include
@@ -52,150 +51,186 @@ namespace cv
 namespace dnn
 {
-const std::string NormalizeBBoxLayer::_layerName = std::string("NormalizeBBox");
-
-bool NormalizeBBoxLayer::getParameterDict(const LayerParams &params,
-                                          const std::string &parameterName,
-                                          DictValue& result)
+class NormalizeBBoxLayerImpl : public NormalizeBBoxLayer
 {
-    if (!params.has(parameterName))
-    {
-        return false;
-    }
+public:
+    Mat _buffer;
-    result = params.get(parameterName);
-    return true;
-}
+    Mat _sumChannelMultiplier;
+    Mat _sumSpatialMultiplier;
-template<typename T>
-T NormalizeBBoxLayer::getParameter(const LayerParams &params,
-                                   const std::string &parameterName,
-                                   const size_t &idx,
-                                   const bool required,
-                                   const T& defaultValue)
-{
-    DictValue dictValue;
-    bool success = getParameterDict(params, parameterName, dictValue);
-    if(!success)
+    Mat _scale;
+
+    float _eps;
+    bool _across_spatial;
+    bool _channel_shared;
+
+    size_t _num;
+    size_t _channels;
+    size_t _rows;
+    size_t _cols;
+
+    size_t _channelSize;
+    size_t _imageSize;
+
+    static const size_t _numAxes = 4;
+    static const std::string _layerName;
+
+    bool getParameterDict(const LayerParams &params,
+                          const std::string &parameterName,
+                          DictValue& result)
     {
-        if(required)
+        if (!params.has(parameterName))
         {
-            std::string message = _layerName;
-            message += " layer parameter does not contain ";
-            message += parameterName;
-            message += " parameter.";
-            CV_Error(Error::StsBadArg, message);
+            return false;
         }
-        else
-        {
-            return defaultValue;
-        }
-    }
-    return dictValue.get<T>(idx);
-}
-NormalizeBBoxLayer::NormalizeBBoxLayer(LayerParams &params) : Layer(params)
-{
-    _eps = getParameter<float>(params, "eps", 0, false, 1e-10f);
-    _across_spatial = getParameter<bool>(params, "across_spatial");
-    _channel_shared = getParameter<bool>(params, "channel_shared");
-}
+        result = params.get(parameterName);
+        return true;
+    }
-void NormalizeBBoxLayer::checkInputs(const std::vector<Blob*> &inputs)
-{
-    CV_Assert(inputs.size() > 0);
-    for (size_t i = 1; i < inputs.size(); i++)
+    template<typename T>
+    T getParameter(const LayerParams &params,
+                   const std::string &parameterName,
+                   const size_t &idx=0,
+                   const bool required=true,
+                   const T& defaultValue=T())
     {
-        for (size_t j = 0; j < _numAxes; j++)
+        DictValue dictValue;
+        bool success = getParameterDict(params, parameterName, dictValue);
+        if(!success)
         {
-            CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]);
+            if(required)
+            {
+                std::string message = _layerName;
+                message += " layer parameter does not contain ";
+                message += parameterName;
+                message += " parameter.";
+                CV_Error(Error::StsBadArg, message);
+            }
+            else
+            {
+                return defaultValue;
+            }
         }
+        return dictValue.get<T>(idx);
     }
-    CV_Assert(inputs[0]->dims() > 2);
-}
-
-void NormalizeBBoxLayer::allocate(const
std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    checkInputs(inputs);
-
-    _num = inputs[0]->num();
-    _channels = inputs[0]->shape()[1];
-    _rows = inputs[0]->shape()[2];
-    _cols = inputs[0]->shape()[3];
-
-    _channelSize = _rows * _cols;
-    _imageSize = _channelSize * _channels;
-    _buffer = Mat(_channels, _channelSize, CV_32F);
-
-    _sumChannelMultiplier = Mat(_channels, 1, CV_32F, Scalar(1.0));
-    _sumSpatialMultiplier = Mat(1, _channelSize, CV_32F, Scalar(1.0));
-
-    _scale = blobs[0];
-
-    for(size_t i = 0; i < inputs.size(); i++)
+    NormalizeBBoxLayerImpl(const LayerParams &params)
     {
-        outputs[i].create(BlobShape(inputs[0]->shape()));
+        _eps = getParameter<float>(params, "eps", 0, false, 1e-10f);
+        _across_spatial = getParameter<bool>(params, "across_spatial");
+        _channel_shared = getParameter<bool>(params, "channel_shared");
+        setParamsFrom(params);
    }
-}
-void NormalizeBBoxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    Mat zeroBuffer(_channels, _channelSize, CV_32F, Scalar(0));
-    Mat absDiff;
-
-    for (size_t j = 0; j < inputs.size(); j++)
+    void checkInputs(const std::vector<Mat*> &inputs)
     {
-        for (size_t n = 0; n < _num; ++n)
+        CV_Assert(inputs.size() > 0);
+        for (size_t i = 1; i < inputs.size(); i++)
         {
-            Mat src = Mat(_channels, _channelSize, CV_32F, inputs[j]->ptrf(n));
-            Mat dst = Mat(_channels, _channelSize, CV_32F, outputs[j].ptrf(n));
+            CV_Assert(inputs[i]->size == inputs[0]->size);
+        }
+        CV_Assert(inputs[0]->dims > 2);
+    }
-            _buffer = src.mul(src);
+    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        checkInputs(inputs);
-            if (_across_spatial)
-            {
-                absdiff(_buffer, zeroBuffer, absDiff);
+        const Mat& inp0 = *inputs[0];
+        CV_Assert(inp0.dims == 4 && inp0.type() == CV_32F);
-                // add eps to avoid overflow
-                double absSum = sum(absDiff)[0] + _eps;
+        _num = inp0.size[0];
+        _channels = inp0.size[1];
+        _rows = inp0.size[2];
+        _cols = inp0.size[3];
-                float norm = sqrt(absSum);
-                dst = src / norm;
-            }
-            else
-            {
-                Mat norm(_channelSize, 1, _buffer.type()); // 1 x _channelSize
+        _channelSize = _rows * _cols;
+        _imageSize = _channelSize * _channels;
-                // (_channels x_channelSize)T * _channels x 1 -> _channelSize x 1
-                gemmCPU(_buffer, _sumChannelMultiplier, 1, norm, 0, GEMM_1_T);
+        _buffer = Mat(_channels, _channelSize, CV_32F);
-                // compute norm
-                pow(norm, 0.5f, norm);
+        _sumChannelMultiplier = Mat(_channels, 1, CV_32F, Scalar(1.0));
+        _sumSpatialMultiplier = Mat(1, _channelSize, CV_32F, Scalar(1.0));
-                // scale the layer
-                // _channels x 1 * (_channelSize x 1)T -> _channels x _channelSize
-                gemmCPU(_sumChannelMultiplier, norm, 1, _buffer, 0, GEMM_2_T);
+        _scale = blobs[0];
+        size_t i, ninputs = inputs.size();
+        outputs.resize(ninputs);
-                dst = src / _buffer;
-            }
+        for(i = 0; i < ninputs; i++)
+        {
+            outputs[i].create(inp0.dims, inp0.size.p, inp0.type());
+        }
+    }
-            // scale the output
-            if (_channel_shared)
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        Mat zeroBuffer(_channels, _channelSize, CV_32F, Scalar(0));
+        Mat absDiff;
+
+        for (size_t j = 0; j < inputs.size(); j++)
+        {
+            for (size_t n = 0; n < _num; ++n)
             {
-                // _scale: 1 x 1
-                dst *= _scale.matRefConst().at<float>(0, 0);
+                Mat src = Mat(_channels, _channelSize, CV_32F, inputs[j]->ptr<float>(n));
+                Mat dst = Mat(_channels, _channelSize, CV_32F, outputs[j].ptr<float>(n));
+
+                _buffer = src.mul(src);
+
+                if (_across_spatial)
+                {
+                    absdiff(_buffer, zeroBuffer, absDiff);
+
+                    // add eps to avoid overflow
+                    double absSum = sum(absDiff)[0] + _eps;
+
+                    float norm = sqrt(absSum);
+                    dst = src / norm;
+                }
+                else
+                {
+                    Mat norm(_channelSize, 1,
_buffer.type()); // 1 x _channelSize
+
+                    // (_channels x_channelSize)T * _channels x 1 -> _channelSize x 1
+                    gemmCPU(_buffer, _sumChannelMultiplier, 1, norm, 0, GEMM_1_T);
+
+                    // compute norm
+                    pow(norm, 0.5f, norm);
+
+                    // scale the layer
+                    // _channels x 1 * (_channelSize x 1)T -> _channels x _channelSize
+                    gemmCPU(_sumChannelMultiplier, norm, 1, _buffer, 0, GEMM_2_T);
+
+                    dst = src / _buffer;
+                }
+
+                // scale the output
+                if (_channel_shared)
+                {
+                    // _scale: 1 x 1
+                    dst *= _scale.at<float>(0, 0);
+                }
+                else
+                {
+                    // _scale: _channels x 1
+                    // _channels x 1 * 1 x _channelSize -> _channels x _channelSize
+                    gemmCPU(_scale, _sumSpatialMultiplier, 1, _buffer, 0);
+
+                    dst = dst.mul(_buffer);
+                }
             }
-            else
-            {
-                // _scale: _channels x 1
-                // _channels x 1 * 1 x _channelSize -> _channels x _channelSize
-                gemmCPU(_scale.matRefConst(), _sumSpatialMultiplier, 1, _buffer, 0);
-
-                dst = dst.mul(_buffer);
-            }
         }
    }
+
+};
+
+const std::string NormalizeBBoxLayerImpl::_layerName = std::string("NormalizeBBox");
+
+Ptr<NormalizeBBoxLayer> NormalizeBBoxLayer::create(const LayerParams &params)
+{
+    return Ptr<NormalizeBBoxLayer>(new NormalizeBBoxLayerImpl(params));
 }
+
 }
 }
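In the across_spatial branch above, the whole C x (H*W) slice is divided by one L2 norm, with _eps added before the square root; the learned _scale blob is then applied on top. A sketch of just that normalization step using core OpenCV, with toy shapes and the scale factor omitted:

#include <opencv2/core.hpp>
#include <cmath>
using namespace cv;

int main()
{
    Mat src(3, 8, CV_32F);   // channels x (rows*cols), toy sizes
    randu(src, -1.f, 1.f);

    float eps = 1e-10f;                          // layer default for "eps"
    double sumSq = sum(src.mul(src))[0] + eps;   // add eps before the sqrt
    Mat dst = src / std::sqrt(sumSq);            // src / ||src||_2
    return 0;
}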
diff --git a/modules/dnn/src/layers/normalize_bbox_layer.hpp b/modules/dnn/src/layers/normalize_bbox_layer.hpp
deleted file mode 100644
index 825a0f8d997..00000000000
--- a/modules/dnn/src/layers/normalize_bbox_layer.hpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_DNN_LAYERS_NORMALIZEBBOX_LAYER_HPP__
-#define __OPENCV_DNN_LAYERS_NORMALIZEBBOX_LAYER_HPP__
-#include "../precomp.hpp"
-
-namespace cv
-{
-namespace dnn
-{
-class NormalizeBBoxLayer : public Layer
-{
-    Mat _buffer;
-
-    Mat _sumChannelMultiplier;
-    Mat _sumSpatialMultiplier;
-
-    Blob _scale;
-
-    float _eps;
-    bool _across_spatial;
-    bool _channel_shared;
-
-    size_t _num;
-    size_t _channels;
-    size_t _rows;
-    size_t _cols;
-
-    size_t _channelSize;
-    size_t _imageSize;
-
-    static const size_t _numAxes = 4;
-    static const std::string _layerName;
-
-public:
-    NormalizeBBoxLayer(LayerParams &params);
-    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-
-    void checkInputs(const std::vector<Blob*> &inputs);
-
-    template<typename T>
-    T getParameter(const LayerParams &params,
-                   const std::string &parameterName,
-                   const size_t &idx = 0,
-                   const bool required = true,
-                   const T& defaultValue = T());
-
-    bool getParameterDict(const LayerParams &params,
-                          const std::string &parameterName,
-                          DictValue& result);
-};
-}
-}
-#endif
diff --git a/modules/dnn/src/layers/op_blas.hpp b/modules/dnn/src/layers/op_blas.hpp
index 55c70d87dec..d9a264320bf 100644
--- a/modules/dnn/src/layers/op_blas.hpp
+++ b/modules/dnn/src/layers/op_blas.hpp
@@ -56,4 +56,4 @@ namespace dnn
     void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags = 0);
 }
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/modules/dnn/src/layers/op_im2col.cpp b/modules/dnn/src/layers/op_im2col.cpp
index 4adeec7b78d..bae2011d0e5 100644
--- a/modules/dnn/src/layers/op_im2col.cpp
+++ b/modules/dnn/src/layers/op_im2col.cpp
@@ -44,125 +44,3 @@
 #include "opencl_kernels_dnn.hpp"
 #include "op_im2col.hpp"
 #include "opencl_kernels_dnn.hpp"
-
-namespace cv
-{
-namespace dnn
-{
-
-#ifdef HAVE_OPENCL
-
-bool im2col_ocl(const UMat &img,
-                int channels, int height, int width,
-                int kernel_h, int kernel_w,
-                int pad_h, int pad_w,
-                int stride_h, int stride_w,
-                int dilation_h, int dilation_w,
-                UMat &col)
-{
-    //TODO
-    CV_Assert(dilation_h == 1 && dilation_w == 1);
-
-    int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-    int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
-    int channels_col = channels * kernel_h * kernel_w;
-    int esz = img.elemSize();
-
-    CV_Assert(img.isContinuous() && col.isContinuous());
-
    CV_Assert(img.total() == (size_t)channels * height * width);
-    CV_Assert(col.total() == (size_t)channels_col * height_col * width_col);
-
-    ocl::Kernel ker("im2col", ocl::dnn::im2col_oclsrc, String("-DT=") + ocl::typeToStr(img.type()));
-    if (ker.empty())
-        return false;
-
-    ker.args(ocl::KernelArg::PtrReadOnly(img), (int)img.offset/esz,
-             channels, height, width,
-             kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
-             height_col, width_col,
-             ocl::KernelArg::PtrWriteOnly(col), (int)col.offset/esz
-             );
-
-    size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
-    size_t globalSize = (size_t)channels * height_col * width_col;
-    return ker.run(1, &globalSize, &localSize, true);
-}
-
-bool col2im_ocl(const UMat &col,
-                int channels, int height, int width,
-                int kernel_h, int kernel_w,
-                int pad_h, int pad_w,
-                int stride_h, int stride_w,
-                UMat &img)
-{
-    int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-    int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
-    int channels_col = channels * kernel_h * kernel_w;
-    int esz = img.elemSize();
-
-    CV_Assert(img.isContinuous() && col.isContinuous());
-    CV_Assert(img.total() == (size_t)channels * height * width);
-    CV_Assert(col.total() == (size_t)channels_col * height_col * width_col);
-
-    ocl::Kernel ker("col2im", ocl::dnn::col2im_oclsrc, String("-DT=") + ocl::typeToStr(col.type()));
-    if (ker.empty())
-        return false;
-
-    ker.args((int)img.total(),
-             ocl::KernelArg::PtrReadOnly(col), (int)col.offset/esz,
-             height, width, channels,
-             kernel_h, kernel_w,
-             pad_h, pad_w,
-             stride_h, stride_w,
-             height_col, width_col,
-             ocl::KernelArg::PtrWriteOnly(img), (int)img.offset/esz);
-
-    size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
-    size_t globalSize = img.total();
-    return ker.run(1, &globalSize, &localSize, true);
-}
-
-#endif
-}
-}
-
-namespace cv
-{
-namespace dnn
-{
-
-#ifdef HAVE_OPENCL
-void im2col_ocl(UMat &img,
-                int channels, int height, int width,
-                int kernel_h, int kernel_w,
-                int pad_h, int pad_w,
-                int stride_h, int stride_w,
-                int height_out, int width_out,
-                UMat &col)
-{
-    int h_out = height_out;
-    int w_out = width_out;
-
-    CV_Assert(img.isContinuous() && col.isContinuous());
-    CV_Assert(img.total() == (size_t)channels * height * width);
-    CV_Assert(col.total() == (size_t)channels * kernel_h * kernel_w * h_out * w_out);
-
-    ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc);
-    CV_Assert(!im2col_ker.empty());
-
-    im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), (int)img.offset,
-                    channels, height, width,
-                    kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
-                    h_out, w_out,
-                    ocl::KernelArg::PtrWriteOnly(col), (int)col.offset
-                    );
-
-    size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
-    size_t globalSize = (size_t)channels * h_out * w_out;
-
-    CV_Assert(im2col_ker.run(1, &globalSize, &localSize, true));
-}
-#endif // HAVE_OPENCL
-
-}
-}
diff --git a/modules/dnn/src/layers/op_im2col.hpp b/modules/dnn/src/layers/op_im2col.hpp
index 3026991e26d..488fab30fc5 100644
--- a/modules/dnn/src/layers/op_im2col.hpp
+++ b/modules/dnn/src/layers/op_im2col.hpp
@@ -308,23 +308,6 @@ void col2im_cpu(const Dtype* data_col,
     }
 }
-#ifdef HAVE_OPENCL
-bool im2col_ocl(const UMat &img,
-                int channels, int height, int width,
-                int kernel_h, int kernel_w,
-                int pad_h, int pad_w,
-                int stride_h, int stride_w,
-                int dilation_h, int dilation_w,
-                UMat &col);
-
-bool col2im_ocl(const UMat &col,
-                int channels, int height, int width,
-                int kernel_h, int kernel_w,
-                int pad_h, int pad_w,
-                int stride_h, int stride_w,
-                UMat &img);
-#endif
-
 }
 }
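With the OpenCL im2col/col2im kernels removed here, only the CPU templates in op_im2col.hpp remain. For orientation, a minimal scalar im2col covering stride and padding only (no dilation) — a sketch of the layout transform those kernels produced, not the module's actual implementation:

#include <vector>

// Lay out every kernel-sized patch of a C x H x W image as one column of a
// (C*kh*kw) x (Hout*Wout) matrix, zero-filling out-of-range (padded) pixels.
static void im2col(const float* img, int C, int H, int W,
                   int kh, int kw, int padH, int padW,
                   int strideH, int strideW, std::vector<float>& col)
{
    int Hout = (H + 2 * padH - kh) / strideH + 1;
    int Wout = (W + 2 * padW - kw) / strideW + 1;
    col.assign((size_t)C * kh * kw * Hout * Wout, 0.f);
    size_t idx = 0;
    for (int c = 0; c < C; c++)
        for (int ky = 0; ky < kh; ky++)
            for (int kx = 0; kx < kw; kx++)
                for (int y = 0; y < Hout; y++)
                    for (int x = 0; x < Wout; x++, idx++)
                    {
                        int sy = y * strideH - padH + ky; // source pixel row
                        int sx = x * strideW - padW + kx; // source pixel col
                        if (0 <= sy && sy < H && 0 <= sx && sx < W)
                            col[idx] = img[(c * H + sy) * W + sx];
                    }
}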
diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp
index 0a682906f68..6704e76e882 100644
--- a/modules/dnn/src/layers/padding_layer.cpp
+++ b/modules/dnn/src/layers/padding_layer.cpp
@@ -9,7 +9,7 @@ Implementation of padding layer, which adds paddings to input blob.
 */
-#include "padding_layer.hpp"
+#include "../precomp.hpp"
 #include
 namespace cv
 {
 namespace dnn
 {
-PaddingLayer::PaddingLayer(LayerParams &params)
+class PaddingLayerImpl : public PaddingLayer
 {
-    paddingDim = params.get<int>("padding_dim");
-    padding = abs(params.get<int>("padding"));
-    inputDims = params.get<int>("input_dims", 0);
-    index = params.get<int>("index", 0);
-    paddingValue = params.get<float>("value", 0);
-
-    if(paddingDim < 0 || padding < 0)
-        CV_Error(cv::Error::StsNotImplemented, "Negative padding and dim aren't supported");
-}
+public:
+    PaddingLayerImpl(const LayerParams &params)
+    {
+        setParamsFrom(params);
+        paddingDim = params.get<int>("padding_dim");
+        padding = abs(params.get<int>("padding"));
+        inputDims = params.get<int>("input_dims", 0);
+        index = params.get<int>("index", 0);
+        paddingValue = params.get<float>("value", 0);
-void PaddingLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    outputs.resize(inputs.size());
-    for(int i = 0; i < inputs.size(); i++)
+        if(paddingDim < 0 || padding < 0)
+            CV_Error(cv::Error::StsNotImplemented, "Negative padding and dim aren't supported");
+    }
+
+    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-        BlobShape shape = inputs[i]->shape();
-        int dim = getPadDim(shape);
-        CV_Assert(dim < shape.dims());
+        size_t i, ninputs = inputs.size();
+        outputs.resize(ninputs);
-        shape[dim] += padding;
-        outputs[i].create(shape);
+        for( i = 0; i < ninputs; i++ )
+        {
+            const Mat& inp = *inputs[i];
+            int dims = inp.dims;
+            std::vector<int> shape(inp.size.p, inp.size.p + dims);
+            int dim = getPadDim(shape);
+            CV_Assert(dim < dims);
+
+            shape[dim] += padding;
+            outputs[i].create(dims, &shape[0], inp.type());
+        }
     }
-}
-void PaddingLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    for(int i = 0; i < inputs.size(); i++)
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-        outputs[i].matRef() = paddingValue;
-        BlobShape inShape = inputs[i]->shape();
-        BlobShape outShape = outputs[i].shape();
-        int dim = getPadDim(inShape);
+        for(int i = 0; i < inputs.size(); i++)
+        {
+            outputs[i] = paddingValue;
+            const Mat& inp = *inputs[i];
+            Mat& out = outputs[i];
+            int dims = inp.dims;
+            std::vector<int> inShape(inp.size.p, inp.size.p + dims);
+            std::vector<int> outShape(out.size.p, out.size.p + dims);
+            int dim = getPadDim(inShape);
-        int actualIndex = index;
-        if(index == 0)
-            actualIndex = inShape[dim];
+            int actualIndex = index;
+            if(index == 0)
+                actualIndex = inShape[dim];
-        std::vector<std::pair<Range, Range> > srcDstRanges;
-        srcDstRanges.push_back(std::make_pair(Range(0, actualIndex), Range(0, actualIndex)));
-        srcDstRanges.push_back(std::make_pair(Range(actualIndex, inShape[dim]),
-                                              Range(actualIndex + padding, outShape[dim])));
+            std::vector<std::pair<Range, Range> > srcDstRanges;
+            srcDstRanges.push_back(std::make_pair(Range(0, actualIndex), Range(0, actualIndex)));
+            srcDstRanges.push_back(std::make_pair(Range(actualIndex, inShape[dim]),
+                                                  Range(actualIndex + padding, outShape[dim])));
-        std::vector<Range> srcRanges(inShape.dims(), Range::all()), dstRanges = srcRanges;
+            std::vector<Range> srcRanges(dims, Range::all()), dstRanges = srcRanges;
-        for(int j = 0; j < srcDstRanges.size(); j++)
-        {
-            if(!srcDstRanges[j].first.empty())
+            for(int j = 0; j < srcDstRanges.size(); j++)
             {
-                srcRanges[dim] = srcDstRanges[j].first;
-                dstRanges[dim] = srcDstRanges[j].second;
-                Mat dst = outputs[i].matRef()(&dstRanges[0]);
-                Mat src = inputs[i]->matRef()(&srcRanges[0]).clone();
-                src.copyTo(dst);
+                if(!srcDstRanges[j].first.empty())
+                {
+                    srcRanges[dim] = srcDstRanges[j].first;
+                    dstRanges[dim] =
srcDstRanges[j].second;
+                    Mat dst = out(&dstRanges[0]);
+                    Mat src = inp(&srcRanges[0]).clone();
+                    src.copyTo(dst);
+                }
             }
         }
    }
-}
-int PaddingLayer::getPadDim(const BlobShape& shape) const
+    int getPadDim(const std::vector<int>& shape) const
+    {
+        return inputDims > 0 && (int)shape.size() > inputDims ? paddingDim + 1 : paddingDim;
+    }
+
+    int paddingDim, padding, inputDims, index;
+    float paddingValue;
+};
+
+Ptr<PaddingLayer> PaddingLayer::create(const LayerParams &params)
 {
-    return inputDims > 0 && shape.dims() > inputDims ? paddingDim + 1 : paddingDim;
+    return Ptr<PaddingLayer>(new PaddingLayerImpl(params));
 }
 }
diff --git a/modules/dnn/src/layers/padding_layer.hpp b/modules/dnn/src/layers/padding_layer.hpp
deleted file mode 100644
index 18de0961077..00000000000
--- a/modules/dnn/src/layers/padding_layer.hpp
+++ /dev/null
@@ -1,37 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-// Copyright (C) 2016, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-
-/*
-Declaration of padding layer, which adds paddings to input blob.
-*/
-
-#ifndef __OPENCV_DNN_LAYERS_PADDING_LAYER_HPP__
-#define __OPENCV_DNN_LAYERS_PADDING_LAYER_HPP__
-#include "../precomp.hpp"
-
-namespace cv
-{
-namespace dnn
-{
-
-class PaddingLayer : public Layer
-{
-public:
-    PaddingLayer() {}
-    PaddingLayer(LayerParams &params);
-    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-
-private:
-    int getPadDim(const BlobShape& shape) const;
-    int paddingDim, padding, inputDims, index;
-    float paddingValue;
-};
-
-}
-}
-#endif
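To make the parameters above concrete: with padding_dim = 2 and padding = 2, a 1x1x4x4 input is allocated as 1x1x6x4, and index chooses where along that axis the original data is split (index = 0 means the new rows go at the end). A sketch of just the shape arithmetic, mirroring getPadDim():

#include <cassert>
#include <vector>

int main()
{
    std::vector<int> shape;
    shape.push_back(1); shape.push_back(1); shape.push_back(4); shape.push_back(4);

    int paddingDim = 2, padding = 2, inputDims = 0;
    // Same rule as getPadDim(): shift the axis by one when the blob carries an
    // extra leading dimension beyond the declared input_dims.
    int dim = (inputDims > 0 && (int)shape.size() > inputDims) ? paddingDim + 1
                                                               : paddingDim;
    shape[dim] += padding;   // the output allocation grows only this axis
    assert(shape[2] == 6 && shape[3] == 4);
    return 0;
}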
diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp
index 41c8399cec3..18943c626bc 100644
--- a/modules/dnn/src/layers/permute_layer.cpp
+++ b/modules/dnn/src/layers/permute_layer.cpp
@@ -41,7 +41,6 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
-#include "permute_layer.hpp"
 #include
 #include
@@ -49,137 +48,184 @@ namespace cv
 {
 namespace dnn
 {
-void PermuteLayer::checkCurrentOrder(int currentOrder)
+class PermuteLayerImpl : public PermuteLayer
 {
-    if(currentOrder < 0 || currentOrder > 3)
+public:
+    void checkCurrentOrder(int currentOrder)
     {
-        CV_Error(
-                 Error::StsBadArg,
-                 "Orders of dimensions in Permute layer parameter"
-                 "must be in [0...3] interval");
-    }
-
-    if(std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
-    {
-        CV_Error(Error::StsBadArg,
-                 "Permute layer parameter contains duplicated orders.");
-    }
-}
+        if(currentOrder < 0 || currentOrder > 3)
+        {
+            CV_Error(
+                     Error::StsBadArg,
+                     "Orders of dimensions in Permute layer parameter"
+                     "must be in [0...3] interval");
+        }
-void PermuteLayer::checkNeedForPermutation()
-{
-    _needsPermute = false;
-    for (size_t i = 0; i < _numAxes; ++i)
-    {
-        if (_order[i] != i)
+        if(std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
         {
-            _needsPermute = true;
-            break;
+            CV_Error(Error::StsBadArg,
+                     "Permute layer parameter contains duplicated orders.");
         }
     }
-}
+    void checkNeedForPermutation()
     {
        _needsPermute = false;
+        for (size_t i = 0; i < _numAxes; ++i)
+        {
+            if (_order[i] != i)
+            {
+                _needsPermute = true;
+                break;
+            }
+        }
    }
-PermuteLayer::PermuteLayer(LayerParams &params) : Layer(params)
-{
-    if (!params.has("order"))
+    PermuteLayerImpl(const LayerParams &params)
     {
-        _needsPermute = false;
-        return;
-    }
-
-    DictValue paramOrder = params.get("order");
-    if(paramOrder.size() > 4)
-    {
-        CV_Error(
-                 Error::StsBadArg,
-                 "Too many (> 4) orders of dimensions in Permute layer");
+        if (!params.has("order"))
+        {
+            _needsPermute = false;
+            return;
+        }
-    _numAxes = paramOrder.size();
-
-    for (size_t i = 0; i < _numAxes; i++)
+        DictValue paramOrder = params.get("order");
+        if(paramOrder.size() > 4)
+        {
+            CV_Error(
+                     Error::StsBadArg,
+                     "Too many (> 4) orders of dimensions in Permute layer");
+        }
-        int currentOrder = paramOrder.get<int>(i);
-        checkCurrentOrder(currentOrder);
-        _order.push_back(currentOrder);
-    }
+        _numAxes = paramOrder.size();
+        for (size_t i = 0; i < _numAxes; i++)
+        {
+            int currentOrder = paramOrder.get<int>(i);
+            checkCurrentOrder(currentOrder);
+            _order.push_back(currentOrder);
+        }
-    checkNeedForPermutation();
-}
+        setParamsFrom(params);
+        checkNeedForPermutation();
    }
-void PermuteLayer::computeStrides()
-{
-    _oldStride.resize(_numAxes);
-    _newStride.resize(_numAxes);
+    void computeStrides()
    {
-    _oldStride[_numAxes - 1] = 1;
-    _newStride[_numAxes - 1] = 1;
+        _oldStride.resize(_numAxes);
+        _newStride.resize(_numAxes);
-    for(int i = _numAxes - 2; i >= 0; i--)
-    {
-        _oldStride[i] = _oldStride[i + 1] * _oldDimensionSize[i + 1];
-        _newStride[i] = _newStride[i + 1] * _newDimensionSize[i + 1];
+        _oldStride[_numAxes - 1] = 1;
+        _newStride[_numAxes - 1] = 1;
+        for(int i = _numAxes - 2; i >= 0; i--)
+        {
+            _oldStride[i] = _oldStride[i + 1] * _oldDimensionSize[i + 1];
+            _newStride[i] = _newStride[i + 1] * _newDimensionSize[i + 1];
         }
-    _count = _oldStride[0] * _oldDimensionSize[0];
-}
-
-void PermuteLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    if(!_needsPermute)
+        _count = _oldStride[0] * _oldDimensionSize[0];
    }
-        return;
-    }
+    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-    CV_Assert(inputs.size() > 0);
-    CV_Assert((int)_numAxes == inputs[0]->shape().dims());
+        if(!_needsPermute)
+        {
+            return;
+        }
-    outputs.resize(inputs.size());
+        CV_Assert(inputs.size() > 0);
+        const Mat& inp0 = *inputs[0];
+        CV_Assert((int)_numAxes == inp0.dims);
-    _oldDimensionSize = inputs[0]->shape();
-    for (size_t i = 0; i < _numAxes; i++)
-    {
-        _newDimensionSize[i] = _oldDimensionSize[_order[i]];
+        outputs.resize(inputs.size());
+
+        _newDimensionSize.resize(_numAxes);
+        _oldDimensionSize.resize(_numAxes);
+
+        for (size_t i = 0; i < _numAxes; i++)
+        {
+            _oldDimensionSize[i] = inp0.size[i];
+            _newDimensionSize[i] = inp0.size[_order[i]];
         }
-    for (size_t i = 0; i < inputs.size(); i++)
+        for (size_t i = 0; i < inputs.size(); i++)
         {
-        CV_Assert(inputs[i]->rows() == _oldDimensionSize[2] && inputs[i]->cols() == _oldDimensionSize[3]);
-        outputs[i].create(BlobShape(_newDimensionSize));
-    }
+            CV_Assert(inputs[i]->size == inp0.size);
+            outputs[i].create(_numAxes, &_newDimensionSize[0], CV_32F);
+        }
+        computeStrides();
    }
-    computeStrides();
-}
-void PermuteLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    if(!_needsPermute)
-    {
-        for (size_t j = 0; j < inputs.size(); j++)
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-            outputs[j].matRef() = inputs[j]->matRef();
+        size_t k, ninputs = inputs.size();
+        if(!_needsPermute)
+        {
+            for (k = 0; k < ninputs; k++)
+                outputs[k] = *inputs[k];
         }
-        return;
+        else
         {
-        float *srcData = inputs[k]->ptrf();
-        float *dstData = outputs[k].ptrf();
-
-        for (size_t i = 0; i < _count; ++i)
+            size_t i, j, count = _count, numAxes = _numAxes;
+            const size_t* newStride = &_newStride[0];
+            const size_t*
oldStride = &_oldStride[0];
+            const size_t* order = &_order[0];
         {
-            int oldPosition = 0;
-            int newPosition = i;
+            for (k = 0; k < ninputs; k++)
             {
-            for (size_t j = 0; j < _numAxes; ++j)
+                const Mat& inp = *inputs[k];
+                Mat& out = outputs[k];
+
+                CV_Assert(inp.dims == numAxes && inp.size == inputs[0]->size);
+                CV_Assert(out.dims == numAxes && out.size == outputs[0].size);
+
+                for( i = 0; i < numAxes; i++ )
+                {
+                    CV_Assert(inp.size[i] == _oldDimensionSize[i]);
+                    CV_Assert(out.size[i] == _newDimensionSize[i]);
+                }
+
+                CV_Assert(inp.isContinuous() && out.isContinuous());
+                CV_Assert(inp.type() == CV_32F && out.type() == CV_32F);
+
+                const float *srcData = inp.ptr<float>();
+                float *dstData = out.ptr<float>();
+
+                for (i = 0; i < count; ++i)
                 {
-                oldPosition += (newPosition / _newStride[j]) * _oldStride[_order[j]];
-                newPosition %= _newStride[j];
+                    size_t oldPosition = 0;
+                    size_t newPosition = i;
+
+                    for (j = 0; j < numAxes; ++j)
+                    {
+                        oldPosition += (newPosition / newStride[j]) * oldStride[order[j]];
+                        newPosition %= newStride[j];
+                    }
+                    dstData[i] = srcData[oldPosition];
+                }
             }
-            dstData[i] = srcData[oldPosition];
         }
    }
+
+    size_t _count;
+    std::vector<size_t> _order;
+
+    std::vector<int> _oldDimensionSize;
+    std::vector<int> _newDimensionSize;
+
+    std::vector<size_t> _oldStride;
+    std::vector<size_t> _newStride;
+    bool _needsPermute;
+
+    size_t _numAxes;
+};
+
+Ptr<PermuteLayer> PermuteLayer::create(const LayerParams &params)
+{
+    return Ptr<PermuteLayer>(new PermuteLayerImpl(params));
 }
+
 }
 }
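The forward() above is a flat-index remap driven by the precomputed strides. The same index arithmetic in isolation, for a 4-D permutation (the shape and order are chosen arbitrarily for the example):

#include <cassert>

int main()
{
    const size_t numAxes = 4;
    size_t dims[4]  = { 2, 3, 4, 5 };    // old shape
    size_t order[4] = { 0, 2, 3, 1 };    // e.g. NCHW -> NHWC

    size_t oldDim[4], newDim[4], oldStride[4], newStride[4];
    for (size_t i = 0; i < numAxes; i++) { oldDim[i] = dims[i]; newDim[i] = dims[order[i]]; }
    oldStride[numAxes - 1] = newStride[numAxes - 1] = 1;
    for (int i = (int)numAxes - 2; i >= 0; i--)
    {
        oldStride[i] = oldStride[i + 1] * oldDim[i + 1];
        newStride[i] = newStride[i + 1] * newDim[i + 1];
    }

    // Where does flat element i of the permuted blob come from?
    size_t i = 17, oldPos = 0, newPos = i;
    for (size_t j = 0; j < numAxes; j++)
    {
        oldPos += (newPos / newStride[j]) * oldStride[order[j]];
        newPos %= newStride[j];
    }
    assert(oldPos < dims[0] * dims[1] * dims[2] * dims[3]);
    return 0;
}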
diff --git a/modules/dnn/src/layers/permute_layer.hpp b/modules/dnn/src/layers/permute_layer.hpp
deleted file mode 100644
index cc51c605585..00000000000
--- a/modules/dnn/src/layers/permute_layer.hpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#ifndef __OPENCV_DNN_LAYERS_PERMUTE_LAYER_HPP__
-#define __OPENCV_DNN_LAYERS_PERMUTE_LAYER_HPP__
-#include "../precomp.hpp"
-
-namespace cv
-{
-namespace dnn
-{
-class PermuteLayer : public Layer
-{
-    size_t _count;
-    std::vector<size_t> _order;
-
-    BlobShape _oldDimensionSize;
-    BlobShape _newDimensionSize;
-
-    std::vector<size_t> _oldStride;
-    std::vector<size_t> _newStride;
-    bool _needsPermute;
-
-    size_t _numAxes;
-
-    void checkCurrentOrder(int currentOrder);
-    void checkNeedForPermutation();
-    void computeStrides();
-
-public:
-    PermuteLayer(LayerParams &params);
-    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
-};
-}
-}
-#endif
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index 9aaee31ead2..e37addd70e2 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -41,11 +41,8 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
-#include "pooling_layer.hpp"
-#include "opencl_kernels_dnn.hpp"
 #include
 #include
-#include
 using std::max;
 using std::min;
@@ -53,273 +50,199 @@ namespace cv
 {
 namespace dnn
 {
-//TODO: add ceil_mode param
-
-PoolingLayerImpl::PoolingLayerImpl()
-{
-    globalPooling = false;
-}
-PoolingLayerImpl::PoolingLayerImpl(int type_, Size kernel_, Size stride_, Size pad_, const String &padMode_)
-{
-    globalPooling = false;
-    type = type_;
-    kernel = kernel_;
-    pad = pad_;
-    stride = stride_;
-    padMode = padMode_;
-}
-
-void PoolingLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+//TODO: add ceil_mode param
+class PoolingLayerImpl : public PoolingLayer
 {
-    CV_Assert(inputs.size() == 1);
+public:
+    PoolingLayerImpl(const LayerParams& params)
+    {
+        type = PoolingLayer::MAX;
-    inp = inputs[0]->size2();
+        if (params.has("pool"))
+        {
+            String pool = params.get<String>("pool").toLowerCase();
+            if (pool == "max")
+                type = PoolingLayer::MAX;
+            else if (pool == "ave")
+                type = PoolingLayer::AVE;
+            else if (pool == "stochastic")
+                type = PoolingLayer::STOCHASTIC;
+            else
+                CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
+        }
-    if(globalPooling)
-    {
-        kernel = inp;
+        getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
+                               pad.height, pad.width, stride.height, stride.width, padMode);
+        setParamsFrom(params);
     }
-    computeOutputShape(inp);
+    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        CV_Assert(inputs.size() == 1);
-    useOpenCL = ocl::useOpenCL();
+        inp = Size(inputs[0]->size[3], inputs[0]->size[2]);
-    outputs.resize(type == MAX ?
2 * inputs.size() : inputs.size());
-    for (size_t i = 0; i < inputs.size(); i++)
-    {
-        CV_Assert(inputs[i]->rows() == inp.height && inputs[i]->cols() == inp.width);
-        if (type == MAX)
+        if(globalPooling)
         {
-            outputs[2 * i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width));
-            outputs[2 * i + 1].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width));
+            kernel = inp;
         }
-        else
+
+        computeOutputShape(inp);
+
+        outputs.resize(type == MAX ? 2 * inputs.size() : inputs.size());
+        for (size_t i = 0; i < inputs.size(); i++)
         {
-            outputs[i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width));
+            const Mat& inp_i = *inputs[i];
+            CV_Assert(inp_i.size[2] == inp.height && inp_i.size[3] == inp.width);
+            int outsz[] = { inp_i.size[0], inp_i.size[1], out.height, out.width };
+
+            if (type == MAX)
+            {
+                outputs[2 * i].create(4, outsz, CV_32F);
+                outputs[2 * i + 1].create(4, outsz, CV_32F);
+            }
+            else
+            {
+                outputs[i].create(4, outsz, CV_32F);
+            }
         }
    }
-}
-void PoolingLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    for (size_t ii = 0; ii < inputs.size(); ii++)
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
-        switch (type)
+        for (size_t ii = 0; ii < inputs.size(); ii++)
         {
-        case MAX:
-            maxPooling(*inputs[ii], outputs[2 * ii], outputs[2 * ii + 1]);
-            break;
-        case AVE:
-            avePooling(*inputs[ii], outputs[ii]);
-            break;
-        default:
-            CV_Error(Error::StsNotImplemented, "Not implemented");
-            break;
+            switch (type)
+            {
+            case MAX:
+                maxPooling(*inputs[ii], outputs[2 * ii], outputs[2 * ii + 1]);
+                break;
+            case AVE:
+                avePooling(*inputs[ii], outputs[ii]);
+                break;
+            default:
+                CV_Error(Error::StsNotImplemented, "Not implemented");
+                break;
+            }
         }
    }
-}
-
-void PoolingLayerImpl::maxPooling(Blob &src, Blob &dst, Blob &mask)
-{
-    if (!useOpenCL)
-        maxPooling_cpu(src, dst, mask);
-    else
-    {
-        CV_Assert(maxPooling_ocl(src, dst, mask));
-    }
-}
-bool PoolingLayerImpl::maxPooling_ocl(Blob &src, Blob &dst, Blob &mask)
-{
-    return pooling_ocl("MaxPoolForward", src, dst, &mask);
-}
-
-void PoolingLayerImpl::avePooling(Blob &src, Blob &dst)
-{
-    if (!useOpenCL)
-        avePooling_cpu(src, dst);
-    else
+    void maxPooling(Mat &src, Mat &dst, Mat &mask)
     {
-        CV_Assert(avePooling_ocl(src, dst));
-    }
-}
-
-bool PoolingLayerImpl::avePooling_ocl(Blob &src, Blob &dst)
-{
-    return pooling_ocl("AvePoolForward", src, dst);
-}
+        CV_DbgAssert(dst.size[2] == out.height && dst.size[3] == out.width);
-void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst, Blob &mask)
-{
-    CV_DbgAssert(dst.rows() == out.height && dst.cols() == out.width);
-
-    for (int n = 0; n < src.num(); ++n)
-    {
-        for (int c = 0; c < src.channels(); ++c)
+        for (int n = 0; n < src.size[0]; ++n)
         {
-            const float *srcData = src.ptrf(n, c);
-            float *dstData = dst.ptrf(n, c);
-            float *dstMaskData = mask.ptrf(n, c);
-
-            for (int ph = 0; ph < out.height; ++ph)
+            for (int c = 0; c < src.size[1]; ++c)
             {
-                for (int pw = 0; pw < out.width; ++pw)
-                {
-                    int hstart = ph * stride.height - pad.height;
-                    int wstart = pw * stride.width - pad.width;
-                    int hend = min(hstart + kernel.height, inp.height);
-                    int wend = min(wstart + kernel.width, inp.width);
-                    hstart = max(hstart, 0);
-                    wstart = max(wstart, 0);
-                    const int poolIndex = ph * out.width + pw;
-                    float max_val = -FLT_MAX;
-                    int max_index = -1;
+                const float *srcData = src.ptr<float>(n, c);
+                float *dstData = dst.ptr<float>(n, c);
+                float *dstMaskData = mask.ptr<float>(n, c);
-                    for (int h = hstart; h < hend; ++h)
-                        for (int w = wstart; w < wend; ++w)
-                        {
- const int index = h * inp.width + w; - if (srcData[index] > max_val) + for (int ph = 0; ph < out.height; ++ph) + { + for (int pw = 0; pw < out.width; ++pw) + { + int hstart = ph * stride.height - pad.height; + int wstart = pw * stride.width - pad.width; + int hend = min(hstart + kernel.height, inp.height); + int wend = min(wstart + kernel.width, inp.width); + hstart = max(hstart, 0); + wstart = max(wstart, 0); + const int poolIndex = ph * out.width + pw; + float max_val = -FLT_MAX; + int max_index = -1; + + for (int h = hstart; h < hend; ++h) + for (int w = wstart; w < wend; ++w) { - max_val = srcData[index]; - max_index = index; + const int index = h * inp.width + w; + if (srcData[index] > max_val) + { + max_val = srcData[index]; + max_index = index; + } } - } - dstData[poolIndex] = max_val; - dstMaskData[poolIndex] = max_index; + dstData[poolIndex] = max_val; + dstMaskData[poolIndex] = max_index; + } } } } } -} - - -#ifdef HAVE_OPENCL -bool PoolingLayerImpl::pooling_ocl(const char *kname, const Blob &src, Blob &dst, Blob *mask) -{ - const UMat &srcMat = src.umatRefConst(); - UMat &dstMat = dst.umatRef(); - UMat *maskUMat = mask == NULL ? NULL : &mask->umatRef(); - CV_Assert(maskUMat == NULL || maskUMat->type() == CV_32FC1); // FIXIT CV_32SC1 - CV_Assert(maskUMat == NULL || maskUMat->offset == 0); - - CV_Assert(srcMat.offset == 0 && dstMat.offset == 0); - ocl::Kernel ker(kname, ocl::dnn::pooling_oclsrc, - cv::format("-DT=%s%s", ocl::typeToStr(src.type()), maskUMat ? " -DMASK=1" : "")); - if (ker.empty()) - return false; - - BlobShape s = src.shape(); - size_t nthreads = dst.total(); - if (maskUMat) - { - ker.args((int)nthreads, - ocl::KernelArg::PtrReadOnly(srcMat), s[0], s[1], s[2], s[3], - out.height, out.width, kernel.height, kernel.width, - stride.height, stride.width, pad.height, pad.width, - ocl::KernelArg::PtrWriteOnly(dstMat), - ocl::KernelArg::PtrWriteOnly(*maskUMat)); - } - else + void avePooling(Mat &src, Mat &dst) { - ker.args((int)nthreads, - ocl::KernelArg::PtrReadOnly(srcMat), s[0], s[1], s[2], s[3], - out.height, out.width, kernel.height, kernel.width, - stride.height, stride.width, pad.height, pad.width, - ocl::KernelArg::PtrWriteOnly(dstMat)); - } - - size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize(); - if (!ker.run(1, &nthreads, &wgSize, true)) - return false; - - return true; -} -#else -bool PoolingLayerImpl::pooling_ocl(const char*, const Blob&, Blob&, Blob*) -{ - return false; -} -#endif - -void PoolingLayerImpl::avePooling_cpu(Blob &src, Blob &dst) -{ - for (int n = 0; n < src.num(); ++n) - { - for (int c = 0; c < src.channels(); ++c) + for (int n = 0; n < src.size[0]; ++n) { - const float *srcData = src.ptrf(n, c); - float *dstData = dst.ptrf(n, c); - - for (int ph = 0; ph < out.height; ++ph) + for (int c = 0; c < src.size[1]; ++c) { - for (int pw = 0; pw < out.width; ++pw) - { - int hstart = ph * stride.height - pad.height; - int wstart = pw * stride.width - pad.width; - int hend = min(hstart + kernel.height, inp.height + pad.height); - int wend = min(wstart + kernel.width, inp.width + pad.width); - int poolSize = (hend - hstart) * (wend - wstart); - hstart = max(hstart, 0); - wstart = max(wstart, 0); - hend = min(hend, inp.height); - wend = min(wend, inp.width); - - dstData[ph * out.width + pw] = 0.f; + const float *srcData = src.ptr(n, c); + float *dstData = dst.ptr(n, c); - for (int h = hstart; h < hend; ++h) - for (int w = wstart; w < wend; ++w) - dstData[ph * out.width + pw] += srcData[h * inp.width + w]; - - dstData[ph * out.width + pw] /= 
poolSize; + for (int ph = 0; ph < out.height; ++ph) + { + for (int pw = 0; pw < out.width; ++pw) + { + int hstart = ph * stride.height - pad.height; + int wstart = pw * stride.width - pad.width; + int hend = min(hstart + kernel.height, inp.height + pad.height); + int wend = min(wstart + kernel.width, inp.width + pad.width); + int poolSize = (hend - hstart) * (wend - wstart); + hstart = max(hstart, 0); + wstart = max(wstart, 0); + hend = min(hend, inp.height); + wend = min(wend, inp.width); + + dstData[ph * out.width + pw] = 0.f; + + for (int h = hstart; h < hend; ++h) + for (int w = wstart; w < wend; ++w) + dstData[ph * out.width + pw] += srcData[h * inp.width + w]; + + dstData[ph * out.width + pw] /= poolSize; + } } } } } -} - -void PoolingLayerImpl::computeOutputShape(Size inpSz) -{ - if (padMode.empty()) { - //Yeah, something strange Caffe scheme-) - out.height = static_cast(ceil(static_cast(inpSz.height + 2 * pad.height - - kernel.height) / stride.height)) + 1; - out.width = static_cast(ceil(static_cast(inpSz.width + 2 * pad.width - - kernel.width) / stride.width)) + 1; - if (pad.height || pad.width) + void computeOutputShape(Size inpSz) + { + if (padMode.empty()) { + //Yeah, something strange Caffe scheme-) + out.height = static_cast(ceil(static_cast(inpSz.height + 2 * pad.height - + kernel.height) / stride.height)) + 1; + out.width = static_cast(ceil(static_cast(inpSz.width + 2 * pad.width - + kernel.width) / stride.width)) + 1; + + if (pad.height || pad.width) + { + // If we have padding, ensure that the last pooling starts strictly + // inside the image (instead of at the padding); otherwise clip the last. + if ((out.height - 1) * stride.height >= inpSz.height + pad.height) + --out.height; + if ((out.width - 1) * stride.width >= inpSz.width + pad.width) + --out.width; + CV_Assert((out.height - 1) * stride.height < inpSz.height + pad.height); + CV_Assert((out.width - 1) * stride.width < inpSz.width + pad.width); + } + } + else { - // If we have padding, ensure that the last pooling starts strictly - // inside the image (instead of at the padding); otherwise clip the last. - if ((out.height - 1) * stride.height >= inpSz.height + pad.height) - --out.height; - if ((out.width - 1) * stride.width >= inpSz.width + pad.width) - --out.width; - CV_Assert((out.height - 1) * stride.height < inpSz.height + pad.height); - CV_Assert((out.width - 1) * stride.width < inpSz.width + pad.width); + getConvPoolOutParams(inpSz.height, inpSz.width, kernel, stride, pad, + padMode, out.height, out.width); } } - else - { - getConvPoolOutParams(inpSz.height, inpSz.width, kernel, stride, pad, - padMode, out.height, out.width); - } -} -Ptr PoolingLayer::create(int type, Size kernel, Size stride, Size pad, - const String& padMode) -{ - return Ptr(new PoolingLayerImpl(type, kernel, stride, pad, padMode)); -} + Size inp, out; +}; -Ptr PoolingLayer::createGlobal(int type) +Ptr PoolingLayer::create(const LayerParams& params) { - Ptr l = PoolingLayer::create(type); - l->globalPooling = true; - return l; + return Ptr(new PoolingLayerImpl(params)); } } diff --git a/modules/dnn/src/layers/pooling_layer.hpp b/modules/dnn/src/layers/pooling_layer.hpp deleted file mode 100644 index 266db1c50a4..00000000000 --- a/modules/dnn/src/layers/pooling_layer.hpp +++ /dev/null @@ -1,81 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
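For reference, the Caffe-compatible branch of computeOutputShape() above reduces to out = ceil((inp + 2*pad - kernel) / stride) + 1, clipped so that the last window starts inside the image rather than entirely in the padding. A minimal standalone sketch of one spatial axis; computePoolOutDim is an illustrative helper, not part of this patch:

#include <cmath>
#include <cassert>

// One spatial axis of the Caffe-style pooling output size used above.
static int computePoolOutDim(int inp, int kernel, int stride, int pad)
{
    // Caffe rounds up, so the last window may reach into the padded border...
    int out = (int)std::ceil((float)(inp + 2 * pad - kernel) / stride) + 1;
    if (pad > 0)
    {
        // ...but it must start strictly inside the image plus padding;
        // otherwise the last output row/column is dropped.
        if ((out - 1) * stride >= inp + pad)
            --out;
        assert((out - 1) * stride < inp + pad);
    }
    return out;
}

For example, inp = 7, kernel = 3, stride = 2, pad = 1 gives ceil((7 + 2 - 3) / 2) + 1 = 4, with no clipping since the last window starts at 3 * 2 - 1 = 5, inside the image.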
-// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
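As a reminder of what the MAX branch above computes: it fills two blobs per input, the pooled maxima and a float mask holding the flat in-plane index of each maximum (later consumed by unpooling layers). A hedged single-plane sketch of that inner loop; the function name and flat-pointer interface are illustrative, not part of the patch:

#include <cfloat>
#include <algorithm>

// Max-pool one inpH x inpW plane into an outH x outW plane plus argmax mask.
static void maxPoolPlane(const float* src, int inpH, int inpW,
                         float* dst, float* mask, int outH, int outW,
                         int kH, int kW, int strideH, int strideW,
                         int padH, int padW)
{
    for (int ph = 0; ph < outH; ++ph)
        for (int pw = 0; pw < outW; ++pw)
        {
            int hstart = std::max(ph * strideH - padH, 0);
            int wstart = std::max(pw * strideW - padW, 0);
            int hend = std::min(ph * strideH - padH + kH, inpH);
            int wend = std::min(pw * strideW - padW + kW, inpW);

            float maxVal = -FLT_MAX;
            int maxIdx = -1;
            for (int h = hstart; h < hend; ++h)
                for (int w = wstart; w < wend; ++w)
                    if (src[h * inpW + w] > maxVal)
                    {
                        maxVal = src[h * inpW + w];
                        maxIdx = h * inpW + w;    // flat index within the plane
                    }

            dst[ph * outW + pw] = maxVal;
            mask[ph * outW + pw] = (float)maxIdx; // stored as float, as in the layer
        }
}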
-// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -class PoolingLayerImpl : public PoolingLayer -{ - bool useOpenCL; - Size inp, out; - - void computeOutputShape(Size inpSz); - - bool pooling_ocl(const char *kname, const Blob &src, Blob &dst, Blob *mask = NULL); - - void maxPooling(Blob &src, Blob &dst, Blob &mask); - void maxPooling_cpu(Blob &src, Blob &dst, Blob &mask); - bool maxPooling_ocl(Blob &src, Blob &dst, Blob &mask); - - void avePooling(Blob &src, Blob &dst); - void avePooling_cpu(Blob &src, Blob &dst); - bool avePooling_ocl(Blob &src, Blob &dst); - -public: - - PoolingLayerImpl(); - PoolingLayerImpl(int type, Size kernel, Size stride, Size pad, const String& padMode); - - void allocate(const std::vector &inputs, std::vector &outputs); - void forward(std::vector &inputs, std::vector &outputs); -}; - -} -} - -#endif diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp index a5343a41646..ee34485f9c5 100644 --- a/modules/dnn/src/layers/prior_box_layer.cpp +++ b/modules/dnn/src/layers/prior_box_layer.cpp @@ -41,7 +41,6 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include "prior_box_layer.hpp" #include #include #include @@ -51,193 +50,182 @@ namespace cv namespace dnn { -const std::string PriorBoxLayer::_layerName = std::string("PriorBox"); - -bool PriorBoxLayer::getParameterDict(const LayerParams ¶ms, - const std::string ¶meterName, - DictValue& result) -{ - if (!params.has(parameterName)) - { - return false; - } - - result = params.get(parameterName); - return true; -} - -template -T PriorBoxLayer::getParameter(const LayerParams ¶ms, - const std::string ¶meterName, - const size_t &idx, - const bool required, - const T& defaultValue) +class PriorBoxLayerImpl : public PriorBoxLayer { - DictValue dictValue; - bool success = getParameterDict(params, parameterName, dictValue); - if(!success) +public: + bool getParameterDict(const LayerParams ¶ms, + const std::string ¶meterName, + DictValue& result) { - if(required) + if (!params.has(parameterName)) { - std::string message = _layerName; - message += " layer parameter does not contain "; - message += parameterName; - message += " parameter."; - CV_Error(Error::StsBadArg, message); + return false; } - else - { - return defaultValue; - } - } - return dictValue.get(idx); -} -void PriorBoxLayer::getAspectRatios(const LayerParams ¶ms) -{ - DictValue aspectRatioParameter; - bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter); - CV_Assert(aspectRatioRetieved); + result = params.get(parameterName); + return true; + } - for (int i = 0; i < aspectRatioParameter.size(); ++i) + template + T getParameter(const LayerParams ¶ms, + const std::string ¶meterName, + const size_t &idx=0, + const bool required=true, + const T& defaultValue=T()) { - float aspectRatio = aspectRatioParameter.get(i); - bool alreadyExists = false; - - for (size_t j = 0; j < _aspectRatios.size(); ++j) + DictValue dictValue; + bool success = getParameterDict(params, parameterName, dictValue); + if(!success) { - if (fabs(aspectRatio - _aspectRatios[j]) < 1e-6) + if(required) { - alreadyExists = true; - break; + std::string message = _layerName; + message += " layer parameter does not contain "; + message += parameterName; + message += " parameter."; + CV_Error(Error::StsBadArg, message); } - } - if (!alreadyExists) - { - 
_aspectRatios.push_back(aspectRatio);
-            if (_flip)
+            else
             {
-                _aspectRatios.push_back(1./aspectRatio);
+                return defaultValue;
             }
         }
+        return dictValue.get<T>(idx);
     }
-}
-
-void PriorBoxLayer::getVariance(const LayerParams &params)
-{
-    DictValue varianceParameter;
-    bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
-    CV_Assert(varianceParameterRetrieved);
-    int varianceSize = varianceParameter.size();
-    if (varianceSize > 1)
+    void getAspectRatios(const LayerParams &params)
     {
-        // Must and only provide 4 variance.
-        CV_Assert(varianceSize == 4);
+        DictValue aspectRatioParameter;
+        bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
+        CV_Assert(aspectRatioRetieved);

-        for (int i = 0; i < varianceSize; ++i)
+        for (int i = 0; i < aspectRatioParameter.size(); ++i)
         {
-            float variance = varianceParameter.get<float>(i);
-            CV_Assert(variance > 0);
-            _variance.push_back(variance);
+            float aspectRatio = aspectRatioParameter.get<float>(i);
+            bool alreadyExists = false;
+
+            for (size_t j = 0; j < _aspectRatios.size(); ++j)
+            {
+                if (fabs(aspectRatio - _aspectRatios[j]) < 1e-6)
+                {
+                    alreadyExists = true;
+                    break;
+                }
+            }
+            if (!alreadyExists)
+            {
+                _aspectRatios.push_back(aspectRatio);
+                if (_flip)
+                {
+                    _aspectRatios.push_back(1./aspectRatio);
+                }
+            }
         }
     }
-    else
+
+    void getVariance(const LayerParams &params)
     {
-        if (varianceSize == 1)
+        DictValue varianceParameter;
+        bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
+        CV_Assert(varianceParameterRetrieved);
+
+        int varianceSize = varianceParameter.size();
+        if (varianceSize > 1)
         {
-            float variance = varianceParameter.get<float>(0);
-            CV_Assert(variance > 0);
-            _variance.push_back(variance);
+            // Exactly 4 variance values must be provided.
+            CV_Assert(varianceSize == 4);
+
+            for (int i = 0; i < varianceSize; ++i)
+            {
+                float variance = varianceParameter.get<float>(i);
+                CV_Assert(variance > 0);
+                _variance.push_back(variance);
+            }
         }
         else
         {
-            // Set default to 0.1.
-            _variance.push_back(0.1f);
+            if (varianceSize == 1)
+            {
+                float variance = varianceParameter.get<float>(0);
+                CV_Assert(variance > 0);
+                _variance.push_back(variance);
+            }
+            else
+            {
+                // Set default to 0.1.
+                _variance.push_back(0.1f);
+            }
         }
     }

-PriorBoxLayer::PriorBoxLayer(LayerParams &params) : Layer(params)
-{
-    _minSize = getParameter<float>(params, "min_size");
-    CV_Assert(_minSize > 0);
+    PriorBoxLayerImpl(const LayerParams &params)
+    {
+        setParamsFrom(params);
+        _minSize = getParameter<float>(params, "min_size");
+        CV_Assert(_minSize > 0);

-    _flip = getParameter<bool>(params, "flip");
-    _clip = getParameter<bool>(params, "clip");
+        _flip = getParameter<bool>(params, "flip");
+        _clip = getParameter<bool>(params, "clip");

-    _aspectRatios.clear();
-    _aspectRatios.push_back(1.);
+        _aspectRatios.clear();
+        _aspectRatios.push_back(1.);

-    getAspectRatios(params);
-    getVariance(params);
+        getAspectRatios(params);
+        getVariance(params);

-    _numPriors = _aspectRatios.size();
+        _numPriors = _aspectRatios.size();

-    _maxSize = -1;
-    if (params.has("max_size"))
-    {
-        _maxSize = params.get("max_size").get<float>(0);
-        CV_Assert(_maxSize > _minSize);
+        _maxSize = -1;
+        if (params.has("max_size"))
+        {
+            _maxSize = params.get("max_size").get<float>(0);
+            CV_Assert(_maxSize > _minSize);

-        _numPriors += 1;
+            _numPriors += 1;
+        }
     }
-}

-void PriorBoxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
-{
-    CV_Assert(inputs.size() == 2);
+    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        CV_Assert(inputs.size() == 2);

-    _layerWidth = inputs[0]->cols();
-    _layerHeight = inputs[0]->rows();
+        _layerWidth = inputs[0]->size[3];
+        _layerHeight = inputs[0]->size[2];

-    _imageWidth = inputs[1]->cols();
-    _imageHeight = inputs[1]->rows();
+        _imageWidth = inputs[1]->size[3];
+        _imageHeight = inputs[1]->size[2];

-    _stepX = static_cast<float>(_imageWidth) / _layerWidth;
-    _stepY = static_cast<float>(_imageHeight) / _layerHeight;
+        _stepX = static_cast<float>(_imageWidth) / _layerWidth;
+        _stepY = static_cast<float>(_imageHeight) / _layerHeight;

-    // Since all images in a batch has same height and width, we only need to
-    // generate one set of priors which can be shared across all images.
-    size_t outNum = 1;
-    // 2 channels. First channel stores the mean of each prior coordinate.
-    // Second channel stores the variance of each prior coordinate.
-    _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
+        // Since all images in a batch have the same height and width, we only need to
+        // generate one set of priors which can be shared across all images.
+        int outNum = 1;
+        // 2 channels. First channel stores the mean of each prior coordinate.
+        // Second channel stores the variance of each prior coordinate.
+ int outChannels = 2; + _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4; - outputs[0].create(BlobShape(outNum, outChannels, _outChannelSize)); - outputs[0].matRef() = 0; -} + int outsz[] = { outNum, outChannels, (int)_outChannelSize }; + outputs[0].create(3, outsz, CV_32F); + } -void PriorBoxLayer::forward(std::vector &inputs, std::vector &outputs) -{ - (void)inputs; // to suppress unused parameter warning + void forward(std::vector &inputs, std::vector &outputs) + { + (void)inputs; // to suppress unused parameter warning - float* outputPtr = outputs[0].ptrf(); + float* outputPtr = outputs[0].ptr(); - // first prior: aspect_ratio = 1, size = min_size - int idx = 0; - for (size_t h = 0; h < _layerHeight; ++h) - { - for (size_t w = 0; w < _layerWidth; ++w) + // first prior: aspect_ratio = 1, size = min_size + int idx = 0; + for (size_t h = 0; h < _layerHeight; ++h) { - _boxWidth = _boxHeight = _minSize; - - float center_x = (w + 0.5) * _stepX; - float center_y = (h + 0.5) * _stepY; - // xmin - outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth; - // ymin - outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight; - // xmax - outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth; - // ymax - outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight; - - if (_maxSize > 0) + for (size_t w = 0; w < _layerWidth; ++w) { - // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size) - _boxWidth = _boxHeight = sqrt(_minSize * _maxSize); + _boxWidth = _boxHeight = _minSize; + + float center_x = (w + 0.5) * _stepX; + float center_y = (h + 0.5) * _stepY; // xmin outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth; // ymin @@ -246,62 +234,112 @@ void PriorBoxLayer::forward(std::vector &inputs, std::vector &outpu outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth; // ymax outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight; - } - // rest of priors - for (size_t r = 0; r < _aspectRatios.size(); ++r) - { - float ar = _aspectRatios[r]; - if (fabs(ar - 1.) < 1e-6) + if (_maxSize > 0) { - continue; + // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size) + _boxWidth = _boxHeight = sqrt(_minSize * _maxSize); + // xmin + outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth; + // ymin + outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight; + // xmax + outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth; + // ymax + outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight; + } + + // rest of priors + for (size_t r = 0; r < _aspectRatios.size(); ++r) + { + float ar = _aspectRatios[r]; + if (fabs(ar - 1.) < 1e-6) + { + continue; + } + _boxWidth = _minSize * sqrt(ar); + _boxHeight = _minSize / sqrt(ar); + // xmin + outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth; + // ymin + outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight; + // xmax + outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth; + // ymax + outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight; } - _boxWidth = _minSize * sqrt(ar); - _boxHeight = _minSize / sqrt(ar); - // xmin - outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth; - // ymin - outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight; - // xmax - outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth; - // ymax - outputPtr[idx++] = (center_y + _boxHeight / 2.) 
/ _imageHeight;
                }
            }
-    }
-    // clip the prior's coordidate such that it is within [0, 1]
-    if (_clip)
-    {
-        for (size_t d = 0; d < _outChannelSize; ++d)
+        // clip the priors' coordinates such that they are within [0, 1]
+        if (_clip)
+        {
+            for (size_t d = 0; d < _outChannelSize; ++d)
+            {
+                outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
+            }
+        }
+        // set the variance.
+        outputPtr = outputs[0].ptr<float>(0, 1);
+        if(_variance.size() == 1)
         {
-            outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
+            Mat secondChannel(outputs[0].size[2], outputs[0].size[3], CV_32F, outputPtr);
+            secondChannel.setTo(Scalar(_variance[0]));
         }
-    }
-    // set the variance.
-    outputPtr = outputs[0].ptrf(0, 1);
-    if(_variance.size() == 1)
-    {
-        Mat secondChannel(outputs[0].rows(), outputs[0].cols(), CV_32F, outputPtr);
-        secondChannel.setTo(Scalar(_variance[0]));
-    }
-    else
-    {
-        int count = 0;
-        for (size_t h = 0; h < _layerHeight; ++h)
+        else
         {
-            for (size_t w = 0; w < _layerWidth; ++w)
+            int count = 0;
+            for (size_t h = 0; h < _layerHeight; ++h)
             {
-                for (size_t i = 0; i < _numPriors; ++i)
+                for (size_t w = 0; w < _layerWidth; ++w)
                 {
-                    for (int j = 0; j < 4; ++j)
+                    for (size_t i = 0; i < _numPriors; ++i)
                     {
-                        outputPtr[count] = _variance[j];
-                        ++count;
+                        for (int j = 0; j < 4; ++j)
+                        {
+                            outputPtr[count] = _variance[j];
+                            ++count;
+                        }
                     }
                }
            }
        }
    }
+
+    size_t _layerWidth;
+    size_t _layerHeight;
+
+    size_t _imageWidth;
+    size_t _imageHeight;
+
+    size_t _outChannelSize;
+
+    float _stepX;
+    float _stepY;
+
+    float _minSize;
+    float _maxSize;
+
+    float _boxWidth;
+    float _boxHeight;
+
+    std::vector<float> _aspectRatios;
+    std::vector<float> _variance;
+
+    bool _flip;
+    bool _clip;
+
+    size_t _numPriors;
+
+    static const size_t _numAxes = 4;
+    static const std::string _layerName;
+};
+
+const std::string PriorBoxLayerImpl::_layerName = std::string("PriorBox");
+
+Ptr<PriorBoxLayer> PriorBoxLayer::create(const LayerParams &params)
+{
+    return Ptr<PriorBoxLayer>(new PriorBoxLayerImpl(params));
 }
+
 }
 }
diff --git a/modules/dnn/src/layers/prior_box_layer.hpp b/modules/dnn/src/layers/prior_box_layer.hpp
deleted file mode 100644
index e398aa1650e..00000000000
--- a/modules/dnn/src/layers/prior_box_layer.hpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
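Concretely, forward() above writes each prior as four normalized corners around the cell center. A worked example with illustrative numbers (not taken from the patch):

#include <cstdio>

int main()
{
    // Assume a 10x10 feature map over a 300x300 image and min_size = 60.
    float stepX = 300.f / 10, stepY = 300.f / 10; // 30 pixels per cell
    float cx = (0 + 0.5f) * stepX;                // center of cell (w=0, h=0): 15
    float cy = (0 + 0.5f) * stepY;                // 15
    float boxW = 60.f, boxH = 60.f;               // first prior: aspect ratio 1
    float xmin = (cx - boxW / 2) / 300.f;         // -0.05 (clamped to 0 when clip is set)
    float ymin = (cy - boxH / 2) / 300.f;         // -0.05
    float xmax = (cx + boxW / 2) / 300.f;         //  0.15
    float ymax = (cy + boxH / 2) / 300.f;         //  0.15
    std::printf("prior: [%g %g %g %g]\n", xmin, ymin, xmax, ymax);
    // With max_size set, the second prior uses sqrt(min_size * max_size);
    // every other aspect ratio ar uses boxW = min_size * sqrt(ar) and
    // boxH = min_size / sqrt(ar).
    return 0;
}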
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_PRIOR_BOX_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_PRIOR_BOX_LAYER_HPP__ -#include "../precomp.hpp" - -namespace cv -{ -namespace dnn -{ -class PriorBoxLayer : public Layer -{ - size_t _layerWidth; - size_t _layerHeight; - - size_t _imageWidth; - size_t _imageHeight; - - size_t _outChannelSize; - - float _stepX; - float _stepY; - - float _minSize; - float _maxSize; - - float _boxWidth; - float _boxHeight; - - std::vector _aspectRatios; - std::vector _variance; - - bool _flip; - bool _clip; - - size_t _numPriors; - - static const size_t _numAxes = 4; - static const std::string _layerName; - -public: - PriorBoxLayer(LayerParams ¶ms); - void allocate(const std::vector &inputs, std::vector &outputs); - void forward(std::vector &inputs, std::vector &outputs); - - template - T getParameter(const LayerParams ¶ms, - const std::string ¶meterName, - const size_t &idx = 0, - const bool required = true, - const T& defaultValue = T()); - - bool getParameterDict(const LayerParams ¶ms, - const std::string ¶meterName, - DictValue& result); - - void getAspectRatios(const LayerParams ¶ms); - void getVariance(const LayerParams ¶ms); -}; -} -} -#endif diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index 65545fee783..afb0d9ccf22 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -40,9 +40,9 @@ //M*/ #include "../precomp.hpp" -#include "recurrent_layers.hpp" #include "op_blas.hpp" #include +#include #include #include @@ -85,25 +85,25 @@ class LSTMLayerImpl : public LSTMLayer int numOut, numTimeStamps, numSamples, numInp; Mat hInternal, cInternal; Mat gates, dummyOnes; - int dtype; bool allocated; - Shape outTailShape; //shape of single output sample - Shape outTsMatShape, outTsShape; //shape of N output samples - Shape outResShape; //shape of T timestamps and N output samples + std::vector outTailShape; //shape of single output sample + std::vector outTsMatShape, outTsShape; //shape of N output samples + std::vector outResShape; //shape of T timestamps and N output samples bool useTimestampDim; bool produceCellOutput; public: - LSTMLayerImpl() + LSTMLayerImpl(const LayerParams& params) { + setParamsFrom(params); type = "LSTM"; useTimestampDim = true; produceCellOutput = false; allocated = false; - outTailShape = Shape::empty(); + outTailShape.clear(); } void setUseTimstampsDim(bool use) @@ -118,146 +118,155 @@ class LSTMLayerImpl : public LSTMLayer produceCellOutput = produce; } - void setC(const Blob &C) + void setC(const Mat &C) { - CV_Assert(cInternal.empty() || C.total() == cInternal.total()); + CV_Assert(C.type() == CV_32F); if (!cInternal.empty()) - 
C.reshaped(Shape::like(cInternal)).matRefConst().copyTo(cInternal); + { + CV_Assert(C.total() == cInternal.total() && cInternal.isContinuous()); + Mat cInternal_(C.dims, &C.size.p[0], C.type(), cInternal.ptr()); + C.copyTo(cInternal_); + } else - C.matRefConst().copyTo(cInternal); + C.copyTo(cInternal); } - void setH(const Blob &H) + void setH(const Mat &H) { - CV_Assert(hInternal.empty() || H.total() == hInternal.total()); + CV_Assert(H.type() == CV_32F); if (!hInternal.empty()) - H.reshaped(Shape::like(hInternal)).matRefConst().copyTo(hInternal); + { + CV_Assert(H.total() == hInternal.total() && hInternal.isContinuous()); + Mat hInternal_(H.dims, &H.size.p[0], H.type(), hInternal.ptr()); + H.copyTo(hInternal_); + } else - H.matRefConst().copyTo(hInternal); + H.copyTo(hInternal); } - Blob getC() const + Mat getC() const { - CV_Assert(!cInternal.empty()); - - //TODO: add convinient Mat -> Blob constructor - Blob res(outTsShape, cInternal.type()); - res.fill(res.shape(), res.type(), cInternal.data); - return res; + CV_Assert(shapeTotal(outTsShape) == cInternal.total()); + return Mat((int)outTsShape.size(), &outTsShape[0], cInternal.type(), (char*)cInternal.ptr()); } - Blob getH() const + Mat getH() const { - CV_Assert(!hInternal.empty()); - - Blob res(outTsShape, hInternal.type()); - res.fill(res.shape(), res.type(), hInternal.data); - return res; + CV_Assert(shapeTotal(outTsShape) == hInternal.total()); + return Mat((int)outTsShape.size(), &outTsShape[0], hInternal.type(), (char*)hInternal.ptr()); } - void setOutShape(const Shape &outTailShape_) + void setOutShape(const std::vector &outTailShape_) { - CV_Assert(!allocated || outTailShape_.total() == outTailShape.total()); + CV_Assert(!allocated || shapeTotal(outTailShape) == shapeTotal(outTailShape_)); outTailShape = outTailShape_; } - void setWeights(const Blob &Wh, const Blob &Wx, const Blob &bias) + void setWeights(const Mat &Wh, const Mat &Wx, const Mat &bias) { - CV_Assert(Wh.dims() == 2 && Wx.dims() == 2); - CV_Assert(Wh.size(0) == Wx.size(0)); - CV_Assert(Wh.size(0) == 4*Wh.size(1)); - CV_Assert(Wh.size(0) == (int)bias.total()); + CV_Assert(Wh.dims == 2 && Wx.dims == 2); + CV_Assert(Wh.rows == Wx.rows); + CV_Assert(Wh.rows == 4*Wh.cols); + CV_Assert(Wh.rows == (int)bias.total()); CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type()); blobs.resize(3); - blobs[0] = Wh; - blobs[1] = Wx; - blobs[2] = bias; - blobs[2].reshape(Shape(1, (int)bias.total())); + blobs[0] = Mat(Wh.clone()); + blobs[1] = Mat(Wx.clone()); + blobs[2] = Mat(bias.clone()).reshape(1, 1); } - void allocate(const std::vector &input, std::vector &output) + void allocate(const std::vector &input, std::vector &output) { CV_Assert(blobs.size() == 3); CV_Assert(input.size() == 1); + const Mat& inp0 = *input[0]; - Blob &Wh = blobs[0], &Wx = blobs[1]; - numOut = Wh.size(1); - numInp = Wx.size(1); + Mat &Wh = blobs[0], &Wx = blobs[1]; + numOut = Wh.size[1]; + numInp = Wx.size[1]; - if (!outTailShape.isEmpty()) - CV_Assert(outTailShape.total() == numOut); + if (!outTailShape.empty()) + CV_Assert(shapeTotal(outTailShape) == numOut); else - outTailShape = Shape(numOut); + outTailShape.assign(1, numOut); + outResShape.clear(); if (useTimestampDim) { - CV_Assert(input[0]->dims() >= 2 && (int)input[0]->total(2) == numInp); - numTimeStamps = input[0]->size(0); - numSamples = input[0]->size(1); - outResShape = Shape(numTimeStamps, numSamples) + outTailShape; + CV_Assert(inp0.dims >= 2 && (int)inp0.total(2) == numInp); + numTimeStamps = inp0.size[0]; + numSamples = 
inp0.size[1]; + outResShape.push_back(numTimeStamps); } else { - CV_Assert(input[0]->dims() >= 1 && (int)input[0]->total(1) == numInp); + CV_Assert(inp0.dims >= 2 && (int)inp0.total(1) == numInp); numTimeStamps = 1; - numSamples = input[0]->size(0); - outResShape = Shape(numSamples) + outTailShape; + numSamples = inp0.size[0]; } - outTsMatShape = Shape(numSamples, numOut); - outTsShape = Shape(numSamples) + outTailShape; - dtype = input[0]->type(); - CV_Assert(dtype == CV_32F || dtype == CV_64F); - CV_Assert(Wh.type() == dtype); + outResShape.push_back(numSamples); + outResShape.insert(outResShape.end(), outTailShape.begin(), outTailShape.end()); + + outTsMatShape.clear(); + outTsMatShape.push_back(numSamples); + outTsMatShape.push_back(numOut); + + outTsShape.clear(); + outTsShape.push_back(numSamples); + outTsShape.insert(outTsShape.end(), outTailShape.begin(), outTailShape.end()); + + const int dtype = CV_32F; + CV_Assert(inp0.type() == dtype && Wh.type() == dtype); - output.resize( (produceCellOutput) ? 2 : 1 ); - output[0].create(outResShape, dtype); - if (produceCellOutput) - output[1].create(outResShape, dtype); + size_t i, noutputs = produceCellOutput ? 2 : 1; + output.resize(noutputs); + + for( i = 0; i < noutputs; i++ ) + output[i].create(outResShape, dtype); if (hInternal.empty()) { - hInternal.create(outTsMatShape.dims(), outTsMatShape.ptr(), dtype); - hInternal.setTo(0); + hInternal.create(outTsMatShape, dtype); + hInternal.setTo(0.); } else { - CV_Assert((int)hInternal.total() == numSamples*numOut); - hInternal = hInternal.reshape(1, outTsMatShape.dims(), outTsMatShape.ptr()); + CV_Assert(hInternal.total() == (size_t)numSamples*numOut); + hInternal = hInternal.reshape(1, outTsMatShape); } if (cInternal.empty()) { - cInternal.create(outTsMatShape.dims(), outTsMatShape.ptr(), dtype); - cInternal.setTo(0); + cInternal.create(outTsMatShape, dtype); + cInternal.setTo(0.); } else { - CV_Assert((int)cInternal.total() == numSamples*numOut); - cInternal = cInternal.reshape(1, outTsMatShape.dims(), outTsMatShape.ptr()); + CV_Assert(cInternal.total() == (size_t)numSamples*numOut); + cInternal = cInternal.reshape(1, outTsMatShape); } gates.create(numSamples, 4*numOut, dtype); dummyOnes.create(numSamples, 1, dtype); - dummyOnes.setTo(1); + dummyOnes.setTo(1.); allocated = true; } - void forward(std::vector &input, std::vector &output) + void forward(std::vector &input, std::vector &output) { - const Mat &Wh = blobs[0].getRefConst(); - const Mat &Wx = blobs[1].getRefConst(); - const Mat &bias = blobs[2].getRefConst(); + const Mat &Wh = blobs[0]; + const Mat &Wx = blobs[1]; + const Mat &bias = blobs[2]; int numSamplesTotal = numTimeStamps*numSamples; - Mat xTs = reshaped(input[0]->getRefConst(), Shape(numSamplesTotal, numInp)); + Mat xTs = input[0]->reshape(1, numSamplesTotal); - Shape outMatShape(numSamplesTotal, numOut); - Mat hOutTs = reshaped(output[0].getRef(), outMatShape); - Mat cOutTs = (produceCellOutput) ? reshaped(output[1].getRef(), outMatShape) : Mat(); + Mat hOutTs = output[0].reshape(1, numSamplesTotal); + Mat cOutTs = produceCellOutput ? 
output[1].reshape(1, numSamplesTotal) : Mat(); for (int ts = 0; ts < numTimeStamps; ts++) { @@ -278,13 +287,13 @@ class LSTMLayerImpl : public LSTMLayer tanh(gateG, gateG); //compute c_t - cv::multiply(gateF, cInternal, gateF); // f_t (*) c_{t-1} - cv::multiply(gateI, gateG, gateI); // i_t (*) g_t - cv::add(gateF, gateI, cInternal); // c_t = f_t (*) c_{t-1} + i_t (*) g_t + multiply(gateF, cInternal, gateF); // f_t (*) c_{t-1} + multiply(gateI, gateG, gateI); // i_t (*) g_t + add(gateF, gateI, cInternal); // c_t = f_t (*) c_{t-1} + i_t (*) g_t //compute h_t tanh(cInternal, hInternal); - cv::multiply(gateO, hInternal, hInternal); + multiply(gateO, hInternal, hInternal); //save results in output blobs hInternal.copyTo(hOutTs.rowRange(curRowRange)); @@ -294,14 +303,9 @@ class LSTMLayerImpl : public LSTMLayer } }; -Ptr LSTMLayer::create() +Ptr LSTMLayer::create(const LayerParams& params) { - return Ptr(new LSTMLayerImpl()); -} - -void LSTMLayer::forward(std::vector&, std::vector&) -{ - CV_Error(Error::StsInternal, "This function should be unreached"); + return Ptr(new LSTMLayerImpl(params)); } int LSTMLayer::inputNameToIndex(String inputName) @@ -333,8 +337,9 @@ class RNNLayerImpl : public RNNLayer public: - RNNLayerImpl() + RNNLayerImpl(const LayerParams& params) { + setParamsFrom(params); type = "RNN"; produceH = false; } @@ -344,68 +349,74 @@ class RNNLayerImpl : public RNNLayer produceH = produce; } - void setWeights(const Blob &W_xh, const Blob &b_h, const Blob &W_hh, const Blob &W_ho, const Blob &b_o) + void setWeights(const Mat &W_xh, const Mat &b_h, const Mat &W_hh, const Mat &W_ho, const Mat &b_o) { - CV_Assert(W_hh.dims() == 2 && W_xh.dims() == 2); - CV_Assert(W_hh.size(0) == W_xh.size(0) && W_hh.size(0) == W_hh.size(1) && (int)b_h.total() == W_xh.size(0)); - CV_Assert(W_ho.size(0) == (int)b_o.total()); - CV_Assert(W_ho.size(1) == W_hh.size(1)); + CV_Assert(W_hh.dims == 2 && W_xh.dims == 2); + CV_Assert(W_hh.size[0] == W_xh.size[0] && W_hh.size[0] == W_hh.size[1] && (int)b_h.total() == W_xh.size[0]); + CV_Assert(W_ho.size[0] == (int)b_o.total()); + CV_Assert(W_ho.size[1] == W_hh.size[1]); blobs.resize(5); - blobs[0] = W_xh; - blobs[1] = b_h; - blobs[2] = W_hh; - blobs[3] = W_ho; - blobs[4] = b_o; + blobs[0] = Mat(W_xh.clone()); + blobs[1] = Mat(b_h.clone()); + blobs[2] = Mat(W_hh.clone()); + blobs[3] = Mat(W_ho.clone()); + blobs[4] = Mat(b_o.clone()); } - void allocate(const std::vector &input, std::vector &output) + void allocate(const std::vector &input, std::vector &output) { CV_Assert(input.size() >= 1 && input.size() <= 2); - Wxh = blobs[0].matRefConst(); - bh = blobs[1].matRefConst(); - Whh = blobs[2].matRefConst(); - Who = blobs[3].matRefConst(); - bo = blobs[4].matRefConst(); + Wxh = blobs[0]; + bh = blobs[1]; + Whh = blobs[2]; + Who = blobs[3]; + bo = blobs[4]; numH = Wxh.rows; numX = Wxh.cols; numO = Who.rows; - CV_Assert(input[0]->dims() >= 2); - CV_Assert((int)input[0]->total(2) == numX); - CV_Assert(input[0]->type() == CV_32F || input[0]->type() == CV_64F); - dtype = input[0]->type(); - numTimestamps = input[0]->size(0); - numSamples = input[0]->size(1); + const Mat& inp0 = *input[0]; + + CV_Assert(inp0.dims >= 2); + CV_Assert(inp0.total(2) == numX); + dtype = CV_32F; + CV_Assert(inp0.type() == dtype); + numTimestamps = inp0.size[0]; + numSamples = inp0.size[1]; numSamplesTotal = numTimestamps * numSamples; hCurr.create(numSamples, numH, dtype); hPrev.create(numSamples, numH, dtype); - hPrev.setTo(0); + hPrev.setTo(0.); dummyBiasOnes.create(numSamples, 1, dtype); - 
dummyBiasOnes.setTo(1); + dummyBiasOnes.setTo(1.); bh = bh.reshape(1, 1); //is 1 x numH Mat bo = bo.reshape(1, 1); //is 1 x numO Mat reshapeOutput(output); } - void reshapeOutput(std::vector &output) + void reshapeOutput(std::vector &output) { - output.resize((produceH) ? 2 : 1); - output[0].create(Shape(numTimestamps, numSamples, numO), dtype); + output.resize(produceH ? 2 : 1); + int sz0[] = { numTimestamps, numSamples, numO }; + output[0].create(3, sz0, dtype); if (produceH) - output[1].create(Shape(numTimestamps, numSamples, numH), dtype); + { + int sz1[] = { numTimestamps, numSamples, numH }; + output[1].create(3, sz1, dtype); + } } - void forward(std::vector &input, std::vector &output) + void forward(std::vector &input, std::vector &output) { - Mat xTs = reshaped(input[0]->getRefConst(), Shape(numSamplesTotal, numX)); - Mat oTs = reshaped(output[0].getRef(), Shape(numSamplesTotal, numO)); - Mat hTs = (produceH) ? reshaped(output[1].getRef(), Shape(numSamplesTotal, numH)) : Mat(); + Mat xTs = input[0]->reshape(1, numSamplesTotal); + Mat oTs = output[0].reshape(1, numSamplesTotal); + Mat hTs = produceH ? output[1].reshape(1, numSamplesTotal) : Mat(); for (int ts = 0; ts < numTimestamps; ts++) { @@ -428,14 +439,9 @@ class RNNLayerImpl : public RNNLayer } }; -void RNNLayer::forward(std::vector&, std::vector&) -{ - CV_Error(Error::StsInternal, "This function should be unreached"); -} - -CV_EXPORTS_W Ptr RNNLayer::create() +CV_EXPORTS_W Ptr RNNLayer::create(const LayerParams& params) { - return Ptr(new RNNLayerImpl()); + return Ptr(new RNNLayerImpl(params)); } } diff --git a/modules/dnn/src/layers/recurrent_layers.hpp b/modules/dnn/src/layers/recurrent_layers.hpp deleted file mode 100644 index 54451218428..00000000000 --- a/modules/dnn/src/layers/recurrent_layers.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
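The per-timestamp arithmetic in LSTMLayerImpl::forward() above is the textbook LSTM cell: gates = x_t*Wx^T + h_{t-1}*Wh^T + b, then c_t = f (*) c_{t-1} + i (*) g and h_t = o (*) tanh(c_t). A compact cv::Mat sketch of one step, assuming the gate packing [i, f, o, g] along the 4*numOut axis; the layer's actual packing is fixed by its weight layout, so treat this as an illustration rather than the patch's exact code path:

#include <opencv2/core.hpp>
using namespace cv;

// x: N x numInp, h and c: N x numOut, Wx: 4*numOut x numInp,
// Wh: 4*numOut x numOut, bias: 1 x 4*numOut; all CV_32F.
static Mat sigmoid(const Mat& x)
{
    Mat e;
    exp(-x, e);              // e = exp(-x)
    return 1.0 / (1.0 + e);  // 1 / (1 + exp(-x))
}

static Mat tanhm(const Mat& x)
{
    Mat e, num, den, r;
    exp(-2.0 * x, e);        // e = exp(-2x)
    num = 1.0 - e;
    den = 1.0 + e;
    divide(num, den, r);     // tanh(x) = (1 - e) / (1 + e)
    return r;
}

static void lstmStep(const Mat& x, Mat& h, Mat& c,
                     const Mat& Wx, const Mat& Wh, const Mat& bias)
{
    int numOut = Wh.cols;
    Mat gates = x * Wx.t() + h * Wh.t() + repeat(bias, x.rows, 1);

    Mat i = sigmoid(gates.colRange(0, numOut));              // input gate
    Mat f = sigmoid(gates.colRange(numOut, 2 * numOut));     // forget gate
    Mat o = sigmoid(gates.colRange(2 * numOut, 3 * numOut)); // output gate
    Mat g = tanhm(gates.colRange(3 * numOut, 4 * numOut));   // candidate state

    c = f.mul(c) + i.mul(g); // c_t = f (*) c_{t-1} + i (*) g_t
    h = o.mul(tanhm(c));     // h_t = o (*) tanh(c_t)
}

The patch itself computes the gate matrix with gemm calls and a column of ones standing in for the bias broadcast; the elementwise part is the same.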
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_RECURRENT_LAYERS_HPP__ -#define __OPENCV_DNN_LAYERS_RECURRENT_LAYERS_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -} -} -#endif \ No newline at end of file diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index e3f0d1f8be8..f10fee4eae0 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -41,7 +41,6 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include "reshape_layer.hpp" #include namespace cv @@ -49,73 +48,158 @@ namespace cv namespace dnn { -ReshapeLayerImpl::ReshapeLayerImpl(const BlobShape &newShape_, Range applyingRange_, bool enableReordering_) : - enableReordering(enableReordering_) +static void computeShapeByReshapeMask(const std::vector &srcShape, + const std::vector &maskShape, + Range srcRange /*= Range::all()*/, + std::vector& dstShape) { - newShapeDesc = newShape_; - newShapeRange = applyingRange_; -} + int srcShapeSize = (int)srcShape.size(); + int maskShapeSize = (int)maskShape.size(); -void ReshapeLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) -{ - outputs.resize(inputs.size()); - outShapes.resize(inputs.size()); + if (srcRange == Range::all()) + srcRange = Range(0, srcShapeSize); + else + { + int sz = srcRange.size(); + srcRange.start = srcRange.start < 0 ? srcRange.start + srcShapeSize : srcRange.start; + srcRange.end = srcRange.end == INT_MAX ? 
srcShapeSize : srcRange.start + sz; + } + + CV_Assert(0 <= srcRange.start && srcRange.start <= srcRange.end && srcRange.end <= srcShapeSize); + int dstShapeSize = srcShapeSize - srcRange.size() + maskShapeSize; + dstShape.resize(dstShapeSize); + + std::copy(srcShape.begin(), srcShape.begin() + srcRange.start, dstShape.begin()); + std::copy(srcShape.begin() + srcRange.end, srcShape.begin() + srcShapeSize, dstShape.begin() + srcRange.start + maskShapeSize); + + int inferDim = -1; + for (int i = 0; i < maskShapeSize; i++) + { + if (maskShape[i] > 0) + { + dstShape[srcRange.start + i] = maskShape[i]; + } + else if (maskShape[i] == 0) + { + if (srcRange.start + i >= srcShapeSize) + CV_Error(Error::StsBadArg, format("Copy dim[%d] (which has zero size) is out of the source shape bounds", srcRange.start + i)); + dstShape[srcRange.start + i] = srcShape[srcRange.start + i]; + } + else if (maskShape[i] == -1) + { + if (inferDim != -1) + CV_Error(Error::StsAssert, "Duplicate of inferred dim (which is denoted by -1)"); + inferDim = srcRange.start + i; + dstShape[inferDim] = 1; + } + else + CV_Error(Error::StsBadArg, "maskShape[i] >= -1"); + } + + size_t srcTotal = shapeTotal(srcShape); + size_t dstTotal = shapeTotal(dstShape); + + if (inferDim != -1) + { + if (srcTotal % dstTotal != 0) + CV_Error(Error::StsBackTrace, "Can't infer a dim denoted by -1"); - for (size_t i = 0; i < inputs.size(); i++) + dstShape[inferDim] = (int)(srcTotal / dstTotal); + } + else { - outShapes[i] = computeShapeByReshapeMask(inputs[i]->shape(), newShapeDesc, newShapeRange); - outputs[i].shareFrom(*inputs[i]); - outputs[i].reshape(outShapes[i]); + CV_Assert(srcTotal == dstTotal); } } -void ReshapeLayerImpl::forward(std::vector &inputs, std::vector &outputs) + +class ReshapeLayerImpl : public ReshapeLayer { - for (size_t i = 0; i < outputs.size(); i++) +public: + ReshapeLayerImpl(const LayerParams& params) { - Blob srcBlob = *inputs[i]; - BlobShape inputShape = inputs[i]->shape(); - bool channelsReduced = inputShape.dims() > outShapes[i].dims() || - (inputShape.dims() == 4 && inputShape[1] > outShapes[i][1]); - bool performReordering = enableReordering && inputShape.dims() == 4 && channelsReduced; - - if (performReordering) + setParamsFrom(params); + int axis = params.get("axis", 0); + int numAxes = params.get("num_axes", -1); + enableReordering = params.get("reorder_dims", false); + CV_Assert(numAxes >= -1); + newShapeRange = (numAxes == -1) ? 
Range(axis, INT_MAX) : Range(axis, axis + numAxes); + + newShapeDesc.clear(); + if (params.has("dim")) { - Blob reordered_blob(inputShape, inputs[i]->type()); - - float *dstData = reordered_blob.matRef().ptr(); - const float *srcData = srcBlob.matRefConst().ptr(); + const DictValue ¶mShape = params.get("dim"); + int i, dims = paramShape.size(); + newShapeDesc.resize(dims); + for (i = 0; i < dims; i++) + newShapeDesc[i] = paramShape.get(i); + } + } - int num = inputShape[0], channels = inputShape[1], height = inputShape[2], width = inputShape[3]; - int total = num*channels*height*width; - for(int i_n = 0; i_n < num; i_n++) { - for(int i_c = 0; i_c < channels; i_c++) { - for(int i_h = 0; i_h < height; i_h++) { - for(int i_w = 0; i_w < width; i_w++) { - int src_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w; - int dst_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w; + void allocate(const std::vector &inputs, std::vector &outputs) + { + outputs.resize(inputs.size()); + outShapes.resize(inputs.size()); - CV_Assert(dst_i < total); - CV_Assert(src_i < total); + for (size_t i = 0; i < inputs.size(); i++) + { + std::vector inputShape(inputs[i]->size.p, inputs[i]->size.p + inputs[i]->dims); + computeShapeByReshapeMask(inputShape, newShapeDesc, newShapeRange, outShapes[i]); + outputs[i] = inputs[i]->reshape(1, outShapes[i]); + } + } - dstData[dst_i] = srcData[src_i]; + void forward(std::vector &inputs, std::vector &outputs) + { + for (size_t i = 0; i < outputs.size(); i++) + { + Mat srcBlob = *inputs[i]; + int dims = srcBlob.dims; + std::vector inputShape(srcBlob.size.p, srcBlob.size.p + dims); + bool channelsReduced = dims > (int)outShapes[i].size() || + (dims == 4 && inputShape[1] > outShapes[i][1]); + bool performReordering = enableReordering && dims == 4 && channelsReduced; + + if (performReordering) + { + Mat reordered_blob(inputShape, srcBlob.type()); + + float *dstData = reordered_blob.ptr(); + const float *srcData = srcBlob.ptr(); + + int num = inputShape[0], channels = inputShape[1], height = inputShape[2], width = inputShape[3]; + int total = num*channels*height*width; + for(int i_n = 0; i_n < num; i_n++) { + for(int i_c = 0; i_c < channels; i_c++) { + for(int i_h = 0; i_h < height; i_h++) { + for(int i_w = 0; i_w < width; i_w++) { + int src_i = channels*height*width*i_n + height*width*i_c + width*i_h + i_w; + int dst_i = channels*height*width*i_n + i_c + channels*width*i_h + channels*i_w; + + CV_Assert(dst_i < total); + CV_Assert(src_i < total); + + dstData[dst_i] = srcData[src_i]; + } } } } + + srcBlob = reordered_blob; } - srcBlob = reordered_blob; + // TODO: we should not assign srcBlob if performReordering is true. 
+ outputs[i] = srcBlob.reshape(1, outShapes[i]); } - - outputs[i].shareFrom(srcBlob); - outputs[i].reshape(outShapes[i]); } -} -Ptr ReshapeLayer::create(const BlobShape &newShape, Range applyingRange /*= Range::all()*/, - bool enableReordering /*= false*/) + std::vector > outShapes; + bool enableReordering; +}; + +Ptr ReshapeLayer::create(const LayerParams& params) { - return Ptr(new ReshapeLayerImpl(newShape, applyingRange, enableReordering)); + return Ptr(new ReshapeLayerImpl(params)); } diff --git a/modules/dnn/src/layers/reshape_layer.hpp b/modules/dnn/src/layers/reshape_layer.hpp deleted file mode 100644 index 10718b838f4..00000000000 --- a/modules/dnn/src/layers/reshape_layer.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
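The mask semantics implemented by computeShapeByReshapeMask() earlier in this file are Caffe's Reshape rules: a positive entry sets the output dimension explicitly, 0 copies the matching input dimension, and a single -1 is inferred so the total element count is preserved. A hypothetical re-implementation for the simple whole-shape case; the patch's function additionally handles partial axis ranges:

#include <vector>
#include <cassert>
#include <cstddef>

static std::vector<int> reshapeByMask(const std::vector<int>& src,
                                      const std::vector<int>& mask)
{
    std::vector<int> dst(mask.size());
    size_t srcTotal = 1, dstTotal = 1;
    int inferDim = -1;
    for (size_t i = 0; i < src.size(); i++)
        srcTotal *= src[i];
    for (size_t i = 0; i < mask.size(); i++)
    {
        if (mask[i] > 0)
            dst[i] = mask[i];                      // explicit output size
        else if (mask[i] == 0)
        {
            assert(i < src.size());                // 0 copies the input dim
            dst[i] = src[i];
        }
        else
        {
            assert(mask[i] == -1 && inferDim < 0); // at most one inferred dim
            inferDim = (int)i;
            dst[i] = 1;
        }
        dstTotal *= dst[i];
    }
    if (inferDim >= 0)
    {
        assert(srcTotal % dstTotal == 0);
        dst[inferDim] = (int)(srcTotal / dstTotal);
    }
    else
        assert(srcTotal == dstTotal);
    return dst;
}

For example, reshapeByMask({2, 3, 4, 5}, {0, -1, 20}) copies the 2, infers 120 / (2 * 20) = 3, and returns {2, 3, 20}.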
-// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_RESHAPE_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_RESHAPE_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -class ReshapeLayerImpl : public ReshapeLayer -{ - std::vector outShapes; - bool enableReordering; - -public: - ReshapeLayerImpl(const BlobShape &newShape_, Range applyingRange_, bool enableReordering_); - - void allocate(const std::vector &inputs, std::vector &outputs); - - void forward(std::vector &inputs, std::vector &outputs); -}; - -Ptr createFlattenLayer(LayerParams&); - -} -} - -#endif diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 952856002ef..b465be7c9e3 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -9,51 +9,70 @@ Implementation of Scale layer. */ -#include "scale_layer.hpp" +#include "../precomp.hpp" +#include "layers_common.hpp" +#include namespace cv { namespace dnn { -void ScaleLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) +class ScaleLayerImpl : public ScaleLayer { - CV_Assert(blobs.size() == 1 + hasBias); - - outputs.resize(inputs.size()); - for (size_t i = 0; i < inputs.size(); i++) +public: + ScaleLayerImpl(const LayerParams& params) { - outputs[i].create(inputs[i]->shape()); + setParamsFrom(params); + hasBias = params.get("bias_term", false); } -} -void ScaleLayerImpl::forward(std::vector &inputs, std::vector &outputs) -{ - CV_Assert(inputs.size() == 1); + void allocate(const std::vector &inputs, std::vector &outputs) + { + CV_Assert(blobs.size() == 1 + hasBias); - Blob &inpBlob = *inputs[0]; + outputs.resize(inputs.size()); + for (size_t i = 0; i < inputs.size(); i++) + { + const Mat& inp = *inputs[i]; + outputs[i].create(inp.dims, inp.size.p, inp.type()); + } + } - for (size_t ii = 0; ii < outputs.size(); ii++) + void forward(std::vector &inputs, std::vector &outputs) { - Blob &outBlob = outputs[ii]; + for (size_t ii = 0; ii < outputs.size(); ii++) + { + Mat &inpBlob = *inputs[ii]; + Mat &outBlob = outputs[ii]; - CV_Assert(inpBlob.channels() == blobs[0].total()); + CV_Assert(inpBlob.size[1] == blobs[0].total()); + if (hasBias) + CV_Assert(inpBlob.size[1] == blobs[1].total()); - if (hasBias) - CV_Assert(inpBlob.channels() == blobs[1].total()); + CV_Assert(inpBlob.type() == CV_32F && outBlob.type() == CV_32F); - for (int n = 0; n < inpBlob.channels(); n++) - { - float w = blobs[0].matRefConst().at(n); - float b = hasBias ? blobs[1].matRefConst().at(n) : 0; - outBlob.getPlane(0, n) = w*inpBlob.getPlane(0, n) + b; - } + for( int cn = 0; cn < inpBlob.size[0]; cn++ ) + { + for (int n = 0; n < inpBlob.size[1]; n++) + { + float w = blobs[0].at(n); + float b = hasBias ? blobs[1].at(n) : 0; + Mat outBlobPlane = getPlane(outBlob, cn, n); + Mat inpBlobPlane = getPlane(inpBlob, cn, n); + inpBlobPlane.convertTo(outBlobPlane, CV_32F, w, b); + } + } + } } -} -Ptr ScaleLayer::create(bool hasBias) + bool hasBias; +}; + + +Ptr ScaleLayer::create(const LayerParams& params) { - return Ptr(new ScaleLayerImpl(hasBias)); + return Ptr(new ScaleLayerImpl(params)); } } // namespace dnn diff --git a/modules/dnn/src/layers/scale_layer.hpp b/modules/dnn/src/layers/scale_layer.hpp deleted file mode 100644 index ee58af4326e..00000000000 --- a/modules/dnn/src/layers/scale_layer.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// This file is part of OpenCV project. 
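The rewritten ScaleLayerImpl::forward() above applies a per-channel affine map y = w*x + b by calling Mat::convertTo on each H x W plane. A minimal sketch of the same idea on a hypothetical 4D blob (shapes and values are illustrative only):

#include <opencv2/core.hpp>
using namespace cv;

int main()
{
    // One sample with 2 channels of 4x4 data, plus per-channel weight/bias.
    int sz[] = {1, 2, 4, 4};
    Mat blob(4, sz, CV_32F, Scalar(1));
    float w[] = {2.f, 0.5f}, b[] = {1.f, 0.f};

    for (int ch = 0; ch < sz[1]; ch++)
    {
        // View one H x W plane inside the 4D blob and scale it in place.
        Mat plane(sz[2], sz[3], CV_32F, blob.ptr<float>(0, ch));
        plane.convertTo(plane, CV_32F, w[ch], b[ch]); // plane = w*plane + b
    }
    return 0;
}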
-// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -// Copyright (C) 2016, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. - -/* -Declaration of scale layer, which multiplies and shifts channels in input blob. -*/ - - -#ifndef __OPENCV_DNN_LAYERS_SCALE_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_SCALE_LAYER_HPP__ -#include - -namespace cv -{ -namespace dnn -{ - -class ScaleLayerImpl : public ScaleLayer -{ -public: - ScaleLayerImpl(bool hasBias_): hasBias(hasBias_) {} - void allocate(const std::vector &inputs, std::vector &outputs); - void forward(std::vector &inputs, std::vector &outputs); - -private: - bool hasBias; -}; - -} -} - -#endif // __OPENCV_DNN_LAYERS_SCALE_LAYER_HPP__ diff --git a/modules/dnn/src/layers/shift_layer.cpp b/modules/dnn/src/layers/shift_layer.cpp index 6663640af20..03ba84b10e2 100644 --- a/modules/dnn/src/layers/shift_layer.cpp +++ b/modules/dnn/src/layers/shift_layer.cpp @@ -10,7 +10,6 @@ Implementation of shift layer, which adds up const values to blob. */ #include "../precomp.hpp" -#include "shift_layer.hpp" #include "op_blas.hpp" namespace cv @@ -18,139 +17,99 @@ namespace cv namespace dnn { -class ShiftLayerImpl { +class ShiftLayerImpl : public ShiftLayer +{ public: - static Ptr create(const std::vector &inputs, std::vector &outputs, - const std::vector& blobs); - - virtual ~ShiftLayerImpl() {} - - virtual void forward(std::vector &inputs, std::vector &outputs, const std::vector& blobs) = 0; - -protected: - ShiftLayerImpl() {} - virtual void allocate(const std::vector &inputs, std::vector &outputs, const std::vector& blobs) = 0; -}; - -namespace { + ShiftLayerImpl(const LayerParams ¶ms) + { + setParamsFrom(params); + CV_Assert(blobs.size() == 1); -class ShiftChannelsLayerImpl : public ShiftLayerImpl { -public: - virtual void forward(std::vector &inputs, std::vector &outputs, const std::vector& blobs) { - for (size_t ii = 0; ii < outputs.size(); ii++) +#ifdef HAVE_LAPACK { - Blob &inpBlob = *inputs[ii]; - Blob &outBlob = outputs[ii]; - - inpBlob.matRef().copyTo(outBlob.matRef()); - - for (int n = 0; n < inpBlob.num(); n++) + if (getBlasThreads() != cv::getThreadNum()) { - Mat dstMat(inpBlob.channels(), inpBlob.rows() * inpBlob.cols(), - outBlob.type(), outBlob.ptr(n)); - dnn::gemm(blobs[0].matRefConst(), biasOnesMat, 1, dstMat, 1); //TODO: gemv + setBlasThreads(cv::getThreadNum()); } } +#endif } -protected: - virtual void allocate(const std::vector &inputs, std::vector &outputs, const std::vector& blobs) { + virtual void allocate(const std::vector &inputs, std::vector &outputs) + { CV_Assert(inputs.size() > 0); - - const Blob &inpBlob = *inputs[0]; - CV_Assert(inpBlob.dims() == 4 && inpBlob.type() == CV_32F); - const Blob &biasBlob = blobs[0]; - CV_Assert(biasBlob.total() == (size_t)inpBlob.channels()); - + CV_Assert(blobs.size() > 0); + const Mat &inpBlob = *inputs[0]; + CV_Assert(inpBlob.dims == 4 && inpBlob.type() == CV_32F); + const Mat &biasBlob = blobs[0]; outputs.resize(inputs.size()); - for (size_t i = 0; i < inputs.size(); i++) + + if(inpBlob.dims == biasBlob.dims) { - CV_Assert(inputs[i]->type() == inpBlob.type()); - CV_Assert(inputs[i]->dims() == 4 && inputs[i]->channels() == inpBlob.channels()); + for (size_t i = 0; i < inputs.size(); i++) + { + CV_Assert(inputs[i]->type() == inpBlob.type()); + CV_Assert(inputs[i]->dims == inpBlob.dims); - outputs[i].shareFrom(*inputs[i]); + outputs[i] = 
*inputs[i]; + } } + else + { + CV_Assert(biasBlob.total() == (size_t)inpBlob.size[1]); - biasOnesMat = Mat::ones(1, inpBlob.rows() * inpBlob.cols(), inpBlob.type()); - } - -private: - Mat biasOnesMat; -}; - + for (size_t i = 0; i < inputs.size(); i++) + { + CV_Assert(inputs[i]->type() == inpBlob.type()); + CV_Assert(inputs[i]->dims == 4 && inputs[i]->size[1] == inpBlob.size[1]); -class ShiftElementsLayerImpl : public ShiftLayerImpl { -public: - virtual void forward(std::vector &inputs, std::vector &outputs, const std::vector& blobs) { - for (size_t ii = 0; ii < outputs.size(); ii++) - { - Blob &inpBlob = *inputs[ii]; - Blob &outBlob = outputs[ii]; + outputs[i] = *inputs[i]; + } - outBlob.matRef() = inpBlob.matRef() + blobs[0].matRefConst(); + biasOnesMat = Mat::ones(1, inpBlob.size[2] * inpBlob.size[3], inpBlob.type()); } } -protected: - virtual void allocate(const std::vector &inputs, std::vector &outputs, const std::vector& blobs) { + virtual void forward(std::vector &inputs, std::vector &outputs) + { CV_Assert(inputs.size() > 0); + CV_Assert(blobs.size() > 0); - const Blob &inpBlob = *inputs[0]; - CV_Assert(inpBlob.type() == CV_32F); - const Blob &biasBlob = blobs[0]; - CV_Assert(biasBlob.dims() == inpBlob.dims()); - - outputs.resize(inputs.size()); - for (size_t i = 0; i < inputs.size(); i++) + if(inputs[0]->dims == blobs[0].dims) { - CV_Assert(inputs[i]->type() == inpBlob.type()); - CV_Assert(inputs[i]->dims() == inpBlob.dims()); + for (size_t ii = 0; ii < outputs.size(); ii++) + { + Mat &inpBlob = *inputs[ii]; + Mat &outBlob = outputs[ii]; - outputs[i].shareFrom(*inputs[i]); + outBlob = inpBlob + blobs[0]; + } } - } -}; - -} - -Ptr ShiftLayerImpl::create(const std::vector &inputs, std::vector &outputs, - const std::vector& blobs) { - Ptr impl; - - CV_Assert(inputs.size() > 0); - CV_Assert(blobs.size() > 0); - - if(inputs[0]->dims() == blobs[0].dims()) - impl = Ptr(new ShiftElementsLayerImpl); - else - impl = Ptr(new ShiftChannelsLayerImpl); - - impl->allocate(inputs, outputs, blobs); - return impl; -} + else + { + for (size_t ii = 0; ii < outputs.size(); ii++) + { + Mat &inpBlob = *inputs[ii]; + Mat &outBlob = outputs[ii]; -ShiftLayer::ShiftLayer(LayerParams ¶ms) : Layer(params) -{ - CV_Assert(blobs.size() == 1); + inpBlob.copyTo(outBlob); - #ifdef HAVE_LAPACK - { - if (getBlasThreads() != cv::getThreadNum()) - { - setBlasThreads(cv::getThreadNum()); + for (int n = 0; n < inpBlob.size[0]; n++) + { + Mat dstMat(inpBlob.size[1], inpBlob.size[2] * inpBlob.size[3], + outBlob.type(), outBlob.ptr(n)); + dnn::gemm(blobs[0], biasOnesMat, 1, dstMat, 1); //TODO: gemv + } + } } } - #endif -} -void ShiftLayer::allocate(const std::vector &inputs, std::vector &outputs) -{ - impl = ShiftLayerImpl::create(inputs, outputs, blobs); -} + Mat biasOnesMat; +}; -void ShiftLayer::forward(std::vector &inputs, std::vector &outputs) +Ptr ShiftLayer::create(const LayerParams& params) { - impl->forward(inputs, outputs, blobs); + return Ptr(new ShiftLayerImpl(params)); } } diff --git a/modules/dnn/src/layers/shift_layer.hpp b/modules/dnn/src/layers/shift_layer.hpp deleted file mode 100644 index 36808ffbf64..00000000000 --- a/modules/dnn/src/layers/shift_layer.hpp +++ /dev/null @@ -1,38 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -// Copyright (C) 2016, Intel Corporation, all rights reserved. 
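
Reviewer note: folding ShiftChannelsLayerImpl into the class above keeps the same math — a C x 1 bias times a 1 x (H*W) row of ones gives a C x (H*W) matrix that is accumulated onto every sample (the gemm call is still tagged `//TODO: gemv`). A standalone sketch using plain `cv::Mat` expressions instead of the module's `dnn::gemm` wrapper:

```cpp
// Sketch: broadcast a per-channel bias over an NCHW float blob in place.
#include <opencv2/core.hpp>

static void shiftChannels(cv::Mat &blob, const cv::Mat &bias)
{
    CV_Assert(blob.dims == 4 && blob.type() == CV_32F && bias.type() == CV_32F);
    CV_Assert(bias.total() == (size_t)blob.size[1]);
    int planeSize = blob.size[2] * blob.size[3];
    cv::Mat biasCol = bias.reshape(1, blob.size[1]);    // C x 1
    cv::Mat ones = cv::Mat::ones(1, planeSize, CV_32F); // 1 x H*W
    cv::Mat broadcast = biasCol * ones;                 // C x H*W, rank-1 product
    for (int n = 0; n < blob.size[0]; n++)
    {
        // 2-D header over sample n's channels; writes land in the blob
        cv::Mat sample(blob.size[1], planeSize, CV_32F, blob.ptr<float>(n));
        sample += broadcast;
    }
}
```
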
-// Third party copyrights are property of their respective owners. - -/* -Declaration of shift layer, which adds up const values to blob. -*/ - -#ifndef __OPENCV_DNN_LAYERS_SHIFT_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_SHIFT_LAYER_HPP__ -#include "../precomp.hpp" - -namespace cv -{ -namespace dnn -{ - -class ShiftLayerImpl; - -class ShiftLayer : public Layer -{ -public: - ShiftLayer() {} - ShiftLayer(LayerParams ¶ms); - void allocate(const std::vector &inputs, std::vector &outputs); - void forward(std::vector &inputs, std::vector &outputs); - -private: - cv::Ptr impl; - -}; - -} -} -#endif diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 01dc27fe447..d2d1643d40e 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -41,8 +41,6 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include "slice_layer.hpp" -#include #include namespace cv @@ -50,97 +48,89 @@ namespace cv namespace dnn { -SliceLayerImpl::SliceLayerImpl(int axis_ /*= 1*/) +class SliceLayerImpl : public SliceLayer { - axis = axis_; -} - -SliceLayerImpl::SliceLayerImpl(int axis_, const std::vector &sliceIndices_) -{ - axis = axis_; - sliceIndices = sliceIndices_; -} - -void SliceLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) -{ - CV_Assert(inputs.size() == 1); - - const Blob &inpBlob = *inputs[0]; - useOpenCL = ocl::useOpenCL() && inpBlob.getState() == Blob::HEAD_AT_UMAT; +public: + SliceLayerImpl(const LayerParams& params) + { + setParamsFrom(params); + axis = params.get("axis", 1); - axisIdx = inpBlob.canonicalAxis(axis); - int axisSize = inpBlob.size(axisIdx); - BlobShape inpShape = inpBlob.shape(); - int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT; + if (params.has("slice_point")) + { + const DictValue &indicesValue = params.get("slice_point"); + int i, n = indicesValue.size(); + sliceIndices.resize(n); + for (i = 0; i < n; i++) + sliceIndices[i] = indicesValue.get(i); + } + } - if (sliceIndices.size()) //divide blob with respect to passed parameters + void allocate(const std::vector &inputs, std::vector &outputs) { - std::vector outAxisSize; - int prevSlice = 0; + CV_Assert(inputs.size() == 1); + const Mat &inpBlob = *inputs[0]; + int dims = inpBlob.dims; - for (size_t i = 0; i < sliceIndices.size(); i++) - { - if (!(prevSlice < sliceIndices[i] && sliceIndices[i] < axisSize)) - CV_Error(Error::StsBadArg, "Slice indices should be positive, increased and don't exceed size of sliced dimension"); + axisIdx = axis < 0 ? 
axis + dims : axis; + int axisSize = inpBlob.size[axisIdx]; + std::vector inpShape(inpBlob.size.p, inpBlob.size.p + dims); - outAxisSize.push_back(sliceIndices[i] - prevSlice); - prevSlice = sliceIndices[i]; + if (sliceIndices.size()) //divide blob with respect to passed parameters + { + std::vector outAxisSize; + int prevSlice = 0; + + for (size_t i = 0; i < sliceIndices.size(); i++) + { + if (!(prevSlice < sliceIndices[i] && sliceIndices[i] < axisSize)) + CV_Error(Error::StsBadArg, "Slice indices should be positive, increased and don't exceed size of sliced dimension"); + + outAxisSize.push_back(sliceIndices[i] - prevSlice); + prevSlice = sliceIndices[i]; + } + outAxisSize.push_back(axisSize - prevSlice); + + outputs.resize(outAxisSize.size()); + for (size_t i = 0; i < outAxisSize.size(); i++) + { + inpShape[axisIdx] = outAxisSize[i]; + outputs[i].create(inpShape, inpBlob.type()); + } } - outAxisSize.push_back(axisSize - prevSlice); - - outputs.resize(outAxisSize.size()); - for (size_t i = 0; i < outAxisSize.size(); i++) + else //divide blob with respect to count of output blobs { - inpShape[axisIdx] = outAxisSize[i]; - outputs[i].create(inpShape, inpBlob.type(), allocFlags); + CV_Assert(outputs.size() > 0 && axisSize % outputs.size() == 0); + int outAxisSize = axisSize / (int)outputs.size(); + + for (size_t i = 0; i < outputs.size(); i++) + { + inpShape[axisIdx] = outAxisSize; + outputs[i].create(inpShape, inpBlob.type()); + } } } - else //divide blob with respect to count of output blobs + + void forward(std::vector &inputs, std::vector &outputs) { - CV_Assert(outputs.size() > 0 && axisSize % outputs.size() == 0); - int outAxisSize = axisSize / (int)outputs.size(); + const Mat& inpMat = *inputs[0]; + std::vector ranges(inpMat.dims, Range::all()); + ranges[axisIdx].start = 0; for (size_t i = 0; i < outputs.size(); i++) { - inpShape[axisIdx] = outAxisSize; - outputs[i].create(inpShape, inpBlob.type(), allocFlags); + ranges[axisIdx].end = ranges[axisIdx].start + outputs[i].size[axisIdx]; + inpMat(&ranges[0]).copyTo(outputs[i]); + ranges[axisIdx].start = ranges[axisIdx].end; } } -} - -void SliceLayerImpl::forward(std::vector &inputs, std::vector &outputs) -{ - #ifdef HAVE_OPENCL - if (useOpenCL) - forward_(inputs, outputs); - else - #endif - forward_(inputs, outputs); -} - -template -void SliceLayerImpl::forward_(std::vector &inputs, std::vector &outputs) -{ - const XMat& inpMat = inputs[0]->getRefConst(); - std::vector ranges(inputs[0]->dims(), Range::all()); - ranges[axisIdx].start = 0; - for (size_t i = 0; i < outputs.size(); i++) - { - ranges[axisIdx].end = ranges[axisIdx].start + outputs[i].size(axisIdx); - inpMat(&ranges[0]).copyTo(outputs[i].getRef()); - ranges[axisIdx].start = ranges[axisIdx].end; - } -} - -Ptr SliceLayer::create(int axis) -{ - return Ptr(new SliceLayerImpl(axis)); -} + int axisIdx; +}; -Ptr SliceLayer::create(int axis, const std::vector &sliceIndices) +Ptr SliceLayer::create(const LayerParams& params) { - return Ptr(new SliceLayerImpl(axis, sliceIndices)); + return Ptr(new SliceLayerImpl(params)); } } diff --git a/modules/dnn/src/layers/slice_layer.hpp b/modules/dnn/src/layers/slice_layer.hpp deleted file mode 100644 index 4f7cbb37c2d..00000000000 --- a/modules/dnn/src/layers/slice_layer.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
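
Reviewer note: the rewritten SliceLayerImpl::forward replaces the old templated Mat/UMat paths with a single moving window of Ranges. The core of it, extracted as a free function (assuming the output Mats are already shaped, as allocate() guarantees):

```cpp
// Sketch: split a blob into consecutive chunks along one axis.
#include <opencv2/core.hpp>
#include <vector>

static void sliceAlongAxis(const cv::Mat &inp, int axis,
                           std::vector<cv::Mat> &outs)
{
    std::vector<cv::Range> ranges(inp.dims, cv::Range::all());
    ranges[axis].start = 0;
    for (size_t i = 0; i < outs.size(); i++)
    {
        ranges[axis].end = ranges[axis].start + outs[i].size[axis];
        inp(&ranges[0]).copyTo(outs[i]); // sub-blob view -> deep copy
        ranges[axis].start = ranges[axis].end;
    }
}
```
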
-// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_SLICE_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_SLICE_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -class SliceLayerImpl : public SliceLayer -{ - bool useOpenCL; - int axisIdx; - - template - void forward_(std::vector &inputs, std::vector &outputs); - -public: - SliceLayerImpl(int axis_ = 1); - SliceLayerImpl(int axis_, const std::vector &sliceIndices_); - - void allocate(const std::vector &inputs, std::vector &outputs); - - void forward(std::vector &inputs, std::vector &outputs); -}; - -} -} -#endif diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index 54751bc7cd6..5f7b4e4c91f 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -41,9 +41,6 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include "softmax_layer.hpp" -#include -#include "opencl_kernels_dnn.hpp" #include #include using std::max; @@ -53,171 +50,112 @@ namespace cv namespace dnn { -SoftMaxLayerImpl::SoftMaxLayerImpl(int axis) +class SoftMaxLayerImpl : public SoftmaxLayer { - axisRaw = axis; -} - -void SoftMaxLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) -{ - CV_Assert(inputs.size() == 1); - axis = inputs[0]->canonicalAxis(axisRaw); - - useOpenCL = ocl::useOpenCL(); - - BlobShape shape = inputs[0]->shape(); - outerSize = shape.total(0, axis); - channels = shape[axis]; - innerSize = shape.total(axis + 1); - - int allocFlag = (useOpenCL) ? 
Blob::ALLOC_UMAT : Blob::ALLOC_MAT; - shape[axis] = 1; - buf.create(shape, inputs[0]->type(), allocFlag); +public: - outputs.resize(1); - outputs[0].create(inputs[0]->shape(), inputs[0]->type(), allocFlag); -} - -void SoftMaxLayerImpl::forward(std::vector &inputs, std::vector &outputs) -{ - Blob &src = *inputs[0]; - Blob &dst = outputs[0]; - - if (!useOpenCL) - forward_cpu(src, dst); - else + SoftMaxLayerImpl(const LayerParams& params) { - CV_Assert(forward_ocl(src, dst)); + axisRaw = params.get("axis", 1); + setParamsFrom(params); } -} -#ifdef HAVE_OPENCL -bool SoftMaxLayerImpl::forward_ocl(Blob &src, Blob &dst) -{ - const UMat &srcMat = src.umatRefConst(); - UMat &dstMat = dst.umatRef(); - srcMat.copyTo(dstMat); - UMat &bufMat = buf.umatRef(); - CV_Assert(dstMat.offset == 0); - - String buildOpts = String("-DT=") + ocl::typeToStr(src.type()); - ocl::Kernel kmax, ksub, ksum, kdiv; - - if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts)) - return false; - - if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts)) - return false; - - if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts)) - return false; - - if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts)) - return false; - - size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize(); - size_t bufSize = buf.total(); - size_t totalSize = src.total(); - - kmax.args((int)outerSize, (int)channels, (int)innerSize, - ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat)); - if (!kmax.run(1, &bufSize, &wgSize, true)) - return false; - - ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize, - ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat)); - if (!ksub.run(1, &totalSize, &wgSize, true)) - return false; + void allocate(const std::vector &inputs, std::vector &outputs) + { + CV_Assert(inputs.size() == 1); + const Mat& inp0 = *inputs[0]; + int dims = inp0.dims; + axis = axisRaw < 0 ? 
axisRaw + dims : axisRaw; - cv::exp(dstMat, dstMat); + outerSize = inp0.total(0, axis); + channels = inp0.size[axis]; + innerSize = inp0.total(axis + 1); - ksum.args((int)outerSize, (int)channels, (int)innerSize, - ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat)); - if (!ksum.run(1, &bufSize, &wgSize, true)) - return false; + std::vector shape(inp0.size.p, inp0.size.p + dims); + shape[axis] = 1; + buf.create(shape, inp0.type()); - kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize, - ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat)); - if (!kdiv.run(1, &totalSize, &wgSize, true)) - return false; + outputs.resize(1); + outputs[0].create(inp0.dims, inp0.size.p, inp0.type()); + } - return true; -} -#else -bool SoftMaxLayerImpl::forward_ocl(Blob&, Blob&) -{ - return false; -} -#endif + void forward(std::vector &inputs, std::vector &outputs) + { + const Mat &src = *inputs[0]; + Mat &dst = outputs[0]; -void SoftMaxLayerImpl::forward_cpu(Blob &src, Blob &dst) -{ - CV_Assert(src.type() == CV_32F); + CV_Assert(src.type() == CV_32F); + CV_Assert(src.isContinuous() && dst.isContinuous()); - float *srcPtr = src.ptrf(); - float *dstPtr = dst.ptrf(); - float *bufPtr = buf.ptrf(); + const float *srcPtr = src.ptr(); + float *dstPtr = dst.ptr(); + float *bufPtr = buf.ptr(); - size_t outerStep = src.total(axis); - size_t cnStep = src.total(axis + 1); + size_t outerStep = src.total(axis); + size_t cnStep = src.total(axis + 1); - //compute max along axis - for (size_t outerDim = 0; outerDim < outerSize; outerDim++) - { - size_t srcOffset = outerDim * outerStep; - size_t bufOffset = outerDim * cnStep; + //compute max along axis + for (size_t outerDim = 0; outerDim < outerSize; outerDim++) + { + size_t srcOffset = outerDim * outerStep; + size_t bufOffset = outerDim * cnStep; - memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float)); + memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float)); - for (size_t cnDim = 1; cnDim < channels; cnDim++) - { - for (size_t i = 0; i < innerSize; i++) - bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]); + for (size_t cnDim = 1; cnDim < channels; cnDim++) + { + for (size_t i = 0; i < innerSize; i++) + bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]); + } } - } - - //subtract max - for (size_t outerDim = 0; outerDim < outerSize; outerDim++) - { - size_t srcOffset = outerDim * outerStep; - size_t bufOffset = outerDim * cnStep; - for (size_t cnDim = 0; cnDim < channels; cnDim++) + //subtract max + for (size_t outerDim = 0; outerDim < outerSize; outerDim++) { - for (size_t i = 0; i < innerSize; i++) - dstPtr[srcOffset + cnDim * cnStep + i] = srcPtr[srcOffset + cnDim * cnStep + i] - bufPtr[bufOffset + i]; + size_t srcOffset = outerDim * outerStep; + size_t bufOffset = outerDim * cnStep; + + for (size_t cnDim = 0; cnDim < channels; cnDim++) + { + for (size_t i = 0; i < innerSize; i++) + dstPtr[srcOffset + cnDim * cnStep + i] = srcPtr[srcOffset + cnDim * cnStep + i] - bufPtr[bufOffset + i]; + } } - } - - cv::exp(dst.matRef(), dst.matRef()); - - for (size_t outerDim = 0; outerDim < outerSize; outerDim++) - { - size_t srcOffset = outerDim * outerStep; - size_t bufOffset = outerDim * cnStep; - //sum exp along axis - for (size_t i = 0; i < innerSize; i++) - bufPtr[bufOffset + i] = 0.f; + cv::exp(dst, dst); - for (size_t cnDim = 0; cnDim < channels; cnDim++) + for (size_t outerDim = 0; 
outerDim < outerSize; outerDim++) { - for (size_t i = 0; i < innerSize; i++) - bufPtr[bufOffset + i] += dstPtr[srcOffset + cnDim * cnStep + i]; - } + size_t srcOffset = outerDim * outerStep; + size_t bufOffset = outerDim * cnStep; - //divide by computed sum - for (size_t cnDim = 0; cnDim < channels; cnDim++) - { + //sum exp along axis for (size_t i = 0; i < innerSize; i++) - dstPtr[srcOffset + cnDim * cnStep + i] /= bufPtr[bufOffset + i]; + bufPtr[bufOffset + i] = 0.f; + + for (size_t cnDim = 0; cnDim < channels; cnDim++) + { + for (size_t i = 0; i < innerSize; i++) + bufPtr[bufOffset + i] += dstPtr[srcOffset + cnDim * cnStep + i]; + } + + //divide by computed sum + for (size_t cnDim = 0; cnDim < channels; cnDim++) + { + for (size_t i = 0; i < innerSize; i++) + dstPtr[srcOffset + cnDim * cnStep + i] /= bufPtr[bufOffset + i]; + } } } -} -Ptr SoftmaxLayer::create(int axis) + int axis, axisRaw; + Mat buf; + size_t outerSize, channels, innerSize; +}; + +Ptr SoftmaxLayer::create(const LayerParams& params) { - return Ptr(new SoftMaxLayerImpl(axis)); + return Ptr(new SoftMaxLayerImpl(params)); } } diff --git a/modules/dnn/src/layers/softmax_layer.hpp b/modules/dnn/src/layers/softmax_layer.hpp deleted file mode 100644 index fad97dddc8c..00000000000 --- a/modules/dnn/src/layers/softmax_layer.hpp +++ /dev/null @@ -1,72 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
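
Reviewer note: with the OpenCL path removed, SoftMaxLayerImpl::forward is the classic numerically stable softmax — subtract the per-position maximum, exponentiate, normalize. Collapsed to a single contiguous vector for clarity:

```cpp
// Sketch: stable softmax over one vector.
#include <algorithm>
#include <cmath>

static void softmax1D(const float *src, float *dst, int n)
{
    float mx = *std::max_element(src, src + n); // guards exp() against overflow
    float sum = 0.f;
    for (int i = 0; i < n; i++)
    {
        dst[i] = std::exp(src[i] - mx);
        sum += dst[i];
    }
    for (int i = 0; i < n; i++)
        dst[i] /= sum;
}
```
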
-// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -class SoftMaxLayerImpl : public SoftmaxLayer -{ - int axis, axisRaw; - Blob buf; - bool useOpenCL; - size_t outerSize, channels, innerSize; - - - bool forward_ocl(Blob &src, Blob &dst); - void forward_cpu(Blob &src, Blob &dst); - -public: - - SoftMaxLayerImpl(int axis = 1); - void allocate(const std::vector &inputs, std::vector &outputs); - void forward(std::vector &inputs, std::vector &outputs); -}; - -} -} -#endif diff --git a/modules/dnn/src/layers/split_layer.cpp b/modules/dnn/src/layers/split_layer.cpp index cd3a507443b..ac6b39c8e1e 100644 --- a/modules/dnn/src/layers/split_layer.cpp +++ b/modules/dnn/src/layers/split_layer.cpp @@ -41,47 +41,54 @@ #include "../precomp.hpp" #include "layers_common.hpp" -#include "split_layer.hpp" -#include namespace cv { namespace dnn { -SplitLayerImpl::SplitLayerImpl(int outputsCount_ /*= -1*/) +class SplitLayerImpl : public SplitLayer { - outputsCount = outputsCount_; -} +public: + SplitLayerImpl(const LayerParams ¶ms) + { + setParamsFrom(params); + //TODO: maybe "top_count" param is useless because it can be determined by output connections number + if (params.has("top_count")) + { + outputsCount = params.get("top_count"); + CV_Assert(outputsCount >= 0); + } + else + { + outputsCount = -1; + } + } -void SplitLayerImpl::allocate(const std::vector &inputs, std::vector &outputs) -{ - CV_Assert(inputs.size() == 1); - useOpenCL = ocl::useOpenCL() && inputs[0]->getState() == Blob::HEAD_AT_UMAT; - int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT; + void allocate(const std::vector &inputs, std::vector &outputs) + { + CV_Assert(inputs.size() == 1); + const Mat& inp0 = *inputs[0]; - if (outputsCount >= 0) - outputs.resize(outputsCount); + if (outputsCount >= 0) + outputs.resize(outputsCount); - for (size_t i = 0; i < outputs.size(); i++) - outputs[i].create(inputs[0]->shape(), inputs[0]->type(), allocFlags); -} + for (size_t i = 0; i < outputs.size(); i++) + outputs[i].create(inp0.dims, inp0.size.p, inp0.type()); + } -void SplitLayerImpl::forward(std::vector &inputs, std::vector &outputs) -{ - for (size_t i = 0; i < outputs.size(); i++) + void forward(std::vector &inputs, std::vector &outputs) { - if (useOpenCL) - inputs[0]->umatRefConst().copyTo(outputs[i].umatRef()); - else - inputs[0]->matRefConst().copyTo(outputs[i].matRef()); + for (size_t i = 0; i < outputs.size(); i++) + { + inputs[0]->copyTo(outputs[i]); + } } -} - +}; -Ptr SplitLayer::create(int outputsCount) +Ptr SplitLayer::create(const LayerParams& params) { - return Ptr(new SplitLayerImpl(outputsCount)); + return Ptr(new SplitLayerImpl(params)); } } diff --git a/modules/dnn/src/layers/split_layer.hpp b/modules/dnn/src/layers/split_layer.hpp deleted file mode 100644 index 124cb1275b5..00000000000 --- a/modules/dnn/src/layers/split_layer.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. 
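
Reviewer note: every `create(const LayerParams&)` factory in this patch follows the same plumbing — scalars via `params.get` with a default, arrays via DictValue indexing (SliceLayerImpl's "slice_point", SplitLayerImpl's "top_count"). A sketch of the array case; the key name here is just an example:

```cpp
// Sketch: pulling an optional integer list out of LayerParams.
#include <opencv2/dnn.hpp>
#include <vector>

static std::vector<int> readIntList(const cv::dnn::LayerParams &params,
                                    const cv::String &key)
{
    std::vector<int> values;
    if (params.has(key))
    {
        const cv::dnn::DictValue &dv = params.get(key);
        for (int i = 0; i < dv.size(); i++)
            values.push_back(dv.get<int>(i));
    }
    return values;
}
```
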
-// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_DNN_LAYERS_SPLIT_LAYER_HPP__ -#define __OPENCV_DNN_LAYERS_SPLIT_LAYER_HPP__ -#include "../precomp.hpp" -#include - -namespace cv -{ -namespace dnn -{ - -class SplitLayerImpl : public SplitLayer -{ - bool useOpenCL; - -public: - SplitLayerImpl(int outputsCount_ = -1); - - void allocate(const std::vector &inputs, std::vector &outputs); - - void forward(std::vector &inputs, std::vector &outputs); -}; - -} -} -#endif diff --git a/modules/dnn/src/precomp.hpp b/modules/dnn/src/precomp.hpp index 6932bc8e5bc..b622e38da2e 100644 --- a/modules/dnn/src/precomp.hpp +++ b/modules/dnn/src/precomp.hpp @@ -42,3 +42,4 @@ #include #include "cvconfig.h" #include +#include diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index b6aa3682216..84460e60477 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -51,31 +51,32 @@ struct Pin int blobIndex; }; -BlobShape blobShapeFromTensor(const tensorflow::TensorProto &tensor) +void blobShapeFromTensor(const tensorflow::TensorProto &tensor, std::vector& shape) { + shape.clear(); if (tensor.has_tensor_shape()) { const tensorflow::TensorShapeProto &_shape = tensor.tensor_shape(); - BlobShape shape = BlobShape::all(_shape.dim_size()); + int i, n = _shape.dim_size(); + shape.resize(n); - for (int i = 0; i < _shape.dim_size(); i++) + for (i = 0; i < n; i++) shape[i] = (int)_shape.dim(i).size(); - - return shape; } else { CV_Error(Error::StsError, "Unknown shape of input tensor"); - return BlobShape(); } } template -void parseTensor(const tensorflow::TensorProto &tensor, Blob &dstBlob) +void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) { - BlobShape shape = blobShapeFromTensor(tensor); + std::vector shape; + blobShapeFromTensor(tensor, shape); + int dims = (int)shape.size(); - if (shape.dims() == 4) + if (dims == 4) { // REORDER blob NHWC to NCHW swap(shape[2], shape[3]); // NHCW @@ -85,12 +86,12 @@ 
void parseTensor(const tensorflow::TensorProto &tensor, Blob &dstBlob) dstBlob.create(shape, CV_32F); int size = tensor.tensor_content().size() / sizeof(T); - CV_Assert(size == (int)dstBlob.matRefConst().total()); + CV_Assert(size == (int)dstBlob.total()); - float *dstData = dstBlob.matRef().ptr(); + float *dstData = dstBlob.ptr(); const T *data = reinterpret_cast(tensor.tensor_content().c_str()); - if (shape.dims() == 4) + if (dims == 4) { int num = shape[0], channels = shape[1], height = shape[2], width = shape[3]; int total = num*channels*height*width; @@ -115,7 +116,7 @@ void parseTensor(const tensorflow::TensorProto &tensor, Blob &dstBlob) } } -void blobFromTensor(const tensorflow::TensorProto &tensor, Blob &dstBlob) +void blobFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) { switch (tensor.dtype()) { case tensorflow::DT_FLOAT: @@ -235,10 +236,12 @@ void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer) } DictValue parseDims(const tensorflow::TensorProto &tensor) { - BlobShape shape = blobShapeFromTensor(tensor); + std::vector shape; + blobShapeFromTensor(tensor, shape); + int dims = (int)shape.size(); CV_Assert(tensor.dtype() == tensorflow::DT_INT32); - CV_Assert(shape.dims() == 1); + CV_Assert(dims == 1); int size = tensor.tensor_content().size() / sizeof(int); const int *data = reinterpret_cast(tensor.tensor_content().c_str()); @@ -372,7 +375,7 @@ class TFImporter : public Importer { ~TFImporter() {} private: - void kernelFromTensor(const tensorflow::TensorProto &tensor, Blob &dstBlob); + void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob); void connect(const std::map& layers_name_id_map, Net& network, const Pin& outPin, const int input_layer_id, const int input_blob_id); @@ -391,13 +394,15 @@ TFImporter::TFImporter(const char *model) ReadTFNetParamsFromBinaryFileOrDie(model, &net); } -void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Blob &dstBlob) +void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) { - BlobShape shape = blobShapeFromTensor(tensor); + std::vector shape; + blobShapeFromTensor(tensor, shape); + int dims = (int)shape.size(); // TODO: other blob types CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT); - CV_Assert(shape.dims() == 4); + CV_Assert(dims == 4); // REORDER kernel HWIO to OIHW swap(shape[0], shape[2]); // IWHO @@ -407,9 +412,9 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Blob &d dstBlob.create(shape, CV_32F); int size = tensor.tensor_content().size() / sizeof(float); - CV_Assert(size == (int)dstBlob.matRefConst().total()); + CV_Assert(size == (int)dstBlob.total()); - float *dstData = dstBlob.matRef().ptr(); + float *dstData = dstBlob.ptr(); const float *data = reinterpret_cast(tensor.tensor_content().c_str()); int out_c = shape[0], input_c = shape[1], height = shape[2], width = shape[3]; @@ -533,7 +538,7 @@ void TFImporter::populateNet(Net dstNet) } kernelFromTensor(getConstBlob(layer, value_id), layerParams.blobs[0]); - BlobShape kshape = layerParams.blobs[0].shape(); + const int* kshape = layerParams.blobs[0].size.p; layerParams.set("kernel_h", kshape[2]); layerParams.set("kernel_w", kshape[3]); layerParams.set("num_output", kshape[0]); @@ -588,13 +593,11 @@ void TFImporter::populateNet(Net dstNet) blobFromTensor(getConstBlob(layer, value_id, -1, &kernel_blob_index), layerParams.blobs[0]); if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed - Mat data = 
layerParams.blobs[0].matRef().t(); - BlobShape shape(data.rows, data.cols); - layerParams.blobs[0].fill(shape, layerParams.blobs[0].type(), data.data); + Mat data = layerParams.blobs[0].t(); + layerParams.blobs[0] = data.clone(); } - BlobShape kshape = layerParams.blobs[0].shape(); - layerParams.set("num_output", kshape[0]); + layerParams.set("num_output", layerParams.blobs[0].size[0]); int id = dstNet.addLayer(name, "InnerProduct", layerParams); layer_id[name] = id; diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index 738a4400e69..5bd22355acb 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -52,11 +52,11 @@ namespace dnn { #if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER #include "THDiskFile.h" -#ifdef NDEBUG +//#ifdef NDEBUG static bool dbgPrint = false; -#else -static bool dbgPrint = true; -#endif +//#else +//static bool dbgPrint = true; +//#endif enum LuaType { @@ -91,13 +91,13 @@ static inline bool endsWith(const String &str, const char *substr) struct TorchImporter : public ::cv::dnn::Importer { - typedef std::map > TensorsMap; + typedef std::map > TensorsMap; Net net; THFile *file; std::set readedIndexes; std::map storages; - std::map tensors; + std::map tensors; struct Module { @@ -343,9 +343,9 @@ struct TorchImporter : public ::cv::dnn::Importer std::cout << scalarParams; std::cout << "#" << tensorParams.size() << " tensorParams:\n"; - std::map >::const_iterator it; + std::map >::const_iterator it; for (it = tensorParams.begin(); it != tensorParams.end(); it++) - std::cout << it->first << ": Tensor " << it->second.second.shape() << "\n"; + std::cout << it->first << ": Tensor " << it->second.second.size << "\n"; } } @@ -364,7 +364,7 @@ struct TorchImporter : public ::cv::dnn::Importer if (typeidx == TYPE_NIL) { - tensors.insert(std::make_pair(indexTensor, Blob())); + tensors.insert(std::make_pair(indexTensor, Mat())); return; } @@ -398,9 +398,8 @@ struct TorchImporter : public ::cv::dnn::Importer Mat srcMat(ndims, (int*)isizes, typeTensor , storages[indexStorage].ptr() + offset*CV_ELEM_SIZE(typeTensor), (size_t*)ssteps); int dstType = CV_32F; - Blob blob; - blob.create(BlobShape(ndims, isizes), dstType); - srcMat.convertTo(blob.matRef(), dstType); + Mat blob; + srcMat.convertTo(blob, dstType); tensors.insert(std::make_pair(indexTensor, blob)); } @@ -523,7 +522,7 @@ struct TorchImporter : public ::cv::dnn::Importer readTorchTable(scalarParams, tensorParams); CV_Assert(tensorParams.count("weight")); - Blob weightBlob = tensorParams["weight"].second; + Mat weightBlob = tensorParams["weight"].second; layerParams.blobs.push_back(weightBlob); bool bias = tensorParams.count("bias") != 0; @@ -531,7 +530,7 @@ struct TorchImporter : public ::cv::dnn::Importer layerParams.blobs.push_back(tensorParams["bias"].second); layerParams.set("bias_term", bias); - layerParams.set("num_output", weightBlob.size(0)); + layerParams.set("num_output", weightBlob.size[0]); curModule->modules.push_back(newModule); } else if (nnName == "Reshape") @@ -608,7 +607,7 @@ struct TorchImporter : public ::cv::dnn::Importer } else { CV_Assert(tensorParams["weight"].second.total() == 1); - float negative_slope = *tensorParams["weight"].second.ptrf(); + float negative_slope = *tensorParams["weight"].second.ptr(); layerParams.set("negative_slope", negative_slope); newModule->apiType = "ReLU"; @@ -722,10 +721,10 @@ struct TorchImporter : public ::cv::dnn::Importer layerParams.set("adj_h", 
static_cast(scalarParams.get("adjH"))); layerParams.set("num_output", static_cast(scalarParams.get("nOutputPlane"))); - Blob weights = tensorParams["weight"].second; - BlobShape shape = weights.shape(), - reorderedShape = BlobShape(shape[1], shape[0], shape[2], shape[3]); - layerParams.blobs.push_back(weights.reshape(reorderedShape)); + Mat weights = tensorParams["weight"].second; + CV_Assert(weights.dims == 4); + int reorderedShape[] = { weights.size[1], weights.size[0], weights.size[2], weights.size[3] }; + layerParams.blobs.push_back(weights.reshape(1, 4, reorderedShape)); bool bias = tensorParams.count("bias"); layerParams.set("bias_term", bias); @@ -963,7 +962,7 @@ Ptr createTorchImporter(const String &filename, bool isBinary) } -Blob readTorchBlob(const String &filename, bool isBinary) +Mat readTorchBlob(const String &filename, bool isBinary) { Ptr importer(new TorchImporter(filename, isBinary)); importer->readObject(); diff --git a/modules/dnn/test/npy_blob.hpp b/modules/dnn/test/npy_blob.hpp index 7c9910172a7..b04e02c239e 100644 --- a/modules/dnn/test/npy_blob.hpp +++ b/modules/dnn/test/npy_blob.hpp @@ -44,22 +44,91 @@ #include "test_precomp.hpp" #include "cnpy.h" -inline cv::dnn::Blob blobFromNPY(const cv::String &path) +namespace cv { - cnpy::NpyArray npyBlob = cnpy::npy_load(path.c_str()); - cv::dnn::BlobShape shape((int)npyBlob.shape.size(), (int*)&npyBlob.shape[0]); - - cv::dnn::Blob blob(shape); - blob.fill(shape, CV_32F, npyBlob.data); +inline Mat blobFromNPY(const String &path) +{ + cnpy::NpyArray npyBlob = cnpy::npy_load(path.c_str()); + Mat blob = Mat((int)npyBlob.shape.size(), (int*)&npyBlob.shape[0], CV_32F, npyBlob.data).clone(); npyBlob.destruct(); return blob; } -inline void saveBlobToNPY(cv::dnn::Blob &blob, const cv::String &path) +inline void saveBlobToNPY(const Mat &blob, const String &path) +{ + cnpy::npy_save(path.c_str(), blob.ptr(), (unsigned*)&blob.size.p[0], blob.dims); +} + +inline size_t shapeTotal(const std::vector& shape) +{ + size_t p = 1, i, n = shape.size(); + for( i = 0; i < n; i++) + p *= shape[i]; + return p; +} + +inline bool shapeEqual(const std::vector& shape1, const std::vector& shape2) +{ + size_t i, n1 = shape1.size(), n2 = shape2.size(); + if( n1 != n2 ) + return false; + for( i = 0; i < n1; i++ ) + if( shape1[i] != shape2[i] ) + return false; + return true; +} + +inline std::vector getShape(const Mat& m) { - cv::dnn::BlobShape shape = blob.shape(); - cnpy::npy_save(path.c_str(), blob.ptrf(), (unsigned*)&shape[0], shape.dims()); + return m.empty() ? 
std::vector() : std::vector(&m.size.p[0], &m.size.p[0] + m.dims); +} + +inline std::vector makeShape(int a0, int a1=-1, int a2=-1, int a3=-1, int a4=-1, int a5=-1) +{ + std::vector s; + s.push_back(a0); + if(a1 > 0) + { + s.push_back(a1); + if(a2 > 0) + { + s.push_back(a2); + if(a3 > 0) + { + s.push_back(a3); + if(a4 > 0) + { + s.push_back(a4); + if(a5 > 0) + s.push_back(a5); + } + } + } + } + return s; +} + +inline std::vector concatShape(const std::vector& a, const std::vector& b) +{ + size_t na = a.size(), nb = b.size(); + std::vector c(na + nb); + + std::copy(a.begin(), a.end(), c.begin()); + std::copy(b.begin(), b.end(), c.begin() + na); + + return c; +} + +inline void printShape(const String& name, const std::vector& shape) +{ + printf("%s: [", name.c_str()); + size_t i, n = shape.size(); + for( i = 0; i < n; i++ ) + printf(" %d", shape[i]); + printf(" ]\n"); +} + } #endif diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 5d79c1a3bf0..536bda6a059 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -87,18 +87,17 @@ TEST(Reproducibility_AlexNet, Accuracy) Mat sample = imread(_tf("grace_hopper_227.png")); ASSERT_TRUE(!sample.empty()); - cv::cvtColor(sample, sample, cv::COLOR_BGR2RGB); Size inputSize(227, 227); if (sample.size() != inputSize) resize(sample, sample, inputSize); - net.setBlob(".data", dnn::Blob::fromImages(sample)); + net.setBlob(".data", blobFromImage(sample, 1.)); net.forward(); - Blob out = net.getBlob("prob"); - Blob ref = blobFromNPY(_tf("caffe_alexnet_prob.npy")); + Mat out = net.getBlob("prob"); + Mat ref = blobFromNPY(_tf("caffe_alexnet_prob.npy")); normAssert(ref, out); } @@ -120,14 +119,11 @@ TEST(Reproducibility_FCN, Accuracy) if (sample.size() != inputSize) resize(sample, sample, inputSize); - cv::cvtColor(sample, sample, cv::COLOR_BGR2RGB); - - net.setBlob(".data", dnn::Blob::fromImages(sample)); + net.setBlob(".data", blobFromImage(sample, 1.)); net.forward(); - Blob out = net.getBlob("score"); - - Blob ref = blobFromNPY(_tf("caffe_fcn8s_prob.npy")); + Mat out = net.getBlob("score"); + Mat ref = blobFromNPY(_tf("caffe_fcn8s_prob.npy")); normAssert(ref, out); } diff --git a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp index b9a4ee12e2d..eb38d766c6b 100644 --- a/modules/dnn/test/test_common.hpp +++ b/modules/dnn/test/test_common.hpp @@ -50,16 +50,10 @@ inline const std::string &getOpenCVExtraDir() inline void normAssert(cv::InputArray ref, cv::InputArray test, const char *comment = "") { double normL1 = cvtest::norm(ref, test, cv::NORM_L1) / ref.getMat().total(); - EXPECT_LE(normL1, 0.0001) << comment; + EXPECT_LE(normL1, 0.002) << comment; double normInf = cvtest::norm(ref, test, cv::NORM_INF); - EXPECT_LE(normInf, 0.001) << comment; -} - -inline void normAssert(cv::dnn::Blob &ref, cv::dnn::Blob &test, const char *comment = "") -{ - ASSERT_EQ(ref.shape(), test.shape()) << comment; - normAssert(ref.matRefConst(), test.matRefConst(), comment); + EXPECT_LE(normInf, 0.08) << comment; } #endif diff --git a/modules/dnn/test/test_googlenet.cpp b/modules/dnn/test/test_googlenet.cpp index f1fafb0077b..82f3ec1a89e 100644 --- a/modules/dnn/test/test_googlenet.cpp +++ b/modules/dnn/test/test_googlenet.cpp @@ -72,23 +72,17 @@ static void launchGoogleNetTest() inpMats.push_back( imread(_tf("googlenet_1.jpg")) ); ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty()); - net.setBlob(".data", Blob::fromImages(inpMats)); + net.setBlob(".data", 
blobFromImages(inpMats, 1.)); net.forward(); - Blob out = net.getBlob("prob"); - Blob ref = blobFromNPY(_tf("googlenet_prob.npy")); + Mat out = net.getBlob("prob"); + Mat ref = blobFromNPY(_tf("googlenet_prob.npy")); normAssert(out, ref); } TEST(Reproducibility_GoogLeNet, Accuracy) { - OCL_OFF(launchGoogleNetTest()); -} - -OCL_TEST(Reproducibility_GoogLeNet, Accuracy) -{ - OCL_ON(launchGoogleNetTest()); - OCL_OFF(); + launchGoogleNetTest(); } } diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index c4544499b39..34a8ef24354 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -55,31 +55,33 @@ using namespace cv::dnn; template static String _tf(TString filename) { - return (getOpenCVExtraDir() + "/dnn/layers/") + filename; + String basetestdir = getOpenCVExtraDir(); + size_t len = basetestdir.size(); + if(len > 0 && basetestdir[len-1] != '/' && basetestdir[len-1] != '\\') + return (basetestdir + "/dnn/layers") + filename; + return (basetestdir + "dnn/layers/") + filename; } - -enum RunLayerMode +void runLayer(Ptr layer, std::vector &inpBlobs, std::vector &outBlobs) { - ALLOC_ONLY = 1, - FORWARD_ONLY = 2, - ALLOC_AND_FORWARD = ALLOC_ONLY | FORWARD_ONLY -}; + size_t i, ninputs = inpBlobs.size(); + std::vector inp_(ninputs); + std::vector inp(ninputs); + std::vector outp; -typedef Ptr > PtrToVecPtrBlob; - -PtrToVecPtrBlob -runLayer(Ptr layer, std::vector &inpBlobs, std::vector &outBlobs, int mode = ALLOC_AND_FORWARD) -{ - PtrToVecPtrBlob inpPtrs(new std::vector()); - inpPtrs->reserve(inpBlobs.size()); - for (size_t i = 0; i < inpBlobs.size(); i++) - inpPtrs->push_back(&inpBlobs[i]); + for( i = 0; i < ninputs; i++ ) + { + inp_[i] = inpBlobs[i].clone(); + inp[i] = &inp_[i]; + } - if (mode & ALLOC_ONLY) layer->allocate(*inpPtrs, outBlobs); - if (mode & FORWARD_ONLY) layer->forward(*inpPtrs, outBlobs); + layer->allocate(inp, outp); + layer->forward(inp, outp); - return inpPtrs; + size_t noutputs = outp.size(); + outBlobs.resize(noutputs); + for( i = 0; i < noutputs; i++ ) + outBlobs[i] = outp[i]; } @@ -100,102 +102,59 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool importer->populateNet(net); } - Blob inp = blobFromNPY(inpfile); - Blob ref = blobFromNPY(outfile); + Mat inp = blobFromNPY(inpfile); + Mat ref = blobFromNPY(outfile); net.setBlob(".input", inp); net.forward(); - Blob out = net.getBlob("output"); + Mat out = net.getBlob("output"); normAssert(ref, out); } TEST(Layer_Test_Softmax, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_softmax")); -} -OCL_TEST(Layer_Test_Softmax, Accuracy) -{ - OCL_ON(testLayerUsingCaffeModels("layer_softmax")); - OCL_OFF(); + testLayerUsingCaffeModels("layer_softmax"); } TEST(Layer_Test_LRN_spatial, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_lrn_spatial")); -} -OCL_TEST(Layer_Test_LRN_spatial, Accuracy) -{ - OCL_ON(testLayerUsingCaffeModels("layer_lrn_spatial")); - OCL_OFF(); + testLayerUsingCaffeModels("layer_lrn_spatial"); } TEST(Layer_Test_LRN_channels, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_lrn_channels")); -} -OCL_TEST(Layer_Test_LRN_channels, Accuracy) -{ - OCL_ON(testLayerUsingCaffeModels("layer_lrn_channels")); - OCL_OFF(); + testLayerUsingCaffeModels("layer_lrn_channels"); } TEST(Layer_Test_Convolution, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_convolution", true)); -} -OCL_TEST(Layer_Test_Convolution, Accuracy) -{ - OCL_ON(testLayerUsingCaffeModels("layer_convolution", true)); - OCL_OFF(); + 
testLayerUsingCaffeModels("layer_convolution", true); } TEST(Layer_Test_DeConvolution, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_deconvolution", true, false)); -} - -OCL_TEST(Layer_Test_DeConvolution, Accuracy) -{ - OCL_ON(testLayerUsingCaffeModels("layer_deconvolution", true, false);); - OCL_OFF(); + testLayerUsingCaffeModels("layer_deconvolution", true, false); } TEST(Layer_Test_InnerProduct, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_inner_product", true)); -} -OCL_TEST(Layer_Test_InnerProduct, Accuracy) -{ - OCL_ON(testLayerUsingCaffeModels("layer_inner_product", true)); - OCL_OFF(); + testLayerUsingCaffeModels("layer_inner_product", true); } TEST(Layer_Test_Pooling_max, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_pooling_max")); - OCL_ON(); -} -OCL_TEST(Layer_Test_Pooling_max, Accuracy) -{ - OCL_ON(testLayerUsingCaffeModels("layer_pooling_max")); - OCL_OFF(); + testLayerUsingCaffeModels("layer_pooling_max"); } TEST(Layer_Test_Pooling_ave, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_pooling_ave")); - OCL_ON(); -} -OCL_TEST(Layer_Test_Pooling_ave, Accuracy) -{ - OCL_ON(testLayerUsingCaffeModels("layer_pooling_ave")); - OCL_OFF(); + testLayerUsingCaffeModels("layer_pooling_ave"); } TEST(Layer_Test_MVN, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_mvn")); + testLayerUsingCaffeModels("layer_mvn"); } TEST(Layer_Test_Reshape, squeeze) @@ -204,20 +163,25 @@ TEST(Layer_Test_Reshape, squeeze) params.set("axis", 2); params.set("num_axes", 1); - Blob inp(BlobShape(4, 3, 1, 2)); - std::vector inpVec(1, &inp); - std::vector outVec; + int sz[] = {4, 3, 1, 2}; + Mat inp(4, sz, CV_32F); + std::vector inpVec(1, &inp); + std::vector outVec; Ptr rl = LayerFactory::createLayerInstance("Reshape", params); rl->allocate(inpVec, outVec); rl->forward(inpVec, outVec); - EXPECT_EQ(outVec[0].shape(), BlobShape(4, 3, 2)); + Mat& out = outVec[0]; + std::vector shape(out.size.p, out.size.p + out.dims); + int sh0[] = {4, 3, 2}; + std::vector shape0(sh0, sh0+3); + EXPECT_TRUE(shapeEqual(shape, shape0)); } TEST(Layer_Test_BatchNorm, Accuracy) { - OCL_OFF(testLayerUsingCaffeModels("layer_batch_norm", true)); + testLayerUsingCaffeModels("layer_batch_norm", true); } //template @@ -232,16 +196,15 @@ TEST(Layer_Test_BatchNorm, Accuracy) //} //TEST(Layer_Concat, Accuracy) //{ -// OCL_OFF(test_Layer_Concat()); +// test_Layer_Concat()); //} //OCL_TEST(Layer_Concat, Accuracy) //{ // OCL_ON(test_Layer_Concat()); -// OCL_OFF(); +// ); //} -template -void test_Reshape_Split_Slice_layers() +static void test_Reshape_Split_Slice_layers() { Net net; { @@ -250,46 +213,41 @@ void test_Reshape_Split_Slice_layers() importer->populateNet(net); } - Blob input(BlobShape(6, 12)); + Mat input(6, 12, CV_32F); RNG rng(0); - rng.fill(input.getRef(), RNG::UNIFORM, -1, 1); + rng.fill(input, RNG::UNIFORM, -1, 1); net.setBlob(".input", input); net.forward(); - Blob output = net.getBlob("output"); + Mat output = net.getBlob("output"); normAssert(input, output); } TEST(Layer_Test_Reshape_Split_Slice, Accuracy) { - OCL_OFF(test_Reshape_Split_Slice_layers()); -} -OCL_TEST(Layer_Test_Reshape_Split_Slice, Accuracy) -{ - OCL_ON(test_Reshape_Split_Slice_layers()); - OCL_OFF(); + test_Reshape_Split_Slice_layers(); } class Layer_LSTM_Test : public ::testing::Test { public: int numInp, numOut; - Blob Wh, Wx, b; + Mat Wh, Wx, b; Ptr layer; - std::vector inputs, outputs; + std::vector inputs, outputs; Layer_LSTM_Test() {} - void init(const BlobShape &inpShape_, const BlobShape &outShape_) + void init(const 
std::vector &inpShape_, const std::vector &outShape_) { - numInp = inpShape_.total(); - numOut = outShape_.total(); + numInp = (int)shapeTotal(inpShape_); + numOut = (int)shapeTotal(outShape_); - Wh = Blob(BlobShape(4 * numOut, numOut)); - Wx = Blob(BlobShape(4 * numOut, numInp)); - b = Blob(BlobShape(4 * numOut, 1)); + Wh = Mat::ones(4 * numOut, numOut, CV_32F); + Wx = Mat::ones(4 * numOut, numInp, CV_32F); + b = Mat::ones(4 * numOut, 1, CV_32F); - layer = LSTMLayer::create(); + layer = LSTMLayer::create(LayerParams()); layer->setWeights(Wh, Wx, b); layer->setOutShape(outShape_); } @@ -297,27 +255,43 @@ class Layer_LSTM_Test : public ::testing::Test TEST_F(Layer_LSTM_Test, get_set_test) { - BlobShape TN(4); - BlobShape inpShape(5, 3, 2), inpResShape = TN + inpShape; - BlobShape outShape(3, 1, 2), outResShape = TN + outShape; + const int TN = 4; + std::vector inpShape = makeShape(5, 3, 2); + std::vector outShape = makeShape(3, 1, 2); + std::vector inpResShape = concatShape(makeShape(TN), inpShape); + std::vector outResShape = concatShape(makeShape(TN), outShape); init(inpShape, outShape); layer->setProduceCellOutput(true); layer->setUseTimstampsDim(false); layer->setOutShape(outShape); - layer->setC(Blob(outResShape)); - layer->setH(Blob(outResShape)); + Mat C((int)outResShape.size(), &outResShape[0], CV_32F); + randu(C, -1., 1.); + Mat H = C.clone(); + randu(H, -1., 1.); + layer->setC(C); + layer->setH(H); - inputs.push_back(Blob(inpResShape)); + Mat inp((int)inpResShape.size(), &inpResShape[0], CV_32F); + randu(inp, -1., 1.); + + inputs.push_back(inp); runLayer(layer, inputs, outputs); EXPECT_EQ(2u, outputs.size()); - EXPECT_EQ(outResShape, outputs[0].shape()); - EXPECT_EQ(outResShape, outputs[1].shape()); - EXPECT_EQ(outResShape, layer->getC().shape()); - EXPECT_EQ(outResShape, layer->getH().shape()); + printShape("outResShape", outResShape); + printShape("out0", getShape(outputs[0])); + printShape("out1", getShape(outputs[0])); + printShape("C", getShape(layer->getC())); + printShape("H", getShape(layer->getH())); + + EXPECT_TRUE(shapeEqual(outResShape, getShape(outputs[0]))); + EXPECT_TRUE(shapeEqual(outResShape, getShape(outputs[1]))); + + EXPECT_TRUE(shapeEqual(outResShape, getShape(layer->getC()))); + EXPECT_TRUE(shapeEqual(outResShape, getShape(layer->getH()))); EXPECT_EQ(0, layer->inputNameToIndex("x")); EXPECT_EQ(0, layer->outputNameToIndex("h")); @@ -326,24 +300,24 @@ TEST_F(Layer_LSTM_Test, get_set_test) TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent) { - Ptr layer = LSTMLayer::create(); + Ptr layer = LSTMLayer::create(LayerParams()); - Blob Wx = blobFromNPY(_tf("lstm.prototxt.w_0.npy")); - Blob Wh = blobFromNPY(_tf("lstm.prototxt.w_2.npy")); - Blob b = blobFromNPY(_tf("lstm.prototxt.w_1.npy")); + Mat Wx = blobFromNPY(_tf("lstm.prototxt.w_0.npy")); + Mat Wh = blobFromNPY(_tf("lstm.prototxt.w_2.npy")); + Mat b = blobFromNPY(_tf("lstm.prototxt.w_1.npy")); layer->setWeights(Wh, Wx, b); - Blob inp = blobFromNPY(_tf("recurrent.input.npy")); - std::vector inputs(1, inp), outputs; + Mat inp = blobFromNPY(_tf("recurrent.input.npy")); + std::vector inputs(1, inp), outputs; runLayer(layer, inputs, outputs); - Blob h_t_reference = blobFromNPY(_tf("lstm.prototxt.h_1.npy")); + Mat h_t_reference = blobFromNPY(_tf("lstm.prototxt.h_1.npy")); normAssert(h_t_reference, outputs[0]); } TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent) { - Ptr layer = RNNLayer::create(); + Ptr layer = RNNLayer::create(LayerParams()); layer->setWeights( blobFromNPY(_tf("rnn.prototxt.w_0.npy")), @@ -352,10 
+326,10 @@ TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent) blobFromNPY(_tf("rnn.prototxt.w_3.npy")), blobFromNPY(_tf("rnn.prototxt.w_4.npy")) ); - std::vector output, input(1, blobFromNPY(_tf("recurrent.input.npy"))); + std::vector output, input(1, blobFromNPY(_tf("recurrent.input.npy"))); runLayer(layer, input, output); - Blob h_ref = blobFromNPY(_tf("rnn.prototxt.h_1.npy")); + Mat h_ref = blobFromNPY(_tf("rnn.prototxt.h_1.npy")); normAssert(h_ref, output[0]); } @@ -364,10 +338,10 @@ class Layer_RNN_Test : public ::testing::Test { public: int nX, nH, nO, nT, nS; - Blob Whh, Wxh, bh, Who, bo; + Mat Whh, Wxh, bh, Who, bo; Ptr layer; - std::vector inputs, outputs; + std::vector inputs, outputs; Layer_RNN_Test() { @@ -377,13 +351,13 @@ class Layer_RNN_Test : public ::testing::Test nH = 64; nO = 100; - Whh = Blob(BlobShape(nH, nH)); - Wxh = Blob(BlobShape(nH, nX)); - bh = Blob(BlobShape(nH, 1)); - Who = Blob(BlobShape(nO, nH)); - bo = Blob(BlobShape(nO, 1)); + Whh = Mat::ones(nH, nH, CV_32F); + Wxh = Mat::ones(nH, nX, CV_32F); + bh = Mat::ones(nH, 1, CV_32F); + Who = Mat::ones(nO, nH, CV_32F); + bo = Mat::ones(nO, 1, CV_32F); - layer = RNNLayer::create(); + layer = RNNLayer::create(LayerParams()); layer->setProduceHiddenOutput(true); layer->setWeights(Wxh, bh, Whh, Who, bo); } @@ -391,12 +365,15 @@ class Layer_RNN_Test : public ::testing::Test TEST_F(Layer_RNN_Test, get_set_test) { - inputs.push_back(Blob(BlobShape(nT, nS, 1, nX))); + int sz[] = { nT, nS, 1, nX }; + Mat inp(4, sz, CV_32F); + randu(inp, -1., 1.); + inputs.push_back(inp); runLayer(layer, inputs, outputs); EXPECT_EQ(outputs.size(), 2u); - EXPECT_EQ(outputs[0].shape(), BlobShape(nT, nS, nO)); - EXPECT_EQ(outputs[1].shape(), BlobShape(nT, nS, nH)); + EXPECT_TRUE(shapeEqual(getShape(outputs[0]), makeShape(nT, nS, nO))); + EXPECT_TRUE(shapeEqual(getShape(outputs[1]), makeShape(nT, nS, nH))); } } diff --git a/modules/dnn/test/test_main.cpp b/modules/dnn/test/test_main.cpp index 42917f29976..ff8ec044807 100644 --- a/modules/dnn/test/test_main.cpp +++ b/modules/dnn/test/test_main.cpp @@ -8,24 +8,4 @@ namespace cvtest using namespace cv; using namespace cv::dnn; -TEST(BlobShape_SimpleConstr, Regression) -{ - BlobShape sd; - - BlobShape s1(0); - EXPECT_EQ(s1.dims(), 1); - EXPECT_EQ(s1[0], 0); - - BlobShape s2(0, 0); - EXPECT_EQ(s2.dims(), 2); - EXPECT_EQ(s2[0], 0); - EXPECT_EQ(s2[1], 0); -} - -TEST(BlobShape_EmptyFill, Regression) -{ - BlobShape s(10, (int*)NULL); - EXPECT_EQ(s.dims(), 10); -} - } diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index bb73dd5cbb4..13690d39e9e 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -40,13 +40,13 @@ TEST(Test_TensorFlow, read_inception) resize(sample, input, Size(224, 224)); input -= 128; // mean sub - dnn::Blob inputBlob = dnn::Blob::fromImages(input); + Mat inputBlob = blobFromImage(input, 1.); net.setBlob("_input.input", inputBlob); net.forward(); - Blob out = net.getBlob("softmax2"); - std::cout << out.dims() << std::endl; + Mat out = net.getBlob("softmax2"); + std::cout << out.dims << std::endl; } TEST(Test_TensorFlow, inception_accuracy) @@ -62,15 +62,13 @@ TEST(Test_TensorFlow, inception_accuracy) Mat sample = imread(_tf("grace_hopper_227.png")); ASSERT_TRUE(!sample.empty()); resize(sample, sample, Size(224, 224)); - cv::cvtColor(sample, sample, cv::COLOR_BGR2RGB); - dnn::Blob inputBlob = dnn::Blob::fromImages(sample); + Mat inputBlob = blobFromImage(sample, 1.); net.setBlob(".input", inputBlob); 
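
Reviewer note: the rewritten runLayer helper earlier in test_layers.cpp boils down to building a vector of Mat pointers and calling allocate() then forward(), matching the new Layer signatures. A condensed version:

```cpp
// Sketch: run a single dnn layer on in-memory inputs.
#include <opencv2/dnn.hpp>
#include <vector>

static void runLayerOnce(const cv::Ptr<cv::dnn::Layer> &layer,
                         std::vector<cv::Mat> &inps, std::vector<cv::Mat> &outs)
{
    std::vector<cv::Mat*> inpPtrs(inps.size());
    for (size_t i = 0; i < inps.size(); i++)
        inpPtrs[i] = &inps[i];
    layer->allocate(inpPtrs, outs); // shapes the outputs from the inputs
    layer->forward(inpPtrs, outs);  // fills them
}
```
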
net.forward(); - Blob out = net.getBlob("softmax2"); - - Blob ref = blobFromNPY(_tf("tf_inception_prob.npy")); + Mat out = net.getBlob("softmax2"); + Mat ref = blobFromNPY(_tf("tf_inception_prob.npy")); normAssert(ref, out); } diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index dae3eda8e6f..3da0ccac78c 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -82,7 +82,7 @@ static void runTorchNet(String prefix, String outLayerName = "", ASSERT_TRUE(importer != NULL); importer->populateNet(net); - Blob inp, outRef; + Mat inp, outRef; ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) ); ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) ); @@ -90,14 +90,14 @@ net.forward(); if (outLayerName.empty()) outLayerName = net.getLayerNames().back(); - Blob out = net.getBlob(outLayerName); + Mat out = net.getBlob(outLayerName); normAssert(outRef, out); if (check2ndBlob) { - Blob out2 = net.getBlob(outLayerName + ".1"); - Blob ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary); + Mat out2 = net.getBlob(outLayerName + ".1"); + Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary); normAssert(out2, ref2); } } @@ -169,15 +169,12 @@ TEST(Torch_Importer, ENet_accuracy) } Mat sample = imread(_tf("street.png", false)); - cv::cvtColor(sample, sample, cv::COLOR_BGR2RGB); - sample.convertTo(sample, CV_32F, 1/255.0); - dnn::Blob inputBlob = dnn::Blob::fromImages(sample); + Mat inputBlob = blobFromImage(sample, 1./255); net.setBlob("", inputBlob); net.forward(); - dnn::Blob out = net.getBlob(net.getLayerNames().back()); - - Blob ref = blobFromNPY(_tf("torch_enet_prob.npy", false)); + Mat out = net.getBlob(net.getLayerNames().back()); + Mat ref = blobFromNPY(_tf("torch_enet_prob.npy", false)); normAssert(ref, out); } diff --git a/modules/dnn/tutorials/tutorial_dnn_googlenet.markdown b/modules/dnn/tutorials/tutorial_dnn_googlenet.markdown index 1eaaf251ac5..01bfca11704 100644 --- a/modules/dnn/tutorials/tutorial_dnn_googlenet.markdown +++ b/modules/dnn/tutorials/tutorial_dnn_googlenet.markdown @@ -41,7 +41,7 @@ Explanation Now image is actually a 3-dimensional array with 224x224x3 shape. - Next, we convert the image to 4-dimensional blob (so-called batch) with 1x3x224x224 shape by using special cv::dnn::Blob::fromImages constructor. + Next, we convert the image to a 4-dimensional blob (a so-called batch) with 1x3x224x224 shape using the cv::dnn::blobFromImages function.
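
Reviewer note: for readers of the tutorial line above, blobFromImage/blobFromImages is what packs an HxWxC image into a 1xCxHxW float blob. A hand-rolled equivalent for a single image — a sketch only, ignoring the scale-factor argument and any mean handling the real function may perform:

```cpp
// Sketch: HWC image -> 1xCxHxW float blob.
#include <opencv2/core.hpp>
#include <vector>

static cv::Mat imageToBlob(const cv::Mat &img)
{
    cv::Mat imgF;
    img.convertTo(imgF, CV_32F);
    std::vector<cv::Mat> channels;
    cv::split(imgF, channels); // interleaved HWC -> C planes of H x W
    int sz[] = { 1, (int)channels.size(), img.rows, img.cols };
    cv::Mat blob(4, sz, CV_32F);
    for (int c = 0; c < (int)channels.size(); c++)
    {
        cv::Mat plane(img.rows, img.cols, CV_32F, blob.ptr<float>(0, c));
        channels[c].copyTo(plane); // fill the blob's c-th channel in place
    }
    return blob;
}
```
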
-# Pass the blob to the network @snippet dnn/samples/caffe_googlenet.cpp Set input blob diff --git a/modules/tracking/src/gtrTracker.cpp b/modules/tracking/src/gtrTracker.cpp index 5fc9092528c..0ebdd55c9ae 100644 --- a/modules/tracking/src/gtrTracker.cpp +++ b/modules/tracking/src/gtrTracker.cpp @@ -165,19 +165,14 @@ bool TrackerGOTURNImpl::updateImpl(const Mat& image, Rect2d& boundingBox) searchPatch = searchPatch - 128; //Convert to Float type - targetPatch.convertTo(targetPatch, CV_32F); - searchPatch.convertTo(searchPatch, CV_32F); - - dnn::Blob targetBlob = dnn::Blob::fromImages(targetPatch); - dnn::Blob searchBlob = dnn::Blob::fromImages(searchPatch); + Mat targetBlob = dnn::blobFromImage(targetPatch); + Mat searchBlob = dnn::blobFromImage(searchPatch); net.setBlob(".data1", targetBlob); net.setBlob(".data2", searchBlob); net.forward(); - dnn::Blob res = net.getBlob("scale"); - - Mat resMat = res.matRefConst().reshape(1, 1); + Mat resMat = net.getBlob("scale").reshape(1, 1); curBB.x = targetPatchRect.x + (resMat.at<float>(0) * targetPatchRect.width / INPUT_SIZE) - targetPatchRect.width; curBB.y = targetPatchRect.y + (resMat.at<float>(1) * targetPatchRect.height / INPUT_SIZE) - targetPatchRect.height;
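
Reviewer note: a compact crib of the Blob-to-Mat replacements this patch applies everywhere, useful when porting out-of-tree layers (old calls in the trailing comments; not an exhaustive mapping):

```cpp
// Sketch: old dnn::Blob idioms and their cv::Mat equivalents.
#include <opencv2/core.hpp>

static void blobMigrationCrib()
{
    int sz[] = { 1, 3, 224, 224 };
    cv::Mat blob(4, sz, CV_32F);     // was: Blob(BlobShape(1, 3, 224, 224))
    int channels = blob.size[1];     // was: blob.channels()
    int dims = blob.dims;            // was: blob.dims()
    float *data = blob.ptr<float>(); // was: blob.ptrf()
    cv::Mat &m = blob;               // was: blob.matRef() / blob.matRefConst()
    (void)channels; (void)dims; (void)data; (void)m;
}
```
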