Add GELU layer for vision transformers #23219

Merged: 2 commits, Feb 10, 2023
Changes from 1 commit
12 changes: 12 additions & 0 deletions modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -802,6 +802,18 @@ CV__DNN_INLINE_NS_BEGIN
static Ptr<SeluLayer> create(const LayerParams &params);
};

class CV_EXPORTS GeluLayer : public ActivationLayer
{
public:
static Ptr<GeluLayer> create(const LayerParams &params);
};

class CV_EXPORTS GeluApproximationLayer : public ActivationLayer
{
public:
static Ptr<GeluApproximationLayer> create(const LayerParams &params);
};

class CV_EXPORTS ThresholdedReluLayer : public ActivationLayer
{
public:
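The two declarations above follow the pattern of the neighbouring activation layers: a static create() factory taking LayerParams. A minimal usage sketch, assuming a build that contains this patch (the layer name below is only illustrative):

    #include <opencv2/dnn.hpp>
    #include <opencv2/dnn/all_layers.hpp>

    int main()
    {
        // Construct the new activation layer directly through its factory method.
        cv::dnn::LayerParams params;
        params.name = "gelu1";   // illustrative name
        params.type = "Gelu";
        cv::Ptr<cv::dnn::GeluLayer> gelu = cv::dnn::GeluLayer::create(params);
        CV_Assert(!gelu.empty());
        return 0;
    }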
2 changes: 2 additions & 0 deletions modules/dnn/src/init.cpp
@@ -145,6 +145,8 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(HardSigmoid, HardSigmoidLayer);
CV_DNN_REGISTER_LAYER_CLASS(Selu, SeluLayer);
CV_DNN_REGISTER_LAYER_CLASS(ThresholdedRelu,ThresholdedReluLayer);
CV_DNN_REGISTER_LAYER_CLASS(Gelu, GeluLayer);
CV_DNN_REGISTER_LAYER_CLASS(GeluApproximation, GeluApproximationLayer);
CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer);
CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer);
CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer);
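CV_DNN_REGISTER_LAYER_CLASS binds a string type to a layer class, so the new layers can also be obtained by name through the layer factory. A hedged sketch of that path, assuming only the registrations added above:

    #include <opencv2/dnn.hpp>
    #include <opencv2/dnn/layer.hpp>

    int main()
    {
        // "Gelu" resolves to GeluLayer because of the registration added above.
        cv::dnn::LayerParams lp;
        lp.type = "Gelu";
        cv::Ptr<cv::dnn::Layer> layer =
            cv::dnn::LayerFactory::createLayerInstance("Gelu", lp);
        return layer.empty() ? 1 : 0;
    }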
73 changes: 73 additions & 0 deletions modules/dnn/src/layers/elementwise_layers.cpp
@@ -837,6 +837,63 @@ struct BaseDefaultFunctor : public BaseFunctor
static const char* const ocl_kernel_name;
};

struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
{
typedef GeluLayer Layer;

explicit GeluFunctor() {}

bool supportBackend(int backendId, int)
{
return backendId == DNN_BACKEND_OPENCV;
}

inline float calculate(float x) const
{
return 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
}

int64 getFLOPSPerElement() const { return 100; }
};

template<>
const char* const BaseDefaultFunctor<GeluFunctor>::ocl_kernel_name = "GeluForward";

namespace GeluApproximationConstants
{
static constexpr float sqrt_2_pi = 0.7978845834732056f;
static constexpr float coef_sqrt_2_pi = 0.044714998453855515f * sqrt_2_pi;
}

struct GeluApproximationFunctor : public BaseDefaultFunctor<GeluApproximationFunctor>
{
typedef GeluApproximationLayer Layer;

explicit GeluApproximationFunctor() {}

bool supportBackend(int backendId, int)
{
return backendId == DNN_BACKEND_OPENCV;
}

inline float calculate(float x) const
{
return 0.5f * x * (1.f + tanh(x * (GeluApproximationConstants::sqrt_2_pi +
GeluApproximationConstants::coef_sqrt_2_pi * x * x)));
}

inline void setKernelParams(ocl::Kernel& kernel) const
{
kernel.set(3, GeluApproximationConstants::sqrt_2_pi);
kernel.set(4, GeluApproximationConstants::coef_sqrt_2_pi);
}

int64 getFLOPSPerElement() const { return 100; }
};

template<>
const char* const BaseDefaultFunctor<GeluApproximationFunctor>::ocl_kernel_name = "GeluApproximationForward";
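For reference, GeluFunctor computes the exact definition GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))), while GeluApproximationFunctor computes the common tanh approximation 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))), with the two constants folded together as in GeluApproximationConstants. A standalone sketch (not part of the patch) comparing the two forms on a few inputs:

    #include <cmath>
    #include <cstdio>

    // Exact GELU, matching GeluFunctor::calculate().
    static float geluExact(float x)
    {
        const float inv_sqrt2 = 0.70710678f;   // 1/sqrt(2)
        return 0.5f * x * (1.0f + std::erf(x * inv_sqrt2));
    }

    // Tanh approximation, matching GeluApproximationFunctor::calculate().
    static float geluApprox(float x)
    {
        const float sqrt_2_pi = 0.7978845834732056f;                  // sqrt(2/pi)
        const float coef_sqrt_2_pi = 0.044714998453855515f * sqrt_2_pi;
        return 0.5f * x * (1.0f + std::tanh(x * (sqrt_2_pi + coef_sqrt_2_pi * x * x)));
    }

    int main()
    {
        const float xs[] = { -3.f, -1.f, 0.f, 1.f, 3.f };
        for (float x : xs)
            std::printf("x=%+.1f  exact=%+.6f  approx=%+.6f\n",
                        x, geluExact(x), geluApprox(x));
        return 0;
    }

The two curves stay very close over typical activation ranges, which is why many vision-transformer implementations export the tanh form instead of the erf form.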

struct TanHFunctor : public BaseDefaultFunctor<TanHFunctor>
{
typedef TanHLayer Layer;
@@ -2694,6 +2751,22 @@ Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
return l;
}

Ptr<GeluLayer> GeluLayer::create(const LayerParams& params)
{
Ptr<GeluLayer> l(new ElementWiseLayer<GeluFunctor>(GeluFunctor()));
l->setParamsFrom(params);

return l;
}

Ptr<GeluApproximationLayer> GeluApproximationLayer::create(const LayerParams& params)
{
Ptr<GeluApproximationLayer> l(new ElementWiseLayer<GeluApproximationFunctor>(GeluApproximationFunctor()));
l->setParamsFrom(params);

return l;
}

Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
{
Ptr<TanHLayer> l(new ElementWiseLayer<TanHFunctor>());
179 changes: 179 additions & 0 deletions modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -132,6 +132,183 @@ class ONNXGraphWrapper : public ImportGraphWrapper
opencv_onnx::GraphProto& net;
};

/* Fusion for Gelu.

Graph before fusion:
   +---------------------------------------------+
   |                                             |
[Input] -> Div[B=sqrt(2)] -> Erf -> Add[B=1] -> Mul -> Mul[B=0.5] -> [Output]

Graph after fusion:
[Input] -> Gelu -> [Output]

*/
class GeluSubGraph : public Subgraph
{
public:
GeluSubGraph()
{
int input = addNodeToMatch("");
int div = addNodeToMatch("Div", input, addNodeToMatch("") /* B=sqrt(2) */ );
int erf = addNodeToMatch("Erf", div);
int add = addNodeToMatch("Add", erf, addNodeToMatch("") /* B=1 */ );
int mul = addNodeToMatch("Mul", input, add);
addNodeToMatch("Mul", mul, addNodeToMatch("") /* B=0.5 */) ;

setFusedNode("Gelu", input);
}

static bool isWithInitializer(const std::vector<int>& matchedNodesIds)
{
// if the constants come from Constant nodes, those nodes are matched between the other nodes
if (matchedNodesIds[2] - matchedNodesIds[1] != 1)
return false;
// otherwise the constants come from initializers, so no Constant node lies in between
return true;
}

static float extractConstant(const Ptr<ImportGraphWrapper>& net, int node_id, int input_id, bool withInitializer)
{
if (withInitializer)
{
auto onnx_net = net.dynamicCast<ONNXGraphWrapper>();
int initializer_id = onnx_net->getInputInitializerId(node_id, input_id);
Mat const_mat = onnx_net->getMatFromInitializer(initializer_id);
return *const_mat.ptr<float>();
} else {
const Ptr<ImportNodeWrapper> node = net->getNode(node_id);
int constant_id = getInputNodeId(net, node, input_id);
Ptr<ImportNodeWrapper> constant_ptr = net->getNode(constant_id);
opencv_onnx::NodeProto* constant_node = constant_ptr.dynamicCast<ONNXNodeWrapper>()->node;
opencv_onnx::TensorProto constant_proto = constant_node->attribute(0).t();
Mat constant_mat = getMatFromTensor(constant_proto);
return *constant_mat.ptr<float>();
}
}

virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
std::vector<int>& matchedNodesIds,
std::vector<int>& targetNodesIds) CV_OVERRIDE
{
if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
{
bool withInitializer = isWithInitializer(matchedNodesIds);

// Check Div[B=sqrt(2)]
float divisor = extractConstant(net, matchedNodesIds[0], 1, withInitializer);
if (divisor - M_SQRT2 >= 1e-6)
return false;

// Check Add[B=1]
float add_const = extractConstant(net, matchedNodesIds[2], 1, withInitializer);
if (add_const - 1.f >= 1e-6)
return false;

// Check Mul[B=0.5]
float mul_const = extractConstant(net, matchedNodesIds[4], 1, withInitializer);
if (mul_const - 0.5f >= 1e-6)
return false;

return true;
}
return false;
}
};

/* Fusion for GeluApproximation.

Graph before fusion:
   +--------+------+----------------+------------------------------------+
   |        |      |                |                                    |
[Input] -> Mul -> Mul -> Mul[ ] -> Add -> Mul[ ] -> Tanh -> Add[A=1] -> Mul -> Mul[A=0.5] -> [Output]
                           /                \
     A=0.044714998453855515                  A=sqrt(2/pi)

Graph after fusion:
[Input] -> GeluApproximation -> [Output]

*/
class GeluApproximationSubGraph : public Subgraph
{
public:
GeluApproximationSubGraph()
{
int input = addNodeToMatch("");
int mul0 = addNodeToMatch("Mul", input, input);
int mul1 = addNodeToMatch("Mul", input, mul0);
int mul2 = addNodeToMatch("Mul", addNodeToMatch("") /* A=0.044714998453855515 */, mul1);
int add0 = addNodeToMatch("Add", input, mul2);
int mul3 = addNodeToMatch("Mul", addNodeToMatch("") /* A=sqrt(2/pi) */, add0);
int tanh = addNodeToMatch("Tanh", mul3);
int add1 = addNodeToMatch("Add", addNodeToMatch("") /* A=1 */, tanh);
int mul4 = addNodeToMatch("Mul", input, add1);
addNodeToMatch("Mul", addNodeToMatch("") /* A=0.5 */, mul4);

setFusedNode("GeluApproximation", input);
}

static bool isWithInitializer(const std::vector<int>& matchedNodesIds)
{
// if the constants come from Constant nodes, those nodes are matched between the other nodes
if (matchedNodesIds[2] - matchedNodesIds[1] != 1)
return false;
// otherwise the constants come from initializers, so no Constant node lies in between
return true;
}

static float extractConstant(const Ptr<ImportGraphWrapper>& net, int node_id, int input_id, bool withInitializer)
{
if (withInitializer)
{
auto onnx_net = net.dynamicCast<ONNXGraphWrapper>();
int initializer_id = onnx_net->getInputInitializerId(node_id, input_id);
Mat const_mat = onnx_net->getMatFromInitializer(initializer_id);
return *const_mat.ptr<float>();
} else {
const Ptr<ImportNodeWrapper> node = net->getNode(node_id);
int constant_id = getInputNodeId(net, node, input_id);
Ptr<ImportNodeWrapper> constant_ptr = net->getNode(constant_id);
opencv_onnx::NodeProto* constant_node = constant_ptr.dynamicCast<ONNXNodeWrapper>()->node;
opencv_onnx::TensorProto constant_proto = constant_node->attribute(0).t();
Mat constant_mat = getMatFromTensor(constant_proto);
return *constant_mat.ptr<float>();
}
}

virtual bool match(const Ptr<ImportGraphWrapper>& net, int nodeId,
std::vector<int>& matchedNodesIds,
std::vector<int>& targetNodesIds) CV_OVERRIDE
{
if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds))
{
bool withInitializer = isWithInitializer(matchedNodesIds);

// Check Mul[A=0.044714998453855515]
float coef = extractConstant(net, matchedNodesIds[2], 0, withInitializer);
if (coef - 0.044714998453855515 >= 1e-6)
return false;

// Check Mul[A=sqrt(2/pi)]
float sqrt_2_pi = extractConstant(net, matchedNodesIds[4], 0, withInitializer);
if (sqrt_2_pi - 0.7978845834732056 >= 1e-6)
return false;

// Check Add[A=1]
float add_const = extractConstant(net, matchedNodesIds[6], 0, withInitializer);
if (add_const - 1.f >= 1e-6)
return false;

// Check Mul[A=0.5]
float mul_const = extractConstant(net, matchedNodesIds[8], 0, withInitializer);
if (mul_const - 0.5f >= 1e-6)
return false;

return true;
}
return false;
}
};

class LayerNormSubGraph : public Subgraph
{
public:
@@ -904,6 +1081,8 @@ class BatchNormalizationSubgraph2 : public BatchNormalizationSubgraphBase
void simplifySubgraphs(opencv_onnx::GraphProto& net)
{
std::vector<Ptr<Subgraph> > subgraphs;
subgraphs.push_back(makePtr<GeluSubGraph>());
subgraphs.push_back(makePtr<GeluApproximationSubGraph>());
subgraphs.push_back(makePtr<LayerNormSubGraph>());
subgraphs.push_back(makePtr<GatherCastSubgraph>());
subgraphs.push_back(makePtr<MulCastSubgraph>());
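The two new subgraphs sit at the front of the list, so they are matched ahead of the other patterns. A hedged way to confirm the fusion after import (the model file name is hypothetical) is to list the layer types of the loaded network:

    #include <opencv2/dnn.hpp>
    #include <iostream>

    int main()
    {
        // Hypothetical model exported with an expanded GELU subgraph.
        cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_gelu.onnx");
        for (const cv::String& name : net.getLayerNames())
        {
            // After fusion the Div/Erf/Add/Mul/Mul chain should show up as a
            // single layer of type "Gelu" (or "GeluApproximation").
            cv::Ptr<cv::dnn::Layer> layer = net.getLayer(name);
            std::cout << name << " : " << layer->type << std::endl;
        }
        return 0;
    }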
3 changes: 2 additions & 1 deletion modules/dnn/src/onnx/onnx_importer.cpp
@@ -4050,7 +4050,8 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version)
std::vector<std::string> simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos",
"Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish",
"Identity", "Log", "Round", "Reciprocal", "Selu", "Sign", "Sigmoid", "Sin", "Sinh", "Softmax",
"Softplus", "Softsign", "Shrink", "Sqrt", "Tan", "ThresholdedRelu"};
"Softplus", "Softsign", "Shrink", "Sqrt", "Tan", "ThresholdedRelu", "Gelu",
"GeluApproximation"};
for (const auto& name : simpleLayers)
{
dispatch[name] = &ONNXImporter::parseSimpleLayers;
16 changes: 16 additions & 0 deletions modules/dnn/src/opencl/activations.cl
@@ -307,6 +307,22 @@ __kernel void ThresholdedReluForward(const int n, __global T* in, __global T* out
out[index] = (in[index] > alpha ? in[index] : 0.f);
}

__kernel void GeluForward(const int n, __global T* in, __global T* out)
{
int index = get_global_id(0);
if(index < n)
out[index] = (T)0.5f * in[index] * ( (T)1.f + erf(in[index] * M_SQRT1_2) );
}

__kernel void GeluApproximationForward(const int n, __global T* in, __global T* out,
const KERNEL_ARG_DTYPE sqrt_2_pi,
const KERNEL_ARG_DTYPE coef_sqrt_2_pi)
{
int index = get_global_id(0);
if(index < n)
out[index] = (T)0.5f * in[index] * ( (T)1.f + tanh(in[index] * (sqrt_2_pi + coef_sqrt_2_pi * in[index] * in[index])) );
}
Review comment (Member):

Please use this OpenCL code:

__kernel void GeluForward(const int n, __global T* in, __global T* out)
{
    int index = get_global_id(0);
    if (index < n)
    {
        T x = in[index];
        out[index] = (T)0.5f * x * ( (T)1.f + erf(x * M_SQRT1_2) );
    }
}

__kernel void GeluApproximationForward(const int n, __global T* in, __global T* out)
{
    // see GeluApproximationConstants from .cpp
    const T sqrt_2_pi = 0.7978845834732056f;
    const T coef_sqrt_2_pi = 0.044714998453855515f * sqrt_2_pi;

    int index = get_global_id(0);
    if(index < n)
    {
        T x = in[index];
        out[index] = (T)0.5f * x * ( (T)1.f + tanh(x * (sqrt_2_pi + coef_sqrt_2_pi * x * x)) );
    }
}

and drop setKernelParams() method.

Reply (Member Author):

Okay


__kernel void ShrinkForward(const int n, __global T* in, __global T* out,
const KERNEL_ARG_DTYPE bias,
const KERNEL_ARG_DTYPE lambd)
6 changes: 6 additions & 0 deletions modules/dnn/test/test_onnx_importer.cpp
@@ -2451,6 +2451,12 @@ TEST_P(Test_ONNX_layers, LayerNormExpanded)
testONNXModels("layer_norm_expanded_with_initializers");
}

TEST_P(Test_ONNX_layers, Gelu)
{
testONNXModels("gelu");
testONNXModels("gelu_approximation");
}

INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());

}} // namespace
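The test relies on pre-generated "gelu" and "gelu_approximation" test models whose expanded subgraphs are fused by the importer. A hedged end-to-end sketch of the same path with a user-supplied model (file name and input shape are hypothetical):

    #include <opencv2/dnn.hpp>
    #include <iostream>

    int main()
    {
        // The expanded GELU chain in the model is fused into a single Gelu
        // layer during import, then executed on the OpenCV backend.
        cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_gelu.onnx");
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);

        cv::Mat input(1, 16, CV_32F, cv::Scalar(0.5f));   // hypothetical blob
        net.setInput(input);
        cv::Mat out = net.forward();
        std::cout << "output elements: " << out.total() << std::endl;
        return 0;
    }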