From f649442dddefbd69b057174cd731d2d0fee18a29 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Tue, 9 Feb 2021 21:04:39 -0600
Subject: [PATCH] New custom operator extension mechanism (#30690)

* initial commit: simple demo
* polish copyright format
* add grad op simple demo
* adapt to an uncertain number of arguments
* change trait macro name
* add place & dtype support for add kernel
* add dispatch and infershape func
* polish code & add notes
* add dynamic_loader dep for paddle_framework
* add new custom op test dir
* polish impl details
* add unittest for new custom op
* fix failed unittest
* Custom op (#1): wrap framework tensor with LoDTensor; add CustomTensor
  default constructor, const size() and const place; refactor place related
  api; make Tensor copy; debug CustomTensor core; go back to shared_ptr for
  custom tensor; add gpu test; merge latest cwh code in; adjust ut code of
  custom op; fix compile errors
* Remove ShareData from user && Change CustomTensor to Tensor && Support
  more data type (#2): hide ShareData from/to; rename CustomTensor to
  Tensor; refactor register design & add test; change op_function to
  op_meta_info; split op meta info into .h and .cc; move get methods into
  friend class; move OpMetaInfoHelper and CustomTensorUtils into framework
  space; change pybind api name; move PD C API into op meta info; add
  register custom op api; remove inference cmake change
* refactor copy_to api && change Reshape to lowercase && support more dtype
  && add more test (#3): support multi dtype; remove lod; make reshape
  lowercase; add copy test and refactor copy api; fix copy_to error; add
  more tests; polish details & error message
* Add cast api && Change copy related api to copy_to && add more test (#4):
  add type cast; make copy_to api; merge cwh code; add more error logs;
  polish code; remove test comments; fix lost uint8 type error; add test
  for coverage; polish details by reviewer comments; add prefix for
  DISABLE_COPY_AND_ASSIGN

Co-authored-by: Jiabin Yang <360788950@qq.com>
---
 paddle/extension.h                            |  18 +
 paddle/fluid/extension/include/all.h          |  25 +
 paddle/fluid/extension/include/dispatch.h     |  46 ++
 paddle/fluid/extension/include/dtype.h        |  39 ++
 paddle/fluid/extension/include/op_meta_info.h | 315 +++++++++++
 paddle/fluid/extension/include/place.h        |  22 +
 paddle/fluid/extension/include/tensor.h       |  95 ++++
 paddle/fluid/extension/src/op_meta_info.cc    | 120 ++++
 paddle/fluid/extension/src/tensor.cc          | 378 +++++++++++++
 paddle/fluid/framework/CMakeLists.txt         |  10 +-
 paddle/fluid/framework/custom_operator.cc     | 534 ++++++++++++++++++
 paddle/fluid/framework/custom_operator.h      |  32 ++
 paddle/fluid/framework/custom_tensor_test.cc  | 246 ++++++++
 paddle/fluid/framework/custom_tensor_utils.h  | 145 +++++
 paddle/fluid/framework/data_type.cc           |   4 +
 paddle/fluid/framework/data_type_transform.cc |   4 +-
 paddle/fluid/framework/op_meta_info_helper.h  |  54 ++
 paddle/fluid/pybind/CMakeLists.txt            |   2 +-
 paddle/fluid/pybind/pybind.cc                 |  71 ++-
 .../fluid/tests/custom_op/CMakeLists.txt      |   3 +
 .../paddle/fluid/tests/custom_op/__init__.py  |  13 +
 .../fluid/tests/custom_op/relu_op_simple.cc   | 116 ++++
 .../fluid/tests/custom_op/relu_op_simple.cu   |  73 +++
 .../fluid/tests/custom_op/setup_build.py      |   4 +
 .../fluid/tests/custom_op/setup_install.py    |   4 +
 .../tests/custom_op/setup_install_simple.py   |  28 +
 .../custom_op/test_custom_op_with_setup.py    |   4 +
 .../fluid/tests/custom_op/test_jit_load.py    |   4 +
 .../tests/custom_op/test_setup_install.py     |   7 +-
 .../custom_op/test_simple_custom_op_jit.py    |  66 +++
 .../custom_op/test_simple_custom_op_setup.py  | 156 +++++
 python/paddle/utils/cpp_extension/__init__.py |   1 +
 .../utils/cpp_extension/cpp_extension.py      |   4 +
 .../utils/cpp_extension/extension_utils.py    |  51 +-
 python/setup.py.in                            |   2 +
 35 files changed, 2651 insertions(+), 45 deletions(-)
 create mode 100644 paddle/extension.h
 create mode 100644 paddle/fluid/extension/include/all.h
 create mode 100644 paddle/fluid/extension/include/dispatch.h
 create mode 100644 paddle/fluid/extension/include/dtype.h
 create mode 100644 paddle/fluid/extension/include/op_meta_info.h
 create mode 100644 paddle/fluid/extension/include/place.h
 create mode 100644 paddle/fluid/extension/include/tensor.h
 create mode 100644 paddle/fluid/extension/src/op_meta_info.cc
 create mode 100644 paddle/fluid/extension/src/tensor.cc
 create mode 100644 paddle/fluid/framework/custom_operator.cc
 create mode 100644 paddle/fluid/framework/custom_operator.h
 create mode 100644 paddle/fluid/framework/custom_tensor_test.cc
 create mode 100644 paddle/fluid/framework/custom_tensor_utils.h
 create mode 100644 paddle/fluid/framework/op_meta_info_helper.h
 create mode 100644 python/paddle/fluid/tests/custom_op/__init__.py
 create mode 100644 python/paddle/fluid/tests/custom_op/relu_op_simple.cc
 create mode 100644 python/paddle/fluid/tests/custom_op/relu_op_simple.cu
 create mode 100644 python/paddle/fluid/tests/custom_op/setup_install_simple.py
 create mode 100644 python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py
 create mode 100644 python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py

diff --git a/paddle/extension.h b/paddle/extension.h
new file mode 100644
index 0000000000000..1c64b92c5a374
--- /dev/null
+++ b/paddle/extension.h
@@ -0,0 +1,18 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+// All paddle APIs in the C++ frontend
+#include "paddle/fluid/extension/include/all.h"
diff --git a/paddle/fluid/extension/include/all.h b/paddle/fluid/extension/include/all.h
new file mode 100644
index 0000000000000..5aa61f8203e75
--- /dev/null
+++ b/paddle/fluid/extension/include/all.h
@@ -0,0 +1,25 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#if !defined(_MSC_VER) && __cplusplus < 199711L
+#error C++11 or later compatible compiler is required to use Paddle.
+#endif
+
+#include "paddle/fluid/extension/include/dispatch.h"
+#include "paddle/fluid/extension/include/dtype.h"
+#include "paddle/fluid/extension/include/op_meta_info.h"
+#include "paddle/fluid/extension/include/place.h"
+#include "paddle/fluid/extension/include/tensor.h"

diff --git a/paddle/fluid/extension/include/dispatch.h b/paddle/fluid/extension/include/dispatch.h
new file mode 100644
index 0000000000000..a782b2b132113
--- /dev/null
+++ b/paddle/fluid/extension/include/dispatch.h
@@ -0,0 +1,46 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/extension/include/dtype.h"
+
+namespace paddle {
+
+#define PD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \
+  case enum_type: {                                                       \
+    using HINT = type;                                                    \
+    __VA_ARGS__();                                                        \
+    break;                                                                \
+  }
+
+#define PD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \
+  PD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__)
+
+#define PD_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...)                          \
+  [&] {                                                                      \
+    const auto& dtype = TYPE;                                                \
+    switch (dtype) {                                                         \
+      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float,         \
+                           __VA_ARGS__)                                      \
+      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double,        \
+                           __VA_ARGS__)                                      \
+      default:                                                               \
+        throw std::runtime_error("function not implemented for this type."); \
+    }                                                                        \
+  }()
+
+// TODO(chenweihang): implement other DISPATCH macros in next PR
+
+}  // namespace paddle
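
To make the dispatch contract concrete, here is a minimal sketch of a CPU kernel built on this macro (illustrative names; it assumes the Tensor API introduced later in this patch, and follows the same pattern as the relu_op_simple.cc test added below):

#include <algorithm>
#include <vector>
#include "paddle/extension.h"

// Illustrative typed kernel body; `data_t` is bound by the dispatch macro.
template <typename data_t>
void relu_cpu_forward_kernel(const data_t* x_data, data_t* out_data,
                             int64_t numel) {
  for (int64_t i = 0; i < numel; ++i) {
    out_data[i] = std::max(static_cast<data_t>(0.), x_data[i]);
  }
}

std::vector<paddle::Tensor> ReluCPUForward(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
  out.reshape(x.shape());
  // PD_DISPATCH_FLOATING_TYPES switches on the runtime dtype and binds the
  // matching C++ type to the hint name `data_t` inside the lambda body.
  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "relu_cpu_forward_kernel", ([&] {
        relu_cpu_forward_kernel<data_t>(
            x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
      }));
  return {out};
}
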
diff --git a/paddle/fluid/extension/include/dtype.h b/paddle/fluid/extension/include/dtype.h
new file mode 100644
index 0000000000000..3db1f5c308471
--- /dev/null
+++ b/paddle/fluid/extension/include/dtype.h
@@ -0,0 +1,39 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/platform/bfloat16.h"
+#include "paddle/fluid/platform/complex128.h"
+#include "paddle/fluid/platform/complex64.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace paddle {
+
+enum DataType {
+  FLOAT32,
+  FLOAT64,
+  BFLOAT16,
+  COMPLEX128,
+  COMPLEX64,
+  FLOAT16,
+  INT64,
+  INT32,
+  INT16,
+  UINT8,
+  INT8,
+  BOOL,
+  // TODO(JiabinYang): support more data types if needed.
+};
+
+}  // namespace paddle

diff --git a/paddle/fluid/extension/include/op_meta_info.h b/paddle/fluid/extension/include/op_meta_info.h
new file mode 100644
index 0000000000000..2f3d973a8f697
--- /dev/null
+++ b/paddle/fluid/extension/include/op_meta_info.h
@@ -0,0 +1,315 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <boost/any.hpp>
+
+#include "paddle/fluid/extension/include/tensor.h"
+
+/**
+ * Op Meta Info related definitions.
+ *
+ * Used to maintain operator core information.
+ *
+ */
+
+namespace paddle {
+namespace framework {
+class OpMetaInfoHelper;
+}  // namespace framework
+
+using Tensor = paddle::Tensor;
+
+#define PD_DISABLE_COPY_AND_ASSIGN(classname)      \
+ private:                                          \
+  classname(const classname&) = delete;            \
+  classname(classname&&) = delete;                 \
+  classname& operator=(const classname&) = delete; \
+  classname& operator=(classname&&) = delete
+
+///////////////// Util Define and Function ////////////////
+
+inline std::string Grad(const std::string& var_name) {
+  std::string result;
+  result.reserve(var_name.size() + 5U);
+  result += var_name;
+  result += "@GRAD";
+  return result;
+}
+
+////////////////////// Kernel Function (PD_KERNEL) ////////////////////////
+
+// Record Op kernel core function
+using KernelFunc = std::vector<Tensor> (*)(std::vector<Tensor> inputs,
+                                           std::vector<boost::any> attrs);
+
+template <typename T>
+struct TypeTag {};
+
+template <typename F, F f>
+struct KernelFuncImpl;
+
+template <typename Return, typename... Args, Return (*impl_fn)(Args...)>
+struct KernelFuncImpl<Return (*)(Args...), impl_fn> {
+  static Return Compute(std::vector<Tensor> inputs,
+                        std::vector<boost::any> attrs) {
+    return ComputeCallHelper<Args..., TypeTag<int>>::template Compute<0, 0>(
+        inputs, attrs);
+  }
+
+ private:
+  template <typename... RemainingArgs>
+  struct ComputeCallHelper;
+
+  // for Tensor input
+  template <typename... Tail>
+  struct ComputeCallHelper<const Tensor&, Tail...> {
+    template <int in_idx, int attr_idx, typename... PreviousArgs>
+    static Return Compute(std::vector<Tensor> inputs,
+                          std::vector<boost::any> attrs,
+                          const PreviousArgs&... pargs) {
+      static_assert(attr_idx == 0,
+                    "Input tensor should appear before attributes.");
+      const Tensor& arg = inputs[in_idx];
+      return ComputeCallHelper<Tail...>::template Compute<in_idx + 1,
+                                                          attr_idx>(
+          inputs, attrs, pargs..., arg);
+    }
+  };
+
+  // TODO(chenweihang): add support for attribute input
+  // int attribute input (not used now)
+  template <typename... Tail>
+  struct ComputeCallHelper<int, Tail...> {
+    template <int in_idx, int attr_idx, typename... PreviousArgs>
+    static Return Compute(std::vector<Tensor> inputs,
+                          std::vector<boost::any> attrs,
+                          const PreviousArgs&... pargs) {
+      try {
+        int arg = boost::any_cast<int>(attrs[attr_idx]);
+        return ComputeCallHelper<Tail...>::template Compute<in_idx,
+                                                            attr_idx + 1>(
+            inputs, attrs, pargs..., arg);
+      } catch (boost::bad_any_cast&) {
+        throw std::runtime_error(
+            "Attribute cast error in custom operator. Expected int value.");
+      }
+    }
+  };
+
+  // end: base template
+  template <typename T>
+  struct ComputeCallHelper<TypeTag<T>> {
+    template <int in_idx, int attr_idx>
+    static Return Compute(std::vector<Tensor> inputs,
+                          std::vector<boost::any> attrs, const Args&... args) {
+      return impl_fn(args...);
+    }
+  };
+};
+
+#define PD_KERNEL(...) \
+  ::paddle::KernelFuncImpl<decltype(&__VA_ARGS__), &__VA_ARGS__>::Compute
+
+/////////////// InferShape Function (PD_INFER_SHAPE) ///////////////
+
+// Record Op infershape core function
+using InferShapeFunc = std::vector<std::vector<int64_t>> (*)(
+    std::vector<std::vector<int64_t>> input_shapes);
+
+template <typename F, F f>
+struct InferShapeFuncImpl;
+
+template <typename Return, typename... Args, Return (*impl_fn)(Args...)>
+struct InferShapeFuncImpl<Return (*)(Args...), impl_fn> {
+  static Return InferShape(std::vector<std::vector<int64_t>> input_shapes) {
+    return InferShapeCallHelper<Args..., TypeTag<int>>::template InferShape<0>(
+        input_shapes);
+  }
+
+ private:
+  template <typename... RemainingArgs>
+  struct InferShapeCallHelper;
+
+  // only one type input: std::vector<int64_t>
+  template <typename... Tail>
+  struct InferShapeCallHelper<std::vector<int64_t>, Tail...> {
+    template <int in_idx, typename... PreviousArgs>
+    static Return InferShape(std::vector<std::vector<int64_t>> input_shapes,
+                             const PreviousArgs&... pargs) {
+      std::vector<int64_t> arg = input_shapes[in_idx];
+      return InferShapeCallHelper<Tail...>::template InferShape<in_idx + 1>(
+          input_shapes, pargs..., arg);
+    }
+  };
+
+  // end: base template
+  template <typename T>
+  struct InferShapeCallHelper<TypeTag<T>> {
+    template <int in_idx>
+    static Return InferShape(std::vector<std::vector<int64_t>> input_shapes,
+                             const Args&... args) {
+      return impl_fn(args...);
+    }
+  };
+};
+
+#define PD_INFER_SHAPE(...) \
+  ::paddle::InferShapeFuncImpl<decltype(&__VA_ARGS__), &__VA_ARGS__>::InferShape
+
+/////////////// InferDataType Function (PD_INFER_DTYPE) ///////////////
+
+// Record Op Infer dtype core function
+using InferDtypeFunc =
+    std::vector<DataType> (*)(std::vector<DataType> input_dtypes);
+
+template <typename F, F f>
+struct InferDtypeFuncImpl;
+
+template <typename Return, typename... Args, Return (*impl_fn)(Args...)>
+struct InferDtypeFuncImpl<Return (*)(Args...), impl_fn> {
+  static Return InferDtype(std::vector<DataType> input_dtypes) {
+    return InferDtypeCallHelper<Args..., TypeTag<int>>::template InferDtype<0>(
+        input_dtypes);
+  }
+
+ private:
+  template <typename... RemainingArgs>
+  struct InferDtypeCallHelper;
+
+  // Only one type input now: DataType
+  template <typename... Tail>
+  struct InferDtypeCallHelper<DataType, Tail...> {
+    template <int in_idx, typename... PreviousArgs>
+    static Return InferDtype(std::vector<DataType> input_dtypes,
+                             const PreviousArgs&... pargs) {
+      DataType arg = input_dtypes[in_idx];
+      return InferDtypeCallHelper<Tail...>::template InferDtype<in_idx + 1>(
+          input_dtypes, pargs..., arg);
+    }
+  };
+
+  // end: base template
+  template <typename T>
+  struct InferDtypeCallHelper<TypeTag<T>> {
+    template <int in_idx>
+    static Return InferDtype(std::vector<DataType> input_dtypes,
+                             const Args&... args) {
+      return impl_fn(args...);
+    }
+  };
+};
+
+#define PD_INFER_DTYPE(...) \
+  ::paddle::InferDtypeFuncImpl<decltype(&__VA_ARGS__), &__VA_ARGS__>::InferDtype
+
+////////////////////// Op Meta Info //////////////////////
+
+class OpMetaInfo {
+ public:
+  explicit OpMetaInfo(const std::string& op_name) : name_(op_name) {}
+  OpMetaInfo& Inputs(std::vector<std::string>&& inputs);
+  OpMetaInfo& Outputs(std::vector<std::string>&& outputs);
+  OpMetaInfo& SetKernelFn(KernelFunc&& func);
+  OpMetaInfo& SetInferShapeFn(InferShapeFunc&& func);
+  OpMetaInfo& SetInferDtypeFn(InferDtypeFunc&& func);
+
+ private:
+  friend class framework::OpMetaInfoHelper;
+
+  // 1. desc info
+  std::string name_;
+  std::vector<std::string> inputs_;
+  std::vector<std::string> outputs_;
+  std::vector<std::string> attrs_;
+
+  // 2. func info
+  KernelFunc kernel_fn_;
+  InferShapeFunc infer_shape_fn_;
+  InferDtypeFunc infer_dtype_fn_;
+};
+
+//////////////// Op Meta Info Map /////////////////
+
+class OpMetaInfoMap {
+ public:
+  // This function's implementation should stay in the header file;
+  // if it is moved into the .cc file, meta info cannot be added
+  // into the map.
+  static OpMetaInfoMap& Instance() {
+    static OpMetaInfoMap g_custom_op_meta_info_map;
+    return g_custom_op_meta_info_map;
+  }
+
+  std::vector<OpMetaInfo>& operator[](const std::string& name);
+
+  const std::unordered_map<std::string, std::vector<OpMetaInfo>>& GetMap()
+      const;
+
+ private:
+  OpMetaInfoMap() = default;
+  std::unordered_map<std::string, std::vector<OpMetaInfo>> map_;
+
+  PD_DISABLE_COPY_AND_ASSIGN(OpMetaInfoMap);
+};
+
+//////////////// Op Meta Info Builder /////////////////
+
+class OpMetaInfoBuilder {
+ public:
+  explicit OpMetaInfoBuilder(std::string&& name);
+  OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs);
+  OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs);
+  OpMetaInfoBuilder& SetKernelFn(KernelFunc&& func);
+  OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc&& func);
+  OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc&& func);
+  OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name);
+
+ private:
+  // Forward Op name
+  std::string name_;
+  // Point to the currently constructed op meta info
+  OpMetaInfo* info_ptr_;
+};
+
+/////////////////////// Op register API /////////////////////////
+
+// For inference: compile directly with framework
+// Call after PD_BUILD_OPERATOR(...)
+void RegisterAllCustomOperator();
+
+/////////////////////// Op register Macro /////////////////////////
+
+#define PD_BUILD_OPERATOR(op_name)                                      \
+  static ::paddle::OpMetaInfoBuilder __op_meta_info_##__COUNTER__##__ = \
+      ::paddle::OpMetaInfoBuilder(op_name)
+
+}  // namespace paddle
+
+///////////////////// C API ///////////////////
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// C-API to get global OpMetaInfoMap.
+paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap();
+
+#ifdef __cplusplus
+}
+#endif
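
Putting the pieces together, a user library would register an operator roughly like this (a sketch in the spirit of the relu_op_simple.cc test added by this patch; the "relu2" name and the Relu* functions are illustrative, with ReluCPUForward taken from the dispatch sketch earlier and ReluCPUBackward only declared here):

// Illustrative infershape/inferdtype functions: one input, same shape/dtype.
std::vector<std::vector<int64_t>> ReluInferShape(
    std::vector<int64_t> x_shape) {
  return {x_shape};
}

std::vector<paddle::DataType> ReluInferDtype(paddle::DataType x_dtype) {
  return {x_dtype};
}

// Assumed backward kernel, following the same KernelFunc calling convention.
std::vector<paddle::Tensor> ReluCPUBackward(const paddle::Tensor& x,
                                            const paddle::Tensor& out,
                                            const paddle::Tensor& grad_out);

// The macro instantiates a static OpMetaInfoBuilder, so the op is recorded
// in OpMetaInfoMap as a side effect of loading the user's shared library.
PD_BUILD_OPERATOR("relu2")
    .Inputs({"X"})
    .Outputs({"Out"})
    .SetKernelFn(PD_KERNEL(ReluCPUForward))
    .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDtype))
    // SetBackwardOp appends a second OpMetaInfo to the same vector and
    // redirects the following calls to it; Grad("X") expands to "X@GRAD".
    .SetBackwardOp("relu2_grad")
    .Inputs({"X", "Out", paddle::Grad("Out")})
    .Outputs({paddle::Grad("X")})
    .SetKernelFn(PD_KERNEL(ReluCPUBackward));
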
diff --git a/paddle/fluid/extension/include/place.h b/paddle/fluid/extension/include/place.h
new file mode 100644
index 0000000000000..91d4f41c21351
--- /dev/null
+++ b/paddle/fluid/extension/include/place.h
@@ -0,0 +1,22 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+namespace paddle {
+
+// TODO(yangjiabin): Add other place support in next PR
+enum class PlaceType { kUNK = -1, kCPU, kGPU };
+
+}  // namespace paddle

diff --git a/paddle/fluid/extension/include/tensor.h b/paddle/fluid/extension/include/tensor.h
new file mode 100644
index 0000000000000..1140efe5c1906
--- /dev/null
+++ b/paddle/fluid/extension/include/tensor.h
@@ -0,0 +1,95 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <vector>
+#include "paddle/fluid/extension/include/dtype.h"
+#include "paddle/fluid/extension/include/place.h"
+
+namespace paddle {
+namespace framework {
+class CustomTensorUtils;
+}  // namespace framework
+class Tensor {
+ public:
+  /// \brief Construct a Tensor on the target place for CustomOp.
+  /// Generally it's only used for the user to create a Tensor.
+  explicit Tensor(const PlaceType& place);
+  /// \brief Reset the shape of the tensor.
+  /// Generally it's only used for the input tensor.
+  /// reshape must be called before calling
+  /// mutable_data() or copy_to().
+  /// \param shape The shape to set.
+  void reshape(const std::vector<int>& shape);
+
+  /// \brief Get the memory pointer in CPU or GPU with
+  /// a specific data type.
+  /// Please reshape the tensor first before calling this.
+  /// It's usually used to get the input data pointer.
+  /// \param place The place of the tensor; this will
+  /// override the original place of the current tensor.
+  template <typename T>
+  T* mutable_data(const PlaceType& place);
+
+  /// \brief Get the memory pointer in CPU or GPU with
+  /// a specific data type. Please reshape the tensor
+  /// first before calling this. It's usually used to get
+  /// the input data pointer.
+  template <typename T>
+  T* data() const;
+
+  template <typename T>
+  T* mutable_data();
+
+  /// \brief Copy the tensor data to another place.
+  /// It's usually used to set the input tensor data.
+  /// \param place The target place to which
+  /// the tensor will be copied.
+  template <typename T>
+  Tensor copy_to(const PlaceType& place);
+
+  /// \brief Return the shape of the Tensor.
+  std::vector<int> shape() const;
+
+  /// \brief Return the data type of the tensor.
+  /// It's usually used to get the output tensor data type.
+  /// \return The data type of the tensor.
+  DataType type() const;
+
+  /// \brief Get the size (number of elements) of the current tensor.
+  /// \return int64_t.
+  int64_t size() const;
+
+  /// \brief Get the place of the current tensor.
+  /// \return Place.
+  const PlaceType& place() const;
+
+  /// \brief Cast the data type from one to another.
+  Tensor cast(const DataType& target_type);
+
+ private:
+  friend class framework::CustomTensorUtils;
+  mutable std::shared_ptr<void> tensor_;
+  mutable PlaceType place_;
+};
+
+}  // namespace paddle
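
As a usage sketch of this interface (illustrative names; the GPU round trip assumes a CUDA build): reshape() must come first, mutable_data<T>() then allocates on the tensor's place, and copy_to<T>()/cast() produce new tensors:

#include "paddle/extension.h"

void TensorRoundTripDemo() {
  // Create a CPU tensor; reshape() must precede mutable_data().
  paddle::Tensor cpu_t(paddle::PlaceType::kCPU);
  cpu_t.reshape({2, 3});
  float* data = cpu_t.mutable_data<float>();
  for (int64_t i = 0; i < cpu_t.size(); ++i) {
    data[i] = static_cast<float>(i);
  }
  // Copy to GPU and back; copy_to allocates and fills the target tensor.
  paddle::Tensor gpu_t = cpu_t.copy_to<float>(paddle::PlaceType::kGPU);
  paddle::Tensor back = gpu_t.copy_to<float>(paddle::PlaceType::kCPU);
  // Cast to another dtype on the same place.
  paddle::Tensor as_double = back.cast(paddle::DataType::FLOAT64);
}
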
diff --git a/paddle/fluid/extension/src/op_meta_info.cc b/paddle/fluid/extension/src/op_meta_info.cc
new file mode 100644
index 0000000000000..0238dd7a7eca7
--- /dev/null
+++ b/paddle/fluid/extension/src/op_meta_info.cc
@@ -0,0 +1,120 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/extension/include/op_meta_info.h"
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/framework/custom_operator.h"
+
+namespace paddle {
+
+////////////////////// Op Meta Info //////////////////////
+
+OpMetaInfo& OpMetaInfo::Inputs(std::vector<std::string>&& inputs) {
+  inputs_ = std::forward<std::vector<std::string>>(inputs);
+  return *this;
+}
+OpMetaInfo& OpMetaInfo::Outputs(std::vector<std::string>&& outputs) {
+  outputs_ = std::forward<std::vector<std::string>>(outputs);
+  return *this;
+}
+OpMetaInfo& OpMetaInfo::SetKernelFn(KernelFunc&& func) {
+  kernel_fn_ = std::forward<KernelFunc>(func);
+  return *this;
+}
+OpMetaInfo& OpMetaInfo::SetInferShapeFn(InferShapeFunc&& func) {
+  infer_shape_fn_ = std::forward<InferShapeFunc>(func);
+  return *this;
+}
+OpMetaInfo& OpMetaInfo::SetInferDtypeFn(InferDtypeFunc&& func) {
+  infer_dtype_fn_ = std::forward<InferDtypeFunc>(func);
+  return *this;
+}
+
+//////////////// Op Meta Info Map /////////////////
+
+std::vector<OpMetaInfo>& OpMetaInfoMap::operator[](const std::string& name) {
+  return map_[name];
+}
+
+const std::unordered_map<std::string, std::vector<OpMetaInfo>>&
+OpMetaInfoMap::GetMap() const {
+  return map_;
+}
+
+//////////////// Op Meta Info Builder /////////////////
+
+OpMetaInfoBuilder::OpMetaInfoBuilder(std::string&& name) {
+  name_ = std::forward<std::string>(name);
+  auto& info_vector = OpMetaInfoMap::Instance()[name_];
+  auto op_meta = OpMetaInfo(name_);
+  info_vector.emplace_back(std::move(op_meta));
+  info_ptr_ = &(info_vector.back());
+}
+
+OpMetaInfoBuilder& OpMetaInfoBuilder::Inputs(
+    std::vector<std::string>&& inputs) {
+  info_ptr_->Inputs(std::forward<std::vector<std::string>>(inputs));
+  return *this;
+}
+
+OpMetaInfoBuilder& OpMetaInfoBuilder::Outputs(
+    std::vector<std::string>&& outputs) {
+  info_ptr_->Outputs(std::forward<std::vector<std::string>>(outputs));
+  return *this;
+}
+
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc&& func) {
+  info_ptr_->SetKernelFn(std::forward<KernelFunc>(func));
+  return *this;
+}
+
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc&& func) {
+  info_ptr_->SetInferShapeFn(std::forward<InferShapeFunc>(func));
+  return *this;
+}
+
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc&& func) {
+  info_ptr_->SetInferDtypeFn(std::forward<InferDtypeFunc>(func));
+  return *this;
+}
+
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetBackwardOp(
+    const std::string& bwd_op_name) {
+  auto& info_vector = OpMetaInfoMap::Instance()[name_];
+  auto op_meta = OpMetaInfo(bwd_op_name);
+  info_vector.emplace_back(std::move(op_meta));
+  info_ptr_ = &(info_vector.back());
+  return *this;
+}
+
+/////////////////////// Op register API /////////////////////////
+
+void RegisterAllCustomOperator() {
+  auto& op_meta_info_map = OpMetaInfoMap::Instance();
+  framework::RegisterOperatorWithMetaInfoMap(op_meta_info_map);
+}
+
+}  // namespace paddle
+
+extern "C" {
+
+paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
+  return paddle::OpMetaInfoMap::Instance();
+}
+
+}  // end extern "C"
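
The extern "C" entry point above exists so that the framework can locate a user library's meta info map across the dynamic-linking boundary. Conceptually, the loading side looks like the following sketch (illustrative only; the actual loader on the framework side uses the DynLoad helper defined in custom_operator.cc, and this POSIX-only snippet assumes dlfcn.h):

#include <dlfcn.h>
#include <stdexcept>
#include <string>

#include "paddle/fluid/extension/include/op_meta_info.h"
#include "paddle/fluid/framework/custom_operator.h"

// Sketch: load a user-compiled op library and register everything in its map.
void LoadAndRegisterCustomOps(const std::string& so_path) {
  void* handle = dlopen(so_path.c_str(), RTLD_NOW);
  if (handle == nullptr) {
    throw std::runtime_error("Failed to open custom op library: " + so_path);
  }
  // Resolve the C symbol exported by the user's shared library.
  using GetMapFn = paddle::OpMetaInfoMap& (*)();
  auto* get_map =
      reinterpret_cast<GetMapFn>(dlsym(handle, "PD_GetOpMetaInfoMap"));
  if (get_map == nullptr) {
    throw std::runtime_error("PD_GetOpMetaInfoMap symbol not found.");
  }
  paddle::framework::RegisterOperatorWithMetaInfoMap(get_map());
}
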
diff --git a/paddle/fluid/extension/src/tensor.cc b/paddle/fluid/extension/src/tensor.cc
new file mode 100644
index 0000000000000..ef747567b226c
--- /dev/null
+++ b/paddle/fluid/extension/src/tensor.cc
@@ -0,0 +1,378 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/extension/include/tensor.h"
+#include <utility>
+#include "paddle/fluid/framework/custom_tensor_utils.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/transform.h"
+
+namespace paddle {
+
+template <typename InType, typename OutType>
+struct CastDataTypeFunctor {
+  HOSTDEVICE inline OutType operator()(InType in) const {
+    return static_cast<OutType>(in);
+  }
+};
+
+template <typename InType>
+struct CastDataType {
+  CastDataType(const framework::Tensor &in, framework::Tensor *out,
+               const platform::DeviceContext *ctx)
+      : in_(in), out_(out), ctx_(ctx) {}
+  const framework::Tensor in_;
+  framework::Tensor *out_;
+  const platform::DeviceContext *ctx_;
+
+  template <typename OutType>
+  void apply() {
+    auto *in_begin = in_.data<InType>();
+    auto *in_end = in_begin + in_.numel();
+    auto *out_begin = out_->mutable_data<OutType>(in_.place());
+
+    if (platform::is_cpu_place(in_.place())) {
+      platform::Transform<platform::CPUDeviceContext> trans;
+      auto *context = static_cast<const platform::CPUDeviceContext *>(ctx_);
+      trans(*context, in_begin, in_end, out_begin,
+            CastDataTypeFunctor<InType, OutType>());
+#ifdef __NVCC__
+    } else if (platform::is_gpu_place(in_.place())) {
+      platform::Transform<platform::CUDADeviceContext> trans;
+      auto *context = static_cast<const platform::CUDADeviceContext *>(ctx_);
+      trans(*context, in_begin, in_end, out_begin,
+            CastDataTypeFunctor<InType, OutType>());
+      context->Wait();
+#endif
+    } else {
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "Place type is not supported when casting data type."));
+    }
+  }
+};
+
+template <typename T>
+void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
+             int64_t ele_size) {
+#ifdef PADDLE_WITH_CUDA
+  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+  int device_num = paddle::platform::GetCurrentDeviceId();
+  platform::CUDAPlace gpu_place(device_num);
+  auto *dev_ctx =
+      static_cast<const platform::CUDADeviceContext *>(pool.Get(gpu_place));
+  if ((src_plc == PlaceType::kGPU) && (dst_plc == PlaceType::kCPU)) {
+    memory::Copy(platform::CPUPlace(), static_cast<void *>(dst), gpu_place,
+                 src, ele_size, dev_ctx->stream());
+  } else if ((src_plc == PlaceType::kGPU) && (dst_plc == PlaceType::kGPU)) {
+    memory::Copy(gpu_place, static_cast<void *>(dst), gpu_place, src, ele_size,
+                 dev_ctx->stream());
+  } else if ((src_plc == PlaceType::kCPU) && (dst_plc == PlaceType::kGPU)) {
+    memory::Copy(gpu_place, static_cast<void *>(dst), platform::CPUPlace(),
+                 src, ele_size, dev_ctx->stream());
+  } else {
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Only GPU related Copy can reach this func."));
+  }
+  cudaStreamSynchronize(dev_ctx->stream());
+#endif
+}
+
+#define GET_CASTED_TENSOR                               \
+  if (!tensor_) {                                       \
+    tensor_ = std::make_shared<framework::LoDTensor>(); \
+  }                                                     \
+  auto *tensor = static_cast<framework::LoDTensor *>(tensor_.get());
+
+void Tensor::reshape(const std::vector<int> &shape) {
+  GET_CASTED_TENSOR
+  tensor->Resize(framework::make_ddim(shape));
+}
+
+Tensor::Tensor(const PlaceType &place)
+    : tensor_(std::make_shared<framework::LoDTensor>()), place_(place) {}
+
+template <typename T>
+T *Tensor::mutable_data(const PlaceType &place) {
+  place_ = place;
+  return mutable_data<T>();
+}
+
+template <typename T>
+T *Tensor::mutable_data() {
+  GET_CASTED_TENSOR
+  PADDLE_ENFORCE_GT(
+      tensor->numel(), 0,
+      platform::errors::PreconditionNotMet(
+          "You should call Tensor::reshape(const std::vector<int> &shape) "
+          "function before retrieving mutable_data from input tensor."));
+  switch (static_cast<int>(place_)) {
+    case static_cast<int>(PlaceType::kCPU): {
+      return tensor->mutable_data<T>(platform::CPUPlace());
+    }
+#ifdef PADDLE_WITH_CUDA
+    case static_cast<int>(PlaceType::kGPU): {
+      int device_num = platform::GetCurrentDeviceId();
+      VLOG(1) << "Custom Operator: mutable data cuda device id - "
+              << device_num;
+      return tensor->mutable_data<T>(platform::CUDAPlace(device_num));
+    }
+#endif
+    default:
+      PADDLE_THROW(platform::errors::Unavailable(
+          "Custom operator unsupported place id(%d)",
+          static_cast<int>(place_)));
+  }
+}
+
+template <typename T>
+T *Tensor::data() const {
+  GET_CASTED_TENSOR;
+  auto *res = tensor->data<T>();
+  return res;
+}
+
+DataType Tensor::type() const {
+  GET_CASTED_TENSOR;
+  auto type = tensor->type();
+  if (type == framework::proto::VarType::FP32) {
+    return DataType::FLOAT32;
+  } else if (type == framework::proto::VarType::INT64) {
+    return DataType::INT64;
+  } else if (type == framework::proto::VarType::INT32) {
+    return DataType::INT32;
+  } else if (type == framework::proto::VarType::INT16) {
+    return DataType::INT16;
+  } else if (type == framework::proto::VarType::INT8) {
+    return DataType::INT8;
+  } else if (type == framework::proto::VarType::UINT8) {
+    return DataType::UINT8;
+  } else if (type == framework::proto::VarType::FP64) {
+    return DataType::FLOAT64;
+  } else if (type == framework::proto::VarType::BF16) {
+    return DataType::BFLOAT16;
+  } else if (type == framework::proto::VarType::FP16) {
+    return DataType::FLOAT16;
+  } else if (type == framework::proto::VarType::COMPLEX64) {
+    return DataType::COMPLEX64;
+  } else if (type == framework::proto::VarType::COMPLEX128) {
+    return DataType::COMPLEX128;
+  } else if (type == framework::proto::VarType::BOOL) {
+    return DataType::BOOL;
+  }
+  return DataType::FLOAT32;
+}
+
+template <typename T>
+Tensor Tensor::copy_to(const PlaceType &target_place) {
+  GET_CASTED_TENSOR;
+  PADDLE_ENFORCE_GE(tensor->numel(), 0,
+                    platform::errors::PreconditionNotMet(
+                        "You should call Tensor::reshape(const "
+                        "std::vector<int> &shape) "
+                        "function before copying data from cpu."));
+  size_t ele_size = tensor->numel() * sizeof(T);
+  auto *p_src_data = tensor->data<T>();
+  auto src_place = place();
+  Tensor target = Tensor(target_place);
+  target.reshape(shape());
+  auto *p_target_data = target.template mutable_data<T>();
+
+  if ((src_place == PlaceType::kCPU) && (target_place == PlaceType::kCPU)) {
+    std::memcpy(static_cast<void *>(p_target_data), p_src_data, ele_size);
+  } else if ((src_place == PlaceType::kGPU) &&
+             (target_place == PlaceType::kCPU)) {
+    GpuCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
+  } else if ((src_place == PlaceType::kCPU) &&
+             (target_place == PlaceType::kGPU)) {
+    GpuCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
+  } else if ((src_place == PlaceType::kGPU) &&
+             (target_place == PlaceType::kGPU)) {
+    GpuCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
+  } else {
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Not supported place transform of place: %d to place: %d",
+        static_cast<int>(src_place), static_cast<int>(target_place)));
+  }
+  return target;
+}
+
+template Tensor Tensor::copy_to<paddle::platform::float16>(
+    const PlaceType &target_place);
+template Tensor Tensor::copy_to<paddle::platform::bfloat16>(
+    const PlaceType &target_place);
+template Tensor Tensor::copy_to<paddle::platform::complex64>(
+    const PlaceType &target_place);
+template Tensor Tensor::copy_to<paddle::platform::complex128>(
+    const PlaceType &target_place);
+template Tensor Tensor::copy_to<float>(const PlaceType &target_place);
+template Tensor Tensor::copy_to<double>(const PlaceType &target_place);
+template Tensor Tensor::copy_to<int64_t>(const PlaceType &target_place);
+template Tensor Tensor::copy_to<int32_t>(const PlaceType &target_place);
+template Tensor Tensor::copy_to<uint8_t>(const PlaceType &target_place);
+template Tensor Tensor::copy_to<int8_t>(const PlaceType &target_place);
+template Tensor Tensor::copy_to<int16_t>(const PlaceType &target_place);
+template Tensor Tensor::copy_to<bool>(const PlaceType &target_place);
+
+template float *Tensor::data<float>() const;
+template double *Tensor::data<double>() const;
+template int64_t *Tensor::data<int64_t>() const;
+template int32_t *Tensor::data<int32_t>() const;
+template uint8_t *Tensor::data<uint8_t>() const;
+template int8_t *Tensor::data<int8_t>() const;
+template paddle::platform::float16 *Tensor::data<paddle::platform::float16>()
+    const;
+template paddle::platform::bfloat16 *Tensor::data<paddle::platform::bfloat16>()
+    const;
+template paddle::platform::complex128 *
+Tensor::data<paddle::platform::complex128>() const;
+template paddle::platform::complex64 *
+Tensor::data<paddle::platform::complex64>() const;
+template int16_t *Tensor::data<int16_t>() const;
+template bool *Tensor::data<bool>() const;
+
+template float *Tensor::mutable_data<float>();
+template double *Tensor::mutable_data<double>();
+template int64_t *Tensor::mutable_data<int64_t>();
+template int32_t *Tensor::mutable_data<int32_t>();
+template uint8_t *Tensor::mutable_data<uint8_t>();
+template int8_t *Tensor::mutable_data<int8_t>();
+template paddle::platform::float16 *
+Tensor::mutable_data<paddle::platform::float16>();
+template paddle::platform::bfloat16 *
+Tensor::mutable_data<paddle::platform::bfloat16>();
+template paddle::platform::complex128 *
+Tensor::mutable_data<paddle::platform::complex128>();
+template paddle::platform::complex64 *
+Tensor::mutable_data<paddle::platform::complex64>();
+template int16_t *Tensor::mutable_data<int16_t>();
+template bool *Tensor::mutable_data<bool>();
+
+template float *Tensor::mutable_data<float>(const PlaceType &place);
+template double *Tensor::mutable_data<double>(const PlaceType &place);
+template int64_t *Tensor::mutable_data<int64_t>(const PlaceType &place);
+template int32_t *Tensor::mutable_data<int32_t>(const PlaceType &place);
+template uint8_t *Tensor::mutable_data<uint8_t>(const PlaceType &place);
+template int8_t *Tensor::mutable_data<int8_t>(const PlaceType &place);
+template paddle::platform::float16 *
+Tensor::mutable_data<paddle::platform::float16>(const PlaceType &place);
+template paddle::platform::bfloat16 *
+Tensor::mutable_data<paddle::platform::bfloat16>(const PlaceType &place);
+template paddle::platform::complex128 *
+Tensor::mutable_data<paddle::platform::complex128>(const PlaceType &place);
+template paddle::platform::complex64 *
+Tensor::mutable_data<paddle::platform::complex64>(const PlaceType &place);
+template int16_t *Tensor::mutable_data<int16_t>(const PlaceType &place);
+template bool *Tensor::mutable_data<bool>(const PlaceType &place);
+
+std::vector<int> Tensor::shape() const {
+  GET_CASTED_TENSOR
+  return framework::vectorize<int>(tensor->dims());
+}
+
+const PlaceType &Tensor::place() const {
+  GET_CASTED_TENSOR;
+  if (platform::is_cpu_place(tensor->place())) {
+    place_ = PlaceType::kCPU;
+  } else if (platform::is_gpu_place(tensor->place())) {
+    place_ = PlaceType::kGPU;
+  } else {
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "Current Tensor holds an unsupported place type. Please initialize "
+        "it first using Tensor::mutable_data<T>(PlaceType), where the place "
+        "is either PlaceType::kCPU or PlaceType::kGPU."));
+  }
+  return place_;
+}
+
+Tensor Tensor::cast(const DataType &target_type) {
+  GET_CASTED_TENSOR;
+  Tensor rlt = Tensor(place());
+  rlt.reshape(this->shape());
+  auto rlt_tensor_ = static_cast<framework::LoDTensor *>(rlt.tensor_.get());
+  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+  auto ctx = pool.Get(tensor->place());
+  auto src_type = tensor->type();
+  auto dst_type =
+      framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(target_type);
+  switch (src_type) {
+    case framework::proto::VarType::FP16:
+      framework::VisitDataType(
+          dst_type,
+          CastDataType<platform::float16>(*tensor, rlt_tensor_, ctx));
+      break;
+    case framework::proto::VarType::BF16:
+      framework::VisitDataType(
+          dst_type,
+          CastDataType<platform::bfloat16>(*tensor, rlt_tensor_, ctx));
+      break;
+    case framework::proto::VarType::FP32:
+      framework::VisitDataType(dst_type,
+                               CastDataType<float>(*tensor, rlt_tensor_, ctx));
+      break;
+    case framework::proto::VarType::FP64:
+      framework::VisitDataType(dst_type,
+                               CastDataType<double>(*tensor, rlt_tensor_, ctx));
+      break;
+    case framework::proto::VarType::INT32:
+      framework::VisitDataType(
+          dst_type, CastDataType<int32_t>(*tensor, rlt_tensor_, ctx));
+      break;
+    case framework::proto::VarType::INT64:
+      framework::VisitDataType(
+          dst_type, CastDataType<int64_t>(*tensor, rlt_tensor_, ctx));
+      break;
+    case framework::proto::VarType::BOOL:
+      framework::VisitDataType(dst_type,
+                               CastDataType<bool>(*tensor, rlt_tensor_, ctx));
+      break;
+    case framework::proto::VarType::INT16:
+      framework::VisitDataType(
+          dst_type, CastDataType<int16_t>(*tensor, rlt_tensor_, ctx));
+      break;
+    case framework::proto::VarType::UINT8:
+      framework::VisitDataType(
+          dst_type, CastDataType<uint8_t>(*tensor, rlt_tensor_, ctx));
+      break;
+    // TODO(JiabinYang): Support Complex later
+    default:
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "Data type (%s) is not supported when casting data type.",
+          framework::DataTypeToString(src_type)));
+  }
+  return rlt;
+}
+
+int64_t Tensor::size() const {
+  GET_CASTED_TENSOR;
+  return tensor->numel();
+}
+
+namespace framework {
+
+void CustomTensorUtils::ShareDataTo(const paddle::Tensor &src, void *dst) {
+  static_cast<framework::LoDTensor *>(dst)->ShareDataWith(
+      *static_cast<framework::LoDTensor *>(src.tensor_.get()));
+}
+
+void CustomTensorUtils::ShareDataFrom(const void *src,
+                                      const paddle::Tensor &dst) {
+  if (!dst.tensor_) {
+    dst.tensor_ = std::make_shared<framework::LoDTensor>();
+  }
+  auto *tensor = static_cast<framework::LoDTensor *>(dst.tensor_.get());
+  tensor->ShareDataWith(*static_cast<const framework::LoDTensor *>(src));
+}
+
+}  // namespace framework
+}  // namespace paddle
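
The cast() implementation above is a double dispatch: the switch picks the source type InType statically, then framework::VisitDataType picks the destination type at runtime and invokes CastDataType<InType>::apply<OutType>(). A stripped-down, self-contained sketch of that pattern (illustrative types and ids, not Paddle API):

#include <iostream>

// Simplified double dispatch: Visit() resolves the destination type at
// runtime and calls functor.template apply<OutType>(), mirroring how
// framework::VisitDataType drives CastDataType<InType>::apply<OutType>().
template <typename Functor>
void Visit(int dst_type_id, Functor functor) {
  switch (dst_type_id) {
    case 0:
      functor.template apply<float>();
      break;
    case 1:
      functor.template apply<double>();
      break;
    default:
      break;
  }
}

template <typename InType>
struct PrintCast {
  InType value;
  template <typename OutType>
  void apply() {
    std::cout << static_cast<OutType>(value) << std::endl;
  }
};

int main() {
  // Source type picked statically (like the switch over src_type above),
  // destination type picked at runtime by Visit().
  Visit(1, PrintCast<int>{42});
  return 0;
}
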
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 4feffe65f7389..14179172db229 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -320,11 +320,17 @@ message(STATUS "branch: ${PADDLE_BRANCH}")
 
 configure_file(commit.h.in commit.h)
 
-set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer)
+cc_library(custom_tensor SRCS ../extension/src/tensor.cc DEPS lod_tensor)
+cc_library(op_meta_info SRCS ../extension/src/op_meta_info.cc DEPS custom_tensor)
+cc_library(custom_operator SRCS custom_operator.cc DEPS operator op_registry device_context dynamic_loader custom_tensor op_meta_info)
+cc_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog)
+
+set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator)
 
 cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES})
 
 cc_library(paddle_framework_shared
-  SHARED SRCS executor.cc operator.cc
+  SHARED SRCS executor.cc operator.cc custom_operator.cc ../extension/src/tensor.cc
+  ../extension/src/op_meta_info.cc
   ${CMAKE_CURRENT_SOURCE_DIR}/c/c_api.cc
   ${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc
   DEPS ${FLUID_FRAMEWORK_MODULES})
diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc
new file mode 100644
index 0000000000000..1e2a77e915dea
--- /dev/null
+++ b/paddle/fluid/framework/custom_operator.cc
@@ -0,0 +1,534 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/custom_operator.h"
+
+#include <algorithm>
+#include <functional>
+#include <iostream>
+#include <map>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "paddle/fluid/extension/include/tensor.h"
+#include "paddle/fluid/framework/attribute.h"
+#include "paddle/fluid/framework/c/c_api.h"
+#include "paddle/fluid/framework/custom_tensor_utils.h"
+#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/op_meta_info_helper.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/dynload/dynamic_loader.h"
+#include "paddle/fluid/string/string_helper.h"
+
+namespace paddle {
+namespace framework {
+
+namespace detail {
+
+// dynamic lib load func
+template <typename T>
+static T* DynLoad(void* handle, std::string name) {
+  T* func = reinterpret_cast<T*>(dlsym(handle, name.c_str()));
+#if !defined(_WIN32)
+  auto errorno = dlerror();
+#else
+  auto errorno = GetLastError();
+#endif  // !_WIN32
+  PADDLE_ENFORCE_NOT_NULL(
+      func, platform::errors::NotFound(
+                "Failed to load dynamic operator library, error message(%s).",
+                errorno));
+  return func;
+}
+
+inline bool IsGradVar(const std::string& var_name) {
+  std::string suffix = kGradVarSuffix;
+  return var_name.rfind(suffix) != std::string::npos;
+}
+
+inline std::string NoGrad(const std::string& var_name) {
+  std::string suffix = kGradVarSuffix;
+  return var_name.substr(0, var_name.size() - kGradVarSuffixSize);
+}
+
+inline bool IsMemberOf(const std::vector<std::string>& vec,
+                       const std::string& name) {
+  return std::find(vec.cbegin(), vec.cend(), name) != vec.cend();
+}
+
+}  // namespace detail
+
+////////////////// Kernel Define ////////////////////
+
+// custom op kernel call function define
+static void RunKernelFunc(const framework::ExecutionContext& ctx,
+                          const paddle::KernelFunc& func,
+                          const std::vector<std::string>& inputs,
+                          const std::vector<std::string>& outputs) {
+  VLOG(1) << "Custom Operator: Start run KernelFunc.";
+  std::vector<paddle::Tensor> custom_ins;
+  for (auto& in_name : inputs) {
+    VLOG(1) << "Custom Operator: input name - " << in_name;
+    auto* x = ctx.Input<Tensor>(in_name);
+    PADDLE_ENFORCE_NOT_NULL(x, platform::errors::NotFound(
+                                   "Input tensor (%s) is nullptr.", in_name));
+    PADDLE_ENFORCE_EQ(x->IsInitialized(), true,
+                      platform::errors::InvalidArgument(
+                          "Input tensor (%s) is not initialized.", in_name));
+    auto custom_in = paddle::Tensor(
+        CustomTensorUtils::ConvertInnerPlaceToEnumPlace(x->place()));
+    CustomTensorUtils::ShareDataFrom(static_cast<const void*>(x), custom_in);
+    custom_ins.emplace_back(custom_in);
+  }
+
+  std::vector<boost::any> attrs;
+
+  VLOG(1) << "Run ComputeFunc.";
+  auto outs = func(custom_ins, attrs);
+
+  VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    auto* true_out = ctx.Output<Tensor>(outputs[i]);
+    CustomTensorUtils::ShareDataTo(outs.at(i), true_out);
+  }
+}
+
+//////////////////// Operator Define /////////////////
+
+class CustomOperator : public OperatorWithKernel {
+ public:
+  using OperatorWithKernel::OperatorWithKernel;
+
+  // Dummy infershape
+  // Because it is a pure virtual function, it must be implemented
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    VLOG(1) << "Custom Operator: Dummy infer shape of custom operator.";
+  }
+
+  /**
+   * NOTE: [Skip the Kernel Selection]
+   * Custom Op only registers one Op kernel for each device. Therefore, the
+   * data type selection and promotion that usually depend on
+   * GetExpectedKernelType, as well as the handling of various other special
+   * cases, are left to the user to implement inside the kernel. This avoids
+   * requiring users to implement a GetExpectedKernelType function when
+   * extending to other cases. The RAW type is used here as the data type,
+   * indicating that it can only be determined at runtime.
+   */
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const {
+    return framework::OpKernelType(proto::VarType::RAW, ctx.GetPlace());
+  }
+
+  /**
+   * NOTE: [Skip Input Variable Cast for DataType]
+   * Because the kernel data type is RAW, we should skip the cast for
+   * data type difference when PrepareData.
+   */
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string& var_name, const Tensor& tensor,
+      const OpKernelType& expected_kernel_type) {
+    return OpKernelType(expected_kernel_type.data_type_,
+                        expected_kernel_type.place_, tensor.layout());
+  }
+};
+
+class CustomOpMaker : public OpProtoAndCheckerMaker {
+ public:
+  explicit CustomOpMaker(const std::vector<std::string>& inputs,
+                         const std::vector<std::string>& outputs,
+                         const std::vector<std::string>& attrs)
+      : inputs_(inputs), outputs_(outputs), attrs_(attrs) {}
+
+  void Make() override {
+    for (auto& in_name : inputs_) {
+      AddInput(in_name, "The input " + in_name + " of Custom operator.");
+    }
+    for (auto& out_name : outputs_) {
+      AddOutput(out_name, "The output " + out_name + " of Custom Operator.");
+    }
+    // TODO(chenweihang): support attrs in later PR
+    AddComment(R"DOC(
+Custom Operator.
+
+Wraps a Tensor operation function that the user implements independently
+of the framework into a framework operator, so that the operation can run
+in various execution scenarios such as dynamic graph mode, static graph
+mode, and inference mode.
+
+)DOC");
+  }
+
+ private:
+  std::vector<std::string> inputs_;
+  std::vector<std::string> outputs_;
+  std::vector<std::string> attrs_;
+};
+
+template <typename T>
+class CustomGradOpMaker;
+
+template <>
+class CustomGradOpMaker<OpDesc> : public SingleGradOpMaker<OpDesc> {
+ public:
+  explicit CustomGradOpMaker(
+      const OpDesc& fwd_op, const std::unordered_set<std::string>& no_grad_set,
+      std::unordered_map<std::string, std::string>* grad_to_var,
+      const std::vector<BlockDesc*>& grad_block, const std::string& name,
+      const std::vector<std::string>& inputs,
+      const std::vector<std::string>& outputs)
+      : SingleGradOpMaker<OpDesc>(fwd_op, no_grad_set, grad_to_var, grad_block),
+        name_(name),
+        inputs_(inputs),
+        outputs_(outputs) {}
+
+ protected:
+  void Apply(GradOpPtr<OpDesc> grad_op) const override {
+    grad_op->SetType(name_);
+
+    auto fwd_op_inputs = this->InputNames();
+    auto fwd_op_outputs = this->OutputNames();
+
+    for (auto& in_name : inputs_) {
+      VLOG(1) << "Custom Operator: GradOpDescMaker - input: " << in_name;
+      if (!detail::IsGradVar(in_name)) {
+        if (detail::IsMemberOf(fwd_op_inputs, in_name)) {
+          grad_op->SetInput(in_name, this->Input(in_name));
+        } else if (detail::IsMemberOf(fwd_op_outputs, in_name)) {
+          grad_op->SetInput(in_name, this->Output(in_name));
+        } else {
+          PADDLE_THROW(platform::errors::InvalidArgument(
+              "The input tensor name `%s` is invalid, expected it is the input "
+              "or output of forward operator.",
+              in_name));
+        }
+      } else {
+        grad_op->SetInput(in_name, this->OutputGrad(detail::NoGrad(in_name)));
+      }
+    }
+    for (auto& out_name : outputs_) {
+      VLOG(1) << "Custom Operator: GradOpDescMaker - output: " << out_name;
+      grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name)));
+    }
+    // TODO(chenweihang): support attrs in later PR
+  }
+
+ private:
+  std::string name_;
+  std::vector<std::string> inputs_;
+  std::vector<std::string> outputs_;
+};
+
+template <>
+class CustomGradOpMaker<imperative::OpBase>
+    : public SingleGradOpMaker<imperative::OpBase> {
+ public:
+  explicit CustomGradOpMaker(
+      const std::string& type,
+      const imperative::NameVarBaseMap& var_base_map_in,
+      const imperative::NameVarBaseMap& var_base_map_out,
+      const AttributeMap& attrs,
+      const std::map<std::string, std::string>& inplace_map,
+      const std::string& name, const std::vector<std::string>& inputs,
+      const std::vector<std::string>& outputs)
+      : SingleGradOpMaker<imperative::OpBase>(
+            type, var_base_map_in, var_base_map_out, attrs, inplace_map),
+        name_(name),
+        inputs_(inputs),
+        outputs_(outputs) {}
+
+ protected:
+  // TODO(chenweihang): The code is duplicated with the previous one, because
+  // here OpMaker's Input, Output and other methods are protected. Putting the
+  // function implementation outside the class would make these methods
+  // uncallable, so it is still implemented in the class for the time being.
+  void Apply(GradOpPtr<imperative::OpBase> grad_op) const override {
+    grad_op->SetType(name_);
+
+    auto fwd_op_inputs = this->InputNames();
+    auto fwd_op_outputs = this->OutputNames();
+
+    for (auto& in_name : inputs_) {
+      VLOG(1) << "Custom Operator: GradOpBaseMaker - input: " << in_name;
+      if (!detail::IsGradVar(in_name)) {
+        if (detail::IsMemberOf(fwd_op_inputs, in_name)) {
+          grad_op->SetInput(in_name, this->Input(in_name));
+        } else if (detail::IsMemberOf(fwd_op_outputs, in_name)) {
+          grad_op->SetInput(in_name, this->Output(in_name));
+        } else {
+          PADDLE_THROW(platform::errors::InvalidArgument(
+              "The input tensor name `%s` is invalid, expected it is the input "
+              "or output of forward operator.",
+              in_name));
+        }
+      } else {
+        grad_op->SetInput(in_name, this->OutputGrad(detail::NoGrad(in_name)));
+      }
+    }
+    for (auto& out_name : outputs_) {
+      VLOG(1) << "Custom Operator: GradOpBaseMaker - output: " << out_name;
+      grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name)));
+    }
+    // TODO(chenweihang): support attrs in later PR
+  }
+
+ private:
+  std::string name_;
+  std::vector<std::string> inputs_;
+  std::vector<std::string> outputs_;
+};
+
+//////////// Operator and Kernel Register //////////////
+
+void RegisterOperatorKernelWithPlace(const std::string& name,
+                                     const paddle::KernelFunc& kernel_func,
+                                     const proto::VarType::Type type,
+                                     const PlaceType& place,
+                                     const std::vector<std::string>& inputs,
+                                     const std::vector<std::string>& outputs) {
+  OpKernelType key(type,
+                   CustomTensorUtils::ConvertEnumPlaceToInnerPlace(place));
+  VLOG(1) << "Custom Operator: op kernel key: " << key;
+  OperatorWithKernel::AllOpKernels()[name][key] =
+      [kernel_func, inputs, outputs](const framework::ExecutionContext& ctx) {
+        VLOG(1) << "Custom Operator: run custom kernel func in lambda.";
+        RunKernelFunc(ctx, kernel_func, inputs, outputs);
+      };
+}
+
+void RegisterOperatorKernel(const std::string& name,
+                            const paddle::KernelFunc& kernel_func,
+                            const std::vector<std::string>& inputs,
+                            const std::vector<std::string>& outputs) {
+  VLOG(1) << "Custom Operator: op name in kernel: " << name;
+  // NOTE [ Dummy Op Kernel Key ]
+  // TODO(chenweihang): Because the execution engine needs to get the device
+  // context based on op_kernel_key.place_, we register the kernel for each
+  // device. This is not entirely correct: if the user only provides a CPU
+  // kernel but calls the API on a GPU device, it will cause an error.
+  RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW,
+                                  PlaceType::kCPU, inputs, outputs);
+  RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW,
+                                  PlaceType::kGPU, inputs, outputs);
+}
+
+void RegisterOperatorWithMetaInfo(
+    const std::vector<OpMetaInfo>& op_meta_infos) {
+  /* Op register */
+  OpInfo info;
+
+  auto& base_op_meta = op_meta_infos.front();
+
+  auto op_name = OpMetaInfoHelper::GetOpName(base_op_meta);
+  auto& op_inputs = OpMetaInfoHelper::GetInputs(base_op_meta);
+  auto& op_outputs = OpMetaInfoHelper::GetOutputs(base_op_meta);
+  auto& op_attrs = OpMetaInfoHelper::GetAttrs(base_op_meta);
+  auto& kernel_fn = OpMetaInfoHelper::GetKernelFn(base_op_meta);
+  auto& infer_shape_func = OpMetaInfoHelper::GetInferShapeFn(base_op_meta);
+  auto& infer_dtype_func = OpMetaInfoHelper::GetInferDtypeFn(base_op_meta);
+
+  VLOG(1) << "Custom Operator: forward, op name: " << op_name;
+  VLOG(1) << "Custom Operator: forward, op inputs: "
+          << string::join_strings(op_inputs, ',');
+  VLOG(1) << "Custom Operator: forward, op outputs: "
+          << string::join_strings(op_outputs, ',');
+
+  // Op
+  info.creator_ = [](const std::string& op_name, const VariableNameMap& inputs,
+                     const VariableNameMap& outputs,
+                     const AttributeMap& attrs) {
+    return new CustomOperator(op_name, inputs, outputs, attrs);
+  };
+
+  // OpMaker
+  info.proto_ = new proto::OpProto;
+  info.proto_->set_type(op_name);
+
+  info.checker_ = new OpAttrChecker();
+  CustomOpMaker custom_maker(op_inputs, op_outputs, op_attrs);
+  custom_maker(info.proto_, info.checker_);
+  PADDLE_ENFORCE_EQ(
+      info.proto_->IsInitialized(), true,
+      platform::errors::PreconditionNotMet(
+          "Fail to initialize %s's OpProto, because %s is not initialized.",
+          op_name, info.proto_->InitializationErrorString()));
+
+  // InferShape
+  PADDLE_ENFORCE_NOT_NULL(
+      infer_shape_func,
+      platform::errors::PreconditionNotMet(
+          "InferShapeFn is nullptr. Need to set the InferShapeFn of custom "
+          "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))"));
+  info.infer_shape_ = [op_inputs, op_outputs,
+                       infer_shape_func](InferShapeContext* ctx) {
+    std::vector<std::vector<int64_t>> input_shapes;
+
+    VLOG(1) << "Custom Operator: InferShape - get input ddim.";
+    for (auto& in_name : op_inputs) {
+      OP_INOUT_CHECK(ctx->HasInput(in_name), "Input", in_name, "Custom");
+      auto ddim = ctx->GetInputDim(in_name);
+      input_shapes.emplace_back(framework::vectorize(ddim));
+    }
+
+    VLOG(1) << "Custom Operator: InferShape - calc output ddim.";
+    auto output_shapes = infer_shape_func(input_shapes);
+
+    VLOG(1) << "Custom Operator: InferShape - set output ddim.";
+    for (size_t i = 0; i < op_outputs.size(); ++i) {
+      ctx->SetOutputDim(op_outputs[i], framework::make_ddim(output_shapes[i]));
+    }
+  };
+
+  // Infer Dtype
+  PADDLE_ENFORCE_NOT_NULL(
+      infer_dtype_func,
+      platform::errors::PreconditionNotMet(
+          "InferDtypeFn is nullptr. Need to set the InferDtypeFn of custom "
+          "operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))"));
+  info.infer_var_type_ = [op_inputs, op_outputs,
+                          infer_dtype_func](InferVarTypeContext* ctx) {
+    std::vector<DataType> input_dtypes;
+
+    VLOG(1) << "Custom Operator: InferDtype - get input dtype.";
+    for (auto& in_name : op_inputs) {
+      auto dtype = ctx->GetInputDataType(in_name);
+      input_dtypes.emplace_back(
+          CustomTensorUtils::ConvertInnerDTypeToEnumDType(dtype));
+    }
+
+    VLOG(1) << "Custom Operator: InferDtype - infer output dtype.";
+    auto output_dtypes = infer_dtype_func(input_dtypes);
+
+    VLOG(1) << "Custom Operator: InferDtype - set output dtype.";
+    for (size_t i = 0; i < op_outputs.size(); ++i) {
+      ctx->SetOutputDataType(
+          op_outputs[i],
+          CustomTensorUtils::ConvertEnumDTypeToInnerDType(output_dtypes[i]));
+    }
+  };
+
+  // Kernel func
+  RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs);
+
+  // If grad op or double grad op exists
+  std::string cur_op_name = op_name;
+  for (size_t i = 1; i < op_meta_infos.size(); ++i) {
+    auto& cur_grad_op = op_meta_infos[i];
+
+    auto& grad_op_name = OpMetaInfoHelper::GetOpName(cur_grad_op);
+    auto& grad_op_inputs = OpMetaInfoHelper::GetInputs(cur_grad_op);
+    auto& grad_op_outputs = OpMetaInfoHelper::GetOutputs(cur_grad_op);
+    auto& grad_kernel_fn = OpMetaInfoHelper::GetKernelFn(cur_grad_op);
+
+    VLOG(1) << "Custom Operator: backward, op name: " << grad_op_name;
+    VLOG(1) << "Custom Operator: backward, op inputs: "
+            << string::join_strings(grad_op_inputs, ',');
+    VLOG(1) << "Custom Operator: backward, op outputs: "
+            << string::join_strings(grad_op_outputs, ',');
+
+    // GradOpDescMaker
+    info.grad_op_maker_ = [grad_op_name, grad_op_inputs, grad_op_outputs](
+        const OpDesc& fwd_op,
+        const std::unordered_set<std::string>& no_grad_set,
+        std::unordered_map<std::string, std::string>* grad_to_var,
+        const std::vector<BlockDesc*>& grad_block) {
+      CustomGradOpMaker<paddle::framework::OpDesc> maker(
+          fwd_op, no_grad_set, grad_to_var, grad_block, grad_op_name,
+          grad_op_inputs, grad_op_outputs);
+      return maker();
+    };
+
+    // GradOpBaseMaker
+    info.dygraph_grad_op_maker_ = [grad_op_name, grad_op_inputs,
+                                   grad_op_outputs](
+        const std::string& type,
+        const imperative::NameVarBaseMap& var_base_map_in,
+        const imperative::NameVarBaseMap& var_base_map_out,
+        const framework::AttributeMap& attrs,
+        const std::map<std::string, std::string>& inplace_map) {
+      CustomGradOpMaker<paddle::imperative::OpBase> maker(
+          type, var_base_map_in, var_base_map_out, attrs, inplace_map,
+          grad_op_name, grad_op_inputs, grad_op_outputs);
+      return maker();
+    };
+
+    /* Grad op register */
+    OpInfo grad_info;
+
+    // Grad Op
+    grad_info.creator_ = [](
+        const std::string& type, const VariableNameMap& inputs,
+        const VariableNameMap& outputs, const AttributeMap& attrs) {
+      return new CustomOperator(type, inputs, outputs, attrs);
+    };
+
+    // Grad InferShape (gradient's shape is the same as the forward input's
+    // by default)
+    grad_info.infer_shape_ = [grad_op_outputs](InferShapeContext* ctx) {
+      for (auto& out_name : grad_op_outputs) {
+        ctx->ShareDim(detail::NoGrad(out_name), out_name);
+      }
+    };
+
+    // Kernel func
+    RegisterOperatorKernel(grad_op_name, grad_kernel_fn, grad_op_inputs,
+                           grad_op_outputs);
+
+    // update current info
+    OpInfoMap::Instance().Insert(cur_op_name, info);
+    cur_op_name = grad_op_name;
+    info = grad_info;
+  }
+  // insert last info
+  OpInfoMap::Instance().Insert(cur_op_name, info);
+}
+
+void RegisterOperatorWithMetaInfoMap(
+    const paddle::OpMetaInfoMap& op_meta_info_map) {
+  auto& meta_info_map = op_meta_info_map.GetMap();
+
+  PADDLE_ENFORCE_EQ(meta_info_map.empty(), false,
platform::errors::PreconditionNotMet( + "No custom operator that needs to be registered.")); + VLOG(1) << "Custom Operator: size of op meta info map - " + << meta_info_map.size(); + // pair: {op_type, OpMetaInfo} + for (auto& pair : meta_info_map) { + VLOG(1) << "Custom Operator: pair first -> op name: " << pair.first; + RegisterOperatorWithMetaInfo(pair.second); + } +} + +////////////////////// User APIs /////////////////////// + +// load op api +void LoadOpMetaInfoAndRegisterOp(const std::string& dso_name) { + void* handle = paddle::platform::dynload::GetOpDsoHandle(dso_name); + + typedef OpMetaInfoMap& get_op_meta_info_map_t(); + auto* get_op_meta_info_map = + detail::DynLoad(handle, "PD_GetOpMetaInfoMap"); + auto& op_meta_info_map = get_op_meta_info_map(); + + RegisterOperatorWithMetaInfoMap(op_meta_info_map); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/custom_operator.h b/paddle/fluid/framework/custom_operator.h new file mode 100644 index 0000000000000..f2f97e5e5822a --- /dev/null +++ b/paddle/fluid/framework/custom_operator.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +#include "paddle/fluid/extension/include/op_meta_info.h" + +namespace paddle { +namespace framework { + +// Load custom op api: register op after user compiled +void LoadOpMetaInfoAndRegisterOp(const std::string& dso_name); + +// Register custom op api: register op directly +void RegisterOperatorWithMetaInfoMap( + const paddle::OpMetaInfoMap& op_meta_info_map); + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/custom_tensor_test.cc b/paddle/fluid/framework/custom_tensor_test.cc new file mode 100644 index 0000000000000..643ee8270a0c5 --- /dev/null +++ b/paddle/fluid/framework/custom_tensor_test.cc @@ -0,0 +1,246 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "glog/logging.h" +#include "gtest/gtest.h" +#include "paddle/fluid/extension/include/all.h" +#include "paddle/fluid/framework/custom_tensor_utils.h" +#include "paddle/fluid/framework/lod_tensor.h" + +template +paddle::Tensor InitCPUTensorForTest() { + std::vector tensor_shape{5, 5}; + auto t1 = paddle::Tensor(paddle::PlaceType::kCPU); + t1.reshape(tensor_shape); + auto* p_data_ptr = t1.mutable_data(paddle::PlaceType::kCPU); + for (int64_t i = 0; i < t1.size(); i++) { + p_data_ptr[i] = 5; + } + return t1; +} + +template +void TestCopyTensor() { + auto t1 = InitCPUTensorForTest(); + auto t1_cpu_cp = t1.template copy_to(paddle::PlaceType::kCPU); + CHECK((paddle::PlaceType::kCPU == t1_cpu_cp.place())); + for (int64_t i = 0; i < t1.size(); i++) { + CHECK_EQ(t1_cpu_cp.template data()[i], 5); + } +#ifdef PADDLE_WITH_CUDA + VLOG(2) << "Do GPU copy test"; + auto t1_gpu_cp = t1_cpu_cp.template copy_to(paddle::PlaceType::kGPU); + CHECK((paddle::PlaceType::kGPU == t1_gpu_cp.place())); + auto t1_gpu_cp_cp = t1_gpu_cp.template copy_to(paddle::PlaceType::kGPU); + CHECK((paddle::PlaceType::kGPU == t1_gpu_cp_cp.place())); + auto t1_gpu_cp_cp_cpu = + t1_gpu_cp.template copy_to(paddle::PlaceType::kCPU); + CHECK((paddle::PlaceType::kCPU == t1_gpu_cp_cp_cpu.place())); + for (int64_t i = 0; i < t1.size(); i++) { + CHECK_EQ(t1_gpu_cp_cp_cpu.template data()[i], 5); + } +#endif +} + +void TestAPIPlace() { + std::vector tensor_shape = {5, 5}; +#ifdef PADDLE_WITH_CUDA + auto t1 = paddle::Tensor(paddle::PlaceType::kGPU); + t1.reshape(tensor_shape); + t1.mutable_data(); + CHECK((paddle::PlaceType::kGPU == t1.place())); +#endif + auto t2 = paddle::Tensor(paddle::PlaceType::kCPU); + t2.reshape(tensor_shape); + t2.mutable_data(); + CHECK((paddle::PlaceType::kCPU == t2.place())); +} + +void TestAPISizeAndShape() { + std::vector tensor_shape = {5, 5}; + auto t1 = paddle::Tensor(paddle::PlaceType::kCPU); + t1.reshape(tensor_shape); + CHECK_EQ(t1.size(), 25); + CHECK(t1.shape() == tensor_shape); +} + +template +paddle::DataType TestDtype() { + std::vector tensor_shape = {5, 5}; + auto t1 = paddle::Tensor(paddle::PlaceType::kCPU); + t1.reshape(tensor_shape); + t1.template mutable_data(); + return t1.type(); +} + +template +void TestCast(paddle::DataType data_type) { + std::vector tensor_shape = {5, 5}; + auto t1 = paddle::Tensor(paddle::PlaceType::kCPU); + t1.reshape(tensor_shape); + t1.template mutable_data(); + auto t2 = t1.cast(data_type); + CHECK_EQ(t2.type(), data_type); +} + +void GroupTestCopy() { + VLOG(2) << "Float cpu-cpu-gpu-gpu-cpu"; + TestCopyTensor(); + VLOG(2) << "Double cpu-cpu-gpu-gpu-cpu"; + TestCopyTensor(); + // TODO(JiabinYang): Support these test later + // VLOG(2) << "Fp16 cpu-cpu-gpu-gpu-cpu"; + // TestCopyTensor(); + // VLOG(2) << "BF16 cpu-cpu-gpu-gpu-cpu"; + // TestCopyTensor(); + // VLOG(2) << "complex128 cpu-cpu-gpu-gpu-cpu"; + // TestCopyTensor(); + // VLOG(2) << "complex64 cpu-cpu-gpu-gpu-cpu"; + // TestCopyTensor(); + // VLOG(2) << "int cpu-cpu-gpu-gpu-cpu"; + TestCopyTensor(); + VLOG(2) << "int64 cpu-cpu-gpu-gpu-cpu"; + TestCopyTensor(); + VLOG(2) << "int16 cpu-cpu-gpu-gpu-cpu"; + TestCopyTensor(); + VLOG(2) << "int8 cpu-cpu-gpu-gpu-cpu"; + TestCopyTensor(); + VLOG(2) << "uint8 cpu-cpu-gpu-gpu-cpu"; + TestCopyTensor(); +} + +void GroupTestCast() { + VLOG(2) << "int cast"; + TestCast(paddle::DataType::FLOAT32); + VLOG(2) << "int32 cast"; + TestCast(paddle::DataType::FLOAT32); + VLOG(2) << "int64 cast"; + TestCast(paddle::DataType::FLOAT32); + VLOG(2) << "double cast"; + 
TestCast(paddle::DataType::FLOAT32); + VLOG(2) << "bfloat16 cast"; + TestCast(paddle::DataType::FLOAT32); + VLOG(2) << "float16 cast"; + TestCast(paddle::DataType::FLOAT32); + VLOG(2) << "bool cast"; + TestCast(paddle::DataType::FLOAT32); + VLOG(2) << "uint8 cast"; + TestCast(paddle::DataType::FLOAT32); + VLOG(2) << "float cast"; + TestCast(paddle::DataType::FLOAT32); +} + +void GroupTestDtype() { + CHECK(TestDtype() == paddle::DataType::FLOAT32); + CHECK(TestDtype() == paddle::DataType::FLOAT64); + CHECK(TestDtype() == paddle::DataType::FLOAT16); + CHECK(TestDtype() == paddle::DataType::BFLOAT16); + CHECK(TestDtype() == + paddle::DataType::COMPLEX128); + CHECK(TestDtype() == + paddle::DataType::COMPLEX64); + CHECK(TestDtype() == paddle::DataType::INT32); + CHECK(TestDtype() == paddle::DataType::INT64); + CHECK(TestDtype() == paddle::DataType::INT16); + CHECK(TestDtype() == paddle::DataType::INT8); + CHECK(TestDtype() == paddle::DataType::UINT8); +} + +void GroupTestDtypeConvert() { + // enum -> proto + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::COMPLEX128) == + paddle::framework::proto::VarType::COMPLEX128); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::COMPLEX64) == + paddle::framework::proto::VarType::COMPLEX64); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::FLOAT64) == + paddle::framework::proto::VarType::FP64); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::FLOAT32) == + paddle::framework::proto::VarType::FP32); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::FLOAT16) == + paddle::framework::proto::VarType::FP16); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::BFLOAT16) == + paddle::framework::proto::VarType::BF16); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::UINT8) == + paddle::framework::proto::VarType::UINT8); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::INT8) == paddle::framework::proto::VarType::INT8); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::INT32) == + paddle::framework::proto::VarType::INT32); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::INT64) == + paddle::framework::proto::VarType::INT64); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::INT16) == + paddle::framework::proto::VarType::INT16); + CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( + paddle::DataType::BOOL) == paddle::framework::proto::VarType::BOOL); + // proto -> enum + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::COMPLEX128) == + paddle::DataType::COMPLEX128); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::COMPLEX64) == + paddle::DataType::COMPLEX64); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::FP64) == + paddle::DataType::FLOAT64); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::FP32) == + paddle::DataType::FLOAT32); + 
CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::FP16) == + paddle::DataType::FLOAT16); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::BF16) == + paddle::DataType::BFLOAT16); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::INT64) == + paddle::DataType::INT64); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::INT32) == + paddle::DataType::INT32); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::INT8) == paddle::DataType::INT8); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::UINT8) == + paddle::DataType::UINT8); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::INT16) == + paddle::DataType::INT16); + CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( + paddle::framework::proto::VarType::BOOL) == paddle::DataType::BOOL); +} + +TEST(CustomTensor, copyTest) { + VLOG(2) << "TestCopy"; + GroupTestCopy(); + VLOG(2) << "TestDtype"; + GroupTestDtype(); + VLOG(2) << "TestShape"; + TestAPISizeAndShape(); + VLOG(2) << "TestPlace"; + TestAPIPlace(); + VLOG(2) << "TestCast"; + GroupTestCast(); + VLOG(2) << "TestDtypeConvert"; + GroupTestDtypeConvert(); +} diff --git a/paddle/fluid/framework/custom_tensor_utils.h b/paddle/fluid/framework/custom_tensor_utils.h new file mode 100644 index 0000000000000..4b465d3911df1 --- /dev/null +++ b/paddle/fluid/framework/custom_tensor_utils.h @@ -0,0 +1,145 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +#include "paddle/fluid/extension/include/tensor.h" +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/platform/gpu_info.h" +#include "paddle/fluid/platform/place.h" + +namespace paddle { +namespace framework { + +class CustomTensorUtils { + public: + /// \brief Share data TO another tensor. + /// Use this to pass tensor from op to op + /// \return void. + static void ShareDataTo(const paddle::Tensor& src, void* dst); + + /// \brief Share data FROM another tensor. + /// Use this to pass tensor from op to op + /// \return void. 
+ static void ShareDataFrom(const void* src, const Tensor& dst); + + static framework::proto::VarType::Type ConvertEnumDTypeToInnerDType( + const paddle::DataType& dtype) { + switch (dtype) { + case paddle::DataType::COMPLEX128: + return framework::proto::VarType::COMPLEX128; + case paddle::DataType::COMPLEX64: + return framework::proto::VarType::COMPLEX64; + case paddle::DataType::FLOAT64: + return framework::proto::VarType::FP64; + case paddle::DataType::FLOAT32: + return framework::proto::VarType::FP32; + case paddle::DataType::FLOAT16: + return framework::proto::VarType::FP16; + case paddle::DataType::BFLOAT16: + return framework::proto::VarType::BF16; + case paddle::DataType::UINT8: + return framework::proto::VarType::UINT8; + case paddle::DataType::INT8: + return framework::proto::VarType::INT8; + case paddle::DataType::INT32: + return framework::proto::VarType::INT32; + case paddle::DataType::INT64: + return framework::proto::VarType::INT64; + case paddle::DataType::INT16: + return framework::proto::VarType::INT16; + case paddle::DataType::BOOL: + return framework::proto::VarType::BOOL; + default: + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported data type code(%d) when casting enum data type into " + "paddle data type.", + static_cast(dtype))); + } + } + + static paddle::DataType ConvertInnerDTypeToEnumDType( + const framework::proto::VarType::Type& dtype) { + switch (dtype) { + case framework::proto::VarType::COMPLEX128: + return paddle::DataType::COMPLEX128; + case framework::proto::VarType::COMPLEX64: + return paddle::DataType::COMPLEX64; + case framework::proto::VarType::FP64: + return paddle::DataType::FLOAT64; + case framework::proto::VarType::FP32: + return paddle::DataType::FLOAT32; + case framework::proto::VarType::FP16: + return paddle::DataType::FLOAT16; + case framework::proto::VarType::BF16: + return paddle::DataType::BFLOAT16; + case framework::proto::VarType::INT64: + return paddle::DataType::INT64; + case framework::proto::VarType::INT32: + return paddle::DataType::INT32; + case framework::proto::VarType::INT8: + return paddle::DataType::INT8; + case framework::proto::VarType::UINT8: + return paddle::DataType::UINT8; + case framework::proto::VarType::INT16: + return paddle::DataType::INT16; + case framework::proto::VarType::BOOL: + return paddle::DataType::BOOL; + default: + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported data type `%s` when casting paddle data type into " + "enum data type.", + DataTypeToString(dtype))); + } + } + + // PaddlePlace <-> platform::Place + static platform::Place ConvertEnumPlaceToInnerPlace(const PlaceType& pc) { + if (pc == PlaceType::kCPU) { + return platform::Place(platform::CPUPlace()); + } else if (pc == PlaceType::kGPU) { +#ifdef PADDLE_WITH_CUDA + return platform::Place( + platform::CUDAPlace(platform::GetCurrentDeviceId())); +#endif + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported place type code(%d) when " + "casting enum place to paddle place.", + static_cast(pc))); + } + return platform::Place(); + } + + static PlaceType ConvertInnerPlaceToEnumPlace(const platform::Place& pc) { + if (platform::is_cpu_place(pc)) { + return PlaceType::kCPU; + } else if (platform::is_gpu_place(pc)) { +#ifdef PADDLE_WITH_CUDA + return PlaceType::kGPU; +#endif + } else { + PADDLE_THROW( + platform::errors::Unimplemented("Unsupported place type `%s` when " + "casting paddle place to enum place.", + pc)); + } + return PlaceType::kUNK; + } +}; + +} // namespace framework +} // namespace paddle diff 
--git a/paddle/fluid/framework/data_type.cc b/paddle/fluid/framework/data_type.cc index d62b33bbc65e7..de6239959316b 100644 --- a/paddle/fluid/framework/data_type.cc +++ b/paddle/fluid/framework/data_type.cc @@ -87,6 +87,10 @@ std::string DataTypeToString(const proto::VarType::Type type) { if (it != gDataTypeMap().proto_to_str_.end()) { return it->second; } + // deal with RAW type + if (type == proto::VarType::RAW) { + return "RAW(runtime decided type)"; + } PADDLE_THROW(platform::errors::Unimplemented( "Not support proto::VarType::Type(%d) as tensor type.", static_cast(type))); diff --git a/paddle/fluid/framework/data_type_transform.cc b/paddle/fluid/framework/data_type_transform.cc index 30a2ac2c6f6be..084c6e6816bd5 100644 --- a/paddle/fluid/framework/data_type_transform.cc +++ b/paddle/fluid/framework/data_type_transform.cc @@ -97,10 +97,10 @@ void TransDataType(const OpKernelType& kernel_type_for_var, framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); break; case proto::VarType::INT16: - framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); + framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); break; case proto::VarType::UINT8: - framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); + framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); break; default: PADDLE_THROW(platform::errors::Unimplemented( diff --git a/paddle/fluid/framework/op_meta_info_helper.h b/paddle/fluid/framework/op_meta_info_helper.h new file mode 100644 index 0000000000000..06d9c94172df9 --- /dev/null +++ b/paddle/fluid/framework/op_meta_info_helper.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include + +#include "paddle/fluid/extension/include/op_meta_info.h" + +namespace paddle { +namespace framework { + +class OpMetaInfoHelper { + public: + static const std::string& GetOpName(const paddle::OpMetaInfo& info) { + return info.name_; + } + static const std::vector& GetInputs( + const paddle::OpMetaInfo& info) { + return info.inputs_; + } + static const std::vector& GetOutputs( + const paddle::OpMetaInfo& info) { + return info.outputs_; + } + static const std::vector& GetAttrs( + const paddle::OpMetaInfo& info) { + return info.attrs_; + } + static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info) { + return info.kernel_fn_; + } + static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info) { + return info.infer_shape_fn_; + } + static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info) { + return info.infer_dtype_fn_; + } +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index e4b86a998a952..bdf018db6f883 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,7 +1,7 @@ set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper prune feed_fetch_method pass_builder parallel_executor profiler layer tracer engine scope_pool analysis_predictor imperative_profiler imperative_flag save_load_util dlpack_tensor device_context - gloo_wrapper infer_io_utils heter_wrapper generator op_version_registry ps_gpu_wrapper) + gloo_wrapper infer_io_utils heter_wrapper generator op_version_registry ps_gpu_wrapper custom_operator) if (WITH_GPU) set(PYBIND_DEPS ${PYBIND_DEPS} dynload_cuda) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 745bda49ecfa0..750fb6e225803 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -24,6 +24,7 @@ limitations under the License. 
*/ #include #include +#include "paddle/fluid/framework/custom_operator.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/feed_fetch_method.h" @@ -397,7 +398,7 @@ PYBIND11_MODULE(core_noavx, m) { PyCapsule_GetPointer(dltensor->ptr(), "dltensor")); PyCapsule_SetName(dltensor->ptr(), "used_dltensor"); DLTensor dl = dmt->dl_tensor; - Tensor tensor; + framework::Tensor tensor; if (dl.ctx.device_type == kDLCPU) { paddle::framework::TensorFromDLPack(dl, &tensor); @@ -535,77 +536,80 @@ PYBIND11_MODULE(core_noavx, m) { BindImperative(&m); - py::class_(m, "Tensor", py::buffer_protocol()) - .def("__array__", [](Tensor &self) { return TensorToPyArray(self); }) + py::class_(m, "Tensor", py::buffer_protocol()) + .def("__array__", + [](framework::Tensor &self) { return TensorToPyArray(self); }) .def("_is_initialized", - [](const Tensor &self) { return self.IsInitialized(); }) + [](const framework::Tensor &self) { return self.IsInitialized(); }) .def("_get_dims", - [](const Tensor &self) { return vectorize(self.dims()); }) + [](const framework::Tensor &self) { return vectorize(self.dims()); }) .def("_set_dims", - [](Tensor &self, const std::vector &dim) { + [](framework::Tensor &self, const std::vector &dim) { self.Resize(make_ddim(dim)); }) .def("_set_layout", - [](Tensor &self, const std::string &layout) { + [](framework::Tensor &self, const std::string &layout) { self.set_layout(StringToDataLayout(layout)); }) .def("_alloc_float", - [](Tensor &self, paddle::platform::CUDAPlace &place) { + [](framework::Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](Tensor &self, paddle::platform::XPUPlace &place) { + [](framework::Tensor &self, paddle::platform::XPUPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](Tensor &self, paddle::platform::CPUPlace &place) { + [](framework::Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("_alloc_double", - [](Tensor &self, paddle::platform::CPUPlace &place) { + [](framework::Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](Tensor &self, paddle::platform::CPUPlace &place) { + [](framework::Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](Tensor &self, paddle::platform::XPUPlace &place) { + [](framework::Tensor &self, paddle::platform::XPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](Tensor &self, paddle::platform::CUDAPlace &place) { + [](framework::Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { + [](framework::Tensor &self, + paddle::platform::CUDAPinnedPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { + [](framework::Tensor &self, + paddle::platform::CUDAPinnedPlace &place) { self.mutable_data(place); }) .def("_mutable_data", - [](Tensor &self, paddle::platform::CPUPlace &place, + [](framework::Tensor &self, paddle::platform::CPUPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast(self.mutable_data(place, type)); }) .def("_mutable_data", - [](Tensor &self, paddle::platform::XPUPlace &place, + [](framework::Tensor &self, paddle::platform::XPUPlace &place, paddle::framework::proto::VarType::Type type) { return 
reinterpret_cast(self.mutable_data(place, type)); }) .def("_mutable_data", - [](Tensor &self, paddle::platform::CUDAPlace &place, + [](framework::Tensor &self, paddle::platform::CUDAPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast(self.mutable_data(place, type)); }) .def("_mutable_data", - [](Tensor &self, paddle::platform::CUDAPinnedPlace &place, + [](framework::Tensor &self, paddle::platform::CUDAPinnedPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast(self.mutable_data(place, type)); }) - .def("_clear", &Tensor::clear) + .def("_clear", &framework::Tensor::clear) .def("set", SetTensorFromPyArray, py::arg("array"), py::arg("place"), py::arg("zero_copy") = false) .def("set", SetTensorFromPyArray, @@ -637,7 +641,9 @@ PYBIND11_MODULE(core_noavx, m) { t.set(np.ndarray([5, 30]), fluid.CPUPlace()) )DOC") - .def("shape", [](Tensor &self) { return vectorize(self.dims()); }, R"DOC( + .def("shape", + [](framework::Tensor &self) { return vectorize(self.dims()); }, + R"DOC( Return the shape of LoDTensor. Returns: @@ -655,7 +661,7 @@ PYBIND11_MODULE(core_noavx, m) { print(t.shape()) # [5, 30] )DOC") .def("_to_dlpack", - [](Tensor &self) { + [](framework::Tensor &self) { DLPackTensor dlpack_tensor(self, 1); DLManagedTensor *dmt = dlpack_tensor.ToCudfCompatibleDLManagedTensor(); @@ -680,20 +686,22 @@ PYBIND11_MODULE(core_noavx, m) { .def("_get_float_element", TensorGetElement) .def("_set_double_element", TensorSetElement) .def("_get_double_element", TensorGetElement) - .def("_place", [](Tensor &self) { return self.place(); }) - .def("_dtype", [](Tensor &self) { return self.type(); }) + .def("_place", [](framework::Tensor &self) { return self.place(); }) + .def("_dtype", [](framework::Tensor &self) { return self.type(); }) .def("_layout", - [](Tensor &self) { return DataLayoutToString(self.layout()); }) - .def("_share_data_with", &Tensor::ShareDataWith) + [](framework::Tensor &self) { + return DataLayoutToString(self.layout()); + }) + .def("_share_data_with", &framework::Tensor::ShareDataWith) .def("__getitem__", PySliceTensor, py::return_value_policy::reference) - .def("__str__", [](const Tensor &self) { + .def("__str__", [](const framework::Tensor &self) { std::stringstream ostr; ostr << self; return ostr.str(); }); // TODO(cql): add reference: en_user_guide_lod_tensor - py::class_(m, "LoDTensor", R"DOC( + py::class_(m, "LoDTensor", R"DOC( LoDTensor is a Tensor with optional LoD (Level of Details) information, it can be used for variable-length sequences, see :ref:`user_guide_lod_tensor` for details. @@ -777,7 +785,8 @@ PYBIND11_MODULE(core_noavx, m) { t = fluid.LoDTensor() )DOC") - .def("__array__", [](Tensor &self) { return TensorToPyArray(self); }) + .def("__array__", + [](framework::Tensor &self) { return TensorToPyArray(self); }) .def("__init__", [](LoDTensor &instance, const std::vector> &recursive_sequence_lengths) { @@ -1735,6 +1744,8 @@ All parameter, weight, gradient are variables in Paddle. 
m.def("init_gflags", framework::InitGflags); m.def("init_glog", framework::InitGLOG); m.def("load_op_library", framework::LoadOpLib); + m.def("load_op_meta_info_and_register_op", + framework::LoadOpMetaInfoAndRegisterOp); m.def("init_devices", []() { framework::InitDevices(); }); m.def("is_compiled_with_cuda", IsCompiledWithCUDA); diff --git a/python/paddle/fluid/tests/custom_op/CMakeLists.txt b/python/paddle/fluid/tests/custom_op/CMakeLists.txt index cc3c9c098c911..3c5a8a9f4a7cb 100644 --- a/python/paddle/fluid/tests/custom_op/CMakeLists.txt +++ b/python/paddle/fluid/tests/custom_op/CMakeLists.txt @@ -30,3 +30,6 @@ endforeach() set_tests_properties(test_custom_op_with_setup PROPERTIES TIMEOUT 180) set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180) set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180) + +set_tests_properties(test_simple_custom_op_setup PROPERTIES TIMEOUT 250) +set_tests_properties(test_simple_custom_op_jit PROPERTIES TIMEOUT 180) diff --git a/python/paddle/fluid/tests/custom_op/__init__.py b/python/paddle/fluid/tests/custom_op/__init__.py new file mode 100644 index 0000000000000..6f0ea85344b7e --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/paddle/fluid/tests/custom_op/relu_op_simple.cc b/python/paddle/fluid/tests/custom_op/relu_op_simple.cc new file mode 100644 index 0000000000000..684466a734147 --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/relu_op_simple.cc @@ -0,0 +1,116 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "paddle/extension.h" + +template +void relu_cpu_forward_kernel(const data_t* x_data, + data_t* out_data, + int64_t x_numel) { + for (int i = 0; i < x_numel; ++i) { + out_data[i] = std::max(static_cast(0.), x_data[i]); + } +} + +template +void relu_cpu_backward_kernel(const data_t* grad_out_data, + const data_t* out_data, + data_t* grad_x_data, + int64_t out_numel) { + for (int i = 0; i < out_numel; ++i) { + grad_x_data[i] = + grad_out_data[i] * (out_data[i] > static_cast(0) ? 1. 
: 0.); + } +} + +std::vector relu_cpu_forward(const paddle::Tensor& x) { + auto out = paddle::Tensor(paddle::PlaceType::kCPU); + out.reshape(x.shape()); + + PD_DISPATCH_FLOATING_TYPES( + x.type(), "relu_cpu_forward", ([&] { + relu_cpu_forward_kernel( + x.data(), out.mutable_data(x.place()), x.size()); + })); + + return {out}; +} + +std::vector relu_cpu_backward(const paddle::Tensor& x, + const paddle::Tensor& out, + const paddle::Tensor& grad_out) { + auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU); + grad_x.reshape(x.shape()); + + PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward", ([&] { + relu_cpu_backward_kernel( + grad_out.data(), + out.data(), + grad_x.mutable_data(x.place()), + out.size()); + })); + + return {grad_x}; +} + +std::vector relu_cuda_forward(const paddle::Tensor& x); +std::vector relu_cuda_backward(const paddle::Tensor& x, + const paddle::Tensor& out, + const paddle::Tensor& grad_out); + +std::vector ReluForward(const paddle::Tensor& x) { + // TODO(chenweihang): Check Input + if (x.place() == paddle::PlaceType::kCPU) { + return relu_cpu_forward(x); + } else if (x.place() == paddle::PlaceType::kGPU) { + return relu_cuda_forward(x); + } else { + throw std::runtime_error("Not implemented."); + } +} + +std::vector ReluBackward(const paddle::Tensor& x, + const paddle::Tensor& out, + const paddle::Tensor& grad_out) { + // TODO(chenweihang): Check Input + if (x.place() == paddle::PlaceType::kCPU) { + return relu_cpu_backward(x, out, grad_out); + } else if (x.place() == paddle::PlaceType::kGPU) { + return relu_cuda_backward(x, out, grad_out); + } else { + throw std::runtime_error("Not implemented."); + } +} + +std::vector> ReluInferShape(std::vector x_shape) { + return {x_shape}; +} + +std::vector ReluInferDType(paddle::DataType x_dtype) { + return {x_dtype}; +} + +PD_BUILD_OPERATOR("relu2") + .Inputs({"X"}) + .Outputs({"Out"}) + .SetKernelFn(PD_KERNEL(ReluForward)) + .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape)) + .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType)) + .SetBackwardOp("relu2_grad") + .Inputs({"X", "Out", paddle::Grad("Out")}) + .Outputs({paddle::Grad("X")}) + .SetKernelFn(PD_KERNEL(ReluBackward)); diff --git a/python/paddle/fluid/tests/custom_op/relu_op_simple.cu b/python/paddle/fluid/tests/custom_op/relu_op_simple.cu new file mode 100644 index 0000000000000..a9ce517607093 --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/relu_op_simple.cu @@ -0,0 +1,73 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/extension.h" + +template +__global__ void relu_cuda_forward_kernel(const data_t* x, + data_t* y, + const int num) { + int gid = blockIdx.x * blockDim.x + threadIdx.x; + for (int i = gid; i < num; i += blockDim.x * gridDim.x) { + y[i] = max(x[i], static_cast(0.)); + } +} + +template +__global__ void relu_cuda_backward_kernel(const data_t* dy, + const data_t* y, + data_t* dx, + const int num) { + int gid = blockIdx.x * blockDim.x + threadIdx.x; + for (int i = gid; i < num; i += blockDim.x * gridDim.x) { + dx[i] = dy[i] * (y[i] > 0 ? 1. : 0.); + } +} + +std::vector relu_cuda_forward(const paddle::Tensor& x) { + auto out = paddle::Tensor(paddle::PlaceType::kGPU); + out.reshape(x.shape()); + + int numel = x.size(); + int block = 512; + int grid = (numel + block - 1) / block; + PD_DISPATCH_FLOATING_TYPES( + x.type(), "relu_cuda_forward_kernel", ([&] { + relu_cuda_forward_kernel<<>>( + x.data(), out.mutable_data(x.place()), numel); + })); + + return {out}; +} + +std::vector relu_cuda_backward(const paddle::Tensor& x, + const paddle::Tensor& out, + const paddle::Tensor& grad_out) { + auto grad_x = paddle::Tensor(paddle::PlaceType::kGPU); + grad_x.reshape(x.shape()); + + int numel = out.size(); + int block = 512; + int grid = (numel + block - 1) / block; + PD_DISPATCH_FLOATING_TYPES( + out.type(), "relu_cuda_backward_kernel", ([&] { + relu_cuda_backward_kernel<<>>( + grad_out.data(), + out.data(), + grad_x.mutable_data(x.place()), + numel); + })); + + return {grad_x}; +} diff --git a/python/paddle/fluid/tests/custom_op/setup_build.py b/python/paddle/fluid/tests/custom_op/setup_build.py index 01da3bba71010..5993ef1a124b7 100644 --- a/python/paddle/fluid/tests/custom_op/setup_build.py +++ b/python/paddle/fluid/tests/custom_op/setup_build.py @@ -15,6 +15,10 @@ from utils import paddle_includes, extra_compile_args from paddle.utils.cpp_extension import CppExtension, CUDAExtension, BuildExtension, setup +from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method + +# switch to old custom op method +use_new_custom_op_load_method(False) file_dir = os.path.dirname(os.path.abspath(__file__)) diff --git a/python/paddle/fluid/tests/custom_op/setup_install.py b/python/paddle/fluid/tests/custom_op/setup_install.py index 286f3a7044c81..80477bfbea8bc 100644 --- a/python/paddle/fluid/tests/custom_op/setup_install.py +++ b/python/paddle/fluid/tests/custom_op/setup_install.py @@ -15,6 +15,10 @@ from utils import paddle_includes, extra_compile_args from paddle.utils.cpp_extension import CUDAExtension, setup +from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method + +# switch to old custom op method +use_new_custom_op_load_method(False) setup( name='custom_relu2', diff --git a/python/paddle/fluid/tests/custom_op/setup_install_simple.py b/python/paddle/fluid/tests/custom_op/setup_install_simple.py new file mode 100644 index 0000000000000..f8eba6b3ad634 --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/setup_install_simple.py @@ -0,0 +1,28 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from utils import paddle_includes, extra_compile_args +from paddle.utils.cpp_extension import CUDAExtension, setup + +setup( + name='simple_setup_relu2', + ext_modules=[ + CUDAExtension( + name='simple_setup_relu2', + sources=['relu_op_simple.cc', 'relu_op_simple.cu'], + include_dirs=paddle_includes, + extra_compile_args=extra_compile_args) + ]) diff --git a/python/paddle/fluid/tests/custom_op/test_custom_op_with_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_op_with_setup.py index 1e87161c8461c..d7bf687b2f1e2 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_op_with_setup.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_op_with_setup.py @@ -16,6 +16,10 @@ import unittest from test_custom_op import CustomOpTest, load_so from paddle.utils.cpp_extension.extension_utils import run_cmd +from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method + +# switch to old custom op method +use_new_custom_op_load_method(False) def compile_so(): diff --git a/python/paddle/fluid/tests/custom_op/test_jit_load.py b/python/paddle/fluid/tests/custom_op/test_jit_load.py index 47b45169cb862..aebfb56f93340 100644 --- a/python/paddle/fluid/tests/custom_op/test_jit_load.py +++ b/python/paddle/fluid/tests/custom_op/test_jit_load.py @@ -18,6 +18,10 @@ import numpy as np from paddle.utils.cpp_extension import load from utils import paddle_includes, extra_compile_args +from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method + +# switch to old custom op method +use_new_custom_op_load_method(False) # Compile and load custom op Just-In-Time. relu2 = load( diff --git a/python/paddle/fluid/tests/custom_op/test_setup_install.py b/python/paddle/fluid/tests/custom_op/test_setup_install.py index 3ebf9b8b032d3..bc49b26c45cae 100644 --- a/python/paddle/fluid/tests/custom_op/test_setup_install.py +++ b/python/paddle/fluid/tests/custom_op/test_setup_install.py @@ -20,6 +20,10 @@ import subprocess import numpy as np from paddle.utils.cpp_extension.extension_utils import run_cmd +from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method + +# switch to old custom op method +use_new_custom_op_load_method(False) class TestSetUpInstall(unittest.TestCase): @@ -38,7 +42,8 @@ def setUp(self): custom_egg_path = [ x for x in os.listdir(site_dir) if 'custom_relu2' in x ] - assert len(custom_egg_path) == 1 + assert len(custom_egg_path) == 1, "Matched egg number is %d." % len( + custom_egg_path) sys.path.append(os.path.join(site_dir, custom_egg_path[0])) def test_api(self): diff --git a/python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py b/python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py new file mode 100644 index 0000000000000..43f2abd93f5a0 --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py @@ -0,0 +1,66 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +import paddle +import numpy as np +from paddle.utils.cpp_extension import load +from utils import paddle_includes, extra_compile_args +from test_simple_custom_op_setup import relu2_dynamic, relu2_static + +# Compile and load custom op Just-In-Time. +simple_relu2 = load( + name='simple_jit_relu2', + sources=['relu_op_simple.cc', 'relu_op_simple.cu'], + extra_include_paths=paddle_includes, # add for Coverage CI + extra_cflags=extra_compile_args) # add for Coverage CI + + +class TestJITLoad(unittest.TestCase): + def setUp(self): + self.custom_op = simple_relu2 + self.dtypes = ['float32', 'float64'] + self.devices = ['cpu', 'gpu'] + + def test_static(self): + for device in self.devices: + for dtype in self.dtypes: + x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) + out = relu2_static(self.custom_op, device, dtype, x) + pd_out = relu2_static(self.custom_op, device, dtype, x, False) + self.assertTrue( + np.array_equal(out, pd_out), + "custom op out: {},\n paddle api out: {}".format(out, + pd_out)) + + def test_dynamic(self): + for device in self.devices: + for dtype in self.dtypes: + x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) + out, x_grad = relu2_dynamic(self.custom_op, device, dtype, x) + pd_out, pd_x_grad = relu2_dynamic(self.custom_op, device, dtype, + x, False) + self.assertTrue( + np.array_equal(out, pd_out), + "custom op out: {},\n paddle api out: {}".format(out, + pd_out)) + self.assertTrue( + np.array_equal(x_grad, pd_x_grad), + "custom op x grad: {},\n paddle api x grad: {}".format( + x_grad, pd_x_grad)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py b/python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py new file mode 100644 index 0000000000000..7d9fb678c4623 --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py @@ -0,0 +1,156 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import site +import unittest +import paddle +import paddle.static as static +import subprocess +import numpy as np +from paddle.utils.cpp_extension.extension_utils import run_cmd + + +def relu2_dynamic(func, device, dtype, np_x, use_func=True): + paddle.set_device(device) + + t = paddle.to_tensor(np_x) + t.stop_gradient = False + + out = func(t) if use_func else paddle.nn.functional.relu(t) + out.stop_gradient = False + + out.backward() + + return out.numpy(), t.grad + + +def relu2_static(func, device, dtype, np_x, use_func=True): + paddle.enable_static() + paddle.set_device(device) + + with static.scope_guard(static.Scope()): + with static.program_guard(static.Program()): + x = static.data(name='X', shape=[None, 8], dtype=dtype) + x.stop_gradient = False + out = func(x) if use_func else paddle.nn.functional.relu(x) + static.append_backward(out) + + exe = static.Executor() + exe.run(static.default_startup_program()) + + # in static mode, x data has been covered by out + out_v = exe.run(static.default_main_program(), + feed={'X': np_x}, + fetch_list=[out.name]) + + return out_v + + +def relu2_static_pe(func, device, dtype, np_x, use_func=True): + paddle.enable_static() + paddle.set_device(device) + + places = static.cpu_places() if device is 'cpu' else static.cuda_places() + with static.scope_guard(static.Scope()): + with static.program_guard(static.Program()): + x = static.data(name='X', shape=[None, 8], dtype=dtype) + x.stop_gradient = False + out = func(x) if use_func else paddle.nn.functional.relu(x) + static.append_backward(out) + + exe = static.Executor() + exe.run(static.default_startup_program()) + + # in static mode, x data has been covered by out + compiled_prog = static.CompiledProgram(static.default_main_program( + )).with_data_parallel( + loss_name=out.name, places=places) + out_v = exe.run(compiled_prog, + feed={'X': np_x}, + fetch_list=[out.name]) + + return out_v + + +class TestNewCustomOpSetUpInstall(unittest.TestCase): + def setUp(self): + cur_dir = os.path.dirname(os.path.abspath(__file__)) + # compile, install the custom op egg into site-packages under background + cmd = 'cd {} && python setup_install_simple.py install'.format(cur_dir) + run_cmd(cmd) + + # NOTE(Aurelius84): Normally, it's no need to add following codes for users. + # But we simulate to pip install in current process, so interpreter don't snap + # sys.path has been updated. So we update it manually. + + # See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3 + site_dir = site.getsitepackages()[0] + custom_egg_path = [ + x for x in os.listdir(site_dir) if 'simple_setup_relu2' in x + ] + assert len(custom_egg_path) == 1, "Matched egg number is %d." 
% len( + custom_egg_path) + sys.path.append(os.path.join(site_dir, custom_egg_path[0])) + + # usage: import the package directly + import simple_setup_relu2 + self.custom_op = simple_setup_relu2.relu2 + + self.dtypes = ['float32', 'float64'] + self.devices = ['cpu', 'gpu'] + + def test_static(self): + for device in self.devices: + for dtype in self.dtypes: + x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) + out = relu2_static(self.custom_op, device, dtype, x) + pd_out = relu2_static(self.custom_op, device, dtype, x, False) + self.assertTrue( + np.array_equal(out, pd_out), + "custom op out: {},\n paddle api out: {}".format(out, + pd_out)) + + def test_static_pe(self): + for device in self.devices: + for dtype in self.dtypes: + x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) + out = relu2_static_pe(self.custom_op, device, dtype, x) + pd_out = relu2_static_pe(self.custom_op, device, dtype, x, + False) + self.assertTrue( + np.array_equal(out, pd_out), + "custom op out: {},\n paddle api out: {}".format(out, + pd_out)) + + def test_dynamic(self): + for device in self.devices: + for dtype in self.dtypes: + x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) + out, x_grad = relu2_dynamic(self.custom_op, device, dtype, x) + pd_out, pd_x_grad = relu2_dynamic(self.custom_op, device, dtype, + x, False) + self.assertTrue( + np.array_equal(out, pd_out), + "custom op out: {},\n paddle api out: {}".format(out, + pd_out)) + self.assertTrue( + np.array_equal(x_grad, pd_x_grad), + "custom op x grad: {},\n paddle api x grad: {}".format( + x_grad, pd_x_grad)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/utils/cpp_extension/__init__.py b/python/paddle/utils/cpp_extension/__init__.py index 04e32842b0ec5..024fbb6bf7c4e 100644 --- a/python/paddle/utils/cpp_extension/__init__.py +++ b/python/paddle/utils/cpp_extension/__init__.py @@ -19,6 +19,7 @@ from .extension_utils import parse_op_info from .extension_utils import get_build_directory +from .extension_utils import load_op_meta_info_and_register_op from . import cpp_extension from . 
import extension_utils diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py index 8cd48100c99fc..dee0350160da3 100644 --- a/python/paddle/utils/cpp_extension/cpp_extension.py +++ b/python/paddle/utils/cpp_extension/cpp_extension.py @@ -25,6 +25,7 @@ from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag, bootstrap_context from .extension_utils import is_cuda_file, prepare_unix_cflags, add_std_without_repeat, get_build_directory from .extension_utils import _import_module_from_library, CustomOpInfo, _write_setup_file, _jit_compile, parse_op_name_from +from .extension_utils import use_new_custom_op_load_method IS_WINDOWS = os.name == 'nt' CUDA_HOME = find_cuda_home() @@ -132,6 +133,9 @@ def __init__(self, *args, **kwargs): super(BuildExtension, self).__init__(*args, **kwargs) self.no_python_abi_suffix = kwargs.get("no_python_abi_suffix", True) self.output_dir = kwargs.get("output_dir", None) + # for compatible two custom op define method + use_new_custom_op_load_method( + kwargs.get("use_new_method", use_new_custom_op_load_method())) def initialize_options(self): super(BuildExtension, self).initialize_options() diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index 14aaddfd6b50b..022161c87907d 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -29,6 +29,7 @@ from .. import load_op_library from ...fluid import core +from ...fluid.framework import OpProtoHolder from ...sysconfig import get_include, get_lib OS_NAME = platform.system() @@ -38,6 +39,20 @@ '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr', '-O3', '-DNVCC' ] +USING_NEW_CUSTOM_OP_LOAD_METHOD = True + + +# NOTE(chenweihang): In order to be compatible with +# the two custom op define method, after removing +# old method, we can remove them together +def use_new_custom_op_load_method(*args): + global USING_NEW_CUSTOM_OP_LOAD_METHOD + if len(args) == 0: + return USING_NEW_CUSTOM_OP_LOAD_METHOD + else: + assert len(args) == 1 and isinstance(args[0], bool) + USING_NEW_CUSTOM_OP_LOAD_METHOD = args[0] + @contextmanager def bootstrap_context(): @@ -51,6 +66,15 @@ def bootstrap_context(): bdist_egg.write_stub = origin_write_stub +def load_op_meta_info_and_register_op(lib_filename): + if USING_NEW_CUSTOM_OP_LOAD_METHOD: + core.load_op_meta_info_and_register_op(lib_filename) + else: + print("old branch") + core.load_op_library(lib_filename) + return OpProtoHolder.instance().update_op_proto() + + def custom_write_stub(resource, pyfile): """ Customized write_stub function to allow us to inject generated python @@ -77,7 +101,7 @@ def __bootstrap__(): assert os.path.exists(so_path) # load custom op shared library with abs path - new_custom_op = paddle.utils.load_op_library(so_path) + new_custom_op = paddle.utils.cpp_extension.load_op_meta_info_and_register_op(so_path) assert len(new_custom_op) == 1 m = inject_ext_module(__name__, new_custom_op[0]) @@ -90,8 +114,10 @@ def __bootstrap__(): _, op_info = CustomOpInfo.instance().last() so_path = op_info.build_directory - new_custom_op = load_op_library(so_path) - assert len(new_custom_op) == 1 + new_custom_op = load_op_meta_info_and_register_op(so_path) + assert len(new_custom_op + ) == 1, "The number of loaded costom operators is %d" % len( + new_custom_op) # NOTE: To avoid importing .so file instead of python file because they have same name, # we 
rename .so shared library to another name, see EasyInstallCommand. @@ -338,7 +364,7 @@ def parse_op_info(op_name): from paddle.fluid.framework import OpProtoHolder if op_name not in OpProtoHolder.instance().op_proto_map: raise ValueError( - "Please load {} shared library file firstly by `paddle.utils.load_op_library(...)`". + "Please load {} shared library file firstly by `paddle.utils.cpp_extension.load_op_meta_info_and_register_op(...)`". format(op_name)) op_proto = OpProtoHolder.instance().get_op_proto(op_name) @@ -361,7 +387,7 @@ def _import_module_from_library(name, build_directory): ext_path)) # load custom op_info and kernels from .so shared library - op_names = load_op_library(ext_path) + op_names = load_op_meta_info_and_register_op(ext_path) assert len(op_names) == 1 # generate Python api in ext_path @@ -473,7 +499,8 @@ def _write_setup_file(name, sources, file_path, include_dirs, compile_flags, extra_link_args={extra_link_args})], cmdclass={{"build_ext" : BuildExtension.with_options( output_dir=get_build_directory(), - no_python_abi_suffix=True) + no_python_abi_suffix=True, + use_new_method={use_new_method}) }})""").lstrip() with_cuda = False @@ -486,7 +513,8 @@ def _write_setup_file(name, sources, file_path, include_dirs, compile_flags, sources=list2str(sources), include_dirs=list2str(include_dirs), extra_compile_args=list2str(compile_flags), - extra_link_args=list2str(link_args)) + extra_link_args=list2str(link_args), + use_new_method=use_new_custom_op_load_method()) with open(file_path, 'w') as f: f.write(content) @@ -517,7 +545,10 @@ def parse_op_name_from(sources): """ def regex(content): - pattern = re.compile(r'REGISTER_OPERATOR\(([^,]+),') + if USING_NEW_CUSTOM_OP_LOAD_METHOD: + pattern = re.compile(r'BUILD_OPERATOR\(([^,]+),') + else: + pattern = re.compile(r'REGISTER_OPERATOR\(([^,]+),') content = re.sub(r'\s|\t|\n', '', content) op_name = pattern.findall(content) @@ -532,7 +563,9 @@ def regex(content): op_names |= regex(content) # TODO(Aurelius84): Support register more customs op at once - assert len(op_names) == 1 + assert len( + op_names) == 1, "The number of registered costom operators is %d" % len( + op_names) return list(op_names)[0] diff --git a/python/setup.py.in b/python/setup.py.in index 55fdbaff26463..d5c098aa9e350 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -380,6 +380,8 @@ def find_files(pattern, root): yield os.path.join(dirpath, filename) headers = ( + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle')) + + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/extension')) + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/framework')) + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/imperative')) + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/memory')) +