From d37922c85fe90f68eace795ea83c25e758f511e8 Mon Sep 17 00:00:00 2001 From: taox Date: Thu, 15 Oct 2020 00:25:23 -0700 Subject: [PATCH] [Metal] Enable optimize_for_mobile on Linux Currently, the optimize_for_mobile binary only works on macOS, which is not very convenient to use. This diff introduces a new buck target that separates out the objective-c code. The goal here is to be able to export models for metal on linux machines. Differential Revision: [D24322017](https://our.internmc.facebook.com/intern/diff/D24322017/) **NOTE FOR REVIEWERS**: This PR has internal Facebook specific changes or comments, please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D24322017/)! [ghstack-poisoned] --- aten/src/ATen/native/metal/MetalConvolution.h | 5 +- .../src/ATen/native/metal/MetalConvolution.mm | 4 - .../ATen/native/metal/MetalPrepackOpContext.h | 38 ++--- .../native/metal/MetalPrepackOpContext.mm | 71 --------- .../native/metal/MetalPrepackOpRegister.cpp | 140 ++++++++++++++++++ .../native/metal/MetalPrepackOpRegister.mm | 55 ------- .../src/ATen/native/metal/mpscnn/MPSCNNOps.mm | 9 +- 7 files changed, 158 insertions(+), 164 deletions(-) delete mode 100644 aten/src/ATen/native/metal/MetalPrepackOpContext.mm create mode 100644 aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp delete mode 100644 aten/src/ATen/native/metal/MetalPrepackOpRegister.mm diff --git a/aten/src/ATen/native/metal/MetalConvolution.h b/aten/src/ATen/native/metal/MetalConvolution.h index 6e811a34267c..7a7bdfbd21c2 100644 --- a/aten/src/ATen/native/metal/MetalConvolution.h +++ b/aten/src/ATen/native/metal/MetalConvolution.h @@ -1,6 +1,5 @@ #import -#import -#import + #include namespace at { @@ -49,8 +48,6 @@ struct Conv2DParams final { NeuronType neuronType(const Conv2dOpContext& context); -Tensor conv2d_prepack_run_impl(Conv2dOpContext& context, const Tensor& input); - } // namespace metal } // namespace native } // namespace at diff --git a/aten/src/ATen/native/metal/MetalConvolution.mm b/aten/src/ATen/native/metal/MetalConvolution.mm index 178f202e9530..1d316e2144d2 100644 --- a/aten/src/ATen/native/metal/MetalConvolution.mm +++ b/aten/src/ATen/native/metal/MetalConvolution.mm @@ -60,10 +60,6 @@ NeuronType neuronType(const Conv2dOpContext& context) { } } -Tensor conv2d_prepack_run_impl(Conv2dOpContext& context, const Tensor& input) { - return mpscnn::conv2d(input, context); -} - } // namespace metal } // namespace native } // namespace at diff --git a/aten/src/ATen/native/metal/MetalPrepackOpContext.h b/aten/src/ATen/native/metal/MetalPrepackOpContext.h index 6a07af68e049..4e20e242183a 100644 --- a/aten/src/ATen/native/metal/MetalPrepackOpContext.h +++ b/aten/src/ATen/native/metal/MetalPrepackOpContext.h @@ -1,4 +1,4 @@ -#import +#pragma once #include #include @@ -49,6 +49,13 @@ class Conv2dOpContext : public torch::jit::CustomClassHolder { output_min(output_min), output_max(output_max) {} + void release_resources() override { + if (releaseCallback) { + releaseCallback(conv2dOp); + conv2dOp = nullptr; + } + } + Tensor weight; c10::optional bias; std::vector stride; @@ -57,35 +64,10 @@ class Conv2dOpContext : public torch::jit::CustomClassHolder { int64_t groups; c10::optional output_min; c10::optional output_max; - id extra = nil; + void* conv2dOp = nullptr; // reserved for MPSCNNConv2dOp + std::function releaseCallback = nullptr; }; -c10::intrusive_ptr unpack( - Tensor&& weight, - c10::optional&& bias, - std::vector&& stride, - std::vector&& padding, - std::vector&& dilation, - int64_t groups, - c10::optional output_min, - c10::optional output_max); - -c10::intrusive_ptr conv2d_prepack( - Tensor&& weight, - c10::optional&& bias, - std::vector&& stride, - std::vector&& padding, - std::vector&& dilation, - int64_t groups, - c10::optional output_min, - c10::optional output_max); - -Tensor conv2d_prepack_run( - const Tensor& input, - const c10::intrusive_ptr& op_context); - -Tensor copy_to_host(const Tensor& input); - } // namespace metal } // namespace native } // namespace at diff --git a/aten/src/ATen/native/metal/MetalPrepackOpContext.mm b/aten/src/ATen/native/metal/MetalPrepackOpContext.mm deleted file mode 100644 index f51c8ad824a0..000000000000 --- a/aten/src/ATen/native/metal/MetalPrepackOpContext.mm +++ /dev/null @@ -1,71 +0,0 @@ -#import -#import -#import -#import - -#include - -namespace at { -namespace native { -namespace metal { - -c10::intrusive_ptr conv2d_prepack( - at::Tensor&& weight, - c10::optional&& bias, - std::vector&& stride, - std::vector&& padding, - std::vector&& dilation, - const int64_t groups, - c10::optional output_min, - c10::optional output_max) { - TORCH_CHECK(weight.dim() == 4); - return c10::make_intrusive( - std::move(weight), - std::move(bias), - stride, - padding, - dilation, - groups, - output_min, - output_max); -} - -c10::intrusive_ptr unpack( - Tensor&& weight, - c10::optional&& bias, - std::vector&& stride, - std::vector&& padding, - std::vector&& dilation, - int64_t groups, - c10::optional output_min, - c10::optional output_max) { - const Tensor weightContig = weight.contiguous(); - const auto ws = weightContig.sizes(); - auto packed_buffer = permuteWeights(weightContig.data_ptr(), ws.vec()); - auto packedWeight = at::empty(ws); - int64_t size_bytes = at::prod_intlist(ws) * sizeof(float); - memcpy(packedWeight.data_ptr(), packed_buffer.data(), size_bytes); - return c10::make_intrusive( - std::move(packedWeight), - std::move(bias), - stride, - padding, - dilation, - groups, - output_min, - output_max); -} - -Tensor conv2d_prepack_run( - const Tensor& input, - const c10::intrusive_ptr& op_context) { - return conv2d_prepack_run_impl(*op_context, input); -} - -Tensor copy_to_host(const Tensor& input) { - return mpscnn::copy_to_host(input); -} - -} // namespace metal -} // namespace native -} // namespace at diff --git a/aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp b/aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp new file mode 100644 index 000000000000..60cddbd7eb13 --- /dev/null +++ b/aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp @@ -0,0 +1,140 @@ +#include +#include + +#if defined(C10_IOS) +#import +#import +#endif + +namespace at { +namespace native { +namespace metal { + +c10::intrusive_ptr unpack( + Tensor&& weight, + c10::optional&& bias, + std::vector&& stride, + std::vector&& padding, + std::vector&& dilation, + int64_t groups, + c10::optional output_min, + c10::optional output_max) { +#if defined(C10_IOS) + const Tensor weightContig = weight.contiguous(); + const auto ws = weightContig.sizes(); + auto packed_buffer = permuteWeights(weightContig.data_ptr(), ws.vec()); + auto packedWeight = at::empty(ws); + int64_t size_bytes = at::prod_intlist(ws) * sizeof(float); + memcpy(packedWeight.data_ptr(), packed_buffer.data(), size_bytes); + return c10::make_intrusive( + std::move(packedWeight), + std::move(bias), + stride, + padding, + dilation, + groups, + output_min, + output_max); +#else + TORCH_CHECK(false, "unpack can only be invoked on iOS") + return c10::make_intrusive( + std::move(weight), + std::move(bias), + stride, + padding, + dilation, + groups, + output_min, + output_max); +#endif +} + +TORCH_LIBRARY(metal, m) { + m.class_("Conv2dOpContext") + .def_pickle( + [](const c10::intrusive_ptr& op_context) + -> SerializationTypeConv2dPrePack { // __getstate__ + return op_context->pack(); + }, + [](SerializationTypeConv2dPrePack state) + -> c10::intrusive_ptr { // __setstate__ + return unpack( + std::move(std::get<0>(state)), + std::move(std::get<1>(state)), + std::move(std::get<2>(state)), + std::move(std::get<3>(state)), + std::move(std::get<4>(state)), + std::move(std::get<5>(state)), + std::move(std::get<6>(state)), + std::move(std::get<7>(state))); + }); + m.def("copy_to_host(Tensor X) -> Tensor Y"); +} + +TORCH_LIBRARY(metal_prepack, m) { + m.def( + "conv2d_prepack(Tensor W, Tensor? B, int[2] stride, " + "int[2] padding, int[2] dilation, int groups, " + "Scalar? output_min=None, Scalar? output_max=None) " + "-> __torch__.torch.classes.metal.Conv2dOpContext"); + m.def( + "conv2d_run(Tensor X, " + "__torch__.torch.classes.metal.Conv2dOpContext W_prepack) -> Tensor Y"); +} + +c10::intrusive_ptr conv2d_prepack( + Tensor&& weight, + c10::optional&& bias, + std::vector&& stride, + std::vector&& padding, + std::vector&& dilation, + int64_t groups, + c10::optional output_min, + c10::optional output_max) { + TORCH_CHECK(weight.dim() == 4); + return c10::make_intrusive( + std::move(weight), + std::move(bias), + stride, + padding, + dilation, + groups, + output_min, + output_max); +} + +Tensor conv2d_prepack_run( + const Tensor& input, + const c10::intrusive_ptr& op_context) { +#if defined(C10_IOS) + return mpscnn::conv2d(input, *op_context); +#else + TORCH_CHECK(false, "conv2d_prepack_run can only be invoked on iOS"); + return input; +#endif +} + +Tensor copy_to_host(const Tensor& input) { +#if defined(C10_IOS) + return mpscnn::copy_to_host(input); +#else + TORCH_CHECK(false, "copy_to_host can only be invoked on iOS"); + return input; +#endif +} + +TORCH_LIBRARY_IMPL(metal_prepack, CPU, m) { + m.impl("conv2d_prepack", TORCH_FN(conv2d_prepack)); +} + +TORCH_LIBRARY_IMPL(metal_prepack, Metal, m) { + m.impl("conv2d_run", conv2d_prepack_run); +} + +TORCH_LIBRARY_IMPL(metal, Metal, m) { + m.impl("copy_to_host", copy_to_host); +} + +} // namespace metal +} // namespace native +} // namespace at diff --git a/aten/src/ATen/native/metal/MetalPrepackOpRegister.mm b/aten/src/ATen/native/metal/MetalPrepackOpRegister.mm deleted file mode 100644 index d1872398ddc9..000000000000 --- a/aten/src/ATen/native/metal/MetalPrepackOpRegister.mm +++ /dev/null @@ -1,55 +0,0 @@ -#include -#import - -namespace at { -namespace native { -namespace metal { - -TORCH_LIBRARY(metal, m) { - m.class_("Conv2dOpContext") - .def_pickle( - [](const c10::intrusive_ptr& op_context) - -> SerializationTypeConv2dPrePack { // __getstate__ - return op_context->pack(); - }, - [](SerializationTypeConv2dPrePack state) - -> c10::intrusive_ptr { // __setstate__ - return unpack( - std::move(std::get<0>(state)), - std::move(std::get<1>(state)), - std::move(std::get<2>(state)), - std::move(std::get<3>(state)), - std::move(std::get<4>(state)), - std::move(std::get<5>(state)), - std::move(std::get<6>(state)), - std::move(std::get<7>(state))); - }); - m.def("copy_to_host(Tensor X) -> Tensor Y"); -} - -TORCH_LIBRARY(metal_prepack, m) { - m.def( - "conv2d_prepack(Tensor W, Tensor? B, int[2] stride, " - "int[2] padding, int[2] dilation, int groups, " - "Scalar? output_min=None, Scalar? output_max=None) " - "-> __torch__.torch.classes.metal.Conv2dOpContext"); - m.def( - "conv2d_run(Tensor X, " - "__torch__.torch.classes.metal.Conv2dOpContext W_prepack) -> Tensor Y"); -} - -TORCH_LIBRARY_IMPL(metal_prepack, CPU, m) { - m.impl("conv2d_prepack", TORCH_FN(conv2d_prepack)); -} - -TORCH_LIBRARY_IMPL(metal_prepack, Metal, m) { - m.impl("conv2d_run", conv2d_prepack_run); -} - -TORCH_LIBRARY_IMPL(metal, Metal, m) { - m.impl("copy_to_host", copy_to_host); -} - -} // namespace metal -} // namespace native -} // namespace at diff --git a/aten/src/ATen/native/metal/mpscnn/MPSCNNOps.mm b/aten/src/ATen/native/metal/mpscnn/MPSCNNOps.mm index fc883337d401..d2b54f0dcb3e 100644 --- a/aten/src/ATen/native/metal/mpscnn/MPSCNNOps.mm +++ b/aten/src/ATen/native/metal/mpscnn/MPSCNNOps.mm @@ -88,14 +88,19 @@ Tensor conv2d(const Tensor& input, Conv2dOpContext& context) { context.stride, context.dilation, context.groups}; - MPSCNNConvOp* op = (MPSCNNConvOp*)context.extra; + MPSCNNConvOp* op = (__bridge MPSCNNConvOp*)(context.conv2dOp); NeuronType nt = neuronType(context); if (!op) { float* w = context.weight.data_ptr(); float* b = context.bias.has_value() ? ((*context.bias).data_ptr()) : nullptr; op = [MPSCNNConvOp conv2d:params weights:w bias:b neuronFilter:nt]; - context.extra = op; + context.conv2dOp = (void*)CFBridgingRetain(op); + context.releaseCallback = ^(void* res) { + if (res) { + CFBridgingRelease(res); + } + }; } auto outputSize = params.output_sizes();