[Metal] Enable optimize_for_mobile on Linux
Pull Request resolved: #46384

Currently, the optimize_for_mobile binary works only on macOS, which is inconvenient. This diff introduces a new Buck target that separates out the Objective-C code, so that models can be exported for Metal on Linux machines.
ghstack-source-id: 114384675

Differential Revision: [D24322017](https://our.internmc.facebook.com/intern/diff/D24322017/)

**NOTE FOR REVIEWERS**: This PR has internal Facebook-specific changes or comments; please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D24322017/)!
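The approach, visible in the new `MetalPrepackOpRegister.cpp` below, is to keep operator registration in plain C++ that builds on every platform and to fence the Objective-C calls behind `#if defined(C10_IOS)`, leaving off-device stubs that fail loudly if ever invoked. A minimal sketch of that pattern, with a hypothetical `sketch::passthrough` op standing in for the real Metal ops:

```cpp
#include <ATen/ATen.h>
#include <torch/library.h>

// Hypothetical op illustrating the pattern; not part of the actual change.
at::Tensor passthrough(const at::Tensor& input) {
#if defined(C10_IOS)
  // On-device builds would dispatch to the Objective-C implementation here.
  return input;
#else
  // Off-device builds (e.g., Linux) can still register and serialize the
  // op; actually running it is an error.
  TORCH_CHECK(false, "passthrough can only be invoked on iOS");
  return input;
#endif
}

TORCH_LIBRARY(sketch, m) {
  m.def("passthrough(Tensor X) -> Tensor Y");
  m.impl("passthrough", TORCH_FN(passthrough));
}
```

Because registration and serialization no longer touch Objective-C, the same binary can script, optimize, and save a Metal model on Linux; only execution requires a device.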
xta0 committed Oct 15, 2020
1 parent 515a581 commit ad8135a
Showing 7 changed files with 158 additions and 164 deletions.
5 changes: 1 addition & 4 deletions aten/src/ATen/native/metal/MetalConvolution.h
@@ -1,6 +1,5 @@
#import <ATen/native/metal/MetalPrepackOpContext.h>
#import <ATen/native/metal/MetalUtils.h>
#import <ATen/native/metal/mpscnn/MPSCNNOp.h>

#include <torch/script.h>

namespace at {
@@ -49,8 +48,6 @@ struct Conv2DParams final {

NeuronType neuronType(const Conv2dOpContext& context);

Tensor conv2d_prepack_run_impl(Conv2dOpContext& context, const Tensor& input);

} // namespace metal
} // namespace native
} // namespace at
4 changes: 0 additions & 4 deletions aten/src/ATen/native/metal/MetalConvolution.mm
@@ -60,10 +60,6 @@ NeuronType neuronType(const Conv2dOpContext& context) {
  }
}

Tensor conv2d_prepack_run_impl(Conv2dOpContext& context, const Tensor& input) {
  return mpscnn::conv2d(input, context);
}

} // namespace metal
} // namespace native
} // namespace at
38 changes: 10 additions & 28 deletions aten/src/ATen/native/metal/MetalPrepackOpContext.h
@@ -1,4 +1,4 @@
#import <Foundation/Foundation.h>
#pragma once

#include <ATen/Tensor.h>
#include <torch/custom_class.h>
@@ -49,6 +49,13 @@ class Conv2dOpContext : public torch::jit::CustomClassHolder {
        output_min(output_min),
        output_max(output_max) {}

  void release_resources() override {
    if (releaseCallback) {
      releaseCallback(conv2dOp);
      conv2dOp = nullptr;
    }
  }

  Tensor weight;
  c10::optional<Tensor> bias;
  std::vector<int64_t> stride;
@@ -57,35 +64,10 @@ class Conv2dOpContext : public torch::jit::CustomClassHolder {
  int64_t groups;
  c10::optional<Scalar> output_min;
  c10::optional<Scalar> output_max;
  id extra = nil;
  void* conv2dOp = nullptr; // reserved to hold MPSCNNConv2dOp objects
  std::function<void(void*)> releaseCallback = nullptr;
};

c10::intrusive_ptr<Conv2dOpContext> unpack(
    Tensor&& weight,
    c10::optional<Tensor>&& bias,
    std::vector<int64_t>&& stride,
    std::vector<int64_t>&& padding,
    std::vector<int64_t>&& dilation,
    int64_t groups,
    c10::optional<Scalar> output_min,
    c10::optional<Scalar> output_max);

c10::intrusive_ptr<Conv2dOpContext> conv2d_prepack(
    Tensor&& weight,
    c10::optional<Tensor>&& bias,
    std::vector<int64_t>&& stride,
    std::vector<int64_t>&& padding,
    std::vector<int64_t>&& dilation,
    int64_t groups,
    c10::optional<Scalar> output_min,
    c10::optional<Scalar> output_max);

Tensor conv2d_prepack_run(
    const Tensor& input,
    const c10::intrusive_ptr<Conv2dOpContext>& op_context);

Tensor copy_to_host(const Tensor& input);

} // namespace metal
} // namespace native
} // namespace at
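Replacing the Objective-C `id extra` field with a `void*` plus a `std::function<void(void*)>` release callback is what scrubs the last Objective-C type out of this header, so it can now be compiled as plain C++ (note the `#import <Foundation/Foundation.h>` → `#pragma once` change above). The creator of the resource installs a callback that knows how to cast the pointer back and free it. A self-contained C++ sketch of the idiom, with stand-in `Resource`/`Context` types rather than the real classes:

```cpp
#include <functional>
#include <iostream>

// Stand-in for an Objective-C object this header must not name.
struct Resource {
  ~Resource() { std::cout << "resource released\n"; }
};

// Mirrors Conv2dOpContext: the owner holds the resource type-erased and
// delegates cleanup to whichever side created it.
struct Context {
  void* op = nullptr;
  std::function<void(void*)> releaseCallback = nullptr;

  void release_resources() {
    if (releaseCallback) {
      releaseCallback(op);
      op = nullptr;
    }
  }
};

int main() {
  Context ctx;
  // The platform-specific side allocates the resource and installs a
  // callback that can cast the void* back to the concrete type and free it.
  ctx.op = new Resource();
  ctx.releaseCallback = [](void* res) { delete static_cast<Resource*>(res); };
  ctx.release_resources();  // prints "resource released"
  return 0;
}
```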
71 changes: 0 additions & 71 deletions aten/src/ATen/native/metal/MetalPrepackOpContext.mm

This file was deleted.

140 changes: 140 additions & 0 deletions aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp
@@ -0,0 +1,140 @@
#include <ATen/core/op_registration/op_registration.h>
#include <ATen/native/metal/MetalPrepackOpContext.h>

#if defined(C10_IOS)
#import <ATen/native/metal/MetalUtils.h>
#import <ATen/native/metal/mpscnn/MPSCNNOps.h>
#endif

namespace at {
namespace native {
namespace metal {

c10::intrusive_ptr<Conv2dOpContext> unpack(
    Tensor&& weight,
    c10::optional<Tensor>&& bias,
    std::vector<int64_t>&& stride,
    std::vector<int64_t>&& padding,
    std::vector<int64_t>&& dilation,
    int64_t groups,
    c10::optional<Scalar> output_min,
    c10::optional<Scalar> output_max) {
#if defined(C10_IOS)
  const Tensor weightContig = weight.contiguous();
  const auto ws = weightContig.sizes();
  auto packed_buffer = permuteWeights(weightContig.data_ptr<float>(), ws.vec());
  auto packedWeight = at::empty(ws);
  int64_t size_bytes = at::prod_intlist(ws) * sizeof(float);
  memcpy(packedWeight.data_ptr(), packed_buffer.data(), size_bytes);
  return c10::make_intrusive<Conv2dOpContext>(
      std::move(packedWeight),
      std::move(bias),
      stride,
      padding,
      dilation,
      groups,
      output_min,
      output_max);
#else
  TORCH_CHECK(false, "unpack can only be invoked on iOS");
  return c10::make_intrusive<Conv2dOpContext>(
      std::move(weight),
      std::move(bias),
      stride,
      padding,
      dilation,
      groups,
      output_min,
      output_max);
#endif
}

TORCH_LIBRARY(metal, m) {
  m.class_<Conv2dOpContext>("Conv2dOpContext")
      .def_pickle(
          [](const c10::intrusive_ptr<Conv2dOpContext>& op_context)
              -> SerializationTypeConv2dPrePack { // __getstate__
            return op_context->pack();
          },
          [](SerializationTypeConv2dPrePack state)
              -> c10::intrusive_ptr<Conv2dOpContext> { // __setstate__
            return unpack(
                std::move(std::get<0>(state)),
                std::move(std::get<1>(state)),
                std::move(std::get<2>(state)),
                std::move(std::get<3>(state)),
                std::move(std::get<4>(state)),
                std::move(std::get<5>(state)),
                std::move(std::get<6>(state)),
                std::move(std::get<7>(state)));
          });
  m.def("copy_to_host(Tensor X) -> Tensor Y");
}

TORCH_LIBRARY(metal_prepack, m) {
  m.def(
      "conv2d_prepack(Tensor W, Tensor? B, int[2] stride, "
      "int[2] padding, int[2] dilation, int groups, "
      "Scalar? output_min=None, Scalar? output_max=None) "
      "-> __torch__.torch.classes.metal.Conv2dOpContext");
  m.def(
      "conv2d_run(Tensor X, "
      "__torch__.torch.classes.metal.Conv2dOpContext W_prepack) -> Tensor Y");
}

c10::intrusive_ptr<Conv2dOpContext> conv2d_prepack(
    Tensor&& weight,
    c10::optional<Tensor>&& bias,
    std::vector<int64_t>&& stride,
    std::vector<int64_t>&& padding,
    std::vector<int64_t>&& dilation,
    int64_t groups,
    c10::optional<Scalar> output_min,
    c10::optional<Scalar> output_max) {
  TORCH_CHECK(weight.dim() == 4);
  return c10::make_intrusive<Conv2dOpContext>(
      std::move(weight),
      std::move(bias),
      stride,
      padding,
      dilation,
      groups,
      output_min,
      output_max);
}

Tensor conv2d_prepack_run(
    const Tensor& input,
    const c10::intrusive_ptr<Conv2dOpContext>& op_context) {
#if defined(C10_IOS)
  return mpscnn::conv2d(input, *op_context);
#else
  TORCH_CHECK(false, "conv2d_prepack_run can only be invoked on iOS");
  return input;
#endif
}

Tensor copy_to_host(const Tensor& input) {
#if defined(C10_IOS)
  return mpscnn::copy_to_host(input);
#else
  TORCH_CHECK(false, "copy_to_host can only be invoked on iOS");
  return input;
#endif
}

TORCH_LIBRARY_IMPL(metal_prepack, CPU, m) {
  m.impl("conv2d_prepack", TORCH_FN(conv2d_prepack));
}

TORCH_LIBRARY_IMPL(metal_prepack, Metal, m) {
  m.impl("conv2d_run", conv2d_prepack_run);
}

TORCH_LIBRARY_IMPL(metal, Metal, m) {
  m.impl("copy_to_host", copy_to_host);
}

} // namespace metal
} // namespace native
} // namespace at
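The `def_pickle` registration above is what lets a prepacked context survive `torch.jit.save`/`torch.jit.load`: `__getstate__` reduces the context to its original argument tuple via `pack()`, and `__setstate__` calls `unpack` to rebuild it, which on Linux degrades to constructing the context without touching Metal. A schematic sketch of the same custom-class pickling shape, using a hypothetical `Counter` class rather than `Conv2dOpContext`:

```cpp
#include <torch/custom_class.h>

// Hypothetical custom class illustrating the __getstate__/__setstate__
// shape that Conv2dOpContext registers above.
struct Counter : torch::CustomClassHolder {
  explicit Counter(int64_t start) : value(start) {}
  int64_t value;
};

static auto counter_class =
    torch::class_<Counter>("sketch", "Counter")
        .def(torch::init<int64_t>())
        .def_pickle(
            // __getstate__: reduce the object to a serializable state.
            [](const c10::intrusive_ptr<Counter>& self) -> int64_t {
              return self->value;
            },
            // __setstate__: rebuild from that state; this is the hook under
            // which unpack() runs at model-load time.
            [](int64_t state) -> c10::intrusive_ptr<Counter> {
              return c10::make_intrusive<Counter>(state);
            });
```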
55 changes: 0 additions & 55 deletions aten/src/ATen/native/metal/MetalPrepackOpRegister.mm

This file was deleted.

9 changes: 7 additions & 2 deletions aten/src/ATen/native/metal/mpscnn/MPSCNNOps.mm
@@ -88,14 +88,19 @@ Tensor conv2d(const Tensor& input, Conv2dOpContext& context) {
       context.stride,
       context.dilation,
       context.groups};
-  MPSCNNConvOp* op = (MPSCNNConvOp*)context.extra;
+  MPSCNNConvOp* op = (__bridge MPSCNNConvOp*)(context.conv2dOp);
   NeuronType nt = neuronType(context);
   if (!op) {
     float* w = context.weight.data_ptr<float>();
     float* b = context.bias.has_value() ? ((*context.bias).data_ptr<float>())
                                         : nullptr;
     op = [MPSCNNConvOp conv2d:params weights:w bias:b neuronFilter:nt];
-    context.extra = op;
+    context.conv2dOp = (void*)CFBridgingRetain(op);
+    context.releaseCallback = ^(void* res) {
+      if (res) {
+        CFBridgingRelease(res);
+      }
+    };
   }

   auto outputSize = params.output_sizes();
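In the hunk above, `CFBridgingRetain` transfers ownership of the ARC-managed `MPSCNNConvOp` into the type-erased `conv2dOp` pointer (retaining it so ARC will not free it), `__bridge` reads the pointer back without changing ownership, and the `CFBridgingRelease` inside the block returns ownership to ARC when `release_resources()` fires. A rough C++ analogue of this lazy-build-and-cache flow, where `new`/`delete` play the roles of the bridging calls and `ConvOp`/`Context` are stand-ins:

```cpp
#include <functional>

// Stand-in for the ARC-managed MPSCNNConvOp.
struct ConvOp {
  explicit ConvOp(float w) : weight(w) {}
  float weight;
};

// Stand-in for Conv2dOpContext.
struct Context {
  float weight = 0.5f;
  void* conv2dOp = nullptr;                    // cached, type-erased op
  std::function<void(void*)> releaseCallback;  // installed by the creator
};

// Mirrors the conv2d change: build the expensive op on first use, cache it
// in the context, and record how to release it later.
float run_conv(Context& ctx, float input) {
  ConvOp* op = static_cast<ConvOp*>(ctx.conv2dOp);  // "__bridge" read
  if (!op) {
    op = new ConvOp(ctx.weight);                    // "CFBridgingRetain"
    ctx.conv2dOp = op;
    ctx.releaseCallback = [](void* res) {
      delete static_cast<ConvOp*>(res);             // "CFBridgingRelease"
    };
  }
  return op->weight * input;
}

int main() {
  Context ctx;
  float y = run_conv(ctx, 2.0f);  // first call builds and caches the op
  y = run_conv(ctx, y);           // later calls reuse it
  if (ctx.releaseCallback) {      // release_resources() equivalent
    ctx.releaseCallback(ctx.conv2dOp);
    ctx.conv2dOp = nullptr;
  }
  return static_cast<int>(y);
}
```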
