[Metal] Enable optimize_for_mobile on Linux
Pull Request resolved: #46384

Currently, the optimize_for_mobile binary works only on macOS, which is inconvenient. This diff introduces a new Buck target that separates out the Objective-C code, so that models can be exported for Metal on Linux machines.
ghstack-source-id: 114384675

Differential Revision: [D24322017](https://our.internmc.facebook.com/intern/diff/D24322017/)

**NOTE FOR REVIEWERS**: This PR has internal Facebook-specific changes or comments; please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D24322017/)!
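The approach, visible in the new `MetalPrepackOpRegister.cpp` below, is to keep operator registration in plain C++ that builds on every platform and to fence the Objective-C calls behind `#if defined(C10_IOS)`, leaving off-device stubs that fail loudly if ever invoked. A minimal sketch of that pattern, with a hypothetical `sketch::passthrough` op standing in for the real Metal ops:

```cpp
#include <ATen/ATen.h>
#include <torch/library.h>

// Hypothetical op illustrating the pattern; not part of the actual change.
at::Tensor passthrough(const at::Tensor& input) {
#if defined(C10_IOS)
  // On-device builds would dispatch to the Objective-C implementation here.
  return input;
#else
  // Off-device builds (e.g., Linux) can still register and serialize the
  // op; actually running it is an error.
  TORCH_CHECK(false, "passthrough can only be invoked on iOS");
  return input;
#endif
}

TORCH_LIBRARY(sketch, m) {
  m.def("passthrough(Tensor X) -> Tensor Y");
  m.impl("passthrough", TORCH_FN(passthrough));
}
```

Because registration and serialization no longer touch Objective-C, the same binary can script, optimize, and save a Metal model on Linux; only execution requires a device.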
xta0 committed Oct 15, 2020
1 parent 515a581 commit ad8135a
Showing 7 changed files with 158 additions and 164 deletions.
5 changes: 1 addition & 4 deletions aten/src/ATen/native/metal/MetalConvolution.h
@@ -1,6 +1,5 @@
#import <ATen/native/metal/MetalPrepackOpContext.h>
#import <ATen/native/metal/MetalUtils.h>
#import <ATen/native/metal/mpscnn/MPSCNNOp.h>

#include <torch/script.h>

namespace at {
@@ -49,8 +48,6 @@ struct Conv2DParams final {

NeuronType neuronType(const Conv2dOpContext& context);

Tensor conv2d_prepack_run_impl(Conv2dOpContext& context, const Tensor& input);

} // namespace metal
} // namespace native
} // namespace at
4 changes: 0 additions & 4 deletions aten/src/ATen/native/metal/MetalConvolution.mm
@@ -60,10 +60,6 @@ NeuronType neuronType(const Conv2dOpContext& context) {
  }
}

Tensor conv2d_prepack_run_impl(Conv2dOpContext& context, const Tensor& input) {
  return mpscnn::conv2d(input, context);
}

} // namespace metal
} // namespace native
} // namespace at
38 changes: 10 additions & 28 deletions aten/src/ATen/native/metal/MetalPrepackOpContext.h
@@ -1,4 +1,4 @@
#import <Foundation/Foundation.h>
#pragma once

#include <ATen/Tensor.h>
#include <torch/custom_class.h>
@@ -49,6 +49,13 @@ class Conv2dOpContext : public torch::jit::CustomClassHolder {
        output_min(output_min),
        output_max(output_max) {}

  void release_resources() override {
    if (releaseCallback) {
      releaseCallback(conv2dOp);
      conv2dOp = nullptr;
    }
  }

  Tensor weight;
  c10::optional<Tensor> bias;
  std::vector<int64_t> stride;
@@ -57,35 +64,10 @@ class Conv2dOpContext : public torch::jit::CustomClassHolder {
  int64_t groups;
  c10::optional<Scalar> output_min;
  c10::optional<Scalar> output_max;
  id extra = nil;
  void* conv2dOp = nullptr; // reserved to hold MPSCNNConv2dOp objects
  std::function<void(void*)> releaseCallback = nullptr;
};

c10::intrusive_ptr<Conv2dOpContext> unpack(
    Tensor&& weight,
    c10::optional<Tensor>&& bias,
    std::vector<int64_t>&& stride,
    std::vector<int64_t>&& padding,
    std::vector<int64_t>&& dilation,
    int64_t groups,
    c10::optional<Scalar> output_min,
    c10::optional<Scalar> output_max);

c10::intrusive_ptr<Conv2dOpContext> conv2d_prepack(
    Tensor&& weight,
    c10::optional<Tensor>&& bias,
    std::vector<int64_t>&& stride,
    std::vector<int64_t>&& padding,
    std::vector<int64_t>&& dilation,
    int64_t groups,
    c10::optional<Scalar> output_min,
    c10::optional<Scalar> output_max);

Tensor conv2d_prepack_run(
    const Tensor& input,
    const c10::intrusive_ptr<Conv2dOpContext>& op_context);

Tensor copy_to_host(const Tensor& input);

} // namespace metal
} // namespace native
} // namespace at
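Replacing the Objective-C `id extra` field with a `void*` plus a `std::function<void(void*)>` release callback is what scrubs the last Objective-C type out of this header, so it can now be compiled as plain C++ (note the `#import <Foundation/Foundation.h>` → `#pragma once` change above). The creator of the resource installs a callback that knows how to cast the pointer back and free it. A self-contained C++ sketch of the idiom, with stand-in `Resource`/`Context` types rather than the real classes:

```cpp
#include <functional>
#include <iostream>

// Stand-in for an Objective-C object this header must not name.
struct Resource {
  ~Resource() { std::cout << "resource released\n"; }
};

// Mirrors Conv2dOpContext: the owner holds the resource type-erased and
// delegates cleanup to whichever side created it.
struct Context {
  void* op = nullptr;
  std::function<void(void*)> releaseCallback = nullptr;

  void release_resources() {
    if (releaseCallback) {
      releaseCallback(op);
      op = nullptr;
    }
  }
};

int main() {
  Context ctx;
  // The platform-specific side allocates the resource and installs a
  // callback that can cast the void* back to the concrete type and free it.
  ctx.op = new Resource();
  ctx.releaseCallback = [](void* res) { delete static_cast<Resource*>(res); };
  ctx.release_resources();  // prints "resource released"
  return 0;
}
```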
71 changes: 0 additions & 71 deletions aten/src/ATen/native/metal/MetalPrepackOpContext.mm

This file was deleted.

140 changes: 140 additions & 0 deletions aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp
@@ -0,0 +1,140 @@
#include <ATen/core/op_registration/op_registration.h>
#include <ATen/native/metal/MetalPrepackOpContext.h>

#if defined(C10_IOS)
#import <ATen/native/metal/MetalUtils.h>
#import <ATen/native/metal/mpscnn/MPSCNNOps.h>
#endif

namespace at {
namespace native {
namespace metal {

c10::intrusive_ptr<Conv2dOpContext> unpack(
    Tensor&& weight,
    c10::optional<Tensor>&& bias,
    std::vector<int64_t>&& stride,
    std::vector<int64_t>&& padding,
    std::vector<int64_t>&& dilation,
    int64_t groups,
    c10::optional<Scalar> output_min,
    c10::optional<Scalar> output_max) {
#if defined(C10_IOS)
  const Tensor weightContig = weight.contiguous();
  const auto ws = weightContig.sizes();
  auto packed_buffer = permuteWeights(weightContig.data_ptr<float>(), ws.vec());
  auto packedWeight = at::empty(ws);
  int64_t size_bytes = at::prod_intlist(ws) * sizeof(float);
  memcpy(packedWeight.data_ptr(), packed_buffer.data(), size_bytes);
  return c10::make_intrusive<Conv2dOpContext>(
      std::move(packedWeight),
      std::move(bias),
      stride,
      padding,
      dilation,
      groups,
      output_min,
      output_max);
#else
  TORCH_CHECK(false, "unpack can only be invoked on iOS");
  return c10::make_intrusive<Conv2dOpContext>(
      std::move(weight),
      std::move(bias),
      stride,
      padding,
      dilation,
      groups,
      output_min,
      output_max);
#endif
}

TORCH_LIBRARY(metal, m) {
  m.class_<Conv2dOpContext>("Conv2dOpContext")
      .def_pickle(
          [](const c10::intrusive_ptr<Conv2dOpContext>& op_context)
              -> SerializationTypeConv2dPrePack { // __getstate__
            return op_context->pack();
          },
          [](SerializationTypeConv2dPrePack state)
              -> c10::intrusive_ptr<Conv2dOpContext> { // __setstate__
            return unpack(
                std::move(std::get<0>(state)),
                std::move(std::get<1>(state)),
                std::move(std::get<2>(state)),
                std::move(std::get<3>(state)),
                std::move(std::get<4>(state)),
                std::move(std::get<5>(state)),
                std::move(std::get<6>(state)),
                std::move(std::get<7>(state)));
          });
  m.def("copy_to_host(Tensor X) -> Tensor Y");
}

TORCH_LIBRARY(metal_prepack, m) {
  m.def(
      "conv2d_prepack(Tensor W, Tensor? B, int[2] stride, "
      "int[2] padding, int[2] dilation, int groups, "
      "Scalar? output_min=None, Scalar? output_max=None) "
      "-> __torch__.torch.classes.metal.Conv2dOpContext");
  m.def(
      "conv2d_run(Tensor X, "
      "__torch__.torch.classes.metal.Conv2dOpContext W_prepack) -> Tensor Y");
}

c10::intrusive_ptr<Conv2dOpContext> conv2d_prepack(
    Tensor&& weight,
    c10::optional<Tensor>&& bias,
    std::vector<int64_t>&& stride,
    std::vector<int64_t>&& padding,
    std::vector<int64_t>&& dilation,
    int64_t groups,
    c10::optional<Scalar> output_min,
    c10::optional<Scalar> output_max) {
  TORCH_CHECK(weight.dim() == 4);
  return c10::make_intrusive<Conv2dOpContext>(
      std::move(weight),
      std::move(bias),
      stride,
      padding,
      dilation,
      groups,
      output_min,
      output_max);
}

Tensor conv2d_prepack_run(
    const Tensor& input,
    const c10::intrusive_ptr<Conv2dOpContext>& op_context) {
#if defined(C10_IOS)
  return mpscnn::conv2d(input, *op_context);
#else
  TORCH_CHECK(false, "conv2d_prepack_run can only be invoked on iOS");
  return input;
#endif
}

Tensor copy_to_host(const Tensor& input) {
#if defined(C10_IOS)
  return mpscnn::copy_to_host(input);
#else
  TORCH_CHECK(false, "copy_to_host can only be invoked on iOS");
  return input;
#endif
}

TORCH_LIBRARY_IMPL(metal_prepack, CPU, m) {
  m.impl("conv2d_prepack", TORCH_FN(conv2d_prepack));
}

TORCH_LIBRARY_IMPL(metal_prepack, Metal, m) {
  m.impl("conv2d_run", conv2d_prepack_run);
}

TORCH_LIBRARY_IMPL(metal, Metal, m) {
  m.impl("copy_to_host", copy_to_host);
}

} // namespace metal
} // namespace native
} // namespace at
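The `def_pickle` registration above is what lets a prepacked context survive `torch.jit.save`/`torch.jit.load`: `__getstate__` reduces the context to its original argument tuple via `pack()`, and `__setstate__` calls `unpack` to rebuild it, which on Linux degrades to constructing the context without touching Metal. A schematic sketch of the same custom-class pickling shape, using a hypothetical `Counter` class rather than `Conv2dOpContext`:

```cpp
#include <torch/custom_class.h>

// Hypothetical custom class illustrating the __getstate__/__setstate__
// shape that Conv2dOpContext registers above.
struct Counter : torch::CustomClassHolder {
  explicit Counter(int64_t start) : value(start) {}
  int64_t value;
};

static auto counter_class =
    torch::class_<Counter>("sketch", "Counter")
        .def(torch::init<int64_t>())
        .def_pickle(
            // __getstate__: reduce the object to a serializable state.
            [](const c10::intrusive_ptr<Counter>& self) -> int64_t {
              return self->value;
            },
            // __setstate__: rebuild from that state; this is the hook under
            // which unpack() runs at model-load time.
            [](int64_t state) -> c10::intrusive_ptr<Counter> {
              return c10::make_intrusive<Counter>(state);
            });
```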
55 changes: 0 additions & 55 deletions aten/src/ATen/native/metal/MetalPrepackOpRegister.mm

This file was deleted.

9 changes: 7 additions & 2 deletions aten/src/ATen/native/metal/mpscnn/MPSCNNOps.mm
@@ -88,14 +88,19 @@ Tensor conv2d(const Tensor& input, Conv2dOpContext& context) {
       context.stride,
       context.dilation,
       context.groups};
-  MPSCNNConvOp* op = (MPSCNNConvOp*)context.extra;
+  MPSCNNConvOp* op = (__bridge MPSCNNConvOp*)(context.conv2dOp);
   NeuronType nt = neuronType(context);
   if (!op) {
     float* w = context.weight.data_ptr<float>();
     float* b = context.bias.has_value() ? ((*context.bias).data_ptr<float>())
                                         : nullptr;
     op = [MPSCNNConvOp conv2d:params weights:w bias:b neuronFilter:nt];
-    context.extra = op;
+    context.conv2dOp = (void*)CFBridgingRetain(op);
+    context.releaseCallback = ^(void* res) {
+      if (res) {
+        CFBridgingRelease(res);
+      }
+    };
   }

   auto outputSize = params.output_sizes();
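In the hunk above, `CFBridgingRetain` transfers ownership of the ARC-managed `MPSCNNConvOp` into the type-erased `conv2dOp` pointer (retaining it so ARC will not free it), `__bridge` reads the pointer back without changing ownership, and the `CFBridgingRelease` inside the block returns ownership to ARC when `release_resources()` fires. A rough C++ analogue of this lazy-build-and-cache flow, where `new`/`delete` play the roles of the bridging calls and `ConvOp`/`Context` are stand-ins:

```cpp
#include <functional>

// Stand-in for the ARC-managed MPSCNNConvOp.
struct ConvOp {
  explicit ConvOp(float w) : weight(w) {}
  float weight;
};

// Stand-in for Conv2dOpContext.
struct Context {
  float weight = 0.5f;
  void* conv2dOp = nullptr;                    // cached, type-erased op
  std::function<void(void*)> releaseCallback;  // installed by the creator
};

// Mirrors the conv2d change: build the expensive op on first use, cache it
// in the context, and record how to release it later.
float run_conv(Context& ctx, float input) {
  ConvOp* op = static_cast<ConvOp*>(ctx.conv2dOp);  // "__bridge" read
  if (!op) {
    op = new ConvOp(ctx.weight);                    // "CFBridgingRetain"
    ctx.conv2dOp = op;
    ctx.releaseCallback = [](void* res) {
      delete static_cast<ConvOp*>(res);             // "CFBridgingRelease"
    };
  }
  return op->weight * input;
}

int main() {
  Context ctx;
  float y = run_conv(ctx, 2.0f);  // first call builds and caches the op
  y = run_conv(ctx, y);           // later calls reuse it
  if (ctx.releaseCallback) {      // release_resources() equivalent
    ctx.releaseCallback(ctx.conv2dOp);
    ctx.conv2dOp = nullptr;
  }
  return static_cast<int>(y);
}
```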
