[XLATensor] Add conv2d

asuhan · asuhan · commit cd7cbcadc412 · 2019-01-16T18:40:42.000-08:00
diff --git a/test/test_operations.py b/test/test_operations.py
@@ -1172,7 +1172,28 @@ def test_relu(self):
         out = torch_xla._XLAC.relu(xt_x).to_tensor()
         self.assertEqualDbg(out.data, expected.data)
 
+    def test_conv2d(self):
+        # Compares XLA conv2d with F.conv2d over strides, paddings and bias use.
+        in_channels, out_channels, kernel_size = 3, 7, 5
+        input = _gen_tensor(4, in_channels, 28, 28)
+        # torch.Tensor(*sizes) is uninitialized memory; generate real values.
+        weight = _gen_tensor(out_channels, in_channels, kernel_size, kernel_size)
+        bias = _gen_tensor(out_channels)
+        xt_input = torch_xla._XLAC.XLATensor(input)
+        xt_weight = torch_xla._XLAC.XLATensor(weight)
+        xt_bias = torch_xla._XLAC.XLATensor(bias)
+        for stride in range(1, 4):
+          for padding in range(0, 3):
+            for with_bias in [True, False]:
+              conv_bias = bias if with_bias else None
+              conv_xt_bias = xt_bias if with_bias else None
+              expected = F.conv2d(input, weight, conv_bias, stride=stride, padding=padding)
+              out = torch_xla._XLAC.conv2d(xt_input, xt_weight, conv_xt_bias, stride=stride,
+                                           padding=padding, use_full_conv_precision=True).to_tensor()
+              self.assertEqualRel(out.data, expected.data)
+
 
 if __name__ == '__main__':
   torch.set_default_tensor_type('torch.FloatTensor')
+  torch.manual_seed(42)
   run_tests()
diff --git a/torch_xla/csrc/convolution.cpp b/torch_xla/csrc/convolution.cpp
@@ -241,10 +241,8 @@ xla::XlaOp BuildThnnConv2dBackwardWeight(
 }
 
 std::vector<std::pair<xla::int64, xla::int64>> MakePadding(
-    const torch::jit::Node* node) {
+    tensorflow::gtl::ArraySlice<xla::int64> padding) {
   std::vector<std::pair<xla::int64, xla::int64>> dims_padding;
-  const auto padding =
-      node->get<std::vector<int64_t>>(at::attr::padding).value();
   for (const auto dim_padding : padding) {
     dims_padding.emplace_back(dim_padding, dim_padding);
   }
@@ -257,13 +255,25 @@ xla::XlaOp BuildConvolution(
     const torch::jit::Node* node, const xla::XlaOp& input,
     const xla::XlaOp& kernel,
     const xla::PrecisionConfig::Precision conv_precision) {
-  const auto window_strides = XlaHelpers::I64List(
-      node->get<std::vector<int64_t>>(at::attr::stride).value());
-  const auto dims_padding = MakePadding(node);
+  // Read stride/padding from the node; the overload below sets up precision.
+  const auto stride = node->get<std::vector<int64_t>>(at::attr::stride).value();
+  const auto padding =
+      node->get<std::vector<int64_t>>(at::attr::padding).value();
+  return BuildConvolution(input, kernel, XlaHelpers::I64List(stride),
+                          XlaHelpers::I64List(padding), conv_precision);
+}
+
+// Builds the XLA convolution from explicit per-dimension stride and padding.
+xla::XlaOp BuildConvolution(
+    const xla::XlaOp& input, const xla::XlaOp& kernel,
+    tensorflow::gtl::ArraySlice<xla::int64> stride,
+    tensorflow::gtl::ArraySlice<xla::int64> padding,
+    const xla::PrecisionConfig::Precision conv_precision) {
+  const auto dims_padding = MakePadding(padding);
   xla::PrecisionConfig precision_config =
       XlaHelpers::BuildPrecisionConfig(conv_precision);
   return xla::ConvWithGeneralPadding(
-      input, kernel, window_strides, dims_padding,
+      input, kernel, stride, dims_padding,
       /*feature_group_count*/ 1, /*batch_group_count=*/1, &precision_config);
 }
 
@@ -273,7 +283,20 @@ xla::XlaOp BuildConvolutionBias(
     const xla::PrecisionConfig::Precision conv_precision) {
   const auto node_inputs = node->inputs();
   XLA_CHECK_GE(node_inputs.size(), size_t(4));
-  const auto conv = BuildConvolution(node, input, kernel, conv_precision);
+  const auto stride = node->get<std::vector<int64_t>>(at::attr::stride).value();
+  const auto padding =
+      node->get<std::vector<int64_t>>(at::attr::padding).value();
+  return BuildConvolutionBias(input, kernel, bias, XlaHelpers::I64List(stride),
+                              XlaHelpers::I64List(padding), conv_precision);
+}
+
+xla::XlaOp BuildConvolutionBias(
+    const xla::XlaOp& input, const xla::XlaOp& kernel, const xla::XlaOp& bias,
+    tensorflow::gtl::ArraySlice<xla::int64> stride,
+    tensorflow::gtl::ArraySlice<xla::int64> padding,
+    const xla::PrecisionConfig::Precision conv_precision) {
+  const auto conv =
+      BuildConvolution(input, kernel, stride, padding, conv_precision);
   auto broadcast_sizes = XlaHelpers::SizesOfXlaOp(conv);
   XLA_CHECK_EQ(broadcast_sizes.size(), 4);
   // Remove the channels dimension.
diff --git a/torch_xla/csrc/convolution.h b/torch_xla/csrc/convolution.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
 #include "torch/csrc/jit/ir.h"
 
 namespace torch_xla {
@@ -12,12 +13,26 @@ xla::XlaOp BuildConvolution(
     const xla::XlaOp& kernel,
     const xla::PrecisionConfig::Precision conv_precision);
 
+// Overload taking stride/padding as explicit arguments, not node attributes.
+xla::XlaOp BuildConvolution(
+    const xla::XlaOp& input, const xla::XlaOp& kernel,
+    tensorflow::gtl::ArraySlice<xla::int64> stride,
+    tensorflow::gtl::ArraySlice<xla::int64> padding,
+    const xla::PrecisionConfig::Precision conv_precision);
+
 // Same as above, then broadcasts the bias and adds it to the result.
 xla::XlaOp BuildConvolutionBias(
     const torch::jit::Node* node, const xla::XlaOp& input,
     const xla::XlaOp& kernel, const xla::XlaOp& bias,
     const xla::PrecisionConfig::Precision conv_precision);
 
+// Bias-adding variant taking stride/padding as arguments, not node attributes.
+xla::XlaOp BuildConvolutionBias(
+    const xla::XlaOp& input, const xla::XlaOp& kernel, const xla::XlaOp& bias,
+    tensorflow::gtl::ArraySlice<xla::int64> stride,
+    tensorflow::gtl::ArraySlice<xla::int64> padding,
+    const xla::PrecisionConfig::Precision conv_precision);
+
 struct Conv2DGrads {
   xla::XlaOp grad_input;
   xla::XlaOp grad_weight;
diff --git a/torch_xla/csrc/init_python_bindings.cpp b/torch_xla/csrc/init_python_bindings.cpp
@@ -226,6 +226,17 @@ void InitXlaTensorBindings(py::module m) {
         return s.str();
       });
   m.def("relu", [](std::shared_ptr<XLATensor> self) { return self->relu(); });
+  m.def(
+      "conv2d",
+      [](std::shared_ptr<XLATensor> self, std::shared_ptr<XLATensor> weight,
+         std::shared_ptr<XLATensor> bias, int stride, int padding,
+         bool use_full_conv_precision) {
+        return self->conv2d(weight, bias, stride, padding,
+                            use_full_conv_precision);
+      },
+      py::arg("input"), py::arg("weight"), py::arg("bias") = nullptr,
+      py::arg("stride") = 1, py::arg("padding") = 0,
+      py::arg("use_full_conv_precision") = false);
 }
 
 }  // namespace
diff --git a/torch_xla/csrc/ops/conv2d.cpp b/torch_xla/csrc/ops/conv2d.cpp
@@ -0,0 +1,101 @@
+#include "ops/conv2d.h"
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "convolution.h"
+#include "lowering_context.h"
+#include "ops/infer_output_shape.h"
+#include "tensorflow/compiler/xla/xla_client/debug_macros.h"
+
+namespace torch_xla {
+namespace ir {
+namespace ops {
+
+namespace {
+
+// Computes the node's output shape by handing the input and weight shapes,
+// plus a callback that builds the convolution, to InferOutputShape(). Both
+// dimensions use the same stride and padding. The bias doesn't matter for
+// shape inference.
+xla::Shape NodeOutputShape(const NodeOperand& input, const NodeOperand& weight,
+                           int stride, int padding) {
+  std::vector<xla::int64> stride_2d(2, stride);
+  std::vector<xla::int64> padding_2d(2, padding);
+  auto lower_for_shape_fn =
+      [stride_2d,
+       padding_2d](tensorflow::gtl::ArraySlice<const xla::XlaOp> operands)
+      -> xla::XlaOp {
+    XLA_CHECK(operands.size() == 2 || operands.size() == 3)
+        << "Unexpected number of operands: " << operands.size();
+    // The precision doesn't matter for shape inference.
+    return BuildConvolution(operands[0], operands[1], absl::MakeSpan(stride_2d),
+                            absl::MakeSpan(padding_2d),
+                            xla::PrecisionConfig::DEFAULT);
+  };
+  return InferOutputShape({input.node->shape(), weight.node->shape()},
+                          lower_for_shape_fn);
+}
+
+// Translates the user-facing flag into the XLA precision: HIGHEST when full
+// precision is requested, DEFAULT otherwise.
+xla::PrecisionConfig::Precision MakePrecisionConfig(
+    bool use_full_conv_precision) {
+  return use_full_conv_precision ? xla::PrecisionConfig::HIGHEST
+                                 : xla::PrecisionConfig::DEFAULT;
+}
+
+}  // namespace
+
+// Convolution with bias; the node operands are {input, weight, bias}.
+Conv2d::Conv2d(const NodeOperand& input, const NodeOperand& weight,
+               const NodeOperand& bias, int stride, int padding,
+               bool use_full_conv_precision)
+    : Node(ir::OpKind(at::aten::convolution), {input, weight, bias},
+           NodeOutputShape(input, weight, stride, padding)),
+      stride_(stride),
+      padding_(padding),
+      precision_(MakePrecisionConfig(use_full_conv_precision)) {}
+
+// Convolution without bias; the node operands are {input, weight}.
+Conv2d::Conv2d(const NodeOperand& input, const NodeOperand& weight, int stride,
+               int padding, bool use_full_conv_precision)
+    : Node(ir::OpKind(at::aten::convolution), {input, weight},
+           NodeOutputShape(input, weight, stride, padding)),
+      stride_(stride),
+      padding_(padding),
+      precision_(MakePrecisionConfig(use_full_conv_precision)) {}
+
+// Lowers the node to an XLA convolution. When a third operand is present it
+// is the bias, which gets added to the result.
+XlaOpVector Conv2d::Lower(LoweringContext* loctx) const {
+  std::vector<xla::int64> stride_2d(2, stride_);
+  std::vector<xla::int64> padding_2d(2, padding_);
+  xla::XlaOp input = loctx->GetOutputOp(operand(0));
+  xla::XlaOp kernel = loctx->GetOutputOp(operand(1));
+  xla::XlaOp output;
+  if (operands().size() == 3) {
+    xla::XlaOp bias = loctx->GetOutputOp(operand(2));
+    output =
+        BuildConvolutionBias(input, kernel, bias, absl::MakeSpan(stride_2d),
+                             absl::MakeSpan(padding_2d), precision_);
+  } else {
+    XLA_CHECK_EQ(operands().size(), 2);
+    output = BuildConvolution(input, kernel, absl::MakeSpan(stride_2d),
+                              absl::MakeSpan(padding_2d), precision_);
+  }
+  return ReturnOp(output, loctx);
+}
+
+// Extends the base description with the stride, padding and precision name.
+std::string Conv2d::ToString() const {
+  std::stringstream ss;
+  ss << Node::ToString() << ", stride=" << stride_ << ", padding=" << padding_
+     << ", precision=" << xla::PrecisionConfig::Precision_Name(precision_);
+  return ss.str();
+}
+
+}  // namespace ops
+}  // namespace ir
+}  // namespace torch_xla
diff --git a/torch_xla/csrc/ops/conv2d.h b/torch_xla/csrc/ops/conv2d.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <string>
+
+#include "ir.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+
+namespace torch_xla {
+namespace ir {
+namespace ops {
+
+// IR node for 2D convolutions with or without bias.
+class Conv2d : public Node {
+ public:
+  // Convolution with bias. `stride` and `padding` apply to both dimensions;
+  // `use_full_conv_precision` selects the HIGHEST XLA precision instead of
+  // DEFAULT.
+  Conv2d(const NodeOperand& input, const NodeOperand& weight,
+         const NodeOperand& bias, int stride, int padding,
+         bool use_full_conv_precision);
+
+  // Same as above, without bias.
+  Conv2d(const NodeOperand& input, const NodeOperand& weight, int stride,
+         int padding, bool use_full_conv_precision);
+
+  XlaOpVector Lower(LoweringContext* loctx) const override;
+
+  std::string ToString() const override;
+
+ private:
+  // The parameters of the convolution. Only the same stride and padding in
+  // both dimensions is supported for now.
+  int stride_;
+  int padding_;
+  // The numeric precision to use on TPU.
+  xla::PrecisionConfig::Precision precision_;
+};
+
+}  // namespace ops
+}  // namespace ir
+}  // namespace torch_xla
diff --git a/torch_xla/csrc/tensor.cpp b/torch_xla/csrc/tensor.cpp
@@ -13,6 +13,7 @@
 #include "helpers.h"
 #include "lowering_context.h"
 #include "ops/arithmetic_ir_ops.h"
+#include "ops/conv2d.h"
 #include "ops/cross_replica_sum.h"
 #include "ops/device_data.h"
 #include "ops/generic.h"
@@ -621,6 +622,24 @@ std::shared_ptr<XLATensor> XLATensor::relu() {
                 GetDevice());
 }
 
+std::shared_ptr<XLATensor> XLATensor::conv2d(
+    const std::shared_ptr<XLATensor>& weight,
+    const std::shared_ptr<XLATensor>& bias, int stride, int padding,
+    bool use_full_conv_precision) {
+  // Builds the Conv2d IR node over this tensor and `weight`. A null `bias`
+  // selects the two-operand (bias-free) constructor.
+  const ir::NodeOperand input_op(GetIrNode());
+  const ir::NodeOperand kernel_op(weight->GetIrNode());
+  std::shared_ptr<ir::ops::Conv2d> conv =
+      bias ? std::make_shared<ir::ops::Conv2d>(
+                 input_op, kernel_op, ir::NodeOperand(bias->GetIrNode()),
+                 stride, padding, use_full_conv_precision)
+           : std::make_shared<ir::ops::Conv2d>(
+                 input_op, kernel_op, stride, padding,
+                 use_full_conv_precision);
+  return Create(conv, GetDevice());
+}
+
 std::shared_ptr<XLATensor> XLATensor::cross_replica_sum(
     const std::vector<std::vector<xla::int64>>& groups) {
   ir::NodePtr crs =
diff --git a/torch_xla/csrc/tensor.h b/torch_xla/csrc/tensor.h
@@ -150,6 +150,13 @@ class XLATensor {
   // Additional operations which are part of the PyTorch Tensor functionality.
   std::shared_ptr<XLATensor> relu();
 
+  // 2D convolution of this tensor with `weight`. `bias` may be null, in which
+  // case no bias is added. `stride` and `padding` apply to both dimensions.
+  std::shared_ptr<XLATensor> conv2d(const std::shared_ptr<XLATensor>& weight,
+                                    const std::shared_ptr<XLATensor>& bias,
+                                    int stride, int padding,
+                                    bool use_full_conv_precision);
+
   std::shared_ptr<XLATensor> cross_replica_sum(
       const std::vector<std::vector<xla::int64>>& groups);