pytorch · xta0 · Mar 16, 2021 · Mar 17, 2021 · Mar 17, 2021
diff --git a/aten/src/ATen/native/metal/mpscnn/MPSImageUtils.h b/aten/src/ATen/native/metal/mpscnn/MPSImageUtils.h
@@ -47,6 +47,35 @@ static inline MPSImage* imageFromTensor(const Tensor& tensor) {
   return implStorage.texture()->image();
 }
 
+/*
+ MPSImage carries a IntList shape which is identical to the shape of the CPU
+ tensor it’s converted from.
+
+ 1) 1D tensors (W,) are always stored as MPSImage(N=1, C=1, H=1, W=W).
+ 2) 2D tensors (H, W) are always stored as MPSImage(N=1, C=1, H=H, W=W).
+ 3) 3D tensors (C, H, W) are always stored as MPSImage(N=1, C=C, H=H, W=W).
+ 4) 4D tensors (N, C, H, W) are always stored as MPSImage(N=N, C=C, H=H, W=W).
+ 5) 5D tensors (T, N, C, H, W) are always stored as MPSImage(N=N*T, C=C, H=H, W=W).
+ 6) ...
+ */
+static inline std::vector<int64_t> computeTextureSize(IntArrayRef sizes) {
+  std::vector<int64_t> textureSize(4, 1);
+  int64_t index = 3;
+  int64_t batch = 1;
+  for (int i = sizes.size() - 1; i >= 0; i--) {
+    if (index != 0) {
+      textureSize[index] = sizes[i];
+      index--;
+      continue;
+    }
+    // For higher dimensional tensors,
+    // multiply rest of dims into textureSize[0]
+    batch *= sizes[i];
+  }
+  textureSize[0] = batch;
+  return textureSize;
+}
+
 } // namespace metal
 } // namespace native
 } // namespace at
diff --git a/aten/src/ATen/native/metal/mpscnn/MPSImageWrapper.h b/aten/src/ATen/native/metal/mpscnn/MPSImageWrapper.h
@@ -9,12 +9,6 @@ namespace at {
 namespace native {
 namespace metal {
 
-enum class TextureType {
-  TextureNone,
-  TextureType2D,
-  TextureType2DArray,
-};
-
 class API_AVAILABLE(ios(10.0), macos(10.13)) MPSImageWrapper {
  public:
   MPSImageWrapper(IntArrayRef sizes);
@@ -30,7 +24,6 @@ class API_AVAILABLE(ios(10.0), macos(10.13)) MPSImageWrapper {
   void copyFromTexture(MPSImage* image);
   void setCommandBuffer(MetalCommandBuffer* buffer);
   MetalCommandBuffer* commandBuffer() const;
-  TextureType textureType() const;
   IntArrayRef textureSizes() const;
   MPSImage* image() const;
   void recycleImage();

diff --git a/aten/src/ATen/native/metal/mpscnn/MPSImageWrapper.mm b/aten/src/ATen/native/metal/mpscnn/MPSImageWrapper.mm
@@ -10,25 +10,12 @@
 namespace native {
 namespace metal {
 
-std::vector<int64_t> textureSizeFromSizes(IntArrayRef sizes, TextureType type) {
-  if (sizes.size() == 2) {
-    if (type == TextureType::TextureType2DArray) {
-      return {sizes[0], sizes[1], 1, 1};
-    } else if (type == TextureType::TextureType2D) {
-      return {1, 1, sizes[0], sizes[1]};
-    } else {
-      return {};
-    }
-  }
-  return sizes.vec();
-}
 MPSImageWrapper::MPSImageWrapper(IntArrayRef sizes) {
-  _textureSizes = textureSizeFromSizes(sizes, TextureType::TextureType2D);
+  _textureSizes = computeTextureSize(sizes);
 }
 
 void MPSImageWrapper::copyDataFromHost(const float* inputData) {
   TORCH_CHECK(inputData);
-  TORCH_CHECK(_textureSizes.size() == 4);
   _commandBuffer = [MetalCommandBuffer currentBuffer];
   _image = createTemporaryImage(_commandBuffer, _textureSizes, inputData);
 }
@@ -61,29 +48,16 @@
   return _textureSizes;
 }
 
-TextureType MPSImageWrapper::textureType() const {
-  if (!_image) {
-    return TextureType::TextureNone;
-  }
-  MTLTextureType textureType = _image.textureType;
-  if (textureType == MTLTextureType2D) {
-    return TextureType::TextureType2D;
-  } else if (textureType == MTLTextureType2DArray) {
-    return TextureType::TextureType2DArray;
-  }
-  return TextureType::TextureNone;
-}
-
 void MPSImageWrapper::allocateTextureStorage(IntArrayRef sizes) {
-  _textureSizes = sizes.vec();
+  _textureSizes = computeTextureSize(sizes);
   _image = createStaticImage(_textureSizes);
 }
 
 void MPSImageWrapper::allocateTemporaryTextureStorage(
     IntArrayRef sizes,
     MetalCommandBuffer* commandBuffer) {
   TORCH_CHECK(commandBuffer)
-  _textureSizes = sizes.vec();
+  _textureSizes = computeTextureSize(sizes);
   _commandBuffer = commandBuffer;
   _image = createTemporaryImage(commandBuffer, _textureSizes);
 }

diff --git a/aten/src/ATen/native/metal/mpscnn/tests/MPSCNNTests.mm b/aten/src/ATen/native/metal/mpscnn/tests/MPSCNNTests.mm
@@ -76,23 +76,24 @@ bool TEST(const std::vector<int64_t>& sizes, std::string name, Func block) {
   return b;
 }
 
-void PRINT_TENSOR(std::string name, const at::Tensor& tensor){
-    std::string str = name + ": ";
-    auto print = [&](const at::Tensor& t){
-        for(int i=0; i<t.numel(); ++i){
-            NSString* sf = [NSString stringWithFormat:@"%.2f",t.data_ptr<float>()[i]];
-            str += sf.UTF8String;
-            str += ", ";
-        }
-        std::cout<<str<<std::endl;
-    };
-    if(tensor.is_metal()){
-        MPSImage* image = at::native::metal::imageFromTensor(tensor);
-        auto t = at::native::metal::staticImageToTensor(image);
-        print(t);
-    } else {
-        print(tensor);
+void PRINT_TENSOR(std::string name, const at::Tensor& tensor) {
+  std::string str = name + ": ";
+  auto print = [&](const at::Tensor& t) {
+    for (int i = 0; i < t.numel(); ++i) {
+      NSString* sf =
+          [NSString stringWithFormat:@"%.2f", t.data_ptr<float>()[i]];
+      str += sf.UTF8String;
+      str += ", ";
     }
+    std::cout << str << std::endl;
+  };
+  if (tensor.is_metal()) {
+    MPSImage* image = at::native::metal::imageFromTensor(tensor);
+    auto t = at::native::metal::staticImageToTensor(image);
+    print(t);
+  } else {
+    print(tensor);
+  }
 }
 
 }
@@ -357,7 +358,6 @@ bool test_add_broadcast2() {
   });
 }
 
-
 bool test_sub() {
   __block std::vector<int64_t> x{5, 3, 167, 222};
   return TEST(x, __PRETTY_FUNCTION__, ^bool {
@@ -372,8 +372,8 @@ bool test_sub() {
 }
 
 bool test_sub_broadcast() {
-  __block std::vector<int64_t> x1{1, 3, 1, 1};
-  __block std::vector<int64_t> x2{1, 3, 192, 192};
+  __block std::vector<int64_t> x1{3, 1, 1};
+  __block std::vector<int64_t> x2{3, 192, 192};
   return TEST(x1, __PRETTY_FUNCTION__, ^bool {
     auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
     auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
@@ -386,8 +386,8 @@ bool test_sub_broadcast() {
 }
 
 bool test_sub_broadcast2() {
-  __block std::vector<int64_t> x1{3, 3, 192, 192};
-  __block std::vector<int64_t> x2{3, 3, 1, 192};
+  __block std::vector<int64_t> x1{2, 3, 3, 192, 192};
+  __block std::vector<int64_t> x2{2, 3, 3, 1, 192};
   return TEST(x1, __PRETTY_FUNCTION__, ^bool {
     auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
     auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
@@ -441,44 +441,44 @@ bool test_mul_broadcast2() {
 }
 
 bool test_div() {
-    __block std::vector<int64_t> x{1, 3, 24, 24};
-    return TEST(x, __PRETTY_FUNCTION__, ^bool {
-      auto X1 = at::rand(x, at::TensorOptions(at::kCPU).dtype(at::kFloat));
-      auto X2 = at::rand(x, at::TensorOptions(at::kCPU).dtype(at::kFloat));
-      auto Y1 = at::div(X1, X2);
-      auto MX1 = X1.metal();
-      auto MX2 = X2.metal();
-      auto Y2 = at::div(MX1, MX2).cpu();
-      return almostEqual(Y1, Y2);
-    });
+  __block std::vector<int64_t> x{1, 3, 24, 24};
+  return TEST(x, __PRETTY_FUNCTION__, ^bool {
+    auto X1 = at::rand(x, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+    auto X2 = at::rand(x, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+    auto Y1 = at::div(X1, X2);
+    auto MX1 = X1.metal();
+    auto MX2 = X2.metal();
+    auto Y2 = at::div(MX1, MX2).cpu();
+    return almostEqual(Y1, Y2);
+  });
 }
 
 bool test_div_broadcast() {
-    __block std::vector<int64_t> x1{4, 3, 24, 24};
-    __block std::vector<int64_t> x2{4, 3, 1, 1};
-    return TEST(x1, __PRETTY_FUNCTION__, ^bool {
-      auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
-      auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
-      auto Y1 = at::div(X1, X2);
-      auto MX1 = X1.metal();
-      auto MX2 = X2.metal();
-      auto Y2 = at::div(MX1, MX2).cpu();
-      return almostEqual(Y1, Y2);
-    });
+  __block std::vector<int64_t> x1{4, 3, 24, 24};
+  __block std::vector<int64_t> x2{4, 3, 1, 1};
+  return TEST(x1, __PRETTY_FUNCTION__, ^bool {
+    auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+    auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+    auto Y1 = at::div(X1, X2);
+    auto MX1 = X1.metal();
+    auto MX2 = X2.metal();
+    auto Y2 = at::div(MX1, MX2).cpu();
+    return almostEqual(Y1, Y2);
+  });
 }
 
 bool test_div_broadcast2() {
-    __block std::vector<int64_t> x2{1, 3, 24, 1};
-    __block std::vector<int64_t> x1{1, 3, 24, 24};
-    return TEST(x1, __PRETTY_FUNCTION__, ^bool {
-      auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
-      auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
-      auto Y1 = at::div(X1, X2);
-      auto MX1 = X1.metal();
-      auto MX2 = X2.metal();
-      auto Y2 = at::div(MX1, MX2).cpu();
-      return almostEqual(Y1, Y2);
-    });
+  __block std::vector<int64_t> x2{1, 3, 24, 1};
+  __block std::vector<int64_t> x1{1, 3, 24, 24};
+  return TEST(x1, __PRETTY_FUNCTION__, ^bool {
+    auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+    auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+    auto Y1 = at::div(X1, X2);
+    auto MX1 = X1.metal();
+    auto MX2 = X2.metal();
+    auto Y2 = at::div(MX1, MX2).cpu();
+    return almostEqual(Y1, Y2);
+  });
 }
 
 bool test_t() {

diff --git a/aten/src/ATen/native/metal/ops/MetalAddmm.mm b/aten/src/ATen/native/metal/ops/MetalAddmm.mm
@@ -31,7 +31,9 @@ Tensor addmm(
   auto weight_ = weight.t()
                      .view({weight.sizes()[1], weight.sizes()[0], 1, 1})
                      .contiguous();
-  MPSImage* X = imageFromTensor(input);
+  // Permute the input texture to become {N, C, 1, 1}
+  auto input_ = input.view({input.sizes()[0], input.sizes()[1], 1, 1});
+  MPSImage* X = imageFromTensor(input_);
   const int64_t N = X.numberOfImages;
   const int64_t oC = weight_.sizes()[0];
   const int64_t kH = X.height;
@@ -70,15 +72,16 @@ Tensor addmm(
   [fc setOffset:{.x = static_cast<NSInteger>(X.width / 2),
                  .y = static_cast<NSInteger>(X.height / 2),
                  .z = 0}];
-  std::vector<int64_t> outputSize = {N, oC, 1, 1};
+  std::vector<int64_t> textureSize = {N, oC, 1, 1};
   MetalTensorImplStorage mt{{N, oC}};
   MetalCommandBuffer* commandBuffer = getCommandBufferFromTensor(input);
-  mt.texture()->allocateTemporaryTextureStorage(outputSize, commandBuffer);
+  mt.texture()->allocateTemporaryTextureStorage(textureSize, commandBuffer);
   MPSImage* Y = mt.texture()->image();
   [fc encodeToCommandBuffer:commandBuffer.buffer
                 sourceImage:X
            destinationImage:Y];
-  auto output = makeTensor(std::move(mt), input.options());
+  // The output texture becomes {N, oC, 1, 1}. Make it {1, 1, N, oC}
+  auto output = makeTensor(std::move(mt), input.options()).view({N, oC});
   return output;
 }