Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[iOS GPU] Support multi-dimension tensors via MPSImage #54106

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 29 additions & 0 deletions aten/src/ATen/native/metal/mpscnn/MPSImageUtils.h
Expand Up @@ -47,6 +47,35 @@ static inline MPSImage* imageFromTensor(const Tensor& tensor) {
return implStorage.texture()->image();
}

/*
MPSImage carries a IntList shape which is identical to the shape of the CPU
tensor it’s converted from.

1) 1D tensors (W,) are always stored as MPSImage(N=1, C=1, H=1, W=W).
2) 2D tensors (H, W) are always stored as MPSImage(N=1, C=1, H=H, W=W).
3) 3D tensors (C, H, W) are always stored as MPSImage(N=1, C=C, H=H, W=W).
4) 4D tensors (N, C, H, W) are always stored as MPSImage(N=N, C=C, H=H, W=W).
5) 5D tensors (T, N, C, H, W) are always stored as MPSImage(N=N*T, C=C, H=H, W=W).
6) ...
*/
static inline std::vector<int64_t> computeTextureSize(IntArrayRef sizes) {
std::vector<int64_t> textureSize(4, 1);
int64_t index = 3;
int64_t batch = 1;
for (int i = sizes.size() - 1; i >= 0; i--) {
if (index != 0) {
textureSize[index] = sizes[i];
index--;
continue;
}
// For higher dimensional tensors,
// multiply rest of dims into textureSize[0]
batch *= sizes[i];
}
textureSize[0] = batch;
return textureSize;
}

} // namespace metal
} // namespace native
} // namespace at
7 changes: 0 additions & 7 deletions aten/src/ATen/native/metal/mpscnn/MPSImageWrapper.h
Expand Up @@ -9,12 +9,6 @@ namespace at {
namespace native {
namespace metal {

enum class TextureType {
TextureNone,
TextureType2D,
TextureType2DArray,
};

class API_AVAILABLE(ios(10.0), macos(10.13)) MPSImageWrapper {
public:
MPSImageWrapper(IntArrayRef sizes);
Expand All @@ -30,7 +24,6 @@ class API_AVAILABLE(ios(10.0), macos(10.13)) MPSImageWrapper {
void copyFromTexture(MPSImage* image);
void setCommandBuffer(MetalCommandBuffer* buffer);
MetalCommandBuffer* commandBuffer() const;
TextureType textureType() const;
IntArrayRef textureSizes() const;
MPSImage* image() const;
void recycleImage();
Expand Down
32 changes: 3 additions & 29 deletions aten/src/ATen/native/metal/mpscnn/MPSImageWrapper.mm
Expand Up @@ -10,25 +10,12 @@
namespace native {
namespace metal {

std::vector<int64_t> textureSizeFromSizes(IntArrayRef sizes, TextureType type) {
if (sizes.size() == 2) {
if (type == TextureType::TextureType2DArray) {
return {sizes[0], sizes[1], 1, 1};
} else if (type == TextureType::TextureType2D) {
return {1, 1, sizes[0], sizes[1]};
} else {
return {};
}
}
return sizes.vec();
}
MPSImageWrapper::MPSImageWrapper(IntArrayRef sizes) {
_textureSizes = textureSizeFromSizes(sizes, TextureType::TextureType2D);
_textureSizes = computeTextureSize(sizes);
}

void MPSImageWrapper::copyDataFromHost(const float* inputData) {
TORCH_CHECK(inputData);
TORCH_CHECK(_textureSizes.size() == 4);
_commandBuffer = [MetalCommandBuffer currentBuffer];
_image = createTemporaryImage(_commandBuffer, _textureSizes, inputData);
}
Expand Down Expand Up @@ -61,29 +48,16 @@
return _textureSizes;
}

TextureType MPSImageWrapper::textureType() const {
if (!_image) {
return TextureType::TextureNone;
}
MTLTextureType textureType = _image.textureType;
if (textureType == MTLTextureType2D) {
return TextureType::TextureType2D;
} else if (textureType == MTLTextureType2DArray) {
return TextureType::TextureType2DArray;
}
return TextureType::TextureNone;
}

void MPSImageWrapper::allocateTextureStorage(IntArrayRef sizes) {
_textureSizes = sizes.vec();
_textureSizes = computeTextureSize(sizes);
_image = createStaticImage(_textureSizes);
}

void MPSImageWrapper::allocateTemporaryTextureStorage(
IntArrayRef sizes,
MetalCommandBuffer* commandBuffer) {
TORCH_CHECK(commandBuffer)
_textureSizes = sizes.vec();
_textureSizes = computeTextureSize(sizes);
_commandBuffer = commandBuffer;
_image = createTemporaryImage(commandBuffer, _textureSizes);
}
Expand Down
106 changes: 53 additions & 53 deletions aten/src/ATen/native/metal/mpscnn/tests/MPSCNNTests.mm
Expand Up @@ -76,23 +76,24 @@ bool TEST(const std::vector<int64_t>& sizes, std::string name, Func block) {
return b;
}

void PRINT_TENSOR(std::string name, const at::Tensor& tensor){
std::string str = name + ": ";
auto print = [&](const at::Tensor& t){
for(int i=0; i<t.numel(); ++i){
NSString* sf = [NSString stringWithFormat:@"%.2f",t.data_ptr<float>()[i]];
str += sf.UTF8String;
str += ", ";
}
std::cout<<str<<std::endl;
};
if(tensor.is_metal()){
MPSImage* image = at::native::metal::imageFromTensor(tensor);
auto t = at::native::metal::staticImageToTensor(image);
print(t);
} else {
print(tensor);
void PRINT_TENSOR(std::string name, const at::Tensor& tensor) {
std::string str = name + ": ";
auto print = [&](const at::Tensor& t) {
for (int i = 0; i < t.numel(); ++i) {
NSString* sf =
[NSString stringWithFormat:@"%.2f", t.data_ptr<float>()[i]];
str += sf.UTF8String;
str += ", ";
}
std::cout << str << std::endl;
};
if (tensor.is_metal()) {
MPSImage* image = at::native::metal::imageFromTensor(tensor);
auto t = at::native::metal::staticImageToTensor(image);
print(t);
} else {
print(tensor);
}
}

}
Expand Down Expand Up @@ -357,7 +358,6 @@ bool test_add_broadcast2() {
});
}


bool test_sub() {
__block std::vector<int64_t> x{5, 3, 167, 222};
return TEST(x, __PRETTY_FUNCTION__, ^bool {
Expand All @@ -372,8 +372,8 @@ bool test_sub() {
}

bool test_sub_broadcast() {
__block std::vector<int64_t> x1{1, 3, 1, 1};
__block std::vector<int64_t> x2{1, 3, 192, 192};
__block std::vector<int64_t> x1{3, 1, 1};
__block std::vector<int64_t> x2{3, 192, 192};
return TEST(x1, __PRETTY_FUNCTION__, ^bool {
auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
Expand All @@ -386,8 +386,8 @@ bool test_sub_broadcast() {
}

bool test_sub_broadcast2() {
__block std::vector<int64_t> x1{3, 3, 192, 192};
__block std::vector<int64_t> x2{3, 3, 1, 192};
__block std::vector<int64_t> x1{2, 3, 3, 192, 192};
__block std::vector<int64_t> x2{2, 3, 3, 1, 192};
return TEST(x1, __PRETTY_FUNCTION__, ^bool {
auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
Expand Down Expand Up @@ -441,44 +441,44 @@ bool test_mul_broadcast2() {
}

bool test_div() {
__block std::vector<int64_t> x{1, 3, 24, 24};
return TEST(x, __PRETTY_FUNCTION__, ^bool {
auto X1 = at::rand(x, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto X2 = at::rand(x, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto Y1 = at::div(X1, X2);
auto MX1 = X1.metal();
auto MX2 = X2.metal();
auto Y2 = at::div(MX1, MX2).cpu();
return almostEqual(Y1, Y2);
});
__block std::vector<int64_t> x{1, 3, 24, 24};
return TEST(x, __PRETTY_FUNCTION__, ^bool {
auto X1 = at::rand(x, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto X2 = at::rand(x, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto Y1 = at::div(X1, X2);
auto MX1 = X1.metal();
auto MX2 = X2.metal();
auto Y2 = at::div(MX1, MX2).cpu();
return almostEqual(Y1, Y2);
});
}

bool test_div_broadcast() {
__block std::vector<int64_t> x1{4, 3, 24, 24};
__block std::vector<int64_t> x2{4, 3, 1, 1};
return TEST(x1, __PRETTY_FUNCTION__, ^bool {
auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto Y1 = at::div(X1, X2);
auto MX1 = X1.metal();
auto MX2 = X2.metal();
auto Y2 = at::div(MX1, MX2).cpu();
return almostEqual(Y1, Y2);
});
__block std::vector<int64_t> x1{4, 3, 24, 24};
__block std::vector<int64_t> x2{4, 3, 1, 1};
return TEST(x1, __PRETTY_FUNCTION__, ^bool {
auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto Y1 = at::div(X1, X2);
auto MX1 = X1.metal();
auto MX2 = X2.metal();
auto Y2 = at::div(MX1, MX2).cpu();
return almostEqual(Y1, Y2);
});
}

bool test_div_broadcast2() {
__block std::vector<int64_t> x2{1, 3, 24, 1};
__block std::vector<int64_t> x1{1, 3, 24, 24};
return TEST(x1, __PRETTY_FUNCTION__, ^bool {
auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto Y1 = at::div(X1, X2);
auto MX1 = X1.metal();
auto MX2 = X2.metal();
auto Y2 = at::div(MX1, MX2).cpu();
return almostEqual(Y1, Y2);
});
__block std::vector<int64_t> x2{1, 3, 24, 1};
__block std::vector<int64_t> x1{1, 3, 24, 24};
return TEST(x1, __PRETTY_FUNCTION__, ^bool {
auto X1 = at::rand(x1, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto X2 = at::rand(x2, at::TensorOptions(at::kCPU).dtype(at::kFloat));
auto Y1 = at::div(X1, X2);
auto MX1 = X1.metal();
auto MX2 = X2.metal();
auto Y2 = at::div(MX1, MX2).cpu();
return almostEqual(Y1, Y2);
});
}

bool test_t() {
Expand Down
11 changes: 7 additions & 4 deletions aten/src/ATen/native/metal/ops/MetalAddmm.mm
Expand Up @@ -31,7 +31,9 @@ Tensor addmm(
auto weight_ = weight.t()
.view({weight.sizes()[1], weight.sizes()[0], 1, 1})
.contiguous();
MPSImage* X = imageFromTensor(input);
// Permute the input texture to become {N, C, 1, 1}
auto input_ = input.view({input.sizes()[0], input.sizes()[1], 1, 1});
MPSImage* X = imageFromTensor(input_);
const int64_t N = X.numberOfImages;
const int64_t oC = weight_.sizes()[0];
const int64_t kH = X.height;
Expand Down Expand Up @@ -70,15 +72,16 @@ Tensor addmm(
[fc setOffset:{.x = static_cast<NSInteger>(X.width / 2),
.y = static_cast<NSInteger>(X.height / 2),
.z = 0}];
std::vector<int64_t> outputSize = {N, oC, 1, 1};
std::vector<int64_t> textureSize = {N, oC, 1, 1};
MetalTensorImplStorage mt{{N, oC}};
MetalCommandBuffer* commandBuffer = getCommandBufferFromTensor(input);
mt.texture()->allocateTemporaryTextureStorage(outputSize, commandBuffer);
mt.texture()->allocateTemporaryTextureStorage(textureSize, commandBuffer);
MPSImage* Y = mt.texture()->image();
[fc encodeToCommandBuffer:commandBuffer.buffer
sourceImage:X
destinationImage:Y];
auto output = makeTensor(std::move(mt), input.options());
// The output texture becomes {N, oC, 1, 1}. Make it {1, 1, N, oC}
auto output = makeTensor(std::move(mt), input.options()).view({N, oC});
return output;
}

Expand Down