Skip to content

Commit

Permalink
[pytorch-vulkan] add aten::randn_like & aten::normal_
Browse files Browse the repository at this point in the history
Summary:
Implemented `aten::normal_` shader and used it to create `aten::randn_like`.

Ops definitions:
https://pytorch.org/docs/stable/generated/torch.randn_like.html
https://pytorch.org/docs/stable/generated/torch.Tensor.normal_.html

Test Plan:
```
[ttingchulin@53491.od /data/sandcastle/boxes/fbsource (randn)]$ LD_LIBRARY_PATH=third-party/swiftshader/lib/linux-x64/ buck run fbcode/mode/dev-nosan //xplat/caffe2:pt_vulkan_api_test_bin  -- --gtest_filter="*<test>*" eg.  -- --gtest_filter="*randn_like*"

[==========] Running 2 tests from 1 test suite.
[----------] Global test environment set-up.
[----------] 2 tests from VulkanAPITest
[ RUN      ] VulkanAPITest.randn_like
[       OK ] VulkanAPITest.randn_like (230 ms)
[ RUN      ] VulkanAPITest.randn_like_large
[       OK ] VulkanAPITest.randn_like_large (570 ms)
[----------] 2 tests from VulkanAPITest (801 ms total)

[----------] Global test environment tear-down
[==========] 2 tests from 1 test suite ran. (801 ms total)
[  PASSED  ] 2 tests.

[ttingchulin@53491.od /data/sandcastle/boxes/fbsource (randn)]$ LD_LIBRARY_PATH=third-party/swiftshader/lib/linux-x64/ buck run fbcode/mode/dev-nosan //xplat/caffe2:pt_vulkan_api_test_bin  -- --gtest_filter="*<test>*" eg.  -- --gtest_filter="*normal_*"
[==========] Running 3 tests from 1 test suite.
[----------] Global test environment set-up.
[----------] 3 tests from VulkanAPITest
[ RUN      ] VulkanAPITest.normal_
[       OK ] VulkanAPITest.normal_ (222 ms)
[ RUN      ] VulkanAPITest.normal_large
[       OK ] VulkanAPITest.normal_large (136 ms)
[ RUN      ] VulkanAPITest.normal_error
[       OK ] VulkanAPITest.normal_error (37 ms)
[----------] 3 tests from VulkanAPITest (396 ms total)

[----------] Global test environment tear-down
[==========] 3 tests f.
```

Reviewed By: yipjustin

Differential Revision: D48814024
  • Loading branch information
tina134 authored and facebook-github-bot committed Sep 11, 2023
1 parent 405f014 commit ba6202d
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 0 deletions.
31 changes: 31 additions & 0 deletions aten/src/ATen/native/vulkan/glsl/normal_.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#version 450 core
#define PRECISION $precision
#define FORMAT $format

#include "random.h"

layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

// Output image; main() writes one vec4 of samples per texel via imageStore.
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict image3D uOutput;
// Parameters for the normal_ shader.
// NOTE(review): the host side (normal_ in Random.cpp) appears to fill this
// with {extents, mean, std}; keep the field order in sync with that struct.
layout(set = 0, binding = 1) uniform PRECISION restrict Block {
// Image extents; invocations at or beyond these bounds write nothing.
ivec3 size;
// Mean of the normal distribution to sample from.
float mean;
// Standard deviation of the normal distribution to sample from.
float std;
} uBlock;

// Work group dimensions are supplied through specialization constants 0, 1, 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

/*
 * Writes four normally distributed samples (one per channel) to the texel of
 * uOutput addressed by this invocation, using uBlock.mean and uBlock.std.
 */
void main() {
  ivec3 texel = ivec3(gl_GlobalInvocationID);

  // Invocations that fall outside the image extents have nothing to write.
  if (any(greaterThanEqual(texel, uBlock.size))) {
    return;
  }

  // Each channel uses a distinct fourth seed coordinate.
  float sample_x = get_gaussrand(ivec4(texel, -20), uBlock.mean, uBlock.std);
  float sample_y = get_gaussrand(ivec4(texel, 40), uBlock.mean, uBlock.std);
  float sample_z = get_gaussrand(ivec4(texel, -30), uBlock.mean, uBlock.std);
  float sample_w = get_gaussrand(ivec4(texel, 15), uBlock.mean, uBlock.std);

  imageStore(uOutput, texel, vec4(sample_x, sample_y, sample_z, sample_w));
}
32 changes: 32 additions & 0 deletions aten/src/ATen/native/vulkan/glsl/random.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
* Random utility functions
*/

// Pi, used by the Box-Muller transform in get_gaussrand
#define PI 3.14159265358979323846264

uint pcg_hash(uint v) {
// From: https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/
uint state = v * 747796405u + 2891336453u;
Expand All @@ -15,7 +18,36 @@ float rand2(ivec4 pos) {
return fract(s / 1234567.0);
}

/*
 * Returns rand2(pos), unless that value is exactly zero; in that case the x
 * coordinate of the seed is advanced one step at a time until rand2 yields a
 * non-zero value, which is returned instead.
 */
float rand2_nonzero(ivec4 pos) {
  float candidate = rand2(pos);
  // Re-seed with x shifted by 1, 2, 3, ... for as long as the draw is zero.
  for (int shift = 1; candidate == 0.0; ++shift) {
    candidate = rand2(ivec4(pos.x + shift, pos.yzw));
  }
  return candidate;
}

/*
 * Maps the value rand2(pos) linearly onto the interval starting at `from`
 * with width (to - from).
 */
float get_uniform(ivec4 pos, float from, float to) {
  float width = to - from;
  return from + rand2(pos) * width;
}

/*
 * Returns one sample from a Gaussian distribution with the given mean and
 * standard deviation, seeded by pos.
 *
 * Follows the Box-Muller transform as given in the Wikipedia pseudocode
 * (https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform): two uniform
 * samples are turned into two Gaussian samples. Only one value can be kept
 * per position, so a third uniform sample acts as a coin flip that selects
 * either the cosine or the sine branch.
 */
float get_gaussrand(ivec4 pos, float mean, float std) {
  // Offset used to derive the two additional seeds from pos.
  ivec4 seed_offset = ivec4(10, 20, 30, 40);

  float u1 = rand2_nonzero(pos);
  float u2 = rand2_nonzero(pos + seed_offset);
  float coin = rand2_nonzero(pos - seed_offset);

  float mag = std * sqrt(-2.0 * log(u1));
  float angle = 2.0 * PI * u2;

  // Coin flip: keep the cosine sample or the sine sample.
  float unit = (coin > 0.5) ? cos(angle) : sin(angle);
  return mag * unit + mean;
}
63 changes: 63 additions & 0 deletions aten/src/ATen/native/vulkan/ops/Random.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,74 @@ Tensor rand_like(
return input_arg.clone().detach().uniform_(0.0, 1.0);
}

/*
 * In-place fills a Vulkan tensor with samples from a normal distribution by
 * dispatching the normal_ compute shader.
 *
 * self:  tensor to overwrite; must be a Vulkan tensor.
 * mean:  mean of the distribution (narrowed to float for the shader).
 * std:   standard deviation of the distribution; must be >= 0 (narrowed to
 *        float for the shader).
 * The optional generator argument is accepted for signature compatibility
 * but is not used.
 *
 * Returns `self`. Raises a c10::Error (via TORCH_CHECK) if `self` is not a
 * Vulkan tensor or if `std` is negative.
 */
Tensor& normal_(
    Tensor& self,
    const double mean,
    const double std,
    const c10::optional<at::Generator> /* not implemented */) {
  TORCH_CHECK(
      self.is_vulkan(),
      "Vulkan: In-place operator is only supported on Vulkan tensors.");

  TORCH_CHECK(
      std >= 0,
      "Vulkan: Standard deviation (std) cannot be negative, but got std = ",
      std);

  api::Context* const context = api::context();

  vTensor& v_self = convert(self);

  // Parameters handed to the shader's uniform block (extents, mean, std).
  const struct Block final {
    uvec3 extents;
    float mean;
    float std;
  } block{v_self.extents(), static_cast<float>(mean), static_cast<float>(std)};

  api::UniformParamsBuffer params(context, block);
  api::PipelineBarrier pipeline_barrier{};

  context->submit_compute_job(
      // shader descriptor
      VK_KERNEL(normal_),
      // pipeline barrier
      pipeline_barrier,
      // global work group size
      v_self.extents(),
      // local work group size
      adaptive_work_group_size(v_self.extents()),
      // fence handle
      VK_NULL_HANDLE,
      // shader arguments
      v_self.image(
          pipeline_barrier,
          api::PipelineStage::COMPUTE,
          api::MemoryAccessType::WRITE),
      // params buffer
      params.buffer());

  return self;
}

/*
 * Produces a new tensor shaped like `input_arg` whose elements are drawn from
 * a normal distribution with mean 0 and standard deviation 1. The input is
 * cloned first, so it is left untouched. The optional dtype / layout /
 * device / pin-memory / memory-format arguments are accepted but not used.
 */
Tensor randn_like(
    const at::Tensor& input_arg,
    const c10::optional<c10::ScalarType> /* not implemented */,
    const c10::optional<c10::Layout> /* not implemented */,
    const c10::optional<c10::Device> /* not implemented */,
    const c10::optional<bool> /* not implemented */,
    const c10::optional<c10::MemoryFormat> /* not implemented */) {
  // Work on a detached copy, then overwrite it in place with N(0, 1) samples.
  Tensor result = input_arg.clone().detach();
  result.normal_(0.0, 1.0);
  return result;
}

#ifdef USE_VULKAN_API

// Registers this file's random-number ops as the Vulkan implementations of
// the corresponding aten operators.
TORCH_LIBRARY_IMPL(aten, Vulkan, m) {
// In-place uniform fill and its out-of-place *_like variant.
m.impl(TORCH_SELECTIVE_NAME("aten::uniform_"), TORCH_FN(uniform_));
m.impl(TORCH_SELECTIVE_NAME("aten::rand_like"), TORCH_FN(rand_like));
// In-place normal fill and its out-of-place *_like variant.
m.impl(TORCH_SELECTIVE_NAME("aten::normal_"), TORCH_FN(normal_));
m.impl(TORCH_SELECTIVE_NAME("aten::randn_like"), TORCH_FN(randn_like));
}

#endif /* USE_VULKAN_API */
Expand Down
60 changes: 60 additions & 0 deletions aten/src/ATen/test/vulkan_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3969,6 +3969,66 @@ TEST_F(VulkanAPITest, rand_like) {
test_uniform(out_vulkan, a_min, a_max);
}

// Checks that the values in `out_vulkan` look normally distributed with the
// requested parameters: the sample mean must be within 5% of `std` from
// `mean`, and the sample standard deviation must be within 5% of `std` from
// `std`.
void test_normal(at::Tensor out_vulkan, const float mean, const float std) {
  // Read the tensor back to the CPU once and reuse it for both statistics.
  const auto out_cpu = out_vulkan.cpu();
  const double tolerance = std::abs(std) * 0.05;

  // ASSERT_LT reports both operands on failure, unlike ASSERT_TRUE.
  ASSERT_LT(std::abs(at::mean(out_cpu).item<float>() - mean), tolerance);
  ASSERT_LT(std::abs(at::std(out_cpu).item<float>() - std), tolerance);
}

// Fills a small Vulkan tensor in place with normal_ and checks the resulting
// sample mean and standard deviation.
TEST_F(VulkanAPITest, normal_) {
  const float expected_mean = -10.0;
  const float expected_std = 2.0;

  const auto cpu_float = at::device(at::kCPU).dtype(at::kFloat);
  auto a_vulkan = at::zeros({3, 4, 5, 6}, cpu_float).vulkan();

  a_vulkan.normal_(expected_mean, expected_std);

  test_normal(a_vulkan, expected_mean, expected_std);
}

// Same check as the normal_ test, on a larger tensor and with a small
// standard deviation.
TEST_F(VulkanAPITest, normal_large) {
  const float expected_mean = 1.0;
  const float expected_std = 0.001;

  const auto cpu_float = at::device(at::kCPU).dtype(at::kFloat);
  auto a_vulkan = at::zeros({30, 40, 50, 60}, cpu_float).vulkan();

  a_vulkan.normal_(expected_mean, expected_std);

  test_normal(a_vulkan, expected_mean, expected_std);
}

// A negative standard deviation must be rejected with a c10::Error.
TEST_F(VulkanAPITest, normal_error) {
  const float mean_arg = 1.0;
  const float negative_std = -1;

  const auto cpu_float = at::device(at::kCPU).dtype(at::kFloat);
  auto a_vulkan = at::zeros({30, 40, 50, 60}, cpu_float).vulkan();

  EXPECT_THROW(a_vulkan.normal_(mean_arg, negative_std), ::c10::Error);
}

// randn_like must return a new tensor of standard-normal samples and leave
// its input untouched.
TEST_F(VulkanAPITest, randn_like) {
  float a_mean = 0.0;
  float a_std = 1.0;

  auto a_vulkan =
      at::zeros({8, 7, 6, 5}, at::device(at::kCPU).dtype(at::kFloat)).vulkan();
  const auto out_vulkan = at::randn_like(a_vulkan);
  // Verify that the input is still all zeros (the op is not in-place). Every
  // element is checked: a zero mean alone would also be satisfied by non-zero
  // values that cancel out.
  ASSERT_TRUE(a_vulkan.cpu().eq(0.0).all().item<bool>());
  test_normal(out_vulkan, a_mean, a_std);
}

// randn_like on a larger tensor: the output should follow a standard normal
// distribution (mean 0, standard deviation 1).
TEST_F(VulkanAPITest, randn_like_large) {
  const float expected_mean = 0.0;
  const float expected_std = 1.0;

  const auto cpu_float = at::device(at::kCPU).dtype(at::kFloat);
  auto a_vulkan = at::zeros({80, 70, 60, 50}, cpu_float).vulkan();

  const auto out_vulkan = at::randn_like(a_vulkan);

  test_normal(out_vulkan, expected_mean, expected_std);
}

void test_t(const at::IntArrayRef input_shape) {
const auto in_cpu = at::rand(input_shape, at::device(at::kCPU).dtype(at::kFloat));
const auto out_cpu = at::t(in_cpu);
Expand Down

0 comments on commit ba6202d

Please sign in to comment.