[Vulkan] Add convert_qconv2d_context op (#97714)
Summary:
This diff adds a convert_qconv2d_context op, which converts a CPU quantized Conv2dPackedParamsBase object (used by quantized::conv2d) into a Vulkan Conv2dPackedContext object.
This op is used in a later diff (D44189363) to perform a graph rewrite of the quantized::conv2d and quantized::conv2d_relu ops.
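
For illustration, a minimal usage sketch distilled from the test added in this diff (`callOpByName` is the dispatcher helper used by the test; the tensors and conv params — `weight_cpu_q`, `bias_cpu`, `stride`, `padding`, `dilation`, `groups`, `input_vk_q`, `out_scale`, `out_zero_point` — are assumed to be prepared as in `test_convert_qconv2d_context` below):
```
// Prepack the quantized weight/bias on CPU ...
const auto prepack_cpu = callOpByName(
    "quantized::conv2d_prepack", "",
    weight_cpu_q, bias_cpu, stride, padding, dilation, groups)[0];

// ... convert the CPU packed params into a Vulkan Conv2dPackedContext ...
const auto prepack_vulkan = at::native::vulkan::ops::convert_qconv2d_context(
    prepack_cpu.toCustomClass<ConvPackedParamsBase<2>>(),
    /* output_min = */ c10::nullopt,
    /* output_max = */ c10::nullopt);

// ... and run the quantized conv2d on Vulkan with the converted context.
at::Tensor output_vk_q = callOpByName(
    "vulkan_prepack::run_qconv2d_context", "",
    input_vk_q, out_scale, out_zero_point, prepack_vulkan)[0].toTensor();
```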

Test Plan:
On Mac
```
cd ~/fbsource
buck1 run -c pt.vulkan_full_precision=1 //xplat/caffe2:pt_vulkan_quantized_api_test_binAppleMac\#macosx-arm64
```

On Android
```
cd ~/fbsource
buck1 build -c ndk.custom_libcxx=false -c pt.enable_qpl=0 -c pt.vulkan_full_precision=1 //xplat/caffe2:pt_vulkan_quantized_api_test_binAndroid\#android-arm64 --show-output
adb push buck-out/gen/xplat/caffe2/pt_vulkan_quantized_api_test_binAndroid\#android-arm64 /data/local/tmp/vulkan_quantized_api_test
adb shell "/data/local/tmp/vulkan_quantized_api_test"
```

Reviewed By: SS-JIA

Differential Revision: D41595032

Pull Request resolved: #97714
Approved by: https://github.com/SS-JIA
manuelcandales authored and pytorchmergebot committed Mar 28, 2023
1 parent 662a8cf · commit c92dfe2
Showing 4 changed files with 170 additions and 7 deletions.
46 changes: 39 additions & 7 deletions aten/src/ATen/native/vulkan/ops/Convolution.cpp
```diff
@@ -15,6 +15,7 @@
 #else
 #include <ATen/ops/pad.h>
 #include <ATen/ops/permute.h>
+#include <ATen/ops/quantize_per_tensor.h>
 #include <ATen/ops/zeros.h>
 #endif

@@ -556,7 +557,7 @@ vTensor pack_biases(
       quantized ? api::StorageType::TEXTURE_3D : api::StorageType::TEXTURE_2D,
   };

-  if (quantized) {
+  if (quantized && bias->scalar_type() != c10::kFloat) {
     v_bias.set_is_quantized();
     v_bias.set_scale(bias_rearranged.q_scale());
     v_bias.set_zero_point(bias_rearranged.q_zero_point());
@@ -1000,6 +1001,28 @@ c10::intrusive_ptr<Conv2dPackedContext> create_qconv2d_context(
       output_max));
 }

+c10::intrusive_ptr<Conv2dPackedContext> convert_qconv2d_context(
+    const c10::intrusive_ptr<ConvPackedParamsBase<2>>& packed_params,
+    const c10::optional<Scalar>& output_min,
+    const c10::optional<Scalar>& output_max) {
+  std::tuple<Tensor, c10::optional<Tensor>> wb = packed_params->unpack();
+  Tensor weight = std::get<0>(wb);
+  c10::optional<Tensor> bias = std::get<1>(wb);
+
+  return c10::make_intrusive<Conv2dPackedContext>(Conv2dPackedContext(
+      weight,
+      bias,
+      packed_params->stride().vec(),
+      packed_params->padding().vec(),
+      packed_params->dilation().vec(),
+      /* transposed = */ false,
+      /* quantized = */ true,
+      /* output_padding_arg = */ {0},
+      packed_params->groups(),
+      output_min,
+      output_max));
+}
+
 Tensor run_conv2d_context_impl(
     const Tensor& input_arg,
     const c10::intrusive_ptr<Conv2dPackedContext>& conv_context,
@@ -1012,11 +1035,22 @@ Tensor run_conv2d_context_impl(
   const vTensor& v_input = convert(input_arg);

   // Extract everything from the PackedContext
-  const vTensor& v_weight = convert(
-      conv_context->get_val(Conv2dPackedContext::Packed::Weight).toTensor());
+  const Tensor weight =
+      conv_context->get_val(Conv2dPackedContext::Packed::Weight).toTensor();
+  const vTensor& v_weight = convert(weight);
+
+  const auto quantized =
+      conv_context->get_val(Conv2dPackedContext::Packed::isQuantized).toBool();
+
+  Tensor bias =
+      conv_context->get_val(Conv2dPackedContext::Packed::Bias).toTensor();
+  if (quantized && bias.scalar_type() == c10::kFloat) {
+    bias = at::quantize_per_tensor(
+        bias, v_weight.get_scale() * v_input.get_scale(), 0, c10::kQInt32);
+    conv_context->set_val(Conv2dPackedContext::Packed::Bias, bias);
+  }

-  const vTensor& v_bias = convert(
-      conv_context->get_val(Conv2dPackedContext::Packed::Bias).toTensor());
+  const vTensor& v_bias = convert(bias);

   const auto overlay_region =
       conv_context->get_val(Conv2dPackedContext::Packed::OverlayRegion)
@@ -1035,8 +1069,6 @@

   const auto transposed =
       conv_context->get_val(Conv2dPackedContext::Packed::isTransposed).toBool();
-  const auto quantized =
-      conv_context->get_val(Conv2dPackedContext::Packed::isQuantized).toBool();

   const float output_min = safe_downcast<float>(
       conv_context->get_val(Conv2dPackedContext::Packed::OutputMin).toDouble());
```
6 changes: 6 additions & 0 deletions aten/src/ATen/native/vulkan/ops/Convolution.h
```diff
@@ -2,6 +2,7 @@

 #ifdef USE_VULKAN_API

+#include <ATen/native/quantized/PackedParams.h>
 #include <ATen/native/vulkan/ops/Common.h>
 #include <ATen/native/vulkan/ops/VulkanPackedContext.h>

@@ -164,6 +165,11 @@ c10::intrusive_ptr<Conv2dPackedContext> create_qconv2d_context(
     const c10::optional<Scalar>& output_min = c10::nullopt,
     const c10::optional<Scalar>& output_max = c10::nullopt);

+c10::intrusive_ptr<Conv2dPackedContext> convert_qconv2d_context(
+    const c10::intrusive_ptr<ConvPackedParamsBase<2>>& packed_params,
+    const c10::optional<Scalar>& output_min,
+    const c10::optional<Scalar>& output_max);
+
 Tensor run_qconv2d_context(
     const Tensor& input_arg,
     double scale,
```
4 changes: 4 additions & 0 deletions aten/src/ATen/native/vulkan/ops/VulkanPackedContext.h
```diff
@@ -20,6 +20,10 @@ class VulkanPackedContext {
     return packed_.get(i);
   }

+  inline void set_val(int64_t i, c10::IValue val) const {
+    return packed_.set(i, val);
+  }
+
   virtual const c10::impl::GenericList unpack() const = 0;

   virtual ~VulkanPackedContext() = default;
```
121 changes: 121 additions & 0 deletions aten/src/ATen/test/vulkan_quantized_api_test.cpp
```diff
@@ -5,6 +5,7 @@
 #include <ATen/native/quantized/cpu/QuantUtils.h>
 #include <ATen/native/vulkan/api/api.h>
 #include <ATen/native/vulkan/ops/Common.h>
+#include <ATen/native/vulkan/ops/Convolution.h>
 #include <ATen/native/vulkan/ops/Copy.h>
 #include <ATen/native/vulkan/ops/Factory.h>
 #include <ATen/native/vulkan/ops/QuantizedFunctions.h>
@@ -2917,6 +2918,126 @@ TEST_F(VulkanAPITest, conv2d_pw_quantized_prepack_random_params_int8_int32) {
       /* groups */ 1);
 }

+void test_convert_qconv2d_context(
+    const bool random_quantization_params,
+    const at::IntArrayRef input_shape,
+    const at::IntArrayRef weight_shape,
+    const at::IntArrayRef bias_shape,
+    double in_scale = 0.13,
+    int in_zero_point = 11,
+    double w_scale = 0.29,
+    int w_zero_point = 19,
+    double out_scale = 0.15,
+    int out_zero_point = 10) {
+  c10::InferenceMode mode;
+
+  std::vector<int64_t> stride{2, 2};
+  std::vector<int64_t> padding{1, 1};
+  std::vector<int64_t> dilation{1, 1};
+  int64_t groups = 1;
+
+  if (random_quantization_params) {
+    // compute appropriate scale and zero point for input, weight and bias
+    in_scale = produce_random_scale(0.001, 1.2);
+    w_scale = produce_random_scale(0.001, 1.2);
+    out_scale = produce_random_scale(0.001, 1.2);
+    in_zero_point = produce_random_zero_point(c10::ScalarType::QUInt8);
+    w_zero_point = produce_random_zero_point(c10::ScalarType::QInt8);
+    out_zero_point = produce_random_zero_point(c10::ScalarType::QUInt8);
+  }
+
+  const float a = 1.26;
+  const float b = 5.97;
+  const float c = 0.59;
+
+  at::Tensor input_cpu = produce_random_tensor(input_shape, a, b, c);
+
+  at::Tensor input_vk = input_cpu.vulkan();
+  at::Tensor input_cpu_q = at::quantize_per_tensor(
+      input_cpu, in_scale, in_zero_point, c10::ScalarType::QUInt8);
+  at::Tensor input_vk_q = at::quantize_per_tensor(
+      input_vk, in_scale, in_zero_point, c10::ScalarType::QUInt8);
+
+  at::Tensor weight_cpu = produce_random_tensor(weight_shape, a, b, c);
+  at::Tensor weight_cpu_q = at::quantize_per_tensor(
+      weight_cpu, w_scale, w_zero_point, c10::ScalarType::QInt8);
+
+  at::Tensor bias_cpu = produce_random_tensor(bias_shape, a, b, c);
+
+  at::globalContext().setReleaseWeightsWhenPrepacking(false);
+
+  const auto prepack_cpu = callOpByName(
+      "quantized::conv2d_prepack",
+      "",
+      weight_cpu_q,
+      bias_cpu,
+      stride,
+      padding,
+      dilation,
+      groups)[0];
+
+  at::Tensor output_cpu_q = callOpByName(
+      "quantized::conv2d",
+      "",
+      input_cpu_q,
+      prepack_cpu,
+      stride,
+      padding,
+      dilation,
+      groups,
+      out_scale,
+      out_zero_point)[0]
+                                .toTensor();
+
+  at::Tensor output_cpu_deq = at::dequantize(output_cpu_q);
+
+  // convert quantized context
+  const auto prepack_vulkan = at::native::vulkan::ops::convert_qconv2d_context(
+      prepack_cpu.toCustomClass<ConvPackedParamsBase<2>>(),
+      c10::nullopt,
+      c10::nullopt);
+
+  // run vulkan quantized conv2d
+  const auto vulkan_output = callOpByName(
+      "vulkan_prepack::run_qconv2d_context",
+      "",
+      input_vk_q,
+      out_scale,
+      out_zero_point,
+      prepack_vulkan);
+
+  at::Tensor output_vk_q = vulkan_output[0].toTensor();
+  at::Tensor output_vk_deq = at::dequantize(output_vk_q);
+  at::Tensor output_vk_deq_cpu = output_vk_deq.cpu();
+
+  // check
+  const auto check = almostEqual(
+      output_cpu_deq, output_vk_deq_cpu, safe_downcast<float>(out_scale));
+  if (!check) {
+    const auto vk_q_error =
+        at::abs(output_vk_deq_cpu - output_cpu_deq).max().item<double>();
+    std::cout << "Failed with shapes: input " << input_shape << " weight "
+              << weight_shape << " bias " << bias_shape
+              << " and params: in_scale " << in_scale << " weight_scale "
+              << w_scale << " out_scale " << out_scale << " in_zero_point "
+              << in_zero_point << " w_zero_point " << w_zero_point
+              << " out_zero_point " << out_zero_point << std::endl;
+    std::cout << "error: " << vk_q_error << std::endl;
+  }
+
+  ASSERT_TRUE(check);
+}
+
+TEST_F(VulkanAPITest, convert_qconv2d_context) {
+  test_convert_qconv2d_context(false, {1, 3, 8, 8}, {1, 3, 3, 3}, {1});
+  test_convert_qconv2d_context(false, {1, 4, 224, 128}, {16, 4, 3, 3}, {16});
+
+  for (int i = 0; i < 10; i += 1) {
+    test_convert_qconv2d_context(true, {1, 3, 8, 8}, {1, 3, 3, 3}, {1});
+    test_convert_qconv2d_context(true, {1, 4, 224, 128}, {16, 4, 3, 3}, {16});
+  }
+}
+
 } // namespace

 #endif /* USE_VULKAN_API */
```
