[Vulkan] Add convert_qconv2d_context op #97714

Closed

Changes from all commits
46 changes: 39 additions & 7 deletions aten/src/ATen/native/vulkan/ops/Convolution.cpp
@@ -15,6 +15,7 @@
#else
#include <ATen/ops/pad.h>
#include <ATen/ops/permute.h>
#include <ATen/ops/quantize_per_tensor.h>
#include <ATen/ops/zeros.h>
#endif

@@ -556,7 +557,7 @@ vTensor pack_biases(
quantized ? api::StorageType::TEXTURE_3D : api::StorageType::TEXTURE_2D,
};

-if (quantized) {
+if (quantized && bias->scalar_type() != c10::kFloat) {
v_bias.set_is_quantized();
v_bias.set_scale(bias_rearranged.q_scale());
v_bias.set_zero_point(bias_rearranged.q_zero_point());
@@ -1000,6 +1001,28 @@ c10::intrusive_ptr<Conv2dPackedContext> create_qconv2d_context(
output_max));
}

c10::intrusive_ptr<Conv2dPackedContext> convert_qconv2d_context(
const c10::intrusive_ptr<ConvPackedParamsBase<2>>& packed_params,
const c10::optional<Scalar>& output_min,
const c10::optional<Scalar>& output_max) {
std::tuple<Tensor, c10::optional<Tensor>> wb = packed_params->unpack();
Tensor weight = std::get<0>(wb);
c10::optional<Tensor> bias = std::get<1>(wb);

return c10::make_intrusive<Conv2dPackedContext>(Conv2dPackedContext(
weight,
bias,
packed_params->stride().vec(),
packed_params->padding().vec(),
packed_params->dilation().vec(),
/* transposed = */ false,
/* quantized = */ true,
/* output_padding_arg = */ {0},
packed_params->groups(),
output_min,
output_max));
}

Tensor run_conv2d_context_impl(
const Tensor& input_arg,
const c10::intrusive_ptr<Conv2dPackedContext>& conv_context,
@@ -1012,11 +1035,22 @@ Tensor run_conv2d_context_impl(
const vTensor& v_input = convert(input_arg);

// Extract everything from the PackedContext
-const vTensor& v_weight = convert(
-conv_context->get_val(Conv2dPackedContext::Packed::Weight).toTensor());
+const Tensor weight =
+conv_context->get_val(Conv2dPackedContext::Packed::Weight).toTensor();
+const vTensor& v_weight = convert(weight);

const auto quantized =
conv_context->get_val(Conv2dPackedContext::Packed::isQuantized).toBool();

Tensor bias =
conv_context->get_val(Conv2dPackedContext::Packed::Bias).toTensor();
if (quantized && bias.scalar_type() == c10::kFloat) {
bias = at::quantize_per_tensor(
bias, v_weight.get_scale() * v_input.get_scale(), 0, c10::kQInt32);
conv_context->set_val(Conv2dPackedContext::Packed::Bias, bias);
}

-const vTensor& v_bias = convert(
-conv_context->get_val(Conv2dPackedContext::Packed::Bias).toTensor());
+const vTensor& v_bias = convert(bias);

const auto overlay_region =
conv_context->get_val(Conv2dPackedContext::Packed::OverlayRegion)

const auto transposed =
conv_context->get_val(Conv2dPackedContext::Packed::isTransposed).toBool();
-const auto quantized =
-conv_context->get_val(Conv2dPackedContext::Packed::isQuantized).toBool();

const float output_min = safe_downcast<float>(
conv_context->get_val(Conv2dPackedContext::Packed::OutputMin).toDouble());
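For context on the run-time bias handling above (a reading of the change, not text from the PR): a quantized conv accumulates int32 products whose combined scale is input_scale * weight_scale, so a float bias can only be quantized into the accumulator's domain once the input scale is known at run time. That is why pack_biases now leaves a float bias unquantized and run_conv2d_context_impl quantizes it on first use with zero point 0 and dtype kQInt32. A minimal standalone sketch of the rule:

#include <cmath>
#include <cstdint>
#include <iostream>

// Hypothetical illustration: an int32 bias added to the conv accumulator
// must use the accumulator's scale (input_scale * weight_scale) and a zero
// point of 0, mirroring the at::quantize_per_tensor call above.
int32_t quantize_bias(float bias_fp, float input_scale, float weight_scale) {
  const float bias_scale = input_scale * weight_scale;
  return static_cast<int32_t>(std::lround(bias_fp / bias_scale));
}

int main() {
  const float s_in = 0.13f, s_w = 0.29f; // the test's default scales
  const int32_t q_bias = quantize_bias(0.59f, s_in, s_w);
  // Dequantizing with the same combined scale approximately recovers 0.59.
  std::cout << q_bias << " -> " << q_bias * s_in * s_w << std::endl;
  return 0;
}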
6 changes: 6 additions & 0 deletions aten/src/ATen/native/vulkan/ops/Convolution.h
@@ -2,6 +2,7 @@

#ifdef USE_VULKAN_API

#include <ATen/native/quantized/PackedParams.h>
#include <ATen/native/vulkan/ops/Common.h>
#include <ATen/native/vulkan/ops/VulkanPackedContext.h>

@@ -164,6 +165,11 @@ c10::intrusive_ptr<Conv2dPackedContext> create_qconv2d_context(
const c10::optional<Scalar>& output_min = c10::nullopt,
const c10::optional<Scalar>& output_max = c10::nullopt);

c10::intrusive_ptr<Conv2dPackedContext> convert_qconv2d_context(
const c10::intrusive_ptr<ConvPackedParamsBase<2>>& packed_params,
const c10::optional<Scalar>& output_min,
const c10::optional<Scalar>& output_max);

Tensor run_qconv2d_context(
const Tensor& input_arg,
double scale,
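A hypothetical call site for the new declaration (only the op name and signature come from this PR; the wrapper function is illustrative): convert the output of quantized::conv2d_prepack so a model prepacked for the CPU quantized backend can run through Vulkan:

#include <ATen/native/vulkan/ops/Convolution.h>

// `cpu_packed` would come from quantized::conv2d_prepack; the two nullopt
// arguments are the optional output clamp bounds (no clamping here).
c10::intrusive_ptr<at::native::vulkan::ops::Conv2dPackedContext> to_vulkan(
    const c10::intrusive_ptr<ConvPackedParamsBase<2>>& cpu_packed) {
  return at::native::vulkan::ops::convert_qconv2d_context(
      cpu_packed, c10::nullopt, c10::nullopt);
}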
4 changes: 4 additions & 0 deletions aten/src/ATen/native/vulkan/ops/VulkanPackedContext.h
@@ -20,6 +20,10 @@ class VulkanPackedContext {
return packed_.get(i);
}

inline void set_val(int64_t i, c10::IValue val) const {
return packed_.set(i, val);
}

virtual const c10::impl::GenericList unpack() const = 0;

virtual ~VulkanPackedContext() = default;
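A side note on the const qualifier on set_val (an inference from c10's list semantics, not stated in the PR): c10::impl::GenericList has reference semantics, so a const member function can still write through the shared storage it points to. The same pattern in miniature, with hypothetical names:

#include <cstddef>
#include <memory>
#include <vector>

// A handle with reference semantics: the handle itself is const, but the
// shared storage it points to is not, so set() can be const like set_val.
class ListHandle {
 public:
  ListHandle() : impl_(std::make_shared<std::vector<int>>(8, 0)) {}
  int get(size_t i) const { return impl_->at(i); }
  void set(size_t i, int v) const { impl_->at(i) = v; }
 private:
  std::shared_ptr<std::vector<int>> impl_;
};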
121 changes: 121 additions & 0 deletions aten/src/ATen/test/vulkan_quantized_api_test.cpp
@@ -5,6 +5,7 @@
#include <ATen/native/quantized/cpu/QuantUtils.h>
#include <ATen/native/vulkan/api/api.h>
#include <ATen/native/vulkan/ops/Common.h>
#include <ATen/native/vulkan/ops/Convolution.h>
#include <ATen/native/vulkan/ops/Copy.h>
#include <ATen/native/vulkan/ops/Factory.h>
#include <ATen/native/vulkan/ops/QuantizedFunctions.h>
@@ -2917,6 +2918,126 @@ TEST_F(VulkanAPITest, conv2d_pw_quantized_prepack_random_params_int8_int32) {
/* groups */ 1);
}

void test_convert_qconv2d_context(
const bool random_quantization_params,
const at::IntArrayRef input_shape,
const at::IntArrayRef weight_shape,
const at::IntArrayRef bias_shape,
double in_scale = 0.13,
int in_zero_point = 11,
double w_scale = 0.29,
int w_zero_point = 19,
double out_scale = 0.15,
int out_zero_point = 10) {
c10::InferenceMode mode;

std::vector<int64_t> stride{2, 2};
std::vector<int64_t> padding{1, 1};
std::vector<int64_t> dilation{1, 1};
int64_t groups = 1;

if (random_quantization_params) {
// compute appropriate scales and zero points for input, weight and output
in_scale = produce_random_scale(0.001, 1.2);
w_scale = produce_random_scale(0.001, 1.2);
out_scale = produce_random_scale(0.001, 1.2);
in_zero_point = produce_random_zero_point(c10::ScalarType::QUInt8);
w_zero_point = produce_random_zero_point(c10::ScalarType::QInt8);
out_zero_point = produce_random_zero_point(c10::ScalarType::QUInt8);
}

const float a = 1.26;
const float b = 5.97;
const float c = 0.59;

at::Tensor input_cpu = produce_random_tensor(input_shape, a, b, c);

at::Tensor input_vk = input_cpu.vulkan();
at::Tensor input_cpu_q = at::quantize_per_tensor(
input_cpu, in_scale, in_zero_point, c10::ScalarType::QUInt8);
at::Tensor input_vk_q = at::quantize_per_tensor(
input_vk, in_scale, in_zero_point, c10::ScalarType::QUInt8);

at::Tensor weight_cpu = produce_random_tensor(weight_shape, a, b, c);
at::Tensor weight_cpu_q = at::quantize_per_tensor(
weight_cpu, w_scale, w_zero_point, c10::ScalarType::QInt8);

at::Tensor bias_cpu = produce_random_tensor(bias_shape, a, b, c);

at::globalContext().setReleaseWeightsWhenPrepacking(false);

const auto prepack_cpu = callOpByName(
"quantized::conv2d_prepack",
"",
weight_cpu_q,
bias_cpu,
stride,
padding,
dilation,
groups)[0];

at::Tensor output_cpu_q = callOpByName(
"quantized::conv2d",
"",
input_cpu_q,
prepack_cpu,
stride,
padding,
dilation,
groups,
out_scale,
out_zero_point)[0]
.toTensor();

at::Tensor output_cpu_deq = at::dequantize(output_cpu_q);

// convert quantized context
const auto prepack_vulkan = at::native::vulkan::ops::convert_qconv2d_context(
prepack_cpu.toCustomClass<ConvPackedParamsBase<2>>(),
c10::nullopt,
c10::nullopt);

// run vulkan quantized conv2d
const auto vulkan_output = callOpByName(
"vulkan_prepack::run_qconv2d_context",
"",
input_vk_q,
out_scale,
out_zero_point,
prepack_vulkan);

at::Tensor output_vk_q = vulkan_output[0].toTensor();
at::Tensor output_vk_deq = at::dequantize(output_vk_q);
at::Tensor output_vk_deq_cpu = output_vk_deq.cpu();

// check
const auto check = almostEqual(
output_cpu_deq, output_vk_deq_cpu, safe_downcast<float>(out_scale));
if (!check) {
const auto vk_q_error =
at::abs(output_vk_deq_cpu - output_cpu_deq).max().item<double>();
std::cout << "Failed with shapes: input " << input_shape << " weight "
<< weight_shape << " bias " << bias_shape
<< " and params: in_scale " << in_scale << " weight_scale "
<< w_scale << " out_scale " << out_scale << " in_zero_point "
<< in_zero_point << " w_zero_point " << w_zero_point
<< " out_zero_point " << out_zero_point << std::endl;
std::cout << "error: " << vk_q_error << std::endl;
}

ASSERT_TRUE(check);
}

TEST_F(VulkanAPITest, convert_qconv2d_context) {
test_convert_qconv2d_context(false, {1, 3, 8, 8}, {1, 3, 3, 3}, {1});
test_convert_qconv2d_context(false, {1, 4, 224, 128}, {16, 4, 3, 3}, {16});

for (int i = 0; i < 10; i += 1) {
test_convert_qconv2d_context(true, {1, 3, 8, 8}, {1, 3, 3, 3}, {1});
test_convert_qconv2d_context(true, {1, 4, 224, 128}, {16, 4, 3, 3}, {16});
}
}
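A note on the tolerance used by the check in the helper above (an observation, not from the PR): both backends round real conv outputs to a grid with step out_scale, so each dequantized result is within half a step of the true value, and two correct backends can disagree by up to one full step — exactly the tolerance passed to almostEqual. A small sketch:

#include <cassert>
#include <cmath>

int main() {
  const double out_scale = 0.15; // quantization step of the output
  const double x = 1.234;        // some real conv output
  const double dq = std::round(x / out_scale) * out_scale;
  assert(std::abs(dq - x) <= out_scale / 2 + 1e-12); // per-backend error
  // Two backends each within out_scale / 2 of x can differ from each other
  // by up to out_scale, hence almostEqual(..., out_scale) above.
  return 0;
}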

} // namespace

#endif /* USE_VULKAN_API */