From 43b565c8996105697e8862aa4db2268ccf7e71b8 Mon Sep 17 00:00:00 2001
From: Ethan Ng
Date: Wed, 17 Sep 2025 10:10:35 -0700
Subject: [PATCH] Rename conv -> conv2d, conv1d_nchw -> conv1d_ncl,
 conv1d_nhwc -> conv1d_nlc (#14310)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/14310

Reviewed By: mcremon-meta

Differential Revision: D82329465
---
 backends/cadence/aot/TARGETS | 4 +-
 backends/cadence/aot/functions.yaml | 80 ++++-----
 backends/cadence/aot/functions_hifi.yaml | 80 ++++-----
 backends/cadence/aot/ops_registrations.py | 168 +++++++++---------
 backends/cadence/aot/quantizer/patterns.py | 6 +-
 backends/cadence/aot/ref_implementations.py | 84 +++++----
 backends/cadence/aot/replace_ops.py | 32 ++--
 .../aot/tests/test_ref_implementations.py | 28 +--
 .../aot/tests/test_replace_ops_passes.py | 14 +-
 .../aot/tests/test_type_dispatch_passes.py | 64 +++----
 backends/cadence/aot/type_dispatch.py | 22 +--
 .../cadence/generic/operators/CMakeLists.txt | 4 +-
 ..._out.cpp => quantized_conv2d_nchw_out.cpp} | 42 ++---
 ..._out.cpp => quantized_conv2d_nhwc_out.cpp} | 42 ++---
 .../cadence/generic/operators/targets.bzl | 8 +-
 .../cadence/hifi/operators/CMakeLists.txt | 4 +-
 ...cl_asym8sxsym8s_asym8s_per_tensor_out.cpp} | 6 +-
 ...cl_asym8uxsym8u_asym8u_per_tensor_out.cpp} | 6 +-
 ...lc_asym8sxsym8s_asym8s_per_tensor_out.cpp} | 6 +-
 ...lc_asym8uxsym8u_asym8u_per_tensor_out.cpp} | 6 +-
 ...hw_asym8sxsym8s_asym8s_per_tensor_out.cpp} | 6 +-
 ...hw_asym8uxsym8u_asym8u_per_tensor_out.cpp} | 6 +-
 ...se_asym8sxsym8s_asym8s_per_tensor_out.cpp} | 6 +-
 ...se_asym8uxsym8u_asym8u_per_tensor_out.cpp} | 6 +-
 ...ed_asym8sxsym8s_asym8s_per_tensor_out.cpp} | 2 +-
 ...ed_asym8uxsym8u_asym8u_per_tensor_out.cpp} | 2 +-
 ...t.cpp => op_quantized_conv2d_nchw_out.cpp} | 16 +-
 ...wc_asym8sxsym8s_asym8s_per_tensor_out.cpp} | 6 +-
 ...wc_asym8uxsym8u_asym8u_per_tensor_out.cpp} | 6 +-
 ...se_asym8sxsym8s_asym8s_per_tensor_out.cpp} | 6 +-
 ...se_asym8uxsym8u_asym8u_per_tensor_out.cpp} | 6 +-
 ...ed_asym8sxsym8s_asym8s_per_tensor_out.cpp} | 2 +-
 ...ed_asym8uxsym8u_asym8u_per_tensor_out.cpp} | 2 +-
 ...t.cpp => op_quantized_conv2d_nhwc_out.cpp} | 16 +-
 backends/cadence/hifi/operators/operators.h | 8 +-
 backends/cadence/hifi/operators/targets.bzl | 36 ++--
 36 files changed, 429 insertions(+), 409 deletions(-)
 rename backends/cadence/generic/operators/{quantized_conv_nchw_out.cpp => quantized_conv2d_nchw_out.cpp} (94%)
 rename backends/cadence/generic/operators/{quantized_conv_nhwc_out.cpp => quantized_conv2d_nhwc_out.cpp} (94%)
 rename backends/cadence/hifi/operators/{op_quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp => op_quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out.cpp} (96%)
 rename backends/cadence/hifi/operators/{op_quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp => op_quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_out.cpp} (96%)
 rename backends/cadence/hifi/operators/{op_quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp => op_quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out.cpp} (95%)
 rename backends/cadence/hifi/operators/{op_quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp => op_quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_out.cpp} (95%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp => op_quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp} (97%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp => op_quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp} (97%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp => op_quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp} (96%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp => op_quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp} (96%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp => op_quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp} (98%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp => op_quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp} (98%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nchw_out.cpp => op_quantized_conv2d_nchw_out.cpp} (98%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp => op_quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp} (96%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp => op_quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp} (96%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp => op_quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp} (95%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp => op_quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp} (95%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp => op_quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp} (98%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp => op_quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp} (98%)
 rename backends/cadence/hifi/operators/{op_quantized_conv_nhwc_out.cpp => op_quantized_conv2d_nhwc_out.cpp} (98%)

diff --git a/backends/cadence/aot/TARGETS b/backends/cadence/aot/TARGETS
index 0ec09bf4f9e..b54f1ac3ba6 100644
--- a/backends/cadence/aot/TARGETS
+++ b/backends/cadence/aot/TARGETS
@@ -153,8 +153,8 @@ executorch_generated_lib(
         "//executorch/backends/cadence/generic/operators:dequantize_per_tensor",
         "//executorch/backends/cadence/generic/operators:quantize_per_tensor",
         "//executorch/backends/cadence/generic/operators:quantized_add_out",
-        "//executorch/backends/cadence/generic/operators:quantized_conv_nchw_out",
-        "//executorch/backends/cadence/generic/operators:quantized_conv_nhwc_out",
+        "//executorch/backends/cadence/generic/operators:quantized_conv2d_nchw_out",
+        "//executorch/backends/cadence/generic/operators:quantized_conv2d_nhwc_out",
         "//executorch/backends/cadence/generic/operators:quantized_fully_connected_out",
         "//executorch/backends/cadence/generic/operators:quantized_layer_norm",
         "//executorch/backends/cadence/generic/operators:quantized_linear_out",
diff --git a/backends/cadence/aot/functions.yaml b/backends/cadence/aot/functions.yaml
index 1c626887649..95c35055e9c 100644
--- a/backends/cadence/aot/functions.yaml
+++ b/backends/cadence/aot/functions.yaml
@@ -190,15 +190,15 @@
     - arg_meta: null
      kernel_name: impl::generic::dequantize_per_tensor_out

-- func: cadence::quantized_conv_nchw.out(Tensor input, Tensor weight,
Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nchw_out + kernel_name: impl::generic::quantized_conv2d_nchw_out -- func: cadence::quantized_conv_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nhwc_out + kernel_name: impl::generic::quantized_conv2d_nhwc_out - func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!) kernels: @@ -289,95 +289,95 @@ - arg_meta: null kernel_name: impl::generic::im2row_per_tensor_out -- func: cadence::quantized_conv_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nchw_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nchw_per_tensor_out -- func: cadence::quantized_conv_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!) 
kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nhwc_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nhwc_per_tensor_out -- func: cadence::quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) 
kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) 
+- func: cadence::quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) 
kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::generic::quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv1d_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::generic::quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv1d_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_ncl_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::generic::quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv1d_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) 
+- func: cadence::quantized_conv1d_nlc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::generic::quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv1d_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_nlc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::generic::quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::generic::quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_out - func: cadence::quantized_fully_connected.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!) kernels: diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml index a5f3102d600..a0e84d94300 100644 --- a/backends/cadence/aot/functions_hifi.yaml +++ b/backends/cadence/aot/functions_hifi.yaml @@ -290,105 +290,105 @@ - arg_meta: null kernel_name: impl::HiFi::dequantize_per_tensor_out -- func: cadence::quantized_conv_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nchw_out + kernel_name: impl::HiFi::quantized_conv2d_nchw_out -- func: cadence::quantized_conv_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!) 
kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nhwc_out + kernel_name: impl::HiFi::quantized_conv2d_nhwc_out -- func: cadence::quantized_conv_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nchw_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nchw_per_tensor_out -- func: cadence::quantized_conv_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nhwc_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nhwc_per_tensor_out -- func: cadence::quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) 
kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) 
+- func: cadence::quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::HiFi::quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv1d_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) 
kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::HiFi::quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv1d_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_ncl_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::HiFi::quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_out -- func: cadence::quantized_conv1d_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_nlc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out + kernel_name: impl::HiFi::quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out -- func: cadence::quantized_conv1d_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) +- func: cadence::quantized_conv1d_nlc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!) kernels: - arg_meta: null - kernel_name: impl::HiFi::quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out + kernel_name: impl::HiFi::quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_out - func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!) 
kernels: diff --git a/backends/cadence/aot/ops_registrations.py b/backends/cadence/aot/ops_registrations.py index efb22a9e7d6..e483bea79d1 100644 --- a/backends/cadence/aot/ops_registrations.py +++ b/backends/cadence/aot/ops_registrations.py @@ -86,28 +86,28 @@ ) lib.define( - "quantized_conv_nhwc(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Z)" + "quantized_conv2d_nhwc(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nhwc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nhwc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nchw(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Z)" + "quantized_conv2d_nchw(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nchw.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nchw.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( "quantized_matmul(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False) -> (Tensor Z)" @@ -122,100 +122,100 @@ "quantized_matmul_asym8sxasym8s_asym8s.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nchw_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nchw_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nhwc_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nhwc_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( - "quantized_conv1d_nchw_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv1d_nchw_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv1d_nchw_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv1d_ncl_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv1d_nchw_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv1d_ncl_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( - "quantized_conv1d_nhwc_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv1d_nlc_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv1d_nhwc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv1d_nlc_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv1d_nhwc_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv1d_nlc_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv1d_nhwc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv1d_nlc_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) 
out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( - "quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" + "quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)" ) lib.define( - "quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" + "quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)" ) lib.define( "quantized_matmul_asym8uxasym8u_asym8u(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? 
bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed=False) -> (Tensor Z)" @@ -704,8 +704,8 @@ def quantized_linear_asym8uxasym8u_asym8u_per_tensor_meta( return src.new_empty(out_size, dtype=src.dtype) -@register_fake("cadence::quantized_conv_nhwc") -def quantized_conv_nhwc_meta( +@register_fake("cadence::quantized_conv2d_nhwc") +def quantized_conv2d_nhwc_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -748,8 +748,8 @@ def quantized_conv_nhwc_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nchw") -def quantized_conv_nchw_meta( +@register_fake("cadence::quantized_conv2d_nchw") +def quantized_conv2d_nchw_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -792,8 +792,8 @@ def quantized_conv_nchw_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nchw.per_tensor") -def quantized_conv_nchw_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nchw.per_tensor") +def quantized_conv2d_nchw_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -836,8 +836,8 @@ def quantized_conv_nchw_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nhwc.per_tensor") -def quantized_conv_nhwc_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nhwc.per_tensor") +def quantized_conv2d_nhwc_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -880,8 +880,8 @@ def quantized_conv_nhwc_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor") -def quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nchw_asym8sxsym8s_asym8s.per_tensor") +def quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -929,8 +929,8 @@ def quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor") -def quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nchw_asym8uxsym8u_asym8u.per_tensor") +def quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -978,8 +978,8 @@ def quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor") -def quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nhwc_asym8sxsym8s_asym8s.per_tensor") +def quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -1027,8 +1027,8 @@ def quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor") -def quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nhwc_asym8uxsym8u_asym8u.per_tensor") +def quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -1076,8 +1076,8 @@ def quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_meta( return 
input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor") -def quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s.per_tensor") +def quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -1125,8 +1125,8 @@ def quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor") -def quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u.per_tensor") +def quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -1174,8 +1174,8 @@ def quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor") -def quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor") +def quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -1223,8 +1223,8 @@ def quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor") -def quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_meta( +@register_fake("cadence::quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor") +def quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -1272,8 +1272,10 @@ def quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor") -def quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_meta( +@register_fake( + "cadence::quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor" +) +def quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -1321,8 +1323,10 @@ def quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor") -def quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_meta( +@register_fake( + "cadence::quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor" +) +def quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -1370,8 +1374,10 @@ def quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor") -def quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_meta( +@register_fake( + "cadence::quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor" +) +def 
quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -1419,8 +1425,10 @@ def quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor") -def quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_meta( +@register_fake( + "cadence::quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor" +) +def quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -2177,8 +2185,8 @@ def roi_align_box_processor_meta( return rois.new_empty((rois.shape[0], 80), dtype=torch.uint8) -@register_fake("cadence::quantized_conv1d_nchw_asym8sxsym8s_asym8s.per_tensor") -def quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_meta( +@register_fake("cadence::quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor") +def quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -2213,8 +2221,8 @@ def quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv1d_nchw_asym8uxsym8u_asym8u.per_tensor") -def quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_meta( +@register_fake("cadence::quantized_conv1d_ncl_asym8uxsym8u_asym8u.per_tensor") +def quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -2249,8 +2257,8 @@ def quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv1d_nhwc_asym8sxsym8s_asym8s.per_tensor") -def quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_meta( +@register_fake("cadence::quantized_conv1d_nlc_asym8sxsym8s_asym8s.per_tensor") +def quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -2285,8 +2293,8 @@ def quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_meta( return input.new_empty(output_size, dtype=input.dtype) -@register_fake("cadence::quantized_conv1d_nhwc_asym8uxsym8u_asym8u.per_tensor") -def quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_meta( +@register_fake("cadence::quantized_conv1d_nlc_asym8uxsym8u_asym8u.per_tensor") +def quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_meta( input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py index b653be27e8f..9f67204fcf9 100644 --- a/backends/cadence/aot/quantizer/patterns.py +++ b/backends/cadence/aot/quantizer/patterns.py @@ -247,7 +247,7 @@ def get_anchors( ) def replacement_op(self) -> OpOverload: - return torch.ops.cadence.quantized_conv_nchw.default + return torch.ops.cadence.quantized_conv2d_nchw.default class Conv2dPattern(QuantizationPattern): @@ -286,7 +286,7 @@ def get_anchors( ) def replacement_op(self) -> OpOverload: - return torch.ops.cadence.quantized_conv_nchw.default + return torch.ops.cadence.quantized_conv2d_nchw.default class LayerNormPattern(QuantizationPattern): @@ -460,7 +460,7 @@ def get_anchors( ) def replacement_op(self) -> OpOverload: - return torch.ops.cadence.quantized_conv_nchw.default + return torch.ops.cadence.quantized_conv2d_nchw.default # Conv1d + regular relu op fusion diff 
--git a/backends/cadence/aot/ref_implementations.py b/backends/cadence/aot/ref_implementations.py index 2a53c2dde7a..5530b7c8117 100644 --- a/backends/cadence/aot/ref_implementations.py +++ b/backends/cadence/aot/ref_implementations.py @@ -623,8 +623,8 @@ def quantized_conv_per_tensor( ) -@impl(m, "quantized_conv_nchw.per_tensor") -def quantized_conv_nchw_per_tensor( +@impl(m, "quantized_conv2d_nchw.per_tensor") +def quantized_conv2d_nchw_per_tensor( input_tensor: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -679,8 +679,8 @@ def quantized_conv_nchw_per_tensor( ) -@impl(m, "quantized_conv_nhwc.per_tensor") -def quantized_conv_nhwc_per_tensor( +@impl(m, "quantized_conv2d_nhwc.per_tensor") +def quantized_conv2d_nhwc_per_tensor( input_tensor: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor, @@ -800,7 +800,7 @@ def variant( # Call the appropriate base function match layout: case "nchw": - return quantized_conv_nchw_per_tensor( + return quantized_conv2d_nchw_per_tensor( input_tensor, weight, bias, @@ -817,7 +817,7 @@ def variant( out_shift, ) case "nhwc": - return quantized_conv_nhwc_per_tensor( + return quantized_conv2d_nhwc_per_tensor( input_tensor, weight, bias, @@ -841,84 +841,92 @@ def variant( return decorator -@impl(m, "quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor") +@impl(m, "quantized_conv2d_nchw_asym8sxsym8s_asym8s.per_tensor") @quantized_conv_variant("nchw", torch.int8, torch.int8) -def quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... -@impl(m, "quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor") +@impl(m, "quantized_conv2d_nchw_asym8uxsym8u_asym8u.per_tensor") @quantized_conv_variant("nchw", torch.uint8, torch.uint8) -def quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... -@impl(m, "quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor") +@impl(m, "quantized_conv2d_nhwc_asym8sxsym8s_asym8s.per_tensor") @quantized_conv_variant("nhwc", torch.int8, torch.int8) -def quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... -@impl(m, "quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor") +@impl(m, "quantized_conv2d_nhwc_asym8uxsym8u_asym8u.per_tensor") @quantized_conv_variant("nhwc", torch.uint8, torch.uint8) -def quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... -@impl(m, "quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor") +@impl(m, "quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s.per_tensor") @quantized_conv_variant("nchw", torch.int8, torch.int8) -def quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... -@impl(m, "quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor") +@impl(m, "quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u.per_tensor") @quantized_conv_variant("nchw", torch.uint8, torch.uint8) -def quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... 
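Note on the hunk above: the variant() body in ref_implementations.py dispatches on the layout string captured by the quantized_conv_variant decorator factory and forwards every argument to the matching 2-D base kernel. A minimal, self-contained sketch of that pattern follows; it is illustrative only — names are simplified, dtype validation and the is_1d path are omitted, and base_nchw/base_nhwc are stand-ins rather than real ExecuTorch functions.

from typing import Callable

def conv_variant(layout: str) -> Callable:
    def decorator(stub: Callable) -> Callable:
        def variant(*args, **kwargs):
            # The stub body ("...") is never executed; it only carries the
            # registered name. All work is routed to a layout-specific kernel.
            match layout:
                case "nchw":
                    return base_nchw(*args, **kwargs)
                case "nhwc":
                    return base_nhwc(*args, **kwargs)
                case _:
                    raise ValueError(f"unsupported layout: {layout}")
        return variant
    return decorator

# Hypothetical stand-ins for quantized_conv2d_nchw_per_tensor /
# quantized_conv2d_nhwc_per_tensor.
def base_nchw(*args, **kwargs) -> str:
    return "dispatched to NCHW base kernel"

def base_nhwc(*args, **kwargs) -> str:
    return "dispatched to NHWC base kernel"

@conv_variant("nhwc")
def my_conv_asym8s_stub() -> str: ...

print(my_conv_asym8s_stub())  # -> "dispatched to NHWC base kernel"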
-@impl(m, "quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor") +@impl(m, "quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor") @quantized_conv_variant("nhwc", torch.int8, torch.int8) -def quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... -@impl(m, "quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor") +@impl(m, "quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor") @quantized_conv_variant("nhwc", torch.uint8, torch.uint8) -def quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... -@impl(m, "quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor") +@impl(m, "quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor") @quantized_conv_variant("nchw", torch.int8, torch.int8) -def quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor() -> ( + torch.Tensor +): ... -@impl(m, "quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor") +@impl(m, "quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor") @quantized_conv_variant("nchw", torch.uint8, torch.uint8) -def quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor() -> ( + torch.Tensor +): ... -@impl(m, "quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor") +@impl(m, "quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor") @quantized_conv_variant("nhwc", torch.int8, torch.int8) -def quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor() -> ( + torch.Tensor +): ... -@impl(m, "quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor") +@impl(m, "quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor") @quantized_conv_variant("nhwc", torch.uint8, torch.uint8) -def quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... +def quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor() -> ( + torch.Tensor +): ... -@impl(m, "quantized_conv1d_nchw_asym8sxsym8s_asym8s.per_tensor") +@impl(m, "quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor") @quantized_conv_variant("nchw", torch.int8, torch.int8, is_1d=True) -def quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... +def quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... -@impl(m, "quantized_conv1d_nchw_asym8uxsym8u_asym8u.per_tensor") +@impl(m, "quantized_conv1d_ncl_asym8uxsym8u_asym8u.per_tensor") @quantized_conv_variant("nchw", torch.uint8, torch.uint8, is_1d=True) -def quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... +def quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... -@impl(m, "quantized_conv1d_nhwc_asym8sxsym8s_asym8s.per_tensor") +@impl(m, "quantized_conv1d_nlc_asym8sxsym8s_asym8s.per_tensor") @quantized_conv_variant("nhwc", torch.int8, torch.int8, is_1d=True) -def quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... +def quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor() -> torch.Tensor: ... 
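For downstream code that still references the old cadence op names, the renames applied by this patch reduce to a prefix rewrite; the typed dtype suffixes (asym8sxsym8s_asym8s, _dilated_, _depthwise_, ...) and the .per_tensor/.out overload names are unchanged. The helper below is purely illustrative and not part of the patch.

# Old -> new base names introduced by this rename.
RENAMED_CADENCE_CONV_OPS = {
    "quantized_conv_nchw": "quantized_conv2d_nchw",
    "quantized_conv_nhwc": "quantized_conv2d_nhwc",
    "quantized_conv1d_nchw": "quantized_conv1d_ncl",
    "quantized_conv1d_nhwc": "quantized_conv1d_nlc",
}

def migrate_op_name(name: str) -> str:
    """Rewrite an old conv op name to its post-rename equivalent."""
    for old, new in RENAMED_CADENCE_CONV_OPS.items():
        if name.startswith(old):
            return new + name[len(old):]
    return name

assert migrate_op_name("quantized_conv_nchw_dilated_asym8sxsym8s_asym8s") == \
    "quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s"
assert migrate_op_name("quantized_conv1d_nhwc_asym8uxsym8u_asym8u") == \
    "quantized_conv1d_nlc_asym8uxsym8u_asym8u"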
-@impl(m, "quantized_conv1d_nhwc_asym8uxsym8u_asym8u.per_tensor") +@impl(m, "quantized_conv1d_nlc_asym8uxsym8u_asym8u.per_tensor") @quantized_conv_variant("nhwc", torch.uint8, torch.uint8, is_1d=True) -def quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... +def quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor() -> torch.Tensor: ... def quantized_relu_common( diff --git a/backends/cadence/aot/replace_ops.py b/backends/cadence/aot/replace_ops.py index c575be6e7fc..3d5bd493cfe 100644 --- a/backends/cadence/aot/replace_ops.py +++ b/backends/cadence/aot/replace_ops.py @@ -787,8 +787,8 @@ class ReplaceTrivialConvWithLinear(ExportPass): trivial_conv_op_to_linear_op: Dict[EdgeOpOverload, EdgeOpOverload] = { exir_ops.edge.cadence.convolution.default: exir_ops.edge.aten.linear.default, - exir_ops.edge.cadence.quantized_conv_nchw.default: exir_ops.edge.cadence.quantized_linear.default, - exir_ops.edge.cadence.quantized_conv_nhwc.default: exir_ops.edge.cadence.quantized_linear.default, + exir_ops.edge.cadence.quantized_conv2d_nchw.default: exir_ops.edge.cadence.quantized_linear.default, + exir_ops.edge.cadence.quantized_conv2d_nhwc.default: exir_ops.edge.cadence.quantized_linear.default, } def call_operator(self, op, args, kwargs, meta): @@ -800,8 +800,8 @@ def call_operator(self, op, args, kwargs, meta): # extra args holding at least the zero point and scale of input, weight, bias, # and output tensor. quantized_op = ( - op == exir_ops.edge.cadence.quantized_conv_nchw.default - or op == exir_ops.edge.cadence.quantized_conv_nhwc.default + op == exir_ops.edge.cadence.quantized_conv2d_nchw.default + or op == exir_ops.edge.cadence.quantized_conv2d_nhwc.default ) assert (len(args) == 8 and not quantized_op) or ( len(args) >= 12 and quantized_op @@ -979,18 +979,18 @@ def call_operator( ) -> ProxyValue: if op not in { exir_ops.edge.cadence.convolution.default, - exir_ops.edge.cadence.quantized_conv_nchw.default, + exir_ops.edge.cadence.quantized_conv2d_nchw.default, }: return super().call_operator(op, args, kwargs, meta) - quantized_op = op == exir_ops.edge.cadence.quantized_conv_nchw.default + quantized_op = op == exir_ops.edge.cadence.quantized_conv2d_nchw.default if not quantized_op and len(args) == 8 and args[-1] is True: # Already in NHWC layout. return super().call_operator(op, args, kwargs, meta) new_op = ( - exir_ops.edge.cadence.quantized_conv_nhwc.default + exir_ops.edge.cadence.quantized_conv2d_nhwc.default if quantized_op else exir_ops.edge.cadence.convolution.default ) @@ -1067,8 +1067,8 @@ class ReplaceConvWithIm2RowAndLinear(ExportPass): # decompose to. conv_op_to_linear_op: Dict[EdgeOpOverload, EdgeOpOverload] = { exir_ops.edge.cadence.convolution.default: exir_ops.edge.aten.linear.default, - exir_ops.edge.cadence.quantized_conv_nchw.default: exir_ops.edge.cadence.quantized_linear.default, - exir_ops.edge.cadence.quantized_conv_nhwc.default: exir_ops.edge.cadence.quantized_linear.default, + exir_ops.edge.cadence.quantized_conv2d_nchw.default: exir_ops.edge.cadence.quantized_linear.default, + exir_ops.edge.cadence.quantized_conv2d_nhwc.default: exir_ops.edge.cadence.quantized_linear.default, } def call_operator(self, op, args, kwargs, meta): @@ -1077,8 +1077,8 @@ def call_operator(self, op, args, kwargs, meta): # Get the relevant args from convolution node. 
quantized_op = ( - op == exir_ops.edge.cadence.quantized_conv_nchw.default - or op == exir_ops.edge.cadence.quantized_conv_nhwc.default + op == exir_ops.edge.cadence.quantized_conv2d_nchw.default + or op == exir_ops.edge.cadence.quantized_conv2d_nhwc.default ) assert (len(args) == 8 and not quantized_op) or ( len(args) >= 12 and quantized_op @@ -1110,7 +1110,7 @@ def call_operator(self, op, args, kwargs, meta): # channel_last layout is specified by the channel_last arg of conv # op, which is either the last argument (15th) or implicitely False # if the op is quantized, or the last argument if not. - channel_last = op == exir_ops.edge.cadence.quantized_conv_nhwc.default + channel_last = op == exir_ops.edge.cadence.quantized_conv2d_nhwc.default # The weight tensor is [out_channels, in_channels, X] for NCHW layout, # and [out_channels, X, in_channels] for NHWC layout. Here, X is the # kernel_width for conv1d, and X = kernel_height * kernel_width for @@ -1622,12 +1622,12 @@ class ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass(ExportPass): exir_ops.edge.cadence.quantized_add.per_tensor, [1, 2, 4, 5], ), - exir_ops.edge.cadence.quantized_conv_nchw: ( - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw: ( + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, [8, 9, 12, 13], ), - exir_ops.edge.cadence.quantized_conv_nhwc: ( - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc: ( + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, [8, 9, 12, 13], ), exir_ops.edge.cadence.quantized_fully_connected: ( diff --git a/backends/cadence/aot/tests/test_ref_implementations.py b/backends/cadence/aot/tests/test_ref_implementations.py index 30b30e085dc..2589bd88601 100644 --- a/backends/cadence/aot/tests/test_ref_implementations.py +++ b/backends/cadence/aot/tests/test_ref_implementations.py @@ -906,9 +906,9 @@ def test_quantized_conv_per_tensor( convs = [ ( - torch.ops.cadence.quantized_conv_nchw.per_tensor + torch.ops.cadence.quantized_conv2d_nchw.per_tensor if memory_format == torch.contiguous_format - else torch.ops.cadence.quantized_conv_nhwc.per_tensor + else torch.ops.cadence.quantized_conv2d_nhwc.per_tensor ) ] @@ -916,30 +916,30 @@ def test_quantized_conv_per_tensor( if input_tensor.dtype == torch.int8 and weight.dtype == torch.int8: if memory_format == torch.contiguous_format: optimized_convs = [ - torch.ops.cadence.quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor, - torch.ops.cadence.quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor, - torch.ops.cadence.quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor, + torch.ops.cadence.quantized_conv2d_nchw_asym8sxsym8s_asym8s.per_tensor, + torch.ops.cadence.quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s.per_tensor, + torch.ops.cadence.quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor, ] else: optimized_convs = [ - torch.ops.cadence.quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor, - torch.ops.cadence.quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor, - torch.ops.cadence.quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor, + torch.ops.cadence.quantized_conv2d_nhwc_asym8sxsym8s_asym8s.per_tensor, + torch.ops.cadence.quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor, + torch.ops.cadence.quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor, ] elif input_tensor.dtype == torch.uint8 and weight.dtype == torch.uint8: if memory_format == torch.contiguous_format: 
optimized_convs = [ - torch.ops.cadence.quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor, - torch.ops.cadence.quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor, - torch.ops.cadence.quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor, + torch.ops.cadence.quantized_conv2d_nchw_asym8uxsym8u_asym8u.per_tensor, + torch.ops.cadence.quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u.per_tensor, + torch.ops.cadence.quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor, ] else: optimized_convs = [ - torch.ops.cadence.quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor, - torch.ops.cadence.quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor, - torch.ops.cadence.quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor, + torch.ops.cadence.quantized_conv2d_nhwc_asym8uxsym8u_asym8u.per_tensor, + torch.ops.cadence.quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor, + torch.ops.cadence.quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor, ] convs.extend(optimized_convs) diff --git a/backends/cadence/aot/tests/test_replace_ops_passes.py b/backends/cadence/aot/tests/test_replace_ops_passes.py index ca5168db2be..8f1f2e86deb 100644 --- a/backends/cadence/aot/tests/test_replace_ops_passes.py +++ b/backends/cadence/aot/tests/test_replace_ops_passes.py @@ -1666,7 +1666,7 @@ def create_quantized_convolution_graph_module( out_multiplier, out_shift, ), - op=exir_ops.edge.cadence.quantized_conv_nhwc.default, + op=exir_ops.edge.cadence.quantized_conv2d_nhwc.default, args=args, ) else: @@ -1680,7 +1680,7 @@ def create_quantized_convolution_graph_module( out_multiplier, out_shift, ), - op=exir_ops.edge.cadence.quantized_conv_nchw.default, + op=exir_ops.edge.cadence.quantized_conv2d_nchw.default, args=args, ) @@ -1688,7 +1688,7 @@ def test_quantized_convolution_default_channel_last(self) -> None: # Create a graph with a single convolution node. gm = self.create_quantized_convolution_graph_module() self.assertEqual( - count_node(gm, exir_ops.edge.cadence.quantized_conv_nchw.default), 1 + count_node(gm, exir_ops.edge.cadence.quantized_conv2d_nchw.default), 1 ) self.assertEqual(count_node(gm, exir_ops.edge.aten.permute_copy.default), 0) @@ -1698,7 +1698,8 @@ def test_quantized_convolution_default_channel_last(self) -> None: # Check that no replacement was made. self.assertEqual( count_node( - gm_after_replacement, exir_ops.edge.cadence.quantized_conv_nhwc.default + gm_after_replacement, + exir_ops.edge.cadence.quantized_conv2d_nhwc.default, ), 1, ) @@ -1714,7 +1715,7 @@ def test_no_transpose_if_already_quantized_conv_channel_last(self) -> None: # Check if graph module is valid by running exportpass on it. gm = ExportPass().call(gm).graph_module self.assertEqual( - count_node(gm, exir_ops.edge.cadence.quantized_conv_nhwc.default), 1 + count_node(gm, exir_ops.edge.cadence.quantized_conv2d_nhwc.default), 1 ) # Apply replacement pass. @@ -1723,7 +1724,8 @@ def test_no_transpose_if_already_quantized_conv_channel_last(self) -> None: # Check that no replacement was made. 
self.assertEqual( count_node( - gm_after_replacement, exir_ops.edge.cadence.quantized_conv_nhwc.default + gm_after_replacement, + exir_ops.edge.cadence.quantized_conv2d_nhwc.default, ), 1, ) diff --git a/backends/cadence/aot/tests/test_type_dispatch_passes.py b/backends/cadence/aot/tests/test_type_dispatch_passes.py index 4ae10ea83dd..870735aad1a 100644 --- a/backends/cadence/aot/tests/test_type_dispatch_passes.py +++ b/backends/cadence/aot/tests/test_type_dispatch_passes.py @@ -199,29 +199,29 @@ def test_dispatch_quantized_matmul( "int8_nchw", torch.int8, (1, 3, 8, 8), # x_shape - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, - exir_ops.edge.cadence.quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw_asym8sxsym8s_asym8s.per_tensor, ), ( "uint8_nchw", torch.uint8, (1, 3, 8, 8), # x_shape - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, - exir_ops.edge.cadence.quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw_asym8uxsym8u_asym8u.per_tensor, ), ( "int8_nhwc", torch.int8, (1, 8, 8, 3), # x_shape - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, - exir_ops.edge.cadence.quantized_conv_nhwc_asym8sxsym8s_asym8s.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc_asym8sxsym8s_asym8s.per_tensor, ), ( "uint8_nhwc", torch.uint8, (1, 8, 8, 3), # x_shape - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, - exir_ops.edge.cadence.quantized_conv_nhwc_asym8uxsym8u_asym8u.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc_asym8uxsym8u_asym8u.per_tensor, ), ] ) @@ -256,29 +256,29 @@ def test_dispatch_quantized_conv_2d( "int8_nchw_dilated", torch.int8, (1, 3, 8, 8), # x_shape - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, - exir_ops.edge.cadence.quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s.per_tensor, ), ( "uint8_nchw_dilated", torch.uint8, (1, 3, 8, 8), # x_shape - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, - exir_ops.edge.cadence.quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u.per_tensor, ), ( "int8_nhwc_dilated", torch.int8, (1, 8, 8, 3), # x_shape - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, - exir_ops.edge.cadence.quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s.per_tensor, ), ( "uint8_nhwc_dilated", torch.uint8, (1, 8, 8, 3), # x_shape - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, - exir_ops.edge.cadence.quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u.per_tensor, ), ] ) @@ -313,29 +313,29 @@ def test_dispatch_quantized_conv_2d_dilated( "int8_nchw_1d", torch.int8, (1, 3, 8), # x_shape - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, - exir_ops.edge.cadence.quantized_conv1d_nchw_asym8sxsym8s_asym8s.per_tensor, + 
exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv1d_ncl_asym8sxsym8s_asym8s.per_tensor, ), ( "uint8_nchw_1d", torch.uint8, (1, 3, 8), # x_shape - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, - exir_ops.edge.cadence.quantized_conv1d_nchw_asym8uxsym8u_asym8u.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv1d_ncl_asym8uxsym8u_asym8u.per_tensor, ), ( "int8_nhwc_1d", torch.int8, (1, 8, 3), # x_shape - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, - exir_ops.edge.cadence.quantized_conv1d_nhwc_asym8sxsym8s_asym8s.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv1d_nlc_asym8sxsym8s_asym8s.per_tensor, ), ( "uint8_nhwc_1d", torch.uint8, (1, 8, 3), # x_shape - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, - exir_ops.edge.cadence.quantized_conv1d_nhwc_asym8uxsym8u_asym8u.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv1d_nlc_asym8uxsym8u_asym8u.per_tensor, ), ] ) @@ -410,32 +410,32 @@ def test_dispatch_quantized_add( torch.int8, (1, 3, 8, 8), # x_shape (3, 1, 3, 3), # w_shape (groups=3, input_channels=3) - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, - exir_ops.edge.cadence.quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s.per_tensor, ), ( "uint8_nchw_depthwise", torch.uint8, (1, 3, 8, 8), # x_shape (3, 1, 3, 3), # w_shape (groups=3, input_channels=3) - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, - exir_ops.edge.cadence.quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u.per_tensor, ), ( "int8_nhwc_depthwise", torch.int8, (1, 8, 8, 3), # x_shape (3, 3, 3, 1), # w_shape (groups=3, input_channels=3) - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, - exir_ops.edge.cadence.quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor, ), ( "uint8_nhwc_depthwise", torch.uint8, (1, 8, 8, 3), # x_shape (3, 3, 3, 1), # w_shape (groups=3, input_channels=3) - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, - exir_ops.edge.cadence.quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u.per_tensor, ), ] ) diff --git a/backends/cadence/aot/type_dispatch.py b/backends/cadence/aot/type_dispatch.py index 958a78a4808..3bf86ad2e50 100644 --- a/backends/cadence/aot/type_dispatch.py +++ b/backends/cadence/aot/type_dispatch.py @@ -62,16 +62,16 @@ class CompileTimeTypeDispatchPass(ExportPass): weight_arg_idx=2, variant="default", ), - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor: OpConfig( - "quantized_conv_nchw", + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor: OpConfig( + "quantized_conv2d_nchw", type_dispatch_suffixes={ (torch.int8, torch.int8): "asym8sxsym8s_asym8s", (torch.uint8, torch.uint8): "asym8uxsym8u_asym8u", }, weight_arg_idx=1, ), - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor: OpConfig( - "quantized_conv_nhwc", + 
exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor: OpConfig( + "quantized_conv2d_nhwc", type_dispatch_suffixes={ (torch.int8, torch.int8): "asym8sxsym8s_asym8s", (torch.uint8, torch.uint8): "asym8uxsym8u_asym8u", @@ -132,13 +132,13 @@ def call_operator( typed_op_name = f"{base_name}_{type_suffix}" if op in [ - exir_ops.edge.cadence.quantized_conv_nchw.per_tensor, - exir_ops.edge.cadence.quantized_conv_nhwc.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor, + exir_ops.edge.cadence.quantized_conv2d_nhwc.per_tensor, ]: groups = args[6] input_channels = ( args[0].to_tensor().shape[1] - if op == exir_ops.edge.cadence.quantized_conv_nchw.per_tensor + if op == exir_ops.edge.cadence.quantized_conv2d_nchw.per_tensor else args[0].to_tensor().shape[-1] ) is_depthwise = groups == input_channels @@ -151,9 +151,11 @@ def call_operator( elif is_dilated: typed_op_name = f"{base_name}_dilated_{type_suffix}" elif is_1d and groups == 1: - typed_op_name = ( - f"quantized_conv1d_{base_name.split('_')[-1]}_{type_suffix}" - ) + if "nchw" in base_name: + layout_suffix = "ncl" + else: + layout_suffix = "nlc" + typed_op_name = f"quantized_conv1d_{layout_suffix}_{type_suffix}" typed_op = getattr( getattr(exir_ops.edge.cadence, typed_op_name), config.variant diff --git a/backends/cadence/generic/operators/CMakeLists.txt b/backends/cadence/generic/operators/CMakeLists.txt index ea5b699f441..d88701007f9 100644 --- a/backends/cadence/generic/operators/CMakeLists.txt +++ b/backends/cadence/generic/operators/CMakeLists.txt @@ -80,8 +80,8 @@ target_include_directories( add_library( custom_ops "quantized_linear_out.cpp" - "quantized_conv_nchw_out.cpp" - "quantized_conv_nhwc_out.cpp" + "quantized_conv2d_nchw_out.cpp" + "quantized_conv2d_nhwc_out.cpp" "quantized_relu_out.cpp" "quantized_layer_norm.cpp" "quantize_per_tensor.cpp" diff --git a/backends/cadence/generic/operators/quantized_conv_nchw_out.cpp b/backends/cadence/generic/operators/quantized_conv2d_nchw_out.cpp similarity index 94% rename from backends/cadence/generic/operators/quantized_conv_nchw_out.cpp rename to backends/cadence/generic/operators/quantized_conv2d_nchw_out.cpp index 6eeabcf1d52..fbb01c82e65 100644 --- a/backends/cadence/generic/operators/quantized_conv_nchw_out.cpp +++ b/backends/cadence/generic/operators/quantized_conv2d_nchw_out.cpp @@ -157,7 +157,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic( // bias_scale, since it is a product of the two. The kernel will branch to // quantized::conv1d or quantized::conv2d based on the dimensionality of // activation tensor. 
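Returning to the type_dispatch.py hunk earlier in this patch: for 1-D convolutions the pass no longer derives the conv1d name from the last segment of the 2-D base name, but maps nchw to ncl and nhwc to nlc explicitly. The standalone sketch below shows just that naming step; the depthwise/dilated branches and the real dtype-suffix lookup from the pass are intentionally not reproduced.

def conv1d_typed_op_name(base_name: str, type_suffix: str) -> str:
    # base_name is the 2-D op name from OpConfig, e.g. "quantized_conv2d_nchw".
    layout_suffix = "ncl" if "nchw" in base_name else "nlc"
    return f"quantized_conv1d_{layout_suffix}_{type_suffix}"

assert conv1d_typed_op_name("quantized_conv2d_nchw", "asym8sxsym8s_asym8s") == \
    "quantized_conv1d_ncl_asym8sxsym8s_asym8s"
assert conv1d_typed_op_name("quantized_conv2d_nhwc", "asym8uxsym8u_asym8u") == \
    "quantized_conv1d_nlc_asym8uxsym8u_asym8u"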
-void quantized_conv_nchw( +void quantized_conv2d_nchw( const Tensor& input, const Tensor& weight, const Tensor& bias, @@ -228,7 +228,7 @@ void quantized_conv_nchw( #undef typed_quantized_conv2d_nchw } -void quantized_conv_nchw_out( +void quantized_conv2d_nchw_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -248,7 +248,7 @@ void quantized_conv_nchw_out( const float bias_scale_float = bias_scale.const_data_ptr()[0]; const int32_t weight_zero_point_int = weight_zero_point.const_data_ptr()[0]; - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -264,7 +264,7 @@ void quantized_conv_nchw_out( out); } -void quantized_conv_nchw_per_tensor_out( +void quantized_conv2d_nchw_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -282,7 +282,7 @@ void quantized_conv_nchw_per_tensor_out( __ET_UNUSED int64_t out_shift, bool channel_last, Tensor& out) { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -298,7 +298,7 @@ void quantized_conv_nchw_per_tensor_out( out); } -void quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -315,7 +315,7 @@ void quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -331,7 +331,7 @@ void quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out( out); } -void quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -348,7 +348,7 @@ void quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -364,7 +364,7 @@ void quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out( out); } -void quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -381,7 +381,7 @@ void quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -397,7 +397,7 @@ void quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out( out); } -void quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -414,7 +414,7 @@ void quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -430,7 +430,7 @@ void quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out( out); } -void quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -447,7 +447,7 @@ void 
quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -463,7 +463,7 @@ void quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out( out); } -void quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -480,7 +480,7 @@ void quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -496,7 +496,7 @@ void quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out( out); } -void quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -513,7 +513,7 @@ void quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -529,7 +529,7 @@ void quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out( out); } -void quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -546,7 +546,7 @@ void quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, diff --git a/backends/cadence/generic/operators/quantized_conv_nhwc_out.cpp b/backends/cadence/generic/operators/quantized_conv2d_nhwc_out.cpp similarity index 94% rename from backends/cadence/generic/operators/quantized_conv_nhwc_out.cpp rename to backends/cadence/generic/operators/quantized_conv2d_nhwc_out.cpp index d377048b142..eca836dcc94 100644 --- a/backends/cadence/generic/operators/quantized_conv_nhwc_out.cpp +++ b/backends/cadence/generic/operators/quantized_conv2d_nhwc_out.cpp @@ -144,7 +144,7 @@ __attribute__((noinline)) void conv2d_nhwc_core_generic( } } -void quantized_conv_nhwc( +void quantized_conv2d_nhwc( const Tensor& input, const Tensor& weight, const Tensor& bias, @@ -215,7 +215,7 @@ void quantized_conv_nhwc( #undef typed_quantized_conv2d_nhwc } -void quantized_conv_nhwc_out( +void quantized_conv2d_nhwc_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -235,7 +235,7 @@ void quantized_conv_nhwc_out( const float bias_scale_float = bias_scale.const_data_ptr()[0]; const int32_t weight_zero_point_int = weight_zero_point.const_data_ptr()[0]; - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -251,7 +251,7 @@ void quantized_conv_nhwc_out( out); } -void quantized_conv_nhwc_per_tensor_out( +void quantized_conv2d_nhwc_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -269,7 +269,7 @@ void quantized_conv_nhwc_per_tensor_out( __ET_UNUSED int64_t out_shift, bool channel_last, Tensor& out) { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -285,7 +285,7 @@ void quantized_conv_nhwc_per_tensor_out( out); } -void 
quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -302,7 +302,7 @@ void quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -318,7 +318,7 @@ void quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out( out); } -void quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -335,7 +335,7 @@ void quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -351,7 +351,7 @@ void quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out( out); } -void quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -368,7 +368,7 @@ void quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -384,7 +384,7 @@ void quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out( out); } -void quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -401,7 +401,7 @@ void quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -417,7 +417,7 @@ void quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out( out); } -void quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -434,7 +434,7 @@ void quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -450,7 +450,7 @@ void quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out( out); } -void quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -467,7 +467,7 @@ void quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -483,7 +483,7 @@ void quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out( out); } -void quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -500,7 +500,7 @@ void 
quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -516,7 +516,7 @@ void quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out( out); } -void quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -533,7 +533,7 @@ void quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, diff --git a/backends/cadence/generic/operators/targets.bzl b/backends/cadence/generic/operators/targets.bzl index 4ff821158bc..b3c305c9c02 100644 --- a/backends/cadence/generic/operators/targets.bzl +++ b/backends/cadence/generic/operators/targets.bzl @@ -136,8 +136,8 @@ def define_common_targets(): ) runtime.cxx_library( - name = "quantized_conv_nchw_out", - srcs = ["quantized_conv_nchw_out.cpp"], + name = "quantized_conv2d_nchw_out", + srcs = ["quantized_conv2d_nchw_out.cpp"], exported_headers = ["operators.h", "quantized_ops.h"], platforms = CXX, deps = [ @@ -151,8 +151,8 @@ def define_common_targets(): ) runtime.cxx_library( - name = "quantized_conv_nhwc_out", - srcs = ["quantized_conv_nhwc_out.cpp"], + name = "quantized_conv2d_nhwc_out", + srcs = ["quantized_conv2d_nhwc_out.cpp"], exported_headers = ["operators.h", "quantized_ops.h"], platforms = CXX, deps = [ diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt index 6bd63c6d9f6..26555da9760 100644 --- a/backends/cadence/hifi/operators/CMakeLists.txt +++ b/backends/cadence/hifi/operators/CMakeLists.txt @@ -96,8 +96,8 @@ add_library( "op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp" - "op_quantized_conv_nchw_out.cpp" - "op_quantized_conv_nhwc_out.cpp" + "op_quantized_conv2d_nchw_out.cpp" + "op_quantized_conv2d_nhwc_out.cpp" "op_quantized_fully_connected_out" ) target_include_directories( diff --git a/backends/cadence/hifi/operators/op_quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out.cpp similarity index 96% rename from backends/cadence/hifi/operators/op_quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out.cpp index 566325e0f10..b5ab0cdbaa2 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Optimized NCHW 1D convolution for int8 x int8 -> int8 -void xa_opt_quantized_conv1d_nchw_asym8sxsym8s_asym8s( +void xa_opt_quantized_conv1d_ncl_asym8sxsym8s_asym8s( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -144,7 +144,7 @@ void xa_opt_quantized_conv1d_nchw_asym8sxsym8s_asym8s( } } -void quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -161,7 +161,7 @@ void 
quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv1d_nchw_asym8sxsym8s_asym8s( + xa_opt_quantized_conv1d_ncl_asym8sxsym8s_asym8s( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_out.cpp similarity index 96% rename from backends/cadence/hifi/operators/op_quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_out.cpp index de5f76b0fff..60e700f563b 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Optimized NCHW 1D convolution for uint8 x uint8 -> uint8 -void xa_opt_quantized_conv1d_nchw_asym8uxsym8u_asym8u( +void xa_opt_quantized_conv1d_ncl_asym8uxsym8u_asym8u( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -144,7 +144,7 @@ void xa_opt_quantized_conv1d_nchw_asym8uxsym8u_asym8u( } } -void quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_out( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -161,7 +161,7 @@ void quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv1d_nchw_asym8uxsym8u_asym8u( + xa_opt_quantized_conv1d_ncl_asym8uxsym8u_asym8u( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out.cpp similarity index 95% rename from backends/cadence/hifi/operators/op_quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out.cpp index b549ad13307..c9a3d2b58de 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Optimized NHWC 1D convolution for int8 x int8 -> int8 -void xa_opt_quantized_conv1d_nhwc_asym8sxsym8s_asym8s( +void xa_opt_quantized_conv1d_nlc_asym8sxsym8s_asym8s( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -93,7 +93,7 @@ void xa_opt_quantized_conv1d_nhwc_asym8sxsym8s_asym8s( } } -void quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -110,7 +110,7 @@ void quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv1d_nhwc_asym8sxsym8s_asym8s( + xa_opt_quantized_conv1d_nlc_asym8sxsym8s_asym8s( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_out.cpp similarity 
index 95% rename from backends/cadence/hifi/operators/op_quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_out.cpp index f5dbb083522..2d7a4cba509 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Optimized NHWC 1D convolution for uint8 x uint8 -> uint8 -void xa_opt_quantized_conv1d_nhwc_asym8uxsym8u_asym8u( +void xa_opt_quantized_conv1d_nlc_asym8uxsym8u_asym8u( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -93,7 +93,7 @@ void xa_opt_quantized_conv1d_nhwc_asym8uxsym8u_asym8u( } } -void quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_out( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -110,7 +110,7 @@ void quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv1d_nhwc_asym8uxsym8u_asym8u( + xa_opt_quantized_conv1d_nlc_asym8uxsym8u_asym8u( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp similarity index 97% rename from backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp index e4074829cf0..e2584485686 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Optimized NCHW convolution for int8 x int8 -> int8 -void xa_opt_quantized_conv_nchw_asym8sxsym8s_asym8s( +void xa_opt_quantized_conv2d_nchw_asym8sxsym8s_asym8s( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -207,7 +207,7 @@ void xa_opt_quantized_conv_nchw_asym8sxsym8s_asym8s( } } -void quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -224,7 +224,7 @@ void quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv_nchw_asym8sxsym8s_asym8s( + xa_opt_quantized_conv2d_nchw_asym8sxsym8s_asym8s( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp similarity index 97% rename from backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp index 201b5d7da16..8444fef6bd1 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp @@ -22,7 +22,7 @@ 
namespace HiFi { namespace native { // Optimized NCHW convolution for uint8 x uint8 -> uint8 -void xa_opt_quantized_conv_nchw_asym8uxsym8u_asym8u( +void xa_opt_quantized_conv2d_nchw_asym8uxsym8u_asym8u( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -207,7 +207,7 @@ void xa_opt_quantized_conv_nchw_asym8uxsym8u_asym8u( } } -void quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -224,7 +224,7 @@ void quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv_nchw_asym8uxsym8u_asym8u( + xa_opt_quantized_conv2d_nchw_asym8uxsym8u_asym8u( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp similarity index 96% rename from backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp index a0e47104e18..787984e52db 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Specialized depthwise NCHW convolution for int8 x int8 -> int8 -void xa_opt_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s( +void xa_opt_quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -162,7 +162,7 @@ void xa_opt_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s( kNnlibMaxDim); } -void quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -179,7 +179,7 @@ void quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s( + xa_opt_quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp similarity index 96% rename from backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp index 03274413f65..219eaf44ad7 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Specialized depthwise NCHW convolution for uint8 x uint8 -> uint8 -void xa_opt_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u( +void xa_opt_quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u( 
KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -162,7 +162,7 @@ void xa_opt_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u( kNnlibMaxDim); } -void quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -179,7 +179,7 @@ void quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u( + xa_opt_quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp similarity index 98% rename from backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp index 34c861faed5..fc279f2bbdf 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp @@ -122,7 +122,7 @@ __attribute__((noinline)) void conv2d_nchw_dilated_asym8sxsym8s_asym8s_core( } } -void quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp similarity index 98% rename from backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp index 6393554e18f..08ca4657c75 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp @@ -123,7 +123,7 @@ __attribute__((noinline)) void conv2d_nchw_dilated_asym8uxsym8u_asym8u_core( } } -void quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_out.cpp similarity index 98% rename from backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nchw_out.cpp index 604f881ab96..984747d9316 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nchw_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nchw_out.cpp @@ -156,7 +156,7 @@ __attribute__((noinline)) void conv2d_nchw_core_generic( } } -void xa_opt_quantized_conv_nchw( +void xa_opt_quantized_conv2d_nchw( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -444,7 +444,7 @@ void 
xa_opt_quantized_conv_nchw( // bias_scale, since it is a product of the two. The kernel will branch to // quantized::conv1d or quantized::conv2d based on the dimensionality of // activation tensor. -void quantized_conv_nchw( +void quantized_conv2d_nchw( const Tensor& input, const Tensor& weight, const Tensor& bias, @@ -515,7 +515,7 @@ void quantized_conv_nchw( #undef typed_quantized_conv2d_nchw } -void quantized_conv_nchw_out( +void quantized_conv2d_nchw_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -546,7 +546,7 @@ void quantized_conv_nchw_out( optimized = 0; if (optimized) { - xa_opt_quantized_conv_nchw( + xa_opt_quantized_conv2d_nchw( ctx, input, weight, @@ -562,7 +562,7 @@ void quantized_conv_nchw_out( output_zero_point, out); } else { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, @@ -579,7 +579,7 @@ void quantized_conv_nchw_out( } } -void quantized_conv_nchw_per_tensor_out( +void quantized_conv2d_nchw_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -606,7 +606,7 @@ void quantized_conv_nchw_per_tensor_out( optimized = 0; if (optimized) { - xa_opt_quantized_conv_nchw( + xa_opt_quantized_conv2d_nchw( ctx, input, weight, @@ -622,7 +622,7 @@ void quantized_conv_nchw_per_tensor_out( output_zero_point, out); } else { - quantized_conv_nchw( + quantized_conv2d_nchw( input, weight, bias, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp similarity index 96% rename from backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp index 3f62c82bfcd..9bd7e641144 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Optimized NHWC convolution for int8 x int8 -> int8 -void xa_opt_quantized_conv_nhwc_asym8sxsym8s_asym8s( +void xa_opt_quantized_conv2d_nhwc_asym8sxsym8s_asym8s( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -150,7 +150,7 @@ void xa_opt_quantized_conv_nhwc_asym8sxsym8s_asym8s( } } -void quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -167,7 +167,7 @@ void quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv_nhwc_asym8sxsym8s_asym8s( + xa_opt_quantized_conv2d_nhwc_asym8sxsym8s_asym8s( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp similarity index 96% rename from backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp index 32267591cf3..433cbf76fce 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp +++ 
b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Optimized NHWC convolution for uint8 x uint8 -> uint8 -void xa_opt_quantized_conv_nhwc_asym8uxsym8u_asym8u( +void xa_opt_quantized_conv2d_nhwc_asym8uxsym8u_asym8u( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -150,7 +150,7 @@ void xa_opt_quantized_conv_nhwc_asym8uxsym8u_asym8u( } } -void quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -167,7 +167,7 @@ void quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv_nhwc_asym8uxsym8u_asym8u( + xa_opt_quantized_conv2d_nhwc_asym8uxsym8u_asym8u( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp similarity index 95% rename from backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp index c232f7e5ef2..384ebbb4f48 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Specialized depthwise NHWC convolution for int8 x int8 -> int8 -void xa_opt_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s( +void xa_opt_quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -132,7 +132,7 @@ void xa_opt_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s( } } -void quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -149,7 +149,7 @@ void quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s( + xa_opt_quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp similarity index 95% rename from backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp index 5ef102c31d1..07df1a416d7 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out.cpp @@ -22,7 +22,7 @@ namespace HiFi { namespace native { // Specialized depthwise NHWC convolution for uint8 x uint8 -> uint8 -void 
xa_opt_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u( +void xa_opt_quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -132,7 +132,7 @@ void xa_opt_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u( } } -void quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -149,7 +149,7 @@ void quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED int64_t out_multiplier, __ET_UNUSED int64_t out_shift, Tensor& out) { - xa_opt_quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u( + xa_opt_quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u( ctx, input, weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp similarity index 98% rename from backends/cadence/hifi/operators/op_quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp index 35a1cbda0f9..91965594a5d 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out.cpp @@ -122,7 +122,7 @@ __attribute__((noinline)) void conv2d_nhwc_dilated_asym8sxsym8s_asym8s_core( } } -void quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out( +void quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp similarity index 98% rename from backends/cadence/hifi/operators/op_quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp index 62b5008ab7e..14dc31a719f 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out.cpp @@ -122,7 +122,7 @@ __attribute__((noinline)) void conv2d_nhwc_dilated_asym8uxsym8u_asym8u_core( } } -void quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out( +void quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, diff --git a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp similarity index 98% rename from backends/cadence/hifi/operators/op_quantized_conv_nhwc_out.cpp rename to backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp index 5aa087c4b75..a5d503853c4 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv_nhwc_out.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp @@ -147,7 +147,7 @@ __attribute__((noinline)) void conv2d_nhwc_core_generic( } } -void xa_opt_quantized_conv_nhwc( +void 
xa_opt_quantized_conv2d_nhwc( KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -350,7 +350,7 @@ void xa_opt_quantized_conv_nhwc( } } -void quantized_conv_nhwc( +void quantized_conv2d_nhwc( const Tensor& input, const Tensor& weight, const Tensor& bias, @@ -421,7 +421,7 @@ void quantized_conv_nhwc( #undef typed_quantized_conv2d_nhwc } -void quantized_conv_nhwc_out( +void quantized_conv2d_nhwc_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -452,7 +452,7 @@ void quantized_conv_nhwc_out( optimized = 0; if (optimized) { - xa_opt_quantized_conv_nhwc( + xa_opt_quantized_conv2d_nhwc( ctx, input, weight, @@ -468,7 +468,7 @@ void quantized_conv_nhwc_out( output_zero_point, out); } else { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, @@ -485,7 +485,7 @@ void quantized_conv_nhwc_out( } } -void quantized_conv_nhwc_per_tensor_out( +void quantized_conv2d_nhwc_per_tensor_out( __ET_UNUSED KernelRuntimeContext& ctx, const Tensor& input, const Tensor& weight, @@ -512,7 +512,7 @@ void quantized_conv_nhwc_per_tensor_out( optimized = 0; if (optimized) { - xa_opt_quantized_conv_nhwc( + xa_opt_quantized_conv2d_nhwc( ctx, input, weight, @@ -528,7 +528,7 @@ void quantized_conv_nhwc_per_tensor_out( output_zero_point, out); } else { - quantized_conv_nhwc( + quantized_conv2d_nhwc( input, weight, bias, diff --git a/backends/cadence/hifi/operators/operators.h b/backends/cadence/hifi/operators/operators.h index 11b93f4a89c..f7f5194d91a 100644 --- a/backends/cadence/hifi/operators/operators.h +++ b/backends/cadence/hifi/operators/operators.h @@ -83,7 +83,7 @@ void quantized_linear_per_tensor_out( const ::executorch::aten::optional<::executorch::aten::Tensor>& offset, ::executorch::aten::Tensor& out); -void quantized_conv_nhwc_out( +void quantized_conv2d_nhwc_out( ::executorch::runtime::KernelRuntimeContext& ctx, const ::executorch::aten::Tensor& input, const ::executorch::aten::Tensor& weight, @@ -101,7 +101,7 @@ void quantized_conv_nhwc_out( const ::executorch::aten::Tensor& out_shift, ::executorch::aten::Tensor& out); -void quantized_conv_nchw_out( +void quantized_conv2d_nchw_out( ::executorch::runtime::KernelRuntimeContext& ctx, const ::executorch::aten::Tensor& input, const ::executorch::aten::Tensor& weight, @@ -119,7 +119,7 @@ void quantized_conv_nchw_out( const ::executorch::aten::Tensor& out_shift, ::executorch::aten::Tensor& out); -void quantized_conv_nchw_per_tensor_out( +void quantized_conv2d_nchw_per_tensor_out( ::executorch::runtime::KernelRuntimeContext& ctx, const ::executorch::aten::Tensor& input, const ::executorch::aten::Tensor& weight, @@ -137,7 +137,7 @@ void quantized_conv_nchw_per_tensor_out( int64_t out_shift, ::executorch::aten::Tensor& out); -void quantized_conv_nhwc_per_tensor_out( +void quantized_conv2d_nhwc_per_tensor_out( ::executorch::runtime::KernelRuntimeContext& ctx, const ::executorch::aten::Tensor& input, const ::executorch::aten::Tensor& weight, diff --git a/backends/cadence/hifi/operators/targets.bzl b/backends/cadence/hifi/operators/targets.bzl index fa263d4017c..ca474e8183b 100644 --- a/backends/cadence/hifi/operators/targets.bzl +++ b/backends/cadence/hifi/operators/targets.bzl @@ -63,24 +63,24 @@ OPERATORS = [ "ne", "permute_copy", "pow", - "quantized_conv_nchw_out", - "quantized_conv_nchw_asym8sxsym8s_asym8s_per_tensor_out", - "quantized_conv_nchw_asym8uxsym8u_asym8u_per_tensor_out", - "quantized_conv1d_nchw_asym8sxsym8s_asym8s_per_tensor_out", - 
"quantized_conv1d_nchw_asym8uxsym8u_asym8u_per_tensor_out", - "quantized_conv_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out", - "quantized_conv_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out", - "quantized_conv_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out", - "quantized_conv_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out", - "quantized_conv_nhwc_out", - "quantized_conv_nhwc_asym8sxsym8s_asym8s_per_tensor_out", - "quantized_conv_nhwc_asym8uxsym8u_asym8u_per_tensor_out", - "quantized_conv1d_nhwc_asym8sxsym8s_asym8s_per_tensor_out", - "quantized_conv1d_nhwc_asym8uxsym8u_asym8u_per_tensor_out", - "quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out", - "quantized_conv_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out", - "quantized_conv_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out", - "quantized_conv_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out", + "quantized_conv2d_nchw_out", + "quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out", + "quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out", + "quantized_conv1d_ncl_asym8sxsym8s_asym8s_per_tensor_out", + "quantized_conv1d_ncl_asym8uxsym8u_asym8u_per_tensor_out", + "quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out", + "quantized_conv2d_nchw_depthwise_asym8uxsym8u_asym8u_per_tensor_out", + "quantized_conv2d_nchw_dilated_asym8sxsym8s_asym8s_per_tensor_out", + "quantized_conv2d_nchw_dilated_asym8uxsym8u_asym8u_per_tensor_out", + "quantized_conv2d_nhwc_out", + "quantized_conv2d_nhwc_asym8sxsym8s_asym8s_per_tensor_out", + "quantized_conv2d_nhwc_asym8uxsym8u_asym8u_per_tensor_out", + "quantized_conv1d_nlc_asym8sxsym8s_asym8s_per_tensor_out", + "quantized_conv1d_nlc_asym8uxsym8u_asym8u_per_tensor_out", + "quantized_conv2d_nhwc_depthwise_asym8sxsym8s_asym8s_per_tensor_out", + "quantized_conv2d_nhwc_depthwise_asym8uxsym8u_asym8u_per_tensor_out", + "quantized_conv2d_nhwc_dilated_asym8sxsym8s_asym8s_per_tensor_out", + "quantized_conv2d_nhwc_dilated_asym8uxsym8u_asym8u_per_tensor_out", "quantized_fully_connected_out", "quantized_fully_connected_asym8sxasym8s_asym8s_per_tensor_out", "quantized_fully_connected_asym8uxasym8u_asym8u_per_tensor_out",