Skip to content

Commit

Permalink
Reverts 4ce1a06
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 636666940
  • Loading branch information
LukeBoyer authored and tensorflower-gardener committed May 23, 2024
1 parent 789adad commit 612177e
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 8 deletions.
12 changes: 4 additions & 8 deletions tensorflow/compiler/mlir/lite/tests/quantize-dynamic-range.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -149,32 +149,28 @@ func.func @QuantizeTransposeConvWeightOnly(%arg0: tensor<32x4x4x128xf32>, %arg1:

// CHECK: %[[b:.*]] = arith.constant dense<0.000000e+00> : tensor<1x32x42x128xf32>
// CHECK: %[[w:.*]] = "tfl.pseudo_qconst"() <{qtype = tensor<1x32x42x128x!quant.uniform<i8<-127:127>:f32:0, {1.000000e+00}>>
// CHECK: %[[dq_w:.*]] = "tfl.dequantize"(%[[w]]) : (tensor<1x32x42x128x!quant.uniform<i8<-127:127>:f32:0, {1.000000e+00}>>) -> tensor<1x32x42x128xf32>
// CHECK: %[[tconv:.*]] = "tfl.transpose_conv"(%arg1, %[[dq_w]], %arg0, %[[b]]) <{
// CHECK: %[[tconv:.*]] = "tfl.transpose_conv"(%arg1, %[[w:.*]], %arg0, %[[b]]) <{
// CHECK-NOT: asymmetric_quantize_inputs = true
// CHECK-SAME: padding = "SAME"
// CHECK: return %[[tconv:.*]]

// PerTensor: %[[b:.*]] = arith.constant dense<0.000000e+00> : tensor<1x32x42x128xf32>
// PerTensor: %[[w:.*]] = "tfl.pseudo_qconst"() <{qtype = tensor<1x32x42x128x!quant.uniform<i8<-127:127>:f32, 1.000000e+00>>
// PerTensor: %[[dq_w:.*]] = "tfl.dequantize"(%[[w]]) : (tensor<1x32x42x128x!quant.uniform<i8<-127:127>:f32, 1.000000e+00>>) -> tensor<1x32x42x128xf32>
// PerTensor: %[[tconv:.*]] = "tfl.transpose_conv"(%arg1, %[[dq_w]], %arg0, %[[b]]) <{
// PerTensor: %[[tconv:.*]] = "tfl.transpose_conv"(%arg1, %[[w:.*]], %arg0, %[[b]]) <{
// PerTensor-NOT: asymmetric_quantize_inputs = true
// PerTensor-SAME: padding = "SAME"
// PerTensor: return %[[tconv:.*]]

// PerChannelWeightOnly: %[[b:.*]] = arith.constant dense<0.000000e+00> : tensor<1x32x42x128xf32>
// PerChannelWeightOnly: %[[w:.*]] = "tfl.pseudo_qconst"() <{qtype = tensor<1x32x42x128x!quant.uniform<i8<-127:127>:f32:0, {1.000000e+00}>>
// PerChannelWeightOnly: %[[dq_w:.*]] = "tfl.dequantize"(%[[w]]) : (tensor<1x32x42x128x!quant.uniform<i8<-127:127>:f32:0, {1.000000e+00}>>) -> tensor<1x32x42x128xf32>
// PerChannelWeightOnly: %[[tconv:.*]] = "tfl.transpose_conv"(%arg1, %[[dq_w]], %arg0, %[[b]]) <{
// PerChannelWeightOnly: %[[tconv:.*]] = "tfl.transpose_conv"(%arg1, %[[w]], %arg0, %[[b]]) <{
// PerChannelWeightOnly-NOT: asymmetric_quantize_inputs = true
// PerChannelWeightOnly-SAME: padding = "SAME"
// PerChannelWeightOnly: return %[[tconv:.*]]

// PerTensorWeightOnly: %[[b:.*]] = arith.constant dense<0.000000e+00> : tensor<1x32x42x128xf32>
// PerTensorWeightOnly: %[[w:.*]] = "tfl.pseudo_qconst"() <{qtype = tensor<1x32x42x128x!quant.uniform<i8<-127:127>:f32, 1.000000e+00>>
// PerTensorWeightOnly: %[[dq_w:.*]] = "tfl.dequantize"(%[[w]]) : (tensor<1x32x42x128x!quant.uniform<i8<-127:127>:f32, 1.000000e+00>>) -> tensor<1x32x42x128xf32>
// PerTensorWeightOnly: %[[tconv:.*]] = "tfl.transpose_conv"(%arg1, %[[dq_w]], %arg0, %[[b]]) <{
// PerTensorWeightOnly: %[[tconv:.*]] = "tfl.transpose_conv"(%arg1, %[[w]], %arg0, %[[b]]) <{
// PerTensorWeightOnly-NOT: asymmetric_quantize_inputs = true
// PerTensorWeightOnly-SAME: padding = "SAME"
// PerTensorWeightOnly: return %[[tconv:.*]]
Expand Down
14 changes: 14 additions & 0 deletions tensorflow/compiler/mlir/lite/tests/quantize.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -472,3 +472,17 @@ func.func @NotQuantizeReadVariable() -> tensor<1x2x3x!quant.uniform<u8<1:255>:f3
// CHECK-NEXT: %[[quantize:.*]] = "tfl.quantize"(%[[read]]) <{qtype = tensor<1x2x3x!quant.uniform<u8<1:255>:f32, 0.047244094488188976:128>>}> : (tensor<1x2x3xf32>) -> tensor<1x2x3x!quant.uniform<u8<1:255>:f32, 0.047244094488188976:128>>
// CHECK-NEXT: return %[[quantize]]
}

// Verifies that a tfl.dequantize feeding the weights operand of
// tfl.transpose_conv is folded away, so the op consumes the quantized
// weights directly (hybrid/weight-only execution path).
// CHECK-LABEL: foldQuantWeightsIntoTposeConv
func.func @foldQuantWeightsIntoTposeConv(%arg0: tensor<2x2x3x2048xf32>) -> tensor<2x3x2x2048xf32> {
  %output_shape = arith.constant dense<[2, 3, 2, 2048]> : tensor<4xi32>
  %q_weights = "tfl.pseudo_qconst"() {qtype = tensor<4x2x2x2048x!quant.uniform<u8<1:255>:f32, 0.15:151>>, value = dense<-76> : tensor<4x2x2x2048xi8>} : () -> tensor<4x2x2x2048x!quant.uniform<u8<1:255>:f32, 0.15:151>>
  %dq_weights = "tfl.dequantize"(%q_weights) : (tensor<4x2x2x2048x!quant.uniform<u8<1:255>:f32, 0.15:151>>) -> tensor<4x2x2x2048xf32>
  %bias = "tfl.no_value"() {value} : () -> none
  %out = "tfl.transpose_conv"(%output_shape, %dq_weights, %arg0, %bias) {fused_activation_function = "NONE", padding = "SAME", stride_h = 1 : i32, stride_w = 1 : i32} : (tensor<4xi32>, tensor<4x2x2x2048xf32>, tensor<2x2x3x2048xf32>, none) -> tensor<2x3x2x2048xf32>
  func.return %out : tensor<2x3x2x2048xf32>

// The dequantize must be gone, and transpose_conv must take the quantized
// weights tensor directly.
// CHECK-NOT: "tfl.dequantize"
// CHECK: "tfl.transpose_conv"(%cst, %1, %arg0, %0) <{fused_activation_function = "NONE", padding = "SAME", stride_h = 1 : i32, stride_w = 1 : i32}> : (tensor<4xi32>, tensor<4x2x2x2048x!quant.uniform<u8<1:255>:f32
}

10 changes: 10 additions & 0 deletions tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,13 @@ def : Pat<(TFL_QuantizeOp (TFL_DequantizeOp $in), $qt), (replaceWithValue $in)>;
def : Pat<(TFL_DequantizeOp
(TFL_QuantizeOp (Arith_ConstantOp F32ElementsAttr:$cst), $qt)),
(TFL_ConstOp $cst)>;

// Transpose conv supports hybrid computation with quantized weights.
// Rewrite: when the weights operand of a TFL_TransposeConvOp is produced by
// a TFL_DequantizeOp, drop the dequantize and feed the quantized weights
// ($quant_weights) to the op directly. All other operands and attributes
// (output shape, input, bias, padding, strides, fused activation) are
// carried over unchanged.
def FoldQuantWeightsIntoTposeConv : Pat<
(TFL_TransposeConvOp
$output_shape,
(TFL_DequantizeOp $quant_weights),
$quant_input,
$bias, $padding, $stride_h, $stride_w, $faf),
(TFL_TransposeConvOp $output_shape, $quant_weights,
$quant_input, $bias, $padding, $stride_h, $stride_w, $faf)>;

0 comments on commit 612177e

Please sign in to comment.