
Conversation

@sahas3 sahas3 (Member) commented May 9, 2024

Running the mlir-opt --linalg-fold-unit-extent-dims pass on the following IR

#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
module {
  func.func @main(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
    %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32>
    %0 = tensor.empty(%arg1) : tensor<?x1x61x1xf32>
    %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
    ^bb0(%in: f32, %in_0: f32, %out: f32):
      %2 = arith.mulf %in, %in_0 : f32
      %3 = arith.addf %out, %2 : f32
      linalg.yield %3 : f32
    } -> tensor<?x1x61x1xf32>
    return %1 : tensor<?x1x61x1xf32>
  }
}

produces an incorrect tensor.expand_shape operation:

error: 'tensor.expand_shape' op expected dimension 0 of collapsed type to be dynamic since one or more of the corresponding dimensions in the expanded type is dynamic
    %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
         ^
/mathworks/devel/sandbox/sayans/geckWorks/g3294570/repro.mlir:8:10: note: see current operation: %5 = "tensor.expand_shape"(%4) <{reassociation = [[0, 1, 2, 3]]}> : (tensor<61xf32>) -> tensor<?x1x61x1xf32>
// -----// IR Dump After LinalgFoldUnitExtentDimsPass Failed (linalg-fold-unit-extent-dims) //----- //
#map = affine_map<(d0) -> (0, d0)>
#map1 = affine_map<(d0) -> ()>
#map2 = affine_map<(d0) -> (d0)>
"builtin.module"() ({
  "func.func"() <{function_type = (tensor<1x?x?x1xf32>, index) -> tensor<?x1x61x1xf32>, sym_name = "main"}> ({
  ^bb0(%arg0: tensor<1x?x?x1xf32>, %arg1: index):
    %0 = "arith.constant"() <{value = dense<1.000000e+00> : tensor<f32>}> : () -> tensor<f32>
    %1 = "tensor.collapse_shape"(%arg0) <{reassociation = [[0, 1], [2, 3]]}> : (tensor<1x?x?x1xf32>) -> tensor<?x?xf32>
    %2 = "tensor.empty"() : () -> tensor<61xf32>
    %3 = "tensor.empty"() : () -> tensor<61xf32>
    %4 = "linalg.generic"(%1, %0, %2, %3) <{indexing_maps = [#map, #map1, #map2, #map2], iterator_types = [#linalg.iterator_type<parallel>], operandSegmentSizes = array<i32: 3, 1>}> ({
    ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32):
      %6 = "arith.mulf"(%arg2, %arg3) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
      %7 = "arith.addf"(%arg4, %6) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
      "linalg.yield"(%7) : (f32) -> ()
    }) : (tensor<?x?xf32>, tensor<f32>, tensor<61xf32>, tensor<61xf32>) -> tensor<61xf32>
    %5 = "tensor.expand_shape"(%4) <{reassociation = [[0, 1, 2, 3]]}> : (tensor<61xf32>) -> tensor<?x1x61x1xf32>
    "func.return"(%5) : (tensor<?x1x61x1xf32>) -> ()
  }) : () -> ()
}) : () -> ()

The reason is that dimension d0 is determined to be a unit dim that can be dropped based on the dimensions of operand %arg0 of the linalg.generic. Later, when iterating over the outs operand, d0 is again treated as a unit dim even though the corresponding extent is ShapedType::kDynamic. For the linalg.generic to be valid, d0 of outs does need to be 1, but the current implementation never checks this, so the dimension is dropped and the outs operand becomes tensor<61xf32> in the example.
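
Concretely, d0 indexes dimension 0 of both %arg0 and the outs tensor, so both extents are bound to the same loop (the annotations below are a worked reading of the maps above, added here for illustration):

// #map  result 0 = d0 -> dim 0 of %arg0 : tensor<1x?x?x1xf32>  -> static extent 1
// #map2 result 0 = d0 -> dim 0 of %0    : tensor<?x1x61x1xf32> -> dynamic extent, must equal 1 at runtime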

The fix is to also check that the dimension's extent is actually 1 before dropping it. The IR after the fix is:

#map = affine_map<()[s0, s1] -> (s0 * s1)>
#map1 = affine_map<(d0) -> (0, d0)>
#map2 = affine_map<(d0) -> ()>
module {
  func.func @main(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %cst = arith.constant dense<1.000000e+00> : tensor<f32>
    %collapsed = tensor.collapse_shape %arg0 [[0, 1], [2, 3]] : tensor<1x?x?x1xf32> into tensor<?x?xf32>
    %0 = tensor.empty(%arg1) : tensor<?x61xf32>
    %1 = affine.apply #map()[%arg1, %c1]
    %2 = tensor.empty(%1) : tensor<?x61xf32>
    %3 = linalg.generic {indexing_maps = [#map1, #map2, #map1, #map1], iterator_types = ["parallel"]} ins(%collapsed, %cst, %0 : tensor<?x?xf32>, tensor<f32>, tensor<?x61xf32>) outs(%2 : tensor<?x61xf32>) {
    ^bb0(%in: f32, %in_0: f32, %in_1: f32, %out: f32):
      %4 = arith.mulf %in, %in_0 : f32
      %5 = arith.addf %in_1, %4 : f32
      linalg.yield %5 : f32
    } -> tensor<?x61xf32>
    %expanded = tensor.expand_shape %3 [[0, 1], [2, 3]] output_shape [%c0, 1, 61, 1] : tensor<?x61xf32> into tensor<?x1x61x1xf32>
    return %expanded : tensor<?x1x61x1xf32>
  }
}


github-actions bot commented May 9, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@sahas3 sahas3 force-pushed the unitDimsExpand branch from 2a14d79 to 2a6ed18 on May 9, 2024 23:02
@sahas3 sahas3 marked this pull request as ready for review May 9, 2024 23:03
@llvmbot llvmbot (Member) commented May 9, 2024

@llvm/pr-subscribers-mlir-linalg

Author: Sayan Saha (sahas3)

Full diff: https://github.com/llvm/llvm-project/pull/91673.diff

2 Files Affected:

  • (modified) mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp (+2-1)
  • (modified) mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir (+44)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
index 65efa18af18f6..c0829397f1f85 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
@@ -351,7 +351,8 @@ static UnitExtentReplacementInfo dropUnitExtentFromOperandMetadata(
   auto isUnitDim = [&](unsigned dim) {
     if (auto dimExpr = dyn_cast<AffineDimExpr>(exprs[dim])) {
       unsigned oldPosition = dimExpr.getPosition();
-      return !oldDimsToNewDimsMap.count(oldPosition);
+      return !oldDimsToNewDimsMap.count(oldPosition) &&
+             (operandShape[dim] == 1);
     }
     // Handle the other case where the shape is 1, and is accessed using a
     // constant 0.
diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
index a9cbaaf7fdc48..d31ceb5fd719f 100644
--- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
+++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -1087,3 +1087,47 @@ func.func @drop_known_unit_constant_low_high(%arg0: tensor<1x383x128xf32>) -> te
 //       CHECK:   } : tensor<383x128xf32> to tensor<384x128xf32>
 //       CHECK:   tensor.expand_shape %[[PADDED]]
 //  CHECK-SAME:     {{\[}}[0, 1], [2]] output_shape [1, 384, 128] : tensor<384x128xf32> into tensor<1x384x128xf32>
+
+
+// -----
+
+// CHECK: #[[$MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
+// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (0, d0)>
+// CHECK: #[[$MAP2:.+]] = affine_map<(d0) -> ()>
+
+// CHECK-LABEL: func @drop_unit_dim_corresponding_to_dynamic_dim
+// CHECK-SAME:                    %[[ARG0:.*]]: tensor<1x?x?x1xf32>,
+// CHECK-SAME:                    %[[ARG1:.*]]: index) -> tensor<?x1x61x1xf32> {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant dense<1.000000e+00> : tensor<f32>
+// CHECK:           %[[VAL_3:.*]] = tensor.collapse_shape %[[ARG0]] {{\[\[}}0, 1], [2, 3]] : tensor<1x?x?x1xf32> into tensor<?x?xf32>
+// CHECK:           %[[VAL_4:.*]] = tensor.empty(%[[ARG1]]) : tensor<?x61xf32>
+// CHECK:           %[[VAL_5:.*]] = affine.apply #[[$MAP0]](){{\[}}%[[ARG1]], %[[VAL_1]]]
+// CHECK:           %[[VAL_6:.*]] = tensor.empty(%[[VAL_5]]) : tensor<?x61xf32>
+// CHECK:           %[[VAL_7:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[VAL_3]], %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>, tensor<f32>, tensor<?x61xf32>) outs(%[[VAL_6]] : tensor<?x61xf32>) {
+// CHECK:           ^bb0(%[[VAL_8:.*]]: f32, %[[VAL_9:.*]]: f32, %[[VAL_10:.*]]: f32, %[[VAL_11:.*]]: f32):
+// CHECK:             %[[VAL_12:.*]] = arith.mulf %[[VAL_8]], %[[VAL_9]] : f32
+// CHECK:             %[[VAL_13:.*]] = arith.addf %[[VAL_10]], %[[VAL_12]] : f32
+// CHECK:             linalg.yield %[[VAL_13]] : f32
+// CHECK:           } -> tensor<?x61xf32>
+// CHECK:           %[[VAL_14:.*]] = tensor.expand_shape %[[VAL_7]] {{\[\[}}0, 1], [2, 3]] output_shape {{\[}}%[[VAL_0]], 1, 61, 1] : tensor<?x61xf32> into tensor<?x1x61x1xf32>
+// CHECK:           return %[[VAL_14]] : tensor<?x1x61x1xf32>
+// CHECK:         }
+
+#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
+module {
+  func.func @drop_unit_dim_corresponding_to_dynamic_dim(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
+    %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32>
+    %0 = tensor.empty(%arg1) : tensor<?x1x61x1xf32>
+    %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
+    ^bb0(%in: f32, %in_0: f32, %out: f32):
+      %2 = arith.mulf %in, %in_0 : f32
+      %3 = arith.addf %out, %2 : f32
+      linalg.yield %3 : f32
+    } -> tensor<?x1x61x1xf32>
+    return %1 : tensor<?x1x61x1xf32>
+  }
+}

@llvmbot llvmbot (Member) commented May 9, 2024

@llvm/pr-subscribers-mlir
@MaheshRavishankar MaheshRavishankar (Contributor) left a comment

The real issue here is that if d0 is a unit dim in one operand, any use of d0 to access any other dim has to be a unit dim as well (by construction). Really we should be calling these linalg ops illegal.
I think we have a way to make sure the dimensions match for Linalg ops. Will try to find those.

// CHECK: %[[VAL_4:.*]] = tensor.empty(%[[ARG1]]) : tensor<?x61xf32>
// CHECK: %[[VAL_5:.*]] = affine.apply #[[$MAP0]](){{\[}}%[[ARG1]], %[[VAL_1]]]
// CHECK: %[[VAL_6:.*]] = tensor.empty(%[[VAL_5]]) : tensor<?x61xf32>
// CHECK: %[[VAL_7:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[VAL_3]], %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>, tensor<f32>, tensor<?x61xf32>) outs(%[[VAL_6]] : tensor<?x61xf32>) {
Contributor

To me, this should not happen. The input op explicitly linked a dimension of one operand to a dimension of another, and this pattern is dropping that link. The real issue is that the dimensions are inconsistent.

Member Author

Thanks for the review. If I understand correctly, you are suggesting that the verifier should reject this linalg.generic op since d0 is inferred to be different for different operands?

The op is created by the following flow (I am just providing small snippets with relevant ops for brevity):

%inserted_slice_37 = tensor.insert_slice %expanded_18 into %15[%c0_25, %c0_28, %c0_30, %c0_31] [1, 1, 61, 1] [1, 1, 1, 1] : tensor<1x1x61x1xf32> into tensor<?x?x?x?xf32>
%18 = linalg.conv_2d_nhwc_hwcf ins(%inserted_slice_37, %cst_2 : tensor<?x?x?x?xf32>, tensor<1x1x1x1xf32>) outs(%17 : tensor<?x1x61x1xf32>) -> tensor<?x1x61x1xf32>
    %reduced = linalg.reduce ins(%cst_7 : tensor<3xi32>) outs(%cst_6 : tensor<i32>) dimensions = [0] 
      (%in: i32, %init: i32) {
        %21 = arith.muli %in, %init : i32
        linalg.yield %21 : i32
      }

converts to the following after --canonicalize:

%inserted_slice_13 = tensor.insert_slice %expanded_10 into %7[0, 0, 0, 0] [1, 1, 61, 1] [1, 1, 1, 1] : tensor<1x1x61x1xf32> into tensor<1x?x?x1xf32>
%10 = linalg.conv_2d_nhwc_hwcf ins(%inserted_slice_13, %cst_2 : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%9 : tensor<?x1x61x1xf32>) -> tensor<?x1x61x1xf32>
  %reduced = linalg.reduce ins(%cst_6 : tensor<3xi32>) outs(%cst_5 : tensor<i32>) dimensions = [0] 
    (%in: i32, %init: i32) {
      %13 = arith.muli %in, %init : i32
      linalg.yield %13 : i32
    }

which, after --linalg-generalize-named-ops, converts to:

%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>, affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>, affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%inserted_slice_13, %cst_2 : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%7 : tensor<?x1x61x1xf32>) {
  ^bb0(%in: f32, %in_16: f32, %out: f32):
    %12 = arith.mulf %in, %in_16 : f32
    %13 = arith.addf %out, %12 : f32
    linalg.yield %13 : f32
  } -> tensor<?x1x61x1xf32>

producing the linalg.generic op mentioned in the repro. So, in addition to the verifier, I think the canonicalizer should be enhanced to also update the type of the outs operand the same way it updates the ins operands.

Member Author

Looking at the InferStaticShapeOfOperands pattern run during LinalgOps canonicalization, the outs operand for linalg.conv_2d_nhwc_hwcf (and consequently its generic form) doesn't get updated because of the check that all indexing maps of the GenericOp need to be projected permutations. This check fails for #map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)> since the second result of the map is neither an AffineDimExpr nor an AffineConstantExpr. Looking at the rest of the shape-updating code, I don't follow why this check is present. Do you have any thoughts on why it is necessary?
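
For reference, this is how the results of that map break down (the annotations are illustrative, using standard AffineExpr terminology):

#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
// result 0: d0      -> AffineDimExpr       (fine for a projected permutation)
// result 1: d1 + d4 -> AffineBinaryOpExpr  (neither a dim nor a constant, so the check rejects the map)
// result 2: d2 + d5 -> AffineBinaryOpExpr
// result 3: d6      -> AffineDimExpr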

Contributor

Skimming through it, I think you are right. It's probably too conservative. Removing that check might make the static dimensions consistent.

Member Author

New changes:

  1. Revert the original change in DropUnitDims.
  2. Remove the isProjectedPermutation check in InferStaticShapeOfOperands.
  3. Add a verifier for GenericOp to validate that dimensions are consistent across input and output operands.

I think the commonOpVerifier can be added to any LinalgOp, but I am not sure what the best way to do that is. Is there a better way than adding the following snippet for every LinalgOp?

LogicalResult <Any>Op::verify() { return commonOpVerifier(*this); }
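
As a rough illustration of item 3, a hypothetical sketch of such a consistency check (the name verifyConsistentLoopExtents and the details are mine, not the patch's actual commonOpVerifier; it only relies on existing LinalgOp interface methods):

// For every loop dimension, record the static extent implied by each operand
// whose indexing-map result is exactly that dimension, and fail if two
// operands disagree.
static LogicalResult verifyConsistentLoopExtents(linalg::LinalgOp op) {
  SmallVector<int64_t> extents(op.getNumLoops(), ShapedType::kDynamic);
  for (OpOperand &opOperand : op->getOpOperands()) {
    auto shapedType = dyn_cast<ShapedType>(opOperand.get().getType());
    if (!shapedType)
      continue;
    AffineMap map = op.getMatchingIndexingMap(&opOperand);
    ArrayRef<int64_t> shape = shapedType.getShape();
    for (auto [idx, expr] : llvm::enumerate(map.getResults())) {
      auto dimExpr = dyn_cast<AffineDimExpr>(expr);
      if (!dimExpr || ShapedType::isDynamic(shape[idx]))
        continue;
      int64_t &known = extents[dimExpr.getPosition()];
      if (ShapedType::isDynamic(known))
        known = shape[idx];
      else if (known != shape[idx])
        return op->emitOpError("inconsistent extents for loop dimension ")
               << dimExpr.getPosition();
    }
  }
  return success();
}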

Member Author

It seems like adding the verifier to verifyStructuredOpInterface is an option to enable it for all Linalg ops, but that caused some failures for existing passes:

mlir-opt --pass-pipeline="builtin.module(func.func(tosa-to-linalg-named))"

func.func @matmul_dyn_output(%arg0: tensor<1x1x8xf32>, %arg1: tensor<1x8x1xf32>) -> tensor<?x1x1xf32> {
  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x1x8xf32>, tensor<1x8x1xf32>) -> tensor<?x1x1xf32>
  return %0 : tensor<?x1x1xf32>
}

will lead to an error because output dim 0 is dynamic even though it should be 1 based on the input dims. Running tosa-infer-shapes before tosa-to-linalg-named solves the problem here, but it seems like adding the verifier can cause spurious errors like this?
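
Roughly, the effect of running tosa-infer-shapes first (a hypothetical sketch based on the description above, not actual pass output; it assumes the pass tightens the matmul result type and reconciles the original return type with a cast):

func.func @matmul_dyn_output(%arg0: tensor<1x1x8xf32>, %arg1: tensor<1x8x1xf32>) -> tensor<?x1x1xf32> {
  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x1x8xf32>, tensor<1x8x1xf32>) -> tensor<1x1x1xf32>
  %1 = tensor.cast %0 : tensor<1x1x1xf32> to tensor<?x1x1xf32>
  return %1 : tensor<?x1x1xf32>
}

so that tosa-to-linalg-named then sees a fully static output shape.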

Contributor

That's unfortunate. I like going down this path, so thanks a lot for doing this. We need to find a way to land it without causing too much friction; I am happy to continue that work.
In the meantime, I will accept your original patch. Maybe send that out as a separate PR (and rename this PR)?

Member Author

Thanks, opened #93317 for the original fix.

How do you envision fixing the spurious failures? Is it possible to run InferStaticShapeOfOperands as part of building the op?

@sahas3 sahas3 requested a review from MaheshRavishankar May 16, 2024 12:31
@sahas3 sahas3 (Member Author) commented May 22, 2024

@MaheshRavishankar can you please review the new changes when you have a chance? Thanks!

@sahas3 sahas3 changed the title from "[mlir] [linalg] Check for dim shape to decide unit dim for each operand in dropUnitDims pass." to "[mlir] [linalg] Add output shape verifier for linalg ops." May 24, 2024
@MaheshRavishankar MaheshRavishankar (Contributor) left a comment

Something similar came up recently (https://discourse.llvm.org/t/rfc-remove-arith-math-ops-on-tensors/74357/63?u=maheshravishankar). Along those lines, it might not be a good idea to change the verifier here, because it might actually make it harder to lower to Linalg from front-end dialects.

Could we repurpose what you wanted to do as a canonicalization instead? If a particular dimension accessed using an expression is static on one operand but dynamic on another operand using the same expression, a canonicalization of the linalg op could make the dynamic dimension static (and introduce a tensor.cast on the operand). That might fix the issue you are having.

Sorry for the delay in the response, and apologies for the conflicting suggestions... I hadn't fully thought this through.
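
Applied to the original repro, such a canonicalization might produce something along these lines (a hypothetical IR sketch: the dynamic d0 extent of the outs operand is tightened to the static 1 implied by %arg0, with tensor.cast preserving the original types at the boundaries):

%cast = tensor.cast %0 : tensor<?x1x61x1xf32> to tensor<1x1x61x1xf32>
%1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%cast : tensor<1x1x61x1xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
  %2 = arith.mulf %in, %in_0 : f32
  %3 = arith.addf %out, %2 : f32
  linalg.yield %3 : f32
} -> tensor<1x1x61x1xf32>
%res = tensor.cast %1 : tensor<1x1x61x1xf32> to tensor<?x1x61x1xf32>

With d0 statically 1 on every operand, the unit-extent folding no longer has to reconcile a static and a dynamic extent for the same dimension.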
