diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 19d2d854a3838..4eb2a0cb200a0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -746,12 +746,12 @@ static Value buildVectorWrite(RewriterBase &rewriter, Value value,
   auto vectorType = state.getCanonicalVecType(
       getElementTypeOrSelf(outputOperand->get().getType()), vectorTypeMap);
 
+  SmallVector<Value> indices(linalgOp.getRank(outputOperand),
+                             arith::ConstantIndexOp::create(rewriter, loc, 0));
+
   Operation *write;
   if (vectorType.getRank() > 0) {
     AffineMap writeMap = inversePermutation(reindexIndexingMap(opOperandMap));
-    SmallVector<Value> indices(
-        linalgOp.getRank(outputOperand),
-        arith::ConstantIndexOp::create(rewriter, loc, 0));
     value = broadcastIfNeeded(rewriter, value, vectorType);
     assert(value.getType() == vectorType && "Incorrect type");
     write = vector::TransferWriteOp::create(
@@ -762,7 +762,7 @@ static Value buildVectorWrite(RewriterBase &rewriter, Value value,
     value = vector::BroadcastOp::create(rewriter, loc, vectorType, value);
     assert(value.getType() == vectorType && "Incorrect type");
     write = vector::TransferWriteOp::create(rewriter, loc, value,
-                                            outputOperand->get(), ValueRange{});
+                                            outputOperand->get(), indices);
   }
   write = state.maskOperation(rewriter, write, linalgOp, opOperandMap);
 
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
index 9a14ab7d38d3e..95959fcf085fc 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
@@ -1481,23 +1481,23 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
-// CHECK-LABEL: func @reduce_1d(
-// CHECK-SAME: %[[A:.*]]: tensor<32xf32>
-func.func @reduce_1d(%arg0: tensor<32xf32>) -> tensor<f32> {
+// CHECK-LABEL: func @reduce_to_rank_0(
+// CHECK-SAME: %[[SRC:.*]]: tensor<32xf32>
+func.func @reduce_to_rank_0(%arg0: tensor<32xf32>) -> tensor<f32> {
   // CHECK-DAG: %[[F0:.*]] = arith.constant 0.000000e+00 : f32
   // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
   %f0 = arith.constant 0.000000e+00 : f32
 
-  // CHECK: %[[init:.*]] = tensor.empty() : tensor<f32>
+  // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<f32>
   %0 = tensor.empty() : tensor<f32>
   %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor<f32>) -> tensor<f32>
-  // CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]]
+  // CHECK: %[[R:.*]] = vector.transfer_read %[[SRC]][%[[C0]]]
   // CHECK-SAME: : tensor<32xf32>, vector<32xf32>
-  // CHECK: %[[red:.*]] = vector.multi_reduction <add>, %[[r]], %[[F0]] [0]
+  // CHECK: %[[RED:.*]] = vector.multi_reduction <add>, %[[R]], %[[F0]] [0]
   // CHECK-SAME: : vector<32xf32> to f32
-  // CHECK: %[[red_v1:.*]] = vector.broadcast %[[red]] : f32 to vector<f32>
-  // CHECK: %[[res:.*]] = vector.transfer_write %[[red_v1]], %[[init]][]
+  // CHECK: %[[RED_V1:.*]] = vector.broadcast %[[RED]] : f32 to vector<f32>
+  // CHECK: %[[RES:.*]] = vector.transfer_write %[[RED_V1]], %[[INIT]][]
   // CHECK-SAME: : vector<f32>, tensor<f32>
   %2 = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>,
@@ -1523,6 +1523,58 @@ module attributes {transform.with_named_sequence} {
 }
 
+// -----
+
+// CHECK-LABEL: func @reduce_to_rank_1(
+// CHECK-SAME: %[[SRC:.*]]: tensor<32xf32>
+func.func @reduce_to_rank_1(%arg0: tensor<32xf32>) -> tensor<1xf32> {
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[F0:.*]] = arith.constant dense<0.000000e+00> : vector<1xf32>
+  %f0 = arith.constant 0.000000e+00 : f32
+
+  // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1xf32>
+  %0 = tensor.empty() : tensor<1xf32>
+
+  // CHECK: %[[INIT_ZERO:.*]] = vector.transfer_write %[[F0]], %[[INIT]][%[[C0]]]
+  // CHECK-SAME: : vector<1xf32>, tensor<1xf32>
+  %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor<1xf32>) -> tensor<1xf32>
+
+  // CHECK: %[[R:.*]] = vector.transfer_read %[[SRC]][%[[C0]]]
+  // CHECK-SAME: : tensor<32xf32>, vector<32xf32>
+  // CHECK: %[[INIT_ZERO_VEC:.*]] = vector.transfer_read %[[INIT_ZERO]][%[[C0]]]
+  // CHECK-SAME: : tensor<1xf32>, vector<f32>
+  // CHECK: %[[INIT_ZERO_SCL:.*]] = vector.extract %[[INIT_ZERO_VEC]][]
+  // CHECK-SAME: : f32 from vector<f32>
+  // CHECK: %[[RED:.*]] = vector.multi_reduction <add>, %[[R]], %[[INIT_ZERO_SCL]] [0]
+  // CHECK-SAME: : vector<32xf32> to f32
+  // CHECK: %[[RED_V1:.*]] = vector.broadcast %[[RED]] : f32 to vector<f32>
+  // CHECK: vector.transfer_write %[[RED_V1]], %[[INIT_ZERO]][%[[C0]]]
+  // CHECK-SAME: : vector<f32>, tensor<1xf32>
+
+  %2 = linalg.generic {
+    indexing_maps = [affine_map<(d0) -> (d0)>,
+                     affine_map<(d0) -> (0)>],
+    iterator_types = ["reduction"]}
+    ins(%arg0 : tensor<32xf32>)
+    outs(%1 : tensor<1xf32>) {
+  ^bb0(%a: f32, %b: f32):
+    %3 = arith.addf %a, %b : f32
+    linalg.yield %3 : f32
+  } -> tensor<1xf32>
+
+  return %2 : tensor<1xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+
 // -----
 
 // This test checks that vectorization does not occur when an input indexing map