diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
index 97163c4532378..a57aadcdcc5b0 100644
--- a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
+++ b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
@@ -227,7 +227,8 @@ bool isLinearizableVector(VectorType type);
 ///
 /// Note: all read offsets are set to 0.
 Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source,
-                             ArrayRef<int64_t> inputVectorSizes, Value padValue,
+                             ArrayRef<int64_t> inputVectorSizes,
+                             std::optional<Value> padValue = std::nullopt,
                              bool useInBoundsInsteadOfMasking = false,
                              ArrayRef<bool> inputScalableVecDims = {});
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 3ee6ae1029f72..15c467b21c81e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1770,12 +1770,9 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp,
   rewriter.setInsertionPoint(packOp);
   Location loc = packOp.getLoc();
 
-  auto padValue = packOp.getPaddingValue();
-  if (!padValue) {
-    padValue = arith::ConstantOp::create(
-        rewriter, loc,
-        rewriter.getZeroAttr(packOp.getSourceType().getElementType()));
-  }
+  std::optional<Value> padValue = packOp.getPaddingValue()
+                                      ? std::optional(packOp.getPaddingValue())
+                                      : std::nullopt;
 
   // If the input vector sizes are not provided, then the vector sizes are
   // determined by the result tensor shape. In case the vector sizes aren't
@@ -1936,11 +1933,8 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
   }
 
   // -- Generate the read operation --
-  auto padValue = arith::ConstantOp::create(
-      rewriter, loc,
-      rewriter.getZeroAttr(unpackOp.getSourceType().getElementType()));
   Value readResult = vector::createReadOrMaskedRead(
-      rewriter, loc, unpackOp.getSource(), readVectorSizes, padValue,
+      rewriter, loc, unpackOp.getSource(), readVectorSizes, std::nullopt,
       useInBoundsInsteadOfMasking, readScalableVectorFlags);
 
   // -- Generate the transpose operation --
diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
index 39dc7a4f284a6..cd8b359a20158 100644
--- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
+++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
@@ -319,7 +319,7 @@ bool vector::isLinearizableVector(VectorType type) {
 Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc,
                                      Value source,
                                      ArrayRef<int64_t> inputVectorSizes,
-                                     Value padValue,
+                                     std::optional<Value> padValue,
                                      bool useInBoundsInsteadOfMasking,
                                      ArrayRef<bool> inputScalableVecDims) {
   assert(!llvm::is_contained(inputVectorSizes, ShapedType::kDynamic) &&
@@ -328,9 +328,11 @@ Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc,
   auto sourceShape = sourceShapedType.getShape();
   assert(sourceShape.size() == inputVectorSizes.size() &&
          "expected same ranks.");
-  auto vectorType = VectorType::get(inputVectorSizes, padValue.getType(),
-                                    inputScalableVecDims);
-  assert(padValue.getType() == sourceShapedType.getElementType() &&
+  auto vectorType =
+      VectorType::get(inputVectorSizes, sourceShapedType.getElementType(),
+                      inputScalableVecDims);
+  assert((!padValue.has_value() ||
+          padValue.value().getType() == sourceShapedType.getElementType()) &&
          "expected same pad element type to match source element type");
   int64_t readRank = inputVectorSizes.size();
   auto zero = arith::ConstantIndexOp::create(builder, loc, 0);
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
index c09046b08e898..35f520a9f22a8 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
@@ -339,8 +339,8 @@ module attributes {transform.with_named_sequence} {
 // CHECK-LABEL: func.func @test_vectorize_pack(
 // CHECK-SAME: %[[VAL_0:.*]]: tensor<32x8x16xf32>,
 // CHECK-SAME: %[[VAL_1:.*]]: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
-// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_2:.*]] = ub.poison : f32
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
 // CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]]], %[[VAL_2]] {in_bounds = [true, true, true]} : tensor<32x8x16xf32>, vector<32x8x16xf32>
 // CHECK: %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
 // CHECK: %[[VAL_6:.*]] = vector.transpose %[[VAL_5]], [1, 3, 0, 4, 2] : vector<32x4x2x1x16xf32> to vector<4x1x32x16x2xf32>
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
index aa86678ba405f..62bf1f55c9af2 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
@@ -1068,16 +1068,16 @@ module attributes {transform.with_named_sequence} {
 // CHECK-SAME: %[[DEST:.*]]: tensor<?x?xf32>,
 // CHECK-SAME: %[[SRC:.*]]: tensor<?x?x16x2xf32>
 func.func @test_vectorize_dynamic_shapes_unpack_scalable_vec(%dest: tensor<?x?xf32>, %src: tensor<?x?x16x2xf32>) -> tensor<?x?xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00
-  // CHECK: %[[C01:.*]] = arith.constant 0
-  // CHECK: %[[C02:.*]] = arith.constant 0
+  // CHECK-DAG: %[[PAD:.*]] = ub.poison : f32
+  // CHECK-DAG: %[[C01:.*]] = arith.constant 0
+  // CHECK-DAG: %[[C02:.*]] = arith.constant 0
   // CHECK: %[[DIM4:.*]] = tensor.dim %[[SRC]], %[[C02]] : tensor<?x?x16x2xf32>
   // CHECK: %[[CNST14:.*]] = arith.constant 1
   // CHECK: %[[DIM6:.*]] = tensor.dim %[[SRC]], %[[CNST14]] : tensor<?x?x16x2xf32>
   // CHECK: %[[CNST16:.*]] = arith.constant 16 : index
   // CHECK: %[[CNST2:.*]] = arith.constant 2 : index
   // CHECK: %[[MASK_READ:.*]] = vector.create_mask %[[DIM4]], %[[DIM6]], %[[CNST16]], %[[CNST2]] : vector<2x1x[16]x2xi1>
-  // CHECK: %[[READ:.*]] = vector.mask %[[MASK_READ]] {{.*}} vector.transfer_read %{{.*}} : tensor<?x?x16x2xf32>, vector<2x1x[16]x2xf32> } : vector<2x1x[16]x2xi1> -> vector<2x1x[16]x2xf32>
+  // CHECK: %[[READ:.*]] = vector.mask %[[MASK_READ]] {{.*}} vector.transfer_read %{{.*}} %[[PAD]] {{.*}}: tensor<?x?x16x2xf32>, vector<2x1x[16]x2xf32> } : vector<2x1x[16]x2xi1> -> vector<2x1x[16]x2xf32>
   // CHECK: %[[TR:.*]] = vector.transpose %[[READ]], [0, 3, 1, 2] : vector<2x1x[16]x2xf32> to vector<2x2x1x[16]xf32>
   // CHECK: %[[SC:.*]] = vector.shape_cast %[[TR]] : vector<2x2x1x[16]xf32> to vector<4x[16]xf32>
   // CHECK: %[[MASK_WRITE:.*]] = vector.create_mask {{.*}} : vector<4x[16]xi1>
@@ -1100,9 +1100,9 @@ module attributes {transform.with_named_sequence} {
 // CHECK-SAME: %[[DEST:.*]]: tensor<?x?xf32>,
 // CHECK-SAME: %[[SRC:.*]]: tensor<?x?x?x2xf32>
 func.func @test_vectorize_dynamic_shapes_unpack_scalable_vec_and_tile_size(%dest: tensor<?x?xf32>, %src: tensor<?x?x?x2xf32>) -> tensor<?x?xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00
-  // CHECK: %[[C01:.*]] = arith.constant 0
-  // CHECK: %[[C02:.*]] = arith.constant 0
+  // CHECK-DAG: %[[PAD:.*]] = ub.poison : f32
+  // CHECK-DAG: %[[C01:.*]] = arith.constant 0
+  // CHECK-DAG: %[[C02:.*]] = arith.constant 0
   // CHECK: %[[DIM4:.*]] = tensor.dim %[[SRC]], %[[C02]] : tensor<?x?x?x2xf32>
   // CHECK: %[[C1_2:.*]] = arith.constant 1
   // CHECK: %[[DIM6:.*]] = tensor.dim %[[SRC]], %[[C1_2]] : tensor<?x?x?x2xf32>
@@ -1110,7 +1110,7 @@ func.func @test_vectorize_dynamic_shapes_unpack_scalable_vec_and_tile_size(%dest
   // CHECK: %[[DIM_2:.*]] = tensor.dim %[[SRC]], %[[C2]] : tensor<?x?x?x2xf32>
   // CHECK: %[[C2_1:.*]] = arith.constant 2 : index
   // CHECK: %[[MASK_READ:.*]] = vector.create_mask %[[DIM4]], %[[DIM6]], %[[DIM_2]], %[[C2_1]] : vector<2x1x[16]x2xi1>
-  // CHECK: %[[READ:.*]] = vector.mask %[[MASK_READ]] {{.*}} vector.transfer_read %{{.*}} : tensor<?x?x?x2xf32>, vector<2x1x[16]x2xf32> } : vector<2x1x[16]x2xi1> -> vector<2x1x[16]x2xf32>
+  // CHECK: %[[READ:.*]] = vector.mask %[[MASK_READ]] {{.*}} vector.transfer_read %{{.*}} %[[PAD]] {{.*}}: tensor<?x?x?x2xf32>, vector<2x1x[16]x2xf32> } : vector<2x1x[16]x2xi1> -> vector<2x1x[16]x2xf32>
   // CHECK: %[[TR:.*]] = vector.transpose %[[READ]], [0, 3, 1, 2] : vector<2x1x[16]x2xf32> to vector<2x2x1x[16]xf32>
   // CHECK: %[[SC:.*]] = vector.shape_cast %[[TR]] : vector<2x2x1x[16]xf32> to vector<4x[16]xf32>
   // CHECK: %[[MASK_WRITE:.*]] = vector.create_mask {{.*}} : vector<4x[16]xi1>
@@ -1138,14 +1138,14 @@ module attributes {transform.with_named_sequence} {
 // CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
 // CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
 func.func @test_vectorize_unpack(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]]= arith.constant 0 : index
-  // CHECK: %[[C8:.*]] = arith.constant 8 : index
-  // CHECK: %[[C80:.*]] = arith.constant 8 : index
-  // CHECK: %[[C32:.*]] = arith.constant 32 : index
-  // CHECK: %[[C16:.*]] = arith.constant 16 : index
+  // CHECK-DAG: %[[PAD:.*]] = ub.poison : f32
+  // CHECK-DAG: %[[C0:.*]]= arith.constant 0 : index
+  // CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
+  // CHECK-DAG: %[[C80:.*]] = arith.constant 8 : index
+  // CHECK-DAG: %[[C32:.*]] = arith.constant 32 : index
+  // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
   // CHECK: %[[MSK0:.*]] = vector.create_mask %[[C8]], %[[C80]], %[[C32]], %[[C16]] : vector<16x8x32x16xi1>
-  // CHECK: %[[READ0:.*]] = vector.mask %[[MSK0]] { vector.transfer_read %[[SRC]]{{.*}}} : vector<16x8x32x16xi1> -> vector<16x8x32x16xf32>
+  // CHECK: %[[READ0:.*]] = vector.mask %[[MSK0]] { vector.transfer_read %[[SRC]]{{.*}} %[[PAD]] {{.*}} : vector<16x8x32x16xi1> -> vector<16x8x32x16xf32>
   // CHECK: %[[TRANSP0:.*]] = vector.transpose %[[READ0]], [0, 2, 1, 3] : vector<16x8x32x16xf32> to vector<16x32x8x16xf32>
   // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP0]] : vector<16x32x8x16xf32> to vector<512x128xf32>
   // CHECK: %[[C01:.*]] = arith.constant 0 : index
@@ -1171,9 +1171,9 @@ func.func @test_vectorize_unpack(%source: tensor<8x8x32x16xf32>, %dest: tensor<2
 // CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
 // CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
 func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
+  // CHECK-DAG: %[[PAD:.*]] = ub.poison : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}} %[[PAD]] {{.*}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
   // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
   // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
   // CHECK: %[[C00:.*]] = arith.constant 0 : index
@@ -1196,9 +1196,9 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest:
 // CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
 // CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
 func.func @test_vectorize_unpack_with_outer_perm(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
+  // CHECK-DAG: %[[PAD:.*]] = ub.poison : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}} %[[PAD]] {{.*}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
   // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
   // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
   // CHECK: %[[C00:.*]] = arith.constant 0 : index
@@ -1221,9 +1221,9 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest:
 // CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
 // CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
 func.func @test_vectorize_unpack_no_vector_sizes(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
+  // CHECK-DAG: %[[PAD:.*]] = ub.poison : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}} %[[PAD]] {{.*}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
   // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
   // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
   // CHECK: %[[C00:.*]] = arith.constant 0 : index
@@ -1246,9 +1246,9 @@ func.func @test_vectorize_unpack_no_vector_sizes(%source: tensor<8x8x32x16xf32>,
 // CHECK-SAME: %[[SRC:.*]]: tensor<8x4x16x16xf32>
 // CHECK-SAME: %[[DEST:.*]]: tensor<64x127xf32>
 func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x4x16x16xf32>, %dest: tensor<64x127xf32>) -> tensor<64x127xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x4x16x16xf32>, vector<8x4x16x16xf32>
+  // CHECK-DAG: %[[PAD:.*]] = ub.poison : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}} %[[PAD]] {{.*}} : tensor<8x4x16x16xf32>, vector<8x4x16x16xf32>
   // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x4x16x16xf32> to vector<4x16x8x16xf32>
   // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<4x16x8x16xf32> to vector<64x128xf32>
   // CHECK: %[[C00:.*]] = arith.constant 0 : index
@@ -1275,9 +1275,9 @@ func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf
   %0 = linalg.unpack %source outer_dims_perm=[1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32>
   return %0 : tensor<7x16xf32>
 }
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<4x7x4xf32>, vector<4x7x4xf32>
+  // CHECK-DAG: %[[PAD:.*]] = ub.poison : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}} %[[PAD]] {{.*}} : tensor<4x7x4xf32>, vector<4x7x4xf32>
   // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 0, 2] : vector<4x7x4xf32> to vector<7x4x4xf32>
   // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<7x4x4xf32> to vector<7x16xf32>
   // CHECK: %[[C00:.*]] = arith.constant 0 : index
@@ -1308,7 +1308,7 @@ func.func @test_vectorize_pack(%src: tensor<32x8x16xf32>, %dest: tensor<4x1x32x1
   %pack = linalg.pack %src outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %dest : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
   return %pack : tensor<4x1x32x16x2xf32>
 }
-// CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[CST:.*]] = ub.poison : f32
 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
 // CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[CST]]
 // CHECK-SAME: {in_bounds = [true, true, true]} : tensor<32x8x16xf32>, vector<32x8x16xf32>
@@ -1376,7 +1376,7 @@ func.func @test_vectorize_dynamic_pack(%src: tensor<?x?xf32>, %dest: tensor<?x?x16x2xf32>
 }
 
-// CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[CST:.*]] = ub.poison : f32
 // CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
 // CHECK-DAG: %[[C0_0:.*]] = arith.constant 0 : index
 // CHECK-DAG: %[[C1_0:.*]] = arith.constant 1 : index
@@ -1417,7 +1417,7 @@ func.func @test_vectorize_pack_no_vector_sizes(%src: tensor<64x4xf32>, %dest: te
   %pack = linalg.pack %src outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %dest : tensor<64x4xf32> -> tensor<2x4x16x2xf32>
   return %pack : tensor<2x4x16x2xf32>
 }
-// CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[CST:.*]] = ub.poison : f32
 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
 // CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CST]]
 // CHECK-SAME: {in_bounds = [true, true]} : tensor<64x4xf32>, vector<64x4xf32>
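
For reviewers, a minimal caller sketch of the updated contract. This is hypothetical illustration, not code from the patch; the helper name and the `builder`/`loc`/`source`/`vecSizes`/`explicitPad` variables stand in for whatever a surrounding pattern provides. A caller with an explicit pad value (e.g. `linalg.pack` with a `padding_value`, as in `vectorizeAsTensorPackOp` above) wraps it in the optional; a caller with none passes `std::nullopt` and lets the utility pick the padding, which the updated tests show materializing as `ub.poison` rather than a zero `arith.constant`.

```cpp
// Hypothetical usage sketch for the new createReadOrMaskedRead() signature.
static Value readForVectorization(OpBuilder &builder, Location loc,
                                  Value source, ArrayRef<int64_t> vecSizes,
                                  Value explicitPad /*may be null*/) {
  // Mirror the ternary used in vectorizeAsTensorPackOp: forward a real pad
  // value if the caller has one, otherwise defer to the utility's default.
  std::optional<Value> padValue =
      explicitPad ? std::optional(explicitPad) : std::nullopt;
  return vector::createReadOrMaskedRead(builder, loc, source, vecSizes,
                                        padValue,
                                        /*useInBoundsInsteadOfMasking=*/false);
}
```

One consequence of defaulting to a poison pad: out-of-bounds lanes of the masked `vector.transfer_read` carry no defined value, which is sound for unpack (those lanes are never written back) but means callers that do observe padded lanes must keep passing an explicit pad value.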