diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp
index b1527a5c3f838..a2ac41c263cc6 100644
--- a/mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp
+++ b/mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp
@@ -162,60 +162,20 @@ struct TransferReadLowering final : OpRewritePattern<vector::TransferReadOp> {
         stridedMetadata.getConstifiedMixedStrides();
     SmallVector<OpFoldResult> sizes = stridedMetadata.getConstifiedMixedSizes();
     OpFoldResult offset = stridedMetadata.getConstifiedMixedOffset();
+    memref::LinearizedMemRefInfo linearizedInfo;
     OpFoldResult linearizedIndices;
-    std::tie(std::ignore, linearizedIndices) =
+    std::tie(linearizedInfo, linearizedIndices) =
         memref::getLinearizedMemRefOffsetAndSize(rewriter, loc, elementBitWidth,
                                                  elementBitWidth, offset, sizes,
                                                  strides, indices);
 
-    // TODO(jerryyin): Fix the getLinearizedMemRefOffsetAndSize() function
-    // Note below doesn't give the correct result for the linearized size.
-    // Value totalSize = getValueOrCreateConstantIndexOp(
-    //     rewriter, loc, linearizedInfo.linearizedSize);
-    // It computes the multiplied sizes of all dimensions instead of taking
-    // the maximum of each dimension size * stride.
-    SmallVector<AffineExpr> productExpressions;
-    unsigned sourceRank = cast<MemRefType>(src.getType()).getRank();
-
-    SmallVector<AffineExpr> symbols(2 * sourceRank);
-    SmallVector<Value> offsetValues;
-    bindSymbolsList(rewriter.getContext(), MutableArrayRef{symbols});
-
-    size_t symbolIndex = 0;
-    for (size_t i = 0; i < sourceRank; ++i) {
-      AffineExpr strideExpr, sizeExpr;
-      OpFoldResult stride = strides[i];
-      OpFoldResult size = sizes[i];
-      if (auto constantStride = getConstantIntValue(stride)) {
-        strideExpr = rewriter.getAffineConstantExpr(*constantStride);
-      } else {
-        strideExpr = symbols[symbolIndex++];
-        offsetValues.push_back(
-            getValueOrCreateConstantIndexOp(rewriter, loc, stride));
-      }
-
-      if (auto constantSize = getConstantIntValue(size)) {
-        sizeExpr = rewriter.getAffineConstantExpr(*constantSize);
-      } else {
-        sizeExpr = symbols[symbolIndex++];
-        offsetValues.push_back(
-            getValueOrCreateConstantIndexOp(rewriter, loc, size));
-      }
-
-      productExpressions.push_back(strideExpr * sizeExpr);
-    }
-
-    AffineMap maxMap = AffineMap::get(
-        /*dimCount=*/0, /*symbolCount=*/symbolIndex, productExpressions,
-        rewriter.getContext());
-    Value totalSize =
-        rewriter.create<affine::AffineMaxOp>(loc, maxMap, offsetValues);
-
     // delta = bufferSize - linearizedOffset
     Value vectorSizeOffset =
         rewriter.create<arith::ConstantIndexOp>(loc, vectorSize);
     Value linearIndex =
         getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices);
+    Value totalSize = getValueOrCreateConstantIndexOp(
+        rewriter, loc, linearizedInfo.linearizedSize);
     Value delta = rewriter.create<arith::SubIOp>(loc, totalSize, linearIndex);
 
     // 1) check if delta < vectorSize
diff --git a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
index ac397b597fd14..3f9fb071e0ba8 100644
--- a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
+++ b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
@@ -66,7 +66,6 @@ std::pair<LinearizedMemRefInfo, OpFoldResult> getLinearizedMemRefOffsetAndSize(
   SmallVector<AffineExpr> symbols(2 * sourceRank);
   bindSymbolsList(builder.getContext(), MutableArrayRef{symbols});
   AffineExpr addMulMap = builder.getAffineConstantExpr(0);
-  AffineExpr mulMap = builder.getAffineConstantExpr(1);
 
   SmallVector<OpFoldResult> offsetValues(2 * sourceRank);
 
@@ -75,18 +74,28 @@ std::pair<LinearizedMemRefInfo, OpFoldResult> getLinearizedMemRefOffsetAndSize(
     addMulMap = addMulMap + symbols[offsetIdx] * symbols[offsetIdx + 1];
     offsetValues[offsetIdx] = indicesVec[i];
     offsetValues[offsetIdx + 1] = strides[i];
-
-    mulMap = mulMap * symbols[i];
   }
-
   // Adjust linearizedIndices and size by the scale factor (dstBits / srcBits).
   int64_t scaler = dstBits / srcBits;
-  mulMap = mulMap.floorDiv(scaler);
-
   OpFoldResult linearizedIndices = affine::makeComposedFoldedAffineApply(
       builder, loc, addMulMap.floorDiv(scaler), offsetValues);
+
+  size_t symbolIndex = 0;
+  SmallVector<OpFoldResult> values;
+  SmallVector<AffineExpr> productExpressions;
+  for (unsigned i = 0; i < sourceRank; ++i) {
+    AffineExpr strideExpr = symbols[symbolIndex++];
+    values.push_back(strides[i]);
+    AffineExpr sizeExpr = symbols[symbolIndex++];
+    values.push_back(sizes[i]);
+
+    productExpressions.push_back((strideExpr * sizeExpr).floorDiv(scaler));
+  }
+  AffineMap maxMap = AffineMap::get(
+      /*dimCount=*/0, /*symbolCount=*/symbolIndex, productExpressions,
+      builder.getContext());
   OpFoldResult linearizedSize =
-      affine::makeComposedFoldedAffineApply(builder, loc, mulMap, sizes);
+      affine::makeComposedFoldedAffineMax(builder, loc, maxMap, values);
 
   // Adjust baseOffset by the scale factor (dstBits / srcBits).
   AffineExpr s0;
diff --git a/mlir/test/Dialect/AMDGPU/transfer-read-to-load.mlir b/mlir/test/Dialect/AMDGPU/transfer-read-to-load.mlir
index 22b425680fb05..20999af10553e 100644
--- a/mlir/test/Dialect/AMDGPU/transfer-read-to-load.mlir
+++ b/mlir/test/Dialect/AMDGPU/transfer-read-to-load.mlir
@@ -52,9 +52,9 @@ func.func @transfer_to_maskedload_fatrawbuffer_f16(%mem : memref<8x8xf16, #amdgpu.address_space<fat_raw_buffer>>,
 
 // -----
 
-// CHECK: #map = affine_map<()[s0, s1, s2] -> (s0 * s1 + s2)>
-// CHECK: #map1 = affine_map<()[s0, s1, s2] -> (s0 * s1, s2)>
-// CHECK-LABEL: func @transfer_to_maskedload_fatrawbuffer_dynamic_i8(
+// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s0 * s1 + s2)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> (s0 * s1, s2)>
+// CHECK: func @transfer_to_maskedload_fatrawbuffer_dynamic_i8(
 // CHECK-SAME: %[[ARG0:.*]]: memref<?x?xi8, #amdgpu.address_space<fat_raw_buffer>>
 // CHECK-SAME: %[[ARG1:.*]]: index, %[[ARG2:.*]]: index
 // CHECK-SAME: %[[ARG3:.*]]: vector<4xi1>
@@ -64,14 +64,14 @@ func.func @transfer_to_maskedload_fatrawbuffer_dynamic_i8(%mem : memref<?x?xi8, #amdgpu.address_space<fat_raw_buffer>>,
 }
 
-// CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<4xi8>
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[C4:.*]] = arith.constant 4 : index
-// CHECK: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:2, %[[STRIDES:.*]]:2 = memref.extract_strided_metadata %[[ARG0]]
-// CHECK: %[[LINEAR:.*]] = affine.apply #map()[%[[ARG1]], %[[STRIDES]]#0, %[[ARG2]]]
-// CHECK: %[[SIZE:.*]] = affine.max #map1()[%[[STRIDES]]#0, %[[SIZES]]#0, %[[SIZES]]#1]
-// CHECK: %[[IF:.*]] = scf.if
-// CHECK: return
+// CHECK:     %[[CST:.*]] = arith.constant dense<0> : vector<4xi8>
+// CHECK:     %[[C0:.*]] = arith.constant 0 : index
+// CHECK:     %[[C4:.*]] = arith.constant 4 : index
+// CHECK:     %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:2, %[[STRIDES:.*]]:2 = memref.extract_strided_metadata %[[ARG0]]
+// CHECK-DAG: %[[SIZE:.*]] = affine.max #[[MAP1]]()[%[[STRIDES]]#0, %[[SIZES]]#0, %[[SIZES]]#1]
+// CHECK-DAG: %[[LINEAR:.*]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[STRIDES]]#0, %[[ARG2]]]
+// CHECK:     %[[IF:.*]] = scf.if
+// CHECK:     return
 
 // -----
diff --git a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
index 1d6cbfa343ba5..0cb3b7b744476 100644
--- a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
+++ b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
@@ -104,7 +104,7 @@ func.func @memref_load_i4_dynamic(%arg0: index, %arg1 : index, %arg2 : index, %arg3 : index) -> i4 {
   %1 = memref.load %0[%arg2, %arg3] : memref<?x?xi4>
   return %1 : i4
 }
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)>
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2, s0 floordiv 2)>
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 2)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1, s2] -> ((s0 * s1) * 4 + s2 * 4 - ((s2 + s0 * s1) floordiv 2) * 8)>
 // CHECK: func @memref_load_i4_dynamic(
@@ -112,7 +112,7 @@ func.func @memref_load_i4_dynamic(%arg0: index, %arg1 : index, %arg2 : index, %arg3 : index) -> i4 {
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
-// CHECK: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK: %[[SIZE:.+]] = affine.max #[[MAP0]]()[%[[ARG1]], %[[ARG0]]]
 // CHECK: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]])
 // CHECK: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]]
 // CHECK: %[[LOAD:.+]] = memref.load %[[ALLOC]][%[[INDEX]]]
@@ -122,7 +122,7 @@ func.func @memref_load_i4_dynamic(%arg0: index, %arg1 : index, %arg2 : index, %arg3 : index) -> i4 {
 // CHECK: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i8 to i4
 // CHECK: return %[[TRUNC]]
 
-// CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8)>
+// CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8, s0 floordiv 8)>
 // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 8)>
 // CHECK32-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1, s2] -> ((s0 * s1) * 4 + s2 * 4 - ((s2 + s0 * s1) floordiv 8) * 32)>
 // CHECK32: func @memref_load_i4_dynamic(
@@ -130,7 +130,7 @@ func.func @memref_load_i4_dynamic(%arg0: index, %arg1 : index, %arg2 : index, %arg3 : index) -> i4 {
 // CHECK32-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
 // CHECK32-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
 // CHECK32-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
-// CHECK32: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK32: %[[SIZE:.+]] = affine.max #[[MAP0]]()[%[[ARG1]], %[[ARG0]]]
 // CHECK32: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]])
 // CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]]
 // CHECK32: %[[LOAD:.+]] = memref.load %[[ALLOC]][%[[INDEX]]]
@@ -399,7 +399,7 @@ func.func @memref_store_i4_dynamic(%arg0: index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : i4) -> () {
   memref.store %arg4, %0[%arg2, %arg3] : memref<?x?xi4>
   return
 }
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)>
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2, s0 floordiv 2)>
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 2)>
 // CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1, s2] -> ((s0 * s1) * 4 + s2 * 4 - ((s2 + s0 * s1) floordiv 2) * 8)>
 // CHECK: func @memref_store_i4_dynamic(
@@ -408,7 +408,7 @@ func.func @memref_store_i4_dynamic(%arg0: index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : i4) -> () {
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
 // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: i4
-// CHECK-DAG: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK-DAG: %[[SIZE:.+]] = affine.max #[[MAP0]]()[%[[ARG1]], %[[ARG0]]]
 // CHECK-DAG: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi8>
 // CHECK-DAG: %[[EXTUI:.+]] = arith.extui %[[ARG4]] : i4 to i8
 // CHECK-DAG: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]]
@@ -423,7 +423,7 @@ func.func @memref_store_i4_dynamic(%arg0: index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : i4) -> () {
 // CHECK: %[[WRITE_RMW:.+]] = memref.atomic_rmw ori %[[SHIFTED_VAL]], %[[ALLOC]][%[[INDEX]]] : (i8, memref<?xi8>) -> i8
 // CHECK: return
 
-// CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8)>
+// CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8, s0 floordiv 8)>
 // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 8)>
 // CHECK32-DAG: #[[MAP2:.+]] = affine_map<()[s0, s1, s2] -> ((s0 * s1) * 4 + s2 * 4 - ((s2 + s0 * s1) floordiv 8) * 32)>
 // CHECK32: func @memref_store_i4_dynamic(
@@ -432,7 +432,7 @@ func.func @memref_store_i4_dynamic(%arg0: index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : i4) -> () {
 // CHECK32-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
 // CHECK32-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
 // CHECK32-SAME: %[[ARG4:[a-zA-Z0-9]+]]: i4
-// CHECK32-DAG: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK32-DAG: %[[SIZE:.+]] = affine.max #[[MAP0]]()[%[[ARG1]], %[[ARG0]]]
 // CHECK32-DAG: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi32>
 // CHECK32-DAG: %[[EXTUI:.+]] = arith.extui %[[ARG4]] : i4 to i32
 // CHECK32-DAG: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]]
diff --git a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
index 9e2d131f421b7..6c924492b513e 100644
--- a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
+++ b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
@@ -58,27 +58,27 @@ func.func @vector_load_i4_dynamic(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) -> vector<8xi4> {
   %1 = vector.load %0[%arg2, %arg3] : memref<?x?xi4>, vector<8xi4>
   return %1 : vector<8xi4>
 }
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)>
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2, s0 floordiv 2)>
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 2)>
 // CHECK: func.func @vector_load_i4_dynamic(
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index
-// CHECK: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK: %[[SIZE:.+]] = affine.max #[[MAP0]]()[%[[ARG1]], %[[ARG0]]]
 // CHECK: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi8>
 // CHECK: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]]
 // CHECK: %[[VEC:.+]] = vector.load %[[ALLOC]][%[[INDEX]]] : memref<?xi8>, vector<4xi8>
 // CHECK: %[[VEC_I4:.+]] = vector.bitcast %[[VEC]] : vector<4xi8> to vector<8xi4>
 
-// CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8)>
+// CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8, s0 floordiv 8)>
 // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 8)>
 // CHECK32: func.func @vector_load_i4_dynamic(
 // CHECK32-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK32-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index
 // CHECK32-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index
 // CHECK32-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index
-// CHECK32: %[[SIZE:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK32: %[[SIZE:.+]] = affine.max #[[MAP0]]()[%[[ARG1]], %[[ARG0]]]
 // CHECK32: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi32>
 // CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG1]], %[[ARG3]]]
 // CHECK32: %[[VEC:.+]] = vector.load %[[ALLOC]][%[[INDEX]]] : memref<?xi32>, vector<1xi32>
@@ -450,7 +450,7 @@ func.func @vector_store_i4_dynamic(%arg0: vector<8xi4>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
   return
 }
 
-// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2)>
+// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 2, s0 floordiv 2)>
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 2)>
 // CHECK: func @vector_store_i4_dynamic
 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: vector<8xi4>
@@ -458,13 +458,13 @@ func.func @vector_store_i4_dynamic(%arg0: vector<8xi4>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
 // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index
-// CHECK: %[[SIZE:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]]
+// CHECK: %[[SIZE:.+]] = affine.max #[[MAP]]()[%[[ARG2]], %[[ARG1]]]
 // CHECK: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi8>
 // CHECK: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG2]], %[[ARG4]]]
 // CHECK: %[[VEC_I8:.+]] = vector.bitcast %[[ARG0]] : vector<8xi4> to vector<4xi8>
 // CHECK: vector.store %[[VEC_I8:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<?xi8>, vector<4xi8>
 
-// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8)>
+// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> ((s0 * s1) floordiv 8, s0 floordiv 8)>
 // CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1, s2] -> ((s2 + s0 * s1) floordiv 8)>
 // CHECK32: func @vector_store_i4_dynamic
 // CHECK32-SAME: %[[ARG0:[a-zA-Z0-9]+]]: vector<8xi4>
@@ -472,7 +472,7 @@ func.func @vector_store_i4_dynamic(%arg0: vector<8xi4>, %arg1: index, %arg2: index, %arg3: index, %arg4: index) {
 // CHECK32-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
 // CHECK32-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
 // CHECK32-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index
-// CHECK32: %[[SIZE:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]]
+// CHECK32: %[[SIZE:.+]] = affine.max #[[MAP]]()[%[[ARG2]], %[[ARG1]]]
 // CHECK32: %[[ALLOC:.+]] = memref.alloc(%[[SIZE]]) : memref<?xi32>
 // CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG2]], %[[ARG4]]]
 // CHECK32: %[[VEC_I8:.+]] = vector.bitcast %[[ARG0]] : vector<8xi4> to vector<1xi32>
@@ -537,7 +537,7 @@ func.func @vector_maskedstore_i4(
 
 // CHECK: #[[$ATTR_10:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)>
 // CHECK: #[[$ATTR_11:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)>
-// CHECK-LABEL: func.func @vector_maskedstore_i4(
+// CHECK: func.func @vector_maskedstore_i4(
 // CHECK-SAME: %[[IDX_1:[a-zA-Z0-9]+]]: index,
 // CHECK-SAME: %[[IDX_2:[a-zA-Z0-9]+]]: index,
 // CHECK-SAME: %[[NUM_EL_TO_STORE:[a-zA-Z0-9]+]]: index,
@@ -557,7 +557,7 @@ func.func @vector_maskedstore_i4(
 
 // CHECK32: #[[$ATTR_17:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)>
 // CHECK32: #[[$ATTR_18:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
-// CHECK32-LABEL: func.func @vector_maskedstore_i4(
+// CHECK32: func.func @vector_maskedstore_i4(
 // CHECK32-SAME: %[[IDX_1:[a-zA-Z0-9]+]]: index,
 // CHECK32-SAME: %[[IDX_2:[a-zA-Z0-9]+]]: index,
 // CHECK32-SAME: %[[NUM_EL_TO_STORE:[a-zA-Z0-9]+]]: index,
@@ -623,7 +623,7 @@ func.func @vector_maskedstore_i4_constant_mask(
 }
 
 // CHECK: #[[$ATTR_12:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)>
-// CHECK-LABEL: func.func @vector_maskedstore_i4_constant_mask(
+// CHECK: func.func @vector_maskedstore_i4_constant_mask(
 // CHECK-SAME: %[[IDX_1:[a-zA-Z0-9]+]]: index,
 // CHECK-SAME: %[[IDX_2:[a-zA-Z0-9]+]]: index,
 // CHECK-SAME: %[[VAL_TO_STORE:[a-zA-Z0-9]+]]: vector<8xi4>) {
@@ -639,7 +639,7 @@ func.func @vector_maskedstore_i4_constant_mask(
 // CHECK: vector.maskedstore %[[ALLOC]]{{\[}}%[[LIDX]]], %[[NEW_MASK]], %[[NEW_VAL]] : memref<12xi8>, vector<4xi1>, vector<4xi8>
 
 // CHECK32: #[[$ATTR_20:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)>
-// CHECK32-LABEL: func.func @vector_maskedstore_i4_constant_mask(
+// CHECK32: func.func @vector_maskedstore_i4_constant_mask(
 // CHECK32-SAME: %[[IDX_1:[a-zA-Z0-9]+]]: index,
 // CHECK32-SAME: %[[IDX_2:[a-zA-Z0-9]+]]: index,
 // CHECK32-SAME: %[[VAL_TO_STORE:[a-zA-Z0-9]+]]: vector<8xi4>) {
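Illustrative note on the size computation (a sketch with made-up numbers, not part of the patch): getLinearizedMemRefOffsetAndSize now returns the linearized size as the maximum over dimensions of stride * size, scaled by dstBits / srcBits, instead of the product of the sizes. For an identity row-major layout the two coincide, which is why the first result of the updated maps, (s0 * s1) floordiv 2, is unchanged; they differ for non-contiguous strided views:

  // Hypothetical strided view: memref<4x4xi8, strided<[8, 1]>>
  //   product of sizes:       4 * 4         = 16 elements
  //   max of stride * size:   max(4*8, 4*1) = 32 elements
  // The last element lives at linear offset 3*8 + 3*1 = 27 from the base, so the
  // product under-counts the span that the fat-raw-buffer bounds check and the
  // emulated i8/i32 allocation must cover, while the max is a conservative
  // upper bound. With srcBits = 4 and dstBits = 8 (scaler = 2) each term is then
  // divided by 2, matching the affine.max maps checked in the tests above.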