diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index b99f77fdc8b30..b5f359293b865 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -1844,9 +1844,42 @@ bool arith::IndexCastOp::areCastCompatible(TypeRange inputs, return areIndexCastCompatible(inputs, outputs); } +static unsigned getBitwidth(Type type, unsigned indexBitwidth) { + Type elemType = getElementTypeOrSelf(type); + if (isa(elemType)) + return indexBitwidth; + return elemType.getIntOrFloatBitWidth(); +} + +template +struct InferExactOnIndexCast final : OpRewritePattern { + InferExactOnIndexCast(MLIRContext *context) + : OpRewritePattern(context) {} + + LogicalResult matchAndRewrite(CastOp op, + PatternRewriter &rewriter) const override { + if (op.getExact()) + return failure(); + + DataLayout layout = DataLayout::closest(op); + unsigned indexBitwidth = + layout.getTypeSizeInBits(IndexType::get(op.getContext())); + unsigned srcBW = getBitwidth(op.getIn().getType(), indexBitwidth); + unsigned dstBW = getBitwidth(op.getType(), indexBitwidth); + if (srcBW > dstBW) + return rewriter.notifyMatchFailure(op, "source is wider than dest"); + + rewriter.modifyOpInPlace(op, [&] { op.setExact(true); }); + return success(); + } +}; + OpFoldResult arith::IndexCastOp::fold(FoldAdaptor adaptor) { // index_cast(constant) -> constant - unsigned resultBitwidth = 64; // Default for index integer attributes. + DataLayout layout = DataLayout::closest(*this); + // Sane defaults for index integer attributes. + unsigned resultBitwidth = + layout.getTypeSizeInBits(IndexType::get(this->getContext())); if (auto intTy = dyn_cast(getElementTypeOrSelf(getType()))) resultBitwidth = intTy.getWidth(); @@ -1859,7 +1892,8 @@ OpFoldResult arith::IndexCastOp::fold(FoldAdaptor adaptor) { void arith::IndexCastOp::getCanonicalizationPatterns( RewritePatternSet &patterns, MLIRContext *context) { - patterns.add(context); + patterns.add>(context); } //===----------------------------------------------------------------------===// @@ -1873,7 +1907,10 @@ bool arith::IndexCastUIOp::areCastCompatible(TypeRange inputs, OpFoldResult arith::IndexCastUIOp::fold(FoldAdaptor adaptor) { // index_castui(constant) -> constant - unsigned resultBitwidth = 64; // Default for index integer attributes. + DataLayout layout = DataLayout::closest(*this); + // Sane defaults for index integer attributes. + unsigned resultBitwidth = + layout.getTypeSizeInBits(IndexType::get(this->getContext())); if (auto intTy = dyn_cast(getElementTypeOrSelf(getType()))) resultBitwidth = intTy.getWidth(); @@ -1886,7 +1923,8 @@ OpFoldResult arith::IndexCastUIOp::fold(FoldAdaptor adaptor) { void arith::IndexCastUIOp::getCanonicalizationPatterns( RewritePatternSet &patterns, MLIRContext *context) { - patterns.add(context); + patterns.add>(context); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir b/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir index fd8910265cd89..58a1d91cf0cad 100644 --- a/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir +++ b/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir @@ -588,7 +588,7 @@ func.func @arm_sme_extract_tile_slice_ver_i128(%tile_slice_index : index) -> vec // CHECK-LABEL: @arm_sme_streaming_vl_bytes // CHECK: %[[CONST:.*]] = arith.constant 8 : index // CHECK: %[[CNTSD:.*]] = "arm_sme.intr.cntsd"() : () -> i64 -// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] : i64 to index +// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] exact : i64 to index // CHECK: %[[MUL:.*]] = arith.muli %[[CNTSD_IDX]], %[[CONST]] : index func.func @arm_sme_streaming_vl_bytes() -> index { %svl_b = arm_sme.streaming_vl @@ -600,7 +600,7 @@ func.func @arm_sme_streaming_vl_bytes() -> index { // CHECK-LABEL: @arm_sme_streaming_vl_half_words // CHECK: %[[CONST:.*]] = arith.constant 4 : index // CHECK: %[[CNTSD:.*]] = "arm_sme.intr.cntsd"() : () -> i64 -// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] : i64 to index +// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] exact : i64 to index // CHECK: %[[MUL:.*]] = arith.muli %[[CNTSD_IDX]], %[[CONST]] : index func.func @arm_sme_streaming_vl_half_words() -> index { %svl_h = arm_sme.streaming_vl @@ -612,7 +612,7 @@ func.func @arm_sme_streaming_vl_half_words() -> index { // CHECK-LABEL: @arm_sme_streaming_vl_words // CHECK: %[[CONST:.*]] = arith.constant 2 : index // CHECK: %[[CNTSD:.*]] = "arm_sme.intr.cntsd"() : () -> i64 -// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] : i64 to index +// CHECK: %[[CNTSD_IDX:.*]] = arith.index_cast %[[CNTSD]] exact : i64 to index // CHECK: %[[MUL:.*]] = arith.muli %[[CNTSD_IDX]], %[[CONST]] : index func.func @arm_sme_streaming_vl_words() -> index { %svl_w = arm_sme.streaming_vl diff --git a/mlir/test/Conversion/ShardToMPI/convert-shard-to-mpi.mlir b/mlir/test/Conversion/ShardToMPI/convert-shard-to-mpi.mlir index 08c3897e4e650..3fa9b472f670a 100644 --- a/mlir/test/Conversion/ShardToMPI/convert-shard-to-mpi.mlir +++ b/mlir/test/Conversion/ShardToMPI/convert-shard-to-mpi.mlir @@ -25,7 +25,7 @@ func.func @process_multi_index_reorder() -> (index, index) { // CHECK-LABEL: func @process_linear_index func.func @process_linear_index() -> index { // CHECK: %[[RES:.*]], %[[rank:.*]] = mpi.comm_rank - // CHECK: %[[cast:.*]] = arith.index_cast %[[rank]] : i32 to index + // CHECK: %[[cast:.*]] = arith.index_cast %[[rank]] exact : i32 to index %0 = shard.process_linear_index on @grid0 : index // CHECK: return %[[cast]] : index return %0 : index @@ -97,7 +97,7 @@ module { func.func @all_slice(%arg0 : tensor<3x5xf32>) -> tensor<3x1xf32> { // CHECK: [[v0:%.*]] = mpi.comm_world : !mpi.comm // CHECK: [[vretval:%.*]], [[vrank:%.*]] = mpi.comm_rank([[v0]]) : !mpi.retval, i32 - // CHECK: [[v1:%.*]] = arith.index_cast [[vrank]] : i32 to index + // CHECK: [[v1:%.*]] = arith.index_cast [[vrank]] exact : i32 to index // CHECK: [[v2:%.*]]:3 = affine.delinearize_index [[v1]] into (3, 4, 5) : index, index, index // CHECK: [[vextracted_slice:%.*]] = tensor.extract_slice // CHECK-SAME: [0, [[v2]]#2] [3, 1] [1, 1] : tensor<3x5xf32> to tensor<3x1xf32> @@ -203,7 +203,7 @@ module attributes { mpi.dlti = #dlti.map<"MPI:comm_world_rank" = 7> } { // CHECK: [[v1:%.*]] = mpi.comm_world : !mpi.comm // CHECK: [[vnewcomm:%.*]] = mpi.comm_split([[v1]], [[vc2_i32]], [[vc1_i32]]) : !mpi.comm // CHECK: [[vsize:%.*]] = mpi.comm_size([[vnewcomm]]) : i32 - // CHECK: [[v2:%.*]] = arith.index_cast [[vsize]] : i32 to index + // CHECK: [[v2:%.*]] = arith.index_cast [[vsize]] exact : i32 to index // CHECK: [[v3:%.*]] = arith.cmpi eq, [[v2]], [[vc4]] : index // CHECK: cf.assert [[v3]] // CHECK: [[valloc:%.*]] = memref.alloc() : memref<4x3x4xf32> @@ -227,7 +227,7 @@ module attributes { mpi.dlti = #dlti.map<"MPI:comm_world_rank" = 7> } { // CHECK: [[v1:%.*]] = mpi.comm_world : !mpi.comm // CHECK: [[vnewcomm:%.*]] = mpi.comm_split([[v1]], [[vc1_i32]], [[vc2_i32]]) : !mpi.comm // CHECK: [[vsize:%.*]] = mpi.comm_size([[vnewcomm]]) : i32 - // CHECK: [[v2:%.*]] = arith.index_cast [[vsize]] : i32 to index + // CHECK: [[v2:%.*]] = arith.index_cast [[vsize]] exact : i32 to index // CHECK: [[v3:%.*]] = arith.cmpi eq, [[v2]], [[vc5]] : index // CHECK: cf.assert [[v3]] // CHECK: [[valloc:%.*]] = memref.alloc() : memref<5x3x4xf32> @@ -252,7 +252,7 @@ module attributes { mpi.dlti = #dlti.map<"MPI:comm_world_rank" = 7> } { // CHECK: [[v0:%.*]] = mpi.comm_world : !mpi.comm // CHECK: [[vnewcomm:%.*]] = mpi.comm_split([[v0]], [[vc1_i32]], [[vc2_i32]]) : !mpi.comm // CHECK: [[vsize:%.*]] = mpi.comm_size([[vnewcomm]]) : i32 - // CHECK: [[v1:%.*]] = arith.index_cast [[vsize]] : i32 to index + // CHECK: [[v1:%.*]] = arith.index_cast [[vsize]] exact : i32 to index // CHECK: [[v2:%.*]] = arith.cmpi eq, [[v1]], [[vc5]] : index // CHECK: cf.assert [[v2]] // CHECK: [[valloc:%.*]] = memref.alloc() : memref<5x3x4xf32> @@ -509,7 +509,7 @@ func.func @mlp_1dgrid(%arg0: tensor<512x512xf32>, %arg1: tensor<2048x256xf32>, % // CHECK: [[v0:%.*]] = bufferization.to_buffer [[varg0]] : tensor<512x512xf32> to memref<512x512xf32> // CHECK: [[v1:%.*]] = mpi.comm_world : !mpi.comm // CHECK: [[vsize:%.*]] = mpi.comm_size([[v1]]) : i32 - // CHECK: [[v2:%.*]] = arith.index_cast [[vsize]] : i32 to index + // CHECK: [[v2:%.*]] = arith.index_cast [[vsize]] exact : i32 to index // CHECK: [[v3:%.*]] = arith.cmpi eq, [[v2]], [[vc4]] : index // CHECK: cf.assert [[v3]] // CHECK: [[valloc:%.*]] = memref.alloc() : memref<4x512x512xf32> diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstore_1d.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstore_1d.mlir index aebec7fc27c78..1b563a9bfc8eb 100644 --- a/mlir/test/Conversion/XeGPUToXeVM/loadstore_1d.mlir +++ b/mlir/test/Conversion/XeGPUToXeVM/loadstore_1d.mlir @@ -10,11 +10,11 @@ gpu.module @load_store_check { // CHECK: %[[SRCCE:.*]] = memref.memory_space_cast %[[SRC]] : memref<512xf32, 1> to memref<512xf32> %srcce = memref.memory_space_cast %src : memref<512xf32, 1> to memref<512xf32> // CHECK: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[SRCCE]] : memref<512xf32> -> index - // CHECK: %[[INTPTR_I64:.*]] = arith.index_castui %[[INTPTR]] : index to i64 + // CHECK: %[[INTPTR_I64:.*]] = arith.index_castui %[[INTPTR]] exact : index to i64 // CHECK: %[[DSTTE:.*]] = memref.memory_space_cast %[[DST]] : memref<256xf32, 1> to memref<256xf32> %dstte = memref.memory_space_cast %dst : memref<256xf32, 1> to memref<256xf32> // CHECK: %[[INTPTR1:.*]] = memref.extract_aligned_pointer_as_index %[[DSTTE]] : memref<256xf32> -> index - // CHECK: %[[INTPTR1_I64:.*]] = arith.index_castui %[[INTPTR1]] : index to i64 + // CHECK: %[[INTPTR1_I64:.*]] = arith.index_castui %[[INTPTR1]] exact : index to i64 %src_tdesc = xegpu.create_nd_tdesc %srcce : memref<512xf32> -> !xegpu.tensor_desc<32xf32> // CHECK: %[[ADDR:.*]] = arith.addi %[[INTPTR_I64]], %[[C384]] : i64 diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstore_nd_sub_byte.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstore_nd_sub_byte.mlir index 97e5ce14f8539..d0d78697326c6 100644 --- a/mlir/test/Conversion/XeGPUToXeVM/loadstore_nd_sub_byte.mlir +++ b/mlir/test/Conversion/XeGPUToXeVM/loadstore_nd_sub_byte.mlir @@ -11,11 +11,11 @@ gpu.module @load_store_check { // CHECK: %[[C128_I32:.*]] = arith.constant 128 : i32 // CHECK: %[[SRCCE:.*]] = memref.memory_space_cast %[[ARG0]] // CHECK: %[[SRCINDEX:.*]] = memref.extract_aligned_pointer_as_index %[[SRCCE]] - // CHECK: %[[SRCPTR64:.*]] = arith.index_castui %[[SRCINDEX]] : index to i64 + // CHECK: %[[SRCPTR64:.*]] = arith.index_castui %[[SRCINDEX]] exact : index to i64 %srcce = memref.memory_space_cast %src : memref<16x128xi4, 1> to memref<16x128xi4> // CHECK: %[[DSTTE:.*]] = memref.memory_space_cast %[[ARG1]] // CHECK: %[[DSTINDEX:.*]] = memref.extract_aligned_pointer_as_index %[[DSTTE]] - // CHECK: %[[DSTPTR64:.*]] = arith.index_castui %[[DSTINDEX]] : index to i64 + // CHECK: %[[DSTPTR64:.*]] = arith.index_castui %[[DSTINDEX]] exact : index to i64 %dstte = memref.memory_space_cast %dst : memref<16x128xi4, 1> to memref<16x128xi4> // CHECK: %[[PAYLOAD_SRC:.*]] = vector.insert %[[SRCPTR64]], %[[CST]] [0] : i64 into vector<4xi64> diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir index 4c84699a069f0..2ff956901e11c 100644 --- a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir +++ b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir @@ -10,7 +10,7 @@ gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>, // CHECK: %[[C2_I64:.*]] = arith.constant 2 : i64 // CHECK: %[[VAR2:.*]] = vector.extract %[[ARG3]][0] : i1 from vector<1xi1> // CHECK: %[[VAR0:.*]] = vector.extract %[[ARG1]][0] : index from vector<1xindex> - // CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] : index to i64 + // CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] exact : index to i64 // CHECK: %[[VAR3:.*]] = arith.muli %[[VAR1]], %[[C2_I64]] : i64 // CHECK: %[[VAR4:.*]] = arith.addi %[[ARG0]], %[[VAR3]] : i64 // CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR4]] : i64 to !llvm.ptr<1> @@ -56,7 +56,7 @@ gpu.func @store_scatter_i64_src_value_offset(%src: i64, %offset: vector<1xindex> // CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64 // CHECK: %[[VAR2:.*]] = vector.extract %[[ARG2]][0] : i1 from vector<1xi1> // CHECK: %[[VAR0:.*]] = vector.extract %[[ARG1]][0] : index from vector<1xindex> - // CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] : index to i64 + // CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] exact : index to i64 %0 = arith.constant dense<2.9>: vector<1xf32> // CHECK: %[[VAR4:.*]] = arith.muli %[[VAR1]], %[[C4_I64]] : i64 // CHECK: %[[VAR5:.*]] = arith.addi %[[ARG0]], %[[VAR4]] : i64 @@ -77,7 +77,7 @@ gpu.module @test { gpu.func @prefetch_i64_src_value_offset(%src: i64, %offset: vector<1xindex>) { // CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64 // CHECK: %[[VAR0:.*]] = vector.extract %[[ARG1]][0] : index from vector<1xindex> - // CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] : index to i64 + // CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] exact : index to i64 // CHECK: %[[VAR2:.*]] = arith.muli %[[VAR1]], %[[C4_I64]] : i64 // CHECK: %[[VAR3:.*]] = arith.addi %[[ARG0]], %[[VAR2]] : i64 // CHECK: %[[VAR4:.*]] = llvm.inttoptr %[[VAR3]] : i64 to !llvm.ptr<1> @@ -95,9 +95,9 @@ gpu.module @test { gpu.func @prefetch_memref_src_value_offset(%src: memref<256xf32>, %offset: vector<1xindex>) { // CHECK: %[[C4_I64:.*]] = arith.constant 4 : i64 // CHECK: %[[VAR0:.*]] = vector.extract %[[ARG1]][0] : index from vector<1xindex> - // CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] : index to i64 + // CHECK: %[[VAR1:.*]] = arith.index_castui %[[VAR0]] exact : index to i64 // CHECK: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[ARG0]] : memref<256xf32> -> index - // CHECK: %[[VAR2:.*]] = arith.index_castui %[[INTPTR]] : index to i64 + // CHECK: %[[VAR2:.*]] = arith.index_castui %[[INTPTR]] exact : index to i64 // CHECK: %[[VAR3:.*]] = arith.muli %[[VAR1]], %[[C4_I64]] : i64 // CHECK: %[[VAR4:.*]] = arith.addi %[[VAR2]], %[[VAR3]] : i64 // CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR4]] : i64 to !llvm.ptr<1> @@ -118,8 +118,8 @@ gpu.func @load_gather_from_dyn_memref_subview(%dyn: memref, %offset: vect // CHECK: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]], %[[STRIDES:.*]] = memref.extract_strided_metadata %{{.*}} : memref<16xf16, strided<[1], offset: ?>> -> memref, index, index, index // CHECK: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[BASE]] : memref -> index - // CHECK: %[[CAST1:.*]] = arith.index_castui %[[INTPTR]] : index to i64 - // CHECK: %[[CAST2:.*]] = arith.index_castui %[[OFFSET]] : index to i64 + // CHECK: %[[CAST1:.*]] = arith.index_castui %[[INTPTR]] exact : index to i64 + // CHECK: %[[CAST2:.*]] = arith.index_castui %[[OFFSET]] exact : index to i64 // CHECK: %[[MUL1:.*]] = arith.muli %[[CAST2]], %{{.*}} : i64 // CHECK: %[[ADD1:.*]] = arith.addi %[[CAST1]], %[[MUL1]] : i64 // CHECK: %[[MUL2:.*]] = arith.muli %{{.*}}, %{{.*}} : i64 diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index 326afcae696cc..51e29214d1c24 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -580,7 +580,7 @@ func.func @orOfExtUI_nneg_mixed(%arg0: i8, %arg1: i8) -> i64 { // ----- // CHECK-LABEL: @indexCastOfSignExtend -// CHECK: %[[res:.+]] = arith.index_cast %arg0 : i8 to index +// CHECK: %[[res:.+]] = arith.index_cast %arg0 exact : i8 to index // CHECK: return %[[res]] func.func @indexCastOfSignExtend(%arg0: i8) -> index { %ext = arith.extsi %arg0 : i8 to i16 @@ -598,7 +598,7 @@ func.func @indexCastOfSignExtend_exact(%arg0: i8) -> index { } // CHECK-LABEL: @indexCastUIOfUnsignedExtend -// CHECK: %[[res:.+]] = arith.index_castui %arg0 : i8 to index +// CHECK: %[[res:.+]] = arith.index_castui %arg0 exact : i8 to index // CHECK: return %[[res]] func.func @indexCastUIOfUnsignedExtend(%arg0: i8) -> index { %ext = arith.extui %arg0 : i8 to i16 @@ -607,7 +607,7 @@ func.func @indexCastUIOfUnsignedExtend(%arg0: i8) -> index { } // CHECK-LABEL: @indexCastUIOfUnsignedExtend_nneg_on_extui -// CHECK: %[[res:.+]] = arith.index_castui %arg0 nneg : i8 to index +// CHECK: %[[res:.+]] = arith.index_castui %arg0 exact nneg : i8 to index // CHECK: return %[[res]] func.func @indexCastUIOfUnsignedExtend_nneg_on_extui(%arg0: i8) -> index { %ext = arith.extui %arg0 nneg : i8 to i16 @@ -616,7 +616,7 @@ func.func @indexCastUIOfUnsignedExtend_nneg_on_extui(%arg0: i8) -> index { } // CHECK-LABEL: @indexCastUIOfUnsignedExtend_nneg_on_castui -// CHECK: %[[res:.+]] = arith.index_castui %arg0 : i8 to index +// CHECK: %[[res:.+]] = arith.index_castui %arg0 exact : i8 to index // CHECK-NOT: nneg // CHECK: return %[[res]] func.func @indexCastUIOfUnsignedExtend_nneg_on_castui(%arg0: i8) -> index { @@ -647,18 +647,18 @@ func.func @indexCastUIOfUnsignedExtend_nneg_exact(%arg0: i8) -> index { // CHECK-LABEL: @indexCastUIOfIndexCastUI_no_exact // CHECK: arith.index_castui // CHECK: arith.index_castui -func.func @indexCastUIOfIndexCastUI_no_exact(%arg0: i32) -> i32 { - %idx = arith.index_castui %arg0 : i32 to index - %res = arith.index_castui %idx : index to i32 - return %res : i32 +func.func @indexCastUIOfIndexCastUI_no_exact(%arg0: i128) -> i128 { + %idx = arith.index_castui %arg0 : i128 to index + %res = arith.index_castui %idx : index to i128 + return %res : i128 } // CHECK-LABEL: @indexCastUIOfIndexCastUI_exact_inner -// CHECK: return %arg0 : i32 -func.func @indexCastUIOfIndexCastUI_exact_inner(%arg0: i32) -> i32 { - %idx = arith.index_castui %arg0 exact : i32 to index - %res = arith.index_castui %idx : index to i32 - return %res : i32 +// CHECK: return %arg0 : i128 +func.func @indexCastUIOfIndexCastUI_exact_inner(%arg0: i128) -> i128 { + %idx = arith.index_castui %arg0 exact : i128 to index + %res = arith.index_castui %idx : index to i128 + return %res : i128 } // exact on outer only does NOT trigger the fold (outer exact on widening @@ -666,10 +666,10 @@ func.func @indexCastUIOfIndexCastUI_exact_inner(%arg0: i32) -> i32 { // CHECK-LABEL: @indexCastUIOfIndexCastUI_exact_outer // CHECK: arith.index_castui // CHECK: arith.index_castui -func.func @indexCastUIOfIndexCastUI_exact_outer(%arg0: i32) -> i32 { - %idx = arith.index_castui %arg0 : i32 to index - %res = arith.index_castui %idx exact : index to i32 - return %res : i32 +func.func @indexCastUIOfIndexCastUI_exact_outer(%arg0: i128) -> i128 { + %idx = arith.index_castui %arg0 : i128 to index + %res = arith.index_castui %idx exact : index to i128 + return %res : i128 } // CHECK-LABEL: @indexCastUIOfIndexCastUI_exact_both diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir b/mlir/test/Dialect/GPU/transform-gpu.mlir index 465e8fdd66422..a054cc49871b2 100644 --- a/mlir/test/Dialect/GPU/transform-gpu.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu.mlir @@ -792,12 +792,12 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> { // CHECK: %[[LIN_W:.*]] = affine.apply #[[$MAP_LIN_W]]()[%[[TIDX]], %[[TIDY]]] // // Compute the active warps below using the mask + popcnt - // CHECK: %[[LIN_W_i64:.*]] = arith.index_castui %[[LIN_W]] : index to i64 + // CHECK: %[[LIN_W_i64:.*]] = arith.index_castui %[[LIN_W]] exact : index to i64 // CHECK: %[[TWO_POW_W:.*]] = arith.shli %[[C1_i64]], %[[LIN_W_i64]] : i64 // CHECK: %[[FILTER_TILL_W:.*]] = arith.subi %[[TWO_POW_W]], %[[C1_i64]] : i64 // CHECK: %[[ACTIVE_TILL_W:.*]] = arith.andi %[[FILTER_TILL_W]], %[[C753_i64]] : i64 // CHECK: %[[LOGICAL_ID_W_i64:.*]] = math.ctpop %[[ACTIVE_TILL_W]] : i64 - // CHECK: %[[LOGICAL_ID_W:.*]] = arith.index_castui %[[LOGICAL_ID_W_i64]] : i64 to index + // CHECK: %[[LOGICAL_ID_W:.*]] = arith.index_castui %[[LOGICAL_ID_W_i64]] exact : i64 to index // // Dynamically compute whether this warp is active below using the mask + popcnt // CHECK: %[[IS_ACTIVE_W_MASK:.*]] = arith.andi %[[TWO_POW_W]], %[[C753_i64]] : i64 diff --git a/mlir/test/Dialect/Shape/canonicalize.mlir b/mlir/test/Dialect/Shape/canonicalize.mlir index 22add87ff3ed4..ad703169c112b 100644 --- a/mlir/test/Dialect/Shape/canonicalize.mlir +++ b/mlir/test/Dialect/Shape/canonicalize.mlir @@ -1419,7 +1419,7 @@ func.func @shape_of_from_reshape(%arg0: tensor<*xf32>, %arg1: tensor) - // CHECK-SAME: %[[INPUT:.*]]: tensor // CHECK-SAME: %[[SHAPE:.*]]: tensor<3xi32> func.func @shape_of_from_reshape_int_to_index(%arg0: tensor, %arg1: tensor<3xi32>) -> tensor<3xindex> { - // CHECK: %[[CAST_SHAPE:.*]] = arith.index_cast %[[SHAPE]] : tensor<3xi32> to tensor<3xindex> + // CHECK: %[[CAST_SHAPE:.*]] = arith.index_cast %[[SHAPE]] exact : tensor<3xi32> to tensor<3xindex> // CHECK: return %[[CAST_SHAPE]] : tensor<3xindex> %0 = tensor.reshape %arg0(%arg1) : (tensor, tensor<3xi32>) -> tensor %1 = shape.shape_of %0 : tensor -> tensor<3xindex> diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir index 35fd7c33e4cfe..6cd71a101246b 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir @@ -26,10 +26,10 @@ // CHECK-DAG: %[[mask:.*]] = arith.constant dense : vector<16xi1> // CHECK: %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref // CHECK: %[[a:.*]] = arith.extui %[[p]] : i32 to i64 -// CHECK: %[[q:.*]] = arith.index_cast %[[a]] : i64 to index +// CHECK: %[[q:.*]] = arith.index_cast %[[a]] exact : i64 to index // CHECK: %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref // CHECK: %[[b:.*]] = arith.extui %[[r]] : i32 to i64 -// CHECK: %[[s:.*]] = arith.index_cast %[[b]] : i64 to index +// CHECK: %[[s:.*]] = arith.index_cast %[[b]] exact : i64 to index // CHECK: %[[boundary:.*]] = affine.apply #[[$map0]]()[%[[q]], %[[s]]] // CHECK: scf.for %[[i:.*]] = %[[q]] to %[[boundary]] step %[[c16]] { // CHECK: %[[li:.*]] = vector.load %{{.*}}[%[[i]]] : memref, vector<16xi32> diff --git a/mlir/test/Dialect/SparseTensor/specifier_to_llvm.mlir b/mlir/test/Dialect/SparseTensor/specifier_to_llvm.mlir index 8df8ac6b1675c..a9c35a90af3d9 100644 --- a/mlir/test/Dialect/SparseTensor/specifier_to_llvm.mlir +++ b/mlir/test/Dialect/SparseTensor/specifier_to_llvm.mlir @@ -18,7 +18,7 @@ func.func @sparse_metadata_init() -> !sparse_tensor.storage_specifier<#CSR> { // CHECK-LABEL: func.func @sparse_get_md( // CHECK-SAME: %[[VAL_0:.*]]: !llvm.struct<(array<2 x i64>, array<3 x i64>)>) -> index { // CHECK: %[[VAL_1:.*]] = llvm.extractvalue %[[VAL_0]][0, 0] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> -// CHECK: %[[CAST:.*]] = arith.index_cast %[[VAL_1]] : i64 to index +// CHECK: %[[CAST:.*]] = arith.index_cast %[[VAL_1]] exact : i64 to index // CHECK: return %[[CAST]] : index func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#CSR>) -> index { %0 = sparse_tensor.storage_specifier.get %arg0 lvl_sz at 0 @@ -29,7 +29,7 @@ func.func @sparse_get_md(%arg0: !sparse_tensor.storage_specifier<#CSR>) -> index // CHECK-LABEL: func.func @sparse_set_md( // CHECK-SAME: %[[VAL_0:.*]]: !llvm.struct<(array<2 x i64>, array<3 x i64>)>, // CHECK-SAME: %[[VAL_1:.*]]: index) -> !llvm.struct<(array<2 x i64>, array<3 x i64>)> { -// CHECK: %[[CAST:.*]] = arith.index_cast %[[VAL_1]] : index to i64 +// CHECK: %[[CAST:.*]] = arith.index_cast %[[VAL_1]] exact : index to i64 // CHECK: %[[VAL_2:.*]] = llvm.insertvalue %[[CAST]], %[[VAL_0]][0, 0] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> // CHECK: return %[[VAL_2]] : !llvm.struct<(array<2 x i64>, array<3 x i64>)> func.func @sparse_set_md(%arg0: !sparse_tensor.storage_specifier<#CSR>, %arg1: index) diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir index 278f02ed033ab..39f5ec48deaaa 100644 --- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir +++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir @@ -1535,7 +1535,7 @@ func.func @vector_insert_strided_slice_2d_to_2d(%laneid: index) -> (vector<64x1x // CHECK-PROP: %[[W:.*]] = gpu.warp_execute_on_lane_0(%[[THREADID]])[32] args(%[[IN2]] // CHECK-PROP: %[[GATHER:.*]] = vector.gather %[[AR1]][{{.*}}] // CHECK-PROP: %[[EXTRACT:.*]] = vector.shape_cast %[[GATHER]] : vector<1x64xi32> to vector<64xi32> -// CHECK-PROP: %[[CAST:.*]] = arith.index_cast %[[EXTRACT]] : vector<64xi32> to vector<64xindex> +// CHECK-PROP: %[[CAST:.*]] = arith.index_cast %[[EXTRACT]] exact : vector<64xi32> to vector<64xindex> // CHECK-PROP: %[[EXTRACTELT:.*]] = vector.extract %[[CAST]][{{.*}}] : index from vector<64xindex> // CHECK-PROP: gpu.yield %[[EXTRACTELT]] : index // CHECK-PROP: %[[APPLY:.*]] = affine.apply #[[$MAP]]()[%[[THREADID]]] @@ -1555,7 +1555,7 @@ func.func @transfer_read_prop_operands(%in2: vector<1x2xindex>, %ar1 : memref<1 ^bb0(%arg4: vector<1x64xindex>): %28 = vector.gather %ar1[%c0, %c0, %c0] [%arg4], %cst_0, %cst : memref<1x4x2xi32>, vector<1x64xindex>, vector<1x64xi1>, vector<1x64xi32> into vector<1x64xi32> %29 = vector.extract %28[0] : vector<64xi32> from vector<1x64xi32> - %30 = arith.index_cast %29 : vector<64xi32> to vector<64xindex> + %30 = arith.index_cast %29 exact : vector<64xi32> to vector<64xindex> %36 = vector.extract %30[%c0_i32] : index from vector<64xindex> %37 = vector.transfer_read %ar2[%c0, %36, %c0], %cst_6 {in_bounds = [true]} : memref<1x4x1024xf32>, vector<64xf32> gpu.yield %37 : vector<64xf32> diff --git a/mlir/test/Dialect/XeGPU/peephole-optimize.mlir b/mlir/test/Dialect/XeGPU/peephole-optimize.mlir index 83fec045b9973..95eb1f0138a17 100644 --- a/mlir/test/Dialect/XeGPU/peephole-optimize.mlir +++ b/mlir/test/Dialect/XeGPU/peephole-optimize.mlir @@ -6,7 +6,7 @@ // CHECK: %[[C16:.*]] = arith.constant 16 : index // CHECK: %[[C32:.*]] = arith.constant 32 : index // CHECK: %[[PTR:.*]] = memref.extract_aligned_pointer_as_index %[[ARG0]] : memref<64x64xf16> -> index -// CHECK: %[[T0:.*]] = arith.index_cast %[[PTR]] : index to i64 +// CHECK: %[[T0:.*]] = arith.index_cast %[[PTR]] exact : index to i64 // CHECK: %[[BDESC:.*]] = xegpu.create_nd_tdesc %[[T0]], shape : [64, %[[C32]]], strides : [%[[C32]], 1] : i64 // CHECK-SAME: -> !xegpu.tensor_desc<16x8xi32, #xegpu.layout> // CHECK-NEXT: %[[B:.*]] = xegpu.load_nd %[[BDESC]][%{{.*}}, %[[C16]]] @@ -35,7 +35,7 @@ gpu.func @no_scf(%arg0: memref<64x64xf16>, %arg1: vector<8x16xf16>) -> vector<8x // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<64x64xi8>, %{{.*}}: vector<8x32xi8>) -> vector<8x16xi32> { // CHECK: %[[C16:.*]] = arith.constant 16 : index // CHECK: %[[PTR:.*]] = memref.extract_aligned_pointer_as_index %[[ARG0]] : memref<64x64xi8> -> index -// CHECK: %[[T0:.*]] = arith.index_cast %[[PTR]] : index to i64 +// CHECK: %[[T0:.*]] = arith.index_cast %[[PTR]] exact : index to i64 // CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[T0]], shape : [64, %[[C16]]], strides : [%[[C16]], 1] : i64 // CHECK-SAME: -> !xegpu.tensor_desc<16x8xi32, #xegpu.layout> // CHECK: %[[T2:.*]] = xegpu.load_nd %[[T1]][%{{.*}}, %[[C16]]] @@ -68,7 +68,7 @@ gpu.func @no_scf_i8(%arg0: memref<64x64xi8>, %arg1: vector<8x32xi8>) -> vector<8 // CHECK: %[[C16:.*]] = arith.constant 16 : index // CHECK: %[[C256:.*]] = arith.constant 256 : index // CHECK: %[[PTR:.*]] = memref.extract_aligned_pointer_as_index %[[ARG1]] : memref<256x256xf16> -> index -// CHECK: %[[T3:.*]] = arith.index_cast %[[PTR]] : index to i64 +// CHECK: %[[T3:.*]] = arith.index_cast %[[PTR]] exact : index to i64 // CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[T3]], shape : [256, %[[C128]]], strides : [%c128, 1] // CHECK-SAME: : i64 -> !xegpu.tensor_desc<16x8xi32, #xegpu.layout> // CHECK: %{{.*}} = scf.for %[[K:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%{{.*}}) -> (vector<8x16xf32>) { @@ -111,7 +111,7 @@ gpu.func @gemm_b_transpose(%arg0: memref<256x256xf16>, %arg1: memref<256x256xf16 // CHECK: %[[C256:.*]] = arith.constant 256 : index // CHECK: scf.for %{{.*}} to %{{.*}} step %{{.*}} { // CHECK: %[[PTR:.*]] = memref.extract_aligned_pointer_as_index %[[ARG1]] : memref<256x256xf16> -> index -// CHECK: %[[T3:.*]] = arith.index_cast %[[PTR]] : index to i64 +// CHECK: %[[T3:.*]] = arith.index_cast %[[PTR]] exact : index to i64 // CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[T3]], shape : [256, %[[C128]]], strides : [%[[C128]], 1] : i64 // CHECK-SAME: -> !xegpu.tensor_desc<16x8xi32, #xegpu.layout> // CHECK: %{{.*}} = scf.for %[[K:.*]] = %{{.*}} iter_args(%{{.*}}) -> (vector<8x16xf32>) { @@ -155,7 +155,7 @@ gpu.func @nested_scf(%arg0: memref<256x256xf16>, %arg1: memref<256x256xf16>, %ar // CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<32x16xi32> // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[PTR:.*]] = memref.extract_aligned_pointer_as_index %[[ARG1]] : memref<256x256xf16> -> index -// CHECK: %[[T2:.*]] = arith.index_cast %[[PTR]] : index to i64 +// CHECK: %[[T2:.*]] = arith.index_cast %[[PTR]] exact : index to i64 // CHECK: %[[T3:.*]] = xegpu.create_nd_tdesc %[[T2]], shape : [256, %[[C128]]], strides : [%[[C128]], 1] : i64 // CHECK-SAME: -> !xegpu.tensor_desc<32x8xi32, #xegpu.layout> // CHECK: %{{.*}}:4 = scf.for %[[K:.*]] = %{{.*}} iter_args(%{{.*}}) -> (vector<8x16xf32>, vector<8x16xf32>, vector<8x16xf32>, vector<8x16xf32>) { @@ -226,7 +226,7 @@ gpu.func @large_loads(%arg0: vector<8x16xf16>, %arg1: memref<256x256xf16>, %arg2 // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[PTR:.*]] = memref.extract_aligned_pointer_as_index %[[ARG1]] : // CHECK-SAME: memref<256x256xf16> -> index -// CHECK: %[[T2:.*]] = arith.index_cast %[[PTR]] : index to i64 +// CHECK: %[[T2:.*]] = arith.index_cast %[[PTR]] exact : index to i64 // CHECK: %[[T3:.*]] = xegpu.create_nd_tdesc %[[T2]], shape : [256, %[[C128]]], // CHECK-SAME: strides : [%[[C128]], 1] : i64 -> // CHECK-SAME: !xegpu.tensor_desc<32x8xi32, #xegpu.layout> diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir index dde58ba31860d..fd3ef7fccb284 100644 --- a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir +++ b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir @@ -516,7 +516,7 @@ gpu.func @scatter_ops_with_leading_dims(%src: memref<256xf16>, %laneid: index) { // CHECK: gpu.yield %{{.*}}, %{{.*}} : index, memref<256x256xf16> // CHECK-NEXT: } // CHECK-NEXT: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[W]]#1 : memref<256x256xf16> -> index -// CHECK-NEXT: arith.index_cast %[[INTPTR]] : index to i64 +// CHECK-NEXT: arith.index_cast %[[INTPTR]] exact : index to i64 gpu.func @memref_extract_aligned_pointer_as_index(%arg0 : memref<256x256xf16>, %laneid: index) { %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (index) { %ptr = memref.extract_aligned_pointer_as_index %arg0 : memref<256x256xf16> -> index @@ -530,7 +530,7 @@ gpu.func @memref_extract_aligned_pointer_as_index(%arg0 : memref<256x256xf16>, % // CHECK-LABEL: gpu.func @memref_alloca( // CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() : memref<2048xi8, 3> // CHECK-NEXT: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[ALLOCA]] : memref<2048xi8, 3> -> index -// CHECK-NEXT: %[[CAST:.*]] = arith.index_cast %[[INTPTR]] : index to i64 +// CHECK-NEXT: %[[CAST:.*]] = arith.index_cast %[[INTPTR]] exact : index to i64 gpu.func @memref_alloca(%laneid: index) { %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (memref<2048xi8, 3>) { %alloca = memref.alloca() : memref<2048xi8, 3> diff --git a/mlir/test/Dialect/XeGPU/xegpu-vector-linearize.mlir b/mlir/test/Dialect/XeGPU/xegpu-vector-linearize.mlir index 0bb7d7d3d8b1b..209ef056b28c9 100644 --- a/mlir/test/Dialect/XeGPU/xegpu-vector-linearize.mlir +++ b/mlir/test/Dialect/XeGPU/xegpu-vector-linearize.mlir @@ -91,7 +91,7 @@ func.func @test_vector_store_load_4x4x4(%buffer: memref<4x4x4xf32>) { // CHECK: %[[ADDI:.*]] = arith.addi %[[CAST2]], %[[CST]] : vector<4xindex> // CHECK: %[[INDEX_CAST1:.*]] = arith.index_cast %[[ADDI]] : vector<4xindex> to vector<4xi32> // CHECK: %[[MULI:.*]] = arith.muli %[[INDEX_CAST1]], %[[CAST1]] : vector<4xi32> -// CHECK: %[[INDEX_CAST2:.*]] = arith.index_cast %[[MULI]] : vector<4xi32> to vector<4xindex> +// CHECK: %[[INDEX_CAST2:.*]] = arith.index_cast %[[MULI]] exact : vector<4xi32> to vector<4xindex> // CHECK: %[[RESULT:.*]] = vector.shape_cast %[[INDEX_CAST2]] : vector<4xindex> to vector<2x2xindex> // CHECK: return %[[RESULT]] : vector<2x2xindex> func.func @test_linearize_index(%arg0: vector<2x2xindex>, %arg1: vector<2x2xi32>) -> vector<2x2xindex> {