Skip to content

AMDGPU: Add range attribute to mbcnt intrinsic callsites#189191

Open
arsenm wants to merge 1 commit intomainfrom
users/arsenm/amdgpu/add-range-callsite-attr-mbcnt
Open

AMDGPU: Add range attribute to mbcnt intrinsic callsites#189191
arsenm wants to merge 1 commit intomainfrom
users/arsenm/amdgpu/add-range-callsite-attr-mbcnt

Conversation

@arsenm
Copy link
Copy Markdown
Contributor

@arsenm arsenm commented Mar 28, 2026

It seems the known bits handling added in 686987a
is insufficient to perform many range based optimizations. For some reason
computeConstantRange doesn't fall back on KnownBits, and has a separate,
less used form which tries to use computeKnownBits.

It seems the known bits handling added in 686987a
is insufficient to perform many range based optimizations. For some reason
computeConstantRange doesn't fall back on KnownBits, and has a separate,
less used form which tries to use computeKnownBits.
@arsenm arsenm requested a review from ruiling March 28, 2026 20:15
Copy link
Copy Markdown
Contributor Author

arsenm commented Mar 28, 2026

This stack of pull requests is managed by Graphite. Learn more about stacking.

@arsenm arsenm marked this pull request as ready for review March 28, 2026 20:15
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Mar 28, 2026
@llvmbot
Copy link
Copy Markdown
Member

llvmbot commented Mar 28, 2026

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

It seems the known bits handling added in 686987a
is insufficient to perform many range based optimizations. For some reason
computeConstantRange doesn't fall back on KnownBits, and has a separate,
less used form which tries to use computeKnownBits.


Patch is 27.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/189191.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+22-2)
  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll (+1-1)
  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll (+22-22)
  • (modified) llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll (+236-15)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4f866087a1309..1ef6bada6a351 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1422,11 +1422,31 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 
     break;
   }
-  case Intrinsic::amdgcn_mbcnt_hi: {
+  case Intrinsic::amdgcn_mbcnt_hi:
     // exec_hi is all 0, so this is just a copy.
     if (ST->isWave32())
       return IC.replaceInstUsesWith(II, II.getArgOperand(1));
-    break;
+    [[fallthrough]];
+  case Intrinsic::amdgcn_mbcnt_lo: {
+    ConstantRange AccRange = computeConstantRange(II.getArgOperand(1),
+                                                  /*ForSigned=*/false);
+    if (AccRange.isFullSet())
+      return nullptr;
+
+    // TODO: Can raise upper bound by inspecting first argument.
+    ConstantRange MbcntRange(APInt(32, 0), APInt(32, 32 + 1));
+    ConstantRange ComputedRange = AccRange.add(MbcntRange);
+    if (ComputedRange.isFullSet())
+      return nullptr;
+
+    if (std::optional<ConstantRange> ExistingRange = II.getRange()) {
+      ComputedRange = ComputedRange.intersectWith(*ExistingRange);
+      if (ComputedRange == *ExistingRange)
+        return nullptr;
+    }
+
+    II.addRangeRetAttr(ComputedRange);
+    return nullptr;
   }
   case Intrinsic::amdgcn_ballot: {
     Value *Arg = II.getArgOperand(0);
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll b/llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll
index c87df041f8a26..1350f68ede271 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/canonicalize-add-to-gep.ll
@@ -6,7 +6,7 @@
 define amdgpu_ps <2 x float> @turn_add_into_gep(ptr addrspace(1) inreg %sbase) {
 ; CHECK-LABEL: define amdgpu_ps <2 x float> @turn_add_into_gep(
 ; CHECK-SAME: ptr addrspace(1) inreg [[SBASE:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[V:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NEXT:    [[V:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[V]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[MUL]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(1) [[SBASE]], i64 [[TMP1]]
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll
index 8c2e6fe19bd20..87fc259bdebff 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.wave.shuffle.ll
@@ -8,21 +8,21 @@
 define i32 @test_wave_shuffle_self_select(i32 %val) {
 ; CHECK-W32-LABEL: define i32 @test_wave_shuffle_self_select(
 ; CHECK-W32-SAME: i32 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-W32-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W32-NEXT:    [[TID:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; CHECK-W32-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[TID]])
 ; CHECK-W32-NEXT:    ret i32 [[RES]]
 ;
 ; CHECK-W64-LABEL: define i32 @test_wave_shuffle_self_select(
 ; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-W64-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-W64-NEXT:    [[TID1:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TID]])
+; CHECK-W64-NEXT:    [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W64-NEXT:    [[TID1:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; CHECK-W64-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[TID1]])
 ; CHECK-W64-NEXT:    ret i32 [[RES]]
 ;
 ; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_self_select(
 ; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[TID]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    ret i32 [[RES]]
 ;
@@ -48,8 +48,8 @@ define i32 @test_wave_shuffle_dpp_row_share_0(i32 %val) {
 ;
 ; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_0(
 ; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[MASKED:%.*]] = and i32 [[TID]], 112
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[MASKED]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    ret i32 [[RES]]
@@ -75,8 +75,8 @@ define i32 @test_wave_shuffle_dpp_row_share_0_no_or(i32 %val) {
 ;
 ; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_0_no_or(
 ; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[MASKED:%.*]] = and i32 [[TID]], 112
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[MASKED]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    ret i32 [[RES]]
@@ -101,8 +101,8 @@ define i32 @test_wave_shuffle_dpp_row_share_7(i32 %val) {
 ;
 ; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7(
 ; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[MASKED:%.*]] = and i32 [[TID]], 48
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
@@ -119,23 +119,23 @@ define i32 @test_wave_shuffle_dpp_row_share_7(i32 %val) {
 define i32 @test_wave_shuffle_dpp_row_share_7_no_mask(i32 %val) {
 ; CHECK-W32-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_no_mask(
 ; CHECK-W32-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-W32-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W32-NEXT:    [[TID:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; CHECK-W32-NEXT:    [[SHARE_7:%.*]] = or i32 [[TID]], 7
 ; CHECK-W32-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
 ; CHECK-W32-NEXT:    ret i32 [[RES]]
 ;
 ; CHECK-W64-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_no_mask(
 ; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-W64-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-W64-NEXT:    [[TID1:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TID]])
+; CHECK-W64-NEXT:    [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W64-NEXT:    [[TID1:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; CHECK-W64-NEXT:    [[SHARE_7:%.*]] = or i32 [[TID1]], 7
 ; CHECK-W64-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
 ; CHECK-W64-NEXT:    ret i32 [[RES]]
 ;
 ; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_no_mask(
 ; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[SHARE_7:%.*]] = or i32 [[TID]], 7
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    ret i32 [[RES]]
@@ -158,7 +158,7 @@ define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(i32 %val) {
 ;
 ; CHECK-W64-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(
 ; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-W64-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W64-NEXT:    [[TID:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; CHECK-W64-NEXT:    [[MASKED:%.*]] = and i32 [[TID]], 48
 ; CHECK-W64-NEXT:    [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
 ; CHECK-W64-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
@@ -166,7 +166,7 @@ define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(i32 %val) {
 ;
 ; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_7_lo_only(
 ; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[MASKED:%.*]] = and i32 [[TID]], 48
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
@@ -190,8 +190,8 @@ define i32 @test_wave_shuffle_dpp_row_share_w32_mask(i32 %val) {
 ;
 ; CHECK-W64-LABEL: define i32 @test_wave_shuffle_dpp_row_share_w32_mask(
 ; CHECK-W64-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-W64-NEXT:    [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-W64-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-W64-NEXT:    [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-W64-NEXT:    [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; CHECK-W64-NEXT:    [[MASKED:%.*]] = and i32 [[TID]], 16
 ; CHECK-W64-NEXT:    [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
 ; CHECK-W64-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
@@ -199,8 +199,8 @@ define i32 @test_wave_shuffle_dpp_row_share_w32_mask(i32 %val) {
 ;
 ; CHECK-NO-WAVE-SIZE-LABEL: define i32 @test_wave_shuffle_dpp_row_share_w32_mask(
 ; CHECK-NO-WAVE-SIZE-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; CHECK-NO-WAVE-SIZE-NEXT:    [[LO:%.*]] = tail call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NO-WAVE-SIZE-NEXT:    [[TID:%.*]] = tail call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[MASKED:%.*]] = and i32 [[TID]], 16
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[SHARE_7:%.*]] = or disjoint i32 [[MASKED]], 7
 ; CHECK-NO-WAVE-SIZE-NEXT:    [[RES:%.*]] = tail call i32 @llvm.amdgcn.wave.shuffle.i32(i32 [[VAL]], i32 [[SHARE_7]])
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll b/llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll
index dfeaa872b846d..e4de4b947a1b6 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll
@@ -72,19 +72,19 @@ define i32 @mbcnt_lo_hi(i32 %x, i32 %y, i32 %z) {
 
 define i32 @ockl_lane_u32() {
 ; DEFAULT-LABEL: define i32 @ockl_lane_u32() {
-; DEFAULT-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; DEFAULT-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; DEFAULT-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; DEFAULT-NEXT:    [[HI:%.*]] = call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; DEFAULT-NEXT:    ret i32 [[HI]]
 ;
 ; WAVE32-LABEL: define i32 @ockl_lane_u32
 ; WAVE32-SAME: () #[[ATTR1]] {
-; WAVE32-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE32-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; WAVE32-NEXT:    ret i32 [[LO]]
 ;
 ; WAVE64-LABEL: define i32 @ockl_lane_u32
 ; WAVE64-SAME: () #[[ATTR1]] {
-; WAVE64-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; WAVE64-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; WAVE64-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE64-NEXT:    [[HI:%.*]] = call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; WAVE64-NEXT:    ret i32 [[HI]]
 ;
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
@@ -94,17 +94,17 @@ define i32 @ockl_lane_u32() {
 
 define i32 @mbcnt_lo_and63() {
 ; DEFAULT-LABEL: define i32 @mbcnt_lo_and63() {
-; DEFAULT-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; DEFAULT-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; DEFAULT-NEXT:    ret i32 [[LO]]
 ;
 ; WAVE32-LABEL: define i32 @mbcnt_lo_and63
 ; WAVE32-SAME: () #[[ATTR1]] {
-; WAVE32-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE32-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; WAVE32-NEXT:    ret i32 [[LO]]
 ;
 ; WAVE64-LABEL: define i32 @mbcnt_lo_and63
 ; WAVE64-SAME: () #[[ATTR1]] {
-; WAVE64-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE64-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; WAVE64-NEXT:    ret i32 [[LO]]
 ;
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
@@ -114,7 +114,7 @@ define i32 @mbcnt_lo_and63() {
 
 define i32 @mbcnt_hi_and31() {
 ; DEFAULT-LABEL: define i32 @mbcnt_hi_and31() {
-; DEFAULT-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
+; DEFAULT-NEXT:    [[HI:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
 ; DEFAULT-NEXT:    ret i32 [[HI]]
 ;
 ; WAVE32-LABEL: define i32 @mbcnt_hi_and31
@@ -123,7 +123,7 @@ define i32 @mbcnt_hi_and31() {
 ;
 ; WAVE64-LABEL: define i32 @mbcnt_hi_and31
 ; WAVE64-SAME: () #[[ATTR1]] {
-; WAVE64-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
+; WAVE64-NEXT:    [[HI:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
 ; WAVE64-NEXT:    ret i32 [[HI]]
 ;
   %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 0)
@@ -133,19 +133,19 @@ define i32 @mbcnt_hi_and31() {
 
 define i32 @ockl_lane_u32_and127() {
 ; DEFAULT-LABEL: define i32 @ockl_lane_u32_and127() {
-; DEFAULT-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; DEFAULT-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; DEFAULT-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; DEFAULT-NEXT:    [[HI:%.*]] = call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; DEFAULT-NEXT:    ret i32 [[HI]]
 ;
 ; WAVE32-LABEL: define i32 @ockl_lane_u32_and127
 ; WAVE32-SAME: () #[[ATTR1]] {
-; WAVE32-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE32-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
 ; WAVE32-NEXT:    ret i32 [[LO]]
 ;
 ; WAVE64-LABEL: define i32 @ockl_lane_u32_and127
 ; WAVE64-SAME: () #[[ATTR1]] {
-; WAVE64-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; WAVE64-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
+; WAVE64-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; WAVE64-NEXT:    [[HI:%.*]] = call range(i32 0, 65) i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
 ; WAVE64-NEXT:    ret i32 [[HI]]
 ;
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
@@ -153,5 +153,226 @@ define i32 @ockl_lane_u32_and127() {
   %and = and i32 %hi, 127
   ret i32 %and
 }
+
+define i32 @known_range_mbcnt_lo(i32 %unknown) {
+; DEFAULT-LABEL: define i32 @known_range_mbcnt_lo
+; DEFAULT-SAME: (i32 [[UNKNOWN:%.*]]) {
+; DEFAULT-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 [[UNKNOWN]], i32 0)
+; DEFAULT-NEXT:    ret i32 [[LO]]
+;
+; WAVE32-LABEL: define i32 @known_range_mbcnt_lo
+; WAVE32-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE32-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 [[UNKNOWN]], i32 0)
+; WAVE32-NEXT:    ret i32 [[LO]]
+;
+; WAVE64-LABEL: define i32 @known_range_mbcnt_lo
+; WAVE64-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE64-NEXT:    [[LO:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.lo(i32 [[UNKNOWN]], i32 0)
+; WAVE64-NEXT:    ret i32 [[LO]]
+;
+  %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %unknown, i32 0)
+  %min = call i32 @llvm.umin.i32(i32 %lo, i32 32)
+  ret i32 %min
+}
+
+define i32 @mbcnt_lo_add_unknown(i32 %unknown) {
+; DEFAULT-LABEL: define i32 @mbcnt_lo_add_unknown
+; DEFAULT-SAME: (i32 [[UNKNOWN:%.*]]) {
+; DEFAULT-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 [[UNKNOWN]])
+; DEFAULT-NEXT:    ret i32 [[LO]]
+;
+; WAVE32-LABEL: define i32 @mbcnt_lo_add_unknown
+; WAVE32-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE32-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 [[UNKNOWN]])
+; WAVE32-NEXT:    ret i32 [[LO]]
+;
+; WAVE64-LABEL: define i32 @mbcnt_lo_add_unknown
+; WAVE64-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE64-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 [[UNKNOWN]])
+; WAVE64-NEXT:    ret i32 [[LO]]
+;
+  %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 %unknown)
+  ret i32 %lo
+}
+
+define i32 @mbcnt_hi_add_unknown(i32 %unknown) {
+; DEFAULT-LABEL: define i32 @mbcnt_hi_add_unknown
+; DEFAULT-SAME: (i32 [[UNKNOWN:%.*]]) {
+; DEFAULT-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[UNKNOWN]])
+; DEFAULT-NEXT:    ret i32 [[HI]]
+;
+; WAVE32-LABEL: define i32 @mbcnt_hi_add_unknown
+; WAVE32-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE32-NEXT:    ret i32 [[UNKNOWN]]
+;
+; WAVE64-LABEL: define i32 @mbcnt_hi_add_unknown
+; WAVE64-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE64-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[UNKNOWN]])
+; WAVE64-NEXT:    ret i32 [[HI]]
+;
+  %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %unknown)
+  ret i32 %hi
+}
+
+define i32 @known_range_mbcnt_hi(i32 %unknown) {
+; DEFAULT-LABEL: define i32 @known_range_mbcnt_hi
+; DEFAULT-SAME: (i32 [[UNKNOWN:%.*]]) {
+; DEFAULT-NEXT:    [[HI:%.*]] = call range(i32 0, 33) i32 @llvm.amdgcn.mbcnt.hi(i32 [[UNKNOWN]], i32 0)
+; DEFAULT-NEXT:    ret i32 [[HI]]
+;
+; WAVE32-LABEL: define i32 @known_range_mbcnt_hi
+; WAVE32-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR1]] {
+; WAVE32-NEXT:    ret i32 0
+;
+; WAVE64-LABEL: define i32 @known_range_mbcnt_hi
+; WAVE64-SAM...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AMDGPU llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants