Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SimplifyCFG] When only one case value is missing, replace default with that case #76669

Merged
merged 2 commits into from
Jan 3, 2024

Conversation

DianQK
Copy link
Member

@DianQK DianQK commented Jan 1, 2024

When the default branch is the last case, we can transform that branch into a concrete branch with an unreachable default branch.

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i64 @src(i64 %0) {
  %2 = urem i64 %0, 4
  switch i64 %2, label %5 [
    i64 1, label %3
    i64 2, label %3
    i64 3, label %4
  ]

3:                                                ; preds = %1, %1
  br label %5

4:                                                ; preds = %1
  br label %5

5:                                                ; preds = %1, %4, %3
  %.0 = phi i64 [ 2, %4 ], [ 1, %3 ], [ 0, %1 ]
  ret i64 %.0
}

define i64 @tgt(i64 %0) {
  %2 = urem i64 %0, 4
  switch i64 %2, label %unreachable [
    i64 0, label %5
    i64 1, label %3
    i64 2, label %3
    i64 3, label %4
  ]

unreachable:                              ; preds = %1
  unreachable

3:                                                ; preds = %1, %1
  br label %5

4:                                                ; preds = %1
  br label %5

5:                                                ; preds = %1, %4, %3
  %.0 = phi i64 [ 2, %4 ], [ 1, %3 ], [ 0, %1 ]
  ret i64 %.0
}

Alive2: https://alive2.llvm.org/ce/z/Y-PGXv

After transform to a lookup table, I believe tgt is better code.

The final instructions are as follows:

src:                                    # @src
        and     edi, 3
        lea     rax, [rdi - 1]
        cmp     rax, 2
        ja      .LBB0_1
        mov     rax, qword ptr [8*rdi + .Lswitch.table.src-8]
        ret
.LBB0_1:
        xor     eax, eax
        ret
tgt:                                    # @tgt
        and     edi, 3
        mov     rax, qword ptr [8*rdi + .Lswitch.table.tgt]
        ret
.Lswitch.table.src:
        .quad   1                               # 0x1
        .quad   1                               # 0x1
        .quad   2                               # 0x2

.Lswitch.table.tgt:
        .quad   0                               # 0x0
        .quad   1                               # 0x1
        .quad   1                               # 0x1
        .quad   2                               # 0x2

Godbolt: https://llvm.godbolt.org/z/borME8znd

Closes #73446.

@llvmbot
Copy link
Collaborator

llvmbot commented Jan 1, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Quentin Dian (DianQK)

Changes

Closes #73446.


Full diff: https://github.com/llvm/llvm-project/pull/76669.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+30-7)
  • (modified) llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll (+8-6)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 55e375670cc61e..a808861e97f3b7 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5414,11 +5414,13 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
 }
 
 static void createUnreachableSwitchDefault(SwitchInst *Switch,
-                                           DomTreeUpdater *DTU) {
+                                           DomTreeUpdater *DTU,
+                                           bool RemoveOrigDefaultBlock = true) {
   LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
   auto *BB = Switch->getParent();
   auto *OrigDefaultBlock = Switch->getDefaultDest();
-  OrigDefaultBlock->removePredecessor(BB);
+  if (RemoveOrigDefaultBlock)
+    OrigDefaultBlock->removePredecessor(BB);
   BasicBlock *NewDefaultBlock = BasicBlock::Create(
       BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
       OrigDefaultBlock);
@@ -5427,7 +5429,8 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch,
   if (DTU) {
     SmallVector<DominatorTree::UpdateType, 2> Updates;
     Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
-    if (!is_contained(successors(BB), OrigDefaultBlock))
+    if (RemoveOrigDefaultBlock &&
+        !is_contained(successors(BB), OrigDefaultBlock))
       Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
     DTU->applyUpdates(Updates);
   }
@@ -5609,10 +5612,30 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
       Known.getBitWidth() - (Known.Zero | Known.One).popcount();
   assert(NumUnknownBits <= Known.getBitWidth());
   if (HasDefault && DeadCases.empty() &&
-      NumUnknownBits < 64 /* avoid overflow */ &&
-      SI->getNumCases() == (1ULL << NumUnknownBits)) {
-    createUnreachableSwitchDefault(SI, DTU);
-    return true;
+      NumUnknownBits < 64 /* avoid overflow */) {
+    uint64_t AllNumCases = 1ULL << NumUnknownBits;
+    if (SI->getNumCases() == AllNumCases) {
+      createUnreachableSwitchDefault(SI, DTU);
+      return true;
+    }
+    // When only one case value is missing, replace default with that case.
+    if (SI->getNumCases() == AllNumCases - 1) {
+      uint64_t MissingCaseVal = 0;
+      for (const auto &Case : SI->cases())
+        MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
+      for (uint64_t I = 0; I < AllNumCases; I++)
+        MissingCaseVal ^= I;
+      auto *MissingCase =
+          cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
+      SI->addCase(MissingCase, SI->getDefaultDest());
+      SwitchInstProfUpdateWrapper SIW(*SI);
+      auto DefaultCaseWeight = SIW.getSuccessorWeight(0);
+      SIW.setSuccessorWeight(
+          SI->findCaseValue(MissingCase)->getSuccessorIndex(),
+          DefaultCaseWeight);
+      createUnreachableSwitchDefault(SI, DTU, false);
+      return true;
+    }
   }
 
   if (DeadCases.empty())
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
index 1662bb99f27bcc..c0893a4893bfc6 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -77,14 +77,14 @@ default:
   ret void
 }
 
-; This one is a negative test - we know the value of the default,
-; but that's about it
+; We can replace the default branch with case 3 since it is the only case that is missing.
 define void @test3(i2 %a) {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT:    switch i2 [[A:%.*]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT:    i2 0, label [[CASE0:%.*]]
-; CHECK-NEXT:    i2 1, label [[CASE1:%.*]]
-; CHECK-NEXT:    i2 -2, label [[CASE2:%.*]]
+; CHECK-NEXT:    switch i2 [[A:%.*]], label [[DOTUNREACHABLEDEFAULT:%.*]] [
+; CHECK-NEXT:      i2 0, label [[CASE0:%.*]]
+; CHECK-NEXT:      i2 1, label [[CASE1:%.*]]
+; CHECK-NEXT:      i2 -2, label [[CASE2:%.*]]
+; CHECK-NEXT:      i2 -1, label [[DEFAULT:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       common.ret:
 ; CHECK-NEXT:    ret void
@@ -97,6 +97,8 @@ define void @test3(i2 %a) {
 ; CHECK:       case2:
 ; CHECK-NEXT:    call void @foo(i32 2)
 ; CHECK-NEXT:    br label [[COMMON_RET]]
+; CHECK:       .unreachabledefault:
+; CHECK-NEXT:    unreachable
 ; CHECK:       default:
 ; CHECK-NEXT:    call void @foo(i32 3)
 ; CHECK-NEXT:    br label [[COMMON_RET]]

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Jan 1, 2024
llvm/lib/Transforms/Utils/SimplifyCFG.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/Utils/SimplifyCFG.cpp Show resolved Hide resolved
@dtcxzyw
Copy link
Member

dtcxzyw commented Jan 1, 2024

Emm, this patch seems to increase the inline cost estimation of switch insts. Not sure it is a regression.
Please have a look at bench/hermes/optimized/SourceMgr.cpp.ll.
https://github.com/dtcxzyw/llvm-opt-benchmark/pull/80/files

@DianQK DianQK force-pushed the replace-default-with-one-case branch from 44b251b to 4afd32e Compare January 1, 2024 12:47
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Please wait for additional approval from other reviewers.

llvm/lib/Transforms/Utils/SimplifyCFG.cpp Outdated Show resolved Hide resolved
Copy link
Member

@jmorse jmorse left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the purpose of adding the profile-data to the test case that's been updated, does it not work without that?

The original author apparently wished there to be a negative test case, if it's possible to keep a negative case and just add another test function for the new behaviour, IMHO that's preferred. (Perhaps that's not possible, but it would mean that the test coverage strictly improves).

The change here looks good -- however all the rationale for why this change is being made is in the rust bug report you link to. Could you please elaborate the comment on the added code ("When only one case value is missing...") to explain why this is a good thing to do. There's a risk that someone in the future tries to delete the code if it's not clear to them why it's beneficial.

To confirm my own understanding: adding an unreachable default is fine because we know the bits being switched upon are in a small finite set of cases; the new form has a better structure / is easier to codegen for? If so, sounds like a good change, LGTM.

@DianQK DianQK force-pushed the replace-default-with-one-case branch from 4afd32e to 4e93999 Compare January 2, 2024 12:53
@DianQK
Copy link
Member Author

DianQK commented Jan 2, 2024

What's the purpose of adding the profile-data to the test case that's been updated, does it not work without that?

This is just to verify that we are updating the profile-data correctly. I've added a test case without profile-data.

The original author apparently wished there to be a negative test case, if it's possible to keep a negative case and just add another test function for the new behaviour, IMHO that's preferred. (Perhaps that's not possible, but it would mean that the test coverage strictly improves).

As I was adding test cases, I found this one that needs to be updated. Retrieved from 98a2dab, cc @preames.

The change here looks good -- however all the rationale for why this change is being made is in the rust bug report you link to. Could you please elaborate the comment on the added code ("When only one case value is missing...") to explain why this is a good thing to do. There's a risk that someone in the future tries to delete the code if it's not clear to them why it's beneficial.

I have added detailed explanations in PR.

To confirm my own understanding: adding an unreachable default is fine because we know the bits being switched upon are in a small finite set of cases; the new form has a better structure / is easier to codegen for? If so, sounds like a good change, LGTM.

Yes, we no longer need to think about the default branch.

@DianQK DianQK requested a review from preames January 2, 2024 13:03
@DianQK DianQK force-pushed the replace-default-with-one-case branch from 4e93999 to a77ed9d Compare January 2, 2024 13:06
@nikic
Copy link
Contributor

nikic commented Jan 2, 2024

I'd suggest adding a test variant that allows conversion to lookup table, as that's where we expect to see the main improvement.

@DianQK DianQK force-pushed the replace-default-with-one-case branch from a77ed9d to cdeee33 Compare January 2, 2024 14:39
@DianQK
Copy link
Member Author

DianQK commented Jan 2, 2024

I'd suggest adding a test variant that allows conversion to lookup table, as that's where we expect to see the main improvement.

I added two test variants.

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

…th that case

Eliminating the default branch will provide more opportunities for optimization,
such as lookup tables.
@DianQK DianQK force-pushed the replace-default-with-one-case branch from cdeee33 to 878c1f4 Compare January 2, 2024 22:14
@DianQK
Copy link
Member Author

DianQK commented Jan 2, 2024

@jmorse @preames
Any more comments please? I will wait for the merge in two days.

Copy link
Member

@jmorse jmorse left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM; the test consideration was just to see if there was an obvious way to preserve existing coverage, it sounds like that's not easy or maybe possible, so never mind.

@DianQK DianQK merged commit 7d81e07 into llvm:main Jan 3, 2024
4 checks passed
@DianQK DianQK deleted the replace-default-with-one-case branch January 3, 2024 01:22
@aeubanks
Copy link
Contributor

actually, this seems to be uncovering some bad behavior during codegen, still investigating

@dwblaikie
Copy link
Collaborator

actually, this seems to be uncovering some bad behavior during codegen, still investigating

To expound a bit on this - the test case we're looking at goes from 300MB to 18GB peak RAM usage.

81% of that peak memory usage is coming from SSAUpdaterImpl. (TailDuplicateBase, TailDuplicator, MachineSSAUpdater)
(oh, and another 12% from the same place, basically - just BumpPtrAllocatorImpl for the operands rather than new for the MachineInstrs - and another 3% of more BumpPtrAllocatorImpl in the same place... so 96%)

@dwblaikie
Copy link
Collaborator

& the test case in question has a rather long/machine generated switch statement - so something related to that is probably triggering some pathology

@DianQK
Copy link
Member Author

DianQK commented Jan 12, 2024

actually, this seems to be uncovering some bad behavior during codegen, still investigating

To expound a bit on this - the test case we're looking at goes from 300MB to 18GB peak RAM usage.

81% of that peak memory usage is coming from SSAUpdaterImpl. (TailDuplicateBase, TailDuplicator, MachineSSAUpdater) (oh, and another 12% from the same place, basically - just BumpPtrAllocatorImpl for the operands rather than new for the MachineInstrs - and another 3% of more BumpPtrAllocatorImpl in the same place... so 96%)

Could you share a test case? This looks like a different issue than #77831?

@dwblaikie
Copy link
Collaborator

actually, this seems to be uncovering some bad behavior during codegen, still investigating

To expound a bit on this - the test case we're looking at goes from 300MB to 18GB peak RAM usage.
81% of that peak memory usage is coming from SSAUpdaterImpl. (TailDuplicateBase, TailDuplicator, MachineSSAUpdater) (oh, and another 12% from the same place, basically - just BumpPtrAllocatorImpl for the operands rather than new for the MachineInstrs - and another 3% of more BumpPtrAllocatorImpl in the same place... so 96%)

Could you share a test case? This looks like a different issue than #77831?

Will share as soon as we have something shareable - it's proving a bit difficult to reduce something to a shareable/non-private thing. We're hoping that by sharing the info we can it might be enough to motivate a revert or help inspire you/others to figure out the bug and/or guess at a representative test case yourself in the mean time.

But, yeah, we'll keep working on something shareable in the mean time.

qiongsiwu added a commit that referenced this pull request Jan 12, 2024
…ition is Too Wide (#77831)

#76669 taught SimplifyCFG to
handle switches when `default` has only one case. When the `switch`'s
condition is wider than 64 bit, the current implementation can calculate
the wrong default value. This PR skips cases where the condition is too
wide.
@qiongsiwu
Copy link
Contributor

#77831 has landed so if we revert this PR we will need to revert #77831 as well.

@joanahalili
Copy link

joanahalili commented Jan 16, 2024

actually, this seems to be uncovering some bad behavior during codegen, still investigating

To expound a bit on this - the test case we're looking at goes from 300MB to 18GB peak RAM usage.
81% of that peak memory usage is coming from SSAUpdaterImpl. (TailDuplicateBase, TailDuplicator, MachineSSAUpdater) (oh, and another 12% from the same place, basically - just BumpPtrAllocatorImpl for the operands rather than new for the MachineInstrs - and another 3% of more BumpPtrAllocatorImpl in the same place... so 96%)

Could you share a test case? This looks like a different issue than #77831?

Will share as soon as we have something shareable - it's proving a bit difficult to reduce something to a shareable/non-private thing. We're hoping that by sharing the info we can it might be enough to motivate a revert or help inspire you/others to figure out the bug and/or guess at a representative test case yourself in the mean time.

But, yeah, we'll keep working on something shareable in the mean time.

I am adding here a reproducer where we see a 5x memory increase due to this commit. To reproduce:
clang -cc1 -O1 -emit-obj reduced.ll -o /dev/null

repro.zip

Could you please revert?

@dwblaikie
Copy link
Collaborator

I did finally get something a bit smaller (exact file attached)

int src();
int f1(unsigned int *b) {
#define SWITCH(in1, out1) \
  unsigned int out1;\
  switch (in1) {           \
    case 0: out1 = b[0] >> 0; break; \
... \
    case 31: out1 = b[0] >> 31; break; \
    case 32: out1 = b[1] >> 0; break; \
... \
    case 63: out1 = b[1] >> 31; break; \
...
    case 95: out1 = b[2] >> 31; break; \
    case 96: out1 = b[2] >> 0; break; \
...
    case 126: out1 = b[2] >> 30; break; \
    default:  out1 = b[2] >> 31; break; \
  }
  unsigned int r = src();
  SWITCH(r >> 1 & 0x7fU, v1);
  SWITCH(r >> 27 | (r << 5 & 0x60U), v2);
  SWITCH(r >> 2 & 0x7fu, v3);
  SWITCH(r >> 9 & 0x7fu, v4);
  SWITCH(r >> 16 & 0x7fu, v5);
  SWITCH(r >> 23 & 0x7fu, v6);
  SWITCH(r >> 30 | (r << 2 & 0x7fu), v7);
  SWITCH(r >> 7 & 0x7fu, v8);
  SWITCH(r >> 12 & 0x7fu, v9);
  SWITCH(r << 4 & 0x7fu, v10);
  SWITCH(r >> 10 & 0x7fu, v11);
  SWITCH(r >> 24 & 0x7fu, v12);
  SWITCH(r >> 31 | (r << 1 & 0x7eu), v13);
  SWITCH(r >> 6 & 0x7fu, v14);
  SWITCH(r >> 13 & 0x7fu, v15);
  SWITCH(r >> 20 & 0x7fu, v16);
  SWITCH(r >> 27 | (r << 5 & 0x60u), v17);
  SWITCH(r >> 3 & 0x7fu, v18);
  SWITCH(r >> 9 & 0x7fu, v19);
  SWITCH(r >> 16 & 0x7fu, v20);
  SWITCH(r >> 23 & 0x7fu, v21);
  SWITCH(r >> 30 | (r << 2 & 0x7cu), v22);
  SWITCH(r >> 7 & 0x7fu, v23);
  SWITCH(r >> 12 & 0x7fu, v24);
  SWITCH(r >> 14 & 0x7fu, v25);
  SWITCH(r >> 15 & 0x7fu, v26);
  return (v1 | v2 | v3 | v4 | v5 | v6 | v7 | v8 | v9 | v10 | v11 | v12 | v13 |
          v14 | v15 | v16 | v17 | v18 | v19 | v20 | v21 | v22 | v23 | v24 | v25 | v26);
}
OLD=`/usr/bin/time -f '%M' clang_before -cc1 -O1  -emit-obj  oom_manual.cc -o /dev/null 2>&1`
NEW=`/usr/bin/time -f '%M' clang_after -cc1 -O1  -emit-obj  oom_manual.cc -o /dev/null 2>&1`
echo $OLD
echo $NEW
expr $OLD "*" 10 "<" $NEW > /dev/null
$ ./test_manual.sh
119816
1248148
$ echo $?
0

oom_manual.cc.txt

@DianQK
Copy link
Member Author

DianQK commented Jan 17, 2024

Could you please revert?

If it does block something important, that's fine. :) But I'll prioritize investigating this one.

oom_manual.cc.txt

I think this example could probably be reduced to two switch statements. (Perhaps there are multiple issues here.) I can also manually change the default branch to unreachable to reproduce in clang 17.

e.g.

case 127:  out1 = b[2] >> 31; break; \
default:  __builtin_unreachable(); break; \

alexfh added a commit that referenced this pull request Jan 17, 2024
…fault with that case (#76669)"

This reverts commit 7d81e07, which introduces a
compiler memory usage regression. See
#76669 (comment)
@alexfh
Copy link
Contributor

alexfh commented Jan 17, 2024

Could you please revert?

If it does block something important, that's fine.

I've prepared #78469 to revert both commits. Waiting for premerge checks.

alexfh added a commit that referenced this pull request Jan 17, 2024
…the Condition is Too Wide" (#78469)

Reverts #77831, which depends on #76669, which
seriously regresses compilation time / memory usage see
#76669 (comment).
@dwblaikie
Copy link
Collaborator

oom_manual.cc.txt

I think this example could probably be reduced to two switch statements.

Certainly plausible - I tried my best to reduce it as much as possible with both automatic and manual reduction - but no doubt there's more that could be done (yeah, didn't try merging the switches to just make larger switches) & I'd set my boundary at 10x memory usage, so the problem probably occurs at smaller amounts - but wanted to make sure something that made the issue clear/wasn't only marginally problematic.

(Perhaps there are multiple issues here.) I can also manually change the default branch to unreachable to reproduce in clang 17.

Yeah, since this wasn't in the area you changed, it's presumably a case where this change caused some IR that tickles an existing problem in another pass (tail deduplication) - so, yeah, that it's reachable before your change through other codepaths sounds plausible/believable too. But causing the problem to show up more often might be enough reason to hold off on this change until the follow-on issue might be resolved or at least worked around.

@DianQK
Copy link
Member Author

DianQK commented Jan 18, 2024

oom_manual.cc.txt

I think this example could probably be reduced to two switch statements.

Certainly plausible - I tried my best to reduce it as much as possible with both automatic and manual reduction - but no doubt there's more that could be done (yeah, didn't try merging the switches to just make larger switches) & I'd set my boundary at 10x memory usage, so the problem probably occurs at smaller amounts - but wanted to make sure something that made the issue clear/wasn't only marginally problematic.

This example will make the problem more obvious than the two switch statements. I'll create an issue that using a small case and link back to this one.

(Perhaps there are multiple issues here.) I can also manually change the default branch to unreachable to reproduce in clang 17.

Yeah, since this wasn't in the area you changed, it's presumably a case where this change caused some IR that tickles an existing problem in another pass (tail deduplication) - so, yeah, that it's reachable before your change through other codepaths sounds plausible/believable too. But causing the problem to show up more often might be enough reason to hold off on this change until the follow-on issue might be resolved or at least worked around.

Yeah, I often see commits being reverted. :)

ampandey-1995 pushed a commit to ampandey-1995/llvm-project that referenced this pull request Jan 19, 2024
…the Condition is Too Wide" (llvm#78469)

Reverts llvm#77831, which depends on llvm#76669, which
seriously regresses compilation time / memory usage see
llvm#76669 (comment).
justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request Jan 28, 2024
…ition is Too Wide (llvm#77831)

llvm#76669 taught SimplifyCFG to
handle switches when `default` has only one case. When the `switch`'s
condition is wider than 64 bit, the current implementation can calculate
the wrong default value. This PR skips cases where the condition is too
wide.
DianQK added a commit that referenced this pull request May 25, 2024
…fault with that case (#76669)"

When the default branch is the last case, we can transform that branch
into a concrete branch with an unreachable default branch.

```llvm
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i64 @src(i64 %0) {
  %2 = urem i64 %0, 4
  switch i64 %2, label %5 [
    i64 1, label %3
    i64 2, label %3
    i64 3, label %4
  ]

3:                                                ; preds = %1, %1
  br label %5

4:                                                ; preds = %1
  br label %5

5:                                                ; preds = %1, %4, %3
  %.0 = phi i64 [ 2, %4 ], [ 1, %3 ], [ 0, %1 ]
  ret i64 %.0
}

define i64 @tgt(i64 %0) {
  %2 = urem i64 %0, 4
  switch i64 %2, label %unreachable [
    i64 0, label %5
    i64 1, label %3
    i64 2, label %3
    i64 3, label %4
  ]

unreachable:                              ; preds = %1
  unreachable

3:                                                ; preds = %1, %1
  br label %5

4:                                                ; preds = %1
  br label %5

5:                                                ; preds = %1, %4, %3
  %.0 = phi i64 [ 2, %4 ], [ 1, %3 ], [ 0, %1 ]
  ret i64 %.0
}
```

Alive2: https://alive2.llvm.org/ce/z/Y-PGXv

After transform to a lookup table, I believe `tgt` is better code.

The final instructions are as follows:

```asm
src:                                    # @src
        and     edi, 3
        lea     rax, [rdi - 1]
        cmp     rax, 2
        ja      .LBB0_1
        mov     rax, qword ptr [8*rdi + .Lswitch.table.src-8]
        ret
.LBB0_1:
        xor     eax, eax
        ret
tgt:                                    # @tgt
        and     edi, 3
        mov     rax, qword ptr [8*rdi + .Lswitch.table.tgt]
        ret
.Lswitch.table.src:
        .quad   1                               # 0x1
        .quad   1                               # 0x1
        .quad   2                               # 0x2

.Lswitch.table.tgt:
        .quad   0                               # 0x0
        .quad   1                               # 0x1
        .quad   1                               # 0x1
        .quad   2                               # 0x2
```

Godbolt: https://llvm.godbolt.org/z/borME8znd

Closes #73446.

(cherry picked from commit 7d81e07)
DianQK pushed a commit that referenced this pull request May 25, 2024
…the Condition is Too Wide (#77831)"

#76669 taught SimplifyCFG to
handle switches when `default` has only one case. When the `switch`'s
condition is wider than 64 bit, the current implementation can calculate
the wrong default value. This PR skips cases where the condition is too
wide.

(cherry picked from commit 39bb790)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Remove the default branch of a switch
10 participants