Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -594,12 +594,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {

// Check if suitable for a bit test
if (N <= DL.getIndexSizeInBits(0u)) {
SmallPtrSet<const BasicBlock *, 4> Dests;
for (auto I : SI.cases())
Dests.insert(I.getCaseSuccessor());
DenseMap<const BasicBlock *, unsigned int> DestMap;
for (auto I : SI.cases()) {
const BasicBlock *BB = I.getCaseSuccessor();
++DestMap[BB];
}

if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
DL))
if (TLI->isSuitableForBitTests(DestMap, MinCaseVal, MaxCaseVal, DL))
return 1;
}

Expand Down
29 changes: 26 additions & 3 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1441,9 +1441,9 @@ class LLVM_ABI TargetLoweringBase {
/// \p High as its lowest and highest case values, and expects \p NumCmps
/// case value comparisons. Check if the number of destinations, comparison
/// metric, and range are all suitable.
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
const APInt &Low, const APInt &High,
const DataLayout &DL) const {
bool isSuitableForBitTests(
const DenseMap<const BasicBlock *, unsigned int> &DestCmps,
const APInt &Low, const APInt &High, const DataLayout &DL) const {
// FIXME: I don't think NumCmps is the correct metric: a single case and a
// range of cases both require only one branch to lower. Just looking at the
// number of clusters and destinations should be enough to decide whether to
Expand All @@ -1454,6 +1454,20 @@ class LLVM_ABI TargetLoweringBase {
if (!rangeFitsInWord(Low, High, DL))
return false;

unsigned NumDests = DestCmps.size();
unsigned NumCmps = 0;
unsigned int MaxBitTestEntry = 0;
for (auto &DestCmp : DestCmps) {
NumCmps += DestCmp.second;
if (DestCmp.second > MaxBitTestEntry)
MaxBitTestEntry = DestCmp.second;
}

// Separate comparisons might be cheaper when the number of comparisons is
// small; the threshold is target-specific.
if (MaxBitTestEntry < getMinimumBitTestCmps())
return false;

// Decide whether it's profitable to lower this range with bit tests. Each
// destination requires a bit test and branch, and there is an overall range
// check branch. For a small number of clusters, separate comparisons might
Expand Down Expand Up @@ -2063,6 +2077,9 @@ class LLVM_ABI TargetLoweringBase {

virtual bool isJumpTableRelative() const;

/// Return the minimum of largest number of comparisons in BitTest.
unsigned getMinimumBitTestCmps() const;

/// If a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
Register getStackPointerRegisterToSaveRestore() const {
Expand Down Expand Up @@ -2579,6 +2596,9 @@ class LLVM_ABI TargetLoweringBase {
/// Set to zero to generate unlimited jump tables.
void setMaximumJumpTableSize(unsigned);

/// Set the minimum of largest number of comparisons to generate BitTest.
void setMinimumBitTestCmps(unsigned Val);

/// If set to a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
void setStackPointerRegisterToSaveRestore(Register R) {
Expand Down Expand Up @@ -3705,6 +3725,9 @@ class LLVM_ABI TargetLoweringBase {
/// backend supports.
unsigned MinCmpXchgSizeInBits;

/// The minimum of largest number of comparisons to use bit test for switch.
unsigned MinimumBitTestCmps;

/// This indicates if the target supports unaligned atomic operations.
bool SupportsUnalignedAtomics;

Expand Down
21 changes: 11 additions & 10 deletions llvm/lib/CodeGen/SwitchLoweringUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,16 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
for (unsigned I = First; I <= Last; ++I)
JTProbs[Clusters[I].MBB] = BranchProbability::getZero();

DenseMap<const BasicBlock *, unsigned int> DestMap;
for (unsigned I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
Prob += Clusters[I].Prob;
const APInt &Low = Clusters[I].Low->getValue();
const APInt &High = Clusters[I].High->getValue();
NumCmps += (Low == High) ? 1 : 2;
unsigned int NumCmp = (Low == High) ? 1 : 2;
const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
DestMap[BB] += NumCmp;

if (I != First) {
// Fill the gap between this and the previous cluster.
const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
Expand All @@ -226,9 +230,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
}

unsigned NumDests = JTProbs.size();
if (TLI->isSuitableForBitTests(NumDests, NumCmps,
Clusters[First].Low->getValue(),
if (TLI->isSuitableForBitTests(DestMap, Clusters[First].Low->getValue(),
Clusters[Last].High->getValue(), *DL)) {
// Clusters[First..Last] should be lowered as bit tests instead.
return false;
Expand Down Expand Up @@ -372,20 +374,19 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
if (First == Last)
return false;

BitVector Dests(FuncInfo.MF->getNumBlockIDs());
unsigned NumCmps = 0;
DenseMap<const BasicBlock *, unsigned int> DestMap;
for (int64_t I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
Dests.set(Clusters[I].MBB->getNumber());
NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
unsigned NumCmp = (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
DestMap[BB] += NumCmp;
}
unsigned NumDests = Dests.count();

APInt Low = Clusters[First].Low->getValue();
APInt High = Clusters[Last].High->getValue();
assert(Low.slt(High));

if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
if (!TLI->isSuitableForBitTests(DestMap, Low, High, *DL))
return false;

APInt LowBound;
Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
Expand Down Expand Up @@ -90,6 +91,11 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
cl::desc("Minimum density for building a jump table in "
"an optsize function"));

// Hidden option (default 2) whose value the TargetLoweringBase constructor
// copies into MinimumBitTestCmps; a target can replace that value afterwards
// by calling setMinimumBitTestCmps().
static cl::opt<unsigned> MinimumBitTestCmpsOverride(
"min-bit-test-cmps", cl::init(2), cl::Hidden,
cl::desc("Set minimum of largest number of comparisons "
"to use bit test for switch."));

// FIXME: This option is only to test if the strict fp operation processed
// correctly by preventing mutating strict fp operation to normal fp operation
// during development. When the backend supports strict float operation, this
Expand Down Expand Up @@ -719,6 +725,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)

MinCmpXchgSizeInBits = 0;
SupportsUnalignedAtomics = false;

MinimumBitTestCmps = MinimumBitTestCmpsOverride;
}

// Define the virtual destructor out-of-line to act as a key method to anchor
Expand Down Expand Up @@ -2120,6 +2128,14 @@ bool TargetLoweringBase::isJumpTableRelative() const {
return getTargetMachine().isPositionIndependent();
}

/// Returns the stored MinimumBitTestCmps threshold. isSuitableForBitTests()
/// rejects bit-test lowering when the largest per-destination comparison
/// count is below this value.
unsigned TargetLoweringBase::getMinimumBitTestCmps() const {
return MinimumBitTestCmps;
}

/// Overwrites the MinimumBitTestCmps threshold with \p Val. The constructor
/// seeds the threshold from the -min-bit-test-cmps option, so a call made
/// afterwards replaces that value.
void TargetLoweringBase::setMinimumBitTestCmps(unsigned Val) {
MinimumBitTestCmps = Val;
}

Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
if (TM.Options.LoopAlignment)
return Align(TM.Options.LoopAlignment);
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ static cl::opt<unsigned> PPCMinimumJumpTableEntries(
"ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
cl::desc("Set minimum number of entries to use a jump table on PPC"));

// Hidden PPC option (default 3) that the PPCTargetLowering constructor passes
// to setMinimumBitTestCmps().
static cl::opt<unsigned> PPCMinimumBitTestCmps(
"ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
cl::desc("Set minimum of largest number of comparisons to use bit test for "
"switch on PPC."));

static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
"ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
cl::desc("max depth when checking alias info in GatherAllAliases()"));
Expand Down Expand Up @@ -1438,6 +1443,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Re-evaluate this value on future HWs that can do better with mtctr.
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);

// The default minimum of the largest number of comparisons in a BitTest
// cluster is 3.
setMinimumBitTestCmps(PPCMinimumBitTestCmps);

setMinFunctionAlignment(Align(4));
setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);

Expand Down
193 changes: 193 additions & 0 deletions llvm/test/CodeGen/PowerPC/bittest.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs < %s -O3 -mcpu=ppc -mtriple powerpc-ibm-aix \
; RUN: -ppc-asm-full-reg-names | FileCheck %s

; No destination block in this switch is reached by more than two case values
; (%sw.bb, %sw.bb1 and %sw.bb2 get two each, %sw.bb3 gets one), which is below
; the PPC default of 3 for -ppc-min-bit-test-cmps. The checks below expect a
; chain of compares and branches, with no shift-and-mask bit test.
define i32 @foo(i32 noundef signext %x) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stwu r1, -64(r1)
; CHECK-NEXT: stw r0, 72(r1)
; CHECK-NEXT: cmpwi r3, 8
; CHECK-NEXT: stw r31, 60(r1) # 4-byte Folded Spill
; CHECK-NEXT: mr r31, r3
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: ble cr0, L..BB0_4
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: cmpwi r31, 11
; CHECK-NEXT: bge cr0, L..BB0_7
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: cmplwi r31, 9
; CHECK-NEXT: beq cr0, L..BB0_9
; CHECK-NEXT: # %bb.3: # %entry
; CHECK-NEXT: cmplwi r31, 10
; CHECK-NEXT: beq cr0, L..BB0_11
; CHECK-NEXT: b L..BB0_13
; CHECK-NEXT: L..BB0_4: # %entry
; CHECK-NEXT: cmplwi r31, 4
; CHECK-NEXT: beq cr0, L..BB0_12
; CHECK-NEXT: # %bb.5: # %entry
; CHECK-NEXT: cmplwi r31, 7
; CHECK-NEXT: beq cr0, L..BB0_11
; CHECK-NEXT: # %bb.6: # %entry
; CHECK-NEXT: cmplwi r31, 8
; CHECK-NEXT: beq cr0, L..BB0_10
; CHECK-NEXT: b L..BB0_13
; CHECK-NEXT: L..BB0_7: # %entry
; CHECK-NEXT: beq cr0, L..BB0_10
; CHECK-NEXT: # %bb.8: # %entry
; CHECK-NEXT: cmplwi r31, 12
; CHECK-NEXT: bne cr0, L..BB0_13
; CHECK-NEXT: L..BB0_9: # %sw.bb2
; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: bl .foo3[PR]
; CHECK-NEXT: nop
; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: b L..BB0_13
; CHECK-NEXT: L..BB0_10: # %sw.bb1
; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: bl .foo2[PR]
; CHECK-NEXT: nop
; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: b L..BB0_13
; CHECK-NEXT: L..BB0_11: # %sw.bb
; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: bl .foo1[PR]
; CHECK-NEXT: nop
; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: b L..BB0_13
; CHECK-NEXT: L..BB0_12: # %sw.bb3
; CHECK-NEXT: li r3, 4
; CHECK-NEXT: bl .foo4[PR]
; CHECK-NEXT: nop
; CHECK-NEXT: li r3, 4
; CHECK-NEXT: L..BB0_13: # %return
; CHECK-NEXT: lwz r31, 60(r1) # 4-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: lwz r0, 8(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
switch i32 %x, label %return [
i32 7, label %sw.bb
i32 10, label %sw.bb
i32 8, label %sw.bb1
i32 11, label %sw.bb1
i32 9, label %sw.bb2
i32 12, label %sw.bb2
i32 4, label %sw.bb3
]

sw.bb: ; preds = %entry, %entry
tail call void @foo1(i32 noundef signext %x)
br label %return

sw.bb1: ; preds = %entry, %entry
tail call void @foo2(i32 noundef signext %x)
br label %return

sw.bb2: ; preds = %entry, %entry
tail call void @foo3(i32 noundef signext %x)
br label %return

sw.bb3: ; preds = %entry
tail call void @foo4(i32 noundef signext 4)
br label %return

return: ; preds = %sw.bb, %sw.bb1, %sw.bb2, %sw.bb3, %entry
%retval.0 = phi i32 [ 0, %entry ], [ 4, %sw.bb3 ], [ %x, %sw.bb2 ], [ %x, %sw.bb1 ], [ %x, %sw.bb ]
ret i32 %retval.0
}

; %sw.bb2 is reached by three case values (9, 10, 12), which meets the PPC
; default of 3 for -ppc-min-bit-test-cmps. The checks below expect a bit test:
; slw builds 1 << %x, then andi. tests it against mask 5632 (bits 9, 10, 12)
; for %sw.bb2 and mask 2304 (bits 8, 11) for %sw.bb1.
define i32 @goo(i32 noundef signext %x) {
; CHECK-LABEL: goo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: stwu r1, -64(r1)
; CHECK-NEXT: stw r0, 72(r1)
; CHECK-NEXT: cmplwi r3, 12
; CHECK-NEXT: stw r31, 60(r1) # 4-byte Folded Spill
; CHECK-NEXT: mr r31, r3
; CHECK-NEXT: bgt cr0, L..BB1_7
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: li r3, 1
; CHECK-NEXT: slw r3, r3, r31
; CHECK-NEXT: andi. r4, r3, 5632
; CHECK-NEXT: bne cr0, L..BB1_4
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: andi. r3, r3, 2304
; CHECK-NEXT: beq cr0, L..BB1_5
; CHECK-NEXT: # %bb.3: # %sw.bb1
; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: bl .foo2[PR]
; CHECK-NEXT: nop
; CHECK-NEXT: b L..BB1_9
; CHECK-NEXT: L..BB1_4: # %sw.bb2
; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: bl .foo3[PR]
; CHECK-NEXT: nop
; CHECK-NEXT: b L..BB1_9
; CHECK-NEXT: L..BB1_5: # %entry
; CHECK-NEXT: cmplwi r31, 7
; CHECK-NEXT: bne cr0, L..BB1_7
; CHECK-NEXT: # %bb.6: # %sw.bb
; CHECK-NEXT: li r3, 7
; CHECK-NEXT: li r31, 7
; CHECK-NEXT: bl .foo1[PR]
; CHECK-NEXT: nop
; CHECK-NEXT: b L..BB1_9
; CHECK-NEXT: L..BB1_7: # %entry
; CHECK-NEXT: cmplwi r31, 4
; CHECK-NEXT: li r31, 0
; CHECK-NEXT: bne cr0, L..BB1_9
; CHECK-NEXT: # %bb.8: # %sw.bb3
; CHECK-NEXT: li r3, 4
; CHECK-NEXT: li r31, 4
; CHECK-NEXT: bl .foo4[PR]
; CHECK-NEXT: nop
; CHECK-NEXT: L..BB1_9: # %return
; CHECK-NEXT: mr r3, r31
; CHECK-NEXT: lwz r31, 60(r1) # 4-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: lwz r0, 8(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
switch i32 %x, label %return [
i32 7, label %sw.bb
i32 8, label %sw.bb1
i32 11, label %sw.bb1
i32 9, label %sw.bb2
i32 10, label %sw.bb2
i32 12, label %sw.bb2
i32 4, label %sw.bb3
]

sw.bb: ; preds = %entry
tail call void @foo1(i32 noundef signext 7)
br label %return

sw.bb1: ; preds = %entry, %entry
tail call void @foo2(i32 noundef signext %x)
br label %return

sw.bb2: ; preds = %entry, %entry, %entry
tail call void @foo3(i32 noundef signext %x)
br label %return

sw.bb3: ; preds = %entry
tail call void @foo4(i32 noundef signext 4)
br label %return

return: ; preds = %sw.bb, %sw.bb1, %sw.bb2, %sw.bb3, %entry
%retval.0 = phi i32 [ 0, %entry ], [ 4, %sw.bb3 ], [ %x, %sw.bb2 ], [ %x, %sw.bb1 ], [ 7, %sw.bb ]
ret i32 %retval.0
}

declare void @foo1(i32 noundef signext)

declare void @foo2(i32 noundef signext)

declare void @foo3(i32 noundef signext)

declare void @foo4(i32 noundef signext)
Loading