diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index cfc8a4243e894..082e7b4371d27 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -427,6 +427,7 @@ struct SDNodeFlags {
     // implied by its producer as, e.g, operations between producer and PTRADD
     // that affect the provenance may have been optimized away.
     InBounds = 1 << 15,
+    NoZero = 1 << 16,

     // NOTE: Please update LargestValue in LLVM_DECLARE_ENUM_AS_BITMASK below
     // the class definition when adding new flags.
@@ -468,6 +469,7 @@ struct SDNodeFlags {
   void setNoFPExcept(bool b) { setFlag<NoFPExcept>(b); }
   void setUnpredictable(bool b) { setFlag<Unpredictable>(b); }
   void setInBounds(bool b) { setFlag<InBounds>(b); }
+  void setNoZero(bool b) { setFlag<NoZero>(b); }

   // These are accessors for each flag.
   bool hasNoUnsignedWrap() const { return Flags & NoUnsignedWrap; }
@@ -486,6 +488,7 @@ struct SDNodeFlags {
   bool hasNoFPExcept() const { return Flags & NoFPExcept; }
   bool hasUnpredictable() const { return Flags & Unpredictable; }
   bool hasInBounds() const { return Flags & InBounds; }
+  bool hasNoZero() const { return Flags & NoZero; }

   bool operator==(const SDNodeFlags &Other) const {
     return Flags == Other.Flags;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 587c1372b19cb..b4b4879c515c5 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -786,8 +786,32 @@ bool CodeGenPrepare::eliminateAssumptions(Function &F) {
       if (auto *Assume = dyn_cast<AssumeInst>(I)) {
         MadeChange = true;
         Value *Operand = Assume->getOperand(0);
+        if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Operand)) {
+          // Check if we are comparing an Argument against a Constant
+          if (Argument *Arg = dyn_cast<Argument>(Cmp->getOperand(0))) {
+            if (ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1))) {
+              // We found "assume(Arg <pred> Constant)"
+              ConstantRange Constraint = ConstantRange::makeAllowedICmpRegion(
+                  Cmp->getPredicate(), C->getValue());
+              // If the argument already has a range attribute, intersect with
+              // it
+              ConstantRange FinalRange = Constraint;
+              Attribute ExistingRangeAttr = Arg->getAttribute(Attribute::Range);
+              if (ExistingRangeAttr.isValid()) {
+                FinalRange =
+                    FinalRange.intersectWith(ExistingRangeAttr.getRange());
+              }
+              // If the range provides new information (and isn't empty), save
+              // it.
+              if (!FinalRange.isFullSet() && !FinalRange.isEmptySet()) {
+                AttrBuilder AB(Arg->getContext());
+                AB.addRangeAttr(FinalRange);
+                Arg->addAttrs(AB);
+              }
+            }
+          }
+        }
         Assume->eraseFromParent();
-
         resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
           RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
         });
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 985a54ca83256..51d3fc0d78975 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3687,6 +3687,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
   bool nuw = false;
   bool nsw = false;
   bool exact = false;
+  bool noZero = false;

   if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {

@@ -3698,11 +3699,30 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
     if (const PossiblyExactOperator *ExactOp =
             dyn_cast<const PossiblyExactOperator>(&I))
       exact = ExactOp->isExact();
+
+    const Value *ShiftAmt = I.getOperand(1);
+
+    // Look through zext as computeConstantRange does not do this.
+    const Value *InnerVal = ShiftAmt;
+    if (auto *ZExt = dyn_cast<ZExtInst>(ShiftAmt)) {
+      InnerVal = ZExt->getOperand(0);
+    }
+
+    // Get the constant range and check it excludes 0
+    ConstantRange CR = llvm::computeConstantRange(
+        InnerVal, true, true, AC, dyn_cast<Instruction>(&I), nullptr);
+
+    if (!CR.isEmptySet() && !CR.contains(APInt(CR.getBitWidth(), 0))) {
+      // We can guarantee that we will not be shifted by 0
+      noZero = true;
+    }
   }
+
   SDNodeFlags Flags;
   Flags.setExact(exact);
   Flags.setNoSignedWrap(nsw);
   Flags.setNoUnsignedWrap(nuw);
+  Flags.setNoZero(noZero);
   SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
                             Flags);
   setValue(&I, Res);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dc84025c166a3..87e637ee4c272 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23625,6 +23625,54 @@ static SDValue EmitTest(SDValue Op, X86::CondCode X86CC, const SDLoc &dl,
     return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0),
                        Op->getOperand(1)).getValue(1);
   }
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA: {
+    SDValue Amt = ArithOp.getOperand(1);
+
+    // Skip Constants
+    if (isa<ConstantSDNode>(Amt))
+      break;
+
+    // Check we can make this optimization
+    if (ArithOp->getFlags().hasNoZero() || DAG.isKnownNeverZero(Amt)) {
+      SDLoc DL(ArithOp);
+
+      // CopyToReg(CL, Amt)
+      SDValue Chain = DAG.getEntryNode();
+      SDValue Glue;
+
+      Chain = DAG.getCopyToReg(Chain, DL, X86::CL, Amt, Glue);
+      Glue = Chain.getValue(1);
+
+      // Select Opcode
+      unsigned X86Opcode;
+      switch (ArithOp.getOpcode()) {
+      case ISD::SHL:
+        X86Opcode = X86ISD::SHL;
+        break;
+      case ISD::SRL:
+        X86Opcode = X86ISD::SRL;
+        break;
+      case ISD::SRA:
+        X86Opcode = X86ISD::SRA;
+        break;
+      default:
+        llvm_unreachable("Unexpected shift opcode");
+      }
+
+      // Create Node [ValueToShift, Glue]
+      SDVTList VTs = DAG.getVTList(ArithOp.getValueType(), MVT::i32);
+      SDValue Ops[] = {ArithOp.getOperand(0), Glue};
+
+      SDValue NewNode = DAG.getNode(X86Opcode, DL, VTs, Ops);
+
+      // Replace and Return
+      DAG.ReplaceAllUsesOfValueWith(ArithOp, NewNode.getValue(0));
+      return NewNode.getValue(1);
+    }
+    break;
+  }
   default:
     break;
   }
@@ -35467,6 +35515,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(CVTTP2UIS)
   NODE_NAME_CASE(MCVTTP2UIS)
   NODE_NAME_CASE(POP_FROM_X87_REG)
+  NODE_NAME_CASE(SHL)
+  NODE_NAME_CASE(SRL)
+  NODE_NAME_CASE(SRA)
   }
   return nullptr;
 #undef NODE_NAME_CASE
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b7151f65942b4..00830804213f7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -996,6 +996,9 @@ namespace llvm {
     CLOAD,
     CSTORE,
     LAST_MEMORY_OPCODE = CSTORE,
+    SHL,
+    SRL,
+    SRA,
   };
   } // end namespace X86ISD

diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 2a5488847e648..e81bc33d3a459 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -689,3 +689,52 @@ let Predicates = [HasBMI2, HasEGPR] in {
   defm SHRX : ShiftX_Pats<srl, "_EVEX">;
   defm SHLX : ShiftX_Pats<shl, "_EVEX">;
 }
+
+def SDTX86ShiftWithFlags : SDTypeProfile<2, 1, [
+  SDTCisSameAs<0, 2>,
+  SDTCisVT<1, i32>,
+]>;
+
+def X86shl : SDNode<"X86ISD::SHL", SDTX86ShiftWithFlags, [SDNPInGlue]>;
+def X86srl : SDNode<"X86ISD::SRL", SDTX86ShiftWithFlags, [SDNPInGlue]>;
+def X86sra : SDNode<"X86ISD::SRA", SDTX86ShiftWithFlags, [SDNPInGlue]>;
+
+let Predicates = [NoNDD] in {
+  // SHL
+  def : Pat<(X86shl GR8:$src1), (SHL8rCL GR8:$src1)>;
+  def : Pat<(X86shl GR16:$src1), (SHL16rCL GR16:$src1)>;
+  def : Pat<(X86shl GR32:$src1), (SHL32rCL GR32:$src1)>;
+  def : Pat<(X86shl GR64:$src1), (SHL64rCL GR64:$src1)>;
+
+  // SHR
+  def : Pat<(X86srl GR8:$src1), (SHR8rCL GR8:$src1)>;
+  def : Pat<(X86srl GR16:$src1), (SHR16rCL GR16:$src1)>;
+  def : Pat<(X86srl GR32:$src1), (SHR32rCL GR32:$src1)>;
+  def : Pat<(X86srl GR64:$src1), (SHR64rCL GR64:$src1)>;
+
+  // SAR
+  def : Pat<(X86sra GR8:$src1), (SAR8rCL GR8:$src1)>;
+  def : Pat<(X86sra GR16:$src1), (SAR16rCL GR16:$src1)>;
+  def : Pat<(X86sra GR32:$src1), (SAR32rCL GR32:$src1)>;
+  def : Pat<(X86sra GR64:$src1), (SAR64rCL GR64:$src1)>;
+}
+
+let Predicates = [HasNDD, In64BitMode] in {
+  // SHL
+  def : Pat<(X86shl GR8:$src1), (SHL8rCL_ND GR8:$src1)>;
+  def : Pat<(X86shl GR16:$src1), (SHL16rCL_ND GR16:$src1)>;
+  def : Pat<(X86shl GR32:$src1), (SHL32rCL_ND GR32:$src1)>;
+  def : Pat<(X86shl GR64:$src1), (SHL64rCL_ND GR64:$src1)>;
+
+  // SHR
+  def : Pat<(X86srl GR8:$src1), (SHR8rCL_ND GR8:$src1)>;
+  def : Pat<(X86srl GR16:$src1), (SHR16rCL_ND GR16:$src1)>;
+  def : Pat<(X86srl GR32:$src1), (SHR32rCL_ND GR32:$src1)>;
+  def : Pat<(X86srl GR64:$src1), (SHR64rCL_ND GR64:$src1)>;
+
+  // SAR
+  def : Pat<(X86sra GR8:$src1), (SAR8rCL_ND GR8:$src1)>;
+  def : Pat<(X86sra GR16:$src1), (SAR16rCL_ND GR16:$src1)>;
+  def : Pat<(X86sra GR32:$src1), (SAR32rCL_ND GR32:$src1)>;
+  def : Pat<(X86sra GR64:$src1), (SAR64rCL_ND GR64:$src1)>;
+}
diff --git a/llvm/test/CodeGen/X86/apx/shift-eflags.ll b/llvm/test/CodeGen/X86/apx/shift-eflags.ll
index 2659f8031ef77..01f4689369eac 100644
--- a/llvm/test/CodeGen/X86/apx/shift-eflags.ll
+++ b/llvm/test/CodeGen/X86/apx/shift-eflags.ll
@@ -265,7 +265,6 @@ define i32 @ashr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    orb $1, %sil, %cl
 ; CHECK-NEXT:    sarl %cl, %edi
-; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    cmovel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -282,7 +281,6 @@ define i32 @lshr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    orb $1, %sil, %cl
 ; CHECK-NEXT:    shrl %cl, %edi
-; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    cmovel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -299,7 +297,6 @@ define i32 @shl_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    orb $1, %sil, %cl
 ; CHECK-NEXT:    shll %cl, %edi
-; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    cmovel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -315,7 +312,6 @@ define i32 @ashr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    orb $1, %sil, %cl
 ; CHECK-NEXT:    shrl %cl, %edi, %eax
-; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -331,7 +327,6 @@ define i32 @lshr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    orb $1, %sil, %cl
 ; CHECK-NEXT:    shrl %cl, %edi, %eax
-; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -347,7 +342,6 @@ define i32 @shl_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    orb $1, %sil, %cl
 ; CHECK-NEXT:    shrl %cl, %edi, %eax
-; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -356,3 +350,20 @@ define i32 @shl_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a
   %r = select i1 %c, i32 %s, i32 %a2
   ret i32 %r
 }
+
+define range(i8 0, 2) i8 @no_test_emitted_when_assume_shift_amt_gt_zero(i64 noundef %0, i32 noundef %1) {
+; CHECK-LABEL: no_test_emitted_when_assume_shift_amt_gt_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %esi, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shlq %cl, %rdi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+  %3 = icmp sgt i32 %1, 0
+  tail call void @llvm.assume(i1 %3)
+  %4 = zext nneg i32 %1 to i64
+  %5 = shl i64 %0, %4
+  %6 = icmp eq i64 %5, 0
+  %7 = zext i1 %6 to i8
+  ret i8 %7
+}
diff --git a/llvm/test/CodeGen/X86/shift-eflags.ll b/llvm/test/CodeGen/X86/shift-eflags.ll
index 6eddf50ce5c9d..badcf5470ab64 100644
--- a/llvm/test/CodeGen/X86/shift-eflags.ll
+++ b/llvm/test/CodeGen/X86/shift-eflags.ll
@@ -282,7 +282,6 @@ define i32 @ashr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-NEXT:    orb $1, %cl
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    sarl %cl, %edi
-; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    cmovel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -301,7 +300,6 @@ define i32 @lshr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-NEXT:    orb $1, %cl
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shrl %cl, %edi
-; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    cmovel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -320,7 +318,6 @@ define i32 @shl_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-NEXT:    orb $1, %cl
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shll %cl, %edi
-; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    cmovel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -339,7 +336,6 @@ define i32 @ashr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %
 ; CHECK-NEXT:    orb $1, %cl
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shrl %cl, %eax
-; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -358,7 +354,6 @@ define i32 @lshr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %
 ; CHECK-NEXT:    orb $1, %cl
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shrl %cl, %eax
-; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -377,7 +372,6 @@ define i32 @shl_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a
 ; CHECK-NEXT:    orb $1, %cl
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT:    shrl %cl, %eax
-; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %a = or i32 %a1, 1
@@ -386,3 +380,20 @@ define i32 @shl_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a
   %r = select i1 %c, i32 %s, i32 %a2
   ret i32 %r
 }
+
+define range(i8 0, 2) i8 @no_test_emitted_when_assume_shift_amt_gt_zero(i64 noundef %0, i32 noundef %1) {
+; CHECK-LABEL: no_test_emitted_when_assume_shift_amt_gt_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %esi, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shlq %cl, %rdi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+  %3 = icmp sgt i32 %1, 0
+  tail call void @llvm.assume(i1 %3)
+  %4 = zext nneg i32 %1 to i64
+  %5 = shl i64 %0, %4
+  %6 = icmp eq i64 %5, 0
+  %7 = zext i1 %6 to i8
+  ret i8 %7
+}
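
Illustrative sketch, not part of the patch (the function @sketch and its value names are made up): the CodeGenPrepare hunk above folds an "assume(icmp Arg, Const)" into a range attribute on the argument before the assume is erased, so the shift-amount constraint can still be recovered later when SelectionDAGBuilder decides whether to set the NoZero flag. Roughly:

  ; Before CodeGenPrepare:
  declare void @llvm.assume(i1 noundef)

  define i64 @sketch(i64 %x, i32 %amt) {
    %c = icmp sgt i32 %amt, 0
    call void @llvm.assume(i1 %c)
    %ext = zext nneg i32 %amt to i64
    %s = shl i64 %x, %ext
    ret i64 %s
  }

  ; After CodeGenPrepare (assumed wrapped-range printing of [1, INT32_MIN)):
  define i64 @sketch(i64 %x, i32 range(i32 1, -2147483648) %amt) {
    %ext = zext nneg i32 %amt to i64
    %s = shl i64 %x, %ext
    ret i64 %s
  }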