diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index bedeb783bcf6d..ca064ee284f30 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1932,14 +1932,16 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, Mask != (0xffu << ScaleLog)) return true; + MVT XVT = X.getSimpleValueType(); MVT VT = N.getSimpleValueType(); SDLoc DL(N); SDValue Eight = DAG.getConstant(8, DL, MVT::i8); - SDValue NewMask = DAG.getConstant(0xff, DL, VT); - SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight); - SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask); + SDValue NewMask = DAG.getConstant(0xff, DL, XVT); + SDValue Srl = DAG.getNode(ISD::SRL, DL, XVT, X, Eight); + SDValue And = DAG.getNode(ISD::AND, DL, XVT, Srl, NewMask); SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8); - SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount); + SDValue Ext = DAG.getZExtOrTrunc(And, DL, VT); + SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Ext, ShlCount); // Insert the new nodes into the topological ordering. We must do this in // a valid topological ordering as nothing is going to go back and re-sort @@ -1951,10 +1953,12 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, insertDAGNode(DAG, N, NewMask); insertDAGNode(DAG, N, And); insertDAGNode(DAG, N, ShlCount); + if (Ext != And) + insertDAGNode(DAG, N, Ext); insertDAGNode(DAG, N, Shl); DAG.ReplaceAllUsesWith(N, Shl); DAG.RemoveDeadNode(N.getNode()); - AM.IndexReg = And; + AM.IndexReg = Ext; AM.Scale = (1 << ScaleLog); return false; } @@ -2508,53 +2512,60 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, Src = Src.getOperand(0); } - if (Src.getOpcode() != ISD::SHL || !Src.hasOneUse()) - break; + if (Src.getOpcode() == ISD::SHL && Src.hasOneUse()) { + // Give up if the shift is not a valid scale factor [1,2,3]. + SDValue ShlSrc = Src.getOperand(0); + SDValue ShlAmt = Src.getOperand(1); + auto *ShAmtC = dyn_cast(ShlAmt); + if (!ShAmtC) + break; + unsigned ShAmtV = ShAmtC->getZExtValue(); + if (ShAmtV > 3) + break; - // Give up if the shift is not a valid scale factor [1,2,3]. - SDValue ShlSrc = Src.getOperand(0); - SDValue ShlAmt = Src.getOperand(1); - auto *ShAmtC = dyn_cast(ShlAmt); - if (!ShAmtC) - break; - unsigned ShAmtV = ShAmtC->getZExtValue(); - if (ShAmtV > 3) - break; + // The narrow shift must only shift out zero bits (it must be 'nuw'). + // That makes it safe to widen to the destination type. + APInt HighZeros = + APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV); + if (!CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask)) + break; - // The narrow shift must only shift out zero bits (it must be 'nuw'). - // That makes it safe to widen to the destination type. - APInt HighZeros = - APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV); - if (!CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask)) - break; + // zext (shl nuw i8 %x, C1) to i32 + // --> shl (zext i8 %x to i32), (zext C1) + // zext (and (shl nuw i8 %x, C1), C2) to i32 + // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1) + MVT SrcVT = ShlSrc.getSimpleValueType(); + MVT VT = N.getSimpleValueType(); + SDLoc DL(N); + + SDValue Res = ShlSrc; + if (!Mask.isAllOnes()) { + Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT); + insertDAGNode(*CurDAG, N, Res); + Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res); + insertDAGNode(*CurDAG, N, Res); + } + SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res); + insertDAGNode(*CurDAG, N, Zext); + SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt); + insertDAGNode(*CurDAG, N, NewShl); - // zext (shl nuw i8 %x, C1) to i32 - // --> shl (zext i8 %x to i32), (zext C1) - // zext (and (shl nuw i8 %x, C1), C2) to i32 - // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1) - MVT SrcVT = ShlSrc.getSimpleValueType(); - MVT VT = N.getSimpleValueType(); - SDLoc DL(N); - - SDValue Res = ShlSrc; - if (!Mask.isAllOnes()) { - Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT); - insertDAGNode(*CurDAG, N, Res); - Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res); - insertDAGNode(*CurDAG, N, Res); + // Convert the shift to scale factor. + AM.Scale = 1 << ShAmtV; + AM.IndexReg = Zext; + + CurDAG->ReplaceAllUsesWith(N, NewShl); + CurDAG->RemoveDeadNode(N.getNode()); + return false; } - SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res); - insertDAGNode(*CurDAG, N, Zext); - SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt); - insertDAGNode(*CurDAG, N, NewShl); - // Convert the shift to scale factor. - AM.Scale = 1 << ShAmtV; - AM.IndexReg = Zext; + // Try to fold the mask and shift into an extract and scale. + if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes() && + !foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src, + Src.getOperand(0), AM)) + return false; - CurDAG->ReplaceAllUsesWith(N, NewShl); - CurDAG->RemoveDeadNode(N.getNode()); - return false; + break; } } diff --git a/llvm/test/CodeGen/X86/h-register-addressing-64.ll b/llvm/test/CodeGen/X86/h-register-addressing-64.ll index e65c2c85291a1..b36a9484d8561 100644 --- a/llvm/test/CodeGen/X86/h-register-addressing-64.ll +++ b/llvm/test/CodeGen/X86/h-register-addressing-64.ll @@ -104,9 +104,8 @@ define i8 @bar2(ptr nocapture inreg %p, i64 inreg %x) nounwind readonly { define double @ext8(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly { ; CHECK-LABEL: ext8: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $esi killed $esi def $rsi -; CHECK-NEXT: shrl $5, %esi -; CHECK-NEXT: andl $2040, %esi # imm = 0x7F8 +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movzbl %ah, %eax ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: retq %t0 = lshr i32 %x, 5 @@ -120,9 +119,8 @@ define double @ext8(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly { define float @ext4(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly { ; CHECK-LABEL: ext4: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $esi killed $esi def $rsi -; CHECK-NEXT: shrl $6, %esi -; CHECK-NEXT: andl $1020, %esi # imm = 0x3FC +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movzbl %ah, %eax ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq %t0 = lshr i32 %x, 6 @@ -136,10 +134,9 @@ define float @ext4(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly { define i8 @ext2(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly { ; CHECK-LABEL: ext2: ; CHECK: # %bb.0: -; CHECK-NEXT: # kill: def $esi killed $esi def $rsi -; CHECK-NEXT: shrl $7, %esi -; CHECK-NEXT: andl $510, %esi # imm = 0x1FE -; CHECK-NEXT: movzbl (%rdi,%rsi), %eax +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movzbl %ah, %eax +; CHECK-NEXT: movzbl (%rdi,%rax,2), %eax ; CHECK-NEXT: retq %t0 = lshr i32 %x, 7 %t1 = and i32 %t0, 510