[X86] combineVectorShiftImm - fold (shl (add X, X), C) -> (shl X, (C + 1))

Noticed while investigating the regressions in D106675
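
Editor's note: the fold is sound because X + X == X << 1 in the modular arithmetic of a fixed-width vector element, so (shl (add X, X), C) == (shl (shl X, 1), C) == (shl X, (C + 1)). A minimal standalone C++ sketch that exhaustively checks the identity on a 16-bit lane (the test harness is illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Check (x + x) << c == x << (c + 1) for every 16-bit lane value,
  // mirroring the wrap-around semantics of a fixed-width element.
  for (uint32_t x = 0; x <= 0xFFFF; ++x)
    for (unsigned c = 0; c + 1 < 16; ++c)
      assert(static_cast<uint16_t>((x + x) << c) ==
             static_cast<uint16_t>(x << (c + 1)));
  return 0;
}
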
RKSimon committed Aug 14, 2022
1 parent 0c90d5f commit 8b47e29
Showing 3 changed files with 52 additions and 50 deletions.
17 changes: 12 additions & 5 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47202,10 +47202,8 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
   // result are all ones, not undef.
   return DAG.getConstant(-1, SDLoc(N), VT);
 
-  // (shift (shift X, C2), C1) -> (shift X, (C1 + C2))
-  if (Opcode == N0.getOpcode()) {
-    unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
-    unsigned NewShiftVal = ShiftVal + ShiftVal2;
+  auto MergeShifts = [&](SDValue X, uint64_t Amt0, uint64_t Amt1) {
+    unsigned NewShiftVal = Amt0 + Amt1;
     if (NewShiftVal >= NumBitsPerElt) {
       // Out of range logical bit shifts are guaranteed to be zero.
       // Out of range arithmetic bit shifts splat the sign bit.
@@ -47215,7 +47213,16 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
     }
     return DAG.getNode(Opcode, SDLoc(N), VT, N0.getOperand(0),
                        DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8));
-  }
+  };
+
+  // (shift (shift X, C2), C1) -> (shift X, (C1 + C2))
+  if (Opcode == N0.getOpcode())
+    return MergeShifts(N0.getOperand(0), ShiftVal, N0.getConstantOperandVal(1));
+
+  // (shl (add X, X), C) -> (shl X, (C + 1))
+  if (Opcode == X86ISD::VSHLI && N0.getOpcode() == ISD::ADD &&
+      N0.getOperand(0) == N0.getOperand(1))
+    return MergeShifts(N0.getOperand(0), ShiftVal, 1);
 
   // We can decode 'whole byte' logical bit shifts as shuffles.
   if (LogicalShift && (ShiftVal % 8) == 0) {
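Editor's note: the clamping behaviour that both callers of the new MergeShifts lambda rely on can be modelled on a scalar shift amount as follows (a sketch only; the helper name mergedShiftAmount is hypothetical, while NumBitsPerElt and LogicalShift mirror the variables in combineVectorShiftImm):

#include <optional>

// Returns the effective immediate for two folded shifts, or std::nullopt
// when a logical shift is known to produce an all-zeros result.
std::optional<unsigned> mergedShiftAmount(unsigned Amt0, unsigned Amt1,
                                          unsigned NumBitsPerElt,
                                          bool LogicalShift) {
  unsigned NewShiftVal = Amt0 + Amt1;
  if (NewShiftVal >= NumBitsPerElt) {
    if (LogicalShift)
      return std::nullopt;           // out-of-range shl/srl -> all zeros
    NewShiftVal = NumBitsPerElt - 1; // out-of-range sra splats the sign bit
  }
  return NewShiftVal;
}
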
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/pmul.ll
@@ -1356,8 +1356,7 @@ define <2 x i64> @pmuldq_square(<2 x i64> %x) {
 ; SSE2-NEXT:    psrlq $32, %xmm0
 ; SSE2-NEXT:    pmuludq %xmm1, %xmm0
 ; SSE2-NEXT:    pmuludq %xmm1, %xmm1
-; SSE2-NEXT:    paddq %xmm0, %xmm0
-; SSE2-NEXT:    psllq $32, %xmm0
+; SSE2-NEXT:    psllq $33, %xmm0
 ; SSE2-NEXT:    paddq %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
82 changes: 39 additions & 43 deletions llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -311,73 +311,69 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-LABEL: vec:
 ; X64:       # %bb.0:
 ; X64-NEXT:    pxor %xmm9, %xmm9
-; X64-NEXT:    movdqa %xmm1, %xmm2
-; X64-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm9[2],xmm2[3],xmm9[3]
-; X64-NEXT:    movq %xmm2, %rcx
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm9[2],xmm2[3],xmm9[3]
-; X64-NEXT:    paddq %xmm2, %xmm2
-; X64-NEXT:    psllq $31, %xmm2
-; X64-NEXT:    movq %xmm2, %rax
+; X64-NEXT:    pxor %xmm3, %xmm3
+; X64-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X64-NEXT:    movq %xmm3, %rax
+; X64-NEXT:    movdqa %xmm1, %xmm4
+; X64-NEXT:    punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm9[2],xmm4[3],xmm9[3]
+; X64-NEXT:    movq %xmm4, %rcx
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
 ; X64-NEXT:    movq %rax, %xmm6
-; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; X64-NEXT:    movq %xmm2, %rax
-; X64-NEXT:    movdqa %xmm1, %xmm2
-; X64-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT:    movq %xmm2, %rcx
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
+; X64-NEXT:    movq %xmm3, %rax
+; X64-NEXT:    movdqa %xmm1, %xmm3
+; X64-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    movq %xmm3, %rcx
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm2
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm2[0]
+; X64-NEXT:    movq %rax, %xmm3
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm3[0]
 ; X64-NEXT:    movdqa {{.*#+}} xmm10 = [9223372039002259456,9223372039002259456]
-; X64-NEXT:    movdqa %xmm6, %xmm2
-; X64-NEXT:    pxor %xmm10, %xmm2
-; X64-NEXT:    pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
+; X64-NEXT:    movdqa %xmm6, %xmm3
+; X64-NEXT:    pxor %xmm10, %xmm3
+; X64-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
 ; X64-NEXT:    movdqa {{.*#+}} xmm8 = [2147483649,2147483649,2147483649,2147483649]
 ; X64-NEXT:    pcmpeqd %xmm8, %xmm7
-; X64-NEXT:    movdqa {{.*#+}} xmm4 = [9223372043297226751,9223372043297226751]
-; X64-NEXT:    movdqa %xmm4, %xmm5
-; X64-NEXT:    pcmpgtd %xmm2, %xmm5
-; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[0,0,2,2]
-; X64-NEXT:    pand %xmm7, %xmm3
-; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
-; X64-NEXT:    por %xmm3, %xmm2
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [9223372043297226751,9223372043297226751]
+; X64-NEXT:    movdqa %xmm2, %xmm5
+; X64-NEXT:    pcmpgtd %xmm3, %xmm5
+; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[0,0,2,2]
+; X64-NEXT:    pand %xmm7, %xmm4
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
+; X64-NEXT:    por %xmm4, %xmm3
 ; X64-NEXT:    movdqa {{.*#+}} xmm7 = [8589934591,8589934591]
-; X64-NEXT:    pand %xmm2, %xmm6
-; X64-NEXT:    pandn %xmm7, %xmm2
-; X64-NEXT:    por %xmm6, %xmm2
-; X64-NEXT:    psrlq $1, %xmm2
-; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm9[0],xmm0[1],xmm9[1]
-; X64-NEXT:    paddq %xmm0, %xmm0
-; X64-NEXT:    psllq $31, %xmm0
-; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    pand %xmm3, %xmm6
+; X64-NEXT:    pandn %xmm7, %xmm3
+; X64-NEXT:    por %xmm6, %xmm3
+; X64-NEXT:    psrlq $1, %xmm3
+; X64-NEXT:    punpckldq {{.*#+}} xmm9 = xmm9[0],xmm0[0],xmm9[1],xmm0[1]
+; X64-NEXT:    movq %xmm9, %rax
 ; X64-NEXT:    movd %xmm1, %ecx
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm3
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X64-NEXT:    movq %rax, %xmm4
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm9[2,3,2,3]
 ; X64-NEXT:    movq %xmm0, %rax
 ; X64-NEXT:    psrlq $32, %xmm1
 ; X64-NEXT:    movq %xmm1, %rcx
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
 ; X64-NEXT:    movq %rax, %xmm0
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; X64-NEXT:    pxor %xmm3, %xmm10
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0]
+; X64-NEXT:    pxor %xmm4, %xmm10
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
 ; X64-NEXT:    pcmpeqd %xmm8, %xmm0
-; X64-NEXT:    pcmpgtd %xmm10, %xmm4
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
+; X64-NEXT:    pcmpgtd %xmm10, %xmm2
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,0,2,2]
 ; X64-NEXT:    pand %xmm0, %xmm1
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
 ; X64-NEXT:    por %xmm1, %xmm0
-; X64-NEXT:    pand %xmm0, %xmm3
+; X64-NEXT:    pand %xmm0, %xmm4
 ; X64-NEXT:    pandn %xmm7, %xmm0
-; X64-NEXT:    por %xmm3, %xmm0
+; X64-NEXT:    por %xmm4, %xmm0
 ; X64-NEXT:    psrlq $1, %xmm0
-; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: vec:
