8 changes: 8 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58135,6 +58135,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
     return V;

+  // Prefer VSHLI to reduce uses, X86FixupInstTunings may revert this depending
+  // on the scheduler model. Limit multiple users to AVX+ targets to prevent
+  // introducing extra register moves.
+  if (Op0 == Op1 && supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL))
+    if (Subtarget.hasAVX() || N->isOnlyUserOf(Op0.getNode()))
+      return getTargetVShiftByConstNode(X86ISD::VSHLI, DL, VT.getSimpleVT(),
+                                        Op0, 1, DAG);
+
   // Canonicalize hidden LEA pattern:
   // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
   // iff c < 4
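In short, the new combine selects a vector self-addition as a shift-by-one immediate when the type supports it. A minimal, hypothetical IR reduction (not one of the tests touched by this PR), assuming an AVX2 target (llc -mtriple=x86_64-- -mattr=+avx2), sketches the expected effect; X86FixupInstTunings may still turn the shift back into an add on some scheduler models:

; Hypothetical reduced example, not part of this PR's test changes.
define <16 x i16> @double_v16i16(<16 x i16> %x) {
  ; add x, x on a type with vector shift-by-immediate support is now
  ; expected to select as VSHLI, i.e. vpsllw $1, %ymm0, %ymm0 instead of
  ; vpaddw %ymm0, %ymm0, %ymm0.
  %d = add <16 x i16> %x, %x
  ret <16 x i16> %d
}

The regenerated CHECK lines below show the same effect on the blend-mask doubling inside the variable-shift lowerings: the second doubling becomes a vpsllw $2 of the original mask register, so the first doubling now writes to a fresh register.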
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -441,21 +441,21 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
 ; CHECK-NEXT: vpsraw $4, %ymm3, %ymm4
 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
 ; CHECK-NEXT: vpsraw $2, %ymm3, %ymm4
-; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; CHECK-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
 ; CHECK-NEXT: vpsraw $1, %ymm3, %ymm4
-; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpsllw $2, %ymm2, %ymm2
 ; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
 ; CHECK-NEXT: vpsrlw $8, %ymm2, %ymm2
 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
 ; CHECK-NEXT: vpsraw $4, %ymm0, %ymm3
 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; CHECK-NEXT: vpsraw $2, %ymm0, %ymm3
-; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; CHECK-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
 ; CHECK-NEXT: vpsraw $1, %ymm0, %ymm3
-; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vpsllw $2, %ymm1, %ymm1
 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
168 changes: 84 additions & 84 deletions llvm/test/CodeGen/X86/gfni-shifts.ll

Large diffs are not rendered by default.

36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/logic-shift.ll
@@ -129,21 +129,21 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <
 ; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5
 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm6
+; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpsllw $2, %xmm4, %xmm4
 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4
 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
 ; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
 ; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpsllw $2, %xmm2, %xmm2
 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
 ; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
@@ -413,21 +413,21 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
 ; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5
 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm6
+; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpsllw $2, %xmm4, %xmm4
 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4
 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
 ; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
 ; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpsllw $2, %xmm2, %xmm2
 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
 ; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
@@ -697,21 +697,21 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y,
 ; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5
 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm6
+; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5
-; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpsllw $2, %xmm4, %xmm4
 ; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
 ; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4
 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
 ; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; CHECK-NEXT: vpblendvb %xmm5, %xmm4, %xmm0, %xmm0
 ; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4
-; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpsllw $2, %xmm2, %xmm2
 ; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
 ; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/prefer-avx256-shift.ll
@@ -302,21 +302,21 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; AVX256-NEXT: vpsraw $4, %ymm3, %ymm4
 ; AVX256-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
 ; AVX256-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX256-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX256-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX256-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX256-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
 ; AVX256-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX256-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX256-NEXT: vpsllw $2, %ymm2, %ymm2
 ; AVX256-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
 ; AVX256-NEXT: vpsrlw $8, %ymm2, %ymm2
 ; AVX256-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
 ; AVX256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
 ; AVX256-NEXT: vpsraw $4, %ymm0, %ymm3
 ; AVX256-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; AVX256-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX256-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX256-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX256-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX256-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
 ; AVX256-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX256-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX256-NEXT: vpsllw $2, %ymm1, %ymm1
 ; AVX256-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; AVX256-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; AVX256-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -338,21 +338,21 @@ define <32 x i8> @var_ashr_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; AVX512VL-NEXT: vpsraw $4, %ymm3, %ymm4
 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
 ; AVX512VL-NEXT: vpsraw $2, %ymm3, %ymm4
-; AVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm5
+; AVX512VL-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3
 ; AVX512VL-NEXT: vpsraw $1, %ymm3, %ymm4
-; AVX512VL-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm2
 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
 ; AVX512VL-NEXT: vpsraw $4, %ymm0, %ymm3
 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpsraw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm4
+; AVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpsraw $1, %ymm0, %ymm3
-; AVX512VL-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsllw $2, %ymm1, %ymm1
 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
@@ -432,21 +432,21 @@ define <16 x i8> @var_ashr_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; AVX256VL-NEXT: vpsraw $4, %xmm3, %xmm4
 ; AVX256VL-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
 ; AVX256VL-NEXT: vpsraw $2, %xmm3, %xmm4
-; AVX256VL-NEXT: vpaddw %xmm2, %xmm2, %xmm2
-; AVX256VL-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX256VL-NEXT: vpaddw %xmm2, %xmm2, %xmm5
+; AVX256VL-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
 ; AVX256VL-NEXT: vpsraw $1, %xmm3, %xmm4
-; AVX256VL-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX256VL-NEXT: vpsllw $2, %xmm2, %xmm2
 ; AVX256VL-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
 ; AVX256VL-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; AVX256VL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX256VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX256VL-NEXT: vpsraw $4, %xmm0, %xmm3
 ; AVX256VL-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
 ; AVX256VL-NEXT: vpsraw $2, %xmm0, %xmm3
-; AVX256VL-NEXT: vpaddw %xmm1, %xmm1, %xmm1
-; AVX256VL-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX256VL-NEXT: vpaddw %xmm1, %xmm1, %xmm4
+; AVX256VL-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
 ; AVX256VL-NEXT: vpsraw $1, %xmm0, %xmm3
-; AVX256VL-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX256VL-NEXT: vpsllw $2, %xmm1, %xmm1
 ; AVX256VL-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
 ; AVX256VL-NEXT: vpsrlw $8, %xmm0, %xmm0
 ; AVX256VL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0