Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59427,6 +59427,31 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
}
break;
case ISD::SETCC:
if (!IsSplat && EltSizeInBits == 1 &&
llvm::all_of(Ops, [Op0](SDValue Op) {
return Op0.getOperand(0).getValueType() ==
Op.getOperand(0).getValueType() &&
Op0.getOperand(2) == Op.getOperand(2);
})) {
EVT SrcVT = Op0.getOperand(0).getValueType();
EVT NewSrcVT = EVT::getVectorVT(Ctx, SrcVT.getScalarType(),
NumOps * SrcVT.getVectorNumElements());
unsigned SrcSizeInBits = SrcVT.getScalarSizeInBits();
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(NewSrcVT) &&
(NewSrcVT.is256BitVector() ||
(NewSrcVT.is512BitVector() && Subtarget.useAVX512Regs() &&
(SrcSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
SDValue LHS = CombineSubOperand(NewSrcVT.getSimpleVT(), Ops, 0);
SDValue RHS = CombineSubOperand(NewSrcVT.getSimpleVT(), Ops, 1);
if (LHS || RHS)
return DAG.getNode(Opcode, DL, VT,
LHS ? LHS : ConcatSubOperand(NewSrcVT, Ops, 0),
RHS ? RHS : ConcatSubOperand(NewSrcVT, Ops, 1),
Op0.getOperand(2));
}
}
break;
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::CTLZ:
Expand Down Expand Up @@ -59791,13 +59816,16 @@ static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
}
}

// Attempt to merge logic ops if the type is legal.
if (TLI.isTypeLegal(VT) && all_of(Ops, [](SDValue Op) {
return ISD::isBitwiseLogicOp(Op.getOpcode());
}))
// Attempt to merge comparison/logic ops if the type is legal.
if (TLI.isTypeLegal(VT) &&
(all_of(Ops, [](SDValue Op) { return Op.getOpcode() == ISD::SETCC; }) ||
all_of(Ops, [](SDValue Op) {
return ISD::isBitwiseLogicOp(Op.getOpcode());
}))) {
if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops,
DAG, Subtarget))
return R;
}

// Don't do anything else for i1 vectors.
return SDValue();
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,12 @@ define <8 x i1> @test3(<4 x i1> %a) {
define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
; CHECK-NEXT: vpmovd2m %xmm1, %k0
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-NEXT: vpmovd2m %xmm0, %k1
; CHECK-NEXT: kshiftlb $4, %k0, %k0
; CHECK-NEXT: korb %k0, %k1, %k0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vpslld $31, %ymm0, %ymm0
; CHECK-NEXT: vpmovd2m %ymm0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq

%res = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
Expand All @@ -68,13 +67,12 @@ define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1
; CHECK-NEXT: vpmovq2m %xmm1, %k0
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT: vpmovq2m %xmm0, %k1
; CHECK-NEXT: kshiftlb $2, %k0, %k0
; CHECK-NEXT: korw %k0, %k1, %k0
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vpsllq $63, %ymm0, %ymm0
; CHECK-NEXT: vpmovq2m %ymm0, %k0
; CHECK-NEXT: vpmovm2d %k0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq

%res = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
Expand Down
119 changes: 53 additions & 66 deletions llvm/test/CodeGen/X86/combine-icmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,12 @@ define i8 @concat_icmp_v8i32_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
;
; AVX512-LABEL: concat_icmp_v8i32_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestnmd %xmm0, %xmm0, %k0
; AVX512-NEXT: vptestnmd %xmm1, %xmm1, %k1
; AVX512-NEXT: kshiftlb $4, %k1, %k1
; AVX512-NEXT: korb %k1, %k0, %k0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%v0 = icmp eq <4 x i32> %a0, zeroinitializer
%v1 = icmp eq <4 x i32> %a1, zeroinitializer
Expand Down Expand Up @@ -151,12 +151,12 @@ define i16 @concat_icmp_v16i16_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
;
; AVX512-LABEL: concat_icmp_v16i16_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
; AVX512-NEXT: vpcmpnleuw %xmm2, %xmm0, %k0
; AVX512-NEXT: vpcmpnleuw %xmm2, %xmm1, %k1
; AVX512-NEXT: kunpckbw %k0, %k1, %k0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%v0 = icmp ugt <8 x i16> %a0, splat (i16 1)
%v1 = icmp ugt <8 x i16> %a1, splat (i16 1)
Expand Down Expand Up @@ -199,11 +199,11 @@ define i32 @concat_icmp_v32i8_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
;
; AVX512-LABEL: concat_icmp_v32i8_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; AVX512-NEXT: vpcmpgtb %xmm2, %xmm0, %k0
; AVX512-NEXT: vpcmpgtb %xmm2, %xmm1, %k1
; AVX512-NEXT: kunpckwd %k0, %k1, %k0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%v0 = icmp sgt <16 x i8> %a0, splat (i8 5)
%v1 = icmp sgt <16 x i8> %a1, splat (i8 5)
Expand Down Expand Up @@ -329,21 +329,15 @@ define i8 @concat_icmp_v8i64_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2,
;
; AVX512-LABEL: concat_icmp_v8i64_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm4 = [128,128]
; AVX512-NEXT: vpcmpltuq %xmm4, %xmm0, %k0
; AVX512-NEXT: vpcmpltuq %xmm4, %xmm1, %k1
; AVX512-NEXT: vpcmpltuq %xmm4, %xmm2, %k2
; AVX512-NEXT: vpcmpltuq %xmm4, %xmm3, %k3
; AVX512-NEXT: kshiftlb $2, %k3, %k3
; AVX512-NEXT: korb %k3, %k2, %k2
; AVX512-NEXT: kshiftlb $4, %k2, %k2
; AVX512-NEXT: kshiftlb $2, %k1, %k1
; AVX512-NEXT: korw %k1, %k0, %k0
; AVX512-NEXT: kshiftlb $4, %k0, %k0
; AVX512-NEXT: kshiftrb $4, %k0, %k0
; AVX512-NEXT: korb %k2, %k0, %k0
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%v0 = icmp ult <2 x i64> %a0, splat (i64 128)
%v1 = icmp ult <2 x i64> %a1, splat (i64 128)
Expand Down Expand Up @@ -387,18 +381,16 @@ define i16 @concat_icmp_v16i32_v4i32(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2
;
; AVX512-LABEL: concat_icmp_v16i32_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX512-NEXT: vpcmpgtd %xmm4, %xmm0, %k0
; AVX512-NEXT: vpcmpgtd %xmm4, %xmm1, %k1
; AVX512-NEXT: vpcmpgtd %xmm4, %xmm2, %k2
; AVX512-NEXT: vpcmpgtd %xmm4, %xmm3, %k3
; AVX512-NEXT: kshiftlb $4, %k1, %k1
; AVX512-NEXT: korb %k1, %k0, %k0
; AVX512-NEXT: kshiftlb $4, %k3, %k1
; AVX512-NEXT: korb %k1, %k2, %k1
; AVX512-NEXT: kunpckbw %k0, %k1, %k0
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%v0 = icmp sgt <4 x i32> %a0, zeroinitializer
%v1 = icmp sgt <4 x i32> %a1, zeroinitializer
Expand Down Expand Up @@ -468,14 +460,14 @@ define i32 @concat_icmp_v32i16_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2
;
; AVX512-LABEL: concat_icmp_v32i16_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestmw %xmm0, %xmm0, %k0
; AVX512-NEXT: vptestmw %xmm1, %xmm1, %k1
; AVX512-NEXT: vptestmw %xmm2, %xmm2, %k2
; AVX512-NEXT: vptestmw %xmm3, %xmm3, %k3
; AVX512-NEXT: kunpckbw %k0, %k1, %k0
; AVX512-NEXT: kunpckbw %k2, %k3, %k1
; AVX512-NEXT: kunpckwd %k0, %k1, %k0
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512-NEXT: vptestmw %zmm0, %zmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%v0 = icmp ne <8 x i16> %a0, zeroinitializer
%v1 = icmp ne <8 x i16> %a1, zeroinitializer
Expand Down Expand Up @@ -560,15 +552,14 @@ define i64 @concat_icmp_v64i8_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2,
;
; AVX512-LABEL: concat_icmp_v64i8_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512-NEXT: vpcmpnleub %xmm4, %xmm0, %k0
; AVX512-NEXT: vpcmpnleub %xmm4, %xmm1, %k1
; AVX512-NEXT: vpcmpnleub %xmm4, %xmm2, %k2
; AVX512-NEXT: vpcmpnleub %xmm4, %xmm3, %k3
; AVX512-NEXT: kunpckwd %k0, %k1, %k0
; AVX512-NEXT: kunpckwd %k2, %k3, %k1
; AVX512-NEXT: kunpckdq %k0, %k1, %k0
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
; AVX512-NEXT: kmovq %k0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%v0 = icmp ugt <16 x i8> %a0, splat (i8 15)
%v1 = icmp ugt <16 x i8> %a1, splat (i8 15)
Expand Down Expand Up @@ -672,10 +663,9 @@ define i8 @concat_icmp_v8i64_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
;
; AVX512-LABEL: concat_icmp_v8i64_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vptestnmq %ymm0, %ymm0, %k0
; AVX512-NEXT: vptestnmq %ymm1, %ymm1, %k1
; AVX512-NEXT: kshiftlb $4, %k1, %k1
; AVX512-NEXT: korb %k1, %k0, %k0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
Expand Down Expand Up @@ -768,10 +758,9 @@ define i16 @concat_icmp_v16i32_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
;
; AVX512-LABEL: concat_icmp_v16i32_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX512-NEXT: vpcmpnleud %ymm2, %ymm0, %k0
; AVX512-NEXT: vpcmpnleud %ymm2, %ymm1, %k1
; AVX512-NEXT: kunpckbw %k0, %k1, %k0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
Expand Down Expand Up @@ -830,10 +819,9 @@ define i32 @concat_icmp_v32i16_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
;
; AVX512-LABEL: concat_icmp_v32i16_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; AVX512-NEXT: vpcmpgtw %ymm2, %ymm0, %k0
; AVX512-NEXT: vpcmpgtw %ymm2, %ymm1, %k1
; AVX512-NEXT: kunpckwd %k0, %k1, %k0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down Expand Up @@ -903,10 +891,9 @@ define i64 @concat_icmp_v64i8_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
;
; AVX512-LABEL: concat_icmp_v64i8_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %k0
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm2, %k1
; AVX512-NEXT: kunpckdq %k0, %k1, %k0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: vpcmpltb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0
; AVX512-NEXT: kmovq %k0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
Expand Down
Loading
Loading