Skip to content

Commit

Permalink
Revert rG0b0a38a7a229b70d7261771ba0e702843bd34e97 : "[X86] combineX86…
Browse files Browse the repository at this point in the history
…ShufflesRecursively - don't widen shuffle subvector inputs"

Reports of miscompiles, that I'm still trying to triage - reverting for now
  • Loading branch information
RKSimon committed Feb 13, 2023
1 parent b838b62 commit d3b0fba
Show file tree
Hide file tree
Showing 53 changed files with 23,259 additions and 23,484 deletions.
35 changes: 15 additions & 20 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8452,16 +8452,6 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
Ops.push_back(Sub.getOperand(0));
return true;
}
// Handle generic INSERT_SUBVECTOR(SRC, SUB) iff its not a widening node.
if (!Src.isUndef() || InsertIdx != 0) {
Mask.assign(NumElts, SM_SentinelUndef);
std::iota(Mask.begin(), Mask.end(), 0);
for (int i = 0; i != (int)NumSubElts; ++i)
Mask[InsertIdx + i] = NumElts + i;
Ops.push_back(Src);
Ops.push_back(Sub);
return true;
}
// Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
SmallVector<int, 64> SubMask;
SmallVector<SDValue, 2> SubInputs;
Expand Down Expand Up @@ -40581,6 +40571,21 @@ static SDValue combineX86ShufflesRecursively(
Op, OpScaledDemandedElts, DAG))
Op = NewOp;
}
// FIXME: should we rerun resolveTargetShuffleInputsAndMask() now?

// Widen any subvector shuffle inputs we've collected.
// TODO: Remove this to avoid generating temporary nodes, we should only
// widen once combineX86ShuffleChain has found a match.
if (any_of(Ops, [RootSizeInBits](SDValue Op) {
return Op.getValueSizeInBits() < RootSizeInBits;
})) {
for (SDValue &Op : Ops)
if (Op.getValueSizeInBits() < RootSizeInBits)
Op = widenSubVector(Op, false, Subtarget, DAG, SDLoc(Op),
RootSizeInBits);
// Reresolve - we might have repeated subvector sources.
resolveTargetShuffleInputsAndMask(Ops, Mask);
}

// We can only combine unary and binary shuffle mask cases.
if (Ops.size() <= 2) {
Expand Down Expand Up @@ -55540,16 +55545,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,

unsigned NumOps = Ops.size();
switch (Op0.getOpcode()) {
case ISD::AssertSext:
case ISD::AssertZext: {
if (!IsSplat && llvm::all_of(Ops, [&Op0](SDValue Op) {
return Op0.getOperand(1) == Op.getOperand(1);
})) {
return DAG.getNode(Op0.getOpcode(), DL, VT,
ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
}
break;
}
case X86ISD::VBROADCAST: {
if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) {
return Op.getOperand(0).getValueType().is128BitVector();
Expand Down
176 changes: 94 additions & 82 deletions llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2031,9 +2031,9 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; AVX512BW-FAST-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX512BW-FAST: # %bb.0:
; AVX512BW-FAST-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,9,0,11,0,13,0,15]
; AVX512BW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,25,0,27,0,29,0,31]
; AVX512BW-FAST-NEXT: vpaddb (%rsi), %zmm0, %zmm0
; AVX512BW-FAST-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512BW-FAST-NEXT: vpermt2d %zmm0, %zmm1, %zmm0
; AVX512BW-FAST-NEXT: vpaddb (%rdx), %zmm0, %zmm0
; AVX512BW-FAST-NEXT: vmovdqa64 %zmm0, (%rcx)
; AVX512BW-FAST-NEXT: vzeroupper
Expand Down Expand Up @@ -4274,31 +4274,33 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,21,0,23]
; AVX512F-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb (%rdx), %ymm2, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, (%rcx)
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,13,0,15]
; AVX512F-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rcx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm2 = [0,21,0,23]
; AVX512DQ-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm2, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rcx)
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [0,13,0,15]
; AVX512DQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rcx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
Expand Down Expand Up @@ -4401,31 +4403,33 @@ define void @vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4(ptr %in.
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,21,22,0]
; AVX512F-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb (%rdx), %ymm2, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, (%rcx)
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,13,14,0]
; AVX512F-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rcx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec384_i32_widen_to_i96_factor3_broadcast_to_v4i96_factor4:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm2 = [0,21,22,0]
; AVX512DQ-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm2, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rcx)
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [0,13,14,0]
; AVX512DQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rcx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
Expand Down Expand Up @@ -4536,31 +4540,33 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,21,22,23]
; AVX512F-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb (%rdx), %ymm2, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, (%rcx)
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,13,14,15]
; AVX512F-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rcx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm2 = [0,21,22,23]
; AVX512DQ-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm2, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rcx)
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [0,13,14,15]
; AVX512DQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rcx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
Expand Down Expand Up @@ -4649,11 +4655,12 @@ define void @vec384_i32_widen_to_i192_factor6_broadcast_to_v2i192_factor2(ptr %i
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,21,22,23]
; AVX512F-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vpaddb (%rdx), %ymm2, %ymm0
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,13,14,15]
; AVX512F-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
Expand All @@ -4662,11 +4669,12 @@ define void @vec384_i32_widen_to_i192_factor6_broadcast_to_v2i192_factor2(ptr %i
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm2 = [0,21,22,23]
; AVX512DQ-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm2, %ymm0
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [0,13,14,15]
; AVX512DQ-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
Expand Down Expand Up @@ -4780,31 +4788,33 @@ define void @vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3(ptr %i
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0]
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb (%rdx), %ymm2, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, (%rcx)
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rcx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0]
; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm2, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, (%rcx)
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rcx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
Expand Down Expand Up @@ -4907,11 +4917,12 @@ define void @vec384_i64_widen_to_i192_factor3_broadcast_to_v2i192_factor2(ptr %i
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0]
; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512F-NEXT: vpaddb (%rdx), %ymm2, %ymm0
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
Expand All @@ -4920,11 +4931,12 @@ define void @vec384_i64_widen_to_i192_factor3_broadcast_to_v2i192_factor2(ptr %i
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0]
; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm2, %ymm0
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX512DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/X86/avx-insertelt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,11 @@ define <16 x i16> @insert_i16_firstelts(<16 x i16> %x, i16 %s) {
; AVX2-LABEL: insert_i16_firstelts:
; AVX2: # %bb.0:
; AVX2-NEXT: vpinsrw $0, %edi, %xmm0, %xmm1
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7],ymm2[8],ymm0[9,10,11,12,13,14,15]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vmovd %edi, %xmm1
; AVX2-NEXT: vpbroadcastw %xmm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
%i0 = insertelement <16 x i16> %x, i16 %s, i32 0
%i1 = insertelement <16 x i16> %i0, i16 %s, i32 8
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/avx-vperm2x128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ define <8 x i32> @shuffle_v8i32_u5u7cdef(<8 x i32> %a, <8 x i32> %b) nounwind uw
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_u5u7cdef:
Expand Down
12 changes: 7 additions & 5 deletions llvm/test/CodeGen/X86/avx512-insert-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -690,18 +690,20 @@ define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, ptr %ptr) {
; KNL-LABEL: insert_v16i16:
; KNL: ## %bb.0:
; KNL-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT: vmovd %edi, %xmm2
; KNL-NEXT: vpbroadcastw %xmm2, %ymm2
; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3,4,5,6,7,8],ymm2[9],ymm0[10,11,12,13,14,15]
; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT: vmovd %edi, %xmm1
; KNL-NEXT: vpbroadcastw %xmm1, %ymm1
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; KNL-NEXT: retq
;
; SKX-LABEL: insert_v16i16:
; SKX: ## %bb.0:
; SKX-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vpbroadcastw %edi, %ymm2
; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3,4,5,6,7,8],ymm2[9],ymm0[10,11,12,13,14,15]
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT: vpbroadcastw %edi, %ymm1
; SKX-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; SKX-NEXT: retq
%val = load i16, ptr %ptr
%r1 = insertelement <16 x i16> %x, i16 %val, i32 1
Expand Down

0 comments on commit d3b0fba

Please sign in to comment.