[X86] combineX86ShufflesRecursively - peek through insert_subvector(undef, x, 0) vector widening nodes

If the node is inert, just peek through it and canonicalize the shuffle mask to ensure any referenced upper elements are set to undef.
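
The idea, in isolation: once an operand is known to be insert_subvector(undef, x, 0), only the low lanes of that operand carry defined data, so any shuffle mask index that addresses the widened upper lanes can be replaced with an undef sentinel. Below is a minimal standalone sketch of that canonicalization (plain C++ over an int mask, not the LLVM SelectionDAG API; the names undefOutOfBoundsIndices, RootElts, ValidElts and SentinelUndef are invented for illustration):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Model: a shuffle over several operands that are each RootElts wide uses
    // mask indices in [OpIndex * RootElts, (OpIndex + 1) * RootElts) to address
    // operand OpIndex. If that operand is really a narrower vector widened with
    // undef upper lanes, only its first ValidElts lanes are defined.
    constexpr int SentinelUndef = -1; // stand-in for SM_SentinelUndef

    void undefOutOfBoundsIndices(std::vector<int> &Mask, std::size_t OpIndex,
                                 std::size_t RootElts, std::size_t ValidElts) {
      assert(ValidElts <= RootElts && "widening cannot shrink the operand");
      const int Lo = static_cast<int>(OpIndex * RootElts);
      const int Hi = static_cast<int>((OpIndex + 1) * RootElts);
      const int NewHi = Lo + static_cast<int>(ValidElts);
      for (int &M : Mask)
        if (NewHi <= M && M < Hi) // index lands in the undef upper region
          M = SentinelUndef;
    }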
RKSimon committed Feb 7, 2023
1 parent 1b25402 commit 16a357f
Showing 4 changed files with 835 additions and 849 deletions.
18 changes: 18 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40393,6 +40393,24 @@ static SDValue combineX86ShufflesRecursively(
}
}

// Peek through vector widenings and set out of bounds mask indices to undef.
// TODO: Can resolveTargetShuffleInputsAndMask do some of this?
for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
SDValue &Op = Ops[I];
if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
isNullConstant(Op.getOperand(2))) {
Op = Op.getOperand(1);
unsigned Scale = RootSizeInBits / Op.getValueSizeInBits();
int Lo = I * Mask.size();
int Hi = (I + 1) * Mask.size();
int NewHi = Lo + (Mask.size() / Scale);
for (int &M : Mask) {
if (Lo <= M && NewHi <= M && M < Hi)
M = SM_SentinelUndef;
}
}
}

// Peek through any free extract_subvector nodes back to root size.
for (SDValue &Op : Ops)
while (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
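To make the Lo/Hi/NewHi arithmetic in the hunk above concrete, here is a hedged worked example; the 256-bit/128-bit widths mirror the v4f64 tests below, while the operand position and mask values are invented for illustration. With RootSizeInBits = 256 and a 128-bit vector behind the widening, Scale = 2, so for operand I = 1 only mask indices 4 and 5 stay valid and indices 6 and 7 are canonicalized to undef:

    #include <cassert>
    #include <vector>

    int main() {
      const unsigned RootSizeInBits = 256;                  // root shuffle width, e.g. v4f64
      const unsigned OpSizeInBits = 128;                    // the narrow vector inside insert_subvector
      const unsigned NumMaskElts = 4;                       // Mask.size() for a v4f64 shuffle
      const unsigned Scale = RootSizeInBits / OpSizeInBits; // == 2

      const unsigned I = 1;                         // second shuffle operand
      const int Lo = I * NumMaskElts;               // 4
      const int Hi = (I + 1) * NumMaskElts;         // 8
      const int NewHi = Lo + (NumMaskElts / Scale); // 6

      // Index 5 still references a defined lane of operand 1; index 6 references
      // the implicitly-undef upper half, so it becomes the undef sentinel (-1).
      std::vector<int> Mask = {0, 6, 2, 5};
      for (int &M : Mask)
        if (Lo <= M && NewHi <= M && M < Hi)
          M = -1; // SM_SentinelUndef in the real code

      assert((Mask == std::vector<int>{0, -1, 2, 5}));
      return 0;
    }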
21 changes: 7 additions & 14 deletions llvm/test/CodeGen/X86/haddsub-undef.ll
@@ -1154,20 +1154,13 @@ define <4 x double> @PR34724_add_v4f64_u123(<4 x double> %0, <4 x double> %1) {
; AVX-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-SLOW-NEXT: retq
;
; AVX1-FAST-LABEL: PR34724_add_v4f64_u123:
; AVX1-FAST: # %bb.0:
; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-FAST-NEXT: vblendpd {{.*#+}} ymm2 = ymm0[0,1],ymm1[2,3]
; AVX1-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-FAST-NEXT: vhaddpd %ymm2, %ymm0, %ymm0
; AVX1-FAST-NEXT: retq
;
; AVX512-FAST-LABEL: PR34724_add_v4f64_u123:
; AVX512-FAST: # %bb.0:
; AVX512-FAST-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-FAST-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX512-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX512-FAST-NEXT: retq
; AVX-FAST-LABEL: PR34724_add_v4f64_u123:
; AVX-FAST: # %bb.0:
; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-FAST-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX-FAST-NEXT: vhaddpd %ymm0, %ymm1, %ymm0
; AVX-FAST-NEXT: retq
%3 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> <i32 2, i32 4>
%4 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> <i32 3, i32 5>
%5 = fadd <2 x double> %3, %4
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/oddshuffles.ll
@@ -2253,8 +2253,7 @@ define <16 x i32> @splat_v3i32(ptr %ptr) {
;
; AVX2-FAST-LABEL: splat_v3i32:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-FAST-NEXT: vpinsrd $2, 8(%rdi), %xmm0, %xmm1
; AVX2-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-FAST-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
