Skip to content

Commit

Permalink
[x86] Enable some support for lowerVectorShuffleWithUndefHalf with AV…
Browse files Browse the repository at this point in the history
…X-512

Summary:
This teaches 512-bit shuffles to detect unused halfs in order to reduce shuffle size.

We may need to refine the 512-bit exit point. I couldn't remember if we had good cross lane shuffles for 8/16 bit with AVX-512 or not.

I believe this is step towards being able to handle D36454 without a special case.

From here we need to improve our ability to combine extract_subvector with insert_subvector and other extract_subvectors. And we need to support narrowing binary operations where we don't demand all elements. This may be improvements to DAGCombiner::narrowExtractedVectorBinOp(by recognizing an insert_subvector in addition to concat) or we may need a target specific combiner.

Reviewers: RKSimon, zvi, delena, jbhateja

Reviewed By: RKSimon, jbhateja

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D36601

llvm-svn: 310724
  • Loading branch information
topperc committed Aug 11, 2017
1 parent 9c52574 commit 0f30fe9
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 66 deletions.
14 changes: 12 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -12256,15 +12256,16 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
}

/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
/// This allows for fast cases such as subvector extraction/insertion
/// or shuffling smaller vector types which can lower more efficiently.
static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(VT.is256BitVector() && "Expected 256-bit vector");
assert((VT.is256BitVector() || VT.is512BitVector()) &&
"Expected 256-bit or 512-bit vector");

unsigned NumElts = VT.getVectorNumElements();
unsigned HalfNumElts = NumElts / 2;
Expand Down Expand Up @@ -12360,6 +12361,10 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
}
}

// AVX512 - XXXXuuuu - always extract lowers.
if (VT.is512BitVector() && !(UndefUpper && NumUpperHalves == 0))
return SDValue();

auto GetHalfVector = [&](int HalfIdx) {
if (HalfIdx < 0)
return DAG.getUNDEF(HalfVT);
Expand Down Expand Up @@ -13703,6 +13708,11 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;

// Handle special cases where the lower or upper half is UNDEF.
if (SDValue V =
lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;

// Check for being able to broadcast a single element.
if (SDValue Broadcast =
lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/madd.ll
Expand Up @@ -323,13 +323,13 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
; AVX512-NEXT: cmpq %rcx, %rax
; AVX512-NEXT: jne .LBB2_1
; AVX512-NEXT: # BB#2: # %middle.block
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/X86/sad.ll
Expand Up @@ -72,13 +72,13 @@ define i32 @sad_16i8() nounwind {
; AVX512F-NEXT: addq $4, %rax
; AVX512F-NEXT: jne .LBB0_1
; AVX512F-NEXT: # BB#2: # %middle.block
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: vzeroupper
Expand All @@ -98,13 +98,13 @@ define i32 @sad_16i8() nounwind {
; AVX512BW-NEXT: addq $4, %rax
; AVX512BW-NEXT: jne .LBB0_1
; AVX512BW-NEXT: # BB#2: # %middle.block
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
Expand Down Expand Up @@ -321,13 +321,13 @@ define i32 @sad_32i8() nounwind {
; AVX512F-NEXT: jne .LBB1_1
; AVX512F-NEXT: # BB#2: # %middle.block
; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: vzeroupper
Expand All @@ -349,13 +349,13 @@ define i32 @sad_32i8() nounwind {
; AVX512BW-NEXT: jne .LBB1_1
; AVX512BW-NEXT: # BB#2: # %middle.block
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
Expand Down Expand Up @@ -794,13 +794,13 @@ define i32 @sad_avx64i8() nounwind {
; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpaddd %zmm3, %zmm1, %zmm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovd %xmm0, %eax
; AVX512F-NEXT: vzeroupper
Expand All @@ -823,13 +823,13 @@ define i32 @sad_avx64i8() nounwind {
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddd %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
Expand Down
17 changes: 4 additions & 13 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
Expand Up @@ -262,19 +262,10 @@ define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19
}

define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
; AVX512F-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: movw $8, %ax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: movw $8, %ax
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL: # BB#0:
; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; ALL-NEXT: retq
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i32> %c
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
Expand Up @@ -101,7 +101,7 @@ define <32 x i16> @shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u(<32 x i1
;
; SKX-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
; SKX: ## BB#0:
; SKX-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
; SKX-NEXT: retq
%c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i16> %c
Expand All @@ -115,7 +115,7 @@ define <32 x i16> @shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u(<32 x
;
; SKX-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
; SKX: ## BB#0:
; SKX-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
; SKX-NEXT: retq
%c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i16> %c
Expand Down
23 changes: 4 additions & 19 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
Expand Up @@ -5,25 +5,10 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VBMI

define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; AVX512VBMI: # BB#0:
; AVX512VBMI-NEXT: vpsrld $16, %zmm0, %zmm0
; AVX512VBMI-NEXT: retq
; ALL-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL: # BB#0:
; ALL-NEXT: vpsrld $16, %xmm0, %xmm0
; ALL-NEXT: retq
%b = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <64 x i8> %b
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
Expand Up @@ -2241,12 +2241,12 @@ define <8 x double> @shuffle_v8f64_2301uu67(<8 x double> %a0, <8 x double> %a1)
define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) {
; AVX512F-LABEL: shuffle_v8f64_2301uuuu:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
; AVX512F-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_2301uuuu:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
; AVX512F-32-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
; AVX512F-32-NEXT: retl
%1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x double> %1
Expand Down

0 comments on commit 0f30fe9

Please sign in to comment.