[X86] lowerV64I8Shuffle - attempt to match with lowerShuffleAsLanePermuteAndPermute

Fixes #54562
RKSimon committed Mar 28, 2022
1 parent d3e5f0a commit 8a1956d
Showing 3 changed files with 15 additions and 28 deletions.
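For context, the shuffles affected are full-width v64i8 byte interleaves such as the one exercised by the stride-2 store test updated below. The test's IR body is not reproduced on this page, so the following is only a rough sketch of that pattern under assumed names (@interleave_i8_stride2_sketch and its parameter names are illustrative, not taken from the test file):

define void @interleave_i8_stride2_sketch(<32 x i8>* %p0, <32 x i8>* %p1, <64 x i8>* %out) {
  %a = load <32 x i8>, <32 x i8>* %p0
  %b = load <32 x i8>, <32 x i8>* %p1
  ; Interleave the two 32-byte inputs byte-by-byte into one 64-byte vector.
  %i = shufflevector <32 x i8> %a, <32 x i8> %b, <64 x i32> <
         i32 0,  i32 32, i32 1,  i32 33, i32 2,  i32 34, i32 3,  i32 35,
         i32 4,  i32 36, i32 5,  i32 37, i32 6,  i32 38, i32 7,  i32 39,
         i32 8,  i32 40, i32 9,  i32 41, i32 10, i32 42, i32 11, i32 43,
         i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47,
         i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51,
         i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55,
         i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59,
         i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  store <64 x i8> %i, <64 x i8>* %out
  ret void
}

With this change, AVX512 targets lower such a shuffle as a single cross-lane VPERMQ followed by an in-lane VPSHUFB, as the updated checks below show.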
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18709,6 +18709,10 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
           DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
     return V;
 
+  if (SDValue Result = lowerShuffleAsLanePermuteAndPermute(
+          DL, MVT::v64i8, V1, V2, Mask, DAG, Subtarget))
+    return Result;
+
   if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
     return Blend;
16 changes: 5 additions & 11 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-2.ll
@@ -196,17 +196,11 @@ define void @store_i8_stride2_vf32(<32 x i8>* %in.vecptr0, <32 x i8>* %in.vecptr
 ;
 ; AVX512-LABEL: store_i8_stride2_vf32:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqa (%rsi), %xmm0
-; AVX512-NEXT: vmovdqa 16(%rsi), %xmm1
-; AVX512-NEXT: vmovdqa (%rdi), %xmm2
-; AVX512-NEXT: vmovdqa 16(%rdi), %xmm3
-; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
-; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa (%rdi), %ymm0
+; AVX512-NEXT: vinserti64x4 $1, (%rsi), %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4,1,5,2,6,3,7]
+; AVX512-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15,16,24,17,25,18,26,19,27,20,28,21,29,22,30,23,31,32,40,33,41,34,42,35,43,36,44,37,45,38,46,39,47,48,56,49,57,50,58,51,59,52,60,53,61,54,62,55,63]
 ; AVX512-NEXT: vmovdqu64 %zmm0, (%rdx)
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
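The lane permute + permute strategy matched here corresponds to the VPERMQ/VPSHUFB pair in the updated checks above: first a cross-lane permute of 64-bit chunks, then a byte permute whose indices stay within each 128-bit lane. Below is a rough IR-level sketch of that decomposition, reusing the masks from the new AVX512 checks; %v stands for the already-concatenated 64-byte input built by the vinserti64x4 above, and the function name is illustrative only:

define <64 x i8> @lane_permute_and_permute_sketch(<64 x i8> %v) {
  ; Step 1: cross-lane permute of qword-sized chunks (maps to VPERMQ).
  %q = bitcast <64 x i8> %v to <8 x i64>
  %qp = shufflevector <8 x i64> %q, <8 x i64> undef,
          <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %b = bitcast <8 x i64> %qp to <64 x i8>
  ; Step 2: byte permute that never crosses a 128-bit lane (maps to VPSHUFB).
  %r = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> <
          i32 0,  i32 8,  i32 1,  i32 9,  i32 2,  i32 10, i32 3,  i32 11,
          i32 4,  i32 12, i32 5,  i32 13, i32 6,  i32 14, i32 7,  i32 15,
          i32 16, i32 24, i32 17, i32 25, i32 18, i32 26, i32 19, i32 27,
          i32 20, i32 28, i32 21, i32 29, i32 22, i32 30, i32 23, i32 31,
          i32 32, i32 40, i32 33, i32 41, i32 34, i32 42, i32 35, i32 43,
          i32 36, i32 44, i32 37, i32 45, i32 38, i32 46, i32 39, i32 47,
          i32 48, i32 56, i32 49, i32 57, i32 50, i32 58, i32 51, i32 59,
          i32 52, i32 60, i32 53, i32 61, i32 54, i32 62, i32 55, i32 63>
  ret <64 x i8> %r
}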
23 changes: 6 additions & 17 deletions llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -859,15 +859,9 @@ define <64 x i8> @PR54562_ref(<64 x i8> %a0) {
 ;
 ; AVX512BW-LABEL: PR54562_ref:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,1,2]
-; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30]
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10]
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,2,3,4,4,5]
+; AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30,33,32,34,33,36,35,37,36,39,38,40,39,42,41,43,42,53,52,54,53,56,55,57,56,59,58,60,59,62,61,63,62]
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512DQ-LABEL: PR54562_ref:
@@ -910,14 +904,9 @@ define void @PR54562_mem(<64 x i8>* %src, <64 x i8>* %dst) {
 ;
 ; AVX512BW-LABEL: PR54562_mem:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = mem[0,1,1,2]
-; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30]
-; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm1
-; AVX512BW-NEXT: vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10]
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[5,4,6,5,8,7,9,8,11,10,12,11,14,13,15,14]
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,1,1,2,3,4,4,5]
+; AVX512BW-NEXT: vpermq (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,0,2,1,4,3,5,4,7,6,8,7,10,9,11,10,21,20,22,21,24,23,25,24,27,26,28,27,30,29,31,30,33,32,34,33,36,35,37,36,39,38,40,39,42,41,43,42,53,52,54,53,56,55,57,56,59,58,60,59,62,61,63,62]
 ; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rsi)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
