Skip to content

Commit

Permalink
[X86] Add shift by splat modulo amount vector tests
Browse files Browse the repository at this point in the history
Shows the failure to fold zero_extend_vector_inreg(and(x, c)) -> bitcast(and(x, c')) when only the 0th extended element is demanded, as with the SSE variable shift ops.
  • Loading branch information
RKSimon committed Nov 16, 2021
1 parent df0c2b9 commit aeb3c77
Show file tree
Hide file tree
Showing 9 changed files with 2,297 additions and 0 deletions.
353 changes: 353 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
Expand Up @@ -920,6 +920,359 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
ret <16 x i8> %shift
}

;
; Uniform Variable Modulo Shifts
;

; Arithmetic right shift of <2 x i64> %a by a uniform amount that is first
; reduced modulo the element width:
;   %mod   = %b & 63 in every lane (63 = 64 - 1, so the amount stays below 64)
;   %splat = lane 0 of %mod broadcast to both lanes
;   result = ashr %a, %splat
; The ';'-prefixed groups between the define line and the IR body are FileCheck
; expectations, one group per check prefix. The RUN lines that bind each prefix
; to a target configuration are outside this view. The groups look
; script-generated (NOTE(review) - confirm against the file header), so
; regenerate them rather than editing by hand.
; What the expectations show: the plain SSE and AVX groups build the shift from
; logical shifts (psrlq) of both the value and a sign-bit constant (2^63, or
; the pair 0,2147483648 in the 32-bit group), followed by xor and subtract.
; The XOP groups negate the amount and use vpshaq. The AVX512 groups use vpsraq.
define <2 x i64> @splatvar_modulo_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-LABEL: splatvar_modulo_shift_v2i64:
; SSE: # %bb.0:
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE-NEXT: psrlq %xmm1, %xmm2
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: psubq %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v2i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v2i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT: vpshaq %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpsraq %xmm1, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsraq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v2i64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-SSE-NEXT: psrlq %xmm1, %xmm2
; X86-SSE-NEXT: psrlq %xmm1, %xmm0
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: psubq %xmm2, %xmm0
; X86-SSE-NEXT: retl
; IR under test - mask the amount, splat lane 0, then shift.
%mod = and <2 x i64> %b, <i64 63, i64 63>
%splat = shufflevector <2 x i64> %mod, <2 x i64> undef, <2 x i32> zeroinitializer
%shift = ashr <2 x i64> %a, %splat
ret <2 x i64> %shift
}

; Arithmetic right shift of <4 x i32> %a by a uniform amount that is first
; reduced modulo the element width:
;   %mod   = %b & 31 in every lane (31 = 32 - 1, so the amount stays below 32)
;   %splat = lane 0 of %mod broadcast to all four lanes
;   result = ashr %a, %splat
; The ';'-prefixed groups between the define line and the IR body are FileCheck
; expectations, one group per check prefix. The RUN lines that bind each prefix
; to a target configuration are outside this view. Regenerate the groups rather
; than editing them by hand.
; What the expectations show: the SSE2 and 32-bit groups move lane 0 through
; %eax and mask it there (andl $31). Every other group keeps a full-vector AND
; followed by a zero-extend of the low lanes (pmovzxdq) before psrad.
; NOTE(review) - that AND + zero-extend pair appears to be the unfolded pattern
; this test was added to expose; confirm against the commit message.
define <4 x i32> @splatvar_modulo_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: andl $31, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: psrad %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT: psrad %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_modulo_shift_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_modulo_shift_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v4i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v4i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; XOPAVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; XOPAVX2-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v4i32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd %xmm1, %eax
; X86-SSE-NEXT: andl $31, %eax
; X86-SSE-NEXT: movd %eax, %xmm1
; X86-SSE-NEXT: psrad %xmm1, %xmm0
; X86-SSE-NEXT: retl
; IR under test - mask the amount, splat lane 0, then shift.
%mod = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
%splat = shufflevector <4 x i32> %mod, <4 x i32> undef, <4 x i32> zeroinitializer
%shift = ashr <4 x i32> %a, %splat
ret <4 x i32> %shift
}

; Arithmetic right shift of <8 x i16> %a by a uniform amount that is first
; reduced modulo the element width:
;   %mod   = %b & 15 in every lane (15 = 16 - 1, so the amount stays below 16)
;   %splat = lane 0 of %mod broadcast to all eight lanes
;   result = ashr %a, %splat
; The ';'-prefixed groups between the define line and the IR body are FileCheck
; expectations, one group per check prefix. The RUN lines that bind each prefix
; to a target configuration are outside this view. Regenerate the groups rather
; than editing them by hand.
; What the expectations show: every group applies a full-vector AND first. The
; SSE2 and 32-bit groups then isolate the low word with a byte-shift pair
; (pslldq then psrldq). The remaining groups zero-extend the low words with
; pmovzxwq. All of them finish with psraw / vpsraw.
define <8 x i16> @splatvar_modulo_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psraw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: psraw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: splatvar_modulo_shift_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: splatvar_modulo_shift_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; AVX512-LABEL: splatvar_modulo_shift_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_modulo_shift_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v8i16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psraw %xmm1, %xmm0
; X86-SSE-NEXT: retl
; IR under test - mask the amount, splat lane 0, then shift.
%mod = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%splat = shufflevector <8 x i16> %mod, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i16> %a, %splat
ret <8 x i16> %shift
}

; Arithmetic right shift of <16 x i8> %a by a uniform amount that is first
; reduced modulo the element width:
;   %mod   = %b & 7 in every lane (7 = 8 - 1, so the amount stays below 8)
;   %splat = lane 0 of %mod broadcast to all sixteen lanes
;   result = ashr %a, %splat
; The ';'-prefixed groups between the define line and the IR body are FileCheck
; expectations, one group per check prefix. The RUN lines that bind each prefix
; to a target configuration are outside this view. Regenerate the groups rather
; than editing them by hand.
; What the expectations show:
;   - The SSE2/SSE4.1/AVX1/AVX2 and 32-bit groups shift 16-bit words (psrlw),
;     AND with a byte mask built from an equally shifted all-ones register, then
;     xor and byte-subtract the shifted constant 32896 (0x8080, bit 7 set in
;     both bytes of each word).
;   - The XOP groups splat and negate the amount, then use vpshab.
;   - The AVX512 groups sign-extend the bytes (vpmovsxbd or vpmovsxbw), shift
;     with vpsrad / vpsraw, and truncate back (vpmovdb / vpmovwb).
define <16 x i8> @splatvar_modulo_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: splatvar_modulo_shift_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: psrlw %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: psrlw %xmm1, %xmm2
; SSE2-NEXT: psrlw $8, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
; SSE2-NEXT: psrlw %xmm1, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: psubb %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_modulo_shift_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: psrlw %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
; SSE41-NEXT: psrlw %xmm1, %xmm2
; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
; SSE41-NEXT: psrlw %xmm1, %xmm2
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: psubb %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_modulo_shift_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
; AVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_modulo_shift_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_modulo_shift_v16i8:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_modulo_shift_v16i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; AVX512DQ-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: splatvar_modulo_shift_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
; X86-SSE-LABEL: splatvar_modulo_shift_v16i8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-SSE-NEXT: psrlw %xmm1, %xmm0
; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm2
; X86-SSE-NEXT: psrlw %xmm1, %xmm2
; X86-SSE-NEXT: psrlw $8, %xmm2
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; X86-SSE-NEXT: pand %xmm2, %xmm0
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
; X86-SSE-NEXT: psrlw %xmm1, %xmm2
; X86-SSE-NEXT: pxor %xmm2, %xmm0
; X86-SSE-NEXT: psubb %xmm2, %xmm0
; X86-SSE-NEXT: retl
; IR under test - mask the amount, splat lane 0, then shift.
%mod = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%splat = shufflevector <16 x i8> %mod, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i8> %a, %splat
ret <16 x i8> %shift
}

;
; Constant Shifts
;
Expand Down

0 comments on commit aeb3c77

Please sign in to comment.