Skip to content

Commit

Permalink
[X86][AVX] Add v4i64 shift-by-32 tests
Browse files Browse the repository at this point in the history
AVX1 could perform this as a v8f32 shuffle instead of splitting - based on PR46621
  • Loading branch information
RKSimon committed May 12, 2021
1 parent c5ec00e commit 778562a
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 0 deletions.
74 changes: 74 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
Expand Up @@ -1659,3 +1659,77 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
%shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}

;
; Special Cases
;

define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: shift32_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shift32_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: shift32_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744073709551584,18446744073709551584]
; XOPAVX1-NEXT: vpshaq %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpshaq %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: shift32_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsrad $31, %ymm0, %ymm1
; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: shift32_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpsraq $32, %zmm0, %zmm0
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: shift32_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsraq $32, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: shift32_v4i64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X86-AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: shift32_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
; X86-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X86-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; X86-AVX2-NEXT: retl
%shift = ashr <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
ret <4 x i64> %shift
}
57 changes: 57 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
Expand Up @@ -1390,6 +1390,63 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
ret <32 x i8> %shift
}

;
; Special Cases
;

define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: shift32_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shift32_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: shift32_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpsrlq $32, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: shift32_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: shift32_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: shift32_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: shift32_v4i64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm1
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: shift32_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%shift = lshr <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
ret <4 x i64> %shift
}

define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
; AVX1-LABEL: sh_trunc_sh_vec:
; AVX1: # %bb.0:
Expand Down
57 changes: 57 additions & 0 deletions llvm/test/CodeGen/X86/vector-shift-shl-256.ll
Expand Up @@ -1298,3 +1298,60 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
%shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}

;
; Special Cases
;

define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: shift32_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shift32_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: shift32_v4i64:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpsllq $32, %xmm0, %xmm1
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; XOPAVX1-NEXT: vpsllq $32, %xmm0, %xmm0
; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: shift32_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpsllq $32, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: shift32_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: shift32_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; X86-AVX1-LABEL: shift32_v4i64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpsllq $32, %xmm0, %xmm1
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: shift32_v4i64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
%shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
ret <4 x i64> %shift
}

0 comments on commit 778562a

Please sign in to comment.