Skip to content

Commit

Permalink
[x86] add tests for variants of usubsat; NFC
Browse files Browse the repository at this point in the history
  • Loading branch information
rotateright committed Oct 24, 2021
1 parent d34cad3 commit 35c767a
Showing 1 changed file with 101 additions and 0 deletions.
101 changes: 101 additions & 0 deletions llvm/test/CodeGen/X86/psubus.ll
Expand Up @@ -47,6 +47,27 @@ define <8 x i16> @ashr_xor_and(<8 x i16> %x) nounwind {
ret <8 x i16> %res
}

; Computes (ashr X, 15) & (X + 32768) on an <8 x i16> vector.
; Presumably the add-based sibling of the ashr_xor_and test (confirm against
; that test) - adding 32768 to an i16 lane changes only its sign bit, because
; the carry out of bit 15 is discarded.
define <8 x i16> @ashr_add_and(<8 x i16> %x) nounwind {
; SSE-LABEL: ashr_add_and:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ashr_add_and:
; AVX: # %bb.0:
; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
; Shifting by 15 (bit width - 1) makes each lane all-ones when the lane of %x
; is negative and zero otherwise.
%signsplat = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; Lane 0 of the addend is undef, so the constant is not a full splat of 32768.
%flipsign = add <8 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
; The sign mask is the first operand of the 'and' in this variant.
%res = and <8 x i16> %signsplat, %flipsign
ret <8 x i16> %res
}

; negative test - extra uses may lead to extra instructions when custom-lowered

define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x i8>* %p2) nounwind {
Expand Down Expand Up @@ -118,6 +139,42 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
ret <4 x i32> %res
}

; Computes (X + 2147483648) & (ashr X, 31) on a <4 x i32> vector.
; Adding 2147483648 to an i32 lane changes only its sign bit, because the
; carry out of bit 31 is discarded.
; The "_custom" suffix presumably refers to custom lowering of the matching
; usubsat node for this type (review note - confirm against the X86 backend).
define <4 x i32> @ashr_add_and_custom(<4 x i32> %x) nounwind {
; SSE-LABEL: ashr_add_and_custom:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: ashr_add_and_custom:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ashr_add_and_custom:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ashr_add_and_custom:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
; Lane 0 of the shift amount is undef - the other lanes shift by 31
; (bit width - 1), giving all-ones for negative lanes of %x and zero otherwise.
%signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>
; Here the addend is a full splat of the sign-bit constant.
%flipsign = add <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
; Operands are commuted relative to ashr_add_and - the add result comes first.
%res = and <4 x i32> %flipsign, %signsplat
ret <4 x i32> %res
}

; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))

define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
Expand Down Expand Up @@ -368,6 +425,50 @@ define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
ret <16 x i16> %res
}

; Computes (ashr X, 15) & (X + 32768) on a 256-bit <16 x i16> vector.
; Same shape as ashr_add_and, with twice as many lanes.
; In the expectations below, the SSE and AVX1 sequences work on two 128-bit
; halves, while the AVX2 and AVX512 sequences use one ymm operation per step.
define <16 x i16> @ashr_add_and_v16i16(<16 x i16> %x) nounwind {
; SSE-LABEL: ashr_add_and_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: psraw $15, %xmm3
; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pand %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: ashr_add_and_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsraw $15, %xmm2, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ashr_add_and_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ashr_add_and_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %ymm0, %ymm1
; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
; Shifting by 15 (bit width - 1) makes each lane all-ones when the lane of %x
; is negative and zero otherwise.
%signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; Lane 0 of the addend is undef, so the constant is not a full splat of 32768.
; Adding 32768 to an i16 lane changes only its sign bit.
%flipsign = add <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
; The sign mask is the first operand of the 'and' in this variant.
%res = and <16 x i16> %signsplat, %flipsign
ret <16 x i16> %res
}

define <16 x i16> @test8(<16 x i16> %x) nounwind {
; SSE-LABEL: test8:
; SSE: # %bb.0: # %vector.ph
Expand Down

0 comments on commit 35c767a

Please sign in to comment.