Skip to content

Commit

Permalink
[X86][SSE] Auto upgrade PADDS/PSUBS intrinsics to SADD_SAT/SSUB_SAT g…
Browse files Browse the repository at this point in the history
…eneric intrinsics (llvm)

Pulled out of D55894 to match the clang changes in D55890.

Differential Revision: https://reviews.llvm.org/D55890

llvm-svn: 349744
  • Loading branch information
RKSimon committed Dec 20, 2018
1 parent 313dc85 commit 6bbf39b
Show file tree
Hide file tree
Showing 5 changed files with 898 additions and 30 deletions.
24 changes: 12 additions & 12 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Expand Up @@ -364,16 +364,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".

// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
def int_x86_sse2_padds_b :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
def int_x86_sse2_padds_w :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
def int_x86_sse2_psubs_b :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,
def int_x86_sse2_psubs_w :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
Expand Down Expand Up @@ -1346,16 +1346,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".

// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">,
def int_x86_avx2_padds_b :
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">,
def int_x86_avx2_padds_w :
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">,
def int_x86_avx2_psubs_b :
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">,
def int_x86_avx2_psubs_w :
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
Expand Down Expand Up @@ -3677,16 +3677,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
// Integer arithmetic ops
let TargetPrefix = "x86" in {
def int_x86_avx512_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512">,
def int_x86_avx512_padds_b_512 :
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem]>;
def int_x86_avx512_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512">,
def int_x86_avx512_padds_w_512 :
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
def int_x86_avx512_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512">,
def int_x86_avx512_psubs_b_512 :
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem]>;
def int_x86_avx512_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512">,
def int_x86_avx512_psubs_w_512 :
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
def int_x86_avx512_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512">,
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
Expand Up @@ -98,11 +98,11 @@ define <4 x i64> @test_mm256_adds_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-NEXT: ret{{[l|q]}}
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
%res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %arg0, <32 x i8> %arg1)
%res = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %arg0, <32 x i8> %arg1)
%bc = bitcast <32 x i8> %res to <4 x i64>
ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_adds_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_adds_epi16:
Expand All @@ -111,11 +111,11 @@ define <4 x i64> @test_mm256_adds_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-NEXT: ret{{[l|q]}}
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
%res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %arg0, <16 x i16> %arg1)
%res = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %arg0, <16 x i16> %arg1)
%bc = bitcast <16 x i16> %res to <4 x i64>
ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_adds_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_adds_epu8:
Expand Down Expand Up @@ -2527,11 +2527,11 @@ define <4 x i64> @test_mm256_subs_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-NEXT: ret{{[l|q]}}
%arg0 = bitcast <4 x i64> %a0 to <32 x i8>
%arg1 = bitcast <4 x i64> %a1 to <32 x i8>
%res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %arg0, <32 x i8> %arg1)
%res = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %arg0, <32 x i8> %arg1)
%bc = bitcast <32 x i8> %res to <4 x i64>
ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone

define <4 x i64> @test_mm256_subs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_subs_epi16:
Expand All @@ -2540,11 +2540,11 @@ define <4 x i64> @test_mm256_subs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-NEXT: ret{{[l|q]}}
%arg0 = bitcast <4 x i64> %a0 to <16 x i16>
%arg1 = bitcast <4 x i64> %a1 to <16 x i16>
%res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %arg0, <16 x i16> %arg1)
%res = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %arg0, <16 x i16> %arg1)
%bc = bitcast <16 x i16> %res to <4 x i64>
ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_mm256_subs_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_mm256_subs_epu8:
Expand Down

0 comments on commit 6bbf39b

Please sign in to comment.