diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll index 16326a64f67ee..9e09b7f9a4bd6 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for vec declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) @@ -9,74 +12,128 @@ declare i64 @llvm.sadd.sat.i64(i64, i64) declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) define i32 @func(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: func: -; CHECK: // %bb.0: -; CHECK-NEXT: adds w8, w0, w1 -; CHECK-NEXT: asr w9, w8, #31 -; CHECK-NEXT: eor w9, w9, #0x80000000 -; CHECK-NEXT: csel w0, w9, w8, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds w8, w0, w1 +; CHECK-SD-NEXT: asr w9, w8, #31 +; CHECK-SD-NEXT: eor w9, w9, #0x80000000 +; CHECK-SD-NEXT: csel w0, w9, w8, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-GI-NEXT: adds w9, w0, w1 +; CHECK-GI-NEXT: cset w10, vs +; CHECK-GI-NEXT: add w8, w8, w9, asr #31 +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel w0, w8, w9, ne +; CHECK-GI-NEXT: ret %tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %y); ret i32 %tmp; } define i64 @func2(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: func2: -; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds x8, x0, x1 +; CHECK-SD-NEXT: asr x9, x8, #63 +; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000 +; CHECK-SD-NEXT: csel x0, x9, x8, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: adds x9, x0, x1 +; CHECK-GI-NEXT: cset w10, vs +; CHECK-GI-NEXT: add x8, x8, x9, asr #63 +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel x0, x8, x9, ne +; CHECK-GI-NEXT: ret %tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y); ret i64 %tmp; } define i16 @func16(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: func16: -; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w9, #32767 // =0x7fff -; CHECK-NEXT: add w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-32768 // =0xffff8000 -; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sxth w8, w0 +; CHECK-SD-NEXT: mov w9, #32767 // =0x7fff +; CHECK-SD-NEXT: add w8, w8, w1, sxth +; CHECK-SD-NEXT: cmp w8, w9 +; CHECK-SD-NEXT: csel w8, w8, w9, lt +; CHECK-SD-NEXT: mov w9, #-32768 // =0xffff8000 +; CHECK-SD-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sxth w8, w1 +; CHECK-GI-NEXT: add w8, w8, w0, sxth +; CHECK-GI-NEXT: sxth w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #15 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y); ret i16 %tmp; } define i8 @func8(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: func8: -; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: mov w8, #127 // =0x7f -; CHECK-NEXT: add w9, w9, w1, sxtb -; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: csel w8, w9, w8, lt -; CHECK-NEXT: mov w9, #-128 // =0xffffff80 -; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sxtb w9, w0 +; CHECK-SD-NEXT: mov w8, #127 // =0x7f +; CHECK-SD-NEXT: add w9, w9, w1, sxtb +; CHECK-SD-NEXT: cmp w9, #127 +; CHECK-SD-NEXT: csel w8, w9, w8, lt +; CHECK-SD-NEXT: mov w9, #-128 // =0xffffff80 +; CHECK-SD-NEXT: cmn w8, #128 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sxtb w8, w1 +; CHECK-GI-NEXT: add w8, w8, w0, sxtb +; CHECK-GI-NEXT: sxtb w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #7 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #128 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y); ret i8 %tmp; } define i4 @func3(i4 %x, i4 %y) nounwind { -; CHECK-LABEL: func3: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w9, w1, #28 -; CHECK-NEXT: sbfx w10, w0, #0, #4 -; CHECK-NEXT: mov w8, #7 // =0x7 -; CHECK-NEXT: add w9, w10, w9, asr #28 -; CHECK-NEXT: cmp w9, #7 -; CHECK-NEXT: csel w8, w9, w8, lt -; CHECK-NEXT: mov w9, #-8 // =0xfffffff8 -; CHECK-NEXT: cmn w8, #8 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w9, w1, #28 +; CHECK-SD-NEXT: sbfx w10, w0, #0, #4 +; CHECK-SD-NEXT: mov w8, #7 // =0x7 +; CHECK-SD-NEXT: add w9, w10, w9, asr #28 +; CHECK-SD-NEXT: cmp w9, #7 +; CHECK-SD-NEXT: csel w8, w9, w8, lt +; CHECK-SD-NEXT: mov w9, #-8 // =0xfffffff8 +; CHECK-SD-NEXT: cmn w8, #8 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sbfx w8, w0, #0, #4 +; CHECK-GI-NEXT: sbfx w9, w1, #0, #4 +; CHECK-GI-NEXT: add w8, w8, w9 +; CHECK-GI-NEXT: sbfx w9, w8, #0, #4 +; CHECK-GI-NEXT: asr w10, w9, #3 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: add w10, w10, #8 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y); ret i4 %tmp; } diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll index 49ee5ae261a61..ecc8cbaeeecae 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) @@ -8,83 +9,143 @@ declare i32 @llvm.sadd.sat.i32(i32, i32) declare i64 @llvm.sadd.sat.i64(i64, i64) define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { -; CHECK-LABEL: func32: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: adds w8, w0, w8 -; CHECK-NEXT: asr w9, w8, #31 -; CHECK-NEXT: eor w9, w9, #0x80000000 -; CHECK-NEXT: csel w0, w9, w8, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: adds w8, w0, w8 +; CHECK-SD-NEXT: asr w9, w8, #31 +; CHECK-SD-NEXT: eor w9, w9, #0x80000000 +; CHECK-SD-NEXT: csel w0, w9, w8, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: mov w9, #-2147483648 // =0x80000000 +; CHECK-GI-NEXT: adds w8, w0, w8 +; CHECK-GI-NEXT: cset w10, vs +; CHECK-GI-NEXT: add w9, w9, w8, asr #31 +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel w0, w9, w8, ne +; CHECK-GI-NEXT: ret %a = mul i32 %y, %z %tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %a) ret i32 %tmp } define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { -; CHECK-LABEL: func64: -; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x2 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds x8, x0, x2 +; CHECK-SD-NEXT: asr x9, x8, #63 +; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000 +; CHECK-SD-NEXT: csel x0, x9, x8, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: adds x9, x0, x2 +; CHECK-GI-NEXT: cset w10, vs +; CHECK-GI-NEXT: add x8, x8, x9, asr #63 +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel x0, x8, x9, ne +; CHECK-GI-NEXT: ret %a = mul i64 %y, %z %tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %z) ret i64 %tmp } define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { -; CHECK-LABEL: func16: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: add w8, w9, w8, sxth -; CHECK-NEXT: mov w9, #32767 // =0x7fff -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-32768 // =0xffff8000 -; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: sxth w9, w0 +; CHECK-SD-NEXT: add w8, w9, w8, sxth +; CHECK-SD-NEXT: mov w9, #32767 // =0x7fff +; CHECK-SD-NEXT: cmp w8, w9 +; CHECK-SD-NEXT: csel w8, w8, w9, lt +; CHECK-SD-NEXT: mov w9, #-32768 // =0xffff8000 +; CHECK-SD-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: sxth w8, w8 +; CHECK-GI-NEXT: add w8, w8, w0, sxth +; CHECK-GI-NEXT: sxth w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #15 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %a = mul i16 %y, %z %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a) ret i16 %tmp } define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { -; CHECK-LABEL: func8: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: add w8, w9, w8, sxtb -; CHECK-NEXT: mov w9, #127 // =0x7f -; CHECK-NEXT: cmp w8, #127 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-128 // =0xffffff80 -; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: sxtb w9, w0 +; CHECK-SD-NEXT: add w8, w9, w8, sxtb +; CHECK-SD-NEXT: mov w9, #127 // =0x7f +; CHECK-SD-NEXT: cmp w8, #127 +; CHECK-SD-NEXT: csel w8, w8, w9, lt +; CHECK-SD-NEXT: mov w9, #-128 // =0xffffff80 +; CHECK-SD-NEXT: cmn w8, #128 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: sxtb w8, w8 +; CHECK-GI-NEXT: add w8, w8, w0, sxtb +; CHECK-GI-NEXT: sxtb w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #7 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #128 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %a = mul i8 %y, %z %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %a) ret i8 %tmp } define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { -; CHECK-LABEL: func4: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: sbfx w9, w0, #0, #4 -; CHECK-NEXT: lsl w8, w8, #28 -; CHECK-NEXT: add w8, w9, w8, asr #28 -; CHECK-NEXT: mov w9, #7 // =0x7 -; CHECK-NEXT: cmp w8, #7 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-8 // =0xfffffff8 -; CHECK-NEXT: cmn w8, #8 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: sbfx w9, w0, #0, #4 +; CHECK-SD-NEXT: lsl w8, w8, #28 +; CHECK-SD-NEXT: add w8, w9, w8, asr #28 +; CHECK-SD-NEXT: mov w9, #7 // =0x7 +; CHECK-SD-NEXT: cmp w8, #7 +; CHECK-SD-NEXT: csel w8, w8, w9, lt +; CHECK-SD-NEXT: mov w9, #-8 // =0xfffffff8 +; CHECK-SD-NEXT: cmn w8, #8 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: sbfx w9, w0, #0, #4 +; CHECK-GI-NEXT: sbfx w8, w8, #0, #4 +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: sbfx w9, w8, #0, #4 +; CHECK-GI-NEXT: asr w10, w9, #3 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: add w10, w10, #8 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %a = mul i4 %y, %z %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %a) ret i4 %tmp } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 6ec5d22dca183..5f905d94e3573 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -1,5 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) @@ -222,13 +247,26 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v1i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b -; CHECK-NEXT: st1 { v0.b }[0], [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v1i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ldr b1, [x1] +; CHECK-SD-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: st1 { v0.b }[0], [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v1i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrsb w8, [x0] +; CHECK-GI-NEXT: ldrsb w9, [x1] +; CHECK-GI-NEXT: add w8, w8, w9 +; CHECK-GI-NEXT: sxtb w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #7 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #128 +; CHECK-GI-NEXT: csel w8, w10, w8, ne +; CHECK-GI-NEXT: strb w8, [x2] +; CHECK-GI-NEXT: ret %x = load <1 x i8>, ptr %px %y = load <1 x i8>, ptr %py %z = call <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8> %x, <1 x i8> %y) @@ -237,13 +275,26 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h -; CHECK-NEXT: str h0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v1i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: str h0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v1i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrsh w8, [x0] +; CHECK-GI-NEXT: ldrsh w9, [x1] +; CHECK-GI-NEXT: add w8, w8, w9 +; CHECK-GI-NEXT: sxth w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #15 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: csel w8, w10, w8, ne +; CHECK-GI-NEXT: strh w8, [x2] +; CHECK-GI-NEXT: ret %x = load <1 x i16>, ptr %px %y = load <1 x i16>, ptr %py %z = call <1 x i16> @llvm.sadd.sat.v1i16(<1 x i16> %x, <1 x i16> %y) diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll index 4ecfc03c8bbd7..abeb4b357fa9f 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for vec declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) @@ -9,74 +12,128 @@ declare i64 @llvm.ssub.sat.i64(i64, i64) declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) define i32 @func(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: func: -; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: asr w9, w8, #31 -; CHECK-NEXT: eor w9, w9, #0x80000000 -; CHECK-NEXT: csel w0, w9, w8, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs w8, w0, w1 +; CHECK-SD-NEXT: asr w9, w8, #31 +; CHECK-SD-NEXT: eor w9, w9, #0x80000000 +; CHECK-SD-NEXT: csel w0, w9, w8, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-2147483648 // =0x80000000 +; CHECK-GI-NEXT: subs w9, w0, w1 +; CHECK-GI-NEXT: cset w10, vs +; CHECK-GI-NEXT: add w8, w8, w9, asr #31 +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel w0, w8, w9, ne +; CHECK-GI-NEXT: ret %tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %y); ret i32 %tmp; } define i64 @func2(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: func2: -; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs x8, x0, x1 +; CHECK-SD-NEXT: asr x9, x8, #63 +; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000 +; CHECK-SD-NEXT: csel x0, x9, x8, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: subs x9, x0, x1 +; CHECK-GI-NEXT: cset w10, vs +; CHECK-GI-NEXT: add x8, x8, x9, asr #63 +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel x0, x8, x9, ne +; CHECK-GI-NEXT: ret %tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y); ret i64 %tmp; } define i16 @func16(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: func16: -; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w9, #32767 // =0x7fff -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-32768 // =0xffff8000 -; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sxth w8, w0 +; CHECK-SD-NEXT: mov w9, #32767 // =0x7fff +; CHECK-SD-NEXT: sub w8, w8, w1, sxth +; CHECK-SD-NEXT: cmp w8, w9 +; CHECK-SD-NEXT: csel w8, w8, w9, lt +; CHECK-SD-NEXT: mov w9, #-32768 // =0xffff8000 +; CHECK-SD-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sxth w8, w0 +; CHECK-GI-NEXT: sub w8, w8, w1, sxth +; CHECK-GI-NEXT: sxth w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #15 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y); ret i16 %tmp; } define i8 @func8(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: func8: -; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: mov w8, #127 // =0x7f -; CHECK-NEXT: sub w9, w9, w1, sxtb -; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: csel w8, w9, w8, lt -; CHECK-NEXT: mov w9, #-128 // =0xffffff80 -; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sxtb w9, w0 +; CHECK-SD-NEXT: mov w8, #127 // =0x7f +; CHECK-SD-NEXT: sub w9, w9, w1, sxtb +; CHECK-SD-NEXT: cmp w9, #127 +; CHECK-SD-NEXT: csel w8, w9, w8, lt +; CHECK-SD-NEXT: mov w9, #-128 // =0xffffff80 +; CHECK-SD-NEXT: cmn w8, #128 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sxtb w8, w0 +; CHECK-GI-NEXT: sub w8, w8, w1, sxtb +; CHECK-GI-NEXT: sxtb w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #7 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #128 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %y); ret i8 %tmp; } define i4 @func3(i4 %x, i4 %y) nounwind { -; CHECK-LABEL: func3: -; CHECK: // %bb.0: -; CHECK-NEXT: lsl w9, w1, #28 -; CHECK-NEXT: sbfx w10, w0, #0, #4 -; CHECK-NEXT: mov w8, #7 // =0x7 -; CHECK-NEXT: sub w9, w10, w9, asr #28 -; CHECK-NEXT: cmp w9, #7 -; CHECK-NEXT: csel w8, w9, w8, lt -; CHECK-NEXT: mov w9, #-8 // =0xfffffff8 -; CHECK-NEXT: cmn w8, #8 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl w9, w1, #28 +; CHECK-SD-NEXT: sbfx w10, w0, #0, #4 +; CHECK-SD-NEXT: mov w8, #7 // =0x7 +; CHECK-SD-NEXT: sub w9, w10, w9, asr #28 +; CHECK-SD-NEXT: cmp w9, #7 +; CHECK-SD-NEXT: csel w8, w9, w8, lt +; CHECK-SD-NEXT: mov w9, #-8 // =0xfffffff8 +; CHECK-SD-NEXT: cmn w8, #8 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sbfx w8, w0, #0, #4 +; CHECK-GI-NEXT: sbfx w9, w1, #0, #4 +; CHECK-GI-NEXT: sub w8, w8, w9 +; CHECK-GI-NEXT: sbfx w9, w8, #0, #4 +; CHECK-GI-NEXT: asr w10, w9, #3 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: add w10, w10, #8 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y); ret i4 %tmp; } diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll index f7634f82499e7..25d615f6451ba 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) @@ -8,83 +9,143 @@ declare i32 @llvm.ssub.sat.i32(i32, i32) declare i64 @llvm.ssub.sat.i64(i64, i64) define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { -; CHECK-LABEL: func32: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: asr w9, w8, #31 -; CHECK-NEXT: eor w9, w9, #0x80000000 -; CHECK-NEXT: csel w0, w9, w8, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: subs w8, w0, w8 +; CHECK-SD-NEXT: asr w9, w8, #31 +; CHECK-SD-NEXT: eor w9, w9, #0x80000000 +; CHECK-SD-NEXT: csel w0, w9, w8, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: mov w9, #-2147483648 // =0x80000000 +; CHECK-GI-NEXT: subs w8, w0, w8 +; CHECK-GI-NEXT: cset w10, vs +; CHECK-GI-NEXT: add w9, w9, w8, asr #31 +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel w0, w9, w8, ne +; CHECK-GI-NEXT: ret %a = mul i32 %y, %z %tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %a) ret i32 %tmp } define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { -; CHECK-LABEL: func64: -; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x2 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x0, x9, x8, vs -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs x8, x0, x2 +; CHECK-SD-NEXT: asr x9, x8, #63 +; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000 +; CHECK-SD-NEXT: csel x0, x9, x8, vs +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-GI-NEXT: subs x9, x0, x2 +; CHECK-GI-NEXT: cset w10, vs +; CHECK-GI-NEXT: add x8, x8, x9, asr #63 +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel x0, x8, x9, ne +; CHECK-GI-NEXT: ret %a = mul i64 %y, %z %tmp = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %z) ret i64 %tmp } define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { -; CHECK-LABEL: func16: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: sub w8, w9, w8, sxth -; CHECK-NEXT: mov w9, #32767 // =0x7fff -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-32768 // =0xffff8000 -; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: sxth w9, w0 +; CHECK-SD-NEXT: sub w8, w9, w8, sxth +; CHECK-SD-NEXT: mov w9, #32767 // =0x7fff +; CHECK-SD-NEXT: cmp w8, w9 +; CHECK-SD-NEXT: csel w8, w8, w9, lt +; CHECK-SD-NEXT: mov w9, #-32768 // =0xffff8000 +; CHECK-SD-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: sxth w9, w0 +; CHECK-GI-NEXT: sub w8, w9, w8, sxth +; CHECK-GI-NEXT: sxth w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #15 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %a = mul i16 %y, %z %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %a) ret i16 %tmp } define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { -; CHECK-LABEL: func8: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: sub w8, w9, w8, sxtb -; CHECK-NEXT: mov w9, #127 // =0x7f -; CHECK-NEXT: cmp w8, #127 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-128 // =0xffffff80 -; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: sxtb w9, w0 +; CHECK-SD-NEXT: sub w8, w9, w8, sxtb +; CHECK-SD-NEXT: mov w9, #127 // =0x7f +; CHECK-SD-NEXT: cmp w8, #127 +; CHECK-SD-NEXT: csel w8, w8, w9, lt +; CHECK-SD-NEXT: mov w9, #-128 // =0xffffff80 +; CHECK-SD-NEXT: cmn w8, #128 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: sxtb w9, w0 +; CHECK-GI-NEXT: sub w8, w9, w8, sxtb +; CHECK-GI-NEXT: sxtb w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #7 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #128 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %a = mul i8 %y, %z %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %a) ret i8 %tmp } define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { -; CHECK-LABEL: func4: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: sbfx w9, w0, #0, #4 -; CHECK-NEXT: lsl w8, w8, #28 -; CHECK-NEXT: sub w8, w9, w8, asr #28 -; CHECK-NEXT: mov w9, #7 // =0x7 -; CHECK-NEXT: cmp w8, #7 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-8 // =0xfffffff8 -; CHECK-NEXT: cmn w8, #8 -; CHECK-NEXT: csel w0, w8, w9, gt -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: sbfx w9, w0, #0, #4 +; CHECK-SD-NEXT: lsl w8, w8, #28 +; CHECK-SD-NEXT: sub w8, w9, w8, asr #28 +; CHECK-SD-NEXT: mov w9, #7 // =0x7 +; CHECK-SD-NEXT: cmp w8, #7 +; CHECK-SD-NEXT: csel w8, w8, w9, lt +; CHECK-SD-NEXT: mov w9, #-8 // =0xfffffff8 +; CHECK-SD-NEXT: cmn w8, #8 +; CHECK-SD-NEXT: csel w0, w8, w9, gt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: sbfx w9, w0, #0, #4 +; CHECK-GI-NEXT: sbfx w8, w8, #0, #4 +; CHECK-GI-NEXT: sub w8, w9, w8 +; CHECK-GI-NEXT: sbfx w9, w8, #0, #4 +; CHECK-GI-NEXT: asr w10, w9, #3 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: add w10, w10, #8 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %a = mul i4 %y, %z %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %a) ret i4 %tmp } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index fa707d18710ae..acec3e74d3e93 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -1,5 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) @@ -223,13 +248,26 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v1i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b -; CHECK-NEXT: st1 { v0.b }[0], [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v1i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ldr b1, [x1] +; CHECK-SD-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: st1 { v0.b }[0], [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v1i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrsb w8, [x0] +; CHECK-GI-NEXT: ldrsb w9, [x1] +; CHECK-GI-NEXT: sub w8, w8, w9 +; CHECK-GI-NEXT: sxtb w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #7 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #128 +; CHECK-GI-NEXT: csel w8, w10, w8, ne +; CHECK-GI-NEXT: strb w8, [x2] +; CHECK-GI-NEXT: ret %x = load <1 x i8>, ptr %px %y = load <1 x i8>, ptr %py %z = call <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8> %x, <1 x i8> %y) @@ -238,13 +276,26 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: str h0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v1i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: str h0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v1i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrsh w8, [x0] +; CHECK-GI-NEXT: ldrsh w9, [x1] +; CHECK-GI-NEXT: sub w8, w8, w9 +; CHECK-GI-NEXT: sxth w9, w8 +; CHECK-GI-NEXT: asr w10, w9, #15 +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: sub w10, w10, #8, lsl #12 // =32768 +; CHECK-GI-NEXT: csel w8, w10, w8, ne +; CHECK-GI-NEXT: strh w8, [x2] +; CHECK-GI-NEXT: ret %x = load <1 x i16>, ptr %px %y = load <1 x i16>, ptr %py %z = call <1 x i16> @llvm.ssub.sat.v1i16(<1 x i16> %x, <1 x i16> %y) diff --git a/llvm/test/CodeGen/AArch64/uadd_sat.ll b/llvm/test/CodeGen/AArch64/uadd_sat.ll index 984cc8fcffbb6..ccf46e8fce2e1 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.uadd.sat.i4(i4, i4) declare i8 @llvm.uadd.sat.i8(i8, i8) @@ -8,61 +9,106 @@ declare i32 @llvm.uadd.sat.i32(i32, i32) declare i64 @llvm.uadd.sat.i64(i64, i64) define i32 @func(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: func: -; CHECK: // %bb.0: -; CHECK-NEXT: adds w8, w0, w1 -; CHECK-NEXT: csinv w0, w8, wzr, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds w8, w0, w1 +; CHECK-SD-NEXT: csinv w0, w8, wzr, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adds w8, w0, w1 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: tst w9, #0x1 +; CHECK-GI-NEXT: csinv w0, w8, wzr, eq +; CHECK-GI-NEXT: ret %tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y); ret i32 %tmp; } define i64 @func2(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: func2: -; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x1 -; CHECK-NEXT: csinv x0, x8, xzr, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds x8, x0, x1 +; CHECK-SD-NEXT: csinv x0, x8, xzr, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adds x8, x0, x1 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: tst w9, #0x1 +; CHECK-GI-NEXT: csinv x0, x8, xzr, eq +; CHECK-GI-NEXT: ret %tmp = call i64 @llvm.uadd.sat.i64(i64 %x, i64 %y); ret i64 %tmp; } define i16 @func16(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: func16: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: mov w9, #65535 // =0xffff -; CHECK-NEXT: add w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: csel w0, w8, w9, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: mov w9, #65535 // =0xffff +; CHECK-SD-NEXT: add w8, w8, w1, uxth +; CHECK-SD-NEXT: cmp w8, w9 +; CHECK-SD-NEXT: csel w0, w8, w9, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w1, #0xffff +; CHECK-GI-NEXT: add w8, w8, w0, uxth +; CHECK-GI-NEXT: cmp w8, w8, uxth +; CHECK-GI-NEXT: csinv w0, w8, wzr, eq +; CHECK-GI-NEXT: ret %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %y); ret i16 %tmp; } define i8 @func8(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: func8: -; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: mov w8, #255 // =0xff -; CHECK-NEXT: add w9, w9, w1, uxtb -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w0, w9, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w9, w0, #0xff +; CHECK-SD-NEXT: mov w8, #255 // =0xff +; CHECK-SD-NEXT: add w9, w9, w1, uxtb +; CHECK-SD-NEXT: cmp w9, #255 +; CHECK-SD-NEXT: csel w0, w9, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w1, #0xff +; CHECK-GI-NEXT: add w8, w8, w0, uxtb +; CHECK-GI-NEXT: cmp w8, w8, uxtb +; CHECK-GI-NEXT: csinv w0, w8, wzr, eq +; CHECK-GI-NEXT: ret %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y); ret i8 %tmp; } define i4 @func3(i4 %x, i4 %y) nounwind { -; CHECK-LABEL: func3: -; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w1, #0xf -; CHECK-NEXT: and w10, w0, #0xf -; CHECK-NEXT: mov w8, #15 // =0xf -; CHECK-NEXT: add w9, w10, w9 -; CHECK-NEXT: cmp w9, #15 -; CHECK-NEXT: csel w0, w9, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w9, w1, #0xf +; CHECK-SD-NEXT: and w10, w0, #0xf +; CHECK-SD-NEXT: mov w8, #15 // =0xf +; CHECK-SD-NEXT: add w9, w10, w9 +; CHECK-SD-NEXT: cmp w9, #15 +; CHECK-SD-NEXT: csel w0, w9, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w9, w0, #0xf +; CHECK-GI-NEXT: and w10, w1, #0xf +; CHECK-GI-NEXT: mov w8, #15 // =0xf +; CHECK-GI-NEXT: add w9, w9, w10 +; CHECK-GI-NEXT: and w10, w9, #0xf +; CHECK-GI-NEXT: cmp w9, w10 +; CHECK-GI-NEXT: csel w0, w8, w9, ne +; CHECK-GI-NEXT: ret %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y); ret i4 %tmp; } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll index 705ee747f9e20..d29564029544c 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.uadd.sat.i4(i4, i4) declare i8 @llvm.uadd.sat.i8(i8, i8) @@ -8,70 +9,119 @@ declare i32 @llvm.uadd.sat.i32(i32, i32) declare i64 @llvm.uadd.sat.i64(i64, i64) define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { -; CHECK-LABEL: func32: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: adds w8, w0, w8 -; CHECK-NEXT: csinv w0, w8, wzr, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: adds w8, w0, w8 +; CHECK-SD-NEXT: csinv w0, w8, wzr, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: adds w8, w0, w8 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: tst w9, #0x1 +; CHECK-GI-NEXT: csinv w0, w8, wzr, eq +; CHECK-GI-NEXT: ret %a = mul i32 %y, %z %tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %a) ret i32 %tmp } define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { -; CHECK-LABEL: func64: -; CHECK: // %bb.0: -; CHECK-NEXT: adds x8, x0, x2 -; CHECK-NEXT: csinv x0, x8, xzr, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adds x8, x0, x2 +; CHECK-SD-NEXT: csinv x0, x8, xzr, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adds x8, x0, x2 +; CHECK-GI-NEXT: cset w9, hs +; CHECK-GI-NEXT: tst w9, #0x1 +; CHECK-GI-NEXT: csinv x0, x8, xzr, eq +; CHECK-GI-NEXT: ret %a = mul i64 %y, %z %tmp = call i64 @llvm.uadd.sat.i64(i64 %x, i64 %z) ret i64 %tmp } define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { -; CHECK-LABEL: func16: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: add w8, w9, w8, uxth -; CHECK-NEXT: mov w9, #65535 // =0xffff -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: csel w0, w8, w9, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: and w9, w0, #0xffff +; CHECK-SD-NEXT: add w8, w9, w8, uxth +; CHECK-SD-NEXT: mov w9, #65535 // =0xffff +; CHECK-SD-NEXT: cmp w8, w9 +; CHECK-SD-NEXT: csel w0, w8, w9, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: and w8, w8, #0xffff +; CHECK-GI-NEXT: add w8, w8, w0, uxth +; CHECK-GI-NEXT: cmp w8, w8, uxth +; CHECK-GI-NEXT: csinv w0, w8, wzr, eq +; CHECK-GI-NEXT: ret %a = mul i16 %y, %z %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %a) ret i16 %tmp } define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { -; CHECK-LABEL: func8: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: add w8, w9, w8, uxtb -; CHECK-NEXT: mov w9, #255 // =0xff -; CHECK-NEXT: cmp w8, #255 -; CHECK-NEXT: csel w0, w8, w9, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: and w9, w0, #0xff +; CHECK-SD-NEXT: add w8, w9, w8, uxtb +; CHECK-SD-NEXT: mov w9, #255 // =0xff +; CHECK-SD-NEXT: cmp w8, #255 +; CHECK-SD-NEXT: csel w0, w8, w9, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: and w8, w8, #0xff +; CHECK-GI-NEXT: add w8, w8, w0, uxtb +; CHECK-GI-NEXT: cmp w8, w8, uxtb +; CHECK-GI-NEXT: csinv w0, w8, wzr, eq +; CHECK-GI-NEXT: ret %a = mul i8 %y, %z %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %a) ret i8 %tmp } define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { -; CHECK-LABEL: func4: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: and w9, w0, #0xf -; CHECK-NEXT: and w8, w8, #0xf -; CHECK-NEXT: add w8, w9, w8 -; CHECK-NEXT: mov w9, #15 // =0xf -; CHECK-NEXT: cmp w8, #15 -; CHECK-NEXT: csel w0, w8, w9, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: and w9, w0, #0xf +; CHECK-SD-NEXT: and w8, w8, #0xf +; CHECK-SD-NEXT: add w8, w9, w8 +; CHECK-SD-NEXT: mov w9, #15 // =0xf +; CHECK-SD-NEXT: cmp w8, #15 +; CHECK-SD-NEXT: csel w0, w8, w9, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: and w9, w0, #0xf +; CHECK-GI-NEXT: mov w10, #15 // =0xf +; CHECK-GI-NEXT: and w8, w8, #0xf +; CHECK-GI-NEXT: add w8, w9, w8 +; CHECK-GI-NEXT: and w9, w8, #0xf +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: csel w0, w10, w8, ne +; CHECK-GI-NEXT: ret %a = mul i4 %y, %z %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %a) ret i4 %tmp } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index cf43adb13ebfc..e05c65daf50aa 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -1,5 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) @@ -221,13 +246,23 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v1i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b -; CHECK-NEXT: st1 { v0.b }[0], [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v1i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ldr b1, [x1] +; CHECK-SD-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: st1 { v0.b }[0], [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v1i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: ldrb w9, [x1] +; CHECK-GI-NEXT: add w8, w8, w9 +; CHECK-GI-NEXT: cmp w8, w8, uxtb +; CHECK-GI-NEXT: csinv w8, w8, wzr, eq +; CHECK-GI-NEXT: strb w8, [x2] +; CHECK-GI-NEXT: ret %x = load <1 x i8>, ptr %px %y = load <1 x i8>, ptr %py %z = call <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8> %x, <1 x i8> %y) @@ -236,13 +271,23 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h -; CHECK-NEXT: str h0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v1i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: uqadd v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: str h0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v1i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: ldrh w9, [x1] +; CHECK-GI-NEXT: add w8, w8, w9 +; CHECK-GI-NEXT: cmp w8, w8, uxth +; CHECK-GI-NEXT: csinv w8, w8, wzr, eq +; CHECK-GI-NEXT: strh w8, [x2] +; CHECK-GI-NEXT: ret %x = load <1 x i16>, ptr %px %y = load <1 x i16>, ptr %py %z = call <1 x i16> @llvm.uadd.sat.v1i16(<1 x i16> %x, <1 x i16> %y) diff --git a/llvm/test/CodeGen/AArch64/usub_sat.ll b/llvm/test/CodeGen/AArch64/usub_sat.ll index 22f79ea18595b..160e7e6607cdc 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.usub.sat.i4(i4, i4) declare i8 @llvm.usub.sat.i8(i8, i8) @@ -8,55 +9,99 @@ declare i32 @llvm.usub.sat.i32(i32, i32) declare i64 @llvm.usub.sat.i64(i64, i64) define i32 @func(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: func: -; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: csel w0, wzr, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs w8, w0, w1 +; CHECK-SD-NEXT: csel w0, wzr, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: subs w8, w0, w1 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: tst w9, #0x1 +; CHECK-GI-NEXT: csel w0, wzr, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 %y); ret i32 %tmp; } define i64 @func2(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: func2: -; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: csel x0, xzr, x8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs x8, x0, x1 +; CHECK-SD-NEXT: csel x0, xzr, x8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: subs x8, x0, x1 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: tst w9, #0x1 +; CHECK-GI-NEXT: csel x0, xzr, x8, ne +; CHECK-GI-NEXT: ret %tmp = call i64 @llvm.usub.sat.i64(i64 %x, i64 %y); ret i64 %tmp; } define i16 @func16(i16 %x, i16 %y) nounwind { -; CHECK-LABEL: func16: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: csel w0, wzr, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: subs w8, w8, w1, uxth +; CHECK-SD-NEXT: csel w0, wzr, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xffff +; CHECK-GI-NEXT: sub w8, w8, w1, uxth +; CHECK-GI-NEXT: cmp w8, w8, uxth +; CHECK-GI-NEXT: csel w0, wzr, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y); ret i16 %tmp; } define i8 @func8(i8 %x, i8 %y) nounwind { -; CHECK-LABEL: func8: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxtb -; CHECK-NEXT: csel w0, wzr, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w0, #0xff +; CHECK-SD-NEXT: subs w8, w8, w1, uxtb +; CHECK-SD-NEXT: csel w0, wzr, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xff +; CHECK-GI-NEXT: sub w8, w8, w1, uxtb +; CHECK-GI-NEXT: cmp w8, w8, uxtb +; CHECK-GI-NEXT: csel w0, wzr, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y); ret i8 %tmp; } define i4 @func3(i4 %x, i4 %y) nounwind { -; CHECK-LABEL: func3: -; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xf -; CHECK-NEXT: and w9, w0, #0xf -; CHECK-NEXT: subs w8, w9, w8 -; CHECK-NEXT: csel w0, wzr, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w8, w1, #0xf +; CHECK-SD-NEXT: and w9, w0, #0xf +; CHECK-SD-NEXT: subs w8, w9, w8 +; CHECK-SD-NEXT: csel w0, wzr, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xf +; CHECK-GI-NEXT: and w9, w1, #0xf +; CHECK-GI-NEXT: sub w8, w8, w9 +; CHECK-GI-NEXT: and w9, w8, #0xf +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: csel w0, wzr, w8, ne +; CHECK-GI-NEXT: ret %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y); ret i4 %tmp; } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll index 0bf7cb470cc9c..a9932216dbe34 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i4 @llvm.usub.sat.i4(i4, i4) declare i8 @llvm.usub.sat.i8(i8, i8) @@ -8,64 +9,112 @@ declare i32 @llvm.usub.sat.i32(i32, i32) declare i64 @llvm.usub.sat.i64(i64, i64) define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { -; CHECK-LABEL: func32: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: csel w0, wzr, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: subs w8, w0, w8 +; CHECK-SD-NEXT: csel w0, wzr, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: subs w8, w0, w8 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: tst w9, #0x1 +; CHECK-GI-NEXT: csel w0, wzr, w8, ne +; CHECK-GI-NEXT: ret %a = mul i32 %y, %z %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 %a) ret i32 %tmp } define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { -; CHECK-LABEL: func64: -; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x0, x2 -; CHECK-NEXT: csel x0, xzr, x8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: subs x8, x0, x2 +; CHECK-SD-NEXT: csel x0, xzr, x8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: subs x8, x0, x2 +; CHECK-GI-NEXT: cset w9, lo +; CHECK-GI-NEXT: tst w9, #0x1 +; CHECK-GI-NEXT: csel x0, xzr, x8, ne +; CHECK-GI-NEXT: ret %a = mul i64 %y, %z %tmp = call i64 @llvm.usub.sat.i64(i64 %x, i64 %z) ret i64 %tmp } define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { -; CHECK-LABEL: func16: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: subs w8, w9, w8, uxth -; CHECK-NEXT: csel w0, wzr, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: and w9, w0, #0xffff +; CHECK-SD-NEXT: subs w8, w9, w8, uxth +; CHECK-SD-NEXT: csel w0, wzr, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: and w9, w0, #0xffff +; CHECK-GI-NEXT: sub w8, w9, w8, uxth +; CHECK-GI-NEXT: cmp w8, w8, uxth +; CHECK-GI-NEXT: csel w0, wzr, w8, ne +; CHECK-GI-NEXT: ret %a = mul i16 %y, %z %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %a) ret i16 %tmp } define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { -; CHECK-LABEL: func8: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: subs w8, w9, w8, uxtb -; CHECK-NEXT: csel w0, wzr, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: and w9, w0, #0xff +; CHECK-SD-NEXT: subs w8, w9, w8, uxtb +; CHECK-SD-NEXT: csel w0, wzr, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: and w9, w0, #0xff +; CHECK-GI-NEXT: sub w8, w9, w8, uxtb +; CHECK-GI-NEXT: cmp w8, w8, uxtb +; CHECK-GI-NEXT: csel w0, wzr, w8, ne +; CHECK-GI-NEXT: ret %a = mul i8 %y, %z %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %a) ret i8 %tmp } define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { -; CHECK-LABEL: func4: -; CHECK: // %bb.0: -; CHECK-NEXT: mul w8, w1, w2 -; CHECK-NEXT: and w9, w0, #0xf -; CHECK-NEXT: and w8, w8, #0xf -; CHECK-NEXT: subs w8, w9, w8 -; CHECK-NEXT: csel w0, wzr, w8, lo -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mul w8, w1, w2 +; CHECK-SD-NEXT: and w9, w0, #0xf +; CHECK-SD-NEXT: and w8, w8, #0xf +; CHECK-SD-NEXT: subs w8, w9, w8 +; CHECK-SD-NEXT: csel w0, wzr, w8, lo +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mul w8, w1, w2 +; CHECK-GI-NEXT: and w9, w0, #0xf +; CHECK-GI-NEXT: and w8, w8, #0xf +; CHECK-GI-NEXT: sub w8, w9, w8 +; CHECK-GI-NEXT: and w9, w8, #0xf +; CHECK-GI-NEXT: cmp w8, w9 +; CHECK-GI-NEXT: csel w0, wzr, w8, ne +; CHECK-GI-NEXT: ret %a = mul i4 %y, %z %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %a) ret i4 %tmp } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 6c8ee89b50bff..05f43e7d8427b 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -1,5 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for v16i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) @@ -218,13 +243,23 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v1i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b -; CHECK-NEXT: st1 { v0.b }[0], [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v1i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ldr b1, [x1] +; CHECK-SD-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: st1 { v0.b }[0], [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v1i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: ldrb w9, [x1] +; CHECK-GI-NEXT: sub w8, w8, w9 +; CHECK-GI-NEXT: cmp w8, w8, uxtb +; CHECK-GI-NEXT: csel w8, wzr, w8, ne +; CHECK-GI-NEXT: strb w8, [x2] +; CHECK-GI-NEXT: ret %x = load <1 x i8>, ptr %px %y = load <1 x i8>, ptr %py %z = call <1 x i8> @llvm.usub.sat.v1i8(<1 x i8> %x, <1 x i8> %y) @@ -233,13 +268,23 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v1i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: str h0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v1i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ldr h1, [x1] +; CHECK-SD-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: str h0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v1i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: ldrh w9, [x1] +; CHECK-GI-NEXT: sub w8, w8, w9 +; CHECK-GI-NEXT: cmp w8, w8, uxth +; CHECK-GI-NEXT: csel w8, wzr, w8, ne +; CHECK-GI-NEXT: strh w8, [x2] +; CHECK-GI-NEXT: ret %x = load <1 x i16>, ptr %px %y = load <1 x i16>, ptr %py %z = call <1 x i16> @llvm.usub.sat.v1i16(<1 x i16> %x, <1 x i16> %y)