diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 33dba6a5c61eaf0..043f142f3099bf0 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1141,9 +1141,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .scalarize(1) .lower(); - getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) - .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); }); - getActionDefinitionsBuilder({G_FSHL, G_FSHR}) .customFor({{s32, s32}, {s32, s64}, {s64, s64}}) .lower(); @@ -1191,8 +1188,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .minScalarEltSameAsIf(always, 1, 0) .maxScalarEltSameAsIf(always, 1, 0); - // TODO: Vector types. - getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0)); + getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}) + .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8}) + .clampNumElements(0, v8s8, v16s8) + .clampNumElements(0, v4s16, v8s16) + .clampNumElements(0, v2s32, v4s32) + .clampMaxNumElements(0, s64, 2) + .moreElementsToNextPow2(0) + .lower(); // TODO: Libcall support for s128. // TODO: s16 should be legal with full FP16 support. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index ac3c47c8001d4a4..200e9d19d58d253 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -395,6 +395,7 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_SADDSAT (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_USUBSAT (opcode {{[0-9]+}}): 1 type index, 0 imm indices diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll index 9e09b7f9a4bd6f1..789fd7b20a7f990 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll @@ -2,8 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for vec - declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) declare i16 @llvm.sadd.sat.i16(i16, i16) diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 6f1ae023bf25a16..8a0e7661883f21d 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>) @@ -67,23 +49,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.16b, v1.16b, v3.16b -; CHECK-NEXT: sqadd v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: sqadd v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: sqadd v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b -; CHECK-NEXT: sqadd v0.16b, v0.16b, v4.16b -; CHECK-NEXT: sqadd v1.16b, v1.16b, v5.16b -; CHECK-NEXT: sqadd v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: sqadd v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: sqadd v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: sqadd v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: sqadd v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: sqadd v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: sqadd v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -98,23 +94,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.8h, v1.8h, v3.8h -; CHECK-NEXT: sqadd v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: sqadd v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sqadd v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h -; CHECK-NEXT: sqadd v0.8h, v0.8h, v4.8h -; CHECK-NEXT: sqadd v1.8h, v1.8h, v5.8h -; CHECK-NEXT: sqadd v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: sqadd v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: sqadd v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: sqadd v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: sqadd v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: sqadd v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: sqadd v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -135,19 +145,42 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8 +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -196,23 +229,37 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-NEXT: add x8, x0, #2 -; CHECK-NEXT: add x9, x1, #2 -; CHECK-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #2 +; CHECK-SD-NEXT: add x9, x1, #2 +; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: sqadd v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -230,15 +277,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: sqadd v0.8h, v1.8h, v0.8h -; CHECK-NEXT: sqadd v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: sqadd v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: sqadd v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: sqadd v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.sadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -346,23 +405,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.4s, v1.4s, v3.4s -; CHECK-NEXT: sqadd v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: sqadd v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: sqadd v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s -; CHECK-NEXT: sqadd v0.4s, v0.4s, v4.4s -; CHECK-NEXT: sqadd v1.4s, v1.4s, v5.4s -; CHECK-NEXT: sqadd v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: sqadd v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: sqadd v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: sqadd v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: sqadd v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: sqadd v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: sqadd v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -377,23 +450,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.2d, v1.2d, v3.2d -; CHECK-NEXT: sqadd v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: sqadd v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: sqadd v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d -; CHECK-NEXT: sqadd v0.2d, v0.2d, v4.2d -; CHECK-NEXT: sqadd v1.2d, v1.2d, v5.2d -; CHECK-NEXT: sqadd v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: sqadd v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: sqadd v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: sqadd v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: sqadd v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: sqadd v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: sqadd v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll index abeb4b357fa9fbf..4d755f480c3fc92 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll @@ -2,8 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for vec - declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) declare i16 @llvm.ssub.sat.i16(i16, i16) diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index d1f843a09f749d5..a8c1276eadc4fac 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>) @@ -68,23 +50,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.16b, v1.16b, v3.16b -; CHECK-NEXT: sqsub v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: sqsub v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: sqsub v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b -; CHECK-NEXT: sqsub v0.16b, v0.16b, v4.16b -; CHECK-NEXT: sqsub v1.16b, v1.16b, v5.16b -; CHECK-NEXT: sqsub v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: sqsub v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: sqsub v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: sqsub v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: sqsub v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: sqsub v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: sqsub v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -99,23 +95,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.8h, v1.8h, v3.8h -; CHECK-NEXT: sqsub v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: sqsub v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sqsub v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h -; CHECK-NEXT: sqsub v0.8h, v0.8h, v4.8h -; CHECK-NEXT: sqsub v1.8h, v1.8h, v5.8h -; CHECK-NEXT: sqsub v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: sqsub v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: sqsub v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: sqsub v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: sqsub v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: sqsub v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: sqsub v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -136,19 +146,42 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8 +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -197,23 +230,37 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-NEXT: add x8, x0, #2 -; CHECK-NEXT: add x9, x1, #2 -; CHECK-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #2 +; CHECK-SD-NEXT: add x9, x1, #2 +; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: sqsub v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -231,15 +278,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: sqsub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: sqsub v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: sqsub v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: sqsub v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: sqsub v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.ssub.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -349,23 +408,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.4s, v1.4s, v3.4s -; CHECK-NEXT: sqsub v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: sqsub v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: sqsub v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s -; CHECK-NEXT: sqsub v0.4s, v0.4s, v4.4s -; CHECK-NEXT: sqsub v1.4s, v1.4s, v5.4s -; CHECK-NEXT: sqsub v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: sqsub v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: sqsub v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: sqsub v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: sqsub v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: sqsub v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: sqsub v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -380,23 +453,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.2d, v1.2d, v3.2d -; CHECK-NEXT: sqsub v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: sqsub v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: sqsub v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d -; CHECK-NEXT: sqsub v0.2d, v0.2d, v4.2d -; CHECK-NEXT: sqsub v1.2d, v1.2d, v5.2d -; CHECK-NEXT: sqsub v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: sqsub v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: sqsub v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: sqsub v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: sqsub v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: sqsub v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: sqsub v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index f0bbed59405e3f6..30ff70088454d7a 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>) @@ -67,23 +49,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.16b, v1.16b, v3.16b -; CHECK-NEXT: uqadd v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: uqadd v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: uqadd v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b -; CHECK-NEXT: uqadd v0.16b, v0.16b, v4.16b -; CHECK-NEXT: uqadd v1.16b, v1.16b, v5.16b -; CHECK-NEXT: uqadd v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: uqadd v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: uqadd v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: uqadd v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: uqadd v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: uqadd v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: uqadd v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -98,23 +94,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.8h, v1.8h, v3.8h -; CHECK-NEXT: uqadd v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: uqadd v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: uqadd v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h -; CHECK-NEXT: uqadd v0.8h, v0.8h, v4.8h -; CHECK-NEXT: uqadd v1.8h, v1.8h, v5.8h -; CHECK-NEXT: uqadd v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: uqadd v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: uqadd v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: uqadd v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: uqadd v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: uqadd v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: uqadd v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -135,16 +145,39 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: ldr s2, [x1] -; CHECK-NEXT: movi d0, #0xff00ff00ff00ff -; CHECK-NEXT: uaddl v1.8h, v1.8b, v2.8b -; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s1, [x0] +; CHECK-SD-NEXT: ldr s2, [x1] +; CHECK-SD-NEXT: movi d0, #0xff00ff00ff00ff +; CHECK-SD-NEXT: uaddl v1.8h, v1.8b, v2.8b +; CHECK-SD-NEXT: umin v0.4h, v1.4h, v0.4h +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -194,24 +227,38 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: movi d2, #0x00ffff0000ffff -; CHECK-NEXT: ldrh w10, [x0, #2] -; CHECK-NEXT: ldrh w11, [x1, #2] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrh w8, [x0] +; CHECK-SD-NEXT: ldrh w9, [x1] +; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff +; CHECK-SD-NEXT: ldrh w10, [x0, #2] +; CHECK-SD-NEXT: ldrh w11, [x1, #2] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: umin v0.2s, v0.2s, v2.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: uqadd v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -229,15 +276,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: uqadd v0.8h, v1.8h, v0.8h -; CHECK-NEXT: uqadd v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: uqadd v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: uqadd v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: uqadd v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.uadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -336,23 +395,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.4s, v1.4s, v3.4s -; CHECK-NEXT: uqadd v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: uqadd v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: uqadd v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s -; CHECK-NEXT: uqadd v0.4s, v0.4s, v4.4s -; CHECK-NEXT: uqadd v1.4s, v1.4s, v5.4s -; CHECK-NEXT: uqadd v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: uqadd v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: uqadd v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: uqadd v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: uqadd v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: uqadd v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: uqadd v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -367,23 +440,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.2d, v1.2d, v3.2d -; CHECK-NEXT: uqadd v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: uqadd v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: uqadd v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d -; CHECK-NEXT: uqadd v0.2d, v0.2d, v4.2d -; CHECK-NEXT: uqadd v1.2d, v1.2d, v5.2d -; CHECK-NEXT: uqadd v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: uqadd v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: uqadd v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: uqadd v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: uqadd v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: uqadd v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: uqadd v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 82c0327219f5042..3bc27962dcb41e5 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>) @@ -68,23 +50,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.16b, v1.16b, v3.16b -; CHECK-NEXT: uqsub v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: uqsub v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: uqsub v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b -; CHECK-NEXT: uqsub v0.16b, v0.16b, v4.16b -; CHECK-NEXT: uqsub v1.16b, v1.16b, v5.16b -; CHECK-NEXT: uqsub v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: uqsub v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: uqsub v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: uqsub v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: uqsub v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: uqsub v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: uqsub v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -99,23 +95,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.8h, v1.8h, v3.8h -; CHECK-NEXT: uqsub v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: uqsub v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: uqsub v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h -; CHECK-NEXT: uqsub v0.8h, v0.8h, v4.8h -; CHECK-NEXT: uqsub v1.8h, v1.8h, v5.8h -; CHECK-NEXT: uqsub v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: uqsub v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: uqsub v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: uqsub v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: uqsub v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: uqsub v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: uqsub v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -136,16 +146,39 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -193,22 +226,36 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: ldrh w10, [x0, #2] -; CHECK-NEXT: ldrh w11, [x1, #2] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrh w8, [x0] +; CHECK-SD-NEXT: ldrh w9, [x1] +; CHECK-SD-NEXT: ldrh w10, [x0, #2] +; CHECK-SD-NEXT: ldrh w11, [x1, #2] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: uqsub v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -226,15 +273,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: uqsub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: uqsub v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: uqsub v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: uqsub v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: uqsub v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.usub.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -334,23 +393,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.4s, v1.4s, v3.4s -; CHECK-NEXT: uqsub v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: uqsub v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: uqsub v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s -; CHECK-NEXT: uqsub v0.4s, v0.4s, v4.4s -; CHECK-NEXT: uqsub v1.4s, v1.4s, v5.4s -; CHECK-NEXT: uqsub v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: uqsub v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: uqsub v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: uqsub v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: uqsub v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: uqsub v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: uqsub v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -365,23 +438,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.2d, v1.2d, v3.2d -; CHECK-NEXT: uqsub v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: uqsub v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: uqsub v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d -; CHECK-NEXT: uqsub v0.2d, v0.2d, v4.2d -; CHECK-NEXT: uqsub v1.2d, v1.2d, v5.2d -; CHECK-NEXT: uqsub v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: uqsub v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: uqsub v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: uqsub v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: uqsub v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: uqsub v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: uqsub v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z }