diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 52c43a4ac4a04..b4ab9a5e4092c 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4852,6 +4852,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerMemcpyInline(MI); case G_ZEXT: case G_SEXT: + case G_FPEXT: case G_ANYEXT: return lowerEXT(MI); case G_TRUNC: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5f93847bc680e..3542be4105f97 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -825,6 +825,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor( {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}}) + .moreElementsToNextPow2(0) + .lowerIf([](const LegalityQuery &Q) { + LLT DstTy = Q.Types[0]; + LLT SrcTy = Q.Types[1]; + return SrcTy.isVector() && DstTy.isVector() && + SrcTy.getNumElements() > 2 && + SrcTy.getScalarSizeInBits() == 16 && + DstTy.getScalarSizeInBits() == 64; + }) .clampNumElements(0, v4s32, v4s32) .clampNumElements(0, v2s64, v2s64) .scalarize(0); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 896603d6eb20d..92b273c6141d1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -555,8 +555,8 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FPEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. the first uncovered type index: 2, OK -# DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll index a37aabb0b5384..12b6562b5cf0c 100644 --- a/llvm/test/CodeGen/AArch64/fmla.ll +++ b/llvm/test/CodeGen/AArch64/fmla.ll @@ -865,22 +865,22 @@ define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h -; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4] ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5] -; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6] -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4] +; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5] +; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6] +; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1] -; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1] +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2] @@ -1350,22 +1350,22 @@ define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) { ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h -; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4] ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5] -; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s -; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s -; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6] -; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0] -; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v2.h[4] +; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[5] +; CHECK-GI-NOFP16-NEXT: fcvtn v4.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[6] +; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1] -; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v4.h[1] +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[2] ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3] +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[3] ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1] ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2] diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll index 6233ce743b706..1e1e25c04b384 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -241,30 +241,16 @@ define <4 x double> @h_to_d(<4 x half> %a) { ; ; CHECK-CVT-GI-LABEL: h_to_d: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] -; CHECK-CVT-GI-NEXT: mov h2, v0.h[2] -; CHECK-CVT-GI-NEXT: mov h3, v0.h[3] -; CHECK-CVT-GI-NEXT: fcvt d0, h0 -; CHECK-CVT-GI-NEXT: fcvt d4, h1 -; CHECK-CVT-GI-NEXT: fcvt d1, h2 -; CHECK-CVT-GI-NEXT: fcvt d2, h3 -; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0] -; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: h_to_d: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d4, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h2 -; CHECK-FP16-GI-NEXT: fcvt d2, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0] -; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s ; CHECK-FP16-GI-NEXT: ret %1 = fpext <4 x half> %a to <4 x double> ret <4 x double> %1 diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll index 86763eb5f9e3b..7b152bcccf1e5 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -298,48 +298,22 @@ define <8 x double> @h_to_d(<8 x half> %a) { ; ; CHECK-CVT-GI-LABEL: h_to_d: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: mov h1, v0.h[1] -; CHECK-CVT-GI-NEXT: mov h2, v0.h[2] -; CHECK-CVT-GI-NEXT: mov h3, v0.h[3] -; CHECK-CVT-GI-NEXT: mov h4, v0.h[4] -; CHECK-CVT-GI-NEXT: mov h5, v0.h[5] -; CHECK-CVT-GI-NEXT: mov h6, v0.h[6] -; CHECK-CVT-GI-NEXT: mov h7, v0.h[7] -; CHECK-CVT-GI-NEXT: fcvt d0, h0 -; CHECK-CVT-GI-NEXT: fcvt d16, h1 -; CHECK-CVT-GI-NEXT: fcvt d1, h2 -; CHECK-CVT-GI-NEXT: fcvt d17, h3 -; CHECK-CVT-GI-NEXT: fcvt d2, h4 -; CHECK-CVT-GI-NEXT: fcvt d4, h5 -; CHECK-CVT-GI-NEXT: fcvt d3, h6 -; CHECK-CVT-GI-NEXT: fcvt d5, h7 -; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0] -; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0] -; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0] -; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0] +; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-GI-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-CVT-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-CVT-GI-NEXT: fcvtl v2.2d, v3.2s +; CHECK-CVT-GI-NEXT: fcvtl2 v3.2d, v3.4s ; CHECK-CVT-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: h_to_d: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d16, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h2 -; CHECK-FP16-GI-NEXT: fcvt d17, h3 -; CHECK-FP16-GI-NEXT: fcvt d2, h4 -; CHECK-FP16-GI-NEXT: fcvt d4, h5 -; CHECK-FP16-GI-NEXT: fcvt d3, h6 -; CHECK-FP16-GI-NEXT: fcvt d5, h7 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0] -; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v3.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v3.2d, v3.4s ; CHECK-FP16-GI-NEXT: ret %1 = fpext <8 x half> %a to <8 x double> ret <8 x double> %1 diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index 637c02875b84e..b075a8b6f70ee 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -285,31 +285,24 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: stest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_1 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_1] ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI6_0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v1.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI6_0] -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -351,24 +344,17 @@ define <4 x i32> @utest_f16i32(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: utest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -412,28 +398,21 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: ustest_f16i32: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: cmgt v2.2d, v0.2d, #0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-GI-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0 +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0 +; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b +; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -2273,31 +2252,24 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: stest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI33_1 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_1] ; CHECK-FP16-GI-NEXT: adrp x8, .LCPI33_0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v1.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v2.2d, v1.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v2.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b ; CHECK-FP16-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v3.16b -; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: bif v1.16b, v2.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v2.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -2337,24 +2309,17 @@ define <4 x i32> @utest_f16i32_mm(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: utest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmhi v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmhi v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -2397,28 +2362,21 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; ; CHECK-FP16-GI-LABEL: ustest_f16i32_mm: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-FP16-GI-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: mov v3.d[1], v4.d[0] +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v0.2d -; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: bit v1.16b, v2.16b, v4.16b -; CHECK-FP16-GI-NEXT: cmgt v2.2d, v0.2d, #0 -; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, #0 -; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-FP16-GI-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: cmgt v4.2d, v1.2d, v0.2d +; CHECK-FP16-GI-NEXT: bif v2.16b, v1.16b, v3.16b +; CHECK-FP16-GI-NEXT: bif v0.16b, v1.16b, v4.16b +; CHECK-FP16-GI-NEXT: cmgt v1.2d, v2.2d, #0 +; CHECK-FP16-GI-NEXT: cmgt v3.2d, v0.2d, #0 +; CHECK-FP16-GI-NEXT: and v1.16b, v2.16b, v1.16b +; CHECK-FP16-GI-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-FP16-GI-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll index df90f9d5f0910..295cb007467bd 100644 --- a/llvm/test/CodeGen/AArch64/fpext.ll +++ b/llvm/test/CodeGen/AArch64/fpext.ll @@ -82,11 +82,12 @@ define <3 x double> @fpext_v3f32_v3f64(<3 x float> %a) { ; ; CHECK-GI-LABEL: fpext_v3f32_v3f64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s1, v0.s[2] +; CHECK-GI-NEXT: mov v1.s[0], v0.s[2] ; CHECK-GI-NEXT: fcvtl v0.2d, v0.2s -; CHECK-GI-NEXT: fcvt d2, s1 +; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s ; CHECK-GI-NEXT: mov d1, v0.d[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NEXT: ret entry: %c = fpext <3 x float> %a to <3 x double> @@ -353,12 +354,12 @@ define <3 x double> @fpext_v3f16_v3f64(<3 x half> %a) { ; ; CHECK-GI-LABEL: fpext_v3f16_v3f64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: fcvt d1, h1 -; CHECK-GI-NEXT: fcvt d2, h2 +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v2.2d, v1.4s +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = fpext <3 x half> %a to <3 x double> @@ -375,16 +376,9 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) { ; ; CHECK-GI-LABEL: fpext_v4f16_v4f64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: mov h3, v0.h[3] -; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: fcvt d4, h1 -; CHECK-GI-NEXT: fcvt d1, h2 -; CHECK-GI-NEXT: fcvt d2, h3 -; CHECK-GI-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v0.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s ; CHECK-GI-NEXT: ret entry: %c = fpext <4 x half> %a to <4 x double> diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index f6053cee50dae..7f747ec9b7cbb 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -4710,20 +4710,14 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v3f16_v3i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d1, h0 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: fcvt d2, h3 -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: ret entry: %c = fptosi <3 x half> %a to <3 x i64> @@ -4774,20 +4768,14 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v3f16_v3i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h2, v0.h[1] -; CHECK-FP16-GI-NEXT: fcvt d1, h0 -; CHECK-FP16-GI-NEXT: mov h3, v0.h[2] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-FP16-GI-NEXT: fcvt d2, h3 -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: mov v2.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-FP16-GI-NEXT: mov d1, v0.d[1] +; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-FP16-GI-NEXT: ret entry: %c = fptoui <3 x half> %a to <3 x i64> @@ -4842,17 +4830,10 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v4f16_v4i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -4908,17 +4889,10 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v4f16_v4i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d +; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v1.2d ; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d ; CHECK-FP16-GI-NEXT: ret entry: @@ -5005,29 +4979,16 @@ define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v8f16_v8i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v3.2d +; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v4.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptosi <8 x half> %a to <8 x i64> @@ -5113,29 +5074,16 @@ define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v8f16_v8i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h1, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h2, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h3, v0.h[3] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h6, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[7] -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d1, h1 -; CHECK-FP16-GI-NEXT: fcvt d2, h2 -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v3.2d +; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v4.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptoui <8 x half> %a to <8 x i64> @@ -5285,52 +5233,26 @@ define <16 x i64> @fptos_v16f16_v16i64(<16 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptos_v16f16_v16i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h3, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d2, h0 -; CHECK-FP16-GI-NEXT: mov h6, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h16, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h0, v0.h[7] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[2] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h22, v1.h[6] -; CHECK-FP16-GI-NEXT: mov h23, v1.h[7] -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: fcvt d16, h16 -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d24, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h17 -; CHECK-FP16-GI-NEXT: fcvt d17, h18 -; CHECK-FP16-GI-NEXT: fcvt d18, h19 -; CHECK-FP16-GI-NEXT: fcvt d19, h20 -; CHECK-FP16-GI-NEXT: fcvt d20, h21 -; CHECK-FP16-GI-NEXT: fcvt d21, h22 -; CHECK-FP16-GI-NEXT: fcvt d22, h23 -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: mov v16.d[1], v0.d[0] -; CHECK-FP16-GI-NEXT: mov v24.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v17.d[1], v18.d[0] -; CHECK-FP16-GI-NEXT: mov v19.d[1], v20.d[0] -; CHECK-FP16-GI-NEXT: mov v21.d[1], v22.d[0] -; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v6.2d -; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v16.2d -; CHECK-FP16-GI-NEXT: fcvtzs v4.2d, v24.2d -; CHECK-FP16-GI-NEXT: fcvtzs v5.2d, v17.2d -; CHECK-FP16-GI-NEXT: fcvtzs v6.2d, v19.2d -; CHECK-FP16-GI-NEXT: fcvtzs v7.2d, v21.2d +; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-FP16-GI-NEXT: fcvtl v4.2d, v2.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v2.4s +; CHECK-FP16-GI-NEXT: fcvtl v5.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v6.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtl v7.2d, v3.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v16.2d, v3.4s +; CHECK-FP16-GI-NEXT: fcvtl v17.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v18.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtzs v0.2d, v4.2d +; CHECK-FP16-GI-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzs v2.2d, v5.2d +; CHECK-FP16-GI-NEXT: fcvtzs v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtzs v4.2d, v7.2d +; CHECK-FP16-GI-NEXT: fcvtzs v5.2d, v16.2d +; CHECK-FP16-GI-NEXT: fcvtzs v6.2d, v17.2d +; CHECK-FP16-GI-NEXT: fcvtzs v7.2d, v18.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptosi <16 x half> %a to <16 x i64> @@ -5480,52 +5402,26 @@ define <16 x i64> @fptou_v16f16_v16i64(<16 x half> %a) { ; ; CHECK-FP16-GI-LABEL: fptou_v16f16_v16i64: ; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: mov h3, v0.h[1] -; CHECK-FP16-GI-NEXT: mov h4, v0.h[2] -; CHECK-FP16-GI-NEXT: mov h5, v0.h[3] -; CHECK-FP16-GI-NEXT: fcvt d2, h0 -; CHECK-FP16-GI-NEXT: mov h6, v0.h[4] -; CHECK-FP16-GI-NEXT: mov h7, v0.h[5] -; CHECK-FP16-GI-NEXT: mov h16, v0.h[6] -; CHECK-FP16-GI-NEXT: mov h0, v0.h[7] -; CHECK-FP16-GI-NEXT: mov h17, v1.h[1] -; CHECK-FP16-GI-NEXT: mov h18, v1.h[2] -; CHECK-FP16-GI-NEXT: mov h19, v1.h[3] -; CHECK-FP16-GI-NEXT: mov h20, v1.h[4] -; CHECK-FP16-GI-NEXT: mov h21, v1.h[5] -; CHECK-FP16-GI-NEXT: mov h22, v1.h[6] -; CHECK-FP16-GI-NEXT: mov h23, v1.h[7] -; CHECK-FP16-GI-NEXT: fcvt d3, h3 -; CHECK-FP16-GI-NEXT: fcvt d4, h4 -; CHECK-FP16-GI-NEXT: fcvt d5, h5 -; CHECK-FP16-GI-NEXT: fcvt d6, h6 -; CHECK-FP16-GI-NEXT: fcvt d7, h7 -; CHECK-FP16-GI-NEXT: fcvt d16, h16 -; CHECK-FP16-GI-NEXT: fcvt d0, h0 -; CHECK-FP16-GI-NEXT: fcvt d24, h1 -; CHECK-FP16-GI-NEXT: fcvt d1, h17 -; CHECK-FP16-GI-NEXT: fcvt d17, h18 -; CHECK-FP16-GI-NEXT: fcvt d18, h19 -; CHECK-FP16-GI-NEXT: fcvt d19, h20 -; CHECK-FP16-GI-NEXT: fcvt d20, h21 -; CHECK-FP16-GI-NEXT: fcvt d21, h22 -; CHECK-FP16-GI-NEXT: fcvt d22, h23 -; CHECK-FP16-GI-NEXT: mov v2.d[1], v3.d[0] -; CHECK-FP16-GI-NEXT: mov v4.d[1], v5.d[0] -; CHECK-FP16-GI-NEXT: mov v6.d[1], v7.d[0] -; CHECK-FP16-GI-NEXT: mov v16.d[1], v0.d[0] -; CHECK-FP16-GI-NEXT: mov v24.d[1], v1.d[0] -; CHECK-FP16-GI-NEXT: mov v17.d[1], v18.d[0] -; CHECK-FP16-GI-NEXT: mov v19.d[1], v20.d[0] -; CHECK-FP16-GI-NEXT: mov v21.d[1], v22.d[0] -; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v2.2d -; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v4.2d -; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v6.2d -; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v16.2d -; CHECK-FP16-GI-NEXT: fcvtzu v4.2d, v24.2d -; CHECK-FP16-GI-NEXT: fcvtzu v5.2d, v17.2d -; CHECK-FP16-GI-NEXT: fcvtzu v6.2d, v19.2d -; CHECK-FP16-GI-NEXT: fcvtzu v7.2d, v21.2d +; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-FP16-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-FP16-GI-NEXT: fcvtl v4.2d, v2.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v2.2d, v2.4s +; CHECK-FP16-GI-NEXT: fcvtl v5.2d, v0.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v6.2d, v0.4s +; CHECK-FP16-GI-NEXT: fcvtl v7.2d, v3.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v16.2d, v3.4s +; CHECK-FP16-GI-NEXT: fcvtl v17.2d, v1.2s +; CHECK-FP16-GI-NEXT: fcvtl2 v18.2d, v1.4s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2d, v4.2d +; CHECK-FP16-GI-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-FP16-GI-NEXT: fcvtzu v2.2d, v5.2d +; CHECK-FP16-GI-NEXT: fcvtzu v3.2d, v6.2d +; CHECK-FP16-GI-NEXT: fcvtzu v4.2d, v7.2d +; CHECK-FP16-GI-NEXT: fcvtzu v5.2d, v16.2d +; CHECK-FP16-GI-NEXT: fcvtzu v6.2d, v17.2d +; CHECK-FP16-GI-NEXT: fcvtzu v7.2d, v18.2d ; CHECK-FP16-GI-NEXT: ret entry: %c = fptoui <16 x half> %a to <16 x i64> diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index b963acd8cb2a1..dbcfaff8aee05 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -3088,30 +3088,14 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v1.d[1], x11 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl v1.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v2.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzs v0.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_signed_v4f16_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-GI-NEXT: fcvtzs v0.2d, v1.2d +; CHECK-GI-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x } @@ -3797,46 +3781,19 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v3.d[1], x14 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-CVT-NEXT: fcvtl v2.2d, v1.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v1.2d, v1.4s -; CHECK-GI-CVT-NEXT: fcvtl v3.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v4.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzs v0.2d, v2.2d -; CHECK-GI-CVT-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzs v2.2d, v3.2d -; CHECK-GI-CVT-NEXT: fcvtzs v3.2d, v4.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: fcvt d4, h4 -; CHECK-GI-FP16-NEXT: fcvt d5, h5 -; CHECK-GI-FP16-NEXT: fcvt d6, h6 -; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] -; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d -; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_signed_v8f16_v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-GI-NEXT: fcvtzs v0.2d, v2.2d +; CHECK-GI-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-GI-NEXT: fcvtzs v2.2d, v3.2d +; CHECK-GI-NEXT: fcvtzs v3.2d, v4.2d +; CHECK-GI-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 5a66b68af8e96..44e6e9415263b 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -2506,30 +2506,14 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v1.d[1], x11 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl v1.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v2.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzu v0.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_unsigned_v4f16_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NEXT: fcvtl v1.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-GI-NEXT: fcvtzu v0.2d, v1.2d +; CHECK-GI-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-GI-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x } @@ -3114,46 +3098,19 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; CHECK-SD-FP16-NEXT: mov v3.d[1], x14 ; CHECK-SD-FP16-NEXT: ret ; -; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i64: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-CVT-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-CVT-NEXT: fcvtl v2.2d, v1.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v1.2d, v1.4s -; CHECK-GI-CVT-NEXT: fcvtl v3.2d, v0.2s -; CHECK-GI-CVT-NEXT: fcvtl2 v4.2d, v0.4s -; CHECK-GI-CVT-NEXT: fcvtzu v0.2d, v2.2d -; CHECK-GI-CVT-NEXT: fcvtzu v1.2d, v1.2d -; CHECK-GI-CVT-NEXT: fcvtzu v2.2d, v3.2d -; CHECK-GI-CVT-NEXT: fcvtzu v3.2d, v4.2d -; CHECK-GI-CVT-NEXT: ret -; -; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i64: -; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] -; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] -; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: fcvt d1, h1 -; CHECK-GI-FP16-NEXT: fcvt d2, h2 -; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: fcvt d4, h4 -; CHECK-GI-FP16-NEXT: fcvt d5, h5 -; CHECK-GI-FP16-NEXT: fcvt d6, h6 -; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] -; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] -; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d -; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d -; CHECK-GI-FP16-NEXT: ret +; CHECK-GI-LABEL: test_unsigned_v8f16_v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s +; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s +; CHECK-GI-NEXT: fcvtl v3.2d, v0.2s +; CHECK-GI-NEXT: fcvtl2 v4.2d, v0.4s +; CHECK-GI-NEXT: fcvtzu v0.2d, v2.2d +; CHECK-GI-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-GI-NEXT: fcvtzu v2.2d, v3.2d +; CHECK-GI-NEXT: fcvtzu v3.2d, v4.2d +; CHECK-GI-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x }