diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9078675da0e95..721aea2a4c8d3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4488,6 +4488,25 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
   return DAG.getMergeValues({Sum, OutFlag}, DL);
 }
 
+static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
+                                     SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  EVT OrigVT = Op.getValueType();
+  assert((OrigVT == MVT::i32 || OrigVT == MVT::i64) &&
+         "lowerIntNeonIntrinsic expects 32/64-bit scalar operation.");
+
+  EVT NodeVT = (OrigVT == MVT::i32) ? MVT::f32 : MVT::f64;
+
+  SmallVector<SDValue> NewOps;
+  NewOps.reserve(Op.getNumOperands() - 1);
+
+  for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I)
+    NewOps.push_back(DAG.getBitcast(NodeVT, Op.getOperand(I)));
+
+  SDValue OpNode = DAG.getNode(Opcode, DL, NodeVT, NewOps);
+  return DAG.getBitcast(OrigVT, OpNode);
+}
+
 static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   // Let legalize expand this if it isn't a legal type yet.
   if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
@@ -6359,26 +6378,45 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                        Op.getOperand(1).getValueType(),
                        Op.getOperand(1), Op.getOperand(2)));
     return SDValue();
+  case Intrinsic::aarch64_neon_sqrshl:
+    if (Op.getValueType().isVector())
+      return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHL, DAG);
+  case Intrinsic::aarch64_neon_sqshl:
+    if (Op.getValueType().isVector())
+      return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHL, DAG);
+  case Intrinsic::aarch64_neon_uqrshl:
+    if (Op.getValueType().isVector())
+      return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHL, DAG);
+  case Intrinsic::aarch64_neon_uqshl:
+    if (Op.getValueType().isVector())
+      return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHL, DAG);
   case Intrinsic::aarch64_neon_sqadd:
     if (Op.getValueType().isVector())
       return DAG.getNode(ISD::SADDSAT, DL, Op.getValueType(), Op.getOperand(1),
                          Op.getOperand(2));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQADD, DAG);
+
   case Intrinsic::aarch64_neon_sqsub:
     if (Op.getValueType().isVector())
       return DAG.getNode(ISD::SSUBSAT, DL, Op.getValueType(), Op.getOperand(1),
                          Op.getOperand(2));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSUB, DAG);
+
   case Intrinsic::aarch64_neon_uqadd:
     if (Op.getValueType().isVector())
      return DAG.getNode(ISD::UADDSAT, DL, Op.getValueType(), Op.getOperand(1),
                          Op.getOperand(2));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::UQADD, DAG);
   case Intrinsic::aarch64_neon_uqsub:
     if (Op.getValueType().isVector())
       return DAG.getNode(ISD::USUBSAT, DL, Op.getValueType(), Op.getOperand(1),
                          Op.getOperand(2));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSUB, DAG);
+
   case Intrinsic::aarch64_sve_whilelt:
     return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true,
                                      /*IsEqual=*/false);
@@ -6713,6 +6751,52 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::experimental_vector_match: {
     return LowerVectorMatch(Op, DAG);
   }
+  // case Intrinsic::aarch64_neon_fcvtas:
+  // case Intrinsic::aarch64_neon_fcvtau:
+  // case Intrinsic::aarch64_neon_fcvtms:
+  // case Intrinsic::aarch64_neon_fcvtmu:
+  // case Intrinsic::aarch64_neon_fcvtns:
+  // case Intrinsic::aarch64_neon_fcvtnu:
+  // case Intrinsic::aarch64_neon_fcvtps:
+  // case Intrinsic::aarch64_neon_fcvtpu:
+  // case Intrinsic::aarch64_neon_fcvtzs:
+  // case Intrinsic::aarch64_neon_fcvtzu:
+  // case Intrinsic::aarch64_neon_sqabs:
+  // case Intrinsic::aarch64_neon_sqneg:
+  // case Intrinsic::aarch64_neon_scalar_sqxtn:
+  // case Intrinsic::aarch64_neon_scalar_sqxtun:
+  // case Intrinsic::aarch64_neon_scalar_uqxtn:
+  // case Intrinsic::aarch64_neon_sqadd:
+  // case Intrinsic::aarch64_neon_sqdmulh:
+  // case Intrinsic::aarch64_neon_sqrdmulh:
+  // case Intrinsic::aarch64_neon_sqrshl:
+  // case Intrinsic::aarch64_neon_sqshl:
+  // case Intrinsic::aarch64_neon_sqshlu:
+  // case Intrinsic::aarch64_neon_sqsub:
+  // case Intrinsic::aarch64_neon_srshl:
+  // case Intrinsic::aarch64_neon_sshl:
+  // case Intrinsic::aarch64_neon_suqadd:
+  // case Intrinsic::aarch64_neon_uqadd:
+  // case Intrinsic::aarch64_neon_uqrshl:
+  // case Intrinsic::aarch64_neon_uqshl:
+  // case Intrinsic::aarch64_neon_uqsub:
+  // case Intrinsic::aarch64_neon_urshl:
+  // case Intrinsic::aarch64_neon_ushl:
+  // case Intrinsic::aarch64_neon_usqadd:
+  // case Intrinsic::aarch64_neon_rshrn:
+  // case Intrinsic::aarch64_neon_sqrshrn:
+  // case Intrinsic::aarch64_neon_sqrshrun:
+  // case Intrinsic::aarch64_neon_sqshrn:
+  // case Intrinsic::aarch64_neon_sqshrun:
+  // case Intrinsic::aarch64_neon_uqrshrn:
+  // case Intrinsic::aarch64_neon_uqshrn:
+  // case Intrinsic::aarch64_neon_sqdmulh_lane:
+  // case Intrinsic::aarch64_neon_sqdmulh_laneq:
+  // case Intrinsic::aarch64_neon_sqrdmulh_lane:
+  // case Intrinsic::aarch64_neon_sqrdmulh_laneq:
+  // case Intrinsic::aarch64_neon_sqrdmlah:
+  // case Intrinsic::aarch64_neon_sqrdmlsh:
+  // case Intrinsic::aarch64_neon_abs:{
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f07d3514d1a99..28314d3aa7fac 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7703,16 +7703,21 @@ multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
 }
 
 multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
-                               SDPatternOperator OpNode, SDPatternOperator SatOp> {
+                               SDPatternOperator OpNode, SDPatternOperator G_OpNode, SDPatternOperator SatOp> {
   def v1i64  : BaseSIMDThreeScalar;
   def v1i32  : BaseSIMDThreeScalar;
   def v1i16  : BaseSIMDThreeScalar;
   def v1i8   : BaseSIMDThreeScalar;
 
-  def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+  def : Pat<(i64 (G_OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
             (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
-  def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
+  def : Pat<(i32 (G_OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
+            (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
+
+  def : Pat<(f64 (OpNode FPR64:$Rn, FPR64:$Rm)),
+            (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
+  def : Pat<(f32 (OpNode FPR32:$Rn, FPR32:$Rm)),
             (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f788c7510f80c..3cc75ff43f7a3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1000,6 +1000,25 @@ def AArch64fcvtnu_half : SDNode<"AArch64ISD::FCVTNU_HALF", SDTFPExtendOp>;
 def AArch64fcvtps_half : SDNode<"AArch64ISD::FCVTPS_HALF", SDTFPExtendOp>;
 def AArch64fcvtpu_half : SDNode<"AArch64ISD::FCVTPU_HALF", SDTFPExtendOp>;
 
+def AArch64sqadd_node: SDNode<"AArch64ISD::SQADD", SDTFPBinOp>;
+def AArch64sqrshl: SDNode<"AArch64ISD::SQRSHL", SDTFPBinOp>;
+def AArch64sqshl: SDNode<"AArch64ISD::SQSHL", SDTFPBinOp>;
SDNode<"AArch64ISD::SQSHL", SDTFPBinOp>; +def AArch64sqsub_node: SDNode<"AArch64ISD::SQSUB", SDTFPBinOp>; +def AArch64uqadd: SDNode<"AArch64ISD::UQADD", SDTFPBinOp>; +def AArch64uqrshl: SDNode<"AArch64ISD::UQRSHL", SDTFPBinOp>; +def AArch64uqshl: SDNode<"AArch64ISD::UQSHL", SDTFPBinOp>; +def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>; + +// This patfrags are temporary hack to get around pattern matching issues with not yet updated intrinsics. +def AArch64sqadd: PatFrags<(ops node:$lhs, node:$rhs), + [(bitconvert (AArch64sqadd_node (f32 (bitconvert node:$lhs)), (f32 (bitconvert node:$rhs)))), + (bitconvert (AArch64sqadd_node (f64 (bitconvert node:$lhs)), (f64 (bitconvert node:$rhs)))), + (int_aarch64_neon_sqadd node:$lhs, node:$rhs)]>; +def AArch64sqsub: PatFrags<(ops node:$lhs, node:$rhs), + [(bitconvert (AArch64sqsub_node (f32 (bitconvert node:$lhs)), (f32 (bitconvert node:$rhs)))), + (bitconvert (AArch64sqsub_node (f64 (bitconvert node:$lhs)), (f64 (bitconvert node:$rhs)))), + (int_aarch64_neon_sqsub node:$lhs, node:$rhs)]>; + //def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>; // Vector immediate ops @@ -6453,19 +6472,19 @@ defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>; defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>; defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>; -defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd, saddsat>; +defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", AArch64sqadd_node, int_aarch64_neon_sqadd, saddsat>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>; -defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>; -defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub, ssubsat>; +defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", AArch64sqrshl, int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", AArch64sqshl, int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", AArch64sqsub_node, int_aarch64_neon_sqsub, ssubsat>; defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; -defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd, uaddsat>; -defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>; -defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>; -defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub, usubsat>; +defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", AArch64uqadd, int_aarch64_neon_uqadd, uaddsat>; +defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", AArch64uqrshl, int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", AArch64uqshl, int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>; +defm UQSUB : 
 defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
 defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
 let Predicates = [HasRDM] in {
@@ -6520,11 +6539,11 @@ defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
 defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
 defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
 
-def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
+def : Pat<(i64 (AArch64sqadd (i64 FPR64:$Rd),
                 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
           (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
-def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
+def : Pat<(i64 (AArch64sqsub (i64 FPR64:$Rd),
                 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
           (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
@@ -8545,9 +8564,9 @@ defm SMLSL    : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
               TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
 defm SMULL    : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
 defm SQDMLAL  : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", saddsat,
-                                            int_aarch64_neon_sqadd>;
+                                            AArch64sqadd>;
 defm SQDMLSL  : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", ssubsat,
-                                            int_aarch64_neon_sqsub>;
+                                            AArch64sqsub>;
 defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                           int_aarch64_neon_sqrdmlah>;
 defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
new file mode 100644
index 0000000000000..819c00cdd6815
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -0,0 +1,225 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK
+
+
+; CHECK-GI: warning: Instruction selection used fallback path for test_sqrshl_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshl_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshl_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshl_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshl_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshl_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshl_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshl_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64
+
+define i32 @test_sqrshl_s32(float noundef %a){
+; CHECK-LABEL: test_sqrshl_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    sqrshl s0, s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %cvt, i32 %cvt)
+  ret i32 %res
+}
+
+define i64 @test_sqrshl_s64(float noundef %a){
+; CHECK-LABEL: test_sqrshl_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqrshl d0, d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %cvt, i64 %cvt)
+  ret i64 %res
+}
+
+define i32 @test_sqshl_s32(float noundef %a) {
+; CHECK-LABEL: test_sqshl_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    sqshl s0, s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqshl.i32(i32 %cvt, i32 %cvt)
+  ret i32 %res
+}
+
+define i64 @test_sqshl_s64(float noundef %a) {
+; CHECK-LABEL: test_sqshl_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqshl d0, d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i64 @llvm.aarch64.neon.sqshl.i64(i64 %cvt, i64 %cvt)
+  ret i64 %res
+}
+
+define i32 @test_uqrshl_s32(float noundef %a) {
+; CHECK-LABEL: test_uqrshl_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    uqrshl s0, s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %cvt, i32 %cvt)
+  ret i32 %res
+}
+
+define i64 @test_uqrshl_s64(float noundef %a) {
+; CHECK-LABEL: test_uqrshl_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    uqrshl d0, d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %cvt, i64 %cvt)
+  ret i64 %res
+}
+
+define i32 @test_uqshl_s32(float noundef %a) {
+; CHECK-LABEL: test_uqshl_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    uqshl s0, s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.uqshl.i32(i32 %cvt, i32 %cvt)
+  ret i32 %res
+}
+
+define i64 @test_uqshl_s64(float noundef %a) {
+; CHECK-LABEL: test_uqshl_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    uqshl d0, d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i64 @llvm.aarch64.neon.uqshl.i64(i64 %cvt, i64 %cvt)
+  ret i64 %res
+}
+
+define i32 @test_sqadd_s32(float noundef %a) {
+; CHECK-LABEL: test_sqadd_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    sqadd s0, s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %cvt, i32 %cvt)
+  ret i32 %res
+}
+
+define i64 @test_sqadd_s64(float noundef %a) {
+; CHECK-LABEL: test_sqadd_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqadd d0, d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %cvt, i64 %cvt)
+  ret i64 %res
+}
+
+define i32 @test_sqsub_s32(float noundef %a) {
+; CHECK-LABEL: test_sqsub_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    sqsub s0, s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %cvt, i32 %cvt)
+  ret i32 %res
+}
+
+define i64 @test_sqsub_s64(float noundef %a) {
+; CHECK-LABEL: test_sqsub_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqsub d0, d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %cvt, i64 %cvt)
+  ret i64 %res
+}
+
+define i32 @test_uqadd_s32(float noundef %a) {
+; CHECK-LABEL: test_uqadd_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    uqadd s0, s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %cvt, i32 %cvt)
+  ret i32 %res
+}
+
+define i64 @test_uqadd_s64(float noundef %a) {
+; CHECK-LABEL: test_uqadd_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    uqadd d0, d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %cvt, i64 %cvt)
+  ret i64 %res
+}
+
+define i32 @test_uqsub_s32(float noundef %a) {
+; CHECK-LABEL: test_uqsub_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    uqsub s0, s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %cvt, i32 %cvt)
+  ret i32 %res
+}
+
+define i64 @test_uqsub_s64(float noundef %a) {
+; CHECK-LABEL: test_uqsub_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    uqsub d0, d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %cvt, i64 %cvt)
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index e6df9f2fb2c56..fed7439bf95fb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -1766,24 +1766,14 @@ define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
 declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
 
 define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
-; CHECK-SD-LABEL: sqadd_lane1_sqdmull4s:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT:    mov w8, v0.s[1]
-; CHECK-SD-NEXT:    fmov s0, w0
-; CHECK-SD-NEXT:    fmov s1, w8
-; CHECK-SD-NEXT:    sqadd s0, s0, s1
-; CHECK-SD-NEXT:    fmov w0, s0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: sqadd_lane1_sqdmull4s:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT:    fmov s1, w0
-; CHECK-GI-NEXT:    mov s0, v0.s[1]
-; CHECK-GI-NEXT:    sqadd s0, s1, s0
-; CHECK-GI-NEXT:    fmov w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: sqadd_lane1_sqdmull4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    fmov s1, w0
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    sqadd s0, s1, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C)
   %prod = extractelement <4 x i32> %prod.vec, i32 1
   %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod)
@@ -1791,24 +1781,14 @@ define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
 }
 
 define i32 @sqsub_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
-; CHECK-SD-LABEL: sqsub_lane1_sqdmull4s:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT:    mov w8, v0.s[1]
-; CHECK-SD-NEXT:    fmov s0, w0
-; CHECK-SD-NEXT:    fmov s1, w8
-; CHECK-SD-NEXT:    sqsub s0, s0, s1
-; CHECK-SD-NEXT:    fmov w0, s0
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: sqsub_lane1_sqdmull4s:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT:    fmov s1, w0
-; CHECK-GI-NEXT:    mov s0, v0.s[1]
-; CHECK-GI-NEXT:    sqsub s0, s1, s0
-; CHECK-GI-NEXT:    fmov w0, s0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: sqsub_lane1_sqdmull4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    fmov s1, w0
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    sqsub s0, s1, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C)
   %prod = extractelement <4 x i32> %prod.vec, i32 1
   %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod)
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 8ec5434085d6a..d27e2e69f8605 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -168,10 +168,8 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind {
 define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: sqshl_scalar:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ldr x9, [x1]
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
 ; CHECK-NEXT:    sqshl d0, d0, d1
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -363,10 +361,8 @@ define <1 x i64> @uqshl1d_constant(ptr %A) nounwind {
 define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: uqshl_scalar:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ldr x9, [x1]
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
 ; CHECK-NEXT:    uqshl d0, d0, d1
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -888,10 +884,8 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind {
 define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: sqrshl_scalar:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ldr x9, [x1]
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
 ; CHECK-NEXT:    sqrshl d0, d0, d1
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -904,10 +898,9 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
 define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
 ; CHECK-LABEL: sqrshl_scalar_constant:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x9, [x0]
-; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    mov x8, #1 // =0x1
+; CHECK-NEXT:    ldr d0, [x0]
 ; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    fmov d0, x9
 ; CHECK-NEXT:    sqrshl d0, d0, d1
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -997,10 +990,8 @@ define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind {
 define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: uqrshl_scalar:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ldr x9, [x1]
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
 ; CHECK-NEXT:    uqrshl d0, d0, d1
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1013,10 +1004,9 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
 define i64 @uqrshl_scalar_constant(ptr %A) nounwind {
 ; CHECK-LABEL: uqrshl_scalar_constant:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr x9, [x0]
-; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    mov x8, #1 // =0x1
+; CHECK-NEXT:    ldr d0, [x0]
 ; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    fmov d0, x9
 ; CHECK-NEXT:    uqrshl d0, d0, d1
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret