diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 07e722b9a6591..442f0a46a4983 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -113,10 +113,11 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
     APInt SplatValue, SplatUndef;
     unsigned SplatBitSize;
     bool HasAnyUndefs;
-    unsigned Op;
+    unsigned Op = 0;
     EVT ResTy = BVN->getValueType(0);
     bool Is128Vec = BVN->getValueType(0).is128BitVector();
     bool Is256Vec = BVN->getValueType(0).is256BitVector();
+    SDNode *Res;
 
     if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
       break;
@@ -124,26 +125,25 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
                               HasAnyUndefs, 8))
       break;
 
-    switch (SplatBitSize) {
-    default:
-      break;
-    case 8:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
-      break;
-    case 16:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
-      break;
-    case 32:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
-      break;
-    case 64:
-      Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
-      break;
-    }
-
-    SDNode *Res;
     // If we have a signed 10 bit integer, we can splat it directly.
     if (SplatValue.isSignedIntN(10)) {
+      switch (SplatBitSize) {
+      default:
+        break;
+      case 8:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+        break;
+      case 16:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+        break;
+      case 32:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+        break;
+      case 64:
+        Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+        break;
+      }
+
       EVT EleType = ResTy.getVectorElementType();
       APInt Val = SplatValue.sextOrTrunc(EleType.getSizeInBits());
       SDValue Imm = CurDAG->getTargetConstant(Val, DL, EleType);
@@ -151,6 +151,21 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
       ReplaceNode(Node, Res);
      return;
     }
+
+    // Select an appropriate [x]vldi instruction for certain special constant
+    // splats, where the [x]vldi immediate has `imm[12] == 1`.
+    const auto &TLI =
+        *static_cast<const LoongArchTargetLowering *>(getTargetLowering());
+    std::pair<bool, uint64_t> ConvertVLDI =
+        TLI.isImmVLDILegalForMode1(SplatValue, SplatBitSize);
+    if (ConvertVLDI.first) {
+      Op = Is256Vec ? LoongArch::XVLDI : LoongArch::VLDI;
+      SDValue Imm = CurDAG->getSignedTargetConstant(
+          SignExtend32<13>(ConvertVLDI.second), DL, MVT::i32);
+      Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
+      ReplaceNode(Node, Res);
+      return;
+    }
     break;
   }
   }
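A note on the operand values this produces: isImmVLDILegalForMode1 only returns immediates with imm[12] set, so after SignExtend32<13> every selected [x]vldi operand is a negative number in [-4096, -1], which is why all the FileCheck updates below expect negative immediates. A minimal self-contained sketch of that step (my illustration, not part of the patch; it mirrors the documented semantics of llvm::SignExtend32 from llvm/Support/MathExtras.h instead of including it):

    #include <cstdint>
    #include <cstdio>

    // Same semantics as llvm::SignExtend32<B>: sign-extend the low B bits of X.
    template <unsigned B> constexpr int32_t signExtend32(uint32_t X) {
      return int32_t(X << (32 - B)) >> (32 - B);
    }

    int main() {
      // Mode-1 immediates always have bit 12 set, so the 13-bit sign
      // extension is always negative; e.g. 0x1C70 (the 4'b1100 encoding of
      // a +1.0 double splat) becomes the -912 operand seen in the tests.
      std::printf("%d\n", signExtend32<13>(0x1C70)); // prints -912
      return 0;
    }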
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7a3fe31585b96..7eb1dc1d90adb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2815,9 +2815,10 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
 
     if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
       // We can only handle 64-bit elements that are within
-      // the signed 10-bit range on 32-bit targets.
+      // the signed 10-bit range or match vldi patterns on 32-bit targets.
       // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
-      if (!SplatValue.isSignedIntN(10))
+      if (!SplatValue.isSignedIntN(10) &&
+          !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
         return SDValue();
 
       if ((Is128Vec && ResTy == MVT::v4i32) || (Is256Vec && ResTy == MVT::v8i32))
@@ -8507,6 +8508,87 @@ SDValue LoongArchTargetLowering::LowerReturn(
   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
 }
 
+// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
+// Note: The following prefixes are excluded:
+//   imm[11:8] == 4'b0000, 4'b0100, 4'b1000,
+// as they can be represented using [x]vrepli.[whb].
+std::pair<bool, uint64_t> LoongArchTargetLowering::isImmVLDILegalForMode1(
+    const APInt &SplatValue, const unsigned SplatBitSize) const {
+  uint64_t RequiredImm = 0;
+  uint64_t V = SplatValue.getZExtValue();
+  if (SplatBitSize == 16 && !(V & 0x00FF)) {
+    // 4'b0101
+    RequiredImm = (0b10101 << 8) | (V >> 8);
+    return {true, RequiredImm};
+  } else if (SplatBitSize == 32) {
+    // 4'b0001
+    if (!(V & 0xFFFF00FF)) {
+      RequiredImm = (0b10001 << 8) | (V >> 8);
+      return {true, RequiredImm};
+    }
+    // 4'b0010
+    if (!(V & 0xFF00FFFF)) {
+      RequiredImm = (0b10010 << 8) | (V >> 16);
+      return {true, RequiredImm};
+    }
+    // 4'b0011
+    if (!(V & 0x00FFFFFF)) {
+      RequiredImm = (0b10011 << 8) | (V >> 24);
+      return {true, RequiredImm};
+    }
+    // 4'b0110
+    if ((V & 0xFFFF00FF) == 0xFF) {
+      RequiredImm = (0b10110 << 8) | (V >> 8);
+      return {true, RequiredImm};
+    }
+    // 4'b0111
+    if ((V & 0xFF00FFFF) == 0xFFFF) {
+      RequiredImm = (0b10111 << 8) | (V >> 16);
+      return {true, RequiredImm};
+    }
+    // 4'b1010
+    if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
+      RequiredImm =
+          (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
+      return {true, RequiredImm};
+    }
+  } else if (SplatBitSize == 64) {
+    // 4'b1011
+    if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
+        (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
+      RequiredImm =
+          (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
+      return {true, RequiredImm};
+    }
+    // 4'b1100
+    if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
+        (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
+      RequiredImm =
+          (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
+      return {true, RequiredImm};
+    }
+    // 4'b1001
+    auto sameBitsPerByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
+      uint8_t res = 0;
+      for (int i = 0; i < 8; ++i) {
+        uint8_t byte = x & 0xFF;
+        if (byte == 0 || byte == 0xFF)
+          res |= ((byte & 1) << i);
+        else
+          return {false, 0};
+        x >>= 8;
+      }
+      return {true, res};
+    };
+    auto [IsSame, Suffix] = sameBitsPerByte(V);
+    if (IsSame) {
+      RequiredImm = (0b11001 << 8) | Suffix;
+      return {true, RequiredImm};
+    }
+  }
+  return {false, RequiredImm};
+}
+
 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
                                                EVT VT) const {
   if (!Subtarget.hasExtLSX())
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index b2fccf59169ff..3c00296116ac2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -339,6 +339,12 @@ class LoongArchTargetLowering : public TargetLowering {
 
   bool shouldScalarizeBinop(SDValue VecOp) const override;
 
+  /// Check if a constant splat can be generated using [x]vldi, where imm[12]
+  /// is 1.
+  std::pair<bool, uint64_t>
+  isImmVLDILegalForMode1(const APInt &SplatValue,
+                         const unsigned SplatBitSize) const;
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index b0eb51a92c6c6..2c36099f8eb71 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -158,6 +158,7 @@ def vsplatf32_fpimm_eq_1
         N = N->getOperand(0).getNode();
 
         return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+               Imm.getBitWidth() == 32 &&
               Imm.getBitWidth() == EltTy.getSizeInBits() &&
                Imm == APFloat(+1.0f).bitcastToAPInt();
       }]>;
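Before the test churn, a worked example of the encoding (my derivation from the function above, not text from the patch): for a 64-bit splat of double +1.0 (bit pattern 0x3FF0000000000000), the 4'b1100 branch matches its second alternative, and the assembled immediate sign-extends to the -912 operand that the vldi_1100 const-splat tests below check for. A self-contained sketch, with SignExtend32<13> folded in as plain arithmetic:

    #include <cassert>
    #include <cstdint>

    // Mirrors only the 4'b1100 branch of isImmVLDILegalForMode1 plus the
    // SignExtend32<13> applied at selection time; all other prefixes omitted.
    int32_t encodeVLDI1100(uint64_t V) {
      assert((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
             (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL);
      uint64_t Imm =
          (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
      return int32_t(uint32_t(Imm) << 19) >> 19; // SignExtend32<13>(Imm)
    }

    int main() {
      // double +1.0 = 0x3FF0000000000000: prefix 0b11100, payload 0x70,
      // so Imm == 0x1C70 and the selected operand is -912.
      assert(encodeVLDI1100(0x3FF0000000000000ULL) == -912);
      return 0;
    }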
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 5dfc108620b67..d09ef0e2c6ac0 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -196,8 +196,7 @@ entry:
 define void @buildvector_v8f32_const_splat(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_v8f32_const_splat:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 260096
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -1424
 ; CHECK-NEXT: xvst $xr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -207,19 +206,11 @@ entry:
 
 ;; Also check buildvector_const_splat_xvldi_1100.
 define void @buildvector_v4f64_const_splat(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_v4f64_const_splat:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0)
-; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI14_0)
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: buildvector_v4f64_const_splat:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu52i.d $a1, $zero, 1023
-; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: buildvector_v4f64_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldi $xr0, -912
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   store <4 x double> splat (double 1.000000e+00), ptr %dst
   ret void
@@ -229,8 +220,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0001(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0001:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a1, $zero, 768
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -3837
 ; CHECK-NEXT: xvst $xr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -241,8 +231,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0010(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0010:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 16
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -3583
 ; CHECK-NEXT: xvst $xr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -253,8 +242,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0011(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0011:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 4096
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -3327
 ; CHECK-NEXT: xvst $xr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -265,8 +253,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0101(ptr %dst) {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0101:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a1, $zero, 768
-; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -2813
 ; CHECK-NEXT: xvst $xr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -277,8 +264,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0110(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0110:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a1, $zero, 1023
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -2557
 ; CHECK-NEXT: xvst $xr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -289,9 +275,7 @@ entry:
 define void @buildvector_const_splat_xvldi_0111(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_xvldi_0111:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 15
-; CHECK-NEXT: ori $a1, $a1, 4095
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -2305
 ; CHECK-NEXT: xvst $xr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -300,39 +284,22 @@ entry:
 }
 
 define void @buildvector_const_splat_xvldi_1001(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_xvldi_1001:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI21_0)
-; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI21_0)
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: buildvector_const_splat_xvldi_1001:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a1, 15
-; LA64-NEXT: ori $a1, $a1, 4095
-; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: buildvector_const_splat_xvldi_1001:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldi $xr0, -1789
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   store <8 x i32> <i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0>, ptr %dst
   ret void
 }
 
 define void @buildvector_const_splat_xvldi_1011(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_xvldi_1011:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI22_0)
-; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI22_0)
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: buildvector_const_splat_xvldi_1011:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a1, 262144
-; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: buildvector_const_splat_xvldi_1011:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldi $xr0, -1280
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   store <8 x float> <float 2.000000e+00, float 0.000000e+00, float 2.000000e+00, float 0.000000e+00, float 2.000000e+00, float 0.000000e+00, float 2.000000e+00, float 0.000000e+00>, ptr %dst
   ret void
@@ -1626,8 +1593,7 @@ define void @buildvector_v8f32_with_constant(ptr %dst, float %a1, float %a2, flo
 ; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2
 ; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1
 ; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT: lu12i.w $a1, 262144
-; CHECK-NEXT: xvreplgr2vr.w $xr4, $a1
+; CHECK-NEXT: xvldi $xr4, -3264
 ; CHECK-NEXT: xvinsve0.w $xr4, $xr0, 1
 ; CHECK-NEXT: xvinsve0.w $xr4, $xr1, 2
 ; CHECK-NEXT: xvinsve0.w $xr4, $xr2, 5
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
index 7514dafa8000b..d75985b1ac215 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA32
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx,+frecipe < %s | FileCheck %s
 ; RUN: llc --mtriple=loongarch64 --mattr=+lasx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA64
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s
 
 define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; FAULT-LABEL: fdiv_v8f32:
@@ -40,35 +40,19 @@ define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; FAULT-NEXT: xvst $xr0, $a0, 0
 ; FAULT-NEXT: ret
 ;
-; LA32-LABEL: fdiv_v4f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI1_0)
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI1_0)
-; LA32-NEXT: xvld $xr2, $a1, 0
-; LA32-NEXT: xvfrecipe.d $xr3, $xr0
-; LA32-NEXT: xvfmadd.d $xr1, $xr0, $xr3, $xr1
-; LA32-NEXT: xvfnmsub.d $xr1, $xr1, $xr3, $xr3
-; LA32-NEXT: xvfmul.d $xr3, $xr2, $xr1
-; LA32-NEXT: xvfnmsub.d $xr0, $xr0, $xr3, $xr2
-; LA32-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr3
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: fdiv_v4f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: xvld $xr1, $a1, 0
-; LA64-NEXT: lu52i.d $a1, $zero, -1025
-; LA64-NEXT: xvreplgr2vr.d $xr2, $a1
-; LA64-NEXT: xvfrecipe.d $xr3, $xr0
-; LA64-NEXT: xvfmadd.d $xr2, $xr0, $xr3, $xr2
-; LA64-NEXT: xvfnmsub.d $xr2, $xr2, $xr3, $xr3
-; LA64-NEXT: xvfmul.d $xr3, $xr1, $xr2
-; LA64-NEXT: xvfnmsub.d $xr0, $xr0, $xr3, $xr1
-; LA64-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr3
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: fdiv_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvfrecipe.d $xr2, $xr0
+; CHECK-NEXT: xvldi $xr3, -784
+; CHECK-NEXT: xvfmadd.d $xr3, $xr0, $xr2, $xr3
+; CHECK-NEXT: xvfnmsub.d $xr2, $xr3, $xr2, $xr2
+; CHECK-NEXT: xvfmul.d $xr3, $xr1, $xr2
+; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr3, $xr1
+; CHECK-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   %v0 = load <4 x double>, ptr %a0
   %v1 = load <4 x double>, ptr %a1
@@ -90,8 +74,7 @@ define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: xvld $xr0, $a1, 0
 ; CHECK-NEXT: xvfrecipe.s $xr1, $xr0
-; CHECK-NEXT: lu12i.w $a1, -264192
-; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1
+; CHECK-NEXT: xvldi $xr2, -1296
 ; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
 ; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr1
 ; CHECK-NEXT: xvst $xr0, $a0, 0
@@ -107,24 +90,22 @@ define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA32-LABEL: one_fdiv_v4f64:
 ; FAULT-LA32: # %bb.0: # %entry
 ; FAULT-LA32-NEXT: xvld $xr0, $a1, 0
-; FAULT-LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; FAULT-LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
+; FAULT-LA32-NEXT: xvldi $xr1, -912
 ; FAULT-LA32-NEXT: xvfdiv.d $xr0, $xr1, $xr0
 ; FAULT-LA32-NEXT: xvst $xr0, $a0, 0
 ; FAULT-LA32-NEXT: ret
 ;
-; LA32-LABEL: one_fdiv_v4f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: xvfrecipe.d $xr2, $xr0
-; LA32-NEXT: xvfnmsub.d $xr3, $xr0, $xr2, $xr1
-; LA32-NEXT: xvfmadd.d $xr2, $xr2, $xr3, $xr2
-; LA32-NEXT: xvfnmsub.d $xr0, $xr0, $xr2, $xr1
-; LA32-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
+; CHECK-LABEL: one_fdiv_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrecipe.d $xr1, $xr0
+; CHECK-NEXT: xvldi $xr2, -912
+; CHECK-NEXT: xvfnmsub.d $xr3, $xr0, $xr1, $xr2
+; CHECK-NEXT: xvfmadd.d $xr1, $xr1, $xr3, $xr1
+; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
 ;
 ; FAULT-LA64-LABEL: one_fdiv_v4f64:
 ; FAULT-LA64: # %bb.0: # %entry
@@ -132,19 +113,6 @@ define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA64-NEXT: xvfrecip.d $xr0, $xr0
 ; FAULT-LA64-NEXT: xvst $xr0, $a0, 0
 ; FAULT-LA64-NEXT: ret
-;
-; LA64-LABEL: one_fdiv_v4f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvfrecipe.d $xr1, $xr0
-; LA64-NEXT: lu52i.d $a1, $zero, 1023
-; LA64-NEXT: xvreplgr2vr.d $xr2, $a1
-; LA64-NEXT: xvfnmsub.d $xr3, $xr0, $xr1, $xr2
-; LA64-NEXT: xvfmadd.d $xr1, $xr1, $xr3, $xr1
-; LA64-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2
-; LA64-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
 entry:
   %v0 = load <4 x double>, ptr %a0
   %div = fdiv fast <4 x double> splat (double 1.000000e+00), %v0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
index 4e475daa8ced3..e696129acb862 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
@@ -63,11 +63,9 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
 ; LA32-NEXT: xvfrsqrte.s $xr1, $xr0
 ; LA32-NEXT: xvfmul.s $xr1, $xr0, $xr1
 ; LA32-NEXT: xvfmul.s $xr0, $xr0, $xr1
-; LA32-NEXT: lu12i.w $a1, -261120
-; LA32-NEXT: xvreplgr2vr.w $xr2, $a1
+; LA32-NEXT: xvldi $xr2, -1400
 ; LA32-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
-; LA32-NEXT: lu12i.w $a1, -266240
-; LA32-NEXT: xvreplgr2vr.w $xr2, $a1
+; LA32-NEXT: xvldi $xr2, -3137
 ; LA32-NEXT: xvfmul.s $xr1, $xr1, $xr2
 ; LA32-NEXT: xvfmul.s $xr0, $xr1, $xr0
 ; LA32-NEXT: xvst $xr0, $sp, 64
@@ -100,11 +98,9 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
 ; LA64-NEXT: xvfrsqrte.s $xr1, $xr0
 ; LA64-NEXT: xvfmul.s $xr1, $xr0, $xr1
 ; LA64-NEXT: xvfmul.s $xr0, $xr0, $xr1
-; LA64-NEXT: lu12i.w $a1, -261120
-; LA64-NEXT: xvreplgr2vr.w $xr2, $a1
+; LA64-NEXT: xvldi $xr2, -1400
 ; LA64-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
-; LA64-NEXT: lu12i.w $a1, -266240
-; LA64-NEXT: xvreplgr2vr.w $xr2, $a1
+; LA64-NEXT: xvldi $xr2, -3137
 ; LA64-NEXT: xvfmul.s $xr1, $xr1, $xr2
 ; LA64-NEXT: xvfmul.s $xr0, $xr1, $xr0
 ; LA64-NEXT: xvst $xr0, $a0, 0
@@ -136,9 +132,8 @@ define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA32-NEXT: ld.w $a1, $a1, 0
 ; FAULT-LA32-NEXT: st.w $a1, $sp, 32
 ; FAULT-LA32-NEXT: xvld $xr0, $sp, 32
-; FAULT-LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; FAULT-LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
 ; FAULT-LA32-NEXT: xvfsqrt.d $xr0, $xr0
+; FAULT-LA32-NEXT: xvldi $xr1, -912
 ; FAULT-LA32-NEXT: xvfdiv.d $xr0, $xr1, $xr0
 ; FAULT-LA32-NEXT: xvst $xr0, $sp, 64
 ; FAULT-LA32-NEXT: vld $vr0, $sp, 80
@@ -176,18 +171,16 @@ define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
 ; LA32-NEXT: st.w $a1, $sp, 32
 ; LA32-NEXT: xvld $xr0, $sp, 32
 ; LA32-NEXT: xvfrsqrte.d $xr1, $xr0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; LA32-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI1_0)
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_1)
-; LA32-NEXT: xvld $xr3, $a1, %pc_lo12(.LCPI1_1)
 ; LA32-NEXT: xvfmul.d $xr1, $xr0, $xr1
-; LA32-NEXT: xvfmul.d $xr4, $xr0, $xr1
-; LA32-NEXT: xvfmadd.d $xr4, $xr4, $xr1, $xr2
-; LA32-NEXT: xvfmul.d $xr1, $xr1, $xr3
+; LA32-NEXT: xvfmul.d $xr2, $xr0, $xr1
+; LA32-NEXT: xvldi $xr3, -888
+; LA32-NEXT: xvfmadd.d $xr2, $xr2, $xr1, $xr3
+; LA32-NEXT: xvldi $xr4, -800
 ; LA32-NEXT: xvfmul.d $xr1, $xr1, $xr4
+; LA32-NEXT: xvfmul.d $xr1, $xr1, $xr2
 ; LA32-NEXT: xvfmul.d $xr0, $xr0, $xr1
-; LA32-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2
-; LA32-NEXT: xvfmul.d $xr1, $xr1, $xr3
+; LA32-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr3
+; LA32-NEXT: xvfmul.d $xr1, $xr1, $xr4
 ; LA32-NEXT: xvfmul.d $xr0, $xr1, $xr0
 ; LA32-NEXT: xvst $xr0, $sp, 64
 ; LA32-NEXT: vld $vr0, $sp, 80
@@ -219,13 +212,9 @@ define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
 ; LA64-NEXT: xvfrsqrte.d $xr1, $xr0
 ; LA64-NEXT: xvfmul.d $xr1, $xr0, $xr1
 ; LA64-NEXT: xvfmul.d $xr2, $xr0, $xr1
-; LA64-NEXT: ori $a1, $zero, 0
-; LA64-NEXT: lu32i.d $a1, -524288
-; LA64-NEXT: lu52i.d $a1, $a1, -1024
-; LA64-NEXT: xvreplgr2vr.d $xr3, $a1
+; LA64-NEXT: xvldi $xr3, -888
 ; LA64-NEXT: xvfmadd.d $xr2, $xr2, $xr1, $xr3
-; LA64-NEXT: lu52i.d $a1, $zero, -1026
-; LA64-NEXT: xvreplgr2vr.d $xr4, $a1
+; LA64-NEXT: xvldi $xr4, -800
 ; LA64-NEXT: xvfmul.d $xr1, $xr1, $xr4
 ; LA64-NEXT: xvfmul.d $xr1, $xr1, $xr2
 ; LA64-NEXT: xvfmul.d $xr0, $xr0, $xr1
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll
index f8a3284f04dc8..9ae651d612f18 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll
@@ -174,9 +174,8 @@ define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
 ; LA32-NEXT: ld.w $a1, $a1, 0
 ; LA32-NEXT: st.w $a1, $sp, 32
 ; LA32-NEXT: xvld $xr0, $sp, 32
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
 ; LA32-NEXT: xvfsqrt.d $xr0, $xr0
+; LA32-NEXT: xvldi $xr1, -912
 ; LA32-NEXT: xvfdiv.d $xr0, $xr1, $xr0
 ; LA32-NEXT: xvst $xr0, $sp, 64
 ; LA32-NEXT: vld $vr0, $sp, 80
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
index ae6f091ddb498..aefaa0efb079c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
@@ -53,8 +53,7 @@ define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
 ; LA32-LABEL: one_fdiv_v4f64:
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
+; LA32-NEXT: xvldi $xr1, -912
 ; LA32-NEXT: xvfdiv.d $xr0, $xr1, $xr0
 ; LA32-NEXT: xvst $xr0, $a0, 0
 ; LA32-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
index 44e4f71c8d08d..bf31ccb1d0104 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
 
 define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind {
 ; CHECK-LABEL: select_v32i8_imm:
@@ -50,26 +50,14 @@ define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 }
 
 define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
-; LA32-LABEL: select_v8i32:
-; LA32: # %bb.0:
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: xvld $xr1, $a2, 0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: select_v8i32:
-; LA64: # %bb.0:
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvld $xr1, $a2, 0
-; LA64-NEXT: ori $a1, $zero, 0
-; LA64-NEXT: lu32i.d $a1, -1
-; LA64-NEXT: xvreplgr2vr.d $xr2, $a1
-; LA64-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: select_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvldi $xr2, -1552
+; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
 %v0 = load <8 x i32>, ptr %a0
 %v1 = load <8 x i32>, ptr %a1
 %sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i32> %v0, <8 x i32> %v1
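Aside (my arithmetic, not text from the patch): the -1552 mask immediate in the select tests decodes as 13-bit 0x19F0, i.e. prefix 4'b1001 with suffix 0xF0. The 4'b1001 rule expands bit i of the suffix into byte i of the 64-bit element, 0x00 for a clear bit and 0xFF for a set bit, so 0xF0 produces the pattern 0xFFFFFFFF00000000 — exactly the alternating i32-lane mask that the removed ori/lu32i.d/[x]vreplgr2vr.d sequences used to materialize through a GPR. The LSX test updates below mirror the LASX ones.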
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index 373f2b19c2a46..fe45e73b36f51 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -196,8 +196,7 @@ entry:
 define void @buildvector_v4f32_const_splat(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_v4f32_const_splat:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 260096
-; CHECK-NEXT: vreplgr2vr.w $vr0, $a1
+; CHECK-NEXT: vldi $vr0, -1424
 ; CHECK-NEXT: vst $vr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -207,19 +206,11 @@ entry:
 
 ;; Also check buildvector_const_splat_vldi_1100.
 define void @buildvector_v2f64_const_splat(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_v2f64_const_splat:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0)
-; LA32-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI14_0)
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: buildvector_v2f64_const_splat:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu52i.d $a1, $zero, 1023
-; LA64-NEXT: vreplgr2vr.d $vr0, $a1
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: buildvector_v2f64_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldi $vr0, -912
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   store <2 x double> splat (double 1.000000e+00), ptr %dst
   ret void
@@ -229,8 +220,7 @@ entry:
 define void @buildvector_const_splat_vldi_0001(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_vldi_0001:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a1, $zero, 768
-; CHECK-NEXT: vreplgr2vr.w $vr0, $a1
+; CHECK-NEXT: vldi $vr0, -3837
 ; CHECK-NEXT: vst $vr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -241,8 +231,7 @@ entry:
 define void @buildvector_const_splat_vldi_0010(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_vldi_0010:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 16
-; CHECK-NEXT: vreplgr2vr.w $vr0, $a1
+; CHECK-NEXT: vldi $vr0, -3583
 ; CHECK-NEXT: vst $vr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -253,8 +242,7 @@ entry:
 define void @buildvector_const_splat_vldi_0011(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_vldi_0011:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 4096
-; CHECK-NEXT: vreplgr2vr.w $vr0, $a1
+; CHECK-NEXT: vldi $vr0, -3327
 ; CHECK-NEXT: vst $vr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -265,8 +253,7 @@ entry:
 define void @buildvector_const_splat_vldi_0101(ptr %dst) {
 ; CHECK-LABEL: buildvector_const_splat_vldi_0101:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a1, $zero, 768
-; CHECK-NEXT: vreplgr2vr.h $vr0, $a1
+; CHECK-NEXT: vldi $vr0, -2813
 ; CHECK-NEXT: vst $vr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -277,8 +264,7 @@ entry:
 define void @buildvector_const_splat_vldi_0110(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_vldi_0110:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a1, $zero, 1023
-; CHECK-NEXT: vreplgr2vr.w $vr0, $a1
+; CHECK-NEXT: vldi $vr0, -2557
 ; CHECK-NEXT: vst $vr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -289,9 +275,7 @@ entry:
 define void @buildvector_const_splat_vldi_0111(ptr %dst) nounwind {
 ; CHECK-LABEL: buildvector_const_splat_vldi_0111:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 15
-; CHECK-NEXT: ori $a1, $a1, 4095
-; CHECK-NEXT: vreplgr2vr.w $vr0, $a1
+; CHECK-NEXT: vldi $vr0, -2305
 ; CHECK-NEXT: vst $vr0, $a0, 0
 ; CHECK-NEXT: ret
 entry:
@@ -300,39 +284,22 @@ entry:
 }
 
 define void @buildvector_const_splat_vldi_1001(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_vldi_1001:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI21_0)
-; LA32-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI21_0)
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: buildvector_const_splat_vldi_1001:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a1, 15
-; LA64-NEXT: ori $a1, $a1, 4095
-; LA64-NEXT: vreplgr2vr.d $vr0, $a1
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: buildvector_const_splat_vldi_1001:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldi $vr0, -1789
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   store <4 x i32> <i32 65535, i32 0, i32 65535, i32 0>, ptr %dst
   ret void
 }
 
 define void @buildvector_const_splat_vldi_1011(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_vldi_1011:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI22_0)
-; LA32-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI22_0)
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: buildvector_const_splat_vldi_1011:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a1, 262144
-; LA64-NEXT: vreplgr2vr.d $vr0, $a1
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: buildvector_const_splat_vldi_1011:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldi $vr0, -1280
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   store <4 x float> <float 2.000000e+00, float 0.000000e+00, float 2.000000e+00, float 0.000000e+00>, ptr %dst
   ret void
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll
index 58e16d37ae278..46eb91e4079bf 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA32
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,+frecipe < %s | FileCheck %s
 ; RUN: llc --mtriple=loongarch64 --mattr=+lsx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA64
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s
 
 define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; FAULT-LABEL: fdiv_v4f32:
@@ -40,35 +40,19 @@ define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; FAULT-NEXT: vst $vr0, $a0, 0
 ; FAULT-NEXT: ret
 ;
-; LA32-LABEL: fdiv_v2f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI1_0)
-; LA32-NEXT: vld $vr0, $a2, 0
-; LA32-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI1_0)
-; LA32-NEXT: vld $vr2, $a1, 0
-; LA32-NEXT: vfrecipe.d $vr3, $vr0
-; LA32-NEXT: vfmadd.d $vr1, $vr0, $vr3, $vr1
-; LA32-NEXT: vfnmsub.d $vr1, $vr1, $vr3, $vr3
-; LA32-NEXT: vfmul.d $vr3, $vr2, $vr1
-; LA32-NEXT: vfnmsub.d $vr0, $vr0, $vr3, $vr2
-; LA32-NEXT: vfmadd.d $vr0, $vr1, $vr0, $vr3
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: fdiv_v2f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a2, 0
-; LA64-NEXT: vld $vr1, $a1, 0
-; LA64-NEXT: lu52i.d $a1, $zero, -1025
-; LA64-NEXT: vreplgr2vr.d $vr2, $a1
-; LA64-NEXT: vfrecipe.d $vr3, $vr0
-; LA64-NEXT: vfmadd.d $vr2, $vr0, $vr3, $vr2
-; LA64-NEXT: vfnmsub.d $vr2, $vr2, $vr3, $vr3
-; LA64-NEXT: vfmul.d $vr3, $vr1, $vr2
-; LA64-NEXT: vfnmsub.d $vr0, $vr0, $vr3, $vr1
-; LA64-NEXT: vfmadd.d $vr0, $vr2, $vr0, $vr3
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: fdiv_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfrecipe.d $vr2, $vr0
+; CHECK-NEXT: vldi $vr3, -784
+; CHECK-NEXT: vfmadd.d $vr3, $vr0, $vr2, $vr3
+; CHECK-NEXT: vfnmsub.d $vr2, $vr3, $vr2, $vr2
+; CHECK-NEXT: vfmul.d $vr3, $vr1, $vr2
+; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr3, $vr1
+; CHECK-NEXT: vfmadd.d $vr0, $vr2, $vr0, $vr3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
 entry:
   %v0 = load <2 x double>, ptr %a0
   %v1 = load <2 x double>, ptr %a1
@@ -90,8 +74,7 @@ define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vld $vr0, $a1, 0
 ; CHECK-NEXT: vfrecipe.s $vr1, $vr0
-; CHECK-NEXT: lu12i.w $a1, -264192
-; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
+; CHECK-NEXT: vldi $vr2, -1296
 ; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2
 ; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr1, $vr1
 ; CHECK-NEXT: vst $vr0, $a0, 0
@@ -107,24 +90,22 @@ define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA32-LABEL: one_fdiv_v2f64:
 ; FAULT-LA32: # %bb.0: # %entry
 ; FAULT-LA32-NEXT: vld $vr0, $a1, 0
-; FAULT-LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; FAULT-LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI3_0)
+; FAULT-LA32-NEXT: vldi $vr1, -912
 ; FAULT-LA32-NEXT: vfdiv.d $vr0, $vr1, $vr0
 ; FAULT-LA32-NEXT: vst $vr0, $a0, 0
 ; FAULT-LA32-NEXT: ret
 ;
-; LA32-LABEL: one_fdiv_v2f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: vfrecipe.d $vr2, $vr0
-; LA32-NEXT: vfnmsub.d $vr3, $vr0, $vr2, $vr1
-; LA32-NEXT: vfmadd.d $vr2, $vr2, $vr3, $vr2
-; LA32-NEXT: vfnmsub.d $vr0, $vr0, $vr2, $vr1
-; LA32-NEXT: vfmadd.d $vr0, $vr2, $vr0, $vr2
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
+; CHECK-LABEL: one_fdiv_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrecipe.d $vr1, $vr0
+; CHECK-NEXT: vldi $vr2, -912
+; CHECK-NEXT: vfnmsub.d $vr3, $vr0, $vr1, $vr2
+; CHECK-NEXT: vfmadd.d $vr1, $vr1, $vr3, $vr1
+; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT: vfmadd.d $vr0, $vr1, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
 ;
 ; FAULT-LA64-LABEL: one_fdiv_v2f64:
 ; FAULT-LA64: # %bb.0: # %entry
@@ -132,19 +113,6 @@ define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA64-NEXT: vfrecip.d $vr0, $vr0
 ; FAULT-LA64-NEXT: vst $vr0, $a0, 0
 ; FAULT-LA64-NEXT: ret
-;
-; LA64-LABEL: one_fdiv_v2f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vfrecipe.d $vr1, $vr0
-; LA64-NEXT: lu52i.d $a1, $zero, 1023
-; LA64-NEXT: vreplgr2vr.d $vr2, $a1
-; LA64-NEXT: vfnmsub.d $vr3, $vr0, $vr1, $vr2
-; LA64-NEXT: vfmadd.d $vr1, $vr1, $vr3, $vr1
-; LA64-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2
-; LA64-NEXT: vfmadd.d $vr0, $vr1, $vr0, $vr1
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
 entry:
   %v0 = load <2 x double>, ptr %a0
   %div = fdiv fast <2 x double> splat (double 1.000000e+00), %v0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll
index 1f744830bd56b..4951696e05a94 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA32
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,+frecipe < %s | FileCheck %s
 ; RUN: llc --mtriple=loongarch64 --mattr=+lsx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA64
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s
 
 ;; 1.0 / (fsqrt vec)
 define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind {
@@ -19,11 +19,9 @@ define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind {
 ; CHECK-NEXT: vfrsqrte.s $vr1, $vr0
 ; CHECK-NEXT: vfmul.s $vr1, $vr0, $vr1
 ; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1
-; CHECK-NEXT: lu12i.w $a1, -261120
-; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
+; CHECK-NEXT: vldi $vr2, -1400
 ; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2
-; CHECK-NEXT: lu12i.w $a1, -266240
-; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
+; CHECK-NEXT: vldi $vr2, -3137
 ; CHECK-NEXT: vfmul.s $vr1, $vr1, $vr2
 ; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0
 ; CHECK-NEXT: vst $vr0, $a0, 0
@@ -40,32 +38,29 @@ define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA32-LABEL: one_div_sqrt_v2f64:
 ; FAULT-LA32: # %bb.0: # %entry
 ; FAULT-LA32-NEXT: vld $vr0, $a1, 0
-; FAULT-LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; FAULT-LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI1_0)
 ; FAULT-LA32-NEXT: vfsqrt.d $vr0, $vr0
+; FAULT-LA32-NEXT: vldi $vr1, -912
 ; FAULT-LA32-NEXT: vfdiv.d $vr0, $vr1, $vr0
 ; FAULT-LA32-NEXT: vst $vr0, $a0, 0
 ; FAULT-LA32-NEXT: ret
 ;
-; LA32-LABEL: one_div_sqrt_v2f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vfrsqrte.d $vr1, $vr0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; LA32-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI1_0)
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_1)
-; LA32-NEXT: vld $vr3, $a1, %pc_lo12(.LCPI1_1)
-; LA32-NEXT: vfmul.d $vr1, $vr0, $vr1
-; LA32-NEXT: vfmul.d $vr4, $vr0, $vr1
-; LA32-NEXT: vfmadd.d $vr4, $vr4, $vr1, $vr2
-; LA32-NEXT: vfmul.d $vr1, $vr1, $vr3
-; LA32-NEXT: vfmul.d $vr1, $vr1, $vr4
-; LA32-NEXT: vfmul.d $vr0, $vr0, $vr1
-; LA32-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr2
-; LA32-NEXT: vfmul.d $vr1, $vr1, $vr3
-; LA32-NEXT: vfmul.d $vr0, $vr1, $vr0
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
+; CHECK-LABEL: one_div_sqrt_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrsqrte.d $vr1, $vr0
+; CHECK-NEXT: vfmul.d $vr1, $vr0, $vr1
+; CHECK-NEXT: vfmul.d $vr2, $vr0, $vr1
+; CHECK-NEXT: vldi $vr3, -888
+; CHECK-NEXT: vfmadd.d $vr2, $vr2, $vr1, $vr3
+; CHECK-NEXT: vldi $vr4, -800
+; CHECK-NEXT: vfmul.d $vr1, $vr1, $vr4
+; CHECK-NEXT: vfmul.d $vr1, $vr1, $vr2
+; CHECK-NEXT: vfmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr3
+; CHECK-NEXT: vfmul.d $vr1, $vr1, $vr4
+; CHECK-NEXT: vfmul.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
 ;
 ; FAULT-LA64-LABEL: one_div_sqrt_v2f64:
 ; FAULT-LA64: # %bb.0: # %entry
@@ -73,28 +68,6 @@ define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind {
 ; FAULT-LA64-NEXT: vfrsqrt.d $vr0, $vr0
 ; FAULT-LA64-NEXT: vst $vr0, $a0, 0
 ; FAULT-LA64-NEXT: ret
-;
-; LA64-LABEL: one_div_sqrt_v2f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vfrsqrte.d $vr1, $vr0
-; LA64-NEXT: vfmul.d $vr1, $vr0, $vr1
-; LA64-NEXT: vfmul.d $vr2, $vr0, $vr1
-; LA64-NEXT: ori $a1, $zero, 0
-; LA64-NEXT: lu32i.d $a1, -524288
-; LA64-NEXT: lu52i.d $a1, $a1, -1024
-; LA64-NEXT: vreplgr2vr.d $vr3, $a1
-; LA64-NEXT: vfmadd.d $vr2, $vr2, $vr1, $vr3
-; LA64-NEXT: lu52i.d $a1, $zero, -1026
-; LA64-NEXT: vreplgr2vr.d $vr4, $a1
-; LA64-NEXT: vfmul.d $vr1, $vr1, $vr4
-; LA64-NEXT: vfmul.d $vr1, $vr1, $vr2
-; LA64-NEXT: vfmul.d $vr0, $vr0, $vr1
-; LA64-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr3
-; LA64-NEXT: vfmul.d $vr1, $vr1, $vr4
-; LA64-NEXT: vfmul.d $vr0, $vr1, $vr0
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
 entry:
   %v0 = load <2 x double>, ptr %a0, align 16
   %sqrt = call fast <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll
index d88e0d1ea7c2d..9664808681bb8 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll
@@ -51,9 +51,8 @@ define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind {
 ; LA32-LABEL: one_div_sqrt_v2f64:
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI3_0)
 ; LA32-NEXT: vfsqrt.d $vr0, $vr0
+; LA32-NEXT: vldi $vr1, -912
 ; LA32-NEXT: vfdiv.d $vr0, $vr1, $vr0
 ; LA32-NEXT: vst $vr0, $a0, 0
 ; LA32-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
index 603bd21ab9af9..fb0b9cee67df5 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll
@@ -53,8 +53,7 @@ define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind {
 ; LA32-LABEL: one_fdiv_v2f64:
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI3_0)
+; LA32-NEXT: vldi $vr1, -912
 ; LA32-NEXT: vfdiv.d $vr0, $vr1, $vr0
 ; LA32-NEXT: vst $vr0, $a0, 0
 ; LA32-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
index 5dbff4a402b3d..8f25a6ba62f9f 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
 
 define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind {
 ; CHECK-LABEL: select_v16i8_imm:
@@ -50,26 +50,14 @@ define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 }
 
 define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
-; LA32-LABEL: select_v4i32:
-; LA32: # %bb.0:
-; LA32-NEXT: vld $vr0, $a1, 0
-; LA32-NEXT: vld $vr1, $a2, 0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
-; LA32-NEXT: vst $vr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: select_v4i32:
-; LA64: # %bb.0:
-; LA64-NEXT: vld $vr0, $a1, 0
-; LA64-NEXT: vld $vr1, $a2, 0
-; LA64-NEXT: ori $a1, $zero, 0
-; LA64-NEXT: lu32i.d $a1, -1
-; LA64-NEXT: vreplgr2vr.d $vr2, $a1
-; LA64-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
-; LA64-NEXT: vst $vr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: select_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vldi $vr2, -1552
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
 %v0 = load <4 x i32>, ptr %a0
 %v1 = load <4 x i32>, ptr %a1
 %sel = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> %v0, <4 x i32> %v1
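One last decode as a sanity check (again my arithmetic, not part of the patch): the float-splat immediate -1424 reads back as 13-bit 0x1A70, prefix 4'b1010, suffix 0x70 = 0b01110000. Undoing `(((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F)`: suffix bits [7:6] = 0b01 give V[31:30] = 0b00, selecting the 0x3E000000 exponent alternative, and bits [5:0] = 0b110000 fill V[24:19], so V = 0x3E000000 | (0x30 << 19) = 0x3F800000 — the bit pattern of float +1.0, matching the removed `lu12i.w $a1, 260096` (260096 << 12 == 0x3F800000) in the const-splat tests above.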