-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LoongArch] Generate [x]vldi instructions with special constant splats #159258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-loongarch Author: Zhaoxin Yang (ylzsx) Changes: Patch is 42.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159258.diff 15 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 07e722b9a6591..fda313e693760 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -113,10 +113,11 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- unsigned Op;
+ unsigned Op = 0;
EVT ResTy = BVN->getValueType(0);
bool Is128Vec = BVN->getValueType(0).is128BitVector();
bool Is256Vec = BVN->getValueType(0).is256BitVector();
+ SDNode *Res;
if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
break;
@@ -124,26 +125,25 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
HasAnyUndefs, 8))
break;
- switch (SplatBitSize) {
- default:
- break;
- case 8:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
- break;
- case 16:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
- break;
- case 32:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
- break;
- case 64:
- Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
- break;
- }
-
- SDNode *Res;
// If we have a signed 10 bit integer, we can splat it directly.
if (SplatValue.isSignedIntN(10)) {
+ switch (SplatBitSize) {
+ default:
+ break;
+ case 8:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+ break;
+ case 16:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+ break;
+ case 32:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+ break;
+ case 64:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+ break;
+ }
+
EVT EleType = ResTy.getVectorElementType();
APInt Val = SplatValue.sextOrTrunc(EleType.getSizeInBits());
SDValue Imm = CurDAG->getTargetConstant(Val, DL, EleType);
@@ -151,6 +151,20 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Res);
return;
}
+
+ // Select appropriate [x]vldi instructions for some special constant splats,
+ // where the immediate of the selected [x]vldi instruction has `imm[12] == 1`.
+ std::pair<bool, uint64_t> ConvertVLDI =
+ LoongArchTargetLowering::isImmVLDILegalForMode1(SplatValue,
+ SplatBitSize);
+ if (ConvertVLDI.first) {
+ Op = Is256Vec ? LoongArch::XVLDI : LoongArch::VLDI;
+ SDValue Imm = CurDAG->getSignedTargetConstant(
+ SignExtend32<13>(ConvertVLDI.second), DL, MVT::i32);
+ Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
+ ReplaceNode(Node, Res);
+ return;
+ }
break;
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e8668860c2b38..460e2d7c87af7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2679,9 +2679,10 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
// We can only handle 64-bit elements that are within
- // the signed 10-bit range on 32-bit targets.
+ // the signed 10-bit range or match vldi patterns on 32-bit targets.
// See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
- if (!SplatValue.isSignedIntN(10))
+ if (!SplatValue.isSignedIntN(10) &&
+ !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
return SDValue();
if ((Is128Vec && ResTy == MVT::v4i32) ||
(Is256Vec && ResTy == MVT::v8i32))
@@ -8194,6 +8195,88 @@ SDValue LoongArchTargetLowering::LowerReturn(
return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
}
+// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
+// Note: The following prefixes are excluded:
+// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
+// as they can be represented using [x]vrepli.[whb]
+std::pair<bool, uint64_t>
+LoongArchTargetLowering::isImmVLDILegalForMode1(const APInt &SplatValue,
+ const unsigned SplatBitSize) {
+ uint64_t RequiredImm = 0;
+ uint64_t V = SplatValue.getZExtValue();
+ if (SplatBitSize == 16 && !(V & 0x00FF)) {
+ // 4'b0101
+ RequiredImm = (0b10101 << 8) | (V >> 8);
+ return {true, RequiredImm};
+ } else if (SplatBitSize == 32) {
+ // 4'b0001
+ if (!(V & 0xFFFF00FF)) {
+ RequiredImm = (0b10001 << 8) | (V >> 8);
+ return {true, RequiredImm};
+ }
+ // 4'b0010
+ if (!(V & 0xFF00FFFF)) {
+ RequiredImm = (0b10010 << 8) | (V >> 16);
+ return {true, RequiredImm};
+ }
+ // 4'b0011
+ if (!(V & 0x00FFFFFF)) {
+ RequiredImm = (0b10011 << 8) | (V >> 24);
+ return {true, RequiredImm};
+ }
+ // 4'b0110
+ if ((V & 0xFFFF00FF) == 0xFF) {
+ RequiredImm = (0b10110 << 8) | (V >> 8);
+ return {true, RequiredImm};
+ }
+ // 4'b0111
+ if ((V & 0xFF00FFFF) == 0xFFFF) {
+ RequiredImm = (0b10111 << 8) | (V >> 16);
+ return {true, RequiredImm};
+ }
+ // 4'b1010
+ if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
+ RequiredImm =
+ (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
+ return {true, RequiredImm};
+ }
+ } else if (SplatBitSize == 64) {
+ // 4'b1011
+ if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
+ (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
+ RequiredImm =
+ (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
+ return {true, RequiredImm};
+ }
+ // 4'b1100
+ if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
+ (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
+ RequiredImm =
+ (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
+ return {true, RequiredImm};
+ }
+ // 4'b1001
+ auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
+ uint8_t res = 0;
+ for (int i = 0; i < 8; ++i) {
+ uint8_t byte = x & 0xFF;
+ if (byte == 0 || byte == 0xFF)
+ res |= ((byte & 1) << i);
+ else
+ return {false, 0};
+ x >>= 8;
+ }
+ return {true, res};
+ };
+ auto [IsSame, Suffix] = sameBitsPreByte(V);
+ if (IsSame) {
+ RequiredImm = (0b11001 << 8) | Suffix;
+ return {true, RequiredImm};
+ }
+ }
+ return {false, RequiredImm};
+}
+
bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
EVT VT) const {
if (!Subtarget.hasExtLSX())
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9d14934a9d363..76a5cd87bf76e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -332,6 +332,11 @@ class LoongArchTargetLowering : public TargetLowering {
TargetLoweringOpt &TLO,
unsigned Depth) const override;
+ /// Check if a constant splat can be generated using [x]vldi, where imm[12]
+ /// is 1.
+ static std::pair<bool, uint64_t>
+ isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize);
+
private:
/// Target-specific function used to lower LoongArch calling conventions.
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 8dfd7bf3ac8ec..58d684e1beb54 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -201,8 +201,7 @@ entry:
define void @buildvector_v8f32_const_splat(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_v8f32_const_splat:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 260096
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -1424
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -212,19 +211,11 @@ entry:
;; Also check buildvector_const_splat_xvldi_1100.
define void @buildvector_v4f64_const_splat(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_v4f64_const_splat:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0)
-; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI14_0)
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: buildvector_v4f64_const_splat:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu52i.d $a1, $zero, 1023
-; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: buildvector_v4f64_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldi $xr0, -912
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
store <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, ptr %dst
ret void
@@ -234,8 +225,7 @@ entry:
define void @buildvector_const_splat_xvldi_0001(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_const_splat_xvldi_0001:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a1, $zero, 768
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -3837
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -246,8 +236,7 @@ entry:
define void @buildvector_const_splat_xvldi_0010(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_const_splat_xvldi_0010:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 16
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -3583
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -258,8 +247,7 @@ entry:
define void @buildvector_const_splat_xvldi_0011(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_const_splat_xvldi_0011:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 4096
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -3327
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -270,8 +258,7 @@ entry:
define void @buildvector_const_splat_xvldi_0101(ptr %dst) {
; CHECK-LABEL: buildvector_const_splat_xvldi_0101:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a1, $zero, 768
-; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -2813
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -282,8 +269,7 @@ entry:
define void @buildvector_const_splat_xvldi_0110(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_const_splat_xvldi_0110:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a1, $zero, 1023
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -2557
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -294,9 +280,7 @@ entry:
define void @buildvector_const_splat_xvldi_0111(ptr %dst) nounwind {
; CHECK-LABEL: buildvector_const_splat_xvldi_0111:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lu12i.w $a1, 15
-; CHECK-NEXT: ori $a1, $a1, 4095
-; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvldi $xr0, -2305
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -305,39 +289,22 @@ entry:
}
define void @buildvector_const_splat_xvldi_1001(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_xvldi_1001:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI21_0)
-; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI21_0)
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: buildvector_const_splat_xvldi_1001:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a1, 15
-; LA64-NEXT: ori $a1, $a1, 4095
-; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: buildvector_const_splat_xvldi_1001:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldi $xr0, -1789
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
store <8 x i32> <i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0, i32 65535, i32 0>, ptr %dst
ret void
}
define void @buildvector_const_splat_xvldi_1011(ptr %dst) nounwind {
-; LA32-LABEL: buildvector_const_splat_xvldi_1011:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI22_0)
-; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI22_0)
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: buildvector_const_splat_xvldi_1011:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a1, 262144
-; LA64-NEXT: xvreplgr2vr.d $xr0, $a1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: buildvector_const_splat_xvldi_1011:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldi $xr0, -1280
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
store <8 x float> <float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0, float 2.0, float 0.0>, ptr %dst
ret void
@@ -2458,8 +2425,7 @@ define void @buildvector_v8f32_with_constant(ptr %dst, float %a1, float %a2, flo
; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2
; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT: lu12i.w $a1, 262144
-; CHECK-NEXT: xvreplgr2vr.w $xr4, $a1
+; CHECK-NEXT: xvldi $xr4, -3264
; CHECK-NEXT: xvinsve0.w $xr4, $xr0, 1
; CHECK-NEXT: xvinsve0.w $xr4, $xr1, 2
; CHECK-NEXT: xvinsve0.w $xr4, $xr2, 5
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
index 7514dafa8000b..ba821308cb4db 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll
@@ -40,35 +40,19 @@ define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
; FAULT-NEXT: xvst $xr0, $a0, 0
; FAULT-NEXT: ret
;
-; LA32-LABEL: fdiv_v4f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI1_0)
-; LA32-NEXT: xvld $xr0, $a2, 0
-; LA32-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI1_0)
-; LA32-NEXT: xvld $xr2, $a1, 0
-; LA32-NEXT: xvfrecipe.d $xr3, $xr0
-; LA32-NEXT: xvfmadd.d $xr1, $xr0, $xr3, $xr1
-; LA32-NEXT: xvfnmsub.d $xr1, $xr1, $xr3, $xr3
-; LA32-NEXT: xvfmul.d $xr3, $xr2, $xr1
-; LA32-NEXT: xvfnmsub.d $xr0, $xr0, $xr3, $xr2
-; LA32-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr3
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: fdiv_v4f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a2, 0
-; LA64-NEXT: xvld $xr1, $a1, 0
-; LA64-NEXT: lu52i.d $a1, $zero, -1025
-; LA64-NEXT: xvreplgr2vr.d $xr2, $a1
-; LA64-NEXT: xvfrecipe.d $xr3, $xr0
-; LA64-NEXT: xvfmadd.d $xr2, $xr0, $xr3, $xr2
-; LA64-NEXT: xvfnmsub.d $xr2, $xr2, $xr3, $xr3
-; LA64-NEXT: xvfmul.d $xr3, $xr1, $xr2
-; LA64-NEXT: xvfnmsub.d $xr0, $xr0, $xr3, $xr1
-; LA64-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr3
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: fdiv_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvfrecipe.d $xr2, $xr0
+; CHECK-NEXT: xvldi $xr3, -784
+; CHECK-NEXT: xvfmadd.d $xr3, $xr0, $xr2, $xr3
+; CHECK-NEXT: xvfnmsub.d $xr2, $xr3, $xr2, $xr2
+; CHECK-NEXT: xvfmul.d $xr3, $xr1, $xr2
+; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr3, $xr1
+; CHECK-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
entry:
%v0 = load <4 x double>, ptr %a0
%v1 = load <4 x double>, ptr %a1
@@ -90,8 +74,7 @@ define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvfrecipe.s $xr1, $xr0
-; CHECK-NEXT: lu12i.w $a1, -264192
-; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1
+; CHECK-NEXT: xvldi $xr2, -1296
; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
@@ -107,24 +90,22 @@ define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
; FAULT-LA32-LABEL: one_fdiv_v4f64:
; FAULT-LA32: # %bb.0: # %entry
; FAULT-LA32-NEXT: xvld $xr0, $a1, 0
-; FAULT-LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; FAULT-LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
+; FAULT-LA32-NEXT: xvldi $xr1, -912
; FAULT-LA32-NEXT: xvfdiv.d $xr0, $xr1, $xr0
; FAULT-LA32-NEXT: xvst $xr0, $a0, 0
; FAULT-LA32-NEXT: ret
;
-; LA32-LABEL: one_fdiv_v4f64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: xvld $xr0, $a1, 0
-; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI3_0)
-; LA32-NEXT: xvfrecipe.d $xr2, $xr0
-; LA32-NEXT: xvfnmsub.d $xr3, $xr0, $xr2, $xr1
-; LA32-NEXT: xvfmadd.d $xr2, $xr2, $xr3, $xr2
-; LA32-NEXT: xvfnmsub.d $xr0, $xr0, $xr2, $xr1
-; LA32-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr2
-; LA32-NEXT: xvst $xr0, $a0, 0
-; LA32-NEXT: ret
+; CHECK-LABEL: one_fdiv_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrecipe.d $xr1, $xr0
+; CHECK-NEXT: xvldi $xr2, -912
+; CHECK-NEXT: xvfnmsub.d $xr3, $xr0, $xr1, $xr2
+; CHECK-NEXT: xvfmadd.d $xr1, $xr1, $xr3, $xr1
+; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
;
; FAULT-LA64-LABEL: one_fdiv_v4f64:
; FAULT-LA64: # %bb.0: # %entry
@@ -132,22 +113,12 @@ define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
; FAULT-LA64-NEXT: xvfrecip.d $xr0, $xr0
; FAULT-LA64-NEXT: xvst $xr0, $a0, 0
; FAULT-LA64-NEXT: ret
-;
-; LA64-LABEL: one_fdiv_v4f64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: xvld $xr0, $a1, 0
-; LA64-NEXT: xvfrecipe.d $xr1, $xr0
-; LA64-NEXT: lu52i.d $a1, $zero, 1023
-; LA64-NEXT: xvreplgr2vr.d $xr2, $a1
-; LA64-NEXT: xvfnmsub.d $xr3, $xr0, $xr1, $xr2
-; LA64-NEXT: xvfmadd.d $xr1, $xr1, $xr3, $xr1
-; LA64-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2
-; LA64-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr1
-; LA64-NEXT: xvst $xr0, $a0, 0
-; LA64-NEXT: ret
entry:
%v0 = load <4 x double>, ptr %a0
%div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %v0
store <4 x double> %div, ptr %res
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LA32: {{.*}}
+; LA64: {{.*}}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
index 4e475daa8ced3..e696129acb862 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll
@@ -63,11 +63,9 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
; LA32-NEXT: xvfrsqrte.s $xr1, $xr0
; LA32-NEXT: xvfmul.s $xr1, $xr0, $xr1
; LA32-NEXT: xvfmul.s $xr0, $xr0, $xr1
-; LA32-NEXT: lu12i.w $a1, -261120
-; LA32-NEXT: xvreplgr2vr.w $xr2, $a1
+; LA32-NEXT: xvldi $xr2, -1400
; LA32-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
-; LA32-NEXT: lu12i.w $a1, -266240
-; LA32-NEXT: xvreplgr2vr.w $xr2, $a1
+; LA32-NEXT: xvldi $xr2, -3137
; LA32-NEXT: xvfmul.s $xr1, $xr1, $xr2
; LA32-NEXT: xvfmul.s $xr0, $xr1, $xr0
; LA32-NEXT: xvst $xr0, $sp, 64
@@ -100,11 +98,9 @@ define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
; LA64-NEXT: xvfrsqrte.s $xr1, $xr0
; LA64-NEXT: xvfmul.s $xr1, $xr0, $xr1
; LA64-NEXT: xvfmul.s $xr0, $xr0, $xr1
-; LA64-NEXT: lu12i.w $a1, -261120
-; LA64-NEXT: xvreplgr2vr.w $xr2, $a1
+; LA64-NEXT: xvldi $xr2, -1400
; LA64-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2
-; LA64-NEXT: lu12i.w $a1, -266240
-; LA64-NEXT: xvreplgr2vr.w $xr2, $a1
+; LA64-NEXT: xvldi $xr2, -3137
; LA64-NEXT: xvfmul.s $xr1, $xr1, $xr2
; LA64-NEXT: xvfmul.s $xr0, $xr1, ...
[truncated]
|
unsigned SplatBitSize; | ||
bool HasAnyUndefs; | ||
unsigned Op; | ||
unsigned Op = 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is unnecessary to initialize it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Without initialization, a warning will occur. I'm not sure why it didn't happen before.
[44/185] Building CXX object lib/Target/LoongArch/CMakeFiles/LLVMLoongArchCodeGen.dir/LoongArchISelDAGToDAG.cpp.o
/home/yangzhaoxin/workspace/flang-test/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp:131:7: warning: variable 'Op' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized]
131 | default:
| ^~~~~~~
/home/yangzhaoxin/workspace/flang-test/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp:150:36: note: uninitialized use occurs here
150 | Res = CurDAG->getMachineNode(Op, DL, ResTy, Imm);
| ^~
/home/yangzhaoxin/workspace/flang-test/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp:116:16: note: initialize the variable 'Op' to silence this warning
116 | unsigned Op;
| ^
| = 0
1 warning generated.
0c87b0d
to
409f80c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Otherwise, many tests will fail on LA32. For example: CodeGen/LoongArch/lasx/fsqrt.ll
409f80c
to
56fe245
Compare
No description provided.