-
Notifications
You must be signed in to change notification settings - Fork 10.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
release/18.x: [LoongArch] Override LoongArchTargetLowering::getExtendForAtomicCmpSwapArg (#83656) #83750
Conversation
@xen0n What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-backend-loongarch Author: None (llvmbot) ChangesBackport 5f058aa Requested by: @SixWeining Patch is 47.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83750.diff 5 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 76c1a14fe0156c..b161c5434ca13e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4940,3 +4940,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
return !isa<ConstantSDNode>(Y);
}
+
+ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
+ // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
+ return ISD::SIGN_EXTEND;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 72182623b2c3dd..9e9ac0b8269291 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -206,6 +206,8 @@ class LoongArchTargetLowering : public TargetLowering {
return ISD::SIGN_EXTEND;
}
+ ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
+
Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index b0f29ee790885d..b84c1093eb75f2 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -25,15 +25,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: andi $a5, $a5, 255
; LA64-NEXT: sll.w $a5, $a5, $a3
; LA64-NEXT: and $a6, $a2, $a4
-; LA64-NEXT: or $a6, $a6, $a5
+; LA64-NEXT: or $a5, $a6, $a5
+; LA64-NEXT: addi.w $a6, $a2, 0
; LA64-NEXT: .LBB0_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB0_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a5, $a0, 0
-; LA64-NEXT: bne $a5, $a2, .LBB0_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a6, .LBB0_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
-; LA64-NEXT: move $a7, $a6
+; LA64-NEXT: move $a7, $a5
; LA64-NEXT: sc.w $a7, $a0, 0
; LA64-NEXT: beqz $a7, .LBB0_3
; LA64-NEXT: b .LBB0_6
@@ -42,11 +43,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB0_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT: addi.w $a6, $a2, 0
-; LA64-NEXT: move $a2, $a5
-; LA64-NEXT: bne $a5, $a6, .LBB0_1
+; LA64-NEXT: bne $a2, $a6, .LBB0_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a5, $a3
+; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -77,15 +76,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
; LA64-NEXT: sll.w $a5, $a5, $a3
; LA64-NEXT: and $a6, $a2, $a4
-; LA64-NEXT: or $a6, $a6, $a5
+; LA64-NEXT: or $a5, $a6, $a5
+; LA64-NEXT: addi.w $a6, $a2, 0
; LA64-NEXT: .LBB1_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB1_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a5, $a0, 0
-; LA64-NEXT: bne $a5, $a2, .LBB1_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a6, .LBB1_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
-; LA64-NEXT: move $a7, $a6
+; LA64-NEXT: move $a7, $a5
; LA64-NEXT: sc.w $a7, $a0, 0
; LA64-NEXT: beqz $a7, .LBB1_3
; LA64-NEXT: b .LBB1_6
@@ -94,11 +94,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB1_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT: addi.w $a6, $a2, 0
-; LA64-NEXT: move $a2, $a5
-; LA64-NEXT: bne $a5, $a6, .LBB1_1
+; LA64-NEXT: bne $a2, $a6, .LBB1_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a5, $a3
+; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
@@ -107,37 +105,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i32:
; LA64: # %bb.0:
-; LA64-NEXT: ld.w $a3, $a0, 0
-; LA64-NEXT: addi.w $a2, $a1, 0
+; LA64-NEXT: ld.w $a2, $a0, 0
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB2_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB2_3 Depth 2
-; LA64-NEXT: addi.w $a4, $a3, 0
-; LA64-NEXT: sltu $a1, $a4, $a2
-; LA64-NEXT: xori $a1, $a1, 1
-; LA64-NEXT: addi.d $a5, $a3, 1
-; LA64-NEXT: masknez $a5, $a5, $a1
+; LA64-NEXT: addi.w $a3, $a2, 0
+; LA64-NEXT: sltu $a4, $a3, $a1
+; LA64-NEXT: xori $a4, $a4, 1
+; LA64-NEXT: addi.d $a2, $a2, 1
+; LA64-NEXT: masknez $a4, $a2, $a4
; LA64-NEXT: .LBB2_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB2_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a1, $a0, 0
-; LA64-NEXT: bne $a1, $a3, .LBB2_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a3, .LBB2_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2
-; LA64-NEXT: move $a6, $a5
-; LA64-NEXT: sc.w $a6, $a0, 0
-; LA64-NEXT: beqz $a6, .LBB2_3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: sc.w $a5, $a0, 0
+; LA64-NEXT: beqz $a5, .LBB2_3
; LA64-NEXT: b .LBB2_6
; LA64-NEXT: .LBB2_5: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB2_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
-; LA64-NEXT: move $a3, $a1
-; LA64-NEXT: bne $a1, $a4, .LBB2_1
+; LA64-NEXT: bne $a2, $a3, .LBB2_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: move $a0, $a1
+; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst
ret i32 %result
@@ -209,15 +206,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: andi $a6, $a6, 255
; LA64-NEXT: sll.w $a6, $a6, $a3
; LA64-NEXT: and $a7, $a2, $a4
-; LA64-NEXT: or $a7, $a7, $a6
+; LA64-NEXT: or $a6, $a7, $a6
+; LA64-NEXT: addi.w $a7, $a2, 0
; LA64-NEXT: .LBB4_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB4_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a6, $a0, 0
-; LA64-NEXT: bne $a6, $a2, .LBB4_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a7, .LBB4_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
-; LA64-NEXT: move $t0, $a7
+; LA64-NEXT: move $t0, $a6
; LA64-NEXT: sc.w $t0, $a0, 0
; LA64-NEXT: beqz $t0, .LBB4_3
; LA64-NEXT: b .LBB4_6
@@ -226,11 +224,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB4_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT: addi.w $a7, $a2, 0
-; LA64-NEXT: move $a2, $a6
-; LA64-NEXT: bne $a6, $a7, .LBB4_1
+; LA64-NEXT: bne $a2, $a7, .LBB4_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a6, $a3
+; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -266,15 +262,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
; LA64-NEXT: sll.w $a6, $a6, $a3
; LA64-NEXT: and $a7, $a2, $a4
-; LA64-NEXT: or $a7, $a7, $a6
+; LA64-NEXT: or $a6, $a7, $a6
+; LA64-NEXT: addi.w $a7, $a2, 0
; LA64-NEXT: .LBB5_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB5_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a6, $a0, 0
-; LA64-NEXT: bne $a6, $a2, .LBB5_5
+; LA64-NEXT: ll.w $a2, $a0, 0
+; LA64-NEXT: bne $a2, $a7, .LBB5_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
-; LA64-NEXT: move $t0, $a7
+; LA64-NEXT: move $t0, $a6
; LA64-NEXT: sc.w $t0, $a0, 0
; LA64-NEXT: beqz $t0, .LBB5_3
; LA64-NEXT: b .LBB5_6
@@ -283,11 +280,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB5_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT: addi.w $a7, $a2, 0
-; LA64-NEXT: move $a2, $a6
-; LA64-NEXT: bne $a6, $a7, .LBB5_1
+; LA64-NEXT: bne $a2, $a7, .LBB5_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a6, $a3
+; LA64-NEXT: srl.w $a0, $a2, $a3
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
@@ -296,22 +291,22 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i32:
; LA64: # %bb.0:
-; LA64-NEXT: ld.w $a4, $a0, 0
+; LA64-NEXT: ld.w $a2, $a0, 0
; LA64-NEXT: addi.w $a3, $a1, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB6_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB6_3 Depth 2
-; LA64-NEXT: addi.w $a5, $a4, 0
-; LA64-NEXT: sltu $a2, $a3, $a5
-; LA64-NEXT: addi.d $a6, $a4, -1
-; LA64-NEXT: masknez $a6, $a6, $a2
-; LA64-NEXT: maskeqz $a2, $a1, $a2
-; LA64-NEXT: or $a2, $a2, $a6
-; LA64-NEXT: sltui $a6, $a5, 1
-; LA64-NEXT: masknez $a2, $a2, $a6
-; LA64-NEXT: maskeqz $a6, $a1, $a6
-; LA64-NEXT: or $a6, $a6, $a2
+; LA64-NEXT: addi.w $a4, $a2, 0
+; LA64-NEXT: sltu $a5, $a3, $a4
+; LA64-NEXT: addi.d $a2, $a2, -1
+; LA64-NEXT: masknez $a2, $a2, $a5
+; LA64-NEXT: maskeqz $a5, $a1, $a5
+; LA64-NEXT: or $a2, $a5, $a2
+; LA64-NEXT: sltui $a5, $a4, 1
+; LA64-NEXT: masknez $a2, $a2, $a5
+; LA64-NEXT: maskeqz $a5, $a1, $a5
+; LA64-NEXT: or $a5, $a5, $a2
; LA64-NEXT: .LBB6_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB6_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
@@ -319,17 +314,16 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: bne $a2, $a4, .LBB6_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
-; LA64-NEXT: move $a7, $a6
-; LA64-NEXT: sc.w $a7, $a0, 0
-; LA64-NEXT: beqz $a7, .LBB6_3
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sc.w $a6, $a0, 0
+; LA64-NEXT: beqz $a6, .LBB6_3
; LA64-NEXT: b .LBB6_6
; LA64-NEXT: .LBB6_5: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB6_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
-; LA64-NEXT: move $a4, $a2
-; LA64-NEXT: bne $a2, $a5, .LBB6_1
+; LA64-NEXT: bne $a2, $a4, .LBB6_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
index 417c865f6383ff..31ecec6ea8051b 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -69,6 +69,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_acquire_acquire:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB2_3
@@ -172,6 +173,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_acquire_monotonic:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB6_3
@@ -279,9 +281,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou
define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $a1, 0
; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a3, $a0, 0
-; LA64-NEXT: bne $a3, $a1, .LBB10_3
+; LA64-NEXT: ll.w $a1, $a0, 0
+; LA64-NEXT: bne $a1, $a3, .LBB10_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1
; LA64-NEXT: move $a4, $a2
; LA64-NEXT: sc.w $a4, $a0, 0
@@ -290,7 +293,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou
; LA64-NEXT: .LBB10_3:
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB10_4:
-; LA64-NEXT: move $a0, $a3
+; LA64-NEXT: move $a0, $a1
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
%res = extractvalue { i32, i1 } %tmp, 0
@@ -396,6 +399,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw
define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB14_3
@@ -407,8 +411,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw
; LA64-NEXT: .LBB14_3:
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB14_4:
-; LA64-NEXT: addi.w $a0, $a1, 0
-; LA64-NEXT: xor $a0, $a3, $a0
+; LA64-NEXT: xor $a0, $a3, $a1
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
@@ -506,6 +509,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounw
define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_monotonic_monotonic:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB18_3
@@ -613,9 +617,10 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val)
define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $a1, 0
; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ll.w $a3, $a0, 0
-; LA64-NEXT: bne $a3, $a1, .LBB22_3
+; LA64-NEXT: ll.w $a1, $a0, 0
+; LA64-NEXT: bne $a1, $a3, .LBB22_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
; LA64-NEXT: move $a4, $a2
; LA64-NEXT: sc.w $a4, $a0, 0
@@ -624,7 +629,7 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val)
; LA64-NEXT: .LBB22_3:
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB22_4:
-; LA64-NEXT: move $a0, $a3
+; LA64-NEXT: move $a0, $a1
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
%res = extractvalue { i32, i1 } %tmp, 0
@@ -730,6 +735,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n
define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind {
; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1:
; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: ll.w $a3, $a0, 0
; LA64-NEXT: bne $a3, $a1, .LBB26_3
@@ -741,8 +747,7 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n
; LA64-NEXT: .LBB26_3:
; LA64-NEXT: dbar 1792
; LA64-NEXT: .LBB26_4:
-; LA64-NEXT: addi.w $a0, $a1, 0
-; LA64-NEXT: xor $a0, $a3, $a0
+; LA64-NEXT: xor $a0, $a3, $a1
; LA64-NEXT: sltui $a0, $a0, 1
; LA64-NEXT: ret
%tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
index 589360823b1488..4d8160d7080340 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -16,6 +16,7 @@ define float @float_fadd_acquire(ptr %p) nounwind {
; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
; LA64F-NEXT: movfr2gr.s $a1, $fa2
; LA64F-NEXT: movfr2gr.s $a2, $fa0
+; LA64F-NEXT: addi.w $a2, $a2, 0
; LA64F-NEXT: .LBB0_3: # %atomicrmw.start
; LA64F-NEXT: # Parent Loop BB0_1 Depth=1
; LA64F-NEXT: # => This Inner Loop Header: Depth=2
@@ -33,8 +34,7 @@ define float @float_fadd_acquire(ptr %p) nounwind {
; LA64F-NEXT: .LBB0_6: # %atomicrmw.start
; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1
; LA64F-NEXT: movgr2fr.w $fa0, $a3
-; LA64F-NEXT: addi.w $a1, $a2, 0
-; LA64F-NEXT: bne $a3, $a1, .LBB0_1
+; LA64F-NEXT: bne $a3, $a2, .LBB0_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: ret
;
@@ -51,6 +51,7 @@ define float @float_fadd_acquire(ptr %p) nounwind {
; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
; LA64D-NEXT: movfr2gr.s $a1, $fa2
; LA64D-NEXT: movfr2gr.s $a2, $fa0
+; LA64D-NEXT: addi.w $a2, $a2, 0
; LA64D-NEXT: .LBB0_3: # %atomicrmw.start
; LA64D-NEXT: # Parent Loop BB0_1 Depth=1
; LA64D-NEXT: # => This Inner Loop Header: Depth=2
@@ -68,8 +69,7 @@ define float @float_fadd_acquire(ptr %p) nounwind {
; LA64D-NEXT: .LBB0_6: # %atomicrmw.start
; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1
; LA64D-NEXT: movgr2fr.w $fa0, $a3
-; LA64D-NEXT: addi.w $a1, $a2, 0
-; LA64D-NEXT: bne $a3, $a1, .LBB0_1
+; LA64D-NEXT: bne $a3, $a2, .LBB0_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: ret
%v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4
@@ -90,6 +90,7 @@ define float @float_fsub_acquire(ptr %p) nounwind {
; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1
; LA64F-NEXT: movfr2gr.s $a1, $fa2
; LA64F-NEXT: movfr2gr.s $a2, $fa0
+; LA64F-NEXT: addi.w $a2, $a2, 0
; LA64F-NEXT: .LBB1_3: # %atomicrmw.start
; LA64F-NEXT: # Parent Loop BB1_1 Depth=1
; LA64F-NEXT: # => This Inner Loop Header: Depth=2
@@ -107,8 +108,7 @@ define float @float_fsub_acquire(ptr %p) nounwind {
; LA64F-NEXT: .LBB1_6: # %atomicrmw.start
; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1
; LA64F-NEXT: movgr2fr.w $fa0, $a3
-; LA64F-NEXT: addi.w $a1, $a2, 0
-; LA64F-NEXT: bne $a3, $a1, .LBB1_1
+; LA64F-NEXT: bne $a3, $a2, .LBB1_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
; LA64F-NEXT: ret
;
@@ -125,6 +125,7 @@ define float @float_fsub_acquire(ptr %p) nounwind {
; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1
; LA64D-NEXT: movfr2gr.s $a1, $fa2
; LA64D-NEXT: movfr2gr.s $a2, $fa0
+; LA64D-NEXT: addi.w $a2, $a2, 0
; LA64D-NEXT: .LBB1_3: # %atomicrmw.start
; LA64D-NEXT: # Parent Loop BB1_1 Depth=1
; LA64D-NEXT: # => This Inner Loop Header: Depth=2
@@ -142,8 +143,7 @@ define float @float_fsub_acquire(ptr %p) nounwind {
; LA64D-NEXT: .LBB1_6: # %atomicrmw.start
; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1
; LA64D-NEXT: movgr2fr.w $fa0, $a3
-; LA64D-NEXT: addi.w $a1, $a2, 0
-; LA64D-NEXT: bne $a3, $a1, .LBB1_1
+; LA64D-NEXT: bne $a3, $a2, .LBB1_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: ret
%v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4
@@ -165,6 +165,7 @@ define float @float_fmin_acquire(ptr %p) nounwind {
; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1
; LA64F-NEXT: movfr2gr.s $a1, $fa2
; LA64F-NEXT: movfr2gr.s $a2, $fa0
+; LA64F-NEXT: addi.w $a2, $a2, 0
; LA64F-NEXT: .LBB2_3: # %atomicrmw.start
; LA64F-NEXT: # Parent Loop BB2_1 Depth=1
; LA64F-NEXT: # => This Inner Loop Header: Depth=2
@@ -182,8 +183,7 @@ define float @float_fmin_acquire(ptr %p) nounwind {
; LA64F-NE...
[truncated]
|
I agree with backporting the change, as the actual code change is trivial and fixes a bug regarding atomic ops, which is known to be a subtle area important to overall system stability. Without the backport distro maintainers would still have to cherry-pick it themselves, duplicating efforts. |
…apArg (llvm#83656) This patch aims to solve Firefox issue: https://bugzilla.mozilla.org/show_bug.cgi?id=1882301 Similar to 616289e. Currently LoongArch uses an ll.[wd]/sc.[wd] loop for ATOMIC_CMP_XCHG. Because the comparison in the loop is full-width (i.e. the `bne` instruction), we must sign extend the input comparsion argument. Note that LoongArch ISA manual V1.1 has introduced compare-and-swap instructions. We would change the implementation (return `ANY_EXTEND`) when we support them. (cherry picked from commit 5f058aa)
Backport 5f058aa
Requested by: @SixWeining