-
Notifications
You must be signed in to change notification settings - Fork 10.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoongArch] Switch to the Machine Scheduler #83759
Conversation
With the machine scheduler enabled, the SelectionDAG scheduling preference becomes source-order scheduling (the machine scheduler generates better code, even though no machine model is defined for LoongArch yet). Most of the test changes are trivial instruction reorderings and differing register allocations, without any obvious performance impact. This is similar to commit: 3d0fbaf
@llvm/pr-subscribers-backend-loongarch Author: wanglei (wangleiat) Changes: The SelectionDAG scheduling preference now becomes source order scheduling (machine scheduler generates better code -- even without there being a machine model defined for LoongArch yet). Most of the test changes are trivial instruction reorderings and differing register allocations, without any obvious performance impact. This is similar to commit: 3d0fbaf. Patch is 785.20 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/83759.diff (95 files affected):
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 11c0b39e176e61..cecb4a50aa7633 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -113,6 +113,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; }
+ bool enableMachineScheduler() const override { return true; }
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/alloca.ll b/llvm/test/CodeGen/LoongArch/alloca.ll
index d766be6aac9509..75a05689e4178d 100644
--- a/llvm/test/CodeGen/LoongArch/alloca.ll
+++ b/llvm/test/CodeGen/LoongArch/alloca.ll
@@ -126,8 +126,7 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA32-NEXT: st.w $a1, $sp, 8
; LA32-NEXT: ori $a1, $zero, 10
; LA32-NEXT: st.w $a1, $sp, 4
-; LA32-NEXT: ori $a1, $zero, 9
-; LA32-NEXT: st.w $a1, $sp, 0
+; LA32-NEXT: ori $t0, $zero, 9
; LA32-NEXT: ori $a1, $zero, 2
; LA32-NEXT: ori $a2, $zero, 3
; LA32-NEXT: ori $a3, $zero, 4
@@ -135,6 +134,7 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA32-NEXT: ori $a5, $zero, 6
; LA32-NEXT: ori $a6, $zero, 7
; LA32-NEXT: ori $a7, $zero, 8
+; LA32-NEXT: st.w $t0, $sp, 0
; LA32-NEXT: bl %plt(func)
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: addi.w $sp, $fp, -16
@@ -162,8 +162,7 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA64-NEXT: st.d $a1, $sp, 16
; LA64-NEXT: ori $a1, $zero, 10
; LA64-NEXT: st.d $a1, $sp, 8
-; LA64-NEXT: ori $a1, $zero, 9
-; LA64-NEXT: st.d $a1, $sp, 0
+; LA64-NEXT: ori $t0, $zero, 9
; LA64-NEXT: ori $a1, $zero, 2
; LA64-NEXT: ori $a2, $zero, 3
; LA64-NEXT: ori $a3, $zero, 4
@@ -171,6 +170,7 @@ define void @alloca_callframe(i32 %n) nounwind {
; LA64-NEXT: ori $a5, $zero, 6
; LA64-NEXT: ori $a6, $zero, 7
; LA64-NEXT: ori $a7, $zero, 8
+; LA64-NEXT: st.d $t0, $sp, 0
; LA64-NEXT: bl %plt(func)
; LA64-NEXT: addi.d $sp, $sp, 32
; LA64-NEXT: addi.d $sp, $fp, -16
diff --git a/llvm/test/CodeGen/LoongArch/alsl.ll b/llvm/test/CodeGen/LoongArch/alsl.ll
index 650f504dcaf83a..177e37de0952d7 100644
--- a/llvm/test/CodeGen/LoongArch/alsl.ll
+++ b/llvm/test/CodeGen/LoongArch/alsl.ll
@@ -53,12 +53,12 @@ entry:
define i64 @alsl_i64(i64 signext %a, i64 signext %b) nounwind {
; LA32-LABEL: alsl_i64:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: slli.w $a1, $a1, 4
; LA32-NEXT: srli.w $a4, $a0, 28
+; LA32-NEXT: slli.w $a1, $a1, 4
; LA32-NEXT: or $a1, $a1, $a4
-; LA32-NEXT: add.w $a1, $a3, $a1
; LA32-NEXT: alsl.w $a0, $a0, $a2, 4
; LA32-NEXT: sltu $a2, $a0, $a2
+; LA32-NEXT: add.w $a1, $a3, $a1
; LA32-NEXT: add.w $a1, $a1, $a2
; LA32-NEXT: ret
;
@@ -189,14 +189,14 @@ entry:
define i64 @mul_add_i64(i64 signext %a, i64 signext %b) nounwind {
; LA32-LABEL: mul_add_i64:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: slli.w $a4, $a1, 4
-; LA32-NEXT: sub.w $a1, $a4, $a1
; LA32-NEXT: ori $a4, $zero, 15
; LA32-NEXT: mulh.wu $a4, $a0, $a4
+; LA32-NEXT: slli.w $a5, $a1, 4
+; LA32-NEXT: sub.w $a1, $a5, $a1
; LA32-NEXT: add.w $a1, $a4, $a1
+; LA32-NEXT: slli.w $a4, $a0, 4
+; LA32-NEXT: sub.w $a0, $a4, $a0
; LA32-NEXT: add.w $a1, $a3, $a1
-; LA32-NEXT: slli.w $a3, $a0, 4
-; LA32-NEXT: sub.w $a0, $a3, $a0
; LA32-NEXT: add.w $a0, $a2, $a0
; LA32-NEXT: sltu $a2, $a0, $a2
; LA32-NEXT: add.w $a1, $a1, $a2
@@ -342,9 +342,9 @@ define i64 @mul_add_neg_i64(i64 signext %a, i64 signext %b) nounwind {
; LA32-NEXT: mulh.wu $a4, $a0, $a4
; LA32-NEXT: sub.w $a4, $a4, $a0
; LA32-NEXT: add.w $a1, $a4, $a1
+; LA32-NEXT: slli.w $a4, $a0, 4
+; LA32-NEXT: sub.w $a0, $a0, $a4
; LA32-NEXT: add.w $a1, $a3, $a1
-; LA32-NEXT: slli.w $a3, $a0, 4
-; LA32-NEXT: sub.w $a0, $a0, $a3
; LA32-NEXT: add.w $a0, $a2, $a0
; LA32-NEXT: sltu $a2, $a0, $a2
; LA32-NEXT: add.w $a1, $a1, $a2
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index b84c1093eb75f2..bf48c0df3e4961 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -4,34 +4,34 @@
define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i8:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: ld.w $a2, $a0, 0
-; LA64-NEXT: ori $a4, $zero, 255
-; LA64-NEXT: sll.w $a4, $a4, $a3
-; LA64-NEXT: andi $a3, $a3, 24
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a4, $a3, $a2
+; LA64-NEXT: ld.w $a3, $a0, 0
+; LA64-NEXT: andi $a2, $a2, 24
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: andi $a1, $a1, 255
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB0_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB0_3 Depth 2
-; LA64-NEXT: srl.w $a5, $a2, $a3
-; LA64-NEXT: andi $a6, $a5, 255
-; LA64-NEXT: sltu $a6, $a6, $a1
+; LA64-NEXT: srl.w $a5, $a3, $a2
+; LA64-NEXT: addi.w $a6, $a3, 0
+; LA64-NEXT: andi $a7, $a5, 255
; LA64-NEXT: addi.d $a5, $a5, 1
-; LA64-NEXT: xori $a6, $a6, 1
-; LA64-NEXT: masknez $a5, $a5, $a6
+; LA64-NEXT: sltu $a7, $a7, $a1
+; LA64-NEXT: xori $a7, $a7, 1
+; LA64-NEXT: masknez $a5, $a5, $a7
; LA64-NEXT: andi $a5, $a5, 255
-; LA64-NEXT: sll.w $a5, $a5, $a3
-; LA64-NEXT: and $a6, $a2, $a4
-; LA64-NEXT: or $a5, $a6, $a5
-; LA64-NEXT: addi.w $a6, $a2, 0
+; LA64-NEXT: sll.w $a5, $a5, $a2
+; LA64-NEXT: and $a3, $a3, $a4
+; LA64-NEXT: or $a5, $a3, $a5
; LA64-NEXT: .LBB0_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB0_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a6, .LBB0_5
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a6, .LBB0_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
; LA64-NEXT: move $a7, $a5
@@ -43,9 +43,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB0_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT: bne $a2, $a6, .LBB0_1
+; LA64-NEXT: bne $a3, $a6, .LBB0_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a2, $a3
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -54,35 +54,35 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-LABEL: atomicrmw_uinc_wrap_i16:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a2, $a0, 3
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: ld.w $a2, $a0, 0
-; LA64-NEXT: lu12i.w $a4, 15
-; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a3
-; LA64-NEXT: andi $a3, $a3, 24
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a4, $a3, $a2
+; LA64-NEXT: ld.w $a3, $a0, 0
+; LA64-NEXT: andi $a2, $a2, 24
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB1_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB1_3 Depth 2
-; LA64-NEXT: srl.w $a5, $a2, $a3
-; LA64-NEXT: bstrpick.d $a6, $a5, 15, 0
-; LA64-NEXT: sltu $a6, $a6, $a1
+; LA64-NEXT: srl.w $a5, $a3, $a2
+; LA64-NEXT: addi.w $a6, $a3, 0
+; LA64-NEXT: bstrpick.d $a7, $a5, 15, 0
; LA64-NEXT: addi.d $a5, $a5, 1
-; LA64-NEXT: xori $a6, $a6, 1
-; LA64-NEXT: masknez $a5, $a5, $a6
+; LA64-NEXT: sltu $a7, $a7, $a1
+; LA64-NEXT: xori $a7, $a7, 1
+; LA64-NEXT: masknez $a5, $a5, $a7
; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
-; LA64-NEXT: sll.w $a5, $a5, $a3
-; LA64-NEXT: and $a6, $a2, $a4
-; LA64-NEXT: or $a5, $a6, $a5
-; LA64-NEXT: addi.w $a6, $a2, 0
+; LA64-NEXT: sll.w $a5, $a5, $a2
+; LA64-NEXT: and $a3, $a3, $a4
+; LA64-NEXT: or $a5, $a3, $a5
; LA64-NEXT: .LBB1_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB1_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a6, .LBB1_5
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a6, .LBB1_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
; LA64-NEXT: move $a7, $a5
@@ -94,9 +94,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB1_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT: bne $a2, $a6, .LBB1_1
+; LA64-NEXT: bne $a3, $a6, .LBB1_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a2, $a3
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
@@ -111,19 +111,19 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: .LBB2_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB2_3 Depth 2
-; LA64-NEXT: addi.w $a3, $a2, 0
-; LA64-NEXT: sltu $a4, $a3, $a1
-; LA64-NEXT: xori $a4, $a4, 1
-; LA64-NEXT: addi.d $a2, $a2, 1
-; LA64-NEXT: masknez $a4, $a2, $a4
+; LA64-NEXT: addi.d $a3, $a2, 1
+; LA64-NEXT: addi.w $a4, $a2, 0
+; LA64-NEXT: sltu $a2, $a4, $a1
+; LA64-NEXT: xori $a2, $a2, 1
+; LA64-NEXT: masknez $a3, $a3, $a2
; LA64-NEXT: .LBB2_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB2_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a3, .LBB2_5
+; LA64-NEXT: bne $a2, $a4, .LBB2_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2
-; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: move $a5, $a3
; LA64-NEXT: sc.w $a5, $a0, 0
; LA64-NEXT: beqz $a5, .LBB2_3
; LA64-NEXT: b .LBB2_6
@@ -132,7 +132,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB2_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
-; LA64-NEXT: bne $a2, $a3, .LBB2_1
+; LA64-NEXT: bne $a2, $a4, .LBB2_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
@@ -149,10 +149,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB3_3 Depth 2
; LA64-NEXT: move $a3, $a2
-; LA64-NEXT: sltu $a2, $a2, $a1
-; LA64-NEXT: xori $a2, $a2, 1
-; LA64-NEXT: addi.d $a4, $a3, 1
-; LA64-NEXT: masknez $a4, $a4, $a2
+; LA64-NEXT: addi.d $a2, $a2, 1
+; LA64-NEXT: sltu $a4, $a3, $a1
+; LA64-NEXT: xori $a4, $a4, 1
+; LA64-NEXT: masknez $a4, $a2, $a4
; LA64-NEXT: .LBB3_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB3_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
@@ -180,39 +180,39 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i8:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a4, $a0, 3
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: ld.w $a2, $a0, 0
-; LA64-NEXT: ori $a4, $zero, 255
-; LA64-NEXT: sll.w $a4, $a4, $a3
-; LA64-NEXT: andi $a3, $a3, 24
+; LA64-NEXT: andi $a2, $a4, 24
+; LA64-NEXT: ori $a5, $zero, 255
+; LA64-NEXT: ld.w $a3, $a0, 0
+; LA64-NEXT: sll.w $a4, $a5, $a4
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: andi $a5, $a1, 255
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB4_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB4_3 Depth 2
-; LA64-NEXT: srl.w $a6, $a2, $a3
-; LA64-NEXT: andi $a7, $a6, 255
-; LA64-NEXT: sltu $t0, $a5, $a7
+; LA64-NEXT: srl.w $a6, $a3, $a2
+; LA64-NEXT: addi.w $a7, $a3, 0
+; LA64-NEXT: andi $t0, $a6, 255
; LA64-NEXT: addi.d $a6, $a6, -1
+; LA64-NEXT: sltui $t1, $t0, 1
+; LA64-NEXT: sltu $t0, $a5, $t0
; LA64-NEXT: masknez $a6, $a6, $t0
; LA64-NEXT: maskeqz $t0, $a1, $t0
; LA64-NEXT: or $a6, $t0, $a6
-; LA64-NEXT: sltui $a7, $a7, 1
-; LA64-NEXT: masknez $a6, $a6, $a7
-; LA64-NEXT: maskeqz $a7, $a1, $a7
-; LA64-NEXT: or $a6, $a7, $a6
+; LA64-NEXT: masknez $a6, $a6, $t1
+; LA64-NEXT: maskeqz $t0, $a1, $t1
+; LA64-NEXT: or $a6, $t0, $a6
; LA64-NEXT: andi $a6, $a6, 255
-; LA64-NEXT: sll.w $a6, $a6, $a3
-; LA64-NEXT: and $a7, $a2, $a4
-; LA64-NEXT: or $a6, $a7, $a6
-; LA64-NEXT: addi.w $a7, $a2, 0
+; LA64-NEXT: sll.w $a6, $a6, $a2
+; LA64-NEXT: and $a3, $a3, $a4
+; LA64-NEXT: or $a6, $a3, $a6
; LA64-NEXT: .LBB4_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB4_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a7, .LBB4_5
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a7, .LBB4_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
; LA64-NEXT: move $t0, $a6
@@ -224,9 +224,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB4_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT: bne $a2, $a7, .LBB4_1
+; LA64-NEXT: bne $a3, $a7, .LBB4_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a2, $a3
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
ret i8 %result
@@ -235,40 +235,40 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-LABEL: atomicrmw_udec_wrap_i16:
; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a3, $a0, 3
+; LA64-NEXT: slli.d $a4, $a0, 3
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
-; LA64-NEXT: ld.w $a2, $a0, 0
-; LA64-NEXT: lu12i.w $a4, 15
-; LA64-NEXT: ori $a4, $a4, 4095
-; LA64-NEXT: sll.w $a4, $a4, $a3
-; LA64-NEXT: andi $a3, $a3, 24
+; LA64-NEXT: andi $a2, $a4, 24
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a5, $a3, 4095
+; LA64-NEXT: ld.w $a3, $a0, 0
+; LA64-NEXT: sll.w $a4, $a5, $a4
; LA64-NEXT: nor $a4, $a4, $zero
; LA64-NEXT: bstrpick.d $a5, $a1, 15, 0
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB5_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB5_3 Depth 2
-; LA64-NEXT: srl.w $a6, $a2, $a3
-; LA64-NEXT: bstrpick.d $a7, $a6, 15, 0
-; LA64-NEXT: sltu $t0, $a5, $a7
+; LA64-NEXT: srl.w $a6, $a3, $a2
+; LA64-NEXT: addi.w $a7, $a3, 0
+; LA64-NEXT: bstrpick.d $t0, $a6, 15, 0
; LA64-NEXT: addi.d $a6, $a6, -1
+; LA64-NEXT: sltui $t1, $t0, 1
+; LA64-NEXT: sltu $t0, $a5, $t0
; LA64-NEXT: masknez $a6, $a6, $t0
; LA64-NEXT: maskeqz $t0, $a1, $t0
; LA64-NEXT: or $a6, $t0, $a6
-; LA64-NEXT: sltui $a7, $a7, 1
-; LA64-NEXT: masknez $a6, $a6, $a7
-; LA64-NEXT: maskeqz $a7, $a1, $a7
-; LA64-NEXT: or $a6, $a7, $a6
+; LA64-NEXT: masknez $a6, $a6, $t1
+; LA64-NEXT: maskeqz $t0, $a1, $t1
+; LA64-NEXT: or $a6, $t0, $a6
; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0
-; LA64-NEXT: sll.w $a6, $a6, $a3
-; LA64-NEXT: and $a7, $a2, $a4
-; LA64-NEXT: or $a6, $a7, $a6
-; LA64-NEXT: addi.w $a7, $a2, 0
+; LA64-NEXT: sll.w $a6, $a6, $a2
+; LA64-NEXT: and $a3, $a3, $a4
+; LA64-NEXT: or $a6, $a3, $a6
; LA64-NEXT: .LBB5_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB5_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
-; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a7, .LBB5_5
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a7, .LBB5_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
; LA64-NEXT: move $t0, $a6
@@ -280,9 +280,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB5_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT: bne $a2, $a7, .LBB5_1
+; LA64-NEXT: bne $a3, $a7, .LBB5_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
-; LA64-NEXT: srl.w $a0, $a2, $a3
+; LA64-NEXT: srl.w $a0, $a3, $a2
; LA64-NEXT: ret
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
ret i16 %result
@@ -297,24 +297,24 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: .LBB6_1: # %atomicrmw.start
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB6_3 Depth 2
-; LA64-NEXT: addi.w $a4, $a2, 0
-; LA64-NEXT: sltu $a5, $a3, $a4
-; LA64-NEXT: addi.d $a2, $a2, -1
-; LA64-NEXT: masknez $a2, $a2, $a5
-; LA64-NEXT: maskeqz $a5, $a1, $a5
-; LA64-NEXT: or $a2, $a5, $a2
-; LA64-NEXT: sltui $a5, $a4, 1
-; LA64-NEXT: masknez $a2, $a2, $a5
-; LA64-NEXT: maskeqz $a5, $a1, $a5
-; LA64-NEXT: or $a5, $a5, $a2
+; LA64-NEXT: addi.d $a4, $a2, -1
+; LA64-NEXT: addi.w $a5, $a2, 0
+; LA64-NEXT: sltui $a2, $a5, 1
+; LA64-NEXT: sltu $a6, $a3, $a5
+; LA64-NEXT: masknez $a4, $a4, $a6
+; LA64-NEXT: maskeqz $a6, $a1, $a6
+; LA64-NEXT: or $a4, $a6, $a4
+; LA64-NEXT: masknez $a4, $a4, $a2
+; LA64-NEXT: maskeqz $a2, $a1, $a2
+; LA64-NEXT: or $a4, $a2, $a4
; LA64-NEXT: .LBB6_3: # %atomicrmw.start
; LA64-NEXT: # Parent Loop BB6_1 Depth=1
; LA64-NEXT: # => This Inner Loop Header: Depth=2
; LA64-NEXT: ll.w $a2, $a0, 0
-; LA64-NEXT: bne $a2, $a4, .LBB6_5
+; LA64-NEXT: bne $a2, $a5, .LBB6_5
; LA64-NEXT: # %bb.4: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
-; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: move $a6, $a4
; LA64-NEXT: sc.w $a6, $a0, 0
; LA64-NEXT: beqz $a6, .LBB6_3
; LA64-NEXT: b .LBB6_6
@@ -323,7 +323,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; LA64-NEXT: dbar 20
; LA64-NEXT: .LBB6_6: # %atomicrmw.start
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
-; LA64-NEXT: bne $a2, $a4, .LBB6_1
+; LA64-NEXT: bne $a2, $a5, .LBB6_1
; LA64-NEXT: # %bb.2: # %atomicrmw.end
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
@@ -340,12 +340,12 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; LA64-NEXT: # =>This Loop Header: Depth=1
; LA64-NEXT: # Child Loop BB7_3 Depth 2
; LA64-NEXT: move $a3, $a2
-; LA64-NEXT: sltu $a2, $a1, $a2
-; LA64-NEXT: addi.d $a4, $a3, -1
-; LA64-NEXT: masknez $a4, $a4, $a2
-; LA64-NEXT: maskeqz $a2, $a1, $a2
-; LA64-NEXT: or $a2, $a2, $a4
+; LA64-NEXT: addi.d $a2, $a2, -1
; LA64-NEXT: sltui $a4, $a3, 1
+; LA64-NEXT: sltu $a5, $a1, $a3
+; LA64-NEXT: masknez $a2, $a2, $a5
+; LA64-NEXT: maskeqz $a5, $a1, $a5
+; LA64-NEXT: or $a2, $a5, $a2
; LA64-NEXT: masknez $a2, $a2, $a4
; LA64-NEXT: maskeqz $a4, $a1, $a4
; LA64-NEXT: or $a4, $a4, $a2
diff --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll
index 259d8565c68420..fcf523aa3c883a 100644
--- a/llvm/test/CodeGen/LoongArch/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll
@@ -129,30 +129,30 @@ define i48 @test_bitreverse_i48(i48 %a) nounwind {
define i77 @test_bitreverse_i77(i77 %a) nounwind {
; LA32-LABEL: test_bitreverse_i77:
; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a2, $a1, 0
+; LA32-NEXT: ld.w $a2, $a1, 4
+; LA32-NEXT: ld.w $a3, $a1, 8
+; LA32-NEXT: ld.w $a1, $a1, 0
; LA32-NEXT: bitrev.w $a2, $a2
-; LA32-NEXT: ld.w $a3, $a1, 4
+; LA32-NEXT: slli.w $a4, $a2, 13
; LA32-NEXT: bitrev.w $a3, $a3
-; LA32-NEXT: srli.w $a4, $a3, 19
-; LA32-NEXT: slli.w $a5, $a2, 13
-; LA32-NEXT: or $a4, $a5, $a4
+; LA32-NEXT: srli.w $a3, $a3, 19
+; LA32-NEXT: or $a3, $a3, $a4
; L...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
With the machine scheduler enabled, the SelectionDAG scheduling preference becomes source-order scheduling (the machine scheduler generates better code, even though no machine model is defined for LoongArch yet).
Most of the test changes are trivial instruction reorderings and differing register allocations, without any obvious performance impact.
This is similar to commit: 3d0fbaf