-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Add IsSignExtendingOpW to AMO*_W instructions. #72349
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: The amo*.w instructions return a sign extended value like other W instructions. Adding IsSignExtendingOpW allows RISCVOptWInstrs to remove sext.w instructions that extend results from these operations. Patch is 35.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72349.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 5a3d393bdb599e0..c9ff9b4872d4aba 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -52,6 +52,8 @@ let Predicates = [HasStdExtA] in {
defm LR_W : LR_r_aq_rl<0b010, "lr.w">, Sched<[WriteAtomicLDW, ReadAtomicLDW]>;
defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">,
Sched<[WriteAtomicSTW, ReadAtomicSTW, ReadAtomicSTW]>;
+
+let IsSignExtendingOpW = 1 in {
defm AMOSWAP_W : AMO_rr_aq_rl<0b00001, 0b010, "amoswap.w">,
Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
defm AMOADD_W : AMO_rr_aq_rl<0b00000, 0b010, "amoadd.w">,
@@ -70,6 +72,7 @@ defm AMOMINU_W : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">,
Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
defm AMOMAXU_W : AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">,
Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+}
} // Predicates = [HasStdExtA]
let Predicates = [HasStdExtA, IsRV64] in {
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index bd945c865c359d5..803dfa4b4ad0e9d 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -4193,3 +4193,1106 @@ define i1 @cmpxchg_i32_monotonic_monotonic_val1(ptr %ptr, i32 signext %cmp, i32
%2 = extractvalue { i32, i1 } %1, 1
ret i1 %2
}
+
+define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
+; RV32I-LABEL: atomicrmw_xchg_i32_monotonic_crossbb:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB53_2
+; RV32I-NEXT: # %bb.1: # %then
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __atomic_exchange_4@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB53_2: # %else
+; RV32I-NEXT: lw a1, 0(a0)
+; RV32I-NEXT: li a2, 1
+; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_xchg_i32_monotonic_crossbb:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a1, a1, 1
+; RV32IA-NEXT: beqz a1, .LBB53_2
+; RV32IA-NEXT: # %bb.1: # %then
+; RV32IA-NEXT: li a1, 1
+; RV32IA-NEXT: amoswap.w a0, a1, (a0)
+; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB53_2: # %else
+; RV32IA-NEXT: mv a1, a0
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: li a2, 1
+; RV32IA-NEXT: sw a2, 0(a1)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_xchg_i32_monotonic_crossbb:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB53_2
+; RV64I-NEXT: # %bb.1: # %then
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_exchange_4@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB53_2: # %else
+; RV64I-NEXT: lw a1, 0(a0)
+; RV64I-NEXT: li a2, 1
+; RV64I-NEXT: sw a2, 0(a0)
+; RV64I-NEXT: sext.w a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_xchg_i32_monotonic_crossbb:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a1, a1, 1
+; RV64IA-NEXT: beqz a1, .LBB53_2
+; RV64IA-NEXT: # %bb.1: # %then
+; RV64IA-NEXT: li a1, 1
+; RV64IA-NEXT: amoswap.w a0, a1, (a0)
+; RV64IA-NEXT: ret
+; RV64IA-NEXT: .LBB53_2: # %else
+; RV64IA-NEXT: mv a1, a0
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: li a2, 1
+; RV64IA-NEXT: sw a2, 0(a1)
+; RV64IA-NEXT: ret
+ br i1 %c, label %then, label %else
+
+then:
+ %1 = atomicrmw xchg ptr %a, i32 1 monotonic
+ br label %merge
+
+else:
+ %2 = load i32, ptr %a, align 4
+ store i32 1, ptr %a
+ br label %merge
+
+merge:
+ %3 = phi i32 [ %1, %then ], [ %2, %else ]
+ ret i32 %3
+}
+
+define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
+; RV32I-LABEL: atomicrmw_add_i32_monotonic_crossbb:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB54_2
+; RV32I-NEXT: # %bb.1: # %then
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __atomic_fetch_add_4@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB54_2: # %else
+; RV32I-NEXT: lw a1, 0(a0)
+; RV32I-NEXT: addi a2, a1, 1
+; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i32_monotonic_crossbb:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a1, a1, 1
+; RV32IA-NEXT: beqz a1, .LBB54_2
+; RV32IA-NEXT: # %bb.1: # %then
+; RV32IA-NEXT: li a1, 1
+; RV32IA-NEXT: amoadd.w a0, a1, (a0)
+; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB54_2: # %else
+; RV32IA-NEXT: mv a1, a0
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: addi a2, a0, 1
+; RV32IA-NEXT: sw a2, 0(a1)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i32_monotonic_crossbb:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB54_2
+; RV64I-NEXT: # %bb.1: # %then
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_fetch_add_4@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB54_2: # %else
+; RV64I-NEXT: lw a1, 0(a0)
+; RV64I-NEXT: addi a2, a1, 1
+; RV64I-NEXT: sw a2, 0(a0)
+; RV64I-NEXT: sext.w a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i32_monotonic_crossbb:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a1, a1, 1
+; RV64IA-NEXT: beqz a1, .LBB54_2
+; RV64IA-NEXT: # %bb.1: # %then
+; RV64IA-NEXT: li a1, 1
+; RV64IA-NEXT: amoadd.w a0, a1, (a0)
+; RV64IA-NEXT: ret
+; RV64IA-NEXT: .LBB54_2: # %else
+; RV64IA-NEXT: mv a1, a0
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: addi a2, a0, 1
+; RV64IA-NEXT: sw a2, 0(a1)
+; RV64IA-NEXT: ret
+ br i1 %c, label %then, label %else
+
+then:
+ %1 = atomicrmw add ptr %a, i32 1 monotonic
+ br label %merge
+
+else:
+ %2 = load i32, ptr %a, align 4
+ %3 = add i32 %2, 1
+ store i32 %3, ptr %a
+ br label %merge
+
+merge:
+ %4 = phi i32 [ %1, %then ], [ %2, %else ]
+ ret i32 %4
+}
+
+define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i32_monotonic_crossbb:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB55_2
+; RV32I-NEXT: # %bb.1: # %then
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __atomic_fetch_sub_4@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB55_2: # %else
+; RV32I-NEXT: lw a1, 0(a0)
+; RV32I-NEXT: addi a2, a1, -1
+; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_monotonic_crossbb:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a1, a1, 1
+; RV32IA-NEXT: beqz a1, .LBB55_2
+; RV32IA-NEXT: # %bb.1: # %then
+; RV32IA-NEXT: li a1, -1
+; RV32IA-NEXT: amoadd.w a0, a1, (a0)
+; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB55_2: # %else
+; RV32IA-NEXT: mv a1, a0
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: addi a2, a0, -1
+; RV32IA-NEXT: sw a2, 0(a1)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32_monotonic_crossbb:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB55_2
+; RV64I-NEXT: # %bb.1: # %then
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_fetch_sub_4@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB55_2: # %else
+; RV64I-NEXT: lw a1, 0(a0)
+; RV64I-NEXT: addi a2, a1, -1
+; RV64I-NEXT: sw a2, 0(a0)
+; RV64I-NEXT: sext.w a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32_monotonic_crossbb:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a1, a1, 1
+; RV64IA-NEXT: beqz a1, .LBB55_2
+; RV64IA-NEXT: # %bb.1: # %then
+; RV64IA-NEXT: li a1, -1
+; RV64IA-NEXT: amoadd.w a0, a1, (a0)
+; RV64IA-NEXT: ret
+; RV64IA-NEXT: .LBB55_2: # %else
+; RV64IA-NEXT: mv a1, a0
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: addi a2, a0, -1
+; RV64IA-NEXT: sw a2, 0(a1)
+; RV64IA-NEXT: ret
+ br i1 %c, label %then, label %else
+
+then:
+ %1 = atomicrmw sub ptr %a, i32 1 monotonic
+ br label %merge
+
+else:
+ %2 = load i32, ptr %a, align 4
+ %3 = sub i32 %2, 1
+ store i32 %3, ptr %a
+ br label %merge
+
+merge:
+ %4 = phi i32 [ %1, %then ], [ %2, %else ]
+ ret i32 %4
+}
+
+define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
+; RV32I-LABEL: atomicrmw_and_i32_monotonic_crossbb:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB56_2
+; RV32I-NEXT: # %bb.1: # %then
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __atomic_fetch_and_4@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB56_2: # %else
+; RV32I-NEXT: lw a1, 0(a0)
+; RV32I-NEXT: andi a2, a1, 1
+; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_and_i32_monotonic_crossbb:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a1, a1, 1
+; RV32IA-NEXT: beqz a1, .LBB56_2
+; RV32IA-NEXT: # %bb.1: # %then
+; RV32IA-NEXT: li a1, 1
+; RV32IA-NEXT: amoand.w a0, a1, (a0)
+; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB56_2: # %else
+; RV32IA-NEXT: mv a1, a0
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: andi a2, a0, 1
+; RV32IA-NEXT: sw a2, 0(a1)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_and_i32_monotonic_crossbb:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB56_2
+; RV64I-NEXT: # %bb.1: # %then
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_fetch_and_4@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB56_2: # %else
+; RV64I-NEXT: lwu a1, 0(a0)
+; RV64I-NEXT: andi a2, a1, 1
+; RV64I-NEXT: sw a2, 0(a0)
+; RV64I-NEXT: sext.w a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_and_i32_monotonic_crossbb:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a1, a1, 1
+; RV64IA-NEXT: beqz a1, .LBB56_2
+; RV64IA-NEXT: # %bb.1: # %then
+; RV64IA-NEXT: li a1, 1
+; RV64IA-NEXT: amoand.w a0, a1, (a0)
+; RV64IA-NEXT: ret
+; RV64IA-NEXT: .LBB56_2: # %else
+; RV64IA-NEXT: mv a1, a0
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: andi a2, a0, 1
+; RV64IA-NEXT: sw a2, 0(a1)
+; RV64IA-NEXT: ret
+ br i1 %c, label %then, label %else
+
+then:
+ %1 = atomicrmw and ptr %a, i32 1 monotonic
+ br label %merge
+
+else:
+ %2 = load i32, ptr %a, align 4
+ %3 = and i32 %2, 1
+ store i32 %3, ptr %a
+ br label %merge
+
+merge:
+ %4 = phi i32 [ %1, %then ], [ %2, %else ]
+ ret i32 %4
+}
+
+define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
+; RV32I-LABEL: atomicrmw_or_i32_monotonic_crossbb:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB57_2
+; RV32I-NEXT: # %bb.1: # %then
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __atomic_fetch_or_4@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB57_2: # %else
+; RV32I-NEXT: lw a1, 0(a0)
+; RV32I-NEXT: ori a2, a1, 1
+; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_or_i32_monotonic_crossbb:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a1, a1, 1
+; RV32IA-NEXT: beqz a1, .LBB57_2
+; RV32IA-NEXT: # %bb.1: # %then
+; RV32IA-NEXT: li a1, 1
+; RV32IA-NEXT: amoor.w a0, a1, (a0)
+; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB57_2: # %else
+; RV32IA-NEXT: mv a1, a0
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: ori a2, a0, 1
+; RV32IA-NEXT: sw a2, 0(a1)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_or_i32_monotonic_crossbb:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB57_2
+; RV64I-NEXT: # %bb.1: # %then
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_fetch_or_4@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB57_2: # %else
+; RV64I-NEXT: lw a1, 0(a0)
+; RV64I-NEXT: ori a2, a1, 1
+; RV64I-NEXT: sw a2, 0(a0)
+; RV64I-NEXT: sext.w a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_or_i32_monotonic_crossbb:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a1, a1, 1
+; RV64IA-NEXT: beqz a1, .LBB57_2
+; RV64IA-NEXT: # %bb.1: # %then
+; RV64IA-NEXT: li a1, 1
+; RV64IA-NEXT: amoor.w a0, a1, (a0)
+; RV64IA-NEXT: ret
+; RV64IA-NEXT: .LBB57_2: # %else
+; RV64IA-NEXT: mv a1, a0
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: ori a2, a0, 1
+; RV64IA-NEXT: sw a2, 0(a1)
+; RV64IA-NEXT: ret
+ br i1 %c, label %then, label %else
+
+then:
+ %1 = atomicrmw or ptr %a, i32 1 monotonic
+ br label %merge
+
+else:
+ %2 = load i32, ptr %a, align 4
+ %3 = or i32 %2, 1
+ store i32 %3, ptr %a
+ br label %merge
+
+merge:
+ %4 = phi i32 [ %1, %then ], [ %2, %else ]
+ ret i32 %4
+}
+
+define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
+; RV32I-LABEL: atomicrmw_xor_i32_monotonic_crossbb:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: beqz a1, .LBB58_2
+; RV32I-NEXT: # %bb.1: # %then
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __atomic_fetch_xor_4@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB58_2: # %else
+; RV32I-NEXT: lw a1, 0(a0)
+; RV32I-NEXT: xori a2, a1, 1
+; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_xor_i32_monotonic_crossbb:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a1, a1, 1
+; RV32IA-NEXT: beqz a1, .LBB58_2
+; RV32IA-NEXT: # %bb.1: # %then
+; RV32IA-NEXT: li a1, 1
+; RV32IA-NEXT: amoxor.w a0, a1, (a0)
+; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB58_2: # %else
+; RV32IA-NEXT: mv a1, a0
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: xori a2, a0, 1
+; RV32IA-NEXT: sw a2, 0(a1)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_xor_i32_monotonic_crossbb:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: beqz a1, .LBB58_2
+; RV64I-NEXT: # %bb.1: # %then
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: li a2, 0
+; RV64I-NEXT: call __atomic_fetch_xor_4@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB58_2: # %else
+; RV64I-NEXT: lw a1, 0(a0)
+; RV64I-NEXT: xori a2, a1, 1
+; RV64I-NEXT: sw a2, 0(a0)
+; RV64I-NEXT: sext.w a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_xor_i32_monotonic_crossbb:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: andi a1, a1, 1
+; RV64IA-NEXT: beqz a1, .LBB58_2
+; RV64IA-NEXT: # %bb.1: # %then
+; RV64IA-NEXT: li a1, 1
+; RV64IA-NEXT: amoxor.w a0, a1, (a0)
+; RV64IA-NEXT: ret
+; RV64IA-NEXT: .LBB58_2: # %else
+; RV64IA-NEXT: mv a1, a0
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: xori a2, a0, 1
+; RV64IA-NEXT: sw a2, 0(a1)
+; RV64IA-NEXT: ret
+ br i1 %c, label %then, label %else
+
+then:
+ %1 = atomicrmw xor ptr %a, i32 1 monotonic
+ br label %merge
+
+else:
+ %2 = load i32, ptr %a, align 4
+ %3 = xor i32 %2, 1
+ store i32 %3, ptr %a
+ br label %merge
+
+merge:
+ %4 = phi i32 [ %1, %then ], [ %2, %else ]
+ ret i32 %4
+}
+
+define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind {
+; RV32I-LABEL: atomicrmw_max_i32_monotonic_crossbb:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: andi a1, a1, 1
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: beqz a1, .LBB59_5
+; RV32I-NEXT: # %bb.1: # %then
+; RV32I-NEXT: lw a1, 0(s0)
+; RV32I-NEXT: j .LBB59_3
+; RV32I-NEXT: .LBB59_2: # %atomicrmw.start
+; RV32I-NEXT: # in Loop: Header=BB59_3 Depth=1
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: addi a1, sp, 4
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: li a3, 0
+; RV32I-NEXT: li a4, 0
+; RV32I-NEXT: call __atomic_compare_exchange_4@plt
+; RV32I-NEXT: lw a1, 4(sp)
+; RV32I-NEXT: bnez a0, .LBB59_8
+; RV32I-NEXT: .LBB59_3: # %atomicrmw.start
+; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: bgtz a1, .LBB59_2
+; RV32I-NEXT: # %bb.4: # %atomicrmw.start
+; RV32I-NEXT: # in Loop: Header=BB59_3 Depth=1
+; RV32I-NEXT: li a2, 1
+; RV32I-NEXT: j .LBB59_2
+; RV32I-NEXT: .LBB59_5: # %else
+; RV32I-NEXT: lw a1, 0(s0)
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: bgtz a1, .LBB59_7
+; RV32I-NEXT: # %bb.6: # %else
+; RV32I-NEXT: li a0, 1
+; RV32I-NEXT: .LBB59_7: # %else
+; RV32I-NEXT: sw a0, 0(s0)
+; RV32I-NEXT: .LBB59_8: # %merge
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_max_i32_monotonic_crossbb:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: andi a2, a1, 1
+; RV32IA-NEXT: mv a1, a0
+; RV32IA-NEXT: beqz a2, .LBB59_2
+; RV32IA-NEXT: # %bb.1: # %then
+; RV32IA-NEXT: li a0, 1
+; RV32IA-NEXT: amomax.w a0, a0, (a1)
+; RV32IA-NEXT: ret
+; RV32IA-NEXT: .LBB59_2: # %else
+; RV32IA-NEXT: lw a0, 0(a1)
+; RV32IA-NEXT: mv a2, a0
+; RV32IA-NEXT: bgtz a0, .LBB59_4
+; RV32IA-NEXT: # %bb.3: # %else
+; RV32IA-NEXT: li a2, 1
+; RV32IA-NEXT: .LBB59_4: # %else
+; RV32IA-NEXT: sw a2, 0(a1)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_max_i32_monotonic_crossbb:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: andi a1, a1, 1
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: beqz a1, .LBB59_5
+; RV64I-NEXT: # %bb.1: # %then
+; RV64I-NEXT: lw a1, 0(s0)
+; RV64I-NEXT: j .LBB59_3
+; RV64I-NEXT: .LBB59_2: # %atomicrmw.start
+; RV64I-NEXT: # in Loop: Header=BB59_3 Depth=1
+; RV64I-NEXT: sw a1, 12(sp)
+; RV64I-NEXT: addi a1, sp, 12
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: li a3, 0
+; RV64I-NEXT: li a4, 0
+; RV64I-NEXT: call __atomic_compare_exchange_4@plt
+; RV64I-NEXT: lw a1, 12(sp)
+; RV64I-NEXT: bnez a0, .LBB59_8
+; RV...
[truncated]
|
I think it also holds for lr.w and amocas.w. |
Do lr.w instructions exist at that point in the pipeline? And I don't think codegen for amocas.w is implemented yet. |
|
…ter amo*.w. NFC. We should tell RISCVOptWInstrs that these instructions sign extend their results.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. I think there's an argument for adding IsSignExtendingOpW for instructions with that property even if it makes no difference in the current codegen pipeline. But happy to defer to your preference.
The amo*.w instructions return a sign extended value like other W instructions. Adding IsSignExtendingOpW allows RISCVOptWInstrs to remove sext.w instructions that extend results from these operations.