diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index ab5c9e17b9a37..21bcbbb0ded65 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -697,7 +697,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))); - getActionDefinitionsBuilder(G_ATOMICRMW_ADD) + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) + .lowerIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(2, p0))); + + getActionDefinitionsBuilder({G_ATOMIC_CMPXCHG, G_ATOMICRMW_ADD}) .legalFor(ST.hasStdExtA(), {{sXLen, p0}}) .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}}) .clampScalar(0, sXLen, sXLen); @@ -746,6 +749,7 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, } case Intrinsic::riscv_masked_atomicrmw_add: case Intrinsic::riscv_masked_atomicrmw_sub: + case Intrinsic::riscv_masked_cmpxchg: return true; } } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll new file mode 100644 index 0000000000000..2fb9dcfeef785 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-cmpxchg.ll @@ -0,0 +1,5910 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=riscv32 < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -global-isel -mtriple=riscv32 -mattr=+a < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s +; RUN: llc -global-isel -mtriple=riscv32 -mattr=+a,+zacas < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-WMO-ZACAS %s +; RUN: llc -global-isel -mtriple=riscv32 -mattr=+a,+ztso < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s +; RUN: llc -global-isel -mtriple=riscv32 -mattr=+a,+ztso,+zacas < %s \ +; RUN: | 
FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-TSO-ZACAS %s +; RUN: llc -global-isel -mtriple=riscv64 < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s +; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+zacas < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-WMO-ZACAS %s +; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+zacas,+zabha < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-WMO-ZABHA %s +; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+ztso < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s +; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+ztso,+zacas < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO-ZACAS %s +; RUN: llc -global-isel -mtriple=riscv64 -mattr=+a,+ztso,+zacas,+zabha < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-TSO-ZABHA %s + +define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_monotonic_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_monotonic_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: li a3, 255 +; RV32IA-NEXT: andi a4, a0, -4 +; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 +; RV32IA-NEXT: zext.b a2, a2 +; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: sll a1, a1, a0 +; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a2, (a4) +; RV32IA-NEXT: and a5, a2, a3 +; RV32IA-NEXT: bne a5, a1, .LBB0_3 +; 
RV32IA-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; RV32IA-NEXT: xor a5, a2, a0 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a2, a5 +; RV32IA-NEXT: sc.w a5, a5, (a4) +; RV32IA-NEXT: bnez a5, .LBB0_1 +; RV32IA-NEXT: .LBB0_3: +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_monotonic_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 0 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB0_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, .LBB0_1 +; RV64IA-WMO-NEXT: .LBB0_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: li a3, 255 +; RV64IA-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB0_1: # =>This Inner Loop 
Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB0_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB0_1 +; RV64IA-ZACAS-NEXT: .LBB0_3: +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-ZABHA-LABEL: cmpxchg_i8_monotonic_monotonic: +; RV64IA-ZABHA: # %bb.0: +; RV64IA-ZABHA-NEXT: amocas.b a1, a2, (a0) +; RV64IA-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB0_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB0_1 +; RV64IA-TSO-NEXT: .LBB0_3: +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 2 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, 
sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 +; RV32IA-WMO-NEXT: zext.b a2, a2 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a5, a2, a3 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a4) +; RV32IA-WMO-NEXT: bnez a5, .LBB1_1 +; RV32IA-WMO-NEXT: .LBB1_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB1_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: li a3, 255 +; 
RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 +; RV32IA-TSO-NEXT: zext.b a2, a2 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a5, a2, a3 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-NEXT: bnez a5, .LBB1_1 +; RV32IA-TSO-NEXT: .LBB1_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB1_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 2 +; RV64I-NEXT: li a4, 0 +; 
RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, .LBB1_1 +; RV64IA-WMO-NEXT: .LBB1_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB1_3: +; 
RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.b.aq a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB1_1 +; RV64IA-TSO-NEXT: .LBB1_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB1_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB1_1 +; 
RV64IA-TSO-ZACAS-NEXT: .LBB1_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_acquire_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic + ret void +} + +define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_acquire_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 2 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 +; RV32IA-WMO-NEXT: zext.b a2, a2 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a5, a2, a3 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w a5, a5, (a4) +; RV32IA-WMO-NEXT: bnez a5, .LBB2_1 +; RV32IA-WMO-NEXT: .LBB2_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; 
RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB2_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 +; RV32IA-TSO-NEXT: zext.b a2, a2 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a5, a2, a3 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-NEXT: bnez a5, .LBB2_1 +; RV32IA-TSO-NEXT: .LBB2_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: 
.LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB2_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_acquire_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 2 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, .LBB2_1 +; RV64IA-WMO-NEXT: .LBB2_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; 
RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB2_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.b.aq a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB2_1 +; RV64IA-TSO-NEXT: .LBB2_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; 
RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB2_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB2_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB2_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_acquire_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire + ret void +} + +define void @cmpxchg_i8_release_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_release_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 +; RV32IA-WMO-NEXT: zext.b a2, a2 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: 
Depth=1 +; RV32IA-WMO-NEXT: lr.w a2, (a4) +; RV32IA-WMO-NEXT: and a5, a2, a3 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-WMO-NEXT: bnez a5, .LBB3_1 +; RV32IA-WMO-NEXT: .LBB3_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB3_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 +; RV32IA-TSO-NEXT: zext.b a2, a2 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a5, a2, a3 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-TSO-NEXT: # 
%bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-NEXT: bnez a5, .LBB3_1 +; RV32IA-TSO-NEXT: .LBB3_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB3_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_release_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 3 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; 
RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, .LBB3_1 +; RV64IA-WMO-NEXT: .LBB3_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB3_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.b.rl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; 
RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB3_1 +; RV64IA-TSO-NEXT: .LBB3_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB3_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB3_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB3_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_release_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic + ret void +} + +define void @cmpxchg_i8_release_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_release_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; 
RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 +; RV32IA-WMO-NEXT: zext.b a2, a2 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a5, a2, a3 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-WMO-NEXT: bnez a5, .LBB4_1 +; RV32IA-WMO-NEXT: .LBB4_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-WMO-ZACAS-NEXT: xor 
a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB4_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 +; RV32IA-TSO-NEXT: zext.b a2, a2 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a5, a2, a3 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-NEXT: bnez a5, .LBB4_1 +; RV32IA-TSO-NEXT: .LBB4_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB4_3: +; 
RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_release_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 3 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB4_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, .LBB4_1 +; RV64IA-WMO-NEXT: .LBB4_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: 
Header=BB4_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB4_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB4_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB4_1 +; RV64IA-TSO-NEXT: .LBB4_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB4_3 +; 
RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB4_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB4_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_release_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release acquire + ret void +} + +define void @cmpxchg_i8_acq_rel_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 +; RV32IA-WMO-NEXT: zext.b a2, a2 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a5, a2, a3 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-WMO-NEXT: bnez a5, .LBB5_1 +; RV32IA-WMO-NEXT: .LBB5_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; 
RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB5_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 +; RV32IA-TSO-NEXT: zext.b a2, a2 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a3, a3, a0 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a5, a2, a3 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-NEXT: bnez a5, .LBB5_1 +; RV32IA-TSO-NEXT: .LBB5_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, 
a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB5_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 4 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB5_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, 
.LBB5_1 +; RV64IA-WMO-NEXT: .LBB5_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB5_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB5_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, 
a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB5_1 +; RV64IA-TSO-NEXT: .LBB5_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB5_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB5_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB5_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_acq_rel_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic + ret void +} + +define void @cmpxchg_i8_acq_rel_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 4 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: li a3, 255 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, 
a0, 3 +; RV32IA-WMO-NEXT: zext.b a1, a1 +; RV32IA-WMO-NEXT: zext.b a2, a2 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a3, a3, a0 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a5, a2, a3 +; RV32IA-WMO-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-WMO-NEXT: xor a5, a2, a0 +; RV32IA-WMO-NEXT: and a5, a5, a3 +; RV32IA-WMO-NEXT: xor a5, a2, a5 +; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-WMO-NEXT: bnez a5, .LBB6_1 +; RV32IA-WMO-NEXT: .LBB6_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: li a3, 255 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB6_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: li a3, 255 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: zext.b a1, a1 +; RV32IA-TSO-NEXT: zext.b a2, a2 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a3, a3, 
a0 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a5, a2, a3 +; RV32IA-TSO-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-TSO-NEXT: xor a5, a2, a0 +; RV32IA-TSO-NEXT: and a5, a5, a3 +; RV32IA-TSO-NEXT: xor a5, a2, a5 +; RV32IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-NEXT: bnez a5, .LBB6_1 +; RV32IA-TSO-NEXT: .LBB6_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: li a3, 255 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV32IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV32IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB6_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 4 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_acq_rel_acquire: +; 
RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, .LBB6_1 +; RV64IA-WMO-NEXT: .LBB6_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: li a3, 255 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-WMO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-WMO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB6_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; 
RV64IA-TSO-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB6_1 +; RV64IA-TSO-NEXT: .LBB6_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: li a3, 255 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-TSO-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: bne a5, a1, .LBB6_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-TSO-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a5, .LBB6_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB6_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_acq_rel_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) +; 
RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel acquire + ret void +} + +define void @cmpxchg_i8_seq_cst_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_seq_cst_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_seq_cst_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: li a3, 255 +; RV32IA-NEXT: andi a4, a0, -4 +; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 +; RV32IA-NEXT: zext.b a2, a2 +; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: sll a1, a1, a0 +; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a2, (a4) +; RV32IA-NEXT: and a5, a2, a3 +; RV32IA-NEXT: bne a5, a1, .LBB7_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; RV32IA-NEXT: xor a5, a2, a0 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a2, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-NEXT: bnez a5, .LBB7_1 +; RV32IA-NEXT: .LBB7_3: +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_seq_cst_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; 
RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB7_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, .LBB7_1 +; RV64IA-WMO-NEXT: .LBB7_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_seq_cst_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: li a3, 255 +; RV64IA-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB7_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB7_1 +; RV64IA-ZACAS-NEXT: .LBB7_3: +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_seq_cst_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; 
RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB7_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB7_1 +; RV64IA-TSO-NEXT: .LBB7_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_seq_cst_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst monotonic + ret void +} + +define void @cmpxchg_i8_seq_cst_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_seq_cst_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_seq_cst_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: li a3, 255 +; RV32IA-NEXT: andi a4, a0, -4 +; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 +; RV32IA-NEXT: zext.b a2, a2 +; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: sll a1, a1, a0 +; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a2, (a4) +; RV32IA-NEXT: and a5, a2, a3 +; RV32IA-NEXT: bne a5, a1, .LBB8_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; RV32IA-NEXT: xor a5, a2, a0 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a2, 
a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-NEXT: bnez a5, .LBB8_1 +; RV32IA-NEXT: .LBB8_3: +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_seq_cst_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB8_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, .LBB8_1 +; RV64IA-WMO-NEXT: .LBB8_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_seq_cst_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: li a3, 255 +; RV64IA-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB8_3 +; 
RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB8_1 +; RV64IA-ZACAS-NEXT: .LBB8_3: +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_seq_cst_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB8_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB8_1 +; RV64IA-TSO-NEXT: .LBB8_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_seq_cst_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst acquire + ret void +} + +define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind { +; RV32I-LABEL: cmpxchg_i8_seq_cst_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sb a1, 11(sp) +; RV32I-NEXT: addi a1, sp, 11 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: call __atomic_compare_exchange_1 +; RV32I-NEXT: lw ra, 12(sp) 
# 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i8_seq_cst_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: li a3, 255 +; RV32IA-NEXT: andi a4, a0, -4 +; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: zext.b a1, a1 +; RV32IA-NEXT: zext.b a2, a2 +; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: sll a3, a3, a0 +; RV32IA-NEXT: sll a1, a1, a0 +; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a2, (a4) +; RV32IA-NEXT: and a5, a2, a3 +; RV32IA-NEXT: bne a5, a1, .LBB9_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +; RV32IA-NEXT: xor a5, a2, a0 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: xor a5, a2, a5 +; RV32IA-NEXT: sc.w.rl a5, a5, (a4) +; RV32IA-NEXT: bnez a5, .LBB9_1 +; RV32IA-NEXT: .LBB9_3: +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i8_seq_cst_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sb a1, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: call __atomic_compare_exchange_1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i8_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: li a3, 255 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: zext.b a1, a1 +; RV64IA-WMO-NEXT: zext.b a2, a2 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a3, a3, a0 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-WMO-NEXT: and a5, a2, a3 +; RV64IA-WMO-NEXT: bne a5, a1, .LBB9_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +; RV64IA-WMO-NEXT: xor a5, a2, a0 +; RV64IA-WMO-NEXT: and a5, a5, a3 +; RV64IA-WMO-NEXT: xor a5, a2, a5 +; RV64IA-WMO-NEXT: 
sc.w.rl a5, a5, (a4) +; RV64IA-WMO-NEXT: bnez a5, .LBB9_1 +; RV64IA-WMO-NEXT: .LBB9_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i8_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: li a3, 255 +; RV64IA-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: zext.b a1, a1 +; RV64IA-ZACAS-NEXT: zext.b a2, a2 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: sllw a3, a3, a0 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-ZACAS-NEXT: and a5, a2, a3 +; RV64IA-ZACAS-NEXT: bne a5, a1, .LBB9_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a5, a2, a0 +; RV64IA-ZACAS-NEXT: and a5, a5, a3 +; RV64IA-ZACAS-NEXT: xor a5, a2, a5 +; RV64IA-ZACAS-NEXT: sc.w.rl a5, a5, (a4) +; RV64IA-ZACAS-NEXT: bnez a5, .LBB9_1 +; RV64IA-ZACAS-NEXT: .LBB9_3: +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: fence rw, rw +; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i8_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: li a3, 255 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: zext.b a1, a1 +; RV64IA-TSO-NEXT: zext.b a2, a2 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a3, a3, a0 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-TSO-NEXT: and a5, a2, a3 +; RV64IA-TSO-NEXT: bne a5, a1, .LBB9_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +; RV64IA-TSO-NEXT: xor a5, a2, a0 +; RV64IA-TSO-NEXT: and a5, a5, a3 +; RV64IA-TSO-NEXT: xor a5, a2, a5 +; RV64IA-TSO-NEXT: sc.w.rl a5, 
a5, (a4) +; RV64IA-TSO-NEXT: bnez a5, .LBB9_1 +; RV64IA-TSO-NEXT: .LBB9_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: fence rw, rw +; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst seq_cst + ret void +} + +define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_monotonic_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_monotonic_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: andi a4, a0, -4 +; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: sll a5, a3, a0 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a0 +; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w a2, (a4) +; RV32IA-NEXT: and a3, a2, a5 +; RV32IA-NEXT: bne a3, a1, .LBB10_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV32IA-NEXT: xor a3, a2, a0 +; RV32IA-NEXT: and a3, a3, a5 +; RV32IA-NEXT: xor a3, a2, a3 +; RV32IA-NEXT: sc.w a3, a3, (a4) +; RV32IA-NEXT: bnez a3, .LBB10_1 +; RV32IA-NEXT: .LBB10_3: +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_monotonic_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 0 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; 
RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB10_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB10_1 +; RV64IA-WMO-NEXT: .LBB10_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: lui a3, 16 +; RV64IA-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB10_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV64IA-ZACAS-NEXT: bnez a3, .LBB10_1 +; RV64IA-ZACAS-NEXT: .LBB10_3: +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-ZABHA-LABEL: 
cmpxchg_i16_monotonic_monotonic: +; RV64IA-ZABHA: # %bb.0: +; RV64IA-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB10_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB10_1 +; RV64IA-TSO-NEXT: .LBB10_3: +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 2 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 
+; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a3, a2, a5 +; RV32IA-WMO-NEXT: bne a3, a1, .LBB11_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-WMO-NEXT: xor a3, a2, a0 +; RV32IA-WMO-NEXT: and a3, a3, a5 +; RV32IA-WMO-NEXT: xor a3, a2, a3 +; RV32IA-WMO-NEXT: sc.w a3, a3, (a4) +; RV32IA-WMO-NEXT: bnez a3, .LBB11_1 +; RV32IA-WMO-NEXT: .LBB11_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB11_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB11_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB11_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; 
RV32IA-TSO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a3, a2, a5 +; RV32IA-TSO-NEXT: bne a3, a1, .LBB11_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-TSO-NEXT: xor a3, a2, a0 +; RV32IA-TSO-NEXT: and a3, a3, a5 +; RV32IA-TSO-NEXT: xor a3, a2, a3 +; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-NEXT: bnez a3, .LBB11_1 +; RV32IA-TSO-NEXT: .LBB11_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB11_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB11_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB11_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 2 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-WMO: # %bb.0: 
+; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB11_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB11_1 +; RV64IA-WMO-NEXT: .LBB11_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB11_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB11_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB11_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: 
amocas.h.aq a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB11_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB11_1 +; RV64IA-TSO-NEXT: .LBB11_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB11_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB11_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB11_3: +; 
RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_acquire_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic + ret void +} + +define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_acquire_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 2 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a3, a2, a5 +; RV32IA-WMO-NEXT: bne a3, a1, .LBB12_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-WMO-NEXT: xor a3, a2, a0 +; RV32IA-WMO-NEXT: and a3, a3, a5 +; RV32IA-WMO-NEXT: xor a3, a2, a3 +; RV32IA-WMO-NEXT: sc.w a3, a3, (a4) +; RV32IA-WMO-NEXT: bnez a3, .LBB12_1 +; RV32IA-WMO-NEXT: .LBB12_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB12_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB12_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB12_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a3, a2, a5 +; RV32IA-TSO-NEXT: bne a3, a1, .LBB12_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-TSO-NEXT: xor a3, a2, a0 +; RV32IA-TSO-NEXT: and a3, a3, a5 +; RV32IA-TSO-NEXT: xor a3, a2, a3 +; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-NEXT: bnez a3, .LBB12_1 +; RV32IA-TSO-NEXT: .LBB12_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; 
RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB12_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB12_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB12_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_acquire_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 2 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB12_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB12_1 +; RV64IA-WMO-NEXT: .LBB12_3: +; RV64IA-WMO-NEXT: ret +; +; 
RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB12_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB12_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB12_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.h.aq a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB12_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; 
RV64IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB12_1 +; RV64IA-TSO-NEXT: .LBB12_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB12_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB12_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB12_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_acquire_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire + ret void +} + +define void @cmpxchg_i16_release_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_release_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-WMO: # %bb.0: +; 
RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a2, (a4) +; RV32IA-WMO-NEXT: and a3, a2, a5 +; RV32IA-WMO-NEXT: bne a3, a1, .LBB13_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-WMO-NEXT: xor a3, a2, a0 +; RV32IA-WMO-NEXT: and a3, a3, a5 +; RV32IA-WMO-NEXT: xor a3, a2, a3 +; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-WMO-NEXT: bnez a3, .LBB13_1 +; RV32IA-WMO-NEXT: .LBB13_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB13_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB13_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB13_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: 
andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a3, a2, a5 +; RV32IA-TSO-NEXT: bne a3, a1, .LBB13_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-TSO-NEXT: xor a3, a2, a0 +; RV32IA-TSO-NEXT: and a3, a3, a5 +; RV32IA-TSO-NEXT: xor a3, a2, a3 +; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-NEXT: bnez a3, .LBB13_1 +; RV32IA-TSO-NEXT: .LBB13_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB13_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB13_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB13_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_release_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; 
RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 3 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB13_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB13_1 +; RV64IA-WMO-NEXT: .LBB13_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB13_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; 
RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB13_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB13_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.h.rl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB13_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB13_1 +; RV64IA-TSO-NEXT: .LBB13_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB13_3 +; RV64IA-TSO-ZACAS-NEXT: # 
%bb.2: # in Loop: Header=BB13_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB13_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB13_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_release_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic + ret void +} + +define void @cmpxchg_i16_release_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_release_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i16_release_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a3, a2, a5 +; RV32IA-WMO-NEXT: bne a3, a1, .LBB14_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-WMO-NEXT: xor a3, a2, a0 +; RV32IA-WMO-NEXT: and a3, a3, a5 +; RV32IA-WMO-NEXT: xor a3, a2, a3 +; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-WMO-NEXT: bnez a3, .LBB14_1 +; RV32IA-WMO-NEXT: .LBB14_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: 
cmpxchg_i16_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB14_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB14_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB14_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_release_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a3, a2, a5 +; RV32IA-TSO-NEXT: bne a3, a1, .LBB14_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-TSO-NEXT: xor a3, a2, a0 +; RV32IA-TSO-NEXT: and a3, a3, a5 +; RV32IA-TSO-NEXT: xor a3, a2, a3 +; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-NEXT: bnez a3, .LBB14_1 +; RV32IA-TSO-NEXT: .LBB14_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV32IA-TSO-ZACAS: # 
%bb.0: +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB14_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB14_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB14_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_release_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 3 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB14_3 +; RV64IA-WMO-NEXT: # %bb.2: 
# in Loop: Header=BB14_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB14_1 +; RV64IA-WMO-NEXT: .LBB14_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB14_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB14_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB14_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; 
RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB14_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB14_1 +; RV64IA-TSO-NEXT: .LBB14_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB14_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB14_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB14_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_release_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release acquire + ret void +} + +define void @cmpxchg_i16_acq_rel_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; 
RV32I-NEXT: li a3, 4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a3, a2, a5 +; RV32IA-WMO-NEXT: bne a3, a1, .LBB15_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-WMO-NEXT: xor a3, a2, a0 +; RV32IA-WMO-NEXT: and a3, a3, a5 +; RV32IA-WMO-NEXT: xor a3, a2, a3 +; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-WMO-NEXT: bnez a3, .LBB15_1 +; RV32IA-WMO-NEXT: .LBB15_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB15_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; 
RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB15_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB15_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a3, a2, a5 +; RV32IA-TSO-NEXT: bne a3, a1, .LBB15_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-TSO-NEXT: xor a3, a2, a0 +; RV32IA-TSO-NEXT: and a3, a3, a5 +; RV32IA-TSO-NEXT: xor a3, a2, a3 +; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-NEXT: bnez a3, .LBB15_1 +; RV32IA-TSO-NEXT: .LBB15_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB15_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; 
RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB15_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB15_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 4 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB15_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB15_1 +; RV64IA-WMO-NEXT: .LBB15_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; 
RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB15_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB15_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB15_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB15_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB15_1 +; RV64IA-TSO-NEXT: .LBB15_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: 
sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB15_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB15_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB15_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_acq_rel_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic + ret void +} + +define void @cmpxchg_i16_acq_rel_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 4 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lui a3, 16 +; RV32IA-WMO-NEXT: andi a4, a0, -4 +; RV32IA-WMO-NEXT: andi a0, a0, 3 +; RV32IA-WMO-NEXT: addi a3, a3, -1 +; RV32IA-WMO-NEXT: slli a0, a0, 3 +; RV32IA-WMO-NEXT: sll a5, a3, a0 +; RV32IA-WMO-NEXT: and a1, a1, a3 +; RV32IA-WMO-NEXT: and a2, a2, a3 +; RV32IA-WMO-NEXT: sll a1, a1, a0 +; RV32IA-WMO-NEXT: sll a0, a2, a0 +; RV32IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-NEXT: and a3, a2, a5 +; RV32IA-WMO-NEXT: bne a3, a1, .LBB16_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 
Depth=1 +; RV32IA-WMO-NEXT: xor a3, a2, a0 +; RV32IA-WMO-NEXT: and a3, a3, a5 +; RV32IA-WMO-NEXT: xor a3, a2, a3 +; RV32IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-WMO-NEXT: bnez a3, .LBB16_1 +; RV32IA-WMO-NEXT: .LBB16_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-WMO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-WMO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV32IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB16_3 +; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-WMO-ZACAS-NEXT: bnez a3, .LBB16_1 +; RV32IA-WMO-ZACAS-NEXT: .LBB16_3: +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lui a3, 16 +; RV32IA-TSO-NEXT: andi a4, a0, -4 +; RV32IA-TSO-NEXT: andi a0, a0, 3 +; RV32IA-TSO-NEXT: addi a3, a3, -1 +; RV32IA-TSO-NEXT: slli a0, a0, 3 +; RV32IA-TSO-NEXT: sll a5, a3, a0 +; RV32IA-TSO-NEXT: and a1, a1, a3 +; RV32IA-TSO-NEXT: and a2, a2, a3 +; RV32IA-TSO-NEXT: sll a1, a1, a0 +; RV32IA-TSO-NEXT: sll a0, a2, a0 +; RV32IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a2, (a4) +; RV32IA-TSO-NEXT: and a3, a2, a5 +; RV32IA-TSO-NEXT: bne a3, a1, .LBB16_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV32IA-TSO-NEXT: xor a3, a2, a0 +; 
RV32IA-TSO-NEXT: and a3, a3, a5 +; RV32IA-TSO-NEXT: xor a3, a2, a3 +; RV32IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-NEXT: bnez a3, .LBB16_1 +; RV32IA-TSO-NEXT: .LBB16_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV32IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV32IA-TSO-ZACAS-NEXT: sll a5, a3, a0 +; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV32IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0 +; RV32IA-TSO-ZACAS-NEXT: sll a0, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV32IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV32IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB16_3 +; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV32IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV32IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV32IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV32IA-TSO-ZACAS-NEXT: bnez a3, .LBB16_1 +; RV32IA-TSO-ZACAS-NEXT: .LBB16_3: +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 4 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 
+; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB16_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB16_1 +; RV64IA-WMO-NEXT: .LBB16_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: lui a3, 16 +; RV64IA-WMO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-WMO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-WMO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-WMO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-WMO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-ZACAS-NEXT: lr.w.aq a2, (a4) +; RV64IA-WMO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-WMO-ZACAS-NEXT: bne a3, a1, .LBB16_3 +; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-WMO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-WMO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-WMO-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-ZACAS-NEXT: bnez a3, .LBB16_1 +; RV64IA-WMO-ZACAS-NEXT: .LBB16_3: +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; 
RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB16_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; RV64IA-TSO-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB16_1 +; RV64IA-TSO-NEXT: .LBB16_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: lui a3, 16 +; RV64IA-TSO-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-TSO-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-TSO-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-TSO-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-TSO-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-ZACAS-NEXT: lr.w a2, (a4) +; RV64IA-TSO-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-TSO-ZACAS-NEXT: bne a3, a1, .LBB16_3 +; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-TSO-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-TSO-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-TSO-ZACAS-NEXT: sc.w a3, a3, (a4) +; RV64IA-TSO-ZACAS-NEXT: bnez a3, .LBB16_1 +; RV64IA-TSO-ZACAS-NEXT: .LBB16_3: +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_acq_rel_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel acquire + ret void +} + +define void 
@cmpxchg_i16_seq_cst_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_seq_cst_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_seq_cst_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: andi a4, a0, -4 +; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: sll a5, a3, a0 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a0 +; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a2, (a4) +; RV32IA-NEXT: and a3, a2, a5 +; RV32IA-NEXT: bne a3, a1, .LBB17_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 +; RV32IA-NEXT: xor a3, a2, a0 +; RV32IA-NEXT: and a3, a3, a5 +; RV32IA-NEXT: xor a3, a2, a3 +; RV32IA-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-NEXT: bnez a3, .LBB17_1 +; RV32IA-NEXT: .LBB17_3: +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_seq_cst_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; 
RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB17_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB17_1 +; RV64IA-WMO-NEXT: .LBB17_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: lui a3, 16 +; RV64IA-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB17_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-ZACAS-NEXT: bnez a3, .LBB17_1 +; RV64IA-ZACAS-NEXT: .LBB17_3: +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_seq_cst_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: slli a0, 
a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB17_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; RV64IA-TSO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB17_1 +; RV64IA-TSO-NEXT: .LBB17_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_seq_cst_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst monotonic + ret void +} + +define void @cmpxchg_i16_seq_cst_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_seq_cst_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_seq_cst_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: andi a4, a0, -4 +; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: sll a5, a3, a0 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a0 +; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a2, (a4) +; RV32IA-NEXT: and a3, a2, a5 +; RV32IA-NEXT: bne a3, a1, .LBB18_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB18_1 
Depth=1 +; RV32IA-NEXT: xor a3, a2, a0 +; RV32IA-NEXT: and a3, a3, a5 +; RV32IA-NEXT: xor a3, a2, a3 +; RV32IA-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-NEXT: bnez a3, .LBB18_1 +; RV32IA-NEXT: .LBB18_3: +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_seq_cst_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB18_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB18_1 +; RV64IA-WMO-NEXT: .LBB18_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_acquire: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: lui a3, 16 +; RV64IA-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; 
RV64IA-ZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB18_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-ZACAS-NEXT: bnez a3, .LBB18_1 +; RV64IA-ZACAS-NEXT: .LBB18_3: +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_seq_cst_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB18_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; RV64IA-TSO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB18_1 +; RV64IA-TSO-NEXT: .LBB18_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_seq_cst_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst acquire + ret void +} + +define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind { +; RV32I-LABEL: cmpxchg_i16_seq_cst_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, 
sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: call __atomic_compare_exchange_2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i16_seq_cst_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: andi a4, a0, -4 +; RV32IA-NEXT: andi a0, a0, 3 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: slli a0, a0, 3 +; RV32IA-NEXT: sll a5, a3, a0 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: and a2, a2, a3 +; RV32IA-NEXT: sll a1, a1, a0 +; RV32IA-NEXT: sll a0, a2, a0 +; RV32IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: lr.w.aqrl a2, (a4) +; RV32IA-NEXT: and a3, a2, a5 +; RV32IA-NEXT: bne a3, a1, .LBB19_3 +; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 +; RV32IA-NEXT: xor a3, a2, a0 +; RV32IA-NEXT: and a3, a3, a5 +; RV32IA-NEXT: xor a3, a2, a3 +; RV32IA-NEXT: sc.w.rl a3, a3, (a4) +; RV32IA-NEXT: bnez a3, .LBB19_1 +; RV32IA-NEXT: .LBB19_3: +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i16_seq_cst_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: call __atomic_compare_exchange_2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i16_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lui a3, 16 +; RV64IA-WMO-NEXT: andi a4, a0, -4 +; RV64IA-WMO-NEXT: andi a0, a0, 3 +; RV64IA-WMO-NEXT: addi a3, a3, -1 +; RV64IA-WMO-NEXT: slli a0, a0, 3 +; RV64IA-WMO-NEXT: sllw a5, a3, a0 +; RV64IA-WMO-NEXT: and a1, a1, a3 +; RV64IA-WMO-NEXT: and a2, a2, a3 +; RV64IA-WMO-NEXT: sllw a1, a1, a0 +; RV64IA-WMO-NEXT: sllw a0, a2, a0 +; RV64IA-WMO-NEXT: .LBB19_1: # =>This 
Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-WMO-NEXT: and a3, a2, a5 +; RV64IA-WMO-NEXT: bne a3, a1, .LBB19_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 +; RV64IA-WMO-NEXT: xor a3, a2, a0 +; RV64IA-WMO-NEXT: and a3, a3, a5 +; RV64IA-WMO-NEXT: xor a3, a2, a3 +; RV64IA-WMO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-WMO-NEXT: bnez a3, .LBB19_1 +; RV64IA-WMO-NEXT: .LBB19_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i16_seq_cst_seq_cst: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: lui a3, 16 +; RV64IA-ZACAS-NEXT: andi a4, a0, -4 +; RV64IA-ZACAS-NEXT: andi a0, a0, 3 +; RV64IA-ZACAS-NEXT: addi a3, a3, -1 +; RV64IA-ZACAS-NEXT: slli a0, a0, 3 +; RV64IA-ZACAS-NEXT: sllw a5, a3, a0 +; RV64IA-ZACAS-NEXT: and a1, a1, a3 +; RV64IA-ZACAS-NEXT: and a2, a2, a3 +; RV64IA-ZACAS-NEXT: sllw a1, a1, a0 +; RV64IA-ZACAS-NEXT: sllw a0, a2, a0 +; RV64IA-ZACAS-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-ZACAS-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-ZACAS-NEXT: and a3, a2, a5 +; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB19_3 +; RV64IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 +; RV64IA-ZACAS-NEXT: xor a3, a2, a0 +; RV64IA-ZACAS-NEXT: and a3, a3, a5 +; RV64IA-ZACAS-NEXT: xor a3, a2, a3 +; RV64IA-ZACAS-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-ZACAS-NEXT: bnez a3, .LBB19_1 +; RV64IA-ZACAS-NEXT: .LBB19_3: +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: fence rw, rw +; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i16_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lui a3, 16 +; RV64IA-TSO-NEXT: andi a4, a0, -4 +; RV64IA-TSO-NEXT: andi a0, a0, 3 +; RV64IA-TSO-NEXT: addi a3, a3, -1 +; RV64IA-TSO-NEXT: slli a0, a0, 3 +; RV64IA-TSO-NEXT: sllw a5, a3, a0 +; RV64IA-TSO-NEXT: and a1, a1, a3 +; RV64IA-TSO-NEXT: and a2, a2, a3 +; RV64IA-TSO-NEXT: sllw a1, a1, 
a0 +; RV64IA-TSO-NEXT: sllw a0, a2, a0 +; RV64IA-TSO-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a2, (a4) +; RV64IA-TSO-NEXT: and a3, a2, a5 +; RV64IA-TSO-NEXT: bne a3, a1, .LBB19_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 +; RV64IA-TSO-NEXT: xor a3, a2, a0 +; RV64IA-TSO-NEXT: and a3, a3, a5 +; RV64IA-TSO-NEXT: xor a3, a2, a3 +; RV64IA-TSO-NEXT: sc.w.rl a3, a3, (a4) +; RV64IA-TSO-NEXT: bnez a3, .LBB19_1 +; RV64IA-TSO-NEXT: .LBB19_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: fence rw, rw +; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst seq_cst + ret void +} + +define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; RV32I-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB20_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB20_1 +; RV32IA-WMO-NEXT: .LBB20_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-ZACAS: # %bb.0: +; RV32IA-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop 
Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB20_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB20_1 +; RV32IA-TSO-NEXT: .LBB20_3: +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 0 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB20_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB20_1 +; RV64IA-WMO-NEXT: .LBB20_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-ZABHA-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-ZABHA: # %bb.0: +; RV64IA-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB20_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB20_1 +; RV64IA-TSO-NEXT: .LBB20_3: +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; 
RV32I-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 2 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB21_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB21_1 +; RV32IA-WMO-NEXT: .LBB21_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB21_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB21_1 +; RV32IA-TSO-NEXT: .LBB21_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 2 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: 
cmpxchg_i32_acquire_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB21_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB21_1 +; RV64IA-WMO-NEXT: .LBB21_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB21_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB21_1 +; RV64IA-TSO-NEXT: .LBB21_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_acquire_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic + ret void +} + +define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; RV32I-LABEL: cmpxchg_i32_acquire_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 2 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded 
Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB22_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB22_1 +; RV32IA-WMO-NEXT: .LBB22_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB22_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB22_1 +; RV32IA-TSO-NEXT: .LBB22_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_acquire_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 2 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB22_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB22_1 +; RV64IA-WMO-NEXT: 
.LBB22_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.w.aq a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB22_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB22_1 +; RV64IA-TSO-NEXT: .LBB22_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_acquire_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire + ret void +} + +define void @cmpxchg_i32_release_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; RV32I-LABEL: cmpxchg_i32_release_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_release_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB23_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, 
a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB23_1 +; RV32IA-WMO-NEXT: .LBB23_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_release_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB23_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB23_1 +; RV32IA-TSO-NEXT: .LBB23_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_release_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 3 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB23_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB23_1 +; RV64IA-WMO-NEXT: .LBB23_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.w.rl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.w.rl a1, a2, (a0) +; 
RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB23_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB23_1 +; RV64IA-TSO-NEXT: .LBB23_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_release_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic + ret void +} + +define void @cmpxchg_i32_release_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; RV32I-LABEL: cmpxchg_i32_release_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB24_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB24_1 +; RV32IA-WMO-NEXT: .LBB24_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-TSO: 
# %bb.0: +; RV32IA-TSO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB24_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB24_1 +; RV32IA-TSO-NEXT: .LBB24_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_release_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 3 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB24_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB24_1 +; RV64IA-WMO-NEXT: .LBB24_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB24_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; 
RV64IA-TSO-NEXT: bnez a4, .LBB24_1 +; RV64IA-TSO-NEXT: .LBB24_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_release_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release acquire + ret void +} + +define void @cmpxchg_i32_acq_rel_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; RV32I-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB25_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB25_1 +; RV32IA-WMO-NEXT: .LBB25_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB25_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB25_1 +; RV32IA-TSO-NEXT: .LBB25_3: +; RV32IA-TSO-NEXT: ret +; 
+; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 4 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB25_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB25_1 +; RV64IA-WMO-NEXT: .LBB25_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB25_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB25_1 +; RV64IA-TSO-NEXT: .LBB25_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_acq_rel_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_acq_rel_monotonic: +; 
RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic + ret void +} + +define void @cmpxchg_i32_acq_rel_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; RV32I-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 4 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB26_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB26_1 +; RV32IA-WMO-NEXT: .LBB26_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB26_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB26_1 +; RV32IA-TSO-NEXT: .LBB26_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: 
sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 4 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB26_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB26_1 +; RV64IA-WMO-NEXT: .LBB26_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB26_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB26_1 +; RV64IA-TSO-NEXT: .LBB26_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_acq_rel_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel acquire + ret void +} + +define void @cmpxchg_i32_seq_cst_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; RV32I-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, 
sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB27_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB27_1 +; RV32IA-WMO-NEXT: .LBB27_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB27_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB27_1 +; RV32IA-TSO-NEXT: .LBB27_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; 
RV64IA-WMO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB27_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB27_1 +; RV64IA-WMO-NEXT: .LBB27_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB27_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB27_1 +; RV64IA-TSO-NEXT: .LBB27_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_seq_cst_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic + ret void +} + +define void @cmpxchg_i32_seq_cst_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; RV32I-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; 
RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB28_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB28_1 +; RV32IA-WMO-NEXT: .LBB28_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB28_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB28_1 +; RV32IA-TSO-NEXT: .LBB28_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB28_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB28_1 +; RV64IA-WMO-NEXT: .LBB28_3: +; 
RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB28_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB28_1 +; RV64IA-TSO-NEXT: .LBB28_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_seq_cst_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst acquire + ret void +} + +define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind { +; RV32I-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: call __atomic_compare_exchange_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-WMO-NEXT: bne a3, a1, .LBB29_3 +; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV32IA-WMO-NEXT: sc.w.rl a4, 
a2, (a0) +; RV32IA-WMO-NEXT: bnez a4, .LBB29_1 +; RV32IA-WMO-NEXT: .LBB29_3: +; RV32IA-WMO-NEXT: ret +; +; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-WMO-ZACAS: # %bb.0: +; RV32IA-WMO-ZACAS-NEXT: fence rw, rw +; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV32IA-WMO-ZACAS-NEXT: ret +; +; RV32IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV32IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV32IA-TSO-NEXT: bne a3, a1, .LBB29_3 +; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV32IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV32IA-TSO-NEXT: bnez a4, .LBB29_1 +; RV32IA-TSO-NEXT: .LBB29_3: +; RV32IA-TSO-NEXT: ret +; +; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV32IA-TSO-ZACAS: # %bb.0: +; RV32IA-TSO-ZACAS-NEXT: fence rw, rw +; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV32IA-TSO-ZACAS-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: call __atomic_compare_exchange_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB29_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV64IA-WMO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB29_1 +; RV64IA-WMO-NEXT: .LBB29_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: fence rw, rw +; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: 
cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: fence rw, rw +; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.w.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB29_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; RV64IA-TSO-NEXT: sc.w.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB29_1 +; RV64IA-TSO-NEXT: .LBB29_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: fence rw, rw +; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: fence rw, rw +; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst + ret void +} + +define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: mv a2, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: li a4, 0 +; RV32IA-NEXT: li a5, 0 
+; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 0 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB30_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB30_1 +; RV64IA-WMO-NEXT: .LBB30_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-ZACAS-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-ZACAS: # %bb.0: +; RV64IA-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-ZACAS-NEXT: ret +; +; RV64IA-ZABHA-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-ZABHA: # %bb.0: +; RV64IA-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_monotonic_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB30_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB30_1 +; RV64IA-TSO-NEXT: .LBB30_3: +; RV64IA-TSO-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte 
Folded Spill +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_acquire_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: li a4, 2 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 2 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB31_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB31_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB31_1 +; RV64IA-WMO-NEXT: .LBB31_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: 
amocas.d.aq a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB31_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB31_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB31_1 +; RV64IA-TSO-NEXT: .LBB31_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_acquire_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic + ret void +} + +define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_acquire_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a4, 2 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a2, a6 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_acquire_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: li a4, 2 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a2, a6 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; 
RV64I-LABEL: cmpxchg_i64_acquire_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 2 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB32_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB32_1 +; RV64IA-WMO-NEXT: .LBB32_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.d.aq a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB32_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB32_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB32_1 +; RV64IA-TSO-NEXT: .LBB32_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_acquire_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire + ret void +} + +define void 
@cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_release_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a4, 3 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_release_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: li a4, 3 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_release_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 3 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB33_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB33_1 +; RV64IA-WMO-NEXT: .LBB33_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-WMO-ZACAS: # 
%bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.rl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.d.rl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB33_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB33_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB33_1 +; RV64IA-TSO-NEXT: .LBB33_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_release_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic + ret void +} + +define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_release_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a4, 3 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a2, a6 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_release_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: li a4, 3 +; 
RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a2, a6 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_release_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 3 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB34_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB34_1 +; RV64IA-WMO-NEXT: .LBB34_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB34_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB34_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB34_1 +; RV64IA-TSO-NEXT: .LBB34_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_release_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_release_acquire: +; 
RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release acquire + ret void +} + +define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a4, 4 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: li a4, 4 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 4 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB35_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; RV64IA-WMO-NEXT: 
sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB35_1 +; RV64IA-WMO-NEXT: .LBB35_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB35_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB35_1 +; RV64IA-TSO-NEXT: .LBB35_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_acq_rel_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic + ret void +} + +define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a4, 4 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a2, a6 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 
12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: li a4, 4 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a2, a6 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 4 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aq a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB36_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB36_1 +; RV64IA-WMO-NEXT: .LBB36_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB36_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB36_1 +; RV64IA-TSO-NEXT: .LBB36_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: 
cmpxchg_i64_acq_rel_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_acq_rel_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel acquire + ret void +} + +define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: mv a2, a5 +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a5, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: mv a2, a5 +; RV32IA-NEXT: li a5, 0 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 0 +; RV64I-NEXT: call __atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: 
.LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB37_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB37_1 +; RV64IA-WMO-NEXT: .LBB37_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB37_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB37_1 +; RV64IA-TSO-NEXT: .LBB37_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_seq_cst_monotonic: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic + ret void +} + +define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: li a5, 2 +; RV32I-NEXT: mv a2, a6 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; 
RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: li a5, 2 +; RV32IA-NEXT: mv a2, a6 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 2 +; RV64I-NEXT: call __atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB38_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB38_1 +; RV64IA-WMO-NEXT: .LBB38_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, 
.LBB38_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB38_1 +; RV64IA-TSO-NEXT: .LBB38_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_seq_cst_acquire: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst acquire + ret void +} + +define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind { +; RV32I-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: li a5, 5 +; RV32I-NEXT: mv a2, a6 +; RV32I-NEXT: call __atomic_compare_exchange_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: mv a6, a3 +; RV32IA-NEXT: mv a3, a4 +; RV32IA-NEXT: sw a1, 0(sp) +; RV32IA-NEXT: sw a2, 4(sp) +; RV32IA-NEXT: mv a1, sp +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: li a5, 5 +; RV32IA-NEXT: mv a2, a6 +; RV32IA-NEXT: call __atomic_compare_exchange_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd a1, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: call 
__atomic_compare_exchange_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-WMO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-WMO-NEXT: bne a3, a1, .LBB39_3 +; RV64IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 +; RV64IA-WMO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-WMO-NEXT: bnez a4, .LBB39_1 +; RV64IA-WMO-NEXT: .LBB39_3: +; RV64IA-WMO-NEXT: ret +; +; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-WMO-ZACAS: # %bb.0: +; RV64IA-WMO-ZACAS-NEXT: fence rw, rw +; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZACAS-NEXT: ret +; +; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-WMO-ZABHA: # %bb.0: +; RV64IA-WMO-ZABHA-NEXT: fence rw, rw +; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0) +; RV64IA-WMO-ZABHA-NEXT: ret +; +; RV64IA-TSO-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 +; RV64IA-TSO-NEXT: lr.d.aqrl a3, (a0) +; RV64IA-TSO-NEXT: bne a3, a1, .LBB39_3 +; RV64IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 +; RV64IA-TSO-NEXT: sc.d.rl a4, a2, (a0) +; RV64IA-TSO-NEXT: bnez a4, .LBB39_1 +; RV64IA-TSO-NEXT: .LBB39_3: +; RV64IA-TSO-NEXT: ret +; +; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-TSO-ZACAS: # %bb.0: +; RV64IA-TSO-ZACAS-NEXT: fence rw, rw +; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZACAS-NEXT: ret +; +; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst: +; RV64IA-TSO-ZABHA: # %bb.0: +; RV64IA-TSO-ZABHA-NEXT: fence rw, rw +; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0) +; RV64IA-TSO-ZABHA-NEXT: ret + %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst + ret void +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV64IA: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir new file mode 100644 index 0000000000000..74249c1247e3e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir @@ -0,0 +1,119 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+a,+zacas,+zabha -run-pass=instruction-select %s -o - \ +# RUN: | FileCheck %s --check-prefixes=RV32IA-ZABHA + +--- +name: cmpxchg_i8 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x10 + + ; RV32IA-ZABHA-LABEL: name: cmpxchg_i8 + ; RV32IA-ZABHA: liveins: $x10 + ; RV32IA-ZABHA-NEXT: {{ $}} + ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 + ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8)) + ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]] + ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 + %0:gpr(p0) = COPY $x10 + %1:gpr(s32) = G_CONSTANT i32 0 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s8)) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 +... 
+--- +name: cmpxchg_i16 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x10 + + ; RV32IA-ZABHA-LABEL: name: cmpxchg_i16 + ; RV32IA-ZABHA: liveins: $x10 + ; RV32IA-ZABHA-NEXT: {{ $}} + ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 + ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16)) + ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]] + ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 + %0:gpr(p0) = COPY $x10 + %1:gpr(s32) = G_CONSTANT i32 0 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s16)) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 +... +--- +name: cmpxchg_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x10 + + ; RV32IA-ZABHA-LABEL: name: cmpxchg_i32 + ; RV32IA-ZABHA: liveins: $x10 + ; RV32IA-ZABHA-NEXT: {{ $}} + ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 + ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32)) + ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]] + ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 + %0:gpr(p0) = COPY $x10 + %1:gpr(s32) = G_CONSTANT i32 0 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32)) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 +... 
+--- +name: cmpxchg_with_success_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x10 + + ; RV32IA-ZABHA-LABEL: name: cmpxchg_with_success_i32 + ; RV32IA-ZABHA: liveins: $x10 + ; RV32IA-ZABHA-NEXT: {{ $}} + ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 + ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32)) + ; RV32IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_W]], 1 + ; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]] + ; RV32IA-ZABHA-NEXT: $x11 = COPY [[SLTIU]] + ; RV32IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; RV32IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32IA-ZABHA-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; RV32IA-ZABHA-NEXT: $x10 = COPY [[COPY2]] + ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 + %0:gprb(p0) = COPY $x10 + %1:gprb(s32) = G_CONSTANT i32 0 + %2:gprb(s32) = G_CONSTANT i32 1 + %3:gprb(s32) = G_ATOMIC_CMPXCHG %0(p0), %1, %2 :: (load store monotonic (s32)) + %4:gprb(s32) = G_ICMP intpred(eq), %3(s32), %1 + %5:gprb(s32) = COPY %3(s32) + ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + $x10 = COPY %5(s32) + $x11 = COPY %4(s32) + PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + %6:gprb(s32) = COPY $x10 + $x10 = COPY %6(s32) + PseudoRET implicit $x10 +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir new file mode 100644 index 0000000000000..a2f7e303a871f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir @@ -0,0 +1,144 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -run-pass=instruction-select %s -o - \ +# RUN: | FileCheck %s --check-prefixes=RV64IA-ZABHA + +--- +name: cmpxchg_i8 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x10 + + ; RV64IA-ZABHA-LABEL: name: cmpxchg_i8 + ; RV64IA-ZABHA: liveins: $x10 + ; RV64IA-ZABHA-NEXT: {{ $}} + ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 + ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8)) + ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]] + ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 + %0:gpr(p0) = COPY $x10 + %1:gpr(s64) = G_CONSTANT i64 0 + %2:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s8)) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 +... 
+--- +name: cmpxchg_i16 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x10 + + ; RV64IA-ZABHA-LABEL: name: cmpxchg_i16 + ; RV64IA-ZABHA: liveins: $x10 + ; RV64IA-ZABHA-NEXT: {{ $}} + ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 + ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16)) + ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]] + ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 + %0:gpr(p0) = COPY $x10 + %1:gpr(s64) = G_CONSTANT i64 0 + %2:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s16)) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 +... +--- +name: cmpxchg_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x10 + + ; RV64IA-ZABHA-LABEL: name: cmpxchg_i32 + ; RV64IA-ZABHA: liveins: $x10 + ; RV64IA-ZABHA-NEXT: {{ $}} + ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 + ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32)) + ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]] + ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 + %0:gpr(p0) = COPY $x10 + %1:gpr(s64) = G_CONSTANT i64 0 + %2:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32)) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 +... 
+--- +name: cmpxchg_i64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $x10 + + ; RV64IA-ZABHA-LABEL: name: cmpxchg_i64 + ; RV64IA-ZABHA: liveins: $x10 + ; RV64IA-ZABHA-NEXT: {{ $}} + ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 + ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64)) + ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]] + ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 + %0:gpr(p0) = COPY $x10 + %1:gpr(s64) = G_CONSTANT i64 0 + %2:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s64)) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 +... +--- +name: cmpxchg_with_success_i64 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x10 + + ; RV64IA-ZABHA-LABEL: name: cmpxchg_with_success_i64 + ; RV64IA-ZABHA: liveins: $x10 + ; RV64IA-ZABHA-NEXT: {{ $}} + ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 + ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64)) + ; RV64IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_D_RV64_]], 1 + ; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]] + ; RV64IA-ZABHA-NEXT: $x11 = COPY [[SLTIU]] + ; RV64IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; RV64IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64IA-ZABHA-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; RV64IA-ZABHA-NEXT: $x10 = COPY [[COPY2]] + ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 + %0:gprb(p0) 
= COPY $x10 + %1:gprb(s64) = G_CONSTANT i64 0 + %2:gprb(s64) = G_CONSTANT i64 1 + %3:gprb(s64) = G_ATOMIC_CMPXCHG %0(p0), %1, %2 :: (load store monotonic (s64)) + %4:gprb(s64) = G_ICMP intpred(eq), %3(s64), %1 + %5:gprb(s64) = COPY %3(s64) + %6:gprb(s64) = COPY %4(s64) + ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + $x10 = COPY %5(s64) + $x11 = COPY %6(s64) + PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + %7:gprb(s64) = COPY $x10 + $x10 = COPY %7(s64) + PseudoRET implicit $x10 +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index 562adbd2ce3a7..439f06bcb3c54 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -214,15 +214,16 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_ATOMIC_CMPXCHG_WITH_SUCCESS (opcode {{[0-9]+}}): 3 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_ATOMIC_CMPXCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_ATOMICRMW_XCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. 
type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_ATOMICRMW_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_ATOMICRMW_SUB (opcode {{[0-9]+}}): 2 type indices, 0 imm indices diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv32.mir new file mode 100644 index 0000000000000..3f50bc729f52f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv32.mir @@ -0,0 +1,155 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+a,+zacas,+zabha -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefixes=RV32IA-ZABHA +# RUN: llc -mtriple=riscv32 -mattr=+a -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefixes=RV32IA + +--- +name: cmpxchg_i8 +body: | + bb.0: + liveins: $x10 + + ; RV32IA-ZABHA-LABEL: name: cmpxchg_i8 + ; RV32IA-ZABHA: liveins: $x10 + ; RV32IA-ZABHA-NEXT: {{ $}} + ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; RV32IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s8)) + ; RV32IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32) + ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 + ; + ; RV32IA-LABEL: name: cmpxchg_i8 + ; RV32IA: liveins: $x10 + ; RV32IA-NEXT: {{ $}} + ; RV32IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; RV32IA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32IA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32IA-NEXT: 
[[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s8)) + ; RV32IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32) + ; RV32IA-NEXT: PseudoRET implicit $x10 + %0:_(p0) = COPY $x10 + %1:_(s8) = G_CONSTANT i8 0 + %2:_(s8) = G_CONSTANT i8 1 + %3:_(s8) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s8)) + %4:_(s32) = G_ANYEXT %3 + $x10 = COPY %4(s32) + PseudoRET implicit $x10 +... +--- +name: cmpxchg_i16 +body: | + bb.0: + liveins: $x10 + + ; RV32IA-ZABHA-LABEL: name: cmpxchg_i16 + ; RV32IA-ZABHA: liveins: $x10 + ; RV32IA-ZABHA-NEXT: {{ $}} + ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; RV32IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s16)) + ; RV32IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32) + ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 + ; + ; RV32IA-LABEL: name: cmpxchg_i16 + ; RV32IA: liveins: $x10 + ; RV32IA-NEXT: {{ $}} + ; RV32IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; RV32IA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32IA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s16)) + ; RV32IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32) + ; RV32IA-NEXT: PseudoRET implicit $x10 + %0:_(p0) = COPY $x10 + %1:_(s16) = G_CONSTANT i16 0 + %2:_(s16) = G_CONSTANT i16 1 + %3:_(s16) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s16)) + %4:_(s32) = G_ANYEXT %3 + $x10 = COPY %4(s32) + PseudoRET implicit $x10 +... 
+--- +name: cmpxchg_i32 +body: | + bb.0: + liveins: $x10 + + ; RV32IA-ZABHA-LABEL: name: cmpxchg_i32 + ; RV32IA-ZABHA: liveins: $x10 + ; RV32IA-ZABHA-NEXT: {{ $}} + ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; RV32IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32)) + ; RV32IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32) + ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 + ; + ; RV32IA-LABEL: name: cmpxchg_i32 + ; RV32IA: liveins: $x10 + ; RV32IA-NEXT: {{ $}} + ; RV32IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10 + ; RV32IA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32IA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32)) + ; RV32IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s32) + ; RV32IA-NEXT: PseudoRET implicit $x10 + %0:_(p0) = COPY $x10 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32)) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 +... 
+---
+name: cmpxchg_with_success_i32
+
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; RV32IA-ZABHA-LABEL: name: cmpxchg_with_success_i32
+    ; RV32IA-ZABHA: liveins: $x10
+    ; RV32IA-ZABHA-NEXT: {{ $}}
+    ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV32IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV32IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; RV32IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+    ; RV32IA-ZABHA-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[C]]
+    ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ATOMIC_CMPXCHG]](s32)
+    ; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; RV32IA-ZABHA-NEXT: $x10 = COPY [[COPY1]](s32)
+    ; RV32IA-ZABHA-NEXT: $x11 = COPY [[ICMP]](s32)
+    ; RV32IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; RV32IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; RV32IA-ZABHA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10
+    ; RV32IA-ZABHA-NEXT: $x10 = COPY [[COPY2]](s32)
+    ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV32IA-LABEL: name: cmpxchg_with_success_i32
+    ; RV32IA: liveins: $x10
+    ; RV32IA-NEXT: {{ $}}
+    ; RV32IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV32IA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; RV32IA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; RV32IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+    ; RV32IA-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[C]]
+    ; RV32IA-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ATOMIC_CMPXCHG]](s32)
+    ; RV32IA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; RV32IA-NEXT: $x10 = COPY [[COPY1]](s32)
+    ; RV32IA-NEXT: $x11 = COPY [[ICMP]](s32)
+    ; RV32IA-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; RV32IA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; RV32IA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x10
+    ; RV32IA-NEXT: $x10 = COPY [[COPY2]](s32)
+    ; RV32IA-NEXT: PseudoRET implicit $x10
+    %0:_(p0) = COPY $x10
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic (s32))
+    %5:_(s32) = G_ANYEXT %4
+    %6:_(s32) = G_MUL %3, %5
+    $x10 = COPY %6(s32)
+    PseudoRET implicit $x10
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv64.mir
new file mode 100644
index 0000000000000..689998299a8b2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomic-cmpxchg-rv64.mir
@@ -0,0 +1,240 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefixes=RV64IA-ZABHA
+# RUN: llc -mtriple=riscv64 -mattr=+a -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefixes=RV64IA
+---
+name: cmpxchg_i8
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; RV64IA-ZABHA-LABEL: name: cmpxchg_i8
+    ; RV64IA-ZABHA: liveins: $x10
+    ; RV64IA-ZABHA-NEXT: {{ $}}
+    ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s8))
+    ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64IA-LABEL: name: cmpxchg_i8
+    ; RV64IA: liveins: $x10
+    ; RV64IA-NEXT: {{ $}}
+    ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s8))
+    ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-NEXT: PseudoRET implicit $x10
+    %0:_(p0) = COPY $x10
+    %1:_(s8) = G_CONSTANT i8 0
+    %2:_(s8) = G_CONSTANT i8 1
+    %3:_(s8) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s8))
+    %4:_(s64) = G_ANYEXT %3
+    $x10 = COPY %4(s64)
+    PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i16
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; RV64IA-ZABHA-LABEL: name: cmpxchg_i16
+    ; RV64IA-ZABHA: liveins: $x10
+    ; RV64IA-ZABHA-NEXT: {{ $}}
+    ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s16))
+    ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64IA-LABEL: name: cmpxchg_i16
+    ; RV64IA: liveins: $x10
+    ; RV64IA-NEXT: {{ $}}
+    ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s16))
+    ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-NEXT: PseudoRET implicit $x10
+    %0:_(p0) = COPY $x10
+    %1:_(s16) = G_CONSTANT i16 0
+    %2:_(s16) = G_CONSTANT i16 1
+    %3:_(s16) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s16))
+    %4:_(s64) = G_ANYEXT %3
+    $x10 = COPY %4(s64)
+    PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i32
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; RV64IA-ZABHA-LABEL: name: cmpxchg_i32
+    ; RV64IA-ZABHA: liveins: $x10
+    ; RV64IA-ZABHA-NEXT: {{ $}}
+    ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+    ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64IA-LABEL: name: cmpxchg_i32
+    ; RV64IA: liveins: $x10
+    ; RV64IA-NEXT: {{ $}}
+    ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+    ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-NEXT: PseudoRET implicit $x10
+    %0:_(p0) = COPY $x10
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32))
+    %4:_(s64) = G_ANYEXT %3
+    $x10 = COPY %4(s64)
+    PseudoRET implicit $x10
+...
+---
+name: cmpxchg_i64
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; RV64IA-ZABHA-LABEL: name: cmpxchg_i64
+    ; RV64IA-ZABHA: liveins: $x10
+    ; RV64IA-ZABHA-NEXT: {{ $}}
+    ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s64))
+    ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64IA-LABEL: name: cmpxchg_i64
+    ; RV64IA: liveins: $x10
+    ; RV64IA-NEXT: {{ $}}
+    ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s64))
+    ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-NEXT: PseudoRET implicit $x10
+    %0:_(p0) = COPY $x10
+    %1:_(s64) = G_CONSTANT i64 0
+    %2:_(s64) = G_CONSTANT i64 1
+    %3:_(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s64))
+    $x10 = COPY %3(s64)
+    PseudoRET implicit $x10
+...
+---
+name: cmpxchg_with_success_i32
+
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; RV64IA-ZABHA-LABEL: name: cmpxchg_with_success_i32
+    ; RV64IA-ZABHA: liveins: $x10
+    ; RV64IA-ZABHA-NEXT: {{ $}}
+    ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+    ; RV64IA-ZABHA-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ATOMIC_CMPXCHG]], 32
+    ; RV64IA-ZABHA-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SEXT_INREG]](s64), [[C]]
+    ; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; RV64IA-ZABHA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-ZABHA-NEXT: $x11 = COPY [[ICMP]](s64)
+    ; RV64IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; RV64IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64IA-ZABHA-NEXT: $x10 = COPY [[COPY1]](s64)
+    ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64IA-LABEL: name: cmpxchg_with_success_i32
+    ; RV64IA: liveins: $x10
+    ; RV64IA-NEXT: {{ $}}
+    ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32))
+    ; RV64IA-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ATOMIC_CMPXCHG]], 32
+    ; RV64IA-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[SEXT_INREG]](s64), [[C]]
+    ; RV64IA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; RV64IA-NEXT: $x10 = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-NEXT: $x11 = COPY [[ICMP]](s64)
+    ; RV64IA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; RV64IA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; RV64IA-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64IA-NEXT: $x10 = COPY [[COPY1]](s64)
+    ; RV64IA-NEXT: PseudoRET implicit $x10
+    %0:_(p0) = COPY $x10
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic (s32))
+    %5:_(s32) = G_ANYEXT %4
+    %6:_(s32) = G_MUL %3, %5
+    %7:_(s64) = G_ANYEXT %6
+    $x10 = COPY %7(s64)
+    PseudoRET implicit $x10
+...
+---
+name: cmpxchg_with_success_i64
+
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; RV64IA-ZABHA-LABEL: name: cmpxchg_with_success_i64
+    ; RV64IA-ZABHA: liveins: $x10
+    ; RV64IA-ZABHA-NEXT: {{ $}}
+    ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-ZABHA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-ZABHA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-ZABHA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s64))
+    ; RV64IA-ZABHA-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[C]]
+    ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; RV64IA-ZABHA-NEXT: $x10 = COPY [[COPY1]](s64)
+    ; RV64IA-ZABHA-NEXT: $x11 = COPY [[ICMP]](s64)
+    ; RV64IA-ZABHA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; RV64IA-ZABHA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; RV64IA-ZABHA-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64IA-ZABHA-NEXT: $x10 = COPY [[COPY2]](s64)
+    ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
+    ;
+    ; RV64IA-LABEL: name: cmpxchg_with_success_i64
+    ; RV64IA: liveins: $x10
+    ; RV64IA-NEXT: {{ $}}
+    ; RV64IA-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+    ; RV64IA-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; RV64IA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; RV64IA-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s64))
+    ; RV64IA-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[C]]
+    ; RV64IA-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[ATOMIC_CMPXCHG]](s64)
+    ; RV64IA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; RV64IA-NEXT: $x10 = COPY [[COPY1]](s64)
+    ; RV64IA-NEXT: $x11 = COPY [[ICMP]](s64)
+    ; RV64IA-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; RV64IA-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; RV64IA-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x10
+    ; RV64IA-NEXT: $x10 = COPY [[COPY2]](s64)
+    ; RV64IA-NEXT: PseudoRET implicit $x10
+    %0:_(p0) = COPY $x10
+    %1:_(s64) = G_CONSTANT i64 0
+    %2:_(s64) = G_CONSTANT i64 1
+    %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic (s64))
+    %5:_(s64) = G_ANYEXT %4
+    %6:_(s64) = G_MUL %3, %5
+    $x10 = COPY %6(s64)
+    PseudoRET implicit $x10
+...