diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 95eab16511e5a..09b9570f62316 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -788,8 +788,21 @@ void AArch64PassConfig::addMachineSSAOptimization() { // Run default MachineSSAOptimization first. TargetPassConfig::addMachineSSAOptimization(); + // With optimization, dead code should already be eliminated. However + // there is one known exception: peephole optimizations may open more + // opportunities for dead code. This is especially true for targets such + // as ARM and AArch64, whose peephole optimizations remove dead defs of the + // flag register that previously prevented CSE. + addPass(&MachineCSELegacyID); + addPass(&MachineSinkingLegacyID); + if (TM->getOptLevel() != CodeGenOptLevel::None) addPass(createAArch64MIPeepholeOptPass()); + + // Clean up any remaining code that can be eliminated. Demoting some + // instructions can make other instructions removable that previously + // could not be removed. + addPass(&DeadMachineInstructionElimID); } bool AArch64PassConfig::addILPOpts() { diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index e1481667a4ab7..8e8092b1da67d 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -162,6 +162,8 @@ ; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: AArch64 MI Peephole Optimization pass ; CHECK-NEXT: AArch64 Dead register definitions +; CHECK-NEXT: Machine Common Subexpression Elimination +; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll index 
91eda8d552397..48f874d0b2dcc 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll @@ -206,13 +206,12 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-LABEL: insert_vec_v8i16_uaddlv_from_v8i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: stp xzr, xzr, [x0, #16] -; CHECK-NEXT: uaddlv.8h s0, v0 -; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: uaddlv.8h s1, v0 +; CHECK-NEXT: mov.h v0[0], v1[0] +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -228,14 +227,13 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-LABEL: insert_vec_v3i16_uaddlv_from_v8i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: add x8, x0, #8 -; CHECK-NEXT: uaddlv.8h s0, v0 -; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: st1.s { v1 }[2], [x8] -; CHECK-NEXT: str d1, [x0] +; CHECK-NEXT: uaddlv.8h s1, v0 +; CHECK-NEXT: mov.h v0[0], v1[0] +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: st1.s { v0 }[2], [x8] +; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret entry: @@ -283,9 +281,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) { ; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: mov.h v2[0], v1[0] ; CHECK-NEXT: bic.4h v2, #255, lsl #8 -; CHECK-NEXT: ushll.4s v2, v2, #0 -; CHECK-NEXT: ucvtf.4s v2, v2 -; CHECK-NEXT: stp q2, q0, [x0] +; CHECK-NEXT: ushll.4s v1, v2, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret entry: @@ -386,12 +384,11 @@ define void @insert_vec_v4i16_uaddlv_from_v4i32(ptr %0) { ; CHECK-LABEL: 
insert_vec_v4i16_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: uaddlv.4s d1, v0 +; CHECK-NEXT: mov.h v0[0], v1[0] +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -407,14 +404,13 @@ define void @insert_vec_v16i16_uaddlv_from_v4i32(ptr %0) { ; CHECK-LABEL: insert_vec_v16i16_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: movi.2d v2, #0000000000000000 -; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: stp q2, q2, [x0, #32] -; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: uaddlv.4s d1, v0 +; CHECK-NEXT: stp q0, q0, [x0, #32] +; CHECK-NEXT: mov.h v2[0], v1[0] +; CHECK-NEXT: ushll.4s v1, v2, #0 ; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q2, [x0] +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret entry: @@ -430,14 +426,13 @@ define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) { ; CHECK-LABEL: insert_vec_v8i8_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: stp xzr, xzr, [x0, #16] -; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: bic.4h v1, #255, lsl #8 -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: uaddlv.4s d1, v0 +; CHECK-NEXT: mov.h v0[0], v1[0] +; CHECK-NEXT: bic.4h v0, #255, lsl #8 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -453,15 +448,14 @@ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) { ; CHECK-LABEL: 
insert_vec_v16i8_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: movi.2d v2, #0000000000000000 -; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: stp q2, q2, [x0, #32] -; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: bic.4h v1, #255, lsl #8 -; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: uaddlv.4s d1, v0 +; CHECK-NEXT: stp q0, q0, [x0, #32] +; CHECK-NEXT: mov.h v2[0], v1[0] +; CHECK-NEXT: bic.4h v2, #255, lsl #8 +; CHECK-NEXT: ushll.4s v1, v2, #0 ; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q2, [x0] +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/addsub-shifted-reg-cheap-as-move.ll b/llvm/test/CodeGen/AArch64/addsub-shifted-reg-cheap-as-move.ll index a72f9df9e496f..0147e5239aefb 100644 --- a/llvm/test/CodeGen/AArch64/addsub-shifted-reg-cheap-as-move.ll +++ b/llvm/test/CodeGen/AArch64/addsub-shifted-reg-cheap-as-move.ll @@ -97,15 +97,13 @@ define void @f1(i1 %c0, i1 %c1, ptr %a, i64 %i) { ; LSLFAST-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; LSLFAST-NEXT: .cfi_def_cfa_offset 16 ; LSLFAST-NEXT: .cfi_offset w30, -16 -; LSLFAST-NEXT: add x8, x2, x3, lsl #4 +; LSLFAST-NEXT: add x0, x2, x3, lsl #4 ; LSLFAST-NEXT: tbz w1, #0, .LBB1_3 ; LSLFAST-NEXT: // %bb.2: // %B -; LSLFAST-NEXT: mov x0, x8 ; LSLFAST-NEXT: bl g ; LSLFAST-NEXT: b .LBB1_4 ; LSLFAST-NEXT: .LBB1_3: // %C -; LSLFAST-NEXT: add x0, x2, x3, lsl #4 -; LSLFAST-NEXT: mov x1, x8 +; LSLFAST-NEXT: mov x1, x0 ; LSLFAST-NEXT: bl g ; LSLFAST-NEXT: .LBB1_4: ; LSLFAST-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll index 70f3b5cc488ea..3bf4622d5fb38 100644 --- a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll +++ b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -aarch64-enable-sink-fold=true < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS @@ -17,6 +18,12 @@ define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -25,19 +32,27 @@ define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -46,19 +61,27 @@ define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -67,19 +90,27 @@ define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -88,19 +119,27 @@ define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -109,18 +148,26 @@ define dso_local void @test_atomic_load_add_i32_noret(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -129,18 +176,26 @@ define dso_local void @test_atomic_load_add_i64_noret(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -149,19 +204,27 @@ define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsetalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -170,19 +233,27 @@ define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsetalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -191,19 +262,27 @@ define dso_local i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -212,19 +291,27 @@ define dso_local i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_or_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -233,18 +320,26 @@ define dso_local void @test_atomic_load_or_i32_noret(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_or_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -253,18 +348,26 @@ define dso_local void @test_atomic_load_or_i64_noret(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeoralb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -273,19 +376,27 @@ define dso_local i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldeoralb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeoralh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -294,19 +405,27 @@ define dso_local i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldeoralh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -315,19 +434,27 @@ define dso_local i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeoral w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -336,19 +463,27 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeoral x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xor_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -357,18 +492,26 @@ define dso_local void @test_atomic_load_xor_i32_noret(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeoral w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xor_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -377,658 +520,858 @@ define dso_local void @test_atomic_load_xor_i64_noret(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeoral x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB18_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB18_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsminalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min 
ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB19_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB19_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsminalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal 
w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB20_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB20_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsminal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB21_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le ; 
OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB21_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsminal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_min_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB22_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB22_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsminal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; 
CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_min_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB23_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB23_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsminal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; 
OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB24_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB24_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB25_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] -; 
OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB25_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB26_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB26_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset seq_cst -; 
CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB27_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB27_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umin_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: 
test_atomic_load_umin_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB28_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB28_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB29_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; 
OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB29_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB30_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB30_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsmaxalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], 
{{#?}}:lo12:var8 -; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB31_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB31_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsmaxalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32: ; OUTLINE-ATOMICS: // %bb.0: ; 
OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB32_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB32_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB33_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, 
.LBB33_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmaxal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_max_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB34_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB34_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w0, 
w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_max_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB35_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB35_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmaxal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: 
.LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB36_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB36_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB37_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB37_1 ; OUTLINE-ATOMICS-NEXT: // 
%bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB38_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB38_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: 
ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB39_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB39_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umax_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, 
x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB40_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB40_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umax_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB41_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB41_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; 
CHECK-REG-LABEL: test_atomic_load_umax_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1037,19 +1380,27 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: swpalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1058,19 +1409,27 @@ define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: swpalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1079,19 +1438,27 @@ define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1100,19 +1467,27 @@ define dso_local i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xchg_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1121,19 +1496,27 @@ define dso_local void @test_atomic_load_xchg_i32_noret(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xchg_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1142,19 +1525,27 @@ define dso_local void @test_atomic_load_xchg_i64_noret(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casab w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1163,20 +1554,31 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: casab w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK-NEXT: casab w0, w1, [x[[ADDR]]] -; CHECK-NEXT: ret ret i8 %old } define dso_local i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: casab w8, w1, [x9] +; CHECK-NEXT: cmp w8, w0, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_1: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill @@ -1188,22 +1590,31 @@ define dso_local i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind { ; OUTLINE-ATOMICS-NEXT: cset w0, eq ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i8_1: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: casab w8, w1, [x9] +; CHECK-REG-NEXT: cmp w8, w0, uxtb +; CHECK-REG-NEXT: cset w0, eq +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire %success = extractvalue { i8, i1 } %pair, 1 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]] -; CHECK-NEXT: cmp w[[NEW]], w0, uxtb -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret ret i1 %success } define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: casah w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1212,20 +1623,31 @@ define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: casah w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new acquire acquire %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK-NEXT: casah w0, w1, [x[[ADDR]]] -; CHECK-NEXT: ret ret i16 %old } define dso_local i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: casah w8, w1, [x9] +; CHECK-NEXT: cmp w8, w0, uxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_1: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: stp x30, x19, [sp, #-16]! 
// 16-byte Folded Spill @@ -1237,23 +1659,32 @@ define dso_local i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind { ; OUTLINE-ATOMICS-NEXT: cset w0, eq ; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i16_1: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: casah w8, w1, [x9] +; CHECK-REG-NEXT: cmp w8, w0, uxth +; CHECK-REG-NEXT: cset w0, eq +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new acquire acquire %success = extractvalue { i16, i1 } %pair, 1 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]] -; CHECK-NEXT: cmp w[[NEW]], w0, uxth -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: ret ret i1 %success } define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casa w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1262,21 +1693,29 @@ define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: casa w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new acquire acquire %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casa w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_acquire(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casa w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1285,21 +1724,29 @@ define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_acquire(i32 %wanted, i32 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_monotonic_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: casa w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new monotonic acquire %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casa w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: casa x0, x1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1308,21 +1755,33 @@ define dso_local i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: casa x0, x1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new acquire acquire %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: casa x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: caspa x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1331,21 +1790,37 @@ define dso_local i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwin ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i128: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: adrp x8, var128 +; CHECK-REG-NEXT: add x8, x8, :lo12:var128 +; CHECK-REG-NEXT: caspa x0, x1, x2, x3, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new acquire acquire %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define dso_local i128 @test_atomic_cmpxchg_i128_monotonic_seqcst(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic_seqcst: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: caspal x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic_seqcst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1354,21 +1829,37 @@ define dso_local i128 @test_atomic_cmpxchg_i128_monotonic_seqcst(i128 %wanted, i ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_monotonic_seqcst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: adrp x8, var128 +; CHECK-REG-NEXT: add x8, x8, :lo12:var128 +; CHECK-REG-NEXT: caspal x0, x1, x2, x3, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new monotonic seq_cst %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define dso_local i128 @test_atomic_cmpxchg_i128_release_acquire(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_release_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: caspal x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_release_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1377,251 +1868,353 @@ define dso_local i128 @test_atomic_cmpxchg_i128_release_acquire(i128 %wanted, i1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_release_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: adrp x8, var128 +; CHECK-REG-NEXT: add x8, x8, :lo12:var128 +; CHECK-REG-NEXT: caspal x0, x1, x2, x3, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new release acquire %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define dso_local i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddalb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddalb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_neg_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_neg_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_neg_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_neg_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1630,20 +2223,28 @@ define dso_local i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_neg_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %neg = sub i8 0, %offset %old = atomicrmw sub ptr @var8, i8 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1652,20 +2253,28 @@ define dso_local i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_neg_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %neg = sub i16 0, %offset %old = atomicrmw sub ptr @var16, i16 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1674,20 +2283,28 @@ define dso_local i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_neg_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w0, [x8] +; CHECK-REG-NEXT: ret %neg = sub i32 0, %offset %old = atomicrmw sub ptr @var32, i32 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1696,20 +2313,29 @@ define dso_local i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_neg_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x0, [x8] +; CHECK-REG-NEXT: ret %neg = sub i64 0, %offset %old = atomicrmw sub ptr @var64, i64 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1719,19 +2345,28 @@ define dso_local i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1741,19 +2376,28 @@ define dso_local i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1763,19 +2407,28 @@ define dso_local i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1785,103 +2438,147 @@ define dso_local i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i8 @test_atomic_load_and_i8_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_inv_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_inv_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_inv_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_inv_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[CONST]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldclralb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1890,18 +2587,26 @@ define dso_local i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_inv_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w0, w0, [x8] +; CHECK-REG-NEXT: ret %inv = xor i8 %offset, -1 %old = atomicrmw and ptr @var8, i8 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldclralh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1910,18 +2615,26 @@ define dso_local i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_inv_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w0, w0, [x8] +; CHECK-REG-NEXT: ret %inv = xor i16 %offset, -1 %old = atomicrmw and ptr @var16, i16 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldclral w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1930,18 +2643,26 @@ define dso_local i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_inv_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldclral w0, w0, [x8] +; CHECK-REG-NEXT: ret %inv = xor i32 %offset, -1 %old = atomicrmw and ptr @var32, i32 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldclral x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1950,18 +2671,27 @@ define dso_local i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_inv_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldclral x0, x0, [x8] +; CHECK-REG-NEXT: ret %inv = xor i64 %offset, -1 %old = atomicrmw and ptr @var64, i64 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1971,19 +2701,28 @@ define dso_local void @test_atomic_load_and_i32_noret(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1993,19 +2732,27 @@ define dso_local void @test_atomic_load_and_i64_noret(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2014,19 +2761,27 @@ define dso_local i8 @test_atomic_load_add_i8_acq_rel(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2035,19 +2790,27 @@ define dso_local i16 @test_atomic_load_add_i16_acq_rel(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2056,19 +2819,27 @@ define dso_local i32 @test_atomic_load_add_i32_acq_rel(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2077,19 +2848,27 @@ define dso_local i64 @test_atomic_load_add_i64_acq_rel(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2098,18 +2877,26 @@ define dso_local void @test_atomic_load_add_i32_noret_acq_rel(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2118,18 +2905,26 @@ define dso_local void @test_atomic_load_add_i64_noret_acq_rel(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2138,19 +2933,27 @@ define dso_local i8 @test_atomic_load_add_i8_acquire(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2159,19 +2962,27 @@ define dso_local i16 @test_atomic_load_add_i16_acquire(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadda w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2180,19 +2991,27 @@ define dso_local i32 @test_atomic_load_add_i32_acquire(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldadda w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadda x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2201,19 +3020,27 @@ define dso_local i64 @test_atomic_load_add_i64_acquire(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldadda x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadda w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2222,18 +3049,26 @@ define dso_local void @test_atomic_load_add_i32_noret_acquire(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldadda w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadda x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2242,18 +3077,26 @@ define dso_local void @test_atomic_load_add_i64_noret_acquire(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldadda x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2262,19 +3105,27 @@ define dso_local i8 @test_atomic_load_add_i8_monotonic(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2283,19 +3134,27 @@ define dso_local i16 @test_atomic_load_add_i16_monotonic(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadd w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2304,19 +3163,27 @@ define dso_local i32 @test_atomic_load_add_i32_monotonic(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldadd w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadd x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2325,19 +3192,27 @@ define dso_local i64 @test_atomic_load_add_i64_monotonic(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldadd x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadd w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2346,18 +3221,26 @@ define dso_local void @test_atomic_load_add_i32_noret_monotonic(i32 %offset) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldadd w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadd x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2366,18 +3249,26 @@ define dso_local void @test_atomic_load_add_i64_noret_monotonic(i64 %offset) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldadd x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x{{[0-9]}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddlb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2386,19 +3277,27 @@ define dso_local i8 @test_atomic_load_add_i8_release(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddlb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddlh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2407,19 +3306,27 @@ define dso_local i16 @test_atomic_load_add_i16_release(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddlh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2428,19 +3335,27 @@ define dso_local i32 @test_atomic_load_add_i32_release(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2449,19 +3364,27 @@ define dso_local i64 @test_atomic_load_add_i64_release(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2470,18 +3393,26 @@ define dso_local void @test_atomic_load_add_i32_noret_release(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2490,18 +3421,26 @@ define dso_local void @test_atomic_load_add_i64_noret_release(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2510,19 +3449,27 @@ define dso_local i8 @test_atomic_load_add_i8_seq_cst(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2531,19 +3478,27 @@ define dso_local i16 @test_atomic_load_add_i16_seq_cst(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2552,19 +3507,27 @@ define dso_local i32 @test_atomic_load_add_i32_seq_cst(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2573,19 +3536,27 @@ define dso_local i64 @test_atomic_load_add_i64_seq_cst(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2594,18 +3565,26 @@ define dso_local void @test_atomic_load_add_i32_noret_seq_cst(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2614,18 +3593,27 @@ define dso_local void @test_atomic_load_add_i64_noret_seq_cst(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2635,19 +3623,28 @@ define dso_local i8 @test_atomic_load_and_i8_acq_rel(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2657,19 +3654,28 @@ define dso_local i16 @test_atomic_load_and_i16_acq_rel(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2679,19 +3685,28 @@ define dso_local i32 @test_atomic_load_and_i32_acq_rel(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2701,19 +3716,28 @@ define dso_local i64 @test_atomic_load_and_i64_acq_rel(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2723,19 +3747,28 @@ define dso_local void @test_atomic_load_and_i32_noret_acq_rel(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2745,19 +3778,28 @@ define dso_local void @test_atomic_load_and_i64_noret_acq_rel(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclrab w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2767,19 +3809,28 @@ define dso_local i8 @test_atomic_load_and_i8_acquire(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclrab w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclrab w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclrah w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2789,19 +3840,28 @@ define dso_local i16 @test_atomic_load_and_i16_acquire(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclrah w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclrah w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclra w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2811,19 +3871,28 @@ define dso_local i32 @test_atomic_load_and_i32_acquire(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclra w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclra x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2833,19 +3902,28 @@ define dso_local i64 @test_atomic_load_and_i64_acquire(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclra x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclra w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2855,19 +3933,28 @@ define dso_local void @test_atomic_load_and_i32_noret_acquire(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclra w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclra x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2877,19 +3964,28 @@ define dso_local void @test_atomic_load_and_i64_noret_acquire(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclra x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclrb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2899,19 +3995,28 @@ define dso_local i8 @test_atomic_load_and_i8_monotonic(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclrb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclrb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclrh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2921,19 +4026,28 @@ define dso_local i16 @test_atomic_load_and_i16_monotonic(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclrh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclrh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclr w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2943,19 +4057,28 @@ define dso_local i32 @test_atomic_load_and_i32_monotonic(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclr w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclr w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclr x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2965,19 +4088,28 @@ define dso_local i64 @test_atomic_load_and_i64_monotonic(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclr x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclr x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclr w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2987,19 +4119,28 @@ define dso_local void @test_atomic_load_and_i32_noret_monotonic(i32 %offset) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclr w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclr w{{[0-9]+}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclr x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3009,19 +4150,28 @@ define dso_local void @test_atomic_load_and_i64_noret_monotonic(i64 %offset) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclr x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclr x{{[0-9]+}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclrlb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3031,19 +4181,28 @@ define dso_local i8 @test_atomic_load_and_i8_release(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclrlb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclrlb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclrlh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3053,19 +4212,28 @@ define dso_local i16 @test_atomic_load_and_i16_release(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclrlh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclrlh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclrl w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3075,19 +4243,28 @@ define dso_local i32 @test_atomic_load_and_i32_release(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclrl w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclrl w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclrl x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3097,19 +4274,28 @@ define dso_local i64 @test_atomic_load_and_i64_release(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclrl x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclrl x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclrl w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3119,19 +4305,28 @@ define dso_local void @test_atomic_load_and_i32_noret_release(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclrl w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclrl w{{[0-9]*}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclrl x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3141,19 +4336,28 @@ define dso_local void @test_atomic_load_and_i64_noret_release(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclrl x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclrl x{{[0-9]*}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3163,19 +4367,28 @@ define dso_local i8 @test_atomic_load_and_i8_seq_cst(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3185,19 +4398,28 @@ define dso_local i16 @test_atomic_load_and_i16_seq_cst(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3207,19 +4429,28 @@ define dso_local i32 @test_atomic_load_and_i32_seq_cst(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3229,19 +4460,28 @@ define dso_local i64 @test_atomic_load_and_i64_seq_cst(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3251,19 +4491,28 @@ define dso_local void @test_atomic_load_and_i32_noret_seq_cst(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3273,19 +4522,27 @@ define dso_local void @test_atomic_load_and_i64_noret_seq_cst(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_cmpxchg_i8_acquire(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casab w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3294,21 +4551,29 @@ define dso_local i8 @test_atomic_cmpxchg_i8_acquire(i8 %wanted, i8 %new) nounwin ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: casab w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_cmpxchg_i16_acquire(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: casah w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3317,21 +4582,29 @@ define dso_local i16 @test_atomic_cmpxchg_i16_acquire(i16 %wanted, i16 %new) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: casah w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new acquire acquire %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: casah w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_cmpxchg_i32_acquire(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casa w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3340,21 +4613,29 @@ define dso_local i32 @test_atomic_cmpxchg_i32_acquire(i32 %wanted, i32 %new) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: casa w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new acquire acquire %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casa w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: casa x0, x1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3363,21 +4644,33 @@ define dso_local i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: casa x0, x1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new acquire acquire %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: casa x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: caspa x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3386,21 +4679,33 @@ define dso_local i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: adrp x8, var128 +; CHECK-REG-NEXT: add x8, x8, :lo12:var128 +; CHECK-REG-NEXT: caspa x0, x1, x2, x3, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new acquire acquire %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define dso_local i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casb w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3409,21 +4714,29 @@ define dso_local i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: casb w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new monotonic monotonic %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_cmpxchg_i16_monotonic(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: cash w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3432,21 +4745,29 @@ define dso_local i16 @test_atomic_cmpxchg_i16_monotonic(i16 %wanted, i16 %new) n ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: cash w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new monotonic monotonic %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: cash w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_cmpxchg_i32_monotonic(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: cas w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3455,21 +4776,29 @@ define dso_local i32 @test_atomic_cmpxchg_i32_monotonic(i32 %wanted, i32 %new) n ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: cas w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new monotonic monotonic %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: cas w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: cas x0, x1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3478,21 +4807,33 @@ define dso_local i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) n ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: cas x0, x1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: cas x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: casp x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3501,21 +4842,33 @@ define dso_local i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %ne ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: adrp x8, var128 +; CHECK-REG-NEXT: add x8, x8, :lo12:var128 +; CHECK-REG-NEXT: casp x0, x1, x2, x3, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new monotonic monotonic %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: casp x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define dso_local i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casalb w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3524,21 +4877,29 @@ define dso_local i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwin ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: casalb w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new seq_cst seq_cst %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casalb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_cmpxchg_i16_seq_cst(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: casalh w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3547,21 +4908,29 @@ define dso_local i16 @test_atomic_cmpxchg_i16_seq_cst(i16 %wanted, i16 %new) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: casalh w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: casalh w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_cmpxchg_i32_seq_cst(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casal w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3570,21 +4939,29 @@ define dso_local i32 @test_atomic_cmpxchg_i32_seq_cst(i32 %wanted, i32 %new) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: casal w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new seq_cst seq_cst %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casal w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_seq_cst(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casal w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3593,21 +4970,29 @@ define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_seq_cst(i32 %wanted, i32 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_monotonic_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: casal w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new monotonic seq_cst %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casal w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i32 @test_atomic_cmpxchg_i32_release_acquire(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_release_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casal w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_release_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3616,21 +5001,29 @@ define dso_local i32 @test_atomic_cmpxchg_i32_release_acquire(i32 %wanted, i32 % ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_release_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: casal w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new release acquire %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casal w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: casal x0, x1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3639,21 +5032,33 @@ define dso_local i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: casal x0, x1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new seq_cst seq_cst %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: casal x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: caspal x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3662,1621 +5067,2113 @@ define dso_local i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: adrp x8, var128 +; CHECK-REG-NEXT: add x8, x8, :lo12:var128 +; CHECK-REG-NEXT: caspal x0, x1, x2, x3, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new seq_cst seq_cst %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define dso_local i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB163_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, 
.LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB163_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsmaxalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_max_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB164_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB164_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsmaxalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var16, i16 %offset 
acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_max_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB165_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB165_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_max_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x0, [x8] +; 
CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB166_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB166_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmaxal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_max_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB167_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; 
OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB167_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_max_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB168_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB168_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmaxal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var64, i64 
%offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_max_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB169_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB169_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsmaxab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_max_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxah 
w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB170_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB170_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsmaxah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_max_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxa w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB171_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; 
OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB171_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxa w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_max_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxa x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB172_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB172_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; 
CHECK-REG-NEXT: ldsmaxa x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_max_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxa w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB173_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB173_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxa w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_max_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; 
CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxa x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB174_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB174_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmaxa x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_max_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB175_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; 
OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB175_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsmaxb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_max_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB176_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB176_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: 
adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsmaxh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_max_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmax w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB177_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB177_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmax w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_max_i64_monotonic(i64 %offset) nounwind { ; 
CHECK-LABEL: test_atomic_load_max_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmax x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB178_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB178_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmax x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_max_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmax w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: 
.LBB179_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB179_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmax w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmax w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_max_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmax x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB180_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB180_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_monotonic: +; CHECK-REG: // 
%bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmax x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmax x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_max_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxlb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB181_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB181_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsmaxlb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 
@test_atomic_load_max_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxlh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB182_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB182_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsmaxlh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_max_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; 
OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB183_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB183_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_max_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB184_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB184_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; 
OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmaxl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_max_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB185_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB185_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxl w{{[0-9]+}}, w{{[1-9][0-9]*}}, 
[x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_max_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB186_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB186_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmaxl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_max_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsmaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: 
.LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB187_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB187_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsmaxalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_max_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsmaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB188_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: 
cbnz w11, .LBB188_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsmaxalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_max_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB189_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB189_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add 
x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_max_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB190_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB190_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmaxal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw max ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_max_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmaxal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: 
test_atomic_load_max_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB191_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB191_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmaxal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_max_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_max_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmaxal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB192_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: 
cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB192_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmaxal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw max ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_min_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB193_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB193_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsminalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp 
[[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_min_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB194_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB194_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsminalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_min_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w0, [x8] 
+; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB195_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB195_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsminal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_min_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB196_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, 
x8, x0, le ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB196_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsminal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_min_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB197_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB197_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsminal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw 
min ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_min_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB198_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB198_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsminal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_min_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminab w0, w0, [x8] +; CHECK-NEXT: ret +; ; 
OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB199_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB199_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsminab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_min_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB200_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; 
OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB200_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsminah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_min_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmina w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB201_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB201_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, 
:lo12:var32 +; CHECK-REG-NEXT: ldsmina w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_min_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmina x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB202_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB202_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmina x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_min_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_acquire: +; 
CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmina w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB203_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB203_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmina w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmina w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_min_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmina x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB204_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; 
OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB204_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmina x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmina x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_min_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB205_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB205_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: 
add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsminb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_min_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB206_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB206_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsminh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_min_i32_monotonic(i32 %offset) nounwind { ; 
CHECK-LABEL: test_atomic_load_min_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmin w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB207_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB207_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmin w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_min_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmin x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB208_1: // 
%atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB208_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmin x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_min_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmin w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB209_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB209_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: 
test_atomic_load_min_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmin w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmin w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_min_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmin x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB210_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB210_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmin x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmin x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 
@test_atomic_load_min_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminlb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB211_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB211_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsminlb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_min_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminlh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: 
.LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB212_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB212_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsminlh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_min_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB213_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB213_1 ; OUTLINE-ATOMICS-NEXT: // 
%bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsminl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_min_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB214_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB214_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsminl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: 
ldsminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_min_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB215_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB215_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsminl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_min_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; 
OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB216_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB216_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsminl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_min_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB217_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB217_1 ; 
OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsminalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_min_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB218_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB218_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsminalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_min_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB219_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB219_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsminal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_min_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: 
test_atomic_load_min_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB220_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB220_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsminal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_min_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsminal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB221_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le ; 
OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB221_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsminal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_min_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsminal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB222_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB222_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsminal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp 
[[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_or_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -5285,19 +7182,27 @@ define dso_local i8 @test_atomic_load_or_i8_acq_rel(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsetalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_or_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5306,19 +7211,27 @@ define dso_local i16 @test_atomic_load_or_i16_acq_rel(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsetalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_or_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5327,19 +7240,27 @@ define dso_local i32 @test_atomic_load_or_i32_acq_rel(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_or_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5348,19 +7269,27 @@ define dso_local i64 @test_atomic_load_or_i64_acq_rel(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_or_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5369,18 +7298,26 @@ define dso_local void @test_atomic_load_or_i32_noret_acq_rel(i32 %offset) nounwi ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_or_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5389,18 +7326,26 @@ define dso_local void @test_atomic_load_or_i64_noret_acq_rel(i64 %offset) nounwi ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_or_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5409,19 +7354,27 @@ define dso_local i8 @test_atomic_load_or_i8_acquire(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsetab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_or_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5430,19 +7383,27 @@ define dso_local i16 @test_atomic_load_or_i16_acquire(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsetah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_or_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldseta w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5451,19 +7412,27 @@ define dso_local i32 @test_atomic_load_or_i32_acquire(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldseta w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldseta w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_or_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldseta x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5472,19 +7441,27 @@ define dso_local i64 @test_atomic_load_or_i64_acquire(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldseta x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldseta x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_or_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldseta w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5493,18 +7470,26 @@ define dso_local void @test_atomic_load_or_i32_noret_acquire(i32 %offset) nounwi ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldseta w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldseta w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_or_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldseta x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5513,18 +7498,26 @@ define dso_local void @test_atomic_load_or_i64_noret_acquire(i64 %offset) nounwi ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldseta x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldseta x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_or_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5533,19 +7526,27 @@ define dso_local i8 @test_atomic_load_or_i8_monotonic(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsetb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_or_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldseth w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5554,19 +7555,27 @@ define dso_local i16 @test_atomic_load_or_i16_monotonic(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldseth w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldseth w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_or_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldset w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5575,19 +7584,27 @@ define dso_local i32 @test_atomic_load_or_i32_monotonic(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldset w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldset w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_or_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldset x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5596,19 +7613,27 @@ define dso_local i64 @test_atomic_load_or_i64_monotonic(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldset x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldset x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_or_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldset w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5617,18 +7642,26 @@ define dso_local void @test_atomic_load_or_i32_noret_monotonic(i32 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldset w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldset w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_or_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldset x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5637,18 +7670,26 @@ define dso_local void @test_atomic_load_or_i64_noret_monotonic(i64 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldset x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldset x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_or_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetlb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5657,19 +7698,27 @@ define dso_local i8 @test_atomic_load_or_i8_release(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsetlb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_or_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetlh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5678,19 +7727,27 @@ define dso_local i16 @test_atomic_load_or_i16_release(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsetlh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_or_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5699,19 +7756,27 @@ define dso_local i32 @test_atomic_load_or_i32_release(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_or_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5720,19 +7785,27 @@ define dso_local i64 @test_atomic_load_or_i64_release(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_or_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5741,18 +7814,26 @@ define dso_local void @test_atomic_load_or_i32_noret_release(i32 %offset) nounwi ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_or_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5761,18 +7842,26 @@ define dso_local void @test_atomic_load_or_i64_noret_release(i64 %offset) nounwi ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_or_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5781,19 +7870,27 @@ define dso_local i8 @test_atomic_load_or_i8_seq_cst(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsetalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_or_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5802,19 +7899,27 @@ define dso_local i16 @test_atomic_load_or_i16_seq_cst(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsetalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_or_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5823,19 +7928,27 @@ define dso_local i32 @test_atomic_load_or_i32_seq_cst(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_or_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5844,19 +7957,27 @@ define dso_local i64 @test_atomic_load_or_i64_seq_cst(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_or_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5865,18 +7986,26 @@ define dso_local void @test_atomic_load_or_i32_noret_seq_cst(i32 %offset) nounwi ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_or_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5885,708 +8014,986 @@ define dso_local void @test_atomic_load_or_i64_noret_seq_cst(i64 %offset) nounwi ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddalb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddab w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddab w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddab w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddah w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddah w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddah w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadda w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldadda w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadda x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldadda x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadda w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldadda w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadda x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldadda x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadd w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldadd w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadd x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldadd x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadd w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldadd w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w{{[0-9]+}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadd x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldadd x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x{{[0-9]+}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddlb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddlb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddlb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddlh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddlh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddlh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddl w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddl w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddl x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddl x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddl w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddl w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w{{[0-9]*}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddl x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddl x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x{{[0-9]*}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddalb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE-ATOMICS-NEXT: neg w0, w0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE-ATOMICS-NEXT: neg x0, x0 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xchg_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6595,19 +9002,27 @@ define dso_local i8 @test_atomic_load_xchg_i8_acq_rel(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: swpalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xchg_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6616,19 +9031,27 @@ define dso_local i16 @test_atomic_load_xchg_i16_acq_rel(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: swpalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xchg_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6637,19 +9060,27 @@ define dso_local i32 @test_atomic_load_xchg_i32_acq_rel(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xchg_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6658,19 +9089,27 @@ define dso_local i64 @test_atomic_load_xchg_i64_acq_rel(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xchg_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6679,19 +9118,27 @@ define dso_local void @test_atomic_load_xchg_i32_noret_acq_rel(i32 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xchg_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6700,19 +9147,27 @@ define dso_local void @test_atomic_load_xchg_i64_noret_acq_rel(i64 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xchg_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6721,19 +9176,27 @@ define dso_local i8 @test_atomic_load_xchg_i8_acquire(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: swpab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xchg_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6742,19 +9205,27 @@ define dso_local i16 @test_atomic_load_xchg_i16_acquire(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: swpah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xchg_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpa w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6763,19 +9234,27 @@ define dso_local i32 @test_atomic_load_xchg_i32_acquire(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpa w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xchg_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpa x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6784,19 +9263,27 @@ define dso_local i64 @test_atomic_load_xchg_i64_acquire(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpa x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xchg_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpa w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6805,19 +9292,27 @@ define dso_local void @test_atomic_load_xchg_i32_noret_acquire(i32 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpa w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xchg_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpa x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6826,19 +9321,27 @@ define dso_local void @test_atomic_load_xchg_i64_noret_acquire(i64 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpa x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xchg_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6847,19 +9350,27 @@ define dso_local i8 @test_atomic_load_xchg_i8_monotonic(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: swpb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xchg_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swph w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6868,19 +9379,27 @@ define dso_local i16 @test_atomic_load_xchg_i16_monotonic(i16 %offset) nounwind ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: swph w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swph w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xchg_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swp w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6889,19 +9408,27 @@ define dso_local i32 @test_atomic_load_xchg_i32_monotonic(i32 %offset) nounwind ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swp w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xchg_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swp x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6910,19 +9437,27 @@ define dso_local i64 @test_atomic_load_xchg_i64_monotonic(i64 %offset) nounwind ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swp x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xchg_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swp w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6931,19 +9466,27 @@ define dso_local void @test_atomic_load_xchg_i32_noret_monotonic(i32 %offset) no ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swp w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xchg_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swp x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6952,19 +9495,27 @@ define dso_local void @test_atomic_load_xchg_i64_noret_monotonic(i64 %offset) no ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swp x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xchg_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swplb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6973,19 +9524,27 @@ define dso_local i8 @test_atomic_load_xchg_i8_release(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: swplb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swplb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xchg_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swplh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6994,19 +9553,27 @@ define dso_local i16 @test_atomic_load_xchg_i16_release(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: swplh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swplh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xchg_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7015,19 +9582,27 @@ define dso_local i32 @test_atomic_load_xchg_i32_release(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xchg_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7036,19 +9611,27 @@ define dso_local i64 @test_atomic_load_xchg_i64_release(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xchg_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7057,19 +9640,27 @@ define dso_local void @test_atomic_load_xchg_i32_noret_release(i32 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xchg_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7078,19 +9669,27 @@ define dso_local void @test_atomic_load_xchg_i64_noret_release(i64 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xchg_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7099,19 +9698,27 @@ define dso_local i8 @test_atomic_load_xchg_i8_seq_cst(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: swpalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xchg_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7120,19 +9727,27 @@ define dso_local i16 @test_atomic_load_xchg_i16_seq_cst(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: swpalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xchg_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7141,19 +9756,27 @@ define dso_local i32 @test_atomic_load_xchg_i32_seq_cst(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xchg_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7162,19 +9785,27 @@ define dso_local i64 @test_atomic_load_xchg_i64_seq_cst(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xchg_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7183,19 +9814,27 @@ define dso_local void @test_atomic_load_xchg_i32_noret_seq_cst(i32 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xchg_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -7204,1619 +9843,2107 @@ define dso_local void @test_atomic_load_xchg_i64_noret_seq_cst(i64 %offset) noun ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB313_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB313_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 
+; CHECK-REG-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB314_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB314_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umax_i32_acq_rel(i32 %offset) 
nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB315_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB315_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umax_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: 
.LBB316_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB316_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umax_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB317_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB317_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: 
test_atomic_load_umax_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umax_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB318_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB318_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umax_i8_acquire(i8 %offset) 
nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB319_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB319_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldumaxab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff 
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB320_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB320_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldumaxah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umax_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxa w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB321_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB321_1 ; 
OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxa w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umax_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxa x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB322_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB322_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxa x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], 
[[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umax_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxa w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB323_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB323_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxa w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umax_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxa x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; 
OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB324_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB324_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxa x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB325_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; 
OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB325_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldumaxb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB326_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB326_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldumaxh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = 
atomicrmw umax ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umax_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumax w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB327_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB327_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumax w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umax_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, 
:lo12:var64 +; CHECK-NEXT: ldumax x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB328_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB328_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumax x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umax_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumax w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB329_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, 
[x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB329_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumax w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumax w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umax_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumax x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB330_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB330_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumax x0, x8, [x8] 
+; CHECK-REG-NEXT: ret atomicrmw umax ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumax x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxlb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB331_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB331_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldumaxlb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_release: +; CHECK: // 
%bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxlh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB332_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB332_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldumaxlh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umax_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB333_1: 
// %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB333_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umax_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB334_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB334_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: 
test_atomic_load_umax_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umax_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB335_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB335_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void 
@test_atomic_load_umax_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB336_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB336_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // 
%atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB337_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB337_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB338_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB338_1 ; 
OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umax_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB339_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB339_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; 
CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umax_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB340_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB340_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umax_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: 
test_atomic_load_umax_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB341_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB341_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umax_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB342_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; 
OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB342_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB343_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB343_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset 
acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB344_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB344_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; 
CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB345_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB345_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umin_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB346_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr 
x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB346_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umin_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB347_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB347_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, 
:lo12:var32 +; CHECK-REG-NEXT: lduminal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB348_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB348_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add 
x8, x8, :lo12:var8 +; CHECK-NEXT: lduminab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB349_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB349_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB350_1: // %atomicrmw.start ; 
OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB350_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumina w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB351_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB351_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: 
test_atomic_load_umin_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumina w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umin_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumina x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB352_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB352_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumina x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local 
void @test_atomic_load_umin_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumina w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB353_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB353_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumina w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumina w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumina x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; 
OUTLINE-ATOMICS-NEXT: .LBB354_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB354_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumina x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumina x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB355_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB355_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; 
OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB356_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB356_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], 
{{#?}}:lo12:var16 -; CHECK: lduminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumin w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB357_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB357_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumin w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umin_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumin x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_monotonic: ; OUTLINE-ATOMICS: // 
%bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB358_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB358_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumin x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umin_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumin w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB359_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: 
cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB359_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumin w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumin w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumin x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB360_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB360_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumin x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add 
x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumin x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminlb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB361_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB361_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminlb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminlh w0, w0, [x8] +; CHECK-NEXT: ret +; ; 
OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB362_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB362_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminlh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB363_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; 
OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB363_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umin_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB364_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB364_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, 
:lo12:var64 +; CHECK-REG-NEXT: lduminl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umin_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB365_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB365_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_release: +; CHECK: // %bb.0: +; 
CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB366_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB366_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var8 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB367_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; 
OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB367_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var16 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 ; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB368_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w0, w9 ; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls ; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB368_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; 
CHECK-REG-LABEL: test_atomic_load_umin_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB369_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB369_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 
%old } define dso_local i64 @test_atomic_load_umin_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB370_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB370_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umin_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; 
OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB371_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB371_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB372_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB372_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret 
+; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xor_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeoralb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -8825,19 +11952,27 @@ define dso_local i8 @test_atomic_load_xor_i8_acq_rel(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldeoralb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xor_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeoralh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acq_rel: ; OUTLINE-ATOMICS: // 
%bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -8846,19 +11981,27 @@ define dso_local i16 @test_atomic_load_xor_i16_acq_rel(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldeoralh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xor_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -8867,19 +12010,27 @@ define dso_local i32 @test_atomic_load_xor_i32_acq_rel(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeoral w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xor_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -8888,19 +12039,27 @@ define dso_local i64 @test_atomic_load_xor_i64_acq_rel(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeoral x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xor_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -8909,18 +12068,26 @@ define dso_local void @test_atomic_load_xor_i32_noret_acq_rel(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeoral w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xor_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -8929,18 +12096,26 @@ define dso_local void @test_atomic_load_xor_i64_noret_acq_rel(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeoral x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xor_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeorab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -8949,19 +12124,27 @@ define dso_local i8 @test_atomic_load_xor_i8_acquire(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldeorab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeorab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xor_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeorah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -8970,19 +12153,27 @@ define dso_local i16 @test_atomic_load_xor_i16_acquire(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldeorah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeorah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xor_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeora w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -8991,19 +12182,27 @@ define dso_local i32 @test_atomic_load_xor_i32_acquire(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeora w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeora w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xor_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeora x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9012,19 +12211,27 @@ define dso_local i64 @test_atomic_load_xor_i64_acquire(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeora x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeora x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xor_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeora w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9033,18 +12240,26 @@ define dso_local void @test_atomic_load_xor_i32_noret_acquire(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeora w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeora w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xor_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeora x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9053,18 +12268,26 @@ define dso_local void @test_atomic_load_xor_i64_noret_acquire(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeora x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeora x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xor_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeorb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9073,19 +12296,27 @@ define dso_local i8 @test_atomic_load_xor_i8_monotonic(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldeorb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeorb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xor_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeorh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9094,19 +12325,27 @@ define dso_local i16 @test_atomic_load_xor_i16_monotonic(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldeorh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeorh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xor_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeor w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9115,19 +12354,27 @@ define dso_local i32 @test_atomic_load_xor_i32_monotonic(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeor w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeor w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xor_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeor x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9136,19 +12383,27 @@ define dso_local i64 @test_atomic_load_xor_i64_monotonic(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeor x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeor x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xor_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeor w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9157,18 +12412,26 @@ define dso_local void @test_atomic_load_xor_i32_noret_monotonic(i32 %offset) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeor w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeor w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xor_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeor x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9177,18 +12440,26 @@ define dso_local void @test_atomic_load_xor_i64_noret_monotonic(i64 %offset) nou ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeor x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeor x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xor_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeorlb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9197,19 +12468,27 @@ define dso_local i8 @test_atomic_load_xor_i8_release(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldeorlb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeorlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xor_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeorlh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9218,19 +12497,27 @@ define dso_local i16 @test_atomic_load_xor_i16_release(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldeorlh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeorlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xor_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeorl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9239,19 +12526,27 @@ define dso_local i32 @test_atomic_load_xor_i32_release(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeorl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeorl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xor_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeorl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9260,19 +12555,27 @@ define dso_local i64 @test_atomic_load_xor_i64_release(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeorl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeorl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xor_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeorl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9281,18 +12584,26 @@ define dso_local void @test_atomic_load_xor_i32_noret_release(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeorl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeorl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xor_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeorl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9301,18 +12612,26 @@ define dso_local void @test_atomic_load_xor_i64_noret_release(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeorl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeorl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xor_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldeoralb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9321,19 +12640,27 @@ define dso_local i8 @test_atomic_load_xor_i8_seq_cst(i8 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldeoralb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xor_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldeoralh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9342,19 +12669,27 @@ define dso_local i16 @test_atomic_load_xor_i16_seq_cst(i16 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldeoralh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xor_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9363,19 +12698,27 @@ define dso_local i32 @test_atomic_load_xor_i32_seq_cst(i32 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeoral w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xor_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9384,19 +12727,27 @@ define dso_local i64 @test_atomic_load_xor_i64_seq_cst(i64 %offset) nounwind { ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeoral x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xor ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xor_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldeoral w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9405,18 +12756,26 @@ define dso_local void @test_atomic_load_xor_i32_noret_seq_cst(i32 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldeoral w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xor_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xor_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldeoral x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -9425,23 +12784,49 @@ define dso_local void @test_atomic_load_xor_i64_noret_seq_cst(i64 %offset) nounw ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldeoral x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xor ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i128 @test_atomic_load_i128() nounwind { ; CHECK-LABEL: test_atomic_load_i128: -; CHECK: casp - +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: mov x1, xzr +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: casp x0, x1, x0, x1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_i128: -; OUTLINE-ATOMICS: ldxp -; OUTLINE-ATOMICS: stxp +; OUTLINE-ATOMICS: // %bb.0: +; OUTLINE-ATOMICS-NEXT: adrp x8, var128 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var128 +; OUTLINE-ATOMICS-NEXT: .LBB403_1: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 +; OUTLINE-ATOMICS-NEXT: ldxp x0, x1, [x8] +; OUTLINE-ATOMICS-NEXT: stxp w9, x0, x1, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w9, .LBB403_1 +; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end +; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_i128: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov x0, xzr +; CHECK-REG-NEXT: mov x1, xzr +; CHECK-REG-NEXT: adrp x8, var128 +; CHECK-REG-NEXT: add x8, x8, :lo12:var128 +; CHECK-REG-NEXT: casp x0, x1, x0, x1, [x8] +; CHECK-REG-NEXT: ret + %pair = load atomic i128, ptr @var128 monotonic, align 16 ret i128 %pair } diff --git 
a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll index d8ac89f76b321..773ecf98680ea 100644 --- a/llvm/test/CodeGen/AArch64/atomic-ops.ll +++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll @@ -133,9 +133,9 @@ define dso_local i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { ; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i8: ; OUTLINE_ATOMICS: // %bb.0: ; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE_ATOMICS-NEXT: neg w0, w0 ; OUTLINE_ATOMICS-NEXT: adrp x1, var8 ; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var8 +; OUTLINE_ATOMICS-NEXT: neg w0, w0 ; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd1_relax ; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE_ATOMICS-NEXT: ret @@ -161,9 +161,9 @@ define dso_local i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { ; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i16: ; OUTLINE_ATOMICS: // %bb.0: ; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE_ATOMICS-NEXT: neg w0, w0 ; OUTLINE_ATOMICS-NEXT: adrp x1, var16 ; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var16 +; OUTLINE_ATOMICS-NEXT: neg w0, w0 ; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd2_rel ; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE_ATOMICS-NEXT: ret @@ -189,9 +189,9 @@ define dso_local i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { ; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i32: ; OUTLINE_ATOMICS: // %bb.0: ; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; OUTLINE_ATOMICS-NEXT: neg w0, w0 ; OUTLINE_ATOMICS-NEXT: adrp x1, var32 ; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var32 +; OUTLINE_ATOMICS-NEXT: neg w0, w0 ; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd4_acq ; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE_ATOMICS-NEXT: ret @@ -217,9 +217,9 @@ define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { ; OUTLINE_ATOMICS-LABEL: test_atomic_load_sub_i64: ; OUTLINE_ATOMICS: // %bb.0: ; OUTLINE_ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; OUTLINE_ATOMICS-NEXT: neg x0, x0 ; OUTLINE_ATOMICS-NEXT: adrp x1, var64 ; OUTLINE_ATOMICS-NEXT: add x1, x1, :lo12:var64 +; OUTLINE_ATOMICS-NEXT: neg x0, x0 ; OUTLINE_ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE_ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE_ATOMICS-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll index 6208a697cab11..f43a996c5708b 100644 --- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll +++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll @@ -749,9 +749,9 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) { ; CHECK-GI-NEXT: mov v4.h[1], w9 ; CHECK-GI-NEXT: neg v5.2d, v5.2d ; CHECK-GI-NEXT: ldr q19, [x8, :lo12:.LCPI23_3] -; CHECK-GI-NEXT: neg v19.2d, v19.2d ; CHECK-GI-NEXT: ushl v7.2d, v7.2d, v5.2d ; CHECK-GI-NEXT: ushl v5.2d, v17.2d, v5.2d +; CHECK-GI-NEXT: neg v17.2d, v19.2d ; CHECK-GI-NEXT: mov v4.h[2], w9 ; CHECK-GI-NEXT: add v7.2d, v0.2d, v7.2d ; CHECK-GI-NEXT: add v5.2d, v2.2d, v5.2d @@ -759,31 +759,28 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) { ; CHECK-GI-NEXT: adrp x9, .LCPI23_0 ; CHECK-GI-NEXT: ldr q6, [x9, :lo12:.LCPI23_0] ; CHECK-GI-NEXT: adrp x9, .LCPI23_2 -; CHECK-GI-NEXT: sshl v7.2d, v7.2d, v19.2d +; CHECK-GI-NEXT: sshl v7.2d, v7.2d, v17.2d ; CHECK-GI-NEXT: ldr q20, [x9, :lo12:.LCPI23_2] -; CHECK-GI-NEXT: sshl v5.2d, v5.2d, v19.2d +; CHECK-GI-NEXT: sshl v5.2d, 
v5.2d, v17.2d ; CHECK-GI-NEXT: neg v6.2d, v6.2d ; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-GI-NEXT: neg v20.2d, v20.2d ; CHECK-GI-NEXT: ushl v16.2d, v16.2d, v6.2d ; CHECK-GI-NEXT: ushl v6.2d, v18.2d, v6.2d -; CHECK-GI-NEXT: ushll v17.2d, v4.2s, #0 -; CHECK-GI-NEXT: ushll2 v18.2d, v4.4s, #0 -; CHECK-GI-NEXT: ushll v4.2d, v4.2s, #0 +; CHECK-GI-NEXT: neg v18.2d, v20.2d +; CHECK-GI-NEXT: ushll v21.2d, v4.2s, #0 +; CHECK-GI-NEXT: ushll2 v4.2d, v4.4s, #0 ; CHECK-GI-NEXT: add v16.2d, v1.2d, v16.2d ; CHECK-GI-NEXT: add v6.2d, v3.2d, v6.2d -; CHECK-GI-NEXT: shl v17.2d, v17.2d, #63 -; CHECK-GI-NEXT: shl v18.2d, v18.2d, #63 +; CHECK-GI-NEXT: shl v19.2d, v21.2d, #63 ; CHECK-GI-NEXT: shl v4.2d, v4.2d, #63 -; CHECK-GI-NEXT: sshl v16.2d, v16.2d, v20.2d -; CHECK-GI-NEXT: sshl v6.2d, v6.2d, v20.2d -; CHECK-GI-NEXT: sshr v17.2d, v17.2d, #63 -; CHECK-GI-NEXT: sshr v18.2d, v18.2d, #63 +; CHECK-GI-NEXT: sshl v16.2d, v16.2d, v18.2d +; CHECK-GI-NEXT: sshl v6.2d, v6.2d, v18.2d +; CHECK-GI-NEXT: sshr v19.2d, v19.2d, #63 ; CHECK-GI-NEXT: sshr v4.2d, v4.2d, #63 -; CHECK-GI-NEXT: bif v0.16b, v7.16b, v17.16b -; CHECK-GI-NEXT: bif v1.16b, v16.16b, v18.16b -; CHECK-GI-NEXT: bif v2.16b, v5.16b, v4.16b -; CHECK-GI-NEXT: bif v3.16b, v6.16b, v18.16b +; CHECK-GI-NEXT: bif v0.16b, v7.16b, v19.16b +; CHECK-GI-NEXT: bif v1.16b, v16.16b, v4.16b +; CHECK-GI-NEXT: bif v2.16b, v5.16b, v19.16b +; CHECK-GI-NEXT: bif v3.16b, v6.16b, v4.16b ; CHECK-GI-NEXT: ret %1 = sdiv <8 x i64> %x, ret <8 x i64> %1 diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index acf15f1bd1178..da19bdc495f9f 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -272,9 +272,9 @@ define <4 x i16> @concat_undef_first_use_first(ptr %p1, ptr %p2) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldrh w8, [x0] ; CHECK-GI-NEXT: ldrh w9, [x0, #2] -; CHECK-GI-NEXT: fmov s1, w8 -; CHECK-GI-NEXT: mov v1.h[1], w9 -; CHECK-GI-NEXT: mov v0.s[1], 
v1.s[0] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: mov v0.h[1], w9 +; CHECK-GI-NEXT: mov v0.s[1], v0.s[0] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %l1 = load <2 x i16>, ptr %p1 @@ -295,9 +295,9 @@ define <4 x i16> @concat_undef_first_use_second(ptr %p1, ptr %p2) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldrh w8, [x0] ; CHECK-GI-NEXT: ldrh w9, [x0, #2] -; CHECK-GI-NEXT: fmov s1, w8 -; CHECK-GI-NEXT: mov v1.h[1], w9 -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: mov v0.h[1], w9 +; CHECK-GI-NEXT: mov v0.s[1], v0.s[0] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %l1 = load <2 x i16>, ptr %p1 diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll index 903aa8adf7085..17b75f89b32da 100644 --- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll +++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll @@ -144,7 +144,7 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) { ; CHECK-GI-LABEL: fabs_v4f128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov x8, v0.d[1] -; CHECK-GI-NEXT: mov v7.d[0], v0.d[0] +; CHECK-GI-NEXT: mov v0.d[0], v0.d[0] ; CHECK-GI-NEXT: mov x9, v1.d[1] ; CHECK-GI-NEXT: mov x10, v2.d[1] ; CHECK-GI-NEXT: mov x11, v3.d[1] @@ -152,14 +152,13 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) { ; CHECK-GI-NEXT: mov v2.d[0], v2.d[0] ; CHECK-GI-NEXT: mov v3.d[0], v3.d[0] ; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff -; CHECK-GI-NEXT: mov v7.d[1], x8 +; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: and x8, x9, #0x7fffffffffffffff ; CHECK-GI-NEXT: and x9, x10, #0x7fffffffffffffff ; CHECK-GI-NEXT: and x10, x11, #0x7fffffffffffffff ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: mov v3.d[1], x10 -; CHECK-GI-NEXT: mov v0.16b, v7.16b ; CHECK-GI-NEXT: ret entry: %c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a) diff --git a/llvm/test/CodeGen/AArch64/rem.ll b/llvm/test/CodeGen/AArch64/rem.ll 
index 7477d33f9aa46..6236e4b445586 100644 --- a/llvm/test/CodeGen/AArch64/rem.ll +++ b/llvm/test/CodeGen/AArch64/rem.ll @@ -888,194 +888,182 @@ define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-GI-NEXT: .cfi_offset w29, -96 ; CHECK-GI-NEXT: sshll v4.8h, v0.8b, #0 ; CHECK-GI-NEXT: sshll v5.8h, v2.8b, #0 -; CHECK-GI-NEXT: sshll v16.8h, v1.8b, #0 -; CHECK-GI-NEXT: sshll v17.8h, v3.8b, #0 +; CHECK-GI-NEXT: sshll2 v16.8h, v0.16b, #0 +; CHECK-GI-NEXT: sshll2 v17.8h, v2.16b, #0 +; CHECK-GI-NEXT: sshll v19.8h, v1.8b, #0 +; CHECK-GI-NEXT: sshll v18.8h, v3.8b, #0 +; CHECK-GI-NEXT: sshll2 v22.8h, v1.16b, #0 +; CHECK-GI-NEXT: sshll2 v23.8h, v3.16b, #0 ; CHECK-GI-NEXT: sshll v6.4s, v4.4h, #0 ; CHECK-GI-NEXT: sshll v7.4s, v5.4h, #0 -; CHECK-GI-NEXT: sshll2 v4.4s, v4.8h, #0 -; CHECK-GI-NEXT: sshll2 v5.4s, v5.8h, #0 -; CHECK-GI-NEXT: sshll v18.4s, v16.4h, #0 -; CHECK-GI-NEXT: sshll v19.4s, v17.4h, #0 +; CHECK-GI-NEXT: sshll v0.4s, v16.4h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v17.4h, #0 ; CHECK-GI-NEXT: sshll2 v16.4s, v16.8h, #0 ; CHECK-GI-NEXT: sshll2 v17.4s, v17.8h, #0 +; CHECK-GI-NEXT: sshll v20.4s, v19.4h, #0 +; CHECK-GI-NEXT: sshll v21.4s, v18.4h, #0 +; CHECK-GI-NEXT: sshll v1.4s, v22.4h, #0 ; CHECK-GI-NEXT: fmov w8, s6 ; CHECK-GI-NEXT: fmov w9, s7 -; CHECK-GI-NEXT: mov w12, v7.s[3] -; CHECK-GI-NEXT: fmov w13, s5 -; CHECK-GI-NEXT: mov w14, v5.s[1] -; CHECK-GI-NEXT: mov w16, v5.s[3] -; CHECK-GI-NEXT: fmov w6, s19 -; CHECK-GI-NEXT: mov w7, v19.s[3] -; CHECK-GI-NEXT: fmov w21, s17 +; CHECK-GI-NEXT: mov w11, v7.s[3] +; CHECK-GI-NEXT: fmov w17, s2 +; CHECK-GI-NEXT: mov w18, v2.s[3] +; CHECK-GI-NEXT: fmov w2, s17 +; CHECK-GI-NEXT: mov w5, v17.s[3] +; CHECK-GI-NEXT: fmov w6, s21 +; CHECK-GI-NEXT: mov w20, v21.s[3] ; CHECK-GI-NEXT: sdiv w10, w8, w9 ; CHECK-GI-NEXT: mov w8, v6.s[1] ; CHECK-GI-NEXT: mov w9, v7.s[1] -; CHECK-GI-NEXT: mov w22, v17.s[3] -; CHECK-GI-NEXT: sdiv w11, w8, w9 +; CHECK-GI-NEXT: sshll v3.4s, v23.4h, #0 +; CHECK-GI-NEXT: sshll2 v22.4s, v22.8h, #0 +; 
CHECK-GI-NEXT: sshll2 v23.4s, v23.8h, #0 +; CHECK-GI-NEXT: fmov w25, s3 +; CHECK-GI-NEXT: mov w26, v3.s[1] +; CHECK-GI-NEXT: mov w27, v3.s[2] +; CHECK-GI-NEXT: mov w28, v3.s[3] +; CHECK-GI-NEXT: fmov w29, s23 +; CHECK-GI-NEXT: mov w30, v23.s[1] +; CHECK-GI-NEXT: sdiv w12, w8, w9 ; CHECK-GI-NEXT: mov w8, v6.s[2] ; CHECK-GI-NEXT: mov w9, v7.s[2] -; CHECK-GI-NEXT: fmov s20, w10 +; CHECK-GI-NEXT: sshll2 v7.4s, v5.8h, #0 +; CHECK-GI-NEXT: fmov s24, w10 +; CHECK-GI-NEXT: mov w10, v22.s[3] +; CHECK-GI-NEXT: sshll v5.4s, v5.4h, #0 +; CHECK-GI-NEXT: fmov w13, s7 +; CHECK-GI-NEXT: mov w16, v7.s[3] ; CHECK-GI-NEXT: sdiv w9, w8, w9 ; CHECK-GI-NEXT: mov w8, v6.s[3] -; CHECK-GI-NEXT: sshll2 v6.8h, v0.16b, #0 -; CHECK-GI-NEXT: mov v20.s[1], w11 -; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: sshll v28.4s, v0.4h, #0 -; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0 -; CHECK-GI-NEXT: sdiv w8, w8, w12 -; CHECK-GI-NEXT: fmov w12, s4 -; CHECK-GI-NEXT: mov v20.s[2], w9 -; CHECK-GI-NEXT: sdiv w13, w12, w13 -; CHECK-GI-NEXT: mov w12, v4.s[1] +; CHECK-GI-NEXT: sshll2 v6.4s, v4.8h, #0 +; CHECK-GI-NEXT: mov v24.s[1], w12 +; CHECK-GI-NEXT: mov w12, v23.s[3] +; CHECK-GI-NEXT: sshll v4.4s, v4.4h, #0 +; CHECK-GI-NEXT: sdiv w8, w8, w11 +; CHECK-GI-NEXT: fmov w11, s6 +; CHECK-GI-NEXT: mov v24.s[2], w9 +; CHECK-GI-NEXT: sdiv w14, w11, w13 +; CHECK-GI-NEXT: mov w11, v6.s[1] +; CHECK-GI-NEXT: mov w13, v7.s[1] ; CHECK-GI-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; CHECK-GI-NEXT: ldr w11, [sp, #12] // 4-byte Folded Reload -; CHECK-GI-NEXT: mov v20.s[3], w11 -; CHECK-GI-NEXT: sdiv w15, w12, w14 -; CHECK-GI-NEXT: mov w12, v4.s[2] -; CHECK-GI-NEXT: mov w14, v5.s[2] -; CHECK-GI-NEXT: sshll v5.4s, v6.4h, #0 -; CHECK-GI-NEXT: fmov s21, w13 -; CHECK-GI-NEXT: sdiv w14, w12, w14 -; CHECK-GI-NEXT: mov w12, v4.s[3] -; CHECK-GI-NEXT: sshll2 v4.8h, v2.16b, #0 -; CHECK-GI-NEXT: mov v21.s[1], w15 -; CHECK-GI-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-GI-NEXT: sshll v7.4s, v4.4h, #0 -; CHECK-GI-NEXT: sshll 
v30.4s, v2.4h, #0 -; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0 -; CHECK-GI-NEXT: fmov w17, s7 -; CHECK-GI-NEXT: mls v28.4s, v20.4s, v30.4s -; CHECK-GI-NEXT: sdiv w12, w12, w16 -; CHECK-GI-NEXT: fmov w16, s5 -; CHECK-GI-NEXT: mov v21.s[2], w14 -; CHECK-GI-NEXT: sdiv w18, w16, w17 -; CHECK-GI-NEXT: mov w16, v5.s[1] -; CHECK-GI-NEXT: mov w17, v7.s[1] -; CHECK-GI-NEXT: mov v21.s[3], w12 -; CHECK-GI-NEXT: mls v0.4s, v21.4s, v2.4s -; CHECK-GI-NEXT: sdiv w1, w16, w17 -; CHECK-GI-NEXT: mov w16, v5.s[2] -; CHECK-GI-NEXT: mov w17, v7.s[2] -; CHECK-GI-NEXT: fmov s22, w18 -; CHECK-GI-NEXT: uzp1 v0.8h, v28.8h, v0.8h +; CHECK-GI-NEXT: mov w8, v23.s[2] +; CHECK-GI-NEXT: sdiv w15, w11, w13 +; CHECK-GI-NEXT: mov w11, v6.s[2] +; CHECK-GI-NEXT: mov w13, v7.s[2] +; CHECK-GI-NEXT: fmov s25, w14 +; CHECK-GI-NEXT: sdiv w13, w11, w13 +; CHECK-GI-NEXT: mov w11, v6.s[3] +; CHECK-GI-NEXT: mov v25.s[1], w15 +; CHECK-GI-NEXT: sdiv w11, w11, w16 +; CHECK-GI-NEXT: fmov w16, s0 +; CHECK-GI-NEXT: mov v25.s[2], w13 ; CHECK-GI-NEXT: sdiv w0, w16, w17 -; CHECK-GI-NEXT: mov w16, v5.s[3] -; CHECK-GI-NEXT: mov w17, v7.s[3] -; CHECK-GI-NEXT: sshll2 v5.4s, v6.8h, #0 -; CHECK-GI-NEXT: sshll2 v7.4s, v4.8h, #0 -; CHECK-GI-NEXT: mov v22.s[1], w1 -; CHECK-GI-NEXT: sshll v6.4s, v6.4h, #0 -; CHECK-GI-NEXT: sshll v4.4s, v4.4h, #0 -; CHECK-GI-NEXT: fmov w2, s7 -; CHECK-GI-NEXT: mov w3, v7.s[3] -; CHECK-GI-NEXT: sdiv w16, w16, w17 -; CHECK-GI-NEXT: fmov w17, s5 -; CHECK-GI-NEXT: mov v22.s[2], w0 -; CHECK-GI-NEXT: sdiv w5, w17, w2 -; CHECK-GI-NEXT: mov w17, v5.s[1] -; CHECK-GI-NEXT: mov w2, v7.s[1] -; CHECK-GI-NEXT: mov v22.s[3], w16 -; CHECK-GI-NEXT: mls v6.4s, v22.4s, v4.4s -; CHECK-GI-NEXT: sdiv w4, w17, w2 -; CHECK-GI-NEXT: mov w17, v5.s[2] -; CHECK-GI-NEXT: mov w2, v7.s[2] -; CHECK-GI-NEXT: fmov s23, w5 -; CHECK-GI-NEXT: sdiv w2, w17, w2 -; CHECK-GI-NEXT: mov w17, v5.s[3] -; CHECK-GI-NEXT: mov v23.s[1], w4 -; CHECK-GI-NEXT: sdiv w17, w17, w3 -; CHECK-GI-NEXT: fmov w3, s18 -; CHECK-GI-NEXT: mov v23.s[2], w2 -; 
CHECK-GI-NEXT: sdiv w20, w3, w6 -; CHECK-GI-NEXT: mov w3, v18.s[1] -; CHECK-GI-NEXT: mov w6, v19.s[1] -; CHECK-GI-NEXT: mov v23.s[3], w17 -; CHECK-GI-NEXT: mls v5.4s, v23.4s, v7.4s -; CHECK-GI-NEXT: sdiv w19, w3, w6 -; CHECK-GI-NEXT: mov w3, v18.s[2] -; CHECK-GI-NEXT: mov w6, v19.s[2] -; CHECK-GI-NEXT: fmov s24, w20 -; CHECK-GI-NEXT: uzp1 v2.8h, v6.8h, v5.8h -; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: sdiv w6, w3, w6 -; CHECK-GI-NEXT: mov w3, v18.s[3] -; CHECK-GI-NEXT: mov v24.s[1], w19 +; CHECK-GI-NEXT: mov w16, v0.s[1] +; CHECK-GI-NEXT: mov w17, v2.s[1] +; CHECK-GI-NEXT: mov v25.s[3], w11 +; CHECK-GI-NEXT: mls v6.4s, v25.4s, v7.4s +; CHECK-GI-NEXT: sdiv w1, w16, w17 +; CHECK-GI-NEXT: mov w16, v0.s[2] +; CHECK-GI-NEXT: mov w17, v2.s[2] +; CHECK-GI-NEXT: fmov s26, w0 +; CHECK-GI-NEXT: sdiv w17, w16, w17 +; CHECK-GI-NEXT: mov w16, v0.s[3] +; CHECK-GI-NEXT: mov v26.s[1], w1 +; CHECK-GI-NEXT: sdiv w16, w16, w18 +; CHECK-GI-NEXT: fmov w18, s16 +; CHECK-GI-NEXT: mov v26.s[2], w17 +; CHECK-GI-NEXT: sdiv w3, w18, w2 +; CHECK-GI-NEXT: mov w18, v16.s[1] +; CHECK-GI-NEXT: mov w2, v17.s[1] +; CHECK-GI-NEXT: mov v26.s[3], w16 +; CHECK-GI-NEXT: mls v0.4s, v26.4s, v2.4s +; CHECK-GI-NEXT: sdiv w4, w18, w2 +; CHECK-GI-NEXT: mov w18, v16.s[2] +; CHECK-GI-NEXT: mov w2, v17.s[2] +; CHECK-GI-NEXT: fmov s27, w3 +; CHECK-GI-NEXT: sdiv w2, w18, w2 +; CHECK-GI-NEXT: mov w18, v16.s[3] +; CHECK-GI-NEXT: mov v27.s[1], w4 +; CHECK-GI-NEXT: sdiv w18, w18, w5 +; CHECK-GI-NEXT: fmov w5, s20 +; CHECK-GI-NEXT: mov v27.s[2], w2 +; CHECK-GI-NEXT: sdiv w7, w5, w6 +; CHECK-GI-NEXT: mov w5, v20.s[1] +; CHECK-GI-NEXT: mov w6, v21.s[1] +; CHECK-GI-NEXT: mov v27.s[3], w18 +; CHECK-GI-NEXT: mls v16.4s, v27.4s, v17.4s +; CHECK-GI-NEXT: sdiv w19, w5, w6 +; CHECK-GI-NEXT: mov w5, v20.s[2] +; CHECK-GI-NEXT: mov w6, v21.s[2] +; CHECK-GI-NEXT: sshll2 v21.4s, v18.8h, #0 +; CHECK-GI-NEXT: fmov s28, w7 +; CHECK-GI-NEXT: sshll v18.4s, v18.4h, #0 +; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v16.8h +; 
CHECK-GI-NEXT: fmov w21, s21 +; CHECK-GI-NEXT: mov w24, v21.s[3] +; CHECK-GI-NEXT: sdiv w6, w5, w6 +; CHECK-GI-NEXT: mov w5, v20.s[3] +; CHECK-GI-NEXT: sshll2 v20.4s, v19.8h, #0 +; CHECK-GI-NEXT: mov v28.s[1], w19 +; CHECK-GI-NEXT: sshll v19.4s, v19.4h, #0 +; CHECK-GI-NEXT: sdiv w5, w5, w20 +; CHECK-GI-NEXT: fmov w20, s20 +; CHECK-GI-NEXT: mov v28.s[2], w6 +; CHECK-GI-NEXT: sdiv w22, w20, w21 +; CHECK-GI-NEXT: mov w20, v20.s[1] +; CHECK-GI-NEXT: mov w21, v21.s[1] +; CHECK-GI-NEXT: mov v28.s[3], w5 +; CHECK-GI-NEXT: mls v19.4s, v28.4s, v18.4s +; CHECK-GI-NEXT: sdiv w23, w20, w21 +; CHECK-GI-NEXT: mov w20, v20.s[2] +; CHECK-GI-NEXT: mov w21, v21.s[2] +; CHECK-GI-NEXT: fmov s29, w22 +; CHECK-GI-NEXT: sdiv w21, w20, w21 +; CHECK-GI-NEXT: mov w20, v20.s[3] +; CHECK-GI-NEXT: mov v29.s[1], w23 +; CHECK-GI-NEXT: sdiv w20, w20, w24 +; CHECK-GI-NEXT: fmov w24, s1 +; CHECK-GI-NEXT: mov v29.s[2], w21 +; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: sdiv w24, w24, w25 +; CHECK-GI-NEXT: mov w25, v1.s[1] +; CHECK-GI-NEXT: mov v29.s[3], w20 ; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload -; CHECK-GI-NEXT: sdiv w3, w3, w7 -; CHECK-GI-NEXT: fmov w7, s16 -; CHECK-GI-NEXT: mov v24.s[2], w6 -; CHECK-GI-NEXT: sdiv w23, w7, w21 -; CHECK-GI-NEXT: mov w7, v16.s[1] -; CHECK-GI-NEXT: mov w21, v17.s[1] -; CHECK-GI-NEXT: mov v24.s[3], w3 -; CHECK-GI-NEXT: sdiv w24, w7, w21 -; CHECK-GI-NEXT: mov w7, v16.s[2] -; CHECK-GI-NEXT: mov w21, v17.s[2] -; CHECK-GI-NEXT: sshll2 v17.8h, v1.16b, #0 -; CHECK-GI-NEXT: fmov s25, w23 -; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-GI-NEXT: sshll v18.4s, v17.4h, #0 -; CHECK-GI-NEXT: sshll v29.4s, v1.4h, #0 -; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 -; CHECK-GI-NEXT: sdiv w21, w7, w21 -; CHECK-GI-NEXT: mov w7, v16.s[3] -; CHECK-GI-NEXT: sshll2 v16.8h, v3.16b, #0 -; CHECK-GI-NEXT: mov v25.s[1], w24 -; CHECK-GI-NEXT: sshll v3.8h, v3.8b, #0 +; CHECK-GI-NEXT: mls v20.4s, v29.4s, v21.4s +; CHECK-GI-NEXT: sdiv 
w25, w25, w26 +; CHECK-GI-NEXT: mov w26, v1.s[2] +; CHECK-GI-NEXT: fmov s30, w24 ; CHECK-GI-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: sshll v19.4s, v16.4h, #0 -; CHECK-GI-NEXT: sshll v31.4s, v3.4h, #0 -; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 -; CHECK-GI-NEXT: fmov w25, s19 -; CHECK-GI-NEXT: mov w26, v19.s[1] -; CHECK-GI-NEXT: mov w27, v19.s[2] -; CHECK-GI-NEXT: mov w28, v19.s[3] -; CHECK-GI-NEXT: sshll2 v19.4s, v16.8h, #0 -; CHECK-GI-NEXT: sshll v16.4s, v16.4h, #0 -; CHECK-GI-NEXT: sdiv w7, w7, w22 -; CHECK-GI-NEXT: fmov w22, s18 -; CHECK-GI-NEXT: mov v25.s[2], w21 -; CHECK-GI-NEXT: mls v29.4s, v24.4s, v31.4s -; CHECK-GI-NEXT: fmov w29, s19 -; CHECK-GI-NEXT: mov w30, v19.s[1] -; CHECK-GI-NEXT: mov w8, v19.s[2] -; CHECK-GI-NEXT: mov w10, v19.s[3] -; CHECK-GI-NEXT: sdiv w25, w22, w25 -; CHECK-GI-NEXT: mov w22, v18.s[1] -; CHECK-GI-NEXT: mov v25.s[3], w7 -; CHECK-GI-NEXT: mls v1.4s, v25.4s, v3.4s -; CHECK-GI-NEXT: sdiv w26, w22, w26 -; CHECK-GI-NEXT: mov w22, v18.s[2] -; CHECK-GI-NEXT: fmov s26, w25 -; CHECK-GI-NEXT: uzp1 v1.8h, v29.8h, v1.8h -; CHECK-GI-NEXT: sdiv w27, w22, w27 -; CHECK-GI-NEXT: mov w22, v18.s[3] -; CHECK-GI-NEXT: sshll2 v18.4s, v17.8h, #0 -; CHECK-GI-NEXT: mov v26.s[1], w26 -; CHECK-GI-NEXT: sshll v17.4s, v17.4h, #0 +; CHECK-GI-NEXT: sdiv w26, w26, w27 +; CHECK-GI-NEXT: mov w27, v1.s[3] +; CHECK-GI-NEXT: mov v30.s[1], w25 +; CHECK-GI-NEXT: sdiv w27, w27, w28 +; CHECK-GI-NEXT: fmov w28, s22 +; CHECK-GI-NEXT: mov v30.s[2], w26 ; CHECK-GI-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov w9, v18.s[3] -; CHECK-GI-NEXT: sdiv w22, w22, w28 -; CHECK-GI-NEXT: fmov w28, s18 -; CHECK-GI-NEXT: mov v26.s[2], w27 ; CHECK-GI-NEXT: sdiv w28, w28, w29 -; CHECK-GI-NEXT: mov w29, v18.s[1] -; CHECK-GI-NEXT: mov v26.s[3], w22 -; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: mls v17.4s, v26.4s, v16.4s +; CHECK-GI-NEXT: mov w29, v22.s[1] +; CHECK-GI-NEXT: mov v30.s[3], w27 +; 
CHECK-GI-NEXT: mls v1.4s, v30.4s, v3.4s +; CHECK-GI-NEXT: uzp1 v3.8h, v19.8h, v20.8h ; CHECK-GI-NEXT: sdiv w29, w29, w30 -; CHECK-GI-NEXT: mov w30, v18.s[2] -; CHECK-GI-NEXT: fmov s27, w28 +; CHECK-GI-NEXT: mov w30, v22.s[2] +; CHECK-GI-NEXT: fmov s31, w28 ; CHECK-GI-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: sdiv w8, w30, w8 -; CHECK-GI-NEXT: mov v27.s[1], w29 +; CHECK-GI-NEXT: mov v31.s[1], w29 ; CHECK-GI-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: sdiv w9, w9, w10 -; CHECK-GI-NEXT: mov v27.s[2], w8 -; CHECK-GI-NEXT: mov v27.s[3], w9 -; CHECK-GI-NEXT: mls v18.4s, v27.4s, v19.4s -; CHECK-GI-NEXT: uzp1 v3.8h, v17.8h, v18.8h -; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: sdiv w10, w10, w12 +; CHECK-GI-NEXT: mov v31.s[2], w8 +; CHECK-GI-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload +; CHECK-GI-NEXT: mov v24.s[3], w8 +; CHECK-GI-NEXT: mls v4.4s, v24.4s, v5.4s +; CHECK-GI-NEXT: mov v31.s[3], w10 +; CHECK-GI-NEXT: uzp1 v2.8h, v4.8h, v6.8h +; CHECK-GI-NEXT: mls v22.4s, v31.4s, v23.4s +; CHECK-GI-NEXT: uzp1 v0.16b, v2.16b, v0.16b +; CHECK-GI-NEXT: uzp1 v1.8h, v1.8h, v22.8h +; CHECK-GI-NEXT: uzp1 v1.16b, v3.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #112 ; CHECK-GI-NEXT: ret entry: @@ -1810,194 +1798,182 @@ define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) { ; CHECK-GI-NEXT: .cfi_offset w29, -96 ; CHECK-GI-NEXT: ushll v4.8h, v0.8b, #0 ; CHECK-GI-NEXT: ushll v5.8h, v2.8b, #0 -; CHECK-GI-NEXT: ushll v16.8h, v1.8b, #0 -; CHECK-GI-NEXT: ushll v17.8h, v3.8b, #0 +; CHECK-GI-NEXT: ushll2 v16.8h, v0.16b, #0 +; CHECK-GI-NEXT: ushll2 v17.8h, v2.16b, #0 +; CHECK-GI-NEXT: ushll v19.8h, v1.8b, #0 +; CHECK-GI-NEXT: ushll v18.8h, v3.8b, #0 +; CHECK-GI-NEXT: ushll2 v22.8h, v1.16b, #0 +; CHECK-GI-NEXT: ushll2 v23.8h, v3.16b, #0 ; CHECK-GI-NEXT: ushll v6.4s, v4.4h, #0 ; CHECK-GI-NEXT: ushll v7.4s, v5.4h, #0 -; CHECK-GI-NEXT: ushll2 v4.4s, v4.8h, #0 -; CHECK-GI-NEXT: ushll2 v5.4s, v5.8h, #0 -; CHECK-GI-NEXT: ushll 
v18.4s, v16.4h, #0 -; CHECK-GI-NEXT: ushll v19.4s, v17.4h, #0 +; CHECK-GI-NEXT: ushll v0.4s, v16.4h, #0 +; CHECK-GI-NEXT: ushll v2.4s, v17.4h, #0 ; CHECK-GI-NEXT: ushll2 v16.4s, v16.8h, #0 ; CHECK-GI-NEXT: ushll2 v17.4s, v17.8h, #0 +; CHECK-GI-NEXT: ushll v20.4s, v19.4h, #0 +; CHECK-GI-NEXT: ushll v21.4s, v18.4h, #0 +; CHECK-GI-NEXT: ushll v1.4s, v22.4h, #0 ; CHECK-GI-NEXT: fmov w8, s6 ; CHECK-GI-NEXT: fmov w9, s7 -; CHECK-GI-NEXT: mov w12, v7.s[3] -; CHECK-GI-NEXT: fmov w13, s5 -; CHECK-GI-NEXT: mov w14, v5.s[1] -; CHECK-GI-NEXT: mov w16, v5.s[3] -; CHECK-GI-NEXT: fmov w6, s19 -; CHECK-GI-NEXT: mov w7, v19.s[3] -; CHECK-GI-NEXT: fmov w21, s17 +; CHECK-GI-NEXT: mov w11, v7.s[3] +; CHECK-GI-NEXT: fmov w17, s2 +; CHECK-GI-NEXT: mov w18, v2.s[3] +; CHECK-GI-NEXT: fmov w2, s17 +; CHECK-GI-NEXT: mov w5, v17.s[3] +; CHECK-GI-NEXT: fmov w6, s21 +; CHECK-GI-NEXT: mov w20, v21.s[3] ; CHECK-GI-NEXT: udiv w10, w8, w9 ; CHECK-GI-NEXT: mov w8, v6.s[1] ; CHECK-GI-NEXT: mov w9, v7.s[1] -; CHECK-GI-NEXT: mov w22, v17.s[3] -; CHECK-GI-NEXT: udiv w11, w8, w9 +; CHECK-GI-NEXT: ushll v3.4s, v23.4h, #0 +; CHECK-GI-NEXT: ushll2 v22.4s, v22.8h, #0 +; CHECK-GI-NEXT: ushll2 v23.4s, v23.8h, #0 +; CHECK-GI-NEXT: fmov w25, s3 +; CHECK-GI-NEXT: mov w26, v3.s[1] +; CHECK-GI-NEXT: mov w27, v3.s[2] +; CHECK-GI-NEXT: mov w28, v3.s[3] +; CHECK-GI-NEXT: fmov w29, s23 +; CHECK-GI-NEXT: mov w30, v23.s[1] +; CHECK-GI-NEXT: udiv w12, w8, w9 ; CHECK-GI-NEXT: mov w8, v6.s[2] ; CHECK-GI-NEXT: mov w9, v7.s[2] -; CHECK-GI-NEXT: fmov s20, w10 +; CHECK-GI-NEXT: ushll2 v7.4s, v5.8h, #0 +; CHECK-GI-NEXT: fmov s24, w10 +; CHECK-GI-NEXT: mov w10, v22.s[3] +; CHECK-GI-NEXT: ushll v5.4s, v5.4h, #0 +; CHECK-GI-NEXT: fmov w13, s7 +; CHECK-GI-NEXT: mov w16, v7.s[3] ; CHECK-GI-NEXT: udiv w9, w8, w9 ; CHECK-GI-NEXT: mov w8, v6.s[3] -; CHECK-GI-NEXT: ushll2 v6.8h, v0.16b, #0 -; CHECK-GI-NEXT: mov v20.s[1], w11 -; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-GI-NEXT: ushll v28.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll2 
v0.4s, v0.8h, #0 -; CHECK-GI-NEXT: udiv w8, w8, w12 -; CHECK-GI-NEXT: fmov w12, s4 -; CHECK-GI-NEXT: mov v20.s[2], w9 -; CHECK-GI-NEXT: udiv w13, w12, w13 -; CHECK-GI-NEXT: mov w12, v4.s[1] +; CHECK-GI-NEXT: ushll2 v6.4s, v4.8h, #0 +; CHECK-GI-NEXT: mov v24.s[1], w12 +; CHECK-GI-NEXT: mov w12, v23.s[3] +; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0 +; CHECK-GI-NEXT: udiv w8, w8, w11 +; CHECK-GI-NEXT: fmov w11, s6 +; CHECK-GI-NEXT: mov v24.s[2], w9 +; CHECK-GI-NEXT: udiv w14, w11, w13 +; CHECK-GI-NEXT: mov w11, v6.s[1] +; CHECK-GI-NEXT: mov w13, v7.s[1] ; CHECK-GI-NEXT: str w8, [sp, #12] // 4-byte Folded Spill -; CHECK-GI-NEXT: ldr w11, [sp, #12] // 4-byte Folded Reload -; CHECK-GI-NEXT: mov v20.s[3], w11 -; CHECK-GI-NEXT: udiv w15, w12, w14 -; CHECK-GI-NEXT: mov w12, v4.s[2] -; CHECK-GI-NEXT: mov w14, v5.s[2] -; CHECK-GI-NEXT: ushll v5.4s, v6.4h, #0 -; CHECK-GI-NEXT: fmov s21, w13 -; CHECK-GI-NEXT: udiv w14, w12, w14 -; CHECK-GI-NEXT: mov w12, v4.s[3] -; CHECK-GI-NEXT: ushll2 v4.8h, v2.16b, #0 -; CHECK-GI-NEXT: mov v21.s[1], w15 -; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-GI-NEXT: ushll v7.4s, v4.4h, #0 -; CHECK-GI-NEXT: ushll v30.4s, v2.4h, #0 -; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0 -; CHECK-GI-NEXT: fmov w17, s7 -; CHECK-GI-NEXT: mls v28.4s, v20.4s, v30.4s -; CHECK-GI-NEXT: udiv w12, w12, w16 -; CHECK-GI-NEXT: fmov w16, s5 -; CHECK-GI-NEXT: mov v21.s[2], w14 -; CHECK-GI-NEXT: udiv w18, w16, w17 -; CHECK-GI-NEXT: mov w16, v5.s[1] -; CHECK-GI-NEXT: mov w17, v7.s[1] -; CHECK-GI-NEXT: mov v21.s[3], w12 -; CHECK-GI-NEXT: mls v0.4s, v21.4s, v2.4s -; CHECK-GI-NEXT: udiv w1, w16, w17 -; CHECK-GI-NEXT: mov w16, v5.s[2] -; CHECK-GI-NEXT: mov w17, v7.s[2] -; CHECK-GI-NEXT: fmov s22, w18 -; CHECK-GI-NEXT: uzp1 v0.8h, v28.8h, v0.8h +; CHECK-GI-NEXT: mov w8, v23.s[2] +; CHECK-GI-NEXT: udiv w15, w11, w13 +; CHECK-GI-NEXT: mov w11, v6.s[2] +; CHECK-GI-NEXT: mov w13, v7.s[2] +; CHECK-GI-NEXT: fmov s25, w14 +; CHECK-GI-NEXT: udiv w13, w11, w13 +; CHECK-GI-NEXT: mov w11, v6.s[3] 
+; CHECK-GI-NEXT: mov v25.s[1], w15 +; CHECK-GI-NEXT: udiv w11, w11, w16 +; CHECK-GI-NEXT: fmov w16, s0 +; CHECK-GI-NEXT: mov v25.s[2], w13 ; CHECK-GI-NEXT: udiv w0, w16, w17 -; CHECK-GI-NEXT: mov w16, v5.s[3] -; CHECK-GI-NEXT: mov w17, v7.s[3] -; CHECK-GI-NEXT: ushll2 v5.4s, v6.8h, #0 -; CHECK-GI-NEXT: ushll2 v7.4s, v4.8h, #0 -; CHECK-GI-NEXT: mov v22.s[1], w1 -; CHECK-GI-NEXT: ushll v6.4s, v6.4h, #0 -; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-GI-NEXT: fmov w2, s7 -; CHECK-GI-NEXT: mov w3, v7.s[3] -; CHECK-GI-NEXT: udiv w16, w16, w17 -; CHECK-GI-NEXT: fmov w17, s5 -; CHECK-GI-NEXT: mov v22.s[2], w0 -; CHECK-GI-NEXT: udiv w5, w17, w2 -; CHECK-GI-NEXT: mov w17, v5.s[1] -; CHECK-GI-NEXT: mov w2, v7.s[1] -; CHECK-GI-NEXT: mov v22.s[3], w16 -; CHECK-GI-NEXT: mls v6.4s, v22.4s, v4.4s -; CHECK-GI-NEXT: udiv w4, w17, w2 -; CHECK-GI-NEXT: mov w17, v5.s[2] -; CHECK-GI-NEXT: mov w2, v7.s[2] -; CHECK-GI-NEXT: fmov s23, w5 -; CHECK-GI-NEXT: udiv w2, w17, w2 -; CHECK-GI-NEXT: mov w17, v5.s[3] -; CHECK-GI-NEXT: mov v23.s[1], w4 -; CHECK-GI-NEXT: udiv w17, w17, w3 -; CHECK-GI-NEXT: fmov w3, s18 -; CHECK-GI-NEXT: mov v23.s[2], w2 -; CHECK-GI-NEXT: udiv w20, w3, w6 -; CHECK-GI-NEXT: mov w3, v18.s[1] -; CHECK-GI-NEXT: mov w6, v19.s[1] -; CHECK-GI-NEXT: mov v23.s[3], w17 -; CHECK-GI-NEXT: mls v5.4s, v23.4s, v7.4s -; CHECK-GI-NEXT: udiv w19, w3, w6 -; CHECK-GI-NEXT: mov w3, v18.s[2] -; CHECK-GI-NEXT: mov w6, v19.s[2] -; CHECK-GI-NEXT: fmov s24, w20 -; CHECK-GI-NEXT: uzp1 v2.8h, v6.8h, v5.8h -; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: udiv w6, w3, w6 -; CHECK-GI-NEXT: mov w3, v18.s[3] -; CHECK-GI-NEXT: mov v24.s[1], w19 +; CHECK-GI-NEXT: mov w16, v0.s[1] +; CHECK-GI-NEXT: mov w17, v2.s[1] +; CHECK-GI-NEXT: mov v25.s[3], w11 +; CHECK-GI-NEXT: mls v6.4s, v25.4s, v7.4s +; CHECK-GI-NEXT: udiv w1, w16, w17 +; CHECK-GI-NEXT: mov w16, v0.s[2] +; CHECK-GI-NEXT: mov w17, v2.s[2] +; CHECK-GI-NEXT: fmov s26, w0 +; CHECK-GI-NEXT: udiv w17, w16, w17 +; CHECK-GI-NEXT: mov 
w16, v0.s[3] +; CHECK-GI-NEXT: mov v26.s[1], w1 +; CHECK-GI-NEXT: udiv w16, w16, w18 +; CHECK-GI-NEXT: fmov w18, s16 +; CHECK-GI-NEXT: mov v26.s[2], w17 +; CHECK-GI-NEXT: udiv w3, w18, w2 +; CHECK-GI-NEXT: mov w18, v16.s[1] +; CHECK-GI-NEXT: mov w2, v17.s[1] +; CHECK-GI-NEXT: mov v26.s[3], w16 +; CHECK-GI-NEXT: mls v0.4s, v26.4s, v2.4s +; CHECK-GI-NEXT: udiv w4, w18, w2 +; CHECK-GI-NEXT: mov w18, v16.s[2] +; CHECK-GI-NEXT: mov w2, v17.s[2] +; CHECK-GI-NEXT: fmov s27, w3 +; CHECK-GI-NEXT: udiv w2, w18, w2 +; CHECK-GI-NEXT: mov w18, v16.s[3] +; CHECK-GI-NEXT: mov v27.s[1], w4 +; CHECK-GI-NEXT: udiv w18, w18, w5 +; CHECK-GI-NEXT: fmov w5, s20 +; CHECK-GI-NEXT: mov v27.s[2], w2 +; CHECK-GI-NEXT: udiv w7, w5, w6 +; CHECK-GI-NEXT: mov w5, v20.s[1] +; CHECK-GI-NEXT: mov w6, v21.s[1] +; CHECK-GI-NEXT: mov v27.s[3], w18 +; CHECK-GI-NEXT: mls v16.4s, v27.4s, v17.4s +; CHECK-GI-NEXT: udiv w19, w5, w6 +; CHECK-GI-NEXT: mov w5, v20.s[2] +; CHECK-GI-NEXT: mov w6, v21.s[2] +; CHECK-GI-NEXT: ushll2 v21.4s, v18.8h, #0 +; CHECK-GI-NEXT: fmov s28, w7 +; CHECK-GI-NEXT: ushll v18.4s, v18.4h, #0 +; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v16.8h +; CHECK-GI-NEXT: fmov w21, s21 +; CHECK-GI-NEXT: mov w24, v21.s[3] +; CHECK-GI-NEXT: udiv w6, w5, w6 +; CHECK-GI-NEXT: mov w5, v20.s[3] +; CHECK-GI-NEXT: ushll2 v20.4s, v19.8h, #0 +; CHECK-GI-NEXT: mov v28.s[1], w19 +; CHECK-GI-NEXT: ushll v19.4s, v19.4h, #0 +; CHECK-GI-NEXT: udiv w5, w5, w20 +; CHECK-GI-NEXT: fmov w20, s20 +; CHECK-GI-NEXT: mov v28.s[2], w6 +; CHECK-GI-NEXT: udiv w22, w20, w21 +; CHECK-GI-NEXT: mov w20, v20.s[1] +; CHECK-GI-NEXT: mov w21, v21.s[1] +; CHECK-GI-NEXT: mov v28.s[3], w5 +; CHECK-GI-NEXT: mls v19.4s, v28.4s, v18.4s +; CHECK-GI-NEXT: udiv w23, w20, w21 +; CHECK-GI-NEXT: mov w20, v20.s[2] +; CHECK-GI-NEXT: mov w21, v21.s[2] +; CHECK-GI-NEXT: fmov s29, w22 +; CHECK-GI-NEXT: udiv w21, w20, w21 +; CHECK-GI-NEXT: mov w20, v20.s[3] +; CHECK-GI-NEXT: mov v29.s[1], w23 +; CHECK-GI-NEXT: udiv w20, w20, w24 +; CHECK-GI-NEXT: fmov 
w24, s1 +; CHECK-GI-NEXT: mov v29.s[2], w21 +; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: udiv w24, w24, w25 +; CHECK-GI-NEXT: mov w25, v1.s[1] +; CHECK-GI-NEXT: mov v29.s[3], w20 ; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload -; CHECK-GI-NEXT: udiv w3, w3, w7 -; CHECK-GI-NEXT: fmov w7, s16 -; CHECK-GI-NEXT: mov v24.s[2], w6 -; CHECK-GI-NEXT: udiv w23, w7, w21 -; CHECK-GI-NEXT: mov w7, v16.s[1] -; CHECK-GI-NEXT: mov w21, v17.s[1] -; CHECK-GI-NEXT: mov v24.s[3], w3 -; CHECK-GI-NEXT: udiv w24, w7, w21 -; CHECK-GI-NEXT: mov w7, v16.s[2] -; CHECK-GI-NEXT: mov w21, v17.s[2] -; CHECK-GI-NEXT: ushll2 v17.8h, v1.16b, #0 -; CHECK-GI-NEXT: fmov s25, w23 -; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-GI-NEXT: ushll v18.4s, v17.4h, #0 -; CHECK-GI-NEXT: ushll v29.4s, v1.4h, #0 -; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0 -; CHECK-GI-NEXT: udiv w21, w7, w21 -; CHECK-GI-NEXT: mov w7, v16.s[3] -; CHECK-GI-NEXT: ushll2 v16.8h, v3.16b, #0 -; CHECK-GI-NEXT: mov v25.s[1], w24 -; CHECK-GI-NEXT: ushll v3.8h, v3.8b, #0 +; CHECK-GI-NEXT: mls v20.4s, v29.4s, v21.4s +; CHECK-GI-NEXT: udiv w25, w25, w26 +; CHECK-GI-NEXT: mov w26, v1.s[2] +; CHECK-GI-NEXT: fmov s30, w24 ; CHECK-GI-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ushll v19.4s, v16.4h, #0 -; CHECK-GI-NEXT: ushll v31.4s, v3.4h, #0 -; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0 -; CHECK-GI-NEXT: fmov w25, s19 -; CHECK-GI-NEXT: mov w26, v19.s[1] -; CHECK-GI-NEXT: mov w27, v19.s[2] -; CHECK-GI-NEXT: mov w28, v19.s[3] -; CHECK-GI-NEXT: ushll2 v19.4s, v16.8h, #0 -; CHECK-GI-NEXT: ushll v16.4s, v16.4h, #0 -; CHECK-GI-NEXT: udiv w7, w7, w22 -; CHECK-GI-NEXT: fmov w22, s18 -; CHECK-GI-NEXT: mov v25.s[2], w21 -; CHECK-GI-NEXT: mls v29.4s, v24.4s, v31.4s -; CHECK-GI-NEXT: fmov w29, s19 -; CHECK-GI-NEXT: mov w30, v19.s[1] -; CHECK-GI-NEXT: mov w8, v19.s[2] -; CHECK-GI-NEXT: mov w10, v19.s[3] -; CHECK-GI-NEXT: udiv w25, w22, w25 -; CHECK-GI-NEXT: mov w22, v18.s[1] 
-; CHECK-GI-NEXT: mov v25.s[3], w7 -; CHECK-GI-NEXT: mls v1.4s, v25.4s, v3.4s -; CHECK-GI-NEXT: udiv w26, w22, w26 -; CHECK-GI-NEXT: mov w22, v18.s[2] -; CHECK-GI-NEXT: fmov s26, w25 -; CHECK-GI-NEXT: uzp1 v1.8h, v29.8h, v1.8h -; CHECK-GI-NEXT: udiv w27, w22, w27 -; CHECK-GI-NEXT: mov w22, v18.s[3] -; CHECK-GI-NEXT: ushll2 v18.4s, v17.8h, #0 -; CHECK-GI-NEXT: mov v26.s[1], w26 -; CHECK-GI-NEXT: ushll v17.4s, v17.4h, #0 +; CHECK-GI-NEXT: udiv w26, w26, w27 +; CHECK-GI-NEXT: mov w27, v1.s[3] +; CHECK-GI-NEXT: mov v30.s[1], w25 +; CHECK-GI-NEXT: udiv w27, w27, w28 +; CHECK-GI-NEXT: fmov w28, s22 +; CHECK-GI-NEXT: mov v30.s[2], w26 ; CHECK-GI-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov w9, v18.s[3] -; CHECK-GI-NEXT: udiv w22, w22, w28 -; CHECK-GI-NEXT: fmov w28, s18 -; CHECK-GI-NEXT: mov v26.s[2], w27 ; CHECK-GI-NEXT: udiv w28, w28, w29 -; CHECK-GI-NEXT: mov w29, v18.s[1] -; CHECK-GI-NEXT: mov v26.s[3], w22 -; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: mls v17.4s, v26.4s, v16.4s +; CHECK-GI-NEXT: mov w29, v22.s[1] +; CHECK-GI-NEXT: mov v30.s[3], w27 +; CHECK-GI-NEXT: mls v1.4s, v30.4s, v3.4s +; CHECK-GI-NEXT: uzp1 v3.8h, v19.8h, v20.8h ; CHECK-GI-NEXT: udiv w29, w29, w30 -; CHECK-GI-NEXT: mov w30, v18.s[2] -; CHECK-GI-NEXT: fmov s27, w28 +; CHECK-GI-NEXT: mov w30, v22.s[2] +; CHECK-GI-NEXT: fmov s31, w28 ; CHECK-GI-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: udiv w8, w30, w8 -; CHECK-GI-NEXT: mov v27.s[1], w29 +; CHECK-GI-NEXT: mov v31.s[1], w29 ; CHECK-GI-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: udiv w9, w9, w10 -; CHECK-GI-NEXT: mov v27.s[2], w8 -; CHECK-GI-NEXT: mov v27.s[3], w9 -; CHECK-GI-NEXT: mls v18.4s, v27.4s, v19.4s -; CHECK-GI-NEXT: uzp1 v3.8h, v17.8h, v18.8h -; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: udiv w10, w10, w12 +; CHECK-GI-NEXT: mov v31.s[2], w8 +; CHECK-GI-NEXT: ldr w8, [sp, #12] // 4-byte Folded 
Reload +; CHECK-GI-NEXT: mov v24.s[3], w8 +; CHECK-GI-NEXT: mls v4.4s, v24.4s, v5.4s +; CHECK-GI-NEXT: mov v31.s[3], w10 +; CHECK-GI-NEXT: uzp1 v2.8h, v4.8h, v6.8h +; CHECK-GI-NEXT: mls v22.4s, v31.4s, v23.4s +; CHECK-GI-NEXT: uzp1 v0.16b, v2.16b, v0.16b +; CHECK-GI-NEXT: uzp1 v1.8h, v1.8h, v22.8h +; CHECK-GI-NEXT: uzp1 v1.16b, v3.16b, v1.16b ; CHECK-GI-NEXT: add sp, sp, #112 ; CHECK-GI-NEXT: ret entry: @@ -2384,51 +2360,49 @@ define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: sshll v4.4s, v0.4h, #0 ; CHECK-GI-NEXT: sshll v5.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0 ; CHECK-GI-NEXT: sshll v6.4s, v1.4h, #0 ; CHECK-GI-NEXT: sshll v7.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 ; CHECK-GI-NEXT: fmov w8, s4 ; CHECK-GI-NEXT: fmov w9, s5 ; CHECK-GI-NEXT: mov w12, v5.s[3] +; CHECK-GI-NEXT: fmov w13, s2 +; CHECK-GI-NEXT: mov w14, v2.s[1] +; CHECK-GI-NEXT: mov w15, v2.s[2] +; CHECK-GI-NEXT: mov w16, v2.s[3] ; CHECK-GI-NEXT: fmov w17, s7 ; CHECK-GI-NEXT: mov w18, v7.s[1] -; CHECK-GI-NEXT: mov w0, v7.s[2] -; CHECK-GI-NEXT: mov w1, v7.s[3] -; CHECK-GI-NEXT: sshll2 v7.4s, v3.8h, #0 -; CHECK-GI-NEXT: sshll v3.4s, v3.4h, #0 ; CHECK-GI-NEXT: sdiv w10, w8, w9 ; CHECK-GI-NEXT: mov w8, v4.s[1] ; CHECK-GI-NEXT: mov w9, v5.s[1] -; CHECK-GI-NEXT: fmov w2, s7 -; CHECK-GI-NEXT: mov w3, v7.s[1] -; CHECK-GI-NEXT: mov w4, v7.s[2] -; CHECK-GI-NEXT: mov w5, v7.s[3] +; CHECK-GI-NEXT: mov w0, v7.s[2] +; CHECK-GI-NEXT: mov w1, v7.s[3] +; CHECK-GI-NEXT: fmov w2, s3 +; CHECK-GI-NEXT: mov w3, v3.s[1] +; CHECK-GI-NEXT: mov w4, v3.s[2] +; CHECK-GI-NEXT: mov w5, v3.s[3] ; CHECK-GI-NEXT: sdiv w11, w8, w9 ; CHECK-GI-NEXT: mov w8, v4.s[2] ; CHECK-GI-NEXT: mov w9, v5.s[2] -; CHECK-GI-NEXT: sshll2 v5.4s, v2.8h, #0 ; CHECK-GI-NEXT: fmov s16, w10 -; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: fmov w13, s5 -; CHECK-GI-NEXT: mov w14, 
v5.s[1] -; CHECK-GI-NEXT: mov w15, v5.s[2] -; CHECK-GI-NEXT: mov w16, v5.s[3] ; CHECK-GI-NEXT: sdiv w9, w8, w9 ; CHECK-GI-NEXT: mov w8, v4.s[3] -; CHECK-GI-NEXT: sshll2 v4.4s, v0.8h, #0 ; CHECK-GI-NEXT: mov v16.s[1], w11 -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: sdiv w8, w8, w12 -; CHECK-GI-NEXT: fmov w12, s4 +; CHECK-GI-NEXT: fmov w12, s0 ; CHECK-GI-NEXT: mov v16.s[2], w9 ; CHECK-GI-NEXT: sdiv w13, w12, w13 -; CHECK-GI-NEXT: mov w12, v4.s[1] +; CHECK-GI-NEXT: mov w12, v0.s[1] ; CHECK-GI-NEXT: mov v16.s[3], w8 -; CHECK-GI-NEXT: mls v0.4s, v16.4s, v2.4s +; CHECK-GI-NEXT: mls v4.4s, v16.4s, v5.4s ; CHECK-GI-NEXT: sdiv w14, w12, w14 -; CHECK-GI-NEXT: mov w12, v4.s[2] +; CHECK-GI-NEXT: mov w12, v0.s[2] ; CHECK-GI-NEXT: fmov s17, w13 ; CHECK-GI-NEXT: sdiv w15, w12, w15 -; CHECK-GI-NEXT: mov w12, v4.s[3] +; CHECK-GI-NEXT: mov w12, v0.s[3] ; CHECK-GI-NEXT: mov v17.s[1], w14 ; CHECK-GI-NEXT: sdiv w12, w12, w16 ; CHECK-GI-NEXT: fmov w16, s6 @@ -2436,34 +2410,32 @@ define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-GI-NEXT: sdiv w16, w16, w17 ; CHECK-GI-NEXT: mov w17, v6.s[1] ; CHECK-GI-NEXT: mov v17.s[3], w12 -; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s +; CHECK-GI-NEXT: mls v0.4s, v17.4s, v2.4s ; CHECK-GI-NEXT: sdiv w17, w17, w18 ; CHECK-GI-NEXT: mov w18, v6.s[2] ; CHECK-GI-NEXT: fmov s18, w16 -; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: uzp1 v0.8h, v4.8h, v0.8h ; CHECK-GI-NEXT: sdiv w18, w18, w0 ; CHECK-GI-NEXT: mov w0, v6.s[3] -; CHECK-GI-NEXT: sshll2 v6.4s, v1.8h, #0 ; CHECK-GI-NEXT: mov v18.s[1], w17 -; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: sdiv w0, w0, w1 -; CHECK-GI-NEXT: fmov w1, s6 +; CHECK-GI-NEXT: fmov w1, s1 ; CHECK-GI-NEXT: mov v18.s[2], w18 ; CHECK-GI-NEXT: sdiv w1, w1, w2 -; CHECK-GI-NEXT: mov w2, v6.s[1] +; CHECK-GI-NEXT: mov w2, v1.s[1] ; CHECK-GI-NEXT: mov v18.s[3], w0 -; CHECK-GI-NEXT: mls v1.4s, v18.4s, v3.4s +; CHECK-GI-NEXT: mls v6.4s, v18.4s, v7.4s ; CHECK-GI-NEXT: sdiv w2, w2, w3 -; 
CHECK-GI-NEXT: mov w3, v6.s[2] +; CHECK-GI-NEXT: mov w3, v1.s[2] ; CHECK-GI-NEXT: fmov s19, w1 ; CHECK-GI-NEXT: sdiv w3, w3, w4 -; CHECK-GI-NEXT: mov w4, v6.s[3] +; CHECK-GI-NEXT: mov w4, v1.s[3] ; CHECK-GI-NEXT: mov v19.s[1], w2 ; CHECK-GI-NEXT: sdiv w10, w4, w5 ; CHECK-GI-NEXT: mov v19.s[2], w3 ; CHECK-GI-NEXT: mov v19.s[3], w10 -; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s -; CHECK-GI-NEXT: uzp1 v1.8h, v1.8h, v6.8h +; CHECK-GI-NEXT: mls v1.4s, v19.4s, v3.4s +; CHECK-GI-NEXT: uzp1 v1.8h, v6.8h, v1.8h ; CHECK-GI-NEXT: ret entry: %s = srem <16 x i16> %d, %e @@ -2852,51 +2824,49 @@ define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0 ; CHECK-GI-NEXT: ushll v5.4s, v2.4h, #0 +; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0 ; CHECK-GI-NEXT: ushll v6.4s, v1.4h, #0 ; CHECK-GI-NEXT: ushll v7.4s, v3.4h, #0 +; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0 +; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0 ; CHECK-GI-NEXT: fmov w8, s4 ; CHECK-GI-NEXT: fmov w9, s5 ; CHECK-GI-NEXT: mov w12, v5.s[3] +; CHECK-GI-NEXT: fmov w13, s2 +; CHECK-GI-NEXT: mov w14, v2.s[1] +; CHECK-GI-NEXT: mov w15, v2.s[2] +; CHECK-GI-NEXT: mov w16, v2.s[3] ; CHECK-GI-NEXT: fmov w17, s7 ; CHECK-GI-NEXT: mov w18, v7.s[1] -; CHECK-GI-NEXT: mov w0, v7.s[2] -; CHECK-GI-NEXT: mov w1, v7.s[3] -; CHECK-GI-NEXT: ushll2 v7.4s, v3.8h, #0 -; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0 ; CHECK-GI-NEXT: udiv w10, w8, w9 ; CHECK-GI-NEXT: mov w8, v4.s[1] ; CHECK-GI-NEXT: mov w9, v5.s[1] -; CHECK-GI-NEXT: fmov w2, s7 -; CHECK-GI-NEXT: mov w3, v7.s[1] -; CHECK-GI-NEXT: mov w4, v7.s[2] -; CHECK-GI-NEXT: mov w5, v7.s[3] +; CHECK-GI-NEXT: mov w0, v7.s[2] +; CHECK-GI-NEXT: mov w1, v7.s[3] +; CHECK-GI-NEXT: fmov w2, s3 +; CHECK-GI-NEXT: mov w3, v3.s[1] +; CHECK-GI-NEXT: mov w4, v3.s[2] +; CHECK-GI-NEXT: mov w5, v3.s[3] ; CHECK-GI-NEXT: udiv w11, w8, w9 ; CHECK-GI-NEXT: mov w8, v4.s[2] ; CHECK-GI-NEXT: mov w9, v5.s[2] -; CHECK-GI-NEXT: 
ushll2 v5.4s, v2.8h, #0 ; CHECK-GI-NEXT: fmov s16, w10 -; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: fmov w13, s5 -; CHECK-GI-NEXT: mov w14, v5.s[1] -; CHECK-GI-NEXT: mov w15, v5.s[2] -; CHECK-GI-NEXT: mov w16, v5.s[3] ; CHECK-GI-NEXT: udiv w9, w8, w9 ; CHECK-GI-NEXT: mov w8, v4.s[3] -; CHECK-GI-NEXT: ushll2 v4.4s, v0.8h, #0 ; CHECK-GI-NEXT: mov v16.s[1], w11 -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: udiv w8, w8, w12 -; CHECK-GI-NEXT: fmov w12, s4 +; CHECK-GI-NEXT: fmov w12, s0 ; CHECK-GI-NEXT: mov v16.s[2], w9 ; CHECK-GI-NEXT: udiv w13, w12, w13 -; CHECK-GI-NEXT: mov w12, v4.s[1] +; CHECK-GI-NEXT: mov w12, v0.s[1] ; CHECK-GI-NEXT: mov v16.s[3], w8 -; CHECK-GI-NEXT: mls v0.4s, v16.4s, v2.4s +; CHECK-GI-NEXT: mls v4.4s, v16.4s, v5.4s ; CHECK-GI-NEXT: udiv w14, w12, w14 -; CHECK-GI-NEXT: mov w12, v4.s[2] +; CHECK-GI-NEXT: mov w12, v0.s[2] ; CHECK-GI-NEXT: fmov s17, w13 ; CHECK-GI-NEXT: udiv w15, w12, w15 -; CHECK-GI-NEXT: mov w12, v4.s[3] +; CHECK-GI-NEXT: mov w12, v0.s[3] ; CHECK-GI-NEXT: mov v17.s[1], w14 ; CHECK-GI-NEXT: udiv w12, w12, w16 ; CHECK-GI-NEXT: fmov w16, s6 @@ -2904,34 +2874,32 @@ define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) { ; CHECK-GI-NEXT: udiv w16, w16, w17 ; CHECK-GI-NEXT: mov w17, v6.s[1] ; CHECK-GI-NEXT: mov v17.s[3], w12 -; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s +; CHECK-GI-NEXT: mls v0.4s, v17.4s, v2.4s ; CHECK-GI-NEXT: udiv w17, w17, w18 ; CHECK-GI-NEXT: mov w18, v6.s[2] ; CHECK-GI-NEXT: fmov s18, w16 -; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: uzp1 v0.8h, v4.8h, v0.8h ; CHECK-GI-NEXT: udiv w18, w18, w0 ; CHECK-GI-NEXT: mov w0, v6.s[3] -; CHECK-GI-NEXT: ushll2 v6.4s, v1.8h, #0 ; CHECK-GI-NEXT: mov v18.s[1], w17 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: udiv w0, w0, w1 -; CHECK-GI-NEXT: fmov w1, s6 +; CHECK-GI-NEXT: fmov w1, s1 ; CHECK-GI-NEXT: mov v18.s[2], w18 ; CHECK-GI-NEXT: udiv w1, w1, w2 -; CHECK-GI-NEXT: mov w2, v6.s[1] +; CHECK-GI-NEXT: mov w2, v1.s[1] ; 
CHECK-GI-NEXT: mov v18.s[3], w0 -; CHECK-GI-NEXT: mls v1.4s, v18.4s, v3.4s +; CHECK-GI-NEXT: mls v6.4s, v18.4s, v7.4s ; CHECK-GI-NEXT: udiv w2, w2, w3 -; CHECK-GI-NEXT: mov w3, v6.s[2] +; CHECK-GI-NEXT: mov w3, v1.s[2] ; CHECK-GI-NEXT: fmov s19, w1 ; CHECK-GI-NEXT: udiv w3, w3, w4 -; CHECK-GI-NEXT: mov w4, v6.s[3] +; CHECK-GI-NEXT: mov w4, v1.s[3] ; CHECK-GI-NEXT: mov v19.s[1], w2 ; CHECK-GI-NEXT: udiv w10, w4, w5 ; CHECK-GI-NEXT: mov v19.s[2], w3 ; CHECK-GI-NEXT: mov v19.s[3], w10 -; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s -; CHECK-GI-NEXT: uzp1 v1.8h, v1.8h, v6.8h +; CHECK-GI-NEXT: mls v1.4s, v19.4s, v3.4s +; CHECK-GI-NEXT: uzp1 v1.8h, v6.8h, v1.8h ; CHECK-GI-NEXT: ret entry: %s = urem <16 x i16> %d, %e