diff --git a/clang/include/clang/Basic/BuiltinsAArch64.td b/clang/include/clang/Basic/BuiltinsAArch64.td
index ba30e344911aa..7b8f19f2893b3 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.td
+++ b/clang/include/clang/Basic/BuiltinsAArch64.td
@@ -171,6 +171,10 @@ let Attributes = [NoThrow], Features = "ls64" in {
   def st64bv0 : AArch64TargetBuiltin<"uint64_t (void *, uint64_t const *)">;
 }
 
+let Attributes = [NoThrow, CustomTypeChecking] in {
+  def atomic_store_with_hint : AArch64Builtin<"void(...)">;
+}
+
 // Armv9.3-A Guarded Control Stack
 let Attributes = [NoThrow], Features = "gcs" in {
   def gcspopm : AArch64TargetBuiltin<"uint64_t (uint64_t)">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c73c116cdc451..241aef979728a 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9666,6 +9666,12 @@ def err_atomic_op_needs_atomic_int_or_fp : Error<
 def err_atomic_op_needs_atomic_int : Error<
   "address argument to atomic operation must be a pointer to "
   "%select{|atomic }0integer (%1 invalid)">;
+def err_atomic_op_hint_data_size : Error<
+  "address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits">;
+def err_atomic_hint_has_invalid_memory_order : Error<
+  "invalid memory order argument to atomic hint operation (%0 invalid)">;
+def err_atomic_hint_has_invalid_hint_type : Error<
+  "invalid hint type argument to atomic hint operation (%0 invalid)">;
 def warn_atomic_op_has_invalid_memory_order : Warning<
   "%select{|success |failure }0memory order argument to atomic operation is invalid">,
   InGroup<DiagGroup<"atomic-memory-ordering">>;
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index af8e0e9047171..b0a01c40ffece 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -70,6 +70,7 @@ class SemaARM : public SemaBase {
   bool BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, int ArgNum,
                             unsigned ExpectedFieldNum, bool AllowName);
   bool BuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool BuiltinARMAtomicStoreHintCall(unsigned BuiltinID, CallExpr *TheCall);
 
   bool MveAliasValid(unsigned BuiltinID, llvm::StringRef AliasName);
   bool CdeAliasValid(unsigned BuiltinID, llvm::StringRef AliasName);
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 95168e94106cc..1162d0a31e975 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 #include "llvm/TargetParser/AArch64TargetParser.h"
 
 #include <numeric>
@@ -2129,6 +2130,71 @@ static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
                             Ops);
 }
 
+/// Emit __builtin_arm_atomic_store_with_hint(ptr, value, order, hint) as an
+/// atomic store tagged with !aarch64.atomic.hint metadata.
+///
+/// The memory order (argument 2) and hint (argument 3) must be integer
+/// constants with supported values; the unreachable/assert checks below treat
+/// anything else as an internal error.
+static Value *EmitAtomicStoreWithHintBuiltin(CodeGenFunction &CGF,
+                                             unsigned BuiltinID,
+                                             const CallExpr *E) {
+  CodeGen::CGBuilderTy &Builder = CGF.Builder;
+  CodeGen::CodeGenModule &CGM = CGF.CGM;
+  Expr::EvalResult Result;
+  if (!E->getArg(2)->EvaluateAsInt(Result, CGM.getContext()))
+    llvm_unreachable(
+        "Expected integer memory order argument to atomic store with hint.");
+
+  // Emit the operands as separate statements, pointer first. Nesting both
+  // Emit* calls in one argument list leaves their evaluation order
+  // unspecified, so the order of the emitted IR would depend on the host
+  // compiler.
+  Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
+  // Convert the scalar to its in-memory representation before storing it, so
+  // a value whose scalar and memory types differ (e.g. _Bool) is stored
+  // correctly.
+  Value *Val = CGF.EmitToMemory(CGF.EmitScalarExpr(E->getArg(1)),
+                                E->getArg(1)->getType());
+  StoreInst *Store = Builder.CreateStore(Val, Ptr);
+
+  AtomicOrdering Ordering;
+  unsigned OrderingArg = Result.Val.getInt().getExtValue();
+  assert(isValidAtomicOrderingCABI(OrderingArg) && "Invalid atomic ordering");
+
+  // Only relaxed, release and seq_cst are accepted here.
+  switch (static_cast<AtomicOrderingCABI>(OrderingArg)) {
+  default:
+    llvm_unreachable("Unsupported atomic ordering found.");
+  case AtomicOrderingCABI::relaxed:
+    Ordering = AtomicOrdering::Monotonic;
+    break;
+  case AtomicOrderingCABI::release:
+    Ordering = AtomicOrdering::Release;
+    break;
+  case AtomicOrderingCABI::seq_cst:
+    Ordering = AtomicOrdering::SequentiallyConsistent;
+    break;
+  }
+  Store->setAtomic(Ordering);
+
+  if (!E->getArg(3)->EvaluateAsInt(Result, CGM.getContext()))
+    llvm_unreachable(
+        "Expected integer hint argument to atomic store with hint.");
+  unsigned HintArg = Result.Val.getInt().getExtValue();
+  assert((getAtomicStoreHintFromMD(HintArg) !=
+          AArch64AtomicStoreHint::HINT_NONE) &&
+         "Invalid hint type");
+
+  // Attach the raw hint value as metadata on the store.
+  MDNode *HintMDVal =
+      MDNode::get(CGM.getLLVMContext(),
+                  llvm::ConstantAsMetadata::get(Builder.getInt32(HintArg)));
+  Store->setMetadata(CGM.getModule().getMDKindID("aarch64.atomic.hint"),
+                     HintMDVal);
+
+  return Store;
+}
+
 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
 /// argument that specifies the vector type. The additional argument is meant
 /// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
@@ -4893,6 +4959,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
       BuiltinID == AArch64::BI__builtin_arm_range_prefetch_x)
     return EmitRangePrefetchBuiltin(*this, BuiltinID, E);
 
+  if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_hint)
+    return EmitAtomicStoreWithHintBuiltin(*this, BuiltinID, E);
+
   // Memory Tagging Extensions (MTE) Intrinsics
   Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 9a6b6a837fa5a..bd99527dc5fa8 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -741,6 +741,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
 }
 #endif
 
+/* Atomic store with hints */
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+#define __arm_atomic_store_with_hint(ptr, data, memory_order, hint)            \
+  __builtin_arm_atomic_store_with_hint(ptr, data, memory_order, hint)
+#endif
+
 /* 11.1 Special register intrinsics */
 #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
 #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index
f57c9c8b87cd5..ef5c46de1e174 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -17,6 +17,7 @@
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/ParsedAttr.h"
 #include "clang/Sema/Sema.h"
+#include "llvm/Support/AArch64AtomicHints.h"
 
 namespace clang {
 
@@ -320,6 +321,121 @@ bool SemaARM::BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
   return false;
 }
 
+/// Check a call to __builtin_arm_atomic_store_with_hint(ptr, data, order,
+/// hint): a pointer to an 8, 16, 32 or 64-bit integral or floating-point
+/// type, a value of the pointee type, and constant memory order and hint
+/// arguments. The converted arguments are stored back into the call.
+///
+/// \returns true if the call is invalid, false otherwise.
+bool SemaARM::BuiltinARMAtomicStoreHintCall(unsigned BuiltinID,
+                                            CallExpr *TheCall) {
+  if (SemaRef.checkArgCount(TheCall, 4))
+    return true;
+
+  ASTContext &Context = getASTContext();
+
+  // Arg 0 should be the pointer type. The pointee type must be a
+  // scalar integral or floating-point type of 8, 16, 32 or 64 bits.
+  // Check the converted expression, not the original argument, so that an
+  // array argument is seen after it has decayed to a pointer.
+  ExprResult PtrArgRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(0));
+  if (PtrArgRes.isInvalid())
+    return true;
+  Expr *PtrArg = PtrArgRes.get();
+  TheCall->setArg(0, PtrArg);
+
+  const auto *PtrTy = PtrArg->getType()->getAs<PointerType>();
+  if (!PtrTy)
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_builtin_must_be_pointer)
+           << PtrArg->getType() << 0 << PtrArg->getSourceRange();
+  QualType PtrQT = PtrTy->getPointeeType();
+
+  // TODO: Allow MFloat8 types when supported by atomic store
+  if (!PtrQT->isIntegralType(Context) && !PtrQT->isFloatingType())
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_op_needs_atomic_int_or_fp)
+           << 0 << PtrQT << PtrArg->getSourceRange();
+
+  unsigned TySize =
+      Context.getTypeSize(Context.getCanonicalType(PtrQT).getUnqualifiedType());
+  if (TySize != 8 && TySize != 16 && TySize != 32 && TySize != 64)
+    return Diag(TheCall->getBeginLoc(), diag::err_atomic_op_hint_data_size)
+           << PtrArg->getSourceRange();
+
+  // Arg 1 is the data to be stored. The type must match the pointee
+  // type found above.
+  ExprResult DataArgRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(1));
+  if (DataArgRes.isInvalid())
+    return true;
+  TheCall->setArg(1, DataArgRes.get());
+  QualType DataQT = DataArgRes.get()->getType();
+
+  // Compare canonical types so that a typedef and the type it names are not
+  // reported as different.
+  if (!Context.hasSameType(PtrQT, DataQT))
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_typecheck_call_different_arg_types)
+           << PtrQT << DataQT;
+
+  // Arg 2 is the memory order, which must be relaxed, release or seq_cst
+  ExprResult MemOrdRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(2));
+  if (MemOrdRes.isInvalid())
+    return true;
+  Expr *MemOrdArg = MemOrdRes.get();
+  TheCall->setArg(2, MemOrdArg);
+  std::optional<llvm::APSInt> MemOrdAP =
+      MemOrdArg->getIntegerConstantExpr(Context);
+  if (!MemOrdAP)
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_hint_has_invalid_memory_order)
+           << MemOrdArg->getType() << MemOrdArg->getSourceRange();
+
+  // Validate at full width: narrowing to unsigned first would let a wide
+  // constant such as 0x100000000 wrap around to a valid value.
+  if (MemOrdAP->getActiveBits() > 64 ||
+      !llvm::isValidAtomicOrderingCABI(MemOrdAP->getZExtValue()))
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_hint_has_invalid_memory_order)
+           << *MemOrdAP << MemOrdArg->getSourceRange();
+
+  auto AtomicOrdering =
+      static_cast<llvm::AtomicOrderingCABI>(MemOrdAP->getZExtValue());
+  if (AtomicOrdering != llvm::AtomicOrderingCABI::relaxed &&
+      AtomicOrdering != llvm::AtomicOrderingCABI::release &&
+      AtomicOrdering != llvm::AtomicOrderingCABI::seq_cst)
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_hint_has_invalid_memory_order)
+           << *MemOrdAP << MemOrdArg->getSourceRange();
+
+  // Arg 3 is the hint type. Only values represented by AArch64AtomicStoreHint
+  // are valid.
+  ExprResult HintRes =
+      SemaRef.DefaultFunctionArrayLvalueConversion(TheCall->getArg(3));
+  if (HintRes.isInvalid())
+    return true;
+  Expr *HintArg = HintRes.get();
+  TheCall->setArg(3, HintArg);
+  std::optional<llvm::APSInt> HintAP = HintArg->getIntegerConstantExpr(Context);
+  if (!HintAP)
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_hint_has_invalid_hint_type)
+           << HintArg->getType() << HintArg->getSourceRange();
+
+  // As above, do not narrow the constant before validating it.
+  if (HintAP->getActiveBits() > 64 ||
+      llvm::getAtomicStoreHintFromMD(HintAP->getZExtValue()) ==
+          llvm::AArch64AtomicStoreHint::HINT_NONE)
+    return Diag(TheCall->getBeginLoc(),
+                diag::err_atomic_hint_has_invalid_hint_type)
+           << *HintAP << HintArg->getSourceRange();
+
+  return false;
+}
+
 /// getNeonEltType - Return the QualType corresponding to the elements of
 /// the vector type specified by the NeonTypeFlags. This is used to check
 /// the pointer arguments for Neon load/store intrinsics.
@@ -1164,6 +1280,9 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
       BuiltinID == AArch64::BI__builtin_arm_wsrp)
     return BuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true);
 
+  if (BuiltinID == AArch64::BI__builtin_arm_atomic_store_with_hint)
+    return BuiltinARMAtomicStoreHintCall(BuiltinID, TheCall);
+
   // Only check the valid encoding range. Any constant in this range would be
   // converted to a register of the form S2_2_C3_C4_5. Let the hardware throw
   // an exception for incorrect registers. This matches MSVC behavior.
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c index cd18fa63bfdbd..a8aa0916a8a4c 100644 --- a/clang/test/CodeGen/arm_acle.c +++ b/clang/test/CodeGen/arm_acle.c @@ -1821,3 +1821,81 @@ int test_rndrrs(uint64_t *__addr) { return __rndrrs(__addr); } #endif + +#if defined(__ARM_64BIT_STATE) + +// AArch64-LABEL: @test_atomic_store_hint_char( +// AArch64-NEXT: entry: +// AArch64-NEXT: store atomic i8 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 1, !aarch64.atomic.hint [[META3:![0-9]+]] +// AArch64-NEXT: ret void +// +void test_atomic_store_hint_char(char *ptr, char data) { + __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 0); +} + +// AArch64-LABEL: @test_atomic_store_hint_bfloat( +// AArch64-NEXT: entry: +// AArch64-NEXT: store atomic bfloat [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META4:![0-9]+]] +// AArch64-NEXT: ret void +// +void test_atomic_store_hint_bfloat(__bf16 *ptr, __bf16 data) { + __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 1); +} + +// AArch64-LABEL: @test_atomic_store_hint_short( +// AArch64-NEXT: entry: +// AArch64-NEXT: store atomic i16 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 2, !aarch64.atomic.hint [[META3]] +// AArch64-NEXT: ret void +// +void test_atomic_store_hint_short(short *ptr, short data) { + __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0); +} + +// AArch64-LABEL: @test_atomic_store_hint_u32( +// AArch64-NEXT: entry: +// AArch64-NEXT: store atomic i32 [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]] +// AArch64-NEXT: ret void +// +void test_atomic_store_hint_u32(uint32_t *ptr, uint32_t data) { + __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0); +} + +// AArch64-LABEL: @test_atomic_store_hint_float( +// AArch64-NEXT: entry: +// AArch64-NEXT: store atomic float [[DATA:%.*]], ptr [[PTR:%.*]] seq_cst, align 4, !aarch64.atomic.hint [[META3]] +// AArch64-NEXT: ret void +// +void 
test_atomic_store_hint_float(float *ptr, float data) { + __arm_atomic_store_with_hint(ptr, data, __ATOMIC_SEQ_CST, 0); +} + +// AArch64-LABEL: @test_atomic_store_hint_s64( +// AArch64-NEXT: entry: +// AArch64-NEXT: store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 8, !aarch64.atomic.hint [[META4]] +// AArch64-NEXT: ret void +// +void test_atomic_store_hint_s64(int64_t *ptr, int64_t data) { + __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1); +} + +// AArch64-LABEL: @test_atomic_store_hint_long_long_int( +// AArch64-NEXT: entry: +// AArch64-NEXT: store atomic i64 [[DATA:%.*]], ptr [[PTR:%.*]] release, align 8, !aarch64.atomic.hint [[META3]] +// AArch64-NEXT: ret void +// +void test_atomic_store_hint_long_long_int(long long int *ptr, long long int data) { + __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELEASE, 0); +} + +// AArch64-LABEL: @test_atomic_store_hint_double( +// AArch64-NEXT: entry: +// AArch64-NEXT: store atomic double [[DATA:%.*]], ptr [[PTR:%.*]] monotonic, align 8, !aarch64.atomic.hint [[META4]] +// AArch64-NEXT: ret void +// +void test_atomic_store_hint_double(double *ptr, double data) { + __arm_atomic_store_with_hint(ptr, data, __ATOMIC_RELAXED, 1); +} + +// AArch64: [[META3]] = !{i32 0} +// AArch64-NEXT: [[META4]] = !{i32 1} +#endif diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c index 3d054c79f1777..ad9ba7feca671 100644 --- a/clang/test/CodeGen/builtins-arm64.c +++ b/clang/test/CodeGen/builtins-arm64.c @@ -216,4 +216,17 @@ void trap() { __builtin_arm_trap(42); } +void atomic_store_with_hint(int64_t *a, int64_t b) { + __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELAXED, 0); // HINT_STSHH_KEEP + // CHECK: store atomic i64 {{.*}}, ptr {{.*}} monotonic, align 8, !aarch64.atomic.hint ![[M1:[0-9]]] + + __builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_SEQ_CST, 0); + // CHECK: store atomic i64 {{.*}}, ptr {{.*}} seq_cst, align 8, !aarch64.atomic.hint ![[M1]] + + 
__builtin_arm_atomic_store_with_hint(a, b, __ATOMIC_RELEASE, 1); // HINT_STSHH_STRM + // CHECK: store atomic i64 {{.*}}, ptr {{.*}} release, align 8, !aarch64.atomic.hint ![[M2:[0-9]]] +} + // CHECK: ![[M0]] = !{!"1:2:3:4:5"} +// CHECK: ![[M1]] = !{i32 0} +// CHECK: ![[M2]] = !{i32 1} diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c index 41cffd7ebb1a0..fb4718a1bd1f4 100644 --- a/clang/test/Sema/builtins-arm64.c +++ b/clang/test/Sema/builtins-arm64.c @@ -51,3 +51,20 @@ void test_trap(short s, unsigned short us) { __builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}} __builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}} } + +void test_atomic_store_hint(char *c_ptr, __int128 *inv_ptr, float *f_ptr, + char c_data, __int128 inv_data, float f_data, + int inv_int) { + __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0); // expected-error {{too few arguments to function call, expected 4, have 3}} + __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}} + + __builtin_arm_atomic_store_with_hint(0, c_data, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}} + __builtin_arm_atomic_store_with_hint(c_ptr, f_data, 0, 0); // expected-error {{arguments are of different types ('char' vs 'float')}} + __builtin_arm_atomic_store_with_hint(inv_ptr, inv_data, 0, 0); // expected-error {{address argument to atomic store with hint must be of size 8, 16, 32 or 64 bits}} + + __builtin_arm_atomic_store_with_hint(c_ptr, c_data, inv_int, 0); // expected-error {{invalid memory order argument to atomic hint operation ('int' invalid)}} + __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 2, 0); // expected-error {{invalid memory order argument to atomic hint operation (2 invalid)}} + + __builtin_arm_atomic_store_with_hint(c_ptr, 
c_data, 0, inv_int); // expected-error {{invalid hint type argument to atomic hint operation ('int' invalid)}}
+  __builtin_arm_atomic_store_with_hint(c_ptr, c_data, 0, 3); // expected-error {{invalid hint type argument to atomic hint operation (3 invalid)}}
+}
diff --git a/llvm/include/llvm/Support/AArch64AtomicHints.h b/llvm/include/llvm/Support/AArch64AtomicHints.h
new file mode 100644
index 0000000000000..8118f3e2df3ad
--- /dev/null
+++ b/llvm/include/llvm/Support/AArch64AtomicHints.h
@@ -0,0 +1,45 @@
+//===-- AArch64AtomicHints.h - AArch64 Atomic Hint Attributes ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AARCH64ATOMICHINTS_H
+#define LLVM_SUPPORT_AARCH64ATOMICHINTS_H
+
+namespace llvm {
+/// Hint attached to an AArch64 atomic store. HINT_NONE (0) is the value
+/// returned by the helpers below when no valid hint is present.
+enum class AArch64AtomicStoreHint {
+  HINT_NONE = 0,
+  HINT_STSHH_KEEP = 1,
+  HINT_STSHH_STRM = 2,
+};
+
+/// Return true if \p I is the numeric value of a real hint enumerator, i.e.
+/// anything in [HINT_STSHH_KEEP, HINT_STSHH_STRM]. HINT_NONE is not valid.
+template <typename Int> inline bool isValidAArch64AtomicHintValue(Int I) {
+  return static_cast<Int>(AArch64AtomicStoreHint::HINT_STSHH_KEEP) <= I &&
+         I <= static_cast<Int>(AArch64AtomicStoreHint::HINT_STSHH_STRM);
+}
+
+/// Map a hint operand value to its enumerator.
+///
+/// Note that the operand encoding is offset by one from the enumerator
+/// values: operand 0 selects HINT_STSHH_KEEP and operand 1 selects
+/// HINT_STSHH_STRM. Any other value yields HINT_NONE.
+template <typename Int>
+inline AArch64AtomicStoreHint getAtomicStoreHintFromMD(Int I) {
+  switch (I) {
+  case 0:
+    return AArch64AtomicStoreHint::HINT_STSHH_KEEP;
+  case 1:
+    return AArch64AtomicStoreHint::HINT_STSHH_STRM;
+  default:
+    return AArch64AtomicStoreHint::HINT_NONE;
+  }
+}
+} // namespace llvm
+#endif // LLVM_SUPPORT_AARCH64ATOMICHINTS_H
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 7327290f62970..3c12a29eb9556 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -254,6 +254,8 @@ static void copyMetadataForAtomic(Instruction &Dest,
       Dest.setMetadata(ID, N);
     else if (ID ==
Ctx.getMDKindID("amdgpu.no.fine.grained.memory")) Dest.setMetadata(ID, N); + else if (ID == Ctx.getMDKindID("aarch64.atomic.hint")) + Dest.setMetadata(ID, N); // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current // uses. @@ -707,6 +709,7 @@ StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) { NewSI->setAlignment(SI->getAlign()); NewSI->setVolatile(SI->isVolatile()); NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); + copyMetadataForAtomic(*NewSI, *SI); LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); SI->eraseFromParent(); return NewSI; diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 4eb475ef606de..8b66c72163087 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -278,6 +278,9 @@ class AArch64AsmPrinter : public AsmPrinter { // Emit expansion of Compare-and-branch pseudo instructions void emitCBPseudoExpansion(const MachineInstr *MI); + // Emit expansion of atomic store with hint pseudo instructions + void emitAtomicHintPseudoExpansion(const MachineInstr *MI, unsigned Size); + void EmitToStreamer(MCStreamer &S, const MCInst &Inst); void EmitToStreamer(const MCInst &Inst) { EmitToStreamer(*OutStreamer, Inst); @@ -3126,6 +3129,42 @@ void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, Inst); } +void AArch64AsmPrinter::emitAtomicHintPseudoExpansion(const MachineInstr *MI, + unsigned Size) { + + unsigned StOpc; + unsigned Order = MI->getOperand(2).getImm(); + bool Relaxed = Order == 0; + switch (Size) { + case 8: + StOpc = Relaxed ? AArch64::STRBBui : AArch64::STLRB; + break; + case 16: + StOpc = Relaxed ? AArch64::STRHHui : AArch64::STLRH; + break; + case 32: + StOpc = Relaxed ? AArch64::STRWui : AArch64::STLRW; + break; + case 64: + StOpc = Relaxed ? 
AArch64::STRXui : AArch64::STLRX; + break; + default: + llvm_unreachable("Unexpected atomic hint size."); + } + + EmitToStreamer( + MCInstBuilder(AArch64::STSHH).addImm(MI->getOperand(3).getImm())); + + MCInst Store; + Store.setOpcode(StOpc); + Store.addOperand(MCOperand::createReg(MI->getOperand(1).getReg())); + Store.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); + Store.setFlags(MI->getFlags()); + if (Relaxed) + Store.addOperand(MCOperand::createImm(0)); + EmitToStreamer(*OutStreamer, Store); +} + // Simple pseudo-instructions have their lowering (with expansion to real // instructions) auto-generated. #include "AArch64GenMCPseudoLowering.inc" @@ -3797,6 +3836,18 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { case AArch64::CBXPrr: emitCBPseudoExpansion(MI); return; + case AArch64::ATOMIC_STORE_HINT_B: + emitAtomicHintPseudoExpansion(MI, 8); + return; + case AArch64::ATOMIC_STORE_HINT_H: + emitAtomicHintPseudoExpansion(MI, 16); + return; + case AArch64::ATOMIC_STORE_HINT_S: + emitAtomicHintPseudoExpansion(MI, 32); + return; + case AArch64::ATOMIC_STORE_HINT_D: + emitAtomicHintPseudoExpansion(MI, 64); + return; } if (emitDeactivationSymbolRelocation(MI->getDeactivationSymbol())) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 6002ee4c7b7f7..5592fef0938be 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -20,6 +21,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/Support/AArch64AtomicHints.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" 
#include "llvm/Support/KnownBits.h" @@ -508,6 +510,10 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { bool SelectCMP_SWAP(SDNode *N); + bool isAtomicHintInst(SDNode *N, AArch64AtomicStoreHint Hint) const; + bool isAtomicSTSHH_KEEP(SDNode *N) const; + bool isAtomicSTSHH_STRM(SDNode *N) const; + bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift, bool Negate); bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm, @@ -4445,6 +4451,20 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { return true; } +bool AArch64DAGToDAGISel::isAtomicHintInst(SDNode *N, + AArch64AtomicStoreHint Hint) const { + const MachineMemOperand *MMO = cast(N)->getMemOperand(); + return AArch64InstrInfo::decodeAtomicHintFlags(MMO->getFlags()) == Hint; +} + +bool AArch64DAGToDAGISel::isAtomicSTSHH_KEEP(SDNode *N) const { + return isAtomicHintInst(N, AArch64AtomicStoreHint::HINT_STSHH_KEEP); +} + +bool AArch64DAGToDAGISel::isAtomicSTSHH_STRM(SDNode *N) const { + return isAtomicHintInst(N, AArch64AtomicStoreHint::HINT_STSHH_STRM); +} + bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift, bool Negate) { if (!isa(N)) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8c93adac0adf9..3eb858413aa10 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -78,6 +78,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" +#include "llvm/Support/AArch64AtomicHints.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" @@ -18521,7 +18522,25 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const { if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor && I.hasMetadata(FALKOR_STRIDED_ACCESS_MD)) return MOStridedAccess; - return MachineMemOperand::MONone; + + auto Flags = 
MachineMemOperand::MONone; + const MDNode *AtomicStHint = I.getMetadata(AARCH64_ATOMIC_STORE_HINT_MD); + if (AtomicStHint) { + unsigned HintVal = + cast( + cast(AtomicStHint->getOperand(0))->getValue()) + ->getZExtValue(); + AArch64AtomicStoreHint Hint = getAtomicStoreHintFromMD(HintVal); + assert(Hint != AArch64AtomicStoreHint::HINT_NONE && + "Unrecognised atomic hint value requested."); + + if (static_cast(Hint) & 0b1) + Flags |= MOAtomicHintBit0; + if (static_cast(Hint) & 0b10) + Flags |= MOAtomicHintBit1; + } + + return Flags; } bool AArch64TargetLowering::isLegalInterleavedAccessType( diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td index 2187f21abb70f..af57d471e0411 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -282,6 +282,116 @@ def : Pat<(relaxed_store (am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))), (STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>; +//===---------------------------------- +// Atomic store with hint pseudos +//===---------------------------------- + +class seq_cst_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> { + let IsAtomic = 1; + let IsAtomicOrderingSequentiallyConsistent = 1; +} + +class atomic_hint_stshh_keep_relaxed + : PatFrag<(ops node:$ptr, node:$val), + (relaxed_store node:$ptr, node:$val), + [{ return isAtomicSTSHH_KEEP(N); }]>; + +class atomic_hint_stshh_keep_release + : PatFrag<(ops node:$ptr, node:$val), + (releasing_store node:$ptr, node:$val), + [{ return isAtomicSTSHH_KEEP(N); }]>; + +class atomic_hint_stshh_keep_seqcst + : PatFrag<(ops node:$ptr, node:$val), + (seq_cst_store node:$ptr, node:$val), + [{ return isAtomicSTSHH_KEEP(N); }]>; + +class atomic_hint_stshh_strm_relaxed + : PatFrag<(ops node:$ptr, node:$val), + (relaxed_store node:$ptr, node:$val), + [{ return isAtomicSTSHH_STRM(N); }]>; + +class 
atomic_hint_stshh_strm_release + : PatFrag<(ops node:$ptr, node:$val), + (releasing_store node:$ptr, node:$val), + [{ return isAtomicSTSHH_STRM(N); }]>; + +class atomic_hint_stshh_strm_seqcst + : PatFrag<(ops node:$ptr, node:$val), + (seq_cst_store node:$ptr, node:$val), + [{ return isAtomicSTSHH_STRM(N); }]>; + +class BaseStoreHintPseudo + : Pseudo<(outs), (ins GPR64sp:$addr, regtype:$data, + i32imm:$order, i32imm:$hint), []>, Sched<[WriteAtomic]> { + let Size = 8; + let isCodeGenOnly = 1; + let hasSideEffects = 1; + let mayStore = 1; +} + +def ATOMIC_STORE_HINT_B : BaseStoreHintPseudo; +def ATOMIC_STORE_HINT_H : BaseStoreHintPseudo; +def ATOMIC_STORE_HINT_S : BaseStoreHintPseudo; +def ATOMIC_STORE_HINT_D : BaseStoreHintPseudo; + +let AddedComplexity = 15 in { + def : Pat<(atomic_hint_stshh_keep_relaxed GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>; + def : Pat<(atomic_hint_stshh_keep_relaxed GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>; + def : Pat<(atomic_hint_stshh_keep_relaxed GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 0))>; + def : Pat<(atomic_hint_stshh_keep_relaxed GPR64sp:$addr, GPR64:$data), + (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 0))>; + + def : Pat<(atomic_hint_stshh_keep_release GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>; + def : Pat<(atomic_hint_stshh_keep_release GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>; + def : Pat<(atomic_hint_stshh_keep_release GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 0))>; + def : Pat<(atomic_hint_stshh_keep_release GPR64sp:$addr, GPR64:$data), + (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 0))>; + + def : Pat<(atomic_hint_stshh_keep_seqcst GPR64sp:$addr, GPR32:$data), 
+ (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>; + def : Pat<(atomic_hint_stshh_keep_seqcst GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>; + def : Pat<(atomic_hint_stshh_keep_seqcst GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 0))>; + def : Pat<(atomic_hint_stshh_keep_seqcst GPR64sp:$addr, GPR64:$data), + (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 0))>; + + def : Pat<(atomic_hint_stshh_strm_relaxed GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>; + def : Pat<(atomic_hint_stshh_strm_relaxed GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>; + def : Pat<(atomic_hint_stshh_strm_relaxed GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 0), (i32 1))>; + def : Pat<(atomic_hint_stshh_strm_relaxed GPR64sp:$addr, GPR64:$data), + (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 0), (i32 1))>; + + def : Pat<(atomic_hint_stshh_strm_release GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>; + def : Pat<(atomic_hint_stshh_strm_release GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>; + def : Pat<(atomic_hint_stshh_strm_release GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 3), (i32 1))>; + def : Pat<(atomic_hint_stshh_strm_release GPR64sp:$addr, GPR64:$data), + (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 3), (i32 1))>; + + def : Pat<(atomic_hint_stshh_strm_seqcst GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_B GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>; + def : Pat<(atomic_hint_stshh_strm_seqcst GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_H GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>; + def : Pat<(atomic_hint_stshh_strm_seqcst 
GPR64sp:$addr, GPR32:$data), + (ATOMIC_STORE_HINT_S GPR64sp:$addr, GPR32:$data, (i32 5), (i32 1))>; + def : Pat<(atomic_hint_stshh_strm_seqcst GPR64sp:$addr, GPR64:$data), + (ATOMIC_STORE_HINT_D GPR64sp:$addr, GPR64:$data, (i32 5), (i32 1))>; +} + //===---------------------------------- // Low-level exclusive operations //===---------------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 164a8a8d5dd7c..505055d1f7666 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2855,6 +2855,19 @@ bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) { }); } +AArch64AtomicStoreHint +AArch64InstrInfo::decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags) { + unsigned AtomicHint = 0; + if (MMOFlags & MOAtomicHintBit0) + AtomicHint += 0b1; + if (MMOFlags & MOAtomicHintBit1) + AtomicHint += 0b10; + + if (!isValidAArch64AtomicHintValue(AtomicHint)) + return AArch64AtomicStoreHint::HINT_NONE; + return static_cast(AtomicHint); +} + bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) { switch (Opc) { default: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 9a98156e5b809..d8ae83aee3ee8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -16,6 +16,7 @@ #include "AArch64.h" #include "AArch64RegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/AArch64AtomicHints.h" #include "llvm/Support/TypeSize.h" #include @@ -30,8 +31,13 @@ static const MachineMemOperand::Flags MOSuppressPair = MachineMemOperand::MOTargetFlag1; static const MachineMemOperand::Flags MOStridedAccess = MachineMemOperand::MOTargetFlag2; +static const MachineMemOperand::Flags MOAtomicHintBit0 = + MachineMemOperand::MOTargetFlag3; +static const MachineMemOperand::Flags MOAtomicHintBit1 = + MachineMemOperand::MOTargetFlag4; 
#define FALKOR_STRIDED_ACCESS_MD "falkor.strided.access" +#define AARCH64_ATOMIC_STORE_HINT_MD "aarch64.atomic.hint" // AArch64 MachineCombiner patterns enum AArch64MachineCombinerPattern : unsigned { @@ -230,6 +236,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// Return true if the given load or store is a strided memory access. static bool isStridedAccess(const MachineInstr &MI); + static AArch64AtomicStoreHint + decodeAtomicHintFlags(MachineMemOperand::Flags MMOFlags); + /// Return true if it has an unscaled load/store offset. static bool hasUnscaledLdStOffset(unsigned Opc); static bool hasUnscaledLdStOffset(MachineInstr &MI) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index cf650fd5c4e72..524a98fbd4cd9 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" +#include "llvm/Support/AArch64AtomicHints.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include @@ -2541,6 +2542,66 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { I.eraseFromParent(); return true; } + case TargetOpcode::G_STORE: { + GStore &St = cast(I); + auto MMO = St.getMMO(); + LLT PtrTy = MRI.getType(St.getPointerReg()); + + // Only for handling atomic store with hint. + // Can only handle AddressSpace 0, 64-bit pointers. 
+ if (!St.isAtomic() || PtrTy != LLT::pointer(0, 64)) { + return false; + } + + AArch64AtomicStoreHint Hint = TII.decodeAtomicHintFlags(MMO.getFlags()); + if (Hint == AArch64AtomicStoreHint::HINT_NONE) + return false; + + unsigned HintOpc; + unsigned StoreSize = St.getMemSizeInBits().getValue(); + Register ValueReg = St.getValueReg(); + switch (StoreSize) { + case 8: + HintOpc = AArch64::ATOMIC_STORE_HINT_B; + break; + case 16: { + Register CastReg; + if (mi_match(ValueReg, MRI, m_GBitcast(m_Reg(CastReg)))) { + auto Undef = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, + {&AArch64::FPR32RegClass}, {}); + auto Ins = MIB.buildInstr(TargetOpcode::INSERT_SUBREG, + {&AArch64::FPR32RegClass}, {Undef, ValueReg}) + .addImm(AArch64::hsub); + constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); + constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); + ValueReg = Ins.getReg(0); + } + HintOpc = AArch64::ATOMIC_STORE_HINT_H; + break; + } + case 32: + HintOpc = AArch64::ATOMIC_STORE_HINT_S; + break; + case 64: + HintOpc = AArch64::ATOMIC_STORE_HINT_D; + break; + default: + llvm_unreachable("Unexpected getMemSizeInBits() value for atomic hint."); + } + + unsigned HintImm = Hint == AArch64AtomicStoreHint::HINT_STSHH_KEEP ? 
0 : 1; + + auto StrPseudo = BuildMI(MBB, I, MIMetadata(I), TII.get(HintOpc)) + .addReg(St.getPointerReg()) + .addReg(ValueReg) + .addImm((int)toCABI(St.getMMO().getSuccessOrdering())) + .addImm(static_cast(HintImm)); + + StrPseudo.cloneMemRefs(I); + I.eraseFromParent(); + constrainSelectedInstRegOperands(*StrPseudo, TII, TRI, RBI); + return true; + } default: return false; } diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll new file mode 100644 index 0000000000000..dfcfa92cbc6c8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-hint.ll @@ -0,0 +1,288 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel=1 -verify-machineinstrs < %s | FileCheck %s + +; +; STSHH: Keep, Relaxed +; + +define dso_local void @test_atomic_store_keep_relaxed_i8(ptr %ptr, i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_relaxed_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh keep +; CHECK-NEXT: strb w1, [x0] +; CHECK-NEXT: ret + store atomic i8 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0 + ret void +} + +define dso_local void @test_atomic_store_keep_relaxed_i16(ptr %ptr, i16 %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_relaxed_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh keep +; CHECK-NEXT: strh w1, [x0] +; CHECK-NEXT: ret + store atomic i16 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0 + ret void +} + +define dso_local void @test_atomic_store_keep_relaxed_i32(ptr %ptr, i32 %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_relaxed_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh keep +; CHECK-NEXT: str w1, [x0] +; CHECK-NEXT: ret + store atomic i32 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0 + ret void +} + +define 
dso_local void @test_atomic_store_keep_relaxed_i64(ptr %ptr, i64 %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_relaxed_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh keep +; CHECK-NEXT: str x1, [x0] +; CHECK-NEXT: ret + store atomic i64 %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !0 + ret void +} + +; +; STSHH: Keep, Release +; + +define dso_local void @test_atomic_store_keep_release_bfloat(ptr %ptr, bfloat %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_release_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh keep +; CHECK-NEXT: stlrh w8, [x0] +; CHECK-NEXT: ret + store atomic bfloat %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0 + ret void +} + +define dso_local void @test_atomic_store_keep_release_half(ptr %ptr, half %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_release_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh keep +; CHECK-NEXT: stlrh w8, [x0] +; CHECK-NEXT: ret + store atomic half %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0 + ret void +} + +define dso_local void @test_atomic_store_keep_release_float(ptr %ptr, float %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_release_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh keep +; CHECK-NEXT: stlr w8, [x0] +; CHECK-NEXT: ret + store atomic float %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0 + ret void +} + +define dso_local void @test_atomic_store_keep_release_double(ptr %ptr, double %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_release_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: stshh keep +; CHECK-NEXT: stlr x8, [x0] +; CHECK-NEXT: ret + store atomic double %val, ptr %ptr release, align 8, !aarch64.atomic.hint !0 + ret void +} + +; +; STSHH: Keep, SequentiallyConsistent +; + +define dso_local void 
@test_atomic_store_keep_seqcst_i8(ptr %ptr, i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_seqcst_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh keep +; CHECK-NEXT: stlrb w1, [x0] +; CHECK-NEXT: ret + store atomic i8 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0 + ret void +} + +define dso_local void @test_atomic_store_keep_seqcst_i16(ptr %ptr, i16 %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_seqcst_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh keep +; CHECK-NEXT: stlrh w1, [x0] +; CHECK-NEXT: ret + store atomic i16 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0 + ret void +} + +define dso_local void @test_atomic_store_keep_seqcst_i32(ptr %ptr, i32 %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_seqcst_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh keep +; CHECK-NEXT: stlr w1, [x0] +; CHECK-NEXT: ret + store atomic i32 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0 + ret void +} + +define dso_local void @test_atomic_store_keep_seqcst_i64(ptr %ptr, i64 %val) nounwind { +; CHECK-LABEL: test_atomic_store_keep_seqcst_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh keep +; CHECK-NEXT: stlr x1, [x0] +; CHECK-NEXT: ret + store atomic i64 %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !0 + ret void +} + +; +; STSHH: Stream, Relaxed +; + +define dso_local void @test_atomic_store_strm_relaxed_bfloat(ptr %ptr, bfloat %val) nounwind { +; CHECK-LABEL: test_atomic_store_strm_relaxed_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh strm +; CHECK-NEXT: strh w8, [x0] +; CHECK-NEXT: ret + store atomic bfloat %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1 + ret void +} + +define dso_local void @test_atomic_store_strm_relaxed_half(ptr %ptr, half %val) nounwind { +; CHECK-LABEL: test_atomic_store_strm_relaxed_half: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh 
strm +; CHECK-NEXT: strh w8, [x0] +; CHECK-NEXT: ret + store atomic half %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1 + ret void +} + +define dso_local void @test_atomic_store_strm_relaxed_float(ptr %ptr, float %val) nounwind { +; CHECK-LABEL: test_atomic_store_strm_relaxed_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh strm +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: ret + store atomic float %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1 + ret void +} + +define dso_local void @test_atomic_store_strm_relaxed_double(ptr %ptr, double %val) nounwind { +; CHECK-LABEL: test_atomic_store_strm_relaxed_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: stshh strm +; CHECK-NEXT: str x8, [x0] +; CHECK-NEXT: ret + store atomic double %val, ptr %ptr monotonic, align 8, !aarch64.atomic.hint !1 + ret void +} + +; +; STSHH: Stream, Release +; + +define dso_local void @test_atomic_store_stream_release_i8(ptr %ptr, i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_stream_release_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh strm +; CHECK-NEXT: stlrb w1, [x0] +; CHECK-NEXT: ret + store atomic i8 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1 + ret void +} + +define dso_local void @test_atomic_store_stream_release_i16(ptr %ptr, i16 %val) nounwind { +; CHECK-LABEL: test_atomic_store_stream_release_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh strm +; CHECK-NEXT: stlrh w1, [x0] +; CHECK-NEXT: ret + store atomic i16 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1 + ret void +} + +define dso_local void @test_atomic_store_stream_release_i32(ptr %ptr, i32 %val) nounwind { +; CHECK-LABEL: test_atomic_store_stream_release_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh strm +; CHECK-NEXT: stlr w1, [x0] +; CHECK-NEXT: ret + store atomic i32 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1 + ret void +} + +define dso_local void @test_atomic_store_stream_release_i64(ptr %ptr, i64 %val) nounwind { 
+; CHECK-LABEL: test_atomic_store_stream_release_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: stshh strm +; CHECK-NEXT: stlr x1, [x0] +; CHECK-NEXT: ret + store atomic i64 %val, ptr %ptr release, align 8, !aarch64.atomic.hint !1 + ret void +} + +; +; STSHH: Stream, SequentiallyConsistent +; + +define dso_local void @test_atomic_store_stream_seqcst_bfloat(ptr %ptr, bfloat %val) nounwind { +; CHECK-LABEL: test_atomic_store_stream_seqcst_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh strm +; CHECK-NEXT: stlrh w8, [x0] +; CHECK-NEXT: ret + store atomic bfloat %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1 + ret void +} + +define dso_local void @test_atomic_store_stream_seqcst_i16(ptr %ptr, half %val) nounwind { +; CHECK-LABEL: test_atomic_store_stream_seqcst_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh strm +; CHECK-NEXT: stlrh w8, [x0] +; CHECK-NEXT: ret + store atomic half %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1 + ret void +} + +define dso_local void @test_atomic_store_stream_seqcst_i32(ptr %ptr, float %val) nounwind { +; CHECK-LABEL: test_atomic_store_stream_seqcst_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stshh strm +; CHECK-NEXT: stlr w8, [x0] +; CHECK-NEXT: ret + store atomic float %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1 + ret void +} + +define dso_local void @test_atomic_store_stream_seqcst_double(ptr %ptr, double %val) nounwind { +; CHECK-LABEL: test_atomic_store_stream_seqcst_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: stshh strm +; CHECK-NEXT: stlr x8, [x0] +; CHECK-NEXT: ret + store atomic double %val, ptr %ptr seq_cst, align 8, !aarch64.atomic.hint !1 + ret void +} + +!0 = !{i32 0} +!1 = !{i32 1}