Skip to content

Commit

Permalink
Add OpenCL 2.0 atomic builtin functions as Clang builtin
Browse files Browse the repository at this point in the history
OpenCL 2.0 atomic builtin functions have a scope argument, which is ideally
represented as a synchronization scope argument in LLVM atomic instructions.

Clang supports translating Clang atomic builtin functions to LLVM atomic
instructions. However, it currently does not support the synchronization scope
of LLVM atomic instructions. Without this, users have to use LLVM assembly
code to implement OpenCL atomic builtin functions.

This patch adds the OpenCL 2.0 atomic builtin functions as Clang builtin
functions, which support generating LLVM atomic instructions with a
synchronization scope operand.

Currently, only a constant memory scope argument is supported. Support for
non-constant memory scope arguments will be added later.

Differential Revision: https://reviews.llvm.org/D28691

llvm-svn: 310082
  • Loading branch information
yxsamliu committed Aug 4, 2017
1 parent 0afcef2 commit 3919506
Show file tree
Hide file tree
Showing 21 changed files with 804 additions and 81 deletions.
8 changes: 7 additions & 1 deletion clang/docs/LanguageExtensions.rst
Expand Up @@ -1929,7 +1929,13 @@ provided, with values corresponding to the enumerators of C11's
``memory_order`` enumeration.
(Note that Clang additionally provides GCC-compatible ``__atomic_*``
builtins)
builtins and OpenCL 2.0 ``__opencl_atomic_*`` builtins. The OpenCL 2.0
atomic builtins are an explicit form of the corresponding OpenCL 2.0
builtin function, and are named with a ``__opencl_`` prefix. The macros
``__OPENCL_MEMORY_SCOPE_WORK_ITEM``, ``__OPENCL_MEMORY_SCOPE_WORK_GROUP``,
``__OPENCL_MEMORY_SCOPE_DEVICE``, ``__OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES``,
and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values
corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.)
Low-level ARM exclusive memory builtins
---------------------------------------
Expand Down
23 changes: 18 additions & 5 deletions clang/include/clang/AST/Expr.h
Expand Up @@ -5064,9 +5064,11 @@ class PseudoObjectExpr final

/// AtomicExpr - Variadic atomic builtins: __atomic_exchange, __atomic_fetch_*,
/// __atomic_load, __atomic_store, and __atomic_compare_exchange_*, for the
/// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>.
/// All of these instructions take one primary pointer and at least one memory
/// order.
/// similarly-named C++11 instructions, and __c11 variants for <stdatomic.h>,
/// and corresponding __opencl_atomic_* for OpenCL 2.0.
/// All of these instructions take one primary pointer, at least one memory
/// order, and one synchronization scope. The C++11 and __c11 atomic AtomicExpr
/// always take the default synchronization scope.
class AtomicExpr : public Expr {
public:
enum AtomicOp {
Expand All @@ -5078,7 +5080,7 @@ class AtomicExpr : public Expr {
};

private:
enum { PTR, ORDER, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
enum { PTR, ORDER, SCOPE, VAL1, ORDER_FAIL, VAL2, WEAK, END_EXPR };
Stmt* SubExprs[END_EXPR];
unsigned NumSubExprs;
SourceLocation BuiltinLoc, RParenLoc;
Expand All @@ -5103,8 +5105,11 @@ class AtomicExpr : public Expr {
Expr *getOrder() const {
return cast<Expr>(SubExprs[ORDER]);
}
/// Returns the synchronization-scope operand, stored in the SCOPE slot of
/// SubExprs.
///
/// Must only be called on expressions that actually carry a scope operand,
/// i.e. ones built with more than SCOPE sub-expressions.
Expr *getScope() const {
  // Same guard the VAL1/WEAK accessors use: without it, an expression with
  // too few operands would silently read an unset SubExprs slot.
  assert(NumSubExprs > SCOPE && "atomic expression has no scope operand");
  return cast<Expr>(SubExprs[SCOPE]);
}
Expr *getVal1() const {
if (Op == AO__c11_atomic_init)
if (Op == AO__c11_atomic_init || Op == AO__opencl_atomic_init)
return cast<Expr>(SubExprs[ORDER]);
assert(NumSubExprs > VAL1);
return cast<Expr>(SubExprs[VAL1]);
Expand All @@ -5123,6 +5128,7 @@ class AtomicExpr : public Expr {
assert(NumSubExprs > WEAK);
return cast<Expr>(SubExprs[WEAK]);
}
QualType getValueType() const;

AtomicOp getOp() const { return Op; }
unsigned getNumSubExprs() const { return NumSubExprs; }
Expand All @@ -5139,10 +5145,17 @@ class AtomicExpr : public Expr {
/// Returns true if this expression is one of the compare-exchange
/// operations (C11, OpenCL, or GNU __atomic flavours).
bool isCmpXChg() const {
  switch (getOp()) {
  case AO__c11_atomic_compare_exchange_strong:
  case AO__c11_atomic_compare_exchange_weak:
  case AO__opencl_atomic_compare_exchange_strong:
  case AO__opencl_atomic_compare_exchange_weak:
  case AO__atomic_compare_exchange:
  case AO__atomic_compare_exchange_n:
    return true;
  default:
    return false;
  }
}

bool isOpenCL() const {
return getOp() >= AO__opencl_atomic_init &&
getOp() <= AO__opencl_atomic_fetch_max;
}

SourceLocation getBuiltinLoc() const { return BuiltinLoc; }
SourceLocation getRParenLoc() const { return RParenLoc; }

Expand Down
15 changes: 15 additions & 0 deletions clang/include/clang/Basic/Builtins.def
Expand Up @@ -700,6 +700,21 @@ BUILTIN(__atomic_signal_fence, "vi", "n")
BUILTIN(__atomic_always_lock_free, "izvCD*", "n")
BUILTIN(__atomic_is_lock_free, "izvCD*", "n")

// OpenCL 2.0 atomic builtins.
ATOMIC_BUILTIN(__opencl_atomic_init, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_load, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_store, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_exchange, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_strong, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_compare_exchange_weak, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_fetch_add, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_fetch_sub, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_fetch_and, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_fetch_or, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_fetch_xor, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_fetch_min, "v.", "t")
ATOMIC_BUILTIN(__opencl_atomic_fetch_max, "v.", "t")

#undef ATOMIC_BUILTIN

// Non-overloaded atomic builtins.
Expand Down
8 changes: 6 additions & 2 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Expand Up @@ -6995,8 +6995,8 @@ def err_atomic_op_needs_atomic : Error<
"address argument to atomic operation must be a pointer to _Atomic "
"type (%0 invalid)">;
def err_atomic_op_needs_non_const_atomic : Error<
"address argument to atomic operation must be a pointer to non-const _Atomic "
"type (%0 invalid)">;
"address argument to atomic operation must be a pointer to non-%select{const|constant}0 _Atomic "
"type (%1 invalid)">;
def err_atomic_op_needs_non_const_pointer : Error<
"address argument to atomic operation must be a pointer to non-const "
"type (%0 invalid)">;
Expand All @@ -7012,6 +7012,10 @@ def err_atomic_op_bitwise_needs_atomic_int : Error<
def warn_atomic_op_has_invalid_memory_order : Warning<
"memory order argument to atomic operation is invalid">,
InGroup<DiagGroup<"atomic-memory-ordering">>;
def err_atomic_op_has_invalid_synch_scope : Error<
"synchronization scope argument to atomic operation is invalid">;
def err_atomic_op_has_non_constant_synch_scope : Error<
"non-constant synchronization scope argument to atomic operation is not supported">;

def err_overflow_builtin_must_be_int : Error<
"operand argument to overflow builtin must be an integer (%0 invalid)">;
Expand Down
39 changes: 39 additions & 0 deletions clang/include/clang/Basic/SyncScope.h
@@ -0,0 +1,39 @@
//===--- SyncScope.h - Atomic synchronization scopes ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Provides definitions for the atomic synchronization scopes.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_BASIC_SYNCSCOPE_H
#define LLVM_CLANG_BASIC_SYNCSCOPE_H

namespace clang {

/// \brief Synchronization scope values accepted by the atomic builtins and
/// atomic expressions.
///
/// The numeric values should stay in sync with the pre-defined
/// __OPENCL_MEMORY_SCOPE_* macros, which opencl-c.h uses to define the
/// memory_scope_* enumerators.
enum class SyncScope {
  OpenCLWorkGroup = 1,
  OpenCLDevice = 2,
  OpenCLAllSVMDevices = 3,
  OpenCLSubGroup = 4,
};

/// \brief Returns true if \p Scope is the numeric value of one of the
/// SyncScope enumerators.
///
/// The enumerators are treated as one contiguous range, so anything below
/// OpenCLWorkGroup (including 0) or above OpenCLSubGroup is rejected.
inline bool isValidSyncScopeValue(unsigned Scope) {
  const unsigned Lowest = static_cast<unsigned>(SyncScope::OpenCLWorkGroup);
  const unsigned Highest = static_cast<unsigned>(SyncScope::OpenCLSubGroup);
  return Lowest <= Scope && Scope <= Highest;
}

} // namespace clang

#endif
9 changes: 8 additions & 1 deletion clang/lib/AST/ASTContext.cpp
Expand Up @@ -1182,7 +1182,14 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
ObjCSuperType = QualType();

// void * type
VoidPtrTy = getPointerType(VoidTy);
if (LangOpts.OpenCLVersion >= 200) {
auto Q = VoidTy.getQualifiers();
Q.setAddressSpace(LangAS::opencl_generic);
VoidPtrTy = getPointerType(getCanonicalType(
getQualifiedType(VoidTy.getUnqualifiedType(), Q)));
} else {
VoidPtrTy = getPointerType(VoidTy);
}

// nullptr type (C++0x 2.14.7)
InitBuiltinType(NullPtrTy, BuiltinType::NullPtr);
Expand Down
31 changes: 26 additions & 5 deletions clang/lib/AST/Expr.cpp
Expand Up @@ -3938,12 +3938,17 @@ AtomicExpr::AtomicExpr(SourceLocation BLoc, ArrayRef<Expr*> args,
unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
switch (Op) {
case AO__c11_atomic_init:
case AO__opencl_atomic_init:
return 2;
case AO__c11_atomic_load:
case AO__opencl_atomic_load:
case AO__atomic_load_n:
return 2;
return 3;

case AO__c11_atomic_store:
case AO__c11_atomic_exchange:
case AO__opencl_atomic_store:
case AO__opencl_atomic_exchange:
case AO__atomic_load:
case AO__atomic_store:
case AO__atomic_store_n:
Expand All @@ -3953,6 +3958,13 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__c11_atomic_fetch_and:
case AO__c11_atomic_fetch_or:
case AO__c11_atomic_fetch_xor:
case AO__opencl_atomic_fetch_add:
case AO__opencl_atomic_fetch_sub:
case AO__opencl_atomic_fetch_and:
case AO__opencl_atomic_fetch_or:
case AO__opencl_atomic_fetch_xor:
case AO__opencl_atomic_fetch_min:
case AO__opencl_atomic_fetch_max:
case AO__atomic_fetch_add:
case AO__atomic_fetch_sub:
case AO__atomic_fetch_and:
Expand All @@ -3965,22 +3977,31 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_or_fetch:
case AO__atomic_xor_fetch:
case AO__atomic_nand_fetch:
return 3;
return 4;

case AO__atomic_exchange:
return 4;
return 5;

case AO__c11_atomic_compare_exchange_strong:
case AO__c11_atomic_compare_exchange_weak:
return 5;
case AO__opencl_atomic_compare_exchange_strong:
case AO__opencl_atomic_compare_exchange_weak:
return 6;

case AO__atomic_compare_exchange:
case AO__atomic_compare_exchange_n:
return 6;
return 7;
}
llvm_unreachable("unknown atomic op");
}

/// Returns the type of the value this atomic expression operates on.
///
/// This is the pointee type of the pointer operand; when that pointee is an
/// atomic type, the underlying value type is returned instead.
QualType AtomicExpr::getValueType() const {
  QualType Pointee =
      getPtr()->getType()->castAs<PointerType>()->getPointeeType();
  const AtomicType *Atomic = Pointee->getAs<AtomicType>();
  return Atomic ? Atomic->getValueType() : Pointee;
}

QualType OMPArraySectionExpr::getBaseOriginalType(const Expr *Base) {
unsigned ArraySectionCount = 0;
while (auto *OASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParens())) {
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/AST/StmtPrinter.cpp
Expand Up @@ -1891,7 +1891,8 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
// AtomicExpr stores its subexpressions in a permuted order.
PrintExpr(Node->getPtr());
if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
Node->getOp() != AtomicExpr::AO__atomic_load_n) {
Node->getOp() != AtomicExpr::AO__atomic_load_n &&
Node->getOp() != AtomicExpr::AO__opencl_atomic_load) {
OS << ", ";
PrintExpr(Node->getVal1());
}
Expand All @@ -1905,7 +1906,8 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
OS << ", ";
PrintExpr(Node->getWeak());
}
if (Node->getOp() != AtomicExpr::AO__c11_atomic_init) {
if (Node->getOp() != AtomicExpr::AO__c11_atomic_init &&
Node->getOp() != AtomicExpr::AO__opencl_atomic_init) {
OS << ", ";
PrintExpr(Node->getOrder());
}
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Expand Up @@ -328,6 +328,8 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
PtrDiffType = SignedLong;
IntPtrType = SignedLong;
}

MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}

void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
Expand Down

0 comments on commit 3919506

Please sign in to comment.