|
|
@@ -0,0 +1,304 @@ |
|
|
//===----- SemaCUDA.h ----- Semantic Analysis for CUDA constructs ---------===// |
|
|
// |
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
|
|
// See https://llvm.org/LICENSE.txt for license information. |
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
|
|
// |
|
|
//===----------------------------------------------------------------------===// |
|
|
/// \file |
|
|
/// This file declares semantic analysis for CUDA constructs. |
|
|
/// |
|
|
//===----------------------------------------------------------------------===// |
|
|
|
|
|
#ifndef LLVM_CLANG_SEMA_SEMACUDA_H |
|
|
#define LLVM_CLANG_SEMA_SEMACUDA_H |
|
|
|
|
|
#include "clang/AST/Decl.h" |
|
|
#include "clang/AST/DeclCXX.h" |
|
|
#include "clang/AST/Redeclarable.h" |
|
|
#include "clang/Basic/Cuda.h" |
|
|
#include "clang/Basic/SourceLocation.h" |
|
|
#include "clang/Sema/Lookup.h" |
|
|
#include "clang/Sema/Ownership.h" |
|
|
#include "clang/Sema/ParsedAttr.h" |
|
|
#include "clang/Sema/Scope.h" |
|
|
#include "clang/Sema/ScopeInfo.h" |
|
|
#include "clang/Sema/SemaBase.h" |
|
|
#include "llvm/ADT/DenseMap.h" |
|
|
#include "llvm/ADT/SmallVector.h" |
|
|
#include <string> |
|
|
|
|
|
namespace clang { |
|
|
|
|
|
enum class CUDAFunctionTarget; |
|
|
|
|
|
class SemaCUDA : public SemaBase { |
|
|
public: |
|
|
SemaCUDA(Sema &S); |
|
|
|
|
|
/// Increments our count of the number of times we've seen a pragma forcing |
|
|
/// functions to be __host__ __device__. So long as this count is greater |
|
|
/// than zero, all functions encountered will be __host__ __device__. |
|
|
void PushForceHostDevice(); |
|
|
|
|
|
/// Decrements our count of the number of times we've seen a pragma forcing |
|
|
/// functions to be __host__ __device__. Returns false if the count is 0 |
|
|
/// before incrementing, so you can emit an error. |
|
|
bool PopForceHostDevice(); |
|
|
|
|
|
ExprResult ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
|
|
MultiExprArg ExecConfig, |
|
|
SourceLocation GGGLoc); |
|
|
|
|
|
/// A pair of a canonical FunctionDecl and a SourceLocation. When used as the |
|
|
/// key in a hashtable, both the FD and location are hashed. |
|
|
struct FunctionDeclAndLoc { |
|
|
CanonicalDeclPtr<const FunctionDecl> FD; |
|
|
SourceLocation Loc; |
|
|
}; |
|
|
|
|
|
/// FunctionDecls and SourceLocations for which CheckCall has emitted a |
|
|
/// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the |
|
|
/// same deferred diag twice. |
|
|
llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags; |
|
|
|
|
|
/// An inverse call graph, mapping known-emitted functions to one of their |
|
|
/// known-emitted callers (plus the location of the call). |
|
|
/// |
|
|
/// Functions that we can tell a priori must be emitted aren't added to this |
|
|
/// map. |
|
|
llvm::DenseMap</* Callee = */ CanonicalDeclPtr<const FunctionDecl>, |
|
|
/* Caller = */ FunctionDeclAndLoc> |
|
|
DeviceKnownEmittedFns; |
|
|
|
|
|
/// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current |
|
|
/// context is "used as device code". |
|
|
/// |
|
|
/// - If CurContext is a __host__ function, does not emit any diagnostics |
|
|
/// unless \p EmitOnBothSides is true. |
|
|
/// - If CurContext is a __device__ or __global__ function, emits the |
|
|
/// diagnostics immediately. |
|
|
/// - If CurContext is a __host__ __device__ function and we are compiling for |
|
|
/// the device, creates a diagnostic which is emitted if and when we realize |
|
|
/// that the function will be codegen'ed. |
|
|
/// |
|
|
/// Example usage: |
|
|
/// |
|
|
/// // Variable-length arrays are not allowed in CUDA device code. |
|
|
/// if (DiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentTarget()) |
|
|
/// return ExprError(); |
|
|
/// // Otherwise, continue parsing as normal. |
|
|
SemaDiagnosticBuilder DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID); |
|
|
|
|
|
/// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current |
|
|
/// context is "used as host code". |
|
|
/// |
|
|
/// Same as DiagIfDeviceCode, with "host" and "device" switched. |
|
|
SemaDiagnosticBuilder DiagIfHostCode(SourceLocation Loc, unsigned DiagID); |
|
|
|
|
|
/// Determines whether the given function is a CUDA device/host/kernel/etc. |
|
|
/// function. |
|
|
/// |
|
|
/// Use this rather than examining the function's attributes yourself -- you |
|
|
/// will get it wrong. Returns CUDAFunctionTarget::Host if D is null. |
|
|
CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D, |
|
|
bool IgnoreImplicitHDAttr = false); |
|
|
CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs); |
|
|
|
|
|
enum CUDAVariableTarget { |
|
|
CVT_Device, /// Emitted on device side with a shadow variable on host side |
|
|
CVT_Host, /// Emitted on host side only |
|
|
CVT_Both, /// Emitted on both sides with different addresses |
|
|
CVT_Unified, /// Emitted as a unified address, e.g. managed variables |
|
|
}; |
|
|
/// Determines whether the given variable is emitted on host or device side. |
|
|
CUDAVariableTarget IdentifyTarget(const VarDecl *D); |
|
|
|
|
|
/// Defines kinds of CUDA global host/device context where a function may be |
|
|
/// called. |
|
|
enum CUDATargetContextKind { |
|
|
CTCK_Unknown, /// Unknown context |
|
|
CTCK_InitGlobalVar, /// Function called during global variable |
|
|
/// initialization |
|
|
}; |
|
|
|
|
|
/// Define the current global CUDA host/device context where a function may be |
|
|
/// called. Only used when a function is called outside of any functions. |
|
|
struct CUDATargetContext { |
|
|
CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice; |
|
|
CUDATargetContextKind Kind = CTCK_Unknown; |
|
|
Decl *D = nullptr; |
|
|
} CurCUDATargetCtx; |
|
|
|
|
|
struct CUDATargetContextRAII { |
|
|
SemaCUDA &S; |
|
|
SemaCUDA::CUDATargetContext SavedCtx; |
|
|
CUDATargetContextRAII(SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, |
|
|
Decl *D); |
|
|
~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; } |
|
|
}; |
|
|
|
|
|
/// Gets the CUDA target for the current context. |
|
|
CUDAFunctionTarget CurrentTarget() { |
|
|
return IdentifyTarget(dyn_cast<FunctionDecl>(SemaRef.CurContext)); |
|
|
} |
|
|
|
|
|
static bool isImplicitHostDeviceFunction(const FunctionDecl *D); |
|
|
|
|
|
// CUDA function call preference. Must be ordered numerically from |
|
|
// worst to best. |
|
|
enum CUDAFunctionPreference { |
|
|
CFP_Never, // Invalid caller/callee combination. |
|
|
CFP_WrongSide, // Calls from host-device to host or device |
|
|
// function that do not match current compilation |
|
|
// mode. |
|
|
CFP_HostDevice, // Any calls to host/device functions. |
|
|
CFP_SameSide, // Calls from host-device to host or device |
|
|
// function matching current compilation mode. |
|
|
CFP_Native, // host-to-host or device-to-device calls. |
|
|
}; |
|
|
|
|
|
/// Identifies relative preference of a given Caller/Callee |
|
|
/// combination, based on their host/device attributes. |
|
|
/// \param Caller function which needs address of \p Callee. |
|
|
/// nullptr in case of global context. |
|
|
/// \param Callee target function |
|
|
/// |
|
|
/// \returns preference value for particular Caller/Callee combination. |
|
|
CUDAFunctionPreference IdentifyPreference(const FunctionDecl *Caller, |
|
|
const FunctionDecl *Callee); |
|
|
|
|
|
/// Determines whether Caller may invoke Callee, based on their CUDA |
|
|
/// host/device attributes. Returns false if the call is not allowed. |
|
|
/// |
|
|
/// Note: Will return true for CFP_WrongSide calls. These may appear in |
|
|
/// semantically correct CUDA programs, but only if they're never codegen'ed. |
|
|
bool IsAllowedCall(const FunctionDecl *Caller, const FunctionDecl *Callee) { |
|
|
return IdentifyPreference(Caller, Callee) != CFP_Never; |
|
|
} |
|
|
|
|
|
/// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD, |
|
|
/// depending on FD and the current compilation settings. |
|
|
void maybeAddHostDeviceAttrs(FunctionDecl *FD, const LookupResult &Previous); |
|
|
|
|
|
/// May add implicit CUDAConstantAttr attribute to VD, depending on VD |
|
|
/// and current compilation settings. |
|
|
void MaybeAddConstantAttr(VarDecl *VD); |
|
|
|
|
|
/// Check whether we're allowed to call Callee from the current context. |
|
|
/// |
|
|
/// - If the call is never allowed in a semantically-correct program |
|
|
/// (CFP_Never), emits an error and returns false. |
|
|
/// |
|
|
/// - If the call is allowed in semantically-correct programs, but only if |
|
|
/// it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to |
|
|
/// be emitted if and when the caller is codegen'ed, and returns true. |
|
|
/// |
|
|
/// Will only create deferred diagnostics for a given SourceLocation once, |
|
|
/// so you can safely call this multiple times without generating duplicate |
|
|
/// deferred errors. |
|
|
/// |
|
|
/// - Otherwise, returns true without emitting any diagnostics. |
|
|
bool CheckCall(SourceLocation Loc, FunctionDecl *Callee); |
|
|
|
|
|
void CheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture); |
|
|
|
|
|
/// Set __device__ or __host__ __device__ attributes on the given lambda |
|
|
/// operator() method. |
|
|
/// |
|
|
/// CUDA lambdas by default is host device function unless it has explicit |
|
|
/// host or device attribute. |
|
|
void SetLambdaAttrs(CXXMethodDecl *Method); |
|
|
|
|
|
/// Record \p FD if it is a CUDA/HIP implicit host device function used on |
|
|
/// device side in device compilation. |
|
|
void RecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD); |
|
|
|
|
|
/// Finds a function in \p Matches with highest calling priority |
|
|
/// from \p Caller context and erases all functions with lower |
|
|
/// calling priority. |
|
|
void EraseUnwantedMatches( |
|
|
const FunctionDecl *Caller, |
|
|
llvm::SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> |
|
|
&Matches); |
|
|
|
|
|
/// Given a implicit special member, infer its CUDA target from the |
|
|
/// calls it needs to make to underlying base/field special members. |
|
|
/// \param ClassDecl the class for which the member is being created. |
|
|
/// \param CSM the kind of special member. |
|
|
/// \param MemberDecl the special member itself. |
|
|
/// \param ConstRHS true if this is a copy operation with a const object on |
|
|
/// its RHS. |
|
|
/// \param Diagnose true if this call should emit diagnostics. |
|
|
/// \return true if there was an error inferring. |
|
|
/// The result of this call is implicit CUDA target attribute(s) attached to |
|
|
/// the member declaration. |
|
|
bool inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
|
|
CXXSpecialMemberKind CSM, |
|
|
CXXMethodDecl *MemberDecl, |
|
|
bool ConstRHS, bool Diagnose); |
|
|
|
|
|
/// \return true if \p CD can be considered empty according to CUDA |
|
|
/// (E.2.3.1 in CUDA 7.5 Programming guide). |
|
|
bool isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD); |
|
|
bool isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *CD); |
|
|
|
|
|
// \brief Checks that initializers of \p Var satisfy CUDA restrictions. In |
|
|
// case of error emits appropriate diagnostic and invalidates \p Var. |
|
|
// |
|
|
// \details CUDA allows only empty constructors as initializers for global |
|
|
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
|
|
// __shared__ variables whether they are local or not (they all are implicitly |
|
|
// static in CUDA). One exception is that CUDA allows constant initializers |
|
|
// for __constant__ and __device__ variables. |
|
|
void checkAllowedInitializer(VarDecl *VD); |
|
|
|
|
|
/// Check whether NewFD is a valid overload for CUDA. Emits |
|
|
/// diagnostics and invalidates NewFD if not. |
|
|
void checkTargetOverload(FunctionDecl *NewFD, const LookupResult &Previous); |
|
|
/// Copies target attributes from the template TD to the function FD. |
|
|
void inheritTargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD); |
|
|
|
|
|
/// Returns the name of the launch configuration function. This is the name |
|
|
/// of the function that will be called to configure kernel call, with the |
|
|
/// parameters specified via <<<>>>. |
|
|
std::string getConfigureFuncName() const; |
|
|
|
|
|
private: |
|
|
unsigned ForceHostDeviceDepth = 0; |
|
|
|
|
|
friend class ASTReader; |
|
|
friend class ASTWriter; |
|
|
}; |
|
|
|
|
|
} // namespace clang |
|
|
|
|
|
namespace llvm { |
|
|
// Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its |
|
|
// SourceLocation. |
|
|
template <> struct DenseMapInfo<clang::SemaCUDA::FunctionDeclAndLoc> { |
|
|
using FunctionDeclAndLoc = clang::SemaCUDA::FunctionDeclAndLoc; |
|
|
using FDBaseInfo = |
|
|
DenseMapInfo<clang::CanonicalDeclPtr<const clang::FunctionDecl>>; |
|
|
|
|
|
static FunctionDeclAndLoc getEmptyKey() { |
|
|
return {FDBaseInfo::getEmptyKey(), clang::SourceLocation()}; |
|
|
} |
|
|
|
|
|
static FunctionDeclAndLoc getTombstoneKey() { |
|
|
return {FDBaseInfo::getTombstoneKey(), clang::SourceLocation()}; |
|
|
} |
|
|
|
|
|
static unsigned getHashValue(const FunctionDeclAndLoc &FDL) { |
|
|
return hash_combine(FDBaseInfo::getHashValue(FDL.FD), |
|
|
FDL.Loc.getHashValue()); |
|
|
} |
|
|
|
|
|
static bool isEqual(const FunctionDeclAndLoc &LHS, |
|
|
const FunctionDeclAndLoc &RHS) { |
|
|
return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc; |
|
|
} |
|
|
}; |
|
|
} // namespace llvm |
|
|
|
|
|
#endif // LLVM_CLANG_SEMA_SEMACUDA_H |