41 changes: 41 additions & 0 deletions clang/include/clang/AST/SYCLKernelInfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//===--- SYCLKernelInfo.h --- Information about SYCL kernels --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file declares types used to describe SYCL kernels.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_AST_SYCLKERNELINFO_H
#define LLVM_CLANG_AST_SYCLKERNELINFO_H

#include "clang/AST/Decl.h"
#include "clang/AST/Type.h"

namespace clang {

/// Describes one SYCL kernel: the canonical type that names the kernel and
/// the function declaration recorded as its entry point.
class SYCLKernelInfo {
  /// Canonical type used as the SYCL kernel name.
  CanQualType KernelNameType;
  /// Function declaration associated with this kernel as its entry point.
  /// The pointer is stored as given; this class does not own it.
  const FunctionDecl *KernelEntryPointDecl;

public:
  /// Records \p KernelNameType together with \p KernelEntryPointDecl.
  SYCLKernelInfo(CanQualType KernelNameType,
                 const FunctionDecl *KernelEntryPointDecl)
      : KernelNameType(KernelNameType),
        KernelEntryPointDecl(KernelEntryPointDecl) {}

  /// Returns the kernel name type supplied at construction.
  CanQualType getKernelNameType() const { return KernelNameType; }

  /// Returns the entry point declaration supplied at construction.
  const FunctionDecl *getKernelEntryPointDecl() const {
    return KernelEntryPointDecl;
  }
};

} // namespace clang

#endif // LLVM_CLANG_AST_SYCLKERNELINFO_H
23 changes: 20 additions & 3 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,8 @@ def MicrosoftExt : LangOpt<"MicrosoftExt">;
def Borland : LangOpt<"Borland">;
def CUDA : LangOpt<"CUDA">;
def HIP : LangOpt<"HIP">;
def SYCL : LangOpt<"SYCLIsDevice">;
def SYCLHost : LangOpt<"SYCLIsHost">;
def SYCLDevice : LangOpt<"SYCLIsDevice">;
def COnly : LangOpt<"", "!LangOpts.CPlusPlus">;
def CPlusPlus : LangOpt<"CPlusPlus">;
def OpenCL : LangOpt<"OpenCL">;
Expand Down Expand Up @@ -1450,6 +1451,13 @@ def CUDAHost : InheritableAttr {
}
def : MutualExclusions<[CUDAGlobal, CUDAHost]>;

def CUDAGridConstant : InheritableAttr {
let Spellings = [GNU<"grid_constant">, Declspec<"__grid_constant__">];
let Subjects = SubjectList<[ParmVar]>;
let LangOpts = [CUDA];
let Documentation = [CUDAGridConstantAttrDocs];
}

def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
let Spellings = [Clang<"nvptx_kernel">];
let Subjects = SubjectList<[Function]>;
Expand Down Expand Up @@ -1493,14 +1501,23 @@ def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>;
def SYCLKernel : InheritableAttr {
let Spellings = [Clang<"sycl_kernel">];
let Subjects = SubjectList<[FunctionTmpl]>;
let LangOpts = [SYCL];
let LangOpts = [SYCLDevice];
let Documentation = [SYCLKernelDocs];
}

def SYCLKernelEntryPoint : InheritableAttr {
let Spellings = [Clang<"sycl_kernel_entry_point">];
let Args = [TypeArgument<"KernelName">];
let Subjects = SubjectList<[Function], ErrorDiag>;
let TemplateDependent = 1;
let LangOpts = [SYCLHost, SYCLDevice];
let Documentation = [SYCLKernelEntryPointDocs];
}

def SYCLSpecialClass: InheritableAttr {
let Spellings = [Clang<"sycl_special_class">];
let Subjects = SubjectList<[CXXRecord]>;
let LangOpts = [SYCL];
let LangOpts = [SYCLDevice];
let Documentation = [SYCLSpecialClassDocs];
}

Expand Down
184 changes: 184 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,180 @@ The SYCL kernel in the previous code sample meets these expectations.
}];
}

def SYCLKernelEntryPointDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``sycl_kernel_entry_point`` attribute facilitates the generation of an
offload kernel entry point, sometimes called a SYCL kernel caller function,
suitable for invoking a SYCL kernel on an offload device. The attribute is
intended for use in the implementation of SYCL kernel invocation functions
like the ``single_task`` and ``parallel_for`` member functions of the
``sycl::handler`` class specified in section 4.9.4, "Command group ``handler``
class", of the SYCL 2020 specification.

The attribute requires a single type argument that specifies a class type that
meets the requirements for a SYCL kernel name as described in section 5.2,
"Naming of kernels", of the SYCL 2020 specification. A unique kernel name type
is required for each function declared with the attribute. The attribute may
not first appear on a declaration that follows a definition of the function.

The attribute only appertains to functions and only those that meet the
following requirements.

* Has a ``void`` return type.
* Is not a non-static member function, constructor, or destructor.
* Is not a C variadic function.
* Is not a coroutine.
* Is not defined as deleted or as defaulted.
* Is not declared with the ``constexpr`` or ``consteval`` specifiers.
* Is not declared with the ``[[noreturn]]`` attribute.

Use in the implementation of a SYCL kernel invocation function might look as
follows.

.. code-block:: c++

namespace sycl {
class handler {
template<typename KernelNameType, typename KernelType>
[[ clang::sycl_kernel_entry_point(KernelNameType) ]]
static void kernel_entry_point(KernelType kernel) {
kernel();
}

public:
template<typename KernelNameType, typename KernelType>
void single_task(KernelType kernel) {
// Call kernel_entry_point() to trigger generation of an offload
// kernel entry point.
kernel_entry_point<KernelNameType>(kernel);
// Call functions appropriate for the desired offload backend
// (OpenCL, CUDA, HIP, Level Zero, etc...).
}
};
} // namespace sycl

A SYCL kernel is a callable object of class type that is constructed on a host,
often via a lambda expression, and then passed to a SYCL kernel invocation
function to be executed on an offload device. A SYCL kernel invocation function
is responsible for copying the provided SYCL kernel object to an offload
device and initiating a call to it. The SYCL kernel object and its data members
constitute the parameters of an offload kernel.

A SYCL kernel type is required to satisfy the device copyability requirements
specified in section 3.13.1, "Device copyable", of the SYCL 2020 specification.
Additionally, any data members of the kernel object type are required to satisfy
section 4.12.4, "Rules for parameter passing to kernels". For most types, these
rules require that the type is trivially copyable. However, the SYCL
specification mandates that certain special SYCL types, such as
``sycl::accessor`` and ``sycl::stream`` be device copyable even if they are not
trivially copyable. These types require special handling because they cannot
be copied to device memory as if by ``memcpy()``. Additionally, some offload
backends, OpenCL for example, require objects of some of these types to be
passed as individual arguments to the offload kernel.

An offload kernel consists of an entry point function that declares the
parameters of the offload kernel and the set of all functions and variables that
are directly or indirectly used by the entry point function.

A SYCL kernel invocation function invokes a SYCL kernel on a device by
performing the following tasks (likely with the help of an offload backend
like OpenCL):

#. Identifying the offload kernel entry point to be used for the SYCL kernel.

#. Deconstructing the SYCL kernel object, if necessary, to produce the set of
offload kernel arguments required by the offload kernel entry point.

#. Copying the offload kernel arguments to device memory.

#. Initiating execution of the offload kernel entry point.

The offload kernel entry point for a SYCL kernel performs the following tasks:

#. Reconstituting the SYCL kernel object, if necessary, using the offload
kernel parameters.

#. Calling the ``operator()`` member function of the (reconstituted) SYCL kernel
object.

The ``sycl_kernel_entry_point`` attribute automates generation of an offload
kernel entry point that performs those latter tasks. The parameters and body of
a function declared with the ``sycl_kernel_entry_point`` attribute specify a
pattern from which the parameters and body of the entry point function are
derived. Consider the following call to a SYCL kernel invocation function.

.. code-block:: c++

struct S { int i; };
void f(sycl::handler &handler, sycl::stream &sout, S s) {
handler.single_task<struct KN>([=] {
sout << "The value of s.i is " << s.i << "\n";
});
}

The SYCL kernel object is the result of the lambda expression. It has two
data members corresponding to the captures of ``sout`` and ``s``. Since one
of these data members corresponds to a special SYCL type that must be passed
individually as an offload kernel parameter, it is necessary to decompose the
SYCL kernel object into its constituent parts; the offload kernel will have
two kernel parameters. Given a SYCL implementation that uses a
``sycl_kernel_entry_point`` attributed function like the one shown above, an
offload kernel entry point function will be generated that looks approximately
as follows.

.. code-block:: c++

void sycl-kernel-caller-for-KN(sycl::stream sout, S s) {
kernel-type kernel = { sout, s };
kernel();
}

There are a few items worthy of note:

#. The name of the generated function incorporates the SYCL kernel name,
``KN``, that was passed as the ``KernelNameType`` template parameter to
``kernel_entry_point()`` and provided as the argument to the
``sycl_kernel_entry_point`` attribute. There is a one-to-one correspondence
between SYCL kernel names and offload kernel entry points.

#. The SYCL kernel is a lambda closure type and therefore has no name;
``kernel-type`` is substituted above and corresponds to the ``KernelType``
template parameter deduced in the call to ``kernel_entry_point()``.
Lambda types cannot be declared and initialized using the aggregate
initialization syntax used above, but the intended behavior should be clear.

#. ``S`` is a device copyable type that does not directly or indirectly contain
a data member of a SYCL special type. It therefore does not need to be
decomposed into its constituent members to be passed as a kernel argument.

#. The depiction of the ``sycl::stream`` parameter as a single self-contained
kernel parameter is an oversimplification. SYCL special types may require
additional decomposition such that the generated function might have three
or more parameters depending on how the SYCL library implementation defines
these types.

#. The call to ``kernel_entry_point()`` has no effect other than to trigger
emission of the entry point function. The statements that make up the body
of the function are not executed when the function is called; they are
only used in the generation of the entry point function.

It is not necessary for a function declared with the ``sycl_kernel_entry_point``
attribute to be called for the offload kernel entry point to be emitted. For
inline functions and function templates, any ODR-use will suffice. For other
functions, an ODR-use is not required; the offload kernel entry point will be
emitted if the function is defined.

Functions declared with the ``sycl_kernel_entry_point`` attribute are not
limited to the simple example shown above. They may have additional template
parameters, declare additional function parameters, and have complex control
flow in the function body. Function parameter decomposition and reconstitution
is performed for all function parameters. The function must abide by the
language feature restrictions described in section 5.4, "Language restrictions
for device functions" in the SYCL 2020 specification.
}];
}

def SYCLSpecialClassDocs : Documentation {
let Category = DocCatStmt;
let Content = [{
Expand Down Expand Up @@ -6620,6 +6794,16 @@ unbind runtime APIs.
}];
}

def CUDAGridConstantAttrDocs : Documentation {
let Category = DocCatDecl;
let Content = [{
The ``__grid_constant__`` attribute can be applied to a ``const``-qualified kernel
function argument and allows the compiler to take the address of that argument without
making a copy. The attribute applies to sm_70 or newer GPUs, during compilation
with CUDA-11.7 (PTX 7.7) or newer, and is ignored otherwise.
}];
}

def HIPManagedAttrDocs : Documentation {
let Category = DocCatDecl;
let Content = [{
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4792,6 +4792,12 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}

def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_dot4add_i8packed"];
let Attributes = [NoThrow, Const];
let Prototype = "int(unsigned int, unsigned int, int)";
}

def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_frac"];
let Attributes = [NoThrow, Const];
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -9100,6 +9100,8 @@ def err_cuda_host_shared : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and "
"__managed__ are not allowed on non-static local variables">;
def err_cuda_grid_constant_not_allowed : Error<
"__grid_constant__ is only allowed on const-qualified kernel parameters">;
def err_cuda_ovl_target : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 function %1 "
"cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">;
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/TokenKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,7 @@ KEYWORD(out , KEYHLSL)
// HLSL Type traits
TYPE_TRAIT_2(__builtin_hlsl_is_scalarized_layout_compatible, IsScalarizedLayoutCompatible, KEYHLSL)
TYPE_TRAIT_1(__builtin_hlsl_is_intangible, IsIntangibleType, KEYHLSL)
TYPE_TRAIT_1(__builtin_hlsl_is_typed_resource_element_compatible, IsTypedResourceElementCompatible, KEYHLSL)

// OpenMP Type Traits
UNARY_EXPR_OR_TYPE_TRAIT(__builtin_omp_required_simd_align, OpenMPRequiredSimdAlign, KEYALL)
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/CIR/CIRGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class CIRGenerator : public clang::ASTConsumer {
~CIRGenerator() override;
void Initialize(clang::ASTContext &astCtx) override;
bool HandleTopLevelDecl(clang::DeclGroupRef group) override;
mlir::ModuleOp getModule() const;
};

} // namespace cir
Expand Down
21 changes: 21 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIRDialect.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,25 @@
#ifndef LLVM_CLANG_CIR_DIALECT_IR_CIRDIALECT_H
#define LLVM_CLANG_CIR_DIALECT_IR_CIRDIALECT_H

#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Interfaces/MemorySlotInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

#include "clang/CIR/Dialect/IR/CIROpsDialect.h.inc"

// TableGen'erated files for MLIR dialects require that a macro be defined when
// they are included. GET_OP_CLASSES tells the file to define the classes for
// the operations of that dialect.
#define GET_OP_CLASSES
#include "clang/CIR/Dialect/IR/CIROps.h.inc"

#endif // LLVM_CLANG_CIR_DIALECT_IR_CIRDIALECT_H
82 changes: 82 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,86 @@

include "clang/CIR/Dialect/IR/CIRDialect.td"

include "mlir/IR/BuiltinAttributeInterfaces.td"
include "mlir/IR/EnumAttr.td"
include "mlir/IR/SymbolInterfaces.td"
include "mlir/IR/CommonAttrConstraints.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/FunctionInterfaces.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/Interfaces/MemorySlotInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"

//===----------------------------------------------------------------------===//
// CIR Ops
//===----------------------------------------------------------------------===//

// LLVMLoweringInfo is used by cir-tablegen to generate LLVM lowering logic
// automatically for CIR operations. The `llvmOp` field gives the name of the
// LLVM IR dialect operation that the CIR operation will be lowered to. The
// input arguments of the CIR operation will be passed in the same order to the
// lowered LLVM IR operation.
//
// Example:
//
// For the following CIR operation definition:
//
// def FooOp : CIR_Op<"foo"> {
// // ...
// let arguments = (ins CIR_AnyType:$arg1, CIR_AnyType:$arg2);
// let llvmOp = "BarOp";
// }
//
// cir-tablegen will generate LLVM lowering code for the FooOp similar to the
// following:
//
// class CIRFooOpLowering
// : public mlir::OpConversionPattern<mlir::cir::FooOp> {
// public:
// using OpConversionPattern<mlir::cir::FooOp>::OpConversionPattern;
//
// mlir::LogicalResult matchAndRewrite(
// mlir::cir::FooOp op,
// OpAdaptor adaptor,
// mlir::ConversionPatternRewriter &rewriter) const override {
// rewriter.replaceOpWithNewOp<mlir::LLVM::BarOp>(
// op, adaptor.getOperands()[0], adaptor.getOperands()[1]);
// return mlir::success();
// }
// }
//
// If you want fully customized LLVM IR lowering logic, simply exclude the
// `llvmOp` field from your CIR operation definition.
class LLVMLoweringInfo {
string llvmOp = "";
}

class CIR_Op<string mnemonic, list<Trait> traits = []> :
Op<CIR_Dialect, mnemonic, traits>, LLVMLoweringInfo;

//===----------------------------------------------------------------------===//
// FuncOp
//===----------------------------------------------------------------------===//

// TODO(CIR): For starters, cir.func has only name, nothing else. The other
// properties of a function will be added over time as more of ClangIR is
// upstreamed.

def FuncOp : CIR_Op<"func"> {
let summary = "Declare or define a function";
let description = [{
... lots of text to be added later ...
}];

let arguments = (ins SymbolNameAttr:$sym_name);

let skipDefaultBuilders = 1;

let builders = [OpBuilder<(ins "StringRef":$name)>];

let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
}

#endif // LLVM_CLANG_CIR_DIALECT_IR_CIROPS
1 change: 1 addition & 0 deletions clang/include/clang/Sema/SemaHLSL.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ class SemaHLSL : public SemaBase {

// HLSL Type trait implementations
bool IsScalarizedLayoutCompatible(QualType T1, QualType T2) const;
bool IsTypedResourceElementCompatible(QualType T1);

bool CheckCompatibleParameterABI(FunctionDecl *New, FunctionDecl *Old);

Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Sema/SemaSYCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class SemaSYCL : public SemaBase {
ParsedType ParsedTy);

void handleKernelAttr(Decl *D, const ParsedAttr &AL);
void handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL);
};

} // namespace clang
Expand Down
27 changes: 27 additions & 0 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14411,6 +14411,33 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
}
}

/// Creates the SYCLKernelInfo record that pairs \p KernelNameType with the
/// entry point declaration \p FD.
static SYCLKernelInfo BuildSYCLKernelInfo(CanQualType KernelNameType,
                                          const FunctionDecl *FD) {
  return SYCLKernelInfo(KernelNameType, FD);
}

/// Records \p FD in SYCLKernels under the canonical form of the kernel name
/// type carried by its sycl_kernel_entry_point attribute.
void ASTContext::registerSYCLEntryPointFunction(FunctionDecl *FD) {
  // Invalid or dependent declarations are never registered; such requests
  // are dropped without any diagnostic.
  const bool IsUnregistrable = FD->isInvalidDecl() || FD->isTemplated();
  if (IsUnregistrable)
    return;

  const auto *EntryPointAttr = FD->getAttr<SYCLKernelEntryPointAttr>();
  assert(EntryPointAttr && "Missing sycl_kernel_entry_point attribute");

  CanQualType KernelNameType =
      getCanonicalType(EntryPointAttr->getKernelName());

  // Registering the same entity more than once is tolerated. Conflicting
  // kernel names must already have been detected and diagnosed by the
  // caller, so a mismatch here is only checked by assertion.
  auto Existing = SYCLKernels.find(KernelNameType);
  assert((Existing == SYCLKernels.end() ||
          declaresSameEntity(FD,
                             Existing->second.getKernelEntryPointDecl())) &&
         "SYCL kernel name conflict");
  (void)Existing; // Only referenced by the assertion above.

  SYCLKernelInfo Info = BuildSYCLKernelInfo(KernelNameType, FD);
  SYCLKernels.insert(std::make_pair(KernelNameType, Info));
}

OMPTraitInfo &ASTContext::getNewOMPTraitInfo() {
OMPTraitInfoVector.emplace_back(new OMPTraitInfo());
return *OMPTraitInfoVector.back();
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ add_clang_library(clangAST
DeclOpenMP.cpp
DeclPrinter.cpp
DeclTemplate.cpp
DynamicRecursiveASTVisitor.cpp
ParentMapContext.cpp
Expr.cpp
ExprClassification.cpp
Expand Down
452 changes: 452 additions & 0 deletions clang/lib/AST/DynamicRecursiveASTVisitor.cpp

Large diffs are not rendered by default.

136 changes: 132 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"

#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Location.h"
Expand All @@ -24,9 +27,134 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &context,
clang::ASTContext &astctx,
const clang::CodeGenOptions &cgo,
DiagnosticsEngine &diags)
: astCtx(astctx), langOpts(astctx.getLangOpts()),
theModule{mlir::ModuleOp::create(mlir::UnknownLoc())},
target(astCtx.getTargetInfo()) {}
: builder(&context), astCtx(astctx), langOpts(astctx.getLangOpts()),
theModule{mlir::ModuleOp::create(mlir::UnknownLoc::get(&context))},
diags(diags), target(astCtx.getTargetInfo()) {}

/// Converts the presumed location of \p cLoc into an MLIR file/line/column
/// location. \p cLoc must be valid.
mlir::Location CIRGenModule::getLoc(SourceLocation cLoc) {
  assert(cLoc.isValid() && "expected valid source location");

  PresumedLoc presumed = astCtx.getSourceManager().getPresumedLoc(cLoc);
  StringRef file = presumed.getFilename();
  mlir::StringAttr fileAttr = builder.getStringAttr(file);

  return mlir::FileLineColLoc::get(fileAttr, presumed.getLine(),
                                   presumed.getColumn());
}

/// Converts \p cRange into an MLIR fused location built from the converted
/// begin and end locations. \p cRange must be valid.
mlir::Location CIRGenModule::getLoc(SourceRange cRange) {
  assert(cRange.isValid() && "expected a valid source range");

  const mlir::Location first = getLoc(cRange.getBegin());
  const mlir::Location last = getLoc(cRange.getEnd());

  // No metadata is attached: a default-constructed attribute is passed.
  return mlir::FusedLoc::get({first, last}, mlir::Attribute(),
                             builder.getContext());
}

/// Emits code for a single global declaration.
///
/// Function declarations without a body are skipped, unless they force an
/// externally visible definition, which is reported as not yet implemented.
/// Anything that is not a function is reported as not yet implemented.
void CIRGenModule::buildGlobal(clang::GlobalDecl gd) {
  const auto *global = cast<ValueDecl>(gd.getDecl());

  if (const auto *fd = dyn_cast<FunctionDecl>(global)) {
    // Update deferred annotations with the latest declaration if the function
    // was already used or defined.
    if (fd->hasAttr<AnnotateAttr>())
      errorNYI(fd->getSourceRange(), "deferredAnnotations");
    if (!fd->doesThisDeclarationHaveABody()) {
      if (!fd->doesDeclarationForceExternallyVisibleDefinition())
        return;

      errorNYI(fd->getSourceRange(),
               "function declaration that forces code gen");
      return;
    }
  } else {
    errorNYI(global->getSourceRange(), "global variable declaration");
    // buildGlobalDefinition() only handles functions and treats everything
    // else as unreachable, so stop once the diagnostic has been issued.
    return;
  }

  // TODO(CIR): Defer emitting some global definitions until later
  buildGlobalDefinition(gd);
}

/// Creates a cir.func operation named after the function in \p gd and
/// appends it to the module.
///
/// \param gd  Global declaration; must refer to a FunctionDecl.
/// \param op  Currently unused.
void CIRGenModule::buildGlobalFunctionDefinition(clang::GlobalDecl gd,
                                                 mlir::Operation *op) {
  auto const *funcDecl = cast<FunctionDecl>(gd.getDecl());

  // Functions whose name is not a plain identifier (for example overloaded
  // operators) have no IdentifierInfo. Report them instead of dereferencing
  // a null pointer.
  const auto *identifier = funcDecl->getIdentifier();
  if (!identifier) {
    errorNYI(funcDecl->getSourceRange(),
             "function definition with a non-identifier for a name");
    return;
  }

  auto funcOp = builder.create<mlir::cir::FuncOp>(
      getLoc(funcDecl->getSourceRange()), identifier->getName());
  theModule.push_back(funcOp);
}

/// Emits the definition for \p gd. Only functions are accepted; member
/// functions and multiversion functions are reported as not yet implemented.
void CIRGenModule::buildGlobalDefinition(clang::GlobalDecl gd,
                                         mlir::Operation *op) {
  const auto *decl = cast<ValueDecl>(gd.getDecl());
  const auto *fd = dyn_cast<FunctionDecl>(decl);
  if (!fd)
    llvm_unreachable("Invalid argument to CIRGenModule::buildGlobalDefinition");

  // TODO(CIR): Skip generation of CIR for functions with available_externally
  // linkage at -O0.

  if (const auto *method = dyn_cast<CXXMethodDecl>(decl)) {
    // Make sure to emit the definition(s) before we emit the thunks. This is
    // necessary for the generation of certain thunks.
    errorNYI(method->getSourceRange(), "member function");
    return;
  }

  // The multiversion diagnostic does not stop emission; the function
  // definition is still built afterwards.
  if (fd->isMultiVersion())
    errorNYI(fd->getSourceRange(), "multiversion functions");

  buildGlobalFunctionDefinition(gd, op);
}

// Emit code for a single top level declaration.
void CIRGenModule::buildTopLevelDecl(Decl *decl) {}
void CIRGenModule::buildTopLevelDecl(Decl *decl) {
  // Dependent (templated) declarations produce no code.
  if (decl->isTemplated())
    return;

  switch (decl->getKind()) {
  case Decl::Function: {
    auto *func = cast<FunctionDecl>(decl);
    // Consteval functions are never emitted.
    if (func->isConsteval())
      break;
    buildGlobal(func);
    break;
  }

  default:
    // Every other declaration kind is reported as not yet implemented.
    errorNYI(decl->getBeginLoc(), "declaration of kind",
             decl->getDeclKindName());
    break;
  }
}

/// Reports a "not yet implemented" error for \p feature with no source
/// location. The returned builder can take further arguments.
DiagnosticBuilder CIRGenModule::errorNYI(llvm::StringRef feature) {
  return diags.Report(diags.getCustomDiagID(
             DiagnosticsEngine::Error,
             "ClangIR code gen Not Yet Implemented: %0"))
         << feature;
}

/// Reports a "not yet implemented" error for \p feature at \p loc.
DiagnosticBuilder CIRGenModule::errorNYI(SourceLocation loc,
                                         llvm::StringRef feature) {
  return diags.Report(loc, diags.getCustomDiagID(
                               DiagnosticsEngine::Error,
                               "ClangIR code gen Not Yet Implemented: %0"))
         << feature;
}

/// Reports a "not yet implemented" error at \p loc, formatted as
/// "<feature>: <name>".
DiagnosticBuilder CIRGenModule::errorNYI(SourceLocation loc,
                                         llvm::StringRef feature,
                                         llvm::StringRef name) {
  return diags.Report(loc,
                      diags.getCustomDiagID(
                          DiagnosticsEngine::Error,
                          "ClangIR code gen Not Yet Implemented: %0: %1"))
         << feature << name;
}

// Range flavour of errorNYI(SourceLocation, StringRef): the diagnostic is
// reported at the beginning of `loc`, and the whole range is then streamed
// into the returned builder.
DiagnosticBuilder CIRGenModule::errorNYI(SourceRange loc,
llvm::StringRef feature) {
return errorNYI(loc.getBegin(), feature) << loc;
}

// Range flavour of errorNYI(SourceLocation, StringRef, StringRef): the
// diagnostic is reported at the beginning of `loc` with both `feature` and
// `name`, and the whole range is then streamed into the returned builder.
DiagnosticBuilder CIRGenModule::errorNYI(SourceRange loc,
llvm::StringRef feature,
llvm::StringRef name) {
return errorNYI(loc.getBegin(), feature, name) << loc;
}
34 changes: 34 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,21 @@

#include "CIRGenTypeCache.h"

#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "llvm/ADT/StringRef.h"

namespace clang {
class ASTContext;
class CodeGenOptions;
class Decl;
class DiagnosticBuilder;
class DiagnosticsEngine;
class GlobalDecl;
class LangOptions;
class SourceLocation;
class SourceRange;
class TargetInfo;
} // namespace clang

Expand All @@ -44,6 +50,10 @@ class CIRGenModule : public CIRGenTypeCache {
~CIRGenModule() = default;

private:
// TODO(CIR) 'builder' will change to CIRGenBuilderTy once that type is
// defined
mlir::OpBuilder builder;

/// Hold Clang AST information.
clang::ASTContext &astCtx;

Expand All @@ -52,10 +62,34 @@ class CIRGenModule : public CIRGenTypeCache {
/// A "module" matches a c/cpp source file: containing a list of functions.
mlir::ModuleOp theModule;

clang::DiagnosticsEngine &diags;

const clang::TargetInfo &target;

public:
mlir::ModuleOp getModule() const { return theModule; }

/// Helpers to convert the presumed location of Clang's SourceLocation to an
/// MLIR Location.
mlir::Location getLoc(clang::SourceLocation cLoc);
mlir::Location getLoc(clang::SourceRange cRange);

void buildTopLevelDecl(clang::Decl *decl);

/// Emit code for a single global function or variable declaration. Forward
/// declarations are emitted lazily.
void buildGlobal(clang::GlobalDecl gd);

void buildGlobalDefinition(clang::GlobalDecl gd,
mlir::Operation *op = nullptr);
void buildGlobalFunctionDefinition(clang::GlobalDecl gd, mlir::Operation *op);

/// Helpers to emit "not yet implemented" error diagnostics
DiagnosticBuilder errorNYI(llvm::StringRef);
DiagnosticBuilder errorNYI(SourceLocation, llvm::StringRef);
DiagnosticBuilder errorNYI(SourceLocation, llvm::StringRef, llvm::StringRef);
DiagnosticBuilder errorNYI(SourceRange, llvm::StringRef);
DiagnosticBuilder errorNYI(SourceRange, llvm::StringRef, llvm::StringRef);
};
} // namespace cir

Expand Down
10 changes: 9 additions & 1 deletion clang/lib/CIR/CodeGen/CIRGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@

#include "CIRGenModule.h"

#include "mlir/IR/MLIRContext.h"

#include "clang/AST/DeclGroup.h"
#include "clang/CIR/CIRGenerator.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"

using namespace cir;
using namespace clang;
Expand All @@ -31,9 +34,14 @@ void CIRGenerator::Initialize(ASTContext &astCtx) {

this->astCtx = &astCtx;

cgm = std::make_unique<CIRGenModule>(*mlirCtx, astCtx, codeGenOpts, diags);
mlirCtx = std::make_unique<mlir::MLIRContext>();
mlirCtx->loadDialect<mlir::cir::CIRDialect>();
cgm = std::make_unique<CIRGenModule>(*mlirCtx.get(), astCtx, codeGenOpts,
diags);
}

/// Returns the MLIR module held by the underlying CIRGenModule.
mlir::ModuleOp CIRGenerator::getModule() const {
  return cgm->getModule();
}

bool CIRGenerator::HandleTopLevelDecl(DeclGroupRef group) {

for (Decl *decl : group)
Expand Down
38 changes: 38 additions & 0 deletions clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//===- CIRAttrs.cpp - MLIR CIR Attributes ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the attributes in the CIR dialect.
//
//===----------------------------------------------------------------------===//

#include "clang/CIR/Dialect/IR/CIRDialect.h"

using namespace mlir;
using namespace mlir::cir;

//===----------------------------------------------------------------------===//
// General CIR parsing / printing
//===----------------------------------------------------------------------===//

/// Attribute parsing hook. The dialect defines no attributes yet, so this
/// always returns a default-constructed (null) Attribute.
Attribute CIRDialect::parseAttribute(DialectAsmParser &parser,
                                     Type type) const {
  return Attribute();
}

/// Attribute printing hook. Intentionally empty: the dialect has no
/// attributes to print yet.
void CIRDialect::printAttribute(Attribute attr, DialectAsmPrinter &os) const {
}

//===----------------------------------------------------------------------===//
// CIR Dialect
//===----------------------------------------------------------------------===//

/// Attribute registration hook. Intentionally empty: the dialect has no
/// attributes to register yet.
void CIRDialect::registerAttributes() {
}
55 changes: 54 additions & 1 deletion clang/lib/CIR/Dialect/IR/CIRDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,57 @@
//
//===----------------------------------------------------------------------===//

#include <clang/CIR/Dialect/IR/CIRDialect.h>
#include "clang/CIR/Dialect/IR/CIRDialect.h"

#include "mlir/Support/LogicalResult.h"

#include "clang/CIR/Dialect/IR/CIROpsDialect.cpp.inc"

using namespace mlir;
using namespace mlir::cir;

//===----------------------------------------------------------------------===//
// CIR Dialect
//===----------------------------------------------------------------------===//

// Dialect initialization: registers the dialect's types and attributes, then
// its operations.
void mlir::cir::CIRDialect::initialize() {
registerTypes();
registerAttributes();
addOperations<
// The generated file is included inside the template argument list, so with
// GET_OP_LIST defined it is expected to expand to the list of operation
// classes. The #define must stay immediately before the #include.
#define GET_OP_LIST
#include "clang/CIR/Dialect/IR/CIROps.cpp.inc"
>();
}

//===----------------------------------------------------------------------===//
// FuncOp
//===----------------------------------------------------------------------===//

// Builds a FuncOp by recording `name` as a string attribute under the symbol
// table's symbol-name attribute key. Nothing else is added to `result`.
void mlir::cir::FuncOp::build(OpBuilder &builder, OperationState &result,
                              StringRef name) {
  StringAttr symbolName = builder.getStringAttr(name);
  result.addAttribute(SymbolTable::getSymbolAttrName(), symbolName);
}

// Parses the textual form of a FuncOp, which consists solely of a symbol name.
// The parsed name is stored in `state.attributes` under the symbol-name
// attribute key. Returns failure if the symbol name cannot be parsed.
ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) {
  StringAttr symbolName;
  const bool nameMissing = failed(parser.parseSymbolName(
      symbolName, SymbolTable::getSymbolAttrName(), state.attributes));
  return nameMissing ? failure() : success();
}

// Prints a FuncOp as a single space followed by its symbol name; the name is
// the only property a function has for now.
void cir::FuncOp::print(OpAsmPrinter &p) {
  const auto symbolName = getSymName();
  p << ' ';
  p.printSymbolName(symbolName);
}

// Verifier for FuncOp. No invariants are checked yet, so verification always
// succeeds.
mlir::LogicalResult mlir::cir::FuncOp::verify() {
  return success();
}

//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
//===----------------------------------------------------------------------===//

#define GET_OP_CLASSES
#include "clang/CIR/Dialect/IR/CIROps.cpp.inc"
37 changes: 37 additions & 0 deletions clang/lib/CIR/Dialect/IR/CIRTypes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===- CIRTypes.cpp - MLIR CIR Types --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the types in the CIR dialect.
//
//===----------------------------------------------------------------------===//

#include "clang/CIR/Dialect/IR/CIRDialect.h"

using namespace mlir;
using namespace mlir::cir;

//===----------------------------------------------------------------------===//
// General CIR parsing / printing
//===----------------------------------------------------------------------===//

// Parse hook for types of the CIR dialect. The dialect has no types to parse
// yet, so a default-constructed Type is always returned and the parser is left
// untouched.
Type CIRDialect::parseType(DialectAsmParser &parser) const {
  // Nothing to parse until the dialect gains types.
  return {};
}
// Print hook for types of the CIR dialect. The body is intentionally empty:
// nothing is written to the printer.
void CIRDialect::printType(Type type, DialectAsmPrinter &os) const {
// No types yet to print
}

//===----------------------------------------------------------------------===//
// CIR Dialect
//===----------------------------------------------------------------------===//

// Type registration hook for the CIR dialect. Currently a no-op because there
// are no types to register.
void CIRDialect::registerTypes() {
// No types yet to register
}
5 changes: 5 additions & 0 deletions clang/lib/CIR/Dialect/IR/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
add_clang_library(MLIRCIR
CIRAttrs.cpp
CIRDialect.cpp
CIRTypes.cpp

LINK_LIBS PUBLIC
MLIRIR
)
41 changes: 40 additions & 1 deletion clang/lib/CIR/FrontendAction/CIRGenAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ class CIRGenConsumer : public clang::ASTConsumer {

virtual void anchor();

CIRGenAction::OutputType Action;

std::unique_ptr<raw_pwrite_stream> OutputStream;

ASTContext *Context{nullptr};
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
std::unique_ptr<CIRGenerator> Gen;

Expand All @@ -37,14 +40,37 @@ class CIRGenConsumer : public clang::ASTConsumer {
const LangOptions &LangOptions,
const FrontendOptions &FEOptions,
std::unique_ptr<raw_pwrite_stream> OS)
: OutputStream(std::move(OS)), FS(VFS),
: Action(Action), OutputStream(std::move(OS)), FS(VFS),
Gen(std::make_unique<CIRGenerator>(DiagnosticsEngine, std::move(VFS),
CodeGenOptions)) {}

// Records the AST context for this consumer and forwards it to the CIR
// generator. Must be called at most once; a second call trips the assertion.
void Initialize(ASTContext &Ctx) override {
assert(!Context && "initialized multiple times");
Context = &Ctx;
Gen->Initialize(Ctx);
}

// Forwards each top-level declaration group to the CIR generator. Always
// returns true, regardless of what the generator does with the group.
bool HandleTopLevelDecl(DeclGroupRef D) override {
Gen->HandleTopLevelDecl(D);
return true;
}

// Lets the generator process the translation unit, then emits the resulting
// module according to the requested action. Only EmitCIR is implemented: the
// module is printed, with debug info enabled in non-pretty form, to the output
// stream. Nothing is printed if either the stream or the module is missing.
void HandleTranslationUnit(ASTContext &C) override {
  Gen->HandleTranslationUnit(C);
  mlir::ModuleOp MlirModule = Gen->getModule();

  switch (Action) {
  case CIRGenAction::OutputType::EmitCIR: {
    // Without both a stream and a module there is nothing to emit.
    if (!OutputStream || !MlirModule)
      break;
    mlir::OpPrintingFlags PrintFlags;
    PrintFlags.enableDebugInfo(/*enable=*/true, /*prettyForm=*/false);
    MlirModule->print(*OutputStream, PrintFlags);
    break;
  }
  default:
    llvm_unreachable("NYI: CIRGenAction other than EmitCIR");
  }
}
};
} // namespace cir

Expand All @@ -55,10 +81,23 @@ CIRGenAction::CIRGenAction(OutputType Act, mlir::MLIRContext *MLIRCtx)

CIRGenAction::~CIRGenAction() { MLIRMod.release(); }

// Creates the default output file for the given action. For EmitCIR this is a
// non-binary file derived from `InFile` with the "cir" extension. The switch
// deliberately has no default label; any value that falls out of it is
// reported as unreachable.
static std::unique_ptr<raw_pwrite_stream>
getOutputStream(CompilerInstance &CI, StringRef InFile,
                CIRGenAction::OutputType Action) {
  switch (Action) {
  case CIRGenAction::OutputType::EmitCIR:
    return CI.createDefaultOutputFile(/*Binary=*/false, InFile, "cir");
  }

  llvm_unreachable("Invalid CIRGenAction::OutputType");
}

std::unique_ptr<ASTConsumer>
CIRGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
std::unique_ptr<llvm::raw_pwrite_stream> Out = CI.takeOutputStream();

if (!Out)
Out = getOutputStream(CI, InFile, Action);

auto Result = std::make_unique<cir::CIRGenConsumer>(
Action, CI.getDiagnostics(), &CI.getVirtualFileSystem(),
CI.getHeaderSearchOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(),
Expand Down
18 changes: 17 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18671,6 +18671,12 @@ Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
auto *LD = CGF.Builder.CreateLoad(
Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));

llvm::MDBuilder MDB(CGF.getLLVMContext());

// Known non-zero.
LD->setMetadata(llvm::LLVMContext::MD_range,
MDB.createRange(APInt(32, 1), APInt::getZero(32)));
LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
llvm::MDNode::get(CGF.getLLVMContext(), {}));
return LD;
Expand Down Expand Up @@ -18855,7 +18861,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/T0->getScalarType(),
getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
} break;
}
case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
Value *A = EmitScalarExpr(E->getArg(0));
Value *B = EmitScalarExpr(E->getArg(1));
Value *C = EmitScalarExpr(E->getArg(2));

Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
return Builder.CreateIntrinsic(
/*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
"hlsl.dot4add.i8packed");
}
case Builtin::BI__builtin_hlsl_lerp: {
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)

Expand Down
36 changes: 29 additions & 7 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
Expand Down Expand Up @@ -78,7 +79,13 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);
int Operand,
const SmallVectorImpl<int> &GridConstantArgs);

// Convenience overload for callers that have no grid-constant arguments: it
// forwards to the four-argument overload with an empty index list.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                            int Operand) {
  const SmallVector<int, 1> NoGridConstantArgs;
  addNVVMMetadata(GV, Name, Operand, NoGridConstantArgs);
}

private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
Expand Down Expand Up @@ -240,7 +247,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}

const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
if (!FD)
return;

llvm::Function *F = cast<llvm::Function>(GV);

Expand All @@ -263,8 +271,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
if (FD->hasAttr<CUDAGlobalAttr>()) {
SmallVector<int, 10> GCI;
for (auto IV : llvm::enumerate(FD->parameters()))
if (IV.value()->hasAttr<CUDAGridConstantAttr>())
// For some reason arg indices are 1-based in NVVM
GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
addNVVMMetadata(F, "kernel", 1, GCI);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
Expand All @@ -276,18 +289,27 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
StringRef Name, int Operand) {
// Appends one node to the module-level "nvvm.annotations" named metadata of
// GV's parent module. The node's operands are GV, the string Name and Operand
// as an i32 constant. When GridConstantArgs is non-empty, two more operands
// follow: the string "grid_constant" and a node holding each listed index as
// an i32 constant.
void NVPTXTargetCodeGenInfo::addNVVMMetadata(
llvm::GlobalValue *GV, StringRef Name, int Operand,
const SmallVectorImpl<int> &GridConstantArgs) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();

// Get "nvvm.annotations" metadata node (created if it does not exist yet)
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

llvm::Metadata *MDVals[] = {
SmallVector<llvm::Metadata *, 5> MDVals = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
// Only emit the "grid_constant" operands when at least one index was given.
if (!GridConstantArgs.empty()) {
SmallVector<llvm::Metadata *, 10> GCM;
for (int I : GridConstantArgs)
GCM.push_back(llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
llvm::MDNode::get(Ctx, GCM)});
}
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
Expand All @@ -309,7 +331,7 @@ NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
return llvm::ConstantExpr::getAddrSpaceCast(
llvm::ConstantPointerNull::get(NPT), PT);
}
}
} // namespace

void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
const CUDALaunchBoundsAttr *Attr,
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
// some build systems. We don't try to be complete here because we don't
// care to encourage this usage model.
if (A->getOption().matches(options::OPT_Wp_COMMA) &&
A->getNumValues() > 0 &&
(A->getValue(0) == StringRef("-MD") ||
A->getValue(0) == StringRef("-MMD"))) {
// Rewrite to -MD/-MMD along with -MF.
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5358,6 +5358,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
} else if (JA.getType() == types::TY_RewrittenLegacyObjC) {
CmdArgs.push_back("-rewrite-objc");
rewriteKind = RK_Fragile;
} else if (JA.getType() == types::TY_CIR) {
CmdArgs.push_back("-emit-cir");
} else {
assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!");
}
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/PS4CPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,10 @@ void tools::PS5cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-z");
CmdArgs.push_back("start-stop-visibility=hidden");

// DT_DEBUG is not supported on PlayStation.
CmdArgs.push_back("-z");
CmdArgs.push_back("rodynamic");

CmdArgs.push_back("-z");
CmdArgs.push_back("common-page-size=0x4000");

Expand Down
8 changes: 2 additions & 6 deletions clang/lib/Driver/XRayArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
false)) {
XRayShared = true;

// Certain targets support DSO instrumentation
switch (Triple.getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::x86_64:
break;
default:
// DSO instrumentation is currently limited to x86_64
if (Triple.getArch() != llvm::Triple::x86_64) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< "-fxray-shared" << Triple.str();
}
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,16 @@ uint64_t dot(uint64_t3, uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
uint64_t dot(uint64_t4, uint64_t4);

//===----------------------------------------------------------------------===//
// dot4add builtins
//===----------------------------------------------------------------------===//

/// \fn int dot4add_i8packed(uint A, uint B, int C)
/// \brief Alias of the __builtin_hlsl_dot4add_i8packed builtin, available
/// from shader model 6.4.
/// \param A The first unsigned 32-bit operand.
/// \param B The second unsigned 32-bit operand.
/// \param C The signed 32-bit operand.
///
/// NOTE(review): presumably A and B each pack four signed 8-bit values whose
/// dot product is added to C -- confirm against the HLSL specification.

_HLSL_AVAILABILITY(shadermodel, 6.4)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_i8packed)
int dot4add_i8packed(unsigned int, unsigned int, int);

//===----------------------------------------------------------------------===//
// exp builtins
//===----------------------------------------------------------------------===//
Expand Down
14 changes: 13 additions & 1 deletion clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12094,6 +12094,9 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
if (LangOpts.OpenMP)
OpenMP().ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(NewFD);

if (LangOpts.isSYCL() && NewFD->hasAttr<SYCLKernelEntryPointAttr>())
getASTContext().registerSYCLEntryPointFunction(NewFD);

// Semantic checking for this function declaration (in isolation).

if (getLangOpts().CPlusPlus) {
Expand Down Expand Up @@ -12222,8 +12225,17 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
<< NewFD;
}

if (!Redeclaration && LangOpts.CUDA)
if (!Redeclaration && LangOpts.CUDA) {
bool IsKernel = NewFD->hasAttr<CUDAGlobalAttr>();
for (auto *Parm : NewFD->parameters()) {
if (!Parm->getType()->isDependentType() &&
Parm->hasAttr<CUDAGridConstantAttr>() &&
!(IsKernel && Parm->getType().isConstQualified()))
Diag(Parm->getAttr<CUDAGridConstantAttr>()->getLocation(),
diag::err_cuda_grid_constant_not_allowed);
}
CUDA().checkTargetOverload(NewFD, Previous);
}
}

// Check if the function definition uses any AArch64 SME features without
Expand Down
15 changes: 15 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4748,6 +4748,15 @@ static void handleManagedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(CUDADeviceAttr::CreateImplicit(S.Context));
}

// Attaches a CUDAGridConstantAttr to D unless D is already invalid.
//
// Whether __grid_constant__ may actually be used on this declaration is not
// decided here; that check happens in Sema::CheckFunctionDeclaration because
// it needs the complete function declaration.
static void handleGridConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
  if (!D->isInvalidDecl())
    D->addAttr(::new (S.Context) CUDAGridConstantAttr(S.Context, AL));
}

static void handleGNUInlineAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
const auto *Fn = cast<FunctionDecl>(D);
if (!Fn->isInlineSpecified()) {
Expand Down Expand Up @@ -6620,6 +6629,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_SYCLKernel:
S.SYCL().handleKernelAttr(D, AL);
break;
case ParsedAttr::AT_SYCLKernelEntryPoint:
S.SYCL().handleKernelEntryPointAttr(D, AL);
break;
case ParsedAttr::AT_SYCLSpecialClass:
handleSimpleAttribute<SYCLSpecialClassAttr>(S, D, AL);
break;
Expand All @@ -6642,6 +6654,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_CUDADevice:
handleDeviceAttr(S, D, AL);
break;
case ParsedAttr::AT_CUDAGridConstant:
handleGridConstantAttr(S, D, AL);
break;
case ParsedAttr::AT_HIPManaged:
handleManagedAttr(S, D, AL);
break;
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Sema/SemaExprCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5032,6 +5032,7 @@ static bool CheckUnaryTypeTraitTypeCompleteness(Sema &S, TypeTrait UTT,
case UTT_IsScalar:
case UTT_IsCompound:
case UTT_IsMemberPointer:
case UTT_IsTypedResourceElementCompatible:
// Fall-through

// These traits are modeled on type predicates in C++0x [meta.unary.prop]
Expand Down Expand Up @@ -5714,6 +5715,15 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
tok::kw___builtin_hlsl_is_intangible))
return false;
return T->isHLSLIntangibleType();

case UTT_IsTypedResourceElementCompatible:
assert(Self.getLangOpts().HLSL &&
"typed resource element compatible types are an HLSL-only feature");
if (Self.RequireCompleteType(TInfo->getTypeLoc().getBeginLoc(), T,
diag::err_incomplete_type))
return false;

return Self.HLSL().IsTypedResourceElementCompatible(T);
}
}

Expand Down
44 changes: 44 additions & 0 deletions clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2199,6 +2199,50 @@ static void BuildFlattenedTypeList(QualType BaseTy,
}
}

// Returns true if QT can be used as the element type of a typed resource.
//
// A type qualifies when all of the following hold:
//  * it is neither null nor (at the outermost level) an array type;
//  * its flattened list of constituent types has at most 4 entries;
//  * every constituent has the same unqualified type as the first one;
//  * that common type is neither a boolean nor an enumeration type;
//  * the constituents together occupy at most 16 bytes.
bool SemaHLSL::IsTypedResourceElementCompatible(clang::QualType QT) {
  // Null types and outer array types are never compatible.
  if (QT.isNull() || QT->isArrayType())
    return false;

  llvm::SmallVector<QualType, 4> QTTypes;
  BuildFlattenedTypeList(QT, QTTypes);

  assert(QTTypes.size() > 0 &&
         "expected at least one constituent type from non-null type");

  // element count cannot exceed 4
  if (QTTypes.size() > 4)
    return false;

  ASTContext &Ctx = getASTContext();
  QualType FirstQT = Ctx.getCanonicalType(QTTypes[0]);

  // ensure homogeneity
  for (QualType TempQT : QTTypes) {
    if (!Ctx.hasSameUnqualifiedType(FirstQT, TempQT))
      return false;
  }

  // Booleans and enumerations are rejected. This is tested on the type
  // itself rather than behind a BuiltinType cast, because an enumeration is
  // never a BuiltinType and would otherwise slip through unnoticed.
  if (FirstQT->isBooleanType() || FirstQT->isEnumeralType())
    return false;

  // All constituents share FirstQT's type (checked above), so the total size
  // is the size of one constituent times the count. getTypeSize reports bits;
  // keep the arithmetic in 64 bits to avoid narrowing.
  uint64_t TotalSizeInBytes = (Ctx.getTypeSize(FirstQT) / 8) * QTTypes.size();
  return TotalSizeInBytes <= 16;
}

bool SemaHLSL::IsScalarizedLayoutCompatible(QualType T1, QualType T2) const {
if (T1.isNull() || T2.isNull())
return false;
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,12 @@ void SemaSYCL::handleKernelAttr(Decl *D, const ParsedAttr &AL) {

handleSimpleAttribute<SYCLKernelAttr>(*this, D, AL);
}

// Attaches a SYCLKernelEntryPointAttr to D, built from the type argument of
// the parsed attribute AL. Only the type source info of that argument is
// kept; the type returned by GetTypeFromParser is deliberately discarded.
void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) {
  TypeSourceInfo *KernelNameTSI = nullptr;
  (void)SemaRef.GetTypeFromParser(AL.getTypeArg(), &KernelNameTSI);
  assert(KernelNameTSI && "no type source info for attribute argument");

  auto *EntryPointAttr = ::new (SemaRef.Context)
      SYCLKernelEntryPointAttr(SemaRef.Context, AL, KernelNameTSI);
  D->addAttr(EntryPointAttr);
}
6 changes: 6 additions & 0 deletions clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
continue;
}

if (auto *A = dyn_cast<CUDAGridConstantAttr>(TmplAttr)) {
if (!New->hasAttr<CUDAGridConstantAttr>())
New->addAttr(A->clone(Context));
continue;
}

assert(!TmplAttr->isPackExpansion());
if (TmplAttr->isLateParsed() && LateAttrs) {
// Late parsed attributes must be instantiated and attached after the
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/Serialization/ASTReaderDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,14 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(readDeclAs<ParmVarDecl>());
FD->setParams(Reader.getContext(), Params);

// If the declaration is a SYCL kernel entry point function as indicated by
// the presence of a sycl_kernel_entry_point attribute, register it so that
// associated metadata is recreated.
if (FD->hasAttr<SYCLKernelEntryPointAttr>()) {
ASTContext &C = Reader.getContext();
C.registerSYCLEntryPointFunction(FD);
}
}

void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) {
Expand Down
144 changes: 144 additions & 0 deletions clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Tests without serialization:
// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown -fsycl-is-device \
// RUN: -ast-dump %s \
// RUN: | FileCheck --match-full-lines %s
// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown -fsycl-is-host \
// RUN: -ast-dump %s \
// RUN: | FileCheck --match-full-lines %s
//
// Tests with serialization:
// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown -fsycl-is-device \
// RUN: -emit-pch -o %t %s
// RUN: %clang_cc1 -x c++ -std=c++17 -triple x86_64-unknown-unknown -fsycl-is-device \
// RUN: -include-pch %t -ast-dump-all /dev/null \
// RUN: | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" \
// RUN: | FileCheck --match-full-lines %s
// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown -fsycl-is-host \
// RUN: -emit-pch -o %t %s
// RUN: %clang_cc1 -x c++ -std=c++17 -triple x86_64-unknown-unknown -fsycl-is-host \
// RUN: -include-pch %t -ast-dump-all /dev/null \
// RUN: | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" \
// RUN: | FileCheck --match-full-lines %s

// These tests validate the AST produced for functions declared with the
// sycl_kernel_entry_point attribute.

// CHECK: TranslationUnitDecl {{.*}}

// A unique kernel name type is required for each declared kernel entry point.
template<int, int=0> struct KN;

__attribute__((sycl_kernel_entry_point(KN<1>)))
void skep1() {
}
// CHECK: |-FunctionDecl {{.*}} skep1 'void ()'
// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} KN<1>

using KN2 = KN<2>;
__attribute__((sycl_kernel_entry_point(KN2)))
void skep2() {
}
// CHECK: |-FunctionDecl {{.*}} skep2 'void ()'
// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} KN2

template<int I> using KNT = KN<I>;
__attribute__((sycl_kernel_entry_point(KNT<3>)))
void skep3() {
}
// CHECK: |-FunctionDecl {{.*}} skep3 'void ()'
// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} KNT<3>

template<typename KNT, typename F>
[[clang::sycl_kernel_entry_point(KNT)]]
void skep4(F f) {
f();
}
// CHECK: |-FunctionTemplateDecl {{.*}} skep4
// CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} KNT
// CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} F
// CHECK-NEXT: | |-FunctionDecl {{.*}} skep4 'void (F)'
// CHECK: | | `-SYCLKernelEntryPointAttr {{.*}} KNT

void test_skep4() {
skep4<KNT<4>>([]{});
}
// CHECK: | `-FunctionDecl {{.*}} used skep4 'void ((lambda at {{.*}}))' implicit_instantiation
// CHECK-NEXT: | |-TemplateArgument type 'KN<4>'
// CHECK: | |-TemplateArgument type '(lambda at {{.*}})'
// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} struct KN<4>
// CHECK-NEXT: |-FunctionDecl {{.*}} test_skep4 'void ()'

template<typename KNT, typename T>
[[clang::sycl_kernel_entry_point(KNT)]]
void skep5(T) {
}
// CHECK: |-FunctionTemplateDecl {{.*}} skep5
// CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} KNT
// CHECK-NEXT: | |-TemplateTypeParmDecl {{.*}} T
// CHECK-NEXT: | |-FunctionDecl {{.*}} skep5 'void (T)'
// CHECK: | | `-SYCLKernelEntryPointAttr {{.*}} KNT

// Checks for the explicit template instantiation definition below.
// CHECK: | `-FunctionDecl {{.*}} skep5 'void (int)' explicit_instantiation_definition
// CHECK-NEXT: | |-TemplateArgument type 'KN<5, 4>'
// CHECK: | |-TemplateArgument type 'int'
// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} KN<5, 4>

// FIXME: C++23 [temp.expl.spec]p12 states:
// FIXME: ... Similarly, attributes appearing in the declaration of a template
// FIXME: have no effect on an explicit specialization of that template.
// FIXME: Clang currently instantiates and propagates attributes from a function
// FIXME: template to its explicit specializations resulting in the following
// FIXME: explicit specialization having an attribute incorrectly attached.
template<>
void skep5<KN<5,1>>(short) {
}
// CHECK: |-FunctionDecl {{.*}} prev {{.*}} skep5 'void (short)' explicit_specialization
// CHECK-NEXT: | |-TemplateArgument type 'KN<5, 1>'
// CHECK: | |-TemplateArgument type 'short'
// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} Inherited struct KN<5, 1>

template<>
[[clang::sycl_kernel_entry_point(KN<5,2>)]]
void skep5<KN<5,2>>(long) {
}
// CHECK: |-FunctionDecl {{.*}} prev {{.*}} skep5 'void (long)' explicit_specialization
// CHECK-NEXT: | |-TemplateArgument type 'KN<5, 2>'
// CHECK: | |-TemplateArgument type 'long'
// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} KN<5, 2>

template<>
[[clang::sycl_kernel_entry_point(KN<5,3>)]]
void skep5<KN<5,-1>>(long long) {
}
// CHECK: |-FunctionDecl {{.*}} prev {{.*}} skep5 'void (long long)' explicit_specialization
// CHECK-NEXT: | |-TemplateArgument type 'KN<5, -1>'
// CHECK: | |-TemplateArgument type 'long long'
// CHECK: | `-SYCLKernelEntryPointAttr {{.*}} KN<5, 3>

template void skep5<KN<5,4>>(int);
// Checks are located with the primary template declaration above.

// Ensure that matching attributes from multiple declarations are ok.
[[clang::sycl_kernel_entry_point(KN<6>)]]
void skep6();
[[clang::sycl_kernel_entry_point(KN<6>)]]
void skep6() {
}
// CHECK: |-FunctionDecl {{.*}} skep6 'void ()'
// CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<6>
// CHECK-NEXT: |-FunctionDecl {{.*}} prev {{.*}} skep6 'void ()'
// CHECK-NEXT: | |-CompoundStmt {{.*}}
// CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<6>

// Ensure that matching attributes from the same declaration are ok.
[[clang::sycl_kernel_entry_point(KN<7>), clang::sycl_kernel_entry_point(KN<7>)]]
void skep7() {
}
// CHECK: |-FunctionDecl {{.*}} skep7 'void ()'
// CHECK-NEXT: | |-CompoundStmt {{.*}}
// CHECK-NEXT: | |-SYCLKernelEntryPointAttr {{.*}} KN<7>
// CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<7>

void the_end() {}
// CHECK: `-FunctionDecl {{.*}} the_end 'void ()'
6 changes: 3 additions & 3 deletions clang/test/CIR/hello.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s | FileCheck --allow-empty %s
// Smoke test for ClangIR code generation
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s

// just confirm that we don't crash
// CHECK-NOT: *
void foo() {}
// CHECK: cir.func @foo
2 changes: 2 additions & 0 deletions clang/test/CodeGenCUDA/Inputs/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#define __managed__ __attribute__((managed))
#endif
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
#define __grid_constant__ __attribute__((grid_constant))
#else
#define __constant__
#define __device__
Expand All @@ -20,6 +21,7 @@
#define __shared__
#define __managed__
#define __launch_bounds__(...)
#define __grid_constant__
#endif

struct dim3 {
Expand Down
31 changes: 31 additions & 0 deletions clang/test/CodeGenCUDA/grid-constant.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s

#include "Inputs/cuda.h"

struct S {};

__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {}

// dependent arguments get diagnosed after instantiation.
template <typename T>
__global__ void tkernel_const(__grid_constant__ const T arg) {}

template <typename T>
__global__ void tkernel(int dummy, __grid_constant__ T arg) {}

void foo() {
tkernel_const<const S><<<1,1>>>({});
tkernel_const<S><<<1,1>>>({});
tkernel<const S><<<1,1>>>(1, {});
}
//.
//.
// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
// CHECK: [[META1]] = !{i32 1, i32 3}
// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
// CHECK: [[META3]] = !{i32 1}
// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
// CHECK: [[META6]] = !{i32 2}
//.
17 changes: 17 additions & 0 deletions clang/test/CodeGenHLSL/builtins/dot4add_i8packed.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// RUN: %clang_cc1 -finclude-default-header -triple \
// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s -DTARGET=dx
// RUN: %clang_cc1 -finclude-default-header -triple \
// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s -DTARGET=spv

// Test basic lowering to runtime function call.

// CHECK-LABEL: test
int test(uint a, uint b, int c) {
// CHECK: %[[RET:.*]] = call [[TY:i32]] @llvm.[[TARGET]].dot4add.i8packed([[TY]] %[[#]], [[TY]] %[[#]], [[TY]] %[[#]])
// CHECK: ret [[TY]] %[[RET]]
return dot4add_i8packed(a, b, c);
}

// CHECK: declare [[TY]] @llvm.[[TARGET]].dot4add.i8packed([[TY]], [[TY]], [[TY]])
3 changes: 2 additions & 1 deletion clang/test/CodeGenOpenCL/builtins-amdgcn.cl
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ void test_get_workgroup_size(int d, global int *out)
// CHECK-LABEL: @test_get_grid_size(
// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}}
// CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !invariant.load
// CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !range [[$GRID_RANGE:![0-9]+]], !invariant.load
void test_get_grid_size(int d, global int *out)
{
switch (d) {
Expand Down Expand Up @@ -896,5 +896,6 @@ void test_set_fpenv(unsigned long env) {
__builtin_amdgcn_set_fpenv(env);
}

// CHECK-DAG: [[$GRID_RANGE]] = !{i32 1, i32 0}
// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
// CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) }
16 changes: 5 additions & 11 deletions clang/test/Driver/XRay/xray-shared.cpp
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
// Check supported targets
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
// RUN: %clang -### --target=aarch64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s

// Check unsupported targets
// RUN: not %clang -### --target=arm-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=mips-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=loongarch64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=hexagon-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=powerpc64le-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET

// Check PIC requirement
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fpic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-PIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-pic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC

// On 64 bit darwin, PIC is always enabled
// RUN: %clang -### --target=x86_64-apple-darwin -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s

// Check unsupported targets
// RUN: not %clang -### --target=aarch64-pc-freebsd -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET
// RUN: not %clang -### --target=arm64-apple-macos -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-TARGET

// CHECK: "-cc1" {{.*}}"-fxray-instrument" {{.*}}"-fxray-shared"
// ERR-TARGET: error: unsupported option '-fxray-shared' for target
// ERR-PIC: error: option '-fxray-shared' cannot be specified without '-fPIC'
Expand Down
1 change: 1 addition & 0 deletions clang/test/Driver/ps5-linker.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
// CHECK-EXE-SAME: "--unresolved-symbols=report-all"
// CHECK-EXE-SAME: "-z" "now"
// CHECK-EXE-SAME: "-z" "start-stop-visibility=hidden"
// CHECK-EXE-SAME: "-z" "rodynamic"
// CHECK-EXE-SAME: "-z" "common-page-size=0x4000"
// CHECK-EXE-SAME: "-z" "max-page-size=0x4000"
// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_*=0xffffffffffffffff"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
// CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record)
// CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record)
// CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)
// CHECK-NEXT: CUDAGridConstant (SubjectMatchRule_variable_is_parameter)
// CHECK-NEXT: CUDAHost (SubjectMatchRule_function)
// CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)
// CHECK-NEXT: CUDAShared (SubjectMatchRule_variable)
Expand Down Expand Up @@ -178,6 +179,7 @@
// CHECK-NEXT: ReturnTypestate (SubjectMatchRule_function, SubjectMatchRule_variable_is_parameter)
// CHECK-NEXT: ReturnsNonNull (SubjectMatchRule_objc_method, SubjectMatchRule_function)
// CHECK-NEXT: ReturnsTwice (SubjectMatchRule_function)
// CHECK-NEXT: SYCLKernelEntryPoint (SubjectMatchRule_function)
// CHECK-NEXT: SYCLSpecialClass (SubjectMatchRule_record)
// CHECK-NEXT: ScopedLockable (SubjectMatchRule_record)
// CHECK-NEXT: Section (SubjectMatchRule_function, SubjectMatchRule_variable_is_global, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property)
Expand Down
1 change: 1 addition & 0 deletions clang/test/SemaCUDA/Inputs/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#define __host__ __attribute__((host))
#define __shared__ __attribute__((shared))
#define __managed__ __attribute__((managed))
// Spelling for the grid_constant parameter attribute.
#define __grid_constant__ __attribute__((grid_constant))
// Variadic: every macro argument is forwarded to launch_bounds.
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))

struct dim3 {
Expand Down
33 changes: 33 additions & 0 deletions clang/test/SemaCUDA/grid-constant.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s
// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
#include "Inputs/cuda.h"

// Empty class type used as a by-value kernel parameter.
struct S {};

// Accepted without diagnostics: const-qualified kernel parameters of class
// type and of scalar type.
__global__ void kernel_struct(__grid_constant__ const S arg) {}
__global__ void kernel_scalar(__grid_constant__ const int arg) {}

// Rejected: the kernel parameter is not const-qualified.
__global__ void gc_kernel_non_const(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}

// Rejected: non_kernel is not a __global__ function (and its parameter is not
// const-qualified either).
void non_kernel(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}

// Templates whose attributed parameter has a non-dependent type are diagnosed
// at the template definition itself, without any instantiation.
template <typename T>
__global__ void tkernel_nd_const(__grid_constant__ const S arg, T dummy) {}
template <typename T>
__global__ void tkernel_nd_non_const(__grid_constant__ S arg, T dummy) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}

// When the attributed parameter has a dependent type, the diagnostic is only
// issued once the template is instantiated.
template <typename T>
__global__ void tkernel_const(__grid_constant__ const T arg) {}

// Diagnosed only for instantiations in which T is not const-qualified.
template <typename T>
__global__ void tkernel(__grid_constant__ T arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}

// Instantiates the dependent-type kernels. Only tkernel<S> ends up with a
// non-const parameter, so it is the only launch that carries a note.
void foo() {
// Parameter type is const S in the first three launches.
tkernel_const<const S><<<1,1>>>({});
tkernel_const<S><<<1,1>>>({});
tkernel<const S><<<1,1>>>({});
// Parameter type is plain S here, which triggers the error in tkernel.
tkernel<S><<<1,1>>>({}); // expected-note {{in instantiation of function template specialization 'tkernel<S>' requested here}}
}
28 changes: 28 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/dot4add_i8packed-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify

// Zero arguments: the builtin takes exactly three, so the call is rejected.
int test_too_few_arg0() {
return __builtin_hlsl_dot4add_i8packed();
// expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
}

// One argument: still fewer than the three the builtin requires.
int test_too_few_arg1(int p0) {
return __builtin_hlsl_dot4add_i8packed(p0);
// expected-error@-1 {{too few arguments to function call, expected 3, have 1}}
}

// Two arguments: one short of the three the builtin requires.
int test_too_few_arg2(int p0) {
return __builtin_hlsl_dot4add_i8packed(p0, p0);
// expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
}

// Four arguments: one more than the three the builtin accepts.
int test_too_many_arg(int p0) {
return __builtin_hlsl_dot4add_i8packed(p0, p0, p0, p0);
// expected-error@-1 {{too many arguments to function call, expected 3, have 4}}
}

// Struct with no conversion to an integer type.
struct S { float f; };

// Correct argument count, but the first argument is a struct that cannot be
// converted to the builtin's 'unsigned int' parameter.
int test_expr_struct_type_check(S p0, int p1) {
return __builtin_hlsl_dot4add_i8packed(p0, p1, p1);
// expected-error@-1 {{no viable conversion from 'S' to 'unsigned int'}}
}
109 changes: 109 additions & 0 deletions clang/test/SemaHLSL/Types/Traits/IsTypedResourceElementCompatible.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -fnative-half-type -verify %s
// expected-no-diagnostics

// A single int field.
struct oneInt {
int i;
};

// Two int fields.
struct twoInt {
int aa;
int ab;
};

// Nests oneInt and twoInt: three int fields in total, none declared directly.
struct threeInts {
oneInt o;
twoInt t;
};

// A single float field.
struct oneFloat {
float f;
};
// A direct int next to nested oneInt and oneFloat: int and float fields that
// sit at different nesting depths.
struct depthDiff {
int i;
oneInt o;
oneFloat f;
};

// Direct int and float fields, i.e. two different element types.
struct notHomogenous{
int i;
float f;
};

// Two arrays of two twoInt each: eight int fields in total.
struct EightElements {
twoInt x[2];
twoInt y[2];
};

// An array of eight half values.
struct EightHalves {
half x[8];
};

// Wraps a single int2 vector.
struct intVec {
int2 i;
};

// A direct int, a nested oneInt and an int2 vector: all int-typed, combining
// scalar, struct and vector members.
struct oneIntWithVec {
int i;
oneInt i2;
int2 i3;
};

// A direct int followed by a struct that wraps an int2 vector.
struct weirdStruct {
int i;
intVec iv;
};

// Scalar and vector types: asserted compatible.
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(int), "");
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(float), "");
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(float4), "");
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(double2), "");
// Structs whose fields (including nested ones) all share one type: asserted
// compatible.
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(oneInt), "");
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(oneFloat), "");
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(twoInt), "");
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(threeInts), "");
// Structs mixing int and float fields: asserted incompatible.
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(notHomogenous), "");
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(depthDiff), "");
// Structs holding eight elements: asserted incompatible.
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(EightElements), "");
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(EightHalves), "");
// All-int structs that combine scalars, nested structs and vectors: asserted
// compatible.
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(oneIntWithVec), "");
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(weirdStruct), "");
// A resource type itself: asserted incompatible.
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(RWBuffer<int>), "");


// arrays not allowed
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(half[4]), "");

// Template struct pairing a T field with a raw __hlsl_resource_t handle.
template<typename T> struct TemplatedBuffer {
T a;
__hlsl_resource_t h;
};
// Asserted incompatible when instantiated with int.
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(TemplatedBuffer<int>), "");

// Inherits the resource handle from TemplatedBuffer<float> and adds a float
// field of its own; asserted incompatible.
struct MyStruct1 : TemplatedBuffer<float> {
float x;
};
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(MyStruct1), "");

// Holds an array of ten const TemplatedBuffer<float>; asserted incompatible.
struct MyStruct2 {
const TemplatedBuffer<float> TB[10];
};
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(MyStruct2), "");

// Template struct with a single field of the template parameter type.
template<typename T> struct SimpleTemplate {
T a;
};

// though the element type is incomplete, the type trait should still technically return true
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(SimpleTemplate<__hlsl_resource_t>), "");

// With a float element the struct is asserted compatible as well.
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(SimpleTemplate<float>), "");


// Typedef alias for int.
typedef int myInt;

// Mixes plain int with the myInt alias; asserted compatible, so the alias is
// treated the same as int.
struct TypeDefTest {
int x;
myInt y;
};

_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(TypeDefTest), "");
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -finclude-default-header -fnative-half-type -verify %s

// types must be complete
_Static_assert(__builtin_hlsl_is_typed_resource_element_compatible(__hlsl_resource_t), "");

// expected-note@+1{{forward declaration of 'notComplete'}}
struct notComplete;
// expected-error@+1{{incomplete type 'notComplete' where a complete type is required}}
_Static_assert(!__builtin_hlsl_is_typed_resource_element_compatible(notComplete), "");

137 changes: 137 additions & 0 deletions clang/test/SemaSYCL/sycl-kernel-entry-point-attr-grammar.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -fsycl-is-device -verify %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++20 -fsyntax-only -fsycl-is-device -verify %s

// These tests validate parsing of the sycl_kernel_entry_point argument list
// and that the single argument names a type.

// Templates used to exercise class template specializations.
template<int> struct ST; // #ST-decl
template<int N> using TTA = ST<N>; // #TTA-decl


////////////////////////////////////////////////////////////////////////////////
// Valid declarations.
////////////////////////////////////////////////////////////////////////////////

// Forward-declared (incomplete) class type.
struct S1;
[[clang::sycl_kernel_entry_point(S1)]] void ok1();

// Typedef name for an unnamed struct.
typedef struct {} TA2;
[[clang::sycl_kernel_entry_point(TA2)]] void ok2();

// Alias-declaration name for an unnamed struct.
using TA3 = struct {};
[[clang::sycl_kernel_entry_point(TA3)]] void ok3();

// Specialization of a class template that is declared but not defined.
[[clang::sycl_kernel_entry_point(ST<4>)]] void ok4();

// Specialization of an alias template.
[[clang::sycl_kernel_entry_point(TTA<5>)]] void ok5();

// Namespace-qualified type name.
namespace NS6 {
struct NSS;
}
[[clang::sycl_kernel_entry_point(NS6::NSS)]] void ok6();

// Type declared in an unnamed namespace.
namespace {
struct UNSS7;
}
[[clang::sycl_kernel_entry_point(UNSS7)]] void ok7();

// decltype of a variable whose type is an unnamed struct.
struct {} s;
[[clang::sycl_kernel_entry_point(decltype(s))]] void ok8();

// Dependent kernel name taken from a template type parameter; instantiated
// with a struct (LS1) first declared inside the template argument list.
template<typename KN>
[[clang::sycl_kernel_entry_point(KN)]] void ok9();
void test_ok9() {
ok9<struct LS1>();
}

// Same as ok9, but the kernel name is the second template parameter, after a
// non-type parameter.
template<int, typename KN>
[[clang::sycl_kernel_entry_point(KN)]] void ok10();
void test_ok10() {
ok10<1, struct LS2>();
}

// The primary template uses the dependent name T; the explicit specialization
// repeats the attribute with the concrete type NS11::NSS.
namespace NS11 {
struct NSS;
}
template<typename T>
[[clang::sycl_kernel_entry_point(T)]] void ok11() {}
template<>
[[clang::sycl_kernel_entry_point(NS11::NSS)]] void ok11<NS11::NSS>() {}

// The same attribute, naming the same type, on a declaration and on the
// definition that follows it.
struct S12;
[[clang::sycl_kernel_entry_point(S12)]] void ok12();
[[clang::sycl_kernel_entry_point(S12)]] void ok12() {}

// Kernel name deduced from the function argument; the call passes a lambda,
// so T is its closure type.
template<typename T>
[[clang::sycl_kernel_entry_point(T)]] void ok13(T k);
void test_ok13() {
ok13([]{});
}


////////////////////////////////////////////////////////////////////////////////
// Invalid declarations.
////////////////////////////////////////////////////////////////////////////////

// Attribute written with no argument list at all.
// expected-error@+1 {{'sycl_kernel_entry_point' attribute takes one argument}}
[[clang::sycl_kernel_entry_point]] void bad1();

// Attribute written with an empty argument list.
// expected-error@+1 {{'sycl_kernel_entry_point' attribute takes one argument}}
[[clang::sycl_kernel_entry_point()]] void bad2();

// Trailing comma after the single type argument.
struct B3;
// expected-error@+2 {{expected ')'}}
// expected-error@+1 {{expected ']'}}
[[clang::sycl_kernel_entry_point(B3,)]] void bad3();

// A second argument after the type; the leftover X ends up being parsed as
// an (unknown) attribute name.
struct B4;
// expected-error@+3 {{expected ')'}}
// expected-error@+2 {{expected ','}}
// expected-warning@+1 {{unknown attribute 'X' ignored}}
[[clang::sycl_kernel_entry_point(B4, X)]] void bad4();

// Integer literal where a type is required.
// expected-error@+1 {{expected a type}}
[[clang::sycl_kernel_entry_point(1)]] void bad5();

// Function name where a type is required.
void f6();
// expected-error@+1 {{unknown type name 'f6'}}
[[clang::sycl_kernel_entry_point(f6)]] void bad6();

// Class template name used without template arguments.
// expected-error@+2 {{use of class template 'ST' requires template arguments; argument deduction not allowed here}}
// expected-note@#ST-decl {{template is declared here}}
[[clang::sycl_kernel_entry_point(ST)]] void bad7();

// Alias template name used without template arguments.
// expected-error@+2 {{use of alias template 'TTA' requires template arguments; argument deduction not allowed here}}
// expected-note@#TTA-decl {{template is declared here}}
[[clang::sycl_kernel_entry_point(TTA)]] void bad8();

// Enumerator name where a type is required.
enum {
e9
};
// expected-error@+1 {{unknown type name 'e9'}}
[[clang::sycl_kernel_entry_point(e9)]] void bad9();

// C++20 only: a concept name, with or without template arguments, does not
// name a type.
#if __cplusplus >= 202002L
template<typename> concept C = true;
// expected-error@+1 {{expected a type}}
[[clang::sycl_kernel_entry_point(C)]] void bad10();

// expected-error@+1 {{expected a type}}
[[clang::sycl_kernel_entry_point(C<int>)]] void bad11();
#endif

// The primary template's attribute names a dependent member type. The
// explicit specialization for the incomplete type B12 carries no attribute of
// its own, yet the primary's attribute is still instantiated for it and fails.
struct B12; // #B12-decl
// FIXME: C++23 [temp.expl.spec]p12 states:
// FIXME: ... Similarly, attributes appearing in the declaration of a template
// FIXME: have no effect on an explicit specialization of that template.
// FIXME: Clang currently instantiates and propagates attributes from a function
// FIXME: template to its explicit specializations resulting in the following
// FIXME: spurious error.
// expected-error@+4 {{incomplete type 'B12' named in nested name specifier}}
// expected-note@+5 {{in instantiation of function template specialization 'bad12<B12>' requested here}}
// expected-note@#B12-decl {{forward declaration of 'B12'}}
template<typename T>
[[clang::sycl_kernel_entry_point(typename T::not_found)]] void bad12() {}
template<>
void bad12<B12>() {}
17 changes: 17 additions & 0 deletions clang/test/SemaSYCL/sycl-kernel-entry-point-attr-ignored.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++17 -fsyntax-only -verify %s

// These tests validate that the sycl_kernel_entry_point attribute is ignored
// when SYCL support is not enabled.

// A unique kernel name type is required for each declared kernel entry point.
template<int> struct KN;

// Non-template declaration: the attribute is reported as ignored because the
// test is compiled without any SYCL option.
// expected-warning@+1 {{'sycl_kernel_entry_point' attribute ignored}}
[[clang::sycl_kernel_entry_point(KN<1>)]]
void ok1();

// Function template plus an explicit instantiation: the warning is reported
// on the attribute line of the template itself.
// expected-warning@+2 {{'sycl_kernel_entry_point' attribute ignored}}
template<typename KNT>
[[clang::sycl_kernel_entry_point(KNT)]]
void ok2() {}
template void ok2<KN<2>>();
5 changes: 1 addition & 4 deletions clang/tools/clang-refactor/TestSupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,10 +373,7 @@ findTestSelectionRanges(StringRef Filename) {
EndOffset = Offset;
}
TestSelectionRange Range = {Offset, EndOffset};
auto It = GroupedRanges.insert(std::make_pair(
Matches[1].str(), SmallVector<TestSelectionRange, 8>{Range}));
if (!It.second)
It.first->second.push_back(Range);
GroupedRanges[Matches[1].str()].push_back(Range);
}
if (GroupedRanges.empty()) {
llvm::errs() << "error: -selection=test:" << Filename
Expand Down
4 changes: 2 additions & 2 deletions compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ if(APPLE)
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64})
else()
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
powerpc64le ${HEXAGON} ${LOONGARCH64})
powerpc64le ${HEXAGON} ${LOONGARCH64} ${S390X})
endif()
set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64} ${ARM64})
set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64})
set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})

if (UNIX)
Expand Down
169 changes: 82 additions & 87 deletions compiler-rt/include/sanitizer/tsan_interface_atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,183 +43,178 @@ typedef enum {
} __tsan_memory_order;

__tsan_atomic8 SANITIZER_CDECL
__tsan_atomic8_load(const volatile __tsan_atomic8 *a, __tsan_memory_order mo);
__tsan_atomic8_load(const volatile __tsan_atomic8 *a, int mo);
__tsan_atomic16 SANITIZER_CDECL
__tsan_atomic16_load(const volatile __tsan_atomic16 *a, __tsan_memory_order mo);
__tsan_atomic16_load(const volatile __tsan_atomic16 *a, int mo);
__tsan_atomic32 SANITIZER_CDECL
__tsan_atomic32_load(const volatile __tsan_atomic32 *a, __tsan_memory_order mo);
__tsan_atomic32_load(const volatile __tsan_atomic32 *a, int mo);
__tsan_atomic64 SANITIZER_CDECL
__tsan_atomic64_load(const volatile __tsan_atomic64 *a, __tsan_memory_order mo);
__tsan_atomic64_load(const volatile __tsan_atomic64 *a, int mo);
#if __TSAN_HAS_INT128
__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_load(
const volatile __tsan_atomic128 *a, __tsan_memory_order mo);
__tsan_atomic128 SANITIZER_CDECL
__tsan_atomic128_load(const volatile __tsan_atomic128 *a, int mo);
#endif

void SANITIZER_CDECL __tsan_atomic8_store(volatile __tsan_atomic8 *a,
__tsan_atomic8 v,
__tsan_memory_order mo);
__tsan_atomic8 v, int mo);
void SANITIZER_CDECL __tsan_atomic16_store(volatile __tsan_atomic16 *a,
__tsan_atomic16 v,
__tsan_memory_order mo);
__tsan_atomic16 v, int mo);
void SANITIZER_CDECL __tsan_atomic32_store(volatile __tsan_atomic32 *a,
__tsan_atomic32 v,
__tsan_memory_order mo);
__tsan_atomic32 v, int mo);
void SANITIZER_CDECL __tsan_atomic64_store(volatile __tsan_atomic64 *a,
__tsan_atomic64 v,
__tsan_memory_order mo);
__tsan_atomic64 v, int mo);
#if __TSAN_HAS_INT128
void SANITIZER_CDECL __tsan_atomic128_store(volatile __tsan_atomic128 *a,
__tsan_atomic128 v,
__tsan_memory_order mo);
__tsan_atomic128 v, int mo);
#endif

__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_exchange(
volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo);
__tsan_atomic8 SANITIZER_CDECL
__tsan_atomic8_exchange(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo);
__tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_exchange(
volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 v, int mo);
__tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_exchange(
volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 v, int mo);
__tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_exchange(
volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 v, int mo);
#if __TSAN_HAS_INT128
__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_exchange(
volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo);
volatile __tsan_atomic128 *a, __tsan_atomic128 v, int mo);
#endif

__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_add(
volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo);
__tsan_atomic8 SANITIZER_CDECL
__tsan_atomic8_fetch_add(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo);
__tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_add(
volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 v, int mo);
__tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_add(
volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 v, int mo);
__tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_add(
volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 v, int mo);
#if __TSAN_HAS_INT128
__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_add(
volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo);
volatile __tsan_atomic128 *a, __tsan_atomic128 v, int mo);
#endif

__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_sub(
volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo);
__tsan_atomic8 SANITIZER_CDECL
__tsan_atomic8_fetch_sub(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo);
__tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_sub(
volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 v, int mo);
__tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_sub(
volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 v, int mo);
__tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_sub(
volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 v, int mo);
#if __TSAN_HAS_INT128
__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_sub(
volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo);
volatile __tsan_atomic128 *a, __tsan_atomic128 v, int mo);
#endif

__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_and(
volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo);
__tsan_atomic8 SANITIZER_CDECL
__tsan_atomic8_fetch_and(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo);
__tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_and(
volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 v, int mo);
__tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_and(
volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 v, int mo);
__tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_and(
volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 v, int mo);
#if __TSAN_HAS_INT128
__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_and(
volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo);
volatile __tsan_atomic128 *a, __tsan_atomic128 v, int mo);
#endif

__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_or(
volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo);
__tsan_atomic8 SANITIZER_CDECL
__tsan_atomic8_fetch_or(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo);
__tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_or(
volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 v, int mo);
__tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_or(
volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 v, int mo);
__tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_or(
volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 v, int mo);
#if __TSAN_HAS_INT128
__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_or(
volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo);
volatile __tsan_atomic128 *a, __tsan_atomic128 v, int mo);
#endif

__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_xor(
volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo);
__tsan_atomic8 SANITIZER_CDECL
__tsan_atomic8_fetch_xor(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo);
__tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_xor(
volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 v, int mo);
__tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_xor(
volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 v, int mo);
__tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_xor(
volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 v, int mo);
#if __TSAN_HAS_INT128
__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_xor(
volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo);
volatile __tsan_atomic128 *a, __tsan_atomic128 v, int mo);
#endif

__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_fetch_nand(
volatile __tsan_atomic8 *a, __tsan_atomic8 v, __tsan_memory_order mo);
__tsan_atomic8 SANITIZER_CDECL
__tsan_atomic8_fetch_nand(volatile __tsan_atomic8 *a, __tsan_atomic8 v, int mo);
__tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_fetch_nand(
volatile __tsan_atomic16 *a, __tsan_atomic16 v, __tsan_memory_order mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 v, int mo);
__tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_fetch_nand(
volatile __tsan_atomic32 *a, __tsan_atomic32 v, __tsan_memory_order mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 v, int mo);
__tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_fetch_nand(
volatile __tsan_atomic64 *a, __tsan_atomic64 v, __tsan_memory_order mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 v, int mo);
#if __TSAN_HAS_INT128
__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_fetch_nand(
volatile __tsan_atomic128 *a, __tsan_atomic128 v, __tsan_memory_order mo);
volatile __tsan_atomic128 *a, __tsan_atomic128 v, int mo);
#endif

int SANITIZER_CDECL __tsan_atomic8_compare_exchange_weak(
volatile __tsan_atomic8 *a, __tsan_atomic8 *c, __tsan_atomic8 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic8 *a, __tsan_atomic8 *c, __tsan_atomic8 v, int mo,
int fail_mo);
int SANITIZER_CDECL __tsan_atomic16_compare_exchange_weak(
volatile __tsan_atomic16 *a, __tsan_atomic16 *c, __tsan_atomic16 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 *c, __tsan_atomic16 v, int mo,
int fail_mo);
int SANITIZER_CDECL __tsan_atomic32_compare_exchange_weak(
volatile __tsan_atomic32 *a, __tsan_atomic32 *c, __tsan_atomic32 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 *c, __tsan_atomic32 v, int mo,
int fail_mo);
int SANITIZER_CDECL __tsan_atomic64_compare_exchange_weak(
volatile __tsan_atomic64 *a, __tsan_atomic64 *c, __tsan_atomic64 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 *c, __tsan_atomic64 v, int mo,
int fail_mo);
#if __TSAN_HAS_INT128
int SANITIZER_CDECL __tsan_atomic128_compare_exchange_weak(
volatile __tsan_atomic128 *a, __tsan_atomic128 *c, __tsan_atomic128 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
int mo, int fail_mo);
#endif

int SANITIZER_CDECL __tsan_atomic8_compare_exchange_strong(
volatile __tsan_atomic8 *a, __tsan_atomic8 *c, __tsan_atomic8 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic8 *a, __tsan_atomic8 *c, __tsan_atomic8 v, int mo,
int fail_mo);
int SANITIZER_CDECL __tsan_atomic16_compare_exchange_strong(
volatile __tsan_atomic16 *a, __tsan_atomic16 *c, __tsan_atomic16 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 *c, __tsan_atomic16 v, int mo,
int fail_mo);
int SANITIZER_CDECL __tsan_atomic32_compare_exchange_strong(
volatile __tsan_atomic32 *a, __tsan_atomic32 *c, __tsan_atomic32 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 *c, __tsan_atomic32 v, int mo,
int fail_mo);
int SANITIZER_CDECL __tsan_atomic64_compare_exchange_strong(
volatile __tsan_atomic64 *a, __tsan_atomic64 *c, __tsan_atomic64 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 *c, __tsan_atomic64 v, int mo,
int fail_mo);
#if __TSAN_HAS_INT128
int SANITIZER_CDECL __tsan_atomic128_compare_exchange_strong(
volatile __tsan_atomic128 *a, __tsan_atomic128 *c, __tsan_atomic128 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
int mo, int fail_mo);
#endif

__tsan_atomic8 SANITIZER_CDECL __tsan_atomic8_compare_exchange_val(
volatile __tsan_atomic8 *a, __tsan_atomic8 c, __tsan_atomic8 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic8 *a, __tsan_atomic8 c, __tsan_atomic8 v, int mo,
int fail_mo);
__tsan_atomic16 SANITIZER_CDECL __tsan_atomic16_compare_exchange_val(
volatile __tsan_atomic16 *a, __tsan_atomic16 c, __tsan_atomic16 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic16 *a, __tsan_atomic16 c, __tsan_atomic16 v, int mo,
int fail_mo);
__tsan_atomic32 SANITIZER_CDECL __tsan_atomic32_compare_exchange_val(
volatile __tsan_atomic32 *a, __tsan_atomic32 c, __tsan_atomic32 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic32 *a, __tsan_atomic32 c, __tsan_atomic32 v, int mo,
int fail_mo);
__tsan_atomic64 SANITIZER_CDECL __tsan_atomic64_compare_exchange_val(
volatile __tsan_atomic64 *a, __tsan_atomic64 c, __tsan_atomic64 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
volatile __tsan_atomic64 *a, __tsan_atomic64 c, __tsan_atomic64 v, int mo,
int fail_mo);
#if __TSAN_HAS_INT128
__tsan_atomic128 SANITIZER_CDECL __tsan_atomic128_compare_exchange_val(
volatile __tsan_atomic128 *a, __tsan_atomic128 c, __tsan_atomic128 v,
__tsan_memory_order mo, __tsan_memory_order fail_mo);
int mo, int fail_mo);
#endif

void SANITIZER_CDECL __tsan_atomic_thread_fence(__tsan_memory_order mo);
void SANITIZER_CDECL __tsan_atomic_signal_fence(__tsan_memory_order mo);
void SANITIZER_CDECL __tsan_atomic_thread_fence(int mo);
void SANITIZER_CDECL __tsan_atomic_signal_fence(int mo);

#ifdef __cplusplus
} // extern "C"
Expand Down
1 change: 1 addition & 0 deletions compiler-rt/lib/fuzzer/FuzzerExtFunctionsWindows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "FuzzerExtFunctions.h"
#include "FuzzerIO.h"
#include <stdlib.h>

using namespace fuzzer;

Expand Down
Loading