18 changes: 16 additions & 2 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -6241,8 +6241,9 @@ def err_typecheck_negative_array_size : Error<"array size is negative">;
def warn_typecheck_function_qualifiers_ignored : Warning<
"'%0' qualifier on function type %1 has no effect">,
InGroup<IgnoredQualifiers>;
def warn_typecheck_function_qualifiers_unspecified : Warning<
"'%0' qualifier on function type %1 has unspecified behavior">;
def ext_typecheck_function_qualifiers_unspecified : ExtWarn<
"'%0' qualifier on function type %1 has no effect and is a Clang extension">,
InGroup<IgnoredQualifiers>;
def warn_typecheck_reference_qualifiers : Warning<
"'%0' qualifier on reference type %1 has no effect">,
InGroup<IgnoredReferenceQualifiers>;
Expand Down Expand Up @@ -12747,6 +12748,19 @@ def err_acc_gang_reduction_numgangs_conflict
def err_reduction_op_mismatch
: Error<"OpenACC 'reduction' variable must have the same operator in all "
"nested constructs (%0 vs %1)">;
def err_acc_loop_variable_type
: Error<"loop variable of loop associated with an OpenACC 'loop' construct "
"must be of integer, pointer, or random-access-iterator type (is "
"%0)">;
def err_acc_loop_variable
: Error<"OpenACC 'loop' construct must have initialization clause in "
"canonical form ('var = init' or 'T var = init')">;
def err_acc_loop_terminating_condition
: Error<"OpenACC 'loop' construct must have a terminating condition">;
def err_acc_loop_not_monotonic
: Error<"OpenACC 'loop' variable must monotonically increase or decrease "
"('++', '--', or compound assignment)">;

// AMDGCN builtins diagnostics
def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">;
def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be 1, 2, or 4">;
Expand Down
320 changes: 196 additions & 124 deletions clang/include/clang/Basic/riscv_vector.td

Large diffs are not rendered by default.

165 changes: 121 additions & 44 deletions clang/include/clang/Basic/riscv_vector_common.td
Original file line number Diff line number Diff line change
Expand Up @@ -458,52 +458,91 @@ let HasMaskedOffOperand = false in {
["vx", "Uv", "UvUvUeUv"]]>;
}
multiclass RVVFloatingTerBuiltinSet {
defm "" : RVVOutOp1BuiltinSet<NAME, "xfd",
defm "" : RVVOutOp1BuiltinSet<NAME, "fd",
[["vv", "v", "vvvv"],
["vf", "v", "vvev"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp1BuiltinSet<NAME, "x",
[["vv", "v", "vvvv"],
["vf", "v", "vvev"]]>;
}
multiclass RVVFloatingTerBuiltinSetRoundingMode {
defm "" : RVVOutOp1BuiltinSet<NAME, "xfd",
defm "" : RVVOutOp1BuiltinSet<NAME, "fd",
[["vv", "v", "vvvvu"],
["vf", "v", "vvevu"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp1BuiltinSet<NAME, "x",
[["vv", "v", "vvvvu"],
["vf", "v", "vvevu"]]>;
}
}

let HasMaskedOffOperand = false, Log2LMUL = [-2, -1, 0, 1, 2] in {
multiclass RVVFloatingWidenTerBuiltinSet {
defm "" : RVVOutOp1Op2BuiltinSet<NAME, "xf",
defm "" : RVVOutOp1Op2BuiltinSet<NAME, "f",
[["vv", "w", "wwvv"],
["vf", "w", "wwev"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp1Op2BuiltinSet<NAME, "x",
[["vv", "w", "wwvv"],
["vf", "w", "wwev"]]>;
}
multiclass RVVFloatingWidenTerBuiltinSetRoundingMode {
defm "" : RVVOutOp1Op2BuiltinSet<NAME, "xf",
defm "" : RVVOutOp1Op2BuiltinSet<NAME, "f",
[["vv", "w", "wwvvu"],
["vf", "w", "wwevu"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp1Op2BuiltinSet<NAME, "x",
[["vv", "w", "wwvvu"],
["vf", "w", "wwevu"]]>;
}
}

multiclass RVVFloatingBinBuiltinSet
: RVVOutOp1BuiltinSet<NAME, "xfd",
[["vv", "v", "vvv"],
["vf", "v", "vve"]]>;
multiclass RVVFloatingBinBuiltinSet {
defm "" : RVVOutOp1BuiltinSet<NAME, "fd",
[["vv", "v", "vvv"],
["vf", "v", "vve"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp1BuiltinSet<NAME, "x",
[["vv", "v", "vvv"],
["vf", "v", "vve"]]>;
}

multiclass RVVFloatingBinBuiltinSetRoundingMode
: RVVOutOp1BuiltinSet<NAME, "xfd",
[["vv", "v", "vvvu"],
["vf", "v", "vveu"]]>;
multiclass RVVFloatingBinBuiltinSetRoundingMode {
defm "" : RVVOutOp1BuiltinSet<NAME, "fd",
[["vv", "v", "vvvu"],
["vf", "v", "vveu"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp1BuiltinSet<NAME, "x",
[["vv", "v", "vvvu"],
["vf", "v", "vveu"]]>;
}

multiclass RVVFloatingBinVFBuiltinSet
: RVVOutOp1BuiltinSet<NAME, "xfd",
[["vf", "v", "vve"]]>;
multiclass RVVFloatingBinVFBuiltinSet {
defm "" : RVVOutOp1BuiltinSet<NAME, "fd",
[["vf", "v", "vve"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp1BuiltinSet<NAME, "x",
[["vf", "v", "vve"]]>;
}

multiclass RVVFloatingBinVFBuiltinSetRoundingMode
: RVVOutOp1BuiltinSet<NAME, "xfd",
[["vf", "v", "vveu"]]>;
multiclass RVVFloatingBinVFBuiltinSetRoundingMode {
defm "" : RVVOutOp1BuiltinSet<NAME, "fd",
[["vf", "v", "vveu"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp1BuiltinSet<NAME, "x",
[["vf", "v", "vveu"]]>;
}

multiclass RVVFloatingMaskOutBuiltinSet
: RVVOp0Op1BuiltinSet<NAME, "xfd",
[["vv", "vm", "mvv"],
["vf", "vm", "mve"]]>;
multiclass RVVFloatingMaskOutBuiltinSet {
defm "" : RVVOp0Op1BuiltinSet<NAME, "fd",
[["vv", "vm", "mvv"],
["vf", "vm", "mve"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOp0Op1BuiltinSet<NAME, "x",
[["vv", "vm", "mvv"],
["vf", "vm", "mve"]]>;
}

multiclass RVVFloatingMaskOutVFBuiltinSet
: RVVOp0Op1BuiltinSet<NAME, "fd",
Expand Down Expand Up @@ -547,8 +586,11 @@ class RVVMaskOp0Builtin<string prototype> : RVVOp0Builtin<"m", prototype, "c"> {
let UnMaskedPolicyScheme = HasPolicyOperand,
HasMaskedOffOperand = false in {
multiclass RVVSlideUpBuiltinSet {
defm "" : RVVOutBuiltinSet<NAME, "csilxfd",
defm "" : RVVOutBuiltinSet<NAME, "csilfd",
[["vx","v", "vvvz"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutBuiltinSet<NAME, "x",
[["vx","v", "vvvz"]]>;
defm "" : RVVOutBuiltinSet<NAME, "csil",
[["vx","Uv", "UvUvUvz"]]>;
}
Expand All @@ -569,8 +611,11 @@ let UnMaskedPolicyScheme = HasPassthruOperand,
IntrinsicTypes = {ResultType, Ops.back()->getType()};
}] in {
multiclass RVVSlideDownBuiltinSet {
defm "" : RVVOutBuiltinSet<NAME, "csilxfd",
defm "" : RVVOutBuiltinSet<NAME, "csilfd",
[["vx","v", "vvz"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutBuiltinSet<NAME, "x",
[["vx","v", "vvz"]]>;
defm "" : RVVOutBuiltinSet<NAME, "csil",
[["vx","Uv", "UvUvz"]]>;
}
Expand Down Expand Up @@ -611,20 +656,32 @@ let HasMaskedOffOperand = true in {
[["vs", "UvUSv", "USvUvUSv"]]>;
}
multiclass RVVFloatingReductionBuiltin {
defm "" : RVVOutOp0BuiltinSet<NAME, "xfd",
defm "" : RVVOutOp0BuiltinSet<NAME, "fd",
[["vs", "vSv", "SvvSv"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp0BuiltinSet<NAME, "x",
[["vs", "vSv", "SvvSv"]]>;
}
multiclass RVVFloatingReductionBuiltinRoundingMode {
defm "" : RVVOutOp0BuiltinSet<NAME, "xfd",
defm "" : RVVOutOp0BuiltinSet<NAME, "fd",
[["vs", "vSv", "SvvSvu"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp0BuiltinSet<NAME, "x",
[["vs", "vSv", "SvvSvu"]]>;
}
multiclass RVVFloatingWidenReductionBuiltin {
defm "" : RVVOutOp0BuiltinSet<NAME, "xf",
defm "" : RVVOutOp0BuiltinSet<NAME, "f",
[["vs", "vSw", "SwvSw"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp0BuiltinSet<NAME, "x",
[["vs", "vSw", "SwvSw"]]>;
}
multiclass RVVFloatingWidenReductionBuiltinRoundingMode {
defm "" : RVVOutOp0BuiltinSet<NAME, "xf",
defm "" : RVVOutOp0BuiltinSet<NAME, "f",
[["vs", "vSw", "SwvSwu"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVOutOp0BuiltinSet<NAME, "x",
[["vs", "vSw", "SwvSwu"]]>;
}
}

Expand Down Expand Up @@ -684,22 +741,42 @@ multiclass RVVUnsignedWidenOp0BinBuiltinSet
[["wv", "Uw", "UwUwUv"],
["wx", "Uw", "UwUwUe"]]>;

multiclass RVVFloatingWidenBinBuiltinSet
: RVVWidenBuiltinSet<NAME, "xf",
[["vv", "w", "wvv"],
["vf", "w", "wve"]]>;
multiclass RVVFloatingWidenBinBuiltinSet {
defm "" : RVVWidenBuiltinSet<NAME, "f",
[["vv", "w", "wvv"],
["vf", "w", "wve"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVWidenBuiltinSet<NAME, "x",
[["vv", "w", "wvv"],
["vf", "w", "wve"]]>;
}

multiclass RVVFloatingWidenBinBuiltinSetRoundingMode
: RVVWidenBuiltinSet<NAME, "xf",
[["vv", "w", "wvvu"],
["vf", "w", "wveu"]]>;
multiclass RVVFloatingWidenBinBuiltinSetRoundingMode {
defm "" : RVVWidenBuiltinSet<NAME, "f",
[["vv", "w", "wvvu"],
["vf", "w", "wveu"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVWidenBuiltinSet<NAME, "x",
[["vv", "w", "wvvu"],
["vf", "w", "wveu"]]>;
}

multiclass RVVFloatingWidenOp0BinBuiltinSet
: RVVWidenWOp0BuiltinSet<NAME # "_w", "xf",
[["wv", "w", "wwv"],
["wf", "w", "wwe"]]>;
multiclass RVVFloatingWidenOp0BinBuiltinSet {
defm "" : RVVWidenWOp0BuiltinSet<NAME # "_w", "f",
[["wv", "w", "wwv"],
["wf", "w", "wwe"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVWidenWOp0BuiltinSet<NAME # "_w", "x",
[["wv", "w", "wwv"],
["wf", "w", "wwe"]]>;
}

multiclass RVVFloatingWidenOp0BinBuiltinSetRoundingMode
: RVVWidenWOp0BuiltinSet<NAME # "_w", "xf",
[["wv", "w", "wwvu"],
["wf", "w", "wweu"]]>;
multiclass RVVFloatingWidenOp0BinBuiltinSetRoundingMode {
defm "" : RVVWidenWOp0BuiltinSet<NAME # "_w", "f",
[["wv", "w", "wwvu"],
["wf", "w", "wweu"]]>;
let RequiredFeatures = ["Zvfh"] in
defm "" : RVVWidenWOp0BuiltinSet<NAME # "_w", "x",
[["wv", "w", "wwvu"],
["wf", "w", "wweu"]]>;
}
7 changes: 4 additions & 3 deletions clang/include/clang/CIR/CIRGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,15 @@
namespace clang {
class DeclGroupRef;
class DiagnosticsEngine;
namespace CIRGen {
class CIRGenModule;
} // namespace CIRGen
} // namespace clang

namespace mlir {
class MLIRContext;
} // namespace mlir
namespace cir {
class CIRGenModule;

class CIRGenerator : public clang::ASTConsumer {
virtual void anchor();
clang::DiagnosticsEngine &diags;
Expand All @@ -44,7 +45,7 @@ class CIRGenerator : public clang::ASTConsumer {

protected:
std::unique_ptr<mlir::MLIRContext> mlirCtx;
std::unique_ptr<CIRGenModule> cgm;
std::unique_ptr<clang::CIRGen::CIRGenModule> cgm;

public:
CIRGenerator(clang::DiagnosticsEngine &diags,
Expand Down
14 changes: 8 additions & 6 deletions clang/include/clang/CIR/Dialect/IR/CIRDialect.td
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def CIR_Dialect : Dialect {
let summary = "A high-level dialect for analyzing and optimizing Clang "
"supported languages";

let cppNamespace = "::mlir::cir";
let cppNamespace = "::cir";

let useDefaultAttributePrinterParser = 0;
let useDefaultTypePrinterParser = 0;
Expand All @@ -31,13 +31,15 @@ def CIR_Dialect : Dialect {
void registerAttributes();
void registerTypes();

Type parseType(DialectAsmParser &parser) const override;
void printType(Type type, DialectAsmPrinter &printer) const override;
mlir::Type parseType(mlir::DialectAsmParser &parser) const override;
void printType(mlir::Type type,
mlir::DialectAsmPrinter &printer) const override;

Attribute parseAttribute(DialectAsmParser &parser,
Type type) const override;
mlir::Attribute parseAttribute(mlir::DialectAsmParser &parser,
mlir::Type type) const override;

void printAttribute(Attribute attr, DialectAsmPrinter &os) const override;
void printAttribute(mlir::Attribute attr,
mlir::DialectAsmPrinter &os) const override;
}];
}

Expand Down
8 changes: 4 additions & 4 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
// following:
//
// class CIRFooOpLowering
// : public mlir::OpConversionPattern<mlir::cir::FooOp> {
// : public mlir::OpConversionPattern<cir::FooOp> {
// public:
// using OpConversionPattern<mlir::cir::FooOp>::OpConversionPattern;
// using OpConversionPattern<cir::FooOp>::OpConversionPattern;
//
// mlir::LogicalResult matchAndRewrite(
// mlir::cir::FooOp op,
// cir::FooOp op,
// OpAdaptor adaptor,
// mlir::ConversionPatternRewriter &rewriter) const override {
// rewriter.replaceOpWithNewOp<mlir::LLVM::BarOp>(
Expand Down Expand Up @@ -92,7 +92,7 @@ def FuncOp : CIR_Op<"func"> {

let skipDefaultBuilders = 1;

let builders = [OpBuilder<(ins "StringRef":$name)>];
let builders = [OpBuilder<(ins "llvm::StringRef":$name)>];

let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -6285,6 +6285,8 @@ def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias<mno_x87>;
def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
def mamx_avx512 : Flag<["-"], "mamx-avx512">, Group<m_x86_Features_Group>;
def mno_amx_avx512 : Flag<["-"], "mno-amx-avx512">, Group<m_x86_Features_Group>;
def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>;
def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>;
def mamx_complex : Flag<["-"], "mamx-complex">, Group<m_x86_Features_Group>;
Expand Down
68 changes: 59 additions & 9 deletions clang/include/clang/Sema/SemaOpenACC.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ class OpenACCClause;

class SemaOpenACC : public SemaBase {
private:
/// A collection of loop constructs in the compute construct scope that
/// haven't had their 'parent' compute construct set yet. Entries will only be
/// made to this list in the case where we know the loop isn't an orphan.
llvm::SmallVector<OpenACCLoopConstruct *> ParentlessLoopConstructs;

struct ComputeConstructInfo {
/// Which type of compute construct we are inside of, which we can use to
/// determine whether we should add loops to the above collection. We can
Expand Down Expand Up @@ -118,6 +113,43 @@ class SemaOpenACC : public SemaBase {
/// 'loop' clause enforcement, where this is 'blocked' by a compute construct.
llvm::SmallVector<OpenACCReductionClause *> ActiveReductionClauses;

// Bundles the pieces of a 'for' or range-based 'for' statement (its location
// plus either the range-for node or the init/condition/increment statements)
// so that check() can validate them. One constructor exists for each of the
// two statement forms.
struct ForStmtBeginChecker {
// The SemaOpenACC instance this checker was created for.
SemaOpenACC &SemaRef;
// Source location supplied by the caller for the 'for' statement.
SourceLocation ForLoc;
// True when built through the range-for constructor, false when built
// through the init/cond/inc constructor.
bool IsRangeFor = false;
// The range-for statement; only assigned by the range-for constructor.
// NOTE(review): the default initializer yields an engaged optional holding
// nullptr, not std::nullopt -- confirm that distinction is intentional. The
// same applies to Cond and Inc below.
std::optional<const CXXForRangeStmt *> RangeFor = nullptr;
// Init statement of a plain 'for'; only assigned by the init/cond/inc
// constructor.
const Stmt *Init = nullptr;
// Flag passed in alongside Init by the init/cond/inc constructor.
// Presumably records whether the init statement was replaced (e.g. during
// template instantiation) -- TODO confirm against the callers.
bool InitChanged = false;
// Condition and increment of a plain 'for'; only assigned by the
// init/cond/inc constructor.
std::optional<const Stmt *> Cond = nullptr;
std::optional<const Stmt *> Inc = nullptr;
// Prevent us from checking 2x, which can happen with collapse & tile.
bool AlreadyChecked = false;

// Range-for form: records the statement and marks IsRangeFor = true. Init,
// Cond and Inc keep their defaults.
ForStmtBeginChecker(SemaOpenACC &SemaRef, SourceLocation ForLoc,
std::optional<const CXXForRangeStmt *> S)
: SemaRef(SemaRef), ForLoc(ForLoc), IsRangeFor(true), RangeFor(S) {}

// Plain 'for' form: records init/condition/increment and marks
// IsRangeFor = false. RangeFor keeps its default.
ForStmtBeginChecker(SemaOpenACC &SemaRef, SourceLocation ForLoc,
const Stmt *I, bool InitChanged,
std::optional<const Stmt *> C,
std::optional<const Stmt *> Inc)
: SemaRef(SemaRef), ForLoc(ForLoc), IsRangeFor(false), Init(I),
InitChanged(InitChanged), Cond(C), Inc(Inc) {}
// Do the checking for the For/Range-For. Currently this implements the 'not
// seq' restrictions only, and should be called either if we know we are a
// top-level 'for' (the one associated via associated-stmt), or extended via
// 'collapse'.
void check();

// Per-part helpers (definitions are not visible here). checkInit() returns a
// ValueDecl that checkInc() takes as its 'Init' argument -- presumably the
// loop variable; verify against the implementation.
const ValueDecl *checkInit();
void checkCond();
void checkInc(const ValueDecl *Init);
};

/// Helper function for checking the 'for' and 'range for' stmts.
void ForStmtBeginHelper(SourceLocation ForLoc, ForStmtBeginChecker &C);

public:
ComputeConstructInfo &getActiveComputeConstructInfo() {
return ActiveComputeConstructInfo;
Expand All @@ -137,6 +169,11 @@ class SemaOpenACC : public SemaBase {
/// permits us to implement the restriction of no further 'gang', 'vector', or
/// 'worker' clauses.
SourceLocation LoopVectorClauseLoc;
/// If there is a current 'active' loop construct that does NOT have a 'seq'
/// clause on it, this has that source location. This permits us to implement
/// the 'loop' restrictions on the loop variable. This can be extended via
/// 'collapse', so we need to keep this around for a while.
SourceLocation LoopWithoutSeqLoc;

// Redeclaration of the version in OpenACCClause.h.
using DeviceTypeArgument = std::pair<IdentifierInfo *, SourceLocation>;
Expand Down Expand Up @@ -568,8 +605,19 @@ class SemaOpenACC : public SemaBase {
void ActOnWhileStmt(SourceLocation WhileLoc);
// Called when we encounter a 'do' statement, before looking at its 'body'.
void ActOnDoStmt(SourceLocation DoLoc);
// Called when we encounter a 'for' statement, before looking at its 'body',
// for the 'range-for'. 'ActOnForStmtEnd' is used after the body.
void ActOnRangeForStmtBegin(SourceLocation ForLoc, const Stmt *OldRangeFor,
const Stmt *RangeFor);
void ActOnRangeForStmtBegin(SourceLocation ForLoc, const Stmt *RangeFor);
// Called when we encounter a 'for' statement, before looking at its 'body'.
void ActOnForStmtBegin(SourceLocation ForLoc);
// 'ActOnForStmtEnd' is used after the body.
void ActOnForStmtBegin(SourceLocation ForLoc, const Stmt *First,
const Stmt *Second, const Stmt *Third);
void ActOnForStmtBegin(SourceLocation ForLoc, const Stmt *OldFirst,
const Stmt *First, const Stmt *OldSecond,
const Stmt *Second, const Stmt *OldThird,
const Stmt *Third);
// Called when we encounter a 'for' statement, after we've consumed/checked
// the body. This is necessary for a number of checks on the contents of the
// 'for' statement.
Expand Down Expand Up @@ -598,7 +646,9 @@ class SemaOpenACC : public SemaBase {
/// Called when we encounter an associated statement for our construct, this
/// should check legality of the statement as it appertains to this Construct.
StmtResult ActOnAssociatedStmt(SourceLocation DirectiveLoc,
OpenACCDirectiveKind K, StmtResult AssocStmt);
OpenACCDirectiveKind K,
ArrayRef<const OpenACCClause *> Clauses,
StmtResult AssocStmt);

/// Called after the directive has been completely parsed, including the
/// declaration group or associated statement.
Expand Down Expand Up @@ -712,12 +762,12 @@ class SemaOpenACC : public SemaBase {
SourceLocation OldLoopGangClauseOnKernelLoc;
SourceLocation OldLoopWorkerClauseLoc;
SourceLocation OldLoopVectorClauseLoc;
llvm::SmallVector<OpenACCLoopConstruct *> ParentlessLoopConstructs;
SourceLocation OldLoopWithoutSeqLoc;
llvm::SmallVector<OpenACCReductionClause *> ActiveReductionClauses;
LoopInConstructRAII LoopRAII;

public:
AssociatedStmtRAII(SemaOpenACC &, OpenACCDirectiveKind,
AssociatedStmtRAII(SemaOpenACC &, OpenACCDirectiveKind, SourceLocation,
ArrayRef<const OpenACCClause *>,
ArrayRef<OpenACCClause *>);
void SetCollapseInfoBeforeAssociatedStmt(
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,9 @@ class ASTWriter : public ASTDeserializationListener,

/// Mapping from a source location entry to whether it is affecting or not.
llvm::BitVector IsSLocAffecting;
/// Mapping from a source location entry to whether it must be included as
/// an input file.
llvm::BitVector IsSLocFileEntryAffecting;

/// Mapping from \c FileID to an index into the FileID adjustment table.
std::vector<FileID> NonAffectingFileIDs;
Expand Down
5 changes: 2 additions & 3 deletions clang/lib/AST/Decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1738,13 +1738,12 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS,

// Suppress inline namespace if it doesn't make the result ambiguous.
if (Ctx->isInlineNamespace() && NameInScope) {
bool isRedundant =
cast<NamespaceDecl>(Ctx)->isRedundantInlineQualifierFor(NameInScope);
if (P.SuppressInlineNamespace ==
PrintingPolicy::SuppressInlineNamespaceMode::All ||
(P.SuppressInlineNamespace ==
PrintingPolicy::SuppressInlineNamespaceMode::Redundant &&
isRedundant)) {
cast<NamespaceDecl>(Ctx)->isRedundantInlineQualifierFor(
NameInScope))) {
continue;
}
}
Expand Down
56 changes: 11 additions & 45 deletions clang/lib/AST/StmtOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,44 +28,15 @@ OpenACCComputeConstruct::CreateEmpty(const ASTContext &C, unsigned NumClauses) {
OpenACCComputeConstruct *OpenACCComputeConstruct::Create(
const ASTContext &C, OpenACCDirectiveKind K, SourceLocation BeginLoc,
SourceLocation DirLoc, SourceLocation EndLoc,
ArrayRef<const OpenACCClause *> Clauses, Stmt *StructuredBlock,
ArrayRef<OpenACCLoopConstruct *> AssociatedLoopConstructs) {
ArrayRef<const OpenACCClause *> Clauses, Stmt *StructuredBlock) {
void *Mem = C.Allocate(
OpenACCComputeConstruct::totalSizeToAlloc<const OpenACCClause *>(
Clauses.size()));
auto *Inst = new (Mem) OpenACCComputeConstruct(K, BeginLoc, DirLoc, EndLoc,
Clauses, StructuredBlock);

llvm::for_each(AssociatedLoopConstructs, [&](OpenACCLoopConstruct *C) {
C->setParentComputeConstruct(Inst);
});

return Inst;
}

// Traverses the statement associated with this compute construct and calls
// setParentComputeConstruct(this) on every OpenACCLoopConstruct the local
// visitor reaches.
void OpenACCComputeConstruct::findAndSetChildLoops() {
// Local visitor carrying the compute construct that loops get attached to.
struct LoopConstructFinder : RecursiveASTVisitor<LoopConstructFinder> {
OpenACCComputeConstruct *Construct = nullptr;

LoopConstructFinder(OpenACCComputeConstruct *Construct)
: Construct(Construct) {}

bool TraverseOpenACCComputeConstruct(OpenACCComputeConstruct *C) {
// Stop searching if we find a compute construct.
// This override does not forward to the base-class traversal, so the nested
// construct's children are not visited from here.
return true;
}
bool TraverseOpenACCLoopConstruct(OpenACCLoopConstruct *C) {
// Stop searching if we find a loop construct, after taking ownership of
// it.
// As above, the base-class traversal is not invoked, so nothing below this
// loop construct is visited by this walk.
C->setParentComputeConstruct(Construct);
return true;
}
};

// Start the walk at this construct's associated statement.
LoopConstructFinder f(this);
f.TraverseStmt(getAssociatedStmt());
}

OpenACCLoopConstruct::OpenACCLoopConstruct(unsigned NumClauses)
: OpenACCAssociatedStmtConstruct(
OpenACCLoopConstructClass, OpenACCDirectiveKind::Loop,
Expand All @@ -79,11 +50,13 @@ OpenACCLoopConstruct::OpenACCLoopConstruct(unsigned NumClauses)
}

OpenACCLoopConstruct::OpenACCLoopConstruct(
SourceLocation Start, SourceLocation DirLoc, SourceLocation End,
OpenACCDirectiveKind ParentKind, SourceLocation Start,
SourceLocation DirLoc, SourceLocation End,
ArrayRef<const OpenACCClause *> Clauses, Stmt *Loop)
: OpenACCAssociatedStmtConstruct(OpenACCLoopConstructClass,
OpenACCDirectiveKind::Loop, Start, DirLoc,
End, Loop) {
End, Loop),
ParentComputeConstructKind(ParentKind) {
// accept 'nullptr' for the loop. This is diagnosed somewhere, but this gives
// us some level of AST fidelity in the error case.
assert((Loop == nullptr || isa<ForStmt, CXXForRangeStmt>(Loop)) &&
Expand All @@ -96,12 +69,6 @@ OpenACCLoopConstruct::OpenACCLoopConstruct(
Clauses.size()));
}

// Sets the statement associated with this loop construct. Asserts that Loop
// is a ForStmt or CXXForRangeStmt, then stores it via setAssociatedStmt().
void OpenACCLoopConstruct::setLoop(Stmt *Loop) {
assert((isa<ForStmt, CXXForRangeStmt>(Loop)) &&
"Associated Loop not a for loop?");
setAssociatedStmt(Loop);
}

OpenACCLoopConstruct *OpenACCLoopConstruct::CreateEmpty(const ASTContext &C,
unsigned NumClauses) {
void *Mem =
Expand All @@ -111,15 +78,14 @@ OpenACCLoopConstruct *OpenACCLoopConstruct::CreateEmpty(const ASTContext &C,
return Inst;
}

OpenACCLoopConstruct *
OpenACCLoopConstruct::Create(const ASTContext &C, SourceLocation BeginLoc,
SourceLocation DirLoc, SourceLocation EndLoc,
ArrayRef<const OpenACCClause *> Clauses,
Stmt *Loop) {
OpenACCLoopConstruct *OpenACCLoopConstruct::Create(
const ASTContext &C, OpenACCDirectiveKind ParentKind,
SourceLocation BeginLoc, SourceLocation DirLoc, SourceLocation EndLoc,
ArrayRef<const OpenACCClause *> Clauses, Stmt *Loop) {
void *Mem =
C.Allocate(OpenACCLoopConstruct::totalSizeToAlloc<const OpenACCClause *>(
Clauses.size()));
auto *Inst =
new (Mem) OpenACCLoopConstruct(BeginLoc, DirLoc, EndLoc, Clauses, Loop);
auto *Inst = new (Mem)
OpenACCLoopConstruct(ParentKind, BeginLoc, DirLoc, EndLoc, Clauses, Loop);
return Inst;
}
2 changes: 1 addition & 1 deletion clang/lib/AST/TextNodeDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2928,7 +2928,7 @@ void TextNodeDumper::VisitOpenACCLoopConstruct(const OpenACCLoopConstruct *S) {
if (S->isOrphanedLoopConstruct())
OS << " <orphan>";
else
OS << " parent: " << S->getParentComputeConstruct();
OS << " parent: " << S->getParentComputeConstructKind();
}

void TextNodeDumper::VisitEmbedExpr(const EmbedExpr *S) {
Expand Down
28 changes: 10 additions & 18 deletions clang/lib/Basic/Attributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "clang/Basic/TargetInfo.h"

#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;

Expand Down Expand Up @@ -155,26 +156,17 @@ std::string AttributeCommonInfo::getNormalizedFullName() const {
normalizeName(getAttrName(), getScopeName(), getSyntax()));
}

// Sorted list of attribute scope names
static constexpr std::pair<StringRef, AttributeCommonInfo::Scope> ScopeList[] =
{{"", AttributeCommonInfo::Scope::NONE},
{"clang", AttributeCommonInfo::Scope::CLANG},
{"gnu", AttributeCommonInfo::Scope::GNU},
{"gsl", AttributeCommonInfo::Scope::GSL},
{"hlsl", AttributeCommonInfo::Scope::HLSL},
{"msvc", AttributeCommonInfo::Scope::MSVC},
{"omp", AttributeCommonInfo::Scope::OMP},
{"riscv", AttributeCommonInfo::Scope::RISCV}};

AttributeCommonInfo::Scope
getScopeFromNormalizedScopeName(StringRef ScopeName) {
auto It = std::lower_bound(
std::begin(ScopeList), std::end(ScopeList), ScopeName,
[](const std::pair<StringRef, AttributeCommonInfo::Scope> &Element,
StringRef Value) { return Element.first < Value; });
assert(It != std::end(ScopeList) && It->first == ScopeName);

return It->second;
return llvm::StringSwitch<AttributeCommonInfo::Scope>(ScopeName)
.Case("", AttributeCommonInfo::Scope::NONE)
.Case("clang", AttributeCommonInfo::Scope::CLANG)
.Case("gnu", AttributeCommonInfo::Scope::GNU)
.Case("gsl", AttributeCommonInfo::Scope::GSL)
.Case("hlsl", AttributeCommonInfo::Scope::HLSL)
.Case("msvc", AttributeCommonInfo::Scope::MSVC)
.Case("omp", AttributeCommonInfo::Scope::OMP)
.Case("riscv", AttributeCommonInfo::Scope::RISCV);
}

unsigned AttributeCommonInfo::calculateAttributeSpellingListIndex() const {
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Basic/Targets/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAMXFP8 = true;
} else if (Feature == "+amx-transpose") {
HasAMXTRANSPOSE = true;
} else if (Feature == "+amx-avx512") {
HasAMXAVX512 = true;
} else if (Feature == "+cmpccxadd") {
HasCMPCCXADD = true;
} else if (Feature == "+raoint") {
Expand Down Expand Up @@ -955,6 +957,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AMX_FP8__");
if (HasAMXTRANSPOSE)
Builder.defineMacro("__AMX_TRANSPOSE__");
if (HasAMXAVX512)
Builder.defineMacro("__AMX_AVX512__");
if (HasCMPCCXADD)
Builder.defineMacro("__CMPCCXADD__");
if (HasRAOINT)
Expand Down Expand Up @@ -1080,6 +1084,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
return llvm::StringSwitch<bool>(Name)
.Case("adx", true)
.Case("aes", true)
.Case("amx-avx512", true)
.Case("amx-bf16", true)
.Case("amx-complex", true)
.Case("amx-fp16", true)
Expand Down Expand Up @@ -1200,6 +1205,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Case("adx", HasADX)
.Case("aes", HasAES)
.Case("amx-avx512", HasAMXAVX512)
.Case("amx-bf16", HasAMXBF16)
.Case("amx-complex", HasAMXCOMPLEX)
.Case("amx-fp16", HasAMXFP16)
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAMXCOMPLEX = false;
bool HasAMXFP8 = false;
bool HasAMXTRANSPOSE = false;
bool HasAMXAVX512 = false;
bool HasSERIALIZE = false;
bool HasTSXLDTRK = false;
bool HasUSERMSR = false;
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"

using namespace cir;
using namespace clang;
using namespace clang::CIRGen;

CIRGenModule::CIRGenModule(mlir::MLIRContext &context,
clang::ASTContext &astctx,
const clang::CodeGenOptions &cgo,
Expand Down Expand Up @@ -75,7 +77,7 @@ void CIRGenModule::buildGlobal(clang::GlobalDecl gd) {
void CIRGenModule::buildGlobalFunctionDefinition(clang::GlobalDecl gd,
mlir::Operation *op) {
auto const *funcDecl = cast<FunctionDecl>(gd.getDecl());
auto funcOp = builder.create<mlir::cir::FuncOp>(
auto funcOp = builder.create<cir::FuncOp>(
getLoc(funcDecl->getSourceRange()), funcDecl->getIdentifier()->getName());
theModule.push_back(funcOp);
}
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@ class LangOptions;
class SourceLocation;
class SourceRange;
class TargetInfo;
} // namespace clang

using namespace clang;
namespace cir {
namespace CIRGen {

/// This class organizes the cross-function state that is used while generating
/// CIR code.
Expand Down Expand Up @@ -91,6 +89,8 @@ class CIRGenModule : public CIRGenTypeCache {
DiagnosticBuilder errorNYI(SourceRange, llvm::StringRef);
DiagnosticBuilder errorNYI(SourceRange, llvm::StringRef, llvm::StringRef);
};
} // namespace cir
} // namespace CIRGen

} // namespace clang

#endif // LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENMODULE_H
4 changes: 2 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenTypeCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#ifndef LLVM_CLANG_LIB_CIR_CIRGENTYPECACHE_H
#define LLVM_CLANG_LIB_CIR_CIRGENTYPECACHE_H

namespace cir {
namespace clang::CIRGen {

/// This structure provides a set of types that are commonly used
/// during IR emission. It's initialized once in CodeGenModule's
Expand All @@ -22,6 +22,6 @@ struct CIRGenTypeCache {
CIRGenTypeCache() = default;
};

} // namespace cir
} // namespace clang::CIRGen

#endif // LLVM_CLANG_LIB_CIR_CIRGENTYPECACHE_H
6 changes: 3 additions & 3 deletions clang/lib/CIR/CodeGen/CIRGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ void CIRGenerator::Initialize(ASTContext &astCtx) {
this->astCtx = &astCtx;

mlirCtx = std::make_unique<mlir::MLIRContext>();
mlirCtx->loadDialect<mlir::cir::CIRDialect>();
cgm = std::make_unique<CIRGenModule>(*mlirCtx.get(), astCtx, codeGenOpts,
diags);
mlirCtx->loadDialect<cir::CIRDialect>();
cgm = std::make_unique<clang::CIRGen::CIRGenModule>(*mlirCtx.get(), astCtx,
codeGenOpts, diags);
}

mlir::ModuleOp CIRGenerator::getModule() const { return cgm->getModule(); }
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "clang/CIR/Dialect/IR/CIRDialect.h"

using namespace mlir;
using namespace mlir::cir;
using namespace cir;

//===----------------------------------------------------------------------===//
// General CIR parsing / printing
Expand Down
10 changes: 5 additions & 5 deletions clang/lib/CIR/Dialect/IR/CIRDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
#include "clang/CIR/Dialect/IR/CIROpsDialect.cpp.inc"

using namespace mlir;
using namespace mlir::cir;
using namespace cir;

//===----------------------------------------------------------------------===//
// CIR Dialect
//===----------------------------------------------------------------------===//

void mlir::cir::CIRDialect::initialize() {
void cir::CIRDialect::initialize() {
registerTypes();
registerAttributes();
addOperations<
Expand All @@ -36,8 +36,8 @@ void mlir::cir::CIRDialect::initialize() {
// FuncOp
//===----------------------------------------------------------------------===//

void mlir::cir::FuncOp::build(OpBuilder &builder, OperationState &result,
StringRef name) {
void cir::FuncOp::build(OpBuilder &builder, OperationState &result,
StringRef name) {
result.addAttribute(SymbolTable::getSymbolAttrName(),
builder.getStringAttr(name));
}
Expand All @@ -56,7 +56,7 @@ void cir::FuncOp::print(OpAsmPrinter &p) {
p.printSymbolName(getSymName());
}

mlir::LogicalResult mlir::cir::FuncOp::verify() { return success(); }
mlir::LogicalResult cir::FuncOp::verify() { return success(); }

//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CIR/Dialect/IR/CIRTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "clang/CIR/Dialect/IR/CIRDialect.h"

using namespace mlir;
using namespace mlir::cir;
using namespace cir;

//===----------------------------------------------------------------------===//
// General CIR parsing / printing
Expand Down
8 changes: 6 additions & 2 deletions clang/lib/Driver/XRayArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,12 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
false)) {
XRayShared = true;

// DSO instrumentation is currently limited to x86_64
if (Triple.getArch() != llvm::Triple::x86_64) {
// Certain targets support DSO instrumentation
switch (Triple.getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::x86_64:
break;
default:
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< "-fxray-shared" << Triple.str();
}
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ CreateFrontendBaseAction(CompilerInstance &CI) {
case EmitBC: return std::make_unique<EmitBCAction>();
case EmitCIR:
#if CLANG_ENABLE_CIR
return std::make_unique<::cir::EmitCIRAction>();
return std::make_unique<cir::EmitCIRAction>();
#else
llvm_unreachable("CIR suppport not built into clang");
#endif
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Headers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ set(x86_files
adcintrin.h
adxintrin.h
ammintrin.h
amxavx512intrin.h
amxcomplexintrin.h
amxfp16intrin.h
amxfp8intrin.h
Expand Down
382 changes: 382 additions & 0 deletions clang/lib/Headers/amxavx512intrin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,382 @@
/*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===------------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __AMX_AVX512INTRIN_H
#define __AMX_AVX512INTRIN_H
#if defined(__x86_64__) && defined(__SSE2__)

/* Default attributes for the functions defined in this header: always inlined,
 * no debug info, and compiled with the "amx-avx512" and "avx10.2-512" target
 * features enabled. */
#define __DEFAULT_FN_ATTRS_AVX512 \
__attribute__((__always_inline__, __nodebug__, \
__target__("amx-avx512,avx10.2-512")))

/// Moves a row from a tile register to a zmm destination register, converting
/// the int32 source elements to fp32. The row of the tile is selected by a
/// 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512 _tile_cvtrowd2ps(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWD2PS instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index of the
///    source tile and bits [31:16] hold the index of the 64-byte chunk within
///    that row.
#define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to bf16. It places the resulting bf16 elements
/// in the high 16 bits within each dword. The row of the tile is selected
/// by a 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512bh _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.word[2*i+0] := 0
///     dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index of the
///    source tile and bits [31:16] hold the index of the 64-byte chunk within
///    that row.
#define _tile_cvtrowps2pbf16h(tsrc, row) \
__builtin_ia32_tcvtrowps2pbf16h(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to bf16. It places the resulting bf16 elements
/// in the low 16 bits within each dword. The row of the tile is selected
/// by a 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512bh _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.word[2*i+1] := 0
///     dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index of the
///    source tile and bits [31:16] hold the index of the 64-byte chunk within
///    that row.
#define _tile_cvtrowps2pbf16l(tsrc, row) \
__builtin_ia32_tcvtrowps2pbf16l(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to fp16. It places the resulting fp16 elements
/// in the high 16 bits within each dword. The row of the tile is selected
/// by a 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512h _tile_cvtrowps2phh(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.word[2*i+0] := 0
///     dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index of the
///    source tile and bits [31:16] hold the index of the 64-byte chunk within
///    that row.
#define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to fp16. It places the resulting fp16 elements
/// in the low 16 bits within each dword. The row of the tile is selected
/// by a 32b GPR.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512h _tile_cvtrowps2phl(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL >> 3
/// row_index := row & 0xffff
/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes / 4) - 1
///   IF i + row_chunk / 4 >= tsrc.colsb / 4
///     dst.dword[i] := 0
///   ELSE
///     dst.word[2*i+1] := 0
///     dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
///   FI
/// ENDFOR
/// dst[MAX_VL-1:VL] := 0
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction.
///
/// \param tsrc
///    The source tile. Max size is 1024 Bytes.
/// \param row
///    Selects the data to convert: bits [15:0] hold the row index of the
///    source tile and bits [31:16] hold the index of the 64-byte chunk within
///    that row.
#define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row)

/// Move one row of tile data to a v16i32 destination without conversion.
/// The row of the tile is selected by a 32b GPR.
///
/// \headerfile <immintrin.h>
///
/// \code
/// __m512i _tile_movrow(__tile a, unsigned b);
/// \endcode
///
/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
///
/// \param a
///    The 1st source tile. Max size is 1024 Bytes.
/// \param b
///    The 2nd source r32. Size is 4 Bytes. Bits [15:0] hold the row index and
///    bits [31:16] hold the index of the 64-byte chunk within that row.
/// \returns
///    The destination v16i32 data. Size is 64 Bytes.
///
/// \code{.operation}
/// VL := 512
/// VL_bytes := VL>>3
/// row_index := b&0xffff
/// row_chunk := ((b>>16)&0xffff) * VL_bytes
/// FOR i := 0 TO (VL_bytes-1)
///   IF (row_chunk + i >= a.colsb)
///     dst.byte[i] := 0
///   ELSE
///     dst.byte[i] := a.row[row_index].byte[row_chunk+i]
///   FI
/// ENDFOR
/// \endcode
#define _tile_movrow(a, b) __builtin_ia32_tilemovrow(a, b)

/// These are internal intrinsics. C/C++ users should avoid calling them
/// directly.

/// Internal helper used by __tile_cvtrowd2ps. Forwards the tile shape
/// (\a m, \a n), the tile data \a src and the 32-bit row selector \a u to
/// __builtin_ia32_tcvtrowd2ps_internal and returns the resulting fp32 vector.
static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowd2ps_internal(unsigned short m, unsigned short n, _tile1024i src,
                          unsigned u) {
  __m512 __dst = __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
  return __dst;
}

/// Internal helper used by __tile_cvtrowps2pbf16h. Forwards the tile shape
/// (\a m, \a n), the tile data \a src and the 32-bit row selector \a u to
/// __builtin_ia32_tcvtrowps2pbf16h_internal and returns the resulting bf16
/// vector.
static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n,
                               _tile1024i src, unsigned u) {
  __m512bh __dst = __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u);
  return __dst;
}

/// Internal helper used by __tile_cvtrowps2pbf16l. Forwards the tile shape
/// (\a m, \a n), the tile data \a src and the 32-bit row selector \a u to
/// __builtin_ia32_tcvtrowps2pbf16l_internal and returns the resulting bf16
/// vector.
static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n,
                               _tile1024i src, unsigned u) {
  __m512bh __dst = __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u);
  return __dst;
}

/// Internal helper used by __tile_cvtrowps2phh. Forwards the tile shape
/// (\a m, \a n), the tile data \a src and the 32-bit row selector \a u to
/// __builtin_ia32_tcvtrowps2phh_internal and returns the resulting fp16
/// vector.
static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2phh_internal(unsigned short m, unsigned short n, _tile1024i src,
                            unsigned u) {
  __m512h __dst = __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
  return __dst;
}

/// Internal helper used by __tile_cvtrowps2phl. Forwards the tile shape
/// (\a m, \a n), the tile data \a src and the 32-bit row selector \a u to
/// __builtin_ia32_tcvtrowps2phl_internal and returns the resulting fp16
/// vector.
static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2phl_internal(unsigned short m, unsigned short n, _tile1024i src,
                            unsigned u) {
  __m512h __dst = __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
  return __dst;
}

/// Internal helper used by __tile_movrow. Forwards the tile shape (\a m, \a n),
/// the tile data \a src and the 32-bit row selector \a u to
/// __builtin_ia32_tilemovrow_internal and returns the result converted to
/// __m512i.
static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512
_tile_movrow_internal(unsigned short m, unsigned short n, _tile1024i src,
                      unsigned u) {
  __m512i __dst = (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u);
  return __dst;
}

/// Move a row from a tile (src0) to a v16f32 dst, converting the int32 source
/// elements to fp32. No SIMD exceptions are generated. Rounding is done as if
/// MXCSR.RC=RNE. Embedded rounding is not supported.
/// The row and chunk of the tile to read are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWD2PS </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v16f32 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
  __m512 __dst =
      _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1);
  return __dst;
}

/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
/// elements to bf16 and placing each result in the high 16 bits of its dword.
/// The row and chunk of the tile to read are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
  __m512bh __dst =
      _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1);
  return __dst;
}

/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
/// elements to bf16 and placing each result in the low 16 bits of its dword.
/// The row and chunk of the tile to read are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) {
  __m512bh __dst =
      _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1);
  return __dst;
}

/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
/// elements to fp16 and placing each result in the high 16 bits of its dword.
/// The row and chunk of the tile to read are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PHH </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32fp16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
  __m512h __dst =
      _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
  return __dst;
}

/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
/// elements to fp16 and placing each result in the low 16 bits of its dword.
/// The row and chunk of the tile to read are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PHL </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v32fp16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
  __m512h __dst =
      _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
  return __dst;
}

/// Move one row of tile data to a v16i32 destination without conversion.
/// The row and chunk of the tile to read are taken from the 32-bit src1.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
///
/// \param src0
///    The 1st source tile. Max size is 1024 Bytes.
/// \param src1
///    The 2nd source r32. Size is 4 Bytes.
/// \returns
///    The destination v16i32 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512i __tile_movrow(__tile1024i src0, unsigned src1) {
  // _tile_movrow_internal already returns __m512i, so no cast is needed here.
  return _tile_movrow_internal(src0.row, src0.col, src0.tile, src1);
}

#endif // __x86_64__ && __SSE2__
#endif // __AMX_AVX512INTRIN_H
5 changes: 3 additions & 2 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -4626,8 +4626,9 @@ _mm_movepi64_pi64(__m128i __a) {
/// A 64-bit value.
/// \returns A 128-bit integer vector. The lower 64 bits contain the value from
/// the operand. The upper 64 bits are assigned zeros.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) {
return __extension__(__m128i)(__v2di){(long long)__a, 0};
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_movpi64_epi64(__m64 __a) {
return __builtin_shufflevector((__v1di)__a, _mm_setzero_si64(), 0, 1);
}

/// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Headers/immintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,10 @@ _storebe_i64(void * __P, long long __D) {
#include <amxtransposeintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__)
#include <amxavx512intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
Expand Down
78 changes: 37 additions & 41 deletions clang/lib/Headers/mmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ typedef char __v16qi __attribute__((__vector_size__(16)));
__min_vector_width__(128)))
#endif

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
#else
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
#endif

#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
#define __anyext128(x) \
Expand Down Expand Up @@ -1332,10 +1338,9 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
/// This intrinsic corresponds to the <c> PXOR </c> instruction.
///
/// \returns An initialized 64-bit integer vector with all elements set to zero.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setzero_si64(void)
{
return __extension__ (__m64){ 0LL };
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_setzero_si64(void) {
return __extension__(__m64){0LL};
}

/// Constructs a 64-bit integer vector initialized with the specified
Expand All @@ -1353,10 +1358,9 @@ _mm_setzero_si64(void)
/// A 32-bit integer value used to initialize the lower 32 bits of the
/// result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set_pi32(int __i1, int __i0)
{
return __extension__ (__m64)(__v2si){__i0, __i1};
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set_pi32(int __i1, int __i0) {
return __extension__(__m64)(__v2si){__i0, __i1};
}

/// Constructs a 64-bit integer vector initialized with the specified
Expand All @@ -1376,10 +1380,9 @@ _mm_set_pi32(int __i1, int __i0)
/// \param __s0
/// A 16-bit integer value used to initialize bits [15:0] of the result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
{
return __extension__ (__m64)(__v4hi){__s0, __s1, __s2, __s3};
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) {
return __extension__(__m64)(__v4hi){__s0, __s1, __s2, __s3};
}

/// Constructs a 64-bit integer vector initialized with the specified
Expand Down Expand Up @@ -1407,12 +1410,11 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
/// \param __b0
/// An 8-bit integer value used to initialize bits [7:0] of the result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
char __b1, char __b0)
{
return __extension__ (__m64)(__v8qi){__b0, __b1, __b2, __b3,
__b4, __b5, __b6, __b7};
char __b1, char __b0) {
return __extension__(__m64)(__v8qi){__b0, __b1, __b2, __b3,
__b4, __b5, __b6, __b7};
}

/// Constructs a 64-bit integer vector of [2 x i32], with each of the
Expand All @@ -1428,10 +1430,9 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
/// A 32-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [2 x i32].
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set1_pi32(int __i)
{
return _mm_set_pi32(__i, __i);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set1_pi32(int __i) {
return _mm_set_pi32(__i, __i);
}

/// Constructs a 64-bit integer vector of [4 x i16], with each of the
Expand All @@ -1447,10 +1448,9 @@ _mm_set1_pi32(int __i)
/// A 16-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [4 x i16].
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set1_pi16(short __w)
{
return _mm_set_pi16(__w, __w, __w, __w);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set1_pi16(short __w) {
return _mm_set_pi16(__w, __w, __w, __w);
}

/// Constructs a 64-bit integer vector of [8 x i8], with each of the
Expand All @@ -1465,10 +1465,9 @@ _mm_set1_pi16(short __w)
/// An 8-bit integer value used to initialize each vector element of the
/// result.
/// \returns An initialized 64-bit integer vector of [8 x i8].
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_set1_pi8(char __b)
{
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_set1_pi8(char __b) {
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
}

/// Constructs a 64-bit integer vector, initialized in reverse order with
Expand All @@ -1486,10 +1485,9 @@ _mm_set1_pi8(char __b)
/// A 32-bit integer value used to initialize the upper 32 bits of the
/// result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setr_pi32(int __i0, int __i1)
{
return _mm_set_pi32(__i1, __i0);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_setr_pi32(int __i0, int __i1) {
return _mm_set_pi32(__i1, __i0);
}

/// Constructs a 64-bit integer vector, initialized in reverse order with
Expand All @@ -1509,10 +1507,9 @@ _mm_setr_pi32(int __i0, int __i1)
/// \param __w3
/// A 16-bit integer value used to initialize bits [63:48] of the result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
{
return _mm_set_pi16(__w3, __w2, __w1, __w0);
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
return _mm_set_pi16(__w3, __w2, __w1, __w0);
}

/// Constructs a 64-bit integer vector, initialized in reverse order with
Expand Down Expand Up @@ -1540,11 +1537,10 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
/// \param __b7
/// An 8-bit integer value used to initialize bits [63:56] of the result.
/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
char __b6, char __b7)
{
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
char __b6, char __b7) {
return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
}

#undef __anyext128
Expand Down
5 changes: 0 additions & 5 deletions clang/lib/Headers/stdalign.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
#ifndef __STDALIGN_H
#define __STDALIGN_H

#if defined(__MVS__) && __has_include_next(<stdalign.h>)
#include_next <stdalign.h>
#else

#if defined(__cplusplus) || \
(defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L)
#ifndef __cplusplus
Expand All @@ -25,5 +21,4 @@
#define __alignof_is_defined 1
#endif /* __STDC_VERSION__ */

#endif /* __MVS__ */
#endif /* __STDALIGN_H */
7 changes: 4 additions & 3 deletions clang/lib/Parse/ParseOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1498,14 +1498,15 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() {
return StmtError();

StmtResult AssocStmt;
SemaOpenACC::AssociatedStmtRAII AssocStmtRAII(
getActions().OpenACC(), DirInfo.DirKind, {}, DirInfo.Clauses);
SemaOpenACC::AssociatedStmtRAII AssocStmtRAII(getActions().OpenACC(),
DirInfo.DirKind, DirInfo.DirLoc,
{}, DirInfo.Clauses);
if (doesDirectiveHaveAssociatedStmt(DirInfo.DirKind)) {
ParsingOpenACCDirectiveRAII DirScope(*this, /*Value=*/false);
ParseScope ACCScope(this, getOpenACCScopeFlags(DirInfo.DirKind));

AssocStmt = getActions().OpenACC().ActOnAssociatedStmt(
DirInfo.StartLoc, DirInfo.DirKind, ParseStatement());
DirInfo.StartLoc, DirInfo.DirKind, DirInfo.Clauses, ParseStatement());
}

return getActions().OpenACC().ActOnEndStmtDirective(
Expand Down
6 changes: 5 additions & 1 deletion clang/lib/Parse/ParseStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2360,7 +2360,11 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
// OpenACC Restricts a for-loop inside of certain construct/clause
// combinations, so diagnose that here in OpenACC mode.
SemaOpenACC::LoopInConstructRAII LCR{getActions().OpenACC()};
getActions().OpenACC().ActOnForStmtBegin(ForLoc);
if (ForRangeInfo.ParsedForRangeDecl())
getActions().OpenACC().ActOnRangeForStmtBegin(ForLoc, ForRangeStmt.get());
else
getActions().OpenACC().ActOnForStmtBegin(
ForLoc, FirstPart.get(), SecondPart.get().second, ThirdPart.get());

// C99 6.8.5p5 - In C99, the body of the for statement is a scope, even if
// there is no compound stmt. C90 does not have this clause. We only do this
Expand Down
30 changes: 26 additions & 4 deletions clang/lib/Sema/HLSLExternalSemaSource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -502,8 +502,8 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.addSimpleTemplateParams(*SemaPtr, {"element_type"})
.Record;
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::SRV,
ResourceKind::TypedBuffer, /*IsROV=*/false,
setupBufferType(Decl, *SemaPtr, ResourceClass::SRV, ResourceKind::RawBuffer,
/*IsROV=*/false,
/*RawBuffer=*/true)
.addArraySubscriptOperators()
.completeDefinition();
Expand All @@ -513,13 +513,35 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
.addSimpleTemplateParams(*SemaPtr, {"element_type"})
.Record;
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV,
ResourceKind::TypedBuffer, /*IsROV=*/false,
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer,
/*IsROV=*/false,
/*RawBuffer=*/true)
.addArraySubscriptOperators()
.completeDefinition();
});

Decl =
BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "AppendStructuredBuffer")
.addSimpleTemplateParams(*SemaPtr, {"element_type"})
.Record;
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer,
/*IsROV=*/false,
/*RawBuffer=*/true)
.completeDefinition();
});

Decl =
BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "ConsumeStructuredBuffer")
.addSimpleTemplateParams(*SemaPtr, {"element_type"})
.Record;
onCompletion(Decl, [this](CXXRecordDecl *Decl) {
setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer,
/*IsROV=*/false,
/*RawBuffer=*/true)
.completeDefinition();
});

Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace,
"RasterizerOrderedStructuredBuffer")
.addSimpleTemplateParams(*SemaPtr, {"element_type"})
Expand Down
14 changes: 8 additions & 6 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15021,7 +15021,8 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D,
: diag::warn_deprecated_register)
<< FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
} else if (!getLangOpts().CPlusPlus &&
DS.getTypeSpecType() == DeclSpec::TST_void) {
DS.getTypeSpecType() == DeclSpec::TST_void &&
D.getNumTypeObjects() == 0) {
Diag(DS.getStorageClassSpecLoc(),
diag::err_invalid_storage_class_in_func_decl)
<< FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
Expand Down Expand Up @@ -19364,11 +19365,12 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
}

// Structs without named members are extension in C (C99 6.7.2.1p7),
// but are accepted by GCC.
if (NonBitFields == 0 && !getLangOpts().CPlusPlus) {
Diag(RecLoc, IsEmpty ? diag::ext_empty_struct_union :
diag::ext_no_named_members_in_struct_union)
<< Record->isUnion();
// but are accepted by GCC. In C2y, this became implementation-defined
// (C2y 6.7.3.2p10).
if (NonBitFields == 0 && !getLangOpts().CPlusPlus && !getLangOpts().C2y) {
Diag(RecLoc, IsEmpty ? diag::ext_empty_struct_union
: diag::ext_no_named_members_in_struct_union)
<< Record->isUnion();
}
}
} else {
Expand Down
19 changes: 17 additions & 2 deletions clang/lib/Sema/SemaFunctionEffects.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,7 @@ class Analyzer {
CallableInfo &CurrentCaller;
ViolationSite VSite;
const Expr *TrailingRequiresClause = nullptr;
const Expr *NoexceptExpr = nullptr;

FunctionBodyASTVisitor(Analyzer &Outer,
PendingFunctionAnalysis &CurrentFunction,
Expand All @@ -986,9 +987,22 @@ class Analyzer {
if (auto *Dtor = dyn_cast<CXXDestructorDecl>(CurrentCaller.CDecl))
followDestructor(dyn_cast<CXXRecordDecl>(Dtor->getParent()), Dtor);

if (auto *FD = dyn_cast<FunctionDecl>(CurrentCaller.CDecl))
if (auto *FD = dyn_cast<FunctionDecl>(CurrentCaller.CDecl)) {
TrailingRequiresClause = FD->getTrailingRequiresClause();

// Note that FD->getType->getAs<FunctionProtoType>() can yield a
// noexcept Expr which has been boiled down to a constant expression.
// Going through the TypeSourceInfo obtains the actual expression which
// will be traversed as part of the function -- unless we capture it
// here and have TraverseStmt skip it.
if (TypeSourceInfo *TSI = FD->getTypeSourceInfo()) {
if (FunctionProtoTypeLoc TL =
TSI->getTypeLoc().getAs<FunctionProtoTypeLoc>())
if (const FunctionProtoType *FPT = TL.getTypePtr())
NoexceptExpr = FPT->getNoexceptExpr();
}
}

// Do an AST traversal of the function/block body
TraverseDecl(const_cast<Decl *>(CurrentCaller.CDecl));
}
Expand Down Expand Up @@ -1269,7 +1283,8 @@ class Analyzer {
// We skip the traversal of lambdas (beyond their captures, see
// TraverseLambdaExpr below), so just caching this from our constructor
// should suffice.
if (Statement != TrailingRequiresClause)
// The exact same is true for a conditional `noexcept()` clause.
if (Statement != TrailingRequiresClause && Statement != NoexceptExpr)
return Base::TraverseStmt(Statement);
return true;
}
Expand Down
487 changes: 454 additions & 33 deletions clang/lib/Sema/SemaOpenACC.cpp

Large diffs are not rendered by default.

14 changes: 0 additions & 14 deletions clang/lib/Sema/SemaRISCV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,20 +282,6 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
if ((BaseTypeI & Record.TypeRangeMask) != BaseTypeI)
continue;

// TODO: Remove the check below and use RequiredFeatures in
// riscv_vector.td to check the intrinsics instead, the type check should
// be done in checkRVVTypeSupport. This check also not able to work on the
// intrinsics that have Float16 but the BaseType is not Float16 such as
// `vfcvt_f_x_v`.
if (BaseType == BasicType::Float16) {
if ((Record.RequiredExtensions & RVV_REQ_Zvfhmin) == RVV_REQ_Zvfhmin) {
if (!TI.hasFeature("zvfhmin"))
continue;
} else if (!TI.hasFeature("zvfh")) {
continue;
}
}

// Expanded with different LMUL.
for (int Log2LMUL = -3; Log2LMUL <= 3; Log2LMUL++) {
if (!(Record.Log2LMULMask & (1 << (Log2LMUL + 3))))
Expand Down
49 changes: 24 additions & 25 deletions clang/lib/Sema/SemaTemplateInstantiate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1749,31 +1749,21 @@ namespace {
return inherited::TransformLambdaBody(E, Body);
}

ExprResult RebuildSizeOfPackExpr(SourceLocation OperatorLoc,
NamedDecl *Pack, SourceLocation PackLoc,
SourceLocation RParenLoc,
std::optional<unsigned> Length,
ArrayRef<TemplateArgument> PartialArgs) {
if (SemaRef.CodeSynthesisContexts.back().Kind !=
Sema::CodeSynthesisContext::ConstraintNormalization)
return inherited::RebuildSizeOfPackExpr(OperatorLoc, Pack, PackLoc,
RParenLoc, Length, PartialArgs);

#ifndef NDEBUG
for (auto *Iter = TemplateArgs.begin(); Iter != TemplateArgs.end();
++Iter)
for (const TemplateArgument &TA : Iter->Args)
assert(TA.getKind() != TemplateArgument::Pack || TA.pack_size() == 1);
#endif
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(
SemaRef, /*NewSubstitutionIndex=*/0);
Decl *NewPack = TransformDecl(PackLoc, Pack);
if (!NewPack)
return ExprError();

return inherited::RebuildSizeOfPackExpr(OperatorLoc,
cast<NamedDecl>(NewPack), PackLoc,
RParenLoc, Length, PartialArgs);
// Transforms a sizeof...(Pack) expression. The inherited transform runs
// first; then, only while normalizing constraints and only if that transform
// left the referenced pack declaration unchanged, the pack declaration is
// transformed explicitly and stored back into the resulting expression.
ExprResult TransformSizeOfPackExpr(SizeOfPackExpr *E) {
ExprResult Transformed = inherited::TransformSizeOfPackExpr(E);
// Hand an unusable result straight back to the caller.
if (!Transformed.isUsable())
return Transformed;
auto *TransformedExpr = cast<SizeOfPackExpr>(Transformed.get());
// The extra step applies only in the ConstraintNormalization context, and
// only when the result still names the same pack declaration as the input.
if (SemaRef.CodeSynthesisContexts.back().Kind ==
Sema::CodeSynthesisContext::ConstraintNormalization &&
TransformedExpr->getPack() == E->getPack()) {
Decl *NewPack =
TransformDecl(E->getPackLoc(), TransformedExpr->getPack());
// A null declaration from TransformDecl is reported as an error.
if (!NewPack)
return ExprError();
// Note: this updates the pack on TransformedExpr in place.
TransformedExpr->setPack(cast<NamedDecl>(NewPack));
}
return TransformedExpr;
}

ExprResult TransformRequiresExpr(RequiresExpr *E) {
Expand Down Expand Up @@ -1899,6 +1889,15 @@ Decl *TemplateInstantiator::TransformDecl(SourceLocation Loc, Decl *D) {
TemplateArgument Arg = TemplateArgs(TTP->getDepth(), TTP->getPosition());

if (TTP->isParameterPack()) {
// We might not have an index for pack expansion when normalizing
// constraint expressions. In that case, resort to instantiation scopes
// for the transformed declarations.
if (SemaRef.ArgumentPackSubstitutionIndex == -1 &&
SemaRef.CodeSynthesisContexts.back().Kind ==
Sema::CodeSynthesisContext::ConstraintNormalization) {
return SemaRef.FindInstantiatedDecl(Loc, cast<NamedDecl>(D),
TemplateArgs);
}
assert(Arg.getKind() == TemplateArgument::Pack &&
"Missing argument pack");
Arg = getPackSubstitutedTemplateArgument(getSema(), Arg);
Expand Down
9 changes: 6 additions & 3 deletions clang/lib/Sema/SemaType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1500,16 +1500,19 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
// C99 6.7.3p8:
// If the specification of a function type includes any type qualifiers,
// the behavior is undefined.
// C2y changed this behavior to be implementation-defined. Clang defines
// the behavior in all cases to ignore the qualifier, as in C++.
// C++11 [dcl.fct]p7:
// The effect of a cv-qualifier-seq in a function declarator is not the
// same as adding cv-qualification on top of the function type. In the
// latter case, the cv-qualifiers are ignored.
if (Result->isFunctionType()) {
unsigned DiagId = diag::warn_typecheck_function_qualifiers_ignored;
if (!S.getLangOpts().CPlusPlus && !S.getLangOpts().C2y)
DiagId = diag::ext_typecheck_function_qualifiers_unspecified;
diagnoseAndRemoveTypeQualifiers(
S, DS, TypeQuals, Result, DeclSpec::TQ_const | DeclSpec::TQ_volatile,
S.getLangOpts().CPlusPlus
? diag::warn_typecheck_function_qualifiers_ignored
: diag::warn_typecheck_function_qualifiers_unspecified);
DiagId);
// No diagnostic for 'restrict' or '_Atomic' applied to a
// function type; we'll diagnose those later, in BuildQualifiedType.
}
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Sema/SemaX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,12 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_t2rpntlvwz0t1:
case X86::BI__builtin_ia32_t2rpntlvwz1:
case X86::BI__builtin_ia32_t2rpntlvwz1t1:
case X86::BI__builtin_ia32_tcvtrowps2pbf16h:
case X86::BI__builtin_ia32_tcvtrowps2pbf16l:
case X86::BI__builtin_ia32_tcvtrowps2phh:
case X86::BI__builtin_ia32_tcvtrowps2phl:
case X86::BI__builtin_ia32_tcvtrowd2ps:
case X86::BI__builtin_ia32_tilemovrow:
return CheckBuiltinTileArgumentsRange(TheCall, 0);
case X86::BI__builtin_ia32_tdpbssd:
case X86::BI__builtin_ia32_tdpbsud:
Expand Down
20 changes: 11 additions & 9 deletions clang/lib/Sema/TreeTransform.h
Original file line number Diff line number Diff line change
Expand Up @@ -8298,7 +8298,9 @@ TreeTransform<Derived>::TransformForStmt(ForStmt *S) {
// OpenACC Restricts a for-loop inside of certain construct/clause
// combinations, so diagnose that here in OpenACC mode.
SemaOpenACC::LoopInConstructRAII LCR{SemaRef.OpenACC()};
SemaRef.OpenACC().ActOnForStmtBegin(S->getBeginLoc());
SemaRef.OpenACC().ActOnForStmtBegin(
S->getBeginLoc(), S->getInit(), Init.get(), S->getCond(),
Cond.get().second, S->getInc(), Inc.get());

// Transform the body
StmtResult Body = getDerived().TransformStmt(S->getBody());
Expand Down Expand Up @@ -9048,7 +9050,7 @@ TreeTransform<Derived>::TransformCXXForRangeStmt(CXXForRangeStmt *S) {
// OpenACC restricts a range-based for-loop inside of certain
// construct/clause combinations, so diagnose that here in OpenACC mode.
SemaOpenACC::LoopInConstructRAII LCR{SemaRef.OpenACC()};
SemaRef.OpenACC().ActOnForStmtBegin(S->getBeginLoc());
SemaRef.OpenACC().ActOnRangeForStmtBegin(S->getBeginLoc(), S, NewStmt.get());

StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
Expand Down Expand Up @@ -12073,11 +12075,11 @@ StmtResult TreeTransform<Derived>::TransformOpenACCComputeConstruct(

// Transform Structured Block.
SemaOpenACC::AssociatedStmtRAII AssocStmtRAII(
getSema().OpenACC(), C->getDirectiveKind(), C->clauses(),
TransformedClauses);
getSema().OpenACC(), C->getDirectiveKind(), C->getDirectiveLoc(),
C->clauses(), TransformedClauses);
StmtResult StrBlock = getDerived().TransformStmt(C->getStructuredBlock());
StrBlock = getSema().OpenACC().ActOnAssociatedStmt(
C->getBeginLoc(), C->getDirectiveKind(), StrBlock);
C->getBeginLoc(), C->getDirectiveKind(), TransformedClauses, StrBlock);

return getDerived().RebuildOpenACCComputeConstruct(
C->getDirectiveKind(), C->getBeginLoc(), C->getDirectiveLoc(),
Expand All @@ -12100,11 +12102,11 @@ TreeTransform<Derived>::TransformOpenACCLoopConstruct(OpenACCLoopConstruct *C) {

// Transform Loop.
SemaOpenACC::AssociatedStmtRAII AssocStmtRAII(
getSema().OpenACC(), C->getDirectiveKind(), C->clauses(),
TransformedClauses);
getSema().OpenACC(), C->getDirectiveKind(), C->getDirectiveLoc(),
C->clauses(), TransformedClauses);
StmtResult Loop = getDerived().TransformStmt(C->getLoop());
Loop = getSema().OpenACC().ActOnAssociatedStmt(C->getBeginLoc(),
C->getDirectiveKind(), Loop);
Loop = getSema().OpenACC().ActOnAssociatedStmt(
C->getBeginLoc(), C->getDirectiveKind(), TransformedClauses, Loop);

return getDerived().RebuildOpenACCLoopConstruct(
C->getBeginLoc(), C->getDirectiveLoc(), C->getEndLoc(),
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5866,6 +5866,12 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
}

CurrentModule->Kind = Kind;
// Note that we may be overwriting an existing location, and it is important
// to keep doing that. In particular, we prefer a `DefinitionLoc` loaded
// from the module file over the location created in the current source
// manager, because that allows the new location to be marked as
// "unaffecting" when writing and avoids creating duplicate locations for
// the same module map file.
CurrentModule->DefinitionLoc = DefinitionLoc;
CurrentModule->Signature = F.Signature;
CurrentModule->IsFromModuleFile = true;
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Serialization/ASTReaderStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2836,12 +2836,12 @@ void ASTStmtReader::VisitOpenACCAssociatedStmtConstruct(
// Deserializes an OpenACCComputeConstruct: reads the common statement state,
// then the associated-statement construct state, and finally calls
// findAndSetChildLoops() on the construct.
void ASTStmtReader::VisitOpenACCComputeConstruct(OpenACCComputeConstruct *S) {
VisitStmt(S);
VisitOpenACCAssociatedStmtConstruct(S);
// NOTE(review): presumably re-links loop constructs nested in the associated
// statement back to this construct; the helper is not visible here - confirm.
S->findAndSetChildLoops();
}

// Deserializes an OpenACCLoopConstruct: reads the common statement state,
// then the associated-statement construct state, then the directive kind of
// the parent compute construct.
void ASTStmtReader::VisitOpenACCLoopConstruct(OpenACCLoopConstruct *S) {
VisitStmt(S);
VisitOpenACCAssociatedStmtConstruct(S);
// Read as an OpenACCDirectiveKind enum and stored directly on the node.
// NOTE(review): the read order presumably has to match the order used by the
// corresponding ASTStmtWriter visitor - confirm when changing either side.
S->ParentComputeConstructKind = Record.readEnum<OpenACCDirectiveKind>();
}

//===----------------------------------------------------------------------===//
Expand Down
46 changes: 35 additions & 11 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "clang/AST/TypeLocVisitor.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/IdentifierTable.h"
Expand Down Expand Up @@ -81,6 +82,7 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
Expand Down Expand Up @@ -166,18 +168,25 @@ static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) {

namespace {

std::optional<std::set<const FileEntry *>>
// Result of GetAffectingModuleMaps: the set of affecting module map files,
// recorded both by FileID and by FileEntry.
struct AffectingModuleMaps {
// FileIDs of the module map files collected as affecting.
llvm::DenseSet<FileID> DefinitionFileIDs;
// File entries looked up from those FileIDs (only for IDs that have one).
llvm::DenseSet<const FileEntry *> DefinitionFiles;
};

std::optional<AffectingModuleMaps>
GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
if (!PP.getHeaderSearchInfo()
.getHeaderSearchOpts()
.ModulesPruneNonAffectingModuleMaps)
return std::nullopt;

const HeaderSearch &HS = PP.getHeaderSearchInfo();
const SourceManager &SM = PP.getSourceManager();
const ModuleMap &MM = HS.getModuleMap();

std::set<const FileEntry *> ModuleMaps;
std::set<const Module *> ProcessedModules;
llvm::DenseSet<FileID> ModuleMaps;

llvm::DenseSet<const Module *> ProcessedModules;
auto CollectModuleMapsForHierarchy = [&](const Module *M) {
M = M->getTopLevelModule();

Expand All @@ -192,13 +201,13 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {

// The containing module map is affecting, because it's being pointed
// into by Module::DefinitionLoc.
if (auto FE = MM.getContainingModuleMapFile(Mod))
ModuleMaps.insert(*FE);
if (auto F = MM.getContainingModuleMapFileID(Mod); F.isValid())
ModuleMaps.insert(F);
// For inferred modules, the module map that allowed inferring is not
// related to the virtual containing module map file. It did affect the
// compilation, though.
if (auto FE = MM.getModuleMapFileForUniquing(Mod))
ModuleMaps.insert(*FE);
if (auto UniqF = MM.getModuleMapFileIDForUniquing(Mod); UniqF.isValid())
ModuleMaps.insert(UniqF);

for (auto *SubM : Mod->submodules())
Q.push(SubM);
Expand Down Expand Up @@ -268,7 +277,16 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
// just ban module map hierarchies where module map defining a (sub)module X
// includes a module map defining a module that's not a submodule of X.

return ModuleMaps;
llvm::DenseSet<const FileEntry *> ModuleFileEntries;
for (FileID MM : ModuleMaps) {
if (auto *FE = SM.getFileEntryForID(MM))
ModuleFileEntries.insert(FE);
}

AffectingModuleMaps R;
R.DefinitionFileIDs = std::move(ModuleMaps);
R.DefinitionFiles = std::move(ModuleFileEntries);
return std::move(R);
}

class ASTTypeWriter {
Expand Down Expand Up @@ -1770,14 +1788,17 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
continue;

// Do not emit input files that do not affect current module.
if (!IsSLocAffecting[I])
if (!IsSLocFileEntryAffecting[I])
continue;

InputFileEntry Entry(*Cache->OrigEntry);
Entry.IsSystemFile = isSystem(File.getFileCharacteristic());
Entry.IsTransient = Cache->IsTransient;
Entry.BufferOverridden = Cache->BufferOverridden;
Entry.IsTopLevel = getAffectingIncludeLoc(SourceMgr, File).isInvalid();

FileID IncludeFileID = SourceMgr.getFileID(File.getIncludeLoc());
Entry.IsTopLevel = IncludeFileID.isInvalid() || IncludeFileID.ID < 0 ||
!IsSLocFileEntryAffecting[IncludeFileID.ID];
Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic());

uint64_t ContentHash = 0;
Expand Down Expand Up @@ -4920,6 +4941,7 @@ void ASTWriter::computeNonAffectingInputFiles() {
unsigned N = SrcMgr.local_sloc_entry_size();

IsSLocAffecting.resize(N, true);
IsSLocFileEntryAffecting.resize(N, true);

if (!WritingModule)
return;
Expand Down Expand Up @@ -4956,10 +4978,12 @@ void ASTWriter::computeNonAffectingInputFiles() {
continue;

// Don't prune module maps that are affecting.
if (llvm::is_contained(*AffectingModuleMaps, *Cache->OrigEntry))
if (AffectingModuleMaps->DefinitionFileIDs.contains(FID))
continue;

IsSLocAffecting[I] = false;
IsSLocFileEntryAffecting[I] =
AffectingModuleMaps->DefinitionFiles.contains(*Cache->OrigEntry);

FileIDAdjustment += 1;
// Even empty files take up one element in the offset table.
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Serialization/ASTWriterStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2915,6 +2915,7 @@ void ASTStmtWriter::VisitOpenACCComputeConstruct(OpenACCComputeConstruct *S) {
// Serializes an OpenACCLoopConstruct: writes the common statement state, the
// associated-statement construct state, and then the directive kind of the
// parent compute construct, and tags the record as a loop construct.
void ASTStmtWriter::VisitOpenACCLoopConstruct(OpenACCLoopConstruct *S) {
VisitStmt(S);
VisitOpenACCAssociatedStmtConstruct(S);
// Emitted after the shared construct data.
// NOTE(review): the reader presumably has to consume fields in this same
// order - confirm when changing either side.
Record.writeEnum(S->getParentComputeConstructKind());
Code = serialization::STMT_OPENACC_LOOP_CONSTRUCT;
}

Expand Down
50 changes: 50 additions & 0 deletions clang/test/AST/HLSL/AppendStructuredBuffer-AST.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY %s | FileCheck -check-prefix=EMPTY %s
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump %s | FileCheck %s


// This test tests two different AST generations. The "EMPTY" test mode verifies
// the AST generated by forward declaration of the HLSL types which happens on
// initializing the HLSL external AST with an AST Context.

// The non-empty mode has a use that requires the AppendStructuredBuffer type be complete,
// which results in the AST being populated by the external AST source. That
// case covers the full implementation of the template declaration and the
// instantiated specialization.

// EMPTY: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit AppendStructuredBuffer
// EMPTY-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
// EMPTY-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit <undeserialized declarations> class AppendStructuredBuffer
// EMPTY-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final

// There should be no more occurrences of AppendStructuredBuffer
// EMPTY-NOT: {{[^[:alnum:]]}}AppendStructuredBuffer

#ifndef EMPTY

AppendStructuredBuffer<int> Buffer;

#endif

// CHECK: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit AppendStructuredBuffer
// CHECK-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
// CHECK-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit class AppendStructuredBuffer definition

// CHECK: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer

// CHECK-NOT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const'
// CHECK-NOT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)'

// CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class AppendStructuredBuffer definition
// CHECK: TemplateArgument type 'int'
// CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'int'
// CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
// CHECK-SAME{LITERAL}: [[hlsl::contained_type(int)]]
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer
51 changes: 51 additions & 0 deletions clang/test/AST/HLSL/ConsumeStructuredBuffer-AST.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump -DEMPTY %s | FileCheck -check-prefix=EMPTY %s
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -x hlsl -ast-dump %s | FileCheck %s


// This test tests two different AST generations. The "EMPTY" test mode verifies
// the AST generated by forward declaration of the HLSL types which happens on
// initializing the HLSL external AST with an AST Context.

// The non-empty mode has a use that requires the ConsumeStructuredBuffer type be complete,
// which results in the AST being populated by the external AST source. That
// case covers the full implementation of the template declaration and the
// instantiated specialization.

// EMPTY: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit ConsumeStructuredBuffer
// EMPTY-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
// EMPTY-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit <undeserialized declarations> class ConsumeStructuredBuffer
// EMPTY-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final

// There should be no more occurrences of ConsumeStructuredBuffer
// EMPTY-NOT: {{[^[:alnum:]]}}ConsumeStructuredBuffer

#ifndef EMPTY

ConsumeStructuredBuffer<int> Buffer;

#endif

// CHECK: ClassTemplateDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit ConsumeStructuredBuffer
// CHECK-NEXT: TemplateTypeParmDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class depth 0 index 0 element_type
// CHECK-NEXT: CXXRecordDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit class ConsumeStructuredBuffer definition

// CHECK: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer

// CHECK-NOT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const'
// CHECK-NOT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)'

// CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class ConsumeStructuredBuffer definition

// CHECK: TemplateArgument type 'int'
// CHECK-NEXT: BuiltinType 0x{{[0-9A-Fa-f]+}} 'int'
// CHECK-NEXT: FinalAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit final
// CHECK-NEXT: FieldDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> implicit h '__hlsl_resource_t
// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
// CHECK-SAME{LITERAL}: [[hlsl::contained_type(int)]]
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer
4 changes: 2 additions & 2 deletions clang/test/AST/HLSL/RWStructuredBuffer-AST.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ RWStructuredBuffer<int> Buffer;
// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer

// CHECK: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const'
// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
Expand All @@ -61,4 +61,4 @@ RWStructuredBuffer<int> Buffer;
// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
// CHECK-SAME{LITERAL}: [[hlsl::contained_type(int)]]
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer
4 changes: 2 additions & 2 deletions clang/test/AST/HLSL/StructuredBuffer-AST.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ StructuredBuffer<float> Buffer;
// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer

// CHECK: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &const (unsigned int) const'
// CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int'
Expand All @@ -61,4 +61,4 @@ StructuredBuffer<float> Buffer;
// CHECK-SAME{LITERAL}: [[hlsl::resource_class(SRV)]]
// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
// CHECK-SAME{LITERAL}: [[hlsl::contained_type(float)]]
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit TypedBuffer
// CHECK-NEXT: HLSLResourceAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit RawBuffer
198 changes: 99 additions & 99 deletions clang/test/AST/ast-print-openacc-loop-construct.cpp

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions clang/test/C/C2y/n3341.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s
// RUN: %clang_cc1 -verify=gnu -Wall -pedantic %s

/* WG14 N3341: Yes
* Slay Some Earthly Demons III
*
* Empty structure and union objects are now implementation-defined.
*/

// expected-no-diagnostics

struct R {}; // gnu-warning {{empty struct is a GNU extension}}
#if __STDC_VERSION__ >= 201112L
struct S { struct { }; }; // gnu-warning {{empty struct is a GNU extension}}
#endif
struct T { int : 0; }; // gnu-warning {{struct without named members is a GNU extension}}
union U {}; // gnu-warning {{empty union is a GNU extension}}

31 changes: 31 additions & 0 deletions clang/test/C/C2y/n3342.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: %clang_cc1 -verify=expected,both -std=c2y -Wall -pedantic %s
// RUN: %clang_cc1 -verify=clang,both -Wall -pedantic %s

/* WG14 N3342: Yes
* Slay Some Earthly Demons IV
*
* Qualified function types are now implementation-defined instead of
* undefined. Clang strips the qualifiers.
*/

typedef int f(void);

const f one; /* expected-warning {{'const' qualifier on function type 'f' (aka 'int (void)') has no effect}}
clang-warning {{'const' qualifier on function type 'f' (aka 'int (void)') has no effect and is a Clang extension}}
*/
volatile f two; /* expected-warning {{'volatile' qualifier on function type 'f' (aka 'int (void)') has no effect}}
clang-warning {{'volatile' qualifier on function type 'f' (aka 'int (void)') has no effect and is a Clang extension}}
*/

const volatile f three; /* expected-warning {{'const' qualifier on function type 'f' (aka 'int (void)') has no effect}}
clang-warning {{'const' qualifier on function type 'f' (aka 'int (void)') has no effect and is a Clang extension}}
expected-warning {{'volatile' qualifier on function type 'f' (aka 'int (void)') has no effect}}
clang-warning {{'volatile' qualifier on function type 'f' (aka 'int (void)') has no effect and is a Clang extension}}
*/

#if __STDC_VERSION__ >= 201112L
// Atomic types have an explicit constraint making it ill-formed.
_Atomic f four; // both-error {{_Atomic cannot be applied to function type 'f' (aka 'int (void)')}}
#endif

// There's no point to testing 'restrict' because that requires a pointer type.
5 changes: 5 additions & 0 deletions clang/test/C/C2y/n3344.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
void baz(volatile void); // expected-error {{'void' as parameter must not have type qualifiers}}
void bar(const void); // expected-error {{'void' as parameter must not have type qualifiers}}
void foo(register void); // expected-error {{invalid storage class specifier in function declarator}}
void foop(void register); // expected-error {{invalid storage class specifier in function declarator}}
void quux(static void); // expected-error {{invalid storage class specifier in function declarator}}
void quobble(auto void); // expected-error {{invalid storage class specifier in function declarator}}
void quubble(extern void); // expected-error {{invalid storage class specifier in function declarator}}
Expand All @@ -28,3 +29,7 @@ void quabble(_Thread_local void); // expected-error {{'_Thread_local' is only al
#endif
void bing(void, ...); // expected-error {{'void' must be the first and only parameter if specified}}

// These declarations are fine.
void one(register void *);
void two(void register *);
void three(register void * (*)[4]);
74 changes: 74 additions & 0 deletions clang/test/C/C2y/n3346.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic -ffreestanding %s
// RUN: %clang_cc1 -std=c99 -verify=expected,ped -Wall -pedantic -ffreestanding %s

/* WG14 N3346: Yes
* Slay Some Earthly Demons VIII
*
* Updates some undefined behavior during initialization to instead be a
* constraint violation.
*/

// The initializer for a scalar shall be a single expression, optionally
// enclosed in braces, or it shall be an empty initializer.
int i = 12, j = {12}, k = {}; // ped-warning {{use of an empty initializer is a C23 extension}}

struct S {
int i;
float f;
int : 0;
char c;
};

// Structure initialization: a brace-enclosed list and an expression of the
// same struct type are accepted, while an expression of a distinct (though
// structurally identical) struct type is diagnosed.
void test1(void) {
// The initializer for an object that has structure or union type shall be
// either a single expression that has compatible type or a brace-enclosed
// list of initializers for the elements or named members.
struct S s1 = { 1, 1.2f, 'a' };
struct S s2 = s1;

// Despite being structurally identical to S, T is not compatible with S.
struct T { int i; float f; int : 0; char c; } t;
struct S s3 = t; // expected-error {{initializing 'struct S' with an expression of incompatible type 'struct T'}}
}

// Array initialization from string literals: the accepted pairings of
// element type and literal encoding prefix, followed by the mismatched
// pairings that must be diagnosed. The char16_t/char32_t/u8 cases are only
// compiled when __STDC_VERSION__ is at least 201112L.
void test2(void) {
typedef __WCHAR_TYPE__ wchar_t;

// The initializer for an array shall be either a string literal, optionally
// enclosed in braces, or a brace-enclosed list of initializers for the
// elements. An array initialized by character string literal or UTF-8 string
// literal shall have a character type as element type. An array initialized
// with a wide string literal shall have element type compatible with a
// qualified or unqualified wchar_t, char16_t, or char32_t, and the string
// literal shall have the corresponding encoding prefix (L, u, or U,
// respectively).
char str1[] = "string literal";
char str2[] = { "string literal" };

float str5[] = "this doesn't work"; // expected-error {{array initializer must be an initializer list}}
float str6[] = { "this also doesn't work" }; // expected-error {{initializing 'float' with an expression of incompatible type 'char[23]'}}

wchar_t str7[] = L"string literal";
wchar_t str8[] = { L"string literal" };

#if __STDC_VERSION__ >= 201112L
typedef __CHAR16_TYPE__ char16_t;
typedef __CHAR32_TYPE__ char32_t;

char str3[] = u8"string literal";
char str4[] = { u8"string literal" };

char16_t str9[] = u"string literal";
char16_t str10[] = { u"string literal" };
char32_t str11[] = U"string literal";
char32_t str12[] = { U"string literal" };

char16_t str15[] = "nope"; // expected-error {{initializing wide char array with non-wide string literal}}
char16_t str16[] = { "nope" }; // expected-error-re {{incompatible pointer to integer conversion initializing 'char16_t' (aka '{{.*}}') with an expression of type 'char[5]'}}
char32_t str17[] = "nope"; // expected-error {{initializing wide char array with non-wide string literal}}
char32_t str18[] = { "nope" }; // expected-error-re {{incompatible pointer to integer conversion initializing 'char32_t' (aka '{{.*}}') with an expression of type 'char[5]'}}
#endif

wchar_t str13[] = "nope"; // expected-error {{initializing wide char array with non-wide string literal}}
wchar_t str14[] = { "nope" }; // expected-error-re {{incompatible pointer to integer conversion initializing 'wchar_t' (aka '{{.*}}') with an expression of type 'char[5]'}}
}
52 changes: 52 additions & 0 deletions clang/test/CodeGen/X86/amx_avx512_api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \
// RUN: -target-feature +amx-avx512 -target-feature +avx10.2-512 \
// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK

#include <immintrin.h>

char buf[1024];
#define STRIDE 32

char buf2[1024];

// Calls __tile_cvtrowd2ps on a __tile1024i operand; the FileCheck lines in
// the body look for a vector-to-tile cast and a call to
// llvm.x86.tcvtrowd2ps.internal producing <16 x float>.
__m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowd2ps
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <16 x float> @llvm.x86.tcvtrowd2ps.internal
return __tile_cvtrowd2ps(a, b);
}

// Calls __tile_cvtrowps2pbf16h on a __tile1024i operand; the FileCheck lines
// in the body look for a vector-to-tile cast and a call to
// llvm.x86.tcvtrowps2pbf16h.internal producing <32 x bfloat>.
__m512bh test_tile_cvtrowps2pbf16h(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2pbf16h
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal
return __tile_cvtrowps2pbf16h(a, b);
}

// Calls __tile_cvtrowps2pbf16l on a __tile1024i operand; the FileCheck lines
// in the body look for a vector-to-tile cast and a call to
// llvm.x86.tcvtrowps2pbf16l.internal producing <32 x bfloat>.
__m512bh test_tile_cvtrowps2pbf16l(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2pbf16l
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal
return __tile_cvtrowps2pbf16l(a, b);
}

// Calls __tile_cvtrowps2phh on a __tile1024i operand; the FileCheck lines in
// the body look for a vector-to-tile cast and a call to
// llvm.x86.tcvtrowps2phh.internal producing <32 x half>.
__m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2phh
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phh.internal
return __tile_cvtrowps2phh(a, b);
}

// Calls __tile_cvtrowps2phl on a __tile1024i operand; the FileCheck lines in
// the body look for a vector-to-tile cast and a call to
// llvm.x86.tcvtrowps2phl.internal producing <32 x half>.
__m512h test_tile_cvtrowps2phl(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2phl
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phl.internal
return __tile_cvtrowps2phl(a, b);
}

// Calls __tile_movrow on a __tile1024i operand; the FileCheck lines in the
// body look for a vector-to-tile cast and a call to
// llvm.x86.tilemovrow.internal producing <16 x i32>.
__m512i test_tile_movrow(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_movrow
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <16 x i32> @llvm.x86.tilemovrow.internal
return __tile_movrow(a, b);
}
41 changes: 41 additions & 0 deletions clang/test/CodeGen/X86/amxavx512-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-avx512 \
// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s

#include <immintrin.h>
#include <stddef.h>

// Calls _tile_cvtrowd2ps with constant tile 1 and a runtime row operand; the
// FileCheck lines in the body look for a direct call to
// llvm.x86.tcvtrowd2ps with an i8 1 tile argument.
__m512 test_tile_cvtrowd2ps(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowd2ps(
// CHECK: call <16 x float> @llvm.x86.tcvtrowd2ps(i8 1, i32 %{{.*}})
return _tile_cvtrowd2ps(1, A);
}

// Calls _tile_cvtrowps2pbf16h with constant tile 1 and a runtime row operand;
// the FileCheck lines in the body look for a direct call to
// llvm.x86.tcvtrowps2pbf16h with an i8 1 tile argument.
__m512bh test_tile_cvtrowps2pbf16h(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2pbf16h(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %{{.*}})
return _tile_cvtrowps2pbf16h(1, A);
}

// Calls _tile_cvtrowps2pbf16l with constant tile 1 and a runtime row operand;
// the FileCheck lines in the body look for a direct call to
// llvm.x86.tcvtrowps2pbf16l with an i8 1 tile argument.
__m512bh test_tile_cvtrowps2pbf16l(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2pbf16l(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %{{.*}})
return _tile_cvtrowps2pbf16l(1, A);
}

// Calls _tile_cvtrowps2phh with constant tile 1 and a runtime row operand;
// the FileCheck lines in the body look for a direct call to
// llvm.x86.tcvtrowps2phh with an i8 1 tile argument.
__m512h test_tile_cvtrowps2phh(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2phh(
// CHECK: call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %{{.*}})
return _tile_cvtrowps2phh(1, A);
}

// Calls _tile_cvtrowps2phl with constant tile 1 and a runtime row operand;
// the FileCheck lines in the body look for a direct call to
// llvm.x86.tcvtrowps2phl with an i8 1 tile argument.
__m512h test_tile_cvtrowps2phl(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2phl(
// CHECK: call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %{{.*}})
return _tile_cvtrowps2phl(1, A);
}

// Calls _tile_movrow with constant tile 1 and a runtime row operand; the
// FileCheck lines in the body look for a direct call to llvm.x86.tilemovrow
// with an i8 1 tile argument.
__m512i test_tile_movrow(unsigned int A) {
// CHECK-LABEL: @test_tile_movrow
// CHECK: %1 = call <16 x i32> @llvm.x86.tilemovrow(i8 1, i32 %{{.*}})
return _tile_movrow(1, A);
}
19 changes: 19 additions & 0 deletions clang/test/CodeGen/X86/builtin_test_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,25 @@ constexpr bool match_m64(__m64 _v, unsigned long long a) {
return v[0] == a;
}

// True when element 0 of v equals a.
constexpr bool match_v1di(__m64 v, long long a) {
  return a == v[0];
}

// True when _v, viewed as a __v2si, holds exactly {a, b} in element order.
constexpr bool match_v2si(__m64 _v, int a, int b) {
  __v2si lanes = (__v2si)_v;
  // Bail out on the first mismatching element.
  if (lanes[0] != a)
    return false;
  return lanes[1] == b;
}

// True when _v, viewed as a __v4hi, holds exactly {a, b, c, d} in element
// order.
constexpr bool match_v4hi(__m64 _v, short a, short b, short c, short d) {
  __v4hi lanes = (__v4hi)_v;
  // Check the first pair of elements, then the second.
  if (lanes[0] != a || lanes[1] != b)
    return false;
  return lanes[2] == c && lanes[3] == d;
}

// True when _v, viewed as a __v8qi, holds exactly {a, b, c, d, e, f, g, h}
// in element order.
constexpr bool match_v8qi(__m64 _v, char a, char b, char c, char d, char e,
                          char f, char g, char h) {
  __v8qi lanes = (__v8qi)_v;
  // Check elements 0-3 first, then elements 4-7.
  if (lanes[0] != a || lanes[1] != b || lanes[2] != c || lanes[3] != d)
    return false;
  return lanes[4] == e && lanes[5] == f && lanes[6] == g && lanes[7] == h;
}

// True when the four float elements of v equal a, b, c and d, in element
// order.
constexpr bool match_m128(__m128 v, float a, float b, float c, float d) {
  return a == v[0] &&
         b == v[1] &&
         c == v[2] &&
         d == v[3];
}
Expand Down
Loading