25 changes: 23 additions & 2 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19184,8 +19184,24 @@ static bool isLayoutCompatible(ASTContext &C, EnumDecl *ED1, EnumDecl *ED2) {
}

/// Check if two fields are layout-compatible.
/// Can be used on union members, which are exempt from alignment requirement
/// of common initial sequence.
static bool isLayoutCompatible(ASTContext &C, FieldDecl *Field1,
FieldDecl *Field2) {
FieldDecl *Field2,
bool AreUnionMembers = false) {
[[maybe_unused]] const Type *Field1Parent =
Field1->getParent()->getTypeForDecl();
[[maybe_unused]] const Type *Field2Parent =
Field2->getParent()->getTypeForDecl();
assert(((Field1Parent->isStructureOrClassType() &&
Field2Parent->isStructureOrClassType()) ||
(Field1Parent->isUnionType() && Field2Parent->isUnionType())) &&
"Can't evaluate layout compatibility between a struct field and a "
"union field.");
assert(((!AreUnionMembers && Field1Parent->isStructureOrClassType()) ||
(AreUnionMembers && Field1Parent->isUnionType())) &&
"AreUnionMembers should be 'true' for union fields (only).");

if (!isLayoutCompatible(C, Field1->getType(), Field2->getType()))
return false;

Expand All @@ -19204,6 +19220,11 @@ static bool isLayoutCompatible(ASTContext &C, FieldDecl *Field1,
if (Field1->hasAttr<clang::NoUniqueAddressAttr>() ||
Field2->hasAttr<clang::NoUniqueAddressAttr>())
return false;

if (!AreUnionMembers &&
Field1->getMaxAlignment() != Field2->getMaxAlignment())
return false;

return true;
}

Expand Down Expand Up @@ -19265,7 +19286,7 @@ static bool isLayoutCompatibleUnion(ASTContext &C, RecordDecl *RD1,
E = UnmatchedFields.end();

for ( ; I != E; ++I) {
if (isLayoutCompatible(C, Field1, *I)) {
if (isLayoutCompatible(C, Field1, *I, /*IsUnionMember=*/true)) {
bool Result = UnmatchedFields.erase(*I);
(void) Result;
assert(Result);
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Sema/SemaConcept.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,8 @@ bool Sema::addInstantiatedCapturesToScope(

bool Sema::SetupConstraintScope(
FunctionDecl *FD, std::optional<ArrayRef<TemplateArgument>> TemplateArgs,
MultiLevelTemplateArgumentList MLTAL, LocalInstantiationScope &Scope) {
const MultiLevelTemplateArgumentList &MLTAL,
LocalInstantiationScope &Scope) {
if (FD->isTemplateInstantiation() && FD->getPrimaryTemplate()) {
FunctionTemplateDecl *PrimaryTemplate = FD->getPrimaryTemplate();
InstantiatingTemplate Inst(
Expand Down
18 changes: 16 additions & 2 deletions clang/lib/Sema/SemaCoroutine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaInternal.h"
#include "llvm/ADT/SmallSet.h"

Expand Down Expand Up @@ -1378,8 +1379,21 @@ bool CoroutineStmtBuilder::makeReturnOnAllocFailure() {
static bool collectPlacementArgs(Sema &S, FunctionDecl &FD, SourceLocation Loc,
SmallVectorImpl<Expr *> &PlacementArgs) {
if (auto *MD = dyn_cast<CXXMethodDecl>(&FD)) {
if (MD->isImplicitObjectMemberFunction() && !isLambdaCallOperator(MD)) {
ExprResult ThisExpr = S.ActOnCXXThis(Loc);
if (MD->isImplicitObjectMemberFunction()) {
ExprResult ThisExpr{};

if (isLambdaCallOperator(MD) && !MD->isStatic()) {
Qualifiers ThisQuals = MD->getMethodQualifiers();
CXXRecordDecl *Record = MD->getParent();

Sema::CXXThisScopeRAII ThisScope(S, Record, ThisQuals,
Record != nullptr);

ThisExpr = S.ActOnCXXThis(Loc, /*ThisRefersToClosureObject=*/true);
} else {
ThisExpr = S.ActOnCXXThis(Loc);
}

if (ThisExpr.isInvalid())
return false;
ThisExpr = S.CreateBuiltinUnaryOp(Loc, UO_Deref, ThisExpr.get());
Expand Down
27 changes: 16 additions & 11 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1110,9 +1110,7 @@ Sema::NameClassification Sema::ClassifyName(Scope *S, CXXScopeSpec &SS,
// unqualified-id followed by a < and name lookup finds either one
// or more functions or finds nothing.
if (!IsFilteredTemplateName)
FilterAcceptableTemplateNames(Result,
/*AllowFunctionTemplates=*/true,
/*AllowDependent=*/true);
FilterAcceptableTemplateNames(Result);

bool IsFunctionTemplate;
bool IsVarTemplate;
Expand All @@ -1122,7 +1120,6 @@ Sema::NameClassification Sema::ClassifyName(Scope *S, CXXScopeSpec &SS,
Template = Context.getOverloadedTemplateName(Result.begin(),
Result.end());
} else if (!Result.empty()) {
assert(!Result.isUnresolvableResult());
auto *TD = cast<TemplateDecl>(getAsTemplateNameDecl(
*Result.begin(), /*AllowFunctionTemplates=*/true,
/*AllowDependent=*/false));
Expand Down Expand Up @@ -15795,10 +15792,19 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
// captures during transformation of nested lambdas, it is necessary to
// have the LSI properly restored.
if (isGenericLambdaCallOperatorSpecialization(FD)) {
assert(inTemplateInstantiation() &&
"There should be an active template instantiation on the stack "
"when instantiating a generic lambda!");
RebuildLambdaScopeInfo(cast<CXXMethodDecl>(D));
// C++2c 7.5.5.2p17 A member of a closure type shall not be explicitly
// instantiated, explicitly specialized.
if (FD->getTemplateSpecializationInfo()
->isExplicitInstantiationOrSpecialization()) {
Diag(FD->getLocation(), diag::err_lambda_explicit_spec);
FD->setInvalidDecl();
PushFunctionScope();
} else {
assert(inTemplateInstantiation() &&
"There should be an active template instantiation on the stack "
"when instantiating a generic lambda!");
RebuildLambdaScopeInfo(cast<CXXMethodDecl>(D));
}
} else {
// Enter a new function scope
PushFunctionScope();
Expand Down Expand Up @@ -16317,9 +16323,8 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
}
}

assert(
(FD == getCurFunctionDecl() || getCurLambda()->CallOperator == FD) &&
"Function parsing confused");
assert((FD == getCurFunctionDecl(/*AllowLambdas=*/true)) &&
"Function parsing confused");
} else if (ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(dcl)) {
assert(MD == getCurMethodDecl() && "Method parsing confused");
MD->setBody(Body);
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/Sema/SemaExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19218,7 +19218,10 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
// externalize the static device side variable ODR-used by host code.
if (!Var->hasExternalStorage())
SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
else if (SemaRef.LangOpts.GPURelocatableDeviceCode)
else if (SemaRef.LangOpts.GPURelocatableDeviceCode &&
(!FD || (!FD->getDescribedFunctionTemplate() &&
SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
GVA_StrongExternal)))
SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var);
}
}
Expand Down
16 changes: 11 additions & 5 deletions clang/lib/Sema/SemaExprCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,8 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit,
return false;
}

ExprResult Sema::ActOnCXXThis(SourceLocation Loc) {
ExprResult Sema::ActOnCXXThis(SourceLocation Loc,
bool ThisRefersToClosureObject) {
/// C++ 9.3.2: In the body of a non-static member function, the keyword this
/// is a non-lvalue expression whose value is the address of the object for
/// which the function is called.
Expand All @@ -1434,13 +1435,18 @@ ExprResult Sema::ActOnCXXThis(SourceLocation Loc) {
return Diag(Loc, diag::err_invalid_this_use) << 0;
}

return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false);
return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false,
ThisRefersToClosureObject);
}

Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type,
bool IsImplicit) {
Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type, bool IsImplicit,
bool ThisRefersToClosureObject) {
auto *This = CXXThisExpr::Create(Context, Loc, Type, IsImplicit);
MarkThisReferenced(This);

if (!ThisRefersToClosureObject) {
MarkThisReferenced(This);
}

return This;
}

Expand Down
37 changes: 32 additions & 5 deletions clang/lib/Sema/SemaInit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10720,13 +10720,40 @@ QualType Sema::DeduceTemplateSpecializationFromInitializer(
if (TemplateName.isDependent())
return SubstAutoTypeDependent(TSInfo->getType());

// We can only perform deduction for class templates.
// We can only perform deduction for class templates or alias templates.
auto *Template =
dyn_cast_or_null<ClassTemplateDecl>(TemplateName.getAsTemplateDecl());
TemplateDecl *LookupTemplateDecl = Template;
if (!Template) {
if (auto *AliasTemplate = dyn_cast_or_null<TypeAliasTemplateDecl>(
TemplateName.getAsTemplateDecl())) {
Diag(Kind.getLocation(),
diag::warn_cxx17_compat_ctad_for_alias_templates);
LookupTemplateDecl = AliasTemplate;
auto UnderlyingType = AliasTemplate->getTemplatedDecl()
->getUnderlyingType()
.getCanonicalType();
// C++ [over.match.class.deduct#3]: ..., the defining-type-id of A must be
// of the form
// [typename] [nested-name-specifier] [template] simple-template-id
if (const auto *TST =
UnderlyingType->getAs<TemplateSpecializationType>()) {
Template = dyn_cast_or_null<ClassTemplateDecl>(
TST->getTemplateName().getAsTemplateDecl());
} else if (const auto *RT = UnderlyingType->getAs<RecordType>()) {
// Cases where template arguments in the RHS of the alias are not
// dependent. e.g.
// using AliasFoo = Foo<bool>;
if (const auto *CTSD = llvm::dyn_cast<ClassTemplateSpecializationDecl>(
RT->getAsCXXRecordDecl()))
Template = CTSD->getSpecializedTemplate();
}
}
}
if (!Template) {
Diag(Kind.getLocation(),
diag::err_deduced_non_class_template_specialization_type)
<< (int)getTemplateNameKindForDiagnostics(TemplateName) << TemplateName;
diag::err_deduced_non_class_or_alias_template_specialization_type)
<< (int)getTemplateNameKindForDiagnostics(TemplateName) << TemplateName;
if (auto *TD = TemplateName.getAsTemplateDecl())
NoteTemplateLocation(*TD);
return QualType();
Expand All @@ -10753,10 +10780,10 @@ QualType Sema::DeduceTemplateSpecializationFromInitializer(
// template-name, a function template [...]
// - For each deduction-guide, a function or function template [...]
DeclarationNameInfo NameInfo(
Context.DeclarationNames.getCXXDeductionGuideName(Template),
Context.DeclarationNames.getCXXDeductionGuideName(LookupTemplateDecl),
TSInfo->getTypeLoc().getEndLoc());
LookupResult Guides(*this, NameInfo, LookupOrdinaryName);
LookupQualifiedName(Guides, Template->getDeclContext());
LookupQualifiedName(Guides, LookupTemplateDecl->getDeclContext());

// FIXME: Do not diagnose inaccessible deduction guides. The standard isn't
// clear on this, but they're not found by name so access does not apply.
Expand Down
508 changes: 418 additions & 90 deletions clang/lib/Sema/SemaTemplate.cpp

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions clang/lib/Sema/SemaTemplateDeduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2531,6 +2531,15 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
return TemplateDeductionResult::Success;
}

/// Sema member overload of DeduceTemplateArguments operating on two template
/// argument lists, \p Ps and \p As.
///
/// This is a thin wrapper: it passes *this followed by every parameter,
/// unchanged and in the same order, to the global-scope
/// ::DeduceTemplateArguments overload and returns that call's result. The
/// explicit `::` qualification selects the non-member function rather than
/// recursing into this member.
TemplateDeductionResult Sema::DeduceTemplateArguments(
TemplateParameterList *TemplateParams, ArrayRef<TemplateArgument> Ps,
ArrayRef<TemplateArgument> As, sema::TemplateDeductionInfo &Info,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
bool NumberOfArgumentsMustMatch) {
return ::DeduceTemplateArguments(*this, TemplateParams, Ps, As, Info, Deduced,
NumberOfArgumentsMustMatch);
}

/// Determine whether two template arguments are the same.
static bool isSameTemplateArg(ASTContext &Context,
TemplateArgument X,
Expand Down
69 changes: 66 additions & 3 deletions clang/lib/Sema/SemaTemplateInstantiate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "clang/AST/ExprConcepts.h"
#include "clang/AST/PrettyDeclStackTrace.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeVisitor.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Stack.h"
Expand Down Expand Up @@ -547,9 +548,9 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
: InstantiatingTemplate(SemaRef, Kind, PointOfInstantiation,
InstantiationRange, FunctionTemplate, nullptr,
TemplateArgs, &DeductionInfo) {
assert(
Kind == CodeSynthesisContext::ExplicitTemplateArgumentSubstitution ||
Kind == CodeSynthesisContext::DeducedTemplateArgumentSubstitution);
assert(Kind == CodeSynthesisContext::ExplicitTemplateArgumentSubstitution ||
Kind == CodeSynthesisContext::DeducedTemplateArgumentSubstitution ||
Kind == CodeSynthesisContext::BuildingDeductionGuides);
}

Sema::InstantiatingTemplate::InstantiatingTemplate(
Expand Down Expand Up @@ -1446,6 +1447,59 @@ namespace {
return inherited::TransformFunctionProtoType(TLB, TL);
}

/// Transform an injected-class-name type.
///
/// The inherited transformation is tried first and its result is returned
/// whenever it is non-null. If it yields a null type while the innermost
/// code synthesis context is BuildingDeductionGuides, the injected
/// specialization type of the injected-class-name is transformed instead and
/// returned, with a trivial TypeLoc pushed onto \p TLB for it.
QualType TransformInjectedClassNameType(TypeLocBuilder &TLB,
                                        InjectedClassNameTypeLoc TL) {
  QualType Transformed = inherited::TransformInjectedClassNameType(TLB, TL);
  if (!Transformed.isNull())
    return Transformed;

  // The fallback below only applies while building deduction guides; in any
  // other context the null result is propagated unchanged.
  if (SemaRef.CodeSynthesisContexts.back().Kind !=
      Sema::CodeSynthesisContext::BuildingDeductionGuides)
    return Transformed;

  const auto *ICT = TL.getType()->getAs<InjectedClassNameType>();
  if (!ICT)
    return Transformed;

  // Return the transformed injected specialization type in place of the
  // injected-class-name.
  QualType Specialized =
      inherited::TransformType(ICT->getInjectedSpecializationType());
  TLB.pushTrivial(SemaRef.Context, Specialized, TL.getNameLoc());
  return Specialized;
}
// Override the default version to handle a rewrite-template-arg-pack case
// for building a deduction guide.
bool TransformTemplateArgument(const TemplateArgumentLoc &Input,
TemplateArgumentLoc &Output,
bool Uneval = false) {
const TemplateArgument &Arg = Input.getArgument();
std::vector<TemplateArgument> TArgs;
switch (Arg.getKind()) {
case TemplateArgument::Pack:
// Literally rewrite the template argument pack, instead of unpacking
// it.
assert(
SemaRef.CodeSynthesisContexts.back().Kind ==
Sema::CodeSynthesisContext::BuildingDeductionGuides &&
"Transforming a template argument pack is only allowed in building "
"deduction guide");
for (auto &pack : Arg.getPackAsArray()) {
TemplateArgumentLoc Input = SemaRef.getTrivialTemplateArgumentLoc(
pack, QualType(), SourceLocation{});
TemplateArgumentLoc Output;
if (SemaRef.SubstTemplateArgument(Input, TemplateArgs, Output))
return true; // fails
TArgs.push_back(Output.getArgument());
}
Output = SemaRef.getTrivialTemplateArgumentLoc(
TemplateArgument(llvm::ArrayRef(TArgs).copy(SemaRef.Context)),
QualType(), SourceLocation{});
return false;
default:
break;
}
return inherited::TransformTemplateArgument(Input, Output, Uneval);
}

template<typename Fn>
QualType TransformFunctionProtoType(TypeLocBuilder &TLB,
FunctionProtoTypeLoc TL,
Expand Down Expand Up @@ -4138,6 +4192,15 @@ Sema::SubstStmt(Stmt *S, const MultiLevelTemplateArgumentList &TemplateArgs) {
return Instantiator.TransformStmt(S);
}

bool Sema::SubstTemplateArgument(
const TemplateArgumentLoc &Input,
const MultiLevelTemplateArgumentList &TemplateArgs,
TemplateArgumentLoc &Output) {
TemplateInstantiator Instantiator(*this, TemplateArgs, SourceLocation(),
DeclarationName());
return Instantiator.TransformTemplateArgument(Input, Output);
}

bool Sema::SubstTemplateArguments(
ArrayRef<TemplateArgumentLoc> Args,
const MultiLevelTemplateArgumentList &TemplateArgs,
Expand Down
29 changes: 19 additions & 10 deletions clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2219,7 +2219,9 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
FunctionTemplate->setInstantiatedFromMemberTemplate(
D->getDescribedFunctionTemplate());
}
} else if (FunctionTemplate) {
} else if (FunctionTemplate &&
SemaRef.CodeSynthesisContexts.back().Kind !=
Sema::CodeSynthesisContext::BuildingDeductionGuides) {
// Record this function template specialization.
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
Function->setFunctionTemplateSpecialization(FunctionTemplate,
Expand Down Expand Up @@ -4853,16 +4855,13 @@ bool TemplateDeclInstantiator::SubstDefaultedFunction(FunctionDecl *New,
///
/// Usually this should not be used, and template argument deduction should be
/// used in its place.
FunctionDecl *
Sema::InstantiateFunctionDeclaration(FunctionTemplateDecl *FTD,
const TemplateArgumentList *Args,
SourceLocation Loc) {
FunctionDecl *Sema::InstantiateFunctionDeclaration(
FunctionTemplateDecl *FTD, const TemplateArgumentList *Args,
SourceLocation Loc, CodeSynthesisContext::SynthesisKind CSC) {
FunctionDecl *FD = FTD->getTemplatedDecl();

sema::TemplateDeductionInfo Info(Loc);
InstantiatingTemplate Inst(
*this, Loc, FTD, Args->asArray(),
CodeSynthesisContext::ExplicitTemplateArgumentSubstitution, Info);
InstantiatingTemplate Inst(*this, Loc, FTD, Args->asArray(), CSC, Info);
if (Inst.isInvalid())
return nullptr;

Expand Down Expand Up @@ -6286,8 +6285,18 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
QualType T = CheckTemplateIdType(TemplateName(TD), Loc, Args);
if (T.isNull())
return nullptr;
auto *SubstRecord = T->getAsCXXRecordDecl();
assert(SubstRecord && "class template id not a class type?");
CXXRecordDecl *SubstRecord = T->getAsCXXRecordDecl();

if (!SubstRecord) {
// T can be a dependent TemplateSpecializationType when performing a
// substitution for building a deduction guide.
assert(CodeSynthesisContexts.back().Kind ==
CodeSynthesisContext::BuildingDeductionGuides);
// Return a nullptr as a sentinel value, we handle it properly in
// the TemplateInstantiator::TransformInjectedClassNameType
// override, which we transform it to a TemplateSpecializationType.
return nullptr;
}
// Check that this template-id names the primary template and not a
// partial or explicit specialization. (In the latter cases, it's
// meaningless to attempt to find an instantiation of D within the
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/Sema/TreeTransform.h
Original file line number Diff line number Diff line change
Expand Up @@ -4785,6 +4785,14 @@ bool TreeTransform<Derived>::TransformTemplateArguments(
TemplateArgumentLoc In = *First;

if (In.getArgument().getKind() == TemplateArgument::Pack) {
// When building the deduction guides, we rewrite the argument packs
// instead of unpacking.
if (getSema().CodeSynthesisContexts.back().Kind ==
Sema::CodeSynthesisContext::BuildingDeductionGuides) {
if (getDerived().TransformTemplateArgument(In, Out, Uneval))
return true;
continue;
}
// Unpack argument packs, which we translate them into separate
// arguments.
// FIXME: We could do much better if we could guarantee that the
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Serialization/ASTReaderStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2805,7 +2805,7 @@ void ASTStmtReader::VisitOpenACCAssociatedStmtConstruct(

void ASTStmtReader::VisitOpenACCComputeConstruct(OpenACCComputeConstruct *S) {
VisitStmt(S);
VisitOpenACCConstructStmt(S);
VisitOpenACCAssociatedStmtConstruct(S);
}

//===----------------------------------------------------------------------===//
Expand Down
32 changes: 18 additions & 14 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4623,10 +4623,12 @@ ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream,
SmallVectorImpl<char> &Buffer,
InMemoryModuleCache &ModuleCache,
ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
bool IncludeTimestamps, bool BuildingImplicitModule)
bool IncludeTimestamps, bool BuildingImplicitModule,
bool GeneratingReducedBMI)
: Stream(Stream), Buffer(Buffer), ModuleCache(ModuleCache),
IncludeTimestamps(IncludeTimestamps),
BuildingImplicitModule(BuildingImplicitModule) {
BuildingImplicitModule(BuildingImplicitModule),
GeneratingReducedBMI(GeneratingReducedBMI) {
for (const auto &Ext : Extensions) {
if (auto Writer = Ext->createExtensionWriter(*this))
ModuleFileExtensionWriters.push_back(std::move(Writer));
Expand Down Expand Up @@ -5457,18 +5459,20 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {

// Add a trailing update record, if any. These must go last because we
// lazily load their attached statement.
if (HasUpdatedBody) {
const auto *Def = cast<FunctionDecl>(D);
Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION);
Record.push_back(Def->isInlined());
Record.AddSourceLocation(Def->getInnerLocStart());
Record.AddFunctionDefinition(Def);
} else if (HasAddedVarDefinition) {
const auto *VD = cast<VarDecl>(D);
Record.push_back(UPD_CXX_ADDED_VAR_DEFINITION);
Record.push_back(VD->isInline());
Record.push_back(VD->isInlineSpecified());
Record.AddVarDeclInit(VD);
if (!GeneratingReducedBMI || !CanElideDeclDef(D)) {
if (HasUpdatedBody) {
const auto *Def = cast<FunctionDecl>(D);
Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION);
Record.push_back(Def->isInlined());
Record.AddSourceLocation(Def->getInnerLocStart());
Record.AddFunctionDefinition(Def);
} else if (HasAddedVarDefinition) {
const auto *VD = cast<VarDecl>(D);
Record.push_back(UPD_CXX_ADDED_VAR_DEFINITION);
Record.push_back(VD->isInline());
Record.push_back(VD->isInlineSpecified());
Record.AddVarDeclInit(VD);
}
}

OffsetsRecord.push_back(GetDeclRef(D));
Expand Down
45 changes: 38 additions & 7 deletions clang/lib/Serialization/ASTWriterDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ODRHash.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/PrettyDeclStackTrace.h"
#include "clang/Basic/SourceManager.h"
Expand All @@ -40,11 +41,14 @@ namespace clang {
serialization::DeclCode Code;
unsigned AbbrevToUse;

bool GeneratingReducedBMI = false;

public:
ASTDeclWriter(ASTWriter &Writer, ASTContext &Context,
ASTWriter::RecordDataImpl &Record)
ASTWriter::RecordDataImpl &Record, bool GeneratingReducedBMI)
: Writer(Writer), Context(Context), Record(Writer, Record),
Code((serialization::DeclCode)0), AbbrevToUse(0) {}
Code((serialization::DeclCode)0), AbbrevToUse(0),
GeneratingReducedBMI(GeneratingReducedBMI) {}

uint64_t Emit(Decl *D) {
if (!Code)
Expand Down Expand Up @@ -270,6 +274,27 @@ namespace clang {
};
}

/// Decide whether the definition attached to \p D (a function body or a
/// variable initializer) may be left out when serializing.
///
/// Returns false for:
///  - functions that are inlined, constexpr, or in a dependent context;
///  - variables that are not at file scope, are inline, are constexpr, are
///    parameters, or are implicit template instantiations.
/// Every other declaration, including those that are neither functions nor
/// variables, yields true.
bool clang::CanElideDeclDef(const Decl *D) {
  if (const auto *Fn = dyn_cast<FunctionDecl>(D)) {
    if (Fn->isInlined() || Fn->isConstexpr())
      return false;
    if (Fn->isDependentContext())
      return false;
  }

  if (const auto *Var = dyn_cast<VarDecl>(D)) {
    // Only file-scope variables are candidates for elision.
    if (!Var->getDeclContext()->getRedeclContext()->isFileContext())
      return false;
    if (Var->isInline() || Var->isConstexpr() || isa<ParmVarDecl>(Var))
      return false;
    if (Var->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
      return false;
  }

  return true;
}

void ASTDeclWriter::Visit(Decl *D) {
DeclVisitor<ASTDeclWriter>::Visit(D);

Expand All @@ -285,17 +310,23 @@ void ASTDeclWriter::Visit(Decl *D) {
// have been written. We want it last because we will not read it back when
// retrieving it from the AST, we'll just lazily set the offset.
if (auto *FD = dyn_cast<FunctionDecl>(D)) {
Record.push_back(FD->doesThisDeclarationHaveABody());
if (FD->doesThisDeclarationHaveABody())
Record.AddFunctionDefinition(FD);
if (!GeneratingReducedBMI || !CanElideDeclDef(FD)) {
Record.push_back(FD->doesThisDeclarationHaveABody());
if (FD->doesThisDeclarationHaveABody())
Record.AddFunctionDefinition(FD);
} else
Record.push_back(0);
}

// Similar to FunctionDecls, handle VarDecl's initializer here and write it
// after all other Stmts/Exprs. We will not read the initializer until after
// we have finished recursive deserialization, because it can recursively
// refer back to the variable.
if (auto *VD = dyn_cast<VarDecl>(D)) {
Record.AddVarDeclInit(VD);
if (!GeneratingReducedBMI || !CanElideDeclDef(VD))
Record.AddVarDeclInit(VD);
else
Record.push_back(0);
}

// And similarly for FieldDecls. We already serialized whether there is a
Expand Down Expand Up @@ -2729,7 +2760,7 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) {
assert(ID >= FirstDeclID && "invalid decl ID");

RecordData Record;
ASTDeclWriter W(*this, Context, Record);
ASTDeclWriter W(*this, Context, Record, GeneratingReducedBMI);

// Build a record for this declaration
W.Visit(D);
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Serialization/ASTWriterStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2855,7 +2855,7 @@ void ASTStmtWriter::VisitOpenACCAssociatedStmtConstruct(

void ASTStmtWriter::VisitOpenACCComputeConstruct(OpenACCComputeConstruct *S) {
VisitStmt(S);
VisitOpenACCConstructStmt(S);
VisitOpenACCAssociatedStmtConstruct(S);
Code = serialization::STMT_OPENACC_COMPUTE_CONSTRUCT;
}

Expand Down
37 changes: 35 additions & 2 deletions clang/lib/Serialization/GeneratePCH.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/SemaConsumer.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/ASTWriter.h"
#include "llvm/Bitstream/BitstreamWriter.h"

Expand All @@ -25,11 +27,12 @@ PCHGenerator::PCHGenerator(
StringRef OutputFile, StringRef isysroot, std::shared_ptr<PCHBuffer> Buffer,
ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
bool AllowASTWithErrors, bool IncludeTimestamps,
bool BuildingImplicitModule, bool ShouldCacheASTInMemory)
bool BuildingImplicitModule, bool ShouldCacheASTInMemory,
bool GeneratingReducedBMI)
: PP(PP), OutputFile(OutputFile), isysroot(isysroot.str()),
SemaPtr(nullptr), Buffer(std::move(Buffer)), Stream(this->Buffer->Data),
Writer(Stream, this->Buffer->Data, ModuleCache, Extensions,
IncludeTimestamps, BuildingImplicitModule),
IncludeTimestamps, BuildingImplicitModule, GeneratingReducedBMI),
AllowASTWithErrors(AllowASTWithErrors),
ShouldCacheASTInMemory(ShouldCacheASTInMemory) {
this->Buffer->IsComplete = false;
Expand Down Expand Up @@ -78,3 +81,33 @@ ASTMutationListener *PCHGenerator::GetASTMutationListener() {
ASTDeserializationListener *PCHGenerator::GetASTDeserializationListener() {
return &Writer;
}

/// Construct a generator by delegating to PCHGenerator with a fixed
/// configuration: an empty isysroot, no module file extensions, ASTs with
/// errors disallowed, not building an implicit module, no in-memory AST
/// caching, and GeneratingReducedBMI enabled. Only \p PP, \p ModuleCache,
/// \p OutputFile, \p Buffer and \p IncludeTimestamps come from the caller.
ReducedBMIGenerator::ReducedBMIGenerator(const Preprocessor &PP,
InMemoryModuleCache &ModuleCache,
StringRef OutputFile,
std::shared_ptr<PCHBuffer> Buffer,
bool IncludeTimestamps)
: PCHGenerator(
PP, ModuleCache, OutputFile, llvm::StringRef(), Buffer,
/*Extensions=*/ArrayRef<std::shared_ptr<ModuleFileExtension>>(),
/*AllowASTWithErrors=*/ false, /*IncludeTimestamps=*/IncludeTimestamps,
/*BuildingImplicitModule=*/false, /*ShouldCacheASTInMemory=*/false,
/*GeneratingReducedBMI=*/true) {}

/// Run the base PCHGenerator handling for the translation unit and, if that
/// left the generator in the complete state, write the serialized buffer to
/// the output file.
///
/// If the output file cannot be opened, err_fe_unable_to_open_output is
/// reported with the file name and the system error message, and nothing is
/// written.
void ReducedBMIGenerator::HandleTranslationUnit(ASTContext &Ctx) {
  PCHGenerator::HandleTranslationUnit(Ctx);

  // Nothing to emit unless the base class finished producing the buffer.
  if (!isComplete())
    return;

  // The stream is only needed within this function, so it lives on the stack
  // and is closed by its destructor on every exit path.
  std::error_code EC;
  llvm::raw_fd_ostream OS(getOutputFile(), EC);
  if (EC) {
    // Pass exactly two arguments: the file name and the error text. Nothing
    // else is streamed, so no unused trailing argument is attached to the
    // diagnostic.
    getDiagnostics().Report(diag::err_fe_unable_to_open_output)
        << getOutputFile() << EC.message();
    return;
  }

  OS << getBufferPtr()->Data;
  OS.flush();
}
91 changes: 57 additions & 34 deletions clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,64 +307,64 @@ class StreamChecker : public Checker<check::PreCall, eval::Call,
{{{"fclose"}, 1},
{&StreamChecker::preDefault, &StreamChecker::evalFclose, 0}},
{{{"fread"}, 4},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
{&StreamChecker::preRead,
std::bind(&StreamChecker::evalFreadFwrite, _1, _2, _3, _4, true), 3}},
{{{"fwrite"}, 4},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
{&StreamChecker::preWrite,
std::bind(&StreamChecker::evalFreadFwrite, _1, _2, _3, _4, false), 3}},
{{{"fgetc"}, 1},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
{&StreamChecker::preRead,
std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, true), 0}},
{{{"fgets"}, 3},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
{&StreamChecker::preRead,
std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, false), 2}},
{{{"getc"}, 1},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
{&StreamChecker::preRead,
std::bind(&StreamChecker::evalFgetx, _1, _2, _3, _4, true), 0}},
{{{"fputc"}, 2},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
{&StreamChecker::preWrite,
std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, true), 1}},
{{{"fputs"}, 2},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
{&StreamChecker::preWrite,
std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, false), 1}},
{{{"putc"}, 2},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
{&StreamChecker::preWrite,
std::bind(&StreamChecker::evalFputx, _1, _2, _3, _4, true), 1}},
{{{"fprintf"}},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
{&StreamChecker::preWrite,
std::bind(&StreamChecker::evalFprintf, _1, _2, _3, _4), 0}},
{{{"vfprintf"}, 3},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
{&StreamChecker::preWrite,
std::bind(&StreamChecker::evalFprintf, _1, _2, _3, _4), 0}},
{{{"fscanf"}},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
{&StreamChecker::preRead,
std::bind(&StreamChecker::evalFscanf, _1, _2, _3, _4), 0}},
{{{"vfscanf"}, 3},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
{&StreamChecker::preRead,
std::bind(&StreamChecker::evalFscanf, _1, _2, _3, _4), 0}},
{{{"ungetc"}, 2},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, false),
{&StreamChecker::preWrite,
std::bind(&StreamChecker::evalUngetc, _1, _2, _3, _4), 1}},
{{{"getdelim"}, 4},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
{&StreamChecker::preRead,
std::bind(&StreamChecker::evalGetdelim, _1, _2, _3, _4), 3}},
{{{"getline"}, 3},
{std::bind(&StreamChecker::preReadWrite, _1, _2, _3, _4, true),
{&StreamChecker::preRead,
std::bind(&StreamChecker::evalGetdelim, _1, _2, _3, _4), 2}},
{{{"fseek"}, 3},
{&StreamChecker::preFseek, &StreamChecker::evalFseek, 0}},
{{{"fseeko"}, 3},
{&StreamChecker::preFseek, &StreamChecker::evalFseek, 0}},
{{{"ftell"}, 1},
{&StreamChecker::preDefault, &StreamChecker::evalFtell, 0}},
{&StreamChecker::preWrite, &StreamChecker::evalFtell, 0}},
{{{"ftello"}, 1},
{&StreamChecker::preDefault, &StreamChecker::evalFtell, 0}},
{&StreamChecker::preWrite, &StreamChecker::evalFtell, 0}},
{{{"fflush"}, 1},
{&StreamChecker::preFflush, &StreamChecker::evalFflush, 0}},
{{{"rewind"}, 1},
{&StreamChecker::preDefault, &StreamChecker::evalRewind, 0}},
{{{"fgetpos"}, 2},
{&StreamChecker::preDefault, &StreamChecker::evalFgetpos, 0}},
{&StreamChecker::preWrite, &StreamChecker::evalFgetpos, 0}},
{{{"fsetpos"}, 2},
{&StreamChecker::preDefault, &StreamChecker::evalFsetpos, 0}},
{{{"clearerr"}, 1},
Expand All @@ -384,12 +384,18 @@ class StreamChecker : public Checker<check::PreCall, eval::Call,
CallDescriptionMap<FnDescription> FnTestDescriptions = {
{{{"StreamTesterChecker_make_feof_stream"}, 1},
{nullptr,
std::bind(&StreamChecker::evalSetFeofFerror, _1, _2, _3, _4, ErrorFEof),
std::bind(&StreamChecker::evalSetFeofFerror, _1, _2, _3, _4, ErrorFEof,
false),
0}},
{{{"StreamTesterChecker_make_ferror_stream"}, 1},
{nullptr,
std::bind(&StreamChecker::evalSetFeofFerror, _1, _2, _3, _4,
ErrorFError),
ErrorFError, false),
0}},
{{{"StreamTesterChecker_make_ferror_indeterminate_stream"}, 1},
{nullptr,
std::bind(&StreamChecker::evalSetFeofFerror, _1, _2, _3, _4,
ErrorFError, true),
0}},
};

Expand All @@ -415,8 +421,11 @@ class StreamChecker : public Checker<check::PreCall, eval::Call,
void evalFclose(const FnDescription *Desc, const CallEvent &Call,
CheckerContext &C) const;

void preReadWrite(const FnDescription *Desc, const CallEvent &Call,
CheckerContext &C, bool IsRead) const;
void preRead(const FnDescription *Desc, const CallEvent &Call,
CheckerContext &C) const;

void preWrite(const FnDescription *Desc, const CallEvent &Call,
CheckerContext &C) const;

void evalFreadFwrite(const FnDescription *Desc, const CallEvent &Call,
CheckerContext &C, bool IsFread) const;
Expand Down Expand Up @@ -467,8 +476,8 @@ class StreamChecker : public Checker<check::PreCall, eval::Call,
const StreamErrorState &ErrorKind) const;

void evalSetFeofFerror(const FnDescription *Desc, const CallEvent &Call,
CheckerContext &C,
const StreamErrorState &ErrorKind) const;
CheckerContext &C, const StreamErrorState &ErrorKind,
bool Indeterminate) const;

void preFflush(const FnDescription *Desc, const CallEvent &Call,
CheckerContext &C) const;
Expand Down Expand Up @@ -849,9 +858,8 @@ void StreamChecker::evalFclose(const FnDescription *Desc, const CallEvent &Call,
C.addTransition(E.bindReturnValue(State, C, *EofVal));
}

void StreamChecker::preReadWrite(const FnDescription *Desc,
const CallEvent &Call, CheckerContext &C,
bool IsRead) const {
void StreamChecker::preRead(const FnDescription *Desc, const CallEvent &Call,
CheckerContext &C) const {
ProgramStateRef State = C.getState();
SVal StreamVal = getStreamArg(Desc, Call);
State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C,
Expand All @@ -865,11 +873,6 @@ void StreamChecker::preReadWrite(const FnDescription *Desc,
if (!State)
return;

if (!IsRead) {
C.addTransition(State);
return;
}

SymbolRef Sym = StreamVal.getAsSymbol();
if (Sym && State->get<StreamMap>(Sym)) {
const StreamState *SS = State->get<StreamMap>(Sym);
Expand All @@ -880,6 +883,24 @@ void StreamChecker::preReadWrite(const FnDescription *Desc,
}
}

void StreamChecker::preWrite(const FnDescription *Desc, const CallEvent &Call,
CheckerContext &C) const {
ProgramStateRef State = C.getState();
SVal StreamVal = getStreamArg(Desc, Call);
State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C,
State);
if (!State)
return;
State = ensureStreamOpened(StreamVal, C, State);
if (!State)
return;
State = ensureNoFilePositionIndeterminate(StreamVal, C, State);
if (!State)
return;

C.addTransition(State);
}

void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
const CallEvent &Call, CheckerContext &C,
bool IsFread) const {
Expand Down Expand Up @@ -1496,14 +1517,16 @@ void StreamChecker::preDefault(const FnDescription *Desc, const CallEvent &Call,

void StreamChecker::evalSetFeofFerror(const FnDescription *Desc,
const CallEvent &Call, CheckerContext &C,
const StreamErrorState &ErrorKind) const {
const StreamErrorState &ErrorKind,
bool Indeterminate) const {
ProgramStateRef State = C.getState();
SymbolRef StreamSym = getStreamArg(Desc, Call).getAsSymbol();
assert(StreamSym && "Operation not permitted on non-symbolic stream value.");
const StreamState *SS = State->get<StreamMap>(StreamSym);
assert(SS && "Stream should be tracked by the checker.");
State = State->set<StreamMap>(
StreamSym, StreamState::getOpened(SS->LastOperation, ErrorKind));
StreamSym,
StreamState::getOpened(SS->LastOperation, ErrorKind, Indeterminate));
C.addTransition(State);
}

Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Testing/CommandLineArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ std::vector<std::string> getCommandLineArgsForTesting(TestLanguage Lang) {
case Lang_CXX20:
Args = {"-std=c++20", "-frtti"};
break;
case Lang_CXX23:
Args = {"-std=c++23", "-frtti"};
break;
case Lang_OBJC:
Args = {"-x", "objective-c", "-frtti", "-fobjc-nonfragile-abi"};
break;
Expand Down Expand Up @@ -73,6 +76,9 @@ std::vector<std::string> getCC1ArgsForTesting(TestLanguage Lang) {
case Lang_CXX20:
Args = {"-std=c++20"};
break;
case Lang_CXX23:
Args = {"-std=c++23"};
break;
case Lang_OBJC:
Args = {"-xobjective-c"};
break;
Expand All @@ -96,6 +102,7 @@ StringRef getFilenameForTesting(TestLanguage Lang) {
case Lang_CXX14:
case Lang_CXX17:
case Lang_CXX20:
case Lang_CXX23:
return "input.cc";

case Lang_OpenCL:
Expand Down
27 changes: 23 additions & 4 deletions clang/test/Analysis/stream-error.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ void clang_analyzer_dump(int);
void clang_analyzer_warnIfReached(void);
void StreamTesterChecker_make_feof_stream(FILE *);
void StreamTesterChecker_make_ferror_stream(FILE *);
void StreamTesterChecker_make_ferror_indeterminate_stream(FILE *);

void error_fopen(void) {
FILE *F = fopen("file", "r");
Expand Down Expand Up @@ -52,6 +53,8 @@ void stream_error_feof(void) {
clearerr(F);
clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}}
clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}}
StreamTesterChecker_make_ferror_indeterminate_stream(F);
clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}}
fclose(F);
}

Expand All @@ -65,6 +68,8 @@ void stream_error_ferror(void) {
clearerr(F);
clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}}
clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}}
StreamTesterChecker_make_ferror_indeterminate_stream(F);
clang_analyzer_eval(ferror(F)); // expected-warning {{TRUE}}
fclose(F);
}

Expand Down Expand Up @@ -233,7 +238,7 @@ void error_fscanf(int *A) {
fscanf(F, "ccc"); // expected-warning {{Stream might be already closed}}
}

void error_ungetc() {
void error_ungetc(int TestIndeterminate) {
FILE *F = tmpfile();
if (!F)
return;
Expand All @@ -245,8 +250,12 @@ void error_ungetc() {
clang_analyzer_eval(Ret == 'X'); // expected-warning {{TRUE}}
}
fputc('Y', F); // no-warning
if (TestIndeterminate) {
StreamTesterChecker_make_ferror_indeterminate_stream(F);
ungetc('X', F); // expected-warning {{might be 'indeterminate'}}
}
fclose(F);
ungetc('A', F); // expected-warning {{Stream might be already closed}}
ungetc('A', F); // expected-warning {{Stream might be already closed}}
}

void error_getdelim(char *P, size_t Sz) {
Expand Down Expand Up @@ -449,7 +458,7 @@ void error_fseeko_0(void) {
fclose(F);
}

void error_ftell(void) {
void error_ftell(int TestIndeterminate) {
FILE *F = fopen("file", "r");
if (!F)
return;
Expand All @@ -467,10 +476,14 @@ void error_ftell(void) {
rc = ftell(F);
clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}}
clang_analyzer_eval(ferror(F)); // expected-warning {{TRUE}}
if (TestIndeterminate) {
StreamTesterChecker_make_ferror_indeterminate_stream(F);
ftell(F); // expected-warning {{might be 'indeterminate'}}
}
fclose(F);
}

void error_ftello(void) {
void error_ftello(int TestIndeterminate) {
FILE *F = fopen("file", "r");
if (!F)
return;
Expand All @@ -488,6 +501,10 @@ void error_ftello(void) {
rc = ftello(F);
clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}}
clang_analyzer_eval(ferror(F)); // expected-warning {{TRUE}}
if (TestIndeterminate) {
  // Put the stream into the "ferror + indeterminate file position" state and
  // call ftello() itself: this is the ftello test, and ftell() already has
  // the same check in error_ftell.
  StreamTesterChecker_make_ferror_indeterminate_stream(F);
  ftello(F); // expected-warning {{might be 'indeterminate'}}
}
fclose(F);
}

Expand All @@ -506,6 +523,8 @@ void error_fileno(void) {
N = fileno(F);
clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}}
clang_analyzer_eval(ferror(F)); // expected-warning {{TRUE}}
StreamTesterChecker_make_ferror_indeterminate_stream(F);
fileno(F); // no warning
fclose(F);
}

Expand Down
2 changes: 2 additions & 0 deletions clang/test/CXX/basic/basic.link/p10-ex2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
//
// RUN: %clang_cc1 -std=c++20 M.cpp -fsyntax-only -DTEST_INTERFACE -verify
// RUN: %clang_cc1 -std=c++20 M.cpp -emit-module-interface -o M.pcm
// RUN: %clang_cc1 -std=c++20 M.cpp -emit-reduced-module-interface -o M.reduced.pcm
// RUN: %clang_cc1 -std=c++20 useM.cpp -fsyntax-only -fmodule-file=M=M.pcm -verify
// RUN: %clang_cc1 -std=c++20 useM.cpp -fsyntax-only -fmodule-file=M=M.reduced.pcm -verify

//--- decls.h
int f(); // #1, attached to the global module
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
// RUN: split-file %s %t
//
// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/Friend-in-reachable-class.cppm -o %t/X.pcm
// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp -verify -fsyntax-only
// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/Friend-in-reachable-class.cppm \
// RUN: -o %t/X.reduced.pcm
// RUN: %clang_cc1 -std=c++20 -fmodule-file=X=%t/X.pcm %t/Use.cpp -verify -fsyntax-only
// RUN: %clang_cc1 -std=c++20 -fmodule-file=X=%t/X.reduced.pcm %t/Use.cpp -verify -fsyntax-only
//
//--- Friend-in-reachable-class.cppm
module;
Expand Down
26 changes: 26 additions & 0 deletions clang/test/CXX/drs/dr25xx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,32 @@ namespace dr2565 { // dr2565: 16 open 2023-06-07
#endif
}

namespace dr2583 { // dr2583: 19
#if __cplusplus >= 201103L
// A and B declare the same member types in the same order; the only
// difference is the alignment requested for 'c' in B.
struct A {
int i;
char c;
};

struct B {
int i;
alignas(8) char c;
};

// U and V hold the same member types; V additionally over-aligns its 'b'
// member.
union U {
A a;
B b;
};

union V {
A a;
alignas(64) B b;
};

// The differing alignment of 'c' must make the two structs
// layout-incompatible...
static_assert(!__is_layout_compatible(A, B), "");
// ...while an alignment specifier on a union member must not affect layout
// compatibility of the unions.
static_assert(__is_layout_compatible(U, V), "");
#endif
} // namespace dr2583

namespace dr2598 { // dr2598: 18
#if __cplusplus >= 201103L
Expand Down
11 changes: 11 additions & 0 deletions clang/test/CodeGen/X86/attribute-cmpsd-no-error.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -emit-llvm -o /dev/null -verify
// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -emit-llvm -o /dev/null -verify

// expected-no-diagnostics

#include <immintrin.h>

// AVX is enabled only through the function-level target attribute (no
// -target-feature on the command line). Using comparison immediate 14 inside
// such a function must not be diagnosed. The RUN lines need -emit-llvm as a
// separate argument so that a code generation action actually runs.
__attribute__((target("avx")))
__m128 test(__m128 a, __m128 b) {
  return _mm_cmp_ps(a, b, 14);
}
96 changes: 0 additions & 96 deletions clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -596,54 +596,6 @@ __m256 test_mm256_cmp_ps_true_us(__m256 a, __m256 b) {
return _mm256_cmp_ps(a, b, _CMP_TRUE_US);
}

__m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_oq
// CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_EQ_OQ);
}

__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_lt_os
// CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LT_OS);
}

__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_le_os
// CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LE_OS);
}

__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_unord_q
// CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_UNORD_Q);
}

__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_neq_uq
// CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NEQ_UQ);
}

__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nlt_us
// CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLT_US);
}

__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nle_us
// CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLE_US);
}

__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ord_q
// CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_ORD_Q);
}

__m128d test_mm_cmp_pd_eq_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_uq
// CHECK: fcmp ueq <2 x double> %{{.*}}, %{{.*}}
Expand Down Expand Up @@ -788,54 +740,6 @@ __m128d test_mm_cmp_pd_true_us(__m128d a, __m128d b) {
return _mm_cmp_pd(a, b, _CMP_TRUE_US);
}

__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_oq
// CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_EQ_OQ);
}

__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_lt_os
// CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LT_OS);
}

__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_le_os
// CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LE_OS);
}

__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_unord_q
// CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_UNORD_Q);
}

__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_neq_uq
// CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NEQ_UQ);
}

__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nlt_us
// CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLT_US);
}

__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nle_us
// CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLE_US);
}

__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ord_q
// CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_ORD_Q);
}

__m128 test_mm_cmp_ps_eq_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_uq
// CHECK: fcmp ueq <4 x float> %{{.*}}, %{{.*}}
Expand Down
22 changes: 22 additions & 0 deletions clang/test/CodeGen/X86/cmp-avx-builtins-error.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
// RUN: -target-feature +avx -emit-llvm -fsyntax-only -verify
// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown \
// RUN: -target-feature +avx -emit-llvm -fsyntax-only -verify

#include <immintrin.h>

// Packed double compare: immediate 32 lies just past the accepted range
// [0, 31] and must be rejected.
__m128d test_mm_cmp_pd(__m128d a, __m128d b) {
return _mm_cmp_pd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
}

// Scalar double compare: immediate 32 lies just past the accepted range
// [0, 31] and must be rejected.
__m128d test_mm_cmp_sd(__m128d a, __m128d b) {
return _mm_cmp_sd(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
}

// Packed float compare: immediate 32 lies just past the accepted range
// [0, 31] and must be rejected.
__m128 test_mm_cmp_ps(__m128 a, __m128 b) {
return _mm_cmp_ps(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
}

// Scalar float compare: immediate 32 lies just past the accepted range
// [0, 31] and must be rejected.
__m128 test_mm_cmp_ss(__m128 a, __m128 b) {
return _mm_cmp_ss(a, b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
}
54 changes: 54 additions & 0 deletions clang/test/CodeGen/X86/sse-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,60 @@ __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
return _mm_andnot_ps(A, B);
}

// _CMP_EQ_OQ on packed floats must lower to an ordered-equal (oeq) vector
// compare.
__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_oq
// CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_EQ_OQ);
}

// _CMP_LT_OS on packed floats must lower to an ordered less-than (olt) vector
// compare.
__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_lt_os
// CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LT_OS);
}

// _CMP_LE_OS on packed floats must lower to an ordered less-or-equal (ole)
// vector compare.
__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_le_os
// CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LE_OS);
}

// _CMP_UNORD_Q on packed floats must lower to an unordered (uno) vector
// compare.
__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_unord_q
// CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_UNORD_Q);
}

// _CMP_NEQ_UQ on packed floats must lower to an unordered-or-not-equal (une)
// vector compare.
__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_neq_uq
// CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NEQ_UQ);
}

// _CMP_NLT_US (not less-than) on packed floats must lower to an
// unordered-or-greater-equal (uge) vector compare.
__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nlt_us
// CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLT_US);
}

// _CMP_NLE_US (not less-or-equal) on packed floats must lower to an
// unordered-or-greater-than (ugt) vector compare.
__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nle_us
// CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLE_US);
}

// _CMP_ORD_Q on packed floats must lower to an ordered (ord) vector compare.
__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ord_q
// CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_ORD_Q);
}

// The scalar float compare is emitted as a call to the SSE cmp.ss intrinsic;
// passing _CMP_ORD_Q must show up as immediate operand 7.
__m128 test_mm_cmp_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_cmp_ss
// CHECK: call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
return _mm_cmp_ss(A, B, _CMP_ORD_Q);
}

__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpeq_ps
// CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
Expand Down
54 changes: 54 additions & 0 deletions clang/test/CodeGen/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,60 @@ void test_mm_clflush(void* A) {
_mm_clflush(A);
}

// _CMP_EQ_OQ on packed doubles must lower to an ordered-equal (oeq) vector
// compare.
__m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_oq
// CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_EQ_OQ);
}

// _CMP_LT_OS on packed doubles must lower to an ordered less-than (olt)
// vector compare.
__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_lt_os
// CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LT_OS);
}

// _CMP_LE_OS on packed doubles must lower to an ordered less-or-equal (ole)
// vector compare.
__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_le_os
// CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LE_OS);
}

// _CMP_UNORD_Q on packed doubles must lower to an unordered (uno) vector
// compare.
__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_unord_q
// CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_UNORD_Q);
}

// _CMP_NEQ_UQ on packed doubles must lower to an unordered-or-not-equal (une)
// vector compare.
__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_neq_uq
// CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NEQ_UQ);
}

// _CMP_NLT_US (not less-than) on packed doubles must lower to an
// unordered-or-greater-equal (uge) vector compare.
__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nlt_us
// CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLT_US);
}

// _CMP_NLE_US (not less-or-equal) on packed doubles must lower to an
// unordered-or-greater-than (ugt) vector compare.
__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nle_us
// CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLE_US);
}

// _CMP_ORD_Q on packed doubles must lower to an ordered (ord) vector compare.
__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ord_q
// CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_ORD_Q);
}

// The scalar double compare is emitted as a call to the SSE2 cmp.sd
// intrinsic; passing _CMP_ORD_Q must show up as immediate operand 7.
__m128d test_mm_cmp_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmp_sd
// CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
return _mm_cmp_sd(A, B, _CMP_ORD_Q);
}

__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpeq_epi8
// CHECK: icmp eq <16 x i8>
Expand Down
213 changes: 213 additions & 0 deletions clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dupq.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
// RUN: -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s


#include <arm_sve.h>

#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
// With SVE_OVERLOADED_FORMS defined, the second argument (the type suffix) is
// dropped, so only the base intrinsic name is emitted.
#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
#else
// Otherwise the base name and the type suffix are token-pasted into the
// explicitly suffixed intrinsic name.
#define SVE_ACLE_FUNC(A1, A2) A1##A2
#endif

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_s8
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svdup_laneq_s8u10__SVInt8_t
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// svdup_laneq on svint8_t with lane index 0.
svint8_t test_svdup_laneq_s8(svint8_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _s8)(zn, 0);
}

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_u8
// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svdup_laneq_u8u11__SVUint8_t
// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// svdup_laneq on svuint8_t with lane index 15.
svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _u8)(zn, 15);
}

// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svdup_laneq_s16
// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svdup_laneq_s16u11__SVInt16_t
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// svdup_laneq on svint16_t with lane index 1.
svint16_t test_svdup_laneq_s16(svint16_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _s16)(zn, 1);
}

// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svdup_laneq_u16
// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svdup_laneq_u16u12__SVUint16_t
// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
// svdup_laneq on svuint16_t with lane index 7.
svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _u16)(zn, 7);
}

// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svdup_laneq_s32
// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svdup_laneq_s32u11__SVInt32_t
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// svdup_laneq on svint32_t with lane index 2.
svint32_t test_svdup_laneq_s32(svint32_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _s32)(zn, 2);
}

// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svdup_laneq_u32
// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svdup_laneq_u32u12__SVUint32_t
// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
// svdup_laneq on svuint32_t with lane index 3.
svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _u32)(zn, 3);
}

// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svdup_laneq_s64
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svdup_laneq_s64u11__SVInt64_t
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
// svdup_laneq on svint64_t with lane index 0.
svint64_t test_svdup_laneq_s64(svint64_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _s64)(zn, 0);
}

// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svdup_laneq_u64
// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svdup_laneq_u64u12__SVUint64_t
// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
//
// svdup_laneq on svuint64_t with lane index 1.
svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _u64)(zn, 1);
}

// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svdup_laneq_f16
// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svdup_laneq_f16u13__SVFloat16_t
// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
// svdup_laneq on svfloat16_t with lane index 4.
svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _f16)(zn, 4);
}

// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svdup_laneq_f32
// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svdup_laneq_f32u13__SVFloat32_t
// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
// svdup_laneq on svfloat32_t with lane index 1.
svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _f32)(zn, 1);
}

// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svdup_laneq_f64
// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svdup_laneq_f64u13__SVFloat64_t
// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
// svdup_laneq on svfloat64_t with lane index 1.
svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _f64)(zn, 1);
}

// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svdup_laneq_bf16
// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svdup_laneq_bf16u14__SVBfloat16_t
// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
// svdup_laneq on svbfloat16_t with lane index 3.
svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
return SVE_ACLE_FUNC(svdup_laneq, _bf16)(zn, 3);
}
10 changes: 10 additions & 0 deletions clang/test/CodeGen/attr-availability-visionos.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: %clang_cc1 -triple arm64-apple-xros1 -emit-llvm -o - %s 2>&1 | FileCheck %s

// Declared as introduced in visionOS 1.1.
__attribute__((availability(visionOS, introduced=1.1)))
void introduced_1_1();

// The availability check for visionOS 1.2 must be emitted as a runtime call
// to __isPlatformVersionAtLeast with the arguments (11, 1, 2, 0); the leading
// 11 is presumably the platform identifier used for visionOS.
void use() {
if (__builtin_available(visionOS 1.2, *))
introduced_1_1();
// CHECK: call i32 @__isPlatformVersionAtLeast(i32 11, i32 1, i32 2, i32 0)
}
File renamed without changes.
2 changes: 1 addition & 1 deletion clang/test/CodeGen/target-features-error-2.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ int baz(__m256i a) {

#if NEED_AVX_2
__m128 need_avx(__m128 a, __m128 b) {
return _mm_cmp_ps(a, b, 0); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}}
return _mm_cmp_ps(a, b, 8); // expected-error {{'__builtin_ia32_cmpps' needs target feature avx}}
}
#endif

Expand Down
24 changes: 24 additions & 0 deletions clang/test/CodeGen/tbaa-struct-relaxed-aliasing-with-tsan.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - -O1 -relaxed-aliasing -fsanitize=thread -disable-llvm-optzns %s | \
// RUN: FileCheck %s
// RUN: %clang_cc1 -triple x86_64-apple-darwin -new-struct-path-tbaa \
// RUN: -emit-llvm -o - -O1 -relaxed-aliasing -fsanitize=thread -disable-llvm-optzns %s | \
// RUN: FileCheck %s
//
// Check that we do not create tbaa for instructions generated for copies.

// CHECK-NOT: !tbaa

// Aggregate with members of different sizes; it is the object copied by
// copyStruct.
struct A {
short s;
int i;
char c;
int j;
};

// Whole-struct assignment: the instructions generated for this copy must not
// carry TBAA metadata under the flags used in the RUN lines.
void copyStruct(A *a1, A *a2) {
*a1 = *a2;
}

// Scalar copy through pointers: likewise must be emitted without TBAA
// metadata.
void copyInt(int *a, int *b) {
*a = *b;
}
13 changes: 13 additions & 0 deletions clang/test/CodeGenCUDA/host-used-extern.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
Expand All @@ -44,6 +45,10 @@ __global__ void kernel3();
// kernel4 is marked as used even though it is not called.
__global__ void kernel4();

// kernel5 is not marked as used since it is called by a host function
// with weak_odr linkage, which may be dropped by the linker.
__global__ void kernel5();

extern __device__ int var1;

__device__ int var2;
Expand All @@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
};
lambda();
}

// Function template with no CUDA target attribute that launches kernel5 and
// stores 1 to var1. Per the comment on kernel5's declaration, a call from a
// host function with weak_odr linkage does not mark kernel5 as used.
template<class T>
void template_caller() {
kernel5<<<1, 1>>>();
var1 = 1;
}

// Explicit instantiation of template_caller for int.
// NOTE(review): presumably this instantiation is what receives weak_odr
// linkage -- confirm.
template void template_caller<int>();
2 changes: 2 additions & 0 deletions clang/test/Driver/aarch64-mcpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
// CORTEXA78: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78"
// RUN: %clang --target=aarch64 -mcpu=cortex-a78c -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A78C %s
// CORTEX-A78C: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78c"
// RUN: %clang --target=aarch64 -mcpu=cortex-a78ae -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A78AE %s
// CORTEX-A78AE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78ae"
// RUN: %clang --target=aarch64 -mcpu=cortex-a715 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A715 %s
// CORTEX-A715: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a715"
// RUN: %clang --target=aarch64 -mcpu=cortex-a720 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A720 %s
Expand Down
7 changes: 7 additions & 0 deletions clang/test/Driver/arm-cortex-cpus-2.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,13 @@
// CHECK-CORTEX-A78C-MFPU: "-target-feature" "+sha2"
// CHECK-CORTEX-A78C-MFPU: "-target-feature" "+aes"

// RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-a78ae -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A78AE %s
// RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-a78ae -mfpu=crypto-neon-fp-armv8 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A78AE-MFPU %s
// CHECK-CORTEX-A78AE: "-cc1"{{.*}} "-triple" "armv8.2a-{{.*}} "-target-cpu" "cortex-a78ae"
// CHECK-CORTEX-A78AE-MFPU: "-cc1"{{.*}} "-target-feature" "+fp-armv8"
// CHECK-CORTEX-A78AE-MFPU: "-target-feature" "+sha2"
// CHECK-CORTEX-A78AE-MFPU: "-target-feature" "+aes"

// RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-a710 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A710 %s
// RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-a710 -mfpu=crypto-neon-fp-armv8 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A710-MFPU %s
// CHECK-CORTEX-A710: "-cc1"{{.*}} "-triple" "armv9a-{{.*}} "-target-cpu" "cortex-a710"
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/cuda-bad-arch.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
// RUN: | FileCheck -check-prefix OK %s

// We don't allow using NVPTX/AMDGCN for host compilation.
// RUN: not %clang -### --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \
// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \
// RUN: | FileCheck -check-prefix HOST_NVPTX %s
// RUN: not %clang -### --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \
// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \
// RUN: | FileCheck -check-prefix HOST_AMDGCN %s

// OK-NOT: error: Unsupported CUDA gpu architecture
Expand Down
14 changes: 7 additions & 7 deletions clang/test/Driver/cuda-external-tools.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
// Generating relocatable device code
// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s

// With debugging enabled, ptxas should be run with no ptxas optimizations.
Expand Down Expand Up @@ -59,7 +59,7 @@
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
// Separate compilation targeting sm_35.
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s

// 32-bit compile.
Expand All @@ -68,7 +68,7 @@
// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s
// 32-bit compile when generating relocatable device code.
// RUN: %clang -### --target=i386-linux-gnu -fgpu-rdc -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s

// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
Expand All @@ -77,7 +77,7 @@
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
// Check that we still pass -c when generating relocatable device code.
// RUN: %clang -### --target=x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s

// Check -Xcuda-ptxas and -Xcuda-fatbinary
Expand All @@ -99,13 +99,13 @@

// Check relocatable device code generation on MacOS.
// RUN: %clang -### --target=x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// RUN: %clang -### --target=x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// RUN: %clang -### --target=i386-apple-macosx -fgpu-rdc -c %s 2>&1 \
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s

// Check that CLANG forwards the -v flag to PTXAS.
Expand Down
49 changes: 25 additions & 24 deletions clang/test/Driver/cuda-phases.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//
// Test CUDA NVPTX phases.
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=BIN %s
//
// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
Expand All @@ -34,7 +34,7 @@
// Test single gpu architecture up to the assemble phase.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s -S 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 %s -S 2>&1 \
// RUN: | FileCheck -check-prefixes=ASM %s
// ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
Expand All @@ -50,7 +50,7 @@
// Test two gpu architectures with complete compilation.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=BIN2 %s
// BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
Expand Down Expand Up @@ -79,7 +79,7 @@
// Test two gpu architectures up to the assemble phase.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
// RUN: | FileCheck -check-prefixes=ASM2 %s
// ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]])
// ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
Expand All @@ -101,7 +101,7 @@
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
// RUN: | FileCheck -check-prefixes=HBIN %s
// HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
Expand All @@ -115,7 +115,7 @@
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
// RUN: | FileCheck -check-prefixes=HASM %s
// HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
Expand All @@ -128,7 +128,7 @@
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
// RUN: | FileCheck -check-prefixes=HBIN2 %s
// HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
Expand All @@ -143,7 +143,7 @@
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S \
// RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s
// HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
// HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
Expand All @@ -156,7 +156,7 @@
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
// RUN: | FileCheck -check-prefixes=DBIN %s
// DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
Expand All @@ -170,7 +170,7 @@
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
// RUN: | FileCheck -check-prefixes=DASM %s
// DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
Expand All @@ -184,7 +184,7 @@
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
// RUN: | FileCheck -check-prefixes=DBIN2 %s
// DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
Expand All @@ -204,7 +204,7 @@
// compilation mode.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S \
// RUN: --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S \
// RUN: 2>&1 | FileCheck -check-prefixes=DASM2 %s
// DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
// DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
Expand Down Expand Up @@ -244,31 +244,32 @@
// NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda)
// NEW-DRIVER-RDC-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)

// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \
// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s
// NEW-DRIVER: 0: input, "[[INPUT:.+]]", cuda
// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output
// NEW-DRIVER-NEXT: 2: compiler, {1}, ir
// NEW-DRIVER-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
// NEW-DRIVER: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
// NEW-DRIVER-NEXT: 2: compiler, {1}, ir, (host-cuda)
// NEW-DRIVER-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
// NEW-DRIVER-NEXT: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
// NEW-DRIVER-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
// NEW-DRIVER-NEXT: 15: clang-offload-packager, {8, 14}, image
// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir
// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
// NEW-DRIVER-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
// NEW-DRIVER-NEXT: 17: backend, {16}, assembler, (host-cuda)
// NEW-DRIVER-NEXT: 18: assembler, {17}, object, (host-cuda)
// NEW-DRIVER-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)

// RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s %S/Inputs/empty.cpp 2>&1 | FileCheck --check-prefix=NON-CUDA-INPUT %s

// NON-CUDA-INPUT: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
// NON-CUDA-INPUT-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
// NON-CUDA-INPUT-NEXT: 2: compiler, {1}, ir, (host-cuda)
Expand All @@ -277,13 +278,13 @@
// NON-CUDA-INPUT-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
// NON-CUDA-INPUT-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
// NON-CUDA-INPUT-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
// NON-CUDA-INPUT-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
// NON-CUDA-INPUT-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
// NON-CUDA-INPUT-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
// NON-CUDA-INPUT-NEXT: 17: backend, {16}, assembler, (host-cuda)
Expand Down
10 changes: 5 additions & 5 deletions clang/test/Driver/hip-binding.hip
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

// RUN: %clang -ccc-print-bindings --target=x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
// RUN: -c 2>&1 | FileCheck -check-prefix=NRDCS %s
// RUN: --no-offload-new-driver -c 2>&1 | FileCheck -check-prefix=NRDCS %s
// RUN: %clang -ccc-print-bindings --target=x86_64-linux-gnu --offload-new-driver \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
// RUN: -c 2>&1 | FileCheck -check-prefix=NRDCS %s
// RUN: --no-offload-new-driver -c 2>&1 | FileCheck -check-prefix=NRDCS %s
// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ1:.*o]]"
// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ1]]"], output: "[[IMG1:.*]]"
// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ2:.*o]]"
Expand All @@ -16,7 +16,7 @@

// RUN: %clang -ccc-print-bindings --target=x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
// RUN: -c -fgpu-rdc 2>&1 | FileCheck -check-prefix=RDCS %s
// RUN: --no-offload-new-driver -c -fgpu-rdc 2>&1 | FileCheck -check-prefix=RDCS %s
// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[BC1:.*bc]]"
// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[BC2:.*bc]]"
// RDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]"], output: "[[HOSTOBJ:.*o]]"
Expand All @@ -32,7 +32,7 @@

// RUN: touch %t.o
// RUN: %clang --hip-link -ccc-print-bindings --target=x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\
// RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\
// RUN: 2>&1 | FileCheck %s

// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"]
Expand All @@ -46,7 +46,7 @@
// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "[[FATBINOBJ]]"], output: "a.out"

// RUN: %clang --hip-link -ccc-print-bindings --target=x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
// RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
// RUN: 2>&1 | FileCheck -check-prefix=NORDC %s

// NORDC-NOT: offload bundler
Expand Down
8 changes: 4 additions & 4 deletions clang/test/Driver/hip-cuid-hash.hip
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
// Check CUID generated by hash.
// The same CUID is generated for the same file with the same options.

// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu \
// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
// RUN: --offload-arch=gfx906 -c -nogpuinc -nogpulib -fuse-cuid=hash \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1

// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu \
// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
// RUN: --offload-arch=gfx906 -c -nogpuinc -nogpulib -fuse-cuid=hash \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1

Expand All @@ -17,11 +17,11 @@
// Check CUID generated by hash.
// Different CUIDs are generated for the same file with different options.

// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu -DX=1 \
// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu -DX=1 --no-offload-new-driver \
// RUN: --offload-arch=gfx906 -c -nogpuinc -nogpulib -fuse-cuid=hash \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1

// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu -DX=2 \
// RUN: %clang -### -x hip --target=x86_64-unknown-linux-gnu -DX=2 --no-offload-new-driver \
// RUN: --offload-arch=gfx906 -c -nogpuinc -nogpulib -fuse-cuid=hash \
// RUN: %S/Inputs/../Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1

Expand Down
5 changes: 5 additions & 0 deletions clang/test/Driver/hip-cuid.hip
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

// RUN: not %clang -### -x hip \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver \
// RUN: --offload-arch=gfx900 \
// RUN: --offload-arch=gfx906 \
// RUN: -c -nogpuinc -nogpulib -fuse-cuid=invalid \
Expand All @@ -16,6 +17,7 @@

// RUN: %clang -### -x hip \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver \
// RUN: --offload-arch=gfx900 \
// RUN: --offload-arch=gfx906 \
// RUN: -c -nogpuinc -nogpulib -fuse-cuid=random \
Expand All @@ -27,6 +29,7 @@

// RUN: %clang -### -x hip \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver \
// RUN: --offload-arch=gfx900 \
// RUN: --offload-arch=gfx906 \
// RUN: -c -nogpuinc -nogpulib -cuid=xyz_123 \
Expand All @@ -38,6 +41,7 @@

// RUN: %clang -### -x hip \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver \
// RUN: --offload-arch=gfx900 \
// RUN: --offload-arch=gfx906 \
// RUN: -c -nogpuinc -nogpulib -fuse-cuid=random -cuid=xyz_123 \
Expand All @@ -49,6 +53,7 @@

// RUN: %clang -### -x hip \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver \
// RUN: --offload-arch=gfx900 \
// RUN: --offload-arch=gfx906 \
// RUN: -c -nogpuinc -nogpulib -fuse-cuid=hash \
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/hip-dependent-options.hip
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: not %clang -### --target=x86_64-linux-gnu \
// RUN: not %clang -### --target=x86_64-linux-gnu --no-offload-new-driver \
// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
// RUN: -c -fhip-emit-relocatable -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
Expand All @@ -7,7 +7,7 @@

// RELOCRDC: error: option '-fhip-emit-relocatable' cannot be specified with '-fgpu-rdc'

// RUN: not %clang -### --target=x86_64-linux-gnu \
// RUN: not %clang -### --target=x86_64-linux-gnu --no-offload-new-driver \
// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
// RUN: -c -fhip-emit-relocatable -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
Expand Down
22 changes: 11 additions & 11 deletions clang/test/Driver/hip-device-compile.hip
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,55 @@
// Output unbundled bitcode.
// RUN: %clang -c -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.bc -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,BC,NBUN %s

// Output bundled bitcode.
// RUN: %clang -c -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.bc -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu --gpu-bundle-output \
// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,BCBUN %s

// Output unbundled LLVM IR.
// RUN: %clang -c -S -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.ll -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LL,NBUN %s

// Output bundled LLVM IR.
// RUN: %clang -c -S -emit-llvm --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.ll -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu --gpu-bundle-output \
// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LLBUN %s

// Output unbundled assembly.
// RUN: %clang -c -S --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.s -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASM,NBUN %s

// Output relocatable.
// RUN: %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 -fhip-emit-relocatable \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,NBUN,RELOC %s

// Output bundled assembly.
// RUN: %clang -c -S --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.s -x hip --cuda-gpu-arch=gfx900 --no-gpu-bundle-output \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu --gpu-bundle-output \
// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASMBUN %s
Expand Down Expand Up @@ -96,31 +96,31 @@
// Output bundled code objects.
// RUN: %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-BUN %s

// Output unbundled code objects.
// RUN: %clang -c --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu --no-gpu-bundle-output \
// RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-UBUN %s

// Output bundled code objects.
// RUN: %clang --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-BUN %s

// Output unbundled code objects.
// RUN: %clang --cuda-device-only -### --target=x86_64-linux-gnu \
// RUN: --rocm-path=%S/Inputs/rocm -o a.o -x hip --cuda-gpu-arch=gfx900 \
// RUN: --hip-device-lib=lib1.bc \
// RUN: --no-offload-new-driver --hip-device-lib=lib1.bc \
// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu --no-gpu-bundle-output \
// RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-UBUN %s
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/hip-link-bc-to-bc.hip
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

// RUN: %clang -### --target=x86_64-unknown-linux-gnu --offload-arch=gfx906 --hip-link \
// RUN: -nogpulib -nogpuinc -emit-llvm -fgpu-rdc --cuda-device-only \
// RUN: %t/bundle1.bc %t/bundle2.bc \
// RUN: --no-offload-new-driver %t/bundle1.bc %t/bundle2.bc \
// RUN: 2>&1 | FileCheck -check-prefix=BITCODE %s

// BITCODE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle1.bc" "-output=[[B1HOST:.*\.bc]]" "-output=[[B1DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
Expand All @@ -24,7 +24,7 @@

// RUN: %clang -### --target=x86_64-unknown-linux-gnu --offload-arch=gfx906 --hip-link \
// RUN: -nogpulib -nogpuinc -emit-llvm -fgpu-rdc --cuda-device-only \
// RUN: %t/bundle.bc -L%t -lhipbundle \
// RUN: --no-offload-new-driver %t/bundle.bc -L%t -lhipbundle \
// RUN: 2>&1 | FileCheck -check-prefix=ARCHIVE %s

// ARCHIVE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle.bc" "-output=[[HOST:.*\.bc]]" "-output=[[DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
Expand Down
22 changes: 11 additions & 11 deletions clang/test/Driver/hip-link-bundle-archive.hip
Original file line number Diff line number Diff line change
Expand Up @@ -9,64 +9,64 @@
// RUN: touch %t/dummy.bc
// RUN: llvm-ar cr %t/libhipBundled.a %t/dummy.bc
// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver --target=x86_64-unknown-linux-gnu \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc -L%t -lhipBundled \
// RUN: 2>&1 | FileCheck -check-prefixes=GNU,GNU1,GNU-L %s

// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 -nogpuinc \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver --target=x86_64-unknown-linux-gnu \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc -L%t -l:libhipBundled.a \
// RUN: 2>&1 | FileCheck -check-prefixes=GNU,GNU1,GNU-LA %s

// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver --target=x86_64-unknown-linux-gnu \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc %t/libhipBundled.a \
// RUN: 2>&1 | FileCheck -check-prefixes=GNU,GNU1,GNU-A %s

// RUN: llvm-ar cr %t/libhipBundled.a.5.2 %t/dummy.bc
// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver --target=x86_64-unknown-linux-gnu \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc %t/libhipBundled.a.5.2 \
// RUN: 2>&1 | FileCheck -check-prefixes=GNU,GNU2,GNU-A %s

// Check that a file which is not an archive is not unbundled.

// RUN: touch %t/libNonArchive.a
// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver --target=x86_64-unknown-linux-gnu \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc -L%t -lNonArchive \
// RUN: 2>&1 | FileCheck -check-prefixes=NONARCHIVE %s
// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver --target=x86_64-unknown-linux-gnu \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc -L%t -l:libNonArchive.a \
// RUN: 2>&1 | FileCheck -check-prefixes=NONARCHIVE %s
// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver --target=x86_64-unknown-linux-gnu \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc -L%t libNonArchive.a \
// RUN: 2>&1 | FileCheck -check-prefixes=NONARCHIVE %s

// Check that a file which does not exist is not unbundled.

// RUN: not %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-unknown-linux-gnu \
// RUN: --no-offload-new-driver --target=x86_64-unknown-linux-gnu \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc %t/NoneExist.a \
// RUN: 2>&1 | FileCheck -check-prefixes=NONE %s

// Check unbundling of an archive for MSVC.

// RUN: llvm-ar cr %t/hipBundled2.lib %t/dummy.bc
// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-pc-windows-msvc -fuse-ld= \
// RUN: --no-offload-new-driver --target=x86_64-pc-windows-msvc -fuse-ld= \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc -L%t -lhipBundled2 \
// RUN: 2>&1 | FileCheck -check-prefix=MSVC %s

// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-pc-windows-msvc -fuse-ld= \
// RUN: --no-offload-new-driver --target=x86_64-pc-windows-msvc -fuse-ld= \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc -L%t -l:hipBundled2.lib \
// RUN: 2>&1 | FileCheck -check-prefix=MSVC %s

// RUN: %clang -### --offload-arch=gfx906 --offload-arch=gfx1030 \
// RUN: --target=x86_64-pc-windows-msvc -fuse-ld= \
// RUN: --no-offload-new-driver --target=x86_64-pc-windows-msvc -fuse-ld= \
// RUN: -nogpuinc -nogpulib %s -fgpu-rdc %t/hipBundled2.lib \
// RUN: 2>&1 | FileCheck -check-prefix=MSVC %s

Expand Down
8 changes: 4 additions & 4 deletions clang/test/Driver/hip-link-save-temps.hip
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
// RUN: touch %t/obj2.o
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
// RUN: --hip-link -o executable -fgpu-rdc --cuda-gpu-arch=gfx900 \
// RUN: --offload-arch=gfx906 %t/obj1.o %t/obj2.o 2>&1 | \
// RUN: --no-offload-new-driver --offload-arch=gfx906 %t/obj1.o %t/obj2.o 2>&1 | \
// RUN: FileCheck -check-prefixes=CHECK,OUT %s

// -fgpu-rdc link without output
// RUN: touch %t/obj1.o
// RUN: touch %t/obj2.o
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
// RUN: --hip-link -fgpu-rdc --cuda-gpu-arch=gfx900 \
// RUN: --no-offload-new-driver --hip-link -fgpu-rdc --cuda-gpu-arch=gfx900 \
// RUN: --offload-arch=gfx906 %t/obj1.o %t/obj2.o 2>&1 | \
// RUN: FileCheck -check-prefixes=CHECK,NOUT %s

Expand All @@ -23,7 +23,7 @@
// RUN: touch %t/obj2.o
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
// RUN: --hip-link -o libTest.a -fgpu-rdc --cuda-gpu-arch=gfx900 \
// RUN: --emit-static-lib \
// RUN: --no-offload-new-driver --emit-static-lib \
// RUN: --offload-arch=gfx906 %t/obj1.o %t/obj2.o 2>&1 | \
// RUN: FileCheck -check-prefixes=CHECK,SLO %s

Expand All @@ -32,7 +32,7 @@
// RUN: touch %t/obj2.o
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -save-temps \
// RUN: --hip-link -fgpu-rdc --cuda-gpu-arch=gfx900 \
// RUN: --emit-static-lib \
// RUN: --no-offload-new-driver --emit-static-lib \
// RUN: --offload-arch=gfx906 %t/obj1.o %t/obj2.o 2>&1 | \
// RUN: FileCheck -check-prefixes=CHECK,SLNO %s

Expand Down
3 changes: 1 addition & 2 deletions clang/test/Driver/hip-link-shared-library.hip
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: touch %t.o
// RUN: %clang --hip-link -ccc-print-bindings --target=x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o %S/Inputs/in.so \
// RUN: -fgpu-rdc 2>&1 | FileCheck %s
// RUN: --no-offload-new-driver -fgpu-rdc 2>&1 | FileCheck %s

// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"]
// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN]]"], outputs: ["{{.*o}}", "[[DOBJ1:.*o]]", "[[DOBJ2:.*o]]"]
Expand All @@ -12,4 +12,3 @@
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBINOBJ:.*o]]"
// CHECK-NOT: offload bundler
// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "{{.*}}/Inputs/in.so", "[[FATBINOBJ]]"], output: "a.out"

Loading