708 changes: 708 additions & 0 deletions bolt/test/X86/dwarf5-debug-names-skip-forward-decl.s

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ CUDA/HIP Language Changes

CUDA Support
^^^^^^^^^^^^
- Clang now supports CUDA SDK up to 12.4
- Clang now supports CUDA SDK up to 12.5

AIX Support
^^^^^^^^^^^
Expand Down
143 changes: 143 additions & 0 deletions clang/include/clang/AST/OpenACCClause.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,149 @@ class OpenACCClause {
virtual ~OpenACCClause() = default;
};

// Represents the 'auto' clause.
class OpenACCAutoClause : public OpenACCClause {
protected:
OpenACCAutoClause(SourceLocation BeginLoc, SourceLocation EndLoc)
: OpenACCClause(OpenACCClauseKind::Auto, BeginLoc, EndLoc) {}

public:
static bool classof(const OpenACCClause *C) {
return C->getClauseKind() == OpenACCClauseKind::Auto;
}

static OpenACCAutoClause *
Create(const ASTContext &Ctx, SourceLocation BeginLoc, SourceLocation EndLoc);

child_range children() {
return child_range(child_iterator(), child_iterator());
}
const_child_range children() const {
return const_child_range(const_child_iterator(), const_child_iterator());
}
};

// Represents the 'independent' clause.
class OpenACCIndependentClause : public OpenACCClause {
protected:
OpenACCIndependentClause(SourceLocation BeginLoc, SourceLocation EndLoc)
: OpenACCClause(OpenACCClauseKind::Independent, BeginLoc, EndLoc) {}

public:
static bool classof(const OpenACCClause *C) {
return C->getClauseKind() == OpenACCClauseKind::Independent;
}

static OpenACCIndependentClause *
Create(const ASTContext &Ctx, SourceLocation BeginLoc, SourceLocation EndLoc);

child_range children() {
return child_range(child_iterator(), child_iterator());
}
const_child_range children() const {
return const_child_range(const_child_iterator(), const_child_iterator());
}
};
// Represents the 'seq' clause.
class OpenACCSeqClause : public OpenACCClause {
protected:
OpenACCSeqClause(SourceLocation BeginLoc, SourceLocation EndLoc)
: OpenACCClause(OpenACCClauseKind::Seq, BeginLoc, EndLoc) {}

public:
static bool classof(const OpenACCClause *C) {
return C->getClauseKind() == OpenACCClauseKind::Seq;
}

static OpenACCSeqClause *
Create(const ASTContext &Ctx, SourceLocation BeginLoc, SourceLocation EndLoc);

child_range children() {
return child_range(child_iterator(), child_iterator());
}
const_child_range children() const {
return const_child_range(const_child_iterator(), const_child_iterator());
}
};

// Not yet implemented, but the type name is necessary for 'seq' diagnostics, so
// this provides a basic, do-nothing implementation. We still need to add this
// type to the visitors/etc, as well as get it to take its proper arguments.
class OpenACCGangClause : public OpenACCClause {
protected:
OpenACCGangClause(SourceLocation BeginLoc, SourceLocation EndLoc)
: OpenACCClause(OpenACCClauseKind::Gang, BeginLoc, EndLoc) {
llvm_unreachable("Not yet implemented");
}

public:
static bool classof(const OpenACCClause *C) {
return C->getClauseKind() == OpenACCClauseKind::Gang;
}

static OpenACCGangClause *
Create(const ASTContext &Ctx, SourceLocation BeginLoc, SourceLocation EndLoc);

child_range children() {
return child_range(child_iterator(), child_iterator());
}
const_child_range children() const {
return const_child_range(const_child_iterator(), const_child_iterator());
}
};

// Not yet implemented, but the type name is necessary for 'seq' diagnostics, so
// this provides a basic, do-nothing implementation. We still need to add this
// type to the visitors/etc, as well as get it to take its proper arguments.
class OpenACCVectorClause : public OpenACCClause {
protected:
OpenACCVectorClause(SourceLocation BeginLoc, SourceLocation EndLoc)
: OpenACCClause(OpenACCClauseKind::Vector, BeginLoc, EndLoc) {
llvm_unreachable("Not yet implemented");
}

public:
static bool classof(const OpenACCClause *C) {
return C->getClauseKind() == OpenACCClauseKind::Gang;
}

static OpenACCVectorClause *
Create(const ASTContext &Ctx, SourceLocation BeginLoc, SourceLocation EndLoc);

child_range children() {
return child_range(child_iterator(), child_iterator());
}
const_child_range children() const {
return const_child_range(const_child_iterator(), const_child_iterator());
}
};

// Not yet implemented, but the type name is necessary for 'seq' diagnostics, so
// this provides a basic, do-nothing implementation. We still need to add this
// type to the visitors/etc, as well as get it to take its proper arguments.
class OpenACCWorkerClause : public OpenACCClause {
protected:
OpenACCWorkerClause(SourceLocation BeginLoc, SourceLocation EndLoc)
: OpenACCClause(OpenACCClauseKind::Gang, BeginLoc, EndLoc) {
llvm_unreachable("Not yet implemented");
}

public:
static bool classof(const OpenACCClause *C) {
return C->getClauseKind() == OpenACCClauseKind::Gang;
}

static OpenACCWorkerClause *
Create(const ASTContext &Ctx, SourceLocation BeginLoc, SourceLocation EndLoc);

child_range children() {
return child_range(child_iterator(), child_iterator());
}
const_child_range children() const {
return const_child_range(const_child_iterator(), const_child_iterator());
}
};

/// Represents a clause that has a list of parameters.
class OpenACCClauseWithParams : public OpenACCClause {
/// Location of the '('.
Expand Down
5 changes: 4 additions & 1 deletion clang/include/clang/Basic/BuiltinsNVPTX.def
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@
#pragma push_macro("PTX82")
#pragma push_macro("PTX83")
#pragma push_macro("PTX84")
#define PTX84 "ptx84"
#pragma push_macro("PTX85")
#define PTX85 "ptx85"
#define PTX84 "ptx84|" PTX85
#define PTX83 "ptx83|" PTX84
#define PTX82 "ptx82|" PTX83
#define PTX81 "ptx81|" PTX82
Expand Down Expand Up @@ -1094,3 +1096,4 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("PTX82")
#pragma pop_macro("PTX83")
#pragma pop_macro("PTX84")
#pragma pop_macro("PTX85")
3 changes: 2 additions & 1 deletion clang/include/clang/Basic/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,10 @@ enum class CudaVersion {
CUDA_122,
CUDA_123,
CUDA_124,
CUDA_125,
FULLY_SUPPORTED = CUDA_123,
PARTIALLY_SUPPORTED =
CUDA_124, // Partially supported. Proceed with a warning.
CUDA_125, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -12399,6 +12399,9 @@ def note_acc_expected_pointer_var : Note<"expected variable of pointer type">;
def err_acc_clause_after_device_type
: Error<"OpenACC clause '%0' may not follow a '%1' clause in a "
"%select{'%3'|compute}2 construct">;
def err_acc_clause_cannot_combine
: Error<"OpenACC clause '%0' may not appear on the same construct as a "
"'%1' clause on a 'loop' construct">;
def err_acc_reduction_num_gangs_conflict
: Error<
"OpenACC 'reduction' clause may not appear on a 'parallel' construct "
Expand All @@ -12416,6 +12419,9 @@ def note_acc_reduction_composite_member_loc : Note<"invalid field is here">;
def err_acc_loop_not_for_loop
: Error<"OpenACC 'loop' construct can only be applied to a 'for' loop">;
def note_acc_construct_here : Note<"'%0' construct is here">;
def err_acc_loop_spec_conflict
: Error<"OpenACC clause '%0' on '%1' construct conflicts with previous "
"data dependence clause">;

// AMDGCN builtins diagnostics
def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">;
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/OpenACCClauses.def
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define CLAUSE_ALIAS(ALIAS_NAME, CLAUSE_NAME)
#endif

VISIT_CLAUSE(Auto)
VISIT_CLAUSE(Async)
VISIT_CLAUSE(Attach)
VISIT_CLAUSE(Copy)
Expand All @@ -41,13 +42,15 @@ VISIT_CLAUSE(DeviceType)
CLAUSE_ALIAS(DType, DeviceType)
VISIT_CLAUSE(FirstPrivate)
VISIT_CLAUSE(If)
VISIT_CLAUSE(Independent)
VISIT_CLAUSE(NoCreate)
VISIT_CLAUSE(NumGangs)
VISIT_CLAUSE(NumWorkers)
VISIT_CLAUSE(Present)
VISIT_CLAUSE(Private)
VISIT_CLAUSE(Reduction)
VISIT_CLAUSE(Self)
VISIT_CLAUSE(Seq)
VISIT_CLAUSE(VectorLength)
VISIT_CLAUSE(Wait)

Expand Down
55 changes: 55 additions & 0 deletions clang/lib/AST/OpenACCClause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,48 @@ OpenACCReductionClause *OpenACCReductionClause::Create(
OpenACCReductionClause(BeginLoc, LParenLoc, Operator, VarList, EndLoc);
}

OpenACCAutoClause *OpenACCAutoClause::Create(const ASTContext &C,
SourceLocation BeginLoc,
SourceLocation EndLoc) {
void *Mem = C.Allocate(sizeof(OpenACCAutoClause));
return new (Mem) OpenACCAutoClause(BeginLoc, EndLoc);
}

OpenACCIndependentClause *
OpenACCIndependentClause::Create(const ASTContext &C, SourceLocation BeginLoc,
SourceLocation EndLoc) {
void *Mem = C.Allocate(sizeof(OpenACCIndependentClause));
return new (Mem) OpenACCIndependentClause(BeginLoc, EndLoc);
}

OpenACCSeqClause *OpenACCSeqClause::Create(const ASTContext &C,
SourceLocation BeginLoc,
SourceLocation EndLoc) {
void *Mem = C.Allocate(sizeof(OpenACCSeqClause));
return new (Mem) OpenACCSeqClause(BeginLoc, EndLoc);
}

OpenACCGangClause *OpenACCGangClause::Create(const ASTContext &C,
SourceLocation BeginLoc,
SourceLocation EndLoc) {
void *Mem = C.Allocate(sizeof(OpenACCGangClause));
return new (Mem) OpenACCGangClause(BeginLoc, EndLoc);
}

OpenACCWorkerClause *OpenACCWorkerClause::Create(const ASTContext &C,
SourceLocation BeginLoc,
SourceLocation EndLoc) {
void *Mem = C.Allocate(sizeof(OpenACCWorkerClause));
return new (Mem) OpenACCWorkerClause(BeginLoc, EndLoc);
}

OpenACCVectorClause *OpenACCVectorClause::Create(const ASTContext &C,
SourceLocation BeginLoc,
SourceLocation EndLoc) {
void *Mem = C.Allocate(sizeof(OpenACCVectorClause));
return new (Mem) OpenACCVectorClause(BeginLoc, EndLoc);
}

//===----------------------------------------------------------------------===//
// OpenACC clauses printing methods
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -495,3 +537,16 @@ void OpenACCClausePrinter::VisitDeviceTypeClause(
});
OS << ")";
}

void OpenACCClausePrinter::VisitAutoClause(const OpenACCAutoClause &C) {
OS << "auto";
}

void OpenACCClausePrinter::VisitIndependentClause(
const OpenACCIndependentClause &C) {
OS << "independent";
}

void OpenACCClausePrinter::VisitSeqClause(const OpenACCSeqClause &C) {
OS << "seq";
}
7 changes: 7 additions & 0 deletions clang/lib/AST/StmtProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2589,6 +2589,13 @@ void OpenACCClauseProfiler::VisitWaitClause(const OpenACCWaitClause &Clause) {
void OpenACCClauseProfiler::VisitDeviceTypeClause(
const OpenACCDeviceTypeClause &Clause) {}

void OpenACCClauseProfiler::VisitAutoClause(const OpenACCAutoClause &Clause) {}

void OpenACCClauseProfiler::VisitIndependentClause(
const OpenACCIndependentClause &Clause) {}

void OpenACCClauseProfiler::VisitSeqClause(const OpenACCSeqClause &Clause) {}

void OpenACCClauseProfiler::VisitReductionClause(
const OpenACCReductionClause &Clause) {
for (auto *E : Clause.getVarList())
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/AST/TextNodeDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,11 +398,13 @@ void TextNodeDumper::Visit(const OpenACCClause *C) {
OS << '(' << cast<OpenACCDefaultClause>(C)->getDefaultClauseKind() << ')';
break;
case OpenACCClauseKind::Async:
case OpenACCClauseKind::Auto:
case OpenACCClauseKind::Attach:
case OpenACCClauseKind::Copy:
case OpenACCClauseKind::PCopy:
case OpenACCClauseKind::PresentOrCopy:
case OpenACCClauseKind::If:
case OpenACCClauseKind::Independent:
case OpenACCClauseKind::DevicePtr:
case OpenACCClauseKind::FirstPrivate:
case OpenACCClauseKind::NoCreate:
Expand All @@ -411,6 +413,7 @@ void TextNodeDumper::Visit(const OpenACCClause *C) {
case OpenACCClauseKind::Present:
case OpenACCClauseKind::Private:
case OpenACCClauseKind::Self:
case OpenACCClauseKind::Seq:
case OpenACCClauseKind::VectorLength:
// The condition expression will be printed as a part of the 'children',
// but print 'clause' here so it is clear what is happening from the dump.
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(12, 2),
CUDA_ENTRY(12, 3),
CUDA_ENTRY(12, 4),
CUDA_ENTRY(12, 5),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_123;
if (raw_version < 12050)
return CudaVersion::CUDA_124;
if (raw_version < 12060)
return CudaVersion::CUDA_125;
return CudaVersion::NEW;
}

Expand Down Expand Up @@ -690,6 +692,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
CASE_CUDA_VERSION(125, 85);
CASE_CUDA_VERSION(124, 84);
CASE_CUDA_VERSION(123, 83);
CASE_CUDA_VERSION(122, 82);
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Parse/ParseOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,8 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams(
// clause, as we are a 'single token' clause.
ParsedClause.setEndLoc(ClauseLoc);
}
} else {
ParsedClause.setEndLoc(ClauseLoc);
}
return OpenACCSuccess(
Actions.OpenACC().ActOnClause(ExistingClauses, ParsedClause));
Expand Down
123 changes: 123 additions & 0 deletions clang/lib/Sema/SemaOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,30 @@ bool doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind,
return false;
}

case OpenACCClauseKind::Seq:
switch (DirectiveKind) {
case OpenACCDirectiveKind::Loop:
case OpenACCDirectiveKind::Routine:
case OpenACCDirectiveKind::ParallelLoop:
case OpenACCDirectiveKind::SerialLoop:
case OpenACCDirectiveKind::KernelsLoop:
return true;
default:
return false;
}

case OpenACCClauseKind::Independent:
case OpenACCClauseKind::Auto:
switch (DirectiveKind) {
case OpenACCDirectiveKind::Loop:
case OpenACCDirectiveKind::ParallelLoop:
case OpenACCDirectiveKind::SerialLoop:
case OpenACCDirectiveKind::KernelsLoop:
return true;
default:
return false;
}

case OpenACCClauseKind::Reduction:
switch (DirectiveKind) {
case OpenACCDirectiveKind::Parallel:
Expand Down Expand Up @@ -866,6 +890,102 @@ SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses,
Clause.getLParenLoc(), Clause.getDeviceTypeArchitectures(),
Clause.getEndLoc());
}
case OpenACCClauseKind::Auto: {
// Restrictions only properly implemented on 'loop' constructs, and it is
// the only construct that can do anything with this, so skip/treat as
// unimplemented for the combined constructs.
if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
break;

// OpenACC 3.3 2.9:
// Only one of the seq, independent, and auto clauses may appear.
const auto *Itr = llvm::find_if(
ExistingClauses,
llvm::IsaPred<OpenACCIndependentClause, OpenACCSeqClause>);
if (Itr != ExistingClauses.end()) {
Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict)
<< Clause.getClauseKind() << Clause.getDirectiveKind();
Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
}

return OpenACCAutoClause::Create(getASTContext(), Clause.getBeginLoc(),
Clause.getEndLoc());
}
case OpenACCClauseKind::Independent: {
// Restrictions only properly implemented on 'loop' constructs, and it is
// the only construct that can do anything with this, so skip/treat as
// unimplemented for the combined constructs.
if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
break;

// OpenACC 3.3 2.9:
// Only one of the seq, independent, and auto clauses may appear.
const auto *Itr = llvm::find_if(
ExistingClauses, llvm::IsaPred<OpenACCAutoClause, OpenACCSeqClause>);
if (Itr != ExistingClauses.end()) {
Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict)
<< Clause.getClauseKind() << Clause.getDirectiveKind();
Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
}

return OpenACCIndependentClause::Create(
getASTContext(), Clause.getBeginLoc(), Clause.getEndLoc());
}
case OpenACCClauseKind::Seq: {
// Restrictions only properly implemented on 'loop' constructs, and it is
// the only construct that can do anything with this, so skip/treat as
// unimplemented for the combined constructs.
if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop)
break;

// OpenACC 3.3 2.9:
// Only one of the seq, independent, and auto clauses may appear.
const auto *Itr = llvm::find_if(
ExistingClauses,
llvm::IsaPred<OpenACCAutoClause, OpenACCIndependentClause>);
if (Itr != ExistingClauses.end()) {
Diag(Clause.getBeginLoc(), diag::err_acc_loop_spec_conflict)
<< Clause.getClauseKind() << Clause.getDirectiveKind();
Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
}

// OpenACC 3.3 2.9:
// A 'gang', 'worker', or 'vector' clause may not appear if a 'seq' clause
// appears.
Itr = llvm::find_if(ExistingClauses,
llvm::IsaPred<OpenACCGangClause, OpenACCWorkerClause,
OpenACCVectorClause>);

if (Itr != ExistingClauses.end()) {
Diag(Clause.getBeginLoc(), diag::err_acc_clause_cannot_combine)
<< Clause.getClauseKind() << (*Itr)->getClauseKind();
Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
}

// TODO OpenACC: 2.9 ~ line 2010 specifies that the associated loop has some
// restrictions when there is a 'seq' clause in place. We probably need to
// implement that.
return OpenACCSeqClause::Create(getASTContext(), Clause.getBeginLoc(),
Clause.getEndLoc());
}
case OpenACCClauseKind::Gang:
case OpenACCClauseKind::Worker:
case OpenACCClauseKind::Vector: {
// OpenACC 3.3 2.9:
// A 'gang', 'worker', or 'vector' clause may not appear if a 'seq' clause
// appears.
const auto *Itr =
llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCSeqClause>);

if (Itr != ExistingClauses.end()) {
Diag(Clause.getBeginLoc(), diag::err_acc_clause_cannot_combine)
<< Clause.getClauseKind() << (*Itr)->getClauseKind();
Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
}
// Not yet implemented, so immediately drop to the 'not yet implemented'
// diagnostic.
break;
}
case OpenACCClauseKind::Reduction: {
// Restrictions only properly implemented on 'compute' constructs, and
// 'compute' constructs are the only construct that can do anything with
Expand Down Expand Up @@ -1461,6 +1581,9 @@ StmtResult SemaOpenACC::ActOnAssociatedStmt(SourceLocation DirectiveLoc,
Diag(DirectiveLoc, diag::note_acc_construct_here) << K;
return StmtError();
}
// TODO OpenACC: 2.9 ~ line 2010 specifies that the associated loop has some
// restrictions when there is a 'seq' clause in place. We probably need to
// implement that, including piping in the clauses here.
return AssocStmt;
}
llvm_unreachable("Invalid associated statement application");
Expand Down
25 changes: 25 additions & 0 deletions clang/lib/Sema/TreeTransform.h
Original file line number Diff line number Diff line change
Expand Up @@ -11496,6 +11496,31 @@ void OpenACCClauseTransform<Derived>::VisitDeviceTypeClause(
C.getArchitectures(), ParsedClause.getEndLoc());
}

template <typename Derived>
void OpenACCClauseTransform<Derived>::VisitAutoClause(
const OpenACCAutoClause &C) {
// Nothing to do, so just create a new node.
NewClause = OpenACCAutoClause::Create(Self.getSema().getASTContext(),
ParsedClause.getBeginLoc(),
ParsedClause.getEndLoc());
}

template <typename Derived>
void OpenACCClauseTransform<Derived>::VisitIndependentClause(
const OpenACCIndependentClause &C) {
NewClause = OpenACCIndependentClause::Create(Self.getSema().getASTContext(),
ParsedClause.getBeginLoc(),
ParsedClause.getEndLoc());
}

template <typename Derived>
void OpenACCClauseTransform<Derived>::VisitSeqClause(
const OpenACCSeqClause &C) {
NewClause = OpenACCSeqClause::Create(Self.getSema().getASTContext(),
ParsedClause.getBeginLoc(),
ParsedClause.getEndLoc());
}

template <typename Derived>
void OpenACCClauseTransform<Derived>::VisitReductionClause(
const OpenACCReductionClause &C) {
Expand Down
9 changes: 6 additions & 3 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11944,12 +11944,15 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
return OpenACCReductionClause::Create(getContext(), BeginLoc, LParenLoc, Op,
VarList, EndLoc);
}

case OpenACCClauseKind::Finalize:
case OpenACCClauseKind::IfPresent:
case OpenACCClauseKind::Seq:
return OpenACCSeqClause::Create(getContext(), BeginLoc, EndLoc);
case OpenACCClauseKind::Independent:
return OpenACCIndependentClause::Create(getContext(), BeginLoc, EndLoc);
case OpenACCClauseKind::Auto:
return OpenACCAutoClause::Create(getContext(), BeginLoc, EndLoc);

case OpenACCClauseKind::Finalize:
case OpenACCClauseKind::IfPresent:
case OpenACCClauseKind::Worker:
case OpenACCClauseKind::Vector:
case OpenACCClauseKind::NoHost:
Expand Down
9 changes: 6 additions & 3 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7975,12 +7975,15 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) {
writeOpenACCVarList(RC);
return;
}

case OpenACCClauseKind::Finalize:
case OpenACCClauseKind::IfPresent:
case OpenACCClauseKind::Seq:
case OpenACCClauseKind::Independent:
case OpenACCClauseKind::Auto:
// Nothing to do here, there is no additional information beyond the
// begin/end loc and clause kind.
return;

case OpenACCClauseKind::Finalize:
case OpenACCClauseKind::IfPresent:
case OpenACCClauseKind::Worker:
case OpenACCClauseKind::Vector:
case OpenACCClauseKind::NoHost:
Expand Down
16 changes: 16 additions & 0 deletions clang/test/AST/ast-print-openacc-loop-construct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,20 @@ void foo() {
// CHECK-NEXT: ;
#pragma acc loop dtype(AnotherIdent)
for(;;);

// CHECK: #pragma acc loop independent
// CHECK-NEXT: for (;;)
// CHECK-NEXT: ;
#pragma acc loop independent
for(;;);
// CHECK: #pragma acc loop seq
// CHECK-NEXT: for (;;)
// CHECK-NEXT: ;
#pragma acc loop seq
for(;;);
// CHECK: #pragma acc loop auto
// CHECK-NEXT: for (;;)
// CHECK-NEXT: ;
#pragma acc loop auto
for(;;);
}
478 changes: 199 additions & 279 deletions clang/test/ParserOpenACC/parse-clauses.c

Large diffs are not rendered by default.

26 changes: 12 additions & 14 deletions clang/test/SemaOpenACC/compute-construct-default-clause.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,31 @@ void SingleOnly() {
#pragma acc parallel default(none)
while(0);

// expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
// expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'serial' directive}}
// expected-note@+1{{previous clause is here}}
#pragma acc serial default(present) seq default(none)
#pragma acc serial default(present) self default(none)
while(0);

// expected-warning@+5{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+4{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
int i;

// expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'kernels' directive}}
// expected-note@+1{{previous clause is here}}
#pragma acc kernels seq default(present) seq default(none) seq
#pragma acc kernels self default(present) present(i) default(none) copy(i)
while(0);

// expected-warning@+6{{OpenACC construct 'parallel loop' not yet implemented}}
// expected-warning@+5{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+4{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+5{{OpenACC clause 'self' not yet implemented}}
// expected-warning@+4{{OpenACC clause 'default' not yet implemented}}
// expected-warning@+3{{OpenACC clause 'private' not yet implemented}}
// expected-warning@+2{{OpenACC clause 'default' not yet implemented}}
// expected-warning@+1{{OpenACC clause 'default' not yet implemented}}
#pragma acc parallel loop seq default(present) seq default(none) seq
// expected-warning@+1{{OpenACC clause 'copy' not yet implemented}}
#pragma acc parallel loop self default(present) private(i) default(none) copy(i)
while(0);

// expected-warning@+3{{OpenACC construct 'serial loop' not yet implemented}}
// expected-warning@+2{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+3{{OpenACC clause 'self' not yet implemented, clause ignored}}
// expected-warning@+2{{OpenACC construct 'serial loop' not yet implemented}}
// expected-error@+1{{expected '('}}
#pragma acc serial loop seq default seq default(none) seq
#pragma acc serial loop self default private(i) default(none) if(i)
while(0);

// expected-warning@+2{{OpenACC construct 'kernels loop' not yet implemented}}
Expand Down
19 changes: 6 additions & 13 deletions clang/test/SemaOpenACC/compute-construct-default-clause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,25 @@ void SingleOnly() {
#pragma acc parallel default(none)
while(false);

// expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
int i;

// expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'parallel' directive}}
// expected-note@+1{{previous clause is here}}
#pragma acc parallel default(present) seq default(none)
#pragma acc parallel default(present) async default(none)
while(false);

// expected-warning@+5{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+4{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
// expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'serial' directive}}
// expected-note@+1{{previous clause is here}}
#pragma acc serial seq default(present) seq default(none) seq
#pragma acc serial async default(present) copy(i) default(none) self
while(false);

// expected-warning@+5{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+4{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
// expected-error@+2{{OpenACC 'default' clause cannot appear more than once on a 'kernels' directive}}
// expected-note@+1{{previous clause is here}}
#pragma acc kernels seq default(present) seq default(none) seq
#pragma acc kernels async default(present) copy(i) default(none) self
while(false);

// expected-warning@+3{{OpenACC clause 'seq' not yet implemented}}
// expected-warning@+2{{OpenACC clause 'seq' not yet implemented}}
// expected-error@+1{{expected '('}}
#pragma acc parallel seq default(none) seq default seq
#pragma acc parallel async default(none) copy(i) default self
while(false);
}

Expand Down
9 changes: 3 additions & 6 deletions clang/test/SemaOpenACC/compute-construct-device_type-clause.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,13 @@ void uses() {
// expected-note@+1{{previous clause is here}}
#pragma acc kernels device_type(*) if_present
while(1);
// expected-error@+2{{OpenACC clause 'seq' may not follow a 'device_type' clause in a compute construct}}
// expected-note@+1{{previous clause is here}}
// expected-error@+1{{OpenACC 'seq' clause is not valid on 'kernels' directive}}
#pragma acc kernels device_type(*) seq
while(1);
// expected-error@+2{{OpenACC clause 'independent' may not follow a 'device_type' clause in a compute construct}}
// expected-note@+1{{previous clause is here}}
// expected-error@+1{{OpenACC 'independent' clause is not valid on 'kernels' directive}}
#pragma acc kernels device_type(*) independent
while(1);
// expected-error@+2{{OpenACC clause 'auto' may not follow a 'device_type' clause in a compute construct}}
// expected-note@+1{{previous clause is here}}
// expected-error@+1{{OpenACC 'auto' clause is not valid on 'kernels' directive}}
#pragma acc kernels device_type(*) auto
while(1);
// expected-error@+2{{OpenACC clause 'worker' may not follow a 'device_type' clause in a compute construct}}
Expand Down
124 changes: 124 additions & 0 deletions clang/test/SemaOpenACC/loop-construct-auto_seq_independent-ast.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// RUN: %clang_cc1 %s -fopenacc -ast-dump | FileCheck %s

// Test this with PCH.
// RUN: %clang_cc1 %s -fopenacc -emit-pch -o %t %s
// RUN: %clang_cc1 %s -fopenacc -include-pch %t -ast-dump-all | FileCheck %s
#ifndef PCH_HELPER
#define PCH_HELPER

void NormalUses() {
// CHECK: FunctionDecl{{.*}}NormalUses
// CHECK-NEXT: CompoundStmt

#pragma acc loop auto
for(;;){}
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}
// CHECK-NEXT: auto clause
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: CompoundStmt

#pragma acc loop seq
for(;;){}
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}
// CHECK-NEXT: seq clause
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: CompoundStmt

#pragma acc loop independent
for(;;){}
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}
// CHECK-NEXT: independent clause
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: CompoundStmt
}

template<typename T>
void TemplUses() {
// CHECK: FunctionTemplateDecl{{.*}}TemplUses
// CHECK-NEXT: TemplateTypeParmDecl
// CHECK-NEXT: FunctionDecl{{.*}} TemplUses
// CHECK-NEXT: CompoundStmt

#pragma acc loop auto
for(;;){}
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}
// CHECK-NEXT: auto clause
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: CompoundStmt

#pragma acc loop seq
for(;;){}
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}
// CHECK-NEXT: seq clause
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: CompoundStmt

#pragma acc loop independent
for(;;){}
// CHECK-NEXT: OpenACCLoopConstruct{{.*}}
// CHECK-NEXT: independent clause
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: CompoundStmt

// Instantiations.
// CHECK-NEXT: FunctionDecl{{.*}}TemplUses 'void ()' implicit_instantiation
// CHECK-NEXT: TemplateArgument
// CHECK-NEXT: BuiltinType
// CHECK-NEXT: CompoundStmt

// CHECK-NEXT: OpenACCLoopConstruct{{.*}}
// CHECK-NEXT: auto clause
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: CompoundStmt

// CHECK-NEXT: OpenACCLoopConstruct{{.*}}
// CHECK-NEXT: seq clause
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: CompoundStmt

// CHECK-NEXT: OpenACCLoopConstruct{{.*}}
// CHECK-NEXT: independent clause
// CHECK-NEXT: ForStmt
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: <<<NULL>>>
// CHECK-NEXT: CompoundStmt
}

void Inst() {
TemplUses<int>();
}
#endif // PCH_HELPER
896 changes: 896 additions & 0 deletions clang/test/SemaOpenACC/loop-construct-auto_seq_independent-clauses.c

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions clang/test/SemaOpenACC/loop-construct-device_type-clause.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,10 @@ void uses() {
// expected-note@+1{{previous clause is here}}
#pragma acc loop device_type(*) if_present
for(;;);
// expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}}
#pragma acc loop device_type(*) seq
for(;;);
// expected-warning@+1{{OpenACC clause 'independent' not yet implemented, clause ignored}}
#pragma acc loop device_type(*) independent
for(;;);
// expected-warning@+1{{OpenACC clause 'auto' not yet implemented, clause ignored}}
#pragma acc loop device_type(*) auto
for(;;);
// expected-warning@+1{{OpenACC clause 'worker' not yet implemented, clause ignored}}
Expand Down
4 changes: 4 additions & 0 deletions clang/tools/libclang/CIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2860,6 +2860,10 @@ void OpenACCClauseEnqueue::VisitReductionClause(
const OpenACCReductionClause &C) {
VisitVarList(C);
}
void OpenACCClauseEnqueue::VisitAutoClause(const OpenACCAutoClause &C) {}
void OpenACCClauseEnqueue::VisitIndependentClause(
const OpenACCIndependentClause &C) {}
void OpenACCClauseEnqueue::VisitSeqClause(const OpenACCSeqClause &C) {}
} // namespace

void EnqueueVisitor::EnqueueChildren(const OpenACCClause *C) {
Expand Down
6 changes: 5 additions & 1 deletion lld/ELF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,11 @@ static bool shouldSkip(SectionCommand *cmd) {
static SmallVectorImpl<SectionCommand *>::iterator
findOrphanPos(SmallVectorImpl<SectionCommand *>::iterator b,
SmallVectorImpl<SectionCommand *>::iterator e) {
// Place non-alloc orphan sections at the end. This matches how we assign file
// offsets to non-alloc sections.
OutputSection *sec = &cast<OutputDesc>(*e)->osec;
if (!(sec->flags & SHF_ALLOC))
return e;

// As a special case, place .relro_padding before the SymbolAssignment using
// DATA_SEGMENT_RELRO_END, if present.
Expand Down Expand Up @@ -1311,8 +1315,8 @@ template <class ELFT> void Writer<ELFT>::sortOrphanSections() {
i = firstSectionOrDotAssignment;

while (nonScriptI != e) {
auto pos = findOrphanPos(i, nonScriptI);
OutputSection *orphan = &cast<OutputDesc>(*nonScriptI)->osec;
auto pos = findOrphanPos(i, nonScriptI);

// As an optimization, find all sections with the same sort rank
// and insert them with one rotate.
Expand Down
13 changes: 7 additions & 6 deletions lld/test/ELF/linkerscript/memory-nonalloc-no-warn.test
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@
# CHECK-NEXT: [ 3] .intvec0_out PROGBITS 0000000000000000 002068 000000 00 W 0 0 1
# CHECK-NEXT: [ 4] .intvec1_out PROGBITS 0000000000000000 002068 000000 00 W 0 0 1
# CHECK-NEXT: [ 5] .intvec2_out PROGBITS 0000000000000000 002068 000000 00 W 0 0 1
# CHECK-NEXT: [ 6] .comment PROGBITS 0000000000000000 {{.*}} {{.*}} 01 MS 0 0 1
# CHECK-NEXT: [ 7] .symtab SYMTAB 0000000000000000 {{.*}} {{.*}} 18 9 1 8
# CHECK-NEXT: [ 8] .shstrtab STRTAB 0000000000000000 {{.*}} {{.*}} 00 0 0 1
# CHECK-NEXT: [ 9] .strtab STRTAB 0000000000000000 {{.*}} {{.*}} 00 0 0 1
# CHECK-NEXT: [10] .intvec3_out PROGBITS 00000000803fe060 001060 000004 00 AX 0 0 1
# CHECK-NEXT: [11] .text PROGBITS 00000000803fe064 001064 000000 00 AX 0 0 4
# CHECK-NEXT: [ 6] .intvec3_out PROGBITS 00000000803fe060 001060 000004 00 AX 0 0 1
# CHECK-NEXT: [ 7] .text PROGBITS 00000000803fe064 001064 000000 00 AX 0 0 4
# CHECK-NEXT: [ 8] .comment PROGBITS 0000000000000000 {{.*}} {{.*}} 01 MS 0 0 1
# CHECK-NEXT: [ 9] .symtab SYMTAB 0000000000000000 {{.*}} {{.*}} 18 11 1 8
# CHECK-NEXT: [10] .shstrtab STRTAB 0000000000000000 {{.*}} {{.*}} 00 0 0 1
# CHECK-NEXT: [11] .strtab STRTAB 0000000000000000 {{.*}} {{.*}} 00 0 0 1


#--- a.s
.global _start
Expand Down
28 changes: 14 additions & 14 deletions lld/test/ELF/linkerscript/sections-nonalloc.s
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@
# CHECK-NEXT: [ 2] data1 PROGBITS 0000000000000001 001001 000001 00 WA 0
# CHECK-NEXT: [ 3] other1 PROGBITS 0000000000000000 001008 000001 00 0
# CHECK-NEXT: [ 4] other2 PROGBITS 0000000000000000 001010 000001 00 0
## Orphan placement places other3, .symtab, .shstrtab and .strtab after other2.
# CHECK-NEXT: [ 5] other3 PROGBITS 0000000000000000 001020 000001 00 0
# CHECK-NEXT: [ 6] .symtab SYMTAB 0000000000000000 001028 000030 18 8
# CHECK-NEXT: [ 7] .shstrtab STRTAB 0000000000000000 001058 00004d 00 0
# CHECK-NEXT: [ 8] .strtab STRTAB 0000000000000000 0010a5 000008 00 0
# CHECK-NEXT: [ 9] data2 PROGBITS 0000000000000002 001002 000001 00 WA 0
# CHECK-NEXT: [ 5] data2 PROGBITS 0000000000000002 001002 000001 00 WA 0
## max{sortRank(data1),sortRank(data2)} <= sortRank(data3). data3 is placed after the latter.
# CHECK-NEXT: [10] data3 PROGBITS 0000000000000003 001003 000001 00 WA 0
# CHECK-NEXT: [11] .text PROGBITS 0000000000000004 001004 000001 00 AX 0
# CHECK-NEXT: [ 6] data3 PROGBITS 0000000000000003 001003 000001 00 WA 0
# CHECK-NEXT: [ 7] .text PROGBITS 0000000000000004 001004 000001 00 AX 0
## Non-alloc orphan sections other3, .symtab, .shstrtab and .strtab are placed at the end.
# CHECK-NEXT: [ 8] other3 PROGBITS 0000000000000000 001020 000001 00 0
# CHECK-NEXT: [ 9] .symtab SYMTAB 0000000000000000 001028 000030 18 11
# CHECK-NEXT: [10] .shstrtab STRTAB 0000000000000000 001058 00004d 00 0
# CHECK-NEXT: [11] .strtab STRTAB 0000000000000000 0010a5 000008 00 0

# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
# CHECK-NEXT: LOAD 0x001000 0x0000000000000000 0x0000000000000000 0x000004 0x000004 RW 0x1000
Expand All @@ -41,12 +41,12 @@
# CHECK1-NEXT: [ 3] data1 PROGBITS 00000000000000b2 0000b2 000001 00 WA 0
# CHECK1-NEXT: [ 4] other1 PROGBITS 0000000000000000 0000b8 000001 00 0
# CHECK1-NEXT: [ 5] other2 PROGBITS 0000000000000000 0000c0 000001 00 0
# CHECK1-NEXT: [ 6] other3 PROGBITS 0000000000000000 0000d0 000001 00 0
# CHECK1-NEXT: [ 7] .symtab SYMTAB 0000000000000000 0000d8 000030 18 9
# CHECK1-NEXT: [ 8] .shstrtab STRTAB 0000000000000000 000108 00004d 00 0
# CHECK1-NEXT: [ 9] .strtab STRTAB 0000000000000000 000155 000008 00 0
# CHECK1-NEXT: [10] data2 PROGBITS 00000000000000b3 0000b3 000001 00 WA 0
# CHECK1-NEXT: [11] data3 PROGBITS 00000000000000b4 0000b4 000001 00 WA 0
# CHECK1-NEXT: [ 6] data2 PROGBITS 00000000000000b3 0000b3 000001 00 WA 0
# CHECK1-NEXT: [ 7] data3 PROGBITS 00000000000000b4 0000b4 000001 00 WA 0
# CHECK1-NEXT: [ 8] other3 PROGBITS 0000000000000000 0000d0 000001 00 0
# CHECK1-NEXT: [ 9] .symtab SYMTAB 0000000000000000 0000d8 000030 18 11
# CHECK1-NEXT: [10] .shstrtab STRTAB 0000000000000000 000108 00004d 00 0
# CHECK1-NEXT: [11] .strtab STRTAB 0000000000000000 000155 000008 00 0
# CHECK1: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
# CHECK1-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0000b5 0x0000b5 RWE 0x1000
# CHECK1-NEXT: 0x60000000 0x0000b8 0x0000000000000000 0x0000000000000000 0x000009 0x000001 0x8
Expand Down
4 changes: 2 additions & 2 deletions lld/test/ELF/linkerscript/sections.s
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@
# SEP-BY-NONALLOC: [ 1] .text PROGBITS 0000000000000000 001000 00000e 00 AX
# SEP-BY-NONALLOC-NEXT: [ 2] .data PROGBITS 000000000000000e 00100e 000020 00 WA
# SEP-BY-NONALLOC-NEXT: [ 3] .comment PROGBITS 0000000000000000 001031 000008 01 MS
# SEP-BY-NONALLOC: [ 7] other PROGBITS 000000000000002e 00102e 000003 00 WA
# SEP-BY-NONALLOC-NEXT: [ 8] .bss NOBITS 0000000000000031 001031 000002 00 WA
# SEP-BY-NONALLOC: [ 4] other PROGBITS 000000000000002e 00102e 000003 00 WA
# SEP-BY-NONALLOC-NEXT: [ 5] .bss NOBITS 0000000000000031 001031 000002 00 WA

# SEP-BY-NONALLOC: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
# SEP-BY-NONALLOC-NEXT: LOAD 0x001000 0x0000000000000000 0x0000000000000000 0x00000e 0x00000e R E 0x1000
Expand Down
9 changes: 3 additions & 6 deletions llvm/include/llvm/Bitcode/BitcodeWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,7 @@ class BitcodeWriter {

void writeIndex(
const ModuleSummaryIndex *Index,
const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex,
const GVSummaryPtrSet *DecSummaries);
const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex);
};

/// Write the specified module to the specified raw output stream.
Expand Down Expand Up @@ -148,12 +147,10 @@ void writeThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
/// where it will be written in a new bitcode block. This is used when
/// writing the combined index file for ThinLTO. When writing a subset of the
/// index for a distributed backend, provide the \p ModuleToSummariesForIndex
/// map. \p DecSummaries specifies the set of summaries for which the
/// corresponding value should be imported as a declaration (prototype).
/// map.
void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out,
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr,
const GVSummaryPtrSet *DecSummaries = nullptr);
*ModuleToSummariesForIndex = nullptr);

/// If EmbedBitcode is set, save a copy of the llvm IR as data in the
/// __LLVM,__bitcode section (.llvmbc on non-MacOS).
Expand Down
7 changes: 0 additions & 7 deletions llvm/include/llvm/IR/ModuleSummaryIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -587,10 +587,6 @@ class GlobalValueSummary {

void setImportKind(ImportKind IK) { Flags.ImportType = IK; }

GlobalValueSummary::ImportKind importType() const {
return static_cast<ImportKind>(Flags.ImportType);
}

GlobalValue::VisibilityTypes getVisibility() const {
return (GlobalValue::VisibilityTypes)Flags.Visibility;
}
Expand Down Expand Up @@ -1276,9 +1272,6 @@ using ModulePathStringTableTy = StringMap<ModuleHash>;
/// a particular module, and provide efficient access to their summary.
using GVSummaryMapTy = DenseMap<GlobalValue::GUID, GlobalValueSummary *>;

/// A set of global value summary pointers.
using GVSummaryPtrSet = SmallPtrSet<GlobalValueSummary *, 4>;

/// Map of a type GUID to type id string and summary (multimap used
/// in case of GUID conflicts).
using TypeIdSummaryMapTy =
Expand Down
5 changes: 2 additions & 3 deletions llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,13 +271,12 @@ class ThinLTOCodeGenerator {
const lto::InputFile &File);

/**
* Compute the list of summaries and the subset of declaration summaries
* needed for importing into module.
* Compute the list of summaries needed for importing into module.
*/
void gatherImportedSummariesForModule(
Module &Module, ModuleSummaryIndex &Index,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
GVSummaryPtrSet &DecSummaries, const lto::InputFile &File);
const lto::InputFile &File);

/**
* Perform internalization. Index is updated to reflect linkage changes.
Expand Down
21 changes: 6 additions & 15 deletions llvm/include/llvm/Transforms/IPO/FunctionImport.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ class Module;
/// based on the provided summary informations.
class FunctionImporter {
public:
/// The functions to import from a source module and their import type.
using FunctionsToImportTy =
DenseMap<GlobalValue::GUID, GlobalValueSummary::ImportKind>;
/// Set of functions to import from a source module. Each entry is a set
/// containing all the GUIDs of all functions to import for a source module.
using FunctionsToImportTy = std::unordered_set<GlobalValue::GUID>;

/// The different reasons selectCallee will chose not to import a
/// candidate.
Expand Down Expand Up @@ -99,13 +99,8 @@ class FunctionImporter {
/// index's module path string table).
using ImportMapTy = DenseMap<StringRef, FunctionsToImportTy>;

/// The map contains an entry for every global value the module exports.
/// The key is ValueInfo, and the value indicates whether the definition
/// or declaration is visible to another module. If a function's definition is
/// visible to other modules, the global values this function referenced are
/// visible and shouldn't be internalized.
/// TODO: Rename to `ExportMapTy`.
using ExportSetTy = DenseMap<ValueInfo, GlobalValueSummary::ImportKind>;
/// The set contains an entry for every global value the module exports.
using ExportSetTy = DenseSet<ValueInfo>;

/// A function of this type is used to load modules referenced by the index.
using ModuleLoaderTy =
Expand Down Expand Up @@ -212,15 +207,11 @@ bool convertToDeclaration(GlobalValue &GV);
/// \p ModuleToSummariesForIndex will be populated with the needed summaries
/// from each required module path. Use a std::map instead of StringMap to get
/// stable order for bitcode emission.
///
/// \p DecSummaries will be popluated with the subset of of summary pointers
/// that have 'declaration' import type among all summaries the module need.
void gatherImportedSummariesForModule(
StringRef ModulePath,
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
const FunctionImporter::ImportMapTy &ImportList,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
GVSummaryPtrSet &DecSummaries);
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);

/// Emit into \p OutputFilename the files module \p ModulePath will import from.
std::error_code EmitImportsFiles(
Expand Down
38 changes: 8 additions & 30 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,11 +424,6 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
/// The combined index to write to bitcode.
const ModuleSummaryIndex &Index;

/// When writing combined summaries, provides the set of global value
/// summaries for which the value (function, function alias, etc) should be
/// imported as a declaration.
const GVSummaryPtrSet *DecSummaries = nullptr;

/// When writing a subset of the index for distributed backends, client
/// provides a map of modules to the corresponding GUIDs/summaries to write.
const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex;
Expand Down Expand Up @@ -457,16 +452,11 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
/// Constructs a IndexBitcodeWriter object for the given combined index,
/// writing to the provided \p Buffer. When writing a subset of the index
/// for a distributed backend, provide a \p ModuleToSummariesForIndex map.
/// If provided, \p ModuleToDecSummaries specifies the set of summaries for
/// which the corresponding functions or aliased functions should be imported
/// as a declaration (but not definition) for each module.
IndexBitcodeWriter(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder,
const ModuleSummaryIndex &Index,
const GVSummaryPtrSet *DecSummaries = nullptr,
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr)
: BitcodeWriterBase(Stream, StrtabBuilder), Index(Index),
DecSummaries(DecSummaries),
ModuleToSummariesForIndex(ModuleToSummariesForIndex) {

// See if the StackIdIndex was already added to the StackId map and
Expand Down Expand Up @@ -1221,8 +1211,7 @@ static uint64_t getEncodedFFlags(FunctionSummary::FFlags Flags) {

// Decode the flags for GlobalValue in the summary. See getDecodedGVSummaryFlags
// in BitcodeReader.cpp.
static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags,
bool ImportAsDecl = false) {
static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
uint64_t RawFlags = 0;

RawFlags |= Flags.NotEligibleToImport; // bool
Expand All @@ -1237,8 +1226,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags,

RawFlags |= (Flags.Visibility << 8); // 2 bits

unsigned ImportType = Flags.ImportType | ImportAsDecl;
RawFlags |= (ImportType << 10); // 1 bit
RawFlags |= (Flags.ImportType << 10); // 1 bit

return RawFlags;
}
Expand Down Expand Up @@ -4565,12 +4553,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));

auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
if (DecSummaries == nullptr)
return false;
return DecSummaries->contains(GVS);
};

// The aliases are emitted as a post-pass, and will point to the value
// id of the aliasee. Save them in a vector for post-processing.
SmallVector<AliasSummary *, 64> Aliases;
Expand Down Expand Up @@ -4681,8 +4663,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.push_back(*ValueId);
assert(ModuleIdMap.count(FS->modulePath()));
NameVals.push_back(ModuleIdMap[FS->modulePath()]);
NameVals.push_back(
getEncodedGVSummaryFlags(FS->flags(), shouldImportValueAsDecl(FS)));
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
NameVals.push_back(FS->entryCount());
Expand Down Expand Up @@ -4731,8 +4712,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.push_back(AliasValueId);
assert(ModuleIdMap.count(AS->modulePath()));
NameVals.push_back(ModuleIdMap[AS->modulePath()]);
NameVals.push_back(
getEncodedGVSummaryFlags(AS->flags(), shouldImportValueAsDecl(AS)));
NameVals.push_back(getEncodedGVSummaryFlags(AS->flags()));
auto AliaseeValueId = SummaryToValueIdMap[&AS->getAliasee()];
assert(AliaseeValueId);
NameVals.push_back(AliaseeValueId);
Expand Down Expand Up @@ -5073,9 +5053,8 @@ void BitcodeWriter::writeModule(const Module &M,

void BitcodeWriter::writeIndex(
const ModuleSummaryIndex *Index,
const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex,
const GVSummaryPtrSet *DecSummaries) {
IndexBitcodeWriter IndexWriter(*Stream, StrtabBuilder, *Index, DecSummaries,
const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex) {
IndexBitcodeWriter IndexWriter(*Stream, StrtabBuilder, *Index,
ModuleToSummariesForIndex);
IndexWriter.write();
}
Expand Down Expand Up @@ -5130,13 +5109,12 @@ void IndexBitcodeWriter::write() {
// index for a distributed backend, provide a \p ModuleToSummariesForIndex map.
void llvm::writeIndexToFile(
const ModuleSummaryIndex &Index, raw_ostream &Out,
const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex,
const GVSummaryPtrSet *DecSummaries) {
const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex) {
SmallVector<char, 0> Buffer;
Buffer.reserve(256 * 1024);

BitcodeWriter Writer(Buffer);
Writer.writeIndex(&Index, ModuleToSummariesForIndex, DecSummaries);
Writer.writeIndex(&Index, ModuleToSummariesForIndex);
Writer.writeStrtab();

Out.write((char *)&Buffer.front(), Buffer.size());
Expand Down
38 changes: 14 additions & 24 deletions llvm/lib/LTO/LTO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,6 @@ void llvm::computeLTOCacheKey(
support::endian::write64le(Data, I);
Hasher.update(Data);
};
auto AddUint8 = [&](const uint8_t I) {
Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&I, 1));
};
AddString(Conf.CPU);
// FIXME: Hash more of Options. For now all clients initialize Options from
// command-line flags (which is unsupported in production), but may set
Expand Down Expand Up @@ -159,18 +156,18 @@ void llvm::computeLTOCacheKey(
auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));

std::vector<std::pair<uint64_t, uint8_t>> ExportsGUID;
std::vector<uint64_t> ExportsGUID;
ExportsGUID.reserve(ExportList.size());
for (const auto &[VI, ExportType] : ExportList)
ExportsGUID.push_back(
std::make_pair(VI.getGUID(), static_cast<uint8_t>(ExportType)));
for (const auto &VI : ExportList) {
auto GUID = VI.getGUID();
ExportsGUID.push_back(GUID);
}

// Sort the export list elements GUIDs.
llvm::sort(ExportsGUID);
for (auto [GUID, ExportType] : ExportsGUID) {
for (uint64_t GUID : ExportsGUID) {
// The export list can impact the internalization, be conservative here
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
AddUint8(ExportType);
}

// Include the hash for every module we import functions from. The set of
Expand Down Expand Up @@ -202,21 +199,19 @@ void llvm::computeLTOCacheKey(
[](const ImportModule &Lhs, const ImportModule &Rhs) -> bool {
return Lhs.getHash() < Rhs.getHash();
});
std::vector<std::pair<uint64_t, uint8_t>> ImportedGUIDs;
std::vector<uint64_t> ImportedGUIDs;
for (const ImportModule &Entry : ImportModulesVector) {
auto ModHash = Entry.getHash();
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));

AddUint64(Entry.getFunctions().size());

ImportedGUIDs.clear();
for (auto &[Fn, ImportType] : Entry.getFunctions())
ImportedGUIDs.push_back(std::make_pair(Fn, ImportType));
for (auto &Fn : Entry.getFunctions())
ImportedGUIDs.push_back(Fn);
llvm::sort(ImportedGUIDs);
for (auto &[GUID, Type] : ImportedGUIDs) {
for (auto &GUID : ImportedGUIDs)
AddUint64(GUID);
AddUint8(Type);
}
}

// Include the hash for the resolved ODR.
Expand Down Expand Up @@ -286,9 +281,9 @@ void llvm::computeLTOCacheKey(
// Imported functions may introduce new uses of type identifier resolutions,
// so we need to collect their used resolutions as well.
for (const ImportModule &ImpM : ImportModulesVector)
for (auto &[GUID, UnusedImportType] : ImpM.getFunctions()) {
for (auto &ImpF : ImpM.getFunctions()) {
GlobalValueSummary *S =
Index.findSummaryInModule(GUID, ImpM.getIdentifier());
Index.findSummaryInModule(ImpF, ImpM.getIdentifier());
AddUsedThings(S);
// If this is an alias, we also care about any types/etc. that the aliasee
// may reference.
Expand Down Expand Up @@ -1400,20 +1395,15 @@ class lto::ThinBackendProc {
llvm::StringRef ModulePath,
const std::string &NewModulePath) {
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
GVSummaryPtrSet DeclarationSummaries;

std::error_code EC;
gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
ImportList, ModuleToSummariesForIndex,
DeclarationSummaries);
ImportList, ModuleToSummariesForIndex);

raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
sys::fs::OpenFlags::OF_None);
if (EC)
return errorCodeToError(EC);

writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
&DeclarationSummaries);
writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);

if (ShouldEmitImportsFiles) {
EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports",
Expand Down
9 changes: 1 addition & 8 deletions llvm/lib/LTO/LTOBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -720,14 +720,7 @@ bool lto::initImportList(const Module &M,
if (Summary->modulePath() == M.getModuleIdentifier())
continue;
// Add an entry to provoke importing by thinBackend.
// Try emplace the entry first. If an entry with the same key already
// exists, set the value to 'std::min(existing-value, new-value)' to make
// sure a definition takes precedence over a declaration.
auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
GUID, Summary->importType());

if (!Inserted)
Iter->second = std::min(Iter->second, Summary->importType());
ImportList[Summary->modulePath()].insert(GUID);
}
}
return true;
Expand Down
10 changes: 3 additions & 7 deletions llvm/lib/LTO/ThinLTOCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
Module &TheModule, ModuleSummaryIndex &Index,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
GVSummaryPtrSet &DecSummaries, const lto::InputFile &File) {
const lto::InputFile &File) {
auto ModuleCount = Index.modulePaths().size();
auto ModuleIdentifier = TheModule.getModuleIdentifier();

Expand Down Expand Up @@ -796,7 +796,7 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(

llvm::gatherImportedSummariesForModule(
ModuleIdentifier, ModuleToDefinedGVSummaries,
ImportLists[ModuleIdentifier], ModuleToSummariesForIndex, DecSummaries);
ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
}

/**
Expand Down Expand Up @@ -832,14 +832,10 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);

// 'EmitImportsFiles' emits the list of modules from which to import from, and
// the set of keys in `ModuleToSummariesForIndex` should be a superset of keys
// in `DecSummaries`, so no need to use `DecSummaries` in `EmitImportFiles`.
GVSummaryPtrSet DecSummaries;
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
llvm::gatherImportedSummariesForModule(
ModuleIdentifier, ModuleToDefinedGVSummaries,
ImportLists[ModuleIdentifier], ModuleToSummariesForIndex, DecSummaries);
ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);

std::error_code EC;
if ((EC = EmitImportsFiles(ModuleIdentifier, OutputName,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTX.td
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
def SM90a: FeatureSM<"90a", 901>;

foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84] in
70, 71, 72, 73, 74, 75, 76, 77, 78,
80, 81, 82, 83, 84, 85] in
def PTX#version: FeaturePTX<version>;

//===----------------------------------------------------------------------===//
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
Original file line number Diff line number Diff line change
Expand Up @@ -6306,7 +6306,7 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rs1,
(NegImm simm5_plus1:$rs2),
GPR:$vl,
vti.Log2SEW, TA_MA)>;
vti.Log2SEW, TU_MU)>;
def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
Expand Down
273 changes: 63 additions & 210 deletions llvm/lib/Transforms/IPO/FunctionImport.cpp

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions llvm/test/CodeGen/RISCV/rvv/vsub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2147,7 +2147,7 @@ entry:
define <vscale x 1 x i8> @intrinsic_vsub_vi_tu_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv1i8_nxv1i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2196,7 +2196,7 @@ entry:
define <vscale x 2 x i8> @intrinsic_vsub_vi_tu_nxv2i8_nxv2i8_i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv2i8_nxv2i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2245,7 +2245,7 @@ entry:
define <vscale x 4 x i8> @intrinsic_vsub_vi_tu_nxv4i8_nxv4i8_i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv4i8_nxv4i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2294,7 +2294,7 @@ entry:
define <vscale x 8 x i8> @intrinsic_vsub_vi_tu_nxv8i8_nxv8i8_i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv8i8_nxv8i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2343,7 +2343,7 @@ entry:
define <vscale x 16 x i8> @intrinsic_vsub_vi_tu_nxv16i8_nxv16i8_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv16i8_nxv16i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma
; CHECK-NEXT: vadd.vi v8, v10, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2392,7 +2392,7 @@ entry:
define <vscale x 32 x i8> @intrinsic_vsub_vi_tu_nxv32i8_nxv32i8_i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv32i8_nxv32i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
; CHECK-NEXT: vadd.vi v8, v12, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2441,7 +2441,7 @@ entry:
define <vscale x 64 x i8> @intrinsic_vsub_vi_tu_nxv64i8_nxv64i8_i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv64i8_nxv64i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
; CHECK-NEXT: vadd.vi v8, v16, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2490,7 +2490,7 @@ entry:
define <vscale x 1 x i16> @intrinsic_vsub_vi_tu_nxv1i16_nxv1i16_i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv1i16_nxv1i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2539,7 +2539,7 @@ entry:
define <vscale x 2 x i16> @intrinsic_vsub_vi_tu_nxv2i16_nxv2i16_i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv2i16_nxv2i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2588,7 +2588,7 @@ entry:
define <vscale x 4 x i16> @intrinsic_vsub_vi_tu_nxv4i16_nxv4i16_i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv4i16_nxv4i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2637,7 +2637,7 @@ entry:
define <vscale x 8 x i16> @intrinsic_vsub_vi_tu_nxv8i16_nxv8i16_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv8i16_nxv8i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT: vadd.vi v8, v10, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2686,7 +2686,7 @@ entry:
define <vscale x 16 x i16> @intrinsic_vsub_vi_tu_nxv16i16_nxv16i16_i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv16i16_nxv16i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
; CHECK-NEXT: vadd.vi v8, v12, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2735,7 +2735,7 @@ entry:
define <vscale x 32 x i16> @intrinsic_vsub_vi_tu_nxv32i16_nxv32i16_i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv32i16_nxv32i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
; CHECK-NEXT: vadd.vi v8, v16, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2784,7 +2784,7 @@ entry:
define <vscale x 1 x i32> @intrinsic_vsub_vi_tu_nxv1i32_nxv1i32_i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv1i32_nxv1i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2833,7 +2833,7 @@ entry:
define <vscale x 2 x i32> @intrinsic_vsub_vi_tu_nxv2i32_nxv2i32_i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv2i32_nxv2i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2882,7 +2882,7 @@ entry:
define <vscale x 4 x i32> @intrinsic_vsub_vi_tu_nxv4i32_nxv4i32_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv4i32_nxv4i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT: vadd.vi v8, v10, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2931,7 +2931,7 @@ entry:
define <vscale x 8 x i32> @intrinsic_vsub_vi_tu_nxv8i32_nxv8i32_i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv8i32_nxv8i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT: vadd.vi v8, v12, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -2980,7 +2980,7 @@ entry:
define <vscale x 16 x i32> @intrinsic_vsub_vi_tu_nxv16i32_nxv16i32_i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv16i32_nxv16i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT: vadd.vi v8, v16, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -3029,7 +3029,7 @@ entry:
define <vscale x 1 x i64> @intrinsic_vsub_vi_tu_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -3078,7 +3078,7 @@ entry:
define <vscale x 2 x i64> @intrinsic_vsub_vi_tu_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT: vadd.vi v8, v10, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -3127,7 +3127,7 @@ entry:
define <vscale x 4 x i64> @intrinsic_vsub_vi_tu_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT: vadd.vi v8, v12, -9
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -3176,7 +3176,7 @@ entry:
define <vscale x 8 x i64> @intrinsic_vsub_vi_tu_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vadd.vi v8, v16, -9
; CHECK-NEXT: ret
entry:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/ThinLTO/X86/funcimport-stats.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
; RUN: cat %t4 | grep 'Is importing aliasee' | count 1
; RUN: cat %t4 | FileCheck %s

; CHECK: - [[NUM_FUNCS:[0-9]+]] function definitions and 0 function declarations imported from
; CHECK-NEXT: - [[NUM_VARS:[0-9]+]] global vars definition and 0 global vars declaration imported from
; CHECK: - [[NUM_FUNCS:[0-9]+]] functions imported from
; CHECK-NEXT: - [[NUM_VARS:[0-9]+]] global vars imported from

; CHECK: [[NUM_FUNCS]] function-import - Number of functions imported in backend
; CHECK-NEXT: [[NUM_FUNCS]] function-import - Number of functions thin link decided to import
Expand Down
221 changes: 0 additions & 221 deletions llvm/test/ThinLTO/X86/import_callee_declaration.ll

This file was deleted.

5 changes: 2 additions & 3 deletions llvm/test/Transforms/FunctionImport/funcimport.ll
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,7 @@ declare void @variadic_va_start(...)
; GUID-DAG: GUID {{.*}} is linkoncefunc

; DUMP: Module [[M1:.*]] imports from 1 module
; DUMP-NEXT: 15 function definitions and 0 function declarations imported from [[M2:.*]]
; DUMP-NEXT: 4 var definitions and 0 var declarations imported from [[M2]]

; DUMP-NEXT: 15 functions imported from [[M2:.*]]
; DUMP-NEXT: 4 vars imported from [[M2]]
; DUMP: Imported 15 functions for Module [[M1]]
; DUMP-NEXT: Imported 4 global variables for Module [[M1]]
6 changes: 1 addition & 5 deletions llvm/tools/llvm-link/llvm-link.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,13 +377,9 @@ static bool importFunctions(const char *argv0, Module &DestModule) {
if (Verbose)
errs() << "Importing " << FunctionName << " from " << FileName << "\n";

// `-import` specifies the `<filename,function-name>` pairs to import as
// definition, so make the import type definition directly.
// FIXME: A follow-up patch should add test coverage for import declaration
// in `llvm-link` CLI (e.g., by introducing a new command line option).
auto &Entry =
ImportList[FileNameStringCache.insert(FileName).first->getKey()];
Entry[F->getGUID()] = GlobalValueSummary::Definition;
Entry.insert(F->getGUID());
}
auto CachedModuleLoader = [&](StringRef Identifier) {
return ModuleLoaderCache.takeModule(std::string(Identifier));
Expand Down
5 changes: 2 additions & 3 deletions llvm/tools/llvm-lto/llvm-lto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,9 +692,8 @@ class ThinLTOProcessing {
// Build a map of module to the GUIDs and summary objects that should
// be written to its index.
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
GVSummaryPtrSet DecSummaries;
ThinGenerator.gatherImportedSummariesForModule(
*TheModule, *Index, ModuleToSummariesForIndex, DecSummaries, *Input);
*TheModule, *Index, ModuleToSummariesForIndex, *Input);

std::string OutputName = OutputFilename;
if (OutputName.empty()) {
Expand All @@ -704,7 +703,7 @@ class ThinLTOProcessing {
std::error_code EC;
raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputName + "'");
writeIndexToFile(*Index, OS, &ModuleToSummariesForIndex, &DecSummaries);
writeIndexToFile(*Index, OS, &ModuleToSummariesForIndex);
}
}

Expand Down