42 changes: 42 additions & 0 deletions flang/lib/Semantics/check-directive-structure.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ class DirectiveStructureChecker : public virtual BaseChecker {
const PC *clause{nullptr};
ClauseMapTy clauseInfo;
std::list<C> actualClauses;
std::list<C> crtGroup;
Symbol *loopIV{nullptr};
};

Expand Down Expand Up @@ -261,6 +262,12 @@ class DirectiveStructureChecker : public virtual BaseChecker {
GetContext().actualClauses.push_back(type);
}

// Append `type` to the clause group that is currently being collected in the
// active directive context.
void AddClauseToCrtGroupInContext(C type) {
  auto &currentGroup{GetContext().crtGroup};
  currentGroup.push_back(type);
}

// Drop every clause collected so far for the current clause group of the
// active directive context.
void ResetCrtGroup() {
  GetContext().crtGroup.clear();
}

// Look for a clause of kind `type` in the current directive context. The
// search itself is delegated to the overload that takes an explicit context.
const PC *FindClause(C type) {
  return FindClause(GetContext(), type);
}

Expand Down Expand Up @@ -353,6 +360,9 @@ class DirectiveStructureChecker : public virtual BaseChecker {
// separator clause appears.
void CheckAllowedOncePerGroup(C clause, C separator);

// Emit an error for `clause` for every clause of `set` that was recorded on
// the directive before its first `separator` clause, and for every clause of
// `set` recorded in the current clause group.
void CheckMutuallyExclusivePerGroup(
C clause, C separator, common::EnumSet<C, ClauseEnumSize> set);

void CheckAtLeastOneClause();

void CheckNotAllowedIfClause(
Expand Down Expand Up @@ -526,6 +536,7 @@ void DirectiveStructureChecker<D, C, PC, ClauseEnumSize>::CheckAllowed(
}
SetContextClauseInfo(clause);
AddClauseToCrtContext(clause);
AddClauseToCrtGroupInContext(clause);
}

// Enforce restriction where clauses in the given set are not allowed if the
Expand Down Expand Up @@ -570,6 +581,37 @@ void DirectiveStructureChecker<D, C, PC,
}
}

// Diagnose `clause` when it is combined with a clause from `set`. An error is
// emitted for every member of `set` found among the directive's clauses that
// precede the first `separator`, and for every member of `set` found in the
// current clause group.
template <typename D, typename C, typename PC, std::size_t ClauseEnumSize>
void DirectiveStructureChecker<D, C, PC,
    ClauseEnumSize>::CheckMutuallyExclusivePerGroup(C clause, C separator,
    common::EnumSet<C, ClauseEnumSize> set) {
  // Single place that formats the diagnostic: `clause` cannot be used
  // together with `conflicting` on this directive.
  const auto reportConflict{[&](C conflicting) {
    context_.Say(GetContext().directiveSource,
        "Clause %s is not allowed if clause %s appears on the %s directive"_err_en_US,
        parser::ToUpperCaseLetters(getClauseName(clause).str()),
        parser::ToUpperCaseLetters(getClauseName(conflicting).str()),
        ContextDirectiveAsFortran());
  }};

  // Check whether there are any offending clauses before the first separator;
  // scanning stops as soon as the separator is reached.
  for (const C seen : GetContext().actualClauses) {
    if (seen == separator) {
      break;
    }
    if (set.test(seen)) {
      reportConflict(seen);
    }
  }

  // Check for mutually exclusive clauses recorded in the current group.
  for (const C seen : GetContext().crtGroup) {
    if (set.test(seen)) {
      reportConflict(seen);
    }
  }
}

// Check the value of the clause is a constant positive integer.
template <typename D, typename C, typename PC, std::size_t ClauseEnumSize>
void DirectiveStructureChecker<D, C, PC,
Expand Down
54 changes: 37 additions & 17 deletions flang/lib/Semantics/mod-file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,31 +491,51 @@ void ModFileWriter::PutDECStructure(
static const Attrs subprogramPrefixAttrs{Attr::ELEMENTAL, Attr::IMPURE,
Attr::MODULE, Attr::NON_RECURSIVE, Attr::PURE, Attr::RECURSIVE};

// Write the clauses held by `info` to `os`, each preceded by a single space,
// in the fixed order: seq, gang (followed by "(dim: N)" when the gang
// dimension is positive), vector, worker, bind(<name>).
static void PutOpenACCDeviceTypeRoutineInfo(
    llvm::raw_ostream &os, const OpenACCRoutineDeviceTypeInfo &info) {
  // Emits `keyword` only when the corresponding clause is set.
  const auto putKeywordIf{[&os](bool present, const char *keyword) {
    if (present) {
      os << keyword;
    }
  }};

  putKeywordIf(info.isSeq(), " seq");
  if (info.isGang()) {
    os << " gang";
    // The dim argument is printed only for a positive dimension.
    if (const auto dim{info.gangDim()}; dim > 0) {
      os << "(dim: " << dim << ")";
    }
  }
  putKeywordIf(info.isVector(), " vector");
  putKeywordIf(info.isWorker(), " worker");
  if (const auto &boundName{info.bindName()}) {
    os << " bind(" << *boundName << ")";
  }
}

static void PutOpenACCRoutineInfo(
llvm::raw_ostream &os, const SubprogramDetails &details) {
for (auto info : details.openACCRoutineInfos()) {
os << "!$acc routine";
if (info.isSeq()) {
os << " seq";
}
if (info.isGang()) {
os << " gang";
if (info.gangDim() > 0) {
os << "(dim: " << info.gangDim() << ")";
}
}
if (info.isVector()) {
os << " vector";
}
if (info.isWorker()) {
os << " worker";
}

PutOpenACCDeviceTypeRoutineInfo(os, info);

if (info.isNohost()) {
os << " nohost";
}
if (info.bindName()) {
os << " bind(" << *info.bindName() << ")";

for (auto dtype : info.deviceTypeInfos()) {
os << " device_type(";
if (dtype.dType() == common::OpenACCDeviceType::Star) {
os << "*";
} else {
os << parser::ToLowerCaseLetters(common::EnumToString(dtype.dType()));
}
os << ")";

PutOpenACCDeviceTypeRoutineInfo(os, dtype);
}

os << "\n";
}
}
Expand Down
50 changes: 43 additions & 7 deletions flang/lib/Semantics/resolve-directives.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -945,33 +945,58 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
const auto &clauses = std::get<Fortran::parser::AccClauseList>(x.t);
for (const Fortran::parser::AccClause &clause : clauses.v) {
if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
info.set_isSeq();
if (info.deviceTypeInfos().empty()) {
info.set_isSeq();
} else {
info.deviceTypeInfos().back().set_isSeq();
}
} else if (const auto *gangClause =
std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) {
info.set_isGang();
if (info.deviceTypeInfos().empty()) {
info.set_isGang();
} else {
info.deviceTypeInfos().back().set_isGang();
}
if (gangClause->v) {
const Fortran::parser::AccGangArgList &x = *gangClause->v;
for (const Fortran::parser::AccGangArg &gangArg : x.v) {
if (const auto *dim =
std::get_if<Fortran::parser::AccGangArg::Dim>(&gangArg.u)) {
if (const auto v{EvaluateInt64(context_, dim->v)}) {
info.set_gangDim(*v);
if (info.deviceTypeInfos().empty()) {
info.set_gangDim(*v);
} else {
info.deviceTypeInfos().back().set_gangDim(*v);
}
}
}
}
}
} else if (std::get_if<Fortran::parser::AccClause::Vector>(&clause.u)) {
info.set_isVector();
if (info.deviceTypeInfos().empty()) {
info.set_isVector();
} else {
info.deviceTypeInfos().back().set_isVector();
}
} else if (std::get_if<Fortran::parser::AccClause::Worker>(&clause.u)) {
info.set_isWorker();
if (info.deviceTypeInfos().empty()) {
info.set_isWorker();
} else {
info.deviceTypeInfos().back().set_isWorker();
}
} else if (std::get_if<Fortran::parser::AccClause::Nohost>(&clause.u)) {
info.set_isNohost();
} else if (const auto *bindClause =
std::get_if<Fortran::parser::AccClause::Bind>(&clause.u)) {
if (const auto *name =
std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
if (Symbol *sym = ResolveFctName(*name)) {
info.set_bindName(sym->name().ToString());
if (info.deviceTypeInfos().empty()) {
info.set_bindName(sym->name().ToString());
} else {
info.deviceTypeInfos().back().set_bindName(
sym->name().ToString());
}
} else {
context_.Say((*name).source,
"No function or subroutine declared for '%s'"_err_en_US,
Expand All @@ -986,8 +1011,19 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
std::string str{std::get<std::string>(charConst->t)};
std::stringstream bindName;
bindName << "\"" << str << "\"";
info.set_bindName(bindName.str());
if (info.deviceTypeInfos().empty()) {
info.set_bindName(bindName.str());
} else {
info.deviceTypeInfos().back().set_bindName(bindName.str());
}
}
} else if (const auto *dType =
std::get_if<Fortran::parser::AccClause::DeviceType>(
&clause.u)) {
const parser::AccDeviceTypeExprList &deviceTypeExprList = dType->v;
OpenACCRoutineDeviceTypeInfo dtypeInfo;
dtypeInfo.set_dType(deviceTypeExprList.v.front().v);
info.add_deviceTypeInfo(dtypeInfo);
}
}
symbol.get<SubprogramDetails>().add_openACCRoutineInfo(info);
Expand Down
12 changes: 6 additions & 6 deletions flang/test/Lower/OpenACC/acc-kernels-loop.f90
Original file line number Diff line number Diff line change
Expand Up @@ -497,10 +497,10 @@ subroutine acc_kernels_loop
END DO

! CHECK: acc.kernels {
! CHECK: acc.loop {
! CHECK: acc.loop gang {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {gang = [#acc.device_type<none>]}{{$}}
! CHECK-NEXT: }{{$}}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand Down Expand Up @@ -550,10 +550,10 @@ subroutine acc_kernels_loop
a(i) = b(i)
END DO
! CHECK: acc.kernels {
! CHECK: acc.loop {
! CHECK: acc.loop vector {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {vector = [#acc.device_type<none>]}{{$}}
! CHECK-NEXT: }{{$}}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand Down Expand Up @@ -591,10 +591,10 @@ subroutine acc_kernels_loop
END DO

! CHECK: acc.kernels {
! CHECK: acc.loop {
! CHECK: acc.loop worker {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {worker = [#acc.device_type<none>]}{{$}}
! CHECK-NEXT: }{{$}}
! CHECK: acc.terminator
! CHECK-NEXT: }{{$}}

Expand Down
18 changes: 12 additions & 6 deletions flang/test/Lower/OpenACC/acc-loop.f90
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ program acc_loop
a(i) = b(i)
END DO

!CHECK: acc.loop {
!CHECK: acc.loop gang {
!CHECK: fir.do_loop
!CHECK: acc.yield
!CHECK-NEXT: } attributes {gang = [#acc.device_type<none>]}{{$}}
!CHECK-NEXT: }{{$}}

!$acc loop gang(num: 8)
DO i = 1, n
Expand Down Expand Up @@ -109,10 +109,10 @@ program acc_loop
a(i) = b(i)
END DO

!CHECK: acc.loop {
!CHECK: acc.loop vector {
!CHECK: fir.do_loop
!CHECK: acc.yield
!CHECK-NEXT: } attributes {vector = [#acc.device_type<none>]}{{$}}
!CHECK-NEXT: }{{$}}

!$acc loop vector(128)
DO i = 1, n
Expand Down Expand Up @@ -141,10 +141,10 @@ program acc_loop
a(i) = b(i)
END DO

!CHECK: acc.loop {
!CHECK: acc.loop worker {
!CHECK: fir.do_loop
!CHECK: acc.yield
!CHECK-NEXT: } attributes {worker = [#acc.device_type<none>]}{{$}}
!CHECK-NEXT: }{{$}}

!$acc loop worker(128)
DO i = 1, n
Expand Down Expand Up @@ -320,4 +320,10 @@ program acc_loop
! CHECK: acc.loop
! CHECK: fir.do_loop

!$acc loop gang device_type(nvidia) gang(8)
DO i = 1, n
END DO

! CHECK: acc.loop gang([#acc.device_type<none>], {num=%c8{{.*}} : i32} [#acc.device_type<nvidia>])

end program
12 changes: 6 additions & 6 deletions flang/test/Lower/OpenACC/acc-parallel-loop.f90
Original file line number Diff line number Diff line change
Expand Up @@ -512,10 +512,10 @@ subroutine acc_parallel_loop
END DO

! CHECK: acc.parallel {
! CHECK: acc.loop {
! CHECK: acc.loop gang {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {gang = [#acc.device_type<none>]}{{$}}
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}

Expand Down Expand Up @@ -565,10 +565,10 @@ subroutine acc_parallel_loop
a(i) = b(i)
END DO
! CHECK: acc.parallel {
! CHECK: acc.loop {
! CHECK: acc.loop vector {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {vector = [#acc.device_type<none>]}{{$}}
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}

Expand Down Expand Up @@ -606,10 +606,10 @@ subroutine acc_parallel_loop
END DO

! CHECK: acc.parallel {
! CHECK: acc.loop {
! CHECK: acc.loop worker {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {worker = [#acc.device_type<none>]}{{$}}
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}

Expand Down
12 changes: 6 additions & 6 deletions flang/test/Lower/OpenACC/acc-serial-loop.f90
Original file line number Diff line number Diff line change
Expand Up @@ -447,10 +447,10 @@ subroutine acc_serial_loop
END DO

! CHECK: acc.serial {
! CHECK: acc.loop {
! CHECK: acc.loop gang {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {gang = [#acc.device_type<none>]}{{$}}
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}

Expand Down Expand Up @@ -500,10 +500,10 @@ subroutine acc_serial_loop
a(i) = b(i)
END DO
! CHECK: acc.serial {
! CHECK: acc.loop {
! CHECK: acc.loop vector {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {vector = [#acc.device_type<none>]}{{$}}
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}

Expand Down Expand Up @@ -541,10 +541,10 @@ subroutine acc_serial_loop
END DO

! CHECK: acc.serial {
! CHECK: acc.loop {
! CHECK: acc.loop worker {
! CHECK: fir.do_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {worker = [#acc.device_type<none>]}{{$}}
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}

Expand Down
13 changes: 13 additions & 0 deletions flang/test/Semantics/OpenACC/acc-module.f90
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ subroutine sub9()
subroutine sub10()
end subroutine

! Routine directive carrying a different parallelism clause after each
! device_type clause (gang after nvidia, seq after *).
subroutine sub11()
!$acc routine device_type(nvidia) gang device_type(*) seq
end subroutine

! Routine directive carrying a different bind name after each device_type
! clause (sub7 after host, sub8 after multicore).
subroutine sub12()
!$acc routine device_type(host) bind(sub7) device_type(multicore) bind(sub8)
end subroutine
end module

!Expect: acc_mod.mod
Expand Down Expand Up @@ -107,4 +114,10 @@ subroutine sub10()
! subroutine sub10()
! !$acc routine seq
! end
! subroutine sub11()
! !$acc routine device_type(nvidia) gang device_type(*) seq
! end
! subroutine sub12()
! !$acc routine device_type(host) bind(sub7) device_type(multicore) bind(sub8)
! end
! end
121 changes: 121 additions & 0 deletions flang/test/Semantics/OpenACC/acc-routine.f90
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,124 @@ subroutine sub2(a)
subroutine sub3()
!$acc routine bind(sub1)
end subroutine

! The num argument of GANG is expected to be rejected on a ROUTINE directive.
subroutine sub4()
!ERROR: Only the dim argument is allowed on the GANG clause on the ROUTINE directive
!$acc routine gang(num: 1)
end subroutine

! The static argument of GANG is expected to be rejected on a ROUTINE directive.
subroutine sub5()
!ERROR: Only the dim argument is allowed on the GANG clause on the ROUTINE directive
!$acc routine gang(static: 1)
end subroutine

! Without any device_type clause, every ordered pair taken from
! GANG/WORKER/VECTOR/SEQ (including a clause repeated twice) is expected to
! be diagnosed. The error names the second clause as the one not allowed.
subroutine sub6()
!ERROR: Clause GANG is not allowed if clause GANG appears on the ROUTINE directive
!$acc routine gang gang

!ERROR: Clause GANG is not allowed if clause WORKER appears on the ROUTINE directive
!$acc routine worker gang

!ERROR: Clause GANG is not allowed if clause VECTOR appears on the ROUTINE directive
!$acc routine vector gang

!ERROR: Clause GANG is not allowed if clause SEQ appears on the ROUTINE directive
!$acc routine seq gang

!ERROR: Clause WORKER is not allowed if clause WORKER appears on the ROUTINE directive
!$acc routine worker worker

!ERROR: Clause WORKER is not allowed if clause GANG appears on the ROUTINE directive
!$acc routine gang worker

!ERROR: Clause WORKER is not allowed if clause VECTOR appears on the ROUTINE directive
!$acc routine vector worker

!ERROR: Clause WORKER is not allowed if clause SEQ appears on the ROUTINE directive
!$acc routine seq worker

!ERROR: Clause VECTOR is not allowed if clause VECTOR appears on the ROUTINE directive
!$acc routine vector vector

!ERROR: Clause VECTOR is not allowed if clause GANG appears on the ROUTINE directive
!$acc routine gang vector

!ERROR: Clause VECTOR is not allowed if clause WORKER appears on the ROUTINE directive
!$acc routine worker vector

!ERROR: Clause VECTOR is not allowed if clause SEQ appears on the ROUTINE directive
!$acc routine seq vector

!ERROR: Clause SEQ is not allowed if clause SEQ appears on the ROUTINE directive
!$acc routine seq seq

!ERROR: Clause SEQ is not allowed if clause GANG appears on the ROUTINE directive
!$acc routine gang seq

!ERROR: Clause SEQ is not allowed if clause WORKER appears on the ROUTINE directive
!$acc routine worker seq

!ERROR: Clause SEQ is not allowed if clause VECTOR appears on the ROUTINE directive
!$acc routine vector seq

end subroutine

! Mutual exclusion of GANG/WORKER/VECTOR/SEQ in the presence of device_type:
!  - one such clause per device_type group is accepted;
!  - two such clauses inside the same device_type group are diagnosed;
!  - a clause placed before the first device_type conflicts with any such
!    clause that appears after a device_type.
subroutine sub7()
  !$acc routine device_type(*) gang device_type(host) worker

  !ERROR: Clause SEQ is not allowed if clause GANG appears on the ROUTINE directive
  !$acc routine device_type(*) gang seq

  !ERROR: Clause WORKER is not allowed if clause GANG appears on the ROUTINE directive
  !$acc routine device_type(*) gang worker

  !ERROR: Clause GANG is not allowed if clause GANG appears on the ROUTINE directive
  !$acc routine gang device_type(*) gang

  !ERROR: Clause WORKER is not allowed if clause GANG appears on the ROUTINE directive
  !$acc routine gang device_type(*) worker

  !ERROR: Clause VECTOR is not allowed if clause GANG appears on the ROUTINE directive
  !$acc routine gang device_type(*) vector

  !ERROR: Clause SEQ is not allowed if clause GANG appears on the ROUTINE directive
  !$acc routine gang device_type(*) seq

  !ERROR: Clause WORKER is not allowed if clause WORKER appears on the ROUTINE directive
  !$acc routine worker device_type(*) worker

  !ERROR: Clause GANG is not allowed if clause WORKER appears on the ROUTINE directive
  !$acc routine worker device_type(*) gang

  !ERROR: Clause VECTOR is not allowed if clause WORKER appears on the ROUTINE directive
  !$acc routine worker device_type(*) vector

  !ERROR: Clause SEQ is not allowed if clause WORKER appears on the ROUTINE directive
  !$acc routine worker device_type(*) seq

  !ERROR: Clause VECTOR is not allowed if clause VECTOR appears on the ROUTINE directive
  !$acc routine vector device_type(*) vector

  !ERROR: Clause GANG is not allowed if clause VECTOR appears on the ROUTINE directive
  !$acc routine vector device_type(*) gang

  !ERROR: Clause WORKER is not allowed if clause VECTOR appears on the ROUTINE directive
  !$acc routine vector device_type(*) worker

  !ERROR: Clause SEQ is not allowed if clause VECTOR appears on the ROUTINE directive
  !$acc routine vector device_type(*) seq

  !ERROR: Clause SEQ is not allowed if clause SEQ appears on the ROUTINE directive
  !$acc routine seq device_type(*) seq

  !ERROR: Clause GANG is not allowed if clause SEQ appears on the ROUTINE directive
  !$acc routine seq device_type(*) gang

  !ERROR: Clause VECTOR is not allowed if clause SEQ appears on the ROUTINE directive
  !$acc routine seq device_type(*) vector

  !ERROR: Clause WORKER is not allowed if clause SEQ appears on the ROUTINE directive
  !$acc routine seq device_type(*) worker

  !$acc routine device_type(host) seq device_type(nvidia) gang device_type(multicore) vector device_type(*) worker
end subroutine
2 changes: 1 addition & 1 deletion libcxx/include/string
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,7 @@ public:
// Turning off ASan instrumentation for variable initialization with _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS
// does not work consistently during initialization of __r_, so we instead unpoison __str's memory manually first.
// __str's memory needs to be unpoisoned only in the case where it's a short string.
: __r_(((__str.__is_long() ? 0 : (__str.__annotate_delete(), 0)), std::move(__str.__r_))) {
: __r_([](basic_string &__s) -> decltype(__s.__r_)&& { if(!__s.__is_long()) __s.__annotate_delete(); return std::move(__s.__r_); }(__str)) {
__str.__r_.first() = __rep();
__str.__annotate_new(0);
if (!__is_long())
Expand Down
2 changes: 1 addition & 1 deletion libcxx/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ if (LIBCXX_ENABLE_SHARED)
# Maybe re-export symbols from libc++abi
# In particular, we don't re-export the symbols if libc++abi is merged statically
# into libc++ because in that case there's no dylib to re-export from.
if (APPLE AND LIBCXX_CXX_ABI STREQUAL "libcxxabi"
if (APPLE AND LIBCXX_CXX_ABI MATCHES "libcxxabi$"
AND NOT DEFINED LIBCXX_OSX_REEXPORT_LIBCXXABI_SYMBOLS
AND NOT LIBCXX_STATICALLY_LINK_ABI_IN_SHARED_LIBRARY)
set(LIBCXX_OSX_REEXPORT_LIBCXXABI_SYMBOLS ON)
Expand Down
4 changes: 4 additions & 0 deletions llvm/docs/GettingInvolved.rst
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,10 @@ what to add to your calendar invite.
- Every 2nd Tuesday of the month
- `gcal <https://calendar.google.com/calendar/u/0?cid=ZDcyMjc0ZjZiZjNhMzFlYmE3NTNkMWM2MGM2NjM5ZWU3ZDE2MjM4MGFlZDc2ZjViY2UyYzMwNzVhZjk4MzQ4ZEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`__
- `Meeting details/agenda <https://docs.google.com/document/d/1Ry8O4-Tm5BFj9AMjr8qTQFU80z-ptiNQ62687NaIvLs/edit?usp=sharing>`__
* - Floating Point Working Group
- Every 3rd Wednesday of the month
- `gcal <https://calendar.google.com/calendar/u/0?cid=MDI1ODI1MDdiYWM3OWQxODY5MDA3MTI1NjZlYzNmYzY5YjMzYWMyNGQ3ZGUwYThjNzZjN2IxOTk3NmYxOTBjMEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`__
- `Meeting details/agenda <https://docs.google.com/document/d/1QcmUlWftPlBi-Wz6b6PipqJfvjpJ-OuRMRnN9Dm2t0c>`__

.. _office-hours:

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Analysis/StackSafetyAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
const SCEV *Expr =
SE.getTruncateOrZeroExtend(SE.getSCEV(MI->getLength()), CalculationTy);
ConstantRange Sizes = SE.getSignedRange(Expr);
if (Sizes.getUpper().isNegative() || isUnsafe(Sizes))
if (!Sizes.getUpper().isStrictlyPositive() || isUnsafe(Sizes))
return UnknownRange;
Sizes = Sizes.sextOrTrunc(PointerSize);
ConstantRange SizeRange(APInt::getZero(PointerSize), Sizes.getUpper() - 1);
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,7 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
const TargetTransformInfo::LSRCost &C2);

bool shouldFoldTerminatingConditionAfterLSR() const {
// FIXME: Enabling this causes miscompiles.
// NOTE(review): as written this returns false and the `return true;` that
// follows is unreachable — confirm which of the two values is intended and
// drop the other one (together with the FIXME if the hook is enabled).
return false;
return true;
}
};

Expand Down
34 changes: 34 additions & 0 deletions llvm/test/Analysis/StackSafetyAnalysis/memintrin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,40 @@ entry:
ret void
}

; memset of a 4-byte alloca with a constant length of -1, reached only on the
; %if.then path. The expected result for %x is full-set.
define void @MemsetHugeUpper_m1(i1 %bool) {
; CHECK-LABEL: MemsetHugeUpper_m1 dso_preemptable{{$}}
; CHECK-NEXT: args uses:
; CHECK-NEXT: allocas uses:
; CHECK-NEXT: x[4]: full-set
entry:
%x = alloca i32, align 4
br i1 %bool, label %if.then, label %if.end

if.then:
call void @llvm.memset.p0.i64(ptr %x, i8 0, i64 -1, i1 false)
br label %if.end

if.end:
ret void
}

; Same shape as the -1 case, but with a constant memset length of -2. The
; expected result for %x is again full-set.
define void @MemsetHugeUpper_m2(i1 %bool) {
; CHECK-LABEL: MemsetHugeUpper_m2 dso_preemptable{{$}}
; CHECK-NEXT: args uses:
; CHECK-NEXT: allocas uses:
; CHECK-NEXT: x[4]: full-set
entry:
%x = alloca i32, align 4
br i1 %bool, label %if.then, label %if.end

if.then:
call void @llvm.memset.p0.i64(ptr %x, i8 0, i64 -2, i1 false)
br label %if.end

if.end:
ret void
}

define void @MemcpyInBounds() {
; CHECK-LABEL: MemcpyInBounds dso_preemptable{{$}}
; CHECK-NEXT: args uses:
Expand Down
41 changes: 25 additions & 16 deletions llvm/test/CodeGen/RISCV/branch-on-zero.ll
Original file line number Diff line number Diff line change
Expand Up @@ -120,36 +120,45 @@ define i32 @test_lshr2(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
; RV32-LABEL: test_lshr2:
; RV32: # %bb.0: # %entry
; RV32-NEXT: srli a2, a2, 2
; RV32-NEXT: beqz a2, .LBB3_2
; RV32-NEXT: .LBB3_1: # %while.body
; RV32-NEXT: beqz a2, .LBB3_3
; RV32-NEXT: # %bb.1: # %while.body.preheader
; RV32-NEXT: slli a2, a2, 2
; RV32-NEXT: add a2, a1, a2
; RV32-NEXT: .LBB3_2: # %while.body
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: lw a3, 0(a1)
; RV32-NEXT: addi a1, a1, 4
; RV32-NEXT: addi a4, a1, 4
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: addi a4, a0, 4
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: addi a1, a0, 4
; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: mv a0, a4
; RV32-NEXT: bnez a2, .LBB3_1
; RV32-NEXT: .LBB3_2: # %while.end
; RV32-NEXT: mv a0, a1
; RV32-NEXT: mv a1, a4
; RV32-NEXT: bne a4, a2, .LBB3_2
; RV32-NEXT: .LBB3_3: # %while.end
; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_lshr2:
; RV64: # %bb.0: # %entry
; RV64-NEXT: srliw a2, a2, 2
; RV64-NEXT: beqz a2, .LBB3_2
; RV64-NEXT: .LBB3_1: # %while.body
; RV64-NEXT: beqz a2, .LBB3_3
; RV64-NEXT: # %bb.1: # %while.body.preheader
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: slli a2, a2, 32
; RV64-NEXT: srli a2, a2, 30
; RV64-NEXT: add a2, a2, a1
; RV64-NEXT: addi a2, a2, 4
; RV64-NEXT: .LBB3_2: # %while.body
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: lw a3, 0(a1)
; RV64-NEXT: addi a1, a1, 4
; RV64-NEXT: addi a4, a1, 4
; RV64-NEXT: slli a3, a3, 1
; RV64-NEXT: addi a4, a0, 4
; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: addi a1, a0, 4
; RV64-NEXT: sw a3, 0(a0)
; RV64-NEXT: mv a0, a4
; RV64-NEXT: bnez a2, .LBB3_1
; RV64-NEXT: .LBB3_2: # %while.end
; RV64-NEXT: mv a0, a1
; RV64-NEXT: mv a1, a4
; RV64-NEXT: bne a4, a2, .LBB3_2
; RV64-NEXT: .LBB3_3: # %while.end
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
entry:
Expand Down
12 changes: 7 additions & 5 deletions llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,18 @@
define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: blez a1, .LBB0_2
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: blez a1, .LBB0_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lw a2, 0(a0)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: sw a2, 0(a0)
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, 4
; CHECK-NEXT: bnez a1, .LBB0_1
; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup
; CHECK-NEXT: bne a0, a1, .LBB0_2
; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
%cmp3 = icmp sgt i32 %n, 0
Expand Down
43 changes: 16 additions & 27 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -206,33 +206,19 @@ define <8 x float> @splat_idx_v8f32(<8 x float> %v, i64 %idx) {

; Test that we pull the vlse of the constant pool out of the loop.
define dso_local void @splat_load_licm(float* %0) {
; RV32-LABEL: splat_load_licm:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 1024
; RV32-NEXT: lui a2, 263168
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: addi a1, a1, -4
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: bnez a1, .LBB12_1
; RV32-NEXT: # %bb.2:
; RV32-NEXT: ret
;
; RV64-LABEL: splat_load_licm:
; RV64: # %bb.0:
; RV64-NEXT: li a1, 1024
; RV64-NEXT: lui a2, 263168
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: addiw a1, a1, -4
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: bnez a1, .LBB12_1
; RV64-NEXT: # %bb.2:
; RV64-NEXT: ret
; CHECK-LABEL: splat_load_licm:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 263168
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a2
; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: bne a0, a1, .LBB12_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: ret
br label %2

2: ; preds = %2, %1
Expand Down Expand Up @@ -1408,3 +1394,6 @@ define <2 x double> @vid_step2_v2f64() {
; CHECK-NEXT: ret
ret <2 x double> <double 0.0, double 2.0>
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}
185 changes: 90 additions & 95 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions llvm/test/CodeGen/RISCV/rvv/sink-splat-operands-i1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,16 @@ define void @sink_splat_vp_and_i1(ptr nocapture %a, i1 zeroext %x, <8 x i1> %m,
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vmsne.vi v8, v8, 0
; CHECK-NEXT: li a1, 1024
; CHECK-NEXT: addi a1, a0, 1024
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vlm.v v9, (a0)
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vmand.mm v9, v9, v8
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vsm.v v9, (a0)
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: bnez a1, .LBB0_1
; CHECK-NEXT: bne a0, a1, .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: ret
entry:
Expand Down
1,108 changes: 554 additions & 554 deletions llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -951,16 +951,15 @@ if.end:
define void @pre_over_vle(ptr %A) {
; CHECK-LABEL: pre_over_vle:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a1, 100
; CHECK-NEXT: addi a1, a0, 800
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: .LBB22_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsext.vf4 v9, v8
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, 8
; CHECK-NEXT: bnez a1, .LBB22_1
; CHECK-NEXT: bne a0, a1, .LBB22_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: ret
entry:
Expand Down
36 changes: 16 additions & 20 deletions llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@ target triple = "riscv64"
define void @test1(ptr %a) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store float 1.000000e+00, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP2]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
Expand All @@ -39,15 +38,14 @@ exit: ; preds = %loop
define void @test2(ptr %a) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store float 1.000000e+00, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP2]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: call void @use(ptr [[A]])
; CHECK-NEXT: ret void
Expand All @@ -72,19 +70,18 @@ exit: ; preds = %loop
define void @test3(ptr %a, ptr %b) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B]], [[ENTRY]] ]
; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[LSR_IV2]], align 4
; CHECK-NEXT: [[T18:%.*]] = fadd float [[T17]], 1.000000e+00
; CHECK-NEXT: store float [[T18]], ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP4]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
Expand All @@ -110,19 +107,18 @@ exit: ; preds = %loop
define void @test4(ptr %a, ptr %b) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 128000
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B]], [[ENTRY]] ]
; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[LSR_IV2]], align 4
; CHECK-NEXT: [[T18:%.*]] = fadd float [[T17]], 1.000000e+00
; CHECK-NEXT: store float [[T18]], ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP4]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: call void @use(ptr [[A]])
; CHECK-NEXT: call void @use(ptr [[B]])
Expand Down
20 changes: 9 additions & 11 deletions mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1483,7 +1483,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
Example:

```mlir
acc.loop {
acc.loop gang vector {
scf.for %arg3 = %c0 to %c10 step %c1 {
scf.for %arg4 = %c0 to %c10 step %c1 {
scf.for %arg5 = %c0 to %c10 step %c1 {
Expand All @@ -1492,10 +1492,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
}
}
acc.yield
} attributes {
collapse = [3], gang = [#acc.device_type<none>],
vector = [#acc.device_type<none>]
}
} attributes { collapse = [3] }
```
}];

Expand Down Expand Up @@ -1613,13 +1610,14 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
let hasCustomAssemblyFormat = 1;
let assemblyFormat = [{
oilist(
`gang` `` `(` custom<GangClause>($gangOperands, type($gangOperands),
`gang` `` custom<GangClause>($gangOperands, type($gangOperands),
$gangOperandsArgType, $gangOperandsDeviceType,
$gangOperandsSegments) `)`
| `worker` `` `(` custom<DeviceTypeOperands>($workerNumOperands,
type($workerNumOperands), $workerNumOperandsDeviceType) `)`
| `vector` `` `(` custom<DeviceTypeOperands>($vectorOperands,
type($vectorOperands), $vectorOperandsDeviceType) `)`
$gangOperandsSegments, $gang)
| `worker` `` custom<DeviceTypeOperandsWithKeywordOnly>(
$workerNumOperands, type($workerNumOperands),
$workerNumOperandsDeviceType, $worker)
| `vector` `` custom<DeviceTypeOperandsWithKeywordOnly>($vectorOperands,
type($vectorOperands), $vectorOperandsDeviceType, $vector)
| `private` `(` custom<SymOperandList>(
$privateOperands, type($privateOperands), $privatizations) `)`
| `tile` `(` custom<DeviceTypeOperandsWithSegment>($tileOperands,
Expand Down
275 changes: 228 additions & 47 deletions mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,12 @@ static ParseResult parseDeviceTypeOperandsWithSegment(
return success();
}

/// Print the device type attribute \p attr as ` [<attr>]` (leading space,
/// square brackets). Nothing is printed when the device type is `none`.
static void printSingleDeviceType(mlir::OpAsmPrinter &p, mlir::Attribute attr) {
  // The result is dereferenced unconditionally, so use cast<> (asserts on a
  // type mismatch) rather than dyn_cast<> (which would yield a null attribute
  // that is then dereferenced).
  auto deviceTypeAttr = mlir::cast<mlir::acc::DeviceTypeAttr>(attr);
  if (deviceTypeAttr.getValue() != mlir::acc::DeviceType::None)
    p << " [" << attr << "]";
}

static void printDeviceTypeOperandsWithSegment(
mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::OperandRange operands,
mlir::TypeRange types, std::optional<mlir::ArrayAttr> deviceTypes,
Expand All @@ -937,10 +943,7 @@ static void printDeviceTypeOperandsWithSegment(
++opIdx;
}
p << "}";
auto deviceTypeAttr =
mlir::dyn_cast<mlir::acc::DeviceTypeAttr>((*deviceTypes)[i]);
if (deviceTypeAttr.getValue() != mlir::acc::DeviceType::None)
p << " [" << (*deviceTypes)[i] << "]";
printSingleDeviceType(p, (*deviceTypes)[i]);
}
}

Expand Down Expand Up @@ -978,11 +981,120 @@ printDeviceTypeOperands(mlir::OpAsmPrinter &p, mlir::Operation *op,
if (i != 0)
p << ", ";
p << operands[i] << " : " << operands[i].getType();
printSingleDeviceType(p, (*deviceTypes)[i]);
}
}

/// Parse the optional argument list that follows a clause keyword.
///
/// Accepted forms, as implemented below:
///   - nothing (no `(`): the clause is keyword-only; \p keywordOnlyDeviceType
///     is set to `[#acc.device_type<none>]`.
///   - `(` [ `[` attr {`,` attr} `]` `,` ] operand `:` type [ `[` attr `]` ]
///     {`,` operand `:` type [ `[` attr `]` ]} `)`
///
/// Attributes in the leading bracketed list are stored in
/// \p keywordOnlyDeviceType. Each operand gets one entry in \p deviceTypes:
/// the attribute from its trailing `[...]` if present, otherwise `none`.
///
/// \returns failure() if any required token or element cannot be parsed.
static ParseResult parseDeviceTypeOperandsWithKeywordOnly(
    mlir::OpAsmParser &parser,
    llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &operands,
    llvm::SmallVectorImpl<Type> &types, mlir::ArrayAttr &deviceTypes,
    mlir::ArrayAttr &keywordOnlyDeviceType) {

  llvm::SmallVector<mlir::Attribute> keywordOnlyDeviceTypeAttributes;
  bool needCommaBeforeOperands = false;

  // Keyword only: no parenthesized argument list follows the keyword.
  if (failed(parser.parseOptionalLParen())) {
    keywordOnlyDeviceTypeAttributes.push_back(mlir::acc::DeviceTypeAttr::get(
        parser.getContext(), mlir::acc::DeviceType::None));
    keywordOnlyDeviceType =
        ArrayAttr::get(parser.getContext(), keywordOnlyDeviceTypeAttributes);
    return success();
  }

  // Parse keyword only attributes: an optional leading `[attr, ...]` list.
  if (succeeded(parser.parseOptionalLSquare())) {
    if (failed(parser.parseCommaSeparatedList([&]() {
          if (parser.parseAttribute(
                  keywordOnlyDeviceTypeAttributes.emplace_back()))
            return failure();
          return success();
        })))
      return failure();
    if (parser.parseRSquare())
      return failure();
    needCommaBeforeOperands = true;
  }

  // A comma must separate the keyword-only list from the operands.
  if (needCommaBeforeOperands && failed(parser.parseComma()))
    return failure();

  // Parse `operand : type` entries, each with an optional `[device_type]`.
  llvm::SmallVector<DeviceTypeAttr> attributes;
  if (failed(parser.parseCommaSeparatedList([&]() {
        if (parser.parseOperand(operands.emplace_back()) ||
            parser.parseColonType(types.emplace_back()))
          return failure();
        if (succeeded(parser.parseOptionalLSquare())) {
          if (parser.parseAttribute(attributes.emplace_back()) ||
              parser.parseRSquare())
            return failure();
        } else {
          attributes.push_back(mlir::acc::DeviceTypeAttr::get(
              parser.getContext(), mlir::acc::DeviceType::None));
        }
        return success();
      })))
    return failure();

  if (failed(parser.parseRParen()))
    return failure();

  // Store the keyword-only device types parsed from the leading `[...]` list;
  // without this they would be dropped on the parenthesized path. The
  // attribute is left untouched when no such list was present.
  if (!keywordOnlyDeviceTypeAttributes.empty())
    keywordOnlyDeviceType =
        ArrayAttr::get(parser.getContext(), keywordOnlyDeviceTypeAttributes);

  llvm::SmallVector<mlir::Attribute> arrayAttr(attributes.begin(),
                                               attributes.end());
  deviceTypes = ArrayAttr::get(parser.getContext(), arrayAttr);
  return success();
}

/// Return true when \p arrayAttr is engaged, wraps a non-null ArrayAttr, and
/// that array holds at least one element.
bool hasDeviceTypeValues(std::optional<mlir::ArrayAttr> arrayAttr) {
  // An engaged optional may still wrap a null attribute, so check both
  // before querying the size.
  if (!arrayAttr || !*arrayAttr)
    return false;
  return arrayAttr->size() > 0;
}

/// Print \p deviceTypes as a bracketed, comma-separated list, e.g.
/// `[attr0, attr1]`. Nothing is printed when the list is absent or empty.
static void printDeviceTypes(mlir::OpAsmPrinter &p,
                             std::optional<mlir::ArrayAttr> deviceTypes) {
  if (!hasDeviceTypeValues(deviceTypes))
    return;
  p << "[";
  for (unsigned i = 0; i < deviceTypes.value().size(); ++i) {
    if (i != 0)
      p << ", ";
    // Emit each attribute exactly once. The extra conditional ` [attr]`
    // print duplicated every non-`none` entry and produced nested brackets
    // that the keyword-only parser does not accept.
    auto deviceTypeAttr =
        mlir::cast<mlir::acc::DeviceTypeAttr>((*deviceTypes)[i]);
    p << deviceTypeAttr;
  }
  p << "]";
}

/// Print the argument list that follows a clause keyword.
///
/// Nothing is printed when there are no operands and the keyword-only list is
/// exactly `[#acc.device_type<none>]` (the bare-keyword form). Otherwise the
/// output is `(` [keyword-only device types] [`, `] [operands] `)`, where each
/// operand is printed as `value : type` followed by its device type when that
/// is not `none`.
static void printDeviceTypeOperandsWithKeywordOnly(
    mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::OperandRange operands,
    mlir::TypeRange types, std::optional<mlir::ArrayAttr> deviceTypes,
    std::optional<mlir::ArrayAttr> keywordOnlyDeviceTypes) {

  // Bare keyword: no operands and a single `none` keyword-only device type.
  // hasDeviceTypeValues() also rejects an engaged optional wrapping a null
  // attribute, which must not be queried for its size.
  if (operands.begin() == operands.end() &&
      hasDeviceTypeValues(keywordOnlyDeviceTypes) &&
      keywordOnlyDeviceTypes->size() == 1) {
    auto deviceTypeAttr =
        mlir::cast<mlir::acc::DeviceTypeAttr>((*keywordOnlyDeviceTypes)[0]);
    if (deviceTypeAttr.getValue() == mlir::acc::DeviceType::None)
      return;
  }

  p << "(";

  printDeviceTypes(p, keywordOnlyDeviceTypes);

  const bool hasOperandDeviceTypes = hasDeviceTypeValues(deviceTypes);
  if (hasDeviceTypeValues(keywordOnlyDeviceTypes) && hasOperandDeviceTypes)
    p << ", ";

  // Only walk the per-operand device types when they exist: with keyword-only
  // device types and no operands, `deviceTypes` is absent or null and must
  // not be dereferenced.
  if (hasOperandDeviceTypes) {
    for (unsigned i = 0, e = deviceTypes->size(); i < e; ++i) {
      if (i != 0)
        p << ", ";
      p << operands[i] << " : " << operands[i].getType();
      printSingleDeviceType(p, (*deviceTypes)[i]);
    }
  }
  p << ")";
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1215,15 +1327,15 @@ static ParseResult parseGangValue(
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &operands,
llvm::SmallVectorImpl<Type> &types,
llvm::SmallVector<GangArgTypeAttr> &attributes, GangArgTypeAttr gangArgType,
bool &needComa, bool &newValue) {
bool &needCommaBetweenValues, bool &newValue) {
if (succeeded(parser.parseOptionalKeyword(keyword))) {
if (parser.parseEqual())
return failure();
if (parser.parseOperand(operands.emplace_back()) ||
parser.parseColonType(types.emplace_back()))
return failure();
attributes.push_back(gangArgType);
needComa = true;
needCommaBetweenValues = true;
newValue = true;
}
return success();
Expand All @@ -1233,11 +1345,37 @@ static ParseResult parseGangClause(
OpAsmParser &parser,
llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &gangOperands,
llvm::SmallVectorImpl<Type> &gangOperandsType, mlir::ArrayAttr &gangArgType,
mlir::ArrayAttr &deviceType, mlir::DenseI32ArrayAttr &segments) {
llvm::SmallVector<GangArgTypeAttr> attributes;
llvm::SmallVector<DeviceTypeAttr> deviceTypeAttributes;
mlir::ArrayAttr &deviceType, mlir::DenseI32ArrayAttr &segments,
mlir::ArrayAttr &gangOnlyDeviceType) {
llvm::SmallVector<GangArgTypeAttr> gangArgTypeAttributes;
llvm::SmallVector<mlir::Attribute> deviceTypeAttributes;
llvm::SmallVector<mlir::Attribute> gangOnlyDeviceTypeAttributes;
llvm::SmallVector<int32_t> seg;
bool needComa = false;
bool needCommaBetweenValues = false;
bool needCommaBeforeOperands = false;

// Gang only keyword
if (failed(parser.parseOptionalLParen())) {
gangOnlyDeviceTypeAttributes.push_back(mlir::acc::DeviceTypeAttr::get(
parser.getContext(), mlir::acc::DeviceType::None));
gangOnlyDeviceType =
ArrayAttr::get(parser.getContext(), gangOnlyDeviceTypeAttributes);
return success();
}

// Parse gang only attributes
if (succeeded(parser.parseOptionalLSquare())) {
if (failed(parser.parseCommaSeparatedList([&]() {
if (parser.parseAttribute(
gangOnlyDeviceTypeAttributes.emplace_back()))
return failure();
return success();
})))
return failure();
if (parser.parseRSquare())
return failure();
needCommaBeforeOperands = true;
}

auto argNum = mlir::acc::GangArgTypeAttr::get(parser.getContext(),
mlir::acc::GangArgType::Num);
Expand All @@ -1247,31 +1385,39 @@ static ParseResult parseGangClause(
parser.getContext(), mlir::acc::GangArgType::Static);

do {
if (needCommaBeforeOperands) {
needCommaBeforeOperands = false;
continue;
}

if (failed(parser.parseLBrace()))
return failure();

int32_t crtOperandsSize = gangOperands.size();
while (true) {
bool newValue = false;
bool needValue = false;
if (needComa) {
if (needCommaBetweenValues) {
if (succeeded(parser.parseOptionalComma()))
needValue = true; // expect a new value after comma.
else
break;
}

if (failed(parseGangValue(parser, LoopOp::getGangNumKeyword(),
gangOperands, gangOperandsType, attributes,
argNum, needComa, newValue)))
gangOperands, gangOperandsType,
gangArgTypeAttributes, argNum,
needCommaBetweenValues, newValue)))
return failure();
if (failed(parseGangValue(parser, LoopOp::getGangDimKeyword(),
gangOperands, gangOperandsType, attributes,
argDim, needComa, newValue)))
gangOperands, gangOperandsType,
gangArgTypeAttributes, argDim,
needCommaBetweenValues, newValue)))
return failure();
if (failed(parseGangValue(parser, LoopOp::getGangStaticKeyword(),
gangOperands, gangOperandsType, attributes,
argStatic, needComa, newValue)))
gangOperands, gangOperandsType,
gangArgTypeAttributes, argStatic,
needCommaBetweenValues, newValue)))
return failure();

if (!newValue && needValue) {
Expand Down Expand Up @@ -1305,13 +1451,18 @@ static ParseResult parseGangClause(

} while (succeeded(parser.parseOptionalComma()));

llvm::SmallVector<mlir::Attribute> arrayAttr(attributes.begin(),
attributes.end());
if (failed(parser.parseRParen()))
return failure();

llvm::SmallVector<mlir::Attribute> arrayAttr(gangArgTypeAttributes.begin(),
gangArgTypeAttributes.end());
gangArgType = ArrayAttr::get(parser.getContext(), arrayAttr);
deviceType = ArrayAttr::get(parser.getContext(), deviceTypeAttributes);

llvm::SmallVector<mlir::Attribute> gangOnlyAttr(
gangOnlyDeviceTypeAttributes.begin(), gangOnlyDeviceTypeAttributes.end());
gangOnlyDeviceType = ArrayAttr::get(parser.getContext(), gangOnlyAttr);

llvm::SmallVector<mlir::Attribute> deviceTypeAttr(
deviceTypeAttributes.begin(), deviceTypeAttributes.end());
deviceType = ArrayAttr::get(parser.getContext(), deviceTypeAttr);
segments = DenseI32ArrayAttr::get(parser.getContext(), seg);
return success();
}
Expand All @@ -1320,33 +1471,63 @@ void printGangClause(OpAsmPrinter &p, Operation *op,
mlir::OperandRange operands, mlir::TypeRange types,
std::optional<mlir::ArrayAttr> gangArgTypes,
std::optional<mlir::ArrayAttr> deviceTypes,
std::optional<mlir::DenseI32ArrayAttr> segments) {
unsigned opIdx = 0;
for (unsigned i = 0; i < deviceTypes->size(); ++i) {
if (i != 0)
p << ", ";
p << "{";
for (int32_t j = 0; j < (*segments)[i]; ++j) {
if (j != 0)
std::optional<mlir::DenseI32ArrayAttr> segments,
std::optional<mlir::ArrayAttr> gangOnlyDeviceTypes) {

if (operands.begin() == operands.end() && gangOnlyDeviceTypes &&
gangOnlyDeviceTypes->size() == 1) {
auto deviceTypeAttr =
mlir::dyn_cast<mlir::acc::DeviceTypeAttr>((*gangOnlyDeviceTypes)[0]);
if (deviceTypeAttr.getValue() == mlir::acc::DeviceType::None)
return;
}

p << "(";
if (hasDeviceTypeValues(gangOnlyDeviceTypes)) {
p << "[";
for (unsigned i = 0; i < gangOnlyDeviceTypes.value().size(); ++i) {
if (i != 0)
p << ", ";
auto gangArgTypeAttr =
mlir::dyn_cast<mlir::acc::GangArgTypeAttr>((*gangArgTypes)[opIdx]);
if (gangArgTypeAttr.getValue() == mlir::acc::GangArgType::Num)
p << LoopOp::getGangNumKeyword();
else if (gangArgTypeAttr.getValue() == mlir::acc::GangArgType::Dim)
p << LoopOp::getGangDimKeyword();
else if (gangArgTypeAttr.getValue() == mlir::acc::GangArgType::Static)
p << LoopOp::getGangStaticKeyword();
p << "=" << operands[opIdx] << " : " << operands[opIdx].getType();
++opIdx;
auto deviceTypeAttr =
mlir::dyn_cast<mlir::acc::DeviceTypeAttr>((*gangOnlyDeviceTypes)[i]);
p << deviceTypeAttr;
}
p << "]";
}

p << "}";
auto deviceTypeAttr =
mlir::dyn_cast<mlir::acc::DeviceTypeAttr>((*deviceTypes)[i]);
if (deviceTypeAttr.getValue() != mlir::acc::DeviceType::None)
p << " [" << (*deviceTypes)[i] << "]";
if (hasDeviceTypeValues(gangOnlyDeviceTypes) &&
hasDeviceTypeValues(deviceTypes))
p << ", ";

if (deviceTypes) {
unsigned opIdx = 0;
for (unsigned i = 0; i < deviceTypes->size(); ++i) {
if (i != 0)
p << ", ";
p << "{";
for (int32_t j = 0; j < (*segments)[i]; ++j) {
if (j != 0)
p << ", ";
auto gangArgTypeAttr =
mlir::dyn_cast<mlir::acc::GangArgTypeAttr>((*gangArgTypes)[opIdx]);
if (gangArgTypeAttr.getValue() == mlir::acc::GangArgType::Num)
p << LoopOp::getGangNumKeyword();
else if (gangArgTypeAttr.getValue() == mlir::acc::GangArgType::Dim)
p << LoopOp::getGangDimKeyword();
else if (gangArgTypeAttr.getValue() == mlir::acc::GangArgType::Static)
p << LoopOp::getGangStaticKeyword();
p << "=" << operands[opIdx] << " : " << operands[opIdx].getType();
++opIdx;
}

p << "}";
auto deviceTypeAttr =
mlir::dyn_cast<mlir::acc::DeviceTypeAttr>((*deviceTypes)[i]);
if (deviceTypeAttr.getValue() != mlir::acc::DeviceType::None)
p << " [" << (*deviceTypes)[i] << "]";
}
}
p << ")";
}

bool hasDuplicateDeviceTypes(
Expand Down
40 changes: 20 additions & 20 deletions mlir/test/Dialect/OpenACC/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ func.func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
%async = arith.constant 1 : i64

acc.parallel async(%async: i64) {
acc.loop {
acc.loop gang vector {
scf.for %arg3 = %c0 to %c10 step %c1 {
scf.for %arg4 = %c0 to %c10 step %c1 {
scf.for %arg5 = %c0 to %c10 step %c1 {
Expand All @@ -25,7 +25,7 @@ func.func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
}
}
acc.yield
} attributes { collapse = [3], collapseDeviceType = [#acc.device_type<none>], vector = [#acc.device_type<none>], gang = [#acc.device_type<none>]}
} attributes { collapse = [3], collapseDeviceType = [#acc.device_type<none>]}
acc.yield
}

Expand All @@ -38,7 +38,7 @@ func.func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
// CHECK-NEXT: %{{.*}} = arith.constant 1 : index
// CHECK-NEXT: [[ASYNC:%.*]] = arith.constant 1 : i64
// CHECK-NEXT: acc.parallel async([[ASYNC]] : i64) {
// CHECK-NEXT: acc.loop {
// CHECK-NEXT: acc.loop gang vector {
// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
Expand All @@ -52,7 +52,7 @@ func.func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>], gang = [#acc.device_type<none>], vector = [#acc.device_type<none>]}
// CHECK-NEXT: } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>]}
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: return %{{.*}} : memref<10x10xf32>
Expand Down Expand Up @@ -138,17 +138,17 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x
acc.data dataOperands(%pa, %pb, %pc, %pd: memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) {
%private = acc.private varPtr(%c : memref<10xf32>) -> memref<10xf32>
acc.parallel num_gangs({%numGangs: i64}) num_workers(%numWorkers: i64 [#acc.device_type<nvidia>]) private(@privatization_memref_10_f32 -> %private : memref<10xf32>) {
acc.loop {
acc.loop gang {
scf.for %x = %lb to %c10 step %st {
acc.loop {
acc.loop worker {
scf.for %y = %lb to %c10 step %st {
%axy = memref.load %a[%x, %y] : memref<10x10xf32>
%bxy = memref.load %b[%x, %y] : memref<10x10xf32>
%tmp = arith.addf %axy, %bxy : f32
memref.store %tmp, %c[%y] : memref<10xf32>
}
acc.yield
} attributes {worker = [#acc.device_type<none>]}
}

acc.loop {
// for i = 0 to 10 step 1
Expand All @@ -163,7 +163,7 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x
} attributes {seq = [#acc.device_type<none>]}
}
acc.yield
} attributes {gang = [#acc.device_type<none>]}
}
acc.yield
}
acc.terminator
Expand All @@ -181,17 +181,17 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x
// CHECK: acc.data dataOperands(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) {
// CHECK-NEXT: %[[P_ARG2:.*]] = acc.private varPtr([[ARG2]] : memref<10xf32>) -> memref<10xf32>
// CHECK-NEXT: acc.parallel num_gangs({[[NUMGANG]] : i64}) num_workers([[NUMWORKERS]] : i64 [#acc.device_type<nvidia>]) private(@privatization_memref_10_f32 -> %[[P_ARG2]] : memref<10xf32>) {
// CHECK-NEXT: acc.loop {
// CHECK-NEXT: acc.loop gang {
// CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] {
// CHECK-NEXT: acc.loop {
// CHECK-NEXT: acc.loop worker {
// CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] {
// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
// CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {worker = [#acc.device_type<none>]}
// CHECK-NEXT: }
// CHECK-NEXT: acc.loop {
// CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] {
// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32>
Expand All @@ -203,7 +203,7 @@ func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10x
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]}
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {gang = [#acc.device_type<none>]}
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.terminator
Expand All @@ -218,10 +218,10 @@ func.func @testloopop(%a : memref<10xf32>) -> () {
%i32Value = arith.constant 128 : i32
%idxValue = arith.constant 8 : index

acc.loop {
acc.loop gang vector worker {
"test.openacc_dummy_op"() : () -> ()
acc.yield
} attributes {vector = [#acc.device_type<none>], worker = [#acc.device_type<none>], gang = [#acc.device_type<none>]}
}
acc.loop gang({num=%i64Value: i64}) {
"test.openacc_dummy_op"() : () -> ()
acc.yield
Expand Down Expand Up @@ -254,10 +254,10 @@ func.func @testloopop(%a : memref<10xf32>) -> () {
"test.openacc_dummy_op"() : () -> ()
acc.yield
}
acc.loop gang({num=%i64Value: i64}) {
acc.loop gang({num=%i64Value: i64}) worker vector {
"test.openacc_dummy_op"() : () -> ()
acc.yield
} attributes {vector = [#acc.device_type<none>], worker = [#acc.device_type<none>]}
}
acc.loop gang({num=%i64Value: i64, static=%i64Value: i64}) worker(%i64Value: i64) vector(%i64Value: i64) {
"test.openacc_dummy_op"() : () -> ()
acc.yield
Expand Down Expand Up @@ -293,10 +293,10 @@ func.func @testloopop(%a : memref<10xf32>) -> () {
// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64
// CHECK-NEXT: [[I32VALUE:%.*]] = arith.constant 128 : i32
// CHECK-NEXT: [[IDXVALUE:%.*]] = arith.constant 8 : index
// CHECK: acc.loop {
// CHECK: acc.loop gang worker vector {
// CHECK-NEXT: "test.openacc_dummy_op"() : () -> ()
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {gang = [#acc.device_type<none>], vector = [#acc.device_type<none>], worker = [#acc.device_type<none>]}
// CHECK-NEXT: }
// CHECK: acc.loop gang({num=[[I64VALUE]] : i64}) {
// CHECK-NEXT: "test.openacc_dummy_op"() : () -> ()
// CHECK-NEXT: acc.yield
Expand Down Expand Up @@ -329,10 +329,10 @@ func.func @testloopop(%a : memref<10xf32>) -> () {
// CHECK-NEXT: "test.openacc_dummy_op"() : () -> ()
// CHECK-NEXT: acc.yield
// CHECK-NEXT: }
// CHECK: acc.loop gang({num=[[I64VALUE]] : i64}) {
// CHECK: acc.loop gang({num=[[I64VALUE]] : i64}) worker vector {
// CHECK-NEXT: "test.openacc_dummy_op"() : () -> ()
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } attributes {vector = [#acc.device_type<none>], worker = [#acc.device_type<none>]}
// CHECK-NEXT: }
// CHECK: acc.loop gang({num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64}) worker([[I64VALUE]] : i64) vector([[I64VALUE]] : i64) {
// CHECK-NEXT: "test.openacc_dummy_op"() : () -> ()
// CHECK-NEXT: acc.yield
Expand Down
18 changes: 13 additions & 5 deletions openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1976,12 +1976,18 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

/// Run the global constructor kernel for \p Image.
///
/// Before doing so, record on the image whether it contains the
/// "amdgcn.device.fini" symbol so that callGlobalDestructors can decide
/// later without inspecting the image again.
virtual Error callGlobalConstructors(GenericPluginTy &Plugin,
                                     DeviceImageTy &Image) override {
  // This bookkeeping must run before the constructor call returns; a leading
  // unconditional return would leave it unreachable.
  GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
  if (Handler.isSymbolInImage(*this, Image, "amdgcn.device.fini"))
    Image.setPendingGlobalDtors();

  return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/true);
}

/// Run the global destructor kernel for \p Image, but only when the image
/// was flagged as having pending global destructors; otherwise succeed
/// without doing anything.
virtual Error callGlobalDestructors(GenericPluginTy &Plugin,
                                    DeviceImageTy &Image) override {
  // Rely on the flag recorded on the image rather than calling the common
  // helper unconditionally.
  if (!Image.hasPendingGlobalDtors())
    return Plugin::success();
  return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
}

const uint64_t getStreamBusyWaitMicroseconds() const {
Expand Down Expand Up @@ -2701,15 +2707,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Common method to invoke a single threaded constructor or destructor
/// kernel by name.
Error callGlobalCtorDtorCommon(GenericPluginTy &Plugin, DeviceImageTy &Image,
const char *Name) {
bool IsCtor) {
const char *KernelName =
IsCtor ? "amdgcn.device.init" : "amdgcn.device.fini";
// Perform a quick check for the named kernel in the image. The kernel
// should be created by the 'amdgpu-lower-ctor-dtor' pass.
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
if (!Handler.isSymbolInImage(*this, Image, Name))
if (IsCtor && !Handler.isSymbolInImage(*this, Image, KernelName))
return Plugin::success();

// Allocate and construct the AMDGPU kernel.
AMDGPUKernelTy AMDGPUKernel(Name);
AMDGPUKernelTy AMDGPUKernel(KernelName);
if (auto Err = AMDGPUKernel.init(*this, Image))
return Err;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,12 +218,19 @@ class DeviceImageTy {
const __tgt_device_image *TgtImage;
const __tgt_device_image *TgtImageBitcode;

/// If this image has any global destructors that must be called.
/// FIXME: This is only required because we currently have no invariants
/// towards the lifetime of the underlying image. We should either copy
/// the image into memory locally or erase the pointers after init.
bool PendingGlobalDtors;

/// Table of offload entries.
OffloadEntryTableTy OffloadEntryTable;

public:
DeviceImageTy(int32_t Id, const __tgt_device_image *Image)
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr) {
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr),
PendingGlobalDtors(false) {
assert(TgtImage && "Invalid target image");
}

Expand Down Expand Up @@ -255,6 +262,10 @@ class DeviceImageTy {
"Image");
}

/// Flag this image as having global destructors that still need to run.
/// Always returns true (the newly stored value).
bool setPendingGlobalDtors() {
  PendingGlobalDtors = true;
  return PendingGlobalDtors;
}
/// Report whether this image was flagged as having pending global
/// destructors.
bool hasPendingGlobalDtors() const {
  return PendingGlobalDtors;
}

/// Return a mutable reference to this image's table of offload entries.
OffloadEntryTableTy &getOffloadEntryTable() {
  return OffloadEntryTable;
}
};
Expand Down
12 changes: 10 additions & 2 deletions openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,12 +392,20 @@ struct CUDADeviceTy : public GenericDeviceTy {

/// Run the global constructor kernel for \p Image, first recording on the
/// image whether it also carries a global destructor kernel.
virtual Error callGlobalConstructors(GenericPluginTy &Plugin,
                                     DeviceImageTy &Image) override {
  // Check for the presence of global destructors at initialization time.
  // This is required because the image may be deallocated before the
  // destructors are run.
  if (Plugin.getGlobalHandler().isSymbolInImage(*this, Image,
                                                "nvptx$device$fini"))
    Image.setPendingGlobalDtors();

  return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/true);
}

/// Run the global destructor kernel for \p Image, but only when the image
/// was flagged as having pending global destructors; otherwise succeed
/// without doing anything.
virtual Error callGlobalDestructors(GenericPluginTy &Plugin,
                                    DeviceImageTy &Image) override {
  // Rely on the flag recorded on the image rather than calling the common
  // helper unconditionally.
  if (!Image.hasPendingGlobalDtors())
    return Plugin::success();
  return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
}

Expected<std::unique_ptr<MemoryBuffer>>
Expand Down Expand Up @@ -1145,7 +1153,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
// Perform a quick check for the named kernel in the image. The kernel
// should be created by the 'nvptx-lower-ctor-dtor' pass.
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
if (!Handler.isSymbolInImage(*this, Image, KernelName))
if (IsCtor && !Handler.isSymbolInImage(*this, Image, KernelName))
return Plugin::success();

// The Nvidia backend cannot handle creating the ctor / dtor array
Expand Down