Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions flang-rt/lib/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,6 @@ endif ()
if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx")
set(sources ${gpu_sources})
elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA")
# findloc.cpp has some issues with higher compute capability. Remove it
# from CUDA build until we can lower its memory footprint.
list(REMOVE_ITEM supported_sources findloc.cpp)
set(sources ${supported_sources})
else ()
set(sources ${supported_sources} ${host_sources} ${f128_sources})
Expand Down
9 changes: 6 additions & 3 deletions flang-rt/lib/runtime/extrema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,12 @@ template <TypeCategory CAT, bool IS_MAX,
template <typename, bool, bool> class COMPARE>
struct DoPartialMaxOrMinLocHelper {
template <int KIND> struct Functor {
RT_API_ATTRS void operator()(const char *intrinsic, Descriptor &result,
const Descriptor &x, int kind, int dim, const Descriptor *mask,
bool back, Terminator &terminator) const {
// NVCC inlines more aggressively, which causes too many specializations of
// this function to be inlined and leads to compiler timeouts. Mark it
// noinline so that compilation can complete.
RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(const char *intrinsic,
Descriptor &result, const Descriptor &x, int kind, int dim,
const Descriptor *mask, bool back, Terminator &terminator) const {
DoPartialMaxOrMinLoc<CAT, KIND, IS_MAX, COMPARE>(
intrinsic, result, x, kind, dim, mask, back, terminator);
}
Expand Down
11 changes: 7 additions & 4 deletions flang-rt/lib/runtime/findloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,13 @@ template <TypeCategory CAT,
class HELPER>
struct NumericFindlocHelper {
template <int KIND> struct Functor {
RT_API_ATTRS void operator()(TypeCategory targetCat, int targetKind,
Descriptor &result, const Descriptor &x, const Descriptor &target,
int kind, int dim, const Descriptor *mask, bool back,
Terminator &terminator) const {
// NVCC inlines more aggressively, which causes too many specializations of
// this function to be inlined and leads to compiler timeouts. Mark it
// noinline so that compilation can complete.
RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(TypeCategory targetCat,
int targetKind, Descriptor &result, const Descriptor &x,
const Descriptor &target, int kind, int dim, const Descriptor *mask,
bool back, Terminator &terminator) const {
switch (targetCat) {
case TypeCategory::Integer:
case TypeCategory::Unsigned:
Expand Down
Loading