Skip to content

Commit

Permalink
[flang][runtime] Enable more code for offload device builds. (llvm#67489)
Browse files Browse the repository at this point in the history

I extended the "closure" of the device code containing the initial
transformational.cpp. The device side of the library should now be
complete, at least for some APIs. For example, I tested with C OpenMP
code calling BesselJnX0 with a nullptr descriptor, which failed with
a runtime error when executing on a GPU.

I added `--expt-relaxed-constexpr` for the NVCC compiler to avoid multiple
warnings about missing `__attribute__((device))` on constexpr methods
coming from C++ header files.
  • Loading branch information
vzakhari authored and legrosbuffle committed Sep 29, 2023
1 parent 9545388 commit 0f2ce3c
Show file tree
Hide file tree
Showing 12 changed files with 396 additions and 150 deletions.
24 changes: 24 additions & 0 deletions flang/include/flang/Runtime/api-attrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@
#endif
#endif /* !defined(RT_EXT_API_GROUP_END) */

/*
* The RT_OFFLOAD_API_GROUP_BEGIN/END pair is placed around definitions
* of functions that can be referenced in other modules of the Flang
* runtime. For OpenMP offload, these functions are made "declare target",
* making sure they are compiled for the target even though direct
* references to them from other "declare target" functions may not
* be seen. Host-only functions should not be put in between these
* two macros.
*
* Both macros simply expand to their RT_EXT_API_GROUP_BEGIN/END
* counterparts.
*/
#define RT_OFFLOAD_API_GROUP_BEGIN RT_EXT_API_GROUP_BEGIN
#define RT_OFFLOAD_API_GROUP_END RT_EXT_API_GROUP_END

/*
* RT_VAR_GROUP_BEGIN/END pair is placed around definitions
* of module scope variables referenced by Flang runtime (directly
Expand Down Expand Up @@ -88,4 +100,16 @@
#endif
#endif /* !defined(RT_CONST_VAR_ATTRS) */

/*
* RT_DEVICE_COMPILATION is defined for any device compilation.
* Note that it can only be used reliably with compilers that perform
* separate host and device compilations.
*
* It is defined as 1 when either of the following holds:
*   - a CUDA compilation (__CUDACC__ or __CUDA__ is defined) that also
*     has __CUDA_ARCH__ defined;
*   - an OpenMP compilation (_OPENMP is defined) for which __AMDGCN__
*     or __NVPTX__ is defined.
* In every other case the macro is explicitly undefined, so that
* `#ifdef RT_DEVICE_COMPILATION` / `defined(RT_DEVICE_COMPILATION)`
* checks are false regardless of any earlier definition.
*/
#if ((defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)) || \
(defined(_OPENMP) && (defined(__AMDGCN__) || defined(__NVPTX__)))
#define RT_DEVICE_COMPILATION 1
#else
#undef RT_DEVICE_COMPILATION
#endif

#endif /* !FORTRAN_RUNTIME_API_ATTRS_H_ */
15 changes: 8 additions & 7 deletions flang/include/flang/Runtime/descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,20 +181,21 @@ class Descriptor {
ISO::CFI_attribute_t attribute = CFI_attribute_other);

// CUDA_TODO: Clang does not support unique_ptr on device.
static OwningPtr<Descriptor> Create(TypeCode t, std::size_t elementBytes,
void *p = nullptr, int rank = maxRank,
static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCode t,
std::size_t elementBytes, void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other,
int derivedTypeLenParameters = 0);
static OwningPtr<Descriptor> Create(TypeCategory, int kind, void *p = nullptr,
int rank = maxRank, const SubscriptValue *extent = nullptr,
static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCategory, int kind,
void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other);
static OwningPtr<Descriptor> Create(int characterKind,
static RT_API_ATTRS OwningPtr<Descriptor> Create(int characterKind,
SubscriptValue characters, void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other);
static OwningPtr<Descriptor> Create(const typeInfo::DerivedType &dt,
void *p = nullptr, int rank = maxRank,
static RT_API_ATTRS OwningPtr<Descriptor> Create(
const typeInfo::DerivedType &dt, void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other);

Expand Down
97 changes: 91 additions & 6 deletions flang/include/flang/Runtime/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,116 @@
#ifndef FORTRAN_RUNTIME_MEMORY_H_
#define FORTRAN_RUNTIME_MEMORY_H_

#include "flang/Runtime/api-attrs.h"
#include <cassert>
#include <memory>
#include <type_traits>

namespace Fortran::runtime {

class Terminator;

[[nodiscard]] void *AllocateMemoryOrCrash(
[[nodiscard]] RT_API_ATTRS void *AllocateMemoryOrCrash(
const Terminator &, std::size_t bytes);
template <typename A> [[nodiscard]] A &AllocateOrCrash(const Terminator &t) {
return *reinterpret_cast<A *>(AllocateMemoryOrCrash(t, sizeof(A)));
}
void FreeMemory(void *);
template <typename A> void FreeMemory(A *p) {
// Untyped release entry point; only declared here (the definition is
// outside this header).
// NOTE(review): presumably the counterpart of AllocateMemoryOrCrash() --
// confirm against the definition.
RT_API_ATTRS void FreeMemory(void *);
// Typed convenience overload: erases the pointee type and forwards to
// FreeMemory(void *).  No destructor of A is invoked here.
template <typename A> RT_API_ATTRS void FreeMemory(A *p) {
  void *untyped{reinterpret_cast<void *>(p)};
  FreeMemory(untyped);
}
template <typename A> void FreeMemoryAndNullify(A *&p) {
FreeMemory(p);
p = nullptr;
}

template <typename A> struct OwningPtrDeleter {
void operator()(A *p) { FreeMemory(p); }
// Very basic implementation mimicking std::unique_ptr.
// It should work for any offload device compiler.
// It uses a fixed memory deleter based on FreeMemory(),
// and does not support array objects with runtime length.
//
// Notes:
//  - The owned pointer is released by calling FreeMemory() only; this
//    class makes no destructor call on the pointee.
//  - The implementation deliberately uses only plain pointer assignments
//    (no std::swap), so it does not depend on <utility> and does not
//    call standard library functions from RT_API_ATTRS members.
//  - Move operations, release(), swap() and the observers are noexcept;
//    they only copy raw pointers and call FreeMemory().
template <typename A> class OwningPtr {
public:
  using pointer_type = A *;

  // Creates an empty owner (holds a null pointer).
  OwningPtr() = default;
  // Takes ownership of p; p is later passed to FreeMemory() unless it is
  // released first.
  RT_API_ATTRS explicit OwningPtr(pointer_type p) : ptr_(p) {}
  // Not copyable: a pointer has at most one owner.
  RT_API_ATTRS OwningPtr(const OwningPtr &) = delete;
  RT_API_ATTRS OwningPtr &operator=(const OwningPtr &) = delete;
  // Move construction transfers ownership and leaves `other` empty.
  RT_API_ATTRS OwningPtr(OwningPtr &&other) noexcept
      : ptr_(other.release()) {}
  // Move assignment frees the currently owned pointer (if any), then
  // takes over the pointer of `other`, leaving `other` empty.
  // Self-assignment is a no-op.
  RT_API_ATTRS OwningPtr &operator=(OwningPtr &&other) noexcept {
    if (this != &other) {
      reset(other.release());
    }
    return *this;
  }
  // Allows initialization from nullptr; equivalent to the default
  // constructor.
  constexpr RT_API_ATTRS OwningPtr(std::nullptr_t) : OwningPtr() {}

  // Delete the pointer, if owns one.
  RT_API_ATTRS ~OwningPtr() { reset(); }

  // Release the ownership: returns the stored pointer without freeing it
  // and leaves this object empty.
  RT_API_ATTRS pointer_type release() noexcept {
    pointer_type released{ptr_};
    ptr_ = pointer_type{};
    return released;
  }

  // Replace the pointer.  The new pointer is stored first, then the
  // previously owned pointer (if non-null) is freed.
  RT_API_ATTRS void reset(pointer_type p = pointer_type{}) {
    pointer_type previous{ptr_};
    ptr_ = p;
    if (previous != pointer_type{}) {
      // Delete the owned pointer.
      delete_ptr(previous);
    }
  }

  // Exchange the pointer with another object.
  RT_API_ATTRS void swap(OwningPtr &other) noexcept {
    pointer_type mine{ptr_};
    ptr_ = other.ptr_;
    other.ptr_ = mine;
  }

  // Get the stored pointer (may be null); ownership is not affected.
  RT_API_ATTRS pointer_type get() const noexcept { return ptr_; }

  // True when a non-null pointer is owned.
  RT_API_ATTRS explicit operator bool() const noexcept {
    return get() != pointer_type{};
  }

  // Dereferences the owned pointer, which must be non-null (asserted).
  RT_API_ATTRS typename std::add_lvalue_reference<A>::type operator*() const {
    assert(get() != pointer_type{});
    return *get();
  }

  RT_API_ATTRS pointer_type operator->() const { return get(); }

private:
  // The fixed deleter: hands the pointer to FreeMemory().
  RT_API_ATTRS void delete_ptr(pointer_type p) { FreeMemory(p); }
  pointer_type ptr_{};
};

template <typename A> using OwningPtr = std::unique_ptr<A, OwningPtrDeleter<A>>;
// Two owners compare unequal when they store different addresses.
template <typename L, typename R>
inline RT_API_ATTRS bool operator!=(
    const OwningPtr<L> &lhs, const OwningPtr<R> &rhs) {
  return lhs.get() != rhs.get();
}

// An owner differs from nullptr exactly when it holds a pointer.
template <typename T>
inline RT_API_ATTRS bool operator!=(
    const OwningPtr<T> &owner, std::nullptr_t) {
  return static_cast<bool>(owner);
}

// Same comparison with the operands in the opposite order.
template <typename T>
inline RT_API_ATTRS bool operator!=(
    std::nullptr_t, const OwningPtr<T> &owner) {
  return static_cast<bool>(owner);
}

template <typename A> class SizedNew {
public:
Expand Down
22 changes: 13 additions & 9 deletions flang/include/flang/Runtime/type-code.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,33 @@ class TypeCode {

RT_API_ATTRS int raw() const { return raw_; }

constexpr bool IsValid() const {
constexpr RT_API_ATTRS bool IsValid() const {
return raw_ >= CFI_type_signed_char && raw_ <= CFI_TYPE_LAST;
}
constexpr bool IsInteger() const {
constexpr RT_API_ATTRS bool IsInteger() const {
return raw_ >= CFI_type_signed_char && raw_ <= CFI_type_ptrdiff_t;
}
constexpr bool IsReal() const {
constexpr RT_API_ATTRS bool IsReal() const {
return raw_ >= CFI_type_half_float && raw_ <= CFI_type_float128;
}
constexpr bool IsComplex() const {
constexpr RT_API_ATTRS bool IsComplex() const {
return raw_ >= CFI_type_half_float_Complex &&
raw_ <= CFI_type_float128_Complex;
}
constexpr bool IsCharacter() const {
constexpr RT_API_ATTRS bool IsCharacter() const {
return raw_ == CFI_type_char || raw_ == CFI_type_char16_t ||
raw_ == CFI_type_char32_t;
}
constexpr bool IsLogical() const {
constexpr RT_API_ATTRS bool IsLogical() const {
return raw_ == CFI_type_Bool ||
(raw_ >= CFI_type_int_least8_t && raw_ <= CFI_type_int_least64_t);
}
constexpr bool IsDerived() const { return raw_ == CFI_type_struct; }
constexpr bool IsIntrinsic() const { return IsValid() && !IsDerived(); }
constexpr RT_API_ATTRS bool IsDerived() const {
return raw_ == CFI_type_struct;
}
constexpr RT_API_ATTRS bool IsIntrinsic() const {
return IsValid() && !IsDerived();
}

RT_API_ATTRS std::optional<std::pair<TypeCategory, int>>
GetCategoryAndKind() const;
Expand All @@ -65,7 +69,7 @@ class TypeCode {
return thisCK && thatCK && *thisCK == *thatCK;
}
}
bool operator!=(TypeCode that) const { return !(*this == that); }
RT_API_ATTRS bool operator!=(TypeCode that) const { return !(*this == that); }

private:
ISO::CFI_type_t raw_{CFI_type_other};
Expand Down
8 changes: 8 additions & 0 deletions flang/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,10 @@ option(FLANG_EXPERIMENTAL_CUDA_RUNTIME

# List of files that are buildable for all devices.
set(supported_files
descriptor.cpp
terminator.cpp
transformational.cpp
type-code.cpp
)

if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
Expand All @@ -175,6 +178,11 @@ if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
-Xclang -fcuda-allow-variadic-functions
)
endif()
if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
set(CUDA_COMPILE_OPTIONS
--expt-relaxed-constexpr
)
endif()
set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS
"${CUDA_COMPILE_OPTIONS}"
)
Expand Down
20 changes: 10 additions & 10 deletions flang/runtime/ISO_Fortran_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
#include <cstdlib>

namespace Fortran::ISO {
static inline constexpr bool IsCharacterType(CFI_type_t ty) {
static inline constexpr RT_API_ATTRS bool IsCharacterType(CFI_type_t ty) {
return ty == CFI_type_char || ty == CFI_type_char16_t ||
ty == CFI_type_char32_t;
}
static inline constexpr bool IsAssumedSize(const CFI_cdesc_t *dv) {
static inline constexpr RT_API_ATTRS bool IsAssumedSize(const CFI_cdesc_t *dv) {
return dv->rank > 0 && dv->dim[dv->rank - 1].extent == -1;
}

static inline std::size_t MinElemLen(CFI_type_t type) {
static inline RT_API_ATTRS std::size_t MinElemLen(CFI_type_t type) {
auto typeParams{Fortran::runtime::TypeCode{type}.GetCategoryAndKind()};
if (!typeParams) {
Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
Expand All @@ -38,10 +38,10 @@ static inline std::size_t MinElemLen(CFI_type_t type) {
typeParams->first, typeParams->second);
}

static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[],
bool external) {
static inline RT_API_ATTRS int VerifyEstablishParameters(
CFI_cdesc_t *descriptor, void *base_addr, CFI_attribute_t attribute,
CFI_type_t type, std::size_t elem_len, CFI_rank_t rank,
const CFI_index_t extents[], bool external) {
if (attribute != CFI_attribute_other && attribute != CFI_attribute_pointer &&
attribute != CFI_attribute_allocatable) {
return CFI_INVALID_ATTRIBUTE;
Expand Down Expand Up @@ -77,9 +77,9 @@ static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
return CFI_SUCCESS;
}

static inline void EstablishDescriptor(CFI_cdesc_t *descriptor, void *base_addr,
CFI_attribute_t attribute, CFI_type_t type, std::size_t elem_len,
CFI_rank_t rank, const CFI_index_t extents[]) {
static inline RT_API_ATTRS void EstablishDescriptor(CFI_cdesc_t *descriptor,
void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[]) {
descriptor->base_addr = base_addr;
descriptor->elem_len = elem_len;
descriptor->version = CFI_VERSION;
Expand Down
14 changes: 8 additions & 6 deletions flang/runtime/derived.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#ifndef FORTRAN_RUNTIME_DERIVED_H_
#define FORTRAN_RUNTIME_DERIVED_H_

#include "flang/Runtime/api-attrs.h"

namespace Fortran::runtime::typeInfo {
class DerivedType;
}
Expand All @@ -21,21 +23,21 @@ class Terminator;

// Perform default component initialization, allocate automatic components.
// Returns a STAT= code (0 when all's well).
int Initialize(const Descriptor &, const typeInfo::DerivedType &, Terminator &,
bool hasStat = false, const Descriptor *errMsg = nullptr);
RT_API_ATTRS int Initialize(const Descriptor &, const typeInfo::DerivedType &,
Terminator &, bool hasStat = false, const Descriptor *errMsg = nullptr);

// Call FINAL subroutines, if any
void Finalize(
RT_API_ATTRS void Finalize(
const Descriptor &, const typeInfo::DerivedType &derived, Terminator *);

// Call FINAL subroutines, deallocate allocatable & automatic components.
// Does not deallocate the original descriptor.
void Destroy(const Descriptor &, bool finalize, const typeInfo::DerivedType &,
Terminator *);
RT_API_ATTRS void Destroy(const Descriptor &, bool finalize,
const typeInfo::DerivedType &, Terminator *);

// Return true if the passed descriptor is for a derived type
// entity that has a dynamic (allocatable, automatic) component.
bool HasDynamicComponent(const Descriptor &);
RT_API_ATTRS bool HasDynamicComponent(const Descriptor &);

} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_DERIVED_H_

0 comments on commit 0f2ce3c

Please sign in to comment.