Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flang][runtime] Enable more code for offload device builds. #67489

Merged
merged 3 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions flang/include/flang/Runtime/api-attrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@
#endif
#endif /* !defined(RT_EXT_API_GROUP_END) */

/*
 * The RT_OFFLOAD_API_GROUP_BEGIN/END pair is placed around definitions
 * of functions that can be referenced from other modules of the Flang
 * runtime. For OpenMP offload these functions are made "declare target",
 * which ensures that they are compiled for the target even when no direct
 * reference to them from another "declare target" function is visible.
 * Host-only functions must not be placed between these two macros.
 *
 * Both macros are plain aliases of the RT_EXT_API_GROUP_BEGIN/END pair.
 */
#define RT_OFFLOAD_API_GROUP_BEGIN RT_EXT_API_GROUP_BEGIN
#define RT_OFFLOAD_API_GROUP_END RT_EXT_API_GROUP_END

/*
* RT_VAR_GROUP_BEGIN/END pair is placed around definitions
* of module scope variables referenced by Flang runtime (directly
Expand Down Expand Up @@ -88,4 +100,16 @@
#endif
#endif /* !defined(RT_CONST_VAR_ATTRS) */

/*
 * RT_DEVICE_COMPILATION is defined (as 1) for any device compilation,
 * i.e. when either of the following holds:
 *   - a CUDA compilation (__CUDACC__ or __CUDA__ is defined) in which
 *     __CUDA_ARCH__ is also defined;
 *   - an OpenMP compilation (_OPENMP is defined) in which __AMDGCN__ or
 *     __NVPTX__ is defined.
 * In every other case the macro is explicitly undefined, so it must be
 * tested with #ifdef / defined() rather than by value.
 * Note that it can only be used reliably with compilers that perform
 * separate host and device compilations.
 */
#if ((defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)) || \
(defined(_OPENMP) && (defined(__AMDGCN__) || defined(__NVPTX__)))
#define RT_DEVICE_COMPILATION 1
#else
#undef RT_DEVICE_COMPILATION
#endif

#endif /* !FORTRAN_RUNTIME_API_ATTRS_H_ */
15 changes: 8 additions & 7 deletions flang/include/flang/Runtime/descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,20 +181,21 @@ class Descriptor {
ISO::CFI_attribute_t attribute = CFI_attribute_other);

// CUDA_TODO: Clang does not support unique_ptr on device.
static OwningPtr<Descriptor> Create(TypeCode t, std::size_t elementBytes,
void *p = nullptr, int rank = maxRank,
static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCode t,
std::size_t elementBytes, void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other,
int derivedTypeLenParameters = 0);
static OwningPtr<Descriptor> Create(TypeCategory, int kind, void *p = nullptr,
int rank = maxRank, const SubscriptValue *extent = nullptr,
static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCategory, int kind,
void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other);
static OwningPtr<Descriptor> Create(int characterKind,
static RT_API_ATTRS OwningPtr<Descriptor> Create(int characterKind,
SubscriptValue characters, void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other);
static OwningPtr<Descriptor> Create(const typeInfo::DerivedType &dt,
void *p = nullptr, int rank = maxRank,
static RT_API_ATTRS OwningPtr<Descriptor> Create(
const typeInfo::DerivedType &dt, void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other);

Expand Down
97 changes: 91 additions & 6 deletions flang/include/flang/Runtime/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,116 @@
#ifndef FORTRAN_RUNTIME_MEMORY_H_
#define FORTRAN_RUNTIME_MEMORY_H_

#include "flang/Runtime/api-attrs.h"
#include <cassert>
#include <memory>
#include <type_traits>

namespace Fortran::runtime {

class Terminator;

[[nodiscard]] void *AllocateMemoryOrCrash(
[[nodiscard]] RT_API_ATTRS void *AllocateMemoryOrCrash(
const Terminator &, std::size_t bytes);
template <typename A> [[nodiscard]] A &AllocateOrCrash(const Terminator &t) {
return *reinterpret_cast<A *>(AllocateMemoryOrCrash(t, sizeof(A)));
}
void FreeMemory(void *);
template <typename A> void FreeMemory(A *p) {
RT_API_ATTRS void FreeMemory(void *);
// Typed convenience overload: converts the pointer to void * and hands it
// to the untyped FreeMemory(void *).
template <typename A> RT_API_ATTRS void FreeMemory(A *p) {
  void *untyped{reinterpret_cast<void *>(p)};
  FreeMemory(untyped);
}
template <typename A> void FreeMemoryAndNullify(A *&p) {
FreeMemory(p);
p = nullptr;
}

template <typename A> struct OwningPtrDeleter {
void operator()(A *p) { FreeMemory(p); }
// Very basic implementation mimicking std::unique_ptr.
// It is meant to work with any offload device compiler, so it avoids
// calling standard library functions (e.g. std::swap) from its members.
// It uses a fixed memory deleter based on FreeMemory(),
// and does not support array objects with runtime length.
template <typename A> class OwningPtr {
public:
  using pointer_type = A *;

  // Constructs an empty owner (null pointer).
  OwningPtr() = default;
  // Takes ownership of p; it is released later through FreeMemory().
  RT_API_ATTRS explicit OwningPtr(pointer_type p) : ptr_(p) {}
  // Ownership is unique, so copying is disabled.
  RT_API_ATTRS OwningPtr(const OwningPtr &) = delete;
  RT_API_ATTRS OwningPtr &operator=(const OwningPtr &) = delete;
  // Transfers ownership from other, leaving other empty.
  // Only raw pointers are copied, so this cannot throw.
  RT_API_ATTRS OwningPtr(OwningPtr &&other) noexcept : ptr_(other.ptr_) {
    other.ptr_ = pointer_type{};
  }
  // Frees the currently owned pointer (if any) and takes over other's.
  // Going through reset() applies the same null check that the destructor
  // and reset() use before calling the deleter.
  RT_API_ATTRS OwningPtr &operator=(OwningPtr &&other) {
    if (this != &other) {
      reset(other.release());
    }
    return *this;
  }
  // Allows construction from nullptr; yields an empty owner.
  constexpr RT_API_ATTRS OwningPtr(std::nullptr_t) : OwningPtr() {}

  // Delete the pointer, if one is owned.
  RT_API_ATTRS ~OwningPtr() {
    if (ptr_ != pointer_type{}) {
      delete_ptr(ptr_);
      ptr_ = pointer_type{};
    }
  }

  // Release the ownership: returns the stored pointer without freeing it
  // and leaves this object empty.
  RT_API_ATTRS pointer_type release() {
    pointer_type p = ptr_;
    ptr_ = pointer_type{};
    return p;
  }

  // Replace the pointer. The new pointer is stored first and the previously
  // owned one (if any) is deleted afterwards.
  RT_API_ATTRS void reset(pointer_type p = pointer_type{}) {
    pointer_type previous{ptr_};
    ptr_ = p;
    if (previous != pointer_type{}) {
      // Delete the previously owned pointer.
      delete_ptr(previous);
    }
  }

  // Exchange the pointer with another object.
  RT_API_ATTRS void swap(OwningPtr &other) {
    pointer_type mine{ptr_};
    ptr_ = other.ptr_;
    other.ptr_ = mine;
  }

  // Get the stored pointer (ownership is retained).
  RT_API_ATTRS pointer_type get() const { return ptr_; }

  // True when a non-null pointer is stored.
  RT_API_ATTRS explicit operator bool() const {
    return get() != pointer_type{};
  }

  // Dereference the stored pointer; asserts that it is not null.
  RT_API_ATTRS typename std::add_lvalue_reference<A>::type operator*() const {
    assert(get() != pointer_type{});
    return *get();
  }

  RT_API_ATTRS pointer_type operator->() const { return get(); }

private:
  // The fixed deleter: hands the pointer to FreeMemory().
  RT_API_ATTRS void delete_ptr(pointer_type p) { FreeMemory(p); }
  pointer_type ptr_{};
};

template <typename A> using OwningPtr = std::unique_ptr<A, OwningPtrDeleter<A>>;
// Two owners differ when their stored raw pointers differ.
template <typename X, typename Y>
inline RT_API_ATTRS bool operator!=(
    const OwningPtr<X> &x, const OwningPtr<Y> &y) {
  const bool samePointer{x.get() == y.get()};
  return !samePointer;
}

// owner != nullptr: true exactly when the owner converts to true.
template <typename X>
inline RT_API_ATTRS bool operator!=(const OwningPtr<X> &x, std::nullptr_t) {
  return static_cast<bool>(x);
}

// nullptr != owner: mirror image of the (owner, nullptr) overload.
template <typename X>
inline RT_API_ATTRS bool operator!=(std::nullptr_t, const OwningPtr<X> &x) {
  return static_cast<bool>(x);
}

template <typename A> class SizedNew {
public:
Expand Down
22 changes: 13 additions & 9 deletions flang/include/flang/Runtime/type-code.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,33 @@ class TypeCode {

RT_API_ATTRS int raw() const { return raw_; }

constexpr bool IsValid() const {
// True when the raw code lies in [CFI_type_signed_char, CFI_TYPE_LAST].
constexpr RT_API_ATTRS bool IsValid() const {
  return CFI_type_signed_char <= raw_ && raw_ <= CFI_TYPE_LAST;
}
constexpr bool IsInteger() const {
// True when the raw code lies in [CFI_type_signed_char, CFI_type_ptrdiff_t].
constexpr RT_API_ATTRS bool IsInteger() const {
  return CFI_type_signed_char <= raw_ && raw_ <= CFI_type_ptrdiff_t;
}
constexpr bool IsReal() const {
// True when the raw code lies in [CFI_type_half_float, CFI_type_float128].
constexpr RT_API_ATTRS bool IsReal() const {
  return CFI_type_half_float <= raw_ && raw_ <= CFI_type_float128;
}
constexpr bool IsComplex() const {
// True when the raw code lies in
// [CFI_type_half_float_Complex, CFI_type_float128_Complex].
constexpr RT_API_ATTRS bool IsComplex() const {
  return CFI_type_half_float_Complex <= raw_ &&
      raw_ <= CFI_type_float128_Complex;
}
constexpr bool IsCharacter() const {
// True when the raw code is one of CFI_type_char, CFI_type_char16_t or
// CFI_type_char32_t.
constexpr RT_API_ATTRS bool IsCharacter() const {
  return CFI_type_char == raw_ || CFI_type_char16_t == raw_ ||
      CFI_type_char32_t == raw_;
}
constexpr bool IsLogical() const {
// True when the raw code is CFI_type_Bool or lies in
// [CFI_type_int_least8_t, CFI_type_int_least64_t].
constexpr RT_API_ATTRS bool IsLogical() const {
  return CFI_type_Bool == raw_ ||
      (CFI_type_int_least8_t <= raw_ && raw_ <= CFI_type_int_least64_t);
}
constexpr bool IsDerived() const { return raw_ == CFI_type_struct; }
constexpr bool IsIntrinsic() const { return IsValid() && !IsDerived(); }
// True when the raw code is CFI_type_struct.
constexpr RT_API_ATTRS bool IsDerived() const {
  return CFI_type_struct == raw_;
}
// True for every valid type code that is not a derived type.
constexpr RT_API_ATTRS bool IsIntrinsic() const {
  if (!IsValid()) {
    return false;
  }
  return !IsDerived();
}

RT_API_ATTRS std::optional<std::pair<TypeCategory, int>>
GetCategoryAndKind() const;
Expand All @@ -65,7 +69,7 @@ class TypeCode {
return thisCK && thatCK && *thisCK == *thatCK;
}
}
bool operator!=(TypeCode that) const { return !(*this == that); }
// Inequality is defined as the negation of operator==.
RT_API_ATTRS bool operator!=(TypeCode that) const {
  const bool equal{*this == that};
  return !equal;
}

private:
ISO::CFI_type_t raw_{CFI_type_other};
Expand Down
8 changes: 8 additions & 0 deletions flang/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,10 @@ option(FLANG_EXPERIMENTAL_CUDA_RUNTIME

# List of files that are buildable for all devices.
set(supported_files
descriptor.cpp
terminator.cpp
transformational.cpp
type-code.cpp
)

if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
Expand All @@ -175,6 +178,11 @@ if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
-Xclang -fcuda-allow-variadic-functions
)
endif()
# When the CUDA compiler identifies itself as NVIDIA, set
# CUDA_COMPILE_OPTIONS to --expt-relaxed-constexpr. Note that set() assigns
# the variable rather than appending to it.
# NOTE(review): presumably needed so that constexpr functions lacking
# explicit device attributes can be used in device code - confirm against
# the nvcc documentation.
if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
set(CUDA_COMPILE_OPTIONS
--expt-relaxed-constexpr
)
endif()
set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS
"${CUDA_COMPILE_OPTIONS}"
)
Expand Down
20 changes: 10 additions & 10 deletions flang/runtime/ISO_Fortran_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
#include <cstdlib>

namespace Fortran::ISO {
static inline constexpr bool IsCharacterType(CFI_type_t ty) {
// True when ty is one of CFI_type_char, CFI_type_char16_t or
// CFI_type_char32_t.
static inline constexpr RT_API_ATTRS bool IsCharacterType(CFI_type_t ty) {
  return CFI_type_char == ty || CFI_type_char16_t == ty ||
      CFI_type_char32_t == ty;
}
static inline constexpr bool IsAssumedSize(const CFI_cdesc_t *dv) {
// True when the descriptor has rank > 0 and the extent of its last
// dimension is -1. A descriptor whose rank is not positive is never
// reported as assumed-size, and its dim[] array is not read.
static inline constexpr RT_API_ATTRS bool IsAssumedSize(const CFI_cdesc_t *dv) {
  if (!(dv->rank > 0)) {
    return false;
  }
  return dv->dim[dv->rank - 1].extent == -1;
}

static inline std::size_t MinElemLen(CFI_type_t type) {
static inline RT_API_ATTRS std::size_t MinElemLen(CFI_type_t type) {
auto typeParams{Fortran::runtime::TypeCode{type}.GetCategoryAndKind()};
if (!typeParams) {
Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
Expand All @@ -38,10 +38,10 @@ static inline std::size_t MinElemLen(CFI_type_t type) {
typeParams->first, typeParams->second);
}

static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[],
bool external) {
static inline RT_API_ATTRS int VerifyEstablishParameters(
CFI_cdesc_t *descriptor, void *base_addr, CFI_attribute_t attribute,
CFI_type_t type, std::size_t elem_len, CFI_rank_t rank,
const CFI_index_t extents[], bool external) {
if (attribute != CFI_attribute_other && attribute != CFI_attribute_pointer &&
attribute != CFI_attribute_allocatable) {
return CFI_INVALID_ATTRIBUTE;
Expand Down Expand Up @@ -77,9 +77,9 @@ static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
return CFI_SUCCESS;
}

static inline void EstablishDescriptor(CFI_cdesc_t *descriptor, void *base_addr,
CFI_attribute_t attribute, CFI_type_t type, std::size_t elem_len,
CFI_rank_t rank, const CFI_index_t extents[]) {
static inline RT_API_ATTRS void EstablishDescriptor(CFI_cdesc_t *descriptor,
void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[]) {
descriptor->base_addr = base_addr;
descriptor->elem_len = elem_len;
descriptor->version = CFI_VERSION;
Expand Down
14 changes: 8 additions & 6 deletions flang/runtime/derived.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#ifndef FORTRAN_RUNTIME_DERIVED_H_
#define FORTRAN_RUNTIME_DERIVED_H_

#include "flang/Runtime/api-attrs.h"

namespace Fortran::runtime::typeInfo {
class DerivedType;
}
Expand All @@ -21,21 +23,21 @@ class Terminator;

// Perform default component initialization, allocate automatic components.
// Returns a STAT= code (0 when all's well).
int Initialize(const Descriptor &, const typeInfo::DerivedType &, Terminator &,
bool hasStat = false, const Descriptor *errMsg = nullptr);
RT_API_ATTRS int Initialize(const Descriptor &, const typeInfo::DerivedType &,
Terminator &, bool hasStat = false, const Descriptor *errMsg = nullptr);

// Call FINAL subroutines, if any
void Finalize(
RT_API_ATTRS void Finalize(
const Descriptor &, const typeInfo::DerivedType &derived, Terminator *);

// Call FINAL subroutines, deallocate allocatable & automatic components.
// Does not deallocate the original descriptor.
void Destroy(const Descriptor &, bool finalize, const typeInfo::DerivedType &,
Terminator *);
RT_API_ATTRS void Destroy(const Descriptor &, bool finalize,
const typeInfo::DerivedType &, Terminator *);

// Return true if the passed descriptor is for a derived type
// entity that has a dynamic (allocatable, automatic) component.
bool HasDynamicComponent(const Descriptor &);
RT_API_ATTRS bool HasDynamicComponent(const Descriptor &);

} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_DERIVED_H_