31 changes: 31 additions & 0 deletions clang/test/Analysis/block-in-critical-section-inheritance.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: %clang_analyze_cc1 \
// RUN: -analyzer-checker=unix.BlockInCriticalSection \
// RUN: -std=c++11 \
// RUN: -analyzer-output text \
// RUN: -verify %s

// Local stand-in definition of sleep(): it ignores the requested duration and
// always returns 0. The call to it inside gh_99628() is what the checker is
// supposed to report.
unsigned int sleep(unsigned int seconds) {
  return 0U;
}
namespace std {
// Some standard library implementations declare part of the mutex interface
// on an implementation-detail base class instead of on std::mutex itself.
// This models that layout: lock() lives on the base class, the remaining
// methods on the derived class, so matching of inherited methods is exercised.
class __mutex_base {
public:
  void lock();
};

class mutex : public __mutex_base {
public:
  bool try_lock();
  void unlock();
};
} // namespace std

// Locks a std::mutex through lock(), which in this test is declared on the
// base class __mutex_base rather than on std::mutex, then calls sleep()
// before unlock(). The directives below require the sleep() call to be
// reported as happening inside the critical section entered at m.lock().
void gh_99628() {
std::mutex m;
m.lock();
// expected-note@-1 {{Entering critical section here}}
sleep(10);
// expected-warning@-1 {{Call to blocking function 'sleep' inside of critical section}}
// expected-note@-2 {{Call to blocking function 'sleep' inside of critical section}}
m.unlock();
}
20 changes: 18 additions & 2 deletions clang/test/Analysis/exercise-ps.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
// RUN: %clang_analyze_cc1 %s -verify -Wno-error=implicit-function-declaration \
// RUN: -analyzer-checker=core,unix.Malloc \
// RUN: %clang_analyze_cc1 %s -triple=x86_64-unknown-linux \
// RUN: -verify -Wno-error=implicit-function-declaration \
// RUN: -analyzer-checker=core,unix.Malloc,debug.ExprInspection \
// RUN: -analyzer-config core.CallAndMessage:ArgPointeeInitializedness=true
//
// Just exercise the analyzer on code that has at one point caused issues
// (i.e., no assertions or crashes).

void clang_analyzer_dump_int(int);

static void f1(const char *x, char *y) {
while (*x != 0) {
*y++ = *x++;
Expand All @@ -30,3 +33,16 @@ void f3(void *dest) {
void *src = __builtin_alloca(5);
memcpy(dest, src, 1); // expected-warning{{2nd function call argument is a pointer to uninitialized value}}
}

// Reproduce crash from GH#94496. When an array is used as a subscript to another array, CSA cannot model it
// and should just assume the value is unknown instead of crashing.
void f4(char *array) {
char b[4] = {0};

// The bit casts below reinterpret the 4-byte array b as an int, so int must
// be exactly 4 bytes wide for this test to be meaningful.
_Static_assert(sizeof(int) == 4, "Wrong triple for the test");

clang_analyzer_dump_int(__builtin_bit_cast(int, b)); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_int(array[__builtin_bit_cast(int, b)]); // expected-warning {{Unknown}}

array[__builtin_bit_cast(int, b)] = 0x10; // no crash
}
569 changes: 569 additions & 0 deletions clang/test/C/C11/n1396.c

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions clang/test/Headers/__clang_hip_cmath.hip
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,22 @@ extern "C" __device__ float test_sin_f32(float x) {
extern "C" __device__ float test_cos_f32(float x) {
return cos(x);
}

// Check that a user-defined type which can be converted to float and double but
// does not specialize __numeric_type does not cause ambiguity diagnostics.
struct user_bfloat16 {
// Converting constructor from float, usable from both host and device code.
__host__ __device__ user_bfloat16(float);
// Implicit conversions to both float and double.
operator float();
operator double();
};

namespace user_namespace {
// User-provided device fma overload taking three user_bfloat16 values; it
// simply returns its first argument.
__device__ user_bfloat16 fma(const user_bfloat16 a, const user_bfloat16 b, const user_bfloat16 c) {
return a;
}

// Kernel that makes an unqualified call to fma with user_bfloat16 arguments.
__global__ void test_fma() {
user_bfloat16 a = 1.0f, b = 2.0f;
fma(a, b, b);
}
}
84 changes: 84 additions & 0 deletions clang/test/Modules/builtin-vararg.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Check how builtins using varargs behave with the modules.

// REQUIRES: x86-registered-target
// RUN: rm -rf %t
// RUN: split-file %s %t

// RUN: %clang_cc1 -triple x86_64-apple-darwin \
// RUN: -fmodules -fno-implicit-modules -fbuiltin-headers-in-system-modules \
// RUN: -emit-module -fmodule-name=DeclareVarargs \
// RUN: -x c %t/include/module.modulemap -o %t/DeclareVarargs.pcm \
// RUN: -fmodule-map-file=%t/resource_dir/module.modulemap -isystem %t/resource_dir
// RUN: %clang_cc1 -triple x86_64-apple-darwin \
// RUN: -fmodules -fno-implicit-modules -fbuiltin-headers-in-system-modules \
// RUN: -emit-pch -fmodule-name=Prefix \
// RUN: -x c-header %t/prefix.pch -o %t/prefix.pch.gch \
// RUN: -fmodule-map-file=%t/include/module.modulemap -fmodule-file=DeclareVarargs=%t/DeclareVarargs.pcm \
// RUN: -I %t/include
// RUN: %clang_cc1 -triple x86_64-apple-darwin \
// RUN: -fmodules -fno-implicit-modules -fbuiltin-headers-in-system-modules \
// RUN: -emit-obj -fmodule-name=test \
// RUN: -x c %t/test.c -o %t/test.o \
// RUN: -Werror=incompatible-pointer-types \
// RUN: -fmodule-file=%t/DeclareVarargs.pcm -include-pch %t/prefix.pch.gch \
// RUN: -I %t/include

//--- include/declare-varargs.h
#ifndef DECLARE_VARARGS_H
#define DECLARE_VARARGS_H

#include <stdarg.h>

int vprintf(const char *format, va_list args);

// 1. initializeBuiltins 'acos' causes its deserialization and deserialization
// of 'implementation_of_builtin'. Because this is done before Sema initialization,
// 'implementation_of_builtin' DeclID is added to PreloadedDeclIDs.
#undef acos
#define acos(__x) implementation_of_builtin(__x)

// 2. Because of 'static' the function isn't added to EagerlyDeserializedDecls
// and not deserialized in `ASTReader::StartTranslationUnit` before `ASTReader::InitializeSema`.
// 3. Because of '__overloadable__' attribute the function requires name mangling during deserialization.
// And the name mangling requires '__builtin_va_list' decl.
// Because the function is added to PreloadedDeclIDs, the deserialization happens in `ASTReader::InitializeSema`.
static int __attribute__((__overloadable__)) implementation_of_builtin(int x) {
// Identity function: returns its argument unchanged.
return x;
}

#endif // DECLARE_VARARGS_H

//--- include/module.modulemap
module DeclareVarargs {
header "declare-varargs.h"
export *
}

//--- resource_dir/stdarg.h
#ifndef STDARG_H
#define STDARG_H

typedef __builtin_va_list va_list;
#define va_start(ap, param) __builtin_va_start(ap, param)
#define va_end(ap) __builtin_va_end(ap)

#endif // STDARG_H

//--- resource_dir/module.modulemap
module _Builtin_stdarg {
header "stdarg.h"
export *
}

//--- prefix.pch
#include <declare-varargs.h>

//--- test.c
#include <declare-varargs.h>

// Forwards its variadic arguments to vprintf, using the va_list, va_start and
// va_end definitions made available through <declare-varargs.h>.
void test(const char *format, ...) {
  va_list ap;
  va_start(ap, format);
  vprintf(format, ap);
  va_end(ap);
}
6 changes: 2 additions & 4 deletions clang/test/Preprocessor/pragma_mc_func.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
// RUN: not %clang --target=powerpc64-ibm-aix -fsyntax-only %s 2>&1 | FileCheck %s
// RUN: not %clang --target=powerpc64-ibm-aix -ferr-pragma-mc-func-aix -fsyntax-only \
// RUN: %s 2>&1 | FileCheck %s
// RUN: not %clang --target=powerpc64-ibm-aix -fno-err-pragma-mc-func-aix \
// RUN: -ferr-pragma-mc-func-aix -fsyntax-only %s 2>&1 | FileCheck %s
#pragma mc_func asm_barrier {"60000000"}

// CHECK: error: #pragma mc_func is not supported
Expand All @@ -11,10 +8,11 @@
// RUN: %clang --target=powerpc64-ibm-aix -fno-err-pragma-mc-func-aix -fsyntax-only %s
// RUN: %clang --target=powerpc64-ibm-aix -ferr-pragma-mc-func-aix -fsyntax-only \
// RUN: -fno-err-pragma-mc-func-aix %s
// RUN: %clang --target=powerpc64-ibm-aix -fsyntax-only %s
// RUN: %clang --target=powerpc64-ibm-aix -Werror=unknown-pragmas \
// RUN: -fno-err-pragma-mc-func-aix -fsyntax-only %s

// Cases on a non-AIX target.
// Cases where we have errors or warnings.
// RUN: not %clang --target=powerpc64le-unknown-linux-gnu \
// RUN: -Werror=unknown-pragmas -fno-err-pragma-mc-func-aix -fsyntax-only %s 2>&1 | \
// RUN: FileCheck --check-prefix=UNUSED %s
Expand Down
3 changes: 3 additions & 0 deletions clang/test/Sema/constexpr.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,3 +357,6 @@ void infsNaNs() {
constexpr double db5 = LD_SNAN; // expected-error {{constexpr initializer evaluates to nan which is not exactly representable in type 'const double'}}
constexpr double db6 = INF;
}

constexpr struct S9 s9 = { }; // expected-error {{variable has incomplete type 'const struct S9'}} \
// expected-note {{forward declaration of 'struct S9'}}
76 changes: 76 additions & 0 deletions clang/test/SemaCXX/coroutines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify=expected,cxx20_23,cxx23 %s -fcxx-exceptions -fexceptions -Wunused-result
// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,cxx14_20,cxx20_23 %s -fcxx-exceptions -fexceptions -Wunused-result

// Run without -verify to check the order of errors we show.
// RUN: not %clang_cc1 -std=c++20 -fsyntax-only %s -fcxx-exceptions -fexceptions -Wunused-result 2>&1 | FileCheck %s

void no_coroutine_traits_bad_arg_await() {
co_await a; // expected-error {{include <coroutine>}}
// expected-error@-1 {{use of undeclared identifier 'a'}}
Expand Down Expand Up @@ -154,12 +157,15 @@ namespace std {
template <class PromiseType = void>
struct coroutine_handle {
static coroutine_handle from_address(void *) noexcept;
static coroutine_handle from_promise(PromiseType &promise);
};
template <>
struct coroutine_handle<void> {
template <class PromiseType>
coroutine_handle(coroutine_handle<PromiseType>) noexcept;
static coroutine_handle from_address(void *) noexcept;
template <class PromiseType>
static coroutine_handle from_promise(PromiseType &promise);
};
} // namespace std

Expand Down Expand Up @@ -206,6 +212,22 @@ void mixed_yield_invalid() {
return; // expected-error {{return statement not allowed in coroutine}}
}

// The plain 'return' appears before the 'co_yield' that makes this function a
// coroutine. It must still be diagnosed, with a note pointing at the
// 'co_yield'.
void mixed_yield_return_first(bool b) {
if (b) {
return; // expected-error {{return statement not allowed in coroutine}}
}
co_yield 0; // expected-note {{function is a coroutine due to use of 'co_yield'}}
}

// Template in which a plain 'return' precedes a 'for co_await' range loop.
// The 'return' must be diagnosed, with the note attached to the loop that
// makes the function a coroutine.
template<typename T>
void mixed_return_for_range(bool b, T t) {
if (b) {
return; // expected-error {{return statement not allowed in coroutine}}
}
for co_await (auto i : t){}; // expected-warning {{'for co_await' belongs to CoroutineTS instead of C++20, which is deprecated}}
// expected-note@-1 {{function is a coroutine due to use of 'co_await'}}
}

template <class T>
void mixed_yield_template(T) {
co_yield blah; // expected-error {{use of undeclared identifier}}
Expand Down Expand Up @@ -264,6 +286,13 @@ void mixed_coreturn(void_tag, bool b) {
return; // expected-error {{not allowed in coroutine}}
}

// The plain 'return' is in the first branch and the 'co_return' in the else
// branch. The 'return' must be diagnosed even though it comes first.
void mixed_coreturn_return_first(void_tag, bool b) {
if (b)
return; // expected-error {{not allowed in coroutine}}
else
co_return; // expected-note {{use of 'co_return'}}
}

void mixed_coreturn_invalid(bool b) {
if (b)
co_return; // expected-note {{use of 'co_return'}}
Expand Down Expand Up @@ -291,6 +320,53 @@ void mixed_coreturn_template2(bool b, T) {
return; // expected-error {{not allowed in coroutine}}
}

struct promise_handle;

// Coroutine return type that derives from
// std::coroutine_handle<promise_handle> and names promise_handle as its
// promise_type.
// NOTE(review): the "not viable" notes on this struct presumably belong to the
// failed int-to-Handle conversions in the functions further down -- confirm.
struct Handle : std::coroutine_handle<promise_handle> { // expected-note 4{{not viable}}
// expected-note@-1 4{{not viable}}
using promise_type = promise_handle;
};

// Promise type for Handle. It provides return_void() but no return_value(),
// never suspends at the initial or final suspend point, and builds its return
// object from the handle obtained via from_promise(*this).
struct promise_handle {
Handle get_return_object() noexcept {
{ return Handle(std::coroutine_handle<Handle::promise_type>::from_promise(*this)); }
}
suspend_never initial_suspend() const noexcept { return {}; }
suspend_never final_suspend() const noexcept { return {}; }
void return_void() const noexcept {}
void unhandled_exception() const noexcept {}
};

// Uses 'co_await' and then a value-returning 'return' in a function that
// returns Handle. Both the "return not allowed in coroutine" error and the
// conversion error must be produced, in that order.
Handle mixed_return_value() {
co_await a; // expected-note {{function is a coroutine due to use of 'co_await' here}}
return 0; // expected-error {{return statement not allowed in coroutine}}
// expected-error@-1 {{no viable conversion from returned value of type}}
// Check that we first show that return is not allowed in coroutine.
// The error about bad conversion is most likely spurious so we prefer to have it afterwards.
// CHECK-NOT: error: no viable conversion from returned value of type
// CHECK: error: return statement not allowed in coroutine
// CHECK: error: no viable conversion from returned value of type
}

// The value-returning 'return' comes before the 'co_await' that makes the
// function a coroutine. The trailing 'co_return 0' is additionally diagnosed
// because promise_handle has no return_value member.
Handle mixed_return_value_return_first(bool b) {
if (b) {
return 0; // expected-error {{no viable conversion from returned value of type}}
// expected-error@-1 {{return statement not allowed in coroutine}}
}
co_await a; // expected-note {{function is a coroutine due to use of 'co_await' here}}
co_return 0; // expected-error {{no member named 'return_value' in 'promise_handle'}}
}

// Two value-returning 'return' statements surround a 'co_await'. Each one
// produces a conversion error, but the "return not allowed in coroutine"
// error is only attached to the first.
Handle mixed_multiple_returns(bool b) {
if (b) {
return 0; // expected-error {{no viable conversion from returned value of type}}
// expected-error@-1 {{return statement not allowed in coroutine}}
}
co_await a; // expected-note {{function is a coroutine due to use of 'co_await' here}}
// The error 'return statement not allowed in coroutine' should appear only once.
return 0; // expected-error {{no viable conversion from returned value of type}}
}

struct CtorDtor {
CtorDtor() {
co_yield 0; // expected-error {{'co_yield' cannot be used in a constructor}}
Expand Down
2 changes: 1 addition & 1 deletion clang/tools/clang-format/ClangFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName,
: SourceMgr::DiagKind::DK_Warning,
"code should be clang-formatted [-Wclang-format-violations]");

Diag.print(nullptr, llvm::errs(), (ShowColors && !NoShowColors));
Diag.print(nullptr, llvm::errs(), ShowColors && !NoShowColors);
if (ErrorLimit && ++Errors >= ErrorLimit)
break;
}
Expand Down
4 changes: 4 additions & 0 deletions clang/unittests/Format/FormatTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27445,6 +27445,10 @@ TEST_F(FormatTest, RemoveParentheses) {
verifyFormat("static_assert((std::is_constructible_v<T, Args &&> && ...));",
"static_assert(((std::is_constructible_v<T, Args &&> && ...)));",
Style);
verifyFormat("foo((a, b));", "foo(((a, b)));", Style);
verifyFormat("foo((a, b));", "foo(((a), b));", Style);
verifyFormat("foo((a, b));", "foo((a, (b)));", Style);
verifyFormat("foo((a, b, c));", "foo((a, ((b)), c));", Style);
verifyFormat("return (0);", "return (((0)));", Style);
verifyFormat("return (({ 0; }));", "return ((({ 0; })));", Style);
verifyFormat("return ((... && std::is_convertible_v<TArgsLocal, TArgs>));",
Expand Down
2 changes: 1 addition & 1 deletion clang/unittests/Format/MatchFilePathTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ TEST_F(MatchFilePathTest, Newline) {

TEST_F(MatchFilePathTest, Star) {
EXPECT_TRUE(match(std::string(50, 'a'), "*a*a*a*a*a*a*a*a*a*a"));
EXPECT_FALSE(match((std::string(50, 'a') + 'b'), "*a*a*a*a*a*a*a*a*a*a"));
EXPECT_FALSE(match(std::string(50, 'a') + 'b', "*a*a*a*a*a*a*a*a*a*a"));
}

TEST_F(MatchFilePathTest, CaseSensitive) {
Expand Down
14 changes: 12 additions & 2 deletions clang/www/c_status.html
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,10 @@ <h2 id="c99">C99 implementation status</h2>
glibc</a>, <a href="https://git.musl-libc.org/cgit/musl/tree/include/stdc-predef.h">
musl</a> will define the macro regardless of compiler support unless
the compiler defines <code>__GCC_IEC_559</code>, which Clang does not
currently define.
currently define.<br>
Additionally, Clang intentionally will not conform to Annex F on
32-bit x86 without SSE2 due to the behavior of floating-point
operations in x87.
</details>
</td>
</tr>
Expand Down Expand Up @@ -501,7 +504,14 @@ <h2 id="c11">C11 implementation status</h2>
<tr>
<td>Wide function returns (alternate proposal)</td>
<td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1396.htm">N1396</a></td>
<td class="unknown" align="center">Unknown</td>
<td class="full" align="center">
<details><summary>Yes*</summary>
Clang conforms to this paper on all targets except 32-bit x86 without
SSE2. However, Clang does not claim conformance to Annex F on any
target and does not intend to ever conform to Annex F on that specific
target, so no changes are needed to conform to this paper.
</details>
</td>
</tr>
<tr id="alignment">
<td rowspan="3">Alignment</td>
Expand Down
26 changes: 20 additions & 6 deletions compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ elseif(OS_NAME MATCHES "Android")
set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64})
elseif(OS_NAME MATCHES "Fuchsia")
set(ALL_FUZZER_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64})
elseif(OS_NAME MATCHES "FreeBSD")
set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64})
else()
set(ALL_FUZZER_SUPPORTED_ARCH ${X86_64} ${ARM64})
endif()
Expand All @@ -60,22 +62,34 @@ else()
set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64} ${ARM32}
${PPC64} ${S390X} ${RISCV64} ${HEXAGON} ${LOONGARCH64})
endif()
set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}
${LOONGARCH64})
if (OS_NAME MATCHES "FreeBSD")
set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${ARM64})
else()
set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}
${LOONGARCH64})
endif()
set(ALL_NSAN_SUPPORTED_ARCH ${X86_64})
set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64})
set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64})
set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64}
${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}
${RISCV32} ${RISCV64} ${LOONGARCH64})
set(ALL_CTX_PROFILE_SUPPORTED_ARCH ${X86_64})
set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}
${LOONGARCH64} ${RISCV64})
if (OS_NAME MATCHES "FreeBSD")
set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
else()
set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}
${LOONGARCH64} ${RISCV64})
endif()
set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64}
${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}
${LOONGARCH64})
set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64}
${HEXAGON} ${LOONGARCH64} ${SPARC} ${SPARCV9})
if (OS_NAME MATCHES "FreeBSD")
set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64})
else()
set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64}
${HEXAGON} ${LOONGARCH64} ${SPARC} ${SPARCV9})
endif()
set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS64}
${HEXAGON} ${LOONGARCH64})
set(ALL_SCUDO_STANDALONE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/lib/asan/asan_ignorelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@
# global:*global_with_initialization_issues*=init
# type:*Namespace::ClassName*=init

# Stack buffer overflow in VC/INCLUDE/xlocnum, see http://goo.gl/L4qqUG
# Stack buffer overflow in VC/INCLUDE/xlocnum, see
# https://web.archive.org/web/20140729123024/https://connect.microsoft.com/VisualStudio/feedback/details/829931/vs2012-and-vs2013-istream-code-reads-off-the-end-of-its-non-null-terminated-stack-copied-string
fun:*_Find_elem@*@std*
10 changes: 9 additions & 1 deletion compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,15 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)
#define zva_val x5

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset)
dup v0.16B, valw
#ifdef __ARM_FEATURE_SVE
mov z0.b, valw
#else
bfi valw, valw, #8, #8
bfi valw, valw, #16, #16
bfi val, val, #32, #32
fmov d0, val
fmov v0.d[1], val
#endif
add dstend2, dstin, count

cmp count, 96
Expand Down
6 changes: 5 additions & 1 deletion compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ const int FUTEX_WAKE_PRIVATE = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;

# if SANITIZER_FREEBSD
# define SANITIZER_USE_GETENTROPY 1
extern "C" void *__sys_mmap(void *addr, size_t len, int prot, int flags, int fd,
off_t offset);
# endif

namespace __sanitizer {
Expand Down Expand Up @@ -218,7 +220,9 @@ ScopedBlockSignals::~ScopedBlockSignals() { SetSigProcMask(&saved_, nullptr); }
# if !SANITIZER_S390
uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
u64 offset) {
# if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
# if SANITIZER_FREEBSD
return (uptr)__sys_mmap(addr, length, prot, flags, fd, offset);
# elif SANITIZER_LINUX_USES_64BIT_SYSCALLS
return internal_syscall(SYSCALL(mmap), (uptr)addr, length, prot, flags, fd,
offset);
# else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
// RUN: %clangxx_asan -g -O0 %s %libdl -Wl,--export-dynamic -o %t
// RUN: %env_asan_opts=report_globals=2:detect_odr_violation=1 %run %t 2>&1 | FileCheck %s

// FIXME: Checks do not match on Android.
// UNSUPPORTED: android

#include <cstdlib>
#include <dlfcn.h>
#include <stdio.h>
Expand Down
3 changes: 2 additions & 1 deletion compiler-rt/test/asan/TestCases/Linux/printf-fortify-5.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// RUN: %clang -fPIC -shared -O2 -D_FORTIFY_SOURCE=2 -D_DSO %s -o %t.so
// RUN: %clang_asan -o %t %t.so %s
// RUN: not %run %t 2>&1 | FileCheck %s
// REQUIRES: glibc-2.27
/// Incompatible with pass_object_size style fortified source since glibc 2.40.
// REQUIRES: glibc-2.27 && !glibc-2.40
#ifdef _DSO
#include <stdio.h>
#include <stdlib.h>
Expand Down
4 changes: 1 addition & 3 deletions compiler-rt/test/asan/TestCases/initialization-nobug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
// Make sure that accessing a global in the same TU is safe

bool condition = true;
__attribute__((noinline, weak))
int initializeSameTU() {
__attribute__((noinline, weak)) int initializeSameTU() {
return condition ? 0x2a : 052;
}
int sameTU = initializeSameTU();
Expand Down Expand Up @@ -44,7 +43,6 @@ int getStructWithDtorValue() { return struct_with_dtor.value; }

int main() { return 0; }


// CHECK: DynInitPoison module: {{.*}}initialization-nobug.cpp
// CHECK: DynInitUnpoison
// CHECK: DynInitPoison module: {{.*}}initialization-nobug-extra.cpp
Expand Down
11 changes: 10 additions & 1 deletion compiler-rt/test/lit.common.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,16 @@ def add_glibc_versions(ver_string):

ver = LooseVersion(ver_string)
any_glibc = False
for required in ["2.19", "2.27", "2.30", "2.33", "2.34", "2.37", "2.38"]:
for required in [
"2.19",
"2.27",
"2.30",
"2.33",
"2.34",
"2.37",
"2.38",
"2.40",
]:
if ver >= LooseVersion(required):
config.available_features.add("glibc-" + required)
any_glibc = True
Expand Down
7 changes: 7 additions & 0 deletions flang/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
if (FLANG_BUILD_TOOLS)
add_subdirectory(tools)
endif()

# Opt-in switch (default OFF) for compiling the CUDA Fortran runtime sources.
# When it is enabled, configuration fails unless the CUDA toolkit is found.
option(FLANG_CUF_RUNTIME
"Compile CUDA Fortran runtime sources" OFF)
if (FLANG_CUF_RUNTIME)
find_package(CUDAToolkit REQUIRED)
endif()

add_subdirectory(runtime)

if (LLVM_INCLUDE_EXAMPLES)
Expand Down
2 changes: 1 addition & 1 deletion flang/include/flang/Evaluate/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,7 @@ extern template class Relational<SomeType>;
// Logical expressions of a kind bigger than LogicalResult
// do not include Relational<> operations as possibilities,
// since the results of Relationals are always LogicalResult
// (kind=1).
// (kind=4).
template <int KIND>
class Expr<Type<TypeCategory::Logical, KIND>>
: public ExpressionBase<Type<TypeCategory::Logical, KIND>> {
Expand Down
6 changes: 6 additions & 0 deletions flang/include/flang/Frontend/LangOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#define FORTRAN_FRONTEND_LANGOPTIONS_H

#include <string>
#include <vector>

#include "llvm/TargetParser/Triple.h"

namespace Fortran::frontend {

Expand Down Expand Up @@ -58,6 +61,9 @@ class LangOptions : public LangOptionsBase {
/// host code generation.
std::string OMPHostIRFile;

/// List of triples passed in using -fopenmp-targets.
std::vector<llvm::Triple> OMPTargetTriples;

LangOptions();
};

Expand Down
4 changes: 3 additions & 1 deletion flang/include/flang/Lower/Allocatable.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "flang/Lower/AbstractConverter.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "flang/Runtime/allocator-registry.h"
#include "llvm/ADT/StringRef.h"

namespace mlir {
Expand Down Expand Up @@ -70,7 +71,8 @@ void genDeallocateIfAllocated(AbstractConverter &converter,
fir::MutableBoxValue
createMutableBox(AbstractConverter &converter, mlir::Location loc,
const pft::Variable &var, mlir::Value boxAddr,
mlir::ValueRange nonDeferredParams, bool alwaysUseBox);
mlir::ValueRange nonDeferredParams, bool alwaysUseBox,
unsigned allocator = kDefaultAllocator);

/// Assign a boxed value to a boxed variable, \p box (known as a
/// MutableBoxValue). Expression \p source will be lowered to build the
Expand Down
7 changes: 5 additions & 2 deletions flang/include/flang/Optimizer/Builder/MutableBox.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#define FORTRAN_OPTIMIZER_BUILDER_MUTABLEBOX_H

#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Runtime/allocator-registry.h"
#include "llvm/ADT/StringRef.h"

namespace mlir {
Expand Down Expand Up @@ -43,7 +44,8 @@ namespace fir::factory {
mlir::Value createUnallocatedBox(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Type boxType,
mlir::ValueRange nonDeferredParams,
mlir::Value typeSourceBox = {});
mlir::Value typeSourceBox = {},
unsigned allocator = kDefaultAllocator);

/// Create a MutableBoxValue for a temporary allocatable.
/// The created MutableBoxValue wraps a fir.ref<fir.box<fir.heap<type>>> and is
Expand Down Expand Up @@ -80,7 +82,8 @@ void associateMutableBoxWithRemap(fir::FirOpBuilder &builder,
/// address field of the MutableBoxValue to zero.
void disassociateMutableBox(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
bool polymorphicSetType = true);
bool polymorphicSetType = true,
unsigned allocator = kDefaultAllocator);

/// Generate code to conditionally reallocate a MutableBoxValue with a new
/// shape, lower bounds, and LEN parameters if it is unallocated or if its
Expand Down
2 changes: 2 additions & 0 deletions flang/include/flang/Optimizer/CodeGen/CGOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", [AttrSizedOperandSegments]> {
- substring: A substring operator (offset, length) for CHARACTER.
- LEN type parameters: A vector of runtime LEN type parameters that
describe and correspond to the elemental derived type.
- sourceBox: A box to read information from such as CFI type,
type descriptor or element size to populate the new descriptor.
- allocator_idx: specify special allocator to use.

The memref and shape arguments are mandatory. The rest are optional.
Expand Down
2 changes: 2 additions & 0 deletions flang/include/flang/Optimizer/Dialect/FIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,8 @@ def fir_EmboxOp : fir_Op<"embox", [NoMemoryEffect, AttrSizedOperandSegments]> {
lower bounds and extents may not be known until runtime),
- slice: an array section can be described with a slice triple,
- typeparams: for emboxing a derived type with LEN type parameters,
- sourceBox: A box to read information from such as CFI type,
type descriptor or element size to populate the new descriptor.
- accessMap: unused/experimental.
- allocator_idx: specify special allocator to use.
}];
Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Optimizer/Transforms/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ namespace fir {
#define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION
#define GEN_PASS_DECL_CHARACTERCONVERSION
#define GEN_PASS_DECL_CFGCONVERSION
#define GEN_PASS_DECL_CUFOPCONVERSION
#define GEN_PASS_DECL_EXTERNALNAMECONVERSION
#define GEN_PASS_DECL_MEMREFDATAFLOWOPT
#define GEN_PASS_DECL_SIMPLIFYINTRINSICS
Expand Down
7 changes: 7 additions & 0 deletions flang/include/flang/Optimizer/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -429,4 +429,11 @@ def AssumedRankOpConversion : Pass<"fir-assumed-rank-op", "mlir::ModuleOp"> {
];
}

// Module-level pass exposed under the command-line name "cuf-convert". Per its
// summary it converts some CUF operations to runtime calls; it declares the
// FIR dialect as a dependent dialect.
def CufOpConversion : Pass<"cuf-convert", "mlir::ModuleOp"> {
let summary = "Convert some CUF operations to runtime calls";
let dependentDialects = [
"fir::FIROpsDialect"
];
}

#endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
40 changes: 40 additions & 0 deletions flang/include/flang/Runtime/CUDA/allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//===-- include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_

#include "flang/Runtime/descriptor.h"

// Evaluates `expr`, which must yield a CUresult, exactly once by passing it to
// an immediately-invoked lambda. A zero result is treated as success. For any
// other value the error name is looked up with cuGetErrorName (falling back to
// "<unknown>" when none is returned) and the program is terminated through
// Terminator::Crash with the stringified expression and the error name.
// NOTE(review): CUresult, cuGetErrorName and Terminator are not declared by
// the include visible in this header and Terminator is used unqualified;
// presumably users must include the CUDA driver header and expand the macro
// where Terminator is in scope -- confirm.
#define CUDA_REPORT_IF_ERROR(expr) \
[](CUresult result) { \
if (!result) \
return; \
const char *name = nullptr; \
cuGetErrorName(result, &name); \
if (!name) \
name = "<unknown>"; \
Terminator terminator{__FILE__, __LINE__}; \
terminator.Crash("'%s' failed with '%s'", #expr, name); \
}(expr)

// Entry points of the CUDA Fortran (CUF) allocator support. Only the
// declarations are visible in this header.
namespace Fortran::runtime::cuf {

// NOTE(review): presumably registers the allocate/free pairs declared below
// with the runtime allocator registry -- confirm against the definition.
void CUFRegisterAllocator();

// Allocate/free pair for pinned memory. The allocation function takes a
// std::size_t (presumably a size in bytes -- confirm) and returns a void *.
void *CUFAllocPinned(std::size_t);
void CUFFreePinned(void *);

// Allocate/free pair for device memory, with the same signatures as above.
void *CUFAllocDevice(std::size_t);
void CUFFreeDevice(void *);

// Allocate/free pair for managed memory, with the same signatures as above.
void *CUFAllocManaged(std::size_t);
void CUFFreeManaged(void *);

} // namespace Fortran::runtime::cuf
#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
5 changes: 5 additions & 0 deletions flang/include/flang/Runtime/allocator-registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@

static constexpr unsigned kDefaultAllocator = 0;

// Allocator registry positions used for CUF
static constexpr unsigned kPinnedAllocatorPos = 1;
static constexpr unsigned kDeviceAllocatorPos = 2;
static constexpr unsigned kManagedAllocatorPos = 3;

#define MAX_ALLOCATOR 5

namespace Fortran::runtime {
Expand Down
14 changes: 11 additions & 3 deletions flang/include/flang/Tools/CrossToolHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,19 @@ struct OffloadModuleOpts {
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
bool OpenMPIsGPU, bool OpenMPForceUSM, uint32_t OpenMPVersion,
std::string OMPHostIRFile = {}, bool NoGPULib = false)
std::string OMPHostIRFile = {},
const std::vector<llvm::Triple> &OMPTargetTriples = {},
bool NoGPULib = false)
: OpenMPTargetDebug(OpenMPTargetDebug),
OpenMPTeamSubscription(OpenMPTeamSubscription),
OpenMPThreadSubscription(OpenMPThreadSubscription),
OpenMPNoThreadState(OpenMPNoThreadState),
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
OpenMPForceUSM(OpenMPForceUSM), OpenMPVersion(OpenMPVersion),
OMPHostIRFile(OMPHostIRFile), NoGPULib(NoGPULib) {}
OMPHostIRFile(OMPHostIRFile),
OMPTargetTriples(OMPTargetTriples.begin(), OMPTargetTriples.end()),
NoGPULib(NoGPULib) {}

OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
Expand All @@ -150,7 +154,7 @@ struct OffloadModuleOpts {
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPForceUSM(Opts.OpenMPForceUSM),
OpenMPVersion(Opts.OpenMPVersion), OMPHostIRFile(Opts.OMPHostIRFile),
NoGPULib(Opts.NoGPULib) {}
OMPTargetTriples(Opts.OMPTargetTriples), NoGPULib(Opts.NoGPULib) {}

uint32_t OpenMPTargetDebug = 0;
bool OpenMPTeamSubscription = false;
Expand All @@ -162,6 +166,7 @@ struct OffloadModuleOpts {
bool OpenMPForceUSM = false;
uint32_t OpenMPVersion = 11;
std::string OMPHostIRFile = {};
std::vector<llvm::Triple> OMPTargetTriples = {};
bool NoGPULib = false;
};

Expand All @@ -185,6 +190,9 @@ struct OffloadModuleOpts {
if (!Opts.OMPHostIRFile.empty())
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
}
auto strTriples = llvm::to_vector(llvm::map_range(Opts.OMPTargetTriples,
[](llvm::Triple triple) { return triple.normalize(); }));
offloadMod.setTargetTriples(strTriples);
}
}

Expand Down
50 changes: 20 additions & 30 deletions flang/lib/Evaluate/fold-real.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,28 +359,27 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
using TS = ResultType<decltype(sVal)>;
bool badSConst{false};
if (auto sConst{GetScalarConstantValue<TS>(sVal)}; sConst &&
sConst->IsZero() &&
(sConst->IsZero() || sConst->IsNotANumber()) &&
context.languageFeatures().ShouldWarn(
common::UsageWarning::FoldingValueChecks)) {
context.messages().Say("NEAREST: S argument is zero"_warn_en_US);
context.messages().Say("NEAREST: S argument is %s"_warn_en_US,
sConst->IsZero() ? "zero" : "NaN");
badSConst = true;
}
return FoldElementalIntrinsic<T, T, TS>(context, std::move(funcRef),
ScalarFunc<T, T, TS>([&](const Scalar<T> &x,
const Scalar<TS> &s) -> Scalar<T> {
if (!badSConst && s.IsZero() &&
if (!badSConst && (s.IsZero() || s.IsNotANumber()) &&
context.languageFeatures().ShouldWarn(
common::UsageWarning::FoldingValueChecks)) {
context.messages().Say(
"NEAREST: S argument is zero"_warn_en_US);
"NEAREST: S argument is %s"_warn_en_US,
s.IsZero() ? "zero" : "NaN");
}
auto result{x.NEAREST(!s.IsNegative())};
if (context.languageFeatures().ShouldWarn(
common::UsageWarning::FoldingException)) {
if (result.flags.test(RealFlag::Overflow)) {
context.messages().Say(
"NEAREST intrinsic folding overflow"_warn_en_US);
} else if (result.flags.test(RealFlag::InvalidArgument)) {
if (result.flags.test(RealFlag::InvalidArgument)) {
context.messages().Say(
"NEAREST intrinsic folding: bad argument"_warn_en_US);
}
Expand Down Expand Up @@ -469,32 +468,26 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
return FoldElementalIntrinsic<T, T, TY>(context, std::move(funcRef),
ScalarFunc<T, T, TY>([&](const Scalar<T> &x,
const Scalar<TY> &y) -> Scalar<T> {
bool upward{true};
switch (x.Compare(Scalar<T>::Convert(y).value)) {
bool reverseCompare{
Scalar<T>::binaryPrecision < Scalar<TY>::binaryPrecision};
switch (reverseCompare
? y.Compare(Scalar<TY>::Convert(x).value)
: x.Compare(Scalar<T>::Convert(y).value)) {
case Relation::Unordered:
if (context.languageFeatures().ShouldWarn(
common::UsageWarning::FoldingValueChecks)) {
context.messages().Say(
"IEEE_NEXT_AFTER intrinsic folding: bad argument"_warn_en_US);
"IEEE_NEXT_AFTER intrinsic folding: arguments are unordered"_warn_en_US);
}
return x;
return x.NotANumber();
case Relation::Equal:
return x;
case Relation::Less:
upward = true;
break;
case Relation::Less:
return x.NEAREST(!reverseCompare).value;
case Relation::Greater:
upward = false;
break;
}
auto result{x.NEAREST(upward)};
if (result.flags.test(RealFlag::Overflow) &&
context.languageFeatures().ShouldWarn(
common::UsageWarning::FoldingException)) {
context.messages().Say(
"IEEE_NEXT_AFTER intrinsic folding overflow"_warn_en_US);
return x.NEAREST(reverseCompare).value;
}
return result.value;
return x; // dodge bogus "missing return" GCC warning
}));
},
yExpr->u);
Expand All @@ -508,12 +501,9 @@ Expr<Type<TypeCategory::Real, KIND>> FoldIntrinsicFunction(
auto result{x.NEAREST(upward)};
if (context.languageFeatures().ShouldWarn(
common::UsageWarning::FoldingException)) {
if (result.flags.test(RealFlag::Overflow)) {
context.messages().Say(
"%s intrinsic folding overflow"_warn_en_US, iName);
} else if (result.flags.test(RealFlag::InvalidArgument)) {
if (result.flags.test(RealFlag::InvalidArgument)) {
context.messages().Say(
"%s intrinsic folding: bad argument"_warn_en_US, iName);
"%s intrinsic folding: argument is NaN"_warn_en_US, iName);
}
}
return result.value;
Expand Down
21 changes: 16 additions & 5 deletions flang/lib/Evaluate/real.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,10 +358,15 @@ ValueWithRealFlags<Real<W, P>> Real<W, P>::NEAREST(bool upward) const {
}
}
}
result.flags = result.value.Normalize(isNegative, expo, nearest);
} else if (IsInfinite() && upward == isNegative) {
result.value = isNegative ? HUGE().Negate() : HUGE(); // largest mag finite
} else {
result.value.Normalize(isNegative, expo, nearest);
} else if (IsInfinite()) {
if (upward == isNegative) {
result.value =
isNegative ? HUGE().Negate() : HUGE(); // largest mag finite
} else {
result.value = *this;
}
} else { // NaN
result.flags.set(RealFlag::InvalidArgument);
result.value = *this;
}
Expand Down Expand Up @@ -526,10 +531,16 @@ RealFlags Real<W, P>::Normalize(bool negative, int exponent,
(rounding.mode == common::RoundingMode::Up && !negative) ||
(rounding.mode == common::RoundingMode::Down && negative)) {
word_ = Word{maxExponent}.SHIFTL(significandBits); // Inf
if constexpr (!isImplicitMSB) {
word_ = word_.IBSET(significandBits - 1);
}
} else {
// directed rounding: round to largest finite value rather than infinity
// (x86 does this, not sure whether it's standard behavior)
word_ = Word{word_.MASKR(word_.bits - 1)}.IBCLR(significandBits);
word_ = Word{word_.MASKR(word_.bits - 1)};
if constexpr (isImplicitMSB) {
word_ = word_.IBCLR(significandBits);
}
}
if (negative) {
word_ = word_.IBSET(bits - 1);
Expand Down
206 changes: 126 additions & 80 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -929,90 +929,11 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
Fortran::common::LanguageFeature::CUDA);
}

// -fopenmp and -fopenacc
// -fopenacc
if (args.hasArg(clang::driver::options::OPT_fopenacc)) {
res.getFrontendOpts().features.Enable(
Fortran::common::LanguageFeature::OpenACC);
}
if (args.hasArg(clang::driver::options::OPT_fopenmp)) {
// By default OpenMP is set to 1.1 version
res.getLangOpts().OpenMPVersion = 11;
res.getFrontendOpts().features.Enable(
Fortran::common::LanguageFeature::OpenMP);
if (int Version = getLastArgIntValue(
args, clang::driver::options::OPT_fopenmp_version_EQ,
res.getLangOpts().OpenMPVersion, diags)) {
res.getLangOpts().OpenMPVersion = Version;
}
if (args.hasArg(clang::driver::options::OPT_fopenmp_force_usm)) {
res.getLangOpts().OpenMPForceUSM = 1;
}
if (args.hasArg(clang::driver::options::OPT_fopenmp_is_target_device)) {
res.getLangOpts().OpenMPIsTargetDevice = 1;

// Get OpenMP host file path if any and report if a non existent file is
// found
if (auto *arg = args.getLastArg(
clang::driver::options::OPT_fopenmp_host_ir_file_path)) {
res.getLangOpts().OMPHostIRFile = arg->getValue();
if (!llvm::sys::fs::exists(res.getLangOpts().OMPHostIRFile))
diags.Report(clang::diag::err_drv_omp_host_ir_file_not_found)
<< res.getLangOpts().OMPHostIRFile;
}

if (args.hasFlag(
clang::driver::options::OPT_fopenmp_assume_teams_oversubscription,
clang::driver::options::
OPT_fno_openmp_assume_teams_oversubscription,
/*Default=*/false))
res.getLangOpts().OpenMPTeamSubscription = true;

if (args.hasArg(
clang::driver::options::OPT_fopenmp_assume_no_thread_state))
res.getLangOpts().OpenMPNoThreadState = 1;

if (args.hasArg(
clang::driver::options::OPT_fopenmp_assume_no_nested_parallelism))
res.getLangOpts().OpenMPNoNestedParallelism = 1;

if (args.hasFlag(clang::driver::options::
OPT_fopenmp_assume_threads_oversubscription,
clang::driver::options::
OPT_fno_openmp_assume_threads_oversubscription,
/*Default=*/false))
res.getLangOpts().OpenMPThreadSubscription = true;

if ((args.hasArg(clang::driver::options::OPT_fopenmp_target_debug) ||
args.hasArg(clang::driver::options::OPT_fopenmp_target_debug_EQ))) {
res.getLangOpts().OpenMPTargetDebug = getLastArgIntValue(
args, clang::driver::options::OPT_fopenmp_target_debug_EQ,
res.getLangOpts().OpenMPTargetDebug, diags);

if (!res.getLangOpts().OpenMPTargetDebug &&
args.hasArg(clang::driver::options::OPT_fopenmp_target_debug))
res.getLangOpts().OpenMPTargetDebug = 1;
}
if (args.hasArg(clang::driver::options::OPT_nogpulib))
res.getLangOpts().NoGPULib = 1;
}

switch (llvm::Triple(res.getTargetOpts().triple).getArch()) {
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
case llvm::Triple::amdgcn:
if (!res.getLangOpts().OpenMPIsTargetDevice) {
const unsigned diagID = diags.getCustomDiagID(
clang::DiagnosticsEngine::Error,
"OpenMP AMDGPU/NVPTX is only prepared to deal with device code.");
diags.Report(diagID);
}
res.getLangOpts().OpenMPIsGPU = 1;
break;
default:
res.getLangOpts().OpenMPIsGPU = 0;
break;
}
}

// -pedantic
if (args.hasArg(clang::driver::options::OPT_pedantic)) {
Expand Down Expand Up @@ -1042,6 +963,130 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
return diags.getNumErrors() == numErrorsBefore;
}

/// Handles every OpenMP related option. Nothing happens unless -fopenmp is
/// present; otherwise the language options of \c res are populated from
/// \c args. Returns false if new errors are generated.
static bool parseOpenMPArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
                            clang::DiagnosticsEngine &diags) {
  namespace drvOpts = clang::driver::options;

  if (!args.hasArg(drvOpts::OPT_fopenmp))
    return true;

  const unsigned errorsOnEntry = diags.getNumErrors();
  llvm::Triple targetTriple(res.getTargetOpts().triple);
  auto &langOpts = res.getLangOpts();

  // Version 1.1 is the default; a non-zero -fopenmp-version= value replaces it.
  langOpts.OpenMPVersion = 11;
  res.getFrontendOpts().features.Enable(
      Fortran::common::LanguageFeature::OpenMP);
  const int requestedVersion = getLastArgIntValue(
      args, drvOpts::OPT_fopenmp_version_EQ, langOpts.OpenMPVersion, diags);
  if (requestedVersion)
    langOpts.OpenMPVersion = requestedVersion;

  if (args.hasArg(drvOpts::OPT_fopenmp_force_usm))
    langOpts.OpenMPForceUSM = 1;

  // Options that are only looked at when compiling for the target device.
  if (args.hasArg(drvOpts::OPT_fopenmp_is_target_device)) {
    langOpts.OpenMPIsTargetDevice = 1;

    // Record the host IR file path, if given, and diagnose a path that does
    // not exist.
    if (auto *hostIRArg =
            args.getLastArg(drvOpts::OPT_fopenmp_host_ir_file_path)) {
      langOpts.OMPHostIRFile = hostIRArg->getValue();
      if (!llvm::sys::fs::exists(langOpts.OMPHostIRFile))
        diags.Report(clang::diag::err_drv_omp_host_ir_file_not_found)
            << langOpts.OMPHostIRFile;
    }

    const bool teamsOversubscribed = args.hasFlag(
        drvOpts::OPT_fopenmp_assume_teams_oversubscription,
        drvOpts::OPT_fno_openmp_assume_teams_oversubscription,
        /*Default=*/false);
    if (teamsOversubscribed)
      langOpts.OpenMPTeamSubscription = true;

    if (args.hasArg(drvOpts::OPT_fopenmp_assume_no_thread_state))
      langOpts.OpenMPNoThreadState = 1;

    if (args.hasArg(drvOpts::OPT_fopenmp_assume_no_nested_parallelism))
      langOpts.OpenMPNoNestedParallelism = 1;

    const bool threadsOversubscribed = args.hasFlag(
        drvOpts::OPT_fopenmp_assume_threads_oversubscription,
        drvOpts::OPT_fno_openmp_assume_threads_oversubscription,
        /*Default=*/false);
    if (threadsOversubscribed)
      langOpts.OpenMPThreadSubscription = true;

    if (args.hasArg(drvOpts::OPT_fopenmp_target_debug) ||
        args.hasArg(drvOpts::OPT_fopenmp_target_debug_EQ)) {
      langOpts.OpenMPTargetDebug =
          getLastArgIntValue(args, drvOpts::OPT_fopenmp_target_debug_EQ,
                             langOpts.OpenMPTargetDebug, diags);

      // The bare flag enables debugging when the valued form left it at zero.
      if (!langOpts.OpenMPTargetDebug &&
          args.hasArg(drvOpts::OPT_fopenmp_target_debug))
        langOpts.OpenMPTargetDebug = 1;
    }

    if (args.hasArg(drvOpts::OPT_nogpulib))
      langOpts.NoGPULib = 1;
  }

  // NVPTX and AMDGPU triples are treated as GPU targets; those are only
  // accepted when compiling device code.
  const llvm::Triple::ArchType targetArch = targetTriple.getArch();
  const bool targetsGPU = targetArch == llvm::Triple::nvptx ||
                          targetArch == llvm::Triple::nvptx64 ||
                          targetArch == llvm::Triple::amdgcn;
  if (targetsGPU && !langOpts.OpenMPIsTargetDevice) {
    const unsigned diagID = diags.getCustomDiagID(
        clang::DiagnosticsEngine::Error,
        "OpenMP AMDGPU/NVPTX is only prepared to deal with device code.");
    diags.Report(diagID);
  }
  langOpts.OpenMPIsGPU = targetsGPU ? 1 : 0;

  // Collect the OpenMP target triples, if any.
  if (auto *targetsArg = args.getLastArg(drvOpts::OPT_fopenmp_targets_EQ)) {
    enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit };
    auto getArchPtrSize = [](const llvm::Triple &triple) {
      if (triple.isArch16Bit())
        return Arch16Bit;
      if (triple.isArch32Bit())
        return Arch32Bit;
      assert(triple.isArch64Bit() && "Expected 64-bit architecture");
      return Arch64Bit;
    };
    // Architectures accepted as values of the targets option.
    auto isAcceptedTargetArch = [](const llvm::Triple &triple) {
      const llvm::Triple::ArchType arch = triple.getArch();
      if (arch == llvm::Triple::UnknownArch)
        return false;
      return arch == llvm::Triple::aarch64 || triple.isPPC() ||
             arch == llvm::Triple::systemz || arch == llvm::Triple::nvptx ||
             arch == llvm::Triple::nvptx64 || arch == llvm::Triple::amdgcn ||
             arch == llvm::Triple::x86 || arch == llvm::Triple::x86_64;
    };

    for (unsigned idx = 0; idx < targetsArg->getNumValues(); ++idx) {
      llvm::Triple offloadTriple(targetsArg->getValue(idx));

      if (!isAcceptedTargetArch(offloadTriple)) {
        diags.Report(clang::diag::err_drv_invalid_omp_target)
            << targetsArg->getValue(idx);
        continue;
      }
      // The offload target must have the same pointer width as the
      // compilation target.
      if (getArchPtrSize(targetTriple) != getArchPtrSize(offloadTriple)) {
        diags.Report(clang::diag::err_drv_incompatible_omp_arch)
            << targetsArg->getValue(idx) << targetTriple.str();
        continue;
      }
      langOpts.OMPTargetTriples.push_back(offloadTriple);
    }
  }

  return diags.getNumErrors() == errorsOnEntry;
}

/// Parses all floating point related arguments and populates the
/// CompilerInvocation accordingly.
/// Returns false if new errors are generated.
Expand Down Expand Up @@ -1277,6 +1322,7 @@ bool CompilerInvocation::createFromArgs(
success &= parseVectorLibArg(invoc.getCodeGenOpts(), args, diags);
success &= parseSemaArgs(invoc, args, diags);
success &= parseDialectArgs(invoc, args, diags);
success &= parseOpenMPArgs(invoc, args, diags);
success &= parseDiagArgs(invoc, args, diags);

// Collect LLVM (-mllvm) and MLIR (-mmlir) options.
Expand Down
6 changes: 3 additions & 3 deletions flang/lib/Lower/Allocatable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1052,15 +1052,15 @@ createMutableProperties(Fortran::lower::AbstractConverter &converter,
fir::MutableBoxValue Fortran::lower::createMutableBox(
Fortran::lower::AbstractConverter &converter, mlir::Location loc,
const Fortran::lower::pft::Variable &var, mlir::Value boxAddr,
mlir::ValueRange nonDeferredParams, bool alwaysUseBox) {

mlir::ValueRange nonDeferredParams, bool alwaysUseBox, unsigned allocator) {
fir::MutableProperties mutableProperties = createMutableProperties(
converter, loc, var, nonDeferredParams, alwaysUseBox);
fir::MutableBoxValue box(boxAddr, nonDeferredParams, mutableProperties);
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
if (!var.isGlobal() && !Fortran::semantics::IsDummy(var.getSymbol()))
fir::factory::disassociateMutableBox(builder, loc, box,
/*polymorphicSetType=*/false);
/*polymorphicSetType=*/false,
allocator);
return box;
}

Expand Down
19 changes: 18 additions & 1 deletion flang/lib/Lower/ConvertVariable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "flang/Optimizer/Support/FatalError.h"
#include "flang/Optimizer/Support/InternalNames.h"
#include "flang/Optimizer/Support/Utils.h"
#include "flang/Runtime/allocator-registry.h"
#include "flang/Semantics/runtime-type-info.h"
#include "flang/Semantics/tools.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -1851,6 +1852,21 @@ static void genBoxDeclare(Fortran::lower::AbstractConverter &converter,
replace);
}

/// Picks the allocator index for \p sym from the CUDA data attribute of its
/// ultimate symbol. Symbols without an attribute, or with an attribute that
/// has no dedicated allocator here, get the default allocator.
static unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
  const std::optional<Fortran::common::CUDADataAttr> attr =
      Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
  if (!attr)
    return kDefaultAllocator;

  switch (*attr) {
  case Fortran::common::CUDADataAttr::Pinned:
    return kPinnedAllocatorPos;
  case Fortran::common::CUDADataAttr::Device:
    return kDeviceAllocatorPos;
  case Fortran::common::CUDADataAttr::Managed:
  case Fortran::common::CUDADataAttr::Unified:
    // Managed and unified data share the managed allocator.
    return kManagedAllocatorPos;
  default:
    return kDefaultAllocator;
  }
}

/// Lower specification expressions and attributes of variable \p var and
/// add it to the symbol map. For a global or an alias, the address must be
/// pre-computed and provided in \p preAlloc. A dummy argument for the current
Expand Down Expand Up @@ -1940,7 +1956,8 @@ void Fortran::lower::mapSymbolAttributes(
fir::MutableBoxValue box = Fortran::lower::createMutableBox(
converter, loc, var, boxAlloc, nonDeferredLenParams,
/*alwaysUseBox=*/
converter.getLoweringOptions().getLowerToHighLevelFIR());
converter.getLoweringOptions().getLowerToHighLevelFIR(),
getAllocatorIdx(var.getSymbol()));
genAllocatableOrPointerDeclare(converter, symMap, var.getSymbol(), box,
replace);
return;
Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2157,7 +2157,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
symTable.pushScope();
loopDsp.emplace(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
enableDelayedPrivatization, &symTable);
/*useDelayedPrivatization=*/false, &symTable);
loopDsp->processStep1();
}

Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Lower/OpenMP/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ llvm::cl::opt<bool> enableDelayedPrivatization(
"openmp-enable-delayed-privatization",
llvm::cl::desc(
"Emit `[first]private` variables as clauses on the MLIR ops."),
llvm::cl::init(false));
llvm::cl::init(true));

llvm::cl::opt<bool> enableDelayedPrivatizationStaging(
"openmp-enable-delayed-privatization-staging",
Expand Down
22 changes: 15 additions & 7 deletions flang/lib/Optimizer/Builder/MutableBox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,9 @@ class MutablePropertyWriter {
public:
MutablePropertyWriter(fir::FirOpBuilder &builder, mlir::Location loc,
const fir::MutableBoxValue &box,
mlir::Value typeSourceBox = {})
: builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox} {}
mlir::Value typeSourceBox = {}, unsigned allocator = 0)
: builder{builder}, loc{loc}, box{box}, typeSourceBox{typeSourceBox},
allocator{allocator} {}
/// Update MutableBoxValue with new address, shape and length parameters.
/// Extents and lbounds must all have index type.
/// lbounds can be empty in which case all ones is assumed.
Expand Down Expand Up @@ -242,7 +243,7 @@ class MutablePropertyWriter {
// declared type, not retain the previous dynamic type.
auto deallocatedBox = fir::factory::createUnallocatedBox(
builder, loc, box.getBoxTy(), box.nonDeferredLenParams(),
typeSourceBox);
typeSourceBox, allocator);
builder.create<fir::StoreOp>(loc, deallocatedBox, box.getAddr());
}
}
Expand Down Expand Up @@ -276,7 +277,8 @@ class MutablePropertyWriter {
/// Update the IR box (fir.ref<fir.box<T>>) of the MutableBoxValue.
void updateIRBox(mlir::Value addr, mlir::ValueRange lbounds,
mlir::ValueRange extents, mlir::ValueRange lengths,
mlir::Value tdesc = {}) {
mlir::Value tdesc = {},
unsigned allocator = kDefaultAllocator) {
mlir::Value irBox = createNewFirBox(builder, loc, box, addr, lbounds,
extents, lengths, tdesc);
builder.create<fir::StoreOp>(loc, irBox, box.getAddr());
Expand Down Expand Up @@ -322,13 +324,15 @@ class MutablePropertyWriter {
mlir::Location loc;
fir::MutableBoxValue box;
mlir::Value typeSourceBox;
unsigned allocator;
};

} // namespace

mlir::Value fir::factory::createUnallocatedBox(
fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type boxType,
mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox) {
mlir::ValueRange nonDeferredParams, mlir::Value typeSourceBox,
unsigned allocator) {
auto baseBoxType = mlir::cast<fir::BaseBoxType>(boxType);
// Giving unallocated/disassociated status to assumed-rank POINTER/
// ALLOCATABLE is not directly possible to a Fortran user. But the
Expand Down Expand Up @@ -374,6 +378,8 @@ mlir::Value fir::factory::createUnallocatedBox(
mlir::Value emptySlice;
auto embox = builder.create<fir::EmboxOp>(
loc, baseBoxType, nullAddr, shape, emptySlice, lenParams, typeSourceBox);
if (allocator != 0)
embox.setAllocatorIdx(allocator);
if (isAssumedRank)
return builder.createConvert(loc, boxType, embox);
return embox;
Expand Down Expand Up @@ -691,7 +697,8 @@ void fir::factory::associateMutableBoxWithRemap(
void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
mlir::Location loc,
const fir::MutableBoxValue &box,
bool polymorphicSetType) {
bool polymorphicSetType,
unsigned allocator) {
if (box.isPolymorphic() && polymorphicSetType) {
// 7.3.2.3 point 7. The dynamic type of a disassociated pointer is the
// same as its declared type.
Expand All @@ -704,7 +711,8 @@ void fir::factory::disassociateMutableBox(fir::FirOpBuilder &builder,
return;
}
}
MutablePropertyWriter{builder, loc, box}.setUnallocatedStatus();
MutablePropertyWriter{builder, loc, box, {}, allocator}
.setUnallocatedStatus();
}

static llvm::SmallVector<mlir::Value>
Expand Down
3 changes: 3 additions & 0 deletions flang/lib/Optimizer/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ add_flang_library(FIRTransforms
CharacterConversion.cpp
ConstantArgumentGlobalisation.cpp
ControlFlowConverter.cpp
CufOpConversion.cpp
ArrayValueCopy.cpp
ExternalNameConversion.cpp
MemoryUtils.cpp
Expand All @@ -29,11 +30,13 @@ add_flang_library(FIRTransforms
DebugTypeGenerator.cpp

DEPENDS
CUFDialect
FIRDialect
FIROptTransformsPassIncGen
HLFIROpsIncGen

LINK_LIBS
CUFDialect
FIRAnalysis
FIRBuilder
FIRCodeGen
Expand Down
156 changes: 156 additions & 0 deletions flang/lib/Optimizer/Transforms/CufOpConversion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
//===-- CufOpConversion.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Common/Fortran.h"
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Runtime/allocatable.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

namespace fir {
#define GEN_PASS_DEF_CUFOPCONVERSION
#include "flang/Optimizer/Transforms/Passes.h.inc"
} // namespace fir

using namespace fir;
using namespace mlir;
using namespace Fortran::runtime;

namespace {

template <typename OpTy>
static bool isBoxGlobal(OpTy op) {
if (auto declareOp =
mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp())) {
if (mlir::isa_and_nonnull<fir::AddrOfOp>(
declareOp.getMemref().getDefiningOp()))
return true;
} else if (auto declareOp = mlir::dyn_cast_or_null<hlfir::DeclareOp>(
op.getBox().getDefiningOp())) {
if (mlir::isa_and_nonnull<fir::AddrOfOp>(
declareOp.getMemref().getDefiningOp()))
return true;
}
return false;
}

/// Replaces \p op with a call to the runtime function \p func.
///
/// The call receives the op's box, a boolean telling whether the op has a
/// stat, the op's errmsg (or an absent box when it has none), and the source
/// file name and line number derived from the op's location.
template <typename OpTy>
static mlir::LogicalResult convertOpToCall(OpTy op,
                                           mlir::PatternRewriter &rewriter,
                                           mlir::func::FuncOp func) {
  mlir::Location loc = op.getLoc();
  auto module = op->template getParentOfType<mlir::ModuleOp>();
  fir::FirOpBuilder builder(rewriter, module);
  auto funcTy = func.getFunctionType();

  // The line number is created with the type of the callee's input 4.
  mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
  mlir::Value sourceLine =
      fir::factory::locationToLineNo(builder, loc, funcTy.getInput(4));

  mlir::Value hasStat = builder.createBool(loc, op.getHasStat() ? true : false);

  // Without an errmsg operand, pass an absent box instead.
  mlir::Value errmsg = op.getErrmsg();
  if (!errmsg) {
    mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType());
    errmsg = builder.create<fir::AbsentOp>(loc, boxNoneTy).getResult();
  }

  llvm::SmallVector<mlir::Value> callArgs{
      fir::runtime::createArguments(builder, loc, funcTy, op.getBox(), hasStat,
                                    errmsg, sourceFile, sourceLine)};
  auto call = builder.create<fir::CallOp>(loc, func, callArgs);
  rewriter.replaceOp(op, call);
  return mlir::success();
}

/// Rewrites cuf::AllocateOp into a call to the AllocatableAllocate runtime
/// entry point. Forms that are not supported yet are left unmatched.
struct CufAllocateOpConversion
    : public mlir::OpRewritePattern<cuf::AllocateOp> {
  using OpRewritePattern::OpRewritePattern;

  mlir::LogicalResult
  matchAndRewrite(cuf::AllocateOp op,
                  mlir::PatternRewriter &rewriter) const override {
    // Unsupported for now:
    //  - source: allocation with source needs a new runtime entry point.
    //  - stream: allocation using a different stream.
    //  - pinned: a reference to a logical value that can be set to true when
    //    pinned allocation succeeds; needs a new entry point.
    //  - module variables: the descriptor will be duplicated and needs to be
    //    synced after allocation.
    if (op.getSource() || op.getStream() || op.getPinned() || isBoxGlobal(op))
      return mlir::failure();

    // A local descriptor already carries its dedicated allocator, set before
    // the call, so the standard AllocatableAllocate entry point is enough.
    mlir::Location loc = op.getLoc();
    auto module = op->getParentOfType<mlir::ModuleOp>();
    fir::FirOpBuilder builder(rewriter, module);
    mlir::func::FuncOp allocateFunc =
        fir::runtime::getRuntimeFunc<mkRTKey(AllocatableAllocate)>(loc,
                                                                   builder);
    return convertOpToCall<cuf::AllocateOp>(op, rewriter, allocateFunc);
  }
};

/// Rewrites cuf::DeallocateOp into a call to the AllocatableDeallocate
/// runtime entry point. Module variables are left unmatched.
struct CufDeallocateOpConversion
    : public mlir::OpRewritePattern<cuf::DeallocateOp> {
  using OpRewritePattern::OpRewritePattern;

  mlir::LogicalResult
  matchAndRewrite(cuf::DeallocateOp op,
                  mlir::PatternRewriter &rewriter) const override {
    // TODO: module variables are not handled yet; their descriptor will be
    // duplicated and needs to be synced.
    const bool isModuleVariable = isBoxGlobal(op);
    if (isModuleVariable)
      return mlir::failure();

    // A local descriptor already carries its dedicated deallocator, set
    // before the call, so the standard AllocatableDeallocate entry point is
    // enough.
    mlir::Location loc = op.getLoc();
    auto module = op->getParentOfType<mlir::ModuleOp>();
    fir::FirOpBuilder builder(rewriter, module);
    mlir::func::FuncOp deallocateFunc =
        fir::runtime::getRuntimeFunc<mkRTKey(AllocatableDeallocate)>(loc,
                                                                     builder);
    return convertOpToCall<cuf::DeallocateOp>(op, rewriter, deallocateFunc);
  }
};

/// Pass that converts cuf::AllocateOp and cuf::DeallocateOp with the patterns
/// above. Both ops are marked illegal, and the pass fails when the partial
/// conversion fails.
class CufOpConversion : public fir::impl::CufOpConversionBase<CufOpConversion> {
public:
  void runOnOperation() override {
    auto *context = &getContext();

    mlir::RewritePatternSet patterns(context);
    mlir::ConversionTarget target(*context);
    target.addIllegalOp<cuf::AllocateOp, cuf::DeallocateOp>();
    patterns.insert<CufAllocateOpConversion, CufDeallocateOpConversion>(
        context);

    const mlir::LogicalResult status = mlir::applyPartialConversion(
        getOperation(), target, std::move(patterns));
    if (mlir::failed(status)) {
      mlir::emitError(mlir::UnknownLoc::get(context),
                      "error in CUF op conversion\n");
      signalPassFailure();
    }
  }
};
} // namespace
8 changes: 6 additions & 2 deletions flang/lib/Semantics/expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3587,8 +3587,12 @@ MaybeExpr RelationHelper(ExpressionAnalyzer &context, RelationalOperator opr,
analyzer.IsIntrinsicRelational(opr, *leftType, *rightType)) {
analyzer.CheckForNullPointer("as a relational operand");
analyzer.CheckForAssumedRank("as a relational operand");
return AsMaybeExpr(Relate(context.GetContextualMessages(), opr,
analyzer.MoveExpr(0), analyzer.MoveExpr(1)));
if (auto cmp{Relate(context.GetContextualMessages(), opr,
analyzer.MoveExpr(0), analyzer.MoveExpr(1))}) {
return AsMaybeExpr(ConvertToKind<TypeCategory::Logical>(
context.GetDefaultKind(TypeCategory::Logical),
AsExpr(std::move(*cmp))));
}
} else {
return analyzer.TryDefinedOp(opr,
leftType && leftType->category() == TypeCategory::Logical &&
Expand Down
3 changes: 3 additions & 0 deletions flang/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
add_dependencies(FortranRuntime flang-new module_files)
endif()

if (FLANG_CUF_RUNTIME)
add_subdirectory(CUDA)
endif()
19 changes: 19 additions & 0 deletions flang/runtime/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

# Make the CUDA toolkit headers visible to the sources in this directory
# (allocator.cpp includes "cuda.h").
include_directories(${CUDAToolkit_INCLUDE_DIRS})
# Locate the library named `cuda`, using the CUDA implicit link directories as
# hints; REQUIRED turns a missing library into a configuration error.
# NOTE(review): the result variable is called CUDA_RUNTIME_LIBRARY, yet the
# library searched for is `cuda` and allocator.cpp calls cu* entry points --
# confirm the variable name matches the intended library.
find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)

# Library built from the CUF allocator implementation.
add_flang_library(CufRuntime
allocator.cpp
)
# Link privately against the Fortran runtime and the CUDA library found above.
target_link_libraries(CufRuntime
PRIVATE
FortranRuntime
${CUDA_RUNTIME_LIBRARY}
)
60 changes: 60 additions & 0 deletions flang/runtime/CUDA/allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//===-- runtime/CUDA/allocator.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Runtime/CUDA/allocator.h"
#include "../derived.h"
#include "../stat.h"
#include "../terminator.h"
#include "../type-info.h"
#include "flang/Common/Fortran.h"
#include "flang/ISO_Fortran_binding_wrapper.h"
#include "flang/Runtime/allocator-registry.h"

#include "cuda.h"

namespace Fortran::runtime::cuf {

// Registers the pinned, device and managed allocate/free pairs with the
// allocator registry, each under its dedicated index.
void CUFRegisterAllocator() {
  allocatorRegistry.Register(
      kPinnedAllocatorPos, {&CUFAllocPinned, &CUFFreePinned});
  allocatorRegistry.Register(
      kDeviceAllocatorPos, {&CUFAllocDevice, &CUFFreeDevice});
  allocatorRegistry.Register(
      kManagedAllocatorPos, {&CUFAllocManaged, &CUFFreeManaged});
}

// Allocates sizeInBytes bytes with cuMemAllocHost and returns the resulting
// pointer. Any error from the call is passed to CUDA_REPORT_IF_ERROR.
void *CUFAllocPinned(std::size_t sizeInBytes) {
  // Start from nullptr so that an indeterminate pointer is never returned if
  // the allocation fails and error reporting returns; this also matches the
  // zero-initialization used by the device and managed allocators.
  void *p{nullptr};
  CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
  return p;
}

// Releases memory with cuMemFreeHost; any error from the call is passed to
// CUDA_REPORT_IF_ERROR.
void CUFFreePinned(void *p) {
  CUDA_REPORT_IF_ERROR(cuMemFreeHost(p));
}

void *CUFAllocDevice(std::size_t sizeInBytes) {
CUdeviceptr p = 0;
CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
return reinterpret_cast<void *>(p);
}

void CUFFreeDevice(void *p) {
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
}

void *CUFAllocManaged(std::size_t sizeInBytes) {
CUdeviceptr p = 0;
CUDA_REPORT_IF_ERROR(
cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
return reinterpret_cast<void *>(p);
}

void CUFFreeManaged(void *p) {
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
}

} // namespace Fortran::runtime::cuf
6 changes: 3 additions & 3 deletions flang/runtime/edit-output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,19 +300,19 @@ RT_API_ATTRS bool RealOutputEditing<KIND>::EditEorDOutput(
flags |= decimal::AlwaysSign;
}
int scale{edit.modes.scale}; // 'kP' value
bool isEN{edit.variation == 'N'};
bool isES{edit.variation == 'S'};
if (editWidth == 0) { // "the processor selects the field width"
if (edit.digits.has_value()) { // E0.d
if (editDigits == 0 && scale <= 0) { // E0.0
significantDigits = 1;
significantDigits = isEN || isES ? 0 : 1;
}
} else { // E0
flags |= decimal::Minimize;
significantDigits =
sizeof buffer_ - 5; // sign, NUL, + 3 extra for EN scaling
}
}
bool isEN{edit.variation == 'N'};
bool isES{edit.variation == 'S'};
int zeroesAfterPoint{0};
if (isEN) {
scale = IsZero() ? 1 : 3;
Expand Down
105 changes: 86 additions & 19 deletions flang/runtime/format-implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,84 @@ RT_API_ATTRS int FormatControl<CONTEXT>::GetIntField(
return result;
}

// Xn, TRn, TLn
// Moves the current position within the record by 'n' characters: forward
// when n > 0 (Xn, TRn), backward when n < 0 (TLn).  For external formatted
// units whose connection is UTF-8, the distance is measured in multi-byte
// characters as far as the record's bytes can be viewed; whatever count is
// left over is applied as a plain offset, as it is for every other kind of
// unit.  For internal units of wide character kinds, the offset is scaled by
// the kind.  Always returns true.
template <typename CONTEXT>
static RT_API_ATTRS bool RelativeTabbing(CONTEXT &context, int n) {
  ConnectionState &connection{context.GetConnectionState()};
  if constexpr (std::is_same_v<CONTEXT,
                    ExternalFormattedIoStatementState<Direction::Input>> ||
      std::is_same_v<CONTEXT,
          ExternalFormattedIoStatementState<Direction::Output>>) {
    if (n != 0 && connection.isUTF8) {
      const char *p{};
      if (n > 0) { // Xn or TRn
        // Skip 'n' multi-byte characters.  If that's more than are in the
        // current record, that's valid -- the program can position past the
        // end and then reposition back with Tn or TLn.
        std::size_t bytesLeft{context.ViewBytesInRecord(p, true)};
        for (; n > 0 && bytesLeft && p; --n) {
          std::size_t byteCount{MeasureUTF8Bytes(*p)};
          if (byteCount > bytesLeft) {
            break; // truncated character at the end of the record
          }
          context.HandleRelativePosition(static_cast<std::int64_t>(byteCount));
          bytesLeft -= byteCount;
          // Don't call GotChar(byteCount), these don't count towards SIZE=
          p += byteCount;
        }
      } else { // n < 0: TLn
        // From here to the end of this branch, 'n' is the (positive) number
        // of characters that remain to be skipped backward.
        n = -n;
        if (std::int64_t excess{connection.positionInRecord -
                connection.recordLength.value_or(connection.positionInRecord)};
            excess > 0) {
          // Have tabbed past the end of the record
          if (excess >= n) {
            context.HandleRelativePosition(-n);
            return true;
          }
          context.HandleRelativePosition(-excess);
          n -= excess;
        }
        std::size_t bytesLeft{context.ViewBytesInRecord(p, false)};
        // Go back 'n' multi-byte characters.
        for (; n > 0 && bytesLeft && p; --n) {
          std::size_t byteCount{MeasurePreviousUTF8Bytes(p, bytesLeft)};
          // Convert to a signed type *before* negating: negating the unsigned
          // std::size_t first yields a large positive offset wherever
          // std::size_t is narrower than std::int64_t.
          context.HandleRelativePosition(-static_cast<std::int64_t>(byteCount));
          bytesLeft -= byteCount;
          p -= byteCount;
        }
        // Restore the backward direction so that any count left over is not
        // applied below as a *forward* move.
        n = -n;
      }
    }
  }
  if (connection.internalIoCharKind > 1) {
    n *= connection.internalIoCharKind;
  }
  context.HandleRelativePosition(n);
  return true;
}

// Tn
// Positions the record at the 1-based character column 'n'; columns below 1
// are treated as column 1.  Always returns true.
template <typename CONTEXT>
static RT_API_ATTRS bool AbsoluteTabbing(CONTEXT &context, int n) {
  constexpr bool isExternalFormatted{std::is_same_v<CONTEXT,
                                         ExternalFormattedIoStatementState<
                                             Direction::Input>> ||
      std::is_same_v<CONTEXT,
          ExternalFormattedIoStatementState<Direction::Output>>};
  ConnectionState &connection{context.GetConnectionState()};
  // 0-based offset that corresponds to the 1-based column
  int offset{0};
  if (n > 0) {
    offset = n - 1;
  }
  if constexpr (isExternalFormatted) {
    if (connection.isUTF8) {
      // Go back to the beginning of the record, then tab right by the offset
      // (i.e., TR(n-1)) so that the distance is handled by RelativeTabbing().
      connection.HandleAbsolutePosition(0);
      return RelativeTabbing(context, offset);
    }
  }
  if (connection.internalIoCharKind > 1) {
    // Scale the offset by the character kind of the internal unit
    offset *= connection.internalIoCharKind;
  }
  context.HandleAbsolutePosition(offset);
  return true;
}

template <typename CONTEXT>
static RT_API_ATTRS void HandleControl(
CONTEXT &context, char ch, char next, int n) {
Expand Down Expand Up @@ -169,12 +247,7 @@ static RT_API_ATTRS void HandleControl(
}
break;
case 'X':
if (!next) {
ConnectionState &connection{context.GetConnectionState()};
if (connection.internalIoCharKind > 1) {
n *= connection.internalIoCharKind;
}
context.HandleRelativePosition(n);
if (!next && RelativeTabbing(context, n)) {
return;
}
break;
Expand All @@ -190,19 +263,13 @@ static RT_API_ATTRS void HandleControl(
break;
case 'T': {
if (!next) { // Tn
--n; // convert 1-based to 0-based
}
ConnectionState &connection{context.GetConnectionState()};
if (connection.internalIoCharKind > 1) {
n *= connection.internalIoCharKind;
}
if (!next) { // Tn
context.HandleAbsolutePosition(n);
return;
}
if (next == 'L' || next == 'R') { // TLn & TRn
context.HandleRelativePosition(next == 'L' ? -n : n);
return;
if (AbsoluteTabbing(context, n)) {
return;
}
} else if (next == 'R' || next == 'L') { // TRn / TLn
if (RelativeTabbing(context, next == 'L' ? -n : n)) {
return;
}
}
} break;
default:
Expand Down
23 changes: 23 additions & 0 deletions flang/runtime/internal-unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ RT_API_ATTRS bool InternalDescriptorUnit<DIR>::Emit(
template <Direction DIR>
RT_API_ATTRS std::size_t InternalDescriptorUnit<DIR>::GetNextInputBytes(
const char *&p, IoErrorHandler &handler) {
p = nullptr;
if constexpr (DIR == Direction::Output) {
handler.Crash("InternalDescriptorUnit<Direction::Output>::"
"GetNextInputBytes() called");
Expand All @@ -98,6 +99,28 @@ RT_API_ATTRS std::size_t InternalDescriptorUnit<DIR>::GetNextInputBytes(
}
}

// Gives a view of the bytes of the current record relative to the current
// position, without changing the state of the unit.
//   forward:  returns the number of bytes from the current position up to the
//             record length, or 0 when the position is already at or past it.
//   backward: returns the number of bytes between the left tab limit (0 when
//             unset) and the current position.
// 'p' is set to the address of the current position in the record when the
// record is available and the position lies within it, and to nullptr
// otherwise -- so a nonzero result can accompany a null 'p'.
template <Direction DIR>
RT_API_ATTRS std::size_t InternalDescriptorUnit<DIR>::ViewBytesInRecord(
    const char *&p, bool forward) const {
  p = nullptr;
  // With no known record length, the record is taken to end at the current
  // position.
  auto limit{recordLength.value_or(positionInRecord)};
  const char *base{CurrentRecord()};
  if (!forward) {
    if (base && positionInRecord <= limit) {
      p = base + positionInRecord;
    }
    return positionInRecord - leftTabLimit.value_or(0);
  }
  if (positionInRecord >= limit) {
    return 0; // nothing lies ahead in this record
  }
  if (base) {
    p = base + positionInRecord;
  }
  return limit - positionInRecord;
}

template <Direction DIR>
RT_API_ATTRS bool InternalDescriptorUnit<DIR>::AdvanceRecord(
IoErrorHandler &handler) {
Expand Down
1 change: 1 addition & 0 deletions flang/runtime/internal-unit.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ template <Direction DIR> class InternalDescriptorUnit : public ConnectionState {

RT_API_ATTRS bool Emit(const char *, std::size_t, IoErrorHandler &);
RT_API_ATTRS std::size_t GetNextInputBytes(const char *&, IoErrorHandler &);
RT_API_ATTRS std::size_t ViewBytesInRecord(const char *&, bool forward) const;
RT_API_ATTRS bool AdvanceRecord(IoErrorHandler &);
RT_API_ATTRS void BackspaceRecord(IoErrorHandler &);
RT_API_ATTRS std::int64_t InquirePos();
Expand Down
14 changes: 14 additions & 0 deletions flang/runtime/io-stmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ std::size_t IoStatementBase::GetNextInputBytes(const char *&p) {
return 0;
}

// Default implementation: there are no record bytes to view in either
// direction, so 'p' is cleared and zero bytes are reported.
std::size_t IoStatementBase::ViewBytesInRecord(
    const char *&p, bool forward) const {
  static_cast<void>(forward); // the direction makes no difference here
  p = nullptr;
  return std::size_t{0};
}

bool IoStatementBase::AdvanceRecord(int) { return false; }

void IoStatementBase::BackspaceRecord() {}
Expand Down Expand Up @@ -105,6 +111,8 @@ std::size_t InternalIoStatementState<DIR>::GetNextInputBytes(const char *&p) {
return unit_.GetNextInputBytes(p, *this);
}

// InternalIoStatementState<DIR>::ViewBytesInRecord() not needed or defined

template <Direction DIR>
bool InternalIoStatementState<DIR>::AdvanceRecord(int n) {
while (n-- > 0) {
Expand Down Expand Up @@ -413,6 +421,12 @@ std::size_t ExternalIoStatementState<DIR>::GetNextInputBytes(const char *&p) {
return unit().GetNextInputBytes(p, *this);
}

// Forwards the request to the external file unit of this statement and
// returns its result unchanged.
template <Direction DIR>
std::size_t ExternalIoStatementState<DIR>::ViewBytesInRecord(
    const char *&p, bool forward) const {
  const ExternalFileUnit &fileUnit{unit()};
  return fileUnit.ViewBytesInRecord(p, forward);
}

template <Direction DIR>
bool ExternalIoStatementState<DIR>::AdvanceRecord(int n) {
while (n-- > 0) {
Expand Down
11 changes: 8 additions & 3 deletions flang/runtime/io-stmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class IoStatementState {
const char *, std::size_t bytes, std::size_t elementBytes = 0);
RT_API_ATTRS bool Receive(char *, std::size_t, std::size_t elementBytes = 0);
RT_API_ATTRS std::size_t GetNextInputBytes(const char *&);
RT_API_ATTRS std::size_t ViewBytesInRecord(const char *&, bool forward) const;
RT_API_ATTRS bool AdvanceRecord(int = 1);
RT_API_ATTRS void BackspaceRecord();
RT_API_ATTRS void HandleRelativePosition(std::int64_t byteOffset);
Expand Down Expand Up @@ -132,9 +133,9 @@ class IoStatementState {
RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentChar(
std::size_t &byteCount);

// The "remaining" arguments to CueUpInput(), SkipSpaces(), & NextInField()
// are always in units of bytes, not characters; the distinction matters
// for internal input from CHARACTER(KIND=2 and 4).
// The result of CueUpInput() and the "remaining" arguments to SkipSpaces()
// and NextInField() are always in units of bytes, not characters; the
// distinction matters for internal input from CHARACTER(KIND=2 and 4).

// For fixed-width fields, return the number of remaining bytes.
// Skip over leading blanks.
Expand Down Expand Up @@ -279,6 +280,7 @@ class IoStatementBase : public IoErrorHandler {
RT_API_ATTRS bool Receive(
char *, std::size_t bytes, std::size_t elementBytes = 0);
RT_API_ATTRS std::size_t GetNextInputBytes(const char *&);
RT_API_ATTRS std::size_t ViewBytesInRecord(const char *&, bool forward) const;
RT_API_ATTRS bool AdvanceRecord(int);
RT_API_ATTRS void BackspaceRecord();
RT_API_ATTRS void HandleRelativePosition(std::int64_t);
Expand Down Expand Up @@ -448,6 +450,7 @@ class ExternalIoStatementBase : public IoStatementBase {
RT_API_ATTRS ExternalIoStatementBase(
ExternalFileUnit &, const char *sourceFile = nullptr, int sourceLine = 0);
RT_API_ATTRS ExternalFileUnit &unit() { return unit_; }
RT_API_ATTRS const ExternalFileUnit &unit() const { return unit_; }
RT_API_ATTRS MutableModes &mutableModes();
RT_API_ATTRS ConnectionState &GetConnectionState();
RT_API_ATTRS int asynchronousID() const { return asynchronousID_; }
Expand All @@ -473,6 +476,7 @@ class ExternalIoStatementState : public ExternalIoStatementBase,
RT_API_ATTRS bool Emit(
const char *, std::size_t bytes, std::size_t elementBytes = 0);
RT_API_ATTRS std::size_t GetNextInputBytes(const char *&);
RT_API_ATTRS std::size_t ViewBytesInRecord(const char *&, bool forward) const;
RT_API_ATTRS bool AdvanceRecord(int = 1);
RT_API_ATTRS void BackspaceRecord();
RT_API_ATTRS void HandleRelativePosition(std::int64_t);
Expand Down Expand Up @@ -539,6 +543,7 @@ class ChildIoStatementState : public IoStatementBase,
RT_API_ATTRS bool Emit(
const char *, std::size_t bytes, std::size_t elementBytes = 0);
RT_API_ATTRS std::size_t GetNextInputBytes(const char *&);
RT_API_ATTRS std::size_t ViewBytesInRecord(const char *&, bool forward) const;
RT_API_ATTRS void HandleRelativePosition(std::int64_t);
RT_API_ATTRS void HandleAbsolutePosition(std::int64_t);

Expand Down
18 changes: 18 additions & 0 deletions flang/runtime/unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,24 @@ std::size_t ExternalFileUnit::GetNextInputBytes(
return p ? length : 0;
}

// Gives a view of the bytes of the current record relative to the current
// position, without changing the state of the unit.
//   forward:  returns the number of bytes from the current position up to the
//             record length, or 0 when the position is already at or past it.
//   backward: returns the number of bytes between the left tab limit (0 when
//             unset) and the current position.
// 'p' is set to the address in the frame of the current position when that
// position lies within the record, and to nullptr otherwise.
std::size_t ExternalFileUnit::ViewBytesInRecord(
    const char *&p, bool forward) const {
  p = nullptr;
  // With no known record length, the record is taken to end at the current
  // position.
  auto limit{recordLength.value_or(positionInRecord)};
  if (!forward) {
    if (positionInRecord <= limit) {
      p = Frame() + recordOffsetInFrame_ + positionInRecord;
    }
    return positionInRecord - leftTabLimit.value_or(0);
  }
  if (positionInRecord >= limit) {
    return 0; // nothing lies ahead in this record
  }
  p = Frame() + recordOffsetInFrame_ + positionInRecord;
  return limit - positionInRecord;
}

const char *ExternalFileUnit::FrameNextInput(
IoErrorHandler &handler, std::size_t bytes) {
RUNTIME_CHECK(handler, isUnformatted.has_value() && !*isUnformatted);
Expand Down
1 change: 1 addition & 0 deletions flang/runtime/unit.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ class ExternalFileUnit : public ConnectionState,
RT_API_ATTRS bool Receive(
char *, std::size_t, std::size_t elementBytes, IoErrorHandler &);
RT_API_ATTRS std::size_t GetNextInputBytes(const char *&, IoErrorHandler &);
RT_API_ATTRS std::size_t ViewBytesInRecord(const char *&, bool forward) const;
RT_API_ATTRS bool BeginReadingRecord(IoErrorHandler &);
RT_API_ATTRS void FinishReadingRecord(IoErrorHandler &);
RT_API_ATTRS bool AdvanceRecord(IoErrorHandler &);
Expand Down
11 changes: 11 additions & 0 deletions flang/runtime/utf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ RT_OFFLOAD_VAR_GROUP_END
#endif // FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS

RT_OFFLOAD_API_GROUP_BEGIN

// Measures the UTF-8 character that ends just before 'end' by scanning
// backward over at most 'limit' bytes.  Returns the distance from 'end' back
// to the nearest byte that is not a continuation byte (10xxxxxx), or 'limit'
// when every byte examined is a continuation byte (so 0 when 'limit' is 0).
// The caller must ensure that the 'limit' bytes preceding 'end' are readable.
std::size_t MeasurePreviousUTF8Bytes(const char *end, std::size_t limit) {
  // Scan back over UTF-8 continuation bytes, if any
  for (std::size_t n{1}; n <= limit; ++n) {
    // Step back by subtracting from the pointer; indexing with -n would
    // negate an unsigned value and rely on wraparound pointer arithmetic.
    if ((*(end - n) & 0xc0) != 0x80) {
      return n;
    }
  }
  return limit;
}

// Non-minimal encodings are accepted.
Fortran::common::optional<char32_t> DecodeUTF8(const char *p0) {
const std::uint8_t *p{reinterpret_cast<const std::uint8_t *>(p0)};
Expand Down
3 changes: 3 additions & 0 deletions flang/runtime/utf.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ static inline RT_API_ATTRS std::size_t MeasureUTF8Bytes(char first) {
return UTF8FirstByteTable[static_cast<std::uint8_t>(first)];
}

RT_API_ATTRS std::size_t MeasurePreviousUTF8Bytes(
const char *end, std::size_t limit);

// Ensure that all bytes are present in sequence in the input buffer
// before calling; use MeasureUTF8Bytes(first byte) to count them.
RT_API_ATTRS Fortran::common::optional<char32_t> DecodeUTF8(const char *);
Expand Down
15 changes: 4 additions & 11 deletions flang/test/Evaluate/fold-nearest.f90
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@ module m1
logical, parameter :: test_2 = nearest(minSubnormal, -1.) == 0
logical, parameter :: test_3 = nearest(1., 1.) == 1.0000001
logical, parameter :: test_4 = nearest(1.0000001, -1.) == 1
!WARN: warning: NEAREST intrinsic folding overflow
real, parameter :: inf = nearest(huge(1.), 1.)
!WARN: warning: NEAREST intrinsic folding: bad argument
logical, parameter :: test_5 = nearest(inf, 1.) == inf
!WARN: warning: NEAREST intrinsic folding: bad argument
logical, parameter :: test_6 = nearest(-inf, -1.) == -inf
logical, parameter :: test_7 = nearest(1.9999999, 1.) == 2.
logical, parameter :: test_8 = nearest(2., -1.) == 1.9999999
Expand Down Expand Up @@ -59,10 +56,10 @@ module m2
logical, parameter :: test_12 = ieee_next_after(1., 1.) == 1.
!WARN: warning: invalid argument on division
real, parameter :: nan = 0. / 0.
!WARN: warning: IEEE_NEXT_AFTER intrinsic folding: bad argument
!WARN: warning: IEEE_NEXT_AFTER intrinsic folding: arguments are unordered
real, parameter :: x13 = ieee_next_after(nan, nan)
logical, parameter :: test_13 = .not. (x13 == x13)
!WARN: warning: IEEE_NEXT_AFTER intrinsic folding: bad argument
!WARN: warning: IEEE_NEXT_AFTER intrinsic folding: arguments are unordered
real, parameter :: x14 = ieee_next_after(nan, 0.)
logical, parameter :: test_14 = .not. (x14 == x14)
end module
Expand All @@ -77,24 +74,20 @@ module m3
logical, parameter :: test_4 = ieee_next_down(1.0000000000000002d0) == 1.d0
!WARN: warning: division by zero
real(kind(0.d0)), parameter :: inf = 1.d0 / 0.d0
!WARN: warning: IEEE_NEXT_UP intrinsic folding overflow
logical, parameter :: test_5 = ieee_next_up(huge(0.d0)) == inf
!WARN: warning: IEEE_NEXT_DOWN intrinsic folding overflow
logical, parameter :: test_6 = ieee_next_down(-huge(0.d0)) == -inf
!WARN: warning: IEEE_NEXT_UP intrinsic folding: bad argument
logical, parameter :: test_7 = ieee_next_up(inf) == inf
logical, parameter :: test_8 = ieee_next_down(inf) == h
logical, parameter :: test_9 = ieee_next_up(-inf) == -h
!WARN: warning: IEEE_NEXT_DOWN intrinsic folding: bad argument
logical, parameter :: test_10 = ieee_next_down(-inf) == -inf
logical, parameter :: test_11 = ieee_next_up(1.9999999999999997d0) == 2.d0
logical, parameter :: test_12 = ieee_next_down(2.d0) == 1.9999999999999997d0
!WARN: warning: invalid argument on division
real(kind(0.d0)), parameter :: nan = 0.d0 / 0.d0
!WARN: warning: IEEE_NEXT_UP intrinsic folding: bad argument
!WARN: warning: IEEE_NEXT_UP intrinsic folding: argument is NaN
real(kind(0.d0)), parameter :: x13 = ieee_next_up(nan)
logical, parameter :: test_13 = .not. (x13 == x13)
!WARN: warning: IEEE_NEXT_DOWN intrinsic folding: bad argument
!WARN: warning: IEEE_NEXT_DOWN intrinsic folding: argument is NaN
real(kind(0.d0)), parameter :: x14 = ieee_next_down(nan)
logical, parameter :: test_14 = .not. (x14 == x14)
end module
32 changes: 21 additions & 11 deletions flang/test/Evaluate/logical-args.f90
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ module m
subroutine foo4(l)
logical(kind=4), intent(in) :: l
end subroutine foo4

subroutine foo8(l)
logical(kind=8), intent(in) :: l
end subroutine foo8
Expand All @@ -17,9 +16,16 @@ end module m
program main
use m
integer :: x(10), y

! CHECK: CALL foo(.true._4)
! CHECK-8: CALL foo(logical(.true._4,kind=8))
! CHECK-8: CALL foo(.true._8)
call foo(.true.)
! CHECK: CALL foo(.true._4)
! CHECK-8: CALL foo(.true._8)
call foo(1 < 2)
! CHECK: CALL foo(x(1_8)>y)
! CHECK-8: CALL foo(logical(x(1_8)>y,kind=8))
call foo(x(1) > y)
! CHECK: CALL fooa(x>y)
! CHECK-8: CALL fooa(logical(x>y,kind=8))
call fooa(x > y)
Expand All @@ -28,14 +34,18 @@ program main
! CHECK: CALL foo4(.true._4)
! CHECK-8: CALL foo8(.true._8)
call foog(.true.)
! CHECK: CALL foo4(.true._4)
! CHECK-8: CALL foo8(.true._8)
call foog(1 < 2)
! CHECK: CALL foo4(x(1_8)>y)
! CHECK-8: CALL foo8(logical(x(1_8)>y,kind=8))
call foog(x(1) > y)

contains
subroutine foo(l)
logical :: l
end subroutine foo

subroutine fooa(l)
logical :: l(10)
end subroutine fooa

contains
subroutine foo(l)
logical :: l
end subroutine foo
subroutine fooa(l)
logical :: l(10)
end subroutine fooa
end program main
21 changes: 21 additions & 0 deletions flang/test/Fir/CUDA/cuda-allocate.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// RUN: fir-opt --cuf-convert %s | FileCheck %s

func.func @_QPsub1() {
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
%4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%c1 = arith.constant 1 : index
%c10_i32 = arith.constant 10 : i32
%c0_i32 = arith.constant 0 : i32
%9 = cuf.allocate %4#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
%10 = cuf.deallocate %4#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
return
}

// CHECK-LABEL: func.func @_QPsub1()
// CHECK: %[[DESC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
// CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32

// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32
2 changes: 1 addition & 1 deletion flang/test/Integration/OpenMP/copyprivate.f90
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
!CHECK-NEXT: }

!CHECK-LABEL: define internal void @test_scalar_..omp_par({{.*}})
!CHECK: %[[J:.*]] = alloca i32, i64 1
!CHECK: %[[I:.*]] = alloca i32, i64 1
!CHECK: %[[J:.*]] = alloca i32, i64 1
!CHECK: %[[DID_IT:.*]] = alloca i32
!CHECK: store i32 0, ptr %[[DID_IT]]
!CHECK: %[[THREAD_NUM1:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]])
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Integration/OpenMP/map-types-and-sizes.f90
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-llvm -fopenmp -flang-deprecated-no-hlfir %s -o - | FileCheck %s
!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -flang-deprecated-no-hlfir %s -o - | FileCheck %s

!===============================================================================
! Check MapTypes for target implicit captures
Expand Down
7 changes: 7 additions & 0 deletions flang/test/Lower/CUDA/cuda-allocatable.cuf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
Expand All @@ -37,6 +38,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub2()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QFsub2Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 3 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
Expand All @@ -60,6 +62,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub3()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub3Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 1 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
Expand All @@ -78,6 +81,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub4()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub4Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
Expand All @@ -97,6 +101,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub5()
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub5Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
Expand All @@ -118,6 +123,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub6()
! CHECK: %[[BOX_A:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub6Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
Expand All @@ -140,6 +146,7 @@ end subroutine

! CHECK-LABEL: func.func @_QPsub7()
! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub7Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32}
! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
Expand Down
11 changes: 6 additions & 5 deletions flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
! This test checks the lowering of parallel do

! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - | FileCheck %s
! RUN: bbc -fopenmp -emit-fir -hlfir=false %s -o - | FileCheck %s
! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - \
! RUN: | FileCheck %s

! RUN: bbc -fopenmp -emit-fir -hlfir=false %s -o - \
! RUN: | FileCheck %s

! The string "EXPECTED" denotes the expected FIR

! CHECK: omp.parallel {
! EXPECTED: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"}
! EXPECTED: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFEz"}
! CHECK: omp.parallel private(@{{.*}} %{{.*}} -> %[[PRIVATE_Y:.*]] : !fir.ref<i32>, @{{.*}} %{{.*}} -> %[[PRIVATE_Y:.*]] : !fir.ref<i32>) {
! CHECK: %[[TEMP:.*]] = fir.alloca i32 {bindc_name = "x", pinned, {{.*}}}
! CHECK: %[[const_1:.*]] = arith.constant 1 : i32
! CHECK: %[[const_2:.*]] = arith.constant 10 : i32
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/OpenMP/associate.f90
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
!CHECK: omp.wsloop {
!CHECK: }
!CHECK: }
!CHECK: omp.parallel {
!CHECK: omp.parallel {{.*}} {
!CHECK-NOT: hlfir.declare {{.*}} {uniq_name = "_QFtest_parallel_assocEb"}
!CHECK: omp.wsloop {
!CHECK: }
Expand Down
7 changes: 4 additions & 3 deletions flang/test/Lower/OpenMP/copyprivate.f90
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
! Test COPYPRIVATE.
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 \
! RUN: | FileCheck %s

!CHECK-DAG: func private @_copy_i64(%{{.*}}: !fir.ref<i64>, %{{.*}}: !fir.ref<i64>)
!CHECK-DAG: func private @_copy_f32(%{{.*}}: !fir.ref<f32>, %{{.*}}: !fir.ref<f32>)
Expand Down Expand Up @@ -94,10 +95,10 @@ subroutine test_scalar()

!CHECK-LABEL: func @_QPtest_array
!CHECK: omp.parallel
!CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
!CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEa"} : (!fir.box<!fir.array<?xi32>>, !fir.shift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
!CHECK: %[[I1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi1"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
!CHECK: %[[I2:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi2"} : (!fir.ref<!fir.array<3x4xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4xi32>>, !fir.ref<!fir.array<3x4xi32>>)
!CHECK: %[[I3:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi3"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
!CHECK: %[[I3:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEi3"} : (!fir.ref<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
!CHECK: %[[R1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEr1"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
!CHECK: %[[C1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEc1"} : (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x4x!fir.complex<4>>>, !fir.ref<!fir.array<3x4x!fir.complex<4>>>)
!CHECK: %[[L1:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFtest_arrayEl1"} : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.ref<!fir.array<10x!fir.logical<4>>>)
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/OpenMP/copyprivate2.f90
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
!CHECK-NEXT: }

!CHECK-LABEL: func @_QPtest_alloc_ptr
!CHECK: omp.parallel {
!CHECK: omp.parallel {{.*}} {
!CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>,
!CHECK-SAME: uniq_name = "_QFtest_alloc_ptrEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) ->
!CHECK-SAME: (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
Expand Down
5 changes: 1 addition & 4 deletions flang/test/Lower/OpenMP/critical.f90
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,8 @@ subroutine parallel_critical_privatization()

!CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_critical_privatizationEi"}
!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I]] {uniq_name = "_QFparallel_critical_privatizationEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: omp.parallel {
!CHECK: %[[PRIV_I:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFparallel_critical_privatizationEi"}
!CHECK: omp.parallel private(@{{.*}} %[[I_DECL]]#0 -> %[[PRIV_I:.*]] : !fir.ref<i32>) {
!CHECK: %[[PRIV_I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFparallel_critical_privatizationEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIV_I_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!$omp parallel default(firstprivate)
!CHECK: omp.critical {
!$omp critical
Expand Down
Loading