237 changes: 237 additions & 0 deletions clang/test/CodeGenCoroutines/pr59723.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
// This is a reduced test case from https://github.com/llvm/llvm-project/issues/59723.
// It is intentionally not a minimal reproducer, in order to exercise the compiler's ability.
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fcxx-exceptions\
// RUN: -fexceptions -O2 -emit-llvm %s -o - | FileCheck %s

#include "Inputs/coroutine.h"

// executor and operation base

class bug_any_executor;

// Base type of the operations that are handed to bug_any_executor::post().
// invoke() is only declared; no definition is provided in this file.
struct bug_async_op_base
{
void invoke();

protected:

// Protected and non-virtual: code outside the hierarchy cannot destroy an
// object through this base type.
~bug_async_op_base() = default;
};

// Abstract executor interface. Both entry points are pure virtual and
// noexcept; concrete behaviour comes from a derived class.
class bug_any_executor
{
using op_type = bug_async_op_base;

public:

virtual ~bug_any_executor() = default;

// Receives an operation by reference.
// removing noexcept enables clang to find that the pointer has escaped
virtual void post(op_type& op) noexcept = 0;

virtual void wait() noexcept = 0;
};

// Concrete executor used by the reproducer. start(), the destructor and
// wait() have empty bodies; post() is declared here but has no definition in
// this file.
class bug_thread_executor : public bug_any_executor
{

public:

void start()
{

}

~bug_thread_executor()
{
}

// Although this implementation is not really noexcept (it allocates), the real one is, and it is required to be noexcept.
virtual void post(bug_async_op_base& op) noexcept override;

virtual void wait() noexcept override
{

}
};

// task and promise

// Interface with a single pure-virtual hook that yields a coroutine handle.
// In this file it is implemented by bug_spawn_op.
struct bug_final_suspend_notification
{
virtual std::coroutine_handle<> get_waiter() = 0;
};

class bug_task;

// Promise type of bug_task. The coroutine suspends both initially and at the
// final suspend point (suspend_always in both cases).
class bug_task_promise
{
// bug_task is a friend of the promise.
friend bug_task;
public:

// Defined out of line, after bug_task is complete.
bug_task get_return_object() noexcept;

constexpr std::suspend_always initial_suspend() noexcept { return {}; }

std::suspend_always final_suspend() noexcept
{
return {};
}

// Declared only; no definition appears in this file.
void unhandled_exception() noexcept;

constexpr void return_void() const noexcept {}

// Intentionally empty: there is no result to hand back.
void get_result() const
{

}
};

// Local stand-in for an exchange utility: assigns u to t and returns ret.
//
// NOTE(review): both parameters are forwarding references. When the first
// argument is an lvalue (as in bug_task's move constructor), T deduces to an
// lvalue reference type, so `ret` is a reference to `t` and the function
// returns the object *after* it has been overwritten, not its previous value.
// Left untouched because this file is a reduced reproducer.
template <class T, class U>
T exchange(T &&t, U &&u) {
T ret = t;
t = u;
return ret;
}

// Move-only-style owner of a coroutine handle plus a pointer to its promise.
// It is also an awaitable: awaiting it transfers control to the held coroutine.
class bug_task
{
// The promise needs access to the private constructor below
// (used by bug_task_promise::get_return_object).
friend bug_task_promise;
using handle = std::coroutine_handle<>;
using promise_t = bug_task_promise;

// Private: tasks are created only from a handle/promise pair.
bug_task(handle coro, promise_t* p) noexcept : this_coro{ coro }, this_promise{ p }
{

}

public:
using promise_type = bug_task_promise;

// Move constructor: initialises both members from the results of the
// file-local exchange() helper, passing nullptr as the replacement value.
bug_task(bug_task&& other) noexcept
: this_coro{ exchange(other.this_coro, nullptr) }, this_promise{ exchange(other.this_promise, nullptr) } {

}

// Destroys the coroutine if a non-null handle is held.
~bug_task()
{
if (this_coro)
this_coro.destroy();
}

// Always suspends the awaiting coroutine.
constexpr bool await_ready() const noexcept
{
return false;
}

// The awaiting coroutine's handle is ignored; the held coroutine's handle is
// returned from await_suspend.
handle await_suspend(handle waiter) noexcept
{
return this_coro;
}

// Forwards to the promise's get_result(), which returns void.
void await_resume()
{
return this_promise->get_result();
}

handle this_coro;
promise_t* this_promise;
};

// Builds the bug_task for this promise: the handle is recovered from the
// promise via from_promise(), and the promise address is stored alongside it.
bug_task bug_task_promise::get_return_object() noexcept
{
return { std::coroutine_handle<bug_task_promise>::from_promise(*this), this };
}

// spawn operation and spawner

// Operation object that owns a spawned bug_task together with a completion
// handler. It derives publicly from bug_async_op_base (so it can be passed to
// an executor's post()) and, because it is declared with `class`, privately
// from bug_final_suspend_notification.
template<class Handler>
class bug_spawn_op final : public bug_async_op_base, bug_final_suspend_notification
{
Handler handler;
bug_task task_;

public:

// Copies the handler and move-constructs the task from t.
bug_spawn_op(Handler handler, bug_task&& t)
: handler { handler }, task_{ static_cast<bug_task&&>(t) } {}

// Invokes the stored handler, then returns the no-op coroutine handle.
virtual std::coroutine_handle<> get_waiter() override
{
handler();
return std::noop_coroutine();
}
};

class bug_spawner;

// Awaiter returned by bug_spawner::wait(). await_ready() and await_suspend()
// are declared only; this file does not define them.
struct bug_spawner_awaiter
{
bug_spawner& s;
// Not set by the constructor; it is default-initialised.
std::coroutine_handle<> waiter;

bug_spawner_awaiter(bug_spawner& s) : s{ s } {}

bool await_ready() const noexcept;

void await_suspend(std::coroutine_handle<> coro);

void await_resume() {}
};

// Hands tasks to an executor. Each spawn() wraps the task in a heap-allocated
// bug_spawn_op and posts it; the number of spawned tasks is counted.
class bug_spawner
{
friend bug_spawner_awaiter;

// Completion handler stored inside every bug_spawn_op: resumes the waiter
// recorded in the spawner's current awaiter.
struct final_handler_t
{
bug_spawner& s;

void operator()()
{
// NOTE(review): awaiter_ starts as nullptr and no code in this file
// assigns it; this path is not executed by the test itself.
s.awaiter_->waiter.resume();
}
};

public:

bug_spawner(bug_any_executor& ex) : ex_{ ex } {}

void spawn(bug_task&& t) {
using op_t = bug_spawn_op<final_handler_t>;
// move task into ptr
// The operation is allocated with new and is not deleted in this file.
op_t* ptr = new op_t(final_handler_t{ *this }, static_cast<bug_task&&>(t));
++count_;
ex_.post(*ptr); // ptr escapes here thus task escapes but clang can't deduce that unless post() is not noexcept
}

bug_spawner_awaiter wait() noexcept { return { *this }; }

private:
bug_any_executor& ex_; // if bug_thread_executor& is used instead enables clang to detect the escape of the promise
bug_spawner_awaiter* awaiter_ = nullptr;
unsigned count_ = 0;
};

// test case

// Minimal coroutine returning bug_task: it ignores both parameters and
// completes immediately with co_return.
bug_task bug_spawned_task(int id, int inc)
{
co_return;
}

// Type of the exception thrown by throwing_fn. Its constructor is declared
// but not defined in this file.
struct A {
A();
};

// Function under test: creates a bug_spawned_task coroutine, moves it into
// the spawner, and then leaves by throwing, so the only exit is exceptional.
void throwing_fn(bug_spawner& s) {
s.spawn(bug_spawned_task(1, 2));
throw A{};
}

// Check that the coroutine frame of bug_spawned_task is allocated from operator new.
// CHECK: define{{.*}}@_Z11throwing_fnR11bug_spawner
// CHECK-NOT: alloc
// CHECK: %[[CALL:.+]] = {{.*}}@_Znwm(i64{{.*}} 24)
// CHECK: store ptr @_Z16bug_spawned_taskii.resume, ptr %[[CALL]]
8 changes: 4 additions & 4 deletions clang/test/Driver/x86-no-gather-no-scatter.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/// Tests -mno-gather and -mno-scatter
// RUN: %clang -c -mno-gather -### %s 2>&1 | FileCheck --check-prefix=NOGATHER %s
// RUN: %clang_cl -c /Qgather- -### %s 2>&1 | FileCheck --check-prefix=NOGATHER %s
// RUN: %clang -target x86_64-unknown-linux-gnu -c -mno-gather -### %s 2>&1 | FileCheck --check-prefix=NOGATHER %s
// RUN: %clang_cl --target=x86_64-windows -c /Qgather- -### -- %s 2>&1 | FileCheck --check-prefix=NOGATHER %s
// NOGATHER: "-target-feature" "+prefer-no-gather"

// RUN: %clang -c -mno-scatter -### %s 2>&1 | FileCheck --check-prefix=NOSCATTER %s
// RUN: %clang_cl -c /Qscatter- -### %s 2>&1 | FileCheck --check-prefix=NOSCATTER %s
// RUN: %clang -target x86_64-unknown-linux-gnu -c -mno-scatter -### %s 2>&1 | FileCheck --check-prefix=NOSCATTER %s
// RUN: %clang_cl --target=x86_64-windows -c /Qscatter- -### -- %s 2>&1 | FileCheck --check-prefix=NOSCATTER %s
// NOSCATTER: "-target-feature" "+prefer-no-scatter"
18 changes: 16 additions & 2 deletions clang/test/Sema/riscv-rvv-lax-vector-conversions.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=integer -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=all -ffreestanding -fsyntax-only -verify=lax-vector-all %s

// lax-vector-all-no-diagnostics

// REQUIRES: riscv-registered-target

#define RVV_FIXED_ATTR __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)))
Expand All @@ -20,6 +18,8 @@ typedef __rvv_uint64m1_t vuint64m1_t;
typedef __rvv_float32m1_t vfloat32m1_t;
typedef __rvv_float64m1_t vfloat64m1_t;

typedef __rvv_int64m2_t vint64m2_t;

typedef vfloat32m1_t rvv_fixed_float32m1_t RVV_FIXED_ATTR;
typedef vint32m1_t rvv_fixed_int32m1_t RVV_FIXED_ATTR;
typedef float gnu_fixed_float32m1_t GNU_FIXED_ATTR;
Expand Down Expand Up @@ -76,3 +76,17 @@ void gnu_allowed_with_all_lax_conversions() {
// lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
// lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
}

// Assignments in both directions between a fixed-length int32 m1 vector and a
// sizeless int64 m2 vector. Every verification prefix annotated below expects
// each assignment to be rejected as an incompatible type.
void not_allowed() {
rvv_fixed_int32m1_t fi32m1;
vint64m2_t si64m2;

fi32m1 = si64m2;
// lax-vector-none-error@-1 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
// lax-vector-integer-error@-2 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
// lax-vector-all-error@-3 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
si64m2 = fi32m1;
// lax-vector-none-error@-1 {{assigning to 'vint64m2_t' (aka '__rvv_int64m2_t') from incompatible type}}
// lax-vector-integer-error@-2 {{assigning to 'vint64m2_t' (aka '__rvv_int64m2_t') from incompatible type}}
// lax-vector-all-error@-3 {{assigning to 'vint64m2_t' (aka '__rvv_int64m2_t') from incompatible type}}
}
23 changes: 23 additions & 0 deletions clang/test/SemaCXX/template-64605.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -ast-dump -ast-dump-filter=b_64605 %s | FileCheck %s

// https://github.com/llvm/llvm-project/issues/64605

#pragma STDC FENV_ACCESS ON
// Function template compiled with FENV_ACCESS enabled by the pragma above.
// Its body converts two integer literals to float and compares them; the
// expectations at the end of the file look for the integral-to-floating casts
// in both the template pattern and its instantiation.
// x is left unassigned when the comparison is false; that is part of the
// reduced reproducer.
template <typename>
int b_64605() {
int x;
if ((float)0xFFFFFFFF != (float)0x100000000) {
x = 1;
}
return x;
}
// Forces the implicit instantiation of b_64605 with a void template argument.
int f() { return b_64605<void>(); }

// CHECK: ImplicitCastExpr {{.*}} 'float' <IntegralToFloating> RoundingMath=1 AllowFEnvAccess=1
// CHECK-NEXT: IntegerLiteral {{.*}} 4294967295

// CHECK: FunctionDecl {{.*}} b_64605 'int ()' implicit_instantiation
// CHECK-NEXT: TemplateArgument type 'void'

// CHECK: ImplicitCastExpr {{.*}} 'float' <IntegralToFloating> RoundingMath=1 AllowFEnvAccess=1
// CHECK-NEXT: IntegerLiteral {{.*}} 4294967295
2 changes: 1 addition & 1 deletion compiler-rt/lib/interception/interception.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ const interpose_substitution substitution_##func_name[] \
// FreeBSD's dynamic linker (incompliantly) gives non-weak symbols higher
// priority than weak ones so weak aliases won't work for indirect calls
// in position-independent (-fPIC / -fPIE) mode.
# define __ASM_WEAK_WRAPPER(func)
# define __ASM_WEAK_WRAPPER(func) ".globl " #func "\n"
# else
# define __ASM_WEAK_WRAPPER(func) ".weak " #func "\n"
# endif // SANITIZER_FREEBSD || SANITIZER_NETBSD
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/MachineLICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,10 @@ void MachineLICMBase::HoistRegionPostRA() {
PhysRegDefs.set(*AI);
}

// Funclet entry blocks will clobber all registers
if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
PhysRegClobbers.setBitsNotInMask(Mask);

SpeculationState = SpeculateUnknown;
for (MachineInstr &MI : *BB)
ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
Expand Down
83 changes: 60 additions & 23 deletions llvm/lib/Transforms/Coroutines/CoroElide.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,49 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
for (auto *DA : It->second)
Visited.insert(DA->getParent());

SmallPtrSet<const BasicBlock *, 32> EscapingBBs;
for (auto *U : CB->users()) {
// The use from coroutine intrinsics are not a problem.
if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(U))
continue;

// Conservatively treat every other use as a potential escape.
//
// Note that the major user of switch-ABI coroutines (C++) stores
// resume.fn, destroy.fn and the index into the coroutine frame immediately,
// so the block containing the coro.begin is always escaping for C++.
// Improving the precision of this analysis therefore brings no performance
// benefit for C++.
//
// We still perform the check because we want LLVM coroutines in the
// switch ABI to be as self-contained as possible rather than a
// by-product of C++20 coroutines.
EscapingBBs.insert(cast<Instruction>(U)->getParent());
}

bool PotentiallyEscaped = false;

do {
const auto *BB = Worklist.pop_back_val();
if (!Visited.insert(BB).second)
continue;
if (TIs.count(BB))
return true;

// A path-insensitive marker recording whether the coro.begin may have escaped.
// It is deliberately path-insensitive, at the cost of some precision,
// because we don't want this analysis to be too slow.
PotentiallyEscaped |= EscapingBBs.count(BB);

if (TIs.count(BB)) {
if (!BB->getTerminator()->isExceptionalTerminator() || PotentiallyEscaped)
return true;

// If the function exits through an exceptional terminator, the memory used
// by the coroutine frame is released automatically by stack unwinding.
// So we can treat the coro.begin as not escaping when the function is
// left through an exceptional terminator.

continue;
}

// Conservatively say that there is potentially a path.
if (!--Limit)
Expand Down Expand Up @@ -236,36 +273,36 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
// memory location storing that value and not the virtual register.

SmallPtrSet<BasicBlock *, 8> Terminators;
// First gather all of the non-exceptional terminators for the function.
// First gather all of the terminators for the function.
// Consider the final coro.suspend as the real terminator when the current
// function is a coroutine.
for (BasicBlock &B : *F) {
auto *TI = B.getTerminator();
if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
!isa<UnreachableInst>(TI))
Terminators.insert(&B);
}
for (BasicBlock &B : *F) {
auto *TI = B.getTerminator();

if (TI->getNumSuccessors() != 0 || isa<UnreachableInst>(TI))
continue;

Terminators.insert(&B);
}

// Filter out the coro.destroy that lie along exceptional paths.
SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
for (const auto &It : DestroyAddr) {
// If there is any coro.destroy dominates all of the terminators for the
// coro.begin, we could know the corresponding coro.begin wouldn't escape.
for (Instruction *DA : It.second) {
if (llvm::all_of(Terminators, [&](auto *TI) {
return DT.dominates(DA, TI->getTerminator());
})) {
ReferencedCoroBegins.insert(It.first);
break;
}
}

// Whether there is any paths from coro.begin to Terminators which not pass
// through any of the coro.destroys.
// If every terminator is dominated by a coro.destroy, we know the
// corresponding coro.begin doesn't escape.
//
// Otherwise hasEscapePath decides whether there is any path from
// coro.begin to Terminators that does not pass through any of the
// coro.destroys.
//
// hasEscapePath is relatively slow, so we avoid running it as much as
// possible.
if (!ReferencedCoroBegins.count(It.first) &&
if (llvm::all_of(Terminators,
[&](auto *TI) {
return llvm::any_of(It.second, [&](auto *DA) {
return DT.dominates(DA, TI->getTerminator());
});
}) ||
!hasEscapePath(It.first, Terminators))
ReferencedCoroBegins.insert(It.first);
}
Expand Down
141 changes: 141 additions & 0 deletions llvm/test/CodeGen/X86/machine-licm-vs-wineh.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# RUN: llc -o - %s -mtriple=x86_64-pc-windows-msvc -run-pass=machinelicm | FileCheck %s
#
# This test checks that MachineLICM doesn't hoist loads out of funclets.
# Manually modified from the IR of the following C++ function by running
# llc -stop-after=machine-cp.
#
# void may_throw();
# void use(int);
#
# void test(int n, int arg)
# {
# for (int i = 0 ; i < n ; i++)
# try {
# may_throw();
# }
# catch (...) {
# // Two uses to get 'arg' allocated to a register
# use(arg);
# use(arg);
# }
# }

--- |
target triple = "x86_64-pc-windows-msvc"

; IR counterpart of the C++ loop shown in the header comment: each iteration
; invokes @may_throw, and the catch funclet calls @use(%arg) twice before
; returning to the loop increment block.
define void @test(i32 %n, i32 %arg) personality ptr @__CxxFrameHandler3 {
entry:
%cmp3 = icmp sgt i32 %n, 0
br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
br label %for.body

for.cond.cleanup: ; preds = %for.inc, %entry
ret void

for.body: ; preds = %for.body.preheader, %for.inc
%lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.inc ]
invoke void @may_throw()
to label %for.inc unwind label %catch.dispatch

catch.dispatch: ; preds = %for.body
%0 = catchswitch within none [label %catch] unwind to caller

catch: ; preds = %catch.dispatch
%1 = catchpad within %0 [ptr null, i32 64, ptr null]
call void @use(i32 %arg) [ "funclet"(token %1) ]
call void @use(i32 %arg) [ "funclet"(token %1) ]
catchret from %1 to label %for.inc

for.inc: ; preds = %catch, %for.body
%lsr.iv.next = add i32 %lsr.iv, -1
%exitcond.not = icmp eq i32 %lsr.iv.next, 0
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

declare i32 @__CxxFrameHandler3(...)

declare void @may_throw()

declare void @use(i32)

...
---
name: test
alignment: 16
tracksRegLiveness: true
hasEHCatchret: true
hasEHScopes: true
hasEHFunclets: true
debugInstrRef: true
tracksDebugUserValues: true
liveins:
- { reg: '$ecx' }
- { reg: '$edx' }
frameInfo:
maxAlignment: 8
hasCalls: true
hasOpaqueSPAdjustment: true
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4 }
- { id: 1, type: spill-slot, size: 4, alignment: 4 }
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.1, %bb.2
liveins: $ecx, $edx
MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edx :: (store (s32) into %stack.1)
TEST32rr renamable $ecx, renamable $ecx, implicit-def $eflags
JCC_1 %bb.2, 14, implicit killed $eflags
bb.1:
liveins: $ecx
JMP_1 %bb.3
bb.2.for.cond.cleanup:
RET 0
bb.3.for.body:
successors: %bb.5, %bb.4
liveins: $ecx
EH_LABEL <mcsymbol .Leh1>
MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $ecx :: (store (s32) into %stack.0)
ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
CALL64pcrel32 @may_throw, csr_win64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
ADJCALLSTACKUP64 32, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
EH_LABEL <mcsymbol .Leh2>
JMP_1 %bb.5
bb.4.catch (landing-pad, ehfunclet-entry):
successors: %bb.5
ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
renamable $esi = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1)
$ecx = COPY renamable $esi
CALL64pcrel32 @use, csr_win64, implicit $rsp, implicit $ssp, implicit $ecx, implicit-def $rsp, implicit-def $ssp
ADJCALLSTACKUP64 32, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
$ecx = COPY killed renamable $esi
CALL64pcrel32 @use, csr_win64, implicit $rsp, implicit $ssp, implicit $ecx, implicit-def $rsp, implicit-def $ssp
ADJCALLSTACKUP64 32, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
CATCHRET %bb.5, %bb.0
bb.5.for.inc:
successors: %bb.2, %bb.3
renamable $ecx = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
renamable $ecx = DEC32r killed renamable $ecx, implicit-def $eflags
JCC_1 %bb.2, 4, implicit killed $eflags
JMP_1 %bb.3
...
#
# CHECK: bb.4.catch
# CHECK: ADJCALLSTACKDOWN64
# CHECK-NEXT: renamable [[REG:\$[a-z0-9]+]] = MOV32rm %stack.1
# CHECK-NEXT: $ecx = COPY renamable [[REG]]
# CHECK-NEXT: CALL64pcrel32 @use
31 changes: 31 additions & 0 deletions llvm/test/Transforms/SROA/scalable-vector-struct.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,34 @@ define %struct.test @alloca(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y) {
%val = load %struct.test, %struct.test* %addr, align 4
ret %struct.test %val
}


; Builds a two-element struct of scalable vectors, round-trips it through
; three allocas with whole-struct stores and loads, then passes the extracted
; elements to @foo. The expected output below contains only the insertvalue,
; extractvalue, call and ret instructions.
define { <vscale x 2 x i32>, <vscale x 2 x i32> } @return_tuple(<vscale x 2 x i32> %v_tuple.coerce0, <vscale x 2 x i32> %v_tuple.coerce1) {
; CHECK-LABEL: @return_tuple(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], 0
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]], 1
; CHECK-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 0
; CHECK-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], 1
; CHECK-NEXT: [[CALL:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32> [[COERCE_EXTRACT0]], <vscale x 2 x i32> [[COERCE_EXTRACT1]])
; CHECK-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[CALL]]
;
entry:
%v_tuple = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
%v_tuple.addr = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
%coerce = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
%0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %v_tuple.coerce0, 0
%1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %0, <vscale x 2 x i32> %v_tuple.coerce1, 1
store { <vscale x 2 x i32>, <vscale x 2 x i32> } %1, ptr %v_tuple, align 4
%v_tuple1 = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %v_tuple, align 4
store { <vscale x 2 x i32>, <vscale x 2 x i32> } %v_tuple1, ptr %v_tuple.addr, align 4
%2 = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %v_tuple.addr, align 4
store { <vscale x 2 x i32>, <vscale x 2 x i32> } %2, ptr %coerce, align 4
%coerce.tuple = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr %coerce, align 4
%coerce.extract0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 0
%coerce.extract1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %coerce.tuple, 1
%call = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32> %coerce.extract0, <vscale x 2 x i32> %coerce.extract1)
ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %call
}

declare { <vscale x 2 x i32>, <vscale x 2 x i32> } @foo(<vscale x 2 x i32>, <vscale x 2 x i32>)
12 changes: 7 additions & 5 deletions openmp/libomptarget/src/OmptCallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,37 +71,39 @@ static uint64_t createRegionId() {
}

void Interface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
size_t Size, void *Code) {
void **TgtPtrBegin, size_t Size,
void *Code) {
beginTargetDataOperation();
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId,
ompt_target_data_alloc, HstPtrBegin,
/* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr,
/* SrcDeviceNum */ omp_get_initial_device(), *TgtPtrBegin,
/* TgtDeviceNum */ DeviceId, Size, Code);
} else if (ompt_callback_target_data_op_fn) {
// HostOpId is set by the runtime
HostOpId = createOpId();
// Invoke the tool supplied data op callback
ompt_callback_target_data_op_fn(
TargetData.value, HostOpId, ompt_target_data_alloc, HstPtrBegin,
/* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr,
/* SrcDeviceNum */ omp_get_initial_device(), *TgtPtrBegin,
/* TgtDeviceNum */ DeviceId, Size, Code);
}
}

void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
size_t Size, void *Code) {
void **TgtPtrBegin, size_t Size,
void *Code) {
// Only EMI callback handles end scope
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId,
ompt_target_data_alloc, HstPtrBegin,
/* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr,
/* SrcDeviceNum */ omp_get_initial_device(), *TgtPtrBegin,
/* TgtDeviceNum */ DeviceId, Size, Code);
}
endTargetDataOperation();
Expand Down
8 changes: 4 additions & 4 deletions openmp/libomptarget/src/OmptInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ static ompt_get_target_task_data_t ompt_get_target_task_data_fn;
class Interface {
public:
/// Top-level function for invoking callback before device data allocation
void beginTargetDataAlloc(int64_t DeviceId, void *TgtPtrBegin, size_t Size,
void *Code);
void beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
void **TgtPtrBegin, size_t Size, void *Code);

/// Top-level function for invoking callback after device data allocation
void endTargetDataAlloc(int64_t DeviceId, void *TgtPtrBegin, size_t Size,
void *Code);
void endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
void **TgtPtrBegin, size_t Size, void *Code);

/// Top-level function for invoking callback before data submit
void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
Expand Down
6 changes: 4 additions & 2 deletions openmp/libomptarget/src/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,12 +561,14 @@ __tgt_target_table *DeviceTy::loadBinary(void *Img) {

void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
/// RAII to establish tool anchors before and after data allocation
void *TargetPtr = nullptr;
OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
RegionInterface.getCallbacks<ompt_target_data_alloc>(),
RTLDeviceID, HstPtr, Size,
RTLDeviceID, HstPtr, &TargetPtr, Size,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)

return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
return TargetPtr;
}

int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) {
Expand Down
39 changes: 19 additions & 20 deletions openmp/libomptarget/src/interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,21 @@ targetDataMapper(ident_t *Loc, int64_t DeviceId, int32_t ArgNum,
TargetAsyncInfoTy TargetAsyncInfo(Device);
AsyncInfoTy &AsyncInfo = TargetAsyncInfo;

/// RAII to establish tool anchors before and after data begin / end / update
OMPT_IF_BUILT(assert((TargetDataFunction == targetDataBegin ||
TargetDataFunction == targetDataEnd ||
TargetDataFunction == targetDataUpdate) &&
"Encountered unexpected TargetDataFunction during "
"execution of targetDataMapper");
auto CallbackFunctions =
(TargetDataFunction == targetDataBegin)
? RegionInterface.getCallbacks<ompt_target_enter_data>()
: (TargetDataFunction == targetDataEnd)
? RegionInterface.getCallbacks<ompt_target_exit_data>()
: RegionInterface.getCallbacks<ompt_target_update>();
InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId,
OMPT_GET_RETURN_ADDRESS(0));)

int Rc = OFFLOAD_SUCCESS;
Rc = TargetDataFunction(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, AsyncInfo,
Expand All @@ -129,12 +144,6 @@ EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
map_var_info_t *ArgNames,
void **ArgMappers) {
TIMESCOPE_WITH_IDENT(Loc);
/// RAII to establish tool anchors before and after data begin
OMPT_IF_BUILT(InterfaceRAII TargetDataEnterRAII(
RegionInterface.getCallbacks<ompt_target_enter_data>(),
DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)

targetDataMapper<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, targetDataBegin,
"Entering OpenMP data region", "begin");
Expand All @@ -161,12 +170,6 @@ EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
map_var_info_t *ArgNames,
void **ArgMappers) {
TIMESCOPE_WITH_IDENT(Loc);
/// RAII to establish tool anchors before and after data end
OMPT_IF_BUILT(InterfaceRAII TargetDataExitRAII(
RegionInterface.getCallbacks<ompt_target_exit_data>(),
DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)

targetDataMapper<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, targetDataEnd,
"Exiting OpenMP data region", "end");
Expand All @@ -190,12 +193,6 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
map_var_info_t *ArgNames,
void **ArgMappers) {
TIMESCOPE_WITH_IDENT(Loc);
/// RAII to establish tool anchors before and after data update
OMPT_IF_BUILT(InterfaceRAII TargetDataUpdateRAII(
RegionInterface.getCallbacks<ompt_target_update>(),
DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)

targetDataMapper<AsyncInfoTy>(
Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
ArgMappers, targetDataUpdate, "Updating OpenMP data", "update");
Expand Down Expand Up @@ -295,7 +292,8 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
DeviceTy &Device = *PM->Devices[DeviceId];
TargetAsyncInfoTy TargetAsyncInfo(Device);
AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
/// RAII to establish tool anchors before and after target region
OMPT_IF_BUILT(InterfaceRAII TargetRAII(
RegionInterface.getCallbacks<ompt_target>(), DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)

Expand Down Expand Up @@ -363,7 +361,8 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
return OMP_TGT_FAIL;
}
DeviceTy &Device = *PM->Devices[DeviceId];
OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
/// RAII to establish tool anchors before and after target region
OMPT_IF_BUILT(InterfaceRAII TargetRAII(
RegionInterface.getCallbacks<ompt_target>(), DeviceId,
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)

Expand Down
128 changes: 128 additions & 0 deletions openmp/libomptarget/test/ompt/veccopy_data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// RUN: %libomptarget-compile-run-and-check-generic
// REQUIRES: ompt
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO

/*
* Example OpenMP program that registers EMI callbacks.
* Explicitly tests for an initialized device number and for
* #pragma omp target [enter data / exit data / update],
* the last of these with the addition of a nowait clause.
*/

#include <omp.h>
#include <stdio.h>

#include "callbacks.h"
#include "register_emi.h"

#define N 100000

#pragma omp declare target
int c[N];
#pragma omp end declare target

// Test driver: runs two offloaded vector kernels surrounded by explicit
// enter data / exit data / update directives, then verifies the results on
// the host. Returns the number of mismatching elements (0 on success).
int main() {
int a[N];
int b[N];

int i;

// Host-side initialisation: a and c are zeroed, b holds 0..N-1.
for (i = 0; i < N; i++)
a[i] = 0;

for (i = 0; i < N; i++)
b[i] = i;

for (i = 0; i < N; i++)
c[i] = 0;

// Map a to the device explicitly, copy b into a inside a target region
// (b has no explicit map clause here), then map a back to the host.
#pragma omp target enter data map(to : a)
#pragma omp target parallel for
{
for (int j = 0; j < N; j++)
a[j] = b[j];
}
#pragma omp target exit data map(from : a)

// Fill the declare-target array c on the device, then fetch it with an
// asynchronous (nowait) update followed by a barrier.
#pragma omp target parallel for map(alloc : c)
{
for (int j = 0; j < N; j++)
c[j] = 2 * j + 1;
}
#pragma omp target update from(c) nowait
#pragma omp barrier

// Verify a[i] == i (copied from b) and count mismatches.
int rc = 0;
for (i = 0; i < N; i++) {
if (a[i] != i) {
rc++;
printf("Wrong value: a[%d]=%d\n", i, a[i]);
}
}

// Verify c[i] == 2 * i + 1.
for (i = 0; i < N; i++) {
if (c[i] != 2 * i + 1) {
rc++;
printf("Wrong value: c[%d]=%d\n", i, c[i]);
}
}

if (!rc)
printf("Success\n");

return rc;
}

/// CHECK-NOT: Callback Target EMI:
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Init:
/// CHECK: Callback Load:
/// CHECK: Callback Target EMI: kind=2 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Target EMI: kind=2 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1
/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
/// CHECK: Callback Target EMI: kind=1 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Target EMI: kind=3 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
/// CHECK: Callback Target EMI: kind=3 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1
/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1
/// CHECK: Callback Target EMI: kind=1 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Target EMI: kind=4 endpoint=1
/// CHECK-NOT: device_num=-1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
/// CHECK: Callback Target EMI: kind=4 endpoint=2
/// CHECK-NOT: device_num=-1
/// CHECK: Callback Fini:
4 changes: 4 additions & 0 deletions openmp/libomptarget/test/ompt/veccopy_disallow_both.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,12 @@ int main() {
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1
Expand All @@ -82,10 +84,12 @@ int main() {
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0
Expand Down
4 changes: 4 additions & 0 deletions openmp/libomptarget/test/ompt/veccopy_emi.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,12 @@ int main() {
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1
Expand All @@ -81,10 +83,12 @@ int main() {
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0
Expand Down
4 changes: 4 additions & 0 deletions openmp/libomptarget/test/ompt/veccopy_emi_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,12 @@ int main() {
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1
Expand All @@ -82,10 +84,12 @@ int main() {
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0
Expand Down