11 changes: 11 additions & 0 deletions clang/test/Analysis/malloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,14 @@ void *realloc(void **ptr, size_t size) { realloc(ptr, size); } // no-crash
namespace pr46253_paramty2{
void *realloc(void *ptr, int size) { realloc(ptr, size); } // no-crash
} // namespace pr46253_paramty2

// Regression test: `T::free` is a member function that takes a `const S&`.
// Calling it on a stack object must not produce a diagnostic.
// NOTE(review): presumably this guards the malloc checker against matching on
// the bare name `free` -- confirm against the RUN lines at the top of the file.
namespace pr81597 {
struct S {};
struct T {
void free(const S& s);
};
void f(T& t) {
S s;
t.free(s); // no-warning: This is not the free you are looking for...
}
} // namespace pr81597
2 changes: 1 addition & 1 deletion clang/test/Analysis/stack-addr-ps.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=core -fblocks -verify %s
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -fblocks -verify %s

int* f1(void) {
int x = 0;
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Analysis/stackaddrleak.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify -std=c99 -Dbool=_Bool -Wno-bool-conversion %s
// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify -x c++ -Wno-bool-conversion %s
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -verify -std=c99 -Dbool=_Bool -Wno-bool-conversion %s
// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -verify -x c++ -Wno-bool-conversion %s

typedef __INTPTR_TYPE__ intptr_t;
char const *p;
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CXX/over/over.built/ast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ struct A{};

template <typename T, typename U>
auto Test(T* pt, U* pu) {
// CHECK: UnaryOperator {{.*}} '<dependent type>' prefix '*'
// CHECK: UnaryOperator {{.*}} '<dependent type>' lvalue prefix '*'
// CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
(void)*pt;

// CHECK: UnaryOperator {{.*}} '<dependent type>' prefix '++'
// CHECK: UnaryOperator {{.*}} '<dependent type>' lvalue prefix '++'
// CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
(void)(++pt);

Expand Down
28 changes: 28 additions & 0 deletions clang/test/ClangScanDeps/removed-args.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,31 @@
// CHECK-NOT: "-fmodules-prune-interval=
// CHECK-NOT: "-fmodules-prune-after=
// CHECK: ],

// Check for removed args for PCH invocations.

// RUN: split-file %s %t
// RUN: sed "s|DIR|%/t|g" %t/cdb-pch.json.template > %t/cdb-pch.json
// RUN: clang-scan-deps -compilation-database %t/cdb-pch.json -format experimental-full > %t/result-pch.json
// RUN: cat %t/result-pch.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t -check-prefix=PCH
//
// PCH-NOT: "-fdebug-compilation-dir="
// PCH-NOT: "-fcoverage-compilation-dir="
// PCH-NOT: "-coverage-notes-file
// PCH-NOT: "-coverage-data-file
// PCH-NOT: "-fprofile-instrument-use-path
// PCH-NOT: "-include"
// PCH-NOT: "-fmodules-cache-path=
// PCH-NOT: "-fmodules-validate-once-per-build-session"
// PCH-NOT: "-fbuild-session-timestamp=
// PCH-NOT: "-fmodules-prune-interval=
// PCH-NOT: "-fmodules-prune-after=

//--- cdb-pch.json.template
[
{
"directory": "DIR",
"command": "clang -x c-header DIR/header.h -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -fdebug-compilation-dir=DIR/debug -fcoverage-compilation-dir=DIR/coverage -ftest-coverage -fprofile-instr-use=DIR/tu.profdata -o DIR/header.h.pch -serialize-diagnostics DIR/header.h.pch.diag ",
"file": "DIR/header.h.pch"
}
]
18 changes: 18 additions & 0 deletions clang/test/CodeGen/bitfield-access-unit.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,24 @@ struct G {
// LAYOUT-DWN32-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:7 IsSigned:1 StorageSize:8 StorageOffset:4
// CHECK-NEXT: ]>

struct __attribute__((aligned(8))) H {
char a;
unsigned b : 24; // on expensive alignment we want this to stay 24
unsigned c __attribute__((aligned(8))); // Think 'long long' or lp64 ptr
} h;
// CHECK-LABEL: LLVMType:%struct.H =
// LAYOUT-FLEX-SAME: type <{ i8, i32, [3 x i8], i32, [4 x i8] }>
// LAYOUT-STRICT-SAME: type { i8, [3 x i8], [4 x i8], i32, [4 x i8] }
// LAYOUT-DWN32-FLEX-SAME: type <{ i8, i32, [3 x i8], i32, [4 x i8] }>
// LAYOUT-DWN32-STRICT-SAME: type { i8, [3 x i8], [4 x i8], i32, [4 x i8] }
// CHECK: BitFields:[
// LAYOUT-FLEX-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:24 IsSigned:0 StorageSize:32 StorageOffset:1
// LAYOUT-STRICT-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:24 IsSigned:0 StorageSize:24 StorageOffset:1

// LAYOUT-DWN32-FLEX-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:24 IsSigned:0 StorageSize:32 StorageOffset:1
// LAYOUT-DWN32-STRICT-NEXT: <CGBitFieldInfo Offset:{{[0-9]+}} Size:24 IsSigned:0 StorageSize:24 StorageOffset:1
// CHECK-NEXT: ]>

#if _LP64
struct A64 {
int a : 16;
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/target-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,11 @@

// RUN: %clang_cc1 -triple spir-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=SPIR
// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"

// RUN: %clang_cc1 -triple spir64-unknown -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=SPIR64
// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"

// RUN: %clang_cc1 -triple bpfel -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=BPFEL
Expand Down
47 changes: 46 additions & 1 deletion clang/test/CodeGenCXX/control-flow-in-stmt-expr.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 --std=c++20 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 --std=c++20 -fexceptions -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefixes=EH %s
// RUN: %clang_cc1 --std=c++20 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefixes=NOEH,CHECK %s

struct Printy {
Printy(const char *name) : name(name) {}
Expand Down Expand Up @@ -349,6 +350,34 @@ void NewArrayInit() {
// CHECK-NEXT: br label %return
}

// Both arms of the conditional expression below aggregate-initialise a V2 from
// two temporaries of type A; each A is converted to a Value, which has a
// user-provided destructor. The directives inside the body look, in the
// cond.true block, for the A constructor being called (or invoked, in the run
// that enables exceptions) followed by a store to %cond-cleanup.save.
void DestroyInConditionalCleanup() {
// EH-LABEL: DestroyInConditionalCleanupv()
// NOEH-LABEL: DestroyInConditionalCleanupv()
struct A {
A() {}
~A() {}
};

struct Value {
Value(A) {}
~Value() {}
};

struct V2 {
Value K;
Value V;
};
// Verify we use conditional cleanups.
(void)(foo() ? V2{A(), A()} : V2{A(), A()});
// NOEH: cond.true:
// NOEH: call void @_ZZ27DestroyInConditionalCleanupvEN1AC1Ev
// NOEH: store ptr %{{.*}}, ptr %cond-cleanup.save

// EH: cond.true:
// EH: invoke void @_ZZ27DestroyInConditionalCleanupvEN1AC1Ev
// EH: store ptr %{{.*}}, ptr %cond-cleanup.save
}

void ArrayInitWithContinue() {
// CHECK-LABEL: @_Z21ArrayInitWithContinuev
// Verify that we start to emit the array destructor.
Expand All @@ -362,3 +391,19 @@ void ArrayInitWithContinue() {
})};
}
}

// A [[clang::trivial_abi]] class with a user-declared constructor and
// destructor. AcceptTrivialABI takes it by value.
struct [[clang::trivial_abi]] HasTrivialABI {
HasTrivialABI();
~HasTrivialABI();
};
void AcceptTrivialABI(HasTrivialABI, int);
// The second call argument is a statement expression that can `return` out of
// TrivialABI() before AcceptTrivialABI is called. The directives inside the
// body require a HasTrivialABI destructor call in if.then, immediately
// followed by the branch to the return block.
void TrivialABI() {
// CHECK-LABEL: define dso_local void @_Z10TrivialABIv()
AcceptTrivialABI(HasTrivialABI(), ({
if (foo()) return;
// CHECK: if.then:
// CHECK-NEXT: call void @_ZN13HasTrivialABID1Ev
// CHECK-NEXT: br label %return
0;
}));
}
31 changes: 26 additions & 5 deletions clang/test/CodeGenCXX/module-funcs-from-imports.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,21 @@ int func_in_gmf_not_called() {
return 44;
}

// Class template with two inline constexpr member functions that both return
// 43. Only getValue() carries the always_inline attribute; getValue2() is a
// plain inline member.
template <class T>
class A {
public:
__attribute__((always_inline))
inline constexpr int getValue() {
return 43;
}

inline constexpr int getValue2() {
return 43;
}
};

extern template class A<char>;

//--- M.cppm
module;
#include "foo.h"
Expand All @@ -47,17 +62,21 @@ int always_inline_func() {
return 45;
}

export using ::A;

//--- Use.cpp
import M;
int use() {
return exported_func() + always_inline_func();
A<char> a;
return exported_func() + always_inline_func() +
a.getValue() + a.getValue2();
}

// Checks that none of the function (except the always_inline_func) in the importees
// are generated in the importer's code.
// CHECK-O0: define{{.*}}_Z3usev(
// CHECK-O0: declare{{.*}}_ZW1M13exported_funcv(
// CHECK-O0: define{{.*}}available_externally{{.*}}_ZW1M18always_inline_funcv(
// CHECK-O0: declare{{.*}}_ZW1M18always_inline_funcv(
// CHECK-O0: define{{.*}}@_ZN1AIcE8getValueEv(
// CHECK-O0: declare{{.*}}@_ZN1AIcE9getValue2Ev(
// CHECK-O0-NOT: func_in_gmf
// CHECK-O0-NOT: func_in_gmf_not_called
// CHECK-O0-NOT: non_exported_func
Expand All @@ -68,7 +87,9 @@ int use() {
// O0 to keep consistent ABI.
// CHECK-O1: define{{.*}}_Z3usev(
// CHECK-O1: declare{{.*}}_ZW1M13exported_funcv(
// CHECK-O1: define{{.*}}available_externally{{.*}}_ZW1M18always_inline_funcv(
// CHECK-O1: declare{{.*}}_ZW1M18always_inline_funcv(
// CHECK-O1: define{{.*}}@_ZN1AIcE8getValueEv(
// CHECK-O1: declare{{.*}}@_ZN1AIcE9getValue2Ev(
// CHECK-O1-NOT: func_in_gmf
// CHECK-O1-NOT: func_in_gmf_not_called
// CHECK-O1-NOT: non_exported_func
Expand Down
370 changes: 244 additions & 126 deletions clang/test/CodeGenHLSL/builtins/any.hlsl

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions clang/test/Driver/windows-seh-async-verify.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang --target=x86_64-pc-windows -fasync-exceptions -fsyntax-only %s -### 2>&1 | FileCheck %s
// RUN: %clang_cl --target=x86_64-pc-windows /EHa -fsyntax-only %s -### 2>&1 | FileCheck %s
// RUN: %clang --target=x86_64-pc-windows-gnu -fasync-exceptions -fsyntax-only %s -### 2>&1 | FileCheck %s --check-prefixes=GNU-ALL,GNU
// RUN: %clang_cl --target=x86_64-pc-windows-gnu /EHa -fsyntax-only %s -### 2>&1 | FileCheck %s --check-prefixes=GNU-ALL,CL-GNU

// CHECK-NOT: warning
// GNU: warning: argument unused during compilation: '-fasync-exceptions' [-Wunused-command-line-argument]
// CL-GNU: warning: argument unused during compilation: '/EHa' [-Wunused-command-line-argument]

// CHECK: -fasync-exceptions
// GNU-ALL-NOT: -fasync-exceptions
struct S {
union _Un {
~_Un() {}
char _Buf[12];
};
_Un _un;
};

struct Embed {
S v2;
};

void PR62449() { Embed v{}; }
12 changes: 12 additions & 0 deletions clang/test/Index/USR/func-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,15 @@ void Func( void (* (*)(int, int))(int, int) );
// CHECK: {{[0-9]+}}:6 | function/C | Func | c:@F@Func#*F*Fv(#I#I)(#I#I)# |
void Func( void (* (*)(int, int, int))(int) );
// CHECK: {{[0-9]+}}:6 | function/C | Func | c:@F@Func#*F*Fv(#I)(#I#I#I)# |

// Functions with parameter types that only differ in top-level cv-qualification should generate the same USR.

void f( const int );
// CHECK: {{[0-9]+}}:6 | function/C | f | c:@F@f#I# |
void f( int );
// CHECK: {{[0-9]+}}:6 | function/C | f | c:@F@f#I# |

void g( int );
// CHECK: {{[0-9]+}}:6 | function/C | g | c:@F@g#I# |
void g( const int );
// CHECK: {{[0-9]+}}:6 | function/C | g | c:@F@g#I# |
44 changes: 44 additions & 0 deletions clang/test/Modules/hashing-decls-in-exprs-from-gmf-2.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// RUN: rm -rf %t
// RUN: mkdir -p %t
// RUN: split-file %s %t
//
// RUN: %clang_cc1 -std=c++20 -fskip-odr-check-in-gmf %t/A.cppm -emit-module-interface -o %t/A.pcm
// RUN: %clang_cc1 -std=c++20 -fskip-odr-check-in-gmf %t/test.cpp -fprebuilt-module-path=%t -fsyntax-only -verify

//--- header.h
#pragma once
template <class _Tp>
class Optional {};

template <class _Tp>
concept C = requires(const _Tp& __t) {
[]<class _Up>(const Optional<_Up>&) {}(__t);
};

//--- func.h
#include "header.h"
template <C T>
void func() {}

//--- test_func.h
#include "func.h"

inline void test_func() {
func<Optional<int>>();
}

//--- A.cppm
module;
#include "header.h"
#include "test_func.h"
export module A;
export using ::test_func;

//--- test.cpp
// expected-no-diagnostics
import A;
#include "test_func.h"

void test() {
test_func();
}
3 changes: 1 addition & 2 deletions clang/test/OpenMP/ompx_attributes_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,5 @@ void func() {
// NVIDIA: "omp_target_thread_limit"="45"
// NVIDIA: "omp_target_thread_limit"="17"
// NVIDIA: !{ptr @__omp_offloading[[HASH1:.*]]_l16, !"maxntidx", i32 20}
// NVIDIA: !{ptr @__omp_offloading[[HASH2:.*]]_l18, !"minctasm", i32 90}
// NVIDIA: !{ptr @__omp_offloading[[HASH2]]_l18, !"maxntidx", i32 45}
// NVIDIA: !{ptr @__omp_offloading[[HASH2:.*]]_l18, !"maxntidx", i32 45}
// NVIDIA: !{ptr @__omp_offloading[[HASH3:.*]]_l20, !"maxntidx", i32 17}
2 changes: 1 addition & 1 deletion clang/test/Sema/alias-unused-win.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ extern "C" {
static int f(void) { return 42; } // cxx-warning{{unused function 'f'}}
int g(void) __attribute__((alias("f")));

static int foo [] = { 42, 0xDEAD }; // cxx-warning{{variable 'foo' is not needed and will not be emitted}}
static int foo [] = { 42, 0xDEAD };
extern typeof(foo) bar __attribute__((unused, alias("foo")));

static int __attribute__((overloadable)) f0(int x) { return x; } // expected-warning{{unused function 'f0'}}
Expand Down
16 changes: 9 additions & 7 deletions clang/test/Sema/alias-unused.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,26 @@ extern typeof(foo) bar __attribute__((unused, alias("foo")));
/// We report a warning in C++ mode because the internal linkage `resolver` gets
/// mangled as it does not have a language linkage. GCC does not mangle
/// `resolver` or report a warning.
static int (*resolver(void))(void) { return f; } // expected-warning{{unused function 'resolver'}}
static int (*resolver(void))(void) { return f; } // cxx-warning{{unused function 'resolver'}}
int ifunc(void) __attribute__((ifunc("resolver")));

static int __attribute__((overloadable)) f0(int x) { return x; } // expected-warning{{unused function 'f0'}}
static int __attribute__((overloadable)) f0(int x) { return x; }
static float __attribute__((overloadable)) f0(float x) { return x; } // expected-warning{{unused function 'f0'}}
int g0(void) __attribute__((alias("_ZL2f0i")));

#ifdef __cplusplus
static int f1() { return 42; } // expected-warning{{unused function 'f1'}}
static int f1() { return 42; }
int g1(void) __attribute__((alias("_ZL2f1v")));
}

static int f2(int) { return 42; } // expected-warning{{unused function 'f2'}}
static int f2() { return 42; } // expected-warning{{unused function 'f2'}}
/// We demangle alias/ifunc target and mark all found functions as used.

static int f2(int) { return 42; } // cxx-warning{{unused function 'f2'}}
static int f2() { return 42; }
int g2() __attribute__((alias("_ZL2f2v")));

static int (*resolver1())() { return f; } // expected-warning{{unused function 'resolver1'}}
static int (*resolver1(int))() { return f; } // expected-warning{{unused function 'resolver1'}}
static int (*resolver1())() { return f; } // cxx-warning{{unused function 'resolver1'}}
static int (*resolver1(int))() { return f; }
int ifunc1() __attribute__((ifunc("_ZL9resolver1i")));

/// TODO: We should report "unused function" for f3(int).
Expand Down
23 changes: 23 additions & 0 deletions clang/test/SemaCXX/PR84020.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: %clang_cc1 -std=c++20 -verify %s
// RUN: %clang_cc1 -std=c++23 -verify %s
// expected-no-diagnostics

// Base class providing a non-static member function template foo<S>() and a
// non-static, non-template member function bar().
struct B {
template <typename S>
void foo();

void bar();
};

// A<T, S> derives from its first template argument. Inside a member function
// it asserts, via requires-expressions, that the qualified calls
// T::template foo<S>() and T::bar() are well-formed.
template <typename T, typename S>
struct A : T {
auto foo() {
static_assert(requires { T::template foo<S>(); });
static_assert(requires { T::bar(); });
}
};

// Instantiates A<B, double>::foo() so that both static_asserts are evaluated.
int main() {
A<B, double> a;
a.foo();
}
10 changes: 10 additions & 0 deletions clang/test/SemaCXX/overloaded-operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -682,3 +682,13 @@ namespace nw{
}
}
#endif

#if __cplusplus >= 201703L
namespace GH88329 {

template <auto T> struct A {};
template <auto T> A<*T> operator *() { return {}; }
// expected-error@-1 {{overloaded 'operator*' must have at least one parameter of class or enumeration type}}
}

#endif
52 changes: 52 additions & 0 deletions clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3098,6 +3098,58 @@ TEST(TransferTest, ResultObjectLocationForCXXOperatorCallExpr) {
});
}

// Check that the `std::strong_ordering` object returned by builtin `<=>` has a
// correctly modeled result object location.
TEST(TransferTest, ResultObjectLocationForBuiltinSpaceshipOperator) {
  // Program under analysis: a fake `std::strong_ordering` plus a `target`
  // function that stores the result of builtin `<=>` on two ints in a local
  // named `ordering`. The `[[p]]` annotation marks the program point at which
  // the environment is inspected.
  std::string Code = R"(
namespace std {
// This is the minimal definition required to get
// `Sema::CheckComparisonCategoryType()` to accept this fake.
struct strong_ordering {
enum class ordering { less, equal, greater };
ordering o;
static const strong_ordering less;
static const strong_ordering equivalent;
static const strong_ordering equal;
static const strong_ordering greater;
};
inline constexpr strong_ordering strong_ordering::less =
{ strong_ordering::ordering::less };
inline constexpr strong_ordering strong_ordering::equal =
{ strong_ordering::ordering::equal };
inline constexpr strong_ordering strong_ordering::equivalent =
{ strong_ordering::ordering::equal };
inline constexpr strong_ordering strong_ordering::greater =
{ strong_ordering::ordering::greater };
}
void target(int i, int j) {
auto ordering = i <=> j;
// [[p]]
}
)";
  // Only the matcher helpers that are actually used below are imported.
  using ast_matchers::binaryOperator;
  using ast_matchers::hasOperatorName;
  using ast_matchers::match;
  using ast_matchers::selectFirst;
  runDataflow(
      Code,
      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
         ASTContext &ASTCtx) {
        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");

        // Locate the `<=>` expression in the AST.
        auto *Spaceship = selectFirst<BinaryOperator>(
            "op",
            match(binaryOperator(hasOperatorName("<=>")).bind("op"), ASTCtx));

        // The result object of the `<=>` expression must be the storage
        // location of the variable it initialises.
        EXPECT_EQ(
            &Env.getResultObjectLocation(*Spaceship),
            &getLocForDecl<RecordStorageLocation>(ASTCtx, Env, "ordering"));
      },
      LangStandard::lang_cxx20);
}

TEST(TransferTest, ResultObjectLocationForStdInitializerListExpr) {
std::string Code = R"(
namespace std {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
// RUN: %clangxx %s -pie -fPIE -o %t && %run %t
// RUN: %clang %s -pie -fPIE -o %t && %run %t
// REQUIRES: x86_64-target-arch

// FIXME: Fails Asan, as expected, with 5lvl page tables.
// UNSUPPORTED: x86_64-target-arch

#include <assert.h>
#include <stdio.h>
#include <sys/mman.h>
Expand Down
8 changes: 8 additions & 0 deletions flang/docs/Intrinsics.md
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,14 @@ CALL CO_REDUCE
CALL CO_SUM
```

### Inquiry Functions
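ACCESS (GNU extension) is not supported on Windows. On other platforms it
returns an integer: 0 if the file can be accessed with the requested mode, and
a non-zero value otherwise. For example:
```
CHARACTER(LEN=*), PARAMETER :: path = 'path/to/file'
IF (ACCESS(path, 'rwx') == 0) &
  ...
```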

## Non-standard intrinsics
### PGI
```
Expand Down
32 changes: 32 additions & 0 deletions flang/include/flang/Optimizer/Dialect/FIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -3190,4 +3190,36 @@ def fir_CUDADataTransferOp : fir_Op<"cuda_data_transfer", []> {
}];
}

// fir.cuda_allocate: device allocation for an allocatable.
//
// Operands: a required `box`, plus optional `errmsg`, `stream`, `pinned` and
// `source` (hence AttrSizedOperandSegments). Attributes: `cuda_attr` and the
// unit attribute `hasStat`. The op produces a single integer result, `stat`.
// In the assembly format the optional groups appear in the order
// source, errmsg, stream, pinned. A C++ verifier is declared (hasVerifier).
def fir_CUDAAllocateOp : fir_Op<"cuda_allocate", [AttrSizedOperandSegments,
MemoryEffects<[MemAlloc<DefaultResource>]>]> {
let summary = "Perform the device allocation of data of an allocatable";

let description = [{
The fir.cuda_allocate operation performs the allocation on the device
of the data of an allocatable. The descriptor passed to the operation
is initialized before with the standard flang runtime calls.
}];

let arguments = (ins Arg<AnyRefOrBoxType, "", [MemWrite]>:$box,
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
Optional<AnyIntegerType>:$stream,
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
fir_CUDADataAttributeAttr:$cuda_attr,
UnitAttr:$hasStat);

let results = (outs AnyIntegerType:$stat);

let assemblyFormat = [{
$box `:` qualified(type($box))
( `source` `(` $source^ `:` qualified(type($source) )`)` )?
( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
( `stream` `(` $stream^ `:` type($stream) `)` )?
( `pinned` `(` $pinned^ `:` type($pinned) `)` )?
attr-dict `->` type($stat)
}];

let hasVerifier = 1;
}

#endif
1 change: 1 addition & 0 deletions flang/include/flang/Optimizer/Dialect/FIRTypes.td
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,7 @@ def AnyRefOrBoxLike : TypeConstraint<Or<[AnyReferenceLike.predicate,
def AnyRefOrBox : TypeConstraint<Or<[fir_ReferenceType.predicate,
fir_HeapType.predicate, fir_PointerType.predicate,
IsBaseBoxTypePred]>, "any reference or box">;
// `Type` (as opposed to `TypeConstraint`) built from the AnyRefOrBox predicate.
def AnyRefOrBoxType : Type<AnyRefOrBox.predicate, "any legal ref or box type">;

def AnyShapeLike : TypeConstraint<Or<[fir_ShapeType.predicate,
fir_ShapeShiftType.predicate]>, "any legal shape type">;
Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Parser/dump-parse-tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,7 @@ class ParseTreeDumper {
NODE_ENUM(OmpOrderModifier, Kind)
NODE(parser, OmpProcBindClause)
NODE_ENUM(OmpProcBindClause, Type)
NODE_ENUM(OmpReductionClause, ReductionModifier)
NODE(parser, OmpReductionClause)
NODE(parser, OmpInReductionClause)
NODE(parser, OmpReductionCombiner)
Expand Down
5 changes: 4 additions & 1 deletion flang/include/flang/Parser/parse-tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -3552,7 +3552,10 @@ struct OmpReductionOperator {
// variable-name-list)
struct OmpReductionClause {
TUPLE_CLASS_BOILERPLATE(OmpReductionClause);
std::tuple<OmpReductionOperator, OmpObjectList> t;
ENUM_CLASS(ReductionModifier, Inscan, Task, Default)
std::tuple<std::optional<ReductionModifier>, OmpReductionOperator,
OmpObjectList>
t;
};

// OMP 5.0 2.19.5.6 in_reduction-clause -> IN_REDUCTION (reduction-identifier:
Expand Down
7 changes: 7 additions & 0 deletions flang/include/flang/Runtime/extensions.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,12 @@ std::int64_t RTNAME(Signal)(std::int64_t number, void (*handler)(int));
// GNU extension subroutine SLEEP(SECONDS)
void RTNAME(Sleep)(std::int64_t seconds);

// GNU extension function ACCESS(NAME, MODE)
// TODO: not supported on Windows
//
// `name`/`nameLength` and `mode`/`modeLength` are character buffers passed
// together with their explicit lengths. As implemented in
// flang/runtime/extensions.cpp, `mode` may contain only the characters
// 'r', 'w', 'x' and ' '. The function returns -1 when the arguments are
// rejected; otherwise it returns the result of the POSIX access() call.
#ifndef _WIN32
std::int64_t FORTRAN_PROCEDURE_NAME(access)(const char *name,
std::int64_t nameLength, const char *mode, std::int64_t modeLength);
#endif

} // extern "C"
#endif // FORTRAN_RUNTIME_EXTENSIONS_H_
4 changes: 2 additions & 2 deletions flang/lib/Lower/OpenMP/ClauseProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -832,8 +832,8 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc,
}

bool ClauseProcessor::processMap(
mlir::Location currentLocation, const llvm::omp::Directive &directive,
Fortran::lower::StatementContext &stmtCtx, mlir::omp::MapClauseOps &result,
mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx,
mlir::omp::MapClauseOps &result,
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *mapSyms,
llvm::SmallVectorImpl<mlir::Location> *mapSymLocs,
llvm::SmallVectorImpl<mlir::Type> *mapSymTypes) const {
Expand Down
3 changes: 1 addition & 2 deletions flang/lib/Lower/OpenMP/ClauseProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,7 @@ class ClauseProcessor {
// They may be used later on to create the block_arguments for some of the
// target directives that require it.
bool processMap(
mlir::Location currentLocation, const llvm::omp::Directive &directive,
Fortran::lower::StatementContext &stmtCtx,
mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx,
mlir::omp::MapClauseOps &result,
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *mapSyms =
nullptr,
Expand Down
26 changes: 21 additions & 5 deletions flang/lib/Lower/OpenMP/Clauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,12 +1005,28 @@ ProcBind make(const parser::OmpClause::ProcBind &inp,
Reduction make(const parser::OmpClause::Reduction &inp,
semantics::SemanticsContext &semaCtx) {
// inp.v -> parser::OmpReductionClause
auto &t0 = std::get<parser::OmpReductionOperator>(inp.v.t);
auto &t1 = std::get<parser::OmpObjectList>(inp.v.t);
using wrapped = parser::OmpReductionClause;

CLAUSET_ENUM_CONVERT( //
convert, wrapped::ReductionModifier, Reduction::ReductionModifier,
// clang-format off
MS(Inscan, Inscan)
MS(Task, Task)
MS(Default, Default)
// clang-format on
);

auto &t0 =
std::get<std::optional<parser::OmpReductionClause::ReductionModifier>>(
inp.v.t);
auto &t1 = std::get<parser::OmpReductionOperator>(inp.v.t);
auto &t2 = std::get<parser::OmpObjectList>(inp.v.t);
return Reduction{
{/*ReductionIdentifiers=*/{makeReductionOperator(t0, semaCtx)},
/*ReductionModifier=*/std::nullopt,
/*List=*/makeObjects(t1, semaCtx)}};
{/*ReductionModifier=*/t0
? std::make_optional<Reduction::ReductionModifier>(convert(*t0))
: std::nullopt,
/*ReductionIdentifiers=*/{makeReductionOperator(t1, semaCtx)},
/*List=*/makeObjects(t2, semaCtx)}};
}

// Relaxed: empty
Expand Down
2,388 changes: 1,212 additions & 1,176 deletions flang/lib/Lower/OpenMP/OpenMP.cpp

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions flang/lib/Lower/OpenMP/ReductionProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,11 @@ void ReductionProcessor::addDeclareReduction(
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
*reductionSymbols) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();

if (std::get<std::optional<omp::clause::Reduction::ReductionModifier>>(
reduction.t))
TODO(currentLocation, "Reduction modifiers are not supported");

mlir::omp::DeclareReductionOp decl;
const auto &redOperatorList{
std::get<omp::clause::Reduction::ReductionIdentifiers>(reduction.t)};
Expand Down
19 changes: 19 additions & 0 deletions flang/lib/Optimizer/Dialect/FIROps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3993,6 +3993,25 @@ mlir::LogicalResult fir::CUDAKernelOp::verify() {
return mlir::success();
}

mlir::LogicalResult fir::CUDAAllocateOp::verify() {
  // `pinned` and `stream` are mutually exclusive operands.
  if (getStream() && getPinned())
    return emitOpError("pinned and stream cannot appears at the same time");

  // The required `box` operand must be a box or class type once any
  // reference wrapper has been stripped.
  mlir::Type boxTy = fir::unwrapRefType(getBox().getType());
  if (!boxTy.isa<fir::BaseBoxType>())
    return emitOpError(
        "expect box to be a reference to/or a class or box type value");

  // `source` is optional; when present it has the same requirement as `box`.
  if (auto source = getSource()) {
    if (!fir::unwrapRefType(source.getType()).isa<fir::BaseBoxType>())
      return emitOpError(
          "expect source to be a reference to/or a class or box type value");
  }

  // `errmsg` is optional; when present it must be a (reference to a) box and
  // the op must also carry the `hasStat` attribute.
  if (auto errmsg = getErrmsg()) {
    if (!fir::unwrapRefType(errmsg.getType()).isa<fir::BoxType>())
      return emitOpError(
          "expect errmsg to be a reference to/or a box type value");
    if (!getHasStat())
      return emitOpError("expect stat attribute when errmsg is provided");
  }

  return mlir::success();
}

//===----------------------------------------------------------------------===//
// FIROpsDialect
//===----------------------------------------------------------------------===//
Expand Down
5 changes: 5 additions & 0 deletions flang/lib/Parser/openmp-parsers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ TYPE_PARSER(construct<OmpReductionOperator>(Parser<DefinedOperator>{}) ||
construct<OmpReductionOperator>(Parser<ProcedureDesignator>{}))

// Reduction clause contents: an optional reduction modifier (INSCAN, TASK or
// DEFAULT) followed by ",", then a reduction operator followed by ":", then
// the object list. The modifier and its trailing comma are parsed as one
// optional unit.
TYPE_PARSER(construct<OmpReductionClause>(
maybe(
("INSCAN" >> pure(OmpReductionClause::ReductionModifier::Inscan) ||
"TASK" >> pure(OmpReductionClause::ReductionModifier::Task) ||
"DEFAULT" >> pure(OmpReductionClause::ReductionModifier::Default)) /
","),
Parser<OmpReductionOperator>{} / ":", Parser<OmpObjectList>{}))

// OMP 5.0 2.19.5.6 IN_REDUCTION (reduction-identifier: variable-name-list)
Expand Down
4 changes: 4 additions & 0 deletions flang/lib/Parser/unparse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2090,6 +2090,8 @@ class UnparseVisitor {
Walk(":", x.step);
}
void Unparse(const OmpReductionClause &x) {
Walk(std::get<std::optional<OmpReductionClause::ReductionModifier>>(x.t),
",");
Walk(std::get<OmpReductionOperator>(x.t));
Put(":");
Walk(std::get<OmpObjectList>(x.t));
Expand Down Expand Up @@ -2727,6 +2729,8 @@ class UnparseVisitor {
WALK_NESTED_ENUM(OmpScheduleClause, ScheduleType) // OMP schedule-type
WALK_NESTED_ENUM(OmpDeviceClause, DeviceModifier) // OMP device modifier
WALK_NESTED_ENUM(OmpDeviceTypeClause, Type) // OMP DEVICE_TYPE
WALK_NESTED_ENUM(
OmpReductionClause, ReductionModifier) // OMP reduction-modifier
WALK_NESTED_ENUM(OmpIfClause, DirectiveNameModifier) // OMP directive-modifier
WALK_NESTED_ENUM(OmpCancelType, Type) // OMP cancel-type
WALK_NESTED_ENUM(OmpOrderClause, Type) // OMP order-type
Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Semantics/check-omp-structure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2289,7 +2289,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Reduction &x) {
bool OmpStructureChecker::CheckReductionOperators(
const parser::OmpClause::Reduction &x) {

const auto &definedOp{std::get<0>(x.v.t)};
const auto &definedOp{std::get<parser::OmpReductionOperator>(x.v.t)};
bool ok = false;
common::visit(
common::visitors{
Expand Down
73 changes: 73 additions & 0 deletions flang/runtime/extensions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "flang/Runtime/entry-names.h"
#include "flang/Runtime/io-api.h"
#include <chrono>
#include <cstring>
#include <ctime>
#include <signal.h>
#include <thread>
Expand Down Expand Up @@ -138,5 +139,77 @@ void RTNAME(Sleep)(std::int64_t seconds) {
std::this_thread::sleep_for(std::chrono::seconds(seconds));
}

// TODO: not supported on Windows
#ifndef _WIN32
// GNU extension function ACCESS(NAME, MODE).
//
// name/nameLength: path to test; it need not be null terminated.
// mode/modeLength: any combination of 'r', 'w', 'x' and ' '. If none of
//   'r', 'w', 'x' is present (blanks only), only existence is tested.
//
// Returns -1 if an argument is null or empty, if `mode` contains any other
// character, or if the temporary copy of `name` cannot be allocated.
// Otherwise returns the result of access(2) for the requested permissions.
std::int64_t FORTRAN_PROCEDURE_NAME(access)(const char *name,
    std::int64_t nameLength, const char *mode, std::int64_t modeLength) {
  constexpr std::int64_t failure{-1};
  if (nameLength <= 0 || modeLength <= 0 || !name || !mode) {
    return failure;
  }

  // Translate the mode string first: nothing has been allocated yet, so an
  // invalid mode can simply return without any cleanup.
  int requested{0};
  for (std::int64_t i{0}; i < modeLength; ++i) {
    switch (mode[i]) {
    case 'r':
      requested |= R_OK;
      break;
    case 'w':
      requested |= W_OK;
      break;
    case 'x':
      requested |= X_OK;
      break;
    case ' ':
      // A blank requests an existence check and adds no permission bit.
      break;
    default:
      // invalid mode
      return failure;
    }
  }
  // modeLength > 0 and every character was accepted, so the mode is valid.
  // With no permission bits requested, fall back to an existence test.
  const int imode{requested != 0 ? requested : F_OK};

  // ensure name is null terminated
  char *newName{nullptr};
  if (name[nameLength - 1] != '\0') {
    newName = static_cast<char *>(std::malloc(nameLength + 1));
    if (!newName) {
      // Out of memory: report failure rather than writing through null.
      return failure;
    }
    std::memcpy(newName, name, nameLength);
    newName[nameLength] = '\0';
    name = newName;
  }

  const std::int64_t ret{access(name, imode)};
  std::free(newName); // no-op when no copy was made
  return ret;
}
#endif

} // namespace Fortran::runtime
} // extern "C"
50 changes: 50 additions & 0 deletions flang/test/Fir/cuf-invalid.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// RUN: fir-opt -split-input-file -verify-diagnostics %s

func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%1 = fir.alloca i32
%pinned = fir.alloca i1
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%s = fir.load %1 : !fir.ref<i32>
// expected-error@+1{{'fir.cuda_allocate' op pinned and stream cannot appears at the same time}}
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) pinned(%pinned : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
return
}

// -----

func.func @_QPsub1() {
%1 = fir.alloca i32
// expected-error@+1{{'fir.cuda_allocate' op expect box to be a reference to/or a class or box type value}}
%2 = fir.cuda_allocate %1 : !fir.ref<i32> {cuda_attr = #fir.cuda<device>} -> i32
return
}

// -----

func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%c100 = arith.constant 100 : index
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
// expected-error@+1{{'fir.cuda_allocate' op expect stat attribute when errmsg is provided}}
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>} -> i32
return
}

// -----

func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%1 = fir.alloca i32
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
// expected-error@+1{{'fir.cuda_allocate' op expect errmsg to be a reference to/or a box type value}}
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
return
}
70 changes: 70 additions & 0 deletions flang/test/Fir/cuf.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// RUN: fir-opt --split-input-file %s | fir-opt --split-input-file | FileCheck %s

// Simple round trip test of operations.

func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
return
}

// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32

// -----

func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%1 = fir.alloca i32
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%s = fir.load %1 : !fir.ref<i32>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) {cuda_attr = #fir.cuda<device>} -> i32
return
}

// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> stream(%{{.*}} : i32) {cuda_attr = #fir.cuda<device>} -> i32

// -----

func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%1 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub1Eb"}
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%5:2 = hlfir.declare %1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%12 = fir.convert %5#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> source(%12 : !fir.ref<!fir.box<none>>) {cuda_attr = #fir.cuda<device>} -> i32
return
}

// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> source(%{{.*}} : !fir.ref<!fir.box<none>>) {cuda_attr = #fir.cuda<device>} -> i32

// -----

func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%pinned = fir.alloca i1
%4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> pinned(%pinned : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
return
}

// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> pinned(%{{.*}} : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32

// -----

// Round trip of fir.cuda_allocate with an errmsg operand and the hasStat
// attribute.
func.func @_QPsub1() {
  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
  %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
  %c100 = arith.constant 100 : index
  %7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
  %8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
  %9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
  %11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
  %16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
  %13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
  return
}

// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
2 changes: 1 addition & 1 deletion flang/test/Lower/OpenMP/FIR/target.f90
Original file line number Diff line number Diff line change
Expand Up @@ -411,8 +411,8 @@ end subroutine omp_target_implicit_bounds
!CHECK-LABEL: func.func @_QPomp_target_thread_limit() {
subroutine omp_target_thread_limit
integer :: a
!CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32
!CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "a"}
!CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32
!CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !fir.ref<i32>) {
!CHECK: ^bb0(%[[ARG_0]]: !fir.ref<i32>):
!$omp target map(tofrom: a) thread_limit(64)
Expand Down
13 changes: 13 additions & 0 deletions flang/test/Lower/OpenMP/Todo/reduction-modifiers.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s

! CHECK: not yet implemented: Reduction modifiers are not supported

subroutine foo()
integer :: i, j
j = 0
!$omp do reduction (inscan, *: j)
do i = 1, 10
j = j + 1
end do
end subroutine
2 changes: 1 addition & 1 deletion flang/test/Lower/OpenMP/target.f90
Original file line number Diff line number Diff line change
Expand Up @@ -490,8 +490,8 @@ end subroutine omp_target_implicit_bounds
!CHECK-LABEL: func.func @_QPomp_target_thread_limit() {
subroutine omp_target_thread_limit
integer :: a
!CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32
!CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "a"}
!CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32
!CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %{{.*}} : !fir.ref<i32>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
!$omp target map(tofrom: a) thread_limit(64)
Expand Down
4 changes: 2 additions & 2 deletions flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ subroutine only_use_device_ptr

!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr()
!CHECK: omp.target_data use_device_ptr({{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
subroutine mix_use_device_ptr_and_addr
use iso_c_binding
integer, pointer, dimension(:) :: array
Expand All @@ -47,7 +47,7 @@ subroutine only_use_device_addr

!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map()
!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
!CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
subroutine mix_use_device_ptr_and_addr_and_map
use iso_c_binding
integer :: i, j
Expand Down
20 changes: 20 additions & 0 deletions flang/test/Parser/OpenMP/reduction-modifier.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
! RUN: %flang_fc1 -fdebug-unparse-no-sema -fopenmp %s | FileCheck --ignore-case %s
! RUN: %flang_fc1 -fdebug-dump-parse-tree-no-sema -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s

subroutine foo()
integer :: i, j
j = 0
! CHECK: !$OMP DO REDUCTION(TASK,*:j)
! PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct
! PARSE-TREE: | | | OmpBeginLoopDirective
! PARSE-TREE: | | | | OmpLoopDirective -> llvm::omp::Directive = do
! PARSE-TREE: | | | | OmpClauseList -> OmpClause -> Reduction -> OmpReductionClause
! PARSE-TREE: | | | | | ReductionModifier = Task
! PARSE-TREE: | | | | | OmpReductionOperator -> DefinedOperator -> IntrinsicOperator = Multiply
! PARSE-TREE: | | | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'j
!$omp do reduction (task, *: j)
do i = 1, 10
j = j + 1
end do
!$omp end do
end
422 changes: 422 additions & 0 deletions flang/unittests/Runtime/AccessTest.cpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions flang/unittests/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
add_flang_unittest(FlangRuntimeTests
AccessTest.cpp
Allocatable.cpp
ArrayConstructor.cpp
BufferTest.cpp
Expand Down
18 changes: 18 additions & 0 deletions libc/hdr/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,21 @@ add_proxy_header_library(
FULL_BUILD_DEPENDS
libc.include.llvm-libc-types.struct_timespec
)

# Proxy header library for the fenv_t type. The FULL_BUILD_DEPENDS entries
# name the llvm-libc type target and the generated fenv header target.
add_proxy_header_library(
  fenv_t
  HDRS
    fenv_t.h
  FULL_BUILD_DEPENDS
    libc.include.llvm-libc-types.fenv_t
    libc.include.fenv
)

# Proxy header library for the fexcept_t type. The FULL_BUILD_DEPENDS entries
# name the llvm-libc type target and the generated fenv header target.
add_proxy_header_library(
  fexcept_t
  HDRS
    fexcept_t.h
  FULL_BUILD_DEPENDS
    libc.include.llvm-libc-types.fexcept_t
    libc.include.fenv
)
22 changes: 22 additions & 0 deletions libc/hdr/types/fenv_t.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Proxy header for fenv_t -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_HDR_FENV_T_H
#define LLVM_LIBC_HDR_FENV_T_H

// Selects where fenv_t comes from: the header under
// include/llvm-libc-types/ when LIBC_FULL_BUILD is defined, otherwise the
// <fenv.h> found on the include path.
#ifdef LIBC_FULL_BUILD

#include "include/llvm-libc-types/fenv_t.h"

#else // Overlay mode

#include <fenv.h>

#endif // LIBC_FULL_BUILD

#endif // LLVM_LIBC_HDR_FENV_T_H
22 changes: 22 additions & 0 deletions libc/hdr/types/fexcept_t.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Proxy header for fexcept_t ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_HDR_FEXCEPT_T_H
#define LLVM_LIBC_HDR_FEXCEPT_T_H

// Selects where fexcept_t comes from: the header under
// include/llvm-libc-types/ when LIBC_FULL_BUILD is defined, otherwise the
// <fenv.h> found on the include path.
#ifdef LIBC_FULL_BUILD

#include "include/llvm-libc-types/fexcept_t.h"

#else // Overlay mode

#include <fenv.h>

#endif // LIBC_FULL_BUILD

#endif // LLVM_LIBC_HDR_FEXCEPT_T_H
2 changes: 1 addition & 1 deletion libc/src/__support/FPUtil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ add_header_library(
HDRS
FEnvImpl.h
DEPENDS
libc.include.fenv
libc.hdr.types.fenv_t
libc.hdr.fenv_macros
libc.hdr.math_macros
libc.src.__support.macros.attributes
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/FPUtil/aarch64/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
#endif

#include <arm_acle.h>
#include <fenv.h>
#include <stdint.h>

#include "hdr/fenv_macros.h"
#include "hdr/types/fenv_t.h"
#include "src/__support/FPUtil/FPBits.h"

namespace LIBC_NAMESPACE {
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
#endif

#include <arm_acle.h>
#include <fenv.h>
#include <stdint.h>

#include "hdr/fenv_macros.h"
#include "hdr/types/fenv_t.h"
#include "src/__support/FPUtil/FPBits.h"

namespace LIBC_NAMESPACE {
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/FPUtil/arm/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_FENVIMPL_H

#include "hdr/fenv_macros.h"
#include "hdr/types/fenv_t.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/macros/attributes.h" // For LIBC_INLINE
#include <fenv.h>
#include <stdint.h>

namespace LIBC_NAMESPACE {
Expand Down
1 change: 1 addition & 0 deletions libc/src/__support/FPUtil/riscv/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_RISCV_FENVIMPL_H

#include "hdr/fenv_macros.h"
#include "hdr/types/fenv_t.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/macros/attributes.h" // For LIBC_INLINE_ASM
#include "src/__support/macros/config.h" // For LIBC_INLINE
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/FPUtil/x86_64/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
#error "Invalid include"
#endif

#include <fenv.h>
#include <stdint.h>

#include "hdr/types/fenv_t.h"
#include "src/__support/macros/sanitizer.h"

namespace LIBC_NAMESPACE {
Expand Down
7 changes: 6 additions & 1 deletion libc/src/fenv/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ add_entrypoint_object(
HDRS
fegetround.h
DEPENDS
libc.include.fenv
libc.src.__support.FPUtil.fenv_impl
COMPILE_OPTIONS
-O2
Expand Down Expand Up @@ -71,6 +70,7 @@ add_entrypoint_object(
fegetenv.h
DEPENDS
libc.hdr.fenv_macros
libc.hdr.types.fenv_t
libc.src.__support.FPUtil.fenv_impl
COMPILE_OPTIONS
-O2
Expand All @@ -84,6 +84,7 @@ add_entrypoint_object(
fesetenv.h
DEPENDS
libc.hdr.fenv_macros
libc.hdr.types.fenv_t
libc.src.__support.FPUtil.fenv_impl
COMPILE_OPTIONS
-O2
Expand All @@ -97,6 +98,7 @@ add_entrypoint_object(
fegetexceptflag.h
DEPENDS
libc.hdr.fenv_macros
libc.hdr.types.fexcept_t
libc.src.__support.FPUtil.fenv_impl
COMPILE_OPTIONS
-O2
Expand All @@ -123,6 +125,7 @@ add_entrypoint_object(
fesetexceptflag.h
DEPENDS
libc.hdr.fenv_macros
libc.hdr.types.fexcept_t
libc.src.__support.FPUtil.fenv_impl
COMPILE_OPTIONS
-O2
Expand All @@ -136,6 +139,7 @@ add_entrypoint_object(
feholdexcept.h
DEPENDS
libc.hdr.fenv_macros
libc.hdr.types.fenv_t
libc.src.__support.FPUtil.fenv_impl
COMPILE_OPTIONS
-O2
Expand All @@ -149,6 +153,7 @@ add_entrypoint_object(
feupdateenv.h
DEPENDS
libc.hdr.fenv_macros
libc.hdr.types.fenv_t
libc.src.__support.FPUtil.fenv_impl
COMPILE_OPTIONS
-O2
Expand Down
2 changes: 1 addition & 1 deletion libc/src/fenv/fegetenv.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifndef LLVM_LIBC_SRC_FENV_FEGETENV_H
#define LLVM_LIBC_SRC_FENV_FEGETENV_H

#include <fenv.h>
#include "hdr/types/fenv_t.h"

namespace LIBC_NAMESPACE {

Expand Down
3 changes: 1 addition & 2 deletions libc/src/fenv/fegetexceptflag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
//===----------------------------------------------------------------------===//

#include "src/fenv/fegetexceptflag.h"
#include "hdr/types/fexcept_t.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/common.h"

#include <fenv.h>

namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(int, fegetexceptflag, (fexcept_t * flagp, int excepts)) {
Expand Down
2 changes: 1 addition & 1 deletion libc/src/fenv/feholdexcept.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
//===----------------------------------------------------------------------===//

#include "src/fenv/feholdexcept.h"
#include "hdr/types/fenv_t.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/common.h"
#include <fenv.h>

namespace LIBC_NAMESPACE {

Expand Down
2 changes: 1 addition & 1 deletion libc/src/fenv/feholdexcept.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifndef LLVM_LIBC_SRC_FENV_FEHOLDEXCEPT_H
#define LLVM_LIBC_SRC_FENV_FEHOLDEXCEPT_H

#include <fenv.h>
#include "hdr/types/fenv_t.h"

namespace LIBC_NAMESPACE {

Expand Down
2 changes: 1 addition & 1 deletion libc/src/fenv/fesetenv.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifndef LLVM_LIBC_SRC_FENV_FESETENV_H
#define LLVM_LIBC_SRC_FENV_FESETENV_H

#include <fenv.h>
#include "hdr/types/fenv_t.h"

namespace LIBC_NAMESPACE {

Expand Down
2 changes: 1 addition & 1 deletion libc/src/fenv/fesetexceptflag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
//===----------------------------------------------------------------------===//

#include "src/fenv/fesetexceptflag.h"
#include "hdr/types/fexcept_t.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/common.h"
#include <fenv.h>

namespace LIBC_NAMESPACE {

Expand Down
2 changes: 1 addition & 1 deletion libc/src/fenv/feupdateenv.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifndef LLVM_LIBC_SRC_FENV_FEUPDATEENV_H
#define LLVM_LIBC_SRC_FENV_FEUPDATEENV_H

#include <fenv.h>
#include "hdr/types/fenv_t.h"

namespace LIBC_NAMESPACE {

Expand Down
1 change: 1 addition & 0 deletions libc/test/UnitTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ add_unittest_framework_library(
LibcTest
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.fenv_impl
libc.hdr.types.fenv_t
)

add_unittest_framework_library(
Expand Down
7 changes: 4 additions & 3 deletions libc/test/UnitTest/FPExceptMatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

#include "FPExceptMatcher.h"

#include <fenv.h>
#include "hdr/types/fenv_t.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include <memory>
#include <setjmp.h>
#include <signal.h>
Expand Down Expand Up @@ -36,12 +37,12 @@ FPExceptMatcher::FPExceptMatcher(FunctionCaller *func) {

caughtExcept = false;
fenv_t oldEnv;
fegetenv(&oldEnv);
fputil::get_env(&oldEnv);
if (sigsetjmp(jumpBuffer, 1) == 0)
funcUP->call();
// We restore the previous floating point environment after
// the call to the function which can potentially raise SIGFPE.
fesetenv(&oldEnv);
fputil::set_env(&oldEnv);
signal(SIGFPE, oldSIGFPEHandler);
exceptionRaised = caughtExcept;
}
Expand Down
3 changes: 1 addition & 2 deletions libc/test/src/fenv/exception_flags_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@
//
//===----------------------------------------------------------------------===//

#include "hdr/types/fexcept_t.h"
#include "src/fenv/fegetexceptflag.h"
#include "src/fenv/fesetexceptflag.h"

#include "src/__support/FPUtil/FEnvImpl.h"
#include "test/UnitTest/Test.h"

#include <fenv.h>

TEST(LlvmLibcFenvTest, GetExceptFlagAndSetExceptFlag) {
// We will disable all exceptions to prevent invocation of the exception
// handler.
Expand Down
3 changes: 1 addition & 2 deletions libc/test/src/fenv/feholdexcept_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@
//
//===----------------------------------------------------------------------===//

#include "hdr/types/fenv_t.h"
#include "src/fenv/feholdexcept.h"

#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/macros/properties/architectures.h"
#include "test/UnitTest/FPExceptMatcher.h"
#include "test/UnitTest/Test.h"

#include <fenv.h>

TEST(LlvmLibcFEnvTest, RaiseAndCrash) {
#if defined(LIBC_TARGET_ARCH_IS_ANY_ARM) || \
defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
Expand Down
2 changes: 1 addition & 1 deletion libc/test/src/fenv/feupdateenv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
//
//===----------------------------------------------------------------------===//

#include "hdr/types/fenv_t.h"
#include "src/fenv/feupdateenv.h"

#include "src/__support/FPUtil/FEnvImpl.h"
#include "test/UnitTest/Test.h"

#include <fenv.h>
#include <signal.h>

TEST(LlvmLibcFEnvTest, UpdateEnvTest) {
Expand Down
3 changes: 1 addition & 2 deletions libc/test/src/fenv/getenv_and_setenv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include "hdr/types/fenv_t.h"
#include "src/fenv/fegetenv.h"
#include "src/fenv/fegetround.h"
#include "src/fenv/fesetenv.h"
Expand All @@ -14,8 +15,6 @@
#include "src/__support/FPUtil/FEnvImpl.h"
#include "test/UnitTest/Test.h"

#include <fenv.h>

TEST(LlvmLibcFenvTest, GetEnvAndSetEnv) {
// We will disable all exceptions to prevent invocation of the exception
// handler.
Expand Down
2 changes: 2 additions & 0 deletions libcxx/include/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,7 @@ set(files
__numeric/transform_exclusive_scan.h
__numeric/transform_inclusive_scan.h
__numeric/transform_reduce.h
__pstl/cpu_algos/cpu_traits.h
__random/bernoulli_distribution.h
__random/binomial_distribution.h
__random/cauchy_distribution.h
Expand Down Expand Up @@ -859,6 +860,7 @@ set(files
__utility/in_place.h
__utility/integer_sequence.h
__utility/is_pointer_in_range.h
__utility/is_valid_range.h
__utility/move.h
__utility/no_destroy.h
__utility/pair.h
Expand Down
47 changes: 1 addition & 46 deletions libcxx/include/__algorithm/pstl_backends/cpu_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,52 +9,6 @@
#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_H
#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_H

#include <__config>

/*

// _Functor takes a subrange for [__first, __last) that should be executed in serial
template <class _RandomAccessIterator, class _Functor>
optional<__empty> __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func);

template <class _Iterator, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduction>
optional<_Tp>
__parallel_transform_reduce(_Iterator __first, _Iterator __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduction);

// Cancel the execution of other jobs - they aren't needed anymore
void __cancel_execution();

template <class _RandomAccessIterator1,
class _RandomAccessIterator2,
class _RandomAccessIterator3,
class _Compare,
class _LeafMerge>
optional<void> __parallel_merge(
_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __outit,
_Compare __comp,
_LeafMerge __leaf_merge);

template <class _RandomAccessIterator, class _Comp, class _LeafSort>
void __parallel_stable_sort(_RandomAccessIterator __first,
_RandomAccessIterator __last,
_Comp __comp,
_LeafSort __leaf_sort);

TODO: Document the parallel backend

Exception handling
==================

CPU backends are expected to report errors (i.e. failure to allocate) by returning a disengaged `optional` from their
implementation. Exceptions shouldn't be used to report an internal failure-to-allocate, since all exceptions are turned
into a program termination at the front-end level. When a backend returns a disengaged `optional` to the frontend, the
frontend will turn that into a call to `std::__throw_bad_alloc();` to report the internal failure to the user.
*/

#include <__algorithm/pstl_backends/cpu_backends/any_of.h>
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
#include <__algorithm/pstl_backends/cpu_backends/fill.h>
Expand All @@ -64,5 +18,6 @@ frontend will turn that into a call to `std::__throw_bad_alloc();` to report the
#include <__algorithm/pstl_backends/cpu_backends/stable_sort.h>
#include <__algorithm/pstl_backends/cpu_backends/transform.h>
#include <__algorithm/pstl_backends/cpu_backends/transform_reduce.h>
#include <__config>

#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_H
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <__config>
#include <__functional/operations.h>
#include <__iterator/concepts.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <__utility/pair.h>
Expand All @@ -30,13 +31,13 @@ _LIBCPP_PUSH_MACROS

_LIBCPP_BEGIN_NAMESPACE_STD

template <class _Index, class _Brick>
template <class _Backend, class _Index, class _Brick>
_LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) {
std::atomic<bool> __found(false);
auto __ret = __par_backend::__parallel_for(__first, __last, [__f, &__found](_Index __i, _Index __j) {
auto __ret = __pstl::__cpu_traits<_Backend>::__for_each(__first, __last, [__f, &__found](_Index __i, _Index __j) {
if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) {
__found.store(true, std::memory_order_relaxed);
__par_backend::__cancel_execution();
__pstl::__cpu_traits<_Backend>::__cancel_execution();
}
});
if (!__ret)
Expand Down Expand Up @@ -74,7 +75,7 @@ _LIBCPP_HIDE_FROM_ABI optional<bool>
__pstl_any_of(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
return std::__parallel_or(
return std::__parallel_or<__cpu_backend_tag>(
__first, __last, [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
auto __res = std::__pstl_any_of<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __pred);
Expand Down
14 changes: 9 additions & 5 deletions libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,20 @@
# pragma GCC system_header
#endif

#if _LIBCPP_STD_VER >= 17
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17

_LIBCPP_BEGIN_NAMESPACE_STD

struct __cpu_backend_tag {};

inline constexpr size_t __lane_size = 64;
# if defined(_LIBCPP_PSTL_CPU_BACKEND_SERIAL)
using __cpu_backend_tag = __pstl::__serial_backend_tag;
# elif defined(_LIBCPP_PSTL_CPU_BACKEND_THREAD)
using __cpu_backend_tag = __pstl::__std_thread_backend_tag;
# elif defined(_LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH)
using __cpu_backend_tag = __pstl::__libdispatch_backend_tag;
# endif

_LIBCPP_END_NAMESPACE_STD

#endif // _LIBCPP_STD_VER >= 17
#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17

#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_BACKEND_H
3 changes: 2 additions & 1 deletion libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/empty.h>
#include <optional>
Expand All @@ -39,7 +40,7 @@ _LIBCPP_HIDE_FROM_ABI optional<__empty>
__pstl_fill(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
return __par_backend::__parallel_for(
return __pstl::__cpu_traits<__cpu_backend_tag>::__for_each(
__first, __last, [&__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
[[maybe_unused]] auto __res = std::__pstl_fill<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __value);
Expand Down
22 changes: 12 additions & 10 deletions libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <__functional/operations.h>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <__utility/pair.h>
Expand All @@ -33,16 +34,16 @@ _LIBCPP_PUSH_MACROS

_LIBCPP_BEGIN_NAMESPACE_STD

template <class _Index, class _Brick, class _Compare>
template <class _Backend, class _Index, class _Brick, class _Compare>
_LIBCPP_HIDE_FROM_ABI optional<_Index>
__parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) {
typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
const _DifferenceType __n = __last - __first;
_DifferenceType __initial_dist = __b_first ? __n : -1;
std::atomic<_DifferenceType> __extremum(__initial_dist);
// TODO: find out what is better here: parallel_for or parallel_reduce
auto __res =
__par_backend::__parallel_for(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
auto __res = __pstl::__cpu_traits<_Backend>::__for_each(
__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
// See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
// why using a shared variable scales fairly well in this situation.
if (__comp(__i - __first, __extremum)) {
Expand All @@ -61,12 +62,12 @@ __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool
return __extremum.load() != __initial_dist ? __first + __extremum.load() : __last;
}

template <class _Index, class _DifferenceType, class _Compare>
template <class _Backend, class _Index, class _DifferenceType, class _Compare>
_LIBCPP_HIDE_FROM_ABI _Index
__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
// Experiments show good block sizes like this
const _DifferenceType __block_size = 8;
alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
const _DifferenceType __block_size = 8;
alignas(__pstl::__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
while (__end - __begin >= __block_size) {
_DifferenceType __found = 0;
_PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
Expand Down Expand Up @@ -102,7 +103,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator>
__pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
return std::__parallel_find(
return std::__parallel_find<__cpu_backend_tag>(
__first,
__last,
[&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
Expand All @@ -116,9 +117,10 @@ __pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __l
} else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
using __diff_t = __iter_diff_t<_ForwardIterator>;
return std::__simd_first(__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
return __pred(__iter[__i]);
});
return std::__simd_first<__cpu_backend_tag>(
__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
return __pred(__iter[__i]);
});
} else {
return std::find_if(__first, __last, __pred);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/empty.h>
#include <optional>
Expand All @@ -39,7 +40,7 @@ _LIBCPP_HIDE_FROM_ABI optional<__empty>
__pstl_for_each(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Functor __func) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
return std::__par_backend::__parallel_for(
return __pstl::__cpu_traits<__cpu_backend_tag>::__for_each(
__first, __last, [__func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
[[maybe_unused]] auto __res = std::__pstl_for_each<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __func);
Expand Down
457 changes: 231 additions & 226 deletions libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <optional>
Expand Down Expand Up @@ -45,7 +46,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_merge(
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
auto __res = __par_backend::__parallel_merge(
auto __res = __pstl::__cpu_traits<__cpu_backend_tag>::__merge(
__first1,
__last1,
__first2,
Expand Down
98 changes: 50 additions & 48 deletions libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_SERIAL_H

#include <__config>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__utility/empty.h>
#include <__utility/move.h>
#include <cstddef>
Expand All @@ -26,54 +27,55 @@ _LIBCPP_PUSH_MACROS
# include <__undef_macros>

_LIBCPP_BEGIN_NAMESPACE_STD

// Serial CPU backend. Every "parallel" primitive in this namespace performs its work through a single
// direct call to the callable it is given, and reports success by returning an engaged optional.
namespace __par_backend {
inline namespace __serial_cpu_backend {

// Calls __f once on the whole range [__first, __last) and returns an engaged optional<__empty>.
template <class _RandomAccessIterator, class _Fp>
_LIBCPP_HIDE_FROM_ABI optional<__empty>
__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) {
__f(__first, __last);
return __empty{};
}

// Returns the result of __reduce(__first, __last, __init). The unary and binary operation arguments are
// accepted but unused (they are unnamed parameters): only the __reduce callable is invoked.
template <class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce>
_LIBCPP_HIDE_FROM_ABI optional<_Tp>
__parallel_transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) {
return __reduce(std::move(__first), std::move(__last), std::move(__init));
}

// Calls __leaf_sort(__first, __last, __comp) once on the whole range and returns an engaged optional<__empty>.
template <class _RandomAccessIterator, class _Compare, class _LeafSort>
_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_stable_sort(
_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) {
__leaf_sort(__first, __last, __comp);
return __empty{};
}

// Intentionally empty: this function does nothing in this backend.
_LIBCPP_HIDE_FROM_ABI inline void __cancel_execution() {}

// Calls __leaf_merge once with both complete input ranges, the output iterator and the comparator, then
// returns an engaged optional<__empty>.
template <class _RandomAccessIterator1,
class _RandomAccessIterator2,
class _RandomAccessIterator3,
class _Compare,
class _LeafMerge>
_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_merge(
_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __outit,
_Compare __comp,
_LeafMerge __leaf_merge) {
__leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp);
return __empty{};
}

// TODO: Complete this list

} // namespace __serial_cpu_backend
} // namespace __par_backend

namespace __pstl {

// Tag type used to select the serial CPU backend.
struct __serial_backend_tag {};

// Basis operations of the serial backend. Each operation performs its work through a single direct call
// to the callable it receives and reports success by returning an engaged optional.
template <>
struct __cpu_traits<__serial_backend_tag> {
// Calls __f once on the whole range [__first, __last) and returns an engaged optional<__empty>.
template <class _RandomAccessIterator, class _Fp>
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) {
__f(__first, __last);
return __empty{};
}

// Returns the result of __reduce(__first, __last, __init). The unary and binary operation arguments are
// accepted but unused (they are unnamed parameters): only the __reduce callable is invoked.
template <class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce>
_LIBCPP_HIDE_FROM_ABI static optional<_Tp>
__transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) {
return __reduce(std::move(__first), std::move(__last), std::move(__init));
}

// Calls __leaf_sort(__first, __last, __comp) once on the whole range and returns an engaged optional<__empty>.
template <class _RandomAccessIterator, class _Compare, class _LeafSort>
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) {
__leaf_sort(__first, __last, __comp);
return __empty{};
}

// Intentionally empty: this function does nothing in this backend.
_LIBCPP_HIDE_FROM_ABI static void __cancel_execution() {}

// Calls __leaf_merge once with both complete input ranges, the output iterator and the comparator, then
// returns an engaged optional<__empty>.
template <class _RandomAccessIterator1,
class _RandomAccessIterator2,
class _RandomAccessIterator3,
class _Compare,
class _LeafMerge>
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__merge(_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __outit,
_Compare __comp,
_LeafMerge __leaf_merge) {
__leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp);
return __empty{};
}

// SIMD lane size. The SIMD helpers use this value both as an alignment and as the byte size of their
// lane buffers.
static constexpr size_t __lane_size = 64;
};

} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

_LIBCPP_POP_MACROS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
#include <__algorithm/stable_sort.h>
#include <__config>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/empty.h>
#include <optional>
Expand All @@ -28,7 +29,7 @@ template <class _ExecutionPolicy, class _RandomAccessIterator, class _Comp>
_LIBCPP_HIDE_FROM_ABI optional<__empty>
__pstl_stable_sort(__cpu_backend_tag, _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy>) {
return __par_backend::__parallel_stable_sort(
return __pstl::__cpu_traits<__cpu_backend_tag>::__stable_sort(
__first, __last, __comp, [](_RandomAccessIterator __g_first, _RandomAccessIterator __g_last, _Comp __g_comp) {
std::stable_sort(__g_first, __g_last, __g_comp);
});
Expand Down
96 changes: 50 additions & 46 deletions libcxx/include/__algorithm/pstl_backends/cpu_backends/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include <__assert>
#include <__config>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__utility/empty.h>
#include <__utility/move.h>
#include <cstddef>
Expand All @@ -29,52 +30,55 @@ _LIBCPP_PUSH_MACROS
// by a proper implementation once the PSTL implementation is somewhat stable.

_LIBCPP_BEGIN_NAMESPACE_STD

// std::thread CPU backend. As written here, no threads are created: every primitive performs its work
// through a single direct call to the callable it is given, and reports success by returning an engaged
// optional.
namespace __par_backend {
inline namespace __thread_cpu_backend {

// Calls __f once on the whole range [__first, __last) and returns an engaged optional<__empty>.
template <class _RandomAccessIterator, class _Fp>
_LIBCPP_HIDE_FROM_ABI optional<__empty>
__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) {
__f(__first, __last);
return __empty{};
}

// Returns the result of __reduce(__first, __last, __init). The unary and binary operation arguments are
// accepted but unused (they are unnamed parameters): only the __reduce callable is invoked.
template <class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce>
_LIBCPP_HIDE_FROM_ABI optional<_Tp>
__parallel_transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) {
return __reduce(std::move(__first), std::move(__last), std::move(__init));
}

// Calls __leaf_sort(__first, __last, __comp) once on the whole range and returns an engaged optional<__empty>.
template <class _RandomAccessIterator, class _Compare, class _LeafSort>
_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_stable_sort(
_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) {
__leaf_sort(__first, __last, __comp);
return __empty{};
}

// Intentionally empty: this function does nothing in this backend.
_LIBCPP_HIDE_FROM_ABI inline void __cancel_execution() {}

// Calls __leaf_merge once with both complete input ranges, the output iterator and the comparator, then
// returns an engaged optional<__empty>.
template <class _RandomAccessIterator1,
class _RandomAccessIterator2,
class _RandomAccessIterator3,
class _Compare,
class _LeafMerge>
_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_merge(
_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __outit,
_Compare __comp,
_LeafMerge __leaf_merge) {
__leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp);
return __empty{};
}

} // namespace __thread_cpu_backend
} // namespace __par_backend

namespace __pstl {

// Tag type used to select the std::thread CPU backend.
struct __std_thread_backend_tag {};

// Basis operations of the std::thread backend. As written here, no threads are created: each operation
// performs its work through a single direct call to the callable it receives and reports success by
// returning an engaged optional.
template <>
struct __cpu_traits<__std_thread_backend_tag> {
// Calls __f once on the whole range [__first, __last) and returns an engaged optional<__empty>.
template <class _RandomAccessIterator, class _Fp>
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Fp __f) {
__f(__first, __last);
return __empty{};
}

// Returns the result of __reduce(__first, __last, __init). The unary and binary operation arguments are
// accepted but unused (they are unnamed parameters): only the __reduce callable is invoked.
template <class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce>
_LIBCPP_HIDE_FROM_ABI static optional<_Tp>
__transform_reduce(_Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) {
return __reduce(std::move(__first), std::move(__last), std::move(__init));
}

// Calls __leaf_sort(__first, __last, __comp) once on the whole range and returns an engaged optional<__empty>.
template <class _RandomAccessIterator, class _Compare, class _LeafSort>
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort) {
__leaf_sort(__first, __last, __comp);
return __empty{};
}

// Intentionally empty: this function does nothing in this backend.
_LIBCPP_HIDE_FROM_ABI static void __cancel_execution() {}

// Calls __leaf_merge once with both complete input ranges, the output iterator and the comparator, then
// returns an engaged optional<__empty>.
template <class _RandomAccessIterator1,
class _RandomAccessIterator2,
class _RandomAccessIterator3,
class _Compare,
class _LeafMerge>
_LIBCPP_HIDE_FROM_ABI static optional<__empty>
__merge(_RandomAccessIterator1 __first1,
_RandomAccessIterator1 __last1,
_RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2,
_RandomAccessIterator3 __outit,
_Compare __comp,
_LeafMerge __leaf_merge) {
__leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp);
return __empty{};
}

// SIMD lane size. The SIMD helpers use this value both as an alignment and as the byte size of their
// lane buffers.
static constexpr size_t __lane_size = 64;
};

} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <__config>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/is_execution_policy.h>
#include <__type_traits/remove_cvref.h>
Expand Down Expand Up @@ -49,7 +50,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_transform(
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
std::__par_backend::__parallel_for(
__pstl::__cpu_traits<__cpu_backend_tag>::__for_each(
__first, __last, [__op, __first, __result](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
auto __res = std::__pstl_transform<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __result + (__brick_first - __first), __op);
Expand Down Expand Up @@ -97,7 +98,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardOutIterator> __pstl_transform(
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value &&
__has_random_access_iterator_category_or_concept<_ForwardOutIterator>::value) {
auto __res = std::__par_backend::__parallel_for(
auto __res = __pstl::__cpu_traits<__cpu_backend_tag>::__for_each(
__first1,
__last1,
[__op, __first1, __first2, __result](_ForwardIterator1 __brick_first, _ForwardIterator1 __brick_last) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__numeric/transform_reduce.h>
#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/desugars_to.h>
#include <__type_traits/is_arithmetic.h>
#include <__type_traits/is_execution_policy.h>
Expand All @@ -32,7 +33,8 @@ _LIBCPP_PUSH_MACROS

_LIBCPP_BEGIN_NAMESPACE_STD

template <typename _DifferenceType,
template <typename _Backend,
typename _DifferenceType,
typename _Tp,
typename _BinaryOperation,
typename _UnaryOperation,
Expand All @@ -48,7 +50,8 @@ __simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _Unar
return __init;
}

template <typename _Size,
template <typename _Backend,
typename _Size,
typename _Tp,
typename _BinaryOperation,
typename _UnaryOperation,
Expand All @@ -58,7 +61,8 @@ template <typename _Size,
int> = 0>
_LIBCPP_HIDE_FROM_ABI _Tp
__simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
const _Size __block_size = __lane_size / sizeof(_Tp);
constexpr size_t __lane_size = __pstl::__cpu_traits<_Backend>::__lane_size;
const _Size __block_size = __lane_size / sizeof(_Tp);
if (__n > 2 * __block_size && __block_size > 1) {
alignas(__lane_size) char __lane_buffer[__lane_size];
_Tp* __lane = reinterpret_cast<_Tp*>(__lane_buffer);
Expand Down Expand Up @@ -116,7 +120,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce(
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
return __par_backend::__parallel_transform_reduce(
return __pstl::__cpu_traits<__cpu_backend_tag>::__transform_reduce(
__first1,
std::move(__last1),
[__first1, __first2, __transform](_ForwardIterator1 __iter) {
Expand All @@ -138,7 +142,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce(
} else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
__has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
return std::__simd_transform_reduce(
return std::__simd_transform_reduce<__cpu_backend_tag>(
__last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
return __transform(__first1[__i], __first2[__i]);
});
Expand All @@ -163,7 +167,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce(
_UnaryOperation __transform) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
return __par_backend::__parallel_transform_reduce(
return __pstl::__cpu_traits<__cpu_backend_tag>::__transform_reduce(
std::move(__first),
std::move(__last),
[__transform](_ForwardIterator __iter) { return __transform(*__iter); },
Expand All @@ -182,7 +186,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_Tp> __pstl_transform_reduce(
});
} else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
return std::__simd_transform_reduce(
return std::__simd_transform_reduce<__cpu_backend_tag>(
__last - __first,
std::move(__init),
std::move(__reduce),
Expand Down
86 changes: 86 additions & 0 deletions libcxx/include/__pstl/cpu_algos/cpu_traits.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCPP___PSTL_CPU_ALGOS_CPU_TRAITS_H
#define _LIBCPP___PSTL_CPU_ALGOS_CPU_TRAITS_H

#include <__config>
#include <cstddef>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif

_LIBCPP_PUSH_MACROS
#include <__undef_macros>

_LIBCPP_BEGIN_NAMESPACE_STD
namespace __pstl {

// __cpu_traits
//
// This traits class encapsulates the basis operations for a CPU-based implementation of the PSTL.
// All the operations in the PSTL can be implemented from these basis operations, so a pure CPU backend
// only needs to customize these traits in order to get an implementation of the whole PSTL.
//
// Basis operations
// ================
//
// template <class _RandomAccessIterator, class _Functor>
// optional<__empty> __for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func);
// - __func must take a subrange of [__first, __last) that should be executed in serial
//
// template <class _Iterator, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduction>
// optional<_Tp> __transform_reduce(_Iterator __first, _Iterator __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduction);
//
// template <class _RandomAccessIterator1,
// class _RandomAccessIterator2,
// class _RandomAccessIterator3,
// class _Compare,
// class _LeafMerge>
// optional<__empty> __merge(_RandomAccessIterator1 __first1,
// _RandomAccessIterator1 __last1,
// _RandomAccessIterator2 __first2,
// _RandomAccessIterator2 __last2,
// _RandomAccessIterator3 __outit,
// _Compare __comp,
// _LeafMerge __leaf_merge);
//
// template <class _RandomAccessIterator, class _Comp, class _LeafSort>
// optional<__empty> __stable_sort(_RandomAccessIterator __first,
// _RandomAccessIterator __last,
// _Comp __comp,
// _LeafSort __leaf_sort);
//
// void __cancel_execution();
// Cancel the execution of other jobs - they aren't needed anymore. This is not a binding request,
// some backends may not actually be able to cancel jobs.
//
// constexpr size_t __lane_size;
// Size of SIMD lanes.
// TODO: Merge this with __native_vector_size from __algorithm/simd_utils.h
//
//
// Exception handling
// ==================
//
// CPU backends are expected to report errors (i.e. failure to allocate) by returning a disengaged `optional` from their
// implementation. Exceptions shouldn't be used to report an internal failure-to-allocate, since all exceptions are
// turned into a program termination at the front-end level. When a backend returns a disengaged `optional` to the
// frontend, the frontend will turn that into a call to `std::__throw_bad_alloc();` to report the internal failure to
// the user.

template <class _Backend>
struct __cpu_traits;

} // namespace __pstl
_LIBCPP_END_NAMESPACE_STD

_LIBCPP_POP_MACROS

#endif // _LIBCPP___PSTL_CPU_ALGOS_CPU_TRAITS_H
6 changes: 3 additions & 3 deletions libcxx/include/__utility/is_pointer_in_range.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/void_t.h>
#include <__utility/declval.h>
#include <__utility/is_valid_range.h>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
Expand All @@ -34,16 +35,15 @@ struct __is_less_than_comparable<_Tp, _Up, __void_t<decltype(std::declval<_Tp>()
template <class _Tp, class _Up, __enable_if_t<__is_less_than_comparable<const _Tp*, const _Up*>::value, int> = 0>
_LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_SANITIZE("address") bool __is_pointer_in_range(
const _Tp* __begin, const _Tp* __end, const _Up* __ptr) {
if (__libcpp_is_constant_evaluated()) {
_LIBCPP_ASSERT_VALID_INPUT_RANGE(__builtin_constant_p(__begin <= __end), "__begin and __end do not form a range");
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__begin, __end), "[__begin, __end) is not a valid range");

if (__libcpp_is_constant_evaluated()) {
// If this is not a constant during constant evaluation we know that __ptr is not part of the allocation where
// [__begin, __end) is.
if (!__builtin_constant_p(__begin <= __ptr && __ptr < __end))
return false;
}

// Checking this for unrelated pointers is technically UB, but no compiler optimizes based on it (currently).
return !__less<>()(__ptr, __begin) && __less<>()(__ptr, __end);
}

Expand Down
37 changes: 37 additions & 0 deletions libcxx/include/__utility/is_valid_range.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCPP___UTILITY_IS_VALID_RANGE_H
#define _LIBCPP___UTILITY_IS_VALID_RANGE_H

#include <__algorithm/comp.h>
#include <__config>
#include <__type_traits/is_constant_evaluated.h>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif

_LIBCPP_BEGIN_NAMESPACE_STD

// Reports whether the pointer pair [__first, __last) is ordered like a range, i.e. whether __last does
// not come before __first.
//
// At run time the pointers are compared through __less<>. During constant evaluation the comparison is
// first probed with __builtin_constant_p, and the pair is rejected when the comparison is not a constant.
template <class _Tp>
_LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_SANITIZE("address") bool
__is_valid_range(const _Tp* __first, const _Tp* __last) {
  // Run-time path: the range is valid unless __last orders strictly before __first.
  if (!__libcpp_is_constant_evaluated())
    return !__less<>()(__last, __first);

  // Constant-evaluation path: when the comparison is not a constant, __first and __last are not part of
  // the same allocation, so they cannot form a range.
  if (!__builtin_constant_p(__first <= __last))
    return false;

  // Both pointers belong to the same allocation; they must still be ordered properly.
  return __first <= __last;
}

_LIBCPP_END_NAMESPACE_STD

#endif // _LIBCPP___UTILITY_IS_VALID_RANGE_H
1 change: 1 addition & 0 deletions libcxx/include/libcxx.imp
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,7 @@
{ include: [ "<__utility/in_place.h>", "private", "<utility>", "public" ] },
{ include: [ "<__utility/integer_sequence.h>", "private", "<utility>", "public" ] },
{ include: [ "<__utility/is_pointer_in_range.h>", "private", "<utility>", "public" ] },
{ include: [ "<__utility/is_valid_range.h>", "private", "<utility>", "public" ] },
{ include: [ "<__utility/move.h>", "private", "<utility>", "public" ] },
{ include: [ "<__utility/no_destroy.h>", "private", "<utility>", "public" ] },
{ include: [ "<__utility/pair.h>", "private", "<utility>", "public" ] },
Expand Down
3 changes: 3 additions & 0 deletions libcxx/include/module.modulemap
Original file line number Diff line number Diff line change
Expand Up @@ -1613,6 +1613,8 @@ module std_private_numeric_transform_exclusive_scan [system] { header "__numeric
module std_private_numeric_transform_inclusive_scan [system] { header "__numeric/transform_inclusive_scan.h" }
module std_private_numeric_transform_reduce [system] { header "__numeric/transform_reduce.h" }

module std_private_pstl_cpu_algos_cpu_traits [system] { header "__pstl/cpu_algos/cpu_traits.h" }

module std_private_queue_fwd [system] { header "__fwd/queue.h" }

module std_private_random_bernoulli_distribution [system] { header "__random/bernoulli_distribution.h" }
Expand Down Expand Up @@ -2073,6 +2075,7 @@ module std_private_utility_forward_like [system] { header "__utility/f
module std_private_utility_in_place [system] { header "__utility/in_place.h" }
module std_private_utility_integer_sequence [system] { header "__utility/integer_sequence.h" }
module std_private_utility_is_pointer_in_range [system] { header "__utility/is_pointer_in_range.h" }
module std_private_utility_is_valid_range [system] { header "__utility/is_valid_range.h" }
module std_private_utility_move [system] {
header "__utility/move.h"
export std_private_type_traits_is_copy_constructible
Expand Down
7 changes: 2 additions & 5 deletions libcxx/src/pstl/libdispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
#include <dispatch/dispatch.h>

_LIBCPP_BEGIN_NAMESPACE_STD

namespace __par_backend::inline __libdispatch {
namespace __pstl::__libdispatch {

void __dispatch_apply(size_t chunk_count, void* context, void (*func)(void* context, size_t chunk)) noexcept {
::dispatch_apply_f(chunk_count, DISPATCH_APPLY_AUTO, context, func);
Expand All @@ -29,7 +28,5 @@ __chunk_partitions __partition_chunks(ptrdiff_t element_count) noexcept {
return partitions;
}

// NOLINTNEXTLINE(llvm-namespace-comment) // This is https://llvm.org/PR56804
} // namespace __par_backend::inline __libdispatch

} // namespace __pstl::__libdispatch
_LIBCPP_END_NAMESPACE_STD
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@

int main(int, char**) {
{
auto chunks = std::__par_backend::__libdispatch::__partition_chunks(0);
auto chunks = std::__pstl::__libdispatch::__partition_chunks(0);
assert(chunks.__chunk_count_ == 1);
assert(chunks.__first_chunk_size_ == 0);
assert(chunks.__chunk_size_ == 0);
}

{
auto chunks = std::__par_backend::__libdispatch::__partition_chunks(1);
auto chunks = std::__pstl::__libdispatch::__partition_chunks(1);
assert(chunks.__chunk_count_ == 1);
assert(chunks.__first_chunk_size_ == 1);
assert(chunks.__chunk_size_ == 1);
}

for (std::ptrdiff_t i = 2; i != 2ll << 20; ++i) {
auto chunks = std::__par_backend::__libdispatch::__partition_chunks(i);
auto chunks = std::__pstl::__libdispatch::__partition_chunks(i);
assert(chunks.__chunk_count_ >= 1);
assert(chunks.__chunk_count_ <= i);
assert((chunks.__chunk_count_ - 1) * chunks.__chunk_size_ + chunks.__first_chunk_size_ == i);
Expand Down
68 changes: 68 additions & 0 deletions libcxx/test/libcxx/utilities/is_valid_range.pass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <__utility/is_valid_range.h>
#include <cassert>

#include "test_macros.h"

// Exercises std::__is_valid_range with pointers of type TQualified* that point into objects of type T.
//
// T is the element type of the storage being pointed into; TQualified is T with the cv-qualification
// under test. Every expectation is checked with assert, so the function can also run during constant
// evaluation where TEST_CONSTEXPR_CXX14 makes it constexpr.
template <class T, class TQualified>
TEST_CONSTEXPR_CXX14 void check_type() {
  {
    // We need to ensure that the addresses of i and j are ordered as &i < &j for
    // the test below to work portably, so we define them in a struct.
    struct {
      T i = 0;
      T j = 0;
    } storage;

    // Empty range and one-element range over the same object.
    assert(std::__is_valid_range(static_cast<TQualified*>(&storage.i), static_cast<TQualified*>(&storage.i)));
    assert(std::__is_valid_range(static_cast<TQualified*>(&storage.i), static_cast<TQualified*>(&storage.i + 1)));

    // Reversed bounds must be rejected.
    assert(!std::__is_valid_range(static_cast<TQualified*>(&storage.j), static_cast<TQualified*>(&storage.i)));
    assert(!std::__is_valid_range(static_cast<TQualified*>(&storage.i + 1), static_cast<TQualified*>(&storage.i)));

    // We detect this as being a valid range even though it is not really valid.
    assert(std::__is_valid_range(static_cast<TQualified*>(&storage.i), static_cast<TQualified*>(&storage.j)));
  }

  {
    // Pointers into a single array: forward pairs are valid, backward pairs are not.
    T arr[3] = {1, 2, 3};
    assert(std::__is_valid_range(static_cast<TQualified*>(&arr[0]), static_cast<TQualified*>(&arr[0])));
    assert(std::__is_valid_range(static_cast<TQualified*>(&arr[0]), static_cast<TQualified*>(&arr[1])));
    assert(std::__is_valid_range(static_cast<TQualified*>(&arr[0]), static_cast<TQualified*>(&arr[2])));

    assert(!std::__is_valid_range(static_cast<TQualified*>(&arr[1]), static_cast<TQualified*>(&arr[0])));
    assert(!std::__is_valid_range(static_cast<TQualified*>(&arr[2]), static_cast<TQualified*>(&arr[0])));
  }

#if TEST_STD_VER >= 20
  {
    // Dynamically allocated storage. The element type is T (not a hard-coded int) so that this helper
    // keeps compiling if it is ever instantiated with another element type.
    T* arr = new T[4]{1, 2, 3, 4};
    assert(std::__is_valid_range(static_cast<TQualified*>(arr), static_cast<TQualified*>(arr + 4)));
    delete[] arr;
  }
#endif
}

// Runs check_type for int objects viewed through every cv-qualification of
// int. The function always returns true so that main() can evaluate it inside
// a static_assert as well as at run time.
TEST_CONSTEXPR_CXX14 bool test() {
  check_type<int, int>();
  check_type<int, const int>();
  check_type<int, volatile int>();
  check_type<int, const volatile int>();
  return true;
}

int main(int, char**) {
#if TEST_STD_VER >= 14
  // Constant-evaluate the whole suite where test() is constexpr.
  static_assert(test(), "");
#endif
  // Run the same checks at run time.
  test();
  return 0;
}
10 changes: 7 additions & 3 deletions libcxx/utils/ci/run-buildbot
Original file line number Diff line number Diff line change
Expand Up @@ -368,18 +368,22 @@ bootstrapping-build)
-DCMAKE_CXX_COMPILER_LAUNCHER="ccache" \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \
-DLLVM_ENABLE_PROJECTS="clang" \
-DLLVM_ENABLE_PROJECTS="clang;lldb" \
-DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \
-DLLVM_RUNTIME_TARGETS="$(${CXX} --print-target-triple)" \
-DLLVM_HOST_TRIPLE="$(${CXX} --print-target-triple)" \
-DLLVM_TARGETS_TO_BUILD="host" \
-DRUNTIMES_BUILD_ALLOW_DARWIN=ON \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DLLVM_LIT_ARGS="-sv --xunit-xml-output test-results.xml --timeout=1500 --time-tests"

echo "+++ Running the libc++ and libc++abi tests"
echo "+++ Running the LLDB libc++ data formatter tests"
${NINJA} -vC "${BUILD_DIR}" check-lldb-api-functionalities-data-formatter-data-formatter-stl-libcxx

echo "--- Running the libc++ and libc++abi tests"
${NINJA} -vC "${BUILD_DIR}" check-runtimes

echo "--- Installing libc++ and libc++abi to a fake location"
echo "+++ Installing libc++ and libc++abi to a fake location"
${NINJA} -vC "${BUILD_DIR}" install-runtimes

ccache -s
Expand Down
1 change: 1 addition & 0 deletions libcxx/utils/generate_iwyu_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def IWYU_mapping(header: str) -> typing.Optional[typing.List[str]]:
ignore = [
"__debug_utils/.+",
"__fwd/get[.]h",
"__pstl/.+",
"__support/.+",
"__utility/private_constructor_tag.h",
]
Expand Down
5 changes: 5 additions & 0 deletions libcxxabi/src/aix_state_tab_eh.inc
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,13 @@ static void invoke_destructor(FSMEntry* fsmEntry, void* addr) {
_LIBCXXABI_TRACE_STATETAB0("returned from scalar destructor\n");
} else {
_LIBCXXABI_TRACE_STATETAB0("calling vector destructor\n");
// TODO: in the legacy ABI, destructors had a second argument. We don't expect to encounter
// destructors of this type in the itanium-based ABI, so this should be safe, but this could use some cleanup.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-function-type"
__cxa_vec_cleanup(addr, reinterpret_cast<size_t>(fsmEntry->elementCount), fsmEntry->elemSize,
reinterpret_cast<destruct_f>(fsmEntry->destructor));
#pragma GCC diagnostic pop
_LIBCXXABI_TRACE_STATETAB0("returned from vector destructor\n");
}
} catch (...) {
Expand Down
7 changes: 7 additions & 0 deletions lld/COFF/Chunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,13 @@ void SectionChunk::getRuntimePseudoRelocs(
dyn_cast_or_null<Defined>(file->getSymbol(rel.SymbolTableIndex));
if (!target || !target->isRuntimePseudoReloc)
continue;
// If the target doesn't have a chunk allocated, it may be a
// DefinedImportData symbol which ended up unnecessary after GC.
// Normally we wouldn't eliminate section chunks that are referenced, but
// references within DWARF sections don't count for keeping section chunks
// alive. Thus such dangling references in DWARF sections are expected.
if (!target->getChunk())
continue;
int sizeInBits =
getRuntimePseudoRelocSize(rel.Type, file->ctx.config.machine);
if (sizeInBits == 0) {
Expand Down
10 changes: 9 additions & 1 deletion lld/COFF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2072,8 +2072,16 @@ void Writer::createRuntimePseudoRelocs() {
return;
}

if (!rels.empty())
if (!rels.empty()) {
log("Writing " + Twine(rels.size()) + " runtime pseudo relocations");
const char *symbolName = "_pei386_runtime_relocator";
Symbol *relocator = ctx.symtab.findUnderscore(symbolName);
if (!relocator)
error("output image has runtime pseudo relocations, but the function " +
Twine(symbolName) +
" is missing; it is needed for fixing the relocations at runtime");
}

PseudoRelocTableChunk *table = make<PseudoRelocTableChunk>(rels);
rdataSec->addChunk(table);
EmptyChunk *endOfList = make<EmptyChunk>();
Expand Down
6 changes: 5 additions & 1 deletion lld/ELF/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,11 @@ void InputSection::copyRelocations(uint8_t *buf,
addend += sec->getFile<ELFT>()->mipsGp0;
}

if (RelTy::IsRela)
if (config->emachine == EM_LOONGARCH && type == R_LARCH_ALIGN)
// LoongArch psABI v2.30: R_LARCH_ALIGN requires a symbol index.
// If it uses the section symbol, the addend should not be changed.
p->r_addend = addend;
else if (RelTy::IsRela)
p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr;
// For SHF_ALLOC sections relocated by REL, append a relocation to
// sec->relocations so that relocateAlloc transitively called by
Expand Down
8 changes: 4 additions & 4 deletions lld/MachO/ObjC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ class ObjcCategoryMerger {
const PointerListInfo &ptrList);

Defined *emitCategory(const ClassExtensionInfo &extInfo);
Defined *emitCatListEntrySec(const std::string &forCateogryName,
Defined *emitCatListEntrySec(const std::string &forCategoryName,
const std::string &forBaseClassName,
ObjFile *objFile);
Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
Expand Down Expand Up @@ -878,7 +878,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList(

// This method creates an __objc_catlist ConcatInputSection with a single slot
Defined *
ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCateogryName,
ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
const std::string &forBaseClassName,
ObjFile *objFile) {
uint32_t sectionSize = target->wordSize;
Expand All @@ -894,7 +894,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCateogryName,
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;

std::string catSymName = "<__objc_catlist slot for merged category ";
catSymName += forBaseClassName + "(" + forCateogryName + ")>";
catSymName += forBaseClassName + "(" + forCategoryName + ")>";

Defined *catListSym = make<Defined>(
newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
Expand Down Expand Up @@ -1069,7 +1069,7 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
off += target->wordSize) {
Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
assert(categorySym &&
"Failed to get a valid cateogry at __objc_catlit offset");
"Failed to get a valid category at __objc_catlit offset");

// We only support ObjC categories (no swift + @objc)
// TODO: Support swift + @objc categories also
Expand Down
3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-arm-data.s
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
.text
.thumb
main:
bx lr
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
bx lr
.data
ptr:
Expand Down
3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-arm64-data.s
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
.global main
.text
main:
ret
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
ret
.data
ptr:
Expand Down
41 changes: 41 additions & 0 deletions lld/test/COFF/autoimport-gc.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# REQUIRES: x86
# RUN: split-file %s %t.dir

## Check that linking succeeds when a DWARF section refers to an auto-imported
## data symbol whose only other user is garbage collected.
##
## lib.s is linked into lib.dll (with import library lib.lib) and defines the
## data symbol `variable`. main.s is then linked against lib.lib with -opt:ref
## and -debug:dwarf. In main.s, `variable` is referenced from
## .refptr.variable (used only by `other`, which the entry point `main` never
## references) and from .debug_info.
##
## There are no FileCheck lines; the test passes if every command above and
## below exits successfully.

# RUN: llvm-mc -triple=x86_64-windows-gnu %t.dir/lib.s -filetype=obj -o %t.dir/lib.obj
# RUN: lld-link -out:%t.dir/lib.dll -dll -entry:DllMainCRTStartup %t.dir/lib.obj -lldmingw -implib:%t.dir/lib.lib

# RUN: llvm-mc -triple=x86_64-windows-gnu %t.dir/main.s -filetype=obj -o %t.dir/main.obj
# RUN: lld-link -lldmingw -out:%t.dir/main.exe -entry:main %t.dir/main.obj %t.dir/lib.lib -opt:ref -debug:dwarf

#--- main.s
.global main
.section .text$main,"xr",one_only,main
main:
ret

## `other` lives in its own section and is not referenced by `main`.
.global other
.section .text$other,"xr",one_only,other
other:
movq .refptr.variable(%rip), %rax
movl (%rax), %eax
ret

.section .rdata$.refptr.variable,"dr",discard,.refptr.variable
.global .refptr.variable
.refptr.variable:
.quad variable

## Debug info that also points at the imported `variable`.
.section .debug_info
.long 1
.quad variable
.long 2

#--- lib.s
.global variable
.global DllMainCRTStartup
.text
DllMainCRTStartup:
ret
.data
variable:
.long 42
3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-gnu-implib.s
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,8 @@
.text
main:
movl data(%rip), %eax
ret
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
ret
.data
36 changes: 36 additions & 0 deletions lld/test/COFF/autoimport-handler-func.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# REQUIRES: x86
# RUN: split-file %s %t.dir

## Check the diagnostic for a missing _pei386_runtime_relocator function.
##
## lib.def exports `variable` as DATA and is turned into the import library
## lib.lib. main.s stores the address of `variable` in .data, which the first
## link below is expected to report as needing runtime pseudo relocations.
##
## The first link omits func.obj and must fail with the message checked
## below. The second link adds func.obj, which defines
## _pei386_runtime_relocator, and must not print any error.

# RUN: llvm-dlltool -m i386:x86-64 -d %t.dir/lib.def -D lib.dll -l %t.dir/lib.lib

# RUN: llvm-mc -triple=x86_64-windows-gnu %t.dir/main.s -filetype=obj -o %t.dir/main.obj
# RUN: llvm-mc -triple=x86_64-windows-gnu %t.dir/func.s -filetype=obj -o %t.dir/func.obj
# RUN: env LLD_IN_TEST=1 not lld-link -lldmingw -out:%t.dir/main.exe -entry:main %t.dir/main.obj %t.dir/lib.lib 2>&1 | FileCheck %s --check-prefix=ERR

# RUN: lld-link -lldmingw -out:%t.dir/main.exe -entry:main %t.dir/main.obj %t.dir/func.obj %t.dir/lib.lib 2>&1 | FileCheck %s --check-prefix=NOERR --allow-empty

# ERR: error: output image has runtime pseudo relocations, but the function _pei386_runtime_relocator is missing; it is needed for fixing the relocations at runtime

# NOERR-NOT: error

#--- main.s
.global main
.text
main:
ret

## Data reference to the symbol imported from lib.dll.
.data
.long 1
.quad variable
.long 2

#--- func.s
.global _pei386_runtime_relocator
.text
_pei386_runtime_relocator:
ret

#--- lib.def
EXPORTS
variable DATA

3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-warn.s
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ main:
movl variable2(%rip), %ecx
addl %ecx, %eax
ret
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
ret

.section .rdata$.refptr.variable1,"dr",discard,.refptr.variable1
.global .refptr.variable1
Expand Down
3 changes: 3 additions & 0 deletions lld/test/COFF/autoimport-x86.s
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@
.text
main:
movl variable(%rip), %eax
ret
.global _pei386_runtime_relocator
_pei386_runtime_relocator:
ret
.data
ptr:
Expand Down
28 changes: 28 additions & 0 deletions lld/test/ELF/loongarch-relax-align-ldr.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# REQUIRES: loongarch
## Test that `ld -r` does not change the addend of R_LARCH_ALIGN.

# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
# RUN: ld.lld -r %t.64.o %t.64.o -o %t.64.r
# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s

# CHECK: <.text>:
# CHECK-NEXT: break 1
# CHECK-NEXT: nop
# CHECK-NEXT: {{0*}}04: R_LARCH_ALIGN .text+0x804
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-NEXT: break 2
# CHECK-NEXT: break 0
# CHECK-NEXT: break 0
# CHECK-NEXT: break 0
# CHECK-NEXT: break 1
# CHECK-NEXT: nop
# CHECK-NEXT: {{0*}}24: R_LARCH_ALIGN .text+0x804
# CHECK-NEXT: nop
# CHECK-NEXT: nop
# CHECK-NEXT: break 2

.text
break 1
.p2align 4, , 8
break 2
5 changes: 3 additions & 2 deletions lld/test/ELF/loongarch-relax-emit-relocs.s
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# CHECK-NEXT: R_LARCH_PCALA_LO12 _start
# CHECK-NEXT: R_LARCH_RELAX *ABS*
# CHECK-NEXT: nop
# CHECK-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4
# CHECK-NEXT: R_LARCH_ALIGN .text+0x4
# CHECK-NEXT: nop
# CHECK-NEXT: ret

Expand All @@ -37,11 +37,12 @@
# CHECKR-NEXT: R_LARCH_PCALA_LO12 _start
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
# CHECKR-NEXT: nop
# CHECKR-NEXT: R_LARCH_ALIGN .Lla-relax-align0+0x4
# CHECKR-NEXT: R_LARCH_ALIGN .text+0x4
# CHECKR-NEXT: nop
# CHECKR-NEXT: nop
# CHECKR-NEXT: ret

.text
.global _start
_start:
la.pcrel $a0, _start
Expand Down
2 changes: 1 addition & 1 deletion lldb/cmake/modules/LLDBFramework.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ add_custom_command(TARGET liblldb POST_BUILD
if(NOT APPLE_EMBEDDED)
if (TARGET clang-resource-headers)
add_dependencies(liblldb clang-resource-headers)
set(clang_resource_headers_dir $<TARGET_PROPERTY:clang-resource-headers,RUNTIME_OUTPUT_DIRECTORY>)
set(clang_resource_headers_dir $<TARGET_PROPERTY:clang-resource-headers,INTERFACE_INCLUDE_DIRECTORIES>)
else()
set(clang_resource_headers_dir ${LLDB_EXTERNAL_CLANG_RESOURCE_DIR}/include)
if(NOT EXISTS ${clang_resource_headers_dir})
Expand Down
2 changes: 1 addition & 1 deletion lldb/include/lldb/lldb-enumerations.h
Original file line number Diff line number Diff line change
Expand Up @@ -1310,7 +1310,7 @@ enum CompletionType {

/// Specifies if children need to be re-computed
/// after a call to \ref SyntheticChildrenFrontEnd::Update.
enum class ChildCacheState {
enum ChildCacheState {
eRefetch = 0, ///< Children need to be recomputed dynamically.

eReuse = 1, ///< Children did not change and don't need to be recomputed;
Expand Down
2 changes: 2 additions & 0 deletions lldb/packages/Python/lldbsuite/test/lldbtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ def setUpCommands(cls):
"settings set symbols.enable-external-lookup false",
# Inherit the TCC permissions from the inferior's parent.
"settings set target.inherit-tcc true",
# Based on https://discourse.llvm.org/t/running-lldb-in-a-container/76801/4
"settings set target.disable-aslr false",
# Kill rather than detach from the inferior if something goes wrong.
"settings set target.detach-on-error false",
# Disable fix-its by default so that incorrect expressions in tests don't
Expand Down
Loading