Original file line number Diff line number Diff line change
Expand Up @@ -1847,8 +1847,12 @@ void loop() {
// CHECK6-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
// CHECK6-NEXT: br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
// CHECK6: .omp.linear.pu.i:
// CHECK6-NEXT: [[TMP39:%.*]] = load i32, i32* [[J_I]], align 4, !noalias !14
// CHECK6-NEXT: store i32 [[TMP39]], i32* [[J_I]], align 4, !noalias !14
// CHECK6-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 0
// CHECK6-NEXT: [[TMP40:%.*]] = load i32*, i32** [[TMP39]], align 8
// CHECK6-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 1
// CHECK6-NEXT: [[TMP42:%.*]] = load i32*, i32** [[TMP41]], align 8
// CHECK6-NEXT: [[TMP43:%.*]] = load i32, i32* [[J_I]], align 4, !noalias !14
// CHECK6-NEXT: store i32 [[TMP43]], i32* [[TMP42]], align 4
// CHECK6-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]]
// CHECK6: .omp_outlined..1.exit:
// CHECK6-NEXT: ret i32 0
Expand Down
3 changes: 1 addition & 2 deletions clang/test/Preprocessor/aarch64-target-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@

// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+sve -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE %s
// CHECK-SVE: __ARM_FEATURE_SVE 1
// CHECK-SVE: __ARM_FEATURE_SVE_VECTOR_OPERATORS 2

// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+sve+bf16 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-BF16 %s
// CHECK-SVE-BF16: __ARM_FEATURE_BF16_SCALAR_ARITHMETIC 1
Expand Down Expand Up @@ -512,9 +513,7 @@
// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS -D#VBITS=2048 %s
// RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=512+ -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-NO-SVE-VECTOR-BITS %s
// CHECK-SVE-VECTOR-BITS: __ARM_FEATURE_SVE_BITS [[#VBITS:]]
// CHECK-SVE-VECTOR-BITS: __ARM_FEATURE_SVE_VECTOR_OPERATORS 1
// CHECK-NO-SVE-VECTOR-BITS-NOT: __ARM_FEATURE_SVE_BITS
// CHECK-NO-SVE-VECTOR-BITS-NOT: __ARM_FEATURE_SVE_VECTOR_OPERATORS

// ================== Check Large System Extensions (LSE)
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+lse -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s
Expand Down
4 changes: 4 additions & 0 deletions clang/test/Preprocessor/predefined-arch-macros.c
Original file line number Diff line number Diff line change
Expand Up @@ -3216,6 +3216,7 @@
// CHECK_ZNVER2_M32: #define __POPCNT__ 1
// CHECK_ZNVER2_M32: #define __PRFCHW__ 1
// CHECK_ZNVER2_M32: #define __RDPID__ 1
// CHECK_ZNVER2_M32: #define __RDPRU__ 1
// CHECK_ZNVER2_M32: #define __RDRND__ 1
// CHECK_ZNVER2_M32: #define __RDSEED__ 1
// CHECK_ZNVER2_M32: #define __SHA__ 1
Expand Down Expand Up @@ -3266,6 +3267,7 @@
// CHECK_ZNVER2_M64: #define __POPCNT__ 1
// CHECK_ZNVER2_M64: #define __PRFCHW__ 1
// CHECK_ZNVER2_M64: #define __RDPID__ 1
// CHECK_ZNVER2_M64: #define __RDPRU__ 1
// CHECK_ZNVER2_M64: #define __RDRND__ 1
// CHECK_ZNVER2_M64: #define __RDSEED__ 1
// CHECK_ZNVER2_M64: #define __SHA__ 1
Expand Down Expand Up @@ -3318,6 +3320,7 @@
// CHECK_ZNVER3_M32: #define __POPCNT__ 1
// CHECK_ZNVER3_M32: #define __PRFCHW__ 1
// CHECK_ZNVER3_M32: #define __RDPID__ 1
// CHECK_ZNVER3_M32: #define __RDPRU__ 1
// CHECK_ZNVER3_M32: #define __RDRND__ 1
// CHECK_ZNVER3_M32: #define __RDSEED__ 1
// CHECK_ZNVER3_M32: #define __SHA__ 1
Expand Down Expand Up @@ -3368,6 +3371,7 @@
// CHECK_ZNVER3_M64: #define __POPCNT__ 1
// CHECK_ZNVER3_M64: #define __PRFCHW__ 1
// CHECK_ZNVER3_M64: #define __RDPID__ 1
// CHECK_ZNVER3_M64: #define __RDPRU__ 1
// CHECK_ZNVER3_M64: #define __RDRND__ 1
// CHECK_ZNVER3_M64: #define __RDSEED__ 1
// CHECK_ZNVER3_M64: #define __SHA__ 1
Expand Down
2 changes: 0 additions & 2 deletions clang/test/Sema/sizeless-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,8 @@ void func(int sel) {
global_int8_ptr -= 1; // expected-error {{arithmetic on a pointer to sizeless type}}
global_int8_ptr - global_int8_ptr; // expected-error {{arithmetic on a pointer to sizeless type}}

+init_int8; // expected-error {{invalid argument type 'svint8_t'}}
++init_int8; // expected-error {{cannot increment value of type 'svint8_t'}}
init_int8++; // expected-error {{cannot increment value of type 'svint8_t'}}
-init_int8; // expected-error {{invalid argument type 'svint8_t'}}
--init_int8; // expected-error {{cannot decrement value of type 'svint8_t'}}
init_int8--; // expected-error {{cannot decrement value of type 'svint8_t'}}
!init_int8; // expected-error {{invalid argument type 'svint8_t'}}
Expand Down
2 changes: 0 additions & 2 deletions clang/test/SemaCXX/sizeless-1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,8 @@ void func(int sel) {
global_int8_ptr -= 1; // expected-error {{arithmetic on a pointer to sizeless type}}
global_int8_ptr - global_int8_ptr; // expected-error {{arithmetic on a pointer to sizeless type}}

+init_int8; // expected-error {{invalid argument type 'svint8_t'}}
++init_int8; // expected-error {{cannot increment value of type 'svint8_t'}}
init_int8++; // expected-error {{cannot increment value of type 'svint8_t'}}
-init_int8; // expected-error {{invalid argument type 'svint8_t'}}
--init_int8; // expected-error {{cannot decrement value of type 'svint8_t'}}
init_int8--; // expected-error {{cannot decrement value of type 'svint8_t'}}
!init_int8; // expected-error {{invalid argument type 'svint8_t'}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,13 @@ bool test() {
# else
full_size();
# endif
# elif defined(__powerpc__) || defined(__powerpc64__) || defined(__sparc64__)
# elif defined(__powerpc__) || defined(__powerpc64__)
# ifdef __BIG_ENDIAN__
half_size();
# else
full_size();
# endif
# elif defined(__sparc64__)
half_size();
# elif defined(_WIN32)
full_size();
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1734,6 +1734,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
!getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1);
Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);

bool HasLeaf7 =
Expand Down
16 changes: 14 additions & 2 deletions openmp/libomptarget/DeviceRTL/src/Kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ static void genericStateMachine(IdentTy *Ident) {
uint32_t TId = mapping::getThreadIdInBlock();

do {
ParallelRegionFnTy WorkFn = 0;
ParallelRegionFnTy WorkFn = nullptr;

// Wait for the signal that we have a new work function.
synchronize::threads();
Expand Down Expand Up @@ -100,8 +100,20 @@ int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
// doing any work. mapping::getBlockSize() does not include any of the main
// thread's warp, so none of its threads can ever be active worker threads.
if (UseGenericStateMachine &&
mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD))
mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) {
genericStateMachine(Ident);
} else {
// Retrieve the work function just to ensure we always call
// __kmpc_kernel_parallel, even if a custom state machine is used.
// TODO: This is not pretty. The problem is that the call to
// __kmpc_kernel_parallel is created in the openmp-opt pass, so it is not
// there yet while we optimize. Thus, we assume we never reach it from
// __kmpc_target_deinit. That allows us to remove the store to
// ParallelRegionFn in there, which leads to bad results later on.
ParallelRegionFnTy WorkFn = nullptr;
__kmpc_kernel_parallel(&WorkFn);
ASSERT(WorkFn == nullptr);
}

return mapping::getThreadIdInBlock();
}
Expand Down