276 changes: 174 additions & 102 deletions clang/lib/Headers/__clang_cuda_device_functions.h

Large diffs are not rendered by default.

188 changes: 110 additions & 78 deletions clang/lib/Headers/__clang_hip_cmath.h

Large diffs are not rendered by default.

50 changes: 46 additions & 4 deletions clang/lib/Headers/__clang_hip_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifndef __CLANG_HIP_MATH_H__
#define __CLANG_HIP_MATH_H__

#if !defined(__HIP__)
#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif

Expand All @@ -19,18 +19,30 @@
#endif
#include <limits.h>
#include <stdint.h>
#endif // __HIPCC_RTC__
#ifdef __OPENMP_AMDGCN__
#include <omp.h>
#endif
#endif // !defined(__HIPCC_RTC__)

#pragma push_macro("__DEVICE__")

#ifdef __OPENMP_AMDGCN__
#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif

// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
#ifdef __OPENMP_AMDGCN__
#define __RETURN_TYPE int
#else
#if defined(__cplusplus)
#define __RETURN_TYPE bool
#else
#define __RETURN_TYPE int
#endif
#endif // __OPENMP_AMDGCN__

#if defined (__cplusplus) && __cplusplus < 201103L
// emulate static_assert on type sizes
Expand Down Expand Up @@ -249,6 +261,9 @@ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
__DEVICE__
float frexpf(float __x, int *__nptr) {
int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
float __r =
__ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
*__nptr = __tmp;
Expand Down Expand Up @@ -334,6 +349,9 @@ long int lroundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__
float modff(float __x, float *__iptr) {
float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
float __r =
__ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__iptr = __tmp;
Expand Down Expand Up @@ -414,6 +432,9 @@ float remainderf(float __x, float __y) {
__DEVICE__
float remquof(float __x, float __y, int *__quo) {
int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
float __r = __ocml_remquo_f32(
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
*__quo = __tmp;
Expand Down Expand Up @@ -470,6 +491,9 @@ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
__DEVICE__
void sincosf(float __x, float *__sinptr, float *__cosptr) {
float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
*__sinptr =
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
Expand All @@ -478,6 +502,9 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
__DEVICE__
void sincospif(float __x, float *__sinptr, float *__cosptr) {
float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
*__sinptr = __ocml_sincospi_f32(
__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
Expand Down Expand Up @@ -790,6 +817,9 @@ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
__DEVICE__
double frexp(double __x, int *__nptr) {
int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
double __r =
__ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
*__nptr = __tmp;
Expand Down Expand Up @@ -874,6 +904,9 @@ long int lround(double __x) { return __ocml_round_f64(__x); }
__DEVICE__
double modf(double __x, double *__iptr) {
double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
double __r =
__ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
*__iptr = __tmp;
Expand Down Expand Up @@ -962,6 +995,9 @@ double remainder(double __x, double __y) {
__DEVICE__
double remquo(double __x, double __y, int *__quo) {
int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
double __r = __ocml_remquo_f64(
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
*__quo = __tmp;
Expand Down Expand Up @@ -1020,6 +1056,9 @@ double sin(double __x) { return __ocml_sin_f64(__x); }
__DEVICE__
void sincos(double __x, double *__sinptr, double *__cosptr) {
double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
*__sinptr = __ocml_sincos_f64(
__x, (__attribute__((address_space(5))) double *)&__tmp);
*__cosptr = __tmp;
Expand All @@ -1028,6 +1067,9 @@ void sincos(double __x, double *__sinptr, double *__cosptr) {
__DEVICE__
void sincospi(double __x, double *__sinptr, double *__cosptr) {
double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
*__sinptr = __ocml_sincospi_f64(
__x, (__attribute__((address_space(5))) double *)&__tmp);
*__cosptr = __tmp;
Expand Down Expand Up @@ -1262,15 +1304,15 @@ float min(float __x, float __y) { return fminf(__x, __y); }
// Double-precision overload of min; delegates to fmin and returns its result.
__DEVICE__
double min(double __x, double __y) {
  const double __smaller = fmin(__x, __y);
  return __smaller;
}

#if !defined(__HIPCC_RTC__)
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
// Host-side int overload of min: forwards both arguments to std::min and
// returns the result by value.
__host__ inline static int min(int __arg1, int __arg2) {
  const int __lesser = std::min(__arg1, __arg2);
  return __lesser;
}

// Host-side int overload of max: forwards both arguments to std::max and
// returns the result by value.
__host__ inline static int max(int __arg1, int __arg2) {
  const int __greater = std::max(__arg1, __arg2);
  return __greater;
}
#endif // __HIPCC_RTC__
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif

#pragma pop_macro("__DEVICE__")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
#error "This file is for OpenMP compilation only."
#endif

#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})

#ifdef __cplusplus
extern "C" {
#endif

#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})

#define __CUDA__
#define __OPENMP_NVPTX__

Expand All @@ -33,11 +33,33 @@ extern "C" {
#undef __OPENMP_NVPTX__
#undef __CUDA__

#ifdef __cplusplus
} // extern "C"
#pragma omp end declare variant

#ifdef __AMDGCN__
#pragma omp begin declare variant match(device = {arch(amdgcn)})

// Import types which will be used by __clang_hip_libdevice_declares.h
#ifndef __cplusplus
#include <stdbool.h>
#include <stdint.h>
#endif

#define __OPENMP_AMDGCN__
#pragma push_macro("__device__")
#define __device__

/// Include declarations for libdevice functions.
#include <__clang_hip_libdevice_declares.h>

#pragma pop_macro("__device__")
#undef __OPENMP_AMDGCN__

#pragma omp end declare variant
#endif

#ifdef __cplusplus
} // extern "C"
#endif

// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
// need to `include <new>` in C++ mode.
Expand Down
54 changes: 54 additions & 0 deletions clang/lib/Headers/openmp_wrappers/cmath
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,58 @@ __DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }

#pragma omp end declare variant

#ifdef __AMDGCN__
#pragma omp begin declare variant match(device = {arch(amdgcn)})

#pragma push_macro("__constant__")
#define __constant__ __attribute__((constant))
#define __OPENMP_AMDGCN__

#include <__clang_hip_cmath.h>

#pragma pop_macro("__constant__")
#undef __OPENMP_AMDGCN__

// Define the float overloads that would otherwise be absent for the amdgcn
// variant. Each one takes float arguments and forwards them unchanged to the
// global-namespace 'f'-suffixed function of the same name.
// NOTE(review): presumably these are the overloads <__clang_hip_cmath.h> does
// not provide -- confirm against that header.
// Every wrapper below is static constexpr, always_inline and nothrow.
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))

__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float erf(float __x) { return ::erff(__x); }
__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
// Note: ilogb returns int, not float.
__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
__DEVICE__ float ldexp(float __arg, int __exp) {
return ::ldexpf(__arg, __exp);
}
__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
__DEVICE__ float logb(float __x) { return ::logbf(__x); }
__DEVICE__ float nextafter(float __x, float __y) {
return ::nextafterf(__x, __y);
}
__DEVICE__ float remainder(float __x, float __y) {
return ::remainderf(__x, __y);
}
__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }

// Drop the helper macro so it does not leak past this block.
#undef __DEVICE__

#pragma omp end declare variant
#endif // __AMDGCN__

#endif
10 changes: 10 additions & 0 deletions clang/lib/Headers/openmp_wrappers/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,14 @@

#pragma omp end declare variant

#ifdef __AMDGCN__
#pragma omp begin declare variant match(device = {arch(amdgcn)})

#define __OPENMP_AMDGCN__
#include <__clang_hip_math.h>
#undef __OPENMP_AMDGCN__

#pragma omp end declare variant
#endif

#endif
3 changes: 3 additions & 0 deletions clang/test/Driver/amdgpu-openmp-toolchain.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,6 @@

// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
// CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm"

// RUN: env LIBRARY_PATH=%S/Inputs/hip_dev_lib %clang -### -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -lm --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIB-DEVICE
// CHECK-LIB-DEVICE: {{.*}}llvm-link{{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
6 changes: 6 additions & 0 deletions clang/test/Headers/Inputs/include/algorithm
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#pragma once

// Declaration-only stand-ins for std::min and std::max: each takes two
// const references and returns a const reference. No definitions are given
// here.
namespace std {
template<class T> constexpr const T& min(const T& a, const T& b);
template<class T> constexpr const T& max(const T& a, const T& b);
}
1 change: 1 addition & 0 deletions clang/test/Headers/Inputs/include/cstdlib
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ float abs(float __x) { return fabs(__x); }
double abs(double __x) { return fabs(__x); }

}

21 changes: 21 additions & 0 deletions clang/test/Headers/Inputs/include/omp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#ifndef __OMP_H
#define __OMP_H

#if _OPENMP
// Follows the pattern in interface.h
// Clang sema checks this type carefully, needs to closely match that from omp.h
// Allocator handle enumeration. The enumerators are given explicit values
// 0 through 8, followed by a sentinel whose value is all bits set (~0U).
// omp_thread_mem_alloc is the allocator named by the
// `#pragma omp allocate(...) allocator(omp_thread_mem_alloc)` directives in
// the HIP math header.
typedef enum omp_allocator_handle_t {
omp_null_allocator = 0,
omp_default_mem_alloc = 1,
omp_large_cap_mem_alloc = 2,
omp_const_mem_alloc = 3,
omp_high_bw_mem_alloc = 4,
omp_low_lat_mem_alloc = 5,
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
KMP_ALLOCATOR_MAX_HANDLE = ~(0U)
} omp_allocator_handle_t;
#endif

#endif
2 changes: 2 additions & 0 deletions clang/test/Headers/Inputs/include/utility
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#pragma once

51 changes: 51 additions & 0 deletions clang/test/Headers/amdgcn_openmp_device_math.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-C,CHECK
// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s --check-prefixes=CHECK-CPP,CHECK

#ifdef __cplusplus
#include <cmath>
#else
#include <math.h>
#endif

// Calls sin, cos and fabs on a double inside an OpenMP target region. The
// FileCheck directives expect each call to be emitted as a call to the
// corresponding __ocml_*_f64 function, for both the C and the C++ run.
void test_math_f64(double x) {
// CHECK-LABEL: define {{.*}}test_math_f64
#pragma omp target
{
// CHECK: call double @__ocml_sin_f64
double l1 = sin(x);
// CHECK: call double @__ocml_cos_f64
double l2 = cos(x);
// CHECK: call double @__ocml_fabs_f64
double l3 = fabs(x);
}
}

// Calls the unsuffixed sin, cos and fabs with a float argument inside an
// OpenMP target region. The expected callee differs by language: the C run
// expects the double-precision __ocml_*_f64 functions, while the C++ run
// expects the single-precision __ocml_*_f32 functions.
void test_math_f32(float x) {
// CHECK-LABEL: define {{.*}}test_math_f32
#pragma omp target
{
// CHECK-C: call double @__ocml_sin_f64
// CHECK-CPP: call float @__ocml_sin_f32
float l1 = sin(x);
// CHECK-C: call double @__ocml_cos_f64
// CHECK-CPP: call float @__ocml_cos_f32
float l2 = cos(x);
// CHECK-C: call double @__ocml_fabs_f64
// CHECK-CPP: call float @__ocml_fabs_f32
float l3 = fabs(x);
}
}
// Calls the 'f'-suffixed sinf, cosf and fabsf with a float argument inside an
// OpenMP target region. Both the C and the C++ run expect calls to the
// single-precision __ocml_*_f32 functions.
void test_math_f32_suffix(float x) {
// CHECK-LABEL: define {{.*}}test_math_f32_suffix
#pragma omp target
{
// CHECK: call float @__ocml_sin_f32
float l1 = sinf(x);
// CHECK: call float @__ocml_cos_f32
float l2 = cosf(x);
// CHECK: call float @__ocml_fabs_f32
float l3 = fabsf(x);
}
}
8 changes: 4 additions & 4 deletions clang/test/Headers/openmp_device_math_isnan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@
// Adds the result of std::isnan for the float argument and then for the
// double argument into an accumulator, and returns the accumulator. The
// FileCheck directives placed before each statement state which isnan callee
// and return type are expected under each check prefix.
double math(float f, double d) {
double r = 0;
// INT_RETURN: call i32 @__nv_isnanf(float
// AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float
// AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
// BOOL_RETURN: call i32 @__nv_isnanf(float
// AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float
// AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
r += std::isnan(f);
// INT_RETURN: call i32 @__nv_isnand(double
// AMD_INT_RETURN: call i32 @_{{.*}}isnand(double
// AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
// BOOL_RETURN: call i32 @__nv_isnand(double
// AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double
// AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
r += std::isnan(d);
return r;
}
Expand Down
4 changes: 2 additions & 2 deletions openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ macro(add_cuda_bc_library)
set(cu_cmd ${CLANG_TOOL}
-xc++
-c
-mllvm -openmp-opt-disable-internalization
-mllvm -openmp-opt-disable
-std=c++14
-ffreestanding
-target amdgcn-amd-amdhsa
Expand Down Expand Up @@ -157,7 +157,7 @@ foreach(mcpu ${mcpus})
add_custom_command(
OUTPUT ${bc_libname}
COMMAND ${LINK_TOOL} ${bc_files} | ${OPT_TOOL} --always-inline -o ${OUTPUTDIR}/${bc_libname}
DEPENDS ${bc_files})
DEPENDS ${bc_files} opt)

add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${bc_libname})

Expand Down