Skip to content

Commit

Permalink
[HIP] Allow std::malloc in device function
Browse files Browse the repository at this point in the history
D106463 caused a regression that prevents std::malloc to be
called in the device function, which is allowed with nvcc.

Basically the standard C++ header introducing malloc in
std namespace by using ::malloc. The device ::malloc
function needs to be declared before using ::malloc
to be introduced into std namespace.

Revert D106463 and add a test.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D150965
  • Loading branch information
yxsamliu committed May 23, 2023
1 parent 18c4695 commit f5033c3
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 35 deletions.
60 changes: 34 additions & 26 deletions clang/lib/Headers/__clang_hip_runtime_wrapper.h
Expand Up @@ -47,28 +47,9 @@ extern "C" {
#endif //__cplusplus

#if !defined(__HIPCC_RTC__)
#include <cmath>
#include <cstdlib>
#include <stdlib.h>
#if __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
#else
typedef __SIZE_TYPE__ size_t;
// Define macros which are needed to declare HIP device API's without standard
// C/C++ headers. This is for readability so that these API's can be written
// the same way as non-hipRTC use case. These macros need to be popped so that
// they do not pollute users' name space.
#pragma push_macro("NULL")
#pragma push_macro("uint32_t")
#pragma push_macro("uint64_t")
#pragma push_macro("CHAR_BIT")
#pragma push_macro("INT_MAX")
#define NULL (void *)0
#define uint32_t __UINT32_TYPE__
#define uint64_t __UINT64_TYPE__
#define CHAR_BIT __CHAR_BIT__
#define INT_MAX __INTMAX_MAX__
#endif // __HIPCC_RTC__

typedef __SIZE_TYPE__ __hip_size_t;
Expand All @@ -78,11 +59,13 @@ extern "C" {
#endif //__cplusplus

#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
__device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
__device__ void __ockl_dm_dealloc(unsigned long long __addr);
#if __has_feature(address_sanitizer)
extern "C" __device__ unsigned long long __asan_malloc_impl(unsigned long long __size, unsigned long long __pc);
extern "C" __device__ void __asan_free_impl(unsigned long long __addr, unsigned long long __pc);
__device__ unsigned long long __asan_malloc_impl(unsigned long long __size,
unsigned long long __pc);
__device__ void __asan_free_impl(unsigned long long __addr,
unsigned long long __pc);
__attribute__((noinline, weak)) __device__ void *malloc(__hip_size_t __size) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
return (void *)__asan_malloc_impl(__size, __pc);
Expand All @@ -91,7 +74,7 @@ __attribute__((noinline, weak)) __device__ void free(void *__ptr) {
unsigned long long __pc = (unsigned long long)__builtin_return_address(0);
__asan_free_impl((unsigned long long)__ptr, __pc);
}
#else
#else // __has_feature(address_sanitizer)
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
return (void *) __ockl_dm_alloc(__size);
}
Expand All @@ -109,21 +92,46 @@ __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__hip_free(__ptr);
}
#else
#else // __HIP_ENABLE_DEVICE_MALLOC__
__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
__builtin_trap();
return (void *)0;
}
__attribute__((weak)) inline __device__ void free(void *__ptr) {
__builtin_trap();
}
#endif
#endif // __HIP_ENABLE_DEVICE_MALLOC__
#endif // HIP version check

#ifdef __cplusplus
} // extern "C"
#endif //__cplusplus

#if !defined(__HIPCC_RTC__)
#include <cmath>
#include <cstdlib>
#include <stdlib.h>
#if __has_include("hip/hip_version.h")
#include "hip/hip_version.h"
#endif // __has_include("hip/hip_version.h")
#else
typedef __SIZE_TYPE__ size_t;
// Define macros which are needed to declare HIP device API's without standard
// C/C++ headers. This is for readability so that these API's can be written
// the same way as non-hipRTC use case. These macros need to be popped so that
// they do not pollute users' name space.
#pragma push_macro("NULL")
#pragma push_macro("uint32_t")
#pragma push_macro("uint64_t")
#pragma push_macro("CHAR_BIT")
#pragma push_macro("INT_MAX")
#define NULL (void *)0
#define uint32_t __UINT32_TYPE__
#define uint64_t __UINT64_TYPE__
#define CHAR_BIT __CHAR_BIT__
#define INT_MAX __INTMAX_MAX__
#endif // __HIPCC_RTC__

#include <__clang_hip_libdevice_declares.h>
#include <__clang_hip_math.h>
#include <__clang_hip_stdlib.h>
Expand Down
2 changes: 2 additions & 0 deletions clang/test/Headers/Inputs/include/cstdlib
Expand Up @@ -26,5 +26,7 @@ float fabs(float __x) { return __builtin_fabs(__x); }
float abs(float __x) { return fabs(__x); }
double abs(double __x) { return fabs(__x); }

using ::malloc;
using ::free;
}

2 changes: 0 additions & 2 deletions clang/test/Headers/Inputs/include/math.h
Expand Up @@ -105,8 +105,6 @@ long lrint(double __a);
long lrintf(float __a);
long lround(double __a);
long lroundf(float __a);
int max(int __a, int __b);
int min(int __a, int __b);
double modf(double __a, double *__b);
float modff(float __a, float *__b);
double nearbyint(double __a);
Expand Down
Empty file.
Empty file.
44 changes: 37 additions & 7 deletions clang/test/Headers/hip-header.hip
Expand Up @@ -31,7 +31,14 @@
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s
// RUN: -D__HIPCC_RTC__ -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC %s
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
Expand All @@ -40,6 +47,13 @@
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
// RUN: -fsanitize=address -disable-llvm-passes -D__HIPCC_RTC__ \
// RUN: | FileCheck -check-prefixes=MALLOC-ASAN %s
// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -aux-triple amdgcn-amd-amdhsa -triple x86_64-unknown-unknown \
// RUN: -emit-llvm %s -o - \
// RUN: -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
// RUN: -disable-llvm-passes | FileCheck -check-prefixes=MALLOC-HOST %s

// expected-no-diagnostics

Expand Down Expand Up @@ -133,26 +147,42 @@ __device__ double test_isnan() {

// Check that device malloc and free do not conflict with std headers.
#include <cstdlib>
// CHECK-LABEL: define{{.*}}@_Z11test_malloc
// CHECK: call {{.*}}ptr @malloc(i64
// CHECK-LABEL: define weak {{.*}}ptr @malloc(i64
// MALLOC-LABEL: define{{.*}}@_Z11test_malloc
// MALLOC: call {{.*}}ptr @malloc(i64
// MALLOC: call {{.*}}ptr @malloc(i64
// MALLOC-LABEL: define weak {{.*}}ptr @malloc(i64
// MALLOC: call i64 @__ockl_dm_alloc
// NOMALLOC: call void @llvm.trap
// MALLOC-ASAN-LABEL: define weak {{.*}}ptr @malloc(i64
// MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
// MALLOC-ASAN: call i64 @__asan_malloc_impl(i64 {{.*}}, i64 {{.*}})
__device__ void test_malloc(void *a) {
a = malloc(42);
a = std::malloc(42);
}

// CHECK-LABEL: define{{.*}}@_Z9test_free
// CHECK: call {{.*}}void @free(ptr
// CHECK-LABEL: define weak {{.*}}void @free(ptr
// MALLOC-LABEL: define{{.*}}@_Z9test_free
// MALLOC: call {{.*}}void @free(ptr
// MALLOC: call {{.*}}void @free(ptr
// MALLOC-LABEL: define weak {{.*}}void @free(ptr
// MALLOC: call void @__ockl_dm_dealloc
// NOMALLOC: call void @llvm.trap
// MALLOC-ASAN-LABEL: define weak {{.*}}void @free(ptr
// MALLOC-ASAN: call ptr @llvm.returnaddress(i32 0)
// MALLOC-ASAN: call void @__asan_free_impl(i64 {{.*}}, i64 {{.*}})
__device__ void test_free(void *a) {
free(a);
std::free(a);
}

// MALLOC-HOST-LABEL: define{{.*}}@_Z16test_malloc_host
// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
// MALLOC-HOST: call {{.*}}ptr @_Z6mallocm(i64
// MALLOC-HOST: call {{.*}}void @_Z4freePv(ptr
void test_malloc_host(void *a) {
a = malloc(42);
free(a);
a = std::malloc(42);
std::free(a);
}

0 comments on commit f5033c3

Please sign in to comment.