Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion clang/lib/CIR/CodeGen/CIRGenExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1408,7 +1408,9 @@ RValue CIRGenFunction::emitCallExpr(const clang::CallExpr *E,
if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E))
return emitCXXMemberCallExpr(CE, ReturnValue);

assert(!dyn_cast<CUDAKernelCallExpr>(E) && "CUDA NYI");
if (const auto *CE = dyn_cast<CUDAKernelCallExpr>(E))
return emitCUDAKernelCallExpr(CE, ReturnValue);

if (const auto *CE = dyn_cast<CXXOperatorCallExpr>(E))
if (const CXXMethodDecl *MD =
dyn_cast_or_null<CXXMethodDecl>(CE->getCalleeDecl()))
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,11 @@ CIRGenFunction::emitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E,
/*IsArrow=*/false, E->getArg(0));
}

RValue CIRGenFunction::emitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue) {
return CGM.getCUDARuntime().emitCUDAKernelCallExpr(*this, E, ReturnValue);
}

static void emitNullBaseClassInitialization(CIRGenFunction &CGF,
Address DestPtr,
const CXXRecordDecl *Base) {
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -2068,6 +2068,9 @@ class CIRGenFunction : public CIRGenTypeCache {
const CXXMethodDecl *MD,
ReturnValueSlot ReturnValue);

RValue emitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue);

RValue emitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *expr);

void emitCXXTemporary(const CXXTemporary *Temporary, QualType TempType,
Expand Down
44 changes: 38 additions & 6 deletions clang/test/CIR/CodeGen/CUDA/destructor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,26 @@
// RUN: %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
// RUN: %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s

// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
// RUN: %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
// RUN: %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s

// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
// RUN: %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s

// Make sure we do emit device-side kernel even if it's only referenced
// by the destructor of a variable not present on device.
template<typename T> __global__ void f(T) {}
Expand All @@ -19,11 +39,23 @@ template<typename T> struct A {

// CIR-HOST: module
// CIR-DEVICE: module
// CIR-DEVICE-DISABLED: cir.func dso_local @_Z1fIiEvT_
// CIR-DEVICE: cir.func dso_local @_Z1fIiEvT_
// LLVM-DEVICE: define dso_local ptx_kernel void @_Z1fIiEvT_
// OGCG-DEVICE: define ptx_kernel void @_Z1fIiEvT_

// CIR-HOST: cir.func {{.*}} @_ZN1AIiED2Ev{{.*}} {
// CIR-HOST: cir.call @__cudaPushCallConfiguration
// CIR-HOST: cir.call @_Z16__device_stub__fIiEvT_
// CIR-HOST: }

// LLVM-HOST: define linkonce_odr void @_ZN1AIiED2Ev
// LLVM-HOST: call i32 @__cudaPushCallConfiguration(
// LLVM-HOST: call void @_Z16__device_stub__fIiEvT_

// OGCG-HOST: define linkonce_odr void @_ZN1AIiED2Ev
// OGCG-HOST: call i32 @__cudaPushCallConfiguration(
// OGCG-HOST: call void @_Z16__device_stub__fIiEvT_


// CIR-HOST-DISABLED: cir.func {{.*}} @_ZN1AIiED2Ev{{.*}} {
// CIR-HOST-DISABLED: cir.call @__cudaPushCallConfiguration
// CIR-HOST-DISABLED: cir.call @_Z16__device_stub__fIiEvT_
// CIR-HOST-DISABLED: }

// A<int> a;
A<int> a;
68 changes: 57 additions & 11 deletions clang/test/CIR/CodeGen/CUDA/simple.cu
Original file line number Diff line number Diff line change
@@ -1,16 +1,35 @@
#include "../Inputs/cuda.h"

// TODO: host build is currently crashing.
// RUN-DISABLE: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
// RUN-DISABLE: -x cuda -emit-cir -target-sdk-version=12.3 \
// RUN-DISABLE: %s -o %t.cir
// RUN-DISABLE: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \
// RUN: %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
// RUN: %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s

// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
// RUN: %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
// RUN: %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s

// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
// RUN: -x cuda -emit-llvm -target-sdk-version=12.3 \
// RUN: %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
// RUN: -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
// RUN: %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s

// Attribute for global_fn
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}

Expand All @@ -25,6 +44,7 @@ __device__ void device_fn(int* a, double b, float c) {}
__global__ void global_fn(int a) {}
// CIR-DEVICE: @_Z9global_fni({{.*}} cc(ptx_kernel)
// LLVM-DEVICE: define dso_local ptx_kernel void @_Z9global_fni
// OGCG-DEVICE: define dso_local ptx_kernel void @_Z9global_fni

// Check for device stub emission.

Expand All @@ -38,10 +58,17 @@ __global__ void global_fn(int a) {}
// LLVM-HOST: void @_Z24__device_stub__global_fni
// LLVM-HOST: %[[#KernelArgs:]] = alloca [1 x ptr], i64 1, align 16
// LLVM-HOST: %[[#GEP1:]] = getelementptr ptr, ptr %[[#KernelArgs]], i32 0
// LLVM-HOST: %[[#GEP2:]] = getelementptr ptr, ptr %[[#GEP1]], i64 0
// LLVM-HOST: %[[#GEP2:]] = getelementptr [1 x ptr], ptr %[[#KernelArgs]], i32 0, i64 0
// LLVM-HOST: call i32 @__cudaPopCallConfiguration
// LLVM-HOST: call i32 @cudaLaunchKernel(ptr @_Z24__device_stub__global_fni

// OGCG-HOST: void @_Z24__device_stub__global_fni
// OGCG-HOST: %kernel_args = alloca ptr, i64 1, align 16
// OGCG-HOST: getelementptr ptr, ptr %kernel_args, i32 0
// OGCG-HOST: call i32 @__cudaPopCallConfiguration
// OGCG-HOST: call noundef i32 @cudaLaunchKernel(ptr noundef @_Z24__device_stub__global_fni


int main() {
global_fn<<<1, 1>>>(1);
}
Expand All @@ -63,10 +90,29 @@ int main() {
// LLVM-HOST: alloca %struct.dim3
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
// LLVM-HOST: [[LLVMConfigOK:%[0-9]+]] = call i32 @__cudaPushCallConfiguration
// LLVM-HOST: br [[LLVMConfigOK]], label %[[#Good:]], label [[#Bad:]]
// LLVM-HOST: %[[#ConfigOK:]] = call i32 @__cudaPushCallConfiguration
// LLVM-HOST: %[[#ConfigCond:]] = icmp ne i32 %[[#ConfigOK]], 0
// LLVM-HOST: br i1 %[[#ConfigCond]], label %[[#Good:]], label %[[#Bad:]]
// LLVM-HOST: [[#Good]]:
// LLVM-HOST: br label [[#End:]]
// LLVM-HOST: br label %[[#End:]]
// LLVM-HOST: [[#Bad]]:
// LLVM-HOST: call void @_Z24__device_stub__global_fni
// LLVM-HOST: br label [[#End]]
// LLVM-HOST: call void @_Z24__device_stub__global_fni(i32 1)
// LLVM-HOST: br label %[[#End:]]
// LLVM-HOST: [[#End]]:
// LLVM-HOST: %[[#]] = load i32
// LLVM-HOST: ret i32

// OGCG-HOST: define dso_local noundef i32 @main
// OGCG-HOST: alloca %struct.dim3, align 4
// OGCG-HOST: alloca %struct.dim3, align 4
// OGCG-HOST: call void @_ZN4dim3C1Ejjj
// OGCG-HOST: call void @_ZN4dim3C1Ejjj
// OGCG-HOST: %call = call i32 @__cudaPushCallConfiguration
// OGCG-HOST: %tobool = icmp ne i32 %call, 0
// OGCG-HOST: br i1 %tobool, label %kcall.end, label %kcall.configok
// OGCG-HOST: kcall.configok:
// OGCG-HOST: call void @_Z24__device_stub__global_fni(i32 noundef 1)
// OGCG-HOST: br label %kcall.end
// OGCG-HOST: kcall.end:
// OGCG-HOST: %{{[0-9]+}} = load i32, ptr %retval, align 4
// OGCG-HOST: ret i32
Loading