Skip to content

Commit

Permalink
CUDA: IR generation support for device stubs
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@141304 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
pcc committed Oct 6, 2011
1 parent 57e264e commit a4ae229
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 0 deletions.
92 changes: 92 additions & 0 deletions lib/CodeGen/CGCUDANV.cpp
Expand Up @@ -13,20 +13,112 @@
//===----------------------------------------------------------------------===//

#include "CGCUDARuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/Decl.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Support/CallSite.h"

#include <vector>

using namespace clang;
using namespace CodeGen;

namespace {

class CGNVCUDARuntime : public CGCUDARuntime {

private:
llvm::Type *IntTy, *SizeTy;
llvm::PointerType *CharPtrTy, *VoidPtrTy;

llvm::Constant *getSetupArgumentFn() const;
llvm::Constant *getLaunchFn() const;

public:
CGNVCUDARuntime(CodeGenModule &CGM);

void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
};

}

CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();

IntTy = Types.ConvertType(Ctx.IntTy);
SizeTy = Types.ConvertType(Ctx.getSizeType());

CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
}

llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
std::vector<llvm::Type*> Params;
Params.push_back(VoidPtrTy);
Params.push_back(SizeTy);
Params.push_back(SizeTy);
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
Params, false),
"cudaSetupArgument");
}

llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
// cudaError_t cudaLaunch(char *)
std::vector<llvm::Type*> Params;
Params.push_back(CharPtrTy);
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
Params, false),
"cudaLaunch");
}

void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
FunctionArgList &Args) {
// Build the argument value list and the argument stack struct type.
llvm::SmallVector<llvm::Value *, 16> ArgValues;
std::vector<llvm::Type *> ArgTypes;
for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
llvm::Value *V = CGF.GetAddrOfLocalVar(*I);
ArgValues.push_back(V);
assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
}
llvm::StructType *ArgStackTy = llvm::StructType::get(
CGF.getLLVMContext(), ArgTypes);

llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");

// Emit the calls to cudaSetupArgument
llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
for (unsigned I = 0, E = Args.size(); I != E; ++I) {
llvm::Value *Args[3];
llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
Args[1] = CGF.Builder.CreateIntCast(
llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
SizeTy, false);
Args[2] = CGF.Builder.CreateIntCast(
llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
SizeTy, false);
llvm::CallSite CS = CGF.EmitCallOrInvoke(cudaSetupArgFn, Args);
llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
CGF.EmitBlock(NextBlock);
}

// Emit the call to cudaLaunch
llvm::Constant *cudaLaunchFn = getLaunchFn();
llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
CGF.EmitCallOrInvoke(cudaLaunchFn, Arg);
CGF.EmitBranch(EndBlock);

CGF.EmitBlock(EndBlock);
}

CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
Expand Down
4 changes: 4 additions & 0 deletions lib/CodeGen/CGCUDARuntime.h
Expand Up @@ -24,6 +24,7 @@ namespace CodeGen {

class CodeGenFunction;
class CodeGenModule;
class FunctionArgList;
class ReturnValueSlot;
class RValue;

Expand All @@ -39,6 +40,9 @@ class CGCUDARuntime {
const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue);

virtual void EmitDeviceStubBody(CodeGenFunction &CGF,
FunctionArgList &Args) = 0;

};

/// Creates an instance of a CUDA runtime class.
Expand Down
5 changes: 5 additions & 0 deletions lib/CodeGen/CodeGenFunction.cpp
Expand Up @@ -13,6 +13,7 @@

#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CGDebugInfo.h"
#include "CGException.h"
Expand Down Expand Up @@ -404,6 +405,10 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
EmitDestructorBody(Args);
else if (isa<CXXConstructorDecl>(FD))
EmitConstructorBody(Args);
else if (getContext().getLangOptions().CUDA &&
!CGM.getCodeGenOpts().CUDAIsDevice &&
FD->hasAttr<CUDAGlobalAttr>())
CGM.getCUDARuntime().EmitDeviceStubBody(*this, Args);
else
EmitFunctionBody(Args);

Expand Down
13 changes: 13 additions & 0 deletions test/CodeGenCUDA/device-stub.cu
@@ -0,0 +1,13 @@
// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s

#include "../SemaCUDA/cuda.h"

// Test that we build the correct number of calls to cudaSetupArgument followed
// by a call to cudaLaunch.

// CHECK: define{{.*}}kernelfunc
// CHECK: call{{.*}}cudaSetupArgument
// CHECK: call{{.*}}cudaSetupArgument
// CHECK: call{{.*}}cudaSetupArgument
// CHECK: call{{.*}}cudaLaunch
__global__ void kernelfunc(int i, int j, int k) {}

0 comments on commit a4ae229

Please sign in to comment.