Skip to content

Commit

Permalink
[OpenCL] Clean up and add missing fields for block struct
Browse files Browse the repository at this point in the history
Currently a block is translated to a structure equivalent to

struct Block {
  void *isa;
  int flags;
  int reserved;
  void *invoke;
  void *descriptor;
};
Except for invoke, which is the pointer to the block invoke function,
all of these fields are useless for OpenCL; they clutter the IR and
also waste memory, since the block struct is passed to the block
invoke function as an argument.

On the other hand, the size and alignment of the block struct are
not stored in the struct, which makes it difficult to implement
__enqueue_kernel as a library function, since the library function
needs to know the size and alignment of the argument that needs
to be passed to the kernel.

This patch removes the useless fields from the block struct and adds
size and align fields. The equivalent block struct will become

struct Block {
  int size;
  int align;
  generic void *invoke;
 /* custom fields */
};
It also changes the pointer to the invoke function to be
a generic pointer since the address space of a function
may not be private on certain targets.

Differential Revision: https://reviews.llvm.org/D37822

llvm-svn: 314932
  • Loading branch information
yxsamliu committed Oct 4, 2017
1 parent 8c0ff95 commit 10712d9
Show file tree
Hide file tree
Showing 7 changed files with 276 additions and 128 deletions.
255 changes: 176 additions & 79 deletions clang/lib/CodeGen/CGBlocks.cpp
Expand Up @@ -14,11 +14,13 @@
#include "CGBlocks.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "TargetInfo.h"
#include "clang/AST/DeclObjC.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
Expand Down Expand Up @@ -302,21 +304,55 @@ static CharUnits getLowBit(CharUnits v) {

static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
SmallVectorImpl<llvm::Type*> &elementTypes) {
// The header is basically 'struct { void *; int; int; void *; void *; }'.
// Assert that that struct is packed.
assert(CGM.getIntSize() <= CGM.getPointerSize());
assert(CGM.getIntAlign() <= CGM.getPointerAlign());
assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign()));

info.BlockAlign = CGM.getPointerAlign();
info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize();

assert(elementTypes.empty());
elementTypes.push_back(CGM.VoidPtrTy);
elementTypes.push_back(CGM.IntTy);
elementTypes.push_back(CGM.IntTy);
elementTypes.push_back(CGM.VoidPtrTy);
elementTypes.push_back(CGM.getBlockDescriptorType());
if (CGM.getLangOpts().OpenCL) {
// The header is basically 'struct { int; int; generic void *;
// custom_fields; }'. Assert that struct is packed.
auto GenPtrAlign = CharUnits::fromQuantity(
CGM.getTarget().getPointerAlign(LangAS::opencl_generic) / 8);
auto GenPtrSize = CharUnits::fromQuantity(
CGM.getTarget().getPointerWidth(LangAS::opencl_generic) / 8);
assert(CGM.getIntSize() <= GenPtrSize);
assert(CGM.getIntAlign() <= GenPtrAlign);
assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
elementTypes.push_back(CGM.IntTy); /* total size */
elementTypes.push_back(CGM.IntTy); /* align */
elementTypes.push_back(
CGM.getOpenCLRuntime()
.getGenericVoidPointerType()); /* invoke function */
unsigned Offset =
2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
unsigned BlockAlign = GenPtrAlign.getQuantity();
if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
// TargetOpenCLBlockHelper needs to make sure the struct is packed.
// If necessary, add padding fields to the custom fields.
unsigned Align = CGM.getDataLayout().getABITypeAlignment(I);
if (BlockAlign < Align)
BlockAlign = Align;
assert(Offset % Align == 0);
Offset += CGM.getDataLayout().getTypeAllocSize(I);
elementTypes.push_back(I);
}
}
info.BlockAlign = CharUnits::fromQuantity(BlockAlign);
info.BlockSize = CharUnits::fromQuantity(Offset);
} else {
// The header is basically 'struct { void *; int; int; void *; void *; }'.
// Assert that that struct is packed.
assert(CGM.getIntSize() <= CGM.getPointerSize());
assert(CGM.getIntAlign() <= CGM.getPointerAlign());
assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign()));
info.BlockAlign = CGM.getPointerAlign();
info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize();
elementTypes.push_back(CGM.VoidPtrTy);
elementTypes.push_back(CGM.IntTy);
elementTypes.push_back(CGM.IntTy);
elementTypes.push_back(CGM.VoidPtrTy);
elementTypes.push_back(CGM.getBlockDescriptorType());
}
}

static QualType getCaptureFieldType(const CodeGenFunction &CGF,
Expand All @@ -341,8 +377,12 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,

SmallVector<llvm::Type*, 8> elementTypes;
initializeForBlockHeader(CGM, info, elementTypes);

if (!block->hasCaptures()) {
bool hasNonConstantCustomFields = false;
if (auto *OpenCLHelper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper())
hasNonConstantCustomFields =
!OpenCLHelper->areAllCustomFieldValuesConstant(info);
if (!block->hasCaptures() && !hasNonConstantCustomFields) {
info.StructureType =
llvm::StructType::get(CGM.getLLVMContext(), elementTypes, true);
info.CanBeGlobal = true;
Expand Down Expand Up @@ -720,41 +760,48 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
}

llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
auto GenVoidPtrTy =
IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
unsigned GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
auto GenVoidPtrSize = CharUnits::fromQuantity(
CGM.getTarget().getPointerWidth(GenVoidPtrAddr) / 8);
// Using the computed layout, generate the actual block function.
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
llvm::Constant *blockFn
= CodeGenFunction(CGM, true).GenerateBlockFunction(CurGD, blockInfo,
LocalDeclMap,
isLambdaConv,
blockInfo.CanBeGlobal);
blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy);
llvm::Constant *blockFn = CodeGenFunction(CGM, true).GenerateBlockFunction(
CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
blockFn = llvm::ConstantExpr::getPointerCast(blockFn, GenVoidPtrTy);

// If there is nothing to capture, we can emit this as a global block.
if (blockInfo.CanBeGlobal)
return CGM.getAddrOfGlobalBlockIfEmitted(blockInfo.BlockExpression);

// Otherwise, we have to emit this as a local block.

llvm::Constant *isa =
(!CGM.getContext().getLangOpts().OpenCL)
? CGM.getNSConcreteStackBlock()
: CGM.getNullPointer(VoidPtrPtrTy,
CGM.getContext().getPointerType(
QualType(CGM.getContext().VoidPtrTy)));
isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy);

// Build the block descriptor.
llvm::Constant *descriptor = buildBlockDescriptor(CGM, blockInfo);

Address blockAddr = blockInfo.LocalAddress;
assert(blockAddr.isValid() && "block has no address!");

// Compute the initial on-stack block flags.
BlockFlags flags = BLOCK_HAS_SIGNATURE;
if (blockInfo.HasCapturedVariableLayout) flags |= BLOCK_HAS_EXTENDED_LAYOUT;
if (blockInfo.NeedsCopyDispose) flags |= BLOCK_HAS_COPY_DISPOSE;
if (blockInfo.HasCXXObject) flags |= BLOCK_HAS_CXX_OBJ;
if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET;
llvm::Constant *isa;
llvm::Constant *descriptor;
BlockFlags flags;
if (!IsOpenCL) {
isa = llvm::ConstantExpr::getBitCast(CGM.getNSConcreteStackBlock(),
VoidPtrTy);

// Build the block descriptor.
descriptor = buildBlockDescriptor(CGM, blockInfo);

// Compute the initial on-stack block flags.
flags = BLOCK_HAS_SIGNATURE;
if (blockInfo.HasCapturedVariableLayout)
flags |= BLOCK_HAS_EXTENDED_LAYOUT;
if (blockInfo.NeedsCopyDispose)
flags |= BLOCK_HAS_COPY_DISPOSE;
if (blockInfo.HasCXXObject)
flags |= BLOCK_HAS_CXX_OBJ;
if (blockInfo.UsesStret)
flags |= BLOCK_USE_STRET;
}

auto projectField =
[&](unsigned index, CharUnits offset, const Twine &name) -> Address {
Expand All @@ -778,13 +825,33 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
index++;
};

addHeaderField(isa, getPointerSize(), "block.isa");
addHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()),
getIntSize(), "block.flags");
addHeaderField(llvm::ConstantInt::get(IntTy, 0),
getIntSize(), "block.reserved");
addHeaderField(blockFn, getPointerSize(), "block.invoke");
addHeaderField(descriptor, getPointerSize(), "block.descriptor");
if (!IsOpenCL) {
addHeaderField(isa, getPointerSize(), "block.isa");
addHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()),
getIntSize(), "block.flags");
addHeaderField(llvm::ConstantInt::get(IntTy, 0), getIntSize(),
"block.reserved");
} else {
addHeaderField(
llvm::ConstantInt::get(IntTy, blockInfo.BlockSize.getQuantity()),
getIntSize(), "block.size");
addHeaderField(
llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
getIntSize(), "block.align");
}
addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
if (!IsOpenCL)
addHeaderField(descriptor, getPointerSize(), "block.descriptor");
else if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
addHeaderField(
I.first,
CharUnits::fromQuantity(
CGM.getDataLayout().getTypeAllocSize(I.first->getType())),
I.second);
}
}
}

// Finally, capture all the values into the block.
Expand Down Expand Up @@ -980,21 +1047,38 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() {

llvm::Type *BlockDescPtrTy = getBlockDescriptorType();

// struct __block_literal_generic {
// void *__isa;
// int __flags;
// int __reserved;
// void (*__invoke)(void *);
// struct __block_descriptor *__descriptor;
// };
GenericBlockLiteralType =
llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
if (getLangOpts().OpenCL) {
// struct __opencl_block_literal_generic {
// int __size;
// int __align;
// __generic void *__invoke;
// /* custom fields */
// };
SmallVector<llvm::Type *, 8> StructFields(
{IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldTypes())
StructFields.push_back(I);
}
GenericBlockLiteralType = llvm::StructType::create(
StructFields, "struct.__opencl_block_literal_generic");
} else {
// struct __block_literal_generic {
// void *__isa;
// int __flags;
// int __reserved;
// void (*__invoke)(void *);
// struct __block_descriptor *__descriptor;
// };
GenericBlockLiteralType =
llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
}

return GenericBlockLiteralType;
}

RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue) {
const BlockPointerType *BPT =
E->getCallee()->getType()->getAs<BlockPointerType>();
Expand All @@ -1019,17 +1103,16 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,

// Get the function pointer from the literal.
llvm::Value *FuncPtr =
Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);

Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
CGM.getLangOpts().OpenCL ? 2 : 3);

// Add the block literal.
CallArgList Args;

QualType VoidPtrQualTy = getContext().VoidPtrTy;
llvm::Type *GenericVoidPtrTy = VoidPtrTy;
if (getLangOpts().OpenCL) {
GenericVoidPtrTy = Builder.getInt8PtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
VoidPtrQualTy =
getContext().getPointerType(getContext().getAddrSpaceQualType(
getContext().VoidTy, LangAS::opencl_generic));
Expand All @@ -1054,7 +1137,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
llvm::Type *BlockFTy = CGM.getTypes().GetFunctionType(FnInfo);

llvm::Type *BlockFTyPtr = llvm::PointerType::getUnqual(BlockFTy);
Func = Builder.CreateBitCast(Func, BlockFTyPtr);
Func = Builder.CreatePointerCast(Func, BlockFTyPtr);

// Prepare the callee.
CGCallee Callee(CGCalleeInfo(), Func);
Expand Down Expand Up @@ -1139,27 +1222,37 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
ConstantInitBuilder builder(CGM);
auto fields = builder.beginStruct();

// isa
fields.add((!CGM.getContext().getLangOpts().OpenCL)
? CGM.getNSConcreteGlobalBlock()
: CGM.getNullPointer(CGM.VoidPtrPtrTy,
CGM.getContext().getPointerType(QualType(
CGM.getContext().VoidPtrTy))));
bool IsOpenCL = CGM.getLangOpts().OpenCL;
if (!IsOpenCL) {
// isa
fields.add(CGM.getNSConcreteGlobalBlock());

// __flags
BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
if (blockInfo.UsesStret)
flags |= BLOCK_USE_STRET;

// __flags
BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET;

fields.addInt(CGM.IntTy, flags.getBitMask());
fields.addInt(CGM.IntTy, flags.getBitMask());

// Reserved
fields.addInt(CGM.IntTy, 0);
// Reserved
fields.addInt(CGM.IntTy, 0);
} else {
fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
}

// Function
fields.add(blockFn);

// Descriptor
fields.add(buildBlockDescriptor(CGM, blockInfo));
if (!IsOpenCL) {
// Descriptor
fields.add(buildBlockDescriptor(CGM, blockInfo));
} else if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldValues(CGM, blockInfo)) {
fields.add(I);
}
}

unsigned AddrSpace = 0;
if (CGM.getContext().getLangOpts().OpenCL)
Expand Down Expand Up @@ -1284,9 +1377,13 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
fnLLVMType, llvm::GlobalValue::InternalLinkage, name, &CGM.getModule());
CGM.SetInternalFunctionAttributes(blockDecl, fn, fnInfo);

if (BuildGlobalBlock)
if (BuildGlobalBlock) {
auto GenVoidPtrTy = getContext().getLangOpts().OpenCL
? CGM.getOpenCLRuntime().getGenericVoidPointerType()
: VoidPtrTy;
buildGlobalBlock(CGM, blockInfo,
llvm::ConstantExpr::getBitCast(fn, VoidPtrTy));
llvm::ConstantExpr::getPointerCast(fn, GenVoidPtrTy));
}

// Begin generating the function.
StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args,
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/CGOpenCLRuntime.cpp
Expand Up @@ -103,3 +103,10 @@ llvm::Value *CGOpenCLRuntime::getPipeElemAlign(const Expr *PipeArg) {
.getQuantity();
return llvm::ConstantInt::get(Int32Ty, TypeSize, false);
}

/// Build the LLVM pointer type that stands for an OpenCL `__generic void *`.
///
/// Asserts that the module is being compiled with the OpenCL language option
/// enabled.
///
/// \return an i8 pointer type placed in the target address space that
/// LangAS::opencl_generic maps to.
llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
  assert(CGM.getLangOpts().OpenCL);
  // Translate the language-level generic address space into the number the
  // target uses before asking LLVM for the pointer type.
  const unsigned GenericTargetAS =
      CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
  return llvm::IntegerType::getInt8PtrTy(CGM.getLLVMContext(),
                                         GenericTargetAS);
}
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CGOpenCLRuntime.h
Expand Up @@ -59,6 +59,9 @@ class CGOpenCLRuntime {
// \brief Returns a value which indicates the alignment in bytes of the pipe
// element.
virtual llvm::Value *getPipeElemAlign(const Expr *PipeArg);

/// Get the LLVM pointer type used to represent an OpenCL `__generic void *`.
///
/// The definition asserts that OpenCL is enabled in the language options, so
/// this is only meant to be called when compiling OpenCL.
///
/// \return an i8 pointer type in the target address space that
/// LangAS::opencl_generic maps to.
llvm::PointerType *getGenericVoidPointerType();
};

}
Expand Down

0 comments on commit 10712d9

Please sign in to comment.