Skip to content

Commit

Permalink
Preserve vec3 type.
Browse files Browse the repository at this point in the history
Summary: Preserve vec3 type with CodeGen option.

Reviewers: Anastasia, bruno

Reviewed By: Anastasia

Subscribers: bruno, ahatanak, cfe-commits

Differential Revision: https://reviews.llvm.org/D30810

llvm-svn: 299445
  • Loading branch information
jaykang10 committed Apr 4, 2017
1 parent 3e90f84 commit e7cdcde
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 43 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/CC1Options.td
Expand Up @@ -658,6 +658,8 @@ def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">,
HelpText<"Set default MS calling convention">;
def finclude_default_header : Flag<["-"], "finclude-default-header">,
HelpText<"Include the default header file for OpenCL">;
def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">,
HelpText<"Preserve 3-component vector type">;

// FIXME: Remove these entirely once functionality/tests have been excised.
def fobjc_gc_only : Flag<["-"], "fobjc-gc-only">, Group<f_Group>,
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Frontend/CodeGenOptions.def
Expand Up @@ -263,6 +263,9 @@ CODEGENOPT(StrictReturn, 1, 1)
/// Whether to emit extra debug info for sample PGO profile collection.
CODEGENOPT(DebugInfoForProfiling, 1, 0)

/// Whether 3-component vector types are preserved in the generated IR
/// (set by -fpreserve-vec3-type) instead of being widened to 4 components.
CODEGENOPT(PreserveVec3Type, 1, 0)

#undef CODEGENOPT
#undef ENUM_CODEGENOPT
#undef VALUE_CODEGENOPT
Expand Down
79 changes: 41 additions & 38 deletions clang/lib/CodeGen/CGExpr.cpp
Expand Up @@ -1369,26 +1369,28 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
QualType TBAABaseType,
uint64_t TBAAOffset,
bool isNontemporal) {
// For better performance, handle vector loads differently.
if (Ty->isVectorType()) {
const llvm::Type *EltTy = Addr.getElementType();

const auto *VTy = cast<llvm::VectorType>(EltTy);

// Handle vectors of size 3 like size 4 for better performance.
if (VTy->getNumElements() == 3) {

// Bitcast to vec4 type.
llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(),
4);
Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
// Now load value.
llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");

// Shuffle vector to get vec3.
V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
{0, 1, 2}, "extractVec");
return EmitFromMemory(V, Ty);
if (!CGM.getCodeGenOpts().PreserveVec3Type) {
// For better performance, handle vector loads differently.
if (Ty->isVectorType()) {
const llvm::Type *EltTy = Addr.getElementType();

const auto *VTy = cast<llvm::VectorType>(EltTy);

// Handle vectors of size 3 like size 4 for better performance.
if (VTy->getNumElements() == 3) {

// Bitcast to vec4 type.
llvm::VectorType *vec4Ty =
llvm::VectorType::get(VTy->getElementType(), 4);
Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
// Now load value.
llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");

// Shuffle vector to get vec3.
V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
{0, 1, 2}, "extractVec");
return EmitFromMemory(V, Ty);
}
}
}

Expand Down Expand Up @@ -1456,24 +1458,25 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
uint64_t TBAAOffset,
bool isNontemporal) {

// Handle vectors differently to get better performance.
if (Ty->isVectorType()) {
llvm::Type *SrcTy = Value->getType();
auto *VecTy = cast<llvm::VectorType>(SrcTy);
// Handle vec3 special.
if (VecTy->getNumElements() == 3) {
// Our source is a vec3, do a shuffle vector to make it a vec4.
llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
Builder.getInt32(2),
llvm::UndefValue::get(Builder.getInt32Ty())};
llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
Value = Builder.CreateShuffleVector(Value,
llvm::UndefValue::get(VecTy),
MaskV, "extractVec");
SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
}
if (Addr.getElementType() != SrcTy) {
Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
if (!CGM.getCodeGenOpts().PreserveVec3Type) {
// Handle vectors differently to get better performance.
if (Ty->isVectorType()) {
llvm::Type *SrcTy = Value->getType();
auto *VecTy = cast<llvm::VectorType>(SrcTy);
// Handle vec3 special.
if (VecTy->getNumElements() == 3) {
// Our source is a vec3, do a shuffle vector to make it a vec4.
llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
Builder.getInt32(2),
llvm::UndefValue::get(Builder.getInt32Ty())};
llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
MaskV, "extractVec");
SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
}
if (Addr.getElementType() != SrcTy) {
Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
}
}
}

Expand Down
17 changes: 12 additions & 5 deletions clang/lib/CodeGen/CGExprScalar.cpp
Expand Up @@ -3593,8 +3593,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
// vector to get a vec4, then a bitcast if the target type is different.
if (NumElementsSrc == 3 && NumElementsDst != 3) {
Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
DstTy);

if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
DstTy);
}

Src->setName("astype");
return Src;
}
Expand All @@ -3603,9 +3607,12 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
// to vec4 if the original type is not vec4, then a shuffle vector to
// get a vec3.
if (NumElementsSrc != 3 && NumElementsDst == 3) {
auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
Vec4Ty);
if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
Vec4Ty);
}

Src = ConvertVec3AndVec4(Builder, CGF, Src, 3);
Src->setName("astype");
return Src;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Frontend/CompilerInvocation.cpp
Expand Up @@ -729,6 +729,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
}
}

Opts.PreserveVec3Type = Args.hasArg(OPT_fpreserve_vec3_type);
Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions);
Opts.XRayInstrumentFunctions = Args.hasArg(OPT_fxray_instrument);
Opts.XRayInstructionThreshold =
Expand Down
24 changes: 24 additions & 0 deletions clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -fpreserve-vec3-type | FileCheck %s

typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));

// Plain float3 copy: with -fpreserve-vec3-type the load and the store must
// both stay <3 x float>, with no widening to <4 x float>.
void kernel foo(global float3 *a, global float3 *b) {
// CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a
// CHECK: store <3 x float> %[[LOAD_A]], <3 x float> addrspace(1)* %b
*b = *a;
}

// Reinterpreting a float4 as a float3 with -fpreserve-vec3-type: the source is
// loaded as <4 x float>, narrowed by a shufflevector, and the shuffle result
// itself is stored as <3 x float>.
void kernel float4_to_float3(global float3 *a, global float4 *b) {
// CHECK: %[[LOAD_B:.*]] = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
// CHECK: %[[ASTYPE:.*]] = shufflevector <4 x float> %[[LOAD_B]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
// The store operand uses the captured name (no ":.*"), so it must be the
// shuffle result and not just any value.
// CHECK: store <3 x float> %[[ASTYPE]], <3 x float> addrspace(1)* %a, align 16
*a = __builtin_astype(*b, float3);
}

// Reinterpreting a float3 as a float4 with -fpreserve-vec3-type: the source is
// loaded as a genuine <3 x float>, widened by a shufflevector with an undef
// fourth lane, and the shuffle result itself is stored as <4 x float>.
void kernel float3_to_float4(global float3 *a, global float4 *b) {
// CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a, align 16
// CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
// The store operand uses the captured name (no ":.*"), so it must be the
// shuffle result and not just any value.
// CHECK: store <4 x float> %[[ASTYPE]], <4 x float> addrspace(1)* %b, align 16
*b = __builtin_astype(*a, float4);
}

0 comments on commit e7cdcde

Please sign in to comment.