Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 38 additions & 8 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2801,26 +2801,56 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
LValue Dst) {
llvm::Value *SrcVal = Src.getScalarVal();
Address DstAddr = Dst.getExtVectorAddress();
const llvm::Constant *Elts = Dst.getExtVectorElts();
if (DstAddr.getElementType()->getScalarSizeInBits() >
SrcVal->getType()->getScalarSizeInBits())
SrcVal = Builder.CreateZExt(
SrcVal, convertTypeForLoadStore(Dst.getType(), SrcVal->getType()));

// HLSL allows storing to scalar values through ExtVector component LValues.
// To support this we need to handle the case where the destination address is
// a scalar.
if (!DstAddr.getElementType()->isVectorTy()) {
assert(!Dst.getType()->isVectorType() &&
"this should only occur for non-vector l-values");
Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified());
if (getLangOpts().HLSL) {
llvm::Type *DestAddrTy = DstAddr.getElementType();
// HLSL allows storing to scalar values through ExtVector component LValues.
// To support this we need to handle the case where the destination address
// is a scalar.
if (!DestAddrTy->isVectorTy()) {
assert(!Dst.getType()->isVectorType() &&
"this should only occur for non-vector l-values");
Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified());
return;
}

// HLSL allows direct access to vector elements, so storing to individual
// elements of a vector through ExtVector is handled as separate store
// instructions.
// If we are updating multiple elements, Dst and Src are vectors; for
// a single element update they are scalars.
const VectorType *VTy = Dst.getType()->getAs<VectorType>();
unsigned NumSrcElts = VTy ? VTy->getNumElements() : 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't the above if case handle when Dst.getType isn't a vector? Could we assert that VTy != null instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, there are actually three cases:

  int i;
  int4 vec;

  i.x = 1.0; // case 1
  vec.y = 1; // case 2
  vec.xz = { 3, 4}; // case 3

Case 1 is storing a scalar to scalar via vector swizzle and it is handled by the code above this comment.
Case 2 is storing a scalar to vector via single element swizzle and VTy is going to be null in this case.
Case 3 is storing multiple scalars to a vector via multi-element swizzle and VTy != nullptr.

CharUnits ElemAlign = CharUnits::fromQuantity(
CGM.getDataLayout().getPrefTypeAlign(DestAddrTy->getScalarType()));
llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);

for (unsigned I = 0; I != NumSrcElts; ++I) {
llvm::Value *Val = VTy ? Builder.CreateExtractElement(
SrcVal, llvm::ConstantInt::get(Int32Ty, I))
: SrcVal;
unsigned FieldNo = getAccessedFieldNo(I, Elts);
Address DstElemAddr = Address::invalid();
if (FieldNo == 0)
DstElemAddr = DstAddr.withAlignment(ElemAlign);
else
DstElemAddr = Builder.CreateGEP(
DstAddr, {Zero, llvm::ConstantInt::get(Int32Ty, FieldNo)},
DestAddrTy, ElemAlign);
Builder.CreateStore(Val, DstElemAddr, Dst.isVolatileQualified());
}
return;
}

// This access turns into a read/modify/write of the vector. Load the input
// value now.
llvm::Value *Vec = Builder.CreateLoad(DstAddr, Dst.isVolatileQualified());
llvm::Type *VecTy = Vec->getType();
const llvm::Constant *Elts = Dst.getExtVectorElts();

if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) {
unsigned NumSrcElts = VTy->getNumElements();
Expand Down
12 changes: 9 additions & 3 deletions clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,16 @@ void funky(inout int3 X) {
// Call the function with the temporary.
// CHECK: call void {{.*}}funky{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[ArgTmp]])

// Shuffle it back.
// Write it back.
// CHECK: [[RetVal:%.*]] = load <3 x i32>, ptr [[ArgTmp]]
// CHECK: [[Vxyz:%.*]] = shufflevector <3 x i32> [[RetVal]], <3 x i32> poison, <3 x i32> <i32 2, i32 0, i32 1>
// CHECK: store <3 x i32> [[Vxyz]], ptr [[V]]
// CHECK: [[Src0:%.*]] = extractelement <3 x i32> [[RetVal]], i32 0
// CHECK: [[PtrY:%.*]] = getelementptr <3 x i32>, ptr %V, i32 0, i32 1
// CHECK: store i32 [[Src0]], ptr [[PtrY]], align 4
// CHECK: [[Src1:%.*]] = extractelement <3 x i32> [[RetVal]], i32 1
// CHECK: [[PtrZ:%.*]] = getelementptr <3 x i32>, ptr %V, i32 0, i32 2
// CHECK: store i32 [[Src1]], ptr [[PtrZ]], align 4
// CHECK: [[Src2:%.*]] = extractelement <3 x i32> [[RetVal]], i32 2
// CHECK: store i32 [[Src2]], ptr %V, align 4

// OPT: ret <3 x i32> <i32 3, i32 1, i32 2>
export int3 case4() {
Expand Down
25 changes: 15 additions & 10 deletions clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,8 @@ bool AssignBool(bool V) {
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4
// CHECK-NEXT: [[LV1:%.*]] = trunc i32 [[B]] to i1
// CHECK-NEXT: [[D:%.*]] = zext i1 [[LV1]] to i32
// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8
// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i32 1
// CHECK-NEXT: store <2 x i32> [[E]], ptr [[X]], align 8
// CHECK-NEXT: [[C:%.*]] = getelementptr <2 x i32>, ptr [[X]], i32 0, i32 1
// CHECK-NEXT: store i32 [[D]], ptr [[C]], align 4
// CHECK-NEXT: ret void
void AssignBool2(bool V) {
bool2 X = true.xx;
Expand All @@ -277,10 +276,13 @@ void AssignBool2(bool V) {
// CHECK-NEXT: [[Z:%.*]] = load <2 x i32>, ptr [[VAddr]], align 8
// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1>
// CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[LV]] to <2 x i32>
// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8
// CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: store <2 x i32> [[C]], ptr [[X]], align 8
// CHECK-NEXT: [[V1:%.*]] = extractelement <2 x i32> [[B]], i32 0
// CHECK-NEXT: store i32 [[V1]], ptr [[X]], align 4
// CHECK-NEXT: [[V2:%.*]] = extractelement <2 x i32> [[B]], i32 1
// CHECK-NEXT: [[X2:%.*]] = getelementptr <2 x i32>, ptr [[X]], i32 0, i32 1
// CHECK-NEXT: store i32 [[V2]], ptr [[X2]], align 4
// CHECK-NEXT: ret void

void AssignBool3(bool2 V) {
bool2 X = {true,true};
X.xy = V;
Expand Down Expand Up @@ -313,10 +315,13 @@ bool2 AccessBools() {
// CHECK-NEXT: [[L1:%.*]] = shufflevector <1 x i32> [[L0]], <1 x i32> poison, <3 x i32> zeroinitializer
// CHECK-NEXT: [[TruncV:%.*]] = trunc <3 x i32> [[L1]] to <3 x i1>
// CHECK-NEXT: [[L2:%.*]] = zext <3 x i1> [[TruncV]] to <3 x i32>
// CHECK-NEXT: [[L3:%.*]] = load <4 x i32>, ptr [[B]], align 16
// CHECK-NEXT: [[L4:%.*]] = shufflevector <3 x i32> [[L2]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: [[L5:%.*]] = shufflevector <4 x i32> [[L3]], <4 x i32> [[L4]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
// CHECK-NEXT: store <4 x i32> [[L5]], ptr [[B]], align 16
// CHECK-NEXT: [[V1:%.*]] = extractelement <3 x i32> [[L2]], i32 0
// CHECK-NEXT: store i32 [[V1]], ptr %B, align 4
// CHECK-NEXT: [[V2:%.*]] = extractelement <3 x i32> [[L2]], i32 1
// CHECK-NEXT: [[B2:%.*]] = getelementptr <4 x i32>, ptr %B, i32 0, i32 1
// CHECK-NEXT: store i32 [[V2]], ptr [[B2]], align 4
// CHECK-NEXT: [[V3:%.*]] = extractelement <3 x i32> [[L2]], i32 2
// CHECK-NEXT: [[B3:%.*]] = getelementptr <4 x i32>, ptr %B, i32 0, i32 2
void BoolSizeMismatch() {
bool4 B = {true,true,true,true};
B.xyz = false.xxx;
Expand Down
96 changes: 96 additions & 0 deletions clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// RUN: %clang_cc1 -finclude-default-header -fnative-half-type \
// RUN: -triple dxil-pc-shadermodel6.3-library %s -disable-llvm-passes \
// RUN: -emit-llvm -o - | FileCheck %s

// CHECK-LABEL: Single

// Setup local vars.
// CHECK: [[VecAddr:%.*]] = alloca <3 x i64>, align 32
// CHECK-NEXT: [[AAddr:%.*]] = alloca i64, align 8
// CHECK-NEXT: store <3 x i64> %vec, ptr [[VecAddr]], align 32
// CHECK-NEXT: store i64 %a, ptr [[AAddr]], align 8

// Update single element of the vector.
// CHECK-NEXT: [[A:%.*]] = load i64, ptr [[AAddr]], align 8
// CHECK-NEXT: [[Vy:%.*]] = getelementptr <3 x i64>, ptr [[VecAddr]], i32 0, i32 1
// CHECK-NEXT: store i64 [[A]], ptr [[Vy]], align 8

// Return.
// CHECK-NEXT: [[RetVal:%.*]] = load <3 x i64>, ptr [[VecAddr]], align 32
// CHECK-NEXT: ret <3 x i64> [[RetVal]]
uint64_t3 Single(uint64_t3 vec, uint64_t a){
vec.y = a;
return vec;
}

// CHECK-LABEL: Double

// Setup local vars.
// CHECK: [[VecAddr:%.*]] = alloca <3 x float>, align 16
// CHECK-NEXT: [[AAddr:%.*]] = alloca float, align 4
// CHECK-NEXT: [[BAddr:%.*]] = alloca float, align 4
// CHECK-NEXT: store <3 x float> %vec, ptr [[VecAddr]], align 16
// CHECK-NEXT: store float %a, ptr [[AAddr]], align 4
// CHECK-NEXT: store float %b, ptr [[BAddr]], align 4

// Create temporary vector {a, b}.
// CHECK-NEXT: [[A:%.*]] = load float, ptr [[AAddr]], align 4
// CHECK-NEXT: [[TmpVec0:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
// CHECK-NEXT: [[B:%.*]] = load float, ptr [[BAddr]], align 4
// CHECK-NEXT: [[TmpVec1:%.*]] = insertelement <2 x float> [[TmpVec0]], float [[B]], i32 1

// Update two elements of the vector from temporary vector.
// CHECK-NEXT: [[TmpX:%.*]] = extractelement <2 x float> [[TmpVec1]], i32 0
// CHECK-NEXT: [[VecZ:%.*]] = getelementptr <3 x float>, ptr [[VecAddr]], i32 0, i32 2
// CHECK-NEXT: store float [[TmpX]], ptr [[VecZ]], align 4
// CHECK-NEXT: [[TmpY:%.*]] = extractelement <2 x float> [[TmpVec1]], i32 1
// CHECK-NEXT: [[VecY:%.*]] = getelementptr <3 x float>, ptr [[VecAddr]], i32 0, i32 1
// CHECK-NEXT: store float [[TmpY]], ptr [[VecY]], align 4

// Return.
// CHECK-NEXT: [[RetVal:%.*]] = load <3 x float>, ptr [[VecAddr]], align 16
// CHECK-NEXT: ret <3 x float> [[RetVal]]
float3 Double(float3 vec, float a, float b) {
vec.zy = {a, b};
return vec;
}

// CHECK-LABEL: Shuffle

// Setup local vars.
// CHECK: [[VecAddr:%.*]] = alloca <4 x half>, align 8
// CHECK-NEXT: [[AAddr:%.*]] = alloca half, align 2
// CHECK-NEXT: [[BAddr:%.*]] = alloca half, align 2
// CHECK-NEXT: store <4 x half> %vec, ptr [[VecAddr]], align 8
// CHECK-NEXT: store half %a, ptr [[AAddr]], align 2
// CHECK-NEXT: store half %b, ptr [[BAddr]], align 2

// Create temporary vector {a, b, 13.74, a}.
// CHECK-NEXT: [[A:%.*]] = load half, ptr [[AAddr]], align 2
// CHECK-NEXT: [[TmpVec0:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
// CHECK-NEXT: [[B:%.*]] = load half, ptr [[BAddr]], align 2
// CHECK-NEXT: [[TmpVec1:%.*]] = insertelement <4 x half> [[TmpVec0]], half [[B]], i32 1
// CHECK-NEXT: [[TmpVec2:%.*]] = insertelement <4 x half> %vecinit1, half 0xH4ADF, i32 2
// CHECK-NEXT: [[A:%.*]] = load half, ptr [[AAddr]], align 2
// CHECK-NEXT: [[TmpVec3:%.*]] = insertelement <4 x half> [[TmpVec2]], half [[A]], i32 3

// Update four elements of the vector via mixed up swizzle from the temporary vector.
// CHECK-NEXT: [[TmpX:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 0
// CHECK-NEXT: [[VecZ:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 2
// CHECK-NEXT: store half [[TmpX]], ptr [[VecZ]], align 2
// CHECK-NEXT: [[TmpY:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 1
// CHECK-NEXT: [[VecW:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 3
// CHECK-NEXT: store half [[TmpY]], ptr [[VecW]], align 2
// CHECK-NEXT: [[TmpZ:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 2
// CHECK-NEXT: store half [[TmpZ]], ptr [[VecAddr]], align 2
// CHECK-NEXT: [[TmpW:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 3
// CHECK-NEXT: [[VecY:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 1
// CHECK-NEXT: store half [[TmpW]], ptr [[VecY]], align 2

// Return.
// CHECK-NEXT: [[RetVal:%.*]] = load <4 x half>, ptr [[VecAddr]], align 8
// CHECK-NEXT: ret <4 x half> [[RetVal]]
half4 Shuffle(half4 vec, half a, half b) {
vec.zwxy = {a, b, 13.74, a};
return vec;
}