diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index f2451b16e78be..f04be5bfd9daf 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2575,6 +2575,32 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit) { if (!Dst.isSimple()) { if (Dst.isVectorElt()) { + if (getLangOpts().HLSL) { + // HLSL allows direct access to vector elements, so storing to + // individual elements of a vector through VectorElt is handled as + // separate store instructions. + Address DstAddr = Dst.getVectorAddress(); + llvm::Type *DestAddrTy = DstAddr.getElementType(); + llvm::Type *ElemTy = DestAddrTy->getScalarType(); + CharUnits ElemAlign = CharUnits::fromQuantity( + CGM.getDataLayout().getPrefTypeAlign(ElemTy)); + + assert(ElemTy->getScalarSizeInBits() >= 8 && + "vector element type must be at least byte-sized"); + + llvm::Value *Val = Src.getScalarVal(); + if (Val->getType()->getPrimitiveSizeInBits() < + ElemTy->getScalarSizeInBits()) + Val = Builder.CreateZExt(Val, ElemTy->getScalarType()); + + llvm::Value *Idx = Dst.getVectorIdx(); + llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0); + Address DstElemAddr = + Builder.CreateGEP(DstAddr, {Zero, Idx}, DestAddrTy, ElemAlign); + Builder.CreateStore(Val, DstElemAddr, Dst.isVolatileQualified()); + return; + } + // Read/modify/write the vector, inserting the new element. 
llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddress(), Dst.isVolatileQualified()); diff --git a/clang/test/CodeGenHLSL/BoolVector.hlsl b/clang/test/CodeGenHLSL/BoolVector.hlsl index d5054a5a92b5d..6be90e8f51ce2 100644 --- a/clang/test/CodeGenHLSL/BoolVector.hlsl +++ b/clang/test/CodeGenHLSL/BoolVector.hlsl @@ -69,9 +69,8 @@ bool fn4() { // CHECK-LABEL: define hidden void {{.*}}fn5{{.*}} // CHECK: [[Arr:%.*]] = alloca <2 x i32>, align 8 // CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[Arr]], align 8 -// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[Arr]], align 8 -// CHECK-NEXT: [[V:%.*]] = insertelement <2 x i32> [[L]], i32 0, i32 1 -// CHECK-NEXT: store <2 x i32> [[V]], ptr [[Arr]], align 8 +// CHECK-NEXT: [[Ptr:%.*]] = getelementptr <2 x i32>, ptr [[Arr]] +// CHECK-NEXT: store i32 0, ptr [[Ptr]], align 4 // CHECK-NEXT: ret void void fn5() { bool2 Arr = {true,true}; @@ -86,10 +85,9 @@ void fn5() { // CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[V]], align 4 // CHECK-NEXT: [[LV:%.*]] = trunc i32 [[Y]] to i1 // CHECK-NEXT: [[BV:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], i32 0, i32 0 -// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[BV]], align 1 // CHECK-NEXT: [[Z:%.*]] = zext i1 [[LV]] to i32 -// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 [[Z]], i32 1 -// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[BV]], align 1 +// CHECK-NEXT: [[Ptr:%.*]] = getelementptr <2 x i32>, ptr [[BV]], i32 0, i32 1 +// CHECK-NEXT: store i32 [[Z]], ptr [[Ptr]], align 4 // CHECK-NEXT: ret void void fn6() { bool V = false; @@ -101,9 +99,8 @@ void fn6() { // CHECK: [[Arr:%.*]] = alloca [2 x <2 x i32>], align 8 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Arr]], ptr align 8 {{.*}}, i32 16, i1 false) // CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[Arr]], i32 0, i32 0 -// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[Idx]], align 8 -// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 0, i32 1 -// CHECK-NEXT: 
store <2 x i32> [[VI]], ptr [[Idx]], align 8 +// CHECK-NEXT: %[[Ptr:.*]] = getelementptr <2 x i32>, ptr [[Idx]], i32 0, i32 1 +// CHECK-NEXT: store i32 0, ptr %[[Ptr]], align 4 // CHECK-NEXT: ret void void fn7() { bool2 Arr[2] = {{true,true}, {false,false}}; diff --git a/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl b/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl new file mode 100644 index 0000000000000..e0c3aa54aaeba --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -finclude-default-header -emit-llvm -disable-llvm-passes \ +// RUN: -triple dxil-pc-shadermodel6.3-library %s -o - | FileCheck %s + +// Test groupshared vector element store for uint. +// CHECK-LABEL: test_uint4 +// CHECK: [[VAL:%.*]] = load i32, ptr %Val.addr, align 4 +// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4 +// CHECK: [[PTR:%.*]] = getelementptr <4 x i32>, ptr addrspace(3) @SMem, i32 0, i32 [[IDX]] +// CHECK: store i32 [[VAL]], ptr addrspace(3) [[PTR]], align 4 +// CHECK: ret void +groupshared uint4 SMem; +void test_uint4(uint Idx, uint Val) { + SMem[Idx] = Val; +} + +// Test local vector element store for bool. +// CHECK: [[COND1:%.*]] = load i32, ptr addrspace(3) @Cond, align 4 +// CHECK: [[COND2:%.*]] = trunc i32 [[COND1]] to i1 +// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4 +// CHECK: [[COND3:%.*]] = zext i1 [[COND2]] to i32 +// CHECK: [[PTR:%.*]] = getelementptr <3 x i32>, ptr %Val, i32 0, i32 [[IDX]] +// CHECK: store i32 [[COND3]], ptr [[PTR]], align 4 +// CHECK: ret +groupshared bool Cond; +bool3 test_bool(uint Idx) { + bool3 Val = { false, false, false}; + Val[Idx] = Cond; + return Val; +} + +// Test resource vector element store for float. 
+// CHECK: [[VAL:%.*]] = load float, ptr %Val.addr, align 4 +// CHECK: [[RES_PTR:%.*]] = call {{.*}} ptr @_ZN4hlsl18RWStructuredBufferIDv4_fEixEj(ptr {{.*}} @_ZL3Buf, i32 noundef 0) +// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4 +// CHECK: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[RES_PTR]], i32 0, i32 [[IDX]] +// CHECK: store float [[VAL]], ptr [[PTR]], align 4 +// CHECK: ret void +RWStructuredBuffer<float4> Buf : register(u0); +void test_float(uint Idx, float Val) { + Buf[0][Idx] = Val; +} diff --git a/clang/test/CodeGenHLSL/builtins/lit.hlsl b/clang/test/CodeGenHLSL/builtins/lit.hlsl index b7979960de9f6..364c2e8794ea2 100644 --- a/clang/test/CodeGenHLSL/builtins/lit.hlsl +++ b/clang/test/CodeGenHLSL/builtins/lit.hlsl @@ -11,7 +11,8 @@ // CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn half [[LOG]], %{{.*}} // CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(half %mul.i) // CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, half 0xH0000, half %{{.*}} -// CHECK: %vecins.i = insertelement <4 x half> %{{.*}}, half %hlsl.select7.i, i32 2 +// CHECK: [[PTR:%.*]] = getelementptr <4 x half>, ptr %Result.i, i32 0, i32 2 +// CHECK: store half %hlsl.select7.i, ptr [[PTR]], align 2 // CHECK: ret <4 x half> %{{.*}} half4 test_lit_half(half NDotL, half NDotH, half M) { return lit(NDotL, NDotH, M); } @@ -26,6 +27,7 @@ half4 test_lit_half(half NDotL, half NDotH, half M) { return lit(NDotL, NDotH, M // CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float [[LOG]], %{{.*}} // CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32(float %mul.i) // CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, float 0.000000e+00, float %{{.*}} -// CHECK: %vecins.i = insertelement <4 x float> %{{.*}}, float %hlsl.select7.i, i32 2 +// CHECK: [[PTR:%.*]] = getelementptr <4 x float>, ptr %Result.i, i32 0, i32 2 +// CHECK: store float %hlsl.select7.i, ptr [[PTR]], align 4 // CHECK: ret <4 
x float> %{{.*}} float4 test_lit_float(float NDotL, float NDotH, float M) { return lit(NDotL, NDotH, M); }