Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/dxc/DXIL/DxilConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -2487,6 +2487,8 @@ extern const char *kHostLayoutTypePrefix;

extern const char *kWaveOpsIncludeHelperLanesString;

extern const char *kPreciseString;

} // namespace DXIL

} // namespace hlsl
3 changes: 3 additions & 0 deletions lib/DXIL/DxilModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ const char *kDxLinAlgMatrixTypePrefix = "dx.types.LinAlgMatrix";
const char *kHostLayoutTypePrefix = "hostlayout.";

const char *kWaveOpsIncludeHelperLanesString = "waveops-include-helper-lanes";

const char *kPreciseString = "dx.precise";

} // namespace DXIL

void SetDxilHook(Module &M);
Expand Down
20 changes: 10 additions & 10 deletions lib/HLSL/HLModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1022,19 +1022,18 @@ void HLModule::ClearPreciseAttributeWithMetadata(Instruction *I) {
}

// Marks function F as a precise-attribute marker function.
// NOTE(review): this is a flattened diff view without +/- markers, so the
// metadata lines and the addFnAttr line below are probably the removed and
// added sides of one change rather than code meant to run together - confirm
// against the actual file.
static void MarkPreciseAttribute(Function *F) {
LLVMContext &Ctx = F->getContext();
// Metadata form: a node wrapping the precise-attribute name string, attached
// to F under that same name.
MDNode *preciseNode = MDNode::get(
Ctx, {MDString::get(Ctx, DxilMDHelper::kDxilPreciseAttributeMDName)});

F->setMetadata(DxilMDHelper::kDxilPreciseAttributeMDName, preciseNode);
// Attribute form: a string function attribute named by DXIL::kPreciseString.
F->addFnAttr(DXIL::kPreciseString);
}

template <typename BuilderTy>
void HLModule::MarkPreciseAttributeOnValWithFunctionCall(llvm::Value *V,
BuilderTy &Builder,
llvm::Module &M) {
Type *Ty = V->getType();
Type *EltTy = Ty->getScalarType();
Type *EltTy = Ty;
bool SupportsVectors = M.GetHLModule().GetShaderModel()->IsSM69Plus();
if (!SupportsVectors)
EltTy = Ty->getScalarType();

// TODO: Only do this on basic types.

Expand All @@ -1050,7 +1049,8 @@ void HLModule::MarkPreciseAttributeOnValWithFunctionCall(llvm::Value *V,
cast<Function>(M.getOrInsertFunction(preciseFuncName, preciseFuncTy));
if (!HLModule::HasPreciseAttribute(preciseFunc))
MarkPreciseAttribute(preciseFunc);
if (FixedVectorType *VT = dyn_cast<FixedVectorType>(Ty)) {
if (!SupportsVectors && isa<FixedVectorType>(Ty)) {
FixedVectorType *VT = dyn_cast<FixedVectorType>(Ty);
for (unsigned i = 0; i < VT->getNumElements(); i++) {
Value *Elt = Builder.CreateExtractElement(V, i);
Builder.CreateCall(preciseFunc, {Elt});
Expand Down Expand Up @@ -1103,9 +1103,9 @@ void HLModule::MarkPreciseAttributeOnPtrWithFunctionCall(llvm::Value *Ptr,
}

// Reports whether F has been marked as a precise-attribute function.
// NOTE(review): as rendered here the attribute lookup follows an
// unconditional return and would never execute. This looks like the removed
// (metadata) and added (attribute) sides of a diff shown together without
// +/- markers - confirm against the actual file.
bool HLModule::HasPreciseAttribute(Function *F) {
// Metadata form: true when F has metadata attached under the
// precise-attribute name.
MDNode *preciseNode =
F->getMetadata(DxilMDHelper::kDxilPreciseAttributeMDName);
return preciseNode != nullptr;
// Attribute form: true when F's function-level attributes include the
// string attribute named by DXIL::kPreciseString.
AttributeSet Attributeset = F->getAttributes();
return Attributeset.hasAttribute(AttributeSet::FunctionIndex,
DXIL::kPreciseString);
}

static void AddDIGlobalVariable(DIBuilder &Builder, DIGlobalVariable *LocDIGV,
Expand Down
5 changes: 5 additions & 0 deletions lib/Transforms/Scalar/DxilConditionalMem2Reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,11 @@ class DxilConditionalMem2Reg : public FunctionPass {
static bool ScalarizePreciseVectorAlloca(Function &F) {
BasicBlock *Entry = &*F.begin();

// No need to scalarize the vector if 6.9 native vector support is available
Module *M = F.getParent();
if (M->HasHLModule() && M->GetHLModule().GetShaderModel()->IsSM69Plus())
return false;

SmallVector<AllocaInst *, 4> PreciseAllocaInsts;
for (auto it = Entry->begin(); it != Entry->end();) {
Instruction *I = &*(it++);
Expand Down
23 changes: 23 additions & 0 deletions tools/clang/test/CodeGenDXIL/hlsl/types/longvec-precise.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: %dxc -T vs_6_9 %s | FileCheck %s
// Tests a specific case of a precise native vector requiring extraction
// and reinsertion during the alloca phase where conditionalmem2reg is concerned.
// Serves as the source for the longvec-precise-{mem2reg,sroa}.ll pass tests

precise float4 main (float4 pos : POSITION, float4 scale : SCL, float4 shift : OFF) : SV_Position {
precise float4 position = pos;
// Initial multiplication to avoid optimization just using the input scalar
// CHECK-NOT: fmul fast
// CHECK: [[pos:%.*]] = fmul <4 x float>
position = position * scale;

// CHECK: [[z:%.*]] = extractelement <4 x float> [[pos]], i32 2
// CHECK-NOT: fadd fast
// CHECK: [[sz:%.*]] = fadd float [[z]], 0x
// CHECK: [[spos:%.*]] = insertelement <4 x float> [[pos]], float [[sz]], i32 2
position.z += 0.01f;
// CHECK-NOT: fadd fast
// CHECK: fadd <4 x float> %{{.*}}, [[spos]]
position += shift;

return position;
}
112 changes: 112 additions & 0 deletions tools/clang/test/CodeGenDXIL/passes/longvec-precise-mem2reg.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
; RUN: %dxopt %s -hlsl-passes-resume -dxil-cond-mem2reg -S | FileCheck %s
; Test that conditionalmem2reg does not scalarize precise native vectors
; as it would pre-6.9 as part of keeping their allocas around to maintain
; the precise information.

; The checks are just confirming that the precise calls are preserved

target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-ms-dx"

; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #0

; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #0

; Function Attrs: nounwind
define void @main(<4 x float>* noalias %arg, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) #0 {
entry:
%shift.addr = alloca <4 x float>, align 4, !dx.temp !9
%scale.addr = alloca <4 x float>, align 4, !dx.temp !9
%pos.addr = alloca <4 x float>, align 4, !dx.temp !9

; Confirm that position is the only alloca that is preserved
; CHECK-NOT: alloca
; CHECK: %position = alloca <4 x float>, align 4, !dx.precise
; CHECK-NOT: alloca
%position = alloca <4 x float>, align 4, !dx.precise !23
store <4 x float> %arg3, <4 x float>* %shift.addr, align 4, !tbaa !24
store <4 x float> %arg2, <4 x float>* %scale.addr, align 4, !tbaa !24
store <4 x float> %arg1, <4 x float>* %pos.addr, align 4, !tbaa !24
%tmp = bitcast <4 x float>* %position to i8*
call void @llvm.lifetime.start(i64 16, i8* %tmp) #0
%tmp4 = load <4 x float>, <4 x float>* %pos.addr, align 4, !tbaa !24

; CHECK: call void @"dx.attribute.precise.<4 x float>"(<4 x float> %arg1)
; CHECK-NEXT: store <4 x float> %arg1, <4 x float>* %position, align 4
call void @"dx.attribute.precise.<4 x float>"(<4 x float> %tmp4)
store <4 x float> %tmp4, <4 x float>* %position, align 4, !tbaa !24
%tmp5 = load <4 x float>, <4 x float>* %position, align 4, !tbaa !24
%tmp6 = load <4 x float>, <4 x float>* %scale.addr, align 4, !tbaa !24
%mul = fmul <4 x float> %tmp5, %tmp6

; CHECK: call void @"dx.attribute.precise.<4 x float>"(<4 x float> %mul)
; CHECK-NEXT: store <4 x float> %mul, <4 x float>* %position, align 4
call void @"dx.attribute.precise.<4 x float>"(<4 x float> %mul)
store <4 x float> %mul, <4 x float>* %position, align 4, !tbaa !24
%tmp7 = load <4 x float>, <4 x float>* %position, align 4
%tmp8 = extractelement <4 x float> %tmp7, i32 2
%add = fadd float %tmp8, 0x3F847AE140000000
%tmp9 = getelementptr <4 x float>, <4 x float>* %position, i32 0, i32 2

; CHECK: call void @dx.attribute.precise.float(float %add)
; CHECK-NEXT: store float %add, float* %tmp9
call void @dx.attribute.precise.float(float %add)
store float %add, float* %tmp9
%tmp10 = load <4 x float>, <4 x float>* %shift.addr, align 4, !tbaa !24
%tmp11 = load <4 x float>, <4 x float>* %position, align 4, !tbaa !24
%add1 = fadd <4 x float> %tmp11, %tmp10

; CHECK: call void @"dx.attribute.precise.<4 x float>"(<4 x float> %add1)
; CHECK-NEXT: store <4 x float> %add1, <4 x float>* %position, align 4
call void @"dx.attribute.precise.<4 x float>"(<4 x float> %add1)
store <4 x float> %add1, <4 x float>* %position, align 4, !tbaa !24
%tmp12 = load <4 x float>, <4 x float>* %position, align 4, !tbaa !24
%tmp13 = bitcast <4 x float>* %position to i8*
call void @llvm.lifetime.end(i64 16, i8* %tmp13) #0
store <4 x float> %tmp12, <4 x float>* %arg
ret void
}

declare void @"dx.attribute.precise.<4 x float>"(<4 x float>) #1

declare void @dx.attribute.precise.float(float) #1

attributes #0 = { nounwind }
attributes #1 = { "dx.precise" }

!pauseresume = !{!1}
!dx.version = !{!3}
!dx.valver = !{!4}
!dx.shaderModel = !{!5}
!dx.typeAnnotations = !{!6}
!dx.entryPoints = !{!19}
!dx.fnprops = !{!20}
!dx.options = !{!21, !22}

!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
!3 = !{i32 1, i32 9}
!4 = !{i32 1, i32 10}
!5 = !{!"vs", i32 6, i32 9}
!6 = !{i32 1, void (<4 x float>*, <4 x float>, <4 x float>, <4 x float>)* @main, !7}
!7 = !{!8, !10, !13, !15, !17}
!8 = !{i32 0, !9, !9}
!9 = !{}
!10 = !{i32 1, !11, !12}
!11 = !{i32 8, i1 true, i32 4, !"SV_Position", i32 7, i32 9}
!12 = !{i32 0}
!13 = !{i32 0, !14, !12}
!14 = !{i32 4, !"POSITION", i32 7, i32 9}
!15 = !{i32 0, !16, !12}
!16 = !{i32 4, !"SCL", i32 7, i32 9}
!17 = !{i32 0, !18, !12}
!18 = !{i32 4, !"OFF", i32 7, i32 9}
!19 = !{void (<4 x float>*, <4 x float>, <4 x float>, <4 x float>)* @main, !"main", null, null, null}
!20 = !{void (<4 x float>*, <4 x float>, <4 x float>, <4 x float>)* @main, i32 1}
!21 = !{i32 64}
!22 = !{i32 -1}
!23 = !{i32 1}
!24 = !{!25, !25, i64 0}
!25 = !{!"omnipotent char", !26, i64 0}
!26 = !{!"Simple C/C++ TBAA"}
111 changes: 111 additions & 0 deletions tools/clang/test/CodeGenDXIL/passes/longvec-precise-sroa.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s
; Test that precise native vector allocas are marked with a vector overload call
; to dx.attribute.precise() and not scalar extracted and re-inserted

target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-ms-dx"

%ConstantBuffer = type opaque

@"$Globals" = external constant %ConstantBuffer

; Function Attrs: nounwind
define <4 x float> @main(<4 x float> %pos, <4 x float> %scale, <4 x float> %shift) #0 {
bb:
%tmp = alloca <4 x float>, align 4, !dx.temp !10
%tmp1 = alloca <4 x float>, align 4, !dx.temp !10
%tmp2 = alloca <4 x float>, align 4, !dx.temp !10
%position = alloca <4 x float>, align 4, !dx.precise !24
store <4 x float> %shift, <4 x float>* %tmp, align 4, !tbaa !25
store <4 x float> %scale, <4 x float>* %tmp1, align 4, !tbaa !25
store <4 x float> %pos, <4 x float>* %tmp2, align 4, !tbaa !25
%tmp3 = bitcast <4 x float>* %position to i8* ; line:7 col:3
call void @llvm.lifetime.start(i64 16, i8* %tmp3) #0 ; line:7 col:3
%tmp4 = load <4 x float>, <4 x float>* %tmp2, align 4, !tbaa !25 ; line:7 col:29
; CHECK: %tmp4 = load <4 x float>, <4 x float>* %tmp2
; CHECK-NOT: extractelement
; CHECK-NOT: dx.attribute.precise.float
; CHECK: call void @"dx.attribute.precise.<4 x float>"(<4 x float> %tmp4)

store <4 x float> %tmp4, <4 x float>* %position, align 4, !tbaa !25 ; line:7 col:18
%tmp5 = load <4 x float>, <4 x float>* %position, align 4, !tbaa !25 ; line:11 col:14
%tmp6 = load <4 x float>, <4 x float>* %tmp1, align 4, !tbaa !25 ; line:11 col:25
%tmp7 = fmul <4 x float> %tmp5, %tmp6 ; line:11 col:23

; CHECK: %tmp7 = fmul <4 x float> %tmp5, %tmp6
; CHECK-NOT: extractelement
; CHECK-NOT: dx.attribute.precise.float
; CHECK: call void @"dx.attribute.precise.<4 x float>"(<4 x float> %tmp7)

store <4 x float> %tmp7, <4 x float>* %position, align 4, !tbaa !25 ; line:11 col:12
%tmp8 = load <4 x float>, <4 x float>* %position, align 4 ; line:17 col:14
%tmp9 = extractelement <4 x float> %tmp8, i32 2 ; line:17 col:14
%tmp10 = fadd float %tmp9, 0x3F847AE140000000 ; line:17 col:14
%tmp11 = load <4 x float>, <4 x float>* %position, align 4 ; line:17 col:14
%tmp12 = getelementptr <4 x float>, <4 x float>* %position, i32 0, i32 2 ; line:17 col:14

; CHECK: %tmp12 = getelementptr <4 x float>, <4 x float>* %position, i32 0, i32 2
; CHECK-NOT: extractelement
; CHECK-NOT: dx.attribute.precise.float
; CHECK: call void @dx.attribute.precise.float(float %tmp10)

store float %tmp10, float* %tmp12 ; line:17 col:14
%tmp13 = load <4 x float>, <4 x float>* %tmp, align 4, !tbaa !25 ; line:20 col:15
%tmp14 = load <4 x float>, <4 x float>* %position, align 4, !tbaa !25 ; line:20 col:12
%tmp15 = fadd <4 x float> %tmp14, %tmp13 ; line:20 col:12

; CHECK: %tmp15 = fadd <4 x float> %tmp14, %tmp13
; CHECK-NOT: extractelement
; CHECK-NOT: dx.attribute.precise.float
; CHECK: call void @"dx.attribute.precise.<4 x float>"(<4 x float> %tmp15)

store <4 x float> %tmp15, <4 x float>* %position, align 4, !tbaa !25 ; line:20 col:12
%tmp16 = load <4 x float>, <4 x float>* %position, align 4, !tbaa !25 ; line:22 col:10
%tmp17 = bitcast <4 x float>* %position to i8* ; line:23 col:1
call void @llvm.lifetime.end(i64 16, i8* %tmp17) #0 ; line:23 col:1
ret <4 x float> %tmp16 ; line:22 col:3
}

; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #0

; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #0

attributes #0 = { nounwind }

!pauseresume = !{!1}
!dx.version = !{!3}
!dx.valver = !{!4}
!dx.shaderModel = !{!5}
!dx.typeAnnotations = !{!6}
!dx.entryPoints = !{!17}
!dx.fnprops = !{!21}
!dx.options = !{!22, !23}

!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
!3 = !{i32 1, i32 9}
!4 = !{i32 1, i32 10}
!5 = !{!"vs", i32 6, i32 9}
!6 = !{i32 1, <4 x float> (<4 x float>, <4 x float>, <4 x float>)* @main, !7}
!7 = !{!8, !11, !13, !15}
!8 = !{i32 1, !9, !10}
!9 = !{i32 8, i1 true, i32 4, !"SV_Position", i32 7, i32 9, i32 13, i32 4}
!10 = !{}
!11 = !{i32 0, !12, !10}
!12 = !{i32 4, !"POSITION", i32 7, i32 9, i32 13, i32 4}
!13 = !{i32 0, !14, !10}
!14 = !{i32 4, !"SCL", i32 7, i32 9, i32 13, i32 4}
!15 = !{i32 0, !16, !10}
!16 = !{i32 4, !"OFF", i32 7, i32 9, i32 13, i32 4}
!17 = !{<4 x float> (<4 x float>, <4 x float>, <4 x float>)* @main, !"main", null, !18, null}
!18 = !{null, null, !19, null}
!19 = !{!20}
!20 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null}
!21 = !{<4 x float> (<4 x float>, <4 x float>, <4 x float>)* @main, i32 1}
!22 = !{i32 64}
!23 = !{i32 -1}
!24 = !{i32 1}
!25 = !{!26, !26, i64 0}
!26 = !{!"omnipotent char", !27, i64 0}
!27 = !{!"Simple C/C++ TBAA"}
Loading