diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index c9748bd05c..036614eab5 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -3427,7 +3427,7 @@ static void updateLifetimeForReplacement(Value *From, Value *To) { static bool DominateAllUsers(Instruction *I, Value *V, DominatorTree *DT); namespace { -void replaceScalarArrayGEPWithVectorArrayGEP(User *GEP, Value *VectorArray, +bool replaceScalarArrayGEPWithVectorArrayGEP(User *GEP, Value *VectorArray, IRBuilder<> &Builder, unsigned sizeInDwords) { gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP); @@ -3461,11 +3461,30 @@ void replaceScalarArrayGEPWithVectorArrayGEP(User *GEP, Value *VectorArray, Value *CompIdx = Builder.CreateAnd(ArrayIdx, mask); Value *NewGEP = Builder.CreateGEP( VecPtr, {ConstantInt::get(CompIdx->getType(), 0), CompIdx}); - GEP->replaceAllUsesWith(NewGEP); + + if (isa(GEP) && isa(NewGEP)) { + if (!ReplaceConstantWithInst(cast(GEP), NewGEP, Builder)) { + // If new instructions unable to be used, clean them up. + if (NewGEP->user_empty()) + cast(NewGEP)->eraseFromParent(); + if (isa(VecPtr) && VecPtr->user_empty()) + cast(VecPtr)->eraseFromParent(); + if (isa(CompIdx) && CompIdx->user_empty()) + cast(CompIdx)->eraseFromParent(); + if (isa(VecIdx) && VecIdx->user_empty()) + cast(VecIdx)->eraseFromParent(); + return false; + } + return true; + } else { + GEP->replaceAllUsesWith(NewGEP); + } + return true; } -void replaceScalarArrayWithVectorArray(Value *ScalarArray, Value *VectorArray, +bool replaceScalarArrayWithVectorArray(Value *ScalarArray, Value *VectorArray, MemCpyInst *MC, unsigned sizeInDwords) { + bool bReplacedAll = true; LLVMContext &Context = ScalarArray->getContext(); // All users should be element type. // Replace users of AI or GV. @@ -3474,24 +3493,35 @@ void replaceScalarArrayWithVectorArray(Value *ScalarArray, Value *VectorArray, if (U->user_empty()) continue; if (BitCastInst *BCI = dyn_cast(U)) { - BCI->setOperand(0, VectorArray); + // Avoid replacing the dest of the memcpy to support partial replacement. + if (MC->getArgOperand(0) != BCI) + BCI->setOperand(0, VectorArray); continue; } if (ConstantExpr *CE = dyn_cast(U)) { IRBuilder<> Builder(Context); + // If we need to replace the constant with an instruction, start at the + // memcpy, so we replace only users dominated by it. + if (isa(VectorArray)) + Builder.SetInsertPoint(MC); + if (GEPOperator *GEP = dyn_cast(U)) { - // NewGEP must be GEPOperator too. - // No instruction will be build. - replaceScalarArrayGEPWithVectorArrayGEP(U, VectorArray, Builder, - sizeInDwords); + if (!replaceScalarArrayGEPWithVectorArrayGEP(U, VectorArray, Builder, + sizeInDwords)) + bReplacedAll = false; } else if (CE->getOpcode() == Instruction::AddrSpaceCast) { Value *NewAddrSpaceCast = Builder.CreateAddrSpaceCast( VectorArray, PointerType::get(VectorArray->getType()->getPointerElementType(), CE->getType()->getPointerAddressSpace())); - replaceScalarArrayWithVectorArray(CE, NewAddrSpaceCast, MC, - sizeInDwords); + if (!replaceScalarArrayWithVectorArray(CE, NewAddrSpaceCast, MC, + sizeInDwords)) { + bReplacedAll = false; + if (Instruction *NewInst = dyn_cast(NewAddrSpaceCast)) + if (NewInst->user_empty()) + NewInst->eraseFromParent(); + } } else if (CE->hasOneUse() && CE->user_back() == MC) { continue; } else { @@ -3499,13 +3529,16 @@ void replaceScalarArrayWithVectorArray(Value *ScalarArray, Value *VectorArray, } } else if (GetElementPtrInst *GEP = dyn_cast(U)) { IRBuilder<> Builder(GEP); - replaceScalarArrayGEPWithVectorArrayGEP(U, VectorArray, Builder, - sizeInDwords); - GEP->eraseFromParent(); + if (!replaceScalarArrayGEPWithVectorArrayGEP(U, VectorArray, Builder, + sizeInDwords)) + bReplacedAll = false; + else + GEP->eraseFromParent(); } else { DXASSERT(0, "not implemented"); } } + return bReplacedAll; } // For pattern like @@ -3521,8 +3554,25 @@ bool tryToReplaceCBVec4ArrayToScalarArray(Value *V, Type *TyV, Value *Src, Type *EltTy = AT->getElementType(); unsigned sizeInBits = DL.getTypeSizeInBits(EltTy); // Convert array of float4 to array of float. - replaceScalarArrayWithVectorArray(V, Src, MC, sizeInBits >> 5); - return true; + if (replaceScalarArrayWithVectorArray(V, Src, MC, sizeInBits >> 5)) { + Value *DstBC = MC->getArgOperand(0); + MC->setArgOperand(0, UndefValue::get(MC->getArgOperand(0)->getType())); + if (DstBC->user_empty()) { + // Replacement won't include the memcpy dest. Now remove that use. + if (BitCastInst *BCI = dyn_cast(DstBC)) { + Value *Dst = BCI->getOperand(0); + Type *DstTy = Dst->getType(); + if (Dst == V) + BCI->setOperand(0, UndefValue::get(DstTy)); + else + llvm_unreachable("Unexpected dest of memcpy."); + } + } else { + llvm_unreachable("Unexpected users of memcpy bitcast."); + } + return true; + } + return false; } } // namespace diff --git a/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/array-from-cbvec-1.ll b/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/array-from-cbvec-1.ll new file mode 100644 index 0000000000..206163777f --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/array-from-cbvec-1.ll @@ -0,0 +1,146 @@ +; RUN: %opt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; This case should fail to do memcpy replacement because init block does not +; dominate init.end block. +; However, it should not produce invalid IR, as it would have with #6510 due +; to attempting to replace constant dest with instruction source, while +; assuming the source was not constant. + +; This makes sure memcpy was split and elements are properly copied to the +; scalar array, then that the scalar array is used for the result. + +; Generated using: +; ExtractIRForPassTest.py -p scalarrepl-param-hlsl -o array-to-cbvec-1.ll array-to-cbvec-1.hlsl -- -T vs_6_0 +; uint4 VectorArray[2]; +; +; uint2 main(int i : IN) : OUT { +; static const uint ScalarArray[8] = (uint[8])VectorArray; +; return uint2(ScalarArray[1], ScalarArray[6]); +; } + +; CHECK-NOT: badref +; CHECK-NOT: store <4 x float> zeroinitializer + +; Copy array elements from constant to scalar array +; CHECK: %[[VectorArray:.*]] = getelementptr inbounds %"$Globals", %"$Globals"* %{{.*}}, i32 0, i32 0 +; CHECK: %[[gep_VA0:.*]] = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %[[VectorArray]], i32 0, i32 0 +; CHECK: %[[ld_VA0:.*]] = load <4 x i32>, <4 x i32>* %[[gep_VA0]] +; CHECK: %[[ea_VA0_1:.*]] = extractelement <4 x i32> %[[ld_VA0]], i64 1 +; CHECK: store i32 %[[ea_VA0_1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @"\01?ScalarArray@?1??main@@YA?AV?$vector@I$01@@H@Z@4QBIB", i32 0, i32 1) +; CHECK: %[[gep_VA1:.*]] = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %[[VectorArray]], i32 0, i32 1 +; CHECK: %[[ld_VA1:.*]] = load <4 x i32>, <4 x i32>* %[[gep_VA1]] +; CHECK: %[[ea_VA1_2:.*]] = extractelement <4 x i32> %[[ld_VA1]], i64 2 +; CHECK: store i32 %[[ea_VA1_2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @"\01?ScalarArray@?1??main@@YA?AV?$vector@I$01@@H@Z@4QBIB", i32 0, i32 6) + +; Load from scalar array and return it +; CHECK: %[[ld_SA1:.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @"\01?ScalarArray@?1??main@@YA?AV?$vector@I$01@@H@Z@4QBIB", i32 0, i32 1), align 4 +; CHECK: %[[ld_SA6:.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @"\01?ScalarArray@?1??main@@YA?AV?$vector@I$01@@H@Z@4QBIB", i32 0, i32 6), align 4 +; CHECK: %[[ie_SA1:.*]] = insertelement <2 x i32> undef, i32 %[[ld_SA1]], i64 0 +; CHECK: %[[ie_SA6:.*]] = insertelement <2 x i32> %[[ie_SA1]], i32 %[[ld_SA6]], i64 1 +; CHECK: store <2 x i32> %[[ie_SA6]], <2 x i32>* %0 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"$Globals" = type { [2 x <4 x i32>] } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?VectorArray@@3QBV?$vector@I$03@@B" = external constant [2 x <4 x i32>], align 4 +@"\01?ScalarArray@?1??main@@YA?AV?$vector@I$01@@H@Z@4QBIB" = internal global [8 x i32] zeroinitializer, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define <2 x i32> @main(i32 %i) #0 { +entry: + %0 = alloca i32 + store i32 0, i32* %0 + %1 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) + %2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + %3 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %2, i32 0) + %4 = getelementptr inbounds %"$Globals", %"$Globals"* %3, i32 0, i32 0 + %i.addr = alloca i32, align 4, !dx.temp !13 + store i32 %i, i32* %i.addr, align 4, !tbaa !23 + %5 = load i32, i32* %0, !dbg !27 ; line:4 col:5 + %6 = and i32 %5, 1, !dbg !27 ; line:4 col:5 + %7 = icmp ne i32 %6, 0, !dbg !27 ; line:4 col:5 + br i1 %7, label %init.end, label %init, !dbg !27 ; line:4 col:5 + +init: ; preds = %entry + %8 = or i32 %5, 1, !dbg !27 ; line:4 col:5 + store i32 %8, i32* %0, !dbg !27 ; line:4 col:5 + %9 = bitcast [8 x i32]* @"\01?ScalarArray@?1??main@@YA?AV?$vector@I$01@@H@Z@4QBIB" to i8*, !dbg !31 ; line:4 col:49 + %10 = bitcast [2 x <4 x i32>]* %4 to i8*, !dbg !31 ; line:4 col:49 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %9, i8* %10, i64 32, i32 1, i1 false), !dbg !31 ; line:4 col:49 + br label %init.end, !dbg !27 ; line:4 col:5 + +init.end: ; preds = %init, %entry + %11 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @"\01?ScalarArray@?1??main@@YA?AV?$vector@I$01@@H@Z@4QBIB", i32 0, i32 1), align 4, !dbg !32, !tbaa !23 ; line:5 col:18 + %12 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @"\01?ScalarArray@?1??main@@YA?AV?$vector@I$01@@H@Z@4QBIB", i32 0, i32 6), align 4, !dbg !33, !tbaa !23 ; line:5 col:34 + %13 = insertelement <2 x i32> undef, i32 %11, i64 0, !dbg !34 ; line:5 col:17 + %14 = insertelement <2 x i32> %13, i32 %12, i64 1, !dbg !34 ; line:5 col:17 + ret <2 x i32> %14, !dbg !35 ; line:5 col:5 +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !9} +!dx.entryPoints = !{!16} +!dx.fnprops = !{!20} +!dx.options = !{!21, !22} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14508 (main, 263a77335-dirty)"} +!3 = !{i32 1, i32 0} +!4 = !{i32 1, i32 7} +!5 = !{!"vs", i32 6, i32 0} +!6 = !{i32 0, %"$Globals" undef, !7} +!7 = !{i32 32, !8} +!8 = !{i32 6, !"VectorArray", i32 3, i32 0, i32 7, i32 5} +!9 = !{i32 1, <2 x i32> (i32)* @main, !10} +!10 = !{!11, !14} +!11 = !{i32 1, !12, !13} +!12 = !{i32 4, !"OUT", i32 7, i32 5} +!13 = !{} +!14 = !{i32 0, !15, !13} +!15 = !{i32 4, !"IN", i32 7, i32 4} +!16 = !{<2 x i32> (i32)* @main, !"main", null, !17, null} +!17 = !{null, null, !18, null} +!18 = !{!19} +!19 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 32, null} +!20 = !{<2 x i32> (i32)* @main, i32 1} +!21 = !{i32 -2147483584} +!22 = !{i32 -1} +!23 = !{!24, !24, i64 0} +!24 = !{!"int", !25, i64 0} +!25 = !{!"omnipotent char", !26, i64 0} +!26 = !{!"Simple C/C++ TBAA"} +!27 = !DILocation(line: 4, column: 5, scope: !28) +!28 = !DISubprogram(name: "main", scope: !29, file: !29, line: 3, type: !30, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: <2 x i32> (i32)* @main) +!29 = !DIFile(filename: "t:\5Carray-mapping.hlsl", directory: "") +!30 = !DISubroutineType(types: !13) +!31 = !DILocation(line: 4, column: 49, scope: !28) +!32 = !DILocation(line: 5, column: 18, scope: !28) +!33 = !DILocation(line: 5, column: 34, scope: !28) +!34 = !DILocation(line: 5, column: 17, scope: !28) +!35 = !DILocation(line: 5, column: 5, scope: !28) diff --git a/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/array-from-cbvec-2.ll b/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/array-from-cbvec-2.ll new file mode 100644 index 0000000000..80fe388b6d --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/array-from-cbvec-2.ll @@ -0,0 +1,118 @@ +; RUN: %opt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Local ScalarArray copy should be eliminated in favor of directly accessing +; VectorArray from cbuffer. + +; Generated using: +; ExtractIRForPassTest.py -p scalarrepl-param-hlsl -o array-to-cbvec-2.ll array-to-cbvec-2.hlsl -- -T vs_6_0 +; uint4 VectorArray[2]; +; static const uint ScalarArray[8] = (uint[8])VectorArray; +; +; uint2 main(int i : IN) : OUT { +; return uint2(ScalarArray[1], ScalarArray[6]); +; } + +; replace memcpy path works in this case +; CHECK: %[[VectorArray:.*]] = getelementptr inbounds %"$Globals", %"$Globals"* %{{.*}}, i32 0, i32 0 +; CHECK: %[[gep_VA1:.*]] = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %[[VectorArray]], i32 0, i32 1 +; CHECK: %[[gep_VA1_2:.*]] = getelementptr <4 x i32>, <4 x i32>* %[[gep_VA1]], i32 0, i32 2 +; CHECK: %[[gep_VA0:.*]] = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %[[VectorArray]], i32 0, i32 0 +; CHECK: %[[gep_VA0_1:.*]] = getelementptr <4 x i32>, <4 x i32>* %[[gep_VA0]], i32 0, i32 1 +; CHECK: %[[ld_VA0_1:.*]] = load i32, i32* %[[gep_VA0_1]], align 4 +; CHECK: %[[ld_VA1_2:.*]] = load i32, i32* %[[gep_VA1_2]], align 4 +; CHECK: %[[ie_VA0_1:.*]] = insertelement <2 x i32> undef, i32 %[[ld_VA0_1]], i64 0 +; CHECK: %[[ie_VA1_2:.*]] = insertelement <2 x i32> %[[ie_VA0_1]], i32 %[[ld_VA1_2]], i64 1 +; CHECK: store <2 x i32> %[[ie_VA1_2]], <2 x i32>* %0 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"$Globals" = type { [2 x <4 x i32>] } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?VectorArray@@3QBV?$vector@I$03@@B" = external constant [2 x <4 x i32>], align 4 +@ScalarArray = internal global [8 x i32] undef, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define <2 x i32> @main(i32 %i) #0 { +entry: + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) #0, !dbg !23 ; line:2 col:45 + %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) #0, !dbg !23 ; line:2 col:45 + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) #0, !dbg !23 ; line:2 col:45 + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0, !dbg !23 ; line:2 col:45 + %4 = bitcast [2 x <4 x i32>]* %3 to i8*, !dbg !23 ; line:2 col:45 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast ([8 x i32]* @ScalarArray to i8*), i8* %4, i64 32, i32 1, i1 false) #0, !dbg !23 ; line:2 col:45 + %i.addr = alloca i32, align 4, !dx.temp !13 + store i32 %i, i32* %i.addr, align 4, !tbaa !29 + %5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @ScalarArray, i32 0, i32 1), align 4, !dbg !33, !tbaa !29 ; line:5 col:18 + %6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @ScalarArray, i32 0, i32 6), align 4, !dbg !34, !tbaa !29 ; line:5 col:34 + %7 = insertelement <2 x i32> undef, i32 %5, i64 0, !dbg !35 ; line:5 col:17 + %8 = insertelement <2 x i32> %7, i32 %6, i64 1, !dbg !35 ; line:5 col:17 + ret <2 x i32> %8, !dbg !36 ; line:5 col:5 +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !9} +!dx.entryPoints = !{!16} +!dx.fnprops = !{!20} +!dx.options = !{!21, !22} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14508 (main, 263a77335-dirty)"} +!3 = !{i32 1, i32 0} +!4 = !{i32 1, i32 7} +!5 = !{!"vs", i32 6, i32 0} +!6 = !{i32 0, %"$Globals" undef, !7} +!7 = !{i32 32, !8} +!8 = !{i32 6, !"VectorArray", i32 3, i32 0, i32 7, i32 5} +!9 = !{i32 1, <2 x i32> (i32)* @main, !10} +!10 = !{!11, !14} +!11 = !{i32 1, !12, !13} +!12 = !{i32 4, !"OUT", i32 7, i32 5} +!13 = !{} +!14 = !{i32 0, !15, !13} +!15 = !{i32 4, !"IN", i32 7, i32 4} +!16 = !{<2 x i32> (i32)* @main, !"main", null, !17, null} +!17 = !{null, null, !18, null} +!18 = !{!19} +!19 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 32, null} +!20 = !{<2 x i32> (i32)* @main, i32 1} +!21 = !{i32 -2147483584} +!22 = !{i32 -1} +!23 = !DILocation(line: 2, column: 45, scope: !24, inlinedAt: !27) +!24 = !DISubprogram(name: "??__EScalarArray@@YAXXZ", scope: !25, file: !25, line: 2, type: !26, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false) +!25 = !DIFile(filename: "t:\5Carray-mapping.hlsl", directory: "") +!26 = !DISubroutineType(types: !13) +!27 = distinct !DILocation(line: 4, scope: !28) +!28 = !DISubprogram(name: "main", scope: !25, file: !25, line: 4, type: !26, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, function: <2 x i32> (i32)* @main) +!29 = !{!30, !30, i64 0} +!30 = !{!"int", !31, i64 0} +!31 = !{!"omnipotent char", !32, i64 0} +!32 = !{!"Simple C/C++ TBAA"} +!33 = !DILocation(line: 5, column: 18, scope: !28) +!34 = !DILocation(line: 5, column: 34, scope: !28) +!35 = !DILocation(line: 5, column: 17, scope: !28) +!36 = !DILocation(line: 5, column: 5, scope: !28) diff --git a/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/array-from-cbvec-3.ll b/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/array-from-cbvec-3.ll new file mode 100644 index 0000000000..ff8e15a98c --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/array-from-cbvec-3.ll @@ -0,0 +1,118 @@ +; RUN: %opt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Check that dynamic indexing results in dynamic indexing of cbuffer VectorArray +; and contained vector, instead of using a local copy. + +; Generated using: +; ExtractIRForPassTest.py -p scalarrepl-param-hlsl -o array-from-cbvec-3.ll array-from-cbvec-3.hlsl -- -T vs_6_0 +; uint4 VectorArray[2]; +; static const uint ScalarArray[8] = (uint[8])VectorArray; +; +; uint main(int i : IN) : OUT { +; return ScalarArray[i]; +; } + +; CHECK:define void @main(i32* noalias, i32) +; CHECK: %[[iaddr:.*]] = alloca i32, align 4 +; CHECK: %[[VectorArray:.*]] = getelementptr inbounds %"$Globals", %"$Globals"* %{{.*}}, i32 0, i32 0 +; CHECK: store i32 %1, i32* %[[iaddr]], align 4 +; CHECK: %[[ld_i:.*]] = load i32, i32* %[[iaddr]], align 4 +; CHECK: %[[add_0_i:.*]] = add i32 0, %[[ld_i]] +; CHECK: %[[lshr_i:.*]] = lshr i32 %[[add_0_i]], 2 +; CHECK: %[[gep_VA_i:.*]] = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %[[VectorArray]], i32 0, i32 %[[lshr_i]] +; CHECK: %[[and_i_3:.*]] = and i32 %[[add_0_i]], 3 +; CHECK: %[[gep_VA_i_and_i_3:.*]] = getelementptr <4 x i32>, <4 x i32>* %[[gep_VA_i]], i32 0, i32 %[[and_i_3]] +; CHECK: %[[ld_VA_i_and_i_3:.*]] = load i32, i32* %[[gep_VA_i_and_i_3]], align 4 +; CHECK: store i32 %[[ld_VA_i_and_i_3]], i32* %0 +; CHECK: ret void + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"$Globals" = type { [2 x <4 x i32>] } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?VectorArray@@3QBV?$vector@I$03@@B" = external constant [2 x <4 x i32>], align 4 +@ScalarArray = internal global [8 x i32] undef, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define i32 @main(i32 %i) #0 { +entry: + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) #0, !dbg !23 ; line:2 col:45 + %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) #0, !dbg !23 ; line:2 col:45 + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) #0, !dbg !23 ; line:2 col:45 + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0, !dbg !23 ; line:2 col:45 + %4 = bitcast [2 x <4 x i32>]* %3 to i8*, !dbg !23 ; line:2 col:45 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast ([8 x i32]* @ScalarArray to i8*), i8* %4, i64 32, i32 1, i1 false) #0, !dbg !23 ; line:2 col:45 + %i.addr = alloca i32, align 4, !dx.temp !13 + store i32 %i, i32* %i.addr, align 4, !tbaa !29 + %5 = load i32, i32* %i.addr, align 4, !dbg !33, !tbaa !29 ; line:5 col:24 + %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @ScalarArray, i32 0, i32 %5, !dbg !34 ; line:5 col:12 + %6 = load i32, i32* %arrayidx, align 4, !dbg !34, !tbaa !29 ; line:5 col:12 + ret i32 %6, !dbg !35 ; line:5 col:5 +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !9} +!dx.entryPoints = !{!16} +!dx.fnprops = !{!20} +!dx.options = !{!21, !22} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14508 (main, 263a77335-dirty)"} +!3 = !{i32 1, i32 0} +!4 = !{i32 1, i32 6} +!5 = !{!"vs", i32 6, i32 0} +!6 = !{i32 0, %"$Globals" undef, !7} +!7 = !{i32 32, !8} +!8 = !{i32 6, !"VectorArray", i32 3, i32 0, i32 7, i32 5} +!9 = !{i32 1, i32 (i32)* @main, !10} +!10 = !{!11, !14} +!11 = !{i32 1, !12, !13} +!12 = !{i32 4, !"OUT", i32 7, i32 5} +!13 = !{} +!14 = !{i32 0, !15, !13} +!15 = !{i32 4, !"IN", i32 7, i32 4} +!16 = !{i32 (i32)* @main, !"main", null, !17, null} +!17 = !{null, null, !18, null} +!18 = !{!19} +!19 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 32, null} +!20 = !{i32 (i32)* @main, i32 1} +!21 = !{i32 -2147483584} +!22 = !{i32 -1} +!23 = !DILocation(line: 2, column: 45, scope: !24, inlinedAt: !27) +!24 = !DISubprogram(name: "??__EScalarArray@@YAXXZ", scope: !25, file: !25, line: 2, type: !26, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false) +!25 = !DIFile(filename: "t:\5Carray-mapping.hlsl", directory: "") +!26 = !DISubroutineType(types: !13) +!27 = distinct !DILocation(line: 4, scope: !28) +!28 = !DISubprogram(name: "main", scope: !25, file: !25, line: 4, type: !26, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @main) +!29 = !{!30, !30, i64 0} +!30 = !{!"int", !31, i64 0} +!31 = !{!"omnipotent char", !32, i64 0} +!32 = !{!"Simple C/C++ TBAA"} +!33 = !DILocation(line: 5, column: 24, scope: !28) +!34 = !DILocation(line: 5, column: 12, scope: !28) +!35 = !DILocation(line: 5, column: 5, scope: !28)