diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 6f2ad33094dce..5dd2a0948fcd4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -238,8 +238,6 @@ class SelectionDAG { LLVMContext *Context; CodeGenOptLevel OptLevel; - bool DivergentTarget = false; - UniformityInfo *UA = nullptr; FunctionLoweringInfo * FLI = nullptr; @@ -473,16 +471,14 @@ class SelectionDAG { Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, UniformityInfo *UA, ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin, MachineModuleInfo &MMI, - FunctionVarLocs const *FnVarLocs, bool HasDivergency); + FunctionVarLocs const *FnVarLocs); void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, MachineFunctionAnalysisManager &AM, const TargetLibraryInfo *LibraryInfo, UniformityInfo *UA, ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin, - MachineModuleInfo &MMI, FunctionVarLocs const *FnVarLocs, - bool HasDivergency) { - init(NewMF, NewORE, nullptr, LibraryInfo, UA, PSIin, BFIin, MMI, FnVarLocs, - HasDivergency); + MachineModuleInfo &MMI, FunctionVarLocs const *FnVarLocs) { + init(NewMF, NewORE, nullptr, LibraryInfo, UA, PSIin, BFIin, MMI, FnVarLocs); MFAM = &AM; } diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index a6a3928230c3d..5241a51dd8cd8 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -57,7 +57,9 @@ class SelectionDAGISel { AssumptionCache *AC = nullptr; GCFunctionInfo *GFI = nullptr; SSPLayoutInfo *SP = nullptr; +#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS TargetTransformInfo *TTI = nullptr; +#endif CodeGenOptLevel OptLevel; const TargetInstrInfo *TII; const TargetLowering *TLI; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bfa72bffe4140..d902de62c631c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1371,7 +1371,7 @@ void SelectionDAG::init(MachineFunction &NewMF, const TargetLibraryInfo *LibraryInfo, UniformityInfo *NewUA, ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin, MachineModuleInfo &MMIin, - FunctionVarLocs const *VarLocs, bool HasDivergency) { + FunctionVarLocs const *VarLocs) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1384,7 +1384,6 @@ void SelectionDAG::init(MachineFunction &NewMF, BFI = BFIin; MMI = &MMIin; FnVarLocs = VarLocs; - DivergentTarget = HasDivergency; } SelectionDAG::~SelectionDAG() { @@ -2331,8 +2330,7 @@ SDValue SelectionDAG::getRegister(Register Reg, EVT VT) { return SDValue(E, 0); auto *N = newSDNode(Reg, VTs); - N->SDNodeBits.IsDivergent = - DivergentTarget && TLI->isSDNodeSourceOfDivergence(N, FLI, UA); + N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -12226,8 +12224,6 @@ static bool gluePropagatesDivergence(const SDNode *Node) { } bool SelectionDAG::calculateDivergence(SDNode *N) { - if (!DivergentTarget) - return false; if (TLI->isSDNodeAlwaysUniform(N)) { assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) && "Conflicting divergence information!"); @@ -12247,8 +12243,6 @@ bool SelectionDAG::calculateDivergence(SDNode *N) { } void SelectionDAG::updateDivergence(SDNode *N) { - if (!DivergentTarget) - return; SmallVector Worklist(1, N); do { N = Worklist.pop_back_val(); @@ -13809,20 +13803,16 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef Vals) { Ops[I].setInitial(Vals[I]); EVT VT = Ops[I].getValueType(); - // Take care of the Node's operands iff target has divergence // Skip Chain. It does not carry divergence. - if (DivergentTarget && VT != MVT::Other && + if (VT != MVT::Other && (VT != MVT::Glue || gluePropagatesDivergence(Ops[I].getNode())) && Ops[I].getNode()->isDivergent()) { - // Node is going to be divergent if at least one of its operand is - // divergent, unless it belongs to the "AlwaysUniform" exemptions. IsDivergent = true; } } Node->NumOperands = Vals.size(); Node->OperandList = Ops; - // Check the divergence of the Node itself. - if (DivergentTarget && !TLI->isSDNodeAlwaysUniform(Node)) { + if (!TLI->isSDNodeAlwaysUniform(Node)) { IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA); Node->SDNodeBits.IsDivergent = IsDivergent; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 26071ed70c9db..ece50ed95fc49 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -480,10 +480,7 @@ void SelectionDAGISel::initializeAnalysisResults( MachineModuleInfo &MMI = MAMP.getCachedResult(*Fn.getParent())->getMMI(); - TTI = &FAM.getResult(Fn); - - CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs, - TTI->hasBranchDivergence(&Fn)); + CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs); // Now get the optional analyzes if we want to. // This is based on the possibly changed OptLevel (after optnone is taken @@ -501,6 +498,10 @@ void SelectionDAGISel::initializeAnalysisResults( BatchAA = std::nullopt; SP = &FAM.getResult(Fn); + +#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS + TTI = &FAM.getResult(Fn); +#endif } void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { @@ -536,10 +537,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { MachineModuleInfo &MMI = MFP.getAnalysis().getMMI(); - TTI = &MFP.getAnalysis().getTTI(Fn); - - CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs, - TTI->hasBranchDivergence(&Fn)); + CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs); // Now get the optional analyzes if we want to. // This is based on the possibly changed OptLevel (after optnone is taken @@ -558,6 +556,10 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { BatchAA = std::nullopt; SP = &MFP.getAnalysis().getLayoutInfo(); + +#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS + TTI = &MFP.getAnalysis().getTTI(Fn); +#endif } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll b/llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll index 91a8446915f66..13ea8b08d2ade 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll @@ -18,10 +18,9 @@ define amdgpu_cs void @test_uniform_load_b96(ptr addrspace(1) %ptr, i32 %arg) "a ; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x0 ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x8 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_or_b32 s1, s2, s3 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_or_b32 s0, s0, s1 -; GFX11-NEXT: v_mov_b32_e32 v2, s0 +; GFX11-NEXT: v_mov_b32_e32 v2, s3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_or3_b32 v2, s2, v2, s0 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_endpgm ; @@ -34,14 +33,12 @@ define amdgpu_cs void @test_uniform_load_b96(ptr addrspace(1) %ptr, i32 %arg) "a ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo ; GFX12-NEXT: v_readfirstlane_b32 s0, v2 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_readfirstlane_b32 s1, v3 ; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_or_b32 s0, s0, s1 -; GFX12-NEXT: s_or_b32 s0, s2, s0 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: v_or3_b32 v2, v2, s1, s2 ; GFX12-NEXT: global_store_b32 v[0:1], v2, off ; GFX12-NEXT: s_endpgm bb: diff --git a/llvm/test/CodeGen/AMDGPU/test_isel_single_lane.ll b/llvm/test/CodeGen/AMDGPU/test_isel_single_lane.ll deleted file mode 100644 index 726e35d6651d0..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/test_isel_single_lane.ll +++ /dev/null @@ -1,47 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GCN %s - -declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1), i32) - - -define amdgpu_kernel void @test_isel_single_lane(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 { -; GCN-LABEL: test_isel_single_lane: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: s_load_b32 s4, s[0:1], 0x58 -; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s4 -; GCN-NEXT: global_atomic_cond_sub_u32 v1, v0, v1, s[0:1] offset:16 th:TH_ATOMIC_RETURN -; GCN-NEXT: s_wait_loadcnt 0x0 -; GCN-NEXT: v_readfirstlane_b32 s0, v1 -; GCN-NEXT: s_addk_co_i32 s0, 0xf4 -; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GCN-NEXT: s_lshl_b32 s1, s0, 4 -; GCN-NEXT: s_mul_i32 s0, s0, s1 -; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GCN-NEXT: s_lshl_b32 s0, s0, 12 -; GCN-NEXT: s_sub_co_i32 s0, s1, s0 -; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GCN-NEXT: v_mov_b32_e32 v1, s0 -; GCN-NEXT: global_store_b32 v0, v1, s[2:3] -; GCN-NEXT: s_endpgm - %gep0 = getelementptr i32, ptr addrspace(1) %in, i32 22 - %val0 = load i32, ptr addrspace(1) %gep0, align 4 - %gep1 = getelementptr i32, ptr addrspace(1) %in, i32 4 - %val1 = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr addrspace(1) %gep1, i32 %val0) - %res0 = add i32 %val1, 244 - %res1 = shl i32 %res0, 4 - %res2 = mul i32 %res0, %res1 - %res3 = shl i32 %res2, 12 - %res4 = sub i32 %res1, %res3 - store i32 %res4, ptr addrspace(1) %out - ret void -} - - -attributes #0 = { - "amdgpu-flat-work-group-size"="1,1" - "amdgpu-waves-per-eu"="1,1" - "uniform-work-group-size"="true" -} diff --git a/llvm/unittests/CodeGen/SelectionDAGTestBase.h b/llvm/unittests/CodeGen/SelectionDAGTestBase.h index edc730d7f9b45..8a0a05fc2f8b0 100644 --- a/llvm/unittests/CodeGen/SelectionDAGTestBase.h +++ b/llvm/unittests/CodeGen/SelectionDAGTestBase.h @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/AsmParser/Parser.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -72,12 +71,8 @@ class SelectionDAGTestBase : public testing::Test { if (!DAG) reportFatalUsageError("Failed to create SelectionDAG?"); OptimizationRemarkEmitter ORE(F); - FunctionAnalysisManager FAM; - FAM.registerPass([&] { return TM->getTargetIRAnalysis(); }); - - TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM); DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI, - nullptr, TTI.hasBranchDivergence(F)); + nullptr); } TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) { diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp index 28d69f30bfd1a..f06f03bb35a5d 100644 --- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp @@ -8,7 +8,6 @@ #include "AArch64SelectionDAGInfo.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/AsmParser/Parser.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -63,12 +62,8 @@ class AArch64SelectionDAGTest : public testing::Test { if (!DAG) report_fatal_error("DAG?"); OptimizationRemarkEmitter ORE(F); - FunctionAnalysisManager FAM; - FAM.registerPass([&] { return TM->getTargetIRAnalysis(); }); - - TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM); DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI, - nullptr, TTI.hasBranchDivergence(F)); + nullptr); } TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {