Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,8 @@ class SelectionDAG {
LLVMContext *Context;
CodeGenOptLevel OptLevel;

bool DivergentTarget = false;

UniformityInfo *UA = nullptr;
FunctionLoweringInfo * FLI = nullptr;

Expand Down Expand Up @@ -471,14 +473,16 @@ class SelectionDAG {
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
UniformityInfo *UA, ProfileSummaryInfo *PSIin,
BlockFrequencyInfo *BFIin, MachineModuleInfo &MMI,
FunctionVarLocs const *FnVarLocs);
FunctionVarLocs const *FnVarLocs, bool HasDivergency);

/// Overload of init() that takes a MachineFunctionAnalysisManager.
/// It forwards every argument to the other init() overload, passing nullptr
/// in the third position, and afterwards stores the address of \p AM in MFAM.
void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE,
MachineFunctionAnalysisManager &AM,
const TargetLibraryInfo *LibraryInfo, UniformityInfo *UA,
ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin,
MachineModuleInfo &MMI, FunctionVarLocs const *FnVarLocs) {
init(NewMF, NewORE, nullptr, LibraryInfo, UA, PSIin, BFIin, MMI, FnVarLocs);
MachineModuleInfo &MMI, FunctionVarLocs const *FnVarLocs,
bool HasDivergency) {
init(NewMF, NewORE, nullptr, LibraryInfo, UA, PSIin, BFIin, MMI, FnVarLocs,
HasDivergency);
// Record the analysis manager only after the shared initialisation has run.
MFAM = &AM;
}

Expand Down
2 changes: 0 additions & 2 deletions llvm/include/llvm/CodeGen/SelectionDAGISel.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ class SelectionDAGISel {
AssumptionCache *AC = nullptr;
GCFunctionInfo *GFI = nullptr;
SSPLayoutInfo *SP = nullptr;
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
TargetTransformInfo *TTI = nullptr;
#endif
CodeGenOptLevel OptLevel;
const TargetInstrInfo *TII;
const TargetLowering *TLI;
Expand Down
18 changes: 14 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1370,7 +1370,7 @@ void SelectionDAG::init(MachineFunction &NewMF,
const TargetLibraryInfo *LibraryInfo,
UniformityInfo *NewUA, ProfileSummaryInfo *PSIin,
BlockFrequencyInfo *BFIin, MachineModuleInfo &MMIin,
FunctionVarLocs const *VarLocs) {
FunctionVarLocs const *VarLocs, bool HasDivergency) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
Expand All @@ -1383,6 +1383,7 @@ void SelectionDAG::init(MachineFunction &NewMF,
BFI = BFIin;
MMI = &MMIin;
FnVarLocs = VarLocs;
DivergentTarget = HasDivergency;
}

SelectionDAG::~SelectionDAG() {
Expand Down Expand Up @@ -2329,7 +2330,8 @@ SDValue SelectionDAG::getRegister(Register Reg, EVT VT) {
return SDValue(E, 0);

auto *N = newSDNode<RegisterSDNode>(Reg, VTs);
N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
N->SDNodeBits.IsDivergent =
DivergentTarget && TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
Expand Down Expand Up @@ -12142,6 +12144,8 @@ static bool gluePropagatesDivergence(const SDNode *Node) {
}

bool SelectionDAG::calculateDivergence(SDNode *N) {
if (!DivergentTarget)
return false;
if (TLI->isSDNodeAlwaysUniform(N)) {
assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) &&
"Conflicting divergence information!");
Expand All @@ -12161,6 +12165,8 @@ bool SelectionDAG::calculateDivergence(SDNode *N) {
}

void SelectionDAG::updateDivergence(SDNode *N) {
if (!DivergentTarget)
return;
SmallVector<SDNode *, 16> Worklist(1, N);
do {
N = Worklist.pop_back_val();
Expand Down Expand Up @@ -13720,16 +13726,20 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
Ops[I].setInitial(Vals[I]);
EVT VT = Ops[I].getValueType();

// Take care of the Node's operands iff target has divergence
// Skip Chain. It does not carry divergence.
if (VT != MVT::Other &&
if (DivergentTarget && VT != MVT::Other &&
(VT != MVT::Glue || gluePropagatesDivergence(Ops[I].getNode())) &&
Ops[I].getNode()->isDivergent()) {
// Node is going to be divergent if at least one of its operands is
// divergent, unless it belongs to the "AlwaysUniform" exemptions.
IsDivergent = true;
}
}
Node->NumOperands = Vals.size();
Node->OperandList = Ops;
if (!TLI->isSDNodeAlwaysUniform(Node)) {
// Check the divergence of the Node itself.
if (DivergentTarget && !TLI->isSDNodeAlwaysUniform(Node)) {
IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA);
Node->SDNodeBits.IsDivergent = IsDivergent;
}
Expand Down
18 changes: 8 additions & 10 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,10 @@ void SelectionDAGISel::initializeAnalysisResults(
MachineModuleInfo &MMI =
MAMP.getCachedResult<MachineModuleAnalysis>(*Fn.getParent())->getMMI();

CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs);
TTI = &FAM.getResult<TargetIRAnalysis>(Fn);

CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs,
TTI->hasBranchDivergence(&Fn));

// Now get the optional analyses if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
Expand All @@ -500,10 +503,6 @@ void SelectionDAGISel::initializeAnalysisResults(
BatchAA = std::nullopt;

SP = &FAM.getResult<SSPLayoutAnalysis>(Fn);

#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
TTI = &FAM.getResult<TargetIRAnalysis>(Fn);
#endif
}

void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
Expand Down Expand Up @@ -539,7 +538,10 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
MachineModuleInfo &MMI =
MFP.getAnalysis<MachineModuleInfoWrapperPass>().getMMI();

CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs);
TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);

CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs,
TTI->hasBranchDivergence(&Fn));

// Now get the optional analyses if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
Expand All @@ -558,10 +560,6 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
BatchAA = std::nullopt;

SP = &MFP.getAnalysis<StackProtector>().getLayoutInfo();

#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
#endif
}

bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
Expand Down
13 changes: 8 additions & 5 deletions llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ define amdgpu_cs void @test_uniform_load_b96(ptr addrspace(1) %ptr, i32 %arg) "a
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x8
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v2, s3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_or3_b32 v2, s2, v2, s0
; GFX11-NEXT: s_or_b32 s1, s2, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s0
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;
Expand All @@ -33,12 +34,14 @@ define amdgpu_cs void @test_uniform_load_b96(ptr addrspace(1) %ptr, i32 %arg) "a
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
; GFX12-NEXT: v_readfirstlane_b32 s0, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: v_readfirstlane_b32 s1, v3
; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_or_b32 s0, s0, s1
; GFX12-NEXT: s_or_b32 s0, s2, s0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_mov_b32_e32 v2, s0
; GFX12-NEXT: v_or3_b32 v2, v2, s1, s2
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_endpgm
bb:
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/AMDGPU/test_isel_single_lane.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GCN %s

declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1), i32)


; Loads a value from %in, feeds it to the conditional-subtract atomic on
; %in + 16 bytes, then runs the atomic's result through an add/shl/mul/shl/sub
; chain and stores the final value to %out.
;
; The expected output moves the atomic's VGPR result into an SGPR with
; v_readfirstlane_b32 and performs the whole arithmetic chain with scalar
; (s_*) instructions before copying the result back to a VGPR for the store.
; NOTE(review): presumably this is legal because attribute group #0 limits the
; kernel to a single work-item -- confirm against the commit description.
define amdgpu_kernel void @test_isel_single_lane(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
; GCN-LABEL: test_isel_single_lane:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: s_load_b32 s4, s[0:1], 0x58
; GCN-NEXT: s_wait_kmcnt 0x0
; GCN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s4
; GCN-NEXT: global_atomic_cond_sub_u32 v1, v0, v1, s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GCN-NEXT: s_wait_loadcnt 0x0
; GCN-NEXT: v_readfirstlane_b32 s0, v1
; GCN-NEXT: s_addk_co_i32 s0, 0xf4
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GCN-NEXT: s_lshl_b32 s1, s0, 4
; GCN-NEXT: s_mul_i32 s0, s0, s1
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GCN-NEXT: s_lshl_b32 s0, s0, 12
; GCN-NEXT: s_sub_co_i32 s0, s1, s0
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GCN-NEXT: v_mov_b32_e32 v1, s0
; GCN-NEXT: global_store_b32 v0, v1, s[2:3]
; GCN-NEXT: s_endpgm
  ; Element 22 of an i32 array is byte offset 0x58, matching the s_load_b32 above.
  %gep0 = getelementptr i32, ptr addrspace(1) %in, i32 22
  %val0 = load i32, ptr addrspace(1) %gep0, align 4
  ; Element 4 is byte offset 16, matching "offset:16" on the atomic above.
  %gep1 = getelementptr i32, ptr addrspace(1) %in, i32 4
  ; The .p1 suffix matches the addrspace(1) pointer operand and the intrinsic
  ; declaration at the top of this file.
  %val1 = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1) %gep1, i32 %val0)
  %res0 = add i32 %val1, 244
  %res1 = shl i32 %res0, 4
  %res2 = mul i32 %res0, %res1
  %res3 = shl i32 %res2, 12
  %res4 = sub i32 %res1, %res3
  store i32 %res4, ptr addrspace(1) %out
  ret void
}


; Attribute group #0, referenced by @test_isel_single_lane.
; NOTE(review): the "1,1" values are presumably min,max pairs that pin the
; kernel to one work-item per work-group and one wave per EU -- confirm
; against the AMDGPU usage documentation.
attributes #0 = {
"amdgpu-flat-work-group-size"="1,1"
"amdgpu-waves-per-eu"="1,1"
"uniform-work-group-size"="true"
}
7 changes: 6 additions & 1 deletion llvm/unittests/CodeGen/SelectionDAGAddressAnalysisTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
Expand Down Expand Up @@ -78,8 +79,12 @@ class SelectionDAGAddressAnalysisTest : public testing::Test {
if (!DAG)
report_fatal_error("DAG?");
OptimizationRemarkEmitter ORE(F);
FunctionAnalysisManager FAM;
FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });

TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM);
DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
nullptr);
nullptr, TTI.hasBranchDivergence(F));
}

TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {
Expand Down
7 changes: 6 additions & 1 deletion llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SDPatternMatch.h"
Expand Down Expand Up @@ -76,8 +77,12 @@ class SelectionDAGPatternMatchTest : public testing::Test {
if (!DAG)
report_fatal_error("DAG?");
OptimizationRemarkEmitter ORE(F);
FunctionAnalysisManager FAM;
FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });

TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM);
DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
nullptr);
nullptr, TTI.hasBranchDivergence(F));
}

TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {
Expand Down
7 changes: 6 additions & 1 deletion llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "AArch64SelectionDAGInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
Expand Down Expand Up @@ -62,8 +63,12 @@ class AArch64SelectionDAGTest : public testing::Test {
if (!DAG)
report_fatal_error("DAG?");
OptimizationRemarkEmitter ORE(F);
FunctionAnalysisManager FAM;
FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });

TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM);
DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
nullptr);
nullptr, TTI.hasBranchDivergence(F));
}

TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {
Expand Down