From e10e37335e19922dc231ce32da96f03c47a7c417 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Wed, 12 Nov 2025 14:18:26 -0500 Subject: [PATCH 1/4] Initial commit --- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 39 +++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 86ca22cfeffd8..2c83e49cca1b3 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -24,6 +24,7 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include @@ -66,6 +67,7 @@ class SIPeepholeSDWA { MachineInstr *createSDWAVersion(MachineInstr &MI); bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const; + bool strengthReduceCSelect64(MachineFunction &MF); public: bool run(MachineFunction &MF); @@ -1362,6 +1364,40 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, } } +bool SIPeepholeSDWA::strengthReduceCSelect64(MachineFunction &MF) { + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : make_early_inc_range(MBB)) { + if (MI.getOpcode() != AMDGPU::S_CSELECT_B64) + continue; + + Register Reg = MI.getOperand(0).getReg(); + MachineInstr *MustBeVCNDMASK = MRI->getOneNonDBGUser(Reg); + if (!MustBeVCNDMASK || + MustBeVCNDMASK->getOpcode() != AMDGPU::V_CNDMASK_B32_e64 || + !MustBeVCNDMASK->getOperand(1).isImm() || + !MustBeVCNDMASK->getOperand(2).isImm()) + continue; + + MachineInstr *MustBeVREADFIRSTLANE = + MRI->getOneNonDBGUser(MustBeVCNDMASK->getOperand(0).getReg()); + if (!MustBeVREADFIRSTLANE || + MustBeVREADFIRSTLANE->getOpcode() != AMDGPU::V_READFIRSTLANE_B32) + continue; + + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::S_CSELECT_B32), + MustBeVREADFIRSTLANE->getOperand(0).getReg()) + .addImm(MI.getOperand(1).getImm()) + .addImm(MI.getOperand(2).getImm()) + .addReg(AMDGPU::SCC, RegState::Implicit); + + MustBeVREADFIRSTLANE->eraseFromParent(); + } + + return Changed; +} + bool SIPeepholeSDWALegacy::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -1436,6 +1472,9 @@ bool SIPeepholeSDWA::run(MachineFunction &MF) { } while (Changed); } + // Other target-specific SSA-form peephole optimizations + Ret |= strengthReduceCSelect64(MF); + return Ret; } From bc28131065a8dff59b8fbc070d6ecfed9b73c095 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Wed, 12 Nov 2025 14:36:38 -0500 Subject: [PATCH 2/4] Fix --- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 2c83e49cca1b3..2788eeec35570 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -1369,7 +1369,9 @@ bool SIPeepholeSDWA::strengthReduceCSelect64(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) for (MachineInstr &MI : make_early_inc_range(MBB)) { - if (MI.getOpcode() != AMDGPU::S_CSELECT_B64) + if (MI.getOpcode() != AMDGPU::S_CSELECT_B64 || + !MI.getOperand(1).isImm() || !MI.getOperand(2).isImm() || + (MI.getOperand(1).getImm() != 0 && MI.getOperand(2).getImm() != 0)) continue; Register Reg = MI.getOperand(0).getReg(); @@ -1386,10 +1388,13 @@ bool SIPeepholeSDWA::strengthReduceCSelect64(MachineFunction &MF) { MustBeVREADFIRSTLANE->getOpcode() != AMDGPU::V_READFIRSTLANE_B32) continue; + unsigned CSelectZeroOpIdx = MI.getOperand(1).getImm() ? 2 : 1; + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::S_CSELECT_B32), MustBeVREADFIRSTLANE->getOperand(0).getReg()) - .addImm(MI.getOperand(1).getImm()) - .addImm(MI.getOperand(2).getImm()) + .addImm(MustBeVCNDMASK->getOperand(CSelectZeroOpIdx == 1 ? 2 : 1) + .getImm()) + .addImm(MustBeVCNDMASK->getOperand(CSelectZeroOpIdx).getImm()) .addReg(AMDGPU::SCC, RegState::Implicit); MustBeVREADFIRSTLANE->eraseFromParent(); From 26dac49fe972dbe326a39a471ded42c473a6089c Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Wed, 12 Nov 2025 14:56:17 -0500 Subject: [PATCH 3/4] This is backwards --- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 2788eeec35570..4fe3af22ad265 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -1392,9 +1392,10 @@ bool SIPeepholeSDWA::strengthReduceCSelect64(MachineFunction &MF) { BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::S_CSELECT_B32), MustBeVREADFIRSTLANE->getOperand(0).getReg()) - .addImm(MustBeVCNDMASK->getOperand(CSelectZeroOpIdx == 1 ? 2 : 1) - .getImm()) - .addImm(MustBeVCNDMASK->getOperand(CSelectZeroOpIdx).getImm()) + .addImm( + MustBeVCNDMASK->getOperand((CSelectZeroOpIdx == 1 ? 2 : 1) + 2) + .getImm()) + .addImm(MustBeVCNDMASK->getOperand(CSelectZeroOpIdx + 2).getImm()) .addReg(AMDGPU::SCC, RegState::Implicit); MustBeVREADFIRSTLANE->eraseFromParent(); From 76518b4359a5c00719ba6fbf3e909dad869cb615 Mon Sep 17 00:00:00 2001 From: Patrick Simmons Date: Wed, 12 Nov 2025 18:02:36 -0500 Subject: [PATCH 4/4] This is forwards. --- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 4fe3af22ad265..c427dd03a8529 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -1392,10 +1392,10 @@ bool SIPeepholeSDWA::strengthReduceCSelect64(MachineFunction &MF) { BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::S_CSELECT_B32), MustBeVREADFIRSTLANE->getOperand(0).getReg()) + .addImm(MustBeVCNDMASK->getOperand(CSelectZeroOpIdx + 2).getImm()) .addImm( MustBeVCNDMASK->getOperand((CSelectZeroOpIdx == 1 ? 2 : 1) + 2) .getImm()) - .addImm(MustBeVCNDMASK->getOperand(CSelectZeroOpIdx + 2).getImm()) .addReg(AMDGPU::SCC, RegState::Implicit); MustBeVREADFIRSTLANE->eraseFromParent();