diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index ce2b4a5f6f2e9..43a052b687109 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -39,6 +39,7 @@ FunctionPass *createSIAnnotateControlFlowLegacyPass();
 FunctionPass *createSIFoldOperandsLegacyPass();
 FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
+FunctionPass *createSISAbs16FixupLegacyPass();
 FunctionPass *createSIShrinkInstructionsLegacyPass();
 FunctionPass *createSILoadStoreOptimizerLegacyPass();
 FunctionPass *createSIWholeQuadModeLegacyPass();
@@ -93,6 +94,13 @@ class SILowerI1CopiesPass : public PassInfoMixin<SILowerI1CopiesPass> {
                         MachineFunctionAnalysisManager &MFAM);
 };
 
+class SISAbs16FixupPass : public PassInfoMixin<SISAbs16FixupPass> {
+public:
+  SISAbs16FixupPass() = default;
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
 void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
 
 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
@@ -197,6 +205,9 @@ extern char &SILowerWWMCopiesLegacyID;
 void initializeSILowerI1CopiesLegacyPass(PassRegistry &);
 extern char &SILowerI1CopiesLegacyID;
 
+void initializeSISAbs16FixupLegacyPass(PassRegistry &);
+extern char &SISAbs16FixupLegacyID;
+
 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
 extern char &AMDGPUGlobalISelDivergenceLoweringID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 996b55f42fd0b..90405fed8efdd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -551,6 +551,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUPrepareAGPRAllocLegacyPass(*PR);
   initializeGCNDPPCombineLegacyPass(*PR);
   initializeSILowerI1CopiesLegacyPass(*PR);
+  initializeSISAbs16FixupLegacyPass(*PR);
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
   initializeAMDGPURegBankSelectPass(*PR);
   initializeAMDGPURegBankLegalizePass(*PR);
@@ -1517,6 +1518,7 @@ bool GCNPassConfig::addInstSelector() {
   AMDGPUPassConfig::addInstSelector();
   addPass(&SIFixSGPRCopiesLegacyID);
   addPass(createSILowerI1CopiesLegacyPass());
+  addPass(createSISAbs16FixupLegacyPass());
   return false;
 }
 
@@ -2209,6 +2211,7 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
   addPass(AMDGPUISelDAGToDAGPass(TM));
   addPass(SIFixSGPRCopiesPass());
   addPass(SILowerI1CopiesPass());
+  addPass(SISAbs16FixupPass());
   return Error::success();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index a1e0e5293c706..cd9225acdb002 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -185,6 +185,7 @@ add_llvm_target(AMDGPUCodeGen
   SIPreEmitPeephole.cpp
   SIProgramInfo.cpp
   SIRegisterInfo.cpp
+  SISAbs16Fixup.cpp
   SIShrinkInstructions.cpp
   SIWholeQuadMode.cpp
 
diff --git a/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp b/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
new file mode 100644
index 0000000000000..fd305b6ffc061
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp
@@ -0,0 +1,238 @@
+//===-- SISAbs16Fixup.cpp - Fix up scalar 16-bit abs ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass matches the pattern that 16-bit ABS operations are expanded to when
+// they are lowered for execution on the Scalar Unit:
+//
+//   %neg = S_SUB_I32 0, %x
+//   %res = S_MAX_I32 (S_SEXT_I32_I16 %x), (S_SEXT_I32_I16 %neg)
+//
+// and rewrites it to S_SEXT_I32_I16 followed by S_ABS_I32. The S_MIN_I32 form
+// of the pattern (a negated abs) is rewritten the same way, with an additional
+// S_SUB_I32 from zero.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/InitializePasses.h"
+#include <array>
+
+#define DEBUG_TYPE "si-abs16-pattern"
+
+using namespace llvm;
+
+/// Return the instruction defining the virtual register read by \p MO, or
+/// nullptr if \p MO is not a plain virtual register use (e.g. an immediate, a
+/// physical register or a subregister read).
+static MachineInstr *getVirtRegDef(const MachineOperand &MO,
+                                   const MachineRegisterInfo &MRI) {
+  if (!MO.isReg() || !MO.getReg().isVirtual() || MO.getSubReg())
+    return nullptr;
+  return MRI.getVRegDef(MO.getReg());
+}
+
+/// Look through the chain of COPYs that defines \p R and return the register
+/// at the start of that chain. The walk stops at a copy from a physical
+/// register (there is no virtual-register definition to follow) or from a
+/// subregister (the copy reads only part of its source).
+static Register pierceCopies(Register R, MachineRegisterInfo &MRI) {
+  if (!R.isVirtual())
+    return R;
+
+  MachineInstr *CopyMI = MRI.getVRegDef(R);
+  while (CopyMI && CopyMI->getOpcode() == AMDGPU::COPY) {
+    const MachineOperand &Src = CopyMI->getOperand(1);
+    if (!Src.getReg().isVirtual() || Src.getSubReg())
+      break;
+
+    R = Src.getReg();
+    CopyMI = MRI.getVRegDef(R);
+  }
+
+  return R;
+}
+
+/// Return true if \p MO is the constant zero: either an immediate 0 or a
+/// virtual register defined (possibly through copies) by "S_MOV_B32 0".
+static bool isZeroOperand(const MachineOperand &MO, MachineRegisterInfo &MRI) {
+  if (MO.isImm())
+    return MO.getImm() == 0;
+  if (!MO.isReg() || !MO.getReg().isVirtual())
+    return false;
+
+  const MachineInstr *Def = MRI.getVRegDef(pierceCopies(MO.getReg(), MRI));
+  return Def && Def->getOpcode() == AMDGPU::S_MOV_B32 &&
+         Def->getOperand(1).isImm() && Def->getOperand(1).getImm() == 0;
+}
+
+/// Match \p MI, an S_MAX_I32 or S_MIN_I32, against the expanded 16-bit abs
+/// pattern
+///
+///   %neg = S_SUB_I32 0, %x
+///   %a   = S_SEXT_I32_I16 %x
+///   %b   = S_SEXT_I32_I16 %neg
+///   %res = S_MAX_I32 %a, %b      ; %a and %b may be in either order
+///
+/// \returns %x with copies looked through, or an invalid Register if \p MI
+/// does not match.
+static Register matchExpandAbsPattern(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI) {
+  // Both inputs must be virtual registers produced by a 16-bit sign-extension.
+  std::array<MachineInstr *, 2> SextInstructions;
+  for (unsigned I = 0; I < SextInstructions.size(); ++I) {
+    MachineInstr *Def = getVirtRegDef(MI.getOperand(I + 1), MRI);
+    if (!Def || Def->getOpcode() != AMDGPU::S_SEXT_I32_I16)
+      return Register();
+    SextInstructions[I] = Def;
+  }
+
+  // Exactly one sign-extension must read the negation (S_SUB_I32); the other
+  // one reads the value whose absolute value is computed.
+  Register AbsSource;
+  MachineInstr *SubIns = nullptr;
+  for (MachineInstr *SextMI : SextInstructions) {
+    const MachineOperand &SextSrc = SextMI->getOperand(1);
+    MachineInstr *OperandMI = getVirtRegDef(SextSrc, MRI);
+    if (!OperandMI)
+      return Register();
+
+    if (OperandMI->getOpcode() != AMDGPU::S_SUB_I32) {
+      AbsSource = pierceCopies(SextSrc.getReg(), MRI);
+      continue;
+    }
+
+    if (SubIns)
+      return Register();
+    SubIns = OperandMI;
+  }
+
+  if (!SubIns || !AbsSource.isVirtual())
+    return Register();
+
+  if (MRI.getRegClass(AbsSource) != &AMDGPU::SGPR_32RegClass)
+    return Register();
+
+  // The negation must be exactly "0 - AbsSource".
+  const MachineOperand &SubRHS = SubIns->getOperand(2);
+  if (!isZeroOperand(SubIns->getOperand(1), MRI) || !SubRHS.isReg() ||
+      !SubRHS.getReg().isVirtual() || SubRHS.getSubReg() ||
+      pierceCopies(SubRHS.getReg(), MRI) != AbsSource)
+    return Register();
+
+  return AbsSource;
+}
+
+/// Rewrite every expanded 16-bit abs pattern in \p MF.
+/// \returns true if any instruction was changed.
+static bool runSAbs16Fixup(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIInstrInfo &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    // MI is erased inside the loop, so advance the iterator before visiting.
+    for (MachineInstr &MI : make_early_inc_range(MBB)) {
+      const bool IsPositive = MI.getOpcode() == AMDGPU::S_MAX_I32;
+      const bool IsNegative = MI.getOpcode() == AMDGPU::S_MIN_I32;
+      if (!IsPositive && !IsNegative)
+        continue;
+
+      Register AbsSource = matchExpandAbsPattern(MI, MRI);
+      if (!AbsSource.isValid())
+        continue;
+
+      // AbsSource gains a use at MI, which may lie after a use that was
+      // marked as its last one.
+      MRI.clearKillFlags(AbsSource);
+
+      const DebugLoc DL = MI.getDebugLoc();
+      Register ResultReg = MI.getOperand(0).getReg();
+      Register SextDestReg =
+          MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      // The negated form needs a temporary for the abs result.
+      Register AbsDestReg =
+          IsNegative ? MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass)
+                     : ResultReg;
+
+      // NOTE(review): for a source whose low 16 bits are 0x8000 the original
+      // S_MAX_I32 yields 0xFFFF8000 while S_ABS_I32 yields 0x00008000. The low
+      // 16 bits agree; confirm that no user depends on the upper half.
+      BuildMI(MBB, MI, DL, TII.get(AMDGPU::S_SEXT_I32_I16), SextDestReg)
+          .addReg(AbsSource);
+      BuildMI(MBB, MI, DL, TII.get(AMDGPU::S_ABS_I32), AbsDestReg)
+          .addReg(SextDestReg);
+
+      if (IsNegative)
+        BuildMI(MBB, MI, DL, TII.get(AMDGPU::S_SUB_I32), ResultReg)
+            .addImm(0)
+            .addReg(AbsDestReg);
+
+      // The sign-extensions and the negation that fed MI are left in place;
+      // NOTE(review): they are presumably removed later as dead code.
+      MI.eraseFromParent();
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+PreservedAnalyses
+SISAbs16FixupPass::run(MachineFunction &MF,
+                       MachineFunctionAnalysisManager &MFAM) {
+  if (!runSAbs16Fixup(MF))
+    return PreservedAnalyses::all();
+
+  // TODO: Probably preserves most.
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+namespace {
+
+/// Legacy pass manager wrapper around runSAbs16Fixup.
+class SISAbs16FixupLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+
+  SISAbs16FixupLegacy() : MachineFunctionPass(ID) {
+    initializeSISAbs16FixupLegacyPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    return runSAbs16Fixup(MF);
+  }
+
+  StringRef getPassName() const override { return "SI SAbs16 Fixup"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(SISAbs16FixupLegacy, DEBUG_TYPE, "SI SAbs16 Fixup", false,
+                false)
+
+char SISAbs16FixupLegacy::ID = 0;
+
+char &llvm::SISAbs16FixupLegacyID = SISAbs16FixupLegacy::ID;
+
+FunctionPass *llvm::createSISAbs16FixupLegacyPass() {
+  return new SISAbs16FixupLegacy();
+}
diff --git a/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
new file mode 100644
index 0000000000000..0cdbedd837396
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/s_abs_i16.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck %s
+
+define amdgpu_ps i16 @abs_i16(i16 inreg %arg) {
+; CHECK-LABEL: abs_i16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_sext_i32_i16 s0, s0
+; CHECK-NEXT: s_abs_i32 s0, s0
+; CHECK-NEXT: ; return to shader part epilog
+
+  %res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  ret i16 %res
+}
+
+define amdgpu_ps i16 @abs_i16_neg(i16 inreg %arg) {
+; CHECK-LABEL: abs_i16_neg:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_sext_i32_i16 s0, s0
+; CHECK-NEXT: s_abs_i32 s0, s0
+; CHECK-NEXT: s_sub_i32 s0, 0, s0
+; CHECK-NEXT: ; return to shader part epilog
+  %res1 = call i16 @llvm.abs.i16(i16 %arg, i1 false)
+  %res2 = sub i16 0, %res1
+  ret i16 %res2
+}