diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5dacc0993fc9b9..c0ea35817ec8ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1022,6 +1022,90 @@ bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 }
 
+static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
+  switch (MF.getFunction().getCallingConv()) { // Shader-type code, by CC.
+  case CallingConv::AMDGPU_PS:
+    return 1;
+  case CallingConv::AMDGPU_VS:
+    return 2;
+  case CallingConv::AMDGPU_GS:
+    return 3;
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_ES: // HS, LS and ES are all rejected below.
+    report_fatal_error("ds_ordered_count unsupported for this calling conv");
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::C:
+  case CallingConv::Fast:
+  default:
+    // Assume other calling conventions are various compute callable functions
+    return 0;
+  }
+}
+/// Select amdgcn_ds_ordered_add/swap (\p IntrID) into DS_ORDERED_COUNT.
+bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
+    MachineInstr &MI, Intrinsic::ID IntrID) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineFunction *MF = MBB->getParent();
+  const DebugLoc &DL = MI.getDebugLoc();
+  // Operand 7 is a packed index; operands 8 and 9 are the wave flags.
+  unsigned IndexOperand = MI.getOperand(7).getImm();
+  bool WaveRelease = MI.getOperand(8).getImm() != 0;
+  bool WaveDone = MI.getOperand(9).getImm() != 0;
+
+  if (WaveDone && !WaveRelease)
+    report_fatal_error("ds_ordered_count: wave_done requires wave_release");
+  // Bits [5:0] hold the ordered-count index; strip fields as they are read.
+  unsigned OrderedCountIndex = IndexOperand & 0x3f;
+  IndexOperand &= ~0x3f;
+  unsigned CountDw = 0;
+  // GFX10 and later also carry a dword count in bits [27:24].
+  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) {
+    CountDw = (IndexOperand >> 24) & 0xf;
+    IndexOperand &= ~(0xf << 24);
+
+    if (CountDw < 1 || CountDw > 4) {
+      report_fatal_error(
+          "ds_ordered_count: dword count must be between 1 and 4");
+    }
+  }
+  // Any bit still set was not consumed by a field above.
+  if (IndexOperand)
+    report_fatal_error("ds_ordered_count: bad index operand");
+
+  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
+  unsigned ShaderType = getDSShaderTypeValue(*MF);
+  // Offset0 is the low byte of the immediate, Offset1 the next byte up.
+  unsigned Offset0 = OrderedCountIndex << 2;
+  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
+                     (Instruction << 4); // bit 4: 0 = add, 1 = swap
+  // On GFX10+, bits [7:6] of Offset1 encode the dword count minus one.
+  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
+    Offset1 |= (CountDw - 1) << 6;
+
+  unsigned Offset = Offset0 | (Offset1 << 8);
+  // Operand 2 is copied into M0 ahead of the DS instruction.
+  Register M0Val = MI.getOperand(2).getReg();
+  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+    .addReg(M0Val);
+  // Operand 0 is the result, operand 3 the value fed to the instruction.
+  Register DstReg = MI.getOperand(0).getReg();
+  Register ValReg = MI.getOperand(3).getReg();
+  MachineInstrBuilder DS =
+    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
+      .addReg(ValReg)
+      .addImm(Offset)
+      .cloneMemRefs(MI); // Carry MI's memory operands over.
+  // NOTE(review): this bails out after both instructions were inserted.
+  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
+    return false;
+
+  bool Ret = constrainSelectedInstRegOperands(*DS, TII, TRI, RBI);
+  MI.eraseFromParent(); // The intrinsic is replaced by the code built above.
+  return Ret;
+}
+
 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
@@ -1077,6 +1161,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     return selectStoreIntrinsic(I, false);
   case Intrinsic::amdgcn_raw_buffer_store_format:
     return selectStoreIntrinsic(I, true);
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap:
+    return selectDSOrderedIntrinsic(I, IntrinsicID);
   default:
     return selectImpl(I, *CoverageInfo);
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index d884afbe770770..38ca7fd4104bb8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -100,6 +100,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
 
   bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
+  bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
 
   bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
   int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll
new file mode 100644
index 00000000000000..9a287359d4db31
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll
@@ -0,0 +1 @@
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll | FileCheck -check-prefixes=GCN %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll
new file mode 100644
index 00000000000000..8cba08f016daf2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll
@@ -0,0 +1,5 @@
+; FIXME: Broken SI run line
+; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll
new file mode 100644
index 00000000000000..28c2c7a4e9bfb9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll
@@ -0,0 +1,5 @@
+; FIXME: Broken SI run line
+; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll