Skip to content

Commit

Permalink
[AMDGPU] Introduce never uniform bit field in tablegen
Browse files Browse the repository at this point in the history
IsNeverUniform can be set to 1 to mark instructions which are
inherently never-uniform (divergent). For now, this bit is enabled
only for the V_WRITELANE_B32 instruction; it is to be extended to
all instructions that require it.

Reviewed By: arsenm, sameerds, #amdgpu

Differential Revision: https://reviews.llvm.org/D143154
  • Loading branch information
Yashwant Singh authored and Yashwant Singh committed Feb 8, 2023
1 parent 1cf344d commit cde2f33
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 11 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIDefines.h
Expand Up @@ -133,6 +133,9 @@ enum : uint64_t {

// Whether tied sources will be read.
TiedSourceNotRead = UINT64_C(1) << 60,

// Instruction is inherently never uniform (i.e. always divergent).
IsNeverUniform = UINT64_C(1) << 61,
};

// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrFormats.td
Expand Up @@ -153,6 +153,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that tied source will not be read.
field bit TiedSourceNotRead = 0;

// This bit indicates that the instruction is never-uniform/divergent
field bit IsNeverUniform = 0;

// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
Expand Down Expand Up @@ -234,6 +237,8 @@ class InstSI <dag outs, dag ins, string asm = "",

let TSFlags{60} = TiedSourceNotRead;

let TSFlags{61} = IsNeverUniform;

let SchedRW = [Write32Bit];

let AsmVariantName = AMDGPUAsmVariants.Default;
Expand Down
13 changes: 7 additions & 6 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Expand Up @@ -8414,7 +8414,14 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {

InstructionUniformity
SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {

if (isNeverUniform(MI))
return InstructionUniformity::NeverUniform;

unsigned opcode = MI.getOpcode();
if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
return InstructionUniformity::AlwaysUniform;

if (MI.isCopy()) {
const MachineOperand &srcOp = MI.getOperand(1);
if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
Expand Down Expand Up @@ -8456,12 +8463,6 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
return InstructionUniformity::Default;
}

if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
return InstructionUniformity::AlwaysUniform;

if (opcode == AMDGPU::V_WRITELANE_B32)
return InstructionUniformity::NeverUniform;

const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo();

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Expand Up @@ -781,6 +781,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
}

/// Returns true when the SIInstrFlags::IsNeverUniform bit is set in the
/// TSFlags of \p MI's instruction descriptor.
static bool isNeverUniform(const MachineInstr &MI) {
  const auto Flags = MI.getDesc().TSFlags;
  return (Flags & SIInstrFlags::IsNeverUniform) != 0;
}

/// Returns true when the SIInstrFlags::TiedSourceNotRead bit is set in the
/// TSFlags of \p MI's instruction descriptor.
static bool doesNotReadTiedSource(const MachineInstr &MI) {
  const auto Flags = MI.getDesc().TSFlags;
  return (Flags & SIInstrFlags::TiedSourceNotRead) != 0;
}
Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Expand Up @@ -764,11 +764,10 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag,
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
[(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;

let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
[(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1

let isReMaterializable = 1 in {
Expand Down
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s

# readlane and readfirstlane are always uniform

---
Expand Down
@@ -1,4 +1,4 @@
# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
# loads from flat non uniform
---
name: flatloads
Expand Down

0 comments on commit cde2f33

Please sign in to comment.