Skip to content

Commit

Permalink
AMDGPU/GlobalISel: Add integer med3 combines
Browse files Browse the repository at this point in the history
Add signed and unsigned integer versions of the med3 combine.
The source pattern is min(max(Val, K0), K1) or max(min(Val, K1), K0),
where K0 and K1 are constants and K0 <= K1. The destination is the med3
variant whose signedness matches that of the min/max in the source.

Differential Revision: https://reviews.llvm.org/D90050
  • Loading branch information
petar-avramovic committed Apr 27, 2021
1 parent 646b007 commit 4a9bc59
Show file tree
Hide file tree
Showing 10 changed files with 1,062 additions and 7 deletions.
15 changes: 14 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUCombine.td
Expand Up @@ -45,6 +45,17 @@ def clamp_i64_to_i16 : GICombineRule<
[{ return PreLegalizerHelper.matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, *MF, ${matchinfo}); }]),
(apply [{ PreLegalizerHelper.applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>;

// Match data of C++ type AMDGPURegBankCombinerHelper::Med3MatchInfo; the
// int_minmax_to_med3 rule passes it from its match step to its apply step.
def med3_matchdata : GIDefMatchData<"AMDGPURegBankCombinerHelper::Med3MatchInfo">;

// Combine rule rooted at any of G_SMAX, G_SMIN, G_UMAX or G_UMIN.
// The match step calls RegBankHelper.matchIntMinMaxToMed3 on the root, which
// decides whether the rule fires and fills $matchinfo; the apply step hands
// the same root and $matchinfo to RegBankHelper.applyMed3.
def int_minmax_to_med3 : GICombineRule<
(defs root:$min_or_max, med3_matchdata:$matchinfo),
(match (wip_match_opcode G_SMAX,
G_SMIN,
G_UMAX,
G_UMIN):$min_or_max,
[{ return RegBankHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
(apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;

// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;

Expand All @@ -64,6 +75,8 @@ def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
}

def AMDGPURegBankCombinerHelper : GICombinerHelper<
"AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold]> {
"AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3]> {
let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
let StateClass = "AMDGPURegBankCombinerHelperState";
let AdditionalArguments = [];
}
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Expand Up @@ -167,7 +167,8 @@ def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;

def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
def : GINodeEquiv<G_AMDGPU_MED3, AMDGPUsmed3>;
def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;

def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
Expand Up @@ -143,7 +143,7 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
auto Bitcast = B.buildBitcast({S32}, CvtPk);

auto Med3 = B.buildInstr(
AMDGPU::G_AMDGPU_MED3, {S32},
AMDGPU::G_AMDGPU_SMED3, {S32},
{MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
MI.getFlags());

Expand Down
128 changes: 126 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
Expand Up @@ -13,7 +13,9 @@

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
Expand All @@ -27,6 +29,126 @@
using namespace llvm;
using namespace MIPatternMatch;

/// Hand-written matchers and appliers used by the AMDGPU register-bank
/// combiner rules (currently the integer min/max -> med3 combine).
class AMDGPURegBankCombinerHelper {
protected:
  // NOTE: members are initialized in declaration order. MF is derived from B,
  // and RBI/TRI are derived from MF, so this order must be preserved.
  MachineIRBuilder &B;
  MachineFunction &MF;
  MachineRegisterInfo &MRI;
  const RegisterBankInfo &RBI;
  const TargetRegisterInfo &TRI;
  CombinerHelper &Helper;

public:
  /// Binds the helper to builder \p B (which supplies the function and its
  /// register info) and to the generic combiner \p Helper.
  AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
      : B(B), MF(B.getMF()), MRI(*B.getMRI()),
        RBI(*MF.getSubtarget().getRegBankInfo()),
        TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper) {}

  /// Returns true if the register bank assigned to \p Reg is the VGPR bank.
  bool isVgprRegBank(Register Reg);

  /// Generic min and max opcodes of one signedness together with the med3
  /// opcode they combine into.
  struct MinMaxMedOpc {
    unsigned Min, Max, Med;
  };

  /// Result of a successful med3 match: the opcode to build and its three
  /// source registers, in operand order.
  struct Med3MatchInfo {
    unsigned Opc;
    Register Val0, Val1, Val2;
  };

  /// Maps a min or max opcode to the {Min, Max, Med} triple of the same
  /// signedness. Any other opcode is unreachable.
  MinMaxMedOpc getMinMaxPair(unsigned Opc);

  /// Matches \p MI against min(max(Val, K0), K1) or max(min(Val, K1), K0),
  /// including operand commutes, using \p m_Cst to recognise K0 and K1.
  /// On success binds \p Val, \p K0 and \p K1.
  template <class m_Cst>
  bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
                Register &Val, Register &K0, Register &K1);

  /// Match step of the integer min/max -> med3 combine; fills \p MatchInfo on
  /// success.
  bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);

  /// Apply step: replaces \p MI with the instruction described by
  /// \p MatchInfo and erases \p MI.
  void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
};

/// Returns true when the register bank assigned to \p Reg is the VGPR bank.
bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
  const auto *Bank = RBI.getRegBank(Reg, MRI, TRI);
  const unsigned BankID = Bank->getID();
  return BankID == AMDGPU::VGPRRegBankID;
}

/// Returns the {Min, Max, Med} opcode triple that shares the signedness of
/// \p Opc, which must be one of G_SMIN, G_SMAX, G_UMIN or G_UMAX.
AMDGPURegBankCombinerHelper::MinMaxMedOpc
AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
  // Signed min/max combine into the signed med3.
  if (Opc == AMDGPU::G_SMIN || Opc == AMDGPU::G_SMAX)
    return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};

  // Unsigned min/max combine into the unsigned med3.
  if (Opc == AMDGPU::G_UMIN || Opc == AMDGPU::G_UMAX)
    return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};

  llvm_unreachable("Unsupported opcode");
}

/// Matches \p MI against either clamp shape, trying them in this order:
///   1. min(max(Val, K0), K1)
///   2. max(min(Val, K1), K0)
/// Every operand commute of the inner and the outer instruction is accepted,
/// so each shape covers four operand orders. \p m_Cst is the matcher used to
/// recognise K0 and K1. On success \p Val, \p K0 and \p K1 are bound.
template <class m_Cst>
bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MinMaxMedOpc MMMOpc, Register &Val,
                                           Register &K0, Register &K1) {
  // Shape 1: the outer min supplies K1, the inner max supplies Val and K0.
  auto LowerBoundInside =
      m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0));
  auto MinOfMax =
      m_CommutativeBinOp(MMMOpc.Min, LowerBoundInside, m_Cst(K1));
  if (mi_match(MI, MRI, MinOfMax))
    return true;

  // Shape 2: the outer max supplies K0, the inner min supplies Val and K1.
  auto UpperBoundInside =
      m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1));
  auto MaxOfMin =
      m_CommutativeBinOp(MMMOpc.Max, UpperBoundInside, m_Cst(K0));
  return mi_match(MI, MRI, MaxOfMin);
}

/// Match step of the integer min/max -> med3 combine.
///
/// \p MI is the outer min or max. The combine fires when MI's result lives in
/// a VGPR, has a type for which a med3 can be emitted, and MI has the shape
/// min(max(Val, K0), K1) or max(min(Val, K1), K0) with integer constants
/// K0 <= K1 (compared with the signedness of the min/max opcodes).
///
/// \param MI        root instruction (G_SMIN, G_SMAX, G_UMIN or G_UMAX).
/// \param MatchInfo on success receives {med3 opcode, Val, K0, K1}.
/// \returns true if \p MI can be rewritten as a med3.
bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) {
  Register Dst = MI.getOperand(0).getReg();
  if (!isVgprRegBank(Dst))
    return false;

  // Only form a med3 for 32-bit scalars, or for 16-bit scalars when the
  // subtarget reports 16-bit med3 support. Rejecting everything else (vectors
  // included) keeps the combine from producing a med3 of a type that cannot be
  // selected later.
  // NOTE(review): relies on GCNSubtarget::hasMed3_16() describing availability
  // of the 16-bit med3 instructions - confirm against the subtarget definition.
  const LLT Ty = MRI.getType(Dst);
  const bool HasMed3_16 = MF.getSubtarget<GCNSubtarget>().hasMed3_16();
  const bool IsS32 = Ty == LLT::scalar(32);
  const bool IsUsableS16 = Ty == LLT::scalar(16) && HasMed3_16;
  if (!IsS32 && !IsUsableS16)
    return false;

  MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
  Register Val, K0, K1;
  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
  if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
    return false;

  // K0 is the lower bound and K1 the upper bound; if they are inverted under
  // the signedness of the min/max pair, do not combine.
  const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue();
  const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue();
  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
    return false;
  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
    return false;

  MatchInfo = {OpcodeTriple.Med, Val, K0, K1};
  return true;
}

/// Apply step of the med3 combine: builds the instruction described by
/// \p MatchInfo in place of \p MI, writing MI's destination operand and
/// carrying over MI's flags, then erases \p MI.
void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
                                            Med3MatchInfo &MatchInfo) {
  // Position the builder at MI before emitting the replacement.
  B.setInstrAndDebugLoc(MI);

  const unsigned Med3Opcode = MatchInfo.Opc;
  Register Src0 = MatchInfo.Val0;
  Register Src1 = MatchInfo.Val1;
  Register Src2 = MatchInfo.Val2;
  B.buildInstr(Med3Opcode, {MI.getOperand(0)}, {Src0, Src1, Src2},
               MI.getFlags());

  // The new instruction now defines MI's result, so MI can be removed.
  MI.eraseFromParent();
}

/// Holds references to the generic CombinerHelper and to the AMDGPU
/// register-bank helper, exposed to derived classes as protected members.
class AMDGPURegBankCombinerHelperState {
protected:
  CombinerHelper &Helper;
  AMDGPURegBankCombinerHelper &RegBankHelper;

public:
  /// Stores references to both helpers; neither is owned by this object.
  AMDGPURegBankCombinerHelperState(CombinerHelper &GenericHelper,
                                   AMDGPURegBankCombinerHelper &TargetHelper)
      : Helper(GenericHelper), RegBankHelper(TargetHelper) {}
};

#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenRegBankGICombiner.inc"
Expand Down Expand Up @@ -62,9 +184,11 @@ bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
CombinerHelper Helper(Observer, B, KB, MDT);
AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg);
AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
RegBankHelper);

if (Generated.tryCombineAll(Observer, MI, B, Helper))
if (Generated.tryCombineAll(Observer, MI, B))
return true;

return false;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Expand Up @@ -3507,7 +3507,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
case AMDGPU::G_AMDGPU_MED3:
case AMDGPU::G_AMDGPU_SMED3:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstructions.td
Expand Up @@ -2652,7 +2652,13 @@ def G_AMDGPU_CVT_PK_I16_I32 : AMDGPUGenericInstruction {
let hasSideEffects = 0;
}

def G_AMDGPU_MED3 : AMDGPUGenericInstruction {
def G_AMDGPU_SMED3 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
let hasSideEffects = 0;
}

def G_AMDGPU_UMED3 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
let hasSideEffects = 0;
Expand Down

0 comments on commit 4a9bc59

Please sign in to comment.