Skip to content

Commit

Permalink
[AMDGPU][GlobalISel] Fold G_FNEG above when users cannot fold mods
Browse files Browse the repository at this point in the history
Where possible, fold a G_FNEG into the instruction that defines its operand
when the fneg's users cannot fold source modifiers and we know the fold will
decrease the instruction count.
This follows the same logic as the SDAG combiner when choosing opportunities to combine.

Differential Revision: https://reviews.llvm.org/D112827
  • Loading branch information
mbrkusanin committed Nov 17, 2021
1 parent 3874277 commit db6bc2a
Show file tree
Hide file tree
Showing 10 changed files with 1,229 additions and 13 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Expand Up @@ -136,6 +136,10 @@ class CombinerHelper {
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp,
Register ToReg) const;

/// Replace the opcode of \p FromMI with \p ToOpcode and inform the
/// observer of the change.
///
/// \p FromMI is modified in place: its instruction descriptor is swapped for
/// the one the target instruction info reports for \p ToOpcode. The observer
/// is notified both before (changingInstr) and after (changedInstr) the
/// descriptor is replaced.
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const;

/// Get the register bank of \p Reg.
/// If Reg has not been assigned a register, a register class,
/// or a register bank, then this returns nullptr.
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Expand Up @@ -158,6 +158,15 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
Observer.changedInstr(*FromRegOp.getParent());
}

/// Switch \p FromMI over to \p ToOpcode in place, bracketing the mutation with
/// the observer's changing/changed notifications.
void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
                                       unsigned ToOpcode) const {
  // Announce the pending mutation before touching the instruction.
  Observer.changingInstr(FromMI);

  // Fetch the descriptor for the requested opcode and install it.
  const auto &NewDesc = Builder.getTII().get(ToOpcode);
  FromMI.setDesc(NewDesc);

  // Report that the instruction has now been modified.
  Observer.changedInstr(FromMI);
}

/// Query the register bank info for the bank of \p Reg.
/// Per the declaration's contract, the result is nullptr when \p Reg has not
/// been assigned a register, a register class, or a register bank.
const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
  const RegisterBank *Bank = RBI->getRegBank(Reg, MRI, *TRI);
  return Bank;
}
Expand Down
14 changes: 12 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUCombine.td
Expand Up @@ -64,19 +64,29 @@ def remove_fcanonicalize : GICombineRule<
[{ return PostLegalizerHelper.matchRemoveFcanonicalize(*${fcanonicalize}, ${matchinfo}); }]),
(apply [{ Helper.replaceSingleDefInstWithReg(*${fcanonicalize}, ${matchinfo}); }])>;

// Match data handed from the match step to the apply step of foldable_fneg:
// a pointer to a MachineInstr chosen by matchFoldableFneg.
def foldable_fneg_matchdata : GIDefMatchData<"MachineInstr *">;

// Fold a G_FNEG into the instruction above it when the fneg's users cannot
// fold source modifiers and doing so is known to decrease instruction count.
// The rule is rooted at the G_FNEG ($ffn); Helper.matchFoldableFneg decides
// whether the fold applies and fills in $matchinfo, and
// Helper.applyFoldableFneg performs the rewrite using that match data.
def foldable_fneg : GICombineRule<
(defs root:$ffn, foldable_fneg_matchdata:$matchinfo),
(match (wip_match_opcode G_FNEG):$ffn,
[{ return Helper.matchFoldableFneg(*${ffn}, ${matchinfo}); }]),
(apply [{ Helper.applyFoldableFneg(*${ffn}, ${matchinfo}); }])>;

// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;

def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
"AMDGPUGenPreLegalizerCombinerHelper", [all_combines, clamp_i64_to_i16]> {
"AMDGPUGenPreLegalizerCombinerHelper",
[all_combines, clamp_i64_to_i16, foldable_fneg]> {
let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
let StateClass = "AMDGPUPreLegalizerCombinerHelperState";
let AdditionalArguments = [];
}

def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
"AMDGPUGenPostLegalizerCombinerHelper",
[all_combines, gfx6gfx7_combines,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize]> {
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg]> {
let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
let AdditionalArguments = [];
Expand Down

0 comments on commit db6bc2a

Please sign in to comment.