Skip to content

Commit

Permalink
[GlobalISel] Handle sequences of trunc(sext/zext/anyext...) in artifa…
Browse files Browse the repository at this point in the history
…ct combiner

The trunc(sext/zext/anyext... x) -> x pattern is handled in the artifact combiner to avoid
the extra copy instructions in https://reviews.llvm.org/D156831.
  • Loading branch information
dzhidzhoev committed Aug 4, 2023
1 parent 6b5ce2c commit 19d7ab1
Show file tree
Hide file tree
Showing 21 changed files with 446 additions and 485 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"

Expand Down Expand Up @@ -235,12 +236,12 @@ class LegalizationArtifactCombiner {

Builder.setInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

// Try to fold trunc(g_constant) when the smaller constant type is legal.
auto *SrcMI = MRI.getVRegDef(SrcReg);
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
const LLT DstTy = MRI.getType(DstReg);
if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
auto &CstVal = SrcMI->getOperand(1);
Builder.buildConstant(
Expand All @@ -256,7 +257,6 @@ class LegalizationArtifactCombiner {
if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
const Register MergeSrcReg = SrcMerge->getSourceReg(0);
const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
const LLT DstTy = MRI.getType(DstReg);

// We can only fold if the types are scalar
const unsigned DstSize = DstTy.getSizeInBits();
Expand Down Expand Up @@ -325,6 +325,23 @@ class LegalizationArtifactCombiner {
return true;
}

// trunc(ext x) -> x
ArtifactValueFinder Finder(MRI, Builder, LI);
if (Register FoundReg =
Finder.findValueFromDef(DstReg, 0, DstTy.getSizeInBits())) {
LLT FoundRegTy = MRI.getType(FoundReg);
if (DstTy == FoundRegTy) {
LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_[S,Z,ANY]EXT/G_TRUNC...): "
<< MI;);

replaceRegOrBuildCopy(DstReg, FoundReg, MRI, Builder, UpdatedDefs,
Observer);
UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
}

return false;
}

Expand Down Expand Up @@ -719,6 +736,55 @@ class LegalizationArtifactCombiner {
return Register();
}

/// Given a G_SEXT, G_ZEXT or G_ANYEXT op \p MI and a start bit and
/// size, try to find the origin of the value defined by that start
/// position and size.
///
/// The search only continues through the extension when the source operand
/// is a scalar and the requested range [StartBit, StartBit + Size) fits
/// within the source operand's width. Otherwise the current best register is
/// returned unchanged.
///
/// \param MI the extension instruction to look through.
/// \param StartBit the first bit of the requested range.
/// \param Size the width in bits of the requested range; must be non-zero.
///
/// \returns a register with the requested size, or the current best
/// register found during the current query.
Register findValueFromExt(MachineInstr &MI, unsigned StartBit,
                          unsigned Size) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT ||
         MI.getOpcode() == TargetOpcode::G_ZEXT ||
         MI.getOpcode() == TargetOpcode::G_ANYEXT);
  assert(Size > 0);

  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(SrcReg);

  // Currently we don't go into vectors.
  if (!SrcType.isScalar())
    return CurrentBest;

  // Query the width only once the source is known to be a scalar, so that a
  // vector type never has its size coerced to a plain integer here.
  const unsigned SrcSize = SrcType.getSizeInBits();

  // The requested range reaches past the source operand's width, so it
  // cannot be satisfied from the source operand alone.
  if (StartBit + Size > SrcSize)
    return CurrentBest;

  // The source operand covers exactly the requested bits: remember it as the
  // best candidate before searching further through its definition.
  if (StartBit == 0 && SrcSize == Size)
    CurrentBest = SrcReg;
  return findValueFromDefImpl(SrcReg, StartBit, Size);
}

/// Look through the G_TRUNC op \p MI: given a start bit and a size, try to
/// find the origin of the value occupying that bit range by continuing the
/// search at the truncation's source operand with the same range.
///
/// \returns a register with the requested size, or the current best
/// register found during the current query.
Register findValueFromTrunc(MachineInstr &MI, unsigned StartBit,
                            unsigned Size) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
  assert(Size > 0);

  Register TruncSrc = MI.getOperand(1).getReg();

  // Only scalar sources are followed; for anything else keep whatever the
  // query has found so far.
  if (MRI.getType(TruncSrc).isScalar())
    return findValueFromDefImpl(TruncSrc, StartBit, Size);

  return CurrentBest;
}

/// Internal implementation for findValueFromDef(). findValueFromDef()
/// initializes some data like the CurrentBest register, which this method
/// and its callees rely upon.
Expand Down Expand Up @@ -759,6 +825,12 @@ class LegalizationArtifactCombiner {
Size);
case TargetOpcode::G_INSERT:
return findValueFromInsert(*Def, StartBit, Size);
case TargetOpcode::G_TRUNC:
return findValueFromTrunc(*Def, StartBit, Size);
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
return findValueFromExt(*Def, StartBit, Size);
default:
return CurrentBest;
}
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,66 @@ body: |
%2:_(s32) = G_TRUNC %1
$vgpr0 = COPY %2
...

---
name: trunc_sext

body: |
bb.0:
; Test that trunc(sext) is replaced with sext source.
; CHECK-LABEL: name: trunc_sext
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s64) = G_SEXT %0
%2:_(s32) = G_TRUNC %1
$vgpr0 = COPY %2
...

---
name: trunc_zext

body: |
bb.0:
; Test that trunc(zext) is replaced with zext source.
; CHECK-LABEL: name: trunc_zext
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s64) = G_ZEXT %0
%2:_(s32) = G_TRUNC %1
$vgpr0 = COPY %2
...

---
name: trunc_anyext

body: |
bb.0:
; Test that trunc(anyext) is replaced with anyext source.
; CHECK-LABEL: name: trunc_anyext
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s64) = G_ANYEXT %0
%2:_(s32) = G_TRUNC %1
$vgpr0 = COPY %2
...

---
name: trunc_ext

body: |
bb.0:
; Test that trunc(sext (trunc (...))) is replaced with source.
; CHECK-LABEL: name: trunc_ext
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(s32) = G_IMPLICIT_DEF
%1:_(s64) = G_SEXT %0
%2:_(s32) = G_TRUNC %1
%3:_(s128) = G_ZEXT %2
%4:_(s64) = G_TRUNC %3
%5:_(s32) = G_TRUNC %4
$vgpr0 = COPY %5
...
Original file line number Diff line number Diff line change
Expand Up @@ -457,9 +457,9 @@ body: |
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
Expand Down Expand Up @@ -1495,13 +1495,15 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT1]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT2]](s64)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT3]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT1]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%0:_(s32) = COPY $vgpr0
Expand Down

0 comments on commit 19d7ab1

Please sign in to comment.