Skip to content

Commit

Permalink
[GlobalISel] combine G_TRUNC with G_MERGE_VALUES
Browse files Browse the repository at this point in the history
Summary:
Truncating the result of a merge most likely means that we could have done without the merge in the first place and used the merge inputs directly. This can be done in three cases:

1. If the truncation result is smaller than the merge source, we can use the source in the trunc directly
2. If the sizes are the same, we can replace the register or use a copy
3. If the truncation size is a multiple of the merge source size, we can build a smaller merge

This gets rid of most of the larger, hard-to-legalize merges.

Reviewers: qcolombet, aditya_nandakumar, aemerson, paquette, arsenm, Petar.Avramovic

Reviewed By: arsenm

Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, jrtc27, atanasyan, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75915
  • Loading branch information
gargaroff committed Mar 16, 2020
1 parent 7aecf23 commit c0241f1
Show file tree
Hide file tree
Showing 15 changed files with 1,156 additions and 1,154 deletions.
Expand Up @@ -167,7 +167,8 @@ class LegalizationArtifactCombiner {

bool tryCombineTrunc(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts,
SmallVectorImpl<Register> &UpdatedDefs) {
SmallVectorImpl<Register> &UpdatedDefs,
GISelObserverWrapper &Observer) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

Builder.setInstr(MI);
Expand All @@ -189,6 +190,66 @@ class LegalizationArtifactCombiner {
}
}

// Try to fold trunc(merge) to directly use the source of the merge.
// This gets rid of large, difficult-to-legalize merges.
if (SrcMI->getOpcode() == TargetOpcode::G_MERGE_VALUES) {
const Register MergeSrcReg = SrcMI->getOperand(1).getReg();
const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
const LLT DstTy = MRI.getType(DstReg);

// We can only fold if the types are scalar
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
return false;

if (DstSize < MergeSrcSize) {
// When the merge source is larger than the destination, we can just
// truncate the merge source directly
if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
return false;

LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
<< MI);

Builder.buildTrunc(DstReg, MergeSrcReg);
UpdatedDefs.push_back(DstReg);
} else if (DstSize == MergeSrcSize) {
// If the sizes match we can simply try to replace the register
LLVM_DEBUG(
dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
<< MI);
replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
Observer);
} else if (DstSize % MergeSrcSize == 0) {
// If the trunc size is a multiple of the merge source size we can use
// a smaller merge instead
if (isInstUnsupported(
{TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
return false;

LLVM_DEBUG(
dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
<< MI);

const unsigned NumSrcs = DstSize / MergeSrcSize;
assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
"trunc(merge) should require less inputs than merge");
SmallVector<Register, 2> SrcRegs(NumSrcs);
for (unsigned i = 0; i < NumSrcs; ++i)
SrcRegs[i] = SrcMI->getOperand(i + 1).getReg();

Builder.buildMerge(DstReg, SrcRegs);
UpdatedDefs.push_back(DstReg);
} else {
// Unable to combine
return false;
}

markInstAndDefDead(MI, *SrcMI, DeadInsts);
return true;
}

return false;
}

Expand Down Expand Up @@ -533,7 +594,7 @@ class LegalizationArtifactCombiner {
Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
break;
case TargetOpcode::G_TRUNC:
Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs);
Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
if (!Changed) {
// Try to combine truncates away even if they are legal. As all artifact
// combines at the moment look only "up" the def-use chains, we achieve
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
Expand Up @@ -8,9 +8,7 @@ body: |
; CHECK-LABEL: name: test_implicit_def
; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[DEF]](s64), [[DEF]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[MV]](s128)
; CHECK: $x0 = COPY [[TRUNC]](s64)
; CHECK: $x0 = COPY [[DEF]](s64)
%0:_(s128) = G_IMPLICIT_DEF
%1:_(s64) = G_TRUNC %0(s128)
$x0 = COPY %1(s64)
Expand Down
136 changes: 136 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
@@ -0,0 +1,136 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck %s

---
name: trunc_s16_merge_s64_s32

body: |
bb.0:
; Test that trunc(merge) with trunc-size < merge-source-size creates a trunc
; of the merge source
; CHECK-LABEL: name: trunc_s16_merge_s64_s32
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: $vgpr0 = COPY [[C]](s32)
%0:_(s32) = G_CONSTANT i32 0
%1:_(s32) = G_CONSTANT i32 1
%2:_(s64) = G_MERGE_VALUES %0, %1
%3:_(s16) = G_TRUNC %2
%4:_(s32) = G_ANYEXT %3
$vgpr0 = COPY %4
...

---
name: trunc_s32_merge_s64_s32

body: |
bb.0:
; Test that trunc(merge) with trunc-size == merge-source-size is eliminated
; CHECK-LABEL: name: trunc_s32_merge_s64_s32
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: $vgpr0 = COPY [[C]](s32)
%0:_(s32) = G_CONSTANT i32 0
%1:_(s32) = G_CONSTANT i32 1
%2:_(s64) = G_MERGE_VALUES %0, %1
%3:_(s32) = G_TRUNC %2
$vgpr0 = COPY %3
...

---
name: trunc_s64_merge_s128_s32

body: |
bb.0:
; Test that trunc(merge) with trunc-size > merge-source-size combines to a
; smaller merge
; CHECK-LABEL: name: trunc_s64_merge_s128_s32
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(s32) = G_CONSTANT i32 0
%1:_(s32) = G_CONSTANT i32 1
%2:_(s128) = G_MERGE_VALUES %0, %1, %0, %1
%3:_(s64) = G_TRUNC %2
$vgpr0_vgpr1 = COPY %3
...

---
name: trunc_s32_merge_s128_p0

body: |
bb.0:
; Test that trunc(merge) with a non-scalar merge source is not combined
; CHECK-LABEL: name: trunc_s32_merge_s128_p0
; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; CHECK: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0)
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128)
; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
%0:_(p0) = G_CONSTANT i64 0
%1:_(p0) = G_CONSTANT i64 1
%2:_(s128) = G_MERGE_VALUES %0, %1
%3:_(s32) = G_TRUNC %2
$vgpr0 = COPY %3
...

---
name: trunc_s64_merge_s128_p0

body: |
bb.0:
; Test that trunc(merge) with a non-scalar merge source is not combined
; CHECK-LABEL: name: trunc_s64_merge_s128_p0
; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; CHECK: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](p0), [[C1]](p0)
; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[MV]](s128)
; CHECK: $vgpr0_vgpr1 = COPY [[TRUNC]](s64)
%0:_(p0) = G_CONSTANT i64 0
%1:_(p0) = G_CONSTANT i64 1
%2:_(s128) = G_MERGE_VALUES %0, %1
%3:_(s64) = G_TRUNC %2
$vgpr0_vgpr1 = COPY %3
...

---
name: trunc_s128_merge_s192_p0

body: |
bb.0:
; Test that trunc(merge) with a non-scalar merge source is not combined
; CHECK-LABEL: name: trunc_s128_merge_s192_p0
; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; CHECK: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 1
; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[C]](p0), [[C1]](p0), [[C]](p0)
; CHECK: [[TRUNC:%[0-9]+]]:_(s128) = G_TRUNC [[MV]](s192)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](s128)
%0:_(p0) = G_CONSTANT i64 0
%1:_(p0) = G_CONSTANT i64 1
%2:_(s192) = G_MERGE_VALUES %0, %1, %0
%3:_(s128) = G_TRUNC %2
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...

---
name: trunc_s68_merge_s128_s32

body: |
bb.0:
; Test that trunc(merge) with trunc-size > merge-source-size is not combined
; if trunc-size % merge-source-size != 0
; CHECK-LABEL: name: trunc_s68_merge_s128_s32
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C]](s32), [[C1]](s32)
; CHECK: [[TRUNC:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s128)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC]](s68)
; CHECK: $vgpr0 = COPY [[TRUNC1]](s32)
%0:_(s32) = G_CONSTANT i32 0
%1:_(s32) = G_CONSTANT i32 1
%2:_(s128) = G_MERGE_VALUES %0, %1, %0, %1
%3:_(s68) = G_TRUNC %2
%4:_(s32) = G_TRUNC %3
$vgpr0 = COPY %4
...
32 changes: 14 additions & 18 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
Expand Up @@ -292,19 +292,17 @@ body: |
; CHECK-LABEL: name: test_bitcast_s24_to_v3s8
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF1]](s64)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32)
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[TRUNC]](s32)
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF1]](s64)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C]](s32)
; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
Expand All @@ -328,23 +326,21 @@ body: |
; CHECK-LABEL: name: test_bitcast_s48_to_v3s16
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32)
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C]](s32)
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC1]], [[C]](s32)
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32)
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
Expand Down
Expand Up @@ -207,13 +207,8 @@ body: |
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C2]](s64)
; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]]
; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s33) = G_TRUNC %0
%2:_(s33) = G_CTLZ_ZERO_UNDEF %1
Expand Down
9 changes: 2 additions & 7 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir
Expand Up @@ -253,13 +253,8 @@ body: |
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C4]](s64)
; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]]
; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64)
; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64)
; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s33) = G_TRUNC %0
%2:_(s33) = G_CTLZ %1
Expand Down

0 comments on commit c0241f1

Please sign in to comment.