Skip to content

Commit

Permalink
GlobalISel: Artifact combine merge-like and unmerge into unmerge
Browse files Browse the repository at this point in the history
Recognize when source could have been unmerged to pieces with DstTy
without having to split source to smaller elements
and then merge small elements into DstTy pieces.
This happens when vector was meant to be split to sub-vectors but there
was leftover. At this point artifact combiner have already dealt with
leftover and we can continue to use sub-vectors.

Differential Revision: https://reviews.llvm.org/D109241
  • Loading branch information
petar-avramovic committed Oct 24, 2022
1 parent f1aa598 commit e6c778f
Show file tree
Hide file tree
Showing 40 changed files with 864 additions and 2,542 deletions.
Expand Up @@ -876,6 +876,31 @@ class LegalizationArtifactCombiner {
return true;
}

// Recognize UnmergeSrc that can be unmerged to DstTy directly.
// Types have to be either both vector or both non-vector types.
// Merge-like opcodes are combined one at the time. First one creates new
// unmerge, following should use the same unmerge (builder performs CSE).
//
// %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
// %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1
// %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
//
// %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
(Elt0UnmergeIdx % NumMIElts == 0) &&
getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
EltSize))
return false;
MIB.setInstrAndDebugLoc(MI);
auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg());
unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits();
replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB,
UpdatedDefs, Observer);
DeadInsts.push_back(&MI);
return true;
}

return false;
}
};
Expand Down
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
Expand Up @@ -66,8 +66,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
Expand Down Expand Up @@ -324,9 +322,9 @@ body: |
; CHECK-NEXT: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[AND63]], [[C7]](s64)
; CHECK-NEXT: [[OR55:%[0-9]+]]:_(s32) = G_OR [[OR54]], [[SHL55]]
; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[OR55]](s32)
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8)
; CHECK-NEXT: $x0 = COPY [[MV]](s64)
; CHECK-NEXT: $x1 = COPY [[MV1]](s64)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8)
; CHECK-NEXT: $x0 = COPY [[COPY]](s64)
; CHECK-NEXT: $x1 = COPY [[MV]](s64)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%2:_(s64) = COPY $x2
Expand Down
21 changes: 8 additions & 13 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
Expand Up @@ -232,29 +232,24 @@ body: |
; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s32) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]]
; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 24
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]]
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDO]](s64)
; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8)
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8)
; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8)
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[DEF]](s8)
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C2]](s64)
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV2]], [[C2]](s64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C3]](s64)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UADDO]], [[C3]](s64)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C2]](s64)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C4]](s64)
; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s32) = G_UADDO [[ASHR1]], [[C]]
; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO2]], [[MV6]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO2]], [[UADDO]]
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%copy_1:_(s128) = COPY $q0
Expand Down
21 changes: 8 additions & 13 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
Expand Up @@ -232,29 +232,24 @@ body: |
; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s32) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[AND]]
; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 24
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]]
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBO]](s64)
; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8)
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8)
; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8)
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[DEF]](s8)
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C2]](s64)
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV2]], [[C2]](s64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C3]](s64)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[USUBO]], [[C3]](s64)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]]
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C2]](s64)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C4]](s64)
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s32) = G_UADDO [[ASHR1]], [[C]]
; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ICMP]], [[C1]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[MV6]]
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[AND1]](s32), [[UADDO]], [[USUBO]]
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%copy_1:_(s128) = COPY $q0
Expand Down
Expand Up @@ -132,11 +132,9 @@ body: |
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
; GFX9-NEXT: $vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<2 x s32>)
; GFX9-NEXT: $vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<2 x s32>)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
; GFX9-NEXT: $vgpr4_vgpr5 = COPY [[UV]](<2 x s32>)
; GFX9-NEXT: $vgpr6_vgpr7 = COPY [[UV1]](<2 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %0(<4 x s32>)
%5:_(<2 x s32>) = G_BUILD_VECTOR %1, %2
Expand Down

0 comments on commit e6c778f

Please sign in to comment.