Skip to content

Commit

Permalink
[AArch64][GISel] Lower EXT of 0 to a COPY
Browse files Browse the repository at this point in the history
This allows us to select G_SHUFFLE_VECTOR with identity masks (possibly
including undef elements), but avoid the actual EXT instruction if the shift
amount is 0.
  • Loading branch information
davemgreen committed Aug 16, 2023
1 parent d53b3df commit a047dfe
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 46 deletions.
14 changes: 9 additions & 5 deletions llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,11 +457,15 @@ void applyShuffleVectorPseudo(MachineInstr &MI,
/// for the imported tablegen patterns to work.
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  // SrcOps[2] carries the EXT shift amount as an immediate; read it once and
  // use it for both the zero check and the constant materialization.
  const int64_t Imm = MatchInfo.SrcOps[2].getImm();
  if (Imm == 0) {
    // With a shift amount of 0 no actual EXT instruction is needed: define
    // Dst as a plain COPY of the first source operand instead.
    MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
  } else {
    // Tablegen patterns expect an i32 G_CONSTANT as the final op.
    auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), Imm);
    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                          {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  }
  // Exactly one replacement defining Dst has been built above, so the
  // original instruction can now be removed.
  MI.eraseFromParent();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,8 @@ body: |
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[EXT:%[0-9]+]]:_(<2 x s64>) = G_EXT [[IVEC]], [[DEF]], [[C1]](s32)
; CHECK-NEXT: $q0 = COPY [[EXT]](<2 x s64>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY [[IVEC]](<2 x s64>)
; CHECK-NEXT: $q0 = COPY [[COPY1]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
%2:_(<2 x s64>) = G_IMPLICIT_DEF
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[EXT:%[0-9]+]]:_(<2 x s64>) = G_EXT [[COPY]], [[COPY1]], [[C]](s32)
; CHECK-NEXT: $q0 = COPY [[EXT]](<2 x s64>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY [[COPY]](<2 x s64>)
; CHECK-NEXT: $q0 = COPY [[COPY1]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<2 x s64>) = COPY $q0
%1:_(<2 x s64>) = COPY $q1
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AArch64/arm64-dup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,6 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind {
; CHECK-GI-NEXT: adrp x8, .LCPI39_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
; CHECK-GI-NEXT: add.4s v0, v0, v1
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%tmp1 = add <4 x i32> %A, <i32 8421377, i32 8421377, i32 8421377, i32 8421377>
Expand Down
15 changes: 6 additions & 9 deletions llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,8 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-GI-LABEL: addp_v4i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: addp v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: fmov w0, s0
Expand All @@ -165,9 +164,8 @@ define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-GI-LABEL: addp_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: addp v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: addp v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: ret
%1 = add <8 x i16> %a, %b
%2 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
Expand All @@ -187,9 +185,8 @@ define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-GI-LABEL: addp_v16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: addp v0.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: addp v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%1 = add <16 x i8> %a, %b
%2 = shufflevector <16 x i8> %1, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
Expand Down
34 changes: 10 additions & 24 deletions llvm/test/CodeGen/AArch64/arm64-vabs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1614,18 +1614,11 @@ declare float @llvm.fabs.f32(float) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone

define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
; CHECK-SD-LABEL: uabdl_from_extract_dup:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: dup.2s v1, w0
; CHECK-SD-NEXT: uabdl.2d v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uabdl_from_extract_dup:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: dup.2s v1, w0
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0
; CHECK-GI-NEXT: uabdl.2d v0, v0, v1
; CHECK-GI-NEXT: ret
; CHECK-LABEL: uabdl_from_extract_dup:
; CHECK: // %bb.0:
; CHECK-NEXT: dup.2s v1, w0
; CHECK-NEXT: uabdl.2d v0, v0, v1
; CHECK-NEXT: ret
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
Expand Down Expand Up @@ -1656,18 +1649,11 @@ define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
}

define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
; CHECK-SD-LABEL: sabdl_from_extract_dup:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: dup.2s v1, w0
; CHECK-SD-NEXT: sabdl.2d v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sabdl_from_extract_dup:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: dup.2s v1, w0
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0
; CHECK-GI-NEXT: sabdl.2d v0, v0, v1
; CHECK-GI-NEXT: ret
; CHECK-LABEL: sabdl_from_extract_dup:
; CHECK: // %bb.0:
; CHECK-NEXT: dup.2s v1, w0
; CHECK-NEXT: sabdl.2d v0, v0, v1
; CHECK-NEXT: ret
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
Expand Down

0 comments on commit a047dfe

Please sign in to comment.