diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 09389bda94bc0..d8ee35ebf1e21 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -457,11 +457,16 @@ void applyShuffleVectorPseudo(MachineInstr &MI,
 /// for the imported tablegen patterns to work.
 void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
   MachineIRBuilder MIRBuilder(MI);
-  // Tablegen patterns expect an i32 G_CONSTANT as the final op.
-  auto Cst =
-      MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
-  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
-                        {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+  const int64_t Imm = MatchInfo.SrcOps[2].getImm();
+  if (Imm == 0) {
+    // With an immediate of 0 the first source is used as-is; emit a COPY.
+    MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
+  } else {
+    // Tablegen patterns expect an i32 G_CONSTANT as the final op.
+    auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), Imm);
+    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
+                          {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+  }
   MI.eraseFromParent();
 }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir index ce5a9ac30d310..f4374feadcdf3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir @@ -196,9 +196,8 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[EXT:%[0-9]+]]:_(<2 x s64>) = G_EXT [[IVEC]], [[DEF]], [[C1]](s32) - ; CHECK-NEXT: $q0 = COPY 
[[EXT]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY [[IVEC]](<2 x s64>) + ; CHECK-NEXT: $q0 = COPY [[COPY1]](<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $x0 %2:_(<2 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir index 1e12459771014..bcf088287f46a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir @@ -194,10 +194,8 @@ body: | ; CHECK: liveins: $q0, $q1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[EXT:%[0-9]+]]:_(<2 x s64>) = G_EXT [[COPY]], [[COPY1]], [[C]](s32) - ; CHECK-NEXT: $q0 = COPY [[EXT]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY [[COPY]](<2 x s64>) + ; CHECK-NEXT: $q0 = COPY [[COPY1]](<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x s64>) = COPY $q0 %1:_(<2 x s64>) = COPY $q1 diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll index 90dbd618919e2..a6afddfe3f73a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-dup.ll +++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll @@ -555,7 +555,6 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind { ; CHECK-GI-NEXT: adrp x8, .LCPI39_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0] ; CHECK-GI-NEXT: add.4s v0, v0, v1 -; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %tmp1 = add <4 x i32> %A, diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll index 4fd40bb7e229d..aa048eea302c9 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll +++ 
b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll @@ -137,9 +137,8 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-GI-LABEL: addp_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0 -; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-GI-NEXT: addp v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: rev64 v1.2s, v0.2s ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: fmov w0, s0 @@ -165,9 +164,8 @@ define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-GI-LABEL: addp_v8i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0 -; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-GI-NEXT: addp v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NEXT: addp v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: ret %1 = add <8 x i16> %a, %b %2 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> @@ -187,9 +185,8 @@ define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-GI-LABEL: addp_v16i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0 -; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-GI-NEXT: addp v0.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NEXT: addp v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: ret %1 = add <16 x i8> %a, %b %2 = shufflevector <16 x i8> %1, <16 x i8> poison, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll index 08cb1ef2e0b51..831acd242221b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -1614,18 +1614,11 @@ declare float @llvm.fabs.f32(float) nounwind readnone declare double @llvm.fabs.f64(double) nounwind readnone define <2 x i64> @uabdl_from_extract_dup(<4 x 
i32> %lhs, i32 %rhs) { -; CHECK-SD-LABEL: uabdl_from_extract_dup: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: dup.2s v1, w0 -; CHECK-SD-NEXT: uabdl.2d v0, v0, v1 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: uabdl_from_extract_dup: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: dup.2s v1, w0 -; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0 -; CHECK-GI-NEXT: uabdl.2d v0, v0, v1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: uabdl_from_extract_dup: +; CHECK: // %bb.0: +; CHECK-NEXT: dup.2s v1, w0 +; CHECK-NEXT: uabdl.2d v0, v0, v1 +; CHECK-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> @@ -1656,18 +1649,11 @@ define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { } define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { -; CHECK-SD-LABEL: sabdl_from_extract_dup: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: dup.2s v1, w0 -; CHECK-SD-NEXT: sabdl.2d v0, v0, v1 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: sabdl_from_extract_dup: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: dup.2s v1, w0 -; CHECK-GI-NEXT: ext.16b v0, v0, v0, #0 -; CHECK-GI-NEXT: sabdl.2d v0, v0, v1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: sabdl_from_extract_dup: +; CHECK: // %bb.0: +; CHECK-NEXT: dup.2s v1, w0 +; CHECK-NEXT: sabdl.2d v0, v0, v1 +; CHECK-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32>