diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f45a3b560cf449..072008b07ced51 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -63,9 +63,16 @@ def trn : GICombineRule<
   (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
 >;
 
+def ext: GICombineRule <
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchEXT(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyEXT(*${root}, ${matchinfo}); }])
+>;
+
 // Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
 // instruction.
-def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp, trn]>;
+def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
 
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 6b7754d60ded7b..a0e7c782f68c35 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -104,6 +104,13 @@ def G_TRN2 : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
+// Represents an ext instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_EXT: AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
+}
+
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
@@ -114,3 +121,4 @@ def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
+def : GINodeEquiv<G_EXT, AArch64ext>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index a2cad2fad4c102..3217068c4a64db 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -111,6 +111,48 @@ static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
   return true;
 }
 
+/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
+/// sources of the shuffle are different.
+static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
+                                                      unsigned NumElts) {
+  // Look for the first non-undef element.
+  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
+  if (FirstRealElt == M.end())
+    return None;
+
+  // Use APInt to handle overflow when calculating expected element.
+  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
+  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
+
+  // The following shuffle indices must be the successive elements after the
+  // first real element.
+  if (any_of(
+          make_range(std::next(FirstRealElt), M.end()),
+          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
+    return None;
+
+  // The index of an EXT is the first element if it is not UNDEF.
+  // Watch out for the beginning UNDEFs. The EXT index should be the expected
+  // value of the first element. E.g.
+  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
+  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
+  // ExpectedElt is the last mask index plus 1.
+  uint64_t Imm = ExpectedElt.getZExtValue();
+  bool ReverseExt = false;
+
+  // There are two different cases that require reversing the input vectors.
+  // For example, for vector <4 x i32> we have the following cases,
+  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
+  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
+  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
+  // reversing the two input vectors.
+  if (Imm < NumElts)
+    ReverseExt = true;
+  else
+    Imm -= NumElts;
+  return std::make_pair(ReverseExt, Imm);
+}
+
 /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
 /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
 static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
@@ -271,6 +313,27 @@ static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
   return true;
 }
 
+static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     ShuffleVectorPseudo &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  Register Dst = MI.getOperand(0).getReg();
+  auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
+                            MRI.getType(Dst).getNumElements());
+  if (!ExtInfo)
+    return false;
+  bool ReverseExt;
+  uint64_t Imm;
+  std::tie(ReverseExt, Imm) = *ExtInfo;
+  Register V1 = MI.getOperand(1).getReg();
+  Register V2 = MI.getOperand(2).getReg();
+  if (ReverseExt)
+    std::swap(V1, V2);
+  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+  Imm *= ExtFactor;
+  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
+  return true;
+}
+
 /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
 /// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
 static bool applyShuffleVectorPseudo(MachineInstr &MI,
@@ -281,6 +344,20 @@ static bool applyShuffleVectorPseudo(MachineInstr &MI,
   return true;
 }
 
+/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
+/// Special-cased because the constant operand must be emitted as a G_CONSTANT
+/// for the imported tablegen patterns to work.
+static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
+  MachineIRBuilder MIRBuilder(MI);
+  // Tablegen patterns expect an i32 G_CONSTANT as the final op.
+  auto Cst =
+      MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
+  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
+                        {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-ext.mir
new file mode 100644
index 00000000000000..18f68af46c232c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-ext.mir
@@ -0,0 +1,258 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check that we can combine a G_SHUFFLE_VECTOR into a G_EXT.
+
+...
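For readers tracing the combine, here is a minimal standalone sketch of the mask check that getExtMask performs, using only the standard library. The helper name computeExtMaskInfo and the std::vector/std::optional types are illustrative only and not part of the patch (the patch uses ArrayRef, APInt and llvm::Optional instead).

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <optional>
#include <utility>
#include <vector>

// Returns {swap-the-sources, element-index} when Mask describes one
// contiguous run through the two concatenated sources, mirroring getExtMask.
static std::optional<std::pair<bool, uint64_t>>
computeExtMaskInfo(const std::vector<int> &Mask, unsigned NumElts) {
  // First defined (non-undef) mask element.
  auto First =
      std::find_if(Mask.begin(), Mask.end(), [](int Elt) { return Elt >= 0; });
  if (First == Mask.end())
    return std::nullopt;

  // Every later defined element must continue the run *First, *First + 1, ...
  // modulo 2 * NumElts (indices into both sources concatenated).
  uint64_t Expected = static_cast<uint64_t>(*First) + 1;
  for (auto It = std::next(First); It != Mask.end(); ++It, ++Expected)
    if (*It >= 0 && static_cast<uint64_t>(*It) != Expected % (2 * NumElts))
      return std::nullopt;

  // With a NumElts-entry mask, Expected is now "last mask slot + 1", i.e. the
  // start of the run shifted by NumElts (mod 2 * NumElts). If that lands in
  // the low half, the run wraps from the second source back into the first
  // and the sources must be swapped; otherwise subtracting NumElts recovers
  // the start index.
  uint64_t Imm = Expected % (2 * NumElts);
  bool ReverseExt = Imm < NumElts;
  if (!ReverseExt)
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}

Replaying the v8s8_cst5 mask from the tests below, (13, 14, 15, 0, 1, 2, 3, 4) with NumElts = 8 gives {true, 5}: the sources are swapped, and matchEXT later scales the element index by the element byte width (Imm *= ExtFactor), which leaves the immediate at 5 for 8-bit elements.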
+--- +name: v8s8_cst3 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: v8s8_cst3 + ; CHECK: liveins: $d0, $d1 + ; CHECK: %v1:_(<8 x s8>) = COPY $d0 + ; CHECK: %v2:_(<8 x s8>) = COPY $d1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v1, %v2, [[C]](s32) + ; CHECK: $d0 = COPY %shuf(<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %v1:_(<8 x s8>) = COPY $d0 + %v2:_(<8 x s8>) = COPY $d1 + %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(3, 4, 5, 6, 7, 8, 9, 10) + $d0 = COPY %shuf(<8 x s8>) + RET_ReallyLR implicit $d0 +... +--- +name: v8s8_cst5 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: v8s8_cst5 + ; CHECK: liveins: $d0, $d1 + ; CHECK: %v1:_(<8 x s8>) = COPY $d0 + ; CHECK: %v2:_(<8 x s8>) = COPY $d1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v2, %v1, [[C]](s32) + ; CHECK: $d0 = COPY %shuf(<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %v1:_(<8 x s8>) = COPY $d0 + %v2:_(<8 x s8>) = COPY $d1 + %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(13, 14, 15, 0, 1, 2, 3, 4) + $d0 = COPY %shuf(<8 x s8>) + RET_ReallyLR implicit $d0 +... +--- +name: v16s8_cst3 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: v16s8_cst3 + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:_(<16 x s8>) = COPY $q0 + ; CHECK: %v2:_(<16 x s8>) = COPY $q1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: %shuf:_(<16 x s8>) = G_EXT %v1, %v2, [[C]](s32) + ; CHECK: $q0 = COPY %shuf(<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %v1:_(<16 x s8>) = COPY $q0 + %v2:_(<16 x s8>) = COPY $q1 + %shuf:_(<16 x s8>) = G_SHUFFLE_VECTOR %v1(<16 x s8>), %v2, shufflemask(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18) + $q0 = COPY %shuf(<16 x s8>) + RET_ReallyLR implicit $q0 +... +--- +name: v16s8_cst7 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: v16s8_cst7 + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:_(<16 x s8>) = COPY $q0 + ; CHECK: %v2:_(<16 x s8>) = COPY $q1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK: %shuf:_(<16 x s8>) = G_EXT %v2, %v1, [[C]](s32) + ; CHECK: $q0 = COPY %shuf(<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %v1:_(<16 x s8>) = COPY $q0 + %v2:_(<16 x s8>) = COPY $q1 + %shuf:_(<16 x s8>) = G_SHUFFLE_VECTOR %v1(<16 x s8>), %v2, shufflemask(23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6) + $q0 = COPY %shuf(<16 x s8>) + RET_ReallyLR implicit $q0 +... +--- +name: v4s16_cst6 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: v4s16_cst6 + ; CHECK: liveins: $d0, $d1 + ; CHECK: %v1:_(<4 x s16>) = COPY $d0 + ; CHECK: %v2:_(<4 x s16>) = COPY $d1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK: %shuf:_(<4 x s16>) = G_EXT %v1, %v2, [[C]](s32) + ; CHECK: $d0 = COPY %shuf(<4 x s16>) + ; CHECK: RET_ReallyLR implicit $d0 + %v1:_(<4 x s16>) = COPY $d0 + %v2:_(<4 x s16>) = COPY $d1 + %shuf:_(<4 x s16>) = G_SHUFFLE_VECTOR %v1(<4 x s16>), %v2, shufflemask(3, 4, 5, 6) + $d0 = COPY %shuf(<4 x s16>) + RET_ReallyLR implicit $d0 +... 
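The constants in these test names (cst3, cst5, cst6, ...) are byte offsets: the element index recovered from the mask is multiplied by the element size in bytes, which is exactly the Imm *= ExtFactor step in matchEXT. A hypothetical one-liner, just to make the scaling explicit:

#include <cstdint>

// EXT immediates are byte offsets, so scale the mask's element index by the
// element width. Helper name is made up for illustration.
static uint64_t extByteImmediate(uint64_t ElementIndex, unsigned ScalarSizeInBits) {
  return ElementIndex * (ScalarSizeInBits / 8);
}

For v4s16_cst6 above, the mask (3, 4, 5, 6) starts at element 3 and 3 * (16 / 8) == 6; for v4s32_cst12 below, the same element index with 32-bit lanes gives 3 * (32 / 8) == 12.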
+--- +name: v4s32_cst12 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: v4s32_cst12 + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:_(<4 x s32>) = COPY $q0 + ; CHECK: %v2:_(<4 x s32>) = COPY $q1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: %shuf:_(<4 x s32>) = G_EXT %v1, %v2, [[C]](s32) + ; CHECK: $q0 = COPY %shuf(<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %v1:_(<4 x s32>) = COPY $q0 + %v2:_(<4 x s32>) = COPY $q1 + %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1(<4 x s32>), %v2, shufflemask(3, 4, 5, 6) + $q0 = COPY %shuf(<4 x s32>) + RET_ReallyLR implicit $q0 +... +--- +name: undef_elts_should_match_1 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; Undef shuffle indices should not prevent matching G_EXT. + ; We should get a constant 3 here. + ; + ; CHECK-LABEL: name: undef_elts_should_match_1 + ; CHECK: liveins: $d0, $d1 + ; CHECK: %v1:_(<8 x s8>) = COPY $d0 + ; CHECK: %v2:_(<8 x s8>) = COPY $d1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v1, %v2, [[C]](s32) + ; CHECK: $d0 = COPY %shuf(<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %v1:_(<8 x s8>) = COPY $d0 + %v2:_(<8 x s8>) = COPY $d1 + %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(3, -1, -1, 6, 7, 8, 9, 10) + $d0 = COPY %shuf(<8 x s8>) + RET_ReallyLR implicit $d0 +... +--- +name: undef_elts_should_match_2 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; Undef shuffle indices should not prevent matching G_EXT. + ; We should get a constant 6 here. + ; + ; CHECK-LABEL: name: undef_elts_should_match_2 + ; CHECK: liveins: $d0, $d1 + ; CHECK: %v1:_(<8 x s8>) = COPY $d0 + ; CHECK: %v2:_(<8 x s8>) = COPY $d1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v2, %v1, [[C]](s32) + ; CHECK: $d0 = COPY %shuf(<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %v1:_(<8 x s8>) = COPY $d0 + %v2:_(<8 x s8>) = COPY $d1 + %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(-1, -1, -1, -1, 2, 3, 4, 5) + $d0 = COPY %shuf(<8 x s8>) + RET_ReallyLR implicit $d0 +... +--- +name: undef_elts_should_match_3 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; Undef shuffle indices should not prevent matching G_EXT. + ; We should get a constant 7 here. + ; CHECK-LABEL: name: undef_elts_should_match_3 + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:_(<16 x s8>) = COPY $q0 + ; CHECK: %v2:_(<16 x s8>) = COPY $q1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK: %shuf:_(<16 x s8>) = G_EXT %v2, %v1, [[C]](s32) + ; CHECK: $q0 = COPY %shuf(<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %v1:_(<16 x s8>) = COPY $q0 + %v2:_(<16 x s8>) = COPY $q1 + %shuf:_(<16 x s8>) = G_SHUFFLE_VECTOR %v1(<16 x s8>), %v2, shufflemask(23, 24, 25, 26, -1, -1, 29, 30, 31, 0, 1, 2, 3, 4, -1, 6) + $q0 = COPY %shuf(<16 x s8>) + RET_ReallyLR implicit $q0 +... +--- +name: undef_elts_should_match_4 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; Undef shuffle indices should not prevent matching G_EXT. + ; We should get a constant 10 here. 
+ ; CHECK-LABEL: name: undef_elts_should_match_4 + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:_(<8 x s16>) = COPY $q0 + ; CHECK: %v2:_(<8 x s16>) = COPY $q1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK: %shuf:_(<8 x s16>) = G_EXT %v2, %v1, [[C]](s32) + ; CHECK: $q0 = COPY %shuf(<8 x s16>) + ; CHECK: RET_ReallyLR implicit $q0 + %v1:_(<8 x s16>) = COPY $q0 + %v2:_(<8 x s16>) = COPY $q1 + %shuf:_(<8 x s16>) = G_SHUFFLE_VECTOR %v1(<8 x s16>), %v2, shufflemask(-1, -1, -1, -1, 1, 2, 3, 4) + $q0 = COPY %shuf(<8 x s16>) + RET_ReallyLR implicit $q0 +... +--- +name: all_undef +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; We expect at least one defined element in the shuffle mask. + ; + ; CHECK-LABEL: name: all_undef + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:_(<8 x s16>) = COPY $q0 + ; CHECK: %shuf:_(<8 x s16>) = G_REV64 %v1 + ; CHECK: $q0 = COPY %shuf(<8 x s16>) + ; CHECK: RET_ReallyLR implicit $q0 + %v1:_(<8 x s16>) = COPY $q0 + %v2:_(<8 x s16>) = COPY $q1 + %shuf:_(<8 x s16>) = G_SHUFFLE_VECTOR %v1(<8 x s16>), %v2, shufflemask(-1, -1, -1, -1, -1, -1, -1, -1) + $q0 = COPY %shuf(<8 x s16>) + RET_ReallyLR implicit $q0 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir index 2ff208b0f6f004..2325b0fb03413c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir @@ -5,7 +5,6 @@ name: splat_4xi32 alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -13,15 +12,15 @@ body: | ; CHECK-LABEL: name: splat_4xi32 ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[COPY]](s32) ; CHECK: $q0 = COPY [[DUP]](<4 x s32>) ; CHECK: RET_ReallyLR implicit $q0 - %0:gpr(s32) = COPY $w0 - %2:fpr(<4 x s32>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) - %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0) + %0:_(s32) = COPY $w0 + %2:_(<4 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0) $q0 = COPY %4(<4 x s32>) RET_ReallyLR implicit $q0 @@ -30,7 +29,6 @@ body: | name: splat_2xi64 alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -38,15 +36,15 @@ body: | ; CHECK-LABEL: name: splat_2xi64 ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64) ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) ; CHECK: RET_ReallyLR implicit $q0 - %0:gpr(s64) = COPY $x0 - %2:fpr(<2 x s64>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) - %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0) + %0:_(s64) = COPY $x0 + %2:_(<2 x s64>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) + %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 
x s64>), %2, shufflemask(0, 0) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 @@ -55,7 +53,6 @@ body: | name: splat_2xi32 alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -63,15 +60,15 @@ body: | ; CHECK-LABEL: name: splat_2xi32 ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s32>) = G_DUP [[COPY]](s32) ; CHECK: $d0 = COPY [[DUP]](<2 x s32>) ; CHECK: RET_ReallyLR implicit $d0 - %0:gpr(s32) = COPY $w0 - %2:fpr(<2 x s32>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) - %4:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0) + %0:_(s32) = COPY $w0 + %2:_(<2 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %4:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0) $d0 = COPY %4(<2 x s32>) RET_ReallyLR implicit $d0 @@ -80,7 +77,6 @@ body: | name: splat_4xf32 alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -88,15 +84,15 @@ body: | ; CHECK-LABEL: name: splat_4xf32 ; CHECK: liveins: $s0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[COPY]](s32) ; CHECK: $q0 = COPY [[DUP]](<4 x s32>) ; CHECK: RET_ReallyLR implicit $q0 - %0:fpr(s32) = COPY $s0 - %2:fpr(<4 x s32>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) - %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0) + %0:_(s32) = COPY $s0 + %2:_(<4 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0) $q0 = COPY %4(<4 x s32>) RET_ReallyLR implicit $q0 @@ -105,7 +101,6 @@ body: | name: splat_2xf64 alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -113,15 +108,15 @@ body: | ; CHECK-LABEL: name: splat_2xf64 ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64) ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) ; CHECK: RET_ReallyLR implicit $q0 - %0:fpr(s64) = COPY $d0 - %2:fpr(<2 x s64>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) - %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0) + %0:_(s64) = COPY $d0 + %2:_(<2 x s64>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) + %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 @@ -130,7 +125,6 @@ body: | name: splat_2xf32 alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -138,15 +132,15 @@ body: | ; CHECK-LABEL: name: splat_2xf32 ; CHECK: liveins: $s0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = 
COPY $s0 + ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s32>) = G_DUP [[COPY]](s32) ; CHECK: $d0 = COPY [[DUP]](<2 x s32>) ; CHECK: RET_ReallyLR implicit $d0 - %0:fpr(s32) = COPY $s0 - %2:fpr(<2 x s32>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) - %4:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0) + %0:_(s32) = COPY $s0 + %2:_(<2 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %4:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0) $d0 = COPY %4(<2 x s32>) RET_ReallyLR implicit $d0 @@ -155,7 +149,6 @@ body: | name: splat_2xf64_copies alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -165,17 +158,17 @@ body: | ; These copies shouldn't get in the way of matching the dup pattern. ; CHECK-LABEL: name: splat_2xf64_copies ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64) ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) ; CHECK: RET_ReallyLR implicit $q0 - %0:fpr(s64) = COPY $d0 - %2:fpr(<2 x s64>) = G_IMPLICIT_DEF - %6:fpr(<2 x s64>) = COPY %2 - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s32) - %7:fpr(<2 x s64>) = COPY %1 - %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, shufflemask(0, 0) + %0:_(s64) = COPY $d0 + %2:_(<2 x s64>) = G_IMPLICIT_DEF + %6:_(<2 x s64>) = COPY %2 + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s32) + %7:_(<2 x s64>) = COPY %1 + %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, shufflemask(0, 0) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 @@ -184,7 +177,6 @@ body: | name: not_all_zeros alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -192,18 +184,19 @@ body: | ; Make sure that we don't do the optimization when it's not all zeroes. 
; CHECK-LABEL: name: not_all_zeros ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 - ; CHECK: [[DEF:%[0-9]+]]:fpr(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32) - ; CHECK: [[SHUF:%[0-9]+]]:fpr(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 1) - ; CHECK: $q0 = COPY [[SHUF]](<2 x s64>) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[EXT:%[0-9]+]]:_(<2 x s64>) = G_EXT [[IVEC]], [[DEF]], [[C1]](s32) + ; CHECK: $q0 = COPY [[EXT]](<2 x s64>) ; CHECK: RET_ReallyLR implicit $q0 - %0:gpr(s64) = COPY $x0 - %2:fpr(<2 x s64>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) - %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 1) + %0:_(s64) = COPY $x0 + %2:_(<2 x s64>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) + %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 1) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 @@ -212,7 +205,6 @@ body: | name: all_undef alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -224,15 +216,15 @@ body: | ; ; CHECK-LABEL: name: all_undef ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64) ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) ; CHECK: RET_ReallyLR implicit $q0 - %0:gpr(s64) = COPY $x0 - %2:fpr(<2 x s64>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) - %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(-1, -1) + %0:_(s64) = COPY $x0 + %2:_(<2 x s64>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) + %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(-1, -1) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 @@ -241,7 +233,6 @@ body: | name: one_undef alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -252,15 +243,15 @@ body: | ; ; CHECK-LABEL: name: one_undef ; CHECK: liveins: $s0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[COPY]](s32) ; CHECK: $q0 = COPY [[DUP]](<4 x s32>) ; CHECK: RET_ReallyLR implicit $q0 - %0:fpr(s32) = COPY $s0 - %2:fpr(<4 x s32>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) - %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, -1, 0, 0) + %0:_(s32) = COPY $s0 + %2:_(<4 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, -1, 0, 0) $q0 = COPY %4(<4 x s32>) RET_ReallyLR implicit $q0 @@ -269,7 +260,6 @@ body: | name: 
not_all_zeros_with_undefs alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: @@ -278,18 +268,18 @@ body: | ; ; CHECK-LABEL: name: not_all_zeros_with_undefs ; CHECK: liveins: $s0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; CHECK: [[DEF:%[0-9]+]]:fpr(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[IVEC:%[0-9]+]]:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32) - ; CHECK: [[SHUF:%[0-9]+]]:fpr(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 3) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32) + ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 3) ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>) ; CHECK: RET_ReallyLR implicit $q0 - %0:fpr(s32) = COPY $s0 - %2:fpr(<4 x s32>) = G_IMPLICIT_DEF - %3:gpr(s32) = G_CONSTANT i32 0 - %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) - %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(-1, 0, 0, 3) + %0:_(s32) = COPY $s0 + %2:_(<4 x s32>) = G_IMPLICIT_DEF + %3:_(s32) = G_CONSTANT i32 0 + %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(-1, 0, 0, 3) $q0 = COPY %4(<4 x s32>) RET_ReallyLR implicit $q0 @@ -298,22 +288,21 @@ body: | name: splat_4xi16 alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: liveins: $h0 ; CHECK-LABEL: name: splat_4xi16 ; CHECK: liveins: $h0 - ; CHECK: %copy:fpr(s16) = COPY $h0 - ; CHECK: %splat:fpr(<4 x s16>) = G_DUP %copy(s16) + ; CHECK: %copy:_(s16) = COPY $h0 + ; CHECK: %splat:_(<4 x s16>) = G_DUP %copy(s16) ; CHECK: $d0 = COPY %splat(<4 x s16>) ; CHECK: RET_ReallyLR implicit $d0 - %copy:fpr(s16) = COPY $h0 - %undef:fpr(<4 x s16>) = G_IMPLICIT_DEF - %cst:gpr(s32) = G_CONSTANT i32 0 - %ins:fpr(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %copy(s16), %cst(s32) - %splat:fpr(<4 x s16>) = G_SHUFFLE_VECTOR %ins(<4 x s16>), %undef, shufflemask(0, 0, 0, 0) + %copy:_(s16) = COPY $h0 + %undef:_(<4 x s16>) = G_IMPLICIT_DEF + %cst:_(s32) = G_CONSTANT i32 0 + %ins:_(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %copy(s16), %cst(s32) + %splat:_(<4 x s16>) = G_SHUFFLE_VECTOR %ins(<4 x s16>), %undef, shufflemask(0, 0, 0, 0) $d0 = COPY %splat(<4 x s16>) RET_ReallyLR implicit $d0 @@ -322,21 +311,20 @@ body: | name: splat_8xi8 alignment: 4 legalized: true -regBankSelected: true tracksRegLiveness: true body: | bb.1.entry: liveins: $w0 ; CHECK-LABEL: name: splat_8xi8 ; CHECK: liveins: $w0 - ; CHECK: %copy:gpr(s32) = COPY $w0 - ; CHECK: %splat:fpr(<8 x s8>) = G_DUP %copy(s32) + ; CHECK: %copy:_(s32) = COPY $w0 + ; CHECK: %splat:_(<8 x s8>) = G_DUP %copy(s32) ; CHECK: $d0 = COPY %splat(<8 x s8>) ; CHECK: RET_ReallyLR implicit $d0 - %copy:gpr(s32) = COPY $w0 - %undef:fpr(<8 x s8>) = G_IMPLICIT_DEF - %cst:gpr(s32) = G_CONSTANT i32 0 - %ins:fpr(<8 x s8>) = G_INSERT_VECTOR_ELT %undef, %copy(s32), %cst(s32) - %splat:fpr(<8 x s8>) = G_SHUFFLE_VECTOR %ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0) + %copy:_(s32) = COPY $w0 + %undef:_(<8 x s8>) = G_IMPLICIT_DEF + %cst:_(s32) = G_CONSTANT i32 0 + %ins:_(<8 x s8>) = G_INSERT_VECTOR_ELT %undef, %copy(s32), %cst(s32) + %splat:_(<8 x s8>) = G_SHUFFLE_VECTOR 
%ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0) $d0 = COPY %splat(<8 x s8>) RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir index e754377e5f3f5f..3d71b6a948dc8c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir @@ -3,7 +3,7 @@ # Check that we can recognize a shuffle mask for a zip instruction, and produce # G_ZIP1 or G_ZIP2 where appropriate. # -# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner --aarch64postlegalizercombinerhelper-disable-rule=ext -verify-machineinstrs %s -o - | FileCheck %s ... --- diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ext.mir new file mode 100644 index 00000000000000..c97ed4d52724fe --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ext.mir @@ -0,0 +1,153 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Test G_EXT selection using AArch64ext patterns. + +... +--- +name: v8s8_EXTv8i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: v8s8_EXTv8i8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: %v1:fpr64 = COPY $d0 + ; CHECK: %v2:fpr64 = COPY $d1 + ; CHECK: %shuf:fpr64 = EXTv8i8 %v1, %v2, 3 + %v1:fpr(<8 x s8>) = COPY $d0 + %v2:fpr(<8 x s8>) = COPY $d1 + %3:gpr(s32) = G_CONSTANT i32 3 + %shuf:fpr(<8 x s8>) = G_EXT %v1, %v2, %3(s32) + +... +--- +name: v16s8_EXTv16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: v16s8_EXTv16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:fpr128 = COPY $q0 + ; CHECK: %v2:fpr128 = COPY $q1 + ; CHECK: %shuf:fpr128 = EXTv16i8 %v1, %v2, 3 + %v1:fpr(<16 x s8>) = COPY $q0 + %v2:fpr(<16 x s8>) = COPY $q1 + %3:gpr(s32) = G_CONSTANT i32 3 + %shuf:fpr(<16 x s8>) = G_EXT %v1, %v2, %3(s32) + +... +--- +name: v4s16_EXTv8i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: v4s16_EXTv8i8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: %v1:fpr64 = COPY $d0 + ; CHECK: %v2:fpr64 = COPY $d1 + ; CHECK: %shuf:fpr64 = EXTv8i8 %v1, %v2, 6 + %v1:fpr(<4 x s16>) = COPY $d0 + %v2:fpr(<4 x s16>) = COPY $d1 + %3:gpr(s32) = G_CONSTANT i32 6 + %shuf:fpr(<4 x s16>) = G_EXT %v1, %v2, %3(s32) + +... +--- +name: v8s16_EXTv16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: v8s16_EXTv16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:fpr128 = COPY $q0 + ; CHECK: %v2:fpr128 = COPY $q1 + ; CHECK: %shuf:fpr128 = EXTv16i8 %v2, %v1, 10 + %v1:fpr(<8 x s16>) = COPY $q0 + %v2:fpr(<8 x s16>) = COPY $q1 + %3:gpr(s32) = G_CONSTANT i32 10 + %shuf:fpr(<8 x s16>) = G_EXT %v2, %v1, %3(s32) +... + +... 
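One property worth keeping in mind when reading the selected EXTv8i8/EXTv16i8 instructions: the immediate is a byte index into the concatenated sources, so (as far as I understand the encoding) it must stay below the vector width in bytes, 0-7 for the 64-bit form and 0-15 for the 128-bit form; the immediates exercised here (2, 3, 6, 10, 12) all satisfy that. A trivial, hypothetical bound check for experimenting outside the patch:

#include <cstdint>

// Not part of the patch: returns true when a byte immediate fits the given
// vector register width (64 or 128 bits).
static bool isLegalExtByteImmediate(uint64_t ByteImm, unsigned VectorSizeInBits) {
  return ByteImm < VectorSizeInBits / 8;
}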
+--- +name: v4s32_EXTv16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: v4s32_EXTv16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:fpr128 = COPY $q0 + ; CHECK: %v2:fpr128 = COPY $q1 + ; CHECK: %shuf:fpr128 = EXTv16i8 %v1, %v2, 12 + %v1:fpr(<4 x s32>) = COPY $q0 + %v2:fpr(<4 x s32>) = COPY $q1 + %3:gpr(s32) = G_CONSTANT i32 12 + %shuf:fpr(<4 x s32>) = G_EXT %v1, %v2, %3(s32) + +... +--- +name: v2s32_EXTv8i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + + ; CHECK-LABEL: name: v2s32_EXTv8i8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: %v1:fpr64 = COPY $d0 + ; CHECK: %v2:fpr64 = COPY $d1 + ; CHECK: %shuf:fpr64 = EXTv8i8 %v1, %v2, 2 + %v1:fpr(<2 x s32>) = COPY $d0 + %v2:fpr(<2 x s32>) = COPY $d1 + %3:gpr(s32) = G_CONSTANT i32 2 + %shuf:fpr(<2 x s32>) = G_EXT %v1, %v2, %3(s32) + +... +--- +name: v2s64_EXTv16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: v2s64_EXTv16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: %v1:fpr128 = COPY $q0 + ; CHECK: %v2:fpr128 = COPY $q1 + ; CHECK: %shuf:fpr128 = EXTv16i8 %v1, %v2, 2 + %v1:fpr(<2 x s64>) = COPY $q0 + %v2:fpr(<2 x s64>) = COPY $q1 + %3:gpr(s32) = G_CONSTANT i32 2 + %shuf:fpr(<2 x s64>) = G_EXT %v1, %v2, %3(s32) +...
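To close the loop on the combiner tests, a small driver for the computeExtMaskInfo sketch shown earlier, replaying a few masks from this patch. It assumes that sketch is in scope and is, again, illustrative only.

#include <cstdio>
#include <vector>

int main() {
  struct Case { std::vector<int> Mask; unsigned NumElts; } Cases[] = {
      {{3, 4, 5, 6, 7, 8, 9, 10}, 8},    // v8s8_cst3: expect {false, 3}
      {{13, 14, 15, 0, 1, 2, 3, 4}, 8},  // v8s8_cst5: expect {true, 5}
      {{-1, -1, -1, -1, 2, 3, 4, 5}, 8}, // undef_elts_should_match_2: {true, 6}
  };
  for (const auto &C : Cases) {
    if (auto Info = computeExtMaskInfo(C.Mask, C.NumElts))
      std::printf("reverse=%d elt-imm=%llu\n", Info->first ? 1 : 0,
                  (unsigned long long)Info->second);
    else
      std::puts("not an ext mask");
  }
  return 0;
}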