diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 9fbe185dce2a5..baa8515baf3ea 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -271,11 +271,11 @@ static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
   return true;
 }
 
-static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
-                     ShuffleVectorPseudo &MatchInfo) {
-  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
-  auto Lane = getSplatIndex(MI);
-  if (!Lane || *Lane != 0)
+/// Helper function for matchDup.
+static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
+                                        MachineRegisterInfo &MRI,
+                                        ShuffleVectorPseudo &MatchInfo) {
+  if (Lane != 0)
     return false;
 
   // Try to match a vector splat operation into a dup instruction.
@@ -296,10 +296,9 @@ static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MI.getOperand(1).getReg(), MRI);
   if (!InsMI)
     return false;
 
-  // Match the undef vector operand.
-  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
-                    InsMI->getOperand(1).getReg(), MRI))
+  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
+                    MRI))
     return false;
 
   // Match the index constant 0.
@@ -307,12 +306,45 @@ static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
   if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
     return false;
 
-  Register Dst = MI.getOperand(0).getReg();
+  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
+                                  {InsMI->getOperand(2).getReg()});
+  return true;
+}
+
+/// Helper function for matchDup.
+static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
+                                    MachineRegisterInfo &MRI,
+                                    ShuffleVectorPseudo &MatchInfo) {
+  assert(Lane >= 0 && "Expected non-negative lane?");
+  // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
+  // lane's definition directly.
+  auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
+                                  MI.getOperand(1).getReg(), MRI);
+  if (!BuildVecMI)
+    return false;
+  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
   MatchInfo =
-      ShuffleVectorPseudo(AArch64::G_DUP, Dst, {InsMI->getOperand(2).getReg()});
+      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
   return true;
 }
 
+static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     ShuffleVectorPseudo &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  auto MaybeLane = getSplatIndex(MI);
+  if (!MaybeLane)
+    return false;
+  int Lane = *MaybeLane;
+  // If this is an undef splat, any lane will do, so use lane 0.
+  if (Lane < 0)
+    Lane = 0;
+  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
+    return true;
+  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
+    return true;
+  return false;
+}
+
 static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
                      ShuffleVectorPseudo &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
index 2325b0fb03413..b910e9953193f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
@@ -328,3 +328,31 @@ body: |
     %splat:_(<8 x s8>) = G_SHUFFLE_VECTOR %ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
     $d0 = COPY %splat(<8 x s8>)
     RET_ReallyLR implicit $d0
+
+...
+---
+name:            build_vector
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $w1, $w2, $w3
+    ; The G_SHUFFLE_VECTOR is fed by a G_BUILD_VECTOR, and the 0th input
+    ; operand is not a constant. We should get a G_DUP.
+    ;
+    ; CHECK-LABEL: name: build_vector
+    ; CHECK: liveins: $w0, $w1, $w2, $w3
+    ; CHECK: %lane:_(s32) = COPY $w0
+    ; CHECK: %shuf:_(<4 x s32>) = G_DUP %lane(s32)
+    ; CHECK: $q0 = COPY %shuf(<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %lane:_(s32) = COPY $w0
+    %b:_(s32) = COPY $w1
+    %c:_(s32) = COPY $w2
+    %d:_(s32) = COPY $w3
+    %undef:_(<4 x s32>) = G_IMPLICIT_DEF
+    %buildvec:_(<4 x s32>) = G_BUILD_VECTOR %lane, %b, %c, %d
+    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)
+    $q0 = COPY %shuf(<4 x s32>)
+    RET_ReallyLR implicit $q0