diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index d5cdfc35899766..01bd05a2b9880e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2971,6 +2971,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
     }
   }
+  case AArch64::G_DUP: {
+    // When the scalar operand of G_DUP is an s8/s16 gpr, the instruction
+    // can't be selected by the imported patterns, so do it manually here.
+    // Avoiding the s8/s16 gpr in the first place is difficult: at RBS we
+    // may end up pessimizing the fpr case if we decide to add an anyextend
+    // to fix this. Manual selection is the most robust solution for now.
+    Register SrcReg = I.getOperand(1).getReg();
+    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
+      return false; // We expect the fpr regbank case to be imported.
+    LLT SrcTy = MRI.getType(SrcReg);
+    if (SrcTy.getSizeInBits() == 16)
+      I.setDesc(TII.get(AArch64::DUPv8i16gpr));
+    else if (SrcTy.getSizeInBits() == 8)
+      I.setDesc(TII.get(AArch64::DUPv16i8gpr));
+    else
+      return false;
+    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+  }
   case TargetOpcode::G_INTRINSIC_TRUNC:
     return selectIntrinsicTrunc(I, MRI);
   case TargetOpcode::G_INTRINSIC_ROUND:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index 1848e338b7aa6e..182bba1ae38423 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -223,6 +223,29 @@ body: |
     $q0 = COPY %dup(<8 x s16>)
     RET_ReallyLR implicit $q0
 
+...
+---
+name:            DUPv8i16gpr_s16_src
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $w0
+    ; Check that we can still select the gpr variant if the scalar is an s16.
+    ; CHECK-LABEL: name: DUPv8i16gpr_s16_src
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr128 = DUPv8i16gpr %copy
+    ; CHECK: $q0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $q0
+    %copy:gpr(s32) = COPY $w0
+    %trunc:gpr(s16) = G_TRUNC %copy
+    %dup:fpr(<8 x s16>) = G_DUP %trunc(s16)
+    $q0 = COPY %dup(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
 ...
 ---
 name:            DUPv8i8gpr
@@ -264,3 +287,26 @@ body: |
     %dup:fpr(<16 x s8>) = G_DUP %copy(s32)
     $q0 = COPY %dup(<16 x s8>)
     RET_ReallyLR implicit $q0
+...
+---
+name:            DUPv16i8gpr_s8_src
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $w0
+    ; Check that we still select the gpr variant when the scalar is an s8.
+    ; CHECK-LABEL: name: DUPv16i8gpr_s8_src
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr128 = DUPv16i8gpr %copy
+    ; CHECK: $q0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $q0
+    %copy:gpr(s32) = COPY $w0
+    %trunc:gpr(s8) = G_TRUNC %copy
+    %dup:fpr(<16 x s8>) = G_DUP %trunc(s8)
+    $q0 = COPY %dup(<16 x s8>)
+    RET_ReallyLR implicit $q0
+...
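
Note: a minimal sketch of the kind of source that reaches the selector as a
G_DUP with an s8/s16 gpr scalar; the function name is hypothetical and the
pipeline behavior described is an assumption, not part of the patch. The
IRTranslator lowers the splat below to an insertelement/shufflevector pair,
and the AArch64 post-legalizer combine typically rewrites that splat into a
G_DUP whose scalar is the s16 G_TRUNC of the incoming s32 argument, which is
exactly the shape the new DUPv8i16gpr_s16_src test exercises:

; Hypothetical reproducer (not part of the patch): splat of an i16 argument.
define <8 x i16> @splat_i16(i16 %x) {
  ; The i16 argument arrives in $w0, so GISel sees COPY + G_TRUNC to s16
  ; before the splat is combined into G_DUP of that s16 gpr value.
  %ins = insertelement <8 x i16> undef, i16 %x, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}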