diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index 5c7fb0deecd026..a3750fe03f842a 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -32,6 +32,9 @@ // ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source // operand are set to zero. // +// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx +// ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx +// //===----------------------------------------------------------------------===// #include "AArch64ExpandImm.h" @@ -97,6 +100,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { template bool visitAND(unsigned Opc, MachineInstr &MI); bool visitORR(MachineInstr &MI); + bool visitINSERT(MachineInstr &MI); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -250,6 +254,50 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) { return true; } +bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) { + // Check this INSERT_SUBREG comes from below zero-extend pattern. + // + // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx + // To %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx + // + // We're assuming the first operand to INSERT_SUBREG is irrelevant because a + // COPY would destroy the upper part of the register anyway + if (!MI.isRegTiedToDefOperand(1)) + return false; + + Register DstReg = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DstReg); + MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); + if (!SrcMI) + return false; + + // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC + // + // When you use the 32-bit form of an instruction, the upper 32 bits of the + // source registers are ignored and the upper 32 bits of the destination + // register are set to zero. + // + // If AArch64's 32-bit form of instruction defines the source operand of + // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is + // real AArch64 instruction and if it is not, do not process the opcode + // conservatively. + if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) || + !AArch64::GPR64allRegClass.hasSubClassEq(RC)) + return false; + + // Build a SUBREG_TO_REG instruction + MachineInstr *SubregMI = + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + TII->get(TargetOpcode::SUBREG_TO_REG), DstReg) + .addImm(0) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)); + LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n"); + MI.eraseFromParent(); + + return true; +} + template static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { // The immediate must be in the form of ((imm0 << 12) + imm1), in which both @@ -493,6 +541,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { switch (MI.getOpcode()) { default: break; + case AArch64::INSERT_SUBREG: + Changed = visitINSERT(MI); + break; case AArch64::ANDWrr: Changed = visitAND(AArch64::ANDWri, MI); break; diff --git a/llvm/test/CodeGen/AArch64/peephole-insert-subreg.mir b/llvm/test/CodeGen/AArch64/peephole-insert-subreg.mir new file mode 100644 index 00000000000000..46c2e58620a171 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/peephole-insert-subreg.mir @@ -0,0 +1,47 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s + +--- | + define i64 @loop2(i32 noundef %width) { + entry: + %add = add i32 %width, -1 + %zext = zext i32 %add to i64 + %shl = shl nuw nsw i64 %zext, 1 + ret i64 %shl + } + +... +--- +--- +name: loop2 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32common, preferred-register: '' } + - { id: 1, class: gpr32common, preferred-register: '' } + - { id: 2, class: gpr64, preferred-register: '' } + - { id: 3, class: gpr64all, preferred-register: '' } + - { id: 4, class: gpr64, preferred-register: '' } +liveins: + - { reg: '$w0', virtual-reg: '%0' } +body: | + bb.0.entry: + liveins: $w0 + + ; CHECK-LABEL: name: loop2 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 1, 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[SUBWri]], %subreg.sub_32 + ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = nuw nsw UBFMXri killed [[SUBREG_TO_REG]], 63, 31 + ; CHECK-NEXT: $x0 = COPY [[UBFMXri]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr32common = COPY $w0 + %1:gpr32common = SUBWri %0, 1, 0 + %3:gpr64all = IMPLICIT_DEF + %2:gpr64 = INSERT_SUBREG %3, killed %1, %subreg.sub_32 + %4:gpr64 = nuw nsw UBFMXri killed %2, 63, 31 + $x0 = COPY %4 + RET_ReallyLR implicit $x0