|
32 | 32 | // ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source |
33 | 33 | // operand are set to zero. |
34 | 34 | // |
| 35 | +// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx |
| 36 | +// ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx |
| 37 | +// |
35 | 38 | //===----------------------------------------------------------------------===// |
36 | 39 |
|
37 | 40 | #include "AArch64ExpandImm.h" |
@@ -97,6 +100,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { |
97 | 100 | template <typename T> |
98 | 101 | bool visitAND(unsigned Opc, MachineInstr &MI); |
99 | 102 | bool visitORR(MachineInstr &MI); |
| 103 | + bool visitINSERT(MachineInstr &MI); |
100 | 104 | bool runOnMachineFunction(MachineFunction &MF) override; |
101 | 105 |
|
102 | 106 | StringRef getPassName() const override { |
@@ -250,6 +254,50 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) { |
250 | 254 | return true; |
251 | 255 | } |
252 | 256 |
|
| 257 | +bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) { |
| 258 | + // Check this INSERT_SUBREG comes from below zero-extend pattern. |
| 259 | + // |
| 260 | + // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx |
| 261 | + // To %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx |
| 262 | + // |
| 263 | + // We're assuming the first operand to INSERT_SUBREG is irrelevant because a |
| 264 | + // COPY would destroy the upper part of the register anyway |
| 265 | + if (!MI.isRegTiedToDefOperand(1)) |
| 266 | + return false; |
| 267 | + |
| 268 | + Register DstReg = MI.getOperand(0).getReg(); |
| 269 | + const TargetRegisterClass *RC = MRI->getRegClass(DstReg); |
| 270 | + MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); |
| 271 | + if (!SrcMI) |
| 272 | + return false; |
| 273 | + |
| 274 | + // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC |
| 275 | + // |
| 276 | + // When you use the 32-bit form of an instruction, the upper 32 bits of the |
| 277 | + // source registers are ignored and the upper 32 bits of the destination |
| 278 | + // register are set to zero. |
| 279 | + // |
| 280 | + // If AArch64's 32-bit form of instruction defines the source operand of |
| 281 | + // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is |
| 282 | + // real AArch64 instruction and if it is not, do not process the opcode |
| 283 | + // conservatively. |
| 284 | + if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) || |
| 285 | + !AArch64::GPR64allRegClass.hasSubClassEq(RC)) |
| 286 | + return false; |
| 287 | + |
| 288 | + // Build a SUBREG_TO_REG instruction |
| 289 | + MachineInstr *SubregMI = |
| 290 | + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), |
| 291 | + TII->get(TargetOpcode::SUBREG_TO_REG), DstReg) |
| 292 | + .addImm(0) |
| 293 | + .add(MI.getOperand(2)) |
| 294 | + .add(MI.getOperand(3)); |
| 295 | + LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n"); |
| 296 | + MI.eraseFromParent(); |
| 297 | + |
| 298 | + return true; |
| 299 | +} |
| 300 | + |
253 | 301 | template <typename T> |
254 | 302 | static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { |
255 | 303 | // The immediate must be in the form of ((imm0 << 12) + imm1), in which both |
@@ -493,6 +541,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { |
493 | 541 | switch (MI.getOpcode()) { |
494 | 542 | default: |
495 | 543 | break; |
| 544 | + case AArch64::INSERT_SUBREG: |
| 545 | + Changed = visitINSERT(MI); |
| 546 | + break; |
496 | 547 | case AArch64::ANDWrr: |
497 | 548 | Changed = visitAND<uint32_t>(AArch64::ANDWri, MI); |
498 | 549 | break; |
|
0 commit comments