Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[Peephole] rewrite INSERT_SUBREG to SUBREG_TO_REG if upper bits zero
Restrict the rewrite to the 32-bit form of integer instructions, since otherwise
too many test cases would be clobbered by the updated register numbers.
    From %reg = INSERT_SUBREG %reg, %subreg, subidx
    To   %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
Try to fix the redundant mov instruction seen in D132325, as SUBREG_TO_REG should not generate any code.

Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D132939
  • Loading branch information
vfdff committed Sep 9, 2022
1 parent 180bf5f commit b665533
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 0 deletions.
51 changes: 51 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
Expand Up @@ -32,6 +32,9 @@
// ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
// operand are set to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
// ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
Expand Down Expand Up @@ -97,6 +100,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
template <typename T>
bool visitAND(unsigned Opc, MachineInstr &MI);
bool visitORR(MachineInstr &MI);
bool visitINSERT(MachineInstr &MI);
bool runOnMachineFunction(MachineFunction &MF) override;

StringRef getPassName() const override {
Expand Down Expand Up @@ -250,6 +254,50 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
return true;
}

/// Try to replace an INSERT_SUBREG that implements a zero-extend with a
/// SUBREG_TO_REG.
///
///   From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
///   To   %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
///
/// The rewrite is only performed when the inserted value has a unique
/// defining instruction that is a real target instruction (not a generic
/// opcode) and the destination register class is GPR64all or one of its
/// sub-classes.
///
/// \param MI the INSERT_SUBREG instruction to inspect; it is erased from its
///           parent block when the rewrite happens.
/// \returns true if \p MI was replaced, false if it was left untouched.
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
  // COPY would destroy the upper part of the register anyway.
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);

  // The inserted sub-register value must have exactly one defining
  // instruction so that we can inspect how it was produced.
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of
  // zero-extend, we do not need the zero-extend. Check that the defining
  // instruction's opcode is a real AArch64 instruction; if it is not, be
  // conservative and do not process it.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction in front of MI, reusing MI's
  // sub-register source and sub-register index operands.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n");
  // LLVM_DEBUG expands to nothing in release (NDEBUG) builds, which would
  // otherwise leave SubregMI unused and trigger -Wunused-variable.
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}

template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
// The immediate must be in the form of ((imm0 << 12) + imm1), in which both
Expand Down Expand Up @@ -493,6 +541,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
break;
case AArch64::INSERT_SUBREG:
Changed = visitINSERT(MI);
break;
case AArch64::ANDWrr:
Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
break;
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/AArch64/peephole-insert-subreg.mir
@@ -0,0 +1,47 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
#
# Checks that the aarch64-mi-peephole-opt pass rewrites an INSERT_SUBREG whose
# sub_32 input is defined by a 32-bit AArch64 instruction (SUBWri here) into a
# SUBREG_TO_REG with a zero immediate, leaving the other instructions as they
# were.

--- |
define i64 @loop2(i32 noundef %width) {
entry:
%add = add i32 %width, -1
%zext = zext i32 %add to i64
%shl = shl nuw nsw i64 %zext, 1
ret i64 %shl
}

...
---
---
name: loop2
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr32common, preferred-register: '' }
- { id: 1, class: gpr32common, preferred-register: '' }
- { id: 2, class: gpr64, preferred-register: '' }
- { id: 3, class: gpr64all, preferred-register: '' }
- { id: 4, class: gpr64, preferred-register: '' }
liveins:
- { reg: '$w0', virtual-reg: '%0' }
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: loop2
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 1, 0
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[SUBWri]], %subreg.sub_32
; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = nuw nsw UBFMXri killed [[SUBREG_TO_REG]], 63, 31
; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
; CHECK-NEXT: RET_ReallyLR implicit $x0
%0:gpr32common = COPY $w0
%1:gpr32common = SUBWri %0, 1, 0
%3:gpr64all = IMPLICIT_DEF
%2:gpr64 = INSERT_SUBREG %3, killed %1, %subreg.sub_32
%4:gpr64 = nuw nsw UBFMXri killed %2, 63, 31
$x0 = COPY %4
RET_ReallyLR implicit $x0

0 comments on commit b665533

Please sign in to comment.