Skip to content

Commit b665533

Browse files
committed
[Peephole] rewrite INSERT_SUBREG to SUBREG_TO_REG if upper bits zero
Restrict the rewrite to the 32-bit form of integer instructions, because a broader match would churn too many test cases as register numbers are updated. From: %reg = INSERT_SUBREG %reg, %subreg, subidx. To: %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx. This is an attempt to fix the redundant mov instruction seen in D132325, since SUBREG_TO_REG should not generate any code. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D132939
1 parent 180bf5f commit b665533

File tree

2 files changed

+98
-0
lines changed

2 files changed

+98
-0
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
// ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
3333
// operand are set to zero.
3434
//
35+
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
36+
// ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
37+
//
3538
//===----------------------------------------------------------------------===//
3639

3740
#include "AArch64ExpandImm.h"
@@ -97,6 +100,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
97100
template <typename T>
98101
bool visitAND(unsigned Opc, MachineInstr &MI);
99102
bool visitORR(MachineInstr &MI);
103+
bool visitINSERT(MachineInstr &MI);
100104
bool runOnMachineFunction(MachineFunction &MF) override;
101105

102106
StringRef getPassName() const override {
@@ -250,6 +254,50 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
250254
return true;
251255
}
252256

257+
/// Rewrite an INSERT_SUBREG that implements a zero-extend into a
/// SUBREG_TO_REG, which asserts that the bits outside the sub-register are
/// already zero.
///
///   From  %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
///   To    %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
///
/// \param MI the INSERT_SUBREG instruction to examine. It is erased when the
///           rewrite is performed, so callers must not touch it afterwards.
/// \returns true if \p MI was replaced, false if it was left untouched.
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
  // COPY would destroy the upper part of the register anyway.
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);

  // The inserted value must have exactly one defining instruction, otherwise
  // we cannot reason about how its upper bits were produced.
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of
  // zero-extend, we do not need the zero-extend. Be conservative: only accept
  // sources defined by a real AArch64 instruction (not a generic/target-
  // independent opcode), and only destinations in a 64-bit GPR class.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build the SUBREG_TO_REG in front of MI, reusing MI's destination register,
  // inserted-value operand and sub-register index operand.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n");
  // LLVM_DEBUG expands to nothing in NDEBUG builds, which would leave SubregMI
  // unused and trigger -Wunused-variable; mark it as intentionally used.
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}
300+
253301
template <typename T>
254302
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
255303
// The immediate must be in the form of ((imm0 << 12) + imm1), in which both
@@ -493,6 +541,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
493541
switch (MI.getOpcode()) {
494542
default:
495543
break;
544+
case AArch64::INSERT_SUBREG:
545+
Changed = visitINSERT(MI);
546+
break;
496547
case AArch64::ANDWrr:
497548
Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
498549
break;
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
3+
4+
--- |
5+
define i64 @loop2(i32 noundef %width) {
6+
entry:
7+
%add = add i32 %width, -1
8+
%zext = zext i32 %add to i64
9+
%shl = shl nuw nsw i64 %zext, 1
10+
ret i64 %shl
11+
}
12+
13+
...
14+
---
15+
---
16+
name: loop2
17+
alignment: 4
18+
tracksRegLiveness: true
19+
registers:
20+
- { id: 0, class: gpr32common, preferred-register: '' }
21+
- { id: 1, class: gpr32common, preferred-register: '' }
22+
- { id: 2, class: gpr64, preferred-register: '' }
23+
- { id: 3, class: gpr64all, preferred-register: '' }
24+
- { id: 4, class: gpr64, preferred-register: '' }
25+
liveins:
26+
- { reg: '$w0', virtual-reg: '%0' }
27+
body: |
28+
bb.0.entry:
29+
liveins: $w0
30+
31+
; CHECK-LABEL: name: loop2
32+
; CHECK: liveins: $w0
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
35+
; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 1, 0
36+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
37+
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[SUBWri]], %subreg.sub_32
38+
; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = nuw nsw UBFMXri killed [[SUBREG_TO_REG]], 63, 31
39+
; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
40+
; CHECK-NEXT: RET_ReallyLR implicit $x0
41+
%0:gpr32common = COPY $w0
42+
%1:gpr32common = SUBWri %0, 1, 0
43+
%3:gpr64all = IMPLICIT_DEF
44+
%2:gpr64 = INSERT_SUBREG %3, killed %1, %subreg.sub_32
45+
%4:gpr64 = nuw nsw UBFMXri killed %2, 63, 31
46+
$x0 = COPY %4
47+
RET_ReallyLR implicit $x0

0 commit comments

Comments
 (0)