Skip to content

Commit

Permalink
[AArch64] Remove redundant ORRWrs which is generated by zero-extend
Browse files Browse the repository at this point in the history
%3:gpr32 = ORRWrs $wzr, %2, 0
%4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32

If AArch64's 32-bit form of instruction defines the source operand of ORRWrs,
we can remove the ORRWrs because the upper 32 bits of the source operand are
set to zero.

Differential Revision: https://reviews.llvm.org/D110841
  • Loading branch information
jaykang10 committed Oct 25, 2021
1 parent 6fa1b4f commit a502436
Show file tree
Hide file tree
Showing 4 changed files with 248 additions and 8 deletions.
57 changes: 57 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@
// later. In this case, we could try to split the constant operand of mov
// instruction into two bitmask immediates. It makes two AND instructions
// intead of multiple `mov` + `and` instructions.
//
// 2. Remove redundant ORRWrs which is generated by zero-extend.
//
// %3:gpr32 = ORRWrs $wzr, %2, 0
// %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
// If AArch64's 32-bit form of instruction defines the source operand of
// ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
// operand are set to zero.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
Expand Down Expand Up @@ -44,6 +54,8 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
template <typename T>
bool visitAND(MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
bool visitORR(MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
bool runOnMachineFunction(MachineFunction &MF) override;

StringRef getPassName() const override {
Expand Down Expand Up @@ -196,6 +208,49 @@ bool AArch64MIPeepholeOpt::visitAND(
return true;
}

bool AArch64MIPeepholeOpt::visitORR(
MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
// Check this ORR comes from below zero-extend pattern.
//
// def : Pat<(i64 (zext GPR32:$src)),
// (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
if (MI.getOperand(3).getImm() != 0)
return false;

if (MI.getOperand(1).getReg() != AArch64::WZR)
return false;

MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
if (!SrcMI)
return false;

// From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
//
// When you use the 32-bit form of an instruction, the upper 32 bits of the
// source registers are ignored and the upper 32 bits of the destination
// register are set to zero.
//
// If AArch64's 32-bit form of instruction defines the source operand of
// zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
// real AArch64 instruction and if it is not, do not process the opcode
// conservatively.
if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
return false;

Register DefReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(2).getReg();
MRI->replaceRegWith(DefReg, SrcReg);
MRI->clearKillFlags(SrcReg);
// replaceRegWith changes MI's definition register. Keep it for SSA form until
// deleting MI.
MI.getOperand(0).setReg(DefReg);
ToBeRemoved.insert(&MI);

LLVM_DEBUG({ dbgs() << "Removed: " << MI << "\n"; });

return true;
}

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
Expand All @@ -221,6 +276,8 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
case AArch64::ANDXrr:
Changed = visitAND<uint64_t>(MI, ToBeRemoved);
break;
case AArch64::ORRWrs:
Changed = visitORR(MI, ToBeRemoved);
}
}
}
Expand Down
51 changes: 43 additions & 8 deletions llvm/test/CodeGen/AArch64/arm64-assert-zext-sext.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O2 -mtriple=aarch64-linux-gnu < %s | FileCheck %s

declare i32 @test(i32) local_unnamed_addr
declare i32 @test1(i64) local_unnamed_addr

define i32 @assertzext(i32 %n, i1 %a, i32* %b) local_unnamed_addr {
; CHECK-LABEL: assertzext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w8, #33066
; CHECK-NEXT: tst w1, #0x1
; CHECK-NEXT: movk w8, #28567, lsl #16
; CHECK-NEXT: csel w19, wzr, w8, ne
; CHECK-NEXT: cbnz w0, .LBB0_2
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: mov w19, wzr
; CHECK-NEXT: str wzr, [x2]
; CHECK-NEXT: .LBB0_2: // %if.end
; CHECK-NEXT: mov w0, w19
; CHECK-NEXT: bl test
; CHECK-NEXT: mov w0, w19
; CHECK-NEXT: bl test1
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
%i = select i1 %a, i64 0, i64 66296709418
%conv.i = trunc i64 %i to i32
Expand All @@ -20,14 +43,29 @@ if.end: ; preds = %if.then, %entry
%i2 = sext i32 %i1 to i64
%call1.i = tail call i32 @test1(i64 %i2)
ret i32 0
; CHECK: // %if.end
; CHECK: mov w{{[0-9]+}}, w{{[0-9]+}}
; CHECK: bl test
; CHECK: mov w{{[0-9]+}}, w{{[0-9]+}}
; CHECK: bl test1
}

define i32 @assertsext(i32 %n, i8 %a) local_unnamed_addr {
; CHECK-LABEL: assertsext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cbz w0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: b .LBB1_3
; CHECK-NEXT: .LBB1_2: // %if.then
; CHECK-NEXT: mov x9, #24575
; CHECK-NEXT: sxtb w8, w1
; CHECK-NEXT: movk x9, #15873, lsl #16
; CHECK-NEXT: movk x9, #474, lsl #32
; CHECK-NEXT: udiv x0, x9, x8
; CHECK-NEXT: .LBB1_3: // %if.end
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl test1
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%conv.i = sext i8 %a to i32
%cmp = icmp eq i32 %n, 0
Expand All @@ -37,9 +75,6 @@ if.then: ; preds = %entry
%conv1 = zext i32 %conv.i to i64
%div = udiv i64 2036854775807, %conv1
br label %if.end
; CHECK: // %if.then
; CHECK: mov w{{[0-9]+}}, w{{[0-9]+}}
; CHECK: udiv x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}

if.end: ; preds = %if.then, %entry
%i1 = phi i64 [ %div, %if.then ], [ 0, %entry ]
Expand Down
79 changes: 79 additions & 0 deletions llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=aarch64-linux-gnu < %s | FileCheck %s

define i32 @test(i32 %input, i32 %n, i32 %a) {
; CHECK-LABEL: test:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cbz w1, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: // %bb.0
; CHECK-NEXT: add w8, w0, w1
; CHECK-NEXT: mov w0, #100
; CHECK-NEXT: cmp w8, #4
; CHECK-NEXT: b.hi .LBB0_5
; CHECK-NEXT: // %bb.3: // %bb.0
; CHECK-NEXT: adrp x9, .LJTI0_0
; CHECK-NEXT: add x9, x9, :lo12:.LJTI0_0
; CHECK-NEXT: adr x10, .LBB0_4
; CHECK-NEXT: ldrb w11, [x9, x8]
; CHECK-NEXT: add x10, x10, x11, lsl #2
; CHECK-NEXT: br x10
; CHECK-NEXT: .LBB0_4: // %sw.bb
; CHECK-NEXT: add w0, w2, #1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_5: // %bb.0
; CHECK-NEXT: cmp w8, #200
; CHECK-NEXT: b.ne .LBB0_10
; CHECK-NEXT: // %bb.6: // %sw.bb7
; CHECK-NEXT: add w0, w2, #7
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_7: // %sw.bb1
; CHECK-NEXT: add w0, w2, #3
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_8: // %sw.bb3
; CHECK-NEXT: add w0, w2, #4
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_9: // %sw.bb5
; CHECK-NEXT: add w0, w2, #5
; CHECK-NEXT: .LBB0_10: // %return
; CHECK-NEXT: ret
entry:
%b = add nsw i32 %input, %n
%cmp = icmp eq i32 %n, 0
br i1 %cmp, label %bb.0, label %return

bb.0:
switch i32 %b, label %return [
i32 0, label %sw.bb
i32 1, label %sw.bb1
i32 2, label %sw.bb3
i32 4, label %sw.bb5
i32 200, label %sw.bb7
]

sw.bb:
%add = add nsw i32 %a, 1
br label %return

sw.bb1:
%add2 = add nsw i32 %a, 3
br label %return

sw.bb3:
%add4 = add nsw i32 %a, 4
br label %return

sw.bb5:
%add6 = add nsw i32 %a, 5
br label %return

sw.bb7:
%add8 = add nsw i32 %a, 7
br label %return

return:
%retval.0 = phi i32 [ %add8, %sw.bb7 ], [ %add6, %sw.bb5 ], [ %add4, %sw.bb3 ], [ %add2, %sw.bb1 ], [ %add, %sw.bb ], [ 100, %bb.0 ], [ 0, %entry ]
ret i32 %retval.0
}
69 changes: 69 additions & 0 deletions llvm/test/CodeGen/AArch64/redundant-orrwrs-from-zero-extend.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# RUN: llc -mtriple=aarch64 -run-pass aarch64-mi-peephole-opt -verify-machineinstrs -o - %s | FileCheck %s
---
name: test1
# CHECK-LABEL: name: test1
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr32 }
- { id: 1, class: gpr32 }
- { id: 2, class: gpr32 }
- { id: 3, class: gpr32 }
- { id: 4, class: gpr64 }
body: |
bb.0:
liveins: $w0, $w1
%0:gpr32 = COPY $w0
%1:gpr32 = COPY $w1
B %bb.1
bb.1:
%2:gpr32 = nsw ADDWrr %0, %1
B %bb.2
bb.2:
; CHECK-LABEL: bb.2:
; CHECK-NOT: %3:gpr32 = ORRWrs $wzr, %2, 0
; The ORRWrs should be removed.
%3:gpr32 = ORRWrs $wzr, %2, 0
%4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
B %bb.3
bb.3:
$x0 = COPY %4
RET_ReallyLR implicit $x0
...
---
name: test2
# CHECK-LABEL: name: test2
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr64 }
- { id: 1, class: gpr32 }
- { id: 2, class: gpr32 }
- { id: 3, class: gpr64 }
body: |
bb.0:
liveins: $x0
%0:gpr64 = COPY $x0
B %bb.1
bb.1:
%1:gpr32 = EXTRACT_SUBREG %0, %subreg.sub_32
B %bb.2
bb.2:
; CHECK-LABEL: bb.2:
; CHECK: %2:gpr32 = ORRWrs $wzr, %1, 0
; The ORRWrs should not be removed.
%2:gpr32 = ORRWrs $wzr, %1, 0
%3:gpr64 = SUBREG_TO_REG 0, %2, %subreg.sub_32
B %bb.3
bb.3:
$x0 = COPY %3
RET_ReallyLR implicit $x0
...

0 comments on commit a502436

Please sign in to comment.