
Second Recommit "[AArch64] Split bitmask immediate of bitwise AND operation"

This reverts the revert commit c07f709 with
bug fixes.

Differential Revision: https://reviews.llvm.org/D109963
jaykang10 committed Sep 30, 2021
1 parent c8f03a7 commit 13f3c39
Showing 8 changed files with 483 additions and 4 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64.h
@@ -51,6 +51,7 @@ FunctionPass *createAArch64A53Fix835769();
FunctionPass *createFalkorHWPFFixPass();
FunctionPass *createFalkorMarkStridedAccessesPass();
FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64MIPeepholeOptPass();

FunctionPass *createAArch64CleanupLocalDynamicTLSPass();

@@ -82,6 +83,7 @@ void initializeAArch64SLSHardeningPass(PassRegistry&);
void initializeAArch64SpeculationHardeningPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
void initializeAArch64MIPeepholeOptPass(PassRegistry &);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
220 changes: 220 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -0,0 +1,220 @@
//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the below peephole optimizations at the MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. In that case, we could try to split the constant operand of the mov
// instruction into two bitmask immediates. That makes two AND instructions
// instead of multiple `mov` + `and` instructions.
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-mi-peephole-opt"

namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
    initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
  }

  const AArch64InstrInfo *TII;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  template <typename T>
  bool visitAND(MachineInstr &MI,
                SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

char AArch64MIPeepholeOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)

template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
    return false;

  // If this immediate can be handled by one instruction, do not split it.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // A bitmask immediate consists of consecutive ones. Let's say there is a
  // constant 0b00000000001000000000010000000000 which does not consist of
  // consecutive ones. We can split it into two bitmask immediates like
  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
  // If we AND with these two bitmask immediates, we get the original constant.
  unsigned LowestBitSet = countTrailingZeros(UImm);
  unsigned HighestBitSet = Log2_64(UImm);

  // Create a mask which is filled with ones from the position of the lowest
  // set bit to the position of the highest set bit.
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a mask which is filled with ones outside that bit range, plus the
  // original bits inside it.
  T NewImm2 = UImm | ~NewImm1;

  // If the split value is not a valid bitmask immediate, do not split this
  // constant.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitAND(
    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
  // Try the below transformation.
  //
  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two bitmask immediates. It makes only two AND instructions instead of
  // multiple mov + and instructions.

  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for AND bitmask peephole optimization");

  // Check whether the AND's MBB is in a loop and the AND is loop invariant.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether the AND's operand is a MOV with an immediate.
  MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  MachineInstr *SubregToRegMI = nullptr;
  // If it is SUBREG_TO_REG, check its operand.
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate because it
  // causes more instructions.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;

  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  // Split the bitmask immediate into two.
  T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
  T Imm1Enc;
  T Imm2Enc;
  if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
    return false;

  // Create new AND MIs.
  DebugLoc DL = MI.getDebugLoc();
  const TargetRegisterClass *ANDImmRC =
      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
  unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;

  MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
      .addReg(SrcReg)
      .addImm(Imm1Enc);

  MRI->constrainRegClass(DstReg, ANDImmRC);
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
      .addReg(NewTmpReg)
      .addImm(Imm2Enc);

  ToBeRemoved.insert(&MI);
  if (SubregToRegMI)
    ToBeRemoved.insert(SubregToRegMI);
  ToBeRemoved.insert(MovMI);

  return true;
}

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  MLI = &getAnalysis<MachineLoopInfo>();
  MRI = &MF.getRegInfo();

  if (!MRI->isSSA())
    return false;

  bool Changed = false;
  SmallSetVector<MachineInstr *, 8> ToBeRemoved;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::ANDWrr:
        Changed = visitAND<uint32_t>(MI, ToBeRemoved);
        break;
      case AArch64::ANDXrr:
        Changed = visitAND<uint64_t>(MI, ToBeRemoved);
        break;
      }
    }
  }

  for (MachineInstr *MI : ToBeRemoved)
    MI->eraseFromParent();

  return Changed;
}

FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}
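
Editorial aside (not part of the commit): the sketch below is a minimal, self-contained C++ illustration of the mask-splitting arithmetic implemented by splitBitmaskImm above, using the example constant from its comment. It is an assumption for illustration only; it replaces LLVM's countTrailingZeros/Log2_64 with the GCC/Clang bit-scan builtins and merely demonstrates that ANDing with the two derived masks is equivalent to ANDing with the original constant.

// Illustrative sketch only -- not LLVM code. Assumes a GCC/Clang-compatible
// compiler for the __builtin_ctz/__builtin_clz intrinsics and C++14 binary
// literals.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Example constant from the splitBitmaskImm comment: its set bits (10 and
  // 21) are not consecutive, so it is not a valid AArch64 logical immediate.
  uint32_t UImm = 0b00000000001000000000010000000000;

  unsigned LowestBitSet = __builtin_ctz(UImm);        // 10
  unsigned HighestBitSet = 31 - __builtin_clz(UImm);  // 21

  // Consecutive ones covering bits [LowestBitSet, HighestBitSet].
  uint32_t NewImm1 =
      (UINT32_C(2) << HighestBitSet) - (UINT32_C(1) << LowestBitSet);
  // Ones everywhere outside that range, plus the original bits inside it.
  uint32_t NewImm2 = UImm | ~NewImm1;

  // ANDing with both masks equals ANDing with the original constant, which is
  // what lets the pass emit two AND-immediate instructions for one mov + and.
  assert((NewImm1 & NewImm2) == UImm);
  printf("Imm  = 0x%08x\nImm1 = 0x%08x\nImm2 = 0x%08x\n", UImm, NewImm1,
         NewImm2);
  return 0;
}

With this constant the sketch prints Imm1 = 0x003ffc00 and Imm2 = 0xffe007ff, the two masks quoted in the comment inside splitBitmaskImm.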
10 changes: 10 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -195,6 +195,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
  initializeAArch64DeadRegisterDefinitionsPass(*PR);
  initializeAArch64ExpandPseudoPass(*PR);
  initializeAArch64LoadStoreOptPass(*PR);
  initializeAArch64MIPeepholeOptPass(*PR);
  initializeAArch64SIMDInstrOptPass(*PR);
  initializeAArch64O0PreLegalizerCombinerPass(*PR);
  initializeAArch64PreLegalizerCombinerPass(*PR);
@@ -480,6 +481,7 @@ class AArch64PassConfig : public TargetPassConfig {
  bool addRegBankSelect() override;
  void addPreGlobalInstructionSelect() override;
  bool addGlobalInstructionSelect() override;
  void addMachineSSAOptimization() override;
  bool addILPOpts() override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
@@ -656,6 +658,14 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
  return false;
}

void AArch64PassConfig::addMachineSSAOptimization() {
  // Run default MachineSSAOptimization first.
  TargetPassConfig::addMachineSSAOptimization();

  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64MIPeepholeOptPass());
}

bool AArch64PassConfig::addILPOpts() {
  if (EnableCondOpt)
    addPass(createAArch64ConditionOptimizerPass());
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/CMakeLists.txt
@@ -66,6 +66,7 @@ add_llvm_target(AArch64CodeGen
  AArch64LowerHomogeneousPrologEpilog.cpp
  AArch64MachineFunctionInfo.cpp
  AArch64MacroFusion.cpp
  AArch64MIPeepholeOpt.cpp
  AArch64MCInstLower.cpp
  AArch64PromoteConstant.cpp
  AArch64PBQPRegAlloc.cpp
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H

#include "AArch64ExpandImm.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/bit.h"
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -40,7 +40,7 @@
; CHECK-NEXT: Induction Variable Users
; CHECK-NEXT: Loop Strength Reduction
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Merge contiguous icmps into a memcmp
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Lazy Branch Probability Analysis
@@ -132,6 +132,7 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: AArch64 MI Peephole Optimization pass
; CHECK-NEXT: AArch64 Dead register definitions
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Process Implicit Definitions
