diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 0ddfc717e47f30..1fc5617b49f66a 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -37,6 +37,7 @@
 #include "AArch64ExpandImm.h"
 #include "AArch64InstrInfo.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -55,17 +56,44 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   }

   const AArch64InstrInfo *TII;
+  const AArch64RegisterInfo *TRI;
   MachineLoopInfo *MLI;
   MachineRegisterInfo *MRI;

+  template <typename T>
+  using SplitAndOpcFunc =
+      std::function<Optional<unsigned>(T, unsigned, T &, T &)>;
+  using BuildMIFunc =
+      std::function<void(MachineInstr &, unsigned, unsigned, unsigned,
+                         Register, Register, Register)>;
+
+  /// For instructions where an immediate operand could be split into two
+  /// separate immediate instructions, use splitTwoPartImm to handle the
+  /// optimization.
+  ///
+  /// To implement, the following function types must be passed to
+  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
+  /// splitting the immediate is valid and returns the associated new opcode. A
+  /// BuildMIFunc must be implemented to build the two immediate instructions.
+  ///
+  /// Example Pattern (where IMM would require 2+ MOV instructions):
+  ///     %dst = <Instr>rr %src IMM [...]
+  /// becomes:
+  ///     %tmp = <Instr>ri %src (encode half IMM) [...]
+  ///     %dst = <Instr>ri %tmp (encode half IMM) [...]
+  template <typename T>
+  bool splitTwoPartImm(MachineInstr &MI,
+                       SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
+
   bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                         MachineInstr *&SubregToRegMI);

   template <typename T>
-  bool visitADDSUB(MachineInstr &MI,
-                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved, bool IsAdd);
+  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
+                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
   template <typename T>
-  bool visitAND(MachineInstr &MI,
+  bool visitAND(unsigned Opc, MachineInstr &MI,
                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
   bool visitORR(MachineInstr &MI,
                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
@@ -129,7 +157,8 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {

 template <typename T>
 bool AArch64MIPeepholeOpt::visitAND(
-    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+    unsigned Opc, MachineInstr &MI,
+    SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
   // Try below transformation.
   //
   // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
@@ -140,59 +169,25 @@ bool AArch64MIPeepholeOpt::visitAND(
   // bitmask immediates. It makes only two AND instructions intead of multiple
   // mov + and instructions.

-  unsigned RegSize = sizeof(T) * 8;
-  assert((RegSize == 32 || RegSize == 64) &&
-         "Invalid RegSize for AND bitmask peephole optimization");
-
-  // Perform several essential checks against current MI.
-  MachineInstr *MovMI = nullptr, *SubregToRegMI = nullptr;
-  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
-    return false;
-
-  // Split the bitmask immediate into two.
-  T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
-  // For the 32 bit form of instruction, the upper 32 bits of the destination
-  // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
-  // of UImm to zero.
-  if (SubregToRegMI)
-    UImm &= 0xFFFFFFFF;
-  T Imm1Enc;
-  T Imm2Enc;
-  if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
-    return false;
-
-  // Create new AND MIs.
-  DebugLoc DL = MI.getDebugLoc();
-  MachineBasicBlock *MBB = MI.getParent();
-  const TargetRegisterClass *ANDImmRC =
-      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
-  Register NewDstReg = MRI->createVirtualRegister(ANDImmRC);
-  unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
-
-  MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
-      .addReg(SrcReg)
-      .addImm(Imm1Enc);
-
-  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
-      .addReg(NewTmpReg)
-      .addImm(Imm2Enc);
-
-  MRI->replaceRegWith(DstReg, NewDstReg);
-  // replaceRegWith changes MI's definition register. Keep it for SSA form until
-  // deleting MI.
-  MI.getOperand(0).setReg(DstReg);
-
-  ToBeRemoved.insert(&MI);
-  if (SubregToRegMI)
-    ToBeRemoved.insert(SubregToRegMI);
-  ToBeRemoved.insert(MovMI);
-
-  return true;
+  return splitTwoPartImm<T>(
+      MI, ToBeRemoved,
+      [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> {
+        if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
+          return Opc;
+        return None;
+      },
+      [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
+                   Register NewDstReg) {
+        DebugLoc DL = MI.getDebugLoc();
+        MachineBasicBlock *MBB = MI.getParent();
+        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+            .addReg(SrcReg)
+            .addImm(Imm0);
+        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+            .addReg(NewTmpReg)
+            .addImm(Imm1);
+      });
 }

 bool AArch64MIPeepholeOpt::visitORR(
@@ -260,8 +255,8 @@ static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {

 template <typename T>
 bool AArch64MIPeepholeOpt::visitADDSUB(
-    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
-    bool IsAdd) {
+    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
+    SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
   // Try below transformation.
   //
   // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
@@ -275,66 +270,30 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
   // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
   // multiple `mov` + `and/sub` instructions.

-  unsigned RegSize = sizeof(T) * 8;
-  assert((RegSize == 32 || RegSize == 64) &&
-         "Invalid RegSize for legal add/sub immediate peephole optimization");
-
-  // Perform several essential checks against current MI.
-  MachineInstr *MovMI, *SubregToRegMI;
-  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
-    return false;
-
-  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
-  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
-  unsigned Opcode;
-  if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) {
-    if (IsAdd)
-      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
-    else
-      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
-  } else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) {
-    if (IsAdd)
-      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
-    else
-      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
-  } else {
-    return false;
-  }
-
-  // Create new ADD/SUB MIs.
-  DebugLoc DL = MI.getDebugLoc();
-  MachineBasicBlock *MBB = MI.getParent();
-  const TargetRegisterClass *RC =
-      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  Register NewTmpReg = MRI->createVirtualRegister(RC);
-  Register NewDstReg = MRI->createVirtualRegister(RC);
-
-  MRI->constrainRegClass(SrcReg, RC);
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
-      .addReg(SrcReg)
-      .addImm(Imm0)
-      .addImm(12);
-
-  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
-      .addReg(NewTmpReg)
-      .addImm(Imm1)
-      .addImm(0);
-
-  MRI->replaceRegWith(DstReg, NewDstReg);
-  // replaceRegWith changes MI's definition register. Keep it for SSA form until
-  // deleting MI.
-  MI.getOperand(0).setReg(DstReg);
-
-  // Record the MIs need to be removed.
-  ToBeRemoved.insert(&MI);
-  if (SubregToRegMI)
-    ToBeRemoved.insert(SubregToRegMI);
-  ToBeRemoved.insert(MovMI);
-
-  return true;
+  return splitTwoPartImm<T>(
+      MI, ToBeRemoved,
+      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
+                       T &Imm1) -> Optional<unsigned> {
+        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
+          return PosOpc;
+        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
+          return NegOpc;
+        return None;
+      },
+      [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
+                   Register NewDstReg) {
+        DebugLoc DL = MI.getDebugLoc();
+        MachineBasicBlock *MBB = MI.getParent();
+        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+            .addReg(SrcReg)
+            .addImm(Imm0)
+            .addImm(12);
+        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+            .addReg(NewTmpReg)
+            .addImm(Imm1)
+            .addImm(0);
+      });
 }

 // Checks if the corresponding MOV immediate instruction is applicable for
@@ -377,15 +336,75 @@ bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
   return true;
 }

+template <typename T>
+bool AArch64MIPeepholeOpt::splitTwoPartImm(
+    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
+  unsigned RegSize = sizeof(T) * 8;
+  assert((RegSize == 32 || RegSize == 64) &&
+         "Invalid RegSize for legal immediate peephole optimization");
+
+  // Perform several essential checks against current MI.
+  MachineInstr *MovMI, *SubregToRegMI;
+  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
+    return false;
+
+  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
+  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
+  // For the 32 bit form of instruction, the upper 32 bits of the destination
+  // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
+  // of Imm to zero. This is essential if the Immediate value was a negative
+  // number since it was sign extended when we assign to the 64-bit Imm.
+  if (SubregToRegMI)
+    Imm &= 0xFFFFFFFF;
+  unsigned Opcode;
+  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
+    Opcode = R.getValue();
+  else
+    return false;
+
+  // Create new MIs.
+  MachineFunction *MF = MI.getMF();
+  const TargetRegisterClass *RC =
+      TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
+  const TargetRegisterClass *ORC =
+      TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register NewTmpReg = MRI->createVirtualRegister(RC);
+  Register NewDstReg = MRI->createVirtualRegister(RC);
+
+  MRI->constrainRegClass(SrcReg, RC);
+  MRI->constrainRegClass(NewTmpReg, ORC);
+  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
+
+  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
+
+  MRI->replaceRegWith(DstReg, NewDstReg);
+  // replaceRegWith changes MI's definition register. Keep it for SSA form until
+  // deleting MI.
+  MI.getOperand(0).setReg(DstReg);
+
+  // Record the MIs that need to be removed.
+  ToBeRemoved.insert(&MI);
+  if (SubregToRegMI)
+    ToBeRemoved.insert(SubregToRegMI);
+  ToBeRemoved.insert(MovMI);
+
+  return true;
+}
+
 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;

   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+  TRI = static_cast<const AArch64RegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
   MLI = &getAnalysis<MachineLoopInfo>();
   MRI = &MF.getRegInfo();

-  assert (MRI->isSSA() && "Expected to be run on SSA form!");
+  assert(MRI->isSSA() && "Expected to be run on SSA form!");

   bool Changed = false;
   SmallSetVector<MachineInstr *, 8> ToBeRemoved;
@@ -396,25 +415,29 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
       default:
         break;
       case AArch64::ANDWrr:
-        Changed = visitAND<uint32_t>(MI, ToBeRemoved);
+        Changed = visitAND<uint32_t>(AArch64::ANDWri, MI, ToBeRemoved);
         break;
       case AArch64::ANDXrr:
-        Changed = visitAND<uint64_t>(MI, ToBeRemoved);
+        Changed = visitAND<uint64_t>(AArch64::ANDXri, MI, ToBeRemoved);
         break;
       case AArch64::ORRWrs:
        Changed = visitORR(MI, ToBeRemoved);
        break;
      case AArch64::ADDWrr:
-        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, true);
+        Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI,
+                                        ToBeRemoved);
        break;
      case AArch64::SUBWrr:
-        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, false);
+        Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI,
+                                        ToBeRemoved);
        break;
      case AArch64::ADDXrr:
-        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, true);
+        Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI,
+                                        ToBeRemoved);
        break;
      case AArch64::SUBXrr:
-        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, false);
+        Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
+                                        ToBeRemoved);
        break;
      }
    }
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 37c9e4c5c6fe10..b95c15ac6d0734 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -389,4 +389,21 @@ define i1 @uadd_add(i8 %a, i8 %b, i8* %p) {
   ret i1 %e1
 }

+; This is a unique edge case that will generate the following MIR
+;   MOVi32imm -1000000
+;   SUBREG_TO_REG 0, killed %1, %subreg.sub_32
+; When using a 64-bit unsigned for the "-1000000" immediate, the code
+; must make sure to zero out the top 32 bits since SUBREG_TO_REG is
+; zero extending the value
+define i64 @addl_0x80000000(i64 %a) {
+; CHECK-LABEL: addl_0x80000000:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #48576
+; CHECK-NEXT:    movk w8, #65520, lsl #16
+; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    ret
+  %b = add i64 %a, 4293967296
+  ret i64 %b
+}
+
 ; TODO: adds/subs
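
Illustrative note (not part of the patch): the standalone C++ sketch below mimics the add/sub immediate-splitting arithmetic that visitADDSUB now feeds into splitTwoPartImm, and the SUBREG_TO_REG zero-extension hazard the new addsub.ll test covers. splitAddSubImmSketch is a hypothetical, simplified stand-in for the pass's splitAddSubImm: it only checks the "(imm0 << 12) + imm1" shape and omits the pass's additional check that the constant really needs more than one instruction, so it sketches the arithmetic rather than the LLVM implementation.

// Simplified stand-in for the pass's splitAddSubImm; hypothetical helper, not LLVM API.
#include <cassert>
#include <cstdint>
#include <cstdio>

// An AArch64 ADD/SUB immediate is 12 bits, optionally shifted left by 12, so a
// value of the form (imm0 << 12) + imm1 with both halves non-zero can be
// materialized with two ADD/SUB instructions instead of MOV + ADD/SUB.
static bool splitAddSubImmSketch(uint64_t Imm, uint64_t &Imm0, uint64_t &Imm1) {
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~uint64_t(0xffffff)) != 0)
    return false;
  Imm0 = (Imm >> 12) & 0xfff; // emitted with the "lsl #12" shifted form
  Imm1 = Imm & 0xfff;         // emitted with no shift
  return true;
}

int main() {
  // MOVi32imm -1000000 feeding a SUBREG_TO_REG: the 64-bit ADD really adds the
  // zero-extended value 0xFFF0BDC0 (4293967296), not -1000000.
  uint64_t Imm = static_cast<uint64_t>(static_cast<int64_t>(-1000000));
  uint64_t Imm0, Imm1;

  // Without masking, -Imm == 1000000 splits, which would wrongly rewrite the
  // 64-bit ADD as two SUBs totalling 1000000. That result is off by 2^32.
  assert(splitAddSubImmSketch(-Imm, Imm0, Imm1) && Imm0 == 0xf4 && Imm1 == 0x240);

  // With the upper 32 bits cleared, as splitTwoPartImm now does when a
  // SUBREG_TO_REG is involved, neither Imm nor -Imm matches, so the peephole
  // correctly leaves the MOV + ADD sequence alone.
  Imm &= 0xFFFFFFFF;
  assert(Imm == 4293967296ULL);
  assert(!splitAddSubImmSketch(Imm, Imm0, Imm1) &&
         !splitAddSubImmSketch(-Imm, Imm0, Imm1));

  // A case that does benefit: 0x123456 becomes ADD #0x123, lsl #12 then ADD #0x456.
  assert(splitAddSubImmSketch(0x123456, Imm0, Imm1) && Imm0 == 0x123 &&
         Imm1 == 0x456);
  printf("immediate-splitting checks passed\n");
  return 0;
}

The program is self-contained; compiling it with any C++11 compiler and running it should leave every assertion satisfied.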