Skip to content

Commit

Permalink
[AVR] Optimize int16 arithmetic right shift for shift amounts 7/14/15
Browse files Browse the repository at this point in the history
Reviewed By: aykevl

Differential Revision: https://reviews.llvm.org/D115618
  • Loading branch information
benshi001 committed Mar 26, 2022
1 parent 88436af commit bce2e20
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 6 deletions.
156 changes: 154 additions & 2 deletions llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
Expand Up @@ -84,18 +84,23 @@ class AVRExpandPseudo : public MachineFunctionPass {

bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI);

/// Specific shift implementation.
/// Specific shift implementation for int8.
bool expandLSLB7Rd(Block &MBB, BlockIt MBBI);
bool expandLSRB7Rd(Block &MBB, BlockIt MBBI);
bool expandASRB6Rd(Block &MBB, BlockIt MBBI);
bool expandASRB7Rd(Block &MBB, BlockIt MBBI);

/// Specific shift implementation for int16.
bool expandLSLW4Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW4Rd(Block &MBB, BlockIt MBBI);
bool expandASRW7Rd(Block &MBB, BlockIt MBBI);
bool expandLSLW8Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW8Rd(Block &MBB, BlockIt MBBI);
bool expandASRW8Rd(Block &MBB, BlockIt MBBI);
bool expandLSLW12Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW12Rd(Block &MBB, BlockIt MBBI);
bool expandASRW14Rd(Block &MBB, BlockIt MBBI);
bool expandASRW15Rd(Block &MBB, BlockIt MBBI);

// Common implementation of LPMWRdZ and ELPMWRdZ.
bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
Expand Down Expand Up @@ -1401,7 +1406,7 @@ bool AVRExpandPseudo::expand<AVR::LSLWHiRd>(Block &MBB, BlockIt MBBI) {
// add hireg, hireg <==> lsl hireg
auto MILSL =
buildMI(MBB, MBBI, AVR::ADDRdRr)
.addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead))
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));

Expand Down Expand Up @@ -1820,6 +1825,53 @@ bool AVRExpandPseudo::expand<AVR::ASRWLoRd>(Block &MBB, BlockIt MBBI) {
return true;
}

/// Expand the ASRWNRd pseudo for a shift amount of 7 into four 8-bit
/// instructions (shown for the register pair r25:r24):
///
///   lsl r24
///   mov r24, r25
///   rol r24
///   sbc r25, r25
///
/// \param MBB  Block that contains the pseudo instruction.
/// \param MBBI Iterator to the pseudo instruction; it is erased before return.
/// \returns true once the pseudo has been replaced by its expansion.
bool AVRExpandPseudo::expandASRW7Rd(Block &MBB, BlockIt MBBI) {
  MachineInstr &Pseudo = *MBBI;

  // Collect the operand flags that are carried over to the expansion.
  const bool ResultIsDead = Pseudo.getOperand(0).isDead();
  const bool SrcIsKill = Pseudo.getOperand(1).isKill();
  const bool SregIsDead = Pseudo.getOperand(3).isDead();

  // Split the 16-bit register into its low and high 8-bit halves.
  Register LoReg, HiReg;
  TRI->splitReg(Pseudo.getOperand(0).getReg(), LoReg, HiReg);

  const auto DefFlags = RegState::Define | getDeadRegState(ResultIsDead);
  const auto SrcFlags = getKillRegState(SrcIsKill);

  // lsl r24 <=> add r24, r24
  buildMI(MBB, MBBI, AVR::ADDRdRr)
      .addReg(LoReg, DefFlags)
      .addReg(LoReg, RegState::Kill)
      .addReg(LoReg, RegState::Kill);

  // mov r24, r25
  buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(LoReg, DefFlags).addReg(HiReg);

  // rol r24 <=> adc r24, r24
  buildMI(MBB, MBBI, AVR::ADCRdRr)
      .addReg(LoReg, DefFlags)
      .addReg(LoReg, SrcFlags)
      .addReg(LoReg, SrcFlags);

  // sbc r25, r25
  auto SignFill = buildMI(MBB, MBBI, AVR::SBCRdRr)
                      .addReg(HiReg, DefFlags)
                      .addReg(HiReg, SrcFlags)
                      .addReg(HiReg, SrcFlags);

  // Operand 3 is the implicit SREG def, operand 4 the implicit SREG use.
  MachineOperand &SregDef = SignFill->getOperand(3);
  MachineOperand &SregUse = SignFill->getOperand(4);
  if (SregIsDead)
    SregDef.setIsDead();
  // SREG is always implicitly killed
  SregUse.setIsKill();

  Pseudo.eraseFromParent();
  return true;
}

bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Expand All @@ -1846,8 +1898,102 @@ bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));

if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
// SREG is always implicitly killed
MIBHI->getOperand(4).setIsKill();

MI.eraseFromParent();
return true;
}
/// Expand the ASRWNRd pseudo for a shift amount of 14 into five 8-bit
/// instructions (shown for the register pair r25:r24).
///
/// \param MBB  Block that contains the pseudo instruction.
/// \param MBBI Iterator to the pseudo instruction; it is erased before return.
/// \returns true, because the pseudo is always replaced by its expansion.
bool AVRExpandPseudo::expandASRW14Rd(Block &MBB, BlockIt MBBI) {
  MachineInstr &MI = *MBBI;
  Register DstLoReg, DstHiReg;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool DstIsKill = MI.getOperand(1).isKill();
  bool ImpIsDead = MI.getOperand(3).isDead();
  // Split the 16-bit register into its low and high 8-bit halves.
  TRI->splitReg(DstReg, DstLoReg, DstHiReg);

  // lsl r25
  // sbc r24, r24
  // lsl r25
  // mov r25, r24
  // rol r24

  // lsl r25 <=> add r25, r25
  buildMI(MBB, MBBI, AVR::ADDRdRr)
      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
      .addReg(DstHiReg, RegState::Kill)
      .addReg(DstHiReg, RegState::Kill);

  // sbc r24, r24
  buildMI(MBB, MBBI, AVR::SBCRdRr)
      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
      .addReg(DstLoReg, RegState::Kill)
      .addReg(DstLoReg, RegState::Kill);

  // lsl r25 <=> add r25, r25
  buildMI(MBB, MBBI, AVR::ADDRdRr)
      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
      .addReg(DstHiReg, RegState::Kill)
      .addReg(DstHiReg, RegState::Kill);

  // mov r25, r24
  buildMI(MBB, MBBI, AVR::MOVRdRr)
      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
      .addReg(DstLoReg);

  // rol r24 <=> adc r24, r24
  auto MIROL =
      buildMI(MBB, MBBI, AVR::ADCRdRr)
          .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstLoReg, getKillRegState(DstIsKill))
          .addReg(DstLoReg, getKillRegState(DstIsKill));

  // Operand 3 is the implicit SREG def, operand 4 the implicit SREG use.
  if (ImpIsDead)
    MIROL->getOperand(3).setIsDead();
  // SREG is always implicitly killed
  MIROL->getOperand(4).setIsKill();

  MI.eraseFromParent();
  // New instructions were inserted and the pseudo was erased, so the block
  // has been modified and that must be reported to the caller.
  return true;
}

bool AVRExpandPseudo::expandASRW15Rd(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool ImpIsDead = MI.getOperand(3).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);

// lsl r25
// sbc r25, r25
// mov r24, r25

// lsl r25 <=> add r25, r25
buildMI(MBB, MBBI, AVR::ADDRdRr)
.addReg(DstHiReg, RegState::Define)
.addReg(DstHiReg, RegState::Kill)
.addReg(DstHiReg, RegState::Kill);

// sbc r25, r25
auto MISBC =
buildMI(MBB, MBBI, AVR::SBCRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, RegState::Kill)
.addReg(DstHiReg, RegState::Kill);
if (ImpIsDead)
MISBC->getOperand(3).setIsDead();
// SREG is always implicitly killed
MISBC->getOperand(4).setIsKill();

// mov r24, r25
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg);

MI.eraseFromParent();
return true;
Expand All @@ -1858,8 +2004,14 @@ bool AVRExpandPseudo::expand<AVR::ASRWNRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned Imm = MI.getOperand(2).getImm();
switch (Imm) {
case 7:
return expandASRW7Rd(MBB, MBBI);
case 8:
return expandASRW8Rd(MBB, MBBI);
case 14:
return expandASRW14Rd(MBB, MBBI);
case 15:
return expandASRW15Rd(MBB, MBBI);
default:
llvm_unreachable("unimplemented asrwn");
return false;
Expand Down
23 changes: 21 additions & 2 deletions llvm/lib/Target/AVR/AVRISelLowering.cpp
Expand Up @@ -270,8 +270,6 @@ EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
}

SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
//: TODO: this function has to be completely rewritten to produce optimal
// code, for now it's producing very long but correct code.
unsigned Opc8;
const SDNode *N = Op.getNode();
EVT VT = Op.getValueType();
Expand Down Expand Up @@ -372,6 +370,27 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
ShiftAmount = 0;
}
} else if (VT.getSizeInBits() == 16) {
if (Op.getOpcode() == ISD::SRA)
// Special optimization for int16 arithmetic right shift.
switch (ShiftAmount) {
case 15:
Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
DAG.getConstant(15, dl, VT));
ShiftAmount = 0;
break;
case 14:
Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
DAG.getConstant(14, dl, VT));
ShiftAmount = 0;
break;
case 7:
Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
DAG.getConstant(7, dl, VT));
ShiftAmount = 0;
break;
default:
break;
}
if (4 <= ShiftAmount && ShiftAmount < 8)
switch (Op.getOpcode()) {
case ISD::SHL:
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AVR/AVRInstrInfo.td
Expand Up @@ -1943,7 +1943,7 @@ let Constraints = "$src = $rd", Defs = [SREG] in {
: $src)),
(implicit SREG)]>;

def ASRWNRd : Pseudo<(outs DLDREGS
def ASRWNRd : Pseudo<(outs DREGS
: $rd),
(ins DREGS
: $src, imm16
Expand Down
41 changes: 41 additions & 0 deletions llvm/test/CodeGen/AVR/pseudo/ASRWNRd.mir
@@ -0,0 +1,41 @@
# RUN: llc -O0 -run-pass=avr-expand-pseudo %s -o - | FileCheck %s

--- |
target triple = "avr--"
define void @test() {
entry:
ret void
}
...

---
name: test
body: |
bb.0.entry:
liveins: $r15r14, $r13r12, $r11r10, $r17r16
; CHECK-LABEL: test
; CHECK: $r14 = ADDRdRr killed $r14, killed $r14, implicit-def $sreg
; CHECK-NEXT: $r14 = MOVRdRr $r15
; CHECK-NEXT: $r14 = ADCRdRr $r14, $r14, implicit-def $sreg, implicit $sreg
; CHECK-NEXT: $r15 = SBCRdRr $r15, $r15, implicit-def $sreg, implicit killed $sreg
$r15r14 = ASRWNRd $r15r14, 7, implicit-def $sreg
; CHECK-NEXT: $r12 = MOVRdRr $r13
; CHECK-NEXT: $r13 = ADDRdRr killed $r13, killed $r13, implicit-def $sreg
; CHECK-NEXT: $r13 = SBCRdRr $r13, $r13, implicit-def $sreg, implicit killed $sreg
$r13r12 = ASRWNRd $r13r12, 8, implicit-def $sreg
; CHECK-NEXT: $r11 = ADDRdRr killed $r11, killed $r11, implicit-def $sreg
; CHECK-NEXT: $r10 = SBCRdRr killed $r10, killed $r10, implicit-def $sreg, implicit $sreg
; CHECK-NEXT: $r11 = ADDRdRr killed $r11, killed $r11, implicit-def $sreg
; CHECK-NEXT: $r11 = MOVRdRr $r10
; CHECK-NEXT: $r10 = ADCRdRr $r10, $r10, implicit-def $sreg, implicit killed $sreg
$r11r10 = ASRWNRd $r11r10, 14, implicit-def $sreg
; CHECK-NEXT: $r17 = ADDRdRr killed $r17, killed $r17, implicit-def $sreg
; CHECK-NEXT: $r17 = SBCRdRr killed $r17, killed $r17, implicit-def $sreg, implicit killed $sreg
; CHECK-NEXT: $r16 = MOVRdRr $r17
$r17r16 = ASRWNRd $r17r16, 15, implicit-def $sreg
...
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/AVR/shift.ll
Expand Up @@ -301,6 +301,17 @@ define i16 @lsr_i16_13(i16 %a) {
ret i16 %result
}

define i16 @asr_i16_7(i16 %a) {
; Arithmetic shift right of an i16 by 7. The FileCheck directives below pin
; the expected four-instruction sequence, immediately followed by ret.
; CHECK-LABEL: asr_i16_7
; CHECK: lsl r24
; CHECK-NEXT: mov r24, r25
; CHECK-NEXT: rol r24
; CHECK-NEXT: sbc r25, r25
; CHECK-NEXT: ret
%result = ashr i16 %a, 7
ret i16 %result
}

define i16 @asr_i16_9(i16 %a) {
; CHECK-LABEL: asr_i16_9
; CHECK: mov r24, r25
Expand All @@ -325,3 +336,25 @@ define i16 @asr_i16_12(i16 %a) {
%result = ashr i16 %a, 12
ret i16 %result
}

define i16 @asr_i16_14(i16 %a) {
; Arithmetic shift right of an i16 by 14. The FileCheck directives below pin
; the expected five-instruction sequence, immediately followed by ret.
; CHECK-LABEL: asr_i16_14
; CHECK: lsl r25
; CHECK-NEXT: sbc r24, r24
; CHECK-NEXT: lsl r25
; CHECK-NEXT: mov r25, r24
; CHECK-NEXT: rol r24
; CHECK-NEXT: ret
%result = ashr i16 %a, 14
ret i16 %result
}

define i16 @asr_i16_15(i16 %a) {
; Arithmetic shift right of an i16 by 15. The FileCheck directives below pin
; the expected three-instruction sequence, immediately followed by ret.
; CHECK-LABEL: asr_i16_15
; CHECK: lsl r25
; CHECK-NEXT: sbc r25, r25
; CHECK-NEXT: mov r24, r25
; CHECK-NEXT: ret
%result = ashr i16 %a, 15
ret i16 %result
}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AVR/sign-extension.ll
@@ -1,4 +1,4 @@
; RUN: llc -march=avr < %s | FileCheck %s
; RUN: llc -march=avr -verify-machineinstrs < %s | FileCheck %s

define i8 @sign_extended_1_to_8(i1) {
; CHECK-LABEL: sign_extended_1_to_8
Expand Down

0 comments on commit bce2e20

Please sign in to comment.