Skip to content

Commit

Permalink
[AVR] Fix incorrect expansion of pseudo instructions LPMWRdZ/ELPMWRdZ
Browse files Browse the repository at this point in the history
The 'ELPM' instruction has three forms:

| form        | feature  |
| ----------- | -------- |
| ELPM        | hasELPM  |
| ELPM Rd, Z  | hasELPMX |
| ELPM Rd, Z+ | hasELPMX |

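The expansion of the pseudo instructions LPMWRdZ/ELPMWRdZ always used the
register forms (the second and third forms above). But devices that have only
ELPM and lack ELPMX support only the first form, which loads into the implicit
R0 register, so the expansion must fall back to that form on such devices.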

Reviewed By: aykevl, Miss_Grape

Differential Revision: https://reviews.llvm.org/D141264
  • Loading branch information
benshi001 committed Apr 6, 2023
1 parent effdfa7 commit acb4d14
Show file tree
Hide file tree
Showing 4 changed files with 302 additions and 46 deletions.
113 changes: 89 additions & 24 deletions llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
Expand Up @@ -97,9 +97,9 @@ class AVRExpandPseudo : public MachineFunctionPass {
bool expandASRW15Rd(Block &MBB, BlockIt MBBI);

// Common implementation of LPMWRdZ and ELPMWRdZ.
bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsELPM);
// Common implementation of LPMBRdZ and ELPMBRdZ.
bool expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsExt);
bool expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsELPM);
};

char AVRExpandPseudo::ID = 0;
Expand Down Expand Up @@ -812,19 +812,21 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
return true;
}

bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) {
bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsELPM) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
Register SrcLoReg, SrcHiReg;
bool SrcIsKill = MI.getOperand(1).isKill();
unsigned OpLo = IsExt ? AVR::ELPMRdZPi : AVR::LPMRdZPi;
unsigned OpHi = IsExt ? AVR::ELPMRdZ : AVR::LPMRdZ;
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
bool IsLPMRn = IsELPM ? STI.hasELPMX() : STI.hasLPMX();

TRI->splitReg(DstReg, DstLoReg, DstHiReg);
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);

// Set the I/O register RAMPZ for ELPM.
if (IsExt) {
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
if (IsELPM) {
Register Bank = MI.getOperand(2).getReg();
// out RAMPZ, rtmp
buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(Bank);
Expand All @@ -833,18 +835,81 @@ bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) {
// This is enforced by the @earlyclobber constraint.
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");

// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
.addReg(DstLoReg, RegState::Define)
.addReg(SrcReg);

// Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
.addReg(DstHiReg, RegState::Define)
.addReg(SrcReg, getKillRegState(SrcIsKill));
if (IsLPMRn) {
unsigned OpLo = IsELPM ? AVR::ELPMRdZPi : AVR::LPMRdZPi;
unsigned OpHi = IsELPM ? AVR::ELPMRdZ : AVR::LPMRdZ;
// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
.addReg(DstLoReg, RegState::Define)
.addReg(SrcReg);
// Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
.addReg(DstHiReg, RegState::Define)
.addReg(SrcReg, getKillRegState(SrcIsKill));
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
} else {
unsigned Opc = IsELPM ? AVR::ELPM : AVR::LPM;
// Load low byte, and copy to the low destination register.
auto MIBLO = buildMI(MBB, MBBI, Opc);
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstLoReg, RegState::Define)
.addReg(AVR::R0, RegState::Kill);
MIBLO.setMemRefs(MI.memoperands());
// Increase the Z register by 1.
if (STI.hasADDSUBIW()) {
// adiw r31:r30, 1
auto MIINC = buildMI(MBB, MBBI, AVR::ADIWRdK)
.addReg(SrcReg, RegState::Define)
.addReg(SrcReg, getKillRegState(SrcIsKill))
.addImm(1);
MIINC->getOperand(3).setIsDead();
} else {
// subi r30, 255
// sbci r31, 255
buildMI(MBB, MBBI, AVR::SUBIRdK)
.addReg(SrcLoReg, RegState::Define)
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.addImm(255);
auto MIZHI = buildMI(MBB, MBBI, AVR::SBCIRdK)
.addReg(SrcHiReg, RegState::Define)
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.addImm(255);
MIZHI->getOperand(3).setIsDead();
MIZHI->getOperand(4).setIsKill();
}
// Load high byte, and copy to the high destination register.
auto MIBHI = buildMI(MBB, MBBI, Opc);
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstHiReg, RegState::Define)
.addReg(AVR::R0, RegState::Kill);
MIBHI.setMemRefs(MI.memoperands());
}

MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
// Restore the Z register if it is not killed.
if (!SrcIsKill) {
if (STI.hasADDSUBIW()) {
// sbiw r31:r30, 1
auto MIDEC = buildMI(MBB, MBBI, AVR::SBIWRdK)
.addReg(SrcReg, RegState::Define)
.addReg(SrcReg, getKillRegState(SrcIsKill))
.addImm(1);
MIDEC->getOperand(3).setIsDead();
} else {
// subi r30, 1
// sbci r31, 0
buildMI(MBB, MBBI, AVR::SUBIRdK)
.addReg(SrcLoReg, RegState::Define)
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.addImm(1);
auto MIZHI = buildMI(MBB, MBBI, AVR::SBCIRdK)
.addReg(SrcHiReg, RegState::Define)
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.addImm(0);
MIZHI->getOperand(3).setIsDead();
MIZHI->getOperand(4).setIsKill();
}
}

MI.eraseFromParent();
return true;
Expand All @@ -860,31 +925,31 @@ bool AVRExpandPseudo::expand<AVR::ELPMWRdZ>(Block &MBB, BlockIt MBBI) {
return expandLPMWELPMW(MBB, MBBI, true);
}

bool AVRExpandPseudo::expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsExt) {
bool AVRExpandPseudo::expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsELPM) {
MachineInstr &MI = *MBBI;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
bool HasX = IsExt ? STI.hasELPMX() : STI.hasLPMX();
bool IsLPMRn = IsELPM ? STI.hasELPMX() : STI.hasLPMX();

// Set the I/O register RAMPZ for ELPM (out RAMPZ, rtmp).
if (IsExt) {
if (IsELPM) {
Register BankReg = MI.getOperand(2).getReg();
buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg);
}

// Load byte.
if (HasX) {
unsigned Opc = IsExt ? AVR::ELPMRdZ : AVR::LPMRdZ;
if (IsLPMRn) {
unsigned Opc = IsELPM ? AVR::ELPMRdZ : AVR::LPMRdZ;
auto MILB = buildMI(MBB, MBBI, Opc)
.addReg(DstReg, RegState::Define)
.addReg(SrcReg, getKillRegState(SrcIsKill));
MILB.setMemRefs(MI.memoperands());
} else {
// For the basic ELPM/LPM instruction, its operand[0] is the implicit
// 'Z' register, and its operand[1] is the implicit 'R0' register.
unsigned Opc = IsExt ? AVR::ELPM : AVR::LPM;
unsigned Opc = IsELPM ? AVR::ELPM : AVR::LPM;
auto MILB = buildMI(MBB, MBBI, Opc);
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstReg, RegState::Define)
Expand Down
38 changes: 18 additions & 20 deletions llvm/lib/Target/AVR/AVRInstrInfo.td
Expand Up @@ -1690,10 +1690,15 @@ let canFoldAsLoad = 1, isReMaterializable = 1, mayLoad = 1,
: F16<0b1001010111001000, (outs), (ins), "lpm", []>,
Requires<[HasLPM]>;

// This pseudo is combination of LPM and MOV instructions.
let Defs = [R0] in
def LPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z), "lpmb\t$dst, $z", []>,
Requires<[HasLPM]>;
// These pseudo instructions are combinations of the LPM and MOV instructions.
let Defs = [R0] in {
def LPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z), "lpmb\t$dst, $z", []>,
Requires<[HasLPM]>;

let Constraints = "@earlyclobber $dst" in
def LPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z), "lpmw\t$dst, $z", []>,
Requires<[HasLPM]>;
}

def LPMRdZ : FLPMX<0, 0,
(outs GPR8
Expand All @@ -1713,14 +1718,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1, mayLoad = 1,
"lpm\t$rd, $z+", []>,
Requires<[HasLPMX]>;

let Constraints = "@earlyclobber $dst" in
def LPMWRdZ : Pseudo<(outs DREGS
: $dst),
(ins ZREG
: $z),
"lpmw\t$dst, $z", []>,
Requires<[HasLPMX]>;

def LPMWRdZPi : Pseudo<(outs DREGS
: $dst),
(ins ZREG
Expand All @@ -1747,19 +1744,20 @@ let mayLoad = 1, hasSideEffects = 0 in {
Requires<[HasELPMX]>;
}

// This pseudo is combination of the OUT and ELPM instructions.
let Defs = [R0] in
def ELPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
"elpmb\t$dst, $z, $p", []>,
Requires<[HasELPM]>;
// These pseudo instructions are combinations of the OUT and ELPM instructions.
let Defs = [R0] in {
def ELPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
"elpmb\t$dst, $z, $p", []>,
Requires<[HasELPM]>;

// These pseudos are combination of the OUT and ELPM instructions.
let Defs = [R31R30], hasSideEffects = 1 in {
let Constraints = "@earlyclobber $dst" in
def ELPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p),
"elpmw\t$dst, $z, $p", []>,
Requires<[HasELPMX]>;
Requires<[HasELPM]>;
}

// These pseudos are combination of the OUT and ELPM instructions.
let Defs = [R31R30], hasSideEffects = 1 in {
def ELPMBRdZPi : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
"elpmb\t$dst, $z+, $p", []>,
Requires<[HasELPMX]>;
Expand Down
112 changes: 112 additions & 0 deletions llvm/test/CodeGen/AVR/elpm.ll
Expand Up @@ -28,6 +28,32 @@ define i16 @foo0(i16 %a, i16 %b) {
; CHECK-NEXT: sub r24, r18
; CHECK-NEXT: sbc r25, r19
; CHECK-NEXT: ret
;
; NOX-LABEL: foo0:
; NOX: ; %bb.0: ; %entry
; NOX-NEXT: lsl r22
; NOX-NEXT: rol r23
; NOX-NEXT: subi r22, lo8(-(arr0))
; NOX-NEXT: sbci r23, hi8(-(arr0))
; NOX-NEXT: movw r30, r22
; NOX-NEXT: lpm
; NOX-NEXT: mov r18, r0
; NOX-NEXT: adiw r30, 1
; NOX-NEXT: lpm
; NOX-NEXT: mov r19, r0
; NOX-NEXT: lsl r24
; NOX-NEXT: rol r25
; NOX-NEXT: subi r24, lo8(-(arr0))
; NOX-NEXT: sbci r25, hi8(-(arr0))
; NOX-NEXT: movw r30, r24
; NOX-NEXT: lpm
; NOX-NEXT: mov r24, r0
; NOX-NEXT: adiw r30, 1
; NOX-NEXT: lpm
; NOX-NEXT: mov r25, r0
; NOX-NEXT: sub r24, r18
; NOX-NEXT: sbc r25, r19
; NOX-NEXT: ret
entry:
%arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %a
%0 = load i16, i16 addrspace(1)* %arrayidx, align 1
Expand Down Expand Up @@ -59,6 +85,34 @@ define i16 @foo1(i16 %a, i16 %b) {
; CHECK-NEXT: sub r24, r20
; CHECK-NEXT: sbc r25, r21
; CHECK-NEXT: ret
;
; NOX-LABEL: foo1:
; NOX: ; %bb.0: ; %entry
; NOX-NEXT: lsl r22
; NOX-NEXT: rol r23
; NOX-NEXT: subi r22, lo8(-(arr1))
; NOX-NEXT: sbci r23, hi8(-(arr1))
; NOX-NEXT: movw r30, r22
; NOX-NEXT: ldi r18, 1
; NOX-NEXT: out 59, r18
; NOX-NEXT: elpm
; NOX-NEXT: mov r20, r0
; NOX-NEXT: adiw r30, 1
; NOX-NEXT: elpm
; NOX-NEXT: mov r21, r0
; NOX-NEXT: lsl r24
; NOX-NEXT: rol r25
; NOX-NEXT: subi r24, lo8(-(arr0))
; NOX-NEXT: sbci r25, hi8(-(arr0))
; NOX-NEXT: movw r30, r24
; NOX-NEXT: lpm
; NOX-NEXT: mov r24, r0
; NOX-NEXT: adiw r30, 1
; NOX-NEXT: lpm
; NOX-NEXT: mov r25, r0
; NOX-NEXT: sub r24, r20
; NOX-NEXT: sbc r25, r21
; NOX-NEXT: ret
entry:
%arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(1)* @arr0, i16 0, i16 %a
%0 = load i16, i16 addrspace(1)* %arrayidx, align 1
Expand Down Expand Up @@ -90,6 +144,34 @@ define i16 @foo2(i16 %a, i16 %b) {
; CHECK-NEXT: sub r24, r18
; CHECK-NEXT: sbc r25, r19
; CHECK-NEXT: ret
;
; NOX-LABEL: foo2:
; NOX: ; %bb.0: ; %entry
; NOX-NEXT: lsl r24
; NOX-NEXT: rol r25
; NOX-NEXT: subi r24, lo8(-(arr2))
; NOX-NEXT: sbci r25, hi8(-(arr2))
; NOX-NEXT: movw r30, r24
; NOX-NEXT: ldi r18, 2
; NOX-NEXT: out 59, r18
; NOX-NEXT: elpm
; NOX-NEXT: mov r24, r0
; NOX-NEXT: adiw r30, 1
; NOX-NEXT: elpm
; NOX-NEXT: mov r25, r0
; NOX-NEXT: lsl r22
; NOX-NEXT: rol r23
; NOX-NEXT: subi r22, lo8(-(arr0))
; NOX-NEXT: sbci r23, hi8(-(arr0))
; NOX-NEXT: movw r30, r22
; NOX-NEXT: lpm
; NOX-NEXT: mov r18, r0
; NOX-NEXT: adiw r30, 1
; NOX-NEXT: lpm
; NOX-NEXT: mov r19, r0
; NOX-NEXT: sub r24, r18
; NOX-NEXT: sbc r25, r19
; NOX-NEXT: ret
entry:
%arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(3)* @arr2, i16 0, i16 %a
%0 = load i16, i16 addrspace(3)* %arrayidx, align 1
Expand Down Expand Up @@ -123,6 +205,36 @@ define i16 @foo3(i16 %a, i16 %b) {
; CHECK-NEXT: sub r24, r20
; CHECK-NEXT: sbc r25, r21
; CHECK-NEXT: ret
;
; NOX-LABEL: foo3:
; NOX: ; %bb.0: ; %entry
; NOX-NEXT: lsl r22
; NOX-NEXT: rol r23
; NOX-NEXT: subi r22, lo8(-(arr1))
; NOX-NEXT: sbci r23, hi8(-(arr1))
; NOX-NEXT: movw r30, r22
; NOX-NEXT: ldi r18, 1
; NOX-NEXT: out 59, r18
; NOX-NEXT: elpm
; NOX-NEXT: mov r20, r0
; NOX-NEXT: adiw r30, 1
; NOX-NEXT: elpm
; NOX-NEXT: mov r21, r0
; NOX-NEXT: lsl r24
; NOX-NEXT: rol r25
; NOX-NEXT: subi r24, lo8(-(arr2))
; NOX-NEXT: sbci r25, hi8(-(arr2))
; NOX-NEXT: movw r30, r24
; NOX-NEXT: ldi r18, 2
; NOX-NEXT: out 59, r18
; NOX-NEXT: elpm
; NOX-NEXT: mov r24, r0
; NOX-NEXT: adiw r30, 1
; NOX-NEXT: elpm
; NOX-NEXT: mov r25, r0
; NOX-NEXT: sub r24, r20
; NOX-NEXT: sbc r25, r21
; NOX-NEXT: ret
entry:
%arrayidx = getelementptr inbounds [4 x i16], [4 x i16] addrspace(3)* @arr2, i16 0, i16 %a
%0 = load i16, i16 addrspace(3)* %arrayidx, align 1
Expand Down

0 comments on commit acb4d14

Please sign in to comment.