298 changes: 298 additions & 0 deletions llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -1402,6 +1402,135 @@ bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
return true;
}

template <>
bool AVRExpandPseudo::expand<AVR::LSLW4Rd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);

// swap Rh
// swap Rl
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill));
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill));

// andi Rh, 0xf0
auto MI0 =
buildMI(MBB, MBBI, AVR::ANDIRdK)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addImm(0xf0);
// SREG is implicitly dead.
MI0->getOperand(3).setIsDead();

// eor Rh, Rl
auto MI1 =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstLoReg, getKillRegState(DstIsKill));
// SREG is implicitly dead.
MI1->getOperand(3).setIsDead();

// andi Rl, 0xf0
auto MI2 =
buildMI(MBB, MBBI, AVR::ANDIRdK)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill))
.addImm(0xf0);
// SREG is implicitly dead.
MI2->getOperand(3).setIsDead();

// eor Rh, Rl
auto MI3 =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstLoReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MI3->getOperand(3).setIsDead();

MI.eraseFromParent();
return true;
}
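
For reference, the swap/andi/eor sequence emitted above is the classic nibble trick for shifting a 16-bit value left by four without looping LSL/ROL four times. Below is a minimal host-side sketch (plain C++, not part of the patch; swapNibbles and lslw4 are illustrative names) that replays the emitted steps and checks them against x << 4:

#include <cassert>
#include <cstdint>

// Models the AVR SWAP instruction: exchange the two nibbles of a byte.
static uint8_t swapNibbles(uint8_t r) { return uint8_t((r << 4) | (r >> 4)); }

static uint16_t lslw4(uint16_t x) {
  uint8_t lo = uint8_t(x), hi = uint8_t(x >> 8);
  hi = swapNibbles(hi); // swap Rh
  lo = swapNibbles(lo); // swap Rl
  hi &= 0xf0;           // andi Rh, 0xf0
  hi ^= lo;             // eor Rh, Rl
  lo &= 0xf0;           // andi Rl, 0xf0
  hi ^= lo;             // eor Rh, Rl
  return uint16_t((hi << 8) | lo);
}

int main() {
  for (unsigned v = 0; v <= 0xffff; ++v)
    assert(lslw4(uint16_t(v)) == uint16_t(v << 4));
  return 0;
}

Six register-only instructions cover the four-bit step, which is the shape the new lsl_i16_5 check in shift.ll expects before the final lsl/rol pair.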

template <>
bool AVRExpandPseudo::expand<AVR::LSLW8Rd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);

// mov Rh, Rl
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill));

// clr Rl
auto MIBLO =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill))
.addReg(DstLoReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBLO->getOperand(3).setIsDead();

MI.eraseFromParent();
return true;
}

template <>
bool AVRExpandPseudo::expand<AVR::LSLW12Rd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);

// mov Rh, Rl
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill));

// swap Rh
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill));

// andi Rh, 0xf0
auto MI0 =
buildMI(MBB, MBBI, AVR::ANDIRdK)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addImm(0xf0);
// SREG is implicitly dead.
MI0->getOperand(3).setIsDead();

// clr Rl
auto MI1 =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill))
.addReg(DstLoReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MI1->getOperand(3).setIsDead();

MI.eraseFromParent();
return true;
}
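
The by-8 and by-12 cases above need no bit loop at all: shifting left by eight is a byte move plus clearing the low byte, and shifting by twelve additionally keeps only the low nibble of the moved byte via swap and andi. A hedged host-side sketch of the same steps (plain C++, not part of the patch; the mirrored LSRW8Rd/LSRW12Rd expansions further down follow the same shape with the bytes exchanged and an 0x0f mask):

#include <cassert>
#include <cstdint>

static uint8_t swapNibbles(uint8_t r) { return uint8_t((r << 4) | (r >> 4)); }

static uint16_t lslw8(uint16_t x) {
  uint8_t lo = uint8_t(x), hi;
  hi = lo; // mov Rh, Rl
  lo = 0;  // clr Rl (eor Rl, Rl)
  return uint16_t((hi << 8) | lo);
}

static uint16_t lslw12(uint16_t x) {
  uint8_t lo = uint8_t(x), hi;
  hi = lo;              // mov Rh, Rl
  hi = swapNibbles(hi); // swap Rh
  hi &= 0xf0;           // andi Rh, 0xf0
  lo = 0;               // clr Rl
  return uint16_t((hi << 8) | lo);
}

int main() {
  for (unsigned v = 0; v <= 0xffff; ++v) {
    assert(lslw8(uint16_t(v)) == uint16_t(v << 8));
    assert(lslw12(uint16_t(v)) == uint16_t(v << 12));
  }
  return 0;
}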

template <>
bool AVRExpandPseudo::expand<AVR::LSRWRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
@@ -1433,6 +1562,135 @@ bool AVRExpandPseudo::expand<AVR::LSRWRd>(Block &MBB, BlockIt MBBI) {
return true;
}

template <>
bool AVRExpandPseudo::expand<AVR::LSRW4Rd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);

// swap Rh
// swap Rl
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill));
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill));

// andi Rl, 0xf
auto MI0 =
buildMI(MBB, MBBI, AVR::ANDIRdK)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill))
.addImm(0xf);
// SREG is implicitly dead.
MI0->getOperand(3).setIsDead();

// eor Rl, Rh
auto MI1 =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));
// SREG is implicitly dead.
MI1->getOperand(3).setIsDead();

// andi Rh, 0xf
auto MI2 =
buildMI(MBB, MBBI, AVR::ANDIRdK)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addImm(0xf);
// SREG is implicitly dead.
MI2->getOperand(3).setIsDead();

// eor Rl, Rh
auto MI3 =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MI3->getOperand(3).setIsDead();

MI.eraseFromParent();
return true;
}
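
The right-shift-by-four expansion above mirrors LSLW4Rd with the roles flipped: the masks are 0x0f instead of 0xf0 and the eor results accumulate in the low byte. A quick host-side check of the emitted sequence (plain C++, not part of the patch; lsrw4 is an illustrative name):

#include <cassert>
#include <cstdint>

static uint8_t swapNibbles(uint8_t r) { return uint8_t((r << 4) | (r >> 4)); }

static uint16_t lsrw4(uint16_t x) {
  uint8_t lo = uint8_t(x), hi = uint8_t(x >> 8);
  hi = swapNibbles(hi); // swap Rh
  lo = swapNibbles(lo); // swap Rl
  lo &= 0x0f;           // andi Rl, 0xf
  lo ^= hi;             // eor Rl, Rh
  hi &= 0x0f;           // andi Rh, 0xf
  lo ^= hi;             // eor Rl, Rh
  return uint16_t((hi << 8) | lo);
}

int main() {
  for (unsigned v = 0; v <= 0xffff; ++v)
    assert(lsrw4(uint16_t(v)) == uint16_t(v >> 4));
  return 0;
}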

template <>
bool AVRExpandPseudo::expand<AVR::LSRW8Rd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);

// Move upper byte to lower byte.
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg);

// Clear upper byte.
auto MIBHI =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();

MI.eraseFromParent();
return true;
}

template <>
bool AVRExpandPseudo::expand<AVR::LSRW12Rd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);

// Move upper byte to lower byte.
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg);

// swap Rl
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill));

// andi Rl, 0xf
auto MI0 =
buildMI(MBB, MBBI, AVR::ANDIRdK)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstLoReg, getKillRegState(DstIsKill))
.addImm(0xf);
// SREG is implicitly dead.
MI0->getOperand(3).setIsDead();

// Clear upper byte.
auto MIBHI =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();

MI.eraseFromParent();
return true;
}

template <>
bool AVRExpandPseudo::expand<AVR::RORWRd>(Block &MBB, BlockIt MBBI) {
llvm_unreachable("RORW unimplemented");
@@ -1476,6 +1734,39 @@ bool AVRExpandPseudo::expand<AVR::ASRWRd>(Block &MBB, BlockIt MBBI) {
return true;
}

template <>
bool AVRExpandPseudo::expand<AVR::ASRW8Rd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);

// Move upper byte to lower byte.
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill));

// Move the sign bit to the C flag.
buildMI(MBB, MBBI, AVR::ADDRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));

// Set upper byte to 0 or -1.
auto MIBHI =
buildMI(MBB, MBBI, AVR::SBCRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();

MI.eraseFromParent();
return true;
}
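
The ASRW8Rd expansion above relies on a common AVR sign-extension idiom: after the high byte is copied into the low byte, `add Rh, Rh` pushes the sign bit into the carry flag and `sbc Rh, Rh` then yields 0x00 or 0xff, i.e. the sign-extended upper byte. A small host-side sketch (plain C++, not part of the patch; the carry variable stands in for SREG.C):

#include <cassert>
#include <cstdint>

static uint16_t asrw8(uint16_t x) {
  uint8_t lo, hi = uint8_t(x >> 8);
  lo = hi;                       // mov Rl, Rh
  unsigned carry = hi >> 7;      // add Rh, Rh copies the sign bit into C
  hi = uint8_t(hi + hi);         //   (the doubled value itself is never used)
  hi = uint8_t(hi - hi - carry); // sbc Rh, Rh -> 0x00 or 0xff
  return uint16_t((hi << 8) | lo);
}

int main() {
  for (unsigned v = 0; v <= 0xffff; ++v)
    assert(asrw8(uint16_t(v)) == uint16_t(int16_t(v) >> 8));
  return 0;
}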

template <>
bool AVRExpandPseudo::expand<AVR::LSLB7Rd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
@@ -1798,10 +2089,17 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::ROLBRd);
EXPAND(AVR::RORBRd);
EXPAND(AVR::LSLWRd);
EXPAND(AVR::LSLW4Rd);
EXPAND(AVR::LSLW8Rd);
EXPAND(AVR::LSLW12Rd);
EXPAND(AVR::LSRWRd);
EXPAND(AVR::LSRW4Rd);
EXPAND(AVR::LSRW8Rd);
EXPAND(AVR::LSRW12Rd);
EXPAND(AVR::RORWRd);
EXPAND(AVR::ROLWRd);
EXPAND(AVR::ASRWRd);
EXPAND(AVR::ASRW8Rd);
EXPAND(AVR::LSLB7Rd);
EXPAND(AVR::LSRB7Rd);
EXPAND(AVR::ASRB7Rd);
46 changes: 45 additions & 1 deletion llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -334,7 +334,7 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Invalid shift opcode");
}

// Optimize int8 shifts.
// Optimize int8/int16 shifts.
if (VT.getSizeInBits() == 8) {
if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {
// Optimize LSL when 4 <= ShiftAmount <= 6.
@@ -362,6 +362,50 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
Victim = DAG.getNode(AVRISD::ASR7, dl, VT, Victim);
ShiftAmount = 0;
}
} else if (VT.getSizeInBits() == 16) {
if (4 <= ShiftAmount && ShiftAmount < 8)
switch (Op.getOpcode()) {
case ISD::SHL:
Victim = DAG.getNode(AVRISD::LSL4, dl, VT, Victim);
ShiftAmount -= 4;
break;
case ISD::SRL:
Victim = DAG.getNode(AVRISD::LSR4, dl, VT, Victim);
ShiftAmount -= 4;
break;
default:
break;
}
else if (8 <= ShiftAmount && ShiftAmount < 12)
switch (Op.getOpcode()) {
case ISD::SHL:
Victim = DAG.getNode(AVRISD::LSL8, dl, VT, Victim);
ShiftAmount -= 8;
break;
case ISD::SRL:
Victim = DAG.getNode(AVRISD::LSR8, dl, VT, Victim);
ShiftAmount -= 8;
break;
case ISD::SRA:
Victim = DAG.getNode(AVRISD::ASR8, dl, VT, Victim);
ShiftAmount -= 8;
break;
default:
break;
}
else if (12 <= ShiftAmount)
switch (Op.getOpcode()) {
case ISD::SHL:
Victim = DAG.getNode(AVRISD::LSL12, dl, VT, Victim);
ShiftAmount -= 12;
break;
case ISD::SRL:
Victim = DAG.getNode(AVRISD::LSR12, dl, VT, Victim);
ShiftAmount -= 12;
break;
default:
break;
}
}

while (ShiftAmount--) {
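
With the new nodes in place, the LowerShifts changes above decompose a 16-bit shift into one coarse step (by 4, 8, or 12 for SHL/SRL, by 8 only for SRA) followed by at most three of the existing single-bit shifts, instead of a pure one-bit loop. A rough illustration of the amount split (plain C++, not part of the patch; splitShift16 is an illustrative stand-in for the AVRISD node selection):

#include <cstdio>
#include <utility>

// Returns {coarse step, leftover single-bit shifts} for a 16-bit SHL/SRL
// amount, mirroring the ranges checked in AVRTargetLowering::LowerShifts.
static std::pair<unsigned, unsigned> splitShift16(unsigned amt) {
  if (amt >= 12)
    return {12, amt - 12};
  if (amt >= 8)
    return {8, amt - 8};
  if (amt >= 4)
    return {4, amt - 4};
  return {0, amt}; // small amounts keep using the plain 1-bit expansion
}

int main() {
  const unsigned amts[] = {5, 9, 13};
  for (unsigned amt : amts) {
    std::pair<unsigned, unsigned> split = splitShift16(amt);
    std::printf("shift by %u -> coarse step %u + %u single-bit shifts\n", amt,
                split.first, split.second);
  }
  return 0;
}

The lsl_i16_5 and lsr_i16_13 checks added to shift.ll below show exactly that shape: one swap/mask or byte-move sequence plus a single lsl/rol or lsr/ror pair.
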
7 changes: 7 additions & 0 deletions llvm/lib/Target/AVR/AVRISelLowering.h
@@ -36,8 +36,15 @@ enum NodeType {
/// TargetExternalSymbol, and TargetGlobalAddress.
WRAPPER,
LSL, ///< Logical shift left.
LSL4, ///< Logical shift left 4 bits.
LSL8, ///< Logical shift left 8 bits.
LSL12, ///< Logical shift left 12 bits.
LSR, ///< Logical shift right.
LSR4, ///< Logical shift right 4 bits.
LSR8, ///< Logical shift right 8 bits.
LSR12, ///< Logical shift right 12 bits.
ASR, ///< Arithmetic shift right.
ASR8, ///< Arithmetic shift right 8 bits.
LSL7, ///< Logical shift left 7 bits.
LSR7, ///< Logical shift right 7 bits.
ASR7, ///< Arithmetic shift right 7 bits.
49 changes: 43 additions & 6 deletions llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -55,10 +55,17 @@ def AVRselectcc: SDNode<"AVRISD::SELECT_CC", SDT_AVRSelectCC, [SDNPInGlue]>;

// Shift nodes.
def AVRlsl : SDNode<"AVRISD::LSL", SDTIntUnaryOp>;
def AVRlsl4 : SDNode<"AVRISD::LSL4", SDTIntUnaryOp>;
def AVRlsl8 : SDNode<"AVRISD::LSL8", SDTIntUnaryOp>;
def AVRlsl12 : SDNode<"AVRISD::LSL12", SDTIntUnaryOp>;
def AVRlsr : SDNode<"AVRISD::LSR", SDTIntUnaryOp>;
def AVRlsr4 : SDNode<"AVRISD::LSR4", SDTIntUnaryOp>;
def AVRlsr8 : SDNode<"AVRISD::LSR8", SDTIntUnaryOp>;
def AVRlsr12 : SDNode<"AVRISD::LSR12", SDTIntUnaryOp>;
def AVRrol : SDNode<"AVRISD::ROL", SDTIntUnaryOp>;
def AVRror : SDNode<"AVRISD::ROR", SDTIntUnaryOp>;
def AVRasr : SDNode<"AVRISD::ASR", SDTIntUnaryOp>;
def AVRasr8 : SDNode<"AVRISD::ASR8", SDTIntUnaryOp>;
def AVRlsl7 : SDNode<"AVRISD::LSL7", SDTIntUnaryOp>;
def AVRlsr7 : SDNode<"AVRISD::LSR7", SDTIntUnaryOp>;
def AVRasr7 : SDNode<"AVRISD::ASR7", SDTIntUnaryOp>;
@@ -1674,6 +1681,21 @@ Defs = [SREG] in
"lslb7\t$rd",
[(set i8:$rd, (AVRlsl7 i8:$src)), (implicit SREG)]>;

def LSLW4Rd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"lslw4\t$rd",
[(set i16:$rd, (AVRlsl4 i16:$src)), (implicit SREG)]>;

def LSLW8Rd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"lslw8\t$rd",
[(set i16:$rd, (AVRlsl8 i16:$src)), (implicit SREG)]>;

def LSLW12Rd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"lslw12\t$rd",
[(set i16:$rd, (AVRlsl12 i16:$src)), (implicit SREG)]>;

def LSRRd : FRd<0b1001,
0b0100110,
(outs GPR8:$rd),
@@ -1691,6 +1713,21 @@ Defs = [SREG] in
"lsrw\t$rd",
[(set i16:$rd, (AVRlsr i16:$src)), (implicit SREG)]>;

def LSRW4Rd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"lsrw4\t$rd",
[(set i16:$rd, (AVRlsr4 i16:$src)), (implicit SREG)]>;

def LSRW8Rd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"lsrw8\t$rd",
[(set i16:$rd, (AVRlsr8 i16:$src)), (implicit SREG)]>;

def LSRW12Rd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"lsrw12\t$rd",
[(set i16:$rd, (AVRlsr12 i16:$src)), (implicit SREG)]>;

def ASRRd : FRd<0b1001,
0b0100101,
(outs GPR8:$rd),
@@ -1708,6 +1745,11 @@ Defs = [SREG] in
"asrw\t$rd",
[(set i16:$rd, (AVRasr i16:$src)), (implicit SREG)]>;

def ASRW8Rd : Pseudo<(outs DREGS:$rd),
(ins DREGS:$src),
"asrw8\t$rd",
[(set i16:$rd, (AVRasr8 i16:$src)), (implicit SREG)]>;

// Bit rotate operations.
let Uses = [SREG] in
{
@@ -2123,12 +2165,7 @@ def : Pat<(store i16:$src, (i16 (AVRWrapper tglobaladdr:$dst))),
def : Pat<(i16 (AVRWrapper tblockaddress:$dst)),
(LDIWRdK tblockaddress:$dst)>;

// hi-reg truncation : trunc(int16 >> 8)
//:FIXME: i think it's better to emit an extract subreg node in the DAG than
// all this mess once we get optimal shift code
// lol... I think so, too. [@agnat]
def : Pat<(i8 (trunc (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr (AVRlsr
(AVRlsr DREGS:$src)))))))))),
def : Pat<(i8 (trunc (AVRlsr8 DREGS:$src))),
(EXTRACT_SUBREG DREGS:$src, sub_hi)>;

// :FIXME: DAGCombiner produces an shl node after legalization from these seq:
16 changes: 16 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

@@ -821,6 +822,21 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
return false;
}

// Helper to detect unneeded AND instructions on shift amounts. Called
// from PatFrags in tablegen.
bool RISCVDAGToDAGISel::isUnneededShiftMask(SDNode *N, unsigned Width) const {
assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
assert(Width >= 5 && N->getValueSizeInBits(0) >= (1 << Width) &&
"Unexpected width");
const APInt &Val = N->getConstantOperandAPInt(1);

if (Val.countTrailingOnes() >= Width)
return true;

APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
return Mask.countTrailingOnes() >= Width;
}

// Match (srl (and val, mask), imm) where the result would be a
// zero-extended 32-bit integer. i.e. the mask is 0xffffffff or the result
// is equivalent to this (SimplifyDemandedBits may have removed lower bits
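
The isUnneededShiftMask helper added above (used by the new shiftMaskXLen/shiftMask32 PatFrags in RISCVInstrInfo.td) drops an AND on a shift amount when the mask keeps at least the 5 (32-bit shifts) or 6 (RV64) low bits, counting bits that are already known to be zero. A standalone sketch of that check on plain integers (not part of the patch; it stands in for the APInt/KnownBits query, and the constants come from the new sll_redundant_mask_zeros test):

#include <cassert>
#include <cstdint>

static unsigned countTrailingOnes(uint64_t v) {
  unsigned n = 0;
  while (v & 1) {
    v >>= 1;
    ++n;
  }
  return n;
}

// The mask is redundant if it keeps at least `width` low bits, where bits the
// shift-amount operand is already known to have clear (e.g. from a prior shl)
// count as kept.
static bool maskIsRedundant(uint64_t mask, uint64_t knownZero, unsigned width) {
  if (countTrailingOnes(mask) >= width)
    return true;
  return countTrailingOnes(mask | knownZero) >= width;
}

int main() {
  // (b << 1) & 30 as a 32-bit shift amount: bit 0 of (b << 1) is known zero,
  // so 30 | 1 == 31 covers all five significant bits -> the andi is dropped.
  assert(maskIsRedundant(/*mask=*/30, /*knownZero=*/1, /*width=*/5));
  // A plain `& 15` clears bit 4, which a 32-bit shift does read -> keep it.
  assert(!maskIsRedundant(/*mask=*/15, /*knownZero=*/0, /*width=*/5));
  return 0;
}
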
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -45,6 +45,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {

bool SelectAddrFI(SDValue Addr, SDValue &Base);

bool isUnneededShiftMask(SDNode *N, unsigned Width) const;

bool MatchSRLIW(SDNode *N) const;
bool MatchSLOI(SDNode *N) const;
bool MatchSROI(SDNode *N) const;
17 changes: 9 additions & 8 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -289,12 +289,6 @@ def ixlenimm_li : Operand<XLenVT> {
// Standalone (codegen-only) immleaf patterns.
def simm32 : ImmLeaf<XLenVT, [{return isInt<32>(Imm);}]>;
def simm32hi20 : ImmLeaf<XLenVT, [{return isShiftedInt<20, 12>(Imm);}]>;
// A mask value that won't affect significant shift bits.
def immbottomxlenset : ImmLeaf<XLenVT, [{
if (Subtarget->is64Bit())
return countTrailingOnes<uint64_t>(Imm) >= 6;
return countTrailingOnes<uint64_t>(Imm) >= 5;
}]>;

// A 6-bit constant greater than 32.
def uimm6gt32 : ImmLeaf<XLenVT, [{
@@ -901,14 +895,21 @@ def : PatGprUimmLog2XLen<sra, SRAI>;
// typically introduced when the legalizer promotes the shift amount and
// zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
// ISA only read the least significant 5 bits (RV32I) or 6 bits (RV64I).
def shiftMaskXLen : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
return isUnneededShiftMask(N, Subtarget->is64Bit() ? 6 : 5);
}]>;
def shiftMask32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
return isUnneededShiftMask(N, 5);
}]>;

class shiftop<SDPatternOperator operator>
: PatFrags<(ops node:$val, node:$count),
[(operator node:$val, node:$count),
(operator node:$val, (and node:$count, immbottomxlenset))]>;
(operator node:$val, (shiftMaskXLen node:$count))]>;
class shiftopw<SDPatternOperator operator>
: PatFrags<(ops node:$val, node:$count),
[(operator node:$val, node:$count),
(operator node:$val, (and node:$count, (XLenVT 31)))]>;
(operator node:$val, (shiftMask32 node:$count))]>;

def : PatGprGpr<shiftop<shl>, SLL>;
def : PatGprGpr<shiftop<srl>, SRL>;
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -179,7 +179,7 @@ SampleContextTracker::SampleContextTracker(
SampleContext Context(FuncSample.first(), RawContext);
LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
if (!Context.isBaseContext())
FuncToCtxtProfileSet[Context.getName()].insert(FSamples);
FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples);
ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
assert(!NewNode->getFunctionSamples() &&
"New node can't have sample profile");
90 changes: 90 additions & 0 deletions llvm/test/CodeGen/AVR/shift.ll
@@ -178,3 +178,93 @@ define i8 @asr_i8_7(i8 %a) {
%result = ashr i8 %a, 7
ret i8 %result
}

define i16 @lsl_i16_5(i16 %a) {
; CHECK-LABEL: lsl_i16_5
; CHECK: swap r25
; CHECK-NEXT: swap r24
; CHECK-NEXT: andi r25, 240
; CHECK-NEXT: eor r25, r24
; CHECK-NEXT: andi r24, 240
; CHECK-NEXT: eor r25, r24
; CHECK-NEXT: lsl r24
; CHECK-NEXT: rol r25
; CHECK-NEXT: ret
%result = shl i16 %a, 5
ret i16 %result
}

define i16 @lsl_i16_9(i16 %a) {
; CHECK-LABEL: lsl_i16_9
; CHECK: mov r25, r24
; CHECK-NEXT: clr r24
; CHECK-NEXT: lsl r24
; CHECK-NEXT: rol r25
; CHECK-NEXT: ret
%result = shl i16 %a, 9
ret i16 %result
}

define i16 @lsl_i16_13(i16 %a) {
; CHECK-LABEL: lsl_i16_13
; CHECK: mov r25, r24
; CHECK-NEXT: swap r25
; CHECK-NEXT: andi r25, 240
; CHECK-NEXT: clr r24
; CHECK-NEXT: lsl r24
; CHECK-NEXT: rol r25
; CHECK-NEXT: ret
%result = shl i16 %a, 13
ret i16 %result
}

define i16 @lsr_i16_5(i16 %a) {
; CHECK-LABEL: lsr_i16_5
; CHECK: swap r25
; CHECK-NEXT: swap r24
; CHECK-NEXT: andi r24, 15
; CHECK-NEXT: eor r24, r25
; CHECK-NEXT: andi r25, 15
; CHECK-NEXT: eor r24, r25
; CHECK-NEXT: lsr r25
; CHECK-NEXT: ror r24
; CHECK-NEXT: ret
%result = lshr i16 %a, 5
ret i16 %result
}

define i16 @lsr_i16_9(i16 %a) {
; CHECK-LABEL: lsr_i16_9
; CHECK: mov r24, r25
; CHECK-NEXT: clr r25
; CHECK-NEXT: lsr r25
; CHECK-NEXT: ror r24
; CHECK-NEXT: ret
%result = lshr i16 %a, 9
ret i16 %result
}

define i16 @lsr_i16_13(i16 %a) {
; CHECK-LABEL: lsr_i16_13
; CHECK: mov r24, r25
; CHECK-NEXT: swap r24
; CHECK-NEXT: andi r24, 15
; CHECK-NEXT: clr r25
; CHECK-NEXT: lsr r25
; CHECK-NEXT: ror r24
; CHECK-NEXT: ret
%result = lshr i16 %a, 13
ret i16 %result
}

define i16 @asr_i16_9(i16 %a) {
; CHECK-LABEL: asr_i16_9
; CHECK: mov r24, r25
; CHECK-NEXT: lsl r25
; CHECK-NEXT: sbc r25, r25
; CHECK-NEXT: asr r25
; CHECK-NEXT: ror r24
; CHECK-NEXT: ret
%result = ashr i16 %a, 9
ret i16 %result
}
20 changes: 0 additions & 20 deletions llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -26,7 +26,6 @@ define void @cmpxchg_i8_monotonic_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -103,7 +102,6 @@ define void @cmpxchg_i8_acquire_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -180,7 +178,6 @@ define void @cmpxchg_i8_acquire_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -257,7 +254,6 @@ define void @cmpxchg_i8_release_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -334,7 +330,6 @@ define void @cmpxchg_i8_release_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -411,7 +406,6 @@ define void @cmpxchg_i8_acq_rel_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -488,7 +482,6 @@ define void @cmpxchg_i8_acq_rel_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -565,7 +558,6 @@ define void @cmpxchg_i8_seq_cst_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -642,7 +634,6 @@ define void @cmpxchg_i8_seq_cst_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -719,7 +710,6 @@ define void @cmpxchg_i8_seq_cst_seq_cst(i8* %ptr, i8 %cmp, i8 %val) nounwind {
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: addi a4, zero, 255
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: andi a1, a1, 255
@@ -796,7 +786,6 @@ define void @cmpxchg_i16_monotonic_monotonic(i16* %ptr, i16 %cmp, i16 %val) noun
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -875,7 +864,6 @@ define void @cmpxchg_i16_acquire_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -954,7 +942,6 @@ define void @cmpxchg_i16_acquire_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1033,7 +1020,6 @@ define void @cmpxchg_i16_release_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1112,7 +1098,6 @@ define void @cmpxchg_i16_release_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1191,7 +1176,6 @@ define void @cmpxchg_i16_acq_rel_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1270,7 +1254,6 @@ define void @cmpxchg_i16_acq_rel_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1349,7 +1332,6 @@ define void @cmpxchg_i16_seq_cst_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwi
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1428,7 +1410,6 @@ define void @cmpxchg_i16_seq_cst_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
@@ -1507,7 +1488,6 @@ define void @cmpxchg_i16_seq_cst_seq_cst(i16* %ptr, i16 %cmp, i16 %val) nounwind
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a3, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: lui a4, 16
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: sll a5, a4, a0
470 changes: 190 additions & 280 deletions llvm/test/CodeGen/RISCV/atomic-rmw.ll

Large diffs are not rendered by default.

184 changes: 184 additions & 0 deletions llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I

; This test checks that unnecessary masking of shift amount operands is
; eliminated during instruction selection. The test needs to ensure that the
@@ -11,6 +13,11 @@ define i32 @sll_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 31
%2 = shl i32 %a, %1
ret i32 %2
@@ -22,6 +29,12 @@ define i32 @sll_non_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I-NEXT: andi a1, a1, 15
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_non_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 15
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 15
%2 = shl i32 %a, %1
ret i32 %2
@@ -32,6 +45,11 @@ define i32 @srl_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: srl_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 4095
%2 = lshr i32 %a, %1
ret i32 %2
@@ -43,6 +61,12 @@ define i32 @srl_non_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I-NEXT: andi a1, a1, 7
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: srl_non_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a1, a1, 7
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 7
%2 = lshr i32 %a, %1
ret i32 %2
@@ -53,6 +77,11 @@ define i32 @sra_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sra_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: sraw a0, a0, a1
; RV64I-NEXT: ret
%1 = and i32 %b, 65535
%2 = ashr i32 %a, %1
ret i32 %2
@@ -64,7 +93,162 @@ define i32 @sra_non_redundant_mask(i32 %a, i32 %b) nounwind {
; RV32I-NEXT: andi a1, a1, 32
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sra_non_redundant_mask:
; RV64I: # %bb.0:
; RV64I-NEXT: sraw a0, a0, zero
; RV64I-NEXT: ret
%1 = and i32 %b, 32
%2 = ashr i32 %a, %1
ret i32 %2
}

define i32 @sll_redundant_mask_zeros(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: sll_redundant_mask_zeros:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 1
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_redundant_mask_zeros:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 1
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i32 %b, 1
%2 = and i32 %1, 30
%3 = shl i32 %a, %2
ret i32 %3
}

define i32 @srl_redundant_mask_zeros(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: srl_redundant_mask_zeros:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: srl_redundant_mask_zeros:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i32 %b, 2
%2 = and i32 %1, 28
%3 = lshr i32 %a, %2
ret i32 %3
}

define i32 @sra_redundant_mask_zeros(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: sra_redundant_mask_zeros:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: sra_redundant_mask_zeros:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: sraw a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i32 %b, 3
%2 = and i32 %1, 24
%3 = ashr i32 %a, %2
ret i32 %3
}

define i64 @sll_redundant_mask_zeros_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: sll_redundant_mask_zeros_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a2, a2, 2
; RV32I-NEXT: andi a3, a2, 60
; RV32I-NEXT: addi a4, a3, -32
; RV32I-NEXT: bltz a4, .LBB9_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sll a1, a0, a4
; RV32I-NEXT: mv a0, zero
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB9_2:
; RV32I-NEXT: sll a1, a1, a2
; RV32I-NEXT: addi a4, zero, 31
; RV32I-NEXT: sub a3, a4, a3
; RV32I-NEXT: srli a4, a0, 1
; RV32I-NEXT: srl a3, a4, a3
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: sll a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_redundant_mask_zeros_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 2
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i64 %b, 2
%2 = and i64 %1, 60
%3 = shl i64 %a, %2
ret i64 %3
}

define i64 @srl_redundant_mask_zeros_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: srl_redundant_mask_zeros_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a2, a2, 3
; RV32I-NEXT: andi a3, a2, 56
; RV32I-NEXT: addi a4, a3, -32
; RV32I-NEXT: bltz a4, .LBB10_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srl a0, a1, a4
; RV32I-NEXT: mv a1, zero
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB10_2:
; RV32I-NEXT: srl a0, a0, a2
; RV32I-NEXT: addi a4, zero, 31
; RV32I-NEXT: sub a3, a4, a3
; RV32I-NEXT: slli a4, a1, 1
; RV32I-NEXT: sll a3, a4, a3
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: srl a1, a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: srl_redundant_mask_zeros_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: srl a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i64 %b, 3
%2 = and i64 %1, 56
%3 = lshr i64 %a, %2
ret i64 %3
}

define i64 @sra_redundant_mask_zeros_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: sra_redundant_mask_zeros_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a2, a2, 4
; RV32I-NEXT: andi a3, a2, 48
; RV32I-NEXT: addi a4, a3, -32
; RV32I-NEXT: bltz a4, .LBB11_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sra a0, a1, a4
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB11_2:
; RV32I-NEXT: srl a0, a0, a2
; RV32I-NEXT: addi a4, zero, 31
; RV32I-NEXT: sub a3, a4, a3
; RV32I-NEXT: slli a4, a1, 1
; RV32I-NEXT: sll a3, a4, a3
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: sra a1, a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: sra_redundant_mask_zeros_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a1, 4
; RV64I-NEXT: sra a0, a0, a1
; RV64I-NEXT: ret
%1 = shl i64 %b, 4
%2 = and i64 %1, 48
%3 = ashr i64 %a, %2
ret i64 %3
}
4 changes: 4 additions & 0 deletions llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
@@ -1,18 +1,22 @@
; Test for CSSPGO's SampleContextTracker to make sure context profile tree is promoted and merged properly
; based on inline decision, so post inline counts are accurate.

; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/profile-context-tracker.prof -o %t

; Note that we need the new pass manager to enable top-down processing for the sample profile loader.
; Test that we inlined the following in top-down order and that entry counts accurately reflect the post-inline base profile
; main:3 @ _Z5funcAi
; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
; _Z5funcBi:1 @ _Z8funcLeafi
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL

; Test that we inlined the following in top-down order and that entry counts accurately reflect the post-inline base profile
; main:3 @ _Z5funcAi
; _Z5funcAi:1 @ _Z8funcLeafi
; _Z5funcBi:1 @ _Z8funcLeafi
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT


@factor = dso_local global i32 3, align 4, !dbg !0
36 changes: 36 additions & 0 deletions llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext
@@ -0,0 +1,36 @@
[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11
0: 6
1: 6
3: 287884
4: 287864 _Z3fibi:315608
15: 23
[main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20
0: 15
1: 15
3: 74946
4: 74941 _Z3fibi:82359
10: 23324
11: 23327 _Z3fibi:25228
15: 11
[main]:154:0
2: 12
3: 18 _Z5funcAi:11
3.1: 18 _Z5funcBi:19
[external:12 @ main]:154:12
2: 12
3: 10 _Z5funcAi:7
3.1: 10 _Z5funcBi:11
[main:3.1 @ _Z5funcBi]:120:19
0: 19
1: 19 _Z8funcLeafi:20
3: 12
[externalA:17 @ _Z5funcBi]:120:3
0: 3
1: 3
[external:10 @ _Z5funcBi]:120:10
0: 10
1: 10
[main:3 @ _Z5funcAi]:99:11
0: 10
1: 10 _Z8funcLeafi:11
3: 24
4 changes: 4 additions & 0 deletions llvm/test/tools/llvm-profdata/cs-sample-profile.test
@@ -0,0 +1,4 @@
RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext
RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext
RUN: llvm-profdata merge --sample --extbinary %p/Inputs/cs-sample.proftext -o %t.prof && llvm-profdata merge --sample --text %t.prof -o %t1.proftext
RUN: diff -b %t1.proftext %S/Inputs/cs-sample.proftext
2 changes: 1 addition & 1 deletion llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -696,7 +696,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
Remapper ? remapSamples(I->second, *Remapper, Result)
: FunctionSamples();
FunctionSamples &Samples = Remapper ? Remapped : I->second;
StringRef FName = Samples.getName();
StringRef FName = Samples.getNameWithContext(true);
MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
if (Result != sampleprof_error::success) {
std::error_code EC = make_error_code(Result);
2 changes: 1 addition & 1 deletion llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -164,7 +164,7 @@ CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr) {
if (Ret.second) {
SampleContext FContext(Ret.first->first(), RawContext);
FunctionSamples &FProfile = Ret.first->second;
FProfile.setName(FContext.getName());
FProfile.setName(FContext.getNameWithoutContext());
FProfile.setContext(FContext);
}
return Ret.first->second;
4 changes: 2 additions & 2 deletions llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -638,13 +638,13 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
std::max(maxArgAttrs, unsigned(intrinsic.ArgumentAttributes.size()));
unsigned &N = UniqAttributes[&intrinsic];
if (N) continue;
assert(AttrNum < 256 && "Too many unique attributes for table!");
N = ++AttrNum;
assert(N < 65536 && "Too many unique attributes for table!");
}

// Emit an array of AttributeList. Most intrinsics will have at least one
// entry, for the function itself (index ~1), which is usually nounwind.
OS << " static const uint8_t IntrinsicsToAttributesMap[] = {\n";
OS << " static const uint16_t IntrinsicsToAttributesMap[] = {\n";

for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
const CodeGenIntrinsic &intrinsic = Ints[i];