286 changes: 285 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setJumpIsExpensive();

setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT, ISD::MUL});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);

Expand All @@ -1064,6 +1064,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
if (Subtarget.hasVendorXTHeadMemPair())
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
if (Subtarget.useRVVForFixedLengthVectors())
setTargetDAGCombine(ISD::BITCAST);

Expand Down Expand Up @@ -8644,6 +8646,134 @@ static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
}

// Attempt to expand (mul x, C) for a constant, non-negative C into a short
// shift/shift-and-add sequence using the XTHeadBa vendor extension.
// Returns SDValue() if no profitable expansion applies.
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  SDLoc DL(N);
  const MVT XLenVT = Subtarget.getXLenVT();
  const EVT VT = N->getValueType(0);

  // An MUL is usually smaller than any alternative sequence for legal type.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      TLI.isOperationLegal(ISD::MUL, VT))
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *ConstOp = dyn_cast<ConstantSDNode>(N1);
  // Any optimization requires a constant RHS.
  if (!ConstOp)
    return SDValue();

  const APInt &C = ConstOp->getAPIntValue();
  // A multiply-by-pow2 will be reduced to a shift by the
  // architecture-independent code.
  if (C.isPowerOf2())
    return SDValue();

  // The below optimizations only work for non-negative constants.
  if (!C.isNonNegative())
    return SDValue();

  // DAG-building helpers for the expanded sequence.
  auto Shl = [&](SDValue Value, unsigned ShiftAmount) {
    if (!ShiftAmount)
      return Value;

    SDValue ShiftAmountConst = DAG.getConstant(ShiftAmount, DL, XLenVT);
    return DAG.getNode(ISD::SHL, DL, Value.getValueType(), Value,
                       ShiftAmountConst);
  };
  auto Add = [&](SDValue Addend1, SDValue Addend2) {
    return DAG.getNode(ISD::ADD, DL, Addend1.getValueType(), Addend1, Addend2);
  };

  if (Subtarget.hasVendorXTHeadBa()) {
    // We try to simplify using shift-and-add instructions into up to
    // 3 instructions (e.g. 2x shift-and-add and 1x shift).

    // Returns true (setting N and Quotient) if C is divisible by some
    // X = 2^i + 1 with i in {3, 2, 1} -- the multipliers that a single
    // shift-and-add instruction implements. Prefers the largest such X.
    auto isDivisibleByShiftedAddConst = [&](APInt C, APInt &N,
                                            APInt &Quotient) {
      unsigned BitWidth = C.getBitWidth();
      for (unsigned i = 3; i >= 1; --i) {
        APInt X(BitWidth, (1 << i) + 1);
        APInt Remainder;
        APInt::sdivrem(C, X, Quotient, Remainder);
        if (Remainder == 0) {
          N = X;
          return true;
        }
      }
      return false;
    };
    // Returns true iff C itself is 2^i + 1 for some i in {1, 2, 3}.
    auto isShiftedAddConst = [&](APInt C, APInt &N) {
      APInt Quotient;
      return isDivisibleByShiftedAddConst(C, N, Quotient) && Quotient == 1;
    };
    // A multiplier that a single extra shift (1..3 bits) can implement.
    auto isSmallShiftAmount = [](APInt C) {
      return (C == 2) || (C == 4) || (C == 8);
    };

    auto ShiftAndAdd = [&](SDValue Value, unsigned ShiftAmount,
                           SDValue Addend) {
      return Add(Shl(Value, ShiftAmount), Addend);
    };
    // The expansion is built on XLenVT; widen the input and narrow the
    // result back to VT around it.
    auto AnyExt = [&](SDValue Value) {
      return DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Value);
    };
    auto Trunc = [&](SDValue Value) {
      return DAG.getNode(ISD::TRUNCATE, DL, VT, Value);
    };

    // Factor out the power-of-2 part: C = ShiftedC << TrailingZeroes.
    unsigned TrailingZeroes = C.countTrailingZeros();
    const APInt ShiftedC = C.ashr(TrailingZeroes);
    const APInt ShiftedCMinusOne = ShiftedC - 1;

    // the below comments use the following notation:
    //   n, m .. a shift-amount for a shift-and-add instruction
    //           (i.e. in { 2, 4, 8 })
    //   k .. a power-of-2 that is equivalent to shifting by
    //        TrailingZeroes bits
    //   i, j .. a power-of-2

    APInt ShiftAmt1;
    APInt ShiftAmt2;
    APInt Quotient;

    // C = (m + 1) * k
    if (isShiftedAddConst(ShiftedC, ShiftAmt1)) {
      SDValue Op0 = AnyExt(N0);
      SDValue Result = ShiftAndAdd(Op0, ShiftAmt1.logBase2(), Op0);
      return Trunc(Shl(Result, TrailingZeroes));
    }
    // C = (m + 1) * (n + 1) * k
    if (isDivisibleByShiftedAddConst(ShiftedC, ShiftAmt1, Quotient) &&
        isShiftedAddConst(Quotient, ShiftAmt2)) {
      SDValue Op0 = AnyExt(N0);
      SDValue Result = ShiftAndAdd(Op0, ShiftAmt1.logBase2(), Op0);
      Result = ShiftAndAdd(Result, ShiftAmt2.logBase2(), Result);
      return Trunc(Shl(Result, TrailingZeroes));
    }
    // C = ((m + 1) * n + 1) * k
    if (isDivisibleByShiftedAddConst(ShiftedCMinusOne, ShiftAmt1, ShiftAmt2) &&
        isSmallShiftAmount(ShiftAmt2)) {
      SDValue Op0 = AnyExt(N0);
      SDValue Result = ShiftAndAdd(Op0, ShiftAmt1.logBase2(), Op0);
      // Shift by the quotient n (held in ShiftAmt2). The previous code
      // read the stale `Quotient` variable here (left over from the
      // failed check above), which miscompiled e.g. C = 19 (9 * 2 + 1)
      // into a multiply by 37.
      Result = ShiftAndAdd(Result, ShiftAmt2.logBase2(), Op0);
      return Trunc(Shl(Result, TrailingZeroes));
    }

    // C has 2 bits set: synthesize using 2 shifts and 1 add (which may
    // see one of the shifts merged into a shift-and-add, if feasible)
    if (C.countPopulation() == 2) {
      // Build the high bit as an APInt: `1 << C.logBase2()` on a plain
      // int overflows for bit positions >= 31 (reachable on RV64).
      APInt HighBit = APInt::getOneBitSet(C.getBitWidth(), C.logBase2());
      APInt LowBit = C - HighBit;
      return Add(Shl(N0, HighBit.logBase2()), Shl(N0, LowBit.logBase2()));
    }
  }

  return SDValue();
}

static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
Expand Down Expand Up @@ -9636,6 +9766,145 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return InputRootReplacement;
}

// Helper function for performMemPairCombine.
// Try to combine the memory loads/stores LSNode1 and LSNode2
// into a single memory pair operation, anchored at BasePtr + Imm.
// On success, LSNode2's uses are replaced in place and the replacement
// for LSNode1 is returned; returns SDValue() if combining is not legal.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
                                 uint64_t Imm) {
  // Merging the two nodes must not create a cycle: give up if either node
  // is reachable from the other's operands.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};

  if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
      SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
    return SDValue();

  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // The new operation has twice the width.
  MVT XLenVT = Subtarget.getXLenVT();
  EVT MemVT = LSNode1->getMemoryVT();
  EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
  MachineMemOperand *MMO = LSNode1->getMemOperand();
  // Memory operand covering both accesses: 8 bytes for an i32 pair,
  // 16 bytes for an i64 pair.
  MachineMemOperand *NewMMO = MF.getMachineMemOperand(
      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);

  if (LSNode1->getOpcode() == ISD::LOAD) {
    auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
    unsigned Opcode;
    if (MemVT == MVT::i32)
      // Zero-extending word-pair loads map to TH_LWUD, everything else
      // (sign-/any-extending) to TH_LWD.
      Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
    else
      Opcode = RISCVISD::TH_LDD;

    // Results: value1, value2, chain.
    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
        {LSNode1->getChain(), BasePtr,
         DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    // Re-bundle each original load's value with the new chain so existing
    // users of either load keep seeing a (value, chain) pair.
    SDValue Node1 =
        DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
    SDValue Node2 =
        DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));

    // LSNode2 is replaced here; LSNode1 is replaced by the caller via the
    // returned value.
    DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
    return Node1;
  } else {
    unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;

    // Operands: chain, value1, value2, base, offset. Operand 1 of a
    // (non-indexed) store node is the stored value.
    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
        {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
         BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
    return Res;
  }
}

// Try to combine two adjacent loads/stores to a single pair instruction from
// the XTHeadMemPair vendor extension. N is one of the pair candidates; the
// other is searched for among the users of N's chain operand.
static SDValue performMemPairCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // Target does not support load/store pair.
  if (!Subtarget.hasVendorXTHeadMemPair())
    return SDValue();

  LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
  EVT MemVT = LSNode1->getMemoryVT();
  // Operand index of the address: 1 for loads, 2 for stores.
  unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;

  // No volatile, indexed or atomic loads/stores.
  if (!LSNode1->isSimple() || LSNode1->isIndexed())
    return SDValue();

  // Function to get a base + constant representation from a memory value.
  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
    if (Ptr->getOpcode() == ISD::ADD) {
      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
        return {Ptr->getOperand(0), C1->getZExtValue()};
      }
    }
    return {Ptr, 0};
  };

  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));

  // Scan other users of this node's chain for a pairable load/store of the
  // same kind.
  SDValue Chain = N->getOperand(0);
  for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
       UI != UE; ++UI) {
    SDUse &Use = UI.getUse();
    if (Use.getUser() != N && Use.getResNo() == 0 &&
        Use.getUser()->getOpcode() == N->getOpcode()) {
      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());

      // No volatile, indexed or atomic loads/stores.
      if (!LSNode2->isSimple() || LSNode2->isIndexed())
        continue;

      // Check if LSNode1 and LSNode2 have the same type and extension.
      if (LSNode1->getOpcode() == ISD::LOAD)
        if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
            cast<LoadSDNode>(LSNode1)->getExtensionType())
          continue;

      if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
        continue;

      auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));

      // Check if the base pointer is the same for both instructions.
      if (Base1 != Base2)
        continue;

      // Check if the offsets match the XTHeadMemPair encoding constraints:
      // the first offset must be a 2-bit index scaled by the element size.
      if (MemVT == MVT::i32) {
        // Check for adjacent i32 values and a 2-bit index (scaled by 8).
        if ((Offset1 + 4 != Offset2) || !isShiftedUInt<2, 3>(Offset1))
          continue;
      } else {
        // Check for adjacent i64 values and a 2-bit index (scaled by 16).
        if ((Offset1 + 8 != Offset2) || !isShiftedUInt<2, 4>(Offset1))
          continue;
      }

      // Try to combine.
      if (SDValue Res =
              tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
        return Res;
    }
  }

  return SDValue();
}

// Fold
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X)) -> fcvt X, rtz
Expand Down Expand Up @@ -10293,6 +10562,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return performADDCombine(N, DAG, Subtarget);
case ISD::SUB:
return performSUBCombine(N, DAG, Subtarget);
case ISD::MUL:
return performMULCombine(N, DAG, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI, Subtarget);
case ISD::OR:
Expand Down Expand Up @@ -10605,7 +10876,15 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
VL);
}
case ISD::LOAD:
case ISD::STORE: {
if (DCI.isAfterLegalizeDAG())
if (SDValue V = performMemPairCombine(N, DCI))
return V;

if (N->getOpcode() != ISD::STORE)
break;

auto *Store = cast<StoreSDNode>(N);
SDValue Val = Store->getValue();
// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
Expand Down Expand Up @@ -13451,6 +13730,11 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ORC_B)
NODE_NAME_CASE(ZIP)
NODE_NAME_CASE(UNZIP)
NODE_NAME_CASE(TH_LWD)
NODE_NAME_CASE(TH_LWUD)
NODE_NAME_CASE(TH_LDD)
NODE_NAME_CASE(TH_SWD)
NODE_NAME_CASE(TH_SDD)
NODE_NAME_CASE(VMV_V_X_VL)
NODE_NAME_CASE(VFMV_V_F_VL)
NODE_NAME_CASE(VMV_X_S)
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,12 @@ enum NodeType : unsigned {
// Load address.
LA = ISD::FIRST_TARGET_MEMORY_OPCODE,
LA_TLS_IE,

TH_LWD,
TH_LWUD,
TH_LDD,
TH_SWD,
TH_SDD,
};
} // namespace RISCVISD

Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2102,6 +2102,14 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;

switch (MI.getOpcode()) {
case RISCV::TH_MULA:
case RISCV::TH_MULAW:
case RISCV::TH_MULAH:
case RISCV::TH_MULS:
case RISCV::TH_MULSW:
case RISCV::TH_MULSH:
// Operands 2 and 3 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
case RISCV::PseudoCCMOVGPR:
// Operands 4 and 5 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
Expand Down
153 changes: 92 additions & 61 deletions llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,26 @@
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// T-HEAD specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile for paired loads: two results (tied to the offset operand's
// XLenVT type), a pointer base operand, and an XLenVT offset operand.
def SDT_TDBLD : SDTypeProfile<2, 2,
  [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
// Type profile for paired stores: two XLenVT data operands, a pointer base
// operand, and an XLenVT offset operand; no results.
def SDT_TDBST : SDTypeProfile<0, 4,
  [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;

// Paired-load nodes produced by the DAG combiner (see performMemPairCombine):
// zero-extending word pair, word pair, doubleword pair.
def TH_TLWUD : SDNode<"RISCVISD::TH_LWUD", SDT_TDBLD,
                      [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def TH_TLWD  : SDNode<"RISCVISD::TH_LWD", SDT_TDBLD,
                      [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def TH_TLDD  : SDNode<"RISCVISD::TH_LDD", SDT_TDBLD,
                      [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Paired-store nodes: word pair, doubleword pair.
def TH_TSWD  : SDNode<"RISCVISD::TH_SWD", SDT_TDBST,
                      [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def TH_TSDD  : SDNode<"RISCVISD::TH_SDD", SDT_TDBST,
                      [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -96,6 +116,29 @@ class THMulAccumulate_rr<bits<7> funct7, string opcodestr>
let Constraints = "$rd_wb = $rd";
}

// Format class for XTHeadMemPair paired loads: two destination registers,
// a base register, a 2-bit index (uimm2, encoded in Inst{26-25}) and a
// constant shift amount (3 or 4) that only appears in the assembly string.
let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
    hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
class THLoadPair<bits<5> funct5, string opcodestr>
    : RVInstR<!shl(funct5, 2), 0b100, OPC_CUSTOM_0,
              (outs GPR:$rd, GPR:$rs2), (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
              opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
  bits<2> uimm2;
  let Inst{26-25} = uimm2;
  let DecoderMethod = "decodeXTHeadMemPair";
  // The destinations are written before all sources are read, so neither
  // may be allocated to the base register.
  let Constraints = "@earlyclobber $rd,@earlyclobber $rs2";
}

// Format class for XTHeadMemPair paired stores: two data registers, a base
// register, a 2-bit index and a constant shift amount (3 or 4).
let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
    hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class THStorePair<bits<5> funct5, string opcodestr>
    : RVInstR<!shl(funct5, 2), 0b101, OPC_CUSTOM_0,
              (outs), (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
              opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
  bits<2> uimm2;
  let Inst{26-25} = uimm2;
  let DecoderMethod = "decodeXTHeadMemPair";
}


//===----------------------------------------------------------------------===//
// Combination of instruction classes.
Expand Down Expand Up @@ -154,6 +197,23 @@ def TH_MULAW : THMulAccumulate_rr<0b0010010, "th.mulaw">;
def TH_MULSW : THMulAccumulate_rr<0b0010011, "th.mulsw">;
} // Predicates = [HasVendorXTHeadMac, IsRV64]

// Pair instructions available on both RV32 and RV64.
let Predicates = [HasVendorXTHeadMemPair] in {
def TH_LWUD : THLoadPair<0b11110, "th.lwud">,
              Sched<[WriteLDW, WriteLDW, ReadMemBase]>;
def TH_SWD  : THStorePair<0b11100, "th.swd">,
              Sched<[WriteSTW, WriteSTW, ReadStoreData, ReadMemBase]>;
// th.lwd sign-extends both loaded words on RV64.
let IsSignExtendingOpW = 1 in
def TH_LWD  : THLoadPair<0b11100, "th.lwd">,
              Sched<[WriteLDW, WriteLDW, ReadMemBase]>;
}

// Doubleword pair instructions are RV64-only.
let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
def TH_LDD : THLoadPair<0b11111, "th.ldd">,
             Sched<[WriteLDD, WriteLDD, ReadMemBase]>;
def TH_SDD : THStorePair<0b11111, "th.sdd">,
             Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>;
}

let Predicates = [HasVendorXTHeadVdot],
Constraints = "@earlyclobber $vd",
RVVConstraint = WidenV in {
Expand Down Expand Up @@ -240,67 +300,6 @@ def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
(TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>;
def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
(TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;

def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 1)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 1)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 1)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 3)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 3)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 3)>;

def : Pat<(add GPR:$r, CSImm12MulBy4:$i),
(TH_ADDSL GPR:$r, (ADDI X0, (SimmShiftRightBy2XForm CSImm12MulBy4:$i)), 2)>;
def : Pat<(add GPR:$r, CSImm12MulBy8:$i),
(TH_ADDSL GPR:$r, (ADDI X0, (SimmShiftRightBy3XForm CSImm12MulBy8:$i)), 3)>;

def : Pat<(mul GPR:$r, C3LeftShift:$i),
(SLLI (TH_ADDSL GPR:$r, GPR:$r, 1),
(TrailingZeros C3LeftShift:$i))>;
def : Pat<(mul GPR:$r, C5LeftShift:$i),
(SLLI (TH_ADDSL GPR:$r, GPR:$r, 2),
(TrailingZeros C5LeftShift:$i))>;
def : Pat<(mul GPR:$r, C9LeftShift:$i),
(SLLI (TH_ADDSL GPR:$r, GPR:$r, 3),
(TrailingZeros C9LeftShift:$i))>;

def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 1)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 19)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 13)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 1), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 21)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 37)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 25)),
(TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2), (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 41)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 3)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 73)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 27)),
(TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 45)),
(TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
(TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>;

def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 200)),
(SLLI (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2),
(TH_ADDSL GPR:$r, GPR:$r, 2), 2), 3)>;
} // Predicates = [HasVendorXTHeadBa]

let Predicates = [HasVendorXTHeadBb] in {
Expand Down Expand Up @@ -393,3 +392,35 @@ defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqau", "PseudoTHVdotVMAQAU", AllQu
defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU",AllQuadWidenableInt8NoVLMulVectors>;
defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus", "PseudoTHVdotVMAQAUS",AllQuadWidenableInt8NoVLMulVectors>;
}

// Extract the 2-bit index from an offset that is a multiple of 8.
def uimm2_3_XFORM : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 0x3,
                                   SDLoc(N), Subtarget->getXLenVT());
}]>;

// An offset operand that is a 2-bit value shifted left by 3 (a multiple of
// 8 in [0, 24]), as used by the word-pair instructions.
def uimm2_3 : Operand<XLenVT>, ImmLeaf<XLenVT, [{
  return isShiftedUInt<2, 3>(Imm);
}], uimm2_3_XFORM>;

// Extract the 2-bit index from an offset that is a multiple of 16.
def uimm2_4_XFORM : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 4) & 0x3,
                                   SDLoc(N), Subtarget->getXLenVT());
}]>;

// An offset operand that is a 2-bit value shifted left by 4 (a multiple of
// 16 in [0, 48]), as used by the doubleword-pair instructions.
def uimm2_4 : Operand<XLenVT>, ImmLeaf<XLenVT, [{
  return isShiftedUInt<2, 4>(Imm);
}], uimm2_4_XFORM>;

// Select the combiner-created pair nodes into the pair instructions. The
// trailing immediate (3 or 4) is the constant shift printed in assembly.
let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
def : Pat<(TH_TLWUD i64:$rs1, uimm2_3:$uimm2_3), (TH_LWUD i64:$rs1, uimm2_3:$uimm2_3, 3)>;
def : Pat<(TH_TLDD i64:$rs1, uimm2_4:$uimm2_4), (TH_LDD i64:$rs1, uimm2_4:$uimm2_4, 4)>;

def : Pat<(TH_TSDD i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4),
          (TH_SDD i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4, 4)>;
}

let Predicates = [HasVendorXTHeadMemPair] in {
def : Pat<(TH_TLWD GPR:$rs1, uimm2_3:$uimm2_3), (TH_LWD GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
def : Pat<(TH_TSWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3),
          (TH_SWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
}
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/RISCV/attributes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
; RUN: llc -mtriple=riscv32 -mattr=+svpbmt %s -o - | FileCheck --check-prefixes=CHECK,RV32SVPBMT %s
; RUN: llc -mtriple=riscv32 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV32SVINVAL %s
; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac %s -o - | FileCheck --check-prefixes=CHECK,RV32XTHEADMAC %s
; RUN: llc -mtriple=riscv32 -mattr=+xtheadmempair %s -o - | FileCheck --check-prefix=RV32XTHEADMEMPAIR %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zca %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCA %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcb %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCB %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcd %s -o - | FileCheck --check-prefixes=CHECK,RV32ZCD %s
Expand Down Expand Up @@ -95,6 +96,7 @@
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBB %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADBS %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADMAC %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadmempair %s -o - | FileCheck --check-prefix=RV64XTHEADMEMPAIR %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADVDOT %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zawrs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZAWRS %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-ztso %s -o - | FileCheck --check-prefixes=CHECK,RV64ZTSO %s
Expand Down Expand Up @@ -147,6 +149,7 @@
; RV32SVPBMT: .attribute 5, "rv32i2p0_svpbmt1p0"
; RV32SVINVAL: .attribute 5, "rv32i2p0_svinval1p0"
; RV32XTHEADMAC: .attribute 5, "rv32i2p0_xtheadmac1p0"
; RV32XTHEADMEMPAIR: .attribute 5, "rv32i2p0_xtheadmempair1p0"
; RV32ZCA: .attribute 5, "rv32i2p0_zca1p0"
; RV32ZCB: .attribute 5, "rv32i2p0_zca1p0_zcb1p0"
; RV32ZCD: .attribute 5, "rv32i2p0_zcd1p0"
Expand Down Expand Up @@ -200,6 +203,7 @@
; RV64XTHEADBB: .attribute 5, "rv64i2p0_xtheadbb1p0"
; RV64XTHEADBS: .attribute 5, "rv64i2p0_xtheadbs1p0"
; RV64XTHEADMAC: .attribute 5, "rv64i2p0_xtheadmac1p0"
; RV64XTHEADMEMPAIR: .attribute 5, "rv64i2p0_xtheadmempair1p0"
; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0"
; RV64ZTSO: .attribute 5, "rv64i2p0_ztso0p1"
; RV64ZCA: .attribute 5, "rv64i2p0_zca1p0"
Expand Down
142 changes: 142 additions & 0 deletions llvm/test/CodeGen/RISCV/bitextract-mac.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv32 -mattr=+zbb -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32ZBB
; RUN: llc -mtriple=riscv32 -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32XTHEADBB
; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32XTHEADMAC
; RUN: llc -mtriple=riscv32 -mattr=+xtheadmac -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32XTHEAD
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -mattr=+zbb -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64ZBB
; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64XTHEADMAC
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64XTHEADBB
; RUN: llc -mtriple=riscv64 -mattr=+xtheadmac -mattr=+xtheadbb -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64XTHEAD

define i32 @f(i32 %A, i32 %B, i32 %C) {
; RV32I-LABEL: f:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: mul a0, a1, a0
; RV32I-NEXT: slli a1, a0, 26
; RV32I-NEXT: srli a1, a1, 28
; RV32I-NEXT: slli a0, a0, 20
; RV32I-NEXT: srli a0, a0, 25
; RV32I-NEXT: mul a0, a1, a0
; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: f:
; RV32ZBB: # %bb.0: # %entry
; RV32ZBB-NEXT: mul a0, a1, a0
; RV32ZBB-NEXT: slli a1, a0, 26
; RV32ZBB-NEXT: srli a1, a1, 28
; RV32ZBB-NEXT: slli a0, a0, 20
; RV32ZBB-NEXT: srli a0, a0, 25
; RV32ZBB-NEXT: mul a0, a1, a0
; RV32ZBB-NEXT: add a0, a0, a2
; RV32ZBB-NEXT: ret
;
; RV32XTHEADBB-LABEL: f:
; RV32XTHEADBB: # %bb.0: # %entry
; RV32XTHEADBB-NEXT: mul a0, a1, a0
; RV32XTHEADBB-NEXT: slli a1, a0, 26
; RV32XTHEADBB-NEXT: srli a1, a1, 28
; RV32XTHEADBB-NEXT: slli a0, a0, 20
; RV32XTHEADBB-NEXT: srli a0, a0, 25
; RV32XTHEADBB-NEXT: mul a0, a1, a0
; RV32XTHEADBB-NEXT: add a0, a0, a2
; RV32XTHEADBB-NEXT: ret
;
; RV32XTHEADMAC-LABEL: f:
; RV32XTHEADMAC: # %bb.0: # %entry
; RV32XTHEADMAC-NEXT: mul a0, a1, a0
; RV32XTHEADMAC-NEXT: slli a1, a0, 26
; RV32XTHEADMAC-NEXT: srli a1, a1, 28
; RV32XTHEADMAC-NEXT: slli a0, a0, 20
; RV32XTHEADMAC-NEXT: srli a0, a0, 25
; RV32XTHEADMAC-NEXT: th.mulah a2, a1, a0
; RV32XTHEADMAC-NEXT: mv a0, a2
; RV32XTHEADMAC-NEXT: ret
;
; RV32XTHEAD-LABEL: f:
; RV32XTHEAD: # %bb.0: # %entry
; RV32XTHEAD-NEXT: mul a0, a1, a0
; RV32XTHEAD-NEXT: slli a1, a0, 26
; RV32XTHEAD-NEXT: srli a1, a1, 28
; RV32XTHEAD-NEXT: slli a0, a0, 20
; RV32XTHEAD-NEXT: srli a0, a0, 25
; RV32XTHEAD-NEXT: th.mulah a2, a1, a0
; RV32XTHEAD-NEXT: mv a0, a2
; RV32XTHEAD-NEXT: ret
;
; RV64I-LABEL: f:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: mulw a0, a1, a0
; RV64I-NEXT: slli a1, a0, 58
; RV64I-NEXT: srli a1, a1, 60
; RV64I-NEXT: slli a0, a0, 52
; RV64I-NEXT: srli a0, a0, 57
; RV64I-NEXT: mulw a0, a1, a0
; RV64I-NEXT: addw a0, a0, a2
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: f:
; RV64ZBB: # %bb.0: # %entry
; RV64ZBB-NEXT: mulw a0, a1, a0
; RV64ZBB-NEXT: slli a1, a0, 58
; RV64ZBB-NEXT: srli a1, a1, 60
; RV64ZBB-NEXT: slli a0, a0, 52
; RV64ZBB-NEXT: srli a0, a0, 57
; RV64ZBB-NEXT: mulw a0, a1, a0
; RV64ZBB-NEXT: addw a0, a0, a2
; RV64ZBB-NEXT: ret
;
; RV64XTHEADMAC-LABEL: f:
; RV64XTHEADMAC: # %bb.0: # %entry
; RV64XTHEADMAC-NEXT: mulw a0, a1, a0
; RV64XTHEADMAC-NEXT: slli a1, a0, 58
; RV64XTHEADMAC-NEXT: srli a1, a1, 60
; RV64XTHEADMAC-NEXT: slli a0, a0, 52
; RV64XTHEADMAC-NEXT: srli a0, a0, 57
; RV64XTHEADMAC-NEXT: th.mulah a2, a1, a0
; RV64XTHEADMAC-NEXT: mv a0, a2
; RV64XTHEADMAC-NEXT: ret
;
; RV64XTHEADBB-LABEL: f:
; RV64XTHEADBB: # %bb.0: # %entry
; RV64XTHEADBB-NEXT: mulw a0, a1, a0
; RV64XTHEADBB-NEXT: slli a1, a0, 58
; RV64XTHEADBB-NEXT: srli a1, a1, 60
; RV64XTHEADBB-NEXT: slli a0, a0, 52
; RV64XTHEADBB-NEXT: srli a0, a0, 57
; RV64XTHEADBB-NEXT: mulw a0, a1, a0
; RV64XTHEADBB-NEXT: addw a0, a0, a2
; RV64XTHEADBB-NEXT: ret
;
; RV64XTHEAD-LABEL: f:
; RV64XTHEAD: # %bb.0: # %entry
; RV64XTHEAD-NEXT: mulw a0, a1, a0
; RV64XTHEAD-NEXT: slli a1, a0, 58
; RV64XTHEAD-NEXT: srli a1, a1, 60
; RV64XTHEAD-NEXT: slli a0, a0, 52
; RV64XTHEAD-NEXT: srli a0, a0, 57
; RV64XTHEAD-NEXT: th.mulah a2, a1, a0
; RV64XTHEAD-NEXT: mv a0, a2
; RV64XTHEAD-NEXT: ret
entry:
%mul = mul nsw i32 %B, %A
%0 = lshr i32 %mul, 2
%and = and i32 %0, 15
%1 = lshr i32 %mul, 5
%and2 = and i32 %1, 127
%mul3 = mul nuw nsw i32 %and, %and2
%add = add i32 %mul3, %C
ret i32 %add
}
63 changes: 56 additions & 7 deletions llvm/test/CodeGen/RISCV/xtheadmac.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@ define i64 @mula_i64(i64 %a, i64 %b, i64 %c) {
; RV32XTHEADMAC-NEXT: mulhu a6, a2, a4
; RV32XTHEADMAC-NEXT: th.mula a6, a2, a5
; RV32XTHEADMAC-NEXT: th.mula a6, a3, a4
; RV32XTHEADMAC-NEXT: th.mula a2, a0, a4
; RV32XTHEADMAC-NEXT: sltu a0, a2, a0
; RV32XTHEADMAC-NEXT: mv a3, a0
; RV32XTHEADMAC-NEXT: th.mula a3, a2, a4
; RV32XTHEADMAC-NEXT: sltu a0, a3, a0
; RV32XTHEADMAC-NEXT: add a0, a1, a0
; RV32XTHEADMAC-NEXT: add a1, a0, a6
; RV32XTHEADMAC-NEXT: mv a0, a2
; RV32XTHEADMAC-NEXT: mv a0, a3
; RV32XTHEADMAC-NEXT: ret
;
; RV64XTHEADMAC-LABEL: mula_i64:
Expand Down Expand Up @@ -99,11 +100,10 @@ define i64 @muls_i64(i64 %a, i64 %b, i64 %c) {
; RV32XTHEADMAC-NEXT: th.mula a6, a2, a5
; RV32XTHEADMAC-NEXT: th.mula a6, a3, a4
; RV32XTHEADMAC-NEXT: mul a3, a2, a4
; RV32XTHEADMAC-NEXT: th.muls a2, a0, a4
; RV32XTHEADMAC-NEXT: sltu a0, a0, a3
; RV32XTHEADMAC-NEXT: sub a1, a1, a0
; RV32XTHEADMAC-NEXT: sltu a3, a0, a3
; RV32XTHEADMAC-NEXT: th.muls a0, a2, a4
; RV32XTHEADMAC-NEXT: sub a1, a1, a3
; RV32XTHEADMAC-NEXT: sub a1, a1, a6
; RV32XTHEADMAC-NEXT: mv a0, a2
; RV32XTHEADMAC-NEXT: ret
;
; RV64XTHEADMAC-LABEL: muls_i64:
Expand Down Expand Up @@ -150,3 +150,52 @@ define i64 @mulsh_i64(i32 %a, i16 %b, i16 %c) {
%h = sext i32 %g to i64
ret i64 %h
}

define i32 @commutative1(i32 %A, i32 %B, i32 %C) {
; RV32XTHEADMAC-LABEL: commutative1:
; RV32XTHEADMAC: # %bb.0:
; RV32XTHEADMAC-NEXT: th.mula a2, a1, a0
; RV32XTHEADMAC-NEXT: mv a0, a2
; RV32XTHEADMAC-NEXT: ret
;
; RV64XTHEADMAC-LABEL: commutative1:
; RV64XTHEADMAC: # %bb.0:
; RV64XTHEADMAC-NEXT: th.mulaw a2, a1, a0
; RV64XTHEADMAC-NEXT: mv a0, a2
; RV64XTHEADMAC-NEXT: ret
%mul = mul nsw i32 %B, %A
%add = add i32 %mul, %C
ret i32 %add
}

define i32 @commutative2(i32 %A, i32 %B, i32 %C) {
; RV32XTHEADMAC-LABEL: commutative2:
; RV32XTHEADMAC: # %bb.0:
; RV32XTHEADMAC-NEXT: th.mula a0, a1, a2
; RV32XTHEADMAC-NEXT: ret
;
; RV64XTHEADMAC-LABEL: commutative2:
; RV64XTHEADMAC: # %bb.0:
; RV64XTHEADMAC-NEXT: th.mulaw a0, a1, a2
; RV64XTHEADMAC-NEXT: ret
%mul = mul nsw i32 %B, %C
%add = add i32 %mul, %A
ret i32 %add
}

define i32 @commutative3(i32 %A, i32 %B, i32 %C) {
; RV32XTHEADMAC-LABEL: commutative3:
; RV32XTHEADMAC: # %bb.0:
; RV32XTHEADMAC-NEXT: th.mula a1, a2, a0
; RV32XTHEADMAC-NEXT: mv a0, a1
; RV32XTHEADMAC-NEXT: ret
;
; RV64XTHEADMAC-LABEL: commutative3:
; RV64XTHEADMAC: # %bb.0:
; RV64XTHEADMAC-NEXT: th.mulaw a1, a2, a0
; RV64XTHEADMAC-NEXT: mv a0, a1
; RV64XTHEADMAC-NEXT: ret
%mul = mul nsw i32 %C, %A
%add = add i32 %mul, %B
ret i32 %add
}
227 changes: 227 additions & 0 deletions llvm/test/CodeGen/RISCV/xtheadmempair.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+xtheadmempair -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32XTHEADMEMPAIR
; RUN: llc -mtriple=riscv64 -mattr=+xtheadmempair -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64XTHEADMEMPAIR

; Two sign-extending word loads at consecutive offsets (elements 4 and 5)
; must merge into a single th.lwd pair load on both RV32 and RV64.
define i64 @lwd(i32* %a) {
; RV32XTHEADMEMPAIR-LABEL: lwd:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 2, 3
; RV32XTHEADMEMPAIR-NEXT: srai a3, a1, 31
; RV32XTHEADMEMPAIR-NEXT: srai a4, a2, 31
; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
; RV32XTHEADMEMPAIR-NEXT: add a3, a3, a4
; RV32XTHEADMEMPAIR-NEXT: add a1, a3, a1
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: lwd:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 2, 3
; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i32, i32* %a, i64 4
%2 = load i32, i32* %1, align 4
%3 = getelementptr i32, i32* %a, i64 5
%4 = load i32, i32* %3, align 4
%5 = sext i32 %2 to i64
%6 = sext i32 %4 to i64
%7 = add i64 %5, %6
ret i64 %7
}

; Zero-extending pair: RV64 selects th.lwud (unsigned word pair load); on
; RV32 zext of i32 is free, so the plain th.lwd form is used instead.
define i64 @lwud(i32* %a) {
; RV32XTHEADMEMPAIR-LABEL: lwud:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 2, 3
; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: lwud:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.lwud a1, a2, (a0), 2, 3
; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i32, i32* %a, i64 4
%2 = load i32, i32* %1, align 4
%3 = getelementptr i32, i32* %a, i64 5
%4 = load i32, i32* %3, align 4
%5 = zext i32 %2 to i64
%6 = zext i32 %4 to i64
%7 = add i64 %5, %6
ret i64 %7
}

; Doubleword pair: RV64 merges the two i64 loads into th.ldd. On RV32 each
; i64 is legalized into two i32 halves; at this offset (32) the checks show
; four scalar lw instructions rather than paired loads.
define i64 @ldd(i64* %a) {
; RV32XTHEADMEMPAIR-LABEL: ldd:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: lw a1, 32(a0)
; RV32XTHEADMEMPAIR-NEXT: lw a2, 36(a0)
; RV32XTHEADMEMPAIR-NEXT: lw a3, 44(a0)
; RV32XTHEADMEMPAIR-NEXT: lw a0, 40(a0)
; RV32XTHEADMEMPAIR-NEXT: add a2, a2, a3
; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a0
; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
; RV32XTHEADMEMPAIR-NEXT: add a1, a2, a1
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: ldd:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.ldd a1, a2, (a0), 2, 4
; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i64, i64* %a, i64 4
%2 = load i64, i64* %1, align 8
%3 = getelementptr i64, i64* %a, i64 5
%4 = load i64, i64* %3, align 8
%5 = add i64 %2, %4
ret i64 %5
}

; Same as @lwd but at base offset 0 (elements 0 and 1) — exercises the
; zero-immediate encoding of th.lwd on both targets.
define i64 @lwd_0(i32* %a) {
; RV32XTHEADMEMPAIR-LABEL: lwd_0:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 0, 3
; RV32XTHEADMEMPAIR-NEXT: srai a3, a1, 31
; RV32XTHEADMEMPAIR-NEXT: srai a4, a2, 31
; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
; RV32XTHEADMEMPAIR-NEXT: add a3, a3, a4
; RV32XTHEADMEMPAIR-NEXT: add a1, a3, a1
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: lwd_0:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 0, 3
; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i32, i32* %a, i64 0
%2 = load i32, i32* %1, align 4
%3 = getelementptr i32, i32* %a, i64 1
%4 = load i32, i32* %3, align 4
%5 = sext i32 %2 to i64
%6 = sext i32 %4 to i64
%7 = add i64 %5, %6
ret i64 %7
}

; Same as @lwud but at base offset 0 — th.lwud with a zero immediate on
; RV64; RV32 again falls back to th.lwd since zext is free there.
define i64 @lwud_0(i32* %a) {
; RV32XTHEADMEMPAIR-LABEL: lwud_0:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 0, 3
; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: lwud_0:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.lwud a1, a2, (a0), 0, 3
; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i32, i32* %a, i64 0
%2 = load i32, i32* %1, align 4
%3 = getelementptr i32, i32* %a, i64 1
%4 = load i32, i32* %3, align 4
%5 = zext i32 %2 to i64
%6 = zext i32 %4 to i64
%7 = add i64 %5, %6
ret i64 %7
}

; Doubleword pair at offset 0: RV64 uses th.ldd. On RV32 the split i64
; halves are themselves pairable here, yielding two th.lwd instructions.
define i64 @ldd_0(i64* %a) {
; RV32XTHEADMEMPAIR-LABEL: ldd_0:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: th.lwd a1, a2, (a0), 0, 3
; RV32XTHEADMEMPAIR-NEXT: th.lwd a3, a4, (a0), 1, 3
; RV32XTHEADMEMPAIR-NEXT: add a2, a2, a4
; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a3
; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
; RV32XTHEADMEMPAIR-NEXT: add a1, a2, a1
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: ldd_0:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.ldd a1, a2, (a0), 0, 4
; RV64XTHEADMEMPAIR-NEXT: add a0, a1, a2
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i64, i64* %a, i64 0
%2 = load i64, i64* %1, align 8
%3 = getelementptr i64, i64* %a, i64 1
%4 = load i64, i64* %3, align 8
%5 = add i64 %2, %4
ret i64 %5
}

; Two adjacent word stores (elements 4 and 5) must merge into a single
; th.swd pair store on both RV32 and RV64.
define void @swd(i32* %a, i32 %b, i32%c) {
; RV32XTHEADMEMPAIR-LABEL: swd:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 2, 3
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: swd:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 2, 3
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i32, i32* %a, i64 4
store i32 %b, i32* %1, align 4
%2 = getelementptr i32, i32* %a, i64 5
store i32 %c, i32* %2, align 4
ret void
}

; Doubleword pair store: RV64 merges into th.sdd. On RV32 each i64 store is
; legalized to two sw; at this offset the checks show four scalar sw.
define void @sdd(i64* %a, i64 %b, i64%c) {
; RV32XTHEADMEMPAIR-LABEL: sdd:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: sw a2, 36(a0)
; RV32XTHEADMEMPAIR-NEXT: sw a1, 32(a0)
; RV32XTHEADMEMPAIR-NEXT: sw a4, 44(a0)
; RV32XTHEADMEMPAIR-NEXT: sw a3, 40(a0)
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: sdd:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.sdd a1, a2, (a0), 2, 4
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i64, i64* %a, i64 4
store i64 %b, i64* %1, align 8
%2 = getelementptr i64, i64* %a, i64 5
store i64 %c, i64* %2, align 8
ret void
}

; Same as @swd but at base offset 0 — exercises the zero-immediate
; encoding of th.swd.
define void @swd_0(i32* %a, i32 %b, i32%c) {
; RV32XTHEADMEMPAIR-LABEL: swd_0:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 0, 3
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: swd_0:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 0, 3
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i32, i32* %a, i64 0
store i32 %b, i32* %1, align 4
%2 = getelementptr i32, i32* %a, i64 1
store i32 %c, i32* %2, align 4
ret void
}

; Doubleword pair store at offset 0: RV64 uses th.sdd; on RV32 the split
; i64 halves are pairable, yielding two th.swd instructions.
define void @sdd_0(i64* %a, i64 %b, i64%c) {
; RV32XTHEADMEMPAIR-LABEL: sdd_0:
; RV32XTHEADMEMPAIR: # %bb.0:
; RV32XTHEADMEMPAIR-NEXT: th.swd a1, a2, (a0), 0, 3
; RV32XTHEADMEMPAIR-NEXT: th.swd a3, a4, (a0), 1, 3
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: sdd_0:
; RV64XTHEADMEMPAIR: # %bb.0:
; RV64XTHEADMEMPAIR-NEXT: th.sdd a1, a2, (a0), 0, 4
; RV64XTHEADMEMPAIR-NEXT: ret
%1 = getelementptr i64, i64* %a, i64 0
store i64 %b, i64* %1, align 8
%2 = getelementptr i64, i64* %a, i64 1
store i64 %c, i64* %2, align 8
ret void
}
20 changes: 20 additions & 0 deletions llvm/test/MC/RISCV/rv32xtheadmempair-invalid.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# RUN: not llvm-mc -triple riscv32 -mattr=+xtheadmempair < %s 2>&1 | FileCheck %s

# Diagnostics for malformed XTheadMemPair operands on RV32: out-of-range
# pair immediates, missing operands, wrong trailing constant, equal
# source/destination registers, and the RV64-only th.ldd/th.sdd forms.
# Each [[@LINE]] reference is relative to its own line, so the checks are
# insensitive to lines added elsewhere in the file.
th.ldd t0, t1, (t2), 5, 4 # CHECK: [[@LINE]]:22: error: invalid operand for instruction
th.ldd t0, t1, (t2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.ldd t0, t1, (t2), 3, 5 # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
th.sdd a0, a1, (a2), 5, 4 # CHECK: [[@LINE]]:22: error: invalid operand for instruction
th.sdd a0, a1, (a2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.sdd a0, a1, (a2), 3, 5 # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
th.lwud t0, t1, (t2), 5, 4 # CHECK: [[@LINE]]:23: error: immediate must be an integer in the range [0, 3]
th.lwud t0, t1, (t2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.lwud t0, t1, (t2), 3, 5 # CHECK: [[@LINE]]:26: error: Operand must be constant 3.
th.lwd a3, a4, (a5), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
th.lwd a3, a4, (a5) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.lwd a3, a4, (a5), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
th.swd t3, t4, (t5), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
th.swd t3, t4, (t5) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.swd t3, t4, (t5), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
th.lwud x6, x6, (x6), 2, 3 # CHECK: [[@LINE]]:9: error: The source register and destination registers cannot be equal.
th.ldd t0, t1, (t2), 2, 4 # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
th.sdd t0, t1, (t2), 2, 4 # CHECK: [[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
18 changes: 18 additions & 0 deletions llvm/test/MC/RISCV/rv32xtheadmempair-valid.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# With the XTheadMemPair vendor extension:
# RUN: llvm-mc %s -triple=riscv32 -mattr=+xtheadmempair -show-encoding \
# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+xtheadmempair < %s \
# RUN: | llvm-objdump --mattr=+xtheadmempair -d -r - \
# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s

# Round-trip checks for the RV32-legal XTheadMemPair instructions: the
# assembler output and the objdump disassembly must both show the mnemonic,
# and the assembler must produce the expected 32-bit encoding.
# CHECK-ASM-AND-OBJ: th.lwd
# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xe2]
th.lwd a0, a1, (a2), 1, 3

# CHECK-ASM-AND-OBJ: th.lwud
# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xf4]
th.lwud a0, a1, (a2), 2, 3

# CHECK-ASM-AND-OBJ: th.swd
# CHECK-ASM: encoding: [0x0b,0x55,0xb6,0xe0]
th.swd a0, a1, (a2), 0, 3
18 changes: 18 additions & 0 deletions llvm/test/MC/RISCV/rv64xtheadmempair-invalid.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# RUN: not llvm-mc -triple riscv64 -mattr=+xtheadmempair < %s 2>&1 | FileCheck %s

# Diagnostics for malformed XTheadMemPair operands on RV64. Unlike the
# RV32 variant of this test, th.ldd/th.sdd are legal here, so their bad
# immediates produce range/constant errors rather than an RV64I-required
# error. [[@LINE]] references are relative to their own line.
th.ldd t0, t1, (t2), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
th.ldd t0, t1, (t2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.ldd t0, t1, (t2), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 4.
th.sdd a0, a1, (a2), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
th.sdd a0, a1, (a2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.sdd a0, a1, (a2), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 4.
th.lwud t0, t1, (t2), 5, 4 # CHECK: [[@LINE]]:23: error: immediate must be an integer in the range [0, 3]
th.lwud t0, t1, (t2) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.lwud t0, t1, (t2), 3, 5 # CHECK: [[@LINE]]:26: error: Operand must be constant 3.
th.lwd a3, a4, (a5), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
th.lwd a3, a4, (a5) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.lwd a3, a4, (a5), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
th.swd t3, t4, (t5), 5, 4 # CHECK: [[@LINE]]:22: error: immediate must be an integer in the range [0, 3]
th.swd t3, t4, (t5) # CHECK: [[@LINE]]:1: error: too few operands for instruction
th.swd t3, t4, (t5), 3, 5 # CHECK: [[@LINE]]:25: error: Operand must be constant 3.
th.lwud x6, x6, (x6), 2, 3 # CHECK: [[@LINE]]:9: error: The source register and destination registers cannot be equal.
26 changes: 26 additions & 0 deletions llvm/test/MC/RISCV/rv64xtheadmempair-valid.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# With the XTheadMemPair vendor extension:
# RUN: llvm-mc %s -triple=riscv64 -mattr=+xtheadmempair -show-encoding \
# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+xtheadmempair < %s \
# RUN: | llvm-objdump --mattr=+xtheadmempair -d -r - \
# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s

# Round-trip checks for all XTheadMemPair instructions on RV64, including
# the 64-bit-only th.ldd/th.sdd: assembly and disassembly must show the
# mnemonic, and the assembler must emit the expected encoding.
# CHECK-ASM-AND-OBJ: th.lwd
# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xe2]
th.lwd a0, a1, (a2), 1, 3

# CHECK-ASM-AND-OBJ: th.lwud
# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xf4]
th.lwud a0, a1, (a2), 2, 3

# CHECK-ASM-AND-OBJ: th.swd
# CHECK-ASM: encoding: [0x0b,0x55,0xb6,0xe0]
th.swd a0, a1, (a2), 0, 3

# CHECK-ASM-AND-OBJ: th.ldd
# CHECK-ASM: encoding: [0x0b,0x45,0xb6,0xf8]
th.ldd a0, a1, (a2), 0, 4

# CHECK-ASM-AND-OBJ: th.sdd
# CHECK-ASM: encoding: [0x0b,0x55,0xb6,0xfe]
th.sdd a0, a1, (a2), 3, 4