158 changes: 151 additions & 7 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -233,7 +233,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,

setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

if (!Subtarget.hasStdExtZbb())
if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
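As a hedged illustration of why the Expand is skipped here (hypothetical test, not part of this patch): XTheadBb's th.ext covers sext_inreg directly, per the patterns added in RISCVInstrInfoXTHead.td below.

```llvm
; Sketch assuming -mtriple=riscv64 -mattr=+xtheadbb; the output shown is
; approximate.
define signext i32 @sext_i8(i32 signext %a) nounwind {
  %t = trunc i32 %a to i8
  %s = sext i8 %t to i32        ; folds to (sext_inreg %a, i8)
  ret i32 %s
  ; expected (roughly): th.ext a0, a0, 7, 0
}
```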

if (Subtarget.is64Bit()) {
@@ -280,7 +280,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
Custom);

if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
Subtarget.hasVendorXTHeadBb()) {
if (Subtarget.is64Bit())
setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
} else {
@@ -290,7 +291,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
setOperationAction(ISD::BSWAP, XLenVT,
(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
Subtarget.hasVendorXTHeadBb())
? Legal
: Expand);
// Zbkb can use rev8+brev8 to implement bitreverse.
@@ -309,6 +311,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
}

if (Subtarget.hasVendorXTHeadBb()) {
setOperationAction({ISD::CTLZ}, XLenVT, Legal);

// We need the custom lowering to make sure that the resulting sequence
// for the 32-bit case is efficient on 64-bit targets.
if (Subtarget.is64Bit())
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
}
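A hedged sketch of the sequence this custom lowering is aiming for (hypothetical test; the matching pattern for riscv_clzw appears in RISCVInstrInfoXTHead.td below):

```llvm
define signext i32 @ctlz_i32(i32 signext %a) nounwind {
  %c = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  ret i32 %c
  ; expected on rv64 with +xtheadbb (roughly):
  ;   not  a0, a0
  ;   slli a0, a0, 32
  ;   th.ff0 a0, a0
}
declare i32 @llvm.ctlz.i32(i32, i1)
```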

if (Subtarget.is64Bit())
setOperationAction(ISD::ABS, MVT::i32, Custom);

@@ -1011,7 +1022,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setJumpIsExpensive();

setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT, ISD::MUL});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);

@@ -1212,7 +1223,7 @@ bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
return Subtarget.hasStdExtZbb();
return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
}

bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
@@ -1223,7 +1234,7 @@ bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
// on the basis that it's possible the sinking+duplication of the AND in
// CodeGenPrepare triggered by this hook wouldn't decrease the instruction
// count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
if (!Subtarget.hasStdExtZbs())
if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
return false;
ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
@@ -1246,8 +1257,11 @@ bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
// Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
if (Subtarget.hasStdExtZbs())
return X.getValueType().isScalarInteger();
// We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
auto *C = dyn_cast<ConstantSDNode>(Y);
// XTheadBs provides th.tst (similar to bexti), if Y is a constant.
if (Subtarget.hasVendorXTHeadBs())
return C != nullptr;
// We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
return C && C->getAPIntValue().ule(10);
}
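A hedged example of what returning true buys (hypothetical function name): with XTheadBs a bit test at any constant position stays a single instruction, whereas the ANDI fallback only covers bit positions up to 10.

```llvm
define i64 @bit45(i64 %a) nounwind {
  %shr = lshr i64 %a, 45
  %and = and i64 %shr, 1
  ret i64 %and
  ; expected with +xtheadbs (roughly): th.tst a0, a0, 45
}
```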

@@ -8569,6 +8583,134 @@ static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
}

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDLoc DL(N);
const MVT XLenVT = Subtarget.getXLenVT();
const EVT VT = N->getValueType(0);

// A MUL is usually smaller than any alternative sequence for a legal type.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (DAG.getMachineFunction().getFunction().hasMinSize() &&
TLI.isOperationLegal(ISD::MUL, VT))
return SDValue();

SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *ConstOp = dyn_cast<ConstantSDNode>(N1);
// Any optimization requires a constant RHS.
if (!ConstOp)
return SDValue();

const APInt &C = ConstOp->getAPIntValue();
// A multiply-by-pow2 will be reduced to a shift by the
// architecture-independent code.
if (C.isPowerOf2())
return SDValue();

// The optimizations below only work for non-negative constants.
if (!C.isNonNegative())
return SDValue();

auto Shl = [&](SDValue Value, unsigned ShiftAmount) {
if (!ShiftAmount)
return Value;

SDValue ShiftAmountConst = DAG.getConstant(ShiftAmount, DL, XLenVT);
return DAG.getNode(ISD::SHL, DL, Value.getValueType(), Value,
ShiftAmountConst);
};
auto Add = [&](SDValue Addend1, SDValue Addend2) {
return DAG.getNode(ISD::ADD, DL, Addend1.getValueType(), Addend1, Addend2);
};

if (Subtarget.hasVendorXTHeadBa()) {
// Try to simplify the multiplication into a sequence of up to
// 3 instructions (e.g. 2x shift-and-add and 1x shift).

auto isDivisibleByShiftedAddConst = [&](APInt C, APInt &N,
APInt &Quotient) {
unsigned BitWidth = C.getBitWidth();
for (unsigned i = 3; i >= 1; --i) {
APInt X(BitWidth, (1 << i) + 1);
APInt Remainder;
APInt::sdivrem(C, X, Quotient, Remainder);
if (Remainder == 0) {
N = X;
return true;
}
}
return false;
};
auto isShiftedAddConst = [&](APInt C, APInt &N) {
APInt Quotient;
return isDivisibleByShiftedAddConst(C, N, Quotient) && Quotient == 1;
};
auto isSmallShiftAmount = [](APInt C) {
return (C == 2) || (C == 4) || (C == 8);
};

auto ShiftAndAdd = [&](SDValue Value, unsigned ShiftAmount,
SDValue Addend) {
return Add(Shl(Value, ShiftAmount), Addend);
};
auto AnyExt = [&](SDValue Value) {
return DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Value);
};
auto Trunc = [&](SDValue Value) {
return DAG.getNode(ISD::TRUNCATE, DL, VT, Value);
};

unsigned TrailingZeroes = C.countTrailingZeros();
const APInt ShiftedC = C.ashr(TrailingZeroes);
const APInt ShiftedCMinusOne = ShiftedC - 1;

// The comments below use the following notation:
// n, m .. a shift-amount for a shift-and-add instruction
// (i.e. in { 2, 4, 8 })
// k .. a power-of-2 that is equivalent to shifting by
// TrailingZeroes bits
// i, j .. a power-of-2

APInt ShiftAmt1;
APInt ShiftAmt2;
APInt Quotient;

// C = (m + 1) * k
if (isShiftedAddConst(ShiftedC, ShiftAmt1)) {
SDValue Op0 = AnyExt(N0);
SDValue Result = ShiftAndAdd(Op0, ShiftAmt1.logBase2(), Op0);
return Trunc(Shl(Result, TrailingZeroes));
}
// C = (m + 1) * (n + 1) * k
if (isDivisibleByShiftedAddConst(ShiftedC, ShiftAmt1, Quotient) &&
isShiftedAddConst(Quotient, ShiftAmt2)) {
SDValue Op0 = AnyExt(N0);
SDValue Result = ShiftAndAdd(Op0, ShiftAmt1.logBase2(), Op0);
Result = ShiftAndAdd(Result, ShiftAmt2.logBase2(), Result);
return Trunc(Shl(Result, TrailingZeroes));
}
// C = ((m + 1) * n + 1) * k
if (isDivisibleByShiftedAddConst(ShiftedCMinusOne, ShiftAmt1, ShiftAmt2) &&
isSmallShiftAmount(ShiftAmt2)) {
SDValue Op0 = AnyExt(N0);
SDValue Result = ShiftAndAdd(Op0, ShiftAmt1.logBase2(), Op0);
Result = ShiftAndAdd(Result, ShiftAmt2.logBase2(), Op0);
return Trunc(Shl(Result, TrailingZeroes));
}

// C has 2 bits set: synthesize using 2 shifts and 1 add (which may
// see one of the shifts merged into a shift-and-add, if feasible)
if (C.countPopulation() == 2) {
// Use getOneBitSet to avoid undefined behavior from a 32-bit '1 << x'
// when the high bit is at position 31 or above.
APInt HighBit = APInt::getOneBitSet(C.getBitWidth(), C.logBase2());
APInt LowBit = C - HighBit;
return Add(Shl(N0, HighBit.logBase2()), Shl(N0, LowBit.logBase2()));
}
}

return SDValue();
}
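A worked example of the decomposition (hedged; register choices are approximate): C = 200 strips 3 trailing zeros to ShiftedC = 25 = 5 * 5, hitting the `C = (m + 1) * (n + 1) * k` case.

```llvm
define i64 @mul200(i64 %a) nounwind {
  %r = mul i64 %a, 200          ; 200 = 5 * 5 * 8
  ret i64 %r
  ; expected with +xtheadba (roughly):
  ;   th.addsl a0, a0, a0, 2    ; a0 = a + (a << 2) = 5 * a
  ;   th.addsl a0, a0, a0, 2    ; a0 = 25 * a
  ;   slli     a0, a0, 3        ; a0 = 200 * a
}
```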

static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
@@ -10218,6 +10360,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return performADDCombine(N, DAG, Subtarget);
case ISD::SUB:
return performSUBCombine(N, DAG, Subtarget);
case ISD::MUL:
return performMULCombine(N, DAG, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI, Subtarget);
case ISD::OR:
165 changes: 104 additions & 61 deletions llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -54,6 +54,38 @@ class THShiftALU_rri<bits<3> funct3, string opcodestr>
let Inst{26-25} = uimm2;
}

let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "THeadBb",
hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
class THShift_ri<bits<5> funct5, bits<3> funct3, string opcodestr>
: RVInstIShift<funct5, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
(ins GPR:$rs1, uimmlog2xlen:$shamt),
opcodestr, "$rd, $rs1, $shamt">;

class THBitfieldExtract_rii<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
(ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb),
opcodestr, "$rd, $rs1, $msb, $lsb"> {
bits<6> msb;
bits<6> lsb;
let Inst{31-26} = msb;
let Inst{25-20} = lsb;
}

class THRev_r<bits<5> funct5, bits<2> funct2, string opcodestr>
: RVInstR4<funct2, 0b001, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1),
opcodestr, "$rd, $rs1"> {
let rs3 = funct5;
let rs2 = 0;
}
}

let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "THeadBb",
hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class THShiftW_ri<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstIShiftW<funct7, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
(ins GPR:$rs1, uimm5:$shamt),
opcodestr, "$rd, $rs1, $shamt">;

//===----------------------------------------------------------------------===//
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
@@ -75,6 +107,27 @@ def TH_ADDSL : THShiftALU_rri<0b001, "th.addsl">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
} // Predicates = [HasVendorXTHeadBa]

let Predicates = [HasVendorXTHeadBb] in {
def TH_SRRI : THShift_ri<0b00010, 0b001, "th.srri">;
def TH_EXT : THBitfieldExtract_rii<0b010, "th.ext">;
def TH_EXTU : THBitfieldExtract_rii<0b011, "th.extu">;
def TH_FF0 : THRev_r<0b10000, 0b10, "th.ff0">;
def TH_FF1 : THRev_r<0b10000, 0b11, "th.ff1">;
def TH_REV : THRev_r<0b10000, 0b01, "th.rev">;
def TH_TSTNBZ : THRev_r<0b10000, 0b00, "th.tstnbz">;
} // Predicates = [HasVendorXTHeadBb]

let Predicates = [HasVendorXTHeadBb, IsRV64], IsSignExtendingOpW = 1 in {
def TH_SRRIW : THShiftW_ri<0b0001010, 0b001, "th.srriw">;
def TH_REVW : THRev_r<0b10010, 0b00, "th.revw">;
} // Predicates = [HasVendorXTHeadBb, IsRV64]

let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "THeadBs" in {
let IsSignExtendingOpW = 1 in
def TH_TST : RVBShift_ri<0b10001, 0b001, OPC_CUSTOM_0, "th.tst">,
Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
} // Predicates = [HasVendorXTHeadBs]

let Predicates = [HasVendorXTHeadVdot],
Constraints = "@earlyclobber $vd",
RVVConstraint = WidenV in {
@@ -161,69 +214,59 @@ def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
(TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>;
def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
(TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;

def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 1)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 1)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 1)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 2)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 3)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 3)>;
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
(TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 3)>;

def : Pat<(add GPR:$r, CSImm12MulBy4:$i),
(TH_ADDSL GPR:$r, (ADDI X0, (SimmShiftRightBy2XForm CSImm12MulBy4:$i)), 2)>;
def : Pat<(add GPR:$r, CSImm12MulBy8:$i),
(TH_ADDSL GPR:$r, (ADDI X0, (SimmShiftRightBy3XForm CSImm12MulBy8:$i)), 3)>;

def : Pat<(mul GPR:$r, C3LeftShift:$i),
(SLLI (TH_ADDSL GPR:$r, GPR:$r, 1),
(TrailingZeros C3LeftShift:$i))>;
def : Pat<(mul GPR:$r, C5LeftShift:$i),
(SLLI (TH_ADDSL GPR:$r, GPR:$r, 2),
(TrailingZeros C5LeftShift:$i))>;
def : Pat<(mul GPR:$r, C9LeftShift:$i),
(SLLI (TH_ADDSL GPR:$r, GPR:$r, 3),
(TrailingZeros C9LeftShift:$i))>;

def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 1)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 19)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 13)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 1), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 21)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 37)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 25)),
(TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2), (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 41)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 3)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 73)),
(TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 27)),
(TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 45)),
(TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>;
def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
(TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>;

def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 200)),
(SLLI (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2),
(TH_ADDSL GPR:$r, GPR:$r, 2), 2), 3)>;
} // Predicates = [HasVendorXTHeadBa]
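For example, the `(XLenVT 20)` multiply-add pattern above folds into two th.addsl instructions (hedged sketch, hypothetical test):

```llvm
define i64 @addmul20(i64 %a, i64 %b) nounwind {
  %m = mul i64 %a, 20
  %r = add i64 %m, %b
  ret i64 %r
  ; expected with +xtheadba (roughly):
  ;   th.addsl a0, a0, a0, 2    ; a0 = a + (a << 2) = 5 * a
  ;   th.addsl a0, a1, a0, 2    ; a0 = b + (5a << 2) = b + 20 * a
}
```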

let Predicates = [HasVendorXTHeadBb] in {
def : PatGprImm<rotr, TH_SRRI, uimmlog2xlen>;
// There's no encoding for a rotate-left-immediate in X-THead-Bb, as
// it can be implemented with th.srri by subtracting the immediate from XLEN.
def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
(TH_SRRI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
def : Pat<(rotr GPR:$rs1, GPR:$rs2),
(OR (SRL GPR:$rs1, GPR:$rs2),
(SLL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
def : Pat<(rotl GPR:$rs1, GPR:$rs2),
(OR (SLL GPR:$rs1, GPR:$rs2),
(SRL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
//def : Pat<(and GPR:$rs1, 1), (TH_EXTU GPR:$rs1, 0, 0)>;
//def : Pat<(and GPR:$rs1, 0xff), (TH_EXTU GPR:$rs1, 7, 0)>;
def : Pat<(and GPR:$rs1, 0xffff), (TH_EXTU GPR:$rs1, 15, 0)>;
def : Pat<(and GPR:$rs1, 0xffffffff), (TH_EXTU GPR:$rs1, 31, 0)>;
def : Pat<(sext_inreg GPR:$rs1, i32), (TH_EXT GPR:$rs1, 31, 0)>;
def : Pat<(sext_inreg GPR:$rs1, i16), (TH_EXT GPR:$rs1, 15, 0)>;
def : Pat<(sext_inreg GPR:$rs1, i8), (TH_EXT GPR:$rs1, 7, 0)>;
def : Pat<(sext_inreg GPR:$rs1, i1), (TH_EXT GPR:$rs1, 0, 0)>;
def : PatGpr<ctlz, TH_FF1>;
def : Pat<(ctlz (xor GPR:$rs1, -1)), (TH_FF0 GPR:$rs1)>;
def : PatGpr<bswap, TH_REV>;
} // Predicates = [HasVendorXTHeadBb]
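A hedged sketch of the rotate-immediate lowering above (hypothetical test): a rotate-left by 12 becomes a rotate-right by XLEN - 12 = 52 on rv64.

```llvm
define i64 @rotl12(i64 %a) nounwind {
  %r = call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 12)
  ret i64 %r
  ; expected with +xtheadbb (roughly): th.srri a0, a0, 52
}
declare i64 @llvm.fshl.i64(i64, i64, i64)
```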

let Predicates = [HasVendorXTHeadBb, IsRV64] in {
def : PatGprImm<riscv_rorw, TH_SRRIW, uimm5>;
def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
(TH_SRRIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
def : Pat<(riscv_rorw i64:$rs1, i64:$rs2),
(OR (SRLW i64:$rs1, i64:$rs2),
(SLLW i64:$rs1, (SUB X0, i64:$rs2)))>;
def : Pat<(riscv_rolw i64:$rs1, i64:$rs2),
(OR (SLLW i64:$rs1, i64:$rs2),
(SRLW i64:$rs1, (SUB X0, i64:$rs2)))>;
def : Pat<(sra (bswap i64:$rs1), (i64 32)),
(TH_REVW i64:$rs1)>;
def : Pat<(binop_allwusers<srl> (bswap i64:$rs1), (i64 32)),
(TH_REVW i64:$rs1)>;
def : Pat<(riscv_clzw i64:$rs1),
(TH_FF0 (SLLI (XORI i64:$rs1, -1), 32))>;
} // Predicates = [HasVendorXTHeadBb, IsRV64]
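The two srl-of-bswap patterns above catch the type-promoted 32-bit byte swap; a hedged sketch (hypothetical test):

```llvm
define signext i32 @bswap_i32(i32 signext %a) nounwind {
  %b = call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %b
  ; expected on rv64 with +xtheadbb (roughly): th.revw a0, a0
}
declare i32 @llvm.bswap.i32(i32)
```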

let Predicates = [HasVendorXTHeadBs] in {
def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), 1),
(TH_TST GPR:$rs1, uimmlog2xlen:$shamt)>;
def : Pat<(seteq (and GPR:$rs1, SingleBitSetMask:$mask), 0),
(TH_TST (XORI GPR:$rs1, -1), SingleBitSetMask:$mask)>;
} // Predicates = [HasVendorXTHeadBs]
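A hedged sketch of the SingleBitSetMask pattern above (hypothetical test): checking that bit 12 is clear folds the mask through an inverted th.tst.

```llvm
define i64 @bit12_clear(i64 %a) nounwind {
  %m = and i64 %a, 4096         ; 1 << 12
  %c = icmp eq i64 %m, 0
  %z = zext i1 %c to i64
  ret i64 %z
  ; expected with +xtheadbs (roughly): not a0, a0 ; th.tst a0, a0, 12
}
```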


defm PseudoTHVdotVMAQA : VPseudoVMAQA_VV_VX;
defm PseudoTHVdotVMAQAU : VPseudoVMAQA_VV_VX;
defm PseudoTHVdotVMAQASU : VPseudoVMAQA_VV_VX;
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/RISCV/attributes.ll
@@ -87,6 +87,7 @@
; RUN: llc -mtriple=riscv64 -mattr=+svinval %s -o - | FileCheck --check-prefix=RV64SVINVAL %s
; RUN: llc -mtriple=riscv64 -mattr=+xventanacondops %s -o - | FileCheck --check-prefix=RV64XVENTANACONDOPS %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadba %s -o - | FileCheck --check-prefix=RV64XTHEADBA %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs %s -o - | FileCheck --check-prefix=RV64XTHEADBS %s
; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefix=RV64XTHEADVDOT %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zawrs %s -o - | FileCheck --check-prefix=RV64ZAWRS %s
; RUN: llc -mtriple=riscv64 -mattr=+experimental-ztso %s -o - | FileCheck --check-prefix=RV64ZTSO %s
@@ -182,6 +183,7 @@
; RV64SVINVAL: .attribute 5, "rv64i2p0_svinval1p0"
; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p0_xventanacondops1p0"
; RV64XTHEADBA: .attribute 5, "rv64i2p0_xtheadba1p0"
; RV64XTHEADBS: .attribute 5, "rv64i2p0_xtheadbs1p0"
; RV64XTHEADVDOT: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0"
; RV64ZTSO: .attribute 5, "rv64i2p0_ztso0p1"
; RV64ZCA: .attribute 5, "rv64i2p0_zca1p0"
212 changes: 212 additions & 0 deletions llvm/test/CodeGen/RISCV/bittest.ll
@@ -7,6 +7,10 @@
; RUN: | FileCheck %s -check-prefixes=CHECK,ZBS,RV32,RV32ZBS
; RUN: llc -mtriple=riscv64 -mattr=+zbs -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=CHECK,ZBS,RV64,RV64ZBS
; RUN: llc -mtriple=riscv32 -mattr=+xtheadbs -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=CHECK,XTHEADBS,RV32,RV32XTHEADBS
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=CHECK,XTHEADBS,RV64,RV64XTHEADBS

define signext i32 @bittest_7_i32(i32 signext %a) nounwind {
; CHECK-LABEL: bittest_7_i32:
@@ -52,6 +56,12 @@ define signext i32 @bittest_11_i32(i32 signext %a) nounwind {
; ZBS-NEXT: not a0, a0
; ZBS-NEXT: bexti a0, a0, 11
; ZBS-NEXT: ret
;
; XTHEADBS-LABEL: bittest_11_i32:
; XTHEADBS: # %bb.0:
; XTHEADBS-NEXT: not a0, a0
; XTHEADBS-NEXT: th.tst a0, a0, 11
; XTHEADBS-NEXT: ret
%shr = lshr i32 %a, 11
%not = xor i32 %shr, -1
%and = and i32 %not, 1
@@ -142,6 +152,19 @@ define i64 @bittest_11_i64(i64 %a) nounwind {
; RV64ZBS-NEXT: not a0, a0
; RV64ZBS-NEXT: bexti a0, a0, 11
; RV64ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bittest_11_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: not a0, a0
; RV32XTHEADBS-NEXT: th.tst a0, a0, 11
; RV32XTHEADBS-NEXT: li a1, 0
; RV32XTHEADBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_11_i64:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: not a0, a0
; RV64XTHEADBS-NEXT: th.tst a0, a0, 11
; RV64XTHEADBS-NEXT: ret
%shr = lshr i64 %a, 11
%not = xor i64 %shr, -1
%and = and i64 %not, 1
@@ -167,6 +190,12 @@ define i64 @bittest_31_i64(i64 %a) nounwind {
; RV64ZBS-NEXT: not a0, a0
; RV64ZBS-NEXT: bexti a0, a0, 31
; RV64ZBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_31_i64:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: not a0, a0
; RV64XTHEADBS-NEXT: th.tst a0, a0, 31
; RV64XTHEADBS-NEXT: ret
%shr = lshr i64 %a, 31
%not = xor i64 %shr, -1
%and = and i64 %not, 1
@@ -193,6 +222,12 @@ define i64 @bittest_32_i64(i64 %a) nounwind {
; RV64ZBS-NEXT: not a0, a0
; RV64ZBS-NEXT: bexti a0, a0, 32
; RV64ZBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_32_i64:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: not a0, a0
; RV64XTHEADBS-NEXT: th.tst a0, a0, 32
; RV64XTHEADBS-NEXT: ret
%shr = lshr i64 %a, 32
%not = xor i64 %shr, -1
%and = and i64 %not, 1
@@ -249,6 +284,22 @@ define i1 @bittest_constant_by_var_shr_i32(i32 signext %b) nounwind {
; RV64ZBS-NEXT: addiw a1, a1, 722
; RV64ZBS-NEXT: bext a0, a1, a0
; RV64ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bittest_constant_by_var_shr_i32:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: lui a1, 301408
; RV32XTHEADBS-NEXT: addi a1, a1, 722
; RV32XTHEADBS-NEXT: srl a0, a1, a0
; RV32XTHEADBS-NEXT: andi a0, a0, 1
; RV32XTHEADBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_constant_by_var_shr_i32:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: lui a1, 301408
; RV64XTHEADBS-NEXT: addiw a1, a1, 722
; RV64XTHEADBS-NEXT: srlw a0, a1, a0
; RV64XTHEADBS-NEXT: andi a0, a0, 1
; RV64XTHEADBS-NEXT: ret
%shl = lshr i32 1234567890, %b
%and = and i32 %shl, 1
%cmp = icmp ne i32 %and, 0
@@ -286,6 +337,22 @@ define i1 @bittest_constant_by_var_shl_i32(i32 signext %b) nounwind {
; RV64ZBS-NEXT: addiw a1, a1, 722
; RV64ZBS-NEXT: bext a0, a1, a0
; RV64ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bittest_constant_by_var_shl_i32:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: lui a1, 301408
; RV32XTHEADBS-NEXT: addi a1, a1, 722
; RV32XTHEADBS-NEXT: srl a0, a1, a0
; RV32XTHEADBS-NEXT: andi a0, a0, 1
; RV32XTHEADBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_constant_by_var_shl_i32:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: lui a1, 301408
; RV64XTHEADBS-NEXT: addiw a1, a1, 722
; RV64XTHEADBS-NEXT: srlw a0, a1, a0
; RV64XTHEADBS-NEXT: andi a0, a0, 1
; RV64XTHEADBS-NEXT: ret
%shl = shl i32 1, %b
%and = and i32 %shl, 1234567890
%cmp = icmp ne i32 %and, 0
@@ -318,6 +385,14 @@ define i1 @bittest_constant_by_var_shr_i64(i64 %b) nounwind {
; RV64ZBS-NEXT: addiw a1, a1, 722
; RV64ZBS-NEXT: bext a0, a1, a0
; RV64ZBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_constant_by_var_shr_i64:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: lui a1, 301408
; RV64XTHEADBS-NEXT: addiw a1, a1, 722
; RV64XTHEADBS-NEXT: srl a0, a1, a0
; RV64XTHEADBS-NEXT: andi a0, a0, 1
; RV64XTHEADBS-NEXT: ret
%shl = lshr i64 1234567890, %b
%and = and i64 %shl, 1
%cmp = icmp ne i64 %and, 0
@@ -350,6 +425,14 @@ define i1 @bittest_constant_by_var_shl_i64(i64 %b) nounwind {
; RV64ZBS-NEXT: addiw a1, a1, 722
; RV64ZBS-NEXT: bext a0, a1, a0
; RV64ZBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_constant_by_var_shl_i64:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: lui a1, 301408
; RV64XTHEADBS-NEXT: addiw a1, a1, 722
; RV64XTHEADBS-NEXT: srl a0, a1, a0
; RV64XTHEADBS-NEXT: andi a0, a0, 1
; RV64XTHEADBS-NEXT: ret
%shl = shl i64 1, %b
%and = and i64 %shl, 1234567890
%cmp = icmp ne i64 %and, 0
@@ -417,6 +500,37 @@ define void @bittest_switch(i32 signext %0) {
; RV64ZBS-NEXT: tail bar@plt
; RV64ZBS-NEXT: .LBB14_3:
; RV64ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bittest_switch:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: li a1, 31
; RV32XTHEADBS-NEXT: bltu a1, a0, .LBB14_3
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: lui a1, 524291
; RV32XTHEADBS-NEXT: addi a1, a1, 768
; RV32XTHEADBS-NEXT: srl a0, a1, a0
; RV32XTHEADBS-NEXT: andi a0, a0, 1
; RV32XTHEADBS-NEXT: beqz a0, .LBB14_3
; RV32XTHEADBS-NEXT: # %bb.2:
; RV32XTHEADBS-NEXT: tail bar@plt
; RV32XTHEADBS-NEXT: .LBB14_3:
; RV32XTHEADBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_switch:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: li a1, 31
; RV64XTHEADBS-NEXT: bltu a1, a0, .LBB14_3
; RV64XTHEADBS-NEXT: # %bb.1:
; RV64XTHEADBS-NEXT: lui a1, 2048
; RV64XTHEADBS-NEXT: addiw a1, a1, 51
; RV64XTHEADBS-NEXT: slli a1, a1, 8
; RV64XTHEADBS-NEXT: srl a0, a1, a0
; RV64XTHEADBS-NEXT: andi a0, a0, 1
; RV64XTHEADBS-NEXT: beqz a0, .LBB14_3
; RV64XTHEADBS-NEXT: # %bb.2:
; RV64XTHEADBS-NEXT: tail bar@plt
; RV64XTHEADBS-NEXT: .LBB14_3:
; RV64XTHEADBS-NEXT: ret
switch i32 %0, label %3 [
i32 8, label %2
i32 9, label %2
@@ -697,6 +811,18 @@ define i64 @bit_10_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32ZBS-NEXT: mv a1, a5
; RV32ZBS-NEXT: .LBB24_2:
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_10_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: th.tst a6, a0, 10
; RV32XTHEADBS-NEXT: mv a1, a3
; RV32XTHEADBS-NEXT: mv a0, a2
; RV32XTHEADBS-NEXT: bnez a6, .LBB24_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
; RV32XTHEADBS-NEXT: mv a1, a5
; RV32XTHEADBS-NEXT: .LBB24_2:
; RV32XTHEADBS-NEXT: ret
%1 = and i64 %a, 1024
%2 = icmp ne i64 %1, 0
%3 = select i1 %2, i64 %b, i64 %c
@@ -766,6 +892,18 @@ define i64 @bit_11_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32ZBS-NEXT: mv a1, a5
; RV32ZBS-NEXT: .LBB26_2:
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_11_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: th.tst a6, a0, 11
; RV32XTHEADBS-NEXT: mv a1, a3
; RV32XTHEADBS-NEXT: mv a0, a2
; RV32XTHEADBS-NEXT: bnez a6, .LBB26_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
; RV32XTHEADBS-NEXT: mv a1, a5
; RV32XTHEADBS-NEXT: .LBB26_2:
; RV32XTHEADBS-NEXT: ret
%1 = and i64 %a, 2048
%2 = icmp ne i64 %1, 0
%3 = select i1 %2, i64 %b, i64 %c
@@ -835,6 +973,18 @@ define i64 @bit_20_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32ZBS-NEXT: mv a1, a5
; RV32ZBS-NEXT: .LBB28_2:
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_20_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: th.tst a6, a0, 20
; RV32XTHEADBS-NEXT: mv a1, a3
; RV32XTHEADBS-NEXT: mv a0, a2
; RV32XTHEADBS-NEXT: bnez a6, .LBB28_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
; RV32XTHEADBS-NEXT: mv a1, a5
; RV32XTHEADBS-NEXT: .LBB28_2:
; RV32XTHEADBS-NEXT: ret
%1 = and i64 %a, 1048576
%2 = icmp ne i64 %1, 0
%3 = select i1 %2, i64 %b, i64 %c
@@ -1015,6 +1165,18 @@ define i64 @bit_55_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32ZBS-NEXT: mv a1, a5
; RV32ZBS-NEXT: .LBB34_2:
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_55_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: th.tst a6, a1, 23
; RV32XTHEADBS-NEXT: mv a1, a3
; RV32XTHEADBS-NEXT: mv a0, a2
; RV32XTHEADBS-NEXT: bnez a6, .LBB34_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
; RV32XTHEADBS-NEXT: mv a1, a5
; RV32XTHEADBS-NEXT: .LBB34_2:
; RV32XTHEADBS-NEXT: ret
%1 = and i64 %a, 36028797018963968
%2 = icmp ne i64 %1, 0
%3 = select i1 %2, i64 %b, i64 %c
@@ -2374,6 +2536,20 @@ define i64 @bit_63_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32ZBS-NEXT: mv a1, a5
; RV32ZBS-NEXT: .LBB85_2:
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_63_1_z_select_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: slli a1, a1, 1
; RV32XTHEADBS-NEXT: srli a1, a1, 1
; RV32XTHEADBS-NEXT: or a6, a0, a1
; RV32XTHEADBS-NEXT: mv a1, a3
; RV32XTHEADBS-NEXT: mv a0, a2
; RV32XTHEADBS-NEXT: beqz a6, .LBB85_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
; RV32XTHEADBS-NEXT: mv a1, a5
; RV32XTHEADBS-NEXT: .LBB85_2:
; RV32XTHEADBS-NEXT: ret
%1 = and i64 %a, 9223372036854775807
%2 = icmp eq i64 %1, 0
%3 = select i1 %2, i64 %b, i64 %c
@@ -2417,6 +2593,20 @@ define i64 @bit_63_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32ZBS-NEXT: mv a1, a5
; RV32ZBS-NEXT: .LBB86_2:
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_63_1_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: slli a1, a1, 1
; RV32XTHEADBS-NEXT: srli a1, a1, 1
; RV32XTHEADBS-NEXT: or a6, a0, a1
; RV32XTHEADBS-NEXT: mv a1, a3
; RV32XTHEADBS-NEXT: mv a0, a2
; RV32XTHEADBS-NEXT: bnez a6, .LBB86_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
; RV32XTHEADBS-NEXT: mv a1, a5
; RV32XTHEADBS-NEXT: .LBB86_2:
; RV32XTHEADBS-NEXT: ret
%1 = and i64 %a, 9223372036854775807
%2 = icmp ne i64 %1, 0
%3 = select i1 %2, i64 %b, i64 %c
@@ -3198,6 +3388,17 @@ define void @bit_63_1_z_branch_i64(i64 %0) {
; RV32ZBS-NEXT: ret
; RV32ZBS-NEXT: .LBB115_2:
; RV32ZBS-NEXT: tail bar@plt
;
; RV32XTHEADBS-LABEL: bit_63_1_z_branch_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: slli a1, a1, 1
; RV32XTHEADBS-NEXT: srli a1, a1, 1
; RV32XTHEADBS-NEXT: or a0, a0, a1
; RV32XTHEADBS-NEXT: beqz a0, .LBB115_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: ret
; RV32XTHEADBS-NEXT: .LBB115_2:
; RV32XTHEADBS-NEXT: tail bar@plt
%2 = and i64 %0, 9223372036854775807
%3 = icmp eq i64 %2, 0
br i1 %3, label %4, label %5
@@ -3240,6 +3441,17 @@ define void @bit_63_1_nz_branch_i64(i64 %0) {
; RV32ZBS-NEXT: tail bar@plt
; RV32ZBS-NEXT: .LBB116_2:
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_63_1_nz_branch_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: slli a1, a1, 1
; RV32XTHEADBS-NEXT: srli a1, a1, 1
; RV32XTHEADBS-NEXT: or a0, a0, a1
; RV32XTHEADBS-NEXT: beqz a0, .LBB116_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: tail bar@plt
; RV32XTHEADBS-NEXT: .LBB116_2:
; RV32XTHEADBS-NEXT: ret
%2 = and i64 %0, 9223372036854775807
%3 = icmp ne i64 %2, 0
br i1 %3, label %4, label %5
670 changes: 670 additions & 0 deletions llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll

Large diffs are not rendered by default.

462 changes: 462 additions & 0 deletions llvm/test/CodeGen/RISCV/imm.ll

Large diffs are not rendered by default.

878 changes: 878 additions & 0 deletions llvm/test/CodeGen/RISCV/rotl-rotr.ll

Large diffs are not rendered by default.

453 changes: 453 additions & 0 deletions llvm/test/CodeGen/RISCV/rv32xtheadbb.ll

Large diffs are not rendered by default.

76 changes: 76 additions & 0 deletions llvm/test/CodeGen/RISCV/rv32xtheadbs.ll
@@ -0,0 +1,76 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV32I
; RUN: llc -mtriple=riscv32 -mattr=+xtheadbs -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV32XTHEADBS

define i32 @th_tst_i32(i32 %a) nounwind {
; RV32I-LABEL: th_tst_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 26
; RV32I-NEXT: srli a0, a0, 31
; RV32I-NEXT: ret
;
; RV32XTHEADBS-LABEL: th_tst_i32:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: th.tst a0, a0, 5
; RV32XTHEADBS-NEXT: ret
%shr = lshr i32 %a, 5
%and = and i32 %shr, 1
ret i32 %and
}

define i64 @th_tst_i64(i64 %a) nounwind {
; RV32I-LABEL: th_tst_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 26
; RV32I-NEXT: srli a0, a0, 31
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
;
; RV32XTHEADBS-LABEL: th_tst_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: th.tst a0, a0, 5
; RV32XTHEADBS-NEXT: li a1, 0
; RV32XTHEADBS-NEXT: ret
%shr = lshr i64 %a, 5
%and = and i64 %shr, 1
ret i64 %and
}

define signext i32 @th_tst_i32_cmp(i32 signext %a) nounwind {
; RV32I-LABEL: th_tst_i32_cmp:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 26
; RV32I-NEXT: srli a0, a0, 31
; RV32I-NEXT: ret
;
; RV32XTHEADBS-LABEL: th_tst_i32_cmp:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: th.tst a0, a0, 5
; RV32XTHEADBS-NEXT: ret
%and = and i32 %a, 32
%cmp = icmp ne i32 %and, 0
%zext = zext i1 %cmp to i32
ret i32 %zext
}

define i64 @th_tst_i64_cmp(i64 %a) nounwind {
; RV32I-LABEL: th_tst_i64_cmp:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 26
; RV32I-NEXT: srli a0, a0, 31
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
;
; RV32XTHEADBS-LABEL: th_tst_i64_cmp:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: th.tst a0, a0, 5
; RV32XTHEADBS-NEXT: li a1, 0
; RV32XTHEADBS-NEXT: ret
%and = and i64 %a, 32
%cmp = icmp ne i64 %and, 0
%zext = zext i1 %cmp to i64
ret i64 %zext
}

768 changes: 768 additions & 0 deletions llvm/test/CodeGen/RISCV/rv64xtheadbb.ll

Large diffs are not rendered by default.

72 changes: 72 additions & 0 deletions llvm/test/CodeGen/RISCV/rv64xtheadbs.ll
@@ -0,0 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV64I
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbs -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=RV64XTHEADBS

define signext i32 @th_tst_i32(i32 signext %a) nounwind {
; RV64I-LABEL: th_tst_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 58
; RV64I-NEXT: srli a0, a0, 63
; RV64I-NEXT: ret
;
; RV64XTHEADBS-LABEL: th_tst_i32:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: th.tst a0, a0, 5
; RV64XTHEADBS-NEXT: ret
%shr = lshr i32 %a, 5
%and = and i32 %shr, 1
ret i32 %and
}

define i64 @th_tst_i64(i64 %a) nounwind {
; RV64I-LABEL: th_tst_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 58
; RV64I-NEXT: srli a0, a0, 63
; RV64I-NEXT: ret
;
; RV64XTHEADBS-LABEL: th_tst_i64:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: th.tst a0, a0, 5
; RV64XTHEADBS-NEXT: ret
%shr = lshr i64 %a, 5
%and = and i64 %shr, 1
ret i64 %and
}

define signext i32 @th_tst_i32_cmp(i32 signext %a) nounwind {
; RV64I-LABEL: th_tst_i32_cmp:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 58
; RV64I-NEXT: srli a0, a0, 63
; RV64I-NEXT: ret
;
; RV64XTHEADBS-LABEL: th_tst_i32_cmp:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: th.tst a0, a0, 5
; RV64XTHEADBS-NEXT: ret
%and = and i32 %a, 32
%cmp = icmp ne i32 %and, 0
%zext = zext i1 %cmp to i32
ret i32 %zext
}

define i64 @th_tst_i64_cmp(i64 %a) nounwind {
; RV64I-LABEL: th_tst_i64_cmp:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 58
; RV64I-NEXT: srli a0, a0, 63
; RV64I-NEXT: ret
;
; RV64XTHEADBS-LABEL: th_tst_i64_cmp:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: th.tst a0, a0, 5
; RV64XTHEADBS-NEXT: ret
%and = and i64 %a, 32
%cmp = icmp ne i64 %and, 0
%zext = zext i1 %cmp to i64
ret i64 %zext
}

7 changes: 7 additions & 0 deletions llvm/test/MC/RISCV/rv32xtheadbs-invalid.s
@@ -0,0 +1,7 @@
# RUN: not llvm-mc -triple riscv32 -mattr=+xtheadbs < %s 2>&1 | FileCheck %s

# Too few operands
th.tst t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
# Immediate operand out of range
th.tst t0, t1, 32 # CHECK: :[[@LINE]]:16: error: immediate must be an integer in the range [0, 31]
th.tst t0, t1, -1 # CHECK: :[[@LINE]]:16: error: immediate must be an integer in the range [0, 31]
17 changes: 17 additions & 0 deletions llvm/test/MC/RISCV/rv32xtheadbs-valid.s
@@ -0,0 +1,17 @@
# RUN: llvm-mc %s -triple=riscv32 -mattr=+xtheadbs -show-encoding \
# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
# RUN: llvm-mc %s -triple=riscv64 -mattr=+xtheadbs -show-encoding \
# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+xtheadbs < %s \
# RUN: | llvm-objdump --mattr=+xtheadbs -d -r - \
# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+xtheadbs < %s \
# RUN: | llvm-objdump --mattr=+xtheadbs -d -r - \
# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s

# CHECK-ASM-AND-OBJ: th.tst t0, t1, 1
# CHECK-ASM: encoding: [0x8b,0x12,0x13,0x88]
th.tst t0, t1, 1
# CHECK-ASM-AND-OBJ: th.tst t0, t1, 31
# CHECK-ASM: encoding: [0x8b,0x12,0xf3,0x89]
th.tst t0, t1, 31
2 changes: 1 addition & 1 deletion llvm/test/MC/RISCV/rv32zba-valid.s
@@ -1,4 +1,4 @@
# With Bitmanip base extension:
# With Zba extension:
# RUN: llvm-mc %s -triple=riscv32 -mattr=+zba -show-encoding \
# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
# RUN: llvm-mc %s -triple=riscv64 -mattr=+zba -show-encoding \
5 changes: 5 additions & 0 deletions llvm/test/MC/RISCV/rv64xtheadbs-invalid.s
@@ -0,0 +1,5 @@
# RUN: not llvm-mc -triple riscv64 -mattr=+xtheadbs < %s 2>&1 | FileCheck %s

# Immediate operand out of range
th.tst t0, t1, 64 # CHECK: :[[@LINE]]:16: error: immediate must be an integer in the range [0, 63]
th.tst t0, t1, -1 # CHECK: :[[@LINE]]:16: error: immediate must be an integer in the range [0, 63]
2 changes: 1 addition & 1 deletion llvm/test/MC/RISCV/rv64zba-valid.s
@@ -1,4 +1,4 @@
# With Bitmanip base extension:
# With Zba extension:
# RUN: llvm-mc %s -triple=riscv64 -mattr=+zba -show-encoding \
# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zba < %s \