Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {

bool isSImm5() const { return isSImm<5>(); }
bool isSImm6() const { return isSImm<6>(); }
bool isSImm8() const { return isSImm<8>(); }
bool isSImm10() const { return isSImm<10>(); }
bool isSImm11() const { return isSImm<11>(); }
bool isSImm12() const { return isSImm<12>(); }
Expand Down
159 changes: 159 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ static cl::opt<bool>
"be combined with a shift"),
cl::init(true));

// Temporary escape hatch for the in-progress P-extension codegen support;
// defaults to off because only partial codegen is implemented.
static cl::opt<bool> EnablePExtCodeGen(
    DEBUG_TYPE "-enable-p-ext-codegen", cl::Hidden,
    cl::desc("Turn on P Extension codegen (this is a temporary switch where "
             "only partial codegen is currently supported)."),
    cl::init(false));

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
Expand Down Expand Up @@ -279,6 +285,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
}

// fixed vector is stored in GPRs for P extension packed operations
if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
if (Subtarget.is64Bit()) {
addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass);
}
}

// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());

Expand Down Expand Up @@ -479,6 +496,32 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FCANONICALIZE};

if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
setTargetDAGCombine(ISD::TRUNCATE);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
// load/store are already handled by pattern matching
SmallVector<MVT, 2> VTs = {MVT::v2i16, MVT::v4i8};
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes the v2i16 and v4i8 patterns legal for RV32 and RV64, but your v2i16 patterns are only for RV32.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see, we need to remove IsRV32

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not add a type with 32 bits type to a 64-bit register class on RV64. We need to have the type legalizer widen v2i16 to v4i16.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we want v4i16 or v2i32?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we want v4i16 or v2i32?

If we go with v2i32, then loads need an extend to move bits [31:16] from memory to bits [47:32] of the register. Stores need to do the opposite. Not sure how easy that is to do in the P extension. I don't see any instructions that can extend an element. Maybe I missed it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also didn't find lol

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the ZIP instructions can be used for zero extend. Excerpt from mailing list post https://lists.riscv.org/g/tech-p-ext/message/287

"One important use for the proposed ZIP instructions is to expand
unsigned 8-bit bytes into 16-bit halfwords, and likewise to expand
unsigned 16-bit halfwords into 32-bit words. If rs2 is x0 and we have

X(rs1) = A B C D E F G H

then we get the following results:

ZIP8P     z E z F z G z H
ZIP8HP    z A z B z C z D

ZIP16P    z z E F z z G H
ZIP16HP   z z A B z z C D

where each 'z' is a zero byte (taken from X(rs2), which I said is x0).
"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

somewhat related but likely unrelated here, there is also similar use of the pack instructions with x0, to extract a byte/halfword/word from specific positions: https://lists.riscv.org/g/tech-p-ext/message/753

if (Subtarget.is64Bit()) {
VTs.append({MVT::v2i32, MVT::v4i16, MVT::v8i8});
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
}
for (auto VT : VTs) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::SSHLSAT, VT, Legal);
setOperationAction(ISD::USHLSAT, VT, Legal);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, VT, Legal);
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Legal);
}
}

if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
Expand Down Expand Up @@ -4311,6 +4354,34 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
MVT XLenVT = Subtarget.getXLenVT();

SDLoc DL(Op);
// Handle P extension packed vector BUILD_VECTOR with PLI for splat constants
if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
bool IsPExtVector =
(VT == MVT::v2i16 || VT == MVT::v4i8) ||
(Subtarget.is64Bit() &&
(VT == MVT::v4i16 || VT == MVT::v8i8 || VT == MVT::v2i32));
if (IsPExtVector) {
if (SDValue SplatValue = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
int64_t SplatImm = C->getSExtValue();
bool IsValidImm = false;

// Check immediate range based on vector type
if (VT == MVT::v8i8 || VT == MVT::v4i8)
// PLI_B uses 8-bit unsigned immediate
IsValidImm = isUInt<8>(SplatImm);
else
// PLI_H and PLI_W use 10-bit signed immediate
IsValidImm = isInt<10>(SplatImm);

if (IsValidImm) {
SDValue Imm = DAG.getConstant(SplatImm, DL, XLenVT);
return DAG.getNode(RISCVISD::PLI, DL, VT, Imm);
}
}
}
}
}

// Proper support for f16 requires Zvfh. bf16 always requires special
// handling. We need to cast the scalar to integer and create an integer
Expand Down Expand Up @@ -15996,11 +16067,99 @@ static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
}

// Handle P extension averaging subtraction pattern:
// (vXiY (trunc (srl (sub ([s|z]ext vXiY:$a), ([s|z]ext vXiY:$b)), 1)))
// -> PASUB/PASUBU
static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (!Subtarget.hasStdExtP() || !VT.isFixedLengthVector())
    return SDValue();

  if (N0.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift amount must be a build_vector splat of the constant 1.
  auto *BV = dyn_cast<BuildVectorSDNode>(N0.getOperand(1));
  if (!BV)
    return SDValue();
  auto *C = dyn_cast_or_null<ConstantSDNode>(BV->getSplatValue());
  if (!C || C->getZExtValue() != 1)
    return SDValue();

  // The shifted value must be a subtraction.
  SDValue Sub = N0.getOperand(0);
  if (Sub.getOpcode() != ISD::SUB)
    return SDValue();

  SDValue LHS = Sub.getOperand(0);
  SDValue RHS = Sub.getOperand(1);

  // Both subtraction operands must be extended the same way (both sign
  // extends -> PASUB, both zero extends -> PASUBU).
  bool IsSignExt = LHS.getOpcode() == ISD::SIGN_EXTEND &&
                   RHS.getOpcode() == ISD::SIGN_EXTEND;
  bool IsZeroExt = LHS.getOpcode() == ISD::ZERO_EXTEND &&
                   RHS.getOpcode() == ISD::ZERO_EXTEND;

  if (!IsSignExt && !IsZeroExt)
    return SDValue();

  SDValue A = LHS.getOperand(0);
  SDValue B = RHS.getOperand(0);

  // The extends must be from the truncate's result type, so the PASUB
  // operands already have the right type.
  if (A.getValueType() != VT || B.getValueType() != VT)
    return SDValue();

  // Pick the instruction from the element width and signedness. Only the
  // packed vector types the P extension supports are handled.
  unsigned Opc;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::v2i16:
  case MVT::v4i16:
    Opc = IsSignExt ? RISCV::PASUB_H : RISCV::PASUBU_H;
    break;
  case MVT::v4i8:
  case MVT::v8i8:
    Opc = IsSignExt ? RISCV::PASUB_B : RISCV::PASUBU_B;
    break;
  case MVT::v2i32:
    Opc = IsSignExt ? RISCV::PASUB_W : RISCV::PASUBU_W;
    break;
  default:
    return SDValue();
  }

  // Create the machine node directly
  return SDValue(DAG.getMachineNode(Opc, SDLoc(N), VT, {A, B}), 0);
}

static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);

if (Subtarget.hasStdExtP() && VT.isFixedLengthVector() && EnablePExtCodeGen)
return combinePExtTruncate(N, DAG, Subtarget);

// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
// extending X. This is safe since we only need the LSB after the shift and
// shift amounts larger than 31 would produce poison. If we wait until
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2895,6 +2895,12 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_UIMM9_LSB000:
Ok = isShiftedUInt<6, 3>(Imm);
break;
case RISCVOp::OPERAND_SIMM8_UNSIGNED:
Ok = isInt<8>(Imm);
break;
case RISCVOp::OPERAND_SIMM10_UNSIGNED:
Ok = isInt<10>(Imm);
break;
case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
break;
Expand All @@ -2916,6 +2922,8 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
// clang-format off
CASE_OPERAND_SIMM(5)
CASE_OPERAND_SIMM(6)
CASE_OPERAND_SIMM(8)
CASE_OPERAND_SIMM(10)
CASE_OPERAND_SIMM(11)
CASE_OPERAND_SIMM(12)
CASE_OPERAND_SIMM(26)
Expand Down
104 changes: 102 additions & 2 deletions llvm/lib/Target/RISCV/RISCVInstrInfoP.td
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//

def simm10 : RISCVSImmOp<10>;
def simm10 : RISCVSImmLeafOp<10>;

def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> {
let RenderMethod = "addSImm8UnsignedOperands";
}

// A 8-bit signed immediate allowing range [-128, 255]
// but represented as [-128, 127].
def simm8_unsigned : RISCVOp {
def simm8_unsigned : RISCVSImmLeafOp<8> {
let ParserMatchClass = SImm8UnsignedAsmOperand;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmOperand<8>";
Expand Down Expand Up @@ -1455,3 +1455,103 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PMAXU_DW : RVPPairBinaryExchanged_rr<0b1111, 0b01, "pmaxu.dw">;
def PMAXU_DB : RVPPairBinaryExchanged_rr<0b1111, 0b10, "pmaxu.db">;
} // Predicates = [HasStdExtP, IsRV32]

// PLI: splat an immediate across all lanes of a packed vector held in a GPR.
def SDT_RISCVPLI : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
def riscv_pli : RVSDNode<"PLI", SDT_RISCVPLI>;
let Predicates = [HasStdExtP, IsRV64] in {
// Basic arithmetic patterns for v4i16 (16-bit elements in 64-bit GPR)
def : Pat<(v4i16 (add (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PADD_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i16 (sub (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PSUB_H GPR:$rs1, GPR:$rs2)>;

// Saturating add/sub patterns for v4i16
def : Pat<(v4i16 (saddsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PSADD_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i16 (uaddsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PSADDU_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i16 (ssubsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PSSUB_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i16 (usubsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PSSUBU_H GPR:$rs1, GPR:$rs2)>;

// Averaging (floor) patterns for v4i16
def : Pat<(v4i16 (avgfloors (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PAADD_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i16 (avgflooru (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PAADDU_H GPR:$rs1, GPR:$rs2)>;

// Absolute difference patterns for v4i16
def : Pat<(v4i16 (abds (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PDIF_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i16 (abdu (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (PDIFU_H GPR:$rs1, GPR:$rs2)>;

// Basic arithmetic patterns for v8i8 (8-bit elements in 64-bit GPR)
def : Pat<(v8i8 (add (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PADD_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v8i8 (sub (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PSUB_B GPR:$rs1, GPR:$rs2)>;

// Saturating add/sub patterns for v8i8
def : Pat<(v8i8 (saddsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PSADD_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v8i8 (uaddsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PSADDU_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v8i8 (ssubsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PSSUB_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v8i8 (usubsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PSSUBU_B GPR:$rs1, GPR:$rs2)>;

// Averaging (floor) patterns for v8i8
def : Pat<(v8i8 (avgfloors (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PAADD_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v8i8 (avgflooru (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PAADDU_B GPR:$rs1, GPR:$rs2)>;

// Absolute difference patterns for v8i8
def : Pat<(v8i8 (abds (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PDIF_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v8i8 (abdu (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (PDIFU_B GPR:$rs1, GPR:$rs2)>;

// Splat-immediate patterns. PLI_H/PLI_W take a 10-bit signed immediate,
// PLI_B an 8-bit one.
def : Pat<(v2i32 (riscv_pli simm10:$imm10)), (PLI_W simm10:$imm10)>;
def : Pat<(v4i16 (riscv_pli simm10:$imm10)), (PLI_H simm10:$imm10)>;
def : Pat<(v8i8 (riscv_pli simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;

// Load/Store patterns for v4i16 and v8i8 (use regular GPR load/store since they're in GPRs)
def : StPat<store, SD, GPR, v4i16>;
def : LdPat<load, LD, v4i16>;
def : StPat<store, SD, GPR, v8i8>;
def : LdPat<load, LD, v8i8>;

// Load/Store patterns for v2i32 (32-bit elements in 64-bit GPR)
def : StPat<store, SD, GPR, v2i32>;
def : LdPat<load, LD, v2i32>;
} // Predicates = [HasStdExtP, IsRV64]

// NOTE(review): this block is predicated only on HasStdExtP, so the v2i16 and
// v4i8 patterns are also active on RV64 -- confirm whether they should be
// restricted with IsRV32 (see the matching RV64-only block above).
let Predicates = [HasStdExtP] in {
// Basic arithmetic patterns for v2i16 (16-bit elements in 32-bit GPR)
def : Pat<(v2i16 (add (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PADD_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v2i16 (sub (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PSUB_H GPR:$rs1, GPR:$rs2)>;

// Saturating add/sub patterns for v2i16
def : Pat<(v2i16 (saddsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PSADD_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v2i16 (uaddsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PSADDU_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v2i16 (ssubsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PSSUB_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v2i16 (usubsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PSSUBU_H GPR:$rs1, GPR:$rs2)>;

// Averaging (floor) patterns for v2i16
def : Pat<(v2i16 (avgfloors (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PAADD_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v2i16 (avgflooru (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PAADDU_H GPR:$rs1, GPR:$rs2)>;

// Absolute difference patterns for v2i16
def : Pat<(v2i16 (abds (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PDIF_H GPR:$rs1, GPR:$rs2)>;
def : Pat<(v2i16 (abdu (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (PDIFU_H GPR:$rs1, GPR:$rs2)>;

// Basic arithmetic patterns for v4i8 (8-bit elements in 32-bit GPR)
def : Pat<(v4i8 (add (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PADD_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i8 (sub (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PSUB_B GPR:$rs1, GPR:$rs2)>;

// Saturating add/sub patterns for v4i8
def : Pat<(v4i8 (saddsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PSADD_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i8 (uaddsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PSADDU_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i8 (ssubsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PSSUB_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i8 (usubsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PSSUBU_B GPR:$rs1, GPR:$rs2)>;

// Averaging (floor) patterns for v4i8
def : Pat<(v4i8 (avgfloors (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PAADD_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i8 (avgflooru (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PAADDU_B GPR:$rs1, GPR:$rs2)>;

// Absolute difference patterns for v4i8
def : Pat<(v4i8 (abds (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PDIF_B GPR:$rs1, GPR:$rs2)>;
def : Pat<(v4i8 (abdu (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (PDIFU_B GPR:$rs1, GPR:$rs2)>;

// Splat-immediate patterns (PLI_H: 10-bit signed, PLI_B: 8-bit immediate)
def : Pat<(v2i16 (riscv_pli simm10:$imm10)), (PLI_H simm10:$imm10)>;
def : Pat<(v4i8 (riscv_pli simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
// Load/Store patterns for v2i16 and v4i8 (use regular GPR load/store since they're in GPRs)
def : StPat<store, SW, GPR, v2i16>;
def : LdPat<load, LW, v2i16>;
def : StPat<store, SW, GPR, v4i8>;
def : LdPat<load, LW, v4i8>;
} // Predicates = [HasStdExtP]
6 changes: 5 additions & 1 deletion llvm/lib/Target/RISCV/RISCVRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,11 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList>
}

class GPRRegisterClass<dag regList>
    : RISCVRegisterClass<[XLenVT, XLenFVT,
                          // P extension packed vector types:
                          // RV32: v2i16, v4i8
                          // RV64: v2i32, v4i16, v8i8
                          // NOTE(review): the list is unconditional, so the
                          // 32-bit types (v2i16, v4i8) are also attached to
                          // the 64-bit GPR class on RV64 -- confirm this is
                          // intended rather than widening them to v4i16/v8i8.
                          v2i16, v4i8, v2i32, v4i16, v8i8], 32, regList> {
  let RegInfos = XLenRI;
}

Expand Down
Loading