Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 51 additions & 2 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18291,8 +18291,6 @@ then the result is the size in bits of the type of ``src`` if
``is_zero_poison == 0`` and ``poison`` otherwise. For example,
``llvm.cttz(2) = 1``.

.. _int_overflow:

.. _int_fshl:

'``llvm.fshl.*``' Intrinsic
Expand Down Expand Up @@ -18389,6 +18387,57 @@ Example:
%r = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) ; %r = i8: 225 (0b11100001)
%r = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) ; %r = i8: 255 (0b11111111)

.. _int_clmul:

'``llvm.clmul.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

This is an overloaded intrinsic. You can use ``llvm.clmul`` on any integer
bit width or any vector of integer elements.

::

declare i16 @llvm.clmul.i16(i16 %a, i16 %b)
declare i32 @llvm.clmul.i32(i32 %a, i32 %b)
declare i64 @llvm.clmul.i64(i64 %a, i64 %b)
declare <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a, <4 x i32> %b)

Overview:
"""""""""

The '``llvm.clmul``' family of intrinsic functions performs carry-less
multiplication (also known as XOR multiplication) of its two arguments and
returns the low bits of the product, truncated to the bit width of the
argument type.

Arguments:
""""""""""

The arguments may be of any integer type or any vector type with integer
elements. Both arguments and the result must have the same type.

Semantics:
""""""""""

The '``llvm.clmul``' intrinsic computes the carry-less product of its
arguments: the result of applying the standard long-multiplication algorithm
with every addition of partial products replaced by an XOR. Only the low bits
of the product, as many as the bit width of the argument type, are returned.
The vector variants operate lane-wise.

Example:
""""""""

.. code-block:: llvm

%r = call i4 @llvm.clmul.i4(i4 1, i4 2) ; %r = 2
%r = call i4 @llvm.clmul.i4(i4 5, i4 6) ; %r = 14
%r = call i4 @llvm.clmul.i4(i4 -4, i4 2) ; %r = -8
%r = call i4 @llvm.clmul.i4(i4 -4, i4 -5) ; %r = 4

.. _int_overflow:

Arithmetic with Overflow Intrinsics
-----------------------------------

Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/ISDOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,11 @@ enum NodeType {
FSHL,
FSHR,

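/// Carry-less multiplication operations. For BW-bit operands x and y with
/// full carry-less product P (at most 2*BW - 1 bits wide): CLMUL yields the
/// low BW bits of P, CLMULH yields P >> BW, and CLMULR yields P >> (BW - 1),
/// which equals bitreverse(CLMUL(bitreverse(x), bitreverse(y))).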
CLMUL,
CLMULR,
CLMULH,

/// Byte Swap and Counting operators.
BSWAP,
CTTZ,
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/SDPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,11 @@ inline BinaryOpc_match<LHS, RHS> m_Rotr(const LHS &L, const RHS &R) {
return BinaryOpc_match<LHS, RHS>(ISD::ROTR, L, R);
}

/// Build a matcher for an ISD::CLMUL node whose operands match \p L and \p R.
/// NOTE(review): the `true` template argument mirrors m_FAdd and presumably
/// requests commutative operand matching -- confirm against BinaryOpc_match.
template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS, true> m_Clmul(const LHS &L, const RHS &R) {
  using ClmulMatcher = BinaryOpc_match<LHS, RHS, true>;
  return ClmulMatcher(ISD::CLMUL, L, R);
}

template <typename LHS, typename RHS>
inline BinaryOpc_match<LHS, RHS, true> m_FAdd(const LHS &L, const RHS &R) {
return BinaryOpc_match<LHS, RHS, true>(ISD::FADD, L, R);
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -5457,6 +5457,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// \returns The expansion if successful, SDValue() otherwise
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;

/// Expand carry-less multiply nodes (ISD::CLMUL, ISD::CLMULR, ISD::CLMULH).
/// \param N Node to expand
/// \returns The expansion if successful, SDValue() otherwise
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const;

/// Expand rotations.
/// \param N Node to expand
/// \param AllowVectorOps expand vector rotate, this should only be performed
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -1465,6 +1465,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_fshr : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_clmul : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
}

let IntrProperties = [IntrNoMem, IntrSpeculatable,
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,10 @@ def sra_parts : SDNode<"ISD::SRA_PARTS" , SDTIntShiftPairOp>;
def srl_parts : SDNode<"ISD::SRL_PARTS" , SDTIntShiftPairOp>;
def fshl : SDNode<"ISD::FSHL" , SDTIntShiftDOp>;
def fshr : SDNode<"ISD::FSHR" , SDTIntShiftDOp>;
def clmul : SDNode<"ISD::CLMUL" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def clmulr : SDNode<"ISD::CLMULR" , SDTIntBinOp, [SDNPCommutative]>;
def clmulh : SDNode<"ISD::CLMULH" , SDTIntBinOp, [SDNPCommutative]>;
def and : SDNode<"ISD::AND" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def or : SDNode<"ISD::OR" , SDTIntBinOp,
Expand Down
28 changes: 28 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11420,6 +11420,30 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (SDValue AVG = foldShiftToAvg(N, DL))
return AVG;

SDValue Y;
if (VT.getScalarSizeInBits() % 2 == 0) {
  // With HalfBW being half the scalar width of VT, fold
  //   srl(clmul(zext(x), zext(y)), HalfBW - 1) -> zext(clmulr(x, y))
  //   srl(clmul(zext(x), zext(y)), HalfBW)     -> zext(clmulh(x, y))
  // Both x and y must be exactly HalfBW bits wide: the narrow node is built
  // with x's type, so a y of a different width would yield a node with
  // mismatched operand types.
  uint64_t HalfBW = VT.getScalarSizeInBits() / 2;
  if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
      X.getScalarValueSizeInBits() == HalfBW &&
      Y.getScalarValueSizeInBits() == HalfBW) {
    if (sd_match(N1, m_SpecificInt(HalfBW - 1)))
      return DAG.getNode(
          ISD::ZERO_EXTEND, DL, VT,
          DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y));
    if (sd_match(N1, m_SpecificInt(HalfBW)))
      return DAG.getNode(
          ISD::ZERO_EXTEND, DL, VT,
          DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y));
  }
}

// Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 ->
// clmulh(x, y).
if (sd_match(N0, m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)),
m_BitReverse(m_Value(Y))))) &&
sd_match(N1, m_SpecificInt(1)))
return DAG.getNode(ISD::CLMULH, DL, VT, X, Y);

return SDValue();
}

Expand Down Expand Up @@ -11771,6 +11795,10 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
return DAG.getNode(ISD::SRL, DL, VT, X, Y);

// fold bitreverse(clmul(bitreverse(x), bitreverse(y))) -> clmulr(x, y)
if (sd_match(N0, m_Clmul(m_BitReverse(m_Value(X)), m_BitReverse(m_Value(Y)))))
return DAG.getNode(ISD::CLMULR, DL, VT, X, Y);

return SDValue();
}

Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4095,6 +4095,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG))
Results.push_back(Expanded);
break;
case ISD::CLMUL:
case ISD::CLMULR:
case ISD::CLMULH:
if (SDValue Expanded = TLI.expandCLMUL(Node, DAG))
Results.push_back(Expanded);
break;
case ISD::ROTL:
case ISD::ROTR:
if (SDValue Expanded = TLI.expandROT(Node, true /*AllowVectorOps*/, DAG))
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::CLMUL:
case ISD::CLMULR:
case ISD::CLMULH:
case ISD::VP_AND:
case ISD::VP_OR:
case ISD::VP_XOR:
Expand Down Expand Up @@ -3162,6 +3165,12 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
ExpandIntRes_FunnelShift(N, Lo, Hi);
break;

case ISD::CLMUL:
case ISD::CLMULR:
case ISD::CLMULH:
ExpandIntRes_CLMUL(N, Lo, Hi);
break;

case ISD::VSCALE:
ExpandIntRes_VSCALE(N, Lo, Hi);
break;
Expand Down Expand Up @@ -5492,6 +5501,11 @@ void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
}

void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo, SDValue &Hi) {
  // Obtain the generic expansion of the carry-less multiply from the target
  // lowering, then split that value into the Lo/Hi result halves.
  SplitInteger(TLI.expandCLMUL(N, DAG), Lo, Hi);
}

void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT VT = N->getValueType(0);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {

void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo, SDValue &Hi);

void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, SDValue &Hi);
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::CLMUL:
case ISD::CLMULR:
case ISD::CLMULH:

case ISD::SADDSAT:
case ISD::UADDSAT:
Expand Down Expand Up @@ -1372,6 +1375,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADD: case ISD::VP_ADD:
case ISD::SUB: case ISD::VP_SUB:
case ISD::MUL: case ISD::VP_MUL:
case ISD::CLMUL:
case ISD::CLMULR:
case ISD::CLMULH:
case ISD::MULHS:
case ISD::MULHU:
case ISD::ABDS:
Expand Down Expand Up @@ -4924,6 +4930,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SHL: case ISD::VP_SHL:
case ISD::SRA: case ISD::VP_SRA:
case ISD::SRL: case ISD::VP_SRL:
case ISD::CLMUL:
case ISD::CLMULR:
case ISD::CLMULH:
case ISD::FMINNUM:
case ISD::FMINNUM_IEEE:
case ISD::VP_FMINNUM:
Expand Down Expand Up @@ -7064,6 +7073,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::LLROUND:
case ISD::LRINT:
case ISD::LLRINT:
case ISD::CLMUL:
case ISD::CLMULR:
case ISD::CLMULH:
Res = WidenVecOp_UnrollVectorOp(N);
break;
case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6789,6 +6789,12 @@ static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
return APIntOps::mulhs(C1, C2);
case ISD::MULHU:
return APIntOps::mulhu(C1, C2);
case ISD::CLMUL:
return APIntOps::clmul(C1, C2);
case ISD::CLMULR:
return APIntOps::clmulr(C1, C2);
case ISD::CLMULH:
return APIntOps::clmulh(C1, C2);
}
return std::nullopt;
}
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7279,6 +7279,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
return;
}
case Intrinsic::clmul: {
SDValue X = getValue(I.getArgOperand(0));
SDValue Y = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::CLMUL, sdl, X.getValueType(), X, Y));
return;
}
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ROTR: return "rotr";
case ISD::FSHL: return "fshl";
case ISD::FSHR: return "fshr";
case ISD::CLMUL: return "clmul";
case ISD::CLMULR: return "clmulr";
case ISD::CLMULH: return "clmulh";
case ISD::FADD: return "fadd";
case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
Expand Down
48 changes: 48 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8302,6 +8302,54 @@ SDValue TargetLowering::expandFunnelShift(SDNode *Node,
return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}

/// Expand a carry-less multiply node into simpler DAG nodes.
///
/// With BW the scalar bit width of the result type and P the full carry-less
/// product of the two operands:
///  - ISD::CLMUL  produces the low BW bits of P,
///  - ISD::CLMULR produces P >> (BW - 1),
///  - ISD::CLMULH produces P >> BW.
///
/// \param Node the CLMUL, CLMULR or CLMULH node to expand.
/// \returns the expanded value; any other opcode is unreachable.
SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  unsigned BW = VT.getScalarSizeInBits();
  unsigned Opcode = Node->getOpcode();

  switch (Opcode) {
  case ISD::CLMUL: {
    // XOR together one partial product per bit of Y. Multiplying X by Y
    // masked to a single bit gives either zero or X shifted left by that bit
    // position, so an ordinary MUL can be used for each partial product.
    SDValue Res = DAG.getConstant(0, DL, VT);
    for (unsigned I = 0; I < BW; ++I) {
      SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
      SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
      Res = DAG.getNode(ISD::XOR, DL, VT, Res, Mul);
    }
    return Res;
  }
  case ISD::CLMULR:
  case ISD::CLMULH: {
    EVT ExtVT =
        VT.changeElementType(EVT::getIntegerVT(*DAG.getContext(), 2 * BW));
    // For example, ExtVT = i64 based operations aren't legal on a 32-bit
    // target; use bitreverse-based lowering in this case.
    if (!isOperationLegalOrCustom(ISD::ZERO_EXTEND, ExtVT) ||
        !isOperationLegalOrCustom(ISD::SRL, ExtVT)) {
      // bitreverse(clmul(bitreverse(x), bitreverse(y))) is exactly CLMULR,
      // i.e. bits [2*BW-2 : BW-1] of the full product.
      SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
      SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
      SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
      SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
      // CLMULH is bits [2*BW-1 : BW], which is CLMULR shifted right by one
      // (bit 2*BW-1 of a carry-less product is always zero).
      if (Opcode == ISD::CLMULH)
        Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                          DAG.getShiftAmountConstant(1, VT, DL));
      return Res;
    }
    // Otherwise compute the full product in the double-width type and shift
    // the requested window down before truncating back to VT.
    SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
    SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
    SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
    unsigned ShtAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
    SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
                                 DAG.getShiftAmountConstant(ShtAmt, ExtVT, DL));
    return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
  }
  }
  llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
}

// TODO: Merge with expandFunnelShift.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
SelectionDAG &DAG) const {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,9 @@ void TargetLoweringBase::initActions() {
// Absolute difference
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);

// Carry-less multiply
setOperationAction({ISD::CLMUL, ISD::CLMULR, ISD::CLMULH}, VT, Expand);

// Saturated trunc
setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Expand);
setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Expand);
Expand Down
Loading