Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 88 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4488,6 +4488,25 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
return DAG.getMergeValues({Sum, OutFlag}, DL);
}

/// Rebuild a scalar integer NEON intrinsic as the target node \p Opcode,
/// computed on the floating-point type of the same bit width.
///
/// \param Op      The node being lowered; its result type must be i32 or i64.
///                Operand 0 is not forwarded to the new node (for the
///                INTRINSIC_WO_CHAIN callers it presumably holds the intrinsic
///                ID -- confirm against the caller).
/// \param Opcode  Opcode of the node to create in place of the intrinsic.
/// \param DAG     SelectionDAG used to create the new nodes.
/// \returns The result of the new node, bitcast back to the original integer
///          type of \p Op.
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
                                     SelectionDAG &DAG) {
  SDLoc DL(Op);
  const EVT ResultVT = Op.getValueType();
  assert((ResultVT == MVT::i32 || ResultVT == MVT::i64) &&
         "lowerIntNeonIntrinsic expects 32/64-bit scalar operation.");

  // i32 maps to f32; anything else (i64, per the assert above) maps to f64.
  const EVT FPVT = (ResultVT != MVT::i32) ? MVT::f64 : MVT::f32;

  // Reinterpret every operand except operand 0 as the chosen FP type.
  const unsigned NumOperands = Op.getNumOperands();
  SmallVector<SDValue, 2> CastOps;
  CastOps.reserve(NumOperands - 1);
  for (unsigned Idx = 1; Idx < NumOperands; ++Idx) {
    SDValue Casted = DAG.getBitcast(FPVT, Op.getOperand(Idx));
    CastOps.push_back(Casted);
  }

  // Emit the target node on the FP type, then reinterpret its result as the
  // integer type the original intrinsic produced.
  SDValue FPResult = DAG.getNode(Opcode, DL, FPVT, CastOps);
  return DAG.getBitcast(ResultVT, FPResult);
}

static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
Expand Down Expand Up @@ -6359,26 +6378,45 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
return SDValue();
case Intrinsic::aarch64_neon_sqrshl:
if (Op.getValueType().isVector())
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHL, DAG);
case Intrinsic::aarch64_neon_sqshl:
if (Op.getValueType().isVector())
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHL, DAG);
case Intrinsic::aarch64_neon_uqrshl:
if (Op.getValueType().isVector())
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHL, DAG);
case Intrinsic::aarch64_neon_uqshl:
if (Op.getValueType().isVector())
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHL, DAG);
case Intrinsic::aarch64_neon_sqadd:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::SADDSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQADD, DAG);

case Intrinsic::aarch64_neon_sqsub:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::SSUBSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSUB, DAG);

case Intrinsic::aarch64_neon_uqadd:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::UADDSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQADD, DAG);
case Intrinsic::aarch64_neon_uqsub:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::USUBSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSUB, DAG);

case Intrinsic::aarch64_sve_whilelt:
return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true,
/*IsEqual=*/false);
Expand Down Expand Up @@ -6713,6 +6751,52 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::experimental_vector_match: {
return LowerVectorMatch(Op, DAG);
}
// case Intrinsic::aarch64_neon_fcvtas:
// case Intrinsic::aarch64_neon_fcvtau:
// case Intrinsic::aarch64_neon_fcvtms:
// case Intrinsic::aarch64_neon_fcvtmu:
// case Intrinsic::aarch64_neon_fcvtns:
// case Intrinsic::aarch64_neon_fcvtnu:
// case Intrinsic::aarch64_neon_fcvtps:
// case Intrinsic::aarch64_neon_fcvtpu:
// case Intrinsic::aarch64_neon_fcvtzs:
// case Intrinsic::aarch64_neon_fcvtzu:
// case Intrinsic::aarch64_neon_sqabs:
// case Intrinsic::aarch64_neon_sqneg:
// case Intrinsic::aarch64_neon_scalar_sqxtn:
// case Intrinsic::aarch64_neon_scalar_sqxtun:
// case Intrinsic::aarch64_neon_scalar_uqxtn:
// case Intrinsic::aarch64_neon_sqadd:
// case Intrinsic::aarch64_neon_sqdmulh:
// case Intrinsic::aarch64_neon_sqrdmulh:
// case Intrinsic::aarch64_neon_sqrshl:
// case Intrinsic::aarch64_neon_sqshl:
// case Intrinsic::aarch64_neon_sqshlu:
// case Intrinsic::aarch64_neon_sqsub:
// case Intrinsic::aarch64_neon_srshl:
// case Intrinsic::aarch64_neon_sshl:
// case Intrinsic::aarch64_neon_suqadd:
// case Intrinsic::aarch64_neon_uqadd:
// case Intrinsic::aarch64_neon_uqrshl:
// case Intrinsic::aarch64_neon_uqshl:
// case Intrinsic::aarch64_neon_uqsub:
// case Intrinsic::aarch64_neon_urshl:
// case Intrinsic::aarch64_neon_ushl:
// case Intrinsic::aarch64_neon_usqadd:
// case Intrinsic::aarch64_neon_rshrn:
// case Intrinsic::aarch64_neon_sqrshrn:
// case Intrinsic::aarch64_neon_sqrshrun:
// case Intrinsic::aarch64_neon_sqshrn:
// case Intrinsic::aarch64_neon_sqshrun:
// case Intrinsic::aarch64_neon_uqrshrn:
// case Intrinsic::aarch64_neon_uqshrn:
// case Intrinsic::aarch64_neon_sqdmulh_lane:
// case Intrinsic::aarch64_neon_sqdmulh_laneq:
// case Intrinsic::aarch64_neon_sqrdmulh_lane:
// case Intrinsic::aarch64_neon_sqrdmulh_laneq:
// case Intrinsic::aarch64_neon_sqrdmlah:
// case Intrinsic::aarch64_neon_sqrdmlsh:
// case Intrinsic::aarch64_neon_abs:{
}
}

Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -7703,16 +7703,21 @@ multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
}

multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode, SDPatternOperator SatOp> {
SDPatternOperator OpNode, SDPatternOperator G_OpNode, SDPatternOperator SatOp> {
def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
[(set (v1i64 FPR64:$Rd), (SatOp (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>;
def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
def v1i8 : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>;

def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
def : Pat<(i64 (G_OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
def : Pat<(i32 (G_OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
(!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;

def : Pat<(f64 (OpNode FPR64:$Rn, FPR64:$Rm)),
(!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(f32 (OpNode FPR32:$Rn, FPR32:$Rm)),
(!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
}

Expand Down
43 changes: 31 additions & 12 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,25 @@ def AArch64fcvtnu_half : SDNode<"AArch64ISD::FCVTNU_HALF", SDTFPExtendOp>;
def AArch64fcvtps_half : SDNode<"AArch64ISD::FCVTPS_HALF", SDTFPExtendOp>;
def AArch64fcvtpu_half : SDNode<"AArch64ISD::FCVTPU_HALF", SDTFPExtendOp>;

def AArch64sqadd_node: SDNode<"AArch64ISD::SQADD", SDTFPBinOp>;
def AArch64sqrshl: SDNode<"AArch64ISD::SQRSHL", SDTFPBinOp>;
def AArch64sqshl: SDNode<"AArch64ISD::SQSHL", SDTFPBinOp>;
def AArch64sqsub_node: SDNode<"AArch64ISD::SQSUB", SDTFPBinOp>;
def AArch64uqadd: SDNode<"AArch64ISD::UQADD", SDTFPBinOp>;
def AArch64uqrshl: SDNode<"AArch64ISD::UQRSHL", SDTFPBinOp>;
def AArch64uqshl: SDNode<"AArch64ISD::UQSHL", SDTFPBinOp>;
def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>;

// These PatFrags are a temporary hack to work around pattern-matching issues with intrinsics that have not been updated yet.
def AArch64sqadd: PatFrags<(ops node:$lhs, node:$rhs),
[(bitconvert (AArch64sqadd_node (f32 (bitconvert node:$lhs)), (f32 (bitconvert node:$rhs)))),
(bitconvert (AArch64sqadd_node (f64 (bitconvert node:$lhs)), (f64 (bitconvert node:$rhs)))),
(int_aarch64_neon_sqadd node:$lhs, node:$rhs)]>;
def AArch64sqsub: PatFrags<(ops node:$lhs, node:$rhs),
[(bitconvert (AArch64sqsub_node (f32 (bitconvert node:$lhs)), (f32 (bitconvert node:$rhs)))),
(bitconvert (AArch64sqsub_node (f64 (bitconvert node:$lhs)), (f64 (bitconvert node:$rhs)))),
(int_aarch64_neon_sqsub node:$lhs, node:$rhs)]>;

//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;

// Vector immediate ops
Expand Down Expand Up @@ -6453,19 +6472,19 @@ defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>;
defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>;
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd, saddsat>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", AArch64sqadd_node, int_aarch64_neon_sqadd, saddsat>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>;
defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>;
defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub, ssubsat>;
defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", AArch64sqrshl, int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>;
defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", AArch64sqshl, int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>;
defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", AArch64sqsub_node, int_aarch64_neon_sqsub, ssubsat>;
defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd, uaddsat>;
defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>;
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub, usubsat>;
defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", AArch64uqadd, int_aarch64_neon_uqadd, uaddsat>;
defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", AArch64uqrshl, int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", AArch64uqshl, int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>;
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", AArch64uqsub, int_aarch64_neon_uqsub, usubsat>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
Expand Down Expand Up @@ -6520,11 +6539,11 @@ defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
def : Pat<(i64 (AArch64sqadd (i64 FPR64:$Rd),
(i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(i32 FPR32:$Rm))))),
(SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
def : Pat<(i64 (AArch64sqsub (i64 FPR64:$Rd),
(i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(i32 FPR32:$Rm))))),
(SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
Expand Down Expand Up @@ -8545,9 +8564,9 @@ defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", saddsat,
int_aarch64_neon_sqadd>;
AArch64sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", ssubsat,
int_aarch64_neon_sqsub>;
AArch64sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
Expand Down
Loading