Skip to content

Commit

Permalink
[AArch64] Select saturating Neon instructions
Browse files Browse the repository at this point in the history
This adds some extra patterns to select AArch64 Neon SQADD, UQADD, SQSUB
and UQSUB from the existing target independent sadd_sat, uadd_sat,
ssub_sat and usub_sat nodes.

It does not attempt to replace the existing int_aarch64_neon_uqadd
intrinsic nodes as they are apparently used for both scalar and vector,
and need to be legal on scalar types for some of the patterns to work.
The int_aarch64_neon_uqadd on scalar would move the two integers into
floating point registers, perform a Neon uqadd and move the value back.
I don't believe it is a good idea for uadd_sat to do the same, as the
scalar alternative is simpler (an adds with a csinv). For signed it may
be smaller, but I'm not sure about it being better.

So this just adds some extra patterns for the existing vector
instructions, matching on the _sat nodes.

Differential Revision: https://reviews.llvm.org/D69374
  • Loading branch information
davemgreen committed Oct 31, 2019
1 parent 62c0746 commit 2179867
Show file tree
Hide file tree
Showing 9 changed files with 305 additions and 979 deletions.
8 changes: 7 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -741,14 +741,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);

// Vector reductions
// Per-type operation actions for the 64-bit and 128-bit integer vector types
// listed in the initializer below.
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
// Vector reductions: add and signed/unsigned min/max reductions are marked
// Custom for every type in the list.
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

// Saturates: signed/unsigned saturating add and subtract are marked Legal
// for these vector types. The matching selection patterns (SQADD, UQADD,
// SQSUB, UQSUB) are the SIMDThreeSameVectorExtraPatterns instantiations in
// AArch64InstrInfo.td.
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -5066,6 +5066,24 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
}

// Extra selection patterns for an already-defined three-same-vector
// instruction family.
//
//   inst   - instruction name prefix; the concrete instruction for each type
//            is looked up by name as inst#"v8i8", inst#"v4i16", and so on.
//   OpNode - the DAG operator to match, taking two operands of the result type.
//
// One anonymous Pat is emitted per integer vector type: three on V64 operands
// and four on V128 operands. No instructions are defined here; the records
// named inst#"<type>" must already exist for the !cast to resolve.
multiclass SIMDThreeSameVectorExtraPatterns<string inst, SDPatternOperator OpNode> {
// 64-bit vector types (V64 register operands).
def : Pat<(v8i8 (OpNode V64:$LHS, V64:$RHS)),
(!cast<Instruction>(inst#"v8i8") V64:$LHS, V64:$RHS)>;
def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
(!cast<Instruction>(inst#"v4i16") V64:$LHS, V64:$RHS)>;
def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
(!cast<Instruction>(inst#"v2i32") V64:$LHS, V64:$RHS)>;

// 128-bit vector types (V128 register operands), including v2i64.
def : Pat<(v16i8 (OpNode V128:$LHS, V128:$RHS)),
(!cast<Instruction>(inst#"v16i8") V128:$LHS, V128:$RHS)>;
def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
(!cast<Instruction>(inst#"v8i16") V128:$LHS, V128:$RHS)>;
def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
(!cast<Instruction>(inst#"v4i32") V128:$LHS, V128:$RHS)>;
def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
(!cast<Instruction>(inst#"v2i64") V128:$LHS, V128:$RHS)>;
}

// As above, but D sized elements unsupported.
multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3839,6 +3839,12 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
int_aarch64_neon_sqsub>;

// Extra saturating add/sub patterns, in addition to the intrinsic-based
// matches above: select the saddsat, uaddsat, ssubsat and usubsat nodes to
// the SQADD, UQADD, SQSUB and UQSUB vector instructions respectively.
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
Expand Down
10 changes: 1 addition & 9 deletions llvm/test/CodeGen/AArch64/sadd_sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,7 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.4s, v0.4s, v1.4s
; CHECK-NEXT: cmlt v4.4s, v2.4s, #0
; CHECK-NEXT: mvni v3.4s, #128, lsl #24
; CHECK-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%tmp = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y);
ret <4 x i32> %tmp;
Expand Down
Loading

0 comments on commit 2179867

Please sign in to comment.