Skip to content

Commit

Permalink
[ARM,MVE] Add the vsbciq intrinsics.
Browse files Browse the repository at this point in the history
Summary:
These are exactly parallel to the existing `vadciq` intrinsics, which
we implemented last year as part of the original MVE intrinsics
framework setup.

Just like VADC/VADCI, the MVE VSBC/VSBCI instructions deliver two
outputs, both of which the intrinsic exposes: a modified vector
register and a carry flag. So they have to be instruction-selected in
C++ rather than Tablegen. However, in this case, that's trivial: the
same C++ isel routine we already have for VADC works unchanged, and
all we have to do is to pass it a different instruction id.

Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75444
  • Loading branch information
statham-arm committed Mar 4, 2020
1 parent 9284abd commit 810127f
Show file tree
Hide file tree
Showing 5 changed files with 380 additions and 20 deletions.
44 changes: 24 additions & 20 deletions clang/include/clang/Basic/arm_mve.td
Expand Up @@ -1139,27 +1139,31 @@ defm sqrshr: ScalarSaturatingShiftReg<s32, s64>;
def lsll: LongScalarShift<u64, (args s32:$sh), (IRInt<"lsll"> $lo, $hi, $sh)>;
def asrl: LongScalarShift<s64, (args s32:$sh), (IRInt<"asrl"> $lo, $hi, $sh)>;

multiclass vadcsbc {
def q: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<uint>:$carry),
(seq (IRInt<NAME, [Vector]> $a, $b, (shl (load $carry), 29)):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def iq: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<uint>:$carry),
(seq (IRInt<NAME, [Vector]> $a, $b, 0):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def q_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
Ptr<uint>:$carry, Predicate:$pred),
(seq (IRInt<NAME # "_predicated", [Vector, Predicate]> $inactive, $a, $b,
(shl (load $carry), 29), $pred):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def iq_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
Ptr<uint>:$carry, Predicate:$pred),
(seq (IRInt<NAME # "_predicated", [Vector, Predicate]> $inactive, $a, $b,
0, $pred):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
}
let params = T.Int32 in {
def vadcq: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<uint>:$carry),
(seq (IRInt<"vadc", [Vector]> $a, $b, (shl (load $carry), 29)):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def vadciq: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<uint>:$carry),
(seq (IRInt<"vadc", [Vector]> $a, $b, 0):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def vadcq_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
Ptr<uint>:$carry, Predicate:$pred),
(seq (IRInt<"vadc_predicated", [Vector, Predicate]> $inactive, $a, $b,
(shl (load $carry), 29), $pred):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def vadciq_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
Ptr<uint>:$carry, Predicate:$pred),
(seq (IRInt<"vadc_predicated", [Vector, Predicate]> $inactive, $a, $b,
0, $pred):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
defm vadc: vadcsbc;
defm vsbc: vadcsbc;
}

multiclass VectorComplexAddPred<dag not_halving, dag angle> {
Expand Down
160 changes: 160 additions & 0 deletions clang/test/CodeGen/arm-mve-intrinsics/vadc.c
Expand Up @@ -87,3 +87,163 @@ int32x4_t test_vadcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigne
return vadcq_m_s32(inactive, a, b, carry, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbciq_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
// CHECK-NEXT: store i32 [[TMP3]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vsbciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out) {
#ifdef POLYMORPHIC
return vsbciq(a, b, carry_out);
#else /* POLYMORPHIC */
return vsbciq_s32(a, b, carry_out);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbciq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
// CHECK-NEXT: store i32 [[TMP3]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
uint32x4_t test_vsbciq_u32(uint32x4_t a, uint32x4_t b, unsigned *carry_out) {
#ifdef POLYMORPHIC
return vsbciq(a, b, carry_out);
#else /* POLYMORPHIC */
return vsbciq_u32(a, b, carry_out);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbcq_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
int32x4_t test_vsbcq_s32(int32x4_t a, int32x4_t b, unsigned *carry) {
#ifdef POLYMORPHIC
return vsbcq(a, b, carry);
#else /* POLYMORPHIC */
return vsbcq_s32(a, b, carry);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbcq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
uint32x4_t test_vsbcq_u32(uint32x4_t a, uint32x4_t b, unsigned *carry) {
#ifdef POLYMORPHIC
return vsbcq(a, b, carry);
#else /* POLYMORPHIC */
return vsbcq_u32(a, b, carry);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbciq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
int32x4_t test_vsbciq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry_out, mve_pred16_t p) {
#ifdef POLYMORPHIC
return vsbciq_m(inactive, a, b, carry_out, p);
#else /* POLYMORPHIC */
return vsbciq_m_s32(inactive, a, b, carry_out, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbciq_m_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
uint32x4_t test_vsbciq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry_out, mve_pred16_t p) {
#ifdef POLYMORPHIC
return vsbciq_m(inactive, a, b, carry_out, p);
#else /* POLYMORPHIC */
return vsbciq_m_u32(inactive, a, b, carry_out, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbcq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
// CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
// CHECK-NEXT: [[TMP7:%.*]] = and i32 1, [[TMP6]]
// CHECK-NEXT: store i32 [[TMP7]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP8]]
//
int32x4_t test_vsbcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t p) {
#ifdef POLYMORPHIC
return vsbcq_m(inactive, a, b, carry, p);
#else /* POLYMORPHIC */
return vsbcq_m_s32(inactive, a, b, carry, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbcq_m_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
// CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
// CHECK-NEXT: [[TMP7:%.*]] = and i32 1, [[TMP6]]
// CHECK-NEXT: store i32 [[TMP7]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP8]]
//
uint32x4_t test_vsbcq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry, mve_pred16_t p) {
#ifdef POLYMORPHIC
return vsbcq_m(inactive, a, b, carry, p);
#else /* POLYMORPHIC */
return vsbcq_m_u32(inactive, a, b, carry, p);
#endif /* POLYMORPHIC */
}
7 changes: 7 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsARM.td
Expand Up @@ -1020,10 +1020,17 @@ def int_arm_mve_vabd: Intrinsic<
def int_arm_mve_vadc: Intrinsic<
[llvm_anyvector_ty, llvm_i32_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
def int_arm_mve_vsbc: Intrinsic<
[llvm_anyvector_ty, llvm_i32_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
def int_arm_mve_vadc_predicated: Intrinsic<
[llvm_anyvector_ty, llvm_i32_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
def int_arm_mve_vsbc_predicated: Intrinsic<
[llvm_anyvector_ty, llvm_i32_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
def int_arm_mve_vmulh: Intrinsic<
[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
Expand Up @@ -4588,6 +4588,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
IntNo == Intrinsic::arm_mve_vadc_predicated);
return;
case Intrinsic::arm_mve_vsbc:
case Intrinsic::arm_mve_vsbc_predicated:
SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
IntNo == Intrinsic::arm_mve_vsbc_predicated);
return;

case Intrinsic::arm_mve_vmlldava:
case Intrinsic::arm_mve_vmlldava_predicated: {
Expand Down

0 comments on commit 810127f

Please sign in to comment.