125 changes: 125 additions & 0 deletions clang/test/CodeGen/arm-mve-intrinsics/vmullbq.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>

// CHECK-LABEL: @test_vmullbq_int_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vmull.v8i16.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vmullbq_int_u8(uint8x16_t a, uint8x16_t b)
{
#ifdef POLYMORPHIC
return vmullbq_int(a, b);
#else /* POLYMORPHIC */
return vmullbq_int_u8(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmullbq_int_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0)
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vmullbq_int_s16(int16x8_t a, int16x8_t b)
{
#ifdef POLYMORPHIC
return vmullbq_int(a, b);
#else /* POLYMORPHIC */
return vmullbq_int_s16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmullbq_int_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vmull.v2i64.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
uint64x2_t test_vmullbq_int_u32(uint32x4_t a, uint32x4_t b)
{
#ifdef POLYMORPHIC
return vmullbq_int(a, b);
#else /* POLYMORPHIC */
return vmullbq_int_u32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmullbq_poly_p16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vmull.poly.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0)
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vmullbq_poly_p16(uint16x8_t a, uint16x8_t b)
{
#ifdef POLYMORPHIC
return vmullbq_poly(a, b);
#else /* POLYMORPHIC */
return vmullbq_poly_p16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmullbq_int_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vmullbq_int_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vmullbq_int_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vmullbq_int_m_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmullbq_int_m_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vmullbq_int_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vmullbq_int_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vmullbq_int_m_u16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmullbq_int_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <2 x i64> [[TMP2]]
//
int64x2_t test_vmullbq_int_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vmullbq_int_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vmullbq_int_m_s32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmullbq_poly_m_p8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vmullbq_poly_m_p8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vmullbq_poly_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vmullbq_poly_m_p8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
125 changes: 125 additions & 0 deletions clang/test/CodeGen/arm-mve-intrinsics/vmulltq.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>

// CHECK-LABEL: @test_vmulltq_int_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vmull.v8i16.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1)
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vmulltq_int_u8(uint8x16_t a, uint8x16_t b)
{
#ifdef POLYMORPHIC
return vmulltq_int(a, b);
#else /* POLYMORPHIC */
return vmulltq_int_u8(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmulltq_int_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1)
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vmulltq_int_s16(int16x8_t a, int16x8_t b)
{
#ifdef POLYMORPHIC
return vmulltq_int(a, b);
#else /* POLYMORPHIC */
return vmulltq_int_s16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmulltq_int_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vmull.v2i64.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1)
// CHECK-NEXT: ret <2 x i64> [[TMP0]]
//
uint64x2_t test_vmulltq_int_u32(uint32x4_t a, uint32x4_t b)
{
#ifdef POLYMORPHIC
return vmulltq_int(a, b);
#else /* POLYMORPHIC */
return vmulltq_int_u32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmulltq_poly_p16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vmull.poly.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1)
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vmulltq_poly_p16(uint16x8_t a, uint16x8_t b)
{
#ifdef POLYMORPHIC
return vmulltq_poly(a, b);
#else /* POLYMORPHIC */
return vmulltq_poly_p16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmulltq_int_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vmulltq_int_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vmulltq_int_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vmulltq_int_m_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmulltq_int_m_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vmulltq_int_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vmulltq_int_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vmulltq_int_m_u16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmulltq_int_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <2 x i64> [[TMP2]]
//
int64x2_t test_vmulltq_int_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vmulltq_int_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vmulltq_int_m_s32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vmulltq_poly_m_p8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vmulltq_poly_m_p8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vmulltq_poly_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vmulltq_poly_m_p8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
95 changes: 95 additions & 0 deletions clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b)
{
#ifdef POLYMORPHIC
return vqaddq(a, b);
#else /* POLYMORPHIC */
return vqaddq_u8(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b)
{
#ifdef POLYMORPHIC
return vqaddq(a, b);
#else /* POLYMORPHIC */
return vqaddq_s16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b)
{
#ifdef POLYMORPHIC
return vqaddq(a, b);
#else /* POLYMORPHIC */
return vqaddq_u32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqaddq_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vqaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqaddq_m_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqaddq_m_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vqaddq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqaddq_m_u16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqaddq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqaddq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqaddq_m_s32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
95 changes: 95 additions & 0 deletions clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>

// CHECK-LABEL: @test_vqdmulhq_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
uint8x16_t test_vqdmulhq_u8(uint8x16_t a, uint8x16_t b)
{
#ifdef POLYMORPHIC
return vqdmulhq(a, b);
#else /* POLYMORPHIC */
return vqdmulhq_u8(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqdmulhq_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b)
{
#ifdef POLYMORPHIC
return vqdmulhq(a, b);
#else /* POLYMORPHIC */
return vqdmulhq_s16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqdmulhq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vqdmulhq_u32(uint32x4_t a, uint32x4_t b)
{
#ifdef POLYMORPHIC
return vqdmulhq(a, b);
#else /* POLYMORPHIC */
return vqdmulhq_u32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqdmulhq_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qdmulh.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vqdmulhq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqdmulhq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqdmulhq_m_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqdmulhq_m_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qdmulh.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vqdmulhq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqdmulhq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqdmulhq_m_u16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqdmulhq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqdmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqdmulhq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqdmulhq_m_s32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
95 changes: 95 additions & 0 deletions clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>

// CHECK-LABEL: @test_vqrdmulhq_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
uint8x16_t test_vqrdmulhq_u8(uint8x16_t a, uint8x16_t b)
{
#ifdef POLYMORPHIC
return vqrdmulhq(a, b);
#else /* POLYMORPHIC */
return vqrdmulhq_u8(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqrdmulhq_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b)
{
#ifdef POLYMORPHIC
return vqrdmulhq(a, b);
#else /* POLYMORPHIC */
return vqrdmulhq_s16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqrdmulhq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vqrdmulhq_u32(uint32x4_t a, uint32x4_t b)
{
#ifdef POLYMORPHIC
return vqrdmulhq(a, b);
#else /* POLYMORPHIC */
return vqrdmulhq_u32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqrdmulhq_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qrdmulh.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vqrdmulhq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqrdmulhq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqrdmulhq_m_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqrdmulhq_m_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qrdmulh.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vqrdmulhq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqrdmulhq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqrdmulhq_m_u16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqrdmulhq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqrdmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqrdmulhq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqrdmulhq_m_s32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
95 changes: 95 additions & 0 deletions clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b)
{
#ifdef POLYMORPHIC
return vqsubq(a, b);
#else /* POLYMORPHIC */
return vqsubq_u8(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b)
{
#ifdef POLYMORPHIC
return vqsubq(a, b);
#else /* POLYMORPHIC */
return vqsubq_s16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b)
{
#ifdef POLYMORPHIC
return vqsubq(a, b);
#else /* POLYMORPHIC */
return vqsubq_u32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqsubq_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qsub.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vqsubq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqsubq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqsubq_m_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqsubq_m_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qsub.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vqsubq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqsubq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqsubq_m_u16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vqsubq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vqsubq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vqsubq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vqsubq_m_s32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
95 changes: 95 additions & 0 deletions clang/test/CodeGen/arm-mve-intrinsics/vrhaddq.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>

// CHECK-LABEL: @test_vrhaddq_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vrhadd.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
//
uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b)
{
#ifdef POLYMORPHIC
return vrhaddq(a, b);
#else /* POLYMORPHIC */
return vrhaddq_u8(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vrhaddq_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vrhadd.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b)
{
#ifdef POLYMORPHIC
return vrhaddq(a, b);
#else /* POLYMORPHIC */
return vrhaddq_s16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vrhaddq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vrhadd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b)
{
#ifdef POLYMORPHIC
return vrhaddq(a, b);
#else /* POLYMORPHIC */
return vrhaddq_u32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vrhaddq_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.rhadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vrhaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vrhaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vrhaddq_m_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vrhaddq_m_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.rhadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vrhaddq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vrhaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vrhaddq_m_u16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vrhaddq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.rhadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vrhaddq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vrhaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vrhaddq_m_s32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
22 changes: 17 additions & 5 deletions clang/utils/TableGen/MveEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ class Type {
virtual std::string llvmName() const {
PrintFatalError("no LLVM type name available for type " + cName());
}
virtual std::string acleSuffix() const {
virtual std::string acleSuffix(std::string) const {
PrintFatalError("no ACLE suffix available for this type");
}
};
Expand Down Expand Up @@ -180,7 +180,7 @@ class VoidType : public Type {
std::string cName() const override { return "void"; }

static bool classof(const Type *T) { return T->typeKind() == TypeKind::Void; }
std::string acleSuffix() const override { return ""; }
std::string acleSuffix(std::string) const override { return ""; }
};

class PointerType : public Type {
Expand Down Expand Up @@ -266,8 +266,9 @@ class ScalarType : public CRegularNamedType {
}
return "Int" + utostr(Bits) + "Ty";
}
std::string acleSuffix() const override {
return "_" + toLetter(Kind) + utostr(Bits);
std::string acleSuffix(std::string overrideLetter) const override {
return "_" + (overrideLetter.size() ? overrideLetter : toLetter(Kind))
+ utostr(Bits);
}
bool isInteger() const { return Kind != ScalarTypeKind::Float; }
bool requiresFloat() const override { return !isInteger(); }
Expand Down Expand Up @@ -1093,6 +1094,16 @@ const Type *MveEmitter::getType(DagInit *D, const Type *Param) {
PrintFatalError("Cannot find a type to satisfy CopyKind");
}

if (Op->getName() == "CTO_DoubleSize") {
const ScalarType *STKind = cast<ScalarType>(getType(D->getArg(0), Param));
for (const auto &kv : ScalarTypes) {
const ScalarType *RT = kv.second.get();
if (RT->kind() == STKind->kind() && RT->sizeInBits() == 2*STKind->sizeInBits())
return RT;
}
PrintFatalError("Cannot find a type to satisfy DoubleSize");
}

PrintFatalError("Bad operator in type dag expression");
}

Expand Down Expand Up @@ -1251,7 +1262,8 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
StringRef BaseName =
(R->isSubClassOf("NameOverride") ? R->getValueAsString("basename")
: R->getName());
FullName = (Twine(BaseName) + Param->acleSuffix()).str();
StringRef overrideLetter = R->getValueAsString("overrideKindLetter");
FullName = (Twine(BaseName) + Param->acleSuffix(overrideLetter)).str();

// Derive the intrinsic's polymorphic name, by removing components from the
// full name as specified by its 'pnt' member ('polymorphic name type'),
Expand Down
50 changes: 50 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsARM.td
Original file line number Diff line number Diff line change
Expand Up @@ -837,9 +837,38 @@ def int_arm_mve_mul_predicated: Intrinsic<[llvm_anyvector_ty],
def int_arm_mve_mulh_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
def int_arm_mve_qdmulh_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
def int_arm_mve_rmulh_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
def int_arm_mve_qrdmulh_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
def int_arm_mve_mull_int_predicated: Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty,
LLVMMatchType<1>],
[IntrNoMem]>;
def int_arm_mve_mull_poly_predicated: Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty,
LLVMMatchType<1>],
[IntrNoMem]>;
def int_arm_mve_qadd_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
def int_arm_mve_hadd_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
def int_arm_mve_rhadd_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
def int_arm_mve_qsub_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
def int_arm_mve_hsub_predicated: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;

defm int_arm_mve_minv: IntrinsicSignSuffix<[llvm_i32_ty],
[llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
Expand Down Expand Up @@ -925,9 +954,30 @@ def int_arm_mve_vadc_predicated: Intrinsic<
def int_arm_mve_vmulh: Intrinsic<
[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vqdmulh: Intrinsic<
[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vhadd: Intrinsic<
[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vrhadd: Intrinsic<
[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vhsub: Intrinsic<
[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vrmulh: Intrinsic<
[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vqrdmulh: Intrinsic<
[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vmull: Intrinsic<
[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem]>;
def int_arm_mve_vmull_poly: Intrinsic<
[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem]>;

// Intrinsic with a predicated and a non-predicated case. The predicated case
// has two additional parameters: inactive (the value for inactive lanes, can
Expand Down
376 changes: 275 additions & 101 deletions llvm/lib/Target/ARM/ARMInstrMVE.td

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7895,10 +7895,10 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
case ARM::MVE_VQDMULLs32bh:
case ARM::MVE_VQDMULLs32th:
case ARM::MVE_VCMULf32:
case ARM::MVE_VMULLs32bh:
case ARM::MVE_VMULLs32th:
case ARM::MVE_VMULLu32bh:
case ARM::MVE_VMULLu32th: {
case ARM::MVE_VMULLBs32:
case ARM::MVE_VMULLTs32:
case ARM::MVE_VMULLBu32:
case ARM::MVE_VMULLTu32: {
if (Operands[3]->getReg() == Operands[4]->getReg()) {
return Error (Operands[3]->getStartLoc(),
"Qd register and Qn register can't be identical");
Expand Down
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vhaddq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <16 x i8> @test_vhaddq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhaddq_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vhadd.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <16 x i8> @llvm.arm.mve.vhadd.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %0
}

declare <16 x i8> @llvm.arm.mve.vhadd.v16i8(<16 x i8>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vhaddq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhaddq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vhadd.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x i16> @llvm.arm.mve.vhadd.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vhadd.v8i16(<8 x i16>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vhaddq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhaddq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vhadd.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vhadd.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vhadd.v4i32(<4 x i32>, <4 x i32>) #1

define arm_aapcs_vfpcc <16 x i8> @test_vhaddq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhaddq_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vhaddt.s8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1

declare <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vhaddq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhaddq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vhaddt.s16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1

declare <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vhaddq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhaddq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vhaddt.s32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1

declare <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vhsubq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <16 x i8> @test_vhsubq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhsubq_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vhsub.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <16 x i8> @llvm.arm.mve.vhsub.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %0
}

declare <16 x i8> @llvm.arm.mve.vhsub.v16i8(<16 x i8>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vhsubq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhsubq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vhsub.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x i16> @llvm.arm.mve.vhsub.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vhsub.v8i16(<8 x i16>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vhsubq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhsubq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vhsub.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vhsub.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vhsub.v4i32(<4 x i32>, <4 x i32>) #1

define arm_aapcs_vfpcc <16 x i8> @test_vhsubq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhsubq_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vhsubt.s8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <16 x i8> @llvm.arm.mve.hsub.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1

declare <16 x i8> @llvm.arm.mve.hsub.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vhsubq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhsubq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vhsubt.s16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.hsub.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1

declare <8 x i16> @llvm.arm.mve.hsub.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vhsubq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vhsubq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vhsubt.s32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1

declare <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1
121 changes: 121 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vmullbq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <8 x i16> @test_vmullbq_int_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_int_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x i16> @llvm.arm.mve.vmull.v8i16.v16i8(<16 x i8> %a, <16 x i8> %b, i32 0)
ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vmull.v8i16.v16i8(<16 x i8>, <16 x i8>, i32) #1

define arm_aapcs_vfpcc <4 x i32> @test_vmullbq_int_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_int_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16>, <8 x i16>, i32) #1

define arm_aapcs_vfpcc <2 x i64> @test_vmullbq_int_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_int_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s32 q2, q0, q1
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: bx lr
entry:
%0 = tail call <2 x i64> @llvm.arm.mve.vmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vmull.v2i64.v4i32(<4 x i32>, <4 x i32>, i32) #1

define arm_aapcs_vfpcc <4 x i32> @test_vmullbq_poly_p16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_poly_p16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.p16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vmull.poly.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vmull.poly.v4i32.v8i16(<8 x i16>, <8 x i16>, i32) #1

define arm_aapcs_vfpcc <8 x i16> @test_vmullbq_int_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_int_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmullbt.s8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, <16 x i1> %1, <16 x i8> %inactive)
ret <8 x i16> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1

declare <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>, <16 x i8>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vmullbq_int_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_int_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmullbt.s16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, <8 x i1> %1, <8 x i16> %inactive)
ret <4 x i32> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1

declare <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>, <8 x i16>) #1

define arm_aapcs_vfpcc <2 x i64> @test_vmullbq_int_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_int_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmullbt.s32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1, <4 x i32> %inactive)
ret <2 x i64> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1

declare <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vmullbq_poly_m_p8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_poly_m_p8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmullbt.p8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, <16 x i1> %1, <16 x i8> %inactive)
ret <8 x i16> %2
}

declare <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>, <16 x i8>) #1
121 changes: 121 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulltq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <8 x i16> @test_vmulltq_int_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_int_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullt.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x i16> @llvm.arm.mve.vmull.v8i16.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1)
ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vmull.v8i16.v16i8(<16 x i8>, <16 x i8>, i32) #1

define arm_aapcs_vfpcc <4 x i32> @test_vmulltq_int_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_int_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullt.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16>, <8 x i16>, i32) #1

define arm_aapcs_vfpcc <2 x i64> @test_vmulltq_int_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_int_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullt.s32 q2, q0, q1
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: bx lr
entry:
%0 = tail call <2 x i64> @llvm.arm.mve.vmull.v2i64.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1)
ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vmull.v2i64.v4i32(<4 x i32>, <4 x i32>, i32) #1

define arm_aapcs_vfpcc <4 x i32> @test_vmulltq_poly_p16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_poly_p16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullt.p16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vmull.poly.v4i32.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vmull.poly.v4i32.v8i16(<8 x i16>, <8 x i16>, i32) #1

define arm_aapcs_vfpcc <8 x i16> @test_vmulltq_int_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_int_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmulltt.s8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, <16 x i1> %1, <16 x i8> %inactive)
ret <8 x i16> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1

declare <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>, <16 x i8>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vmulltq_int_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_int_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmulltt.s16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, <8 x i1> %1, <8 x i16> %inactive)
ret <4 x i32> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1

declare <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>, <8 x i16>) #1

define arm_aapcs_vfpcc <2 x i64> @test_vmulltq_int_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_int_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmulltt.s32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, <4 x i1> %1, <4 x i32> %inactive)
ret <2 x i64> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1

declare <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vmulltq_poly_m_p8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_poly_m_p8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmulltt.p8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, <16 x i1> %1, <16 x i8> %inactive)
ret <8 x i16> %2
}

declare <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>, <16 x i8>) #1
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vqaddq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqaddq_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqadd.u8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %0
}

declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqaddq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqadd.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %0
}

declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqaddq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqadd.u32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) #1

define arm_aapcs_vfpcc <16 x i8> @test_vqaddq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqaddq_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.s8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <16 x i8> @llvm.arm.mve.qadd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2

declare <16 x i8> @llvm.arm.mve.qadd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2

define arm_aapcs_vfpcc <8 x i16> @test_vqaddq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqaddq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.s16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.qadd.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2

declare <8 x i16> @llvm.arm.mve.qadd.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2

define arm_aapcs_vfpcc <4 x i32> @test_vqaddq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqaddq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.s32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2

declare <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vqdmulhq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <16 x i8> @test_vqdmulhq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqdmulhq_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqdmulh.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <16 x i8> @llvm.arm.mve.vqdmulh.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %0
}

declare <16 x i8> @llvm.arm.mve.vqdmulh.v16i8(<16 x i8>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqdmulhq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqdmulh.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x i16> @llvm.arm.mve.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vqdmulh.v8i16(<8 x i16>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqdmulhq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqdmulh.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vqdmulh.v4i32(<4 x i32>, <4 x i32>) #1

define arm_aapcs_vfpcc <16 x i8> @test_vqdmulhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqdmulhq_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqdmulht.s8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <16 x i8> @llvm.arm.mve.qdmulh.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1

declare <16 x i8> @llvm.arm.mve.qdmulh.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqdmulhq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqdmulht.s16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.qdmulh.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1

declare <8 x i16> @llvm.arm.mve.qdmulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqdmulhq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqdmulht.s32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1

declare <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vqrdmulhq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmulhq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqrdmulhq_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqrdmulh.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <16 x i8> @llvm.arm.mve.vqrdmulh.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %0
}

declare <16 x i8> @llvm.arm.mve.vqrdmulh.v16i8(<16 x i8>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqrdmulhq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqrdmulh.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x i16> @llvm.arm.mve.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulhq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqrdmulhq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqrdmulh.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) #1

define arm_aapcs_vfpcc <16 x i8> @test_vqrdmulhq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqrdmulhq_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqrdmulht.s8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <16 x i8> @llvm.arm.mve.qrdmulh.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1

declare <16 x i8> @llvm.arm.mve.qrdmulh.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulhq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqrdmulhq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqrdmulht.s16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.qrdmulh.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1

declare <8 x i16> @llvm.arm.mve.qrdmulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulhq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqrdmulhq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqrdmulht.s32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1

declare <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vqsubq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqsubq_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqsub.u8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %0
}

declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqsubq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqsub.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %0
}

declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqsubq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vqsub.u32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) #1

define arm_aapcs_vfpcc <16 x i8> @test_vqsubq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqsubq_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.s8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <16 x i8> @llvm.arm.mve.qsub.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2

declare <16 x i8> @llvm.arm.mve.qsub.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2

define arm_aapcs_vfpcc <8 x i16> @test_vqsubq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqsubq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.s16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.qsub.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2

declare <8 x i16> @llvm.arm.mve.qsub.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2

define arm_aapcs_vfpcc <4 x i32> @test_vqsubq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vqsubq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.s32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2

declare <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vrhaddq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <16 x i8> @test_vrhaddq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_u8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrhadd.s8 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <16 x i8> @llvm.arm.mve.vrhadd.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %0
}

declare <16 x i8> @llvm.arm.mve.vrhadd.v16i8(<16 x i8>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vrhaddq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrhadd.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <8 x i16> @llvm.arm.mve.vrhadd.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vrhadd.v8i16(<8 x i16>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vrhaddq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrhadd.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vrhadd.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vrhadd.v4i32(<4 x i32>, <4 x i32>) #1

define arm_aapcs_vfpcc <16 x i8> @test_vrhaddq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_m_s8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vrhaddt.s8 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
%2 = tail call <16 x i8> @llvm.arm.mve.rhadd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1

declare <16 x i8> @llvm.arm.mve.rhadd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #1

define arm_aapcs_vfpcc <8 x i16> @test_vrhaddq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vrhaddt.s16 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
%2 = tail call <8 x i16> @llvm.arm.mve.rhadd.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1

declare <8 x i16> @llvm.arm.mve.rhadd.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1

define arm_aapcs_vfpcc <4 x i32> @test_vrhaddq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_m_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vrhaddt.s32 q0, q1, q2
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call <4 x i32> @llvm.arm.mve.rhadd.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1

declare <4 x i32> @llvm.arm.mve.rhadd.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1