Skip to content

Commit

Permalink
[AArch64] Fix vsqadd scalar intrinsics operands
Browse files Browse the repository at this point in the history
Summary:
Change the vsqadd scalar intrinsics to take the second argument as a signed value, not unsigned,
according to https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics

The existing unsigned argument can cause faulty code because converting a negative float to an
unsigned type is undefined behavior, which llvm/clang optimizes away.

Reviewers: LukeCheeseman, john.brawn

Reviewed By: john.brawn

Subscribers: john.brawn, javed.absar, kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64239

llvm-svn: 365298
  • Loading branch information
Diogo N. Sampaio committed Jul 8, 2019
1 parent 1606a86 commit 0464e07
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 5 deletions.
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/arm_neon.td
Expand Up @@ -1337,7 +1337,7 @@ def SCALAR_SUQADD : SInst<"vuqadd", "sss", "ScSsSiSl">;

////////////////////////////////////////////////////////////////////////////////
// Scalar Unsigned Saturating Accumulated of Signed Value
def SCALAR_USQADD : SInst<"vsqadd", "sss", "SUcSUsSUiSUl">;
def SCALAR_USQADD : SInst<"vsqadd", "ss$", "SUcSUsSUiSUl">;

////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Doubling Multiply-Add Long
Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGen/aarch64-neon-intrinsics.c
Expand Up @@ -13913,7 +13913,7 @@ int64_t test_vuqaddd_s64(int64_t a, int64_t b) {
// CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) {
uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
return (uint8_t)vsqaddb_u8(a, b);
}

Expand All @@ -13923,21 +13923,21 @@ uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) {
// CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) {
uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
return (uint16_t)vsqaddh_u16(a, b);
}

// CHECK-LABEL: @test_vsqadds_u32(
// CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VSQADDS_U32_I]]
uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) {
uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
return (uint32_t)vsqadds_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddd_u64(
// CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VSQADDD_U64_I]]
uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) {
uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
return (uint64_t)vsqaddd_u64(a, b);
}

Expand Down
49 changes: 49 additions & 0 deletions clang/test/CodeGen/aarch64-neon-vsqadd-float-conversion.c
@@ -0,0 +1,49 @@
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -S -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg -dce \
// RUN: | FileCheck %s

#include <arm_neon.h>

// Check float conversion is accepted for int argument
// 8-bit case: passes the float literal -1.0f as the second operand of
// vsqaddb_u8. The expected IR below inserts `i8 -1` for that operand, i.e. the
// float is converted to the signed value -1 before the usqadd call.
// Declared with (void) so the definition provides a proper prototype.
uint8_t test_vsqaddb_u8(void) {
  return vsqaddb_u8(1, -1.0f);
}

// 16-bit case: passes the float literal -1.0f as the second operand of
// vsqaddh_u16. The expected IR below inserts `i16 -1` for that operand, i.e.
// the float is converted to the signed value -1 before the usqadd call.
// Declared with (void) so the definition provides a proper prototype.
uint16_t test_vsqaddh_u16(void) {
  return vsqaddh_u16(1, -1.0f);
}

// 32-bit case: passes the float literal -1.0f as the second operand of
// vsqadds_u32. The expected IR below calls the scalar usqadd.i32 intrinsic
// with `i32 -1`, i.e. the float is converted to the signed value -1.
// Declared with (void) so the definition provides a proper prototype.
uint32_t test_vsqadds_u32(void) {
  return vsqadds_u32(1, -1.0f);
}

// 64-bit case: passes the float literal -1.0f as the second operand of
// vsqaddd_u64. The expected IR below calls the scalar usqadd.i64 intrinsic
// with `i64 -1`, i.e. the float is converted to the signed value -1.
// Declared with (void) so the definition provides a proper prototype.
uint64_t test_vsqaddd_u64(void) {
  return vsqaddd_u64(1, -1.0f);
}

// CHECK-LABEL: @test_vsqaddb_u8()
// CHECK: entry:
// CHECK-NEXT: [[T0:%.*]] = insertelement <8 x i8> undef, i8 1, i64 0
// CHECK-NEXT: [[T1:%.*]] = insertelement <8 x i8> undef, i8 -1, i64 0
// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[T0]], <8 x i8> [[T1]])
// CHECK-NEXT: [[R:%.*]] = extractelement <8 x i8> [[V]], i64 0
// CHECK-NEXT: ret i8 [[R]]

// CHECK-LABEL: @test_vsqaddh_u16()
// CHECK: entry:
// CHECK-NEXT: [[T0:%.*]] = insertelement <4 x i16> undef, i16 1, i64 0
// CHECK-NEXT: [[T1:%.*]] = insertelement <4 x i16> undef, i16 -1, i64 0
// CHECK-NEXT: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[T0]], <4 x i16> [[T1]])
// CHECK-NEXT: [[R:%.*]] = extractelement <4 x i16> [[V]], i64 0
// CHECK-NEXT: ret i16 [[R]]

// CHECK-LABEL: @test_vsqadds_u32()
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 1, i32 -1)
// CHECK-NEXT: ret i32 [[V]]

// CHECK-LABEL: @test_vsqaddd_u64()
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 1, i64 -1)
// CHECK-NEXT: ret i64 [[V]]

0 comments on commit 0464e07

Please sign in to comment.