Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CodeGen][ARM] Coerce FP16 vectors to integer vectors when needed
Summary: On targets that do not support FP16 natively, LLVM currently legalizes vectors of FP16 values by scalarizing them and promoting them to FP32. This causes problems for the following code: `void foo(int, ...); typedef __attribute__((neon_vector_type(4))) __fp16 float16x4_t; void bar(float16x4_t x) { foo(42, x); }` According to the AAPCS (appendix A.2), float16x4_t is a containerized vector fundamental type, so 'foo' expects that the 4 16-bit FP values are packed into 2 32-bit registers, but 'bar' instead promotes them to 4 single-precision values. Since we already handle scalar FP16 values in the frontend by bitcasting them to/from integers, this patch adds similar handling for vector types and homogeneous FP16 vector aggregates. One existing test required some adjustments because we now generate more bitcasts (so the patch changes the test to target a machine with native FP16 support). Reviewers: eli.friedman, olista01, SjoerdMeijer, javed.absar, efriedma Reviewed By: javed.absar, efriedma Subscribers: efriedma, kristof.beyls, cfe-commits, chrib Differential Revision: https://reviews.llvm.org/D50507 llvm-svn: 342034
- Loading branch information
Showing
3 changed files
with
252 additions
and
153 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ | ||
// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O1 %s \ | ||
// RUN: | FileCheck %s --check-prefix=CHECK-SOFT | ||
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ | ||
// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O1 %s \ | ||
// RUN: | FileCheck %s --check-prefix=CHECK-HARD | ||
// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ | ||
// RUN: -mfloat-abi hard -target-feature +neon -target-feature +fullfp16 \ | ||
// RUN: -emit-llvm -o - -O1 %s \ | ||
// RUN: | FileCheck %s --check-prefix=CHECK-FULL | ||
|
||
// NEON vector types of __fp16 elements under test: a 4-element and an | ||
// 8-element variant. | ||
typedef __attribute__((neon_vector_type(4))) __fp16 float16x4_t; | ||
typedef __attribute__((neon_vector_type(8))) __fp16 float16x8_t; | ||
|
||
// Aggregate whose only member is an array of two float16x4_t vectors; it is | ||
// passed and returned by value in test_hfa / test_ret_hfa. | ||
typedef struct { float16x4_t x[2]; } hfa_t; | ||
// CHECK-FULL: %struct.hfa_t = type { [2 x <4 x half>] } | ||
|
||
// Globals written by st4/st8 and read by ld4/ld8, so that the vector | ||
// arguments and return values show up as stores and loads in the IR. | ||
float16x4_t g4; | ||
float16x8_t g8; | ||
|
||
// Stores a by-value float16x4_t argument into g4. The checks below expect the | ||
// parameter to be coerced to <2 x i32> in the soft- and hard-float runs, and | ||
// to remain <4 x half> in the +fullfp16 run. | ||
void st4(float16x4_t a) { g4 = a; } | ||
// CHECK-SOFT: define void @st4(<2 x i32> %a.coerce) | ||
// CHECK-SOFT: store <2 x i32> %a.coerce, <2 x i32>* bitcast (<4 x half>* @g4 to <2 x i32>*) | ||
// | ||
// CHECK-HARD: define arm_aapcs_vfpcc void @st4(<2 x i32> %a.coerce) | ||
// CHECK-HARD: store <2 x i32> %a.coerce, <2 x i32>* bitcast (<4 x half>* @g4 to <2 x i32>*) | ||
// | ||
// CHECK-FULL: define arm_aapcs_vfpcc void @st4(<4 x half> %a) | ||
// CHECK-FULL: store <4 x half> %a, <4 x half>* @g4 | ||
|
||
// Returns g4 by value. The checks below expect a <2 x i32> return type in the | ||
// soft- and hard-float runs, and <4 x half> in the +fullfp16 run. | ||
float16x4_t ld4(void) { return g4; } | ||
// CHECK-SOFT: define <2 x i32> @ld4() | ||
// CHECK-SOFT: %0 = load <2 x i32>, <2 x i32>* bitcast (<4 x half>* @g4 to <2 x i32>*) | ||
// CHECK-SOFT: ret <2 x i32> %0 | ||
// | ||
// CHECK-HARD: define arm_aapcs_vfpcc <2 x i32> @ld4() | ||
// CHECK-HARD: %0 = load <2 x i32>, <2 x i32>* bitcast (<4 x half>* @g4 to <2 x i32>*) | ||
// CHECK-HARD: ret <2 x i32> %0 | ||
// | ||
// CHECK-FULL: define arm_aapcs_vfpcc <4 x half> @ld4() | ||
// CHECK-FULL: %0 = load <4 x half>, <4 x half>* @g4 | ||
// CHECK-FULL: ret <4 x half> %0 | ||
|
||
// Stores a by-value float16x8_t argument into g8. The checks below expect the | ||
// parameter to be coerced to <4 x i32> in the soft- and hard-float runs, and | ||
// to remain <8 x half> in the +fullfp16 run. | ||
void st8(float16x8_t a) { g8 = a; } | ||
// CHECK-SOFT: define void @st8(<4 x i32> %a.coerce) | ||
// CHECK-SOFT: store <4 x i32> %a.coerce, <4 x i32>* bitcast (<8 x half>* @g8 to <4 x i32>*) | ||
// | ||
// CHECK-HARD: define arm_aapcs_vfpcc void @st8(<4 x i32> %a.coerce) | ||
// CHECK-HARD: store <4 x i32> %a.coerce, <4 x i32>* bitcast (<8 x half>* @g8 to <4 x i32>*) | ||
// | ||
// CHECK-FULL: define arm_aapcs_vfpcc void @st8(<8 x half> %a) | ||
// CHECK-FULL: store <8 x half> %a, <8 x half>* @g8 | ||
|
||
// Returns g8 by value. The checks below expect a <4 x i32> return type in the | ||
// soft- and hard-float runs, and <8 x half> in the +fullfp16 run. | ||
float16x8_t ld8(void) { return g8; } | ||
// CHECK-SOFT: define <4 x i32> @ld8() | ||
// CHECK-SOFT: %0 = load <4 x i32>, <4 x i32>* bitcast (<8 x half>* @g8 to <4 x i32>*) | ||
// CHECK-SOFT: ret <4 x i32> %0 | ||
// | ||
// CHECK-HARD: define arm_aapcs_vfpcc <4 x i32> @ld8() | ||
// CHECK-HARD: %0 = load <4 x i32>, <4 x i32>* bitcast (<8 x half>* @g8 to <4 x i32>*) | ||
// CHECK-HARD: ret <4 x i32> %0 | ||
// | ||
// CHECK-FULL: define arm_aapcs_vfpcc <8 x half> @ld8() | ||
// CHECK-FULL: %0 = load <8 x half>, <8 x half>* @g8 | ||
// CHECK-FULL: ret <8 x half> %0 | ||
|
||
// Takes an hfa_t by value and does nothing with it; only the emitted | ||
// signature is checked. Expected parameter type per run: [2 x i64] (soft), | ||
// [2 x <2 x i32>] (hard), %struct.hfa_t (+fullfp16). | ||
void test_hfa(hfa_t a) {} | ||
// CHECK-SOFT: define void @test_hfa([2 x i64] %a.coerce) | ||
// CHECK-HARD: define arm_aapcs_vfpcc void @test_hfa([2 x <2 x i32>] %a.coerce) | ||
// CHECK-FULL: define arm_aapcs_vfpcc void @test_hfa(%struct.hfa_t %a.coerce) | ||
|
||
// Returns the global ghfa by value; only the emitted signature is checked. | ||
// Expected per run: returned through an sret pointer argument (soft), | ||
// returned as [2 x <2 x i32>] (hard), returned as %struct.hfa_t (+fullfp16). | ||
hfa_t ghfa; | ||
hfa_t test_ret_hfa(void) { return ghfa; } | ||
// CHECK-SOFT: define void @test_ret_hfa(%struct.hfa_t* noalias nocapture sret %agg.result) | ||
// CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @test_ret_hfa() | ||
// CHECK-FULL: define arm_aapcs_vfpcc %struct.hfa_t @test_ret_hfa() |
Oops, something went wrong.