-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Clang][LLVM][AArch64] Add support for FCVTXNT, FCVTLT, {B}FCVTNT int… #170356
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py | ||
| // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s | ||
| // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK | ||
| // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s | ||
| // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK | ||
|
|
||
| // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s | ||
| // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK | ||
| // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s | ||
| // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2p2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK | ||
|
|
||
| // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p2 \ | ||
| // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s | ||
| // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p2 -target-feature +sve2p2 \ | ||
| // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s | ||
| // | ||
| // REQUIRES: aarch64-registered-target | ||
|
|
||
| #include <arm_sve.h> | ||
|
|
||
| // SVE_ACLE_FUNC builds the name of the intrinsic under test by token pasting. | ||
| // With SVE_OVERLOADED_FORMS defined only the first and third pieces are kept, | ||
| // e.g. SVE_ACLE_FUNC(svcvtnt_f16,_f32,_z,) expands to svcvtnt_f16_z. | ||
| #ifdef SVE_OVERLOADED_FORMS | ||
| // A simple used,unused... macro, long enough to represent any SVE builtin. | ||
| #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 | ||
| #else | ||
| // Otherwise all four pieces are pasted together, e.g. svcvtnt_f16_f32_z. | ||
| #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 | ||
| #endif | ||
|
|
||
| // MODE_ATTR is appended to every test function's declarator: it expands to | ||
| // __arm_streaming when __ARM_FEATURE_SME is defined and to nothing otherwise. | ||
| #if defined __ARM_FEATURE_SME | ||
| #define MODE_ATTR __arm_streaming | ||
| #else | ||
| #define MODE_ATTR | ||
| #endif | ||
|
|
||
|
|
||
| // CHECK-LABEL: @test_svcvtnt_f16_f32_z( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) | ||
| // CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] | ||
| // | ||
| // CPP-CHECK-LABEL: @_Z22test_svcvtnt_f16_f32_zu13__SVFloat16_tu10__SVBool_tu13__SVFloat32_t( | ||
| // CPP-CHECK-NEXT: entry: | ||
| // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) | ||
| // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] | ||
| // | ||
| // Calls svcvtnt_f16_f32_z (svcvtnt_f16_z when SVE_OVERLOADED_FORMS is defined). | ||
| // The CHECK lines above expect a call to @llvm.aarch64.sve.fcvtnt.z.f16f32 with | ||
| // pg first converted from svbool to <vscale x 4 x i1>. | ||
| svfloat16_t test_svcvtnt_f16_f32_z(svfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR | ||
| { | ||
| return SVE_ACLE_FUNC(svcvtnt_f16,_f32,_z,)(inactive, pg, op); | ||
| } | ||
|
|
||
| // CHECK-LABEL: @test_svcvtnt_bf16_f32_z( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) | ||
| // CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] | ||
| // | ||
| // CPP-CHECK-LABEL: @_Z23test_svcvtnt_bf16_f32_zu14__SVBfloat16_tu10__SVBool_tu13__SVFloat32_t( | ||
| // CPP-CHECK-NEXT: entry: | ||
| // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) | ||
| // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] | ||
| // | ||
| // Calls svcvtnt_bf16_f32_z (svcvtnt_bf16_z when SVE_OVERLOADED_FORMS is defined). | ||
| // The CHECK lines above expect a call to @llvm.aarch64.sve.fcvtnt.z.bf16f32 with | ||
| // pg first converted from svbool to <vscale x 4 x i1>. | ||
| svbfloat16_t test_svcvtnt_bf16_f32_z(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR | ||
| { | ||
| return SVE_ACLE_FUNC(svcvtnt_bf16,_f32,_z,)(inactive, pg, op); | ||
| } | ||
|
|
||
| // CHECK-LABEL: @test_svcvtnt_f32_f64_z( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]]) | ||
| // CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] | ||
| // | ||
| // CPP-CHECK-LABEL: @_Z22test_svcvtnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t( | ||
| // CPP-CHECK-NEXT: entry: | ||
| // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]]) | ||
| // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] | ||
| // | ||
| // Calls svcvtnt_f32_f64_z (svcvtnt_f32_z when SVE_OVERLOADED_FORMS is defined). | ||
| // The CHECK lines above expect a call to @llvm.aarch64.sve.fcvtnt.z.f32f64 with | ||
| // pg first converted from svbool to <vscale x 2 x i1>. | ||
| svfloat32_t test_svcvtnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR | ||
| { | ||
| return SVE_ACLE_FUNC(svcvtnt_f32,_f64,_z,)(inactive, pg, op); | ||
| } | ||
|
|
||
|
|
||
|
|
||
| // CHECK-LABEL: @test_svcvtxnt_f32_f64_z( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]]) | ||
| // CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] | ||
| // | ||
| // CPP-CHECK-LABEL: @_Z23test_svcvtxnt_f32_f64_zu13__SVFloat32_tu10__SVBool_tu13__SVFloat64_t( | ||
| // CPP-CHECK-NEXT: entry: | ||
| // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]]) | ||
| // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] | ||
| // | ||
| // Calls svcvtxnt_f32_f64_z (svcvtxnt_f32_z when SVE_OVERLOADED_FORMS is defined). | ||
| // The CHECK lines above expect a call to @llvm.aarch64.sve.fcvtxnt.z.f32f64 with | ||
| // pg first converted from svbool to <vscale x 2 x i1>. | ||
| svfloat32_t test_svcvtxnt_f32_f64_z(svfloat32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR | ||
| { | ||
| return SVE_ACLE_FUNC(svcvtxnt_f32,_f64,_z,)(inactive, pg, op); | ||
| } | ||
|
|
||
| // CHECK-LABEL: @test_svcvtlt_f32_f16_z( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]]) | ||
| // CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] | ||
| // | ||
| // CPP-CHECK-LABEL: @_Z22test_svcvtlt_f32_f16_zu10__SVBool_tu13__SVFloat16_t( | ||
| // CPP-CHECK-NEXT: entry: | ||
| // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]]) | ||
| // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] | ||
| // | ||
| // Calls svcvtlt_f32_f16_z (svcvtlt_f32_z when SVE_OVERLOADED_FORMS is defined). | ||
| // There is no inactive argument; the CHECK lines above expect a call to | ||
| // @llvm.aarch64.sve.fcvtlt.f32f16 whose first operand is zeroinitializer. | ||
| svfloat32_t test_svcvtlt_f32_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR | ||
| { | ||
| return SVE_ACLE_FUNC(svcvtlt_f32,_f16,_z,)(pg, op); | ||
| } | ||
|
|
||
| // CHECK-LABEL: @test_svcvtlt_f64_f32_z( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) | ||
| // CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] | ||
| // | ||
| // CPP-CHECK-LABEL: @_Z22test_svcvtlt_f64_f32_zu10__SVBool_tu13__SVFloat32_t( | ||
| // CPP-CHECK-NEXT: entry: | ||
| // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) | ||
| // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]]) | ||
| // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] | ||
| // | ||
| // Calls svcvtlt_f64_f32_z (svcvtlt_f64_z when SVE_OVERLOADED_FORMS is defined). | ||
| // There is no inactive argument; the CHECK lines above expect a call to | ||
| // @llvm.aarch64.sve.fcvtlt.f64f32 whose first operand is zeroinitializer. | ||
| svfloat64_t test_svcvtlt_f64_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR | ||
| { | ||
| return SVE_ACLE_FUNC(svcvtlt_f64,_f32,_z,)(pg, op); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4552,17 +4552,17 @@ let Predicates = [HasSVE2p2_or_SME2p2] in { | |||||
| defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">; | ||||||
|
|
||||||
| // SVE2p2 floating-point convert precision down (placing odd), zeroing predicate | ||||||
| defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">; | ||||||
| def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>; | ||||||
| defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt", "int_aarch64_sve_fcvtnt_z">; | ||||||
| defm FCVTXNT_ZPzZ : sve_float_convert_top<"fcvtxnt", int_aarch64_sve_fcvtxnt_z_f32f64>; | ||||||
| // Placing even | ||||||
| defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>; | ||||||
|
|
||||||
| // SVE2p2 floating-point convert precision up, zeroing predicate | ||||||
| defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">; | ||||||
|
|
||||||
| // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate | ||||||
| def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>; | ||||||
| defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>; | ||||||
| defm BFCVTNT_ZPzZ_StoH : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>; | ||||||
|
||||||
| defm BFCVTNT_ZPzZ_StoH : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>; | |
| defm BFCVTNT_ZPzZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_z_bf16f32, 0b0, true>; |
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -2907,9 +2907,12 @@ multiclass sve2_fp_convert_up_long_z<string asm, string op> { | |||
| defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>; | ||||
| } | ||||
|
|
||||
| multiclass sve2_fp_convert_down_narrow_z<string asm> { | ||||
| multiclass sve2_fp_convert_down_narrow_z<string asm, string op> { | ||||
| def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>; | ||||
| def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>; | ||||
|
|
||||
|
||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 | ||
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p2 < %s | FileCheck %s | ||
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p2 < %s | FileCheck %s | ||
| ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p2 -force-streaming < %s | FileCheck %s | ||
|
|
||
|
|
||
| ; FCVTNT, BFCVTNT | ||
| ; The f16f32 zeroing intrinsic is expected to select to a single | ||
| ; "fcvtnt z0.h, p0/z, z1.s" with no extra moves. | ||
| define <vscale x 8 x half> @fcvtnt_f16_f32_z(<vscale x 8 x half> %even, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) { | ||
| ; CHECK-LABEL: fcvtnt_f16_f32_z: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: fcvtnt z0.h, p0/z, z1.s | ||
| ; CHECK-NEXT: ret | ||
| %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.z.f16f32(<vscale x 8 x half> %even, | ||
| <vscale x 4 x i1> %pg, | ||
| <vscale x 4 x float> %b) | ||
| ret <vscale x 8 x half> %out | ||
| } | ||
|
|
||
| ; The bf16f32 zeroing intrinsic is expected to select to a single | ||
| ; "bfcvtnt z0.h, p0/z, z1.s" with no extra moves. | ||
| define <vscale x 8 x bfloat> @fcvtnt_bf16_f32_z(<vscale x 8 x bfloat> %even, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) { | ||
| ; CHECK-LABEL: fcvtnt_bf16_f32_z: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: bfcvtnt z0.h, p0/z, z1.s | ||
| ; CHECK-NEXT: ret | ||
| %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.z.bf16f32(<vscale x 8 x bfloat> %even, | ||
| <vscale x 4 x i1> %pg, | ||
| <vscale x 4 x float> %b) | ||
| ret <vscale x 8 x bfloat> %out | ||
| } | ||
|
|
||
| ; The f32f64 zeroing intrinsic is expected to select to a single | ||
| ; "fcvtnt z0.s, p0/z, z1.d" with no extra moves. | ||
| define <vscale x 4 x float> @fcvtnt_f32_f64_z(<vscale x 4 x float> %even, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) { | ||
| ; CHECK-LABEL: fcvtnt_f32_f64_z: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: fcvtnt z0.s, p0/z, z1.d | ||
| ; CHECK-NEXT: ret | ||
| %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.z.f32f64(<vscale x 4 x float> %even, | ||
| <vscale x 2 x i1> %pg, | ||
| <vscale x 2 x double> %b) | ||
| ret <vscale x 4 x float> %out | ||
| } | ||
|
|
||
| ; FCVTXNT | ||
|
|
||
|
|
||
| ; The fcvtxnt f32f64 zeroing intrinsic is expected to select to a single | ||
| ; "fcvtxnt z0.s, p0/z, z1.d" with no extra moves. | ||
| define <vscale x 4 x float> @fcvtxnt_f32_f64_z(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) { | ||
| ; CHECK-LABEL: fcvtxnt_f32_f64_z: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: fcvtxnt z0.s, p0/z, z1.d | ||
| ; CHECK-NEXT: ret | ||
| %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.z.f32f64(<vscale x 4 x float> %a, | ||
| <vscale x 2 x i1> %pg, | ||
| <vscale x 2 x double> %b) | ||
| ret <vscale x 4 x float> %out
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be calling `@llvm.aarch64.sve.fcvtlt.z.f64f32` instead?