Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NFC][TLI] Improve tests for ArmPL and SLEEF Intrinsics. #73352

Merged

Conversation

paschalis-mpeis
Copy link
Member

Auto-generate test armpl-intrinsics.ll, and use active lane mask to have shorter shufflevector check lines.

The update scripts now emit @llvm.compiler.used literally instead of the regex: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]]

Auto-generate test `armpl-intrinsics.ll`, and use active lane mask to
have shorter `shufflevector` check lines.

The update scripts now emit `@llvm.compiler.used` literally instead of the regex:
`@[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]]`
@llvmbot
Copy link
Collaborator

llvmbot commented Nov 24, 2023

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-aarch64

Author: Paschalis Mpeis (paschalis-mpeis)

Changes

Auto-generate test armpl-intrinsics.ll, and use active lane mask to have shorter shufflevector check lines.

The update scripts now emit @llvm.compiler.used literally instead of the regex: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]]


Patch is 53.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73352.diff

4 Files Affected:

  • (modified) llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll (+1-1)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll (+198-77)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll (+63-65)
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
index a38d4a53407c5d2..18431ae021f9766 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
 
 ;.
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
 ;.
 define <2 x double> @llvm_cos_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_cos_f64
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
index cedb7dd85149d00..be247de368056e7 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
@@ -4,7 +4,7 @@
 target triple = "aarch64-unknown-linux-gnu"
 
 ;.
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
 ;.
 define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
 ; CHECK-LABEL: @llvm_ceil_f64(
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
index 03d959c928577d5..07b1402b4697fa2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
@@ -1,10 +1,9 @@
-; RUN: opt -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,NEON
-; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,SVE
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(\.|_v|_sv)(ceil|copysign|cos|exp\.|expf?\(|exp2|exp10|fabs|floor|fma|log|m..num|pow|nearbyint|rint|round|sin|sqrt|trunc)|(ret)" --version 2
+; RUN: opt -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize  -S < %s | FileCheck %s --check-prefixes=NEON
+; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefixes=SVE
 
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
 
-
 ; Tests are checking if LV can vectorize loops with llvm math intrinsics
 ; using mappings from TLI for scalable and fixed width vectorization.
 
@@ -12,10 +11,18 @@ declare double @llvm.cos.f64(double)
 declare float @llvm.cos.f32(float)
 
 define void @cos_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @cos_f64(
-; NEON:     [[TMP5:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[TMP4:%.*]])
-; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK:    ret void
+;
+; NEON-LABEL: define void @cos_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[IN:%.*]]) #[[ATTR1:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @cos_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[IN:%.*]]) #[[ATTR5:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -36,10 +43,17 @@ define void @cos_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
 }
 
 define void @cos_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @cos_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @cos_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[IN:%.*]]) #[[ATTR2:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @cos_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[IN:%.*]]) #[[ATTR6:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -63,10 +77,15 @@ declare double @llvm.exp.f64(double)
 declare float @llvm.exp.f32(float)
 
 define void @exp_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp_f64(
-; NEON:     [[TMP5:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[TMP4:%.*]])
-; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK:    ret void
+; NEON-LABEL: define void @exp_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[IN:%.*]]) #[[ATTR3:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[IN:%.*]]) #[[ATTR7:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -87,10 +106,15 @@ define void @exp_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
 }
 
 define void @exp_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @exp_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[IN:%.*]]) #[[ATTR4:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[IN:%.*]]) #[[ATTR8:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -114,10 +138,17 @@ declare double @llvm.exp2.f64(double)
 declare float @llvm.exp2.f32(float)
 
 define void @exp2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp2_f64(
-; NEON:     [[TMP5:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[TMP4:%.*]])
-; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK:    ret void
+; NEON-LABEL: define void @exp2_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[IN:%.*]]) #[[ATTR5:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp2_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[IN:%.*]]) #[[ATTR9:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -138,10 +169,17 @@ define void @exp2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
 }
 
 define void @exp2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp2_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @exp2_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[IN:%.*]]) #[[ATTR6:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp2_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[IN:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -165,10 +203,17 @@ declare double @llvm.exp10.f64(double)
 declare float @llvm.exp10.f32(float)
 
 define void @exp10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp10_f64(
-; NEON:     [[TMP5:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[TMP4:%.*]])
-; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK:    ret void
+; NEON-LABEL: define void @exp10_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[IN:%.*]]) #[[ATTR7:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp10_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[IN:%.*]]) #[[ATTR11:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -189,10 +234,17 @@ define void @exp10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
 }
 
 define void @exp10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp10_f32(
-; NEON:     [[TMP5:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[TMP4:%.*]])
-; SVE:      [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK:    ret void
+; NEON-LABEL: define void @exp10_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[IN:%.*]]) #[[ATTR8:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @exp10_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[IN:%.*]]) #[[ATTR12:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -216,10 +268,17 @@ declare double @llvm.log.f64(double)
 declare float @llvm.log.f32(float)
 
 define void @log_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log_f64(
-; NEON:     [[TMP5:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[TMP4:%.*]])
-; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK:    ret void
+; NEON-LABEL: define void @log_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.log.f64(double [[IN:%.*]]) #[[ATTR9:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.log.f64(double [[IN:%.*]]) #[[ATTR13:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -240,10 +299,17 @@ define void @log_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
 }
 
 define void @log_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @log_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.log.f32(float [[IN:%.*]]) #[[ATTR10:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.log.f32(float [[IN:%.*]]) #[[ATTR14:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -267,10 +333,17 @@ declare double @llvm.log2.f64(double)
 declare float @llvm.log2.f32(float)
 
 define void @log2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log2_f64(
-; NEON:     [[TMP5:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[TMP4:%.*]])
-; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK:    ret void
+; NEON-LABEL: define void @log2_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[IN:%.*]]) #[[ATTR11:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log2_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[IN:%.*]]) #[[ATTR15:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -291,10 +364,17 @@ define void @log2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
 }
 
 define void @log2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log2_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @log2_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[IN:%.*]]) #[[ATTR12:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log2_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[IN:%.*]]) #[[ATTR16:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -318,10 +398,17 @@ declare double @llvm.log10.f64(double)
 declare float @llvm.log10.f32(float)
 
 define void @log10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log10_f64(
-; NEON:     [[TMP5:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[TMP4:%.*]])
-; SVE:      [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK:    ret void
+; NEON-LABEL: define void @log10_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[IN:%.*]]) #[[ATTR13:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log10_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[IN:%.*]]) #[[ATTR17:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -342,10 +429,17 @@ define void @log10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
 }
 
 define void @log10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log10_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @log10_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON:    [[TMP4:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON:    [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[IN:%.*]]) #[[ATTR14:[0-9]+]]
+; NEON:    ret void
+;
+; SVE-LABEL: define void @log10_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE:    [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE:    [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[IN:%.*]]) #[[ATTR18:[0-9]+]]
+; SVE:    ret void
 ;
   entry:
   br label %for.body
@@ -369,10 +463,17 @@ declare double @llvm.sin.f64(double)
 declare float @llvm.sin.f32(float)
 
 define void @sin_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @sin_f64(
-; NEON:     [[TMP5:%.*]] = call <2 x double> @armpl_v...
[truncated]

The `noalias` attribute was added only to the `%in.ptr` parameter of the
ArmPL Intrinsics.
@paschalis-mpeis paschalis-mpeis merged commit 1bfb84b into llvm:main Nov 29, 2023
3 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants