-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[RISCV] Mark sincos libcalls as available for RISC-V #168708
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
A standard glibc build for RISC-V does contain these libcalls, and GCC will generate calls to them. Found by comparing benchmark data for SPEC, spotting that the GCC build of 526.blender_r spends 3.02% of dynamically executed instructions in libm functions, while the Clang build spends 3.41%. Calling sincos rather than separate sin and cos functions is the reason for the difference.
|
@llvm/pr-subscribers-llvm-ir Author: Alex Bradbury (asb) Changes: A standard glibc build for RISC-V does contain these libcalls, and GCC will generate calls to them. Found by comparing benchmark data for SPEC, spotting that the GCC build of 526.blender_r spends 3.02% of dynamically executed instructions in libm functions, while the Clang build spends 3.41%. Calling sincos rather than separate sin and cos functions is the reason for the difference. Full diff: https://github.com/llvm/llvm-project/pull/168708.diff 2 Files Affected:
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index ce7e836f66446..9230b39e6f45e 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -2452,6 +2452,7 @@ def RISCVSystemLibrary
(add DefaultRuntimeLibcallImpls,
exp10f, exp10, exp10l_f128,
__riscv_flush_icache,
+ LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128,
LibcallImpls<(add Int128RTLibcalls), isRISCV64>,
DefaultStackProtector)>;
diff --git a/llvm/test/CodeGen/RISCV/sincos-expansion.ll b/llvm/test/CodeGen/RISCV/sincos-expansion.ll
index 6f37d67134cb3..606768a8e2f53 100644
--- a/llvm/test/CodeGen/RISCV/sincos-expansion.ll
+++ b/llvm/test/CodeGen/RISCV/sincos-expansion.ll
@@ -7,20 +7,16 @@
define float @test_sincos_f32(float %f) nounwind {
; CHECK-LABEL: test_sincos_f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fmv.s fs0, fa0
-; CHECK-NEXT: call sinf
-; CHECK-NEXT: fmv.s fs1, fa0
-; CHECK-NEXT: fmv.s fa0, fs0
-; CHECK-NEXT: call cosf
-; CHECK-NEXT: fadd.s fa0, fs1, fa0
-; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi a0, sp, 4
+; CHECK-NEXT: mv a1, sp
+; CHECK-NEXT: call sincosf
+; CHECK-NEXT: flw fa5, 0(sp)
+; CHECK-NEXT: flw fa4, 4(sp)
+; CHECK-NEXT: fadd.s fa0, fa4, fa5
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%sin = call float @sinf(float %f) readnone
%cos = call float @cosf(float %f) readnone
@@ -57,17 +53,13 @@ define double @test_sincos_f64(double %f) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fmv.d fs0, fa0
-; CHECK-NEXT: call sin
-; CHECK-NEXT: fmv.d fs1, fa0
-; CHECK-NEXT: fmv.d fa0, fs0
-; CHECK-NEXT: call cos
-; CHECK-NEXT: fadd.d fa0, fs1, fa0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: addi a1, sp, 8
+; CHECK-NEXT: call sincos
+; CHECK-NEXT: fld fa5, 8(sp)
+; CHECK-NEXT: fld fa4, 16(sp)
+; CHECK-NEXT: fadd.d fa0, fa4, fa5
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
%sin = call double @sin(double %f) readnone
|
|
@llvm/pr-subscribers-backend-risc-v Author: Alex Bradbury (asb) Changes: A standard glibc build for RISC-V does contain these libcalls, and GCC will generate calls to them. Found by comparing benchmark data for SPEC, spotting that the GCC build of 526.blender_r spends 3.02% of dynamically executed instructions in libm functions, while the Clang build spends 3.41%. Calling sincos rather than separate sin and cos functions is the reason for the difference. Full diff: https://github.com/llvm/llvm-project/pull/168708.diff 2 Files Affected:
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index ce7e836f66446..9230b39e6f45e 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -2452,6 +2452,7 @@ def RISCVSystemLibrary
(add DefaultRuntimeLibcallImpls,
exp10f, exp10, exp10l_f128,
__riscv_flush_icache,
+ LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128,
LibcallImpls<(add Int128RTLibcalls), isRISCV64>,
DefaultStackProtector)>;
diff --git a/llvm/test/CodeGen/RISCV/sincos-expansion.ll b/llvm/test/CodeGen/RISCV/sincos-expansion.ll
index 6f37d67134cb3..606768a8e2f53 100644
--- a/llvm/test/CodeGen/RISCV/sincos-expansion.ll
+++ b/llvm/test/CodeGen/RISCV/sincos-expansion.ll
@@ -7,20 +7,16 @@
define float @test_sincos_f32(float %f) nounwind {
; CHECK-LABEL: test_sincos_f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fmv.s fs0, fa0
-; CHECK-NEXT: call sinf
-; CHECK-NEXT: fmv.s fs1, fa0
-; CHECK-NEXT: fmv.s fa0, fs0
-; CHECK-NEXT: call cosf
-; CHECK-NEXT: fadd.s fa0, fs1, fa0
-; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi a0, sp, 4
+; CHECK-NEXT: mv a1, sp
+; CHECK-NEXT: call sincosf
+; CHECK-NEXT: flw fa5, 0(sp)
+; CHECK-NEXT: flw fa4, 4(sp)
+; CHECK-NEXT: fadd.s fa0, fa4, fa5
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%sin = call float @sinf(float %f) readnone
%cos = call float @cosf(float %f) readnone
@@ -57,17 +53,13 @@ define double @test_sincos_f64(double %f) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: fmv.d fs0, fa0
-; CHECK-NEXT: call sin
-; CHECK-NEXT: fmv.d fs1, fa0
-; CHECK-NEXT: fmv.d fa0, fs0
-; CHECK-NEXT: call cos
-; CHECK-NEXT: fadd.d fa0, fs1, fa0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: addi a1, sp, 8
+; CHECK-NEXT: call sincos
+; CHECK-NEXT: fld fa5, 8(sp)
+; CHECK-NEXT: fld fa4, 16(sp)
+; CHECK-NEXT: fadd.d fa0, fa4, fa5
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
%sin = call double @sin(double %f) readnone
|
| (add DefaultRuntimeLibcallImpls, | ||
| exp10f, exp10, exp10l_f128, | ||
| __riscv_flush_icache, | ||
| LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to test the sincos f128 libcall?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at AArch64, it seems the sincos libcall should kick in for f128 as well, but for some reason it doesn't. I'll look into that.
🐧 Linux x64 Test Results
|
A standard glibc build for RISC-V does contain these libcalls, and GCC will generate calls to them. Found by comparing benchmark data for SPEC, spotting that the GCC build of 526.blender_r spends 3.02% of dynamically executed instructions in libm functions, while the Clang build spends 3.41%. Calling sincos rather than separate sin and cos functions is the reason for the difference.