-
Notifications
You must be signed in to change notification settings - Fork 15.1k
AArch64: Stop changing legality rules based on sincos_stret availability #165817
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
AArch64: Stop changing legality rules based on sincos_stret availability #165817
Conversation
The availability of sincos_stret should be treated like a program property and not a static property of the subtarget. The regression is in the 3-element vector case: a combine happens to replace the original undef value with a non-undef value, so the 4th component is never eliminated. Trying to avoid that particular case hits other combine regressions, so that is left for later.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-backend-aarch64 Author: Matt Arsenault (arsenm) Changes: This should be treated like a program property and not a static property of the subtarget. Full diff: https://github.com/llvm/llvm-project/pull/165817.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60aa61e993b26..6324d9d18e31b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1052,15 +1052,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Lower READCYCLECOUNTER using an mrs from CNTVCT_EL0.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
- if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
- getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
// Issue __sincos_stret if available.
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
- } else {
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
- }
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
// Make floating-point constants legal for the large code model, so they don't
// become loads from the constant pool.
@@ -5353,20 +5347,23 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
SDLoc DL(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
+ RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT);
+ RTLIB::LibcallImpl SincosStret = getLibcallImpl(LC);
+ if (SincosStret == RTLIB::Unsupported)
+ return SDValue();
+
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListTy Args;
Args.emplace_back(Arg, ArgTy);
- RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
- : RTLIB::SINCOS_STRET_F32;
- const char *LibcallName = getLibcallName(LC);
- SDValue Callee =
- DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
+ StringRef LibcallImplName = getLibcallImplName(SincosStret);
+ SDValue Callee = DAG.getExternalSymbol(LibcallImplName.data(),
+ getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy);
TargetLowering::CallLoweringInfo CLI(DAG);
- CallingConv::ID CC = getLibcallCallingConv(LC);
+ CallingConv::ID CC = getLibcallImplCallingConv(SincosStret);
CLI.setDebugLoc(DL)
.setChain(DAG.getEntryNode())
.setLibCallee(CC, RetTy, Callee, std::move(Args));
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index 21da8645b9b16..fa3ada6212894 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -374,84 +374,111 @@ define { float, float } @test_sincos_f32(float %a) nounwind {
define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) nounwind {
; CHECK-LABEL: test_sincos_v3f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: add x0, sp, #20
-; CHECK-NEXT: add x1, sp, #16
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl sincosf
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #28
-; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: add x19, sp, #28
-; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: add x19, sp, #44
+; CHECK-NEXT: add x20, sp, #40
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: bl sincosf
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #44
-; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: add x21, sp, #44
-; CHECK-NEXT: add x22, sp, #40
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: add x21, sp, #36
+; CHECK-NEXT: add x22, sp, #32
; CHECK-NEXT: mov s0, v0.s[2]
; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldp s1, s0, [sp, #16]
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x23, sp, #28
+; CHECK-NEXT: add x24, sp, #24
+; CHECK-NEXT: mov s0, v0.s[3]
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s1, s0, [sp, #56]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-NEXT: ld1 { v0.s }[1], [x19]
; CHECK-NEXT: ld1 { v1.s }[1], [x20]
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ld1 { v0.s }[2], [x21]
; CHECK-NEXT: ld1 { v1.s }[2], [x22]
-; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.s }[3], [x23]
+; CHECK-NEXT: ld1 { v1.s }[3], [x24]
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
;
; NO-LIBCALL-LABEL: test_sincos_v3f32:
; NO-LIBCALL: // %bb.0:
; NO-LIBCALL-NEXT: sub sp, sp, #80
-; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill
; NO-LIBCALL-NEXT: mov s8, v0.s[1]
-; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str d10, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #72] // 8-byte Folded Spill
; NO-LIBCALL-NEXT: fmov s0, s8
; NO-LIBCALL-NEXT: bl sinf
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: mov s9, v0.s[2]
; NO-LIBCALL-NEXT: fmov s0, s9
; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
; NO-LIBCALL-NEXT: mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov s10, v0.s[3]
+; NO-LIBCALL-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: mov v1.s[3], v0.s[0]
; NO-LIBCALL-NEXT: fmov s0, s8
-; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
; NO-LIBCALL-NEXT: bl cosf
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
; NO-LIBCALL-NEXT: bl cosf
; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; NO-LIBCALL-NEXT: fmov s0, s9
; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
; NO-LIBCALL-NEXT: fmov s2, s0
-; NO-LIBCALL-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v1.s[2], v2.s[0]
+; NO-LIBCALL-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.s[3], v2.s[0]
; NO-LIBCALL-NEXT: add sp, sp, #80
; NO-LIBCALL-NEXT: ret
%result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> %a)
|

The availability of sincos_stret should be treated like a program property
and not a static property of the subtarget. The regression is in the
3-element vector case: a combine happens to replace the original undef value
with a non-undef value, so the 4th component is never eliminated. Trying to
avoid that particular case hits other combine regressions, so that is left
for later.