Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Oct 31, 2025

The availability of the __sincos_stret libcall should be treated like a
program property and not a static property of the subtarget, so query it
when lowering FSINCOS rather than in the TargetLowering constructor. The
regression is in the 3 element vector case; a combine happens to replace
the original undef value with a non-undef value, so the 4th component is
never eliminated. Trying to avoid that particular case hits other combine
regressions, so leave that for later.

The availability of the __sincos_stret libcall should be treated like a
program property and not a static property of the subtarget, so query it
when lowering FSINCOS rather than in the TargetLowering constructor. The
regression is in the 3 element vector case; a combine happens to replace
the original undef value with a non-undef value, so the 4th component is
never eliminated. Trying to avoid that particular case hits other combine
regressions, so leave that for later.
Copy link
Contributor Author

arsenm commented Oct 31, 2025

@arsenm arsenm marked this pull request as ready for review October 31, 2025 02:06
@llvmbot
Copy link
Member

llvmbot commented Oct 31, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Matt Arsenault (arsenm)

Changes

The availability of the __sincos_stret libcall should be treated like a
program property and not a static property of the subtarget, so query it
when lowering FSINCOS rather than in the TargetLowering constructor. The
regression is in the 3 element vector case; a combine happens to replace
the original undef value with a non-undef value, so the 4th component is
never eliminated. Trying to avoid that particular case hits other combine
regressions, so leave that for later.


Full diff: https://github.com/llvm/llvm-project/pull/165817.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+11-14)
  • (modified) llvm/test/CodeGen/AArch64/llvm.sincos.ll (+62-35)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60aa61e993b26..6324d9d18e31b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1052,15 +1052,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   // Lower READCYCLECOUNTER using an mrs from CNTVCT_EL0.
   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
 
-  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
-      getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
     // Issue __sincos_stret if available.
-    setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
-    setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
-  } else {
-    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
-    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-  }
+  setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+  setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
 
   // Make floating-point constants legal for the large code model, so they don't
   // become loads from the constant pool.
@@ -5353,20 +5347,23 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
   SDLoc DL(Op);
   SDValue Arg = Op.getOperand(0);
   EVT ArgVT = Arg.getValueType();
+  RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT);
+  RTLIB::LibcallImpl SincosStret = getLibcallImpl(LC);
+  if (SincosStret == RTLIB::Unsupported)
+    return SDValue();
+
   Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 
   ArgListTy Args;
   Args.emplace_back(Arg, ArgTy);
 
-  RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
-                                        : RTLIB::SINCOS_STRET_F32;
-  const char *LibcallName = getLibcallName(LC);
-  SDValue Callee =
-      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
+  StringRef LibcallImplName = getLibcallImplName(SincosStret);
+  SDValue Callee = DAG.getExternalSymbol(LibcallImplName.data(),
+                                         getPointerTy(DAG.getDataLayout()));
 
   StructType *RetTy = StructType::get(ArgTy, ArgTy);
   TargetLowering::CallLoweringInfo CLI(DAG);
-  CallingConv::ID CC = getLibcallCallingConv(LC);
+  CallingConv::ID CC = getLibcallImplCallingConv(SincosStret);
   CLI.setDebugLoc(DL)
       .setChain(DAG.getEntryNode())
       .setLibCallee(CC, RetTy, Callee, std::move(Args));
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index 21da8645b9b16..fa3ada6212894 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -374,84 +374,111 @@ define { float, float } @test_sincos_f32(float %a) nounwind {
 define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_sincos_v3f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    add x0, sp, #20
-; CHECK-NEXT:    add x1, sp, #16
-; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    add x0, sp, #60
+; CHECK-NEXT:    add x1, sp, #56
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    add x0, sp, #28
-; CHECK-NEXT:    add x1, sp, #24
-; CHECK-NEXT:    add x19, sp, #28
-; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    add x19, sp, #44
+; CHECK-NEXT:    add x20, sp, #40
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    add x0, sp, #44
-; CHECK-NEXT:    add x1, sp, #40
-; CHECK-NEXT:    add x21, sp, #44
-; CHECK-NEXT:    add x22, sp, #40
+; CHECK-NEXT:    add x0, sp, #36
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    add x21, sp, #36
+; CHECK-NEXT:    add x22, sp, #32
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl sincosf
-; CHECK-NEXT:    ldp s1, s0, [sp, #16]
-; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x23, sp, #28
+; CHECK-NEXT:    add x24, sp, #24
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s1, s0, [sp, #56]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
 ; CHECK-NEXT:    ld1 { v1.s }[1], [x20]
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
 ; CHECK-NEXT:    ld1 { v0.s }[2], [x21]
 ; CHECK-NEXT:    ld1 { v1.s }[2], [x22]
-; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.s }[3], [x23]
+; CHECK-NEXT:    ld1 { v1.s }[3], [x24]
+; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #112
 ; CHECK-NEXT:    ret
 ;
 ; NO-LIBCALL-LABEL: test_sincos_v3f32:
 ; NO-LIBCALL:       // %bb.0:
 ; NO-LIBCALL-NEXT:    sub sp, sp, #80
-; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #56] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    mov s8, v0.s[1]
-; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str d10, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #72] // 8-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    bl sinf
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
-; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl sinf
-; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    mov s9, v0.s[2]
 ; NO-LIBCALL-NEXT:    fmov s0, s9
 ; NO-LIBCALL-NEXT:    bl sinf
-; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov s10, v0.s[3]
+; NO-LIBCALL-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s10
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    mov v1.s[3], v0.s[0]
 ; NO-LIBCALL-NEXT:    fmov s0, s8
-; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s9
 ; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT:    fmov s0, s10
+; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    fmov s2, s0
-; NO-LIBCALL-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT:    mov v1.s[2], v2.s[0]
+; NO-LIBCALL-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.s[3], v2.s[0]
 ; NO-LIBCALL-NEXT:    add sp, sp, #80
 ; NO-LIBCALL-NEXT:    ret
   %result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> %a)

Base automatically changed from users/arsenm/aarch64/add-more-v3-sincos-tests to main October 31, 2025 05:49
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants