From 0fa970d709b06c689ee4bd1d988c65008f252ef4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 7 Apr 2025 08:59:54 +0700 Subject: [PATCH] AMDGPU: Disable sincos fold for constant inputs The use list for ConstantData should not be used. We could do better by just constant folding here. --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 5 +++++ .../AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll | 8 +++----- .../CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll | 11 +++++------ 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index cf8b416d23e50..b65b4d67b3f8c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1361,6 +1361,11 @@ bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN; Value *CArgVal = FPOp->getOperand(0); + + // TODO: Constant fold the call + if (isa(CArgVal)) + return false; + CallInst *CI = cast(FPOp); Function *F = B.GetInsertBlock()->getParent(); diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll index ad7e913e6d22c..c8f45fe11390c 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll @@ -125,12 +125,10 @@ define void @sincos_f32_value_is_same_constantfp(ptr addrspace(1) nocapture writ ; CHECK-LABEL: define void @sincos_f32_value_is_same_constantfp ; CHECK-SAME: (ptr addrspace(1) writeonly captures(none) [[SIN_OUT:%.*]], ptr addrspace(1) writeonly captures(none) [[COS_OUT:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float 4.200000e+01, ptr addrspace(5) [[__SINCOS_]]) -; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01) +; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float 4.200000e+01) -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll index da5686972a86b..03b7d1646eb86 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -745,13 +745,12 @@ entry: define void @sincos_f32_value_is_same_constantfp(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_value_is_same_constantfp -; CHECK-SAME: (ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float 4.200000e+01, ptr addrspace(5) [[__SINCOS_]]) -; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01) +; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float 4.200000e+01) +; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: