Skip to content

SelectionDAGBuilder assumes it is safe to replace cmp + select with maxnum when maxnum is promoted (wrong for at least the x86 half case) #114520

@uuuvn

Description

@uuuvn

https://godbolt.org/z/z33cz5h8a

I think the cause is the line below, where LLVM seems to assume that the operation will become legal after promotion. For maxnum promoted from half to float on x86 that is not the case, and we end up with a newly introduced call to fmaxf in a freestanding environment.

(TLI.isOperationLegalOrCustomOrPromote(Opc, VT) ||

This patch fixes it, but I'm not sure it is the right approach, because it also disables the combine in cases where the operation is perfectly legal after promotion:

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f41dbe81434c..58a11b2e9001 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3778,7 +3778,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
     }
 
     if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
-        (TLI.isOperationLegalOrCustomOrPromote(Opc, VT) ||
+        (TLI.isOperationLegalOrCustom(Opc, VT) ||
          (UseScalarMinMax &&
           TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
         // If the underlying comparison instruction is used by any other
Bug
void r_2_10001(__fp16* restrict data0, __fp16* restrict data1) {
  __fp16 acc0 = (__fp16)(-__builtin_inff());
  for (int ridx0 = 0; ridx0 < 2; ridx0++) {
    __fp16 val0 = *(data1+ridx0);
    acc0 = ((acc0<val0)?val0:acc0);
  }
  *(data0+0) = acc0;
}

clang -march=x86-64-v4 --target=x86_64-none-unknown-elf -fno-math-errno -O2 -Wall -Werror -x c -fPIC -ffreestanding -nostdlib test.c -o test.ll -S -emit-llvm

; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-none-unknown-elf"

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
define void @r_2_10001(ptr noalias nocapture noundef writeonly %data0, ptr noalias nocapture noundef readonly %data1) local_unnamed_addr #0 {
entry:
  %0 = load half, ptr %data1, align 2, !tbaa !4
  %cmp2 = fcmp ogt half %0, 0xHFC00
  %cond.v = select i1 %cmp2, half %0, half 0xHFC00
  %add.ptr.1 = getelementptr inbounds i8, ptr %data1, i64 2
  %1 = load half, ptr %add.ptr.1, align 2, !tbaa !4
  %cmp2.1 = fcmp olt half %cond.v, %1
  %cond.v.1 = select i1 %cmp2.1, half %1, half %cond.v
  store half %cond.v.1, ptr %data0, align 2, !tbaa !4
  ret void
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64-v4" "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }

!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"frame-pointer", i32 2}
!3 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 61a6439f35b6de28ff4aff4450d6fca970292fd5)"}
!4 = !{!5, !5, i64 0}
!5 = !{!"__fp16", !6, i64 0}
!6 = !{!"omnipotent char", !7, i64 0}
!7 = !{!"Simple C/C++ TBAA"}

llc -debug test.ll -o test.s -O2 2>llc_debug.txt

	.text
	.file	"test.c"
	.section	.rodata.cst4,"aM",@progbits,4
	.p2align	2, 0x0                          # -- Begin function r_2_10001
.LCPI0_0:
	.long	0xff800000                      # float -Inf
	.text
	.globl	r_2_10001
	.p2align	4
	.type	r_2_10001,@function
r_2_10001:                              # @r_2_10001
# %bb.0:                                # %entry
	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r14
	pushq	%rbx
	movq	%rsi, %rbx
	movq	%rdi, %r14
	movzwl	(%rsi), %eax
	vmovd	%eax, %xmm0
	vcvtph2ps	%xmm0, %xmm0
	vmovss	.LCPI0_0(%rip), %xmm1           # xmm1 = [-Inf,0.0E+0,0.0E+0,0.0E+0]
	callq	fmaxf@PLT
	vcvtps2ph	$4, %xmm0, %xmm0
	vmovd	%xmm0, %eax
	vcvtph2ps	%xmm0, %xmm0
	movzwl	2(%rbx), %ecx
	vmovd	%ecx, %xmm1
	vcvtph2ps	%xmm1, %xmm1
	vucomiss	%xmm0, %xmm1
	cmoval	%ecx, %eax
	movw	%ax, (%r14)
	popq	%rbx
	popq	%r14
	popq	%rbp
	retq
.Lfunc_end0:
	.size	r_2_10001, .Lfunc_end0-r_2_10001
                                        # -- End function
	.ident	"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 61a6439f35b6de28ff4aff4450d6fca970292fd5)"
	.section	".note.GNU-stack","",@progbits

llc_debug.txt

No bug
void r_2_10001(__fp16* restrict data0, __fp16* restrict data1) {
  __fp16 acc0 = (__fp16)(-__builtin_inff());
  for (int ridx0 = 0; ridx0 < 2; ridx0++) {
    __fp16 val0 = *(data1+ridx0);
    acc0 = ((acc0<val0)?val0:acc0);
  }
  *(data0+0) = acc0;
}

clang -march=x86-64-v4 --target=x86_64-none-unknown-elf -fno-math-errno -O2 -Wall -Werror -x c -fPIC -ffreestanding -nostdlib test.c -o test.ll -S -emit-llvm

; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-none-unknown-elf"

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
define void @r_2_10001(ptr noalias nocapture noundef writeonly %data0, ptr noalias nocapture noundef readonly %data1) local_unnamed_addr #0 {
entry:
  %0 = load half, ptr %data1, align 2, !tbaa !4
  %cmp2 = fcmp ogt half %0, 0xHFC00
  %cond.v = select i1 %cmp2, half %0, half 0xHFC00
  %add.ptr.1 = getelementptr inbounds i8, ptr %data1, i64 2
  %1 = load half, ptr %add.ptr.1, align 2, !tbaa !4
  %cmp2.1 = fcmp olt half %cond.v, %1
  %cond.v.1 = select i1 %cmp2.1, half %1, half %cond.v
  store half %cond.v.1, ptr %data0, align 2, !tbaa !4
  ret void
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64-v4" "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }

!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"frame-pointer", i32 2}
!3 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 61a6439f35b6de28ff4aff4450d6fca970292fd5)"}
!4 = !{!5, !5, i64 0}
!5 = !{!"__fp16", !6, i64 0}
!6 = !{!"omnipotent char", !7, i64 0}
!7 = !{!"Simple C/C++ TBAA"}

llc -debug test.ll -o test.s -O2 2>llc_debug.txt

	.text
	.file	"test.c"
	.section	.rodata.cst4,"aM",@progbits,4
	.p2align	2, 0x0                          # -- Begin function r_2_10001
.LCPI0_0:
	.long	0xff800000                      # float -Inf
	.text
	.globl	r_2_10001
	.p2align	4
	.type	r_2_10001,@function
r_2_10001:                              # @r_2_10001
# %bb.0:                                # %entry
	pushq	%rbp
	movq	%rsp, %rbp
	movzwl	(%rsi), %eax
	vmovd	%eax, %xmm0
	vcvtph2ps	%xmm0, %xmm0
	vucomiss	.LCPI0_0(%rip), %xmm0
	movl	$64512, %ecx                    # imm = 0xFC00
	cmoval	%eax, %ecx
	vmovd	%ecx, %xmm0
	vcvtph2ps	%xmm0, %xmm0
	movzwl	2(%rsi), %eax
	vmovd	%eax, %xmm1
	vcvtph2ps	%xmm1, %xmm1
	vucomiss	%xmm0, %xmm1
	cmoval	%eax, %ecx
	movw	%cx, (%rdi)
	popq	%rbp
	retq
.Lfunc_end0:
	.size	r_2_10001, .Lfunc_end0-r_2_10001
                                        # -- End function
	.ident	"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 61a6439f35b6de28ff4aff4450d6fca970292fd5)"
	.section	".note.GNU-stack","",@progbits

llc_debug.txt

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions