-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAGCombiner] Enable constant folding of (bitcast int_c0) -> fp_c0 #82289
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-aarch64 Author: None (goldsteinn) ChangesFull diff: https://github.com/llvm/llvm-project/pull/82289.diff 10 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 89ef648ee7d7ed..0cf5a23c251858 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15231,9 +15231,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// TODO: Support FP bitcasts after legalize types.
if (VT.isVector() &&
(!LegalTypes ||
- (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
- TLI.isTypeLegal(VT.getVectorElementType()))) &&
- N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
+ (!LegalOperations && TLI.isTypeLegal(VT.getVectorElementType()))) &&
+ N0.getOpcode() == ISD::BUILD_VECTOR &&
+ N0->hasOneUse() &&
cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
VT.getVectorElementType());
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 3c8aca5145261d..3aa00f703d14f3 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -146,9 +146,8 @@ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.2d v0, #0000000000000000
-; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: str xzr, [x0, #16]
; CHECK-NEXT: uaddlv.4s d1, v0
-; CHECK-NEXT: str d2, [x0, #16]
; CHECK-NEXT: mov.d v0[0], v1[0]
; CHECK-NEXT: ucvtf.2d v0, v0
; CHECK-NEXT: fcvtn v0.2s, v0.2d
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
index 367ccbeeea81ed..72fd7f4731e745 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -60,9 +60,7 @@ define void @store_v2i16(ptr %a) {
define void @store_v2f16(ptr %a) {
; CHECK-LABEL: store_v2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, #0 // =0x0
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: str w8, [x0]
+; CHECK-NEXT: str wzr, [x0]
; CHECK-NEXT: ret
store <2 x half> zeroinitializer, ptr %a
ret void
diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
index 28a7dc046139b2..5fe0276e26d9a5 100644
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -5337,35 +5337,33 @@ define void @callfloat_2() {
; MIPS32R5-NEXT: jr $ra
; MIPS32R5-NEXT: nop
;
-; MIPS64R5-LABEL: callfloat_2:
-; MIPS64R5: # %bb.0: # %entry
-; MIPS64R5-NEXT: daddiu $sp, $sp, -16
-; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
-; MIPS64R5-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
-; MIPS64R5-NEXT: .cfi_offset 31, -8
-; MIPS64R5-NEXT: .cfi_offset 28, -16
-; MIPS64R5-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_2)))
-; MIPS64R5-NEXT: daddu $1, $1, $25
-; MIPS64R5-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
-; MIPS64R5-NEXT: ld $1, %got_page(.LCPI37_0)($gp)
-; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI37_0)
-; MIPS64R5-NEXT: ld.d $w0, 0($1)
-; MIPS64R5-NEXT: copy_s.d $4, $w0[0]
-; MIPS64R5-NEXT: ld $1, %got_page(.LCPI37_1)($gp)
-; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI37_1)
-; MIPS64R5-NEXT: ld.d $w0, 0($1)
-; MIPS64R5-NEXT: copy_s.d $5, $w0[0]
-; MIPS64R5-NEXT: ld $25, %call16(float2_extern)($gp)
-; MIPS64R5-NEXT: jalr $25
-; MIPS64R5-NEXT: nop
-; MIPS64R5-NEXT: ld $1, %got_disp(gv2f32)($gp)
-; MIPS64R5-NEXT: sd $2, 0($1)
-; MIPS64R5-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
-; MIPS64R5-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
-; MIPS64R5-NEXT: daddiu $sp, $sp, 16
-; MIPS64R5-NEXT: jr $ra
-; MIPS64R5-NEXT: nop
+; MIPS64R5EB-LABEL: callfloat_2:
+; MIPS64R5EB: # %bb.0: # %entry
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: .cfi_offset 31, -8
+; MIPS64R5EB-NEXT: .cfi_offset 28, -16
+; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5EB-NEXT: daddu $1, $1, $25
+; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5EB-NEXT: daddiu $1, $zero, 383
+; MIPS64R5EB-NEXT: dsll $4, $1, 23
+; MIPS64R5EB-NEXT: daddiu $1, $zero, 261
+; MIPS64R5EB-NEXT: dsll $1, $1, 33
+; MIPS64R5EB-NEXT: daddiu $1, $1, 523
+; MIPS64R5EB-NEXT: dsll $5, $1, 21
+; MIPS64R5EB-NEXT: ld $25, %call16(float2_extern)($gp)
+; MIPS64R5EB-NEXT: jalr $25
+; MIPS64R5EB-NEXT: nop
+; MIPS64R5EB-NEXT: ld $1, %got_disp(gv2f32)($gp)
+; MIPS64R5EB-NEXT: sd $2, 0($1)
+; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16
+; MIPS64R5EB-NEXT: jr $ra
+; MIPS64R5EB-NEXT: nop
;
; MIPS64EL-LABEL: callfloat_2:
; MIPS64EL: # %bb.0: # %entry
@@ -5394,6 +5392,34 @@ define void @callfloat_2() {
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
+;
+; MIPS64R5EL-LABEL: callfloat_2:
+; MIPS64R5EL: # %bb.0: # %entry
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: .cfi_offset 31, -8
+; MIPS64R5EL-NEXT: .cfi_offset 28, -16
+; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5EL-NEXT: daddu $1, $1, $25
+; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5EL-NEXT: daddiu $1, $zero, 383
+; MIPS64R5EL-NEXT: dsll $4, $1, 55
+; MIPS64R5EL-NEXT: daddiu $1, $zero, 523
+; MIPS64R5EL-NEXT: dsll $1, $1, 31
+; MIPS64R5EL-NEXT: daddiu $1, $1, 261
+; MIPS64R5EL-NEXT: dsll $5, $1, 22
+; MIPS64R5EL-NEXT: ld $25, %call16(float2_extern)($gp)
+; MIPS64R5EL-NEXT: jalr $25
+; MIPS64R5EL-NEXT: nop
+; MIPS64R5EL-NEXT: ld $1, %got_disp(gv2f32)($gp)
+; MIPS64R5EL-NEXT: sd $2, 0($1)
+; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16
+; MIPS64R5EL-NEXT: jr $ra
+; MIPS64R5EL-NEXT: nop
entry:
%0 = call <2 x float> @float2_extern(<2 x float> <float 0.0, float -1.0>, <2 x float> <float 12.0, float 14.0>)
store <2 x float> %0, ptr @gv2f32
diff --git a/llvm/test/CodeGen/X86/combine-concatvectors.ll b/llvm/test/CodeGen/X86/combine-concatvectors.ll
index 17d22607cfea88..faadf011cd6d4d 100644
--- a/llvm/test/CodeGen/X86/combine-concatvectors.ll
+++ b/llvm/test/CodeGen/X86/combine-concatvectors.ll
@@ -50,8 +50,8 @@ define void @concat_of_broadcast_v2f64_v4f64() {
; AVX1-NEXT: movq %rcx, 46348(%rax)
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm0 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vmovups %ymm0, 48296(%rax)
-; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [7.812501848093234E-3,0.0E+0]
-; AVX1-NEXT: vmovsd %xmm0, 47372(%rax)
+; AVX1-NEXT: movabsq $4575657222473777152, %rcx # imm = 0x3F8000003F800000
+; AVX1-NEXT: movq %rcx, 47372(%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -61,9 +61,10 @@ define void @concat_of_broadcast_v2f64_v4f64() {
; AVX2-NEXT: movl $1091567616, 30256(%rax) # imm = 0x41100000
; AVX2-NEXT: movabsq $4294967297, %rcx # imm = 0x100000001
; AVX2-NEXT: movq %rcx, 46348(%rax)
-; AVX2-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm0 = [4575657222473777152,4575657222473777152,4575657222473777152,4575657222473777152]
; AVX2-NEXT: vmovups %ymm0, 48296(%rax)
-; AVX2-NEXT: vmovlps %xmm0, 47372(%rax)
+; AVX2-NEXT: movabsq $4575657222473777152, %rcx # imm = 0x3F8000003F800000
+; AVX2-NEXT: movq %rcx, 47372(%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
alloca_0:
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 38162f676e7ee3..0b31c0fb09677e 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -320,7 +320,8 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
; X64-LABEL: extvselectsetcc_crash:
; X64: # %bb.0:
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
+; X64-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
+; X64-NEXT: vmovq %rax, %xmm2
; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
diff --git a/llvm/test/CodeGen/X86/ret-mmx.ll b/llvm/test/CodeGen/X86/ret-mmx.ll
index 81dd73363c1fb1..d1f54dfc315ae5 100644
--- a/llvm/test/CodeGen/X86/ret-mmx.ll
+++ b/llvm/test/CodeGen/X86/ret-mmx.ll
@@ -40,7 +40,7 @@ define <2 x i32> @t3() nounwind {
define double @t4() nounwind {
; CHECK-LABEL: t4:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1,0,0,0]
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.9406564584124654E-324,0.0E+0]
; CHECK-NEXT: retq
ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
}
diff --git a/llvm/test/CodeGen/X86/vec_zero_cse.ll b/llvm/test/CodeGen/X86/vec_zero_cse.ll
index d4357aeb2e1dea..9376212740d3a6 100644
--- a/llvm/test/CodeGen/X86/vec_zero_cse.ll
+++ b/llvm/test/CodeGen/X86/vec_zero_cse.ll
@@ -34,8 +34,8 @@ define void @test2() {
; X86: # %bb.0:
; X86-NEXT: movl $-1, M1+4
; X86-NEXT: movl $-1, M1
-; X86-NEXT: pcmpeqd %xmm0, %xmm0
-; X86-NEXT: movq %xmm0, M2
+; X86-NEXT: movl $-1, M2+4
+; X86-NEXT: movl $-1, M2
; X86-NEXT: retl
;
; X64-LABEL: test2:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index 388511ce0741f6..ebd718d6d11aa1 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -108,11 +108,10 @@ define void @PR46178(ptr %0) {
; X86-NEXT: vmovdqu (%eax), %ymm1
; X86-NEXT: vpmovqw %ymm0, %xmm0
; X86-NEXT: vpmovqw %ymm1, %xmm1
-; X86-NEXT: vpsllw $8, %xmm0, %xmm0
-; X86-NEXT: vpsraw $8, %xmm0, %xmm0
-; X86-NEXT: vpsllw $8, %xmm1, %xmm1
-; X86-NEXT: vpsraw $8, %xmm1, %xmm1
-; X86-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: vpsllw $8, %ymm0, %ymm0
+; X86-NEXT: vpsraw $8, %ymm0, %ymm0
+; X86-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,1]
; X86-NEXT: vmovdqu %ymm0, (%eax)
; X86-NEXT: vzeroupper
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/widen_shuffle-1.ll b/llvm/test/CodeGen/X86/widen_shuffle-1.ll
index 925b96f5e346cd..e82486c10e2362 100644
--- a/llvm/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/llvm/test/CodeGen/X86/widen_shuffle-1.ll
@@ -105,8 +105,8 @@ define void @shuf5(ptr %p) nounwind {
; X86-LABEL: shuf5:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movsd {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33,0,0,0,0,0,0,0,0]
-; X86-NEXT: movsd %xmm0, (%eax)
+; X86-NEXT: movl $555819297, 4(%eax) # imm = 0x21212121
+; X86-NEXT: movl $555819297, (%eax) # imm = 0x21212121
; X86-NEXT: retl
;
; X64-LABEL: shuf5:
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
6b525fb
to
d21a580
Compare
@@ -15231,8 +15231,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { | |||
// TODO: Support FP bitcasts after legalize types. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This TODO can be removed I guess - did you investigate the history behind this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://reviews.llvm.org/D58884, adding what I think is the necessary check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
AFAICT, the intermediate types for the float
version are just integers, so if we are allowing integers it seems the same as allowing FP. @topperc any reason this is buggy?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@topperc any comment?
d21a580
to
499d278
Compare
@@ -320,7 +320,8 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) { | |||
; X64-LABEL: extvselectsetcc_crash: | |||
; X64: # %bb.0: | |||
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 | |||
; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0] | |||
; X64-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not an expert on X86 assembly, but this test change and the one above seem to produce more instructions so is that a regression?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Its an optimization to save a load, I'm not 100% sure if the optimization is sound, but think it should be addressed there not here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
vector loads vs rematerialization vs scalar constants has never been ideal on x86
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@@ -320,7 +320,8 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) { | |||
; X64-LABEL: extvselectsetcc_crash: | |||
; X64: # %bb.0: | |||
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 | |||
; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0] | |||
; X64-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
vector loads vs rematerialization vs scalar constants has never been ideal on x86
@goldsteinn reverse-ping
Wanted to see if @topperc had any comment (he was the original author) and IIRC neither of us really understood why this was necessary.
I think my concern was with this code: the original type and the destination type must both be legal since they exist in the DAG, but if they have a different number of elements, you need to know that the intermediate integer type — the one with the same number of elements as the FP type — is legal, since it didn't previously exist in the DAG.
I see, so we need either the integer checks or the same number of elements?
No description provided.