-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAGCombiner] Enable constant folding of (bitcast int_c0) -> fp_c0 #82289
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-aarch64 Author: None (goldsteinn) ChangesFull diff: https://github.com/llvm/llvm-project/pull/82289.diff 10 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 89ef648ee7d7ed..0cf5a23c251858 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15231,9 +15231,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// TODO: Support FP bitcasts after legalize types.
if (VT.isVector() &&
(!LegalTypes ||
- (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
- TLI.isTypeLegal(VT.getVectorElementType()))) &&
- N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
+ (!LegalOperations && TLI.isTypeLegal(VT.getVectorElementType()))) &&
+ N0.getOpcode() == ISD::BUILD_VECTOR &&
+ N0->hasOneUse() &&
cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
VT.getVectorElementType());
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 3c8aca5145261d..3aa00f703d14f3 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -146,9 +146,8 @@ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) {
; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.2d v0, #0000000000000000
-; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: str xzr, [x0, #16]
; CHECK-NEXT: uaddlv.4s d1, v0
-; CHECK-NEXT: str d2, [x0, #16]
; CHECK-NEXT: mov.d v0[0], v1[0]
; CHECK-NEXT: ucvtf.2d v0, v0
; CHECK-NEXT: fcvtn v0.2s, v0.2d
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
index 367ccbeeea81ed..72fd7f4731e745 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -60,9 +60,7 @@ define void @store_v2i16(ptr %a) {
define void @store_v2f16(ptr %a) {
; CHECK-LABEL: store_v2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, #0 // =0x0
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: str w8, [x0]
+; CHECK-NEXT: str wzr, [x0]
; CHECK-NEXT: ret
store <2 x half> zeroinitializer, ptr %a
ret void
diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
index 28a7dc046139b2..5fe0276e26d9a5 100644
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -5337,35 +5337,33 @@ define void @callfloat_2() {
; MIPS32R5-NEXT: jr $ra
; MIPS32R5-NEXT: nop
;
-; MIPS64R5-LABEL: callfloat_2:
-; MIPS64R5: # %bb.0: # %entry
-; MIPS64R5-NEXT: daddiu $sp, $sp, -16
-; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
-; MIPS64R5-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
-; MIPS64R5-NEXT: .cfi_offset 31, -8
-; MIPS64R5-NEXT: .cfi_offset 28, -16
-; MIPS64R5-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_2)))
-; MIPS64R5-NEXT: daddu $1, $1, $25
-; MIPS64R5-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
-; MIPS64R5-NEXT: ld $1, %got_page(.LCPI37_0)($gp)
-; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI37_0)
-; MIPS64R5-NEXT: ld.d $w0, 0($1)
-; MIPS64R5-NEXT: copy_s.d $4, $w0[0]
-; MIPS64R5-NEXT: ld $1, %got_page(.LCPI37_1)($gp)
-; MIPS64R5-NEXT: daddiu $1, $1, %got_ofst(.LCPI37_1)
-; MIPS64R5-NEXT: ld.d $w0, 0($1)
-; MIPS64R5-NEXT: copy_s.d $5, $w0[0]
-; MIPS64R5-NEXT: ld $25, %call16(float2_extern)($gp)
-; MIPS64R5-NEXT: jalr $25
-; MIPS64R5-NEXT: nop
-; MIPS64R5-NEXT: ld $1, %got_disp(gv2f32)($gp)
-; MIPS64R5-NEXT: sd $2, 0($1)
-; MIPS64R5-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
-; MIPS64R5-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
-; MIPS64R5-NEXT: daddiu $sp, $sp, 16
-; MIPS64R5-NEXT: jr $ra
-; MIPS64R5-NEXT: nop
+; MIPS64R5EB-LABEL: callfloat_2:
+; MIPS64R5EB: # %bb.0: # %entry
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EB-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5EB-NEXT: .cfi_offset 31, -8
+; MIPS64R5EB-NEXT: .cfi_offset 28, -16
+; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5EB-NEXT: daddu $1, $1, $25
+; MIPS64R5EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5EB-NEXT: daddiu $1, $zero, 383
+; MIPS64R5EB-NEXT: dsll $4, $1, 23
+; MIPS64R5EB-NEXT: daddiu $1, $zero, 261
+; MIPS64R5EB-NEXT: dsll $1, $1, 33
+; MIPS64R5EB-NEXT: daddiu $1, $1, 523
+; MIPS64R5EB-NEXT: dsll $5, $1, 21
+; MIPS64R5EB-NEXT: ld $25, %call16(float2_extern)($gp)
+; MIPS64R5EB-NEXT: jalr $25
+; MIPS64R5EB-NEXT: nop
+; MIPS64R5EB-NEXT: ld $1, %got_disp(gv2f32)($gp)
+; MIPS64R5EB-NEXT: sd $2, 0($1)
+; MIPS64R5EB-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16
+; MIPS64R5EB-NEXT: jr $ra
+; MIPS64R5EB-NEXT: nop
;
; MIPS64EL-LABEL: callfloat_2:
; MIPS64EL: # %bb.0: # %entry
@@ -5394,6 +5392,34 @@ define void @callfloat_2() {
; MIPS64EL-NEXT: daddiu $sp, $sp, 16
; MIPS64EL-NEXT: jr $ra
; MIPS64EL-NEXT: nop
+;
+; MIPS64R5EL-LABEL: callfloat_2:
+; MIPS64R5EL: # %bb.0: # %entry
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16
+; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16
+; MIPS64R5EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS64R5EL-NEXT: .cfi_offset 31, -8
+; MIPS64R5EL-NEXT: .cfi_offset 28, -16
+; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5EL-NEXT: daddu $1, $1, $25
+; MIPS64R5EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(callfloat_2)))
+; MIPS64R5EL-NEXT: daddiu $1, $zero, 383
+; MIPS64R5EL-NEXT: dsll $4, $1, 55
+; MIPS64R5EL-NEXT: daddiu $1, $zero, 523
+; MIPS64R5EL-NEXT: dsll $1, $1, 31
+; MIPS64R5EL-NEXT: daddiu $1, $1, 261
+; MIPS64R5EL-NEXT: dsll $5, $1, 22
+; MIPS64R5EL-NEXT: ld $25, %call16(float2_extern)($gp)
+; MIPS64R5EL-NEXT: jalr $25
+; MIPS64R5EL-NEXT: nop
+; MIPS64R5EL-NEXT: ld $1, %got_disp(gv2f32)($gp)
+; MIPS64R5EL-NEXT: sd $2, 0($1)
+; MIPS64R5EL-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16
+; MIPS64R5EL-NEXT: jr $ra
+; MIPS64R5EL-NEXT: nop
entry:
%0 = call <2 x float> @float2_extern(<2 x float> <float 0.0, float -1.0>, <2 x float> <float 12.0, float 14.0>)
store <2 x float> %0, ptr @gv2f32
diff --git a/llvm/test/CodeGen/X86/combine-concatvectors.ll b/llvm/test/CodeGen/X86/combine-concatvectors.ll
index 17d22607cfea88..faadf011cd6d4d 100644
--- a/llvm/test/CodeGen/X86/combine-concatvectors.ll
+++ b/llvm/test/CodeGen/X86/combine-concatvectors.ll
@@ -50,8 +50,8 @@ define void @concat_of_broadcast_v2f64_v4f64() {
; AVX1-NEXT: movq %rcx, 46348(%rax)
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm0 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vmovups %ymm0, 48296(%rax)
-; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [7.812501848093234E-3,0.0E+0]
-; AVX1-NEXT: vmovsd %xmm0, 47372(%rax)
+; AVX1-NEXT: movabsq $4575657222473777152, %rcx # imm = 0x3F8000003F800000
+; AVX1-NEXT: movq %rcx, 47372(%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -61,9 +61,10 @@ define void @concat_of_broadcast_v2f64_v4f64() {
; AVX2-NEXT: movl $1091567616, 30256(%rax) # imm = 0x41100000
; AVX2-NEXT: movabsq $4294967297, %rcx # imm = 0x100000001
; AVX2-NEXT: movq %rcx, 46348(%rax)
-; AVX2-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm0 = [4575657222473777152,4575657222473777152,4575657222473777152,4575657222473777152]
; AVX2-NEXT: vmovups %ymm0, 48296(%rax)
-; AVX2-NEXT: vmovlps %xmm0, 47372(%rax)
+; AVX2-NEXT: movabsq $4575657222473777152, %rcx # imm = 0x3F8000003F800000
+; AVX2-NEXT: movq %rcx, 47372(%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
alloca_0:
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 38162f676e7ee3..0b31c0fb09677e 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -320,7 +320,8 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
; X64-LABEL: extvselectsetcc_crash:
; X64: # %bb.0:
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
+; X64-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
+; X64-NEXT: vmovq %rax, %xmm2
; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
diff --git a/llvm/test/CodeGen/X86/ret-mmx.ll b/llvm/test/CodeGen/X86/ret-mmx.ll
index 81dd73363c1fb1..d1f54dfc315ae5 100644
--- a/llvm/test/CodeGen/X86/ret-mmx.ll
+++ b/llvm/test/CodeGen/X86/ret-mmx.ll
@@ -40,7 +40,7 @@ define <2 x i32> @t3() nounwind {
define double @t4() nounwind {
; CHECK-LABEL: t4:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1,0,0,0]
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.9406564584124654E-324,0.0E+0]
; CHECK-NEXT: retq
ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
}
diff --git a/llvm/test/CodeGen/X86/vec_zero_cse.ll b/llvm/test/CodeGen/X86/vec_zero_cse.ll
index d4357aeb2e1dea..9376212740d3a6 100644
--- a/llvm/test/CodeGen/X86/vec_zero_cse.ll
+++ b/llvm/test/CodeGen/X86/vec_zero_cse.ll
@@ -34,8 +34,8 @@ define void @test2() {
; X86: # %bb.0:
; X86-NEXT: movl $-1, M1+4
; X86-NEXT: movl $-1, M1
-; X86-NEXT: pcmpeqd %xmm0, %xmm0
-; X86-NEXT: movq %xmm0, M2
+; X86-NEXT: movl $-1, M2+4
+; X86-NEXT: movl $-1, M2
; X86-NEXT: retl
;
; X64-LABEL: test2:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index 388511ce0741f6..ebd718d6d11aa1 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -108,11 +108,10 @@ define void @PR46178(ptr %0) {
; X86-NEXT: vmovdqu (%eax), %ymm1
; X86-NEXT: vpmovqw %ymm0, %xmm0
; X86-NEXT: vpmovqw %ymm1, %xmm1
-; X86-NEXT: vpsllw $8, %xmm0, %xmm0
-; X86-NEXT: vpsraw $8, %xmm0, %xmm0
-; X86-NEXT: vpsllw $8, %xmm1, %xmm1
-; X86-NEXT: vpsraw $8, %xmm1, %xmm1
-; X86-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: vpsllw $8, %ymm0, %ymm0
+; X86-NEXT: vpsraw $8, %ymm0, %ymm0
+; X86-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,1]
; X86-NEXT: vmovdqu %ymm0, (%eax)
; X86-NEXT: vzeroupper
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/widen_shuffle-1.ll b/llvm/test/CodeGen/X86/widen_shuffle-1.ll
index 925b96f5e346cd..e82486c10e2362 100644
--- a/llvm/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/llvm/test/CodeGen/X86/widen_shuffle-1.ll
@@ -105,8 +105,8 @@ define void @shuf5(ptr %p) nounwind {
; X86-LABEL: shuf5:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movsd {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33,0,0,0,0,0,0,0,0]
-; X86-NEXT: movsd %xmm0, (%eax)
+; X86-NEXT: movl $555819297, 4(%eax) # imm = 0x21212121
+; X86-NEXT: movl $555819297, (%eax) # imm = 0x21212121
; X86-NEXT: retl
;
; X64-LABEL: shuf5:
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
6b525fb
to
d21a580
Compare
@@ -15231,8 +15231,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { | |||
// TODO: Support FP bitcasts after legalize types. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This TODO can be removed I guess - did you investigate the history behind this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://reviews.llvm.org/D58884, adding what I think is the necessary check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
AFAICT, the intermediate types for the float
version are just integers, so if we are allowing integers it seems the same as allowing FP. @topperc any reason this is buggy?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@topperc any comment?
d21a580
to
499d278
Compare
@@ -320,7 +320,8 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) { | |||
; X64-LABEL: extvselectsetcc_crash: | |||
; X64: # %bb.0: | |||
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 | |||
; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0] | |||
; X64-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not an expert on X86 assembly, but this test change and the one above seem to produce more instructions so is that a regression?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Its an optimization to save a load, I'm not 100% sure if the optimization is sound, but think it should be addressed there not here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
vector loads vs rematerialization vs scalar constants has never been ideal on x86
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@@ -320,7 +320,8 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) { | |||
; X64-LABEL: extvselectsetcc_crash: | |||
; X64: # %bb.0: | |||
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 | |||
; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0] | |||
; X64-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
vector loads vs rematerialization vs scalar constants has never been ideal on x86
@goldsteinn reverse-ping
Wanted to see if @topperc had any comment (he was the original author) and IIRC neither of us really understood why this was necessary.
I think my concern was with this code: the original type and the destination type must both be legal since they exist in the DAG, but if they have a different number of elements, you need to know that the intermediate integer type — the one with the same number of elements as the FP type — is legal, since it didn't previously exist in the DAG.
I see, so we need either the integer checks or the same number of elements?
No description provided.