-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Closed
Description
https://gcc.godbolt.org/z/dnsebnPz5
We're seeing cases where multiple uses of a node prevent vector-combine from merging equivalent shuffles.
Sorry the test case is still more convoluted than necessary :(
; Reproducer input: the scalar quotient %x has two uses, and each use ends up
; splatted across a <2 x float> before the final fadd.
define <2 x float> @foo(<2 x float> %a0, <2 x float> %a1, <2 x float> %a3, <2 x float> %a4) {
; dot product: lane 0 of %dp2 is a0[0]*a1[0] + a0[1]*a1[1]; only lane 0 is extracted
%dp0 = fmul <2 x float> %a0, %a1
%dp1 = shufflevector <2 x float> %dp0, <2 x float> poison, <2 x i32> <i32 1, i32 undef>
%dp2 = fadd <2 x float> %dp0, %dp1
%dp3x = extractelement <2 x float> %dp2, i32 0
; scalar fdiv: %x = dot product / a3[0]
%a3x = extractelement <2 x float> %a3, i32 0
%x = fdiv float %dp3x, %a3x
; first use: splat %x to both lanes, then multiply by %a4 (%vv = <x*a4[0], x*a4[1]>)
%xsplat0 = insertelement <2 x float> poison, float %x, i32 0
%xsplat1 = shufflevector <2 x float> %xsplat0, <2 x float> poison, <2 x i32> zeroinitializer
%vv = fmul <2 x float> %xsplat1, %a4
; second use: scalar %q = x*a4[0], then splat %q to both lanes
%a4x = extractelement <2 x float> %a4, i32 0
%q = fmul float %x, %a4x
%qsplat0 = insertelement <2 x float> poison, float %q, i32 0
%qsplat1 = shufflevector <2 x float> %qsplat0, <2 x float> poison, <2 x i32> zeroinitializer
; combine both uses
%res = fadd <2 x float> %vv, %qsplat1
ret <2 x float> %res
}
Running `opt -O3` currently produces:
; Reported `opt -O3` output: the fdiv and the second-use fmul are now
; <2 x float> operations (%1 and %2), each followed by a lane-0 splat.
define <2 x float> @foo(<2 x float> %a0, <2 x float> %a1, <2 x float> %a3, <2 x float> %a4) {
%dp0 = fmul <2 x float> %a0, %a1
%dp1 = shufflevector <2 x float> %dp0, <2 x float> poison, <2 x i32> <i32 1, i32 undef>
%dp2 = fadd <2 x float> %dp0, %dp1
%1 = fdiv <2 x float> %dp2, %a3
; first use: splat lane 0 of %1, then multiply by %a4
%xsplat1 = shufflevector <2 x float> %1, <2 x float> poison, <2 x i32> zeroinitializer
%vv = fmul <2 x float> %xsplat1, %a4
; second use: multiply %1 by %a4, then splat lane 0 of the product.
; Lane 0 of %2 is %1[0]*a4[0], the same expression as lane 0 of %vv.
%2 = fmul <2 x float> %1, %a4
%qsplat1 = shufflevector <2 x float> %2, <2 x float> poison, <2 x i32> zeroinitializer
%res = fadd <2 x float> %vv, %qsplat1
ret <2 x float> %res
}
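Only lane 0 of the %2 fmul is used (it is immediately splatted), and lane 0 of %vv already holds the same product (%1[0] * %a4[0]), so we should be able to drop %2 and splat %vv instead: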
; Desired output: there is no separate vector fmul for the second use;
; %qsplat1 splats lane 0 of %vv directly.
define <2 x float> @foo(<2 x float> %a0, <2 x float> %a1, <2 x float> %a3, <2 x float> %a4) {
%dp0 = fmul <2 x float> %a0, %a1
%dp1 = shufflevector <2 x float> %dp0, <2 x float> poison, <2 x i32> <i32 1, i32 undef>
%dp2 = fadd <2 x float> %dp0, %dp1
%1 = fdiv <2 x float> %dp2, %a3
%xsplat1 = shufflevector <2 x float> %1, <2 x float> poison, <2 x i32> zeroinitializer
%vv = fmul <2 x float> %xsplat1, %a4
; lane 0 of %vv is %1[0]*a4[0]; splatting it yields the second-use value
%qsplat1 = shufflevector <2 x float> %vv, <2 x float> poison, <2 x i32> zeroinitializer
%res = fadd <2 x float> %vv, %qsplat1
ret <2 x float> %res
}