-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Description
| Bugzilla Link | 2109 |
| Resolution | FIXED |
| Resolved on | Dec 28, 2015 13:09 |
| Version | unspecified |
| OS | Linux |
| CC | @rotateright |
Extended Description
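Take the following bitcode (shown as textual IR; the unnamed instruction results are implicitly numbered %0 through %5 in the order they appear):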
define <4 x float> @_mm_loadh_pi22(<4 x float> %__A, <2 x i32>* %__P) nounwind {
entry:
load <2 x i32>* %__P
bitcast <2 x i32> %0 to <2 x float>
extractelement <2 x float> %1, i32 0
extractelement <2 x float> %1, i32 1
insertelement <4 x float> %__A, float %2, i32 2
insertelement <4 x float> %4, float %3, i32 3
ret <4 x float> %5
}
This currently codegens to the following:
_mm_loadh_pi22:
subl $8, %esp
movl 12(%esp), %eax
movl 4(%eax), %ecx
movl %ecx, 4(%esp)
movl (%eax), %eax
movl %eax, (%esp)
movss (%esp), %xmm1
movaps %xmm0, %xmm2
shufps $3, %xmm1, %xmm2
shufps $36, %xmm2, %xmm0
movss 4(%esp), %xmm1
movaps %xmm0, %xmm2
shufps $2, %xmm1, %xmm2
shufps $132, %xmm2, %xmm0
addl $8, %esp
ret
Unless I've made a mistake, this should codegen to the following:
_mm_loadh_pi22:
movl 4(%esp), %eax
movhps (%eax), %xmm0
ret
Note that LLVM already knows how to generate movhps: it does in fact emit it for the following function, which is functionally equivalent but takes a <1 x i64>* and expresses the operation as a load plus a shufflevector:
define <4 x float> @_mm_loadh_pi22(<4 x float> %__A, <1 x i64>* %__P) nounwind {
entry:
%tmp4 = bitcast <1 x i64>* %__P to double*
%tmp5 = load double* %tmp4
%tmp6 = insertelement <2 x double> undef, double %tmp5, i32 0
%tmp8 = bitcast <2 x double> %tmp6 to <4 x float>
%tmp9 = shufflevector <4 x float> %__A, <4 x float> %tmp8, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >
ret <4 x float> %tmp9
}