-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Description
| Bugzilla Link | 9623 |
| Resolution | FIXED |
| Resolved on | Oct 22, 2011 07:56 |
| Version | trunk |
| OS | All |
| Reporter | LLVM Bugzilla Contributor |
| CC | @asl |
Extended Description
Given this input:
define <4 x i8> @foo(<4 x i8> %x, <4 x i8> %y, <4 x i8> %__mask) nounwind readnone alwaysinline {
entry:
%binop = mul <4 x i8> %x, %y
%binop6 = add <4 x i8> %binop, %x
ret <4 x i8> %binop6
}
llc generates the following quite lengthy code for this input. It would be nice to get the appropriate MMX instructions instead. (This is probably not a high-priority fix in the grand scheme of things, though.)
_foo: ## @foo
## BB#0: ## %entry
pushq %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
movdqa %xmm0, %xmm2
pextrb $1, %xmm2, %eax
pextrb $1, %xmm1, %ecx
mulb %cl
pextrb $0, %xmm2, %ecx
pextrb $0, %xmm1, %edx
movzbl %al, %esi
movb %cl, %al
mulb %dl
movzbl %al, %eax
movd %eax, %xmm0
pextrb $2, %xmm2, %eax
pextrb $2, %xmm1, %ecx
pinsrb $1, %esi, %xmm0
mulb %cl
movb %al, %cl
pextrb $3, %xmm2, %eax
pextrb $3, %xmm1, %edx
mulb %dl
movb %al, %dl
movzbl %cl, %ecx
pextrb $4, %xmm2, %eax
pextrb $4, %xmm1, %esi
pinsrb $2, %ecx, %xmm0
mulb %sil
movzbl %dl, %ecx
pextrb $11, %xmm2, %edx
pextrb $12, %xmm2, %esi
pextrb $13, %xmm2, %edi
pextrb $14, %xmm2, %r8d
movl %r8d, -4(%rsp) ## 4-byte Spill
pextrb $5, %xmm1, %r9d
pextrb $5, %xmm2, %r10d
pextrb $8, %xmm1, %r11d
pinsrb $3, %ecx, %xmm0
movzbl %al, %ecx
pextrb $15, %xmm2, %ebx
pextrb $8, %xmm2, %r14d
pextrb $12, %xmm1, %r15d
movb %r10b, %al
pextrb $13, %xmm1, %r10d
pinsrb $4, %ecx, %xmm0
pextrb $14, %xmm1, %ecx
pextrb $15, %xmm1, %r12d
mulb %r9b
movb %al, %r9b
pextrb $11, %xmm1, %r13d
pextrb $10, %xmm2, %ebp
movb %r14b, %al
mulb %r11b
movb %al, %r11b
pextrb $9, %xmm2, %eax
pextrb $9, %xmm1, %r14d
mulb %r14b
movb %al, %r14b
pextrb $10, %xmm1, %r8d
movb %bpl, %al
mulb %r8b
movb %al, %r8b
movb %dl, %al
mulb %r13b
movb %al, %dl
movb %sil, %al
mulb %r15b
movb %al, %sil
movb %dil, %al
mulb %r10b
movb %al, %dil
movl -4(%rsp), %eax ## 4-byte Reload
mulb %cl
movb %al, %cl
movb %bl, %al
mulb %r12b
movb %al, %r10b
movzbl %r9b, %r9d
pextrb $7, %xmm2, %eax
pextrb $7, %xmm1, %ebx
mulb %bl
pinsrb $5, %r9d, %xmm0
movzbl %r10b, %r9d
movzbl %cl, %ecx
movzbl %dil, %edi
movzbl %sil, %esi
movzbl %dl, %edx
movzbl %r8b, %r8d
movzbl %r14b, %r10d
movzbl %r11b, %r11d
movzbl %al, %ebx
pextrb $6, %xmm2, %eax
pextrb $6, %xmm1, %r14d
mulb %r14b
movzbl %al, %eax
pinsrb $6, %eax, %xmm0
pinsrb $7, %ebx, %xmm0
pinsrb $8, %r11d, %xmm0
pinsrb $9, %r10d, %xmm0
pinsrb $10, %r8d, %xmm0
pinsrb $11, %edx, %xmm0
pinsrb $12, %esi, %xmm0
pinsrb $13, %edi, %xmm0
pinsrb $14, %ecx, %xmm0
pinsrb $15, %r9d, %xmm0
paddb %xmm2, %xmm0
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
If I explicitly extract the values from the vector, do the math, and repack, like this:
define <4 x i8> @bar(<4 x i8> %x, <4 x i8> %y, <4 x i8> %__mask) nounwind readnone alwaysinline {
entry:
%x0 = extractelement <4 x i8> %x, i32 0
%x1 = extractelement <4 x i8> %x, i32 1
%x2 = extractelement <4 x i8> %x, i32 2
%x3 = extractelement <4 x i8> %x, i32 3
%y0 = extractelement <4 x i8> %y, i32 0
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%y3 = extractelement <4 x i8> %y, i32 3
%m0 = mul i8 %x0, %y0
%m1 = mul i8 %x1, %y1
%m2 = mul i8 %x2, %y2
%m3 = mul i8 %x3, %y3
%a0 = add i8 %m0, %x0
%a1 = add i8 %m1, %x1
%a2 = add i8 %m2, %x2
%a3 = add i8 %m3, %x3
%r0 = insertelement <4 x i8> undef, i8 %a0, i32 0
%r1 = insertelement <4 x i8> %r0, i8 %a1, i32 1
%r2 = insertelement <4 x i8> %r1, i8 %a2, i32 2
%r3 = insertelement <4 x i8> %r2, i8 %a3, i32 3
ret <4 x i8> %r3
}
The generated code is better:
_bar: ## @bar
## BB#0: ## %entry
pextrb $2, %xmm0, %ecx
pextrb $2, %xmm1, %edx
movb %cl, %al
mulb %dl
movb %al, %dl
addb %cl, %dl
pextrb $0, %xmm0, %ecx
pextrb $0, %xmm1, %esi
movb %cl, %al
mulb %sil
pextrb $3, %xmm0, %esi
movb %al, %dil
addb %cl, %dil
movzbl %dl, %ecx
pextrb $3, %xmm1, %edx
movb %sil, %al
mulb %dl
addb %sil, %al
movzbl %al, %edx
shll $8, %edx
pextrb $1, %xmm0, %esi
orl %ecx, %edx
movzbl %dil, %ecx
pextrb $1, %xmm1, %edi
movb %sil, %al
mulb %dil
addb %sil, %al
movzbl %al, %eax
shll $8, %eax
orl %ecx, %eax
pinsrw $0, %eax, %xmm0
pinsrw $1, %edx, %xmm0
ret