Skip to content

x86: inefficient code generated for i8 vector types #9995

@llvmbot

Description

@llvmbot
Bugzilla Link 9623
Resolution FIXED
Resolved on Oct 22, 2011 07:56
Version trunk
OS All
Reporter LLVM Bugzilla Contributor
CC @asl

Extended Description

Given this input:

; Reproducer: per-lane (x * y) + x on <4 x i8> vectors.
; %__mask is part of the signature but is never read in the body.
define <4 x i8> @foo(<4 x i8> %x, <4 x i8> %y, <4 x i8> %__mask) nounwind readnone alwaysinline {
entry:
  %binop = mul <4 x i8> %x, %y           ; lane-wise product x[i] * y[i]
  %binop6 = add <4 x i8> %binop, %x      ; lane-wise product + x[i]
  ret <4 x i8> %binop6
}

The following quite lengthy code is generated by llc. It would be nice to get the appropriate MMX instructions instead. (This is probably not a high priority fix in the grand scheme of things, though.)

## ---------------------------------------------------------------------------
## _foo -- llc output for the vector reproducer @foo (x86-64, AT&T syntax).
##
## This listing is the evidence for the report: the instruction sequence is
## kept exactly as the compiler emitted it (same instructions, same order).
##
## Reads:  %xmm0 and %xmm1 as the two multiplicands. Going by the final paddb
##         of the saved %xmm0 copy, %xmm0 is x and %xmm1 is y from the IR.
## Result: %xmm0, rebuilt lane by lane, then x added with one paddb.
## Saves:  %rbp, %r15, %r14, %r13, %r12, %rbx (pushed on entry, popped at exit).
## Stack:  one 4-byte spill slot at -4(%rsp); no call is made in this routine.
##         NOTE(review): that slot is below %rsp, i.e. relies on a red zone --
##         presumably a SysV-style target; confirm against the llc triple.
##
## Shape:  all 16 byte lanes are extracted with pextrb, multiplied one at a
##         time with scalar mulb (%ax = %al * operand, only %al is kept),
##         zero-extended with movzbl and re-inserted with pinsrb.
## ---------------------------------------------------------------------------
_foo:                                   ## @foo
## BB#0:                                ## %entry
        ## Callee-saved registers used as scratch below.
        pushq   %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        movdqa  %xmm0, %xmm2            ## keep first operand; %xmm0 becomes the result

        ## Lanes 0 and 1.
        pextrb  $1, %xmm2, %eax
        pextrb  $1, %xmm1, %ecx
        mulb    %cl                     ## %al = lane 1 product
        pextrb  $0, %xmm2, %ecx
        pextrb  $0, %xmm1, %edx
        movzbl  %al, %esi               ## park lane 1 product in %esi
        movb    %cl, %al
        mulb    %dl                     ## %al = lane 0 product
        movzbl  %al, %eax
        movd    %eax, %xmm0             ## result starts with lane 0
        pextrb  $2, %xmm2, %eax
        pextrb  $2, %xmm1, %ecx
        pinsrb  $1, %esi, %xmm0

        ## Lanes 2, 3 and 4.
        mulb    %cl                     ## %al = lane 2 product
        movb    %al, %cl
        pextrb  $3, %xmm2, %eax
        pextrb  $3, %xmm1, %edx
        mulb    %dl                     ## %al = lane 3 product
        movb    %al, %dl
        movzbl  %cl, %ecx
        pextrb  $4, %xmm2, %eax
        pextrb  $4, %xmm1, %esi
        pinsrb  $2, %ecx, %xmm0
        mulb    %sil                    ## %al = lane 4 product
        movzbl  %dl, %ecx

        ## Bulk extraction of the remaining operands, interleaved with the
        ## inserts for lanes 3 and 4.
        pextrb  $11, %xmm2, %edx
        pextrb  $12, %xmm2, %esi
        pextrb  $13, %xmm2, %edi
        pextrb  $14, %xmm2, %r8d
        movl    %r8d, -4(%rsp)          ## 4-byte Spill
        pextrb  $5, %xmm1, %r9d
        pextrb  $5, %xmm2, %r10d
        pextrb  $8, %xmm1, %r11d
        pinsrb  $3, %ecx, %xmm0
        movzbl  %al, %ecx               ## lane 4 product, zero-extended
        pextrb  $15, %xmm2, %ebx
        pextrb  $8, %xmm2, %r14d
        pextrb  $12, %xmm1, %r15d
        movb    %r10b, %al
        pextrb  $13, %xmm1, %r10d
        pinsrb  $4, %ecx, %xmm0
        pextrb  $14, %xmm1, %ecx
        pextrb  $15, %xmm1, %r12d

        ## Lane 5 -> %r9b.
        mulb    %r9b
        movb    %al, %r9b
        pextrb  $11, %xmm1, %r13d
        pextrb  $10, %xmm2, %ebp

        ## Lane 8 -> %r11b.
        movb    %r14b, %al
        mulb    %r11b
        movb    %al, %r11b

        ## Lane 9 -> %r14b.
        pextrb  $9, %xmm2, %eax
        pextrb  $9, %xmm1, %r14d
        mulb    %r14b
        movb    %al, %r14b

        ## Lane 10 -> %r8b.
        pextrb  $10, %xmm1, %r8d
        movb    %bpl, %al
        mulb    %r8b
        movb    %al, %r8b

        ## Lane 11 -> %dl.
        movb    %dl, %al
        mulb    %r13b
        movb    %al, %dl

        ## Lane 12 -> %sil.
        movb    %sil, %al
        mulb    %r15b
        movb    %al, %sil

        ## Lane 13 -> %dil.
        movb    %dil, %al
        mulb    %r10b
        movb    %al, %dil

        ## Lane 14 -> %cl (first-operand byte comes back from the spill slot).
        movl    -4(%rsp), %eax          ## 4-byte Reload
        mulb    %cl
        movb    %al, %cl

        ## Lane 15 -> %r10b.
        movb    %bl, %al
        mulb    %r12b
        movb    %al, %r10b
        movzbl  %r9b, %r9d

        ## Lane 7 -> %al.
        pextrb  $7, %xmm2, %eax
        pextrb  $7, %xmm1, %ebx
        mulb    %bl
        pinsrb  $5, %r9d, %xmm0

        ## Zero-extend every parked product to 32 bits for pinsrb.
        movzbl  %r10b, %r9d             ## lane 15
        movzbl  %cl, %ecx               ## lane 14
        movzbl  %dil, %edi              ## lane 13
        movzbl  %sil, %esi              ## lane 12
        movzbl  %dl, %edx               ## lane 11
        movzbl  %r8b, %r8d              ## lane 10
        movzbl  %r14b, %r10d            ## lane 9
        movzbl  %r11b, %r11d            ## lane 8
        movzbl  %al, %ebx               ## lane 7

        ## Lane 6 -> %eax.
        pextrb  $6, %xmm2, %eax
        pextrb  $6, %xmm1, %r14d
        mulb    %r14b
        movzbl  %al, %eax

        ## Assemble lanes 6..15 of the product vector.
        pinsrb  $6, %eax, %xmm0
        pinsrb  $7, %ebx, %xmm0
        pinsrb  $8, %r11d, %xmm0
        pinsrb  $9, %r10d, %xmm0
        pinsrb  $10, %r8d, %xmm0
        pinsrb  $11, %edx, %xmm0
        pinsrb  $12, %esi, %xmm0
        pinsrb  $13, %edi, %xmm0
        pinsrb  $14, %ecx, %xmm0
        pinsrb  $15, %r9d, %xmm0

        paddb   %xmm2, %xmm0            ## the add: the only vector arithmetic here

        ## Restore in reverse push order.
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        ret

If I explicitly extract the values from the vector, do the math, and repack, like this:

; Hand-scalarized variant of the same computation: per-lane (x * y) + x,
; with every lane extracted, computed as scalar i8, and re-inserted.
; %__mask is part of the signature but is never read in the body.
define <4 x i8> @bar(<4 x i8> %x, <4 x i8> %y, <4 x i8> %__mask) nounwind readnone alwaysinline {
entry:
  ; Unpack the four lanes of %x.
  %x0 = extractelement <4 x i8> %x, i32 0
  %x1 = extractelement <4 x i8> %x, i32 1
  %x2 = extractelement <4 x i8> %x, i32 2
  %x3 = extractelement <4 x i8> %x, i32 3

  ; Unpack the four lanes of %y.
  %y0 = extractelement <4 x i8> %y, i32 0
  %y1 = extractelement <4 x i8> %y, i32 1
  %y2 = extractelement <4 x i8> %y, i32 2
  %y3 = extractelement <4 x i8> %y, i32 3

  ; Scalar products x[i] * y[i].
  %m0 = mul i8 %x0, %y0
  %m1 = mul i8 %x1, %y1
  %m2 = mul i8 %x2, %y2
  %m3 = mul i8 %x3, %y3

  ; Scalar sums: product + x[i].
  %a0 = add i8 %m0, %x0
  %a1 = add i8 %m1, %x1
  %a2 = add i8 %m2, %x2
  %a3 = add i8 %m3, %x3

  ; Repack the four results, starting from an undef vector.
  %r0 = insertelement <4 x i8> undef, i8 %a0, i32 0
  %r1 = insertelement <4 x i8> %r0, i8 %a1, i32 1
  %r2 = insertelement <4 x i8> %r1, i8 %a2, i32 2
  %r3 = insertelement <4 x i8> %r2, i8 %a3, i32 3

  ret <4 x i8> %r3
}

The code is better:

## ---------------------------------------------------------------------------
## _bar -- llc output for the hand-scalarized reproducer @bar (x86-64, AT&T).
##
## This listing is quoted as evidence in the report: the instruction sequence
## is kept exactly as the compiler emitted it (same instructions, same order).
##
## Reads:  byte lanes 0..3 of %xmm0 and %xmm1.
## Result: two 16-bit words inserted into %xmm0:
##           word 0 = (lane 1 result << 8) | lane 0 result
##           word 1 = (lane 3 result << 8) | lane 2 result
##         where each lane result is (xmm0 byte * xmm1 byte) + xmm0 byte.
## Uses:   %rax, %rcx, %rdx, %rsi, %rdi as scratch; no stack, no pushes.
## Note:   mulb computes %ax = %al * operand; only %al is consumed here.
## ---------------------------------------------------------------------------
_bar:                                   ## @bar
## BB#0:                                ## %entry
        ## Lane 2 result -> %dl.
        pextrb  $2, %xmm0, %ecx
        pextrb  $2, %xmm1, %edx
        movb    %cl, %al
        mulb    %dl
        movb    %al, %dl
        addb    %cl, %dl

        ## Lane 0 result -> %dil (the lane 3 extract is interleaved).
        pextrb  $0, %xmm0, %ecx
        pextrb  $0, %xmm1, %esi
        movb    %cl, %al
        mulb    %sil
        pextrb  $3, %xmm0, %esi
        movb    %al, %dil
        addb    %cl, %dil
        movzbl  %dl, %ecx               ## lane 2 result, zero-extended

        ## Lane 3 result -> high byte of %edx, merged with lane 2.
        pextrb  $3, %xmm1, %edx
        movb    %sil, %al
        mulb    %dl
        addb    %sil, %al
        movzbl  %al, %edx
        shll    $8, %edx
        pextrb  $1, %xmm0, %esi
        orl     %ecx, %edx              ## %edx = (lane 3 << 8) | lane 2
        movzbl  %dil, %ecx              ## lane 0 result, zero-extended

        ## Lane 1 result -> high byte of %eax, merged with lane 0.
        pextrb  $1, %xmm1, %edi
        movb    %sil, %al
        mulb    %dil
        addb    %sil, %al
        movzbl  %al, %eax
        shll    $8, %eax
        orl     %ecx, %eax              ## %eax = (lane 1 << 8) | lane 0

        ## Write both packed pairs into the low 32 bits of %xmm0.
        pinsrw  $0, %eax, %xmm0
        pinsrw  $1, %edx, %xmm0
        ret

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugzilla — Issues migrated from bugzilla

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions