Skip to content

Inefficient code generated checking 32-bit multiply for signed overflow on x86 #2425

@efriedma-quic

Description

@efriedma-quic
Bugzilla Link 2053
Resolution LATER
Resolved on Feb 17, 2008 13:46
Version unspecified
OS Linux

Extended Description

The code generated for x86 when checking a signed 32-bit multiply for overflow in the obvious way is much longer than it needs to be.

Original C code:

int x(int a, int b) {long long prod = (long long)a*b; return prod > 0x7FFFFFFF || prod < (-0x7FFFFFFF-1);}

Generated LLVM IL (generated using clang -emit-llvm-bc | opt -std-compile-opts):
define i32 @​x(i32 %a, i32 %b) nounwind {
entry:
%conv = sext i32 %a to i64 ; [#uses=1]
%conv2 = sext i32 %b to i64 ; [#uses=1]
%mul = mul i64 %conv2, %conv ; [#uses=2]
%cmp = icmp sgt i64 %mul, 2147483647 ; [#uses=1]
br i1 %cmp, label %UnifiedReturnBlock, label %lor_rhs

lor_rhs: ; preds = %entry
%cmp6 = icmp slt i64 %mul, -2147483648 ; [#uses=1]
%phitmp = zext i1 %cmp6 to i32 ; [#uses=1]
ret i32 %phitmp

UnifiedReturnBlock: ; preds = %entry
ret i32 1
}

Code generated by llc:
x:
pushl %ebx
pushl %esi
movl 16(%esp), %eax
imull 12(%esp)
testl %eax, %eax
sets %cl
movzbw %cl, %cx
testl %edx, %edx
setg %bl
movzbw %bl, %si
cmove %cx, %si
movw %si, %cx
testb $1, %cl
je .LBB1_3 # lor_rhs
.LBB1_1: # UnifiedReturnBlock
movl $1, %eax
.LBB1_2: # UnifiedReturnBlock
popl %esi
popl %ebx
ret
.LBB1_3: # lor_rhs
cmpl $2147483648, %eax
setb %al
movzbw %al, %ax
cmpl $4294967295, %edx
setl %cl
movzbw %cl, %cx
cmove %ax, %cx
movw %cx, %ax
andl $1, %eax
jmp .LBB1_2 # UnifiedReturnBlock

Code generated by gcc:
x:
movl 8(%esp), %eax
imull 4(%esp)
addl $-2147483648, %eax
adcl $0, %edx
xorl %eax, %eax
cmpl $0, %edx
seta %al
ret

LLVM seems to be able to deal with the following variant much more easily:

int x(int a, int b) {long long prod = (long long)a*b; return (prod > 0x7FFFFFFF) | (prod < (-0x7FFFFFFF-1));}

Note that this code uses "|" instead of "||".

Generated LLVM IL (generated using clang -emit-llvm-bc | opt -std-compile-opts):
define i32 @​x(i32 %a, i32 %b) nounwind {
entry:
%conv = sext i32 %a to i64 ; [#uses=1]
%conv2 = sext i32 %b to i64 ; [#uses=1]
%mul = mul i64 %conv2, %conv ; [#uses=1]
%mul.off = add i64 %mul, 2147483648 ; [#uses=1]
%or1 = icmp ugt i64 %mul.off, 4294967295 ; [#uses=1]
%or = zext i1 %or1 to i32 ; [#uses=1]
ret i32 %or
}

Code generated by llc:
x:
movl 8(%esp), %eax
imull 4(%esp)
addl $2147483648, %eax
adcl $0, %edx
testl %edx, %edx
setne %al
movzbl %al, %eax
ret

Here's the ideal code as far as I know (handwritten by myself; not really tested, but all it does is use the carry flag set by imull — imull sets CF and OF identically on overflow, so setc is equivalent to seto here):
x:
movl 8(%esp), %eax
imull 4(%esp)
setc %al
movzbl %al, %eax
ret

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugzillaIssues migrated from bugzilla

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions