Description
| Bugzilla Link | 2053 |
| Resolution | LATER |
| Resolved on | Feb 17, 2008 13:46 |
| Version | unspecified |
| OS | Linux |
Extended Description
The code generated on x86 for checking for signed overflow on a multiply, written the obvious way, is much longer than it needs to be.
Original C code:
int x(int a, int b) {long long prod = (long long)a*b; return prod > 0x7FFFFFFF || prod < (-0x7FFFFFFF-1);}
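For reference, the check is just asking whether the 64-bit product fits in 32 bits. A minimal restatement of that (my own sketch, with a hypothetical name x_trunc, and assuming the usual two's-complement truncation on the long long to int conversion; not part of the original report):
int x_trunc(int a, int b) {
    long long prod = (long long)a * b;
    /* Overflow iff truncating to 32 bits and sign-extending back changes the
       value; this is exactly what the flag set by a widening multiply reports. */
    return prod != (long long)(int)prod;
}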
LLVM IR generated using clang -emit-llvm-bc | opt -std-compile-opts:
define i32 @x(i32 %a, i32 %b) nounwind {
entry:
%conv = sext i32 %a to i64 ; [#uses=1]
%conv2 = sext i32 %b to i64 ; [#uses=1]
%mul = mul i64 %conv2, %conv ; [#uses=2]
%cmp = icmp sgt i64 %mul, 2147483647 ; [#uses=1]
br i1 %cmp, label %UnifiedReturnBlock, label %lor_rhs
lor_rhs: ; preds = %entry
%cmp6 = icmp slt i64 %mul, -2147483648 ; [#uses=1]
%phitmp = zext i1 %cmp6 to i32 ; [#uses=1]
ret i32 %phitmp
UnifiedReturnBlock: ; preds = %entry
ret i32 1
}
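The problem is already visible at the IR level: the short-circuiting || becomes a branch, so the two 64-bit comparisons land in different basic blocks and each has to be materialized separately on a 32-bit target. In C terms the IR above corresponds to roughly the following (my reading, not from the report):
int x(int a, int b) {
    long long prod = (long long)a * b;
    if (prod > 0x7FFFFFFFLL)        /* %cmp; the taken edge goes to UnifiedReturnBlock */
        return 1;
    return prod < -0x80000000LL;    /* %cmp6 in lor_rhs */
}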
Code generated by llc:
x:
pushl %ebx
pushl %esi
movl 16(%esp), %eax
imull 12(%esp)
testl %eax, %eax
sets %cl
movzbw %cl, %cx
testl %edx, %edx
setg %bl
movzbw %bl, %si
cmove %cx, %si
movw %si, %cx
testb $1, %cl
je .LBB1_3 # lor_rhs
.LBB1_1: # UnifiedReturnBlock
movl $1, %eax
.LBB1_2: # UnifiedReturnBlock
popl %esi
popl %ebx
ret
.LBB1_3: # lor_rhs
cmpl $2147483648, %eax
setb %al
movzbw %al, %ax
cmpl $4294967295, %edx
setl %cl
movzbw %cl, %cx
cmove %ax, %cx
movw %cx, %ax
andl $1, %eax
jmp .LBB1_2 # UnifiedReturnBlock
Code generated by gcc:
x:
movl 8(%esp), %eax
imull 4(%esp)
addl $-2147483648, %eax
adcl $0, %edx
xorl %eax, %eax
cmpl $0, %edx
seta %al
ret
LLVM seems to be able to deal with the following variant much more easily:
int x(int a, int b) {long long prod = (long long)a*b; return (prod > 0x7FFFFFFF) | (prod < (-0x7FFFFFFF-1));}
Note that this code uses "|" instead of "||".
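With | both comparisons are evaluated unconditionally, so the optimizer can fold the pair into a single unsigned range check: shift the valid range [-2^31, 2^31-1] up by 2^31 so it becomes [0, 2^32-1], then compare once. A C sketch of the folded form (my own illustration, hypothetical name x_folded; this is the %mul.off / icmp ugt pair in the IR below, and gcc's add/adc sequence above is the 32-bit lowering of the same idea):
int x_folded(int a, int b) {
    long long prod = (long long)a * b;
    /* prod is in [INT_MIN, INT_MAX] exactly when prod + 2^31 is in [0, 2^32-1]. */
    return (unsigned long long)(prod + 2147483648LL) > 4294967295ULL;
}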
LLVM IR generated using clang -emit-llvm-bc | opt -std-compile-opts:
define i32 @x(i32 %a, i32 %b) nounwind {
entry:
%conv = sext i32 %a to i64 ; [#uses=1]
%conv2 = sext i32 %b to i64 ; [#uses=1]
%mul = mul i64 %conv2, %conv ; [#uses=1]
%mul.off = add i64 %mul, 2147483648 ; [#uses=1]
%or1 = icmp ugt i64 %mul.off, 4294967295 ; [#uses=1]
%or = zext i1 %or1 to i32 ; [#uses=1]
ret i32 %or
}
Code generated by llc:
x:
movl 8(%esp), %eax
imull 4(%esp)
addl $2147483648, %eax
adcl $0, %edx
testl %edx, %edx
setne %al
movzbl %al, %eax
ret
Here's the ideal code as far as I know (handwritten by me; not really tested, but all it does is use the carry/overflow flag set by the one-operand imull, which sets CF and OF together when the full product does not fit in %eax):
x:
movl 8(%esp), %eax
imull 4(%esp)
setc %al
movzbl %al, %eax
ret
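As an aside, and purely as a sketch (the hypothetical name x_builtin is mine, and the builtin did not exist when this was filed, so this is not a proposed fix): the same check can nowadays be written with the GCC/Clang overflow builtin, which should compile down to essentially the flag-based sequence above.
int x_builtin(int a, int b) {
    int prod32;
    /* Returns nonzero when a*b does not fit in int; with optimization this
       should become an imull plus a flag check, much like the handwritten code. */
    return __builtin_smul_overflow(a, b, &prod32);
}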