-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Open
Description
Test code
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
extern void subroutine(size_t x);
void func1a(void) {
size_t x = 1;
while (true) {
subroutine(x);
if (x > SIZE_MAX / 2)
break;
x *= 2;
}
}
void func1b(void) {
size_t x = 1;
while (true) {
subroutine(x);
if (__builtin_add_overflow(x, x, &x))
break;
}
}
void func1c(void) {
size_t x = 1;
while (true) {
subroutine(x);
if (__builtin_mul_overflow(x, (size_t)2, &x))
break;
}
}x86-64 Clang 21.1.0 with -Os option produces:
func1a:
pushq %rbx
movl $1, %ebx
.LBB2_1:
movq %rbx, %rdi
callq subroutine@PLT
testq %rbx, %rbx
leaq (%rbx,%rbx), %rbx
jns .LBB2_1
popq %rbx
retq
func1b:
pushq %rbx
movl $1, %ebx
.LBB3_1:
movq %rbx, %rdi
callq subroutine@PLT
addq %rbx, %rbx
jae .LBB3_1
popq %rbx
retq(func1c assembly omitted because it's the same as func1a)
(Compiler Explorer link)
While the conditional (x > SIZE_MAX / 2) can be converted into a "test if sign bit is set" check, it can miss that x would multiply by 2 afterward, so the code can be smaller by checking the carry bit after addition.
The expected result is func1a and func1c both optimize to func1b.
Note that I have also tested with AArch64 Clang and it has the similar problem.
EDIT: I also reported in GCC