-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Description
Problem Description
In Clang 21.1.0, the __builtin_constant_p builtin function exhibits inconsistent behavior when using constant arrays (.rodata) versus stack arrays. In a recursive, always-inlined helper it returns true for the elements of a constant array, but returns false for the elements of a stack array in the same recursive context.
Test Case 1: Constant Array
#include <stdio.h>
#include <string.h>
/*
 * Reproducer helper: a strlen wrapper that walks the string one character per
 * call. It is declared always_inline and calls itself on string + 1.
 *
 * string: NUL-terminated string to measure.
 * Returns the length as int; on the fallback path strlen's size_t result is
 * implicitly narrowed to int.
 */
__attribute__((always_inline)) static int o_strlen(const char *string) {
/* Current character is not reported as a compile-time constant: use the library strlen. */
if (! __builtin_constant_p(*string)) {
return strlen(string);
}
/* Constant character: count it and recurse on the remainder; the NUL terminator yields 0. */
return *string ? 1 + o_strlen(string + 1) : 0;
}
void entry() {
char *string = "hello world";
printf("%d", o_strlen(string));
}
clang example.c -S -O3 -ffreestanding
.file "example.c"
.text
.globl entry # -- Begin function entry
.p2align 4
.type entry,@function
entry: # @entry
# %bb.0:
leaq .L.str.1(%rip), %rdi
movl $11, %esi
xorl %eax, %eax
jmp printf@PLT # TAILCALL
.Lfunc_end0:
.size entry, .Lfunc_end0-entry
# -- End function
.type .L.str.1,@object # @.str.1
.section .rodata.str1.1,"aMS",@progbits,1
.L.str.1:
.asciz "%d"
.size .L.str.1, 3
.ident "clang version 21.1.0 (https://github.com/llvm/llvm-project.git 3623fe661ae35c6c80ac221f14d85be76aa870f1)"
.section ".note.GNU-stack","",@progbits
.addrsig
Observation: o_strlen(string) correctly returns 11, showing the compiler recognizes compile-time constants.
Test Case 2: Stack Array
#include <stdio.h>
#include <string.h>
/*
 * Reproducer helper: a strlen wrapper that walks the string one character per
 * call. It is declared always_inline and calls itself on string + 1.
 *
 * string: NUL-terminated string to measure.
 * Returns the length as int; on the fallback path strlen's size_t result is
 * implicitly narrowed to int.
 */
__attribute__((always_inline)) static int o_strlen(const char *string) {
/* Current character is not reported as a compile-time constant: use the library strlen. */
if (! __builtin_constant_p(*string)) {
return strlen(string);
}
/* Constant character: count it and recurse on the remainder; the NUL terminator yields 0. */
return *string ? 1 + o_strlen(string + 1) : 0;
}
/* Test Case 2 driver: measures a local char array with o_strlen and prints the result. */
void entry() {
/* Local (automatic) array initialized element by element, including the terminating NUL. */
char string[] = {'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '\0'};
printf("%d", o_strlen(string));
}
clang example.c -S -O3 -ffreestanding
.file "example.c"
.text
.globl entry # -- Begin function entry
.p2align 4
.type entry,@function
entry: # @entry
# %bb.0:
subq $24, %rsp
movabsq $8031924123371070824, %rax # imm = 0x6F77206F6C6C6568
movq %rax, 8(%rsp)
movl $6581362, 16(%rsp) # imm = 0x646C72
leaq 8(%rsp), %rdi
callq strlen@PLT
leaq .L.str(%rip), %rdi
movl %eax, %esi
xorl %eax, %eax
callq printf@PLT
addq $24, %rsp
retq
.Lfunc_end0:
.size entry, .Lfunc_end0-entry
# -- End function
.type .L__const.entry.string,@object # @__const.entry.string
.section .rodata.str1.1,"aMS",@progbits,1
.L__const.entry.string:
.asciz "hello world"
.size .L__const.entry.string, 12
.type .L.str,@object # @.str
.L.str:
.asciz "%d"
.size .L.str, 3
.ident "clang version 21.1.0 (https://github.com/llvm/llvm-project.git 3623fe661ae35c6c80ac221f14d85be76aa870f1)"
.section ".note.GNU-stack","",@progbits
.addrsig
Observation: The optimization fails with stack arrays, and strlen is called. This is not because __builtin_constant_p can never return true for stack-array elements, as Test Case 3 shows.
Test Case 3: Isolated __builtin_constant_p Test
#include <stdio.h>
#include <string.h>
/*
 * Test Case 3 driver: prints the value of __builtin_constant_p for the first
 * element of a local char array, without going through any helper function.
 */
void entry() {
/* Same local array initializer as in Test Case 2. */
char string[] = {'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '\0'};
printf("%d", __builtin_constant_p(*string));
}
clang example.c -S -O3 -ffreestanding
.file "example.c"
.text
.globl entry # -- Begin function entry
.p2align 4
.type entry,@function
entry: # @entry
# %bb.0:
leaq .L.str(%rip), %rdi
movl $1, %esi
xorl %eax, %eax
jmp printf@PLT # TAILCALL
.Lfunc_end0:
.size entry, .Lfunc_end0-entry
# -- End function
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "%d"
.size .L.str, 3
.ident "clang version 21.1.0 (https://github.com/llvm/llvm-project.git 3623fe661ae35c6c80ac221f14d85be76aa870f1)"
.section ".note.GNU-stack","",@progbits
.addrsig
Observation: __builtin_constant_p(*string) correctly returns 1 (true), showing the compiler recognizes compile-time constants.
Additional Issue: __builtin_strlen with -ffreestanding
#include <stdio.h>
#include <string.h>
/* Driver for the additional issue: calls __builtin_strlen directly on a string literal. */
void entry() {
char *string = "hello world";
/*
 * NOTE(review): __builtin_strlen has strlen's signature, so its result is
 * size_t; "%d" expects int and "%zu" would be the matching specifier. Left
 * unchanged so the source keeps matching the assembly listing that follows.
 */
printf("%d", __builtin_strlen(string));
}
clang example.c -S -O3 -ffreestanding
.file "example.c"
.text
.globl entry # -- Begin function entry
.p2align 4
.type entry,@function
entry: # @entry
# %bb.0:
pushq %rax
leaq .L.str(%rip), %rdi
callq strlen@PLT
leaq .L.str.1(%rip), %rdi
movq %rax, %rsi
xorl %eax, %eax
popq %rcx
jmp printf@PLT # TAILCALL
.Lfunc_end0:
.size entry, .Lfunc_end0-entry
# -- End function
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "hello world"
.size .L.str, 12
.type .L.str.1,@object # @.str.1
.L.str.1:
.asciz "%d"
.size .L.str.1, 3
.ident "clang version 21.1.0 (https://github.com/llvm/llvm-project.git 3623fe661ae35c6c80ac221f14d85be76aa870f1)"
.section ".note.GNU-stack","",@progbits
.addrsig
Observation: Even though the string is constant and known at compile time, __builtin_strlen calls the library function instead of computing the length at compile time.
Summary of Issues
- Inconsistent __builtin_constant_p behavior: The builtin works correctly with constant arrays in recursive contexts but fails with stack arrays in the same recursive contexts, even though an isolated test shows that stack-array elements are recognized as constants.
- Suboptimal __builtin_strlen with -ffreestanding: The compiler fails to optimize __builtin_strlen for constant strings when using -ffreestanding, always emitting calls to the library strlen function. (This may be related to the main issue.)
These behaviors appear to be compiler bugs or implementation limitations that should be addressed for consistent optimization behavior.