Skip to content

Conversation

Girgias
Copy link
Member

@Girgias Girgias commented Aug 4, 2025

This idea is based on @nielsdos previous PR to reduce code bloat: #18436 To do so we create a specialized function to handle parameters that only accept strings and not null such that we can assign the zend_string to the destination directly.

Will take the assembly of the ext/zend_test function ZEND_FUNCTION(zend_test_is_string_marked_as_valid_utf8) as an example:
Compiled in a release build and retrieved using:

./configure -C --disable-all --enable-zend_test --prefix=/path/to/custom/releases
make -jN
gdb sapi/cli/php
disassemble zif_zend_test_is_string_marked_as_valid_utf8

Prior to this commit (master@98e0dbcefedeecf5e4d032e54df1aeebaa118754)the ASM looks like:

Address range 0x7344e0 to 0x73455a:
   0x00000000007344e0 <+0>:	push   %rbx
   0x00000000007344e1 <+1>:	sub    $0x20,%rsp
   0x00000000007344e5 <+5>:	mov    0x2c(%rdi),%r9d
   0x00000000007344e9 <+9>:	cmp    $0x1,%r9d
   0x00000000007344ed <+13>:	jne    0x41ab56 <zif_zend_test_is_string_marked_as_valid_utf8-3250570>
   0x00000000007344f3 <+19>:	mov    %rsi,%rcx
   0x00000000007344f6 <+22>:	cmpb   $0x6,0x58(%rdi)
   0x00000000007344fa <+26>:	jne    0x734520 <zif_zend_test_is_string_marked_as_valid_utf8+64>
   0x00000000007344fc <+28>:	mov    0x50(%rdi),%rax
   0x0000000000734500 <+32>:	testb  $0x2,0x5(%rax)
   0x0000000000734504 <+36>:	setne  %al
   0x0000000000734507 <+39>:	movzbl %al,%eax
   0x000000000073450a <+42>:	add    $0x2,%eax
   0x000000000073450d <+45>:	mov    %eax,0x8(%rcx)
   0x0000000000734510 <+48>:	add    $0x20,%rsp
   0x0000000000734514 <+52>:	pop    %rbx
   0x0000000000734515 <+53>:	ret
   0x0000000000734516 <+54>:	cs nopw 0x0(%rax,%rax,1)
   0x0000000000734520 <+64>:	mov    %rsi,0x8(%rsp)
   0x0000000000734525 <+69>:	lea    0x50(%rdi),%rbx
   0x0000000000734529 <+73>:	lea    0x18(%rsp),%rsi
   0x000000000073452e <+78>:	add    $0x50,%rdi
   0x0000000000734532 <+82>:	mov    $0x1,%edx
   0x0000000000734537 <+87>:	mov    %r9d,0x4(%rsp)
   0x000000000073453c <+92>:	call   0x7afd50 <zend_parse_arg_str_slow>
   0x0000000000734541 <+97>:	mov    0x4(%rsp),%r9d
   0x0000000000734546 <+102>:	test   %al,%al
   0x0000000000734548 <+104>:	je     0x41ab3a <zif_zend_test_is_string_marked_as_valid_utf8.cold>
   0x000000000073454e <+110>:	mov    0x18(%rsp),%rax
   0x0000000000734553 <+115>:	mov    0x8(%rsp),%rcx
   0x0000000000734558 <+120>:	jmp    0x734500 <zif_zend_test_is_string_marked_as_valid_utf8+32>
Address range 0x41ab3a to 0x41ab73:
   0x000000000041ab3a <-3250598>:	mov    $0x9,%edi
   0x000000000041ab3f <-3250593>:	mov    $0x4,%ecx
   0x000000000041ab44 <-3250588>:	mov    %rbx,%r8
   0x000000000041ab47 <-3250585>:	xor    %edx,%edx
   0x000000000041ab49 <-3250583>:	mov    %r9d,%esi
   0x000000000041ab4c <-3250580>:	call   0x41f549 <zend_wrong_parameter_error>
   0x000000000041ab51 <-3250575>:	jmp    0x734510 <zif_zend_test_is_string_marked_as_valid_utf8+48>
   0x000000000041ab56 <-3250570>:	mov    $0x1,%edi
   0x000000000041ab5b <-3250565>:	mov    $0x1,%esi
   0x000000000041ab60 <-3250560>:	xor    %ebx,%ebx
   0x000000000041ab62 <-3250558>:	call   0x41f017 <zend_wrong_parameters_count_error>
   0x000000000041ab67 <-3250553>:	xor    %r9d,%r9d
   0x000000000041ab6a <-3250550>:	mov    $0x1,%edi
   0x000000000041ab6f <-3250545>:	xor    %ecx,%ecx
   0x000000000041ab71 <-3250543>:	jmp    0x41ab44 <zif_zend_test_is_string_marked_as_valid_utf8-3250588>

Afterwards it looks like:

Address range 0x7339d0 to 0x733a45:
   0x00000000007339d0 <+0>:	sub    $0x28,%rsp
   0x00000000007339d4 <+4>:	mov    0x2c(%rdi),%r9d
   0x00000000007339d8 <+8>:	cmp    $0x1,%r9d
   0x00000000007339dc <+12>:	jne    0x41acd4 <zif_zend_test_is_string_marked_as_valid_utf8-3247356>
   0x00000000007339e2 <+18>:	mov    %rsi,%rdx
   0x00000000007339e5 <+21>:	lea    0x50(%rdi),%r8
   0x00000000007339e9 <+25>:	cmpb   $0x6,0x58(%rdi)
   0x00000000007339ed <+29>:	jne    0x733a18 <zif_zend_test_is_string_marked_as_valid_utf8+72>
   0x00000000007339ef <+31>:	mov    0x50(%rdi),%rax
   0x00000000007339f3 <+35>:	test   %rax,%rax
   0x00000000007339f6 <+38>:	je     0x41acbc <zif_zend_test_is_string_marked_as_valid_utf8.cold>
   0x00000000007339fc <+44>:	testb  $0x2,0x5(%rax)
   0x0000000000733a00 <+48>:	setne  %al
   0x0000000000733a03 <+51>:	movzbl %al,%eax
   0x0000000000733a06 <+54>:	add    $0x2,%eax
   0x0000000000733a09 <+57>:	mov    %eax,0x8(%rdx)
   0x0000000000733a0c <+60>:	add    $0x28,%rsp
   0x0000000000733a10 <+64>:	ret
   0x0000000000733a11 <+65>:	nopl   0x0(%rax)
   0x0000000000733a18 <+72>:	mov    %rsi,0x18(%rsp)
   0x0000000000733a1d <+77>:	mov    %r8,%rdi
   0x0000000000733a20 <+80>:	mov    $0x1,%esi
   0x0000000000733a25 <+85>:	mov    %r9d,0x14(%rsp)
   0x0000000000733a2a <+90>:	mov    %r8,0x8(%rsp)
   0x0000000000733a2f <+95>:	call   0x7af240 <zend_parse_arg_str_slow>
   0x0000000000733a34 <+100>:	mov    0x18(%rsp),%rdx
   0x0000000000733a39 <+105>:	mov    0x14(%rsp),%r9d
   0x0000000000733a3e <+110>:	mov    0x8(%rsp),%r8
   0x0000000000733a43 <+115>:	jmp    0x7339f3 <zif_zend_test_is_string_marked_as_valid_utf8+35>
Address range 0x41acbc to 0x41acf2:
   0x000000000041acbc <-3247380>:	mov    $0x9,%edi
   0x000000000041acc1 <-3247375>:	mov    $0x4,%ecx
   0x000000000041acc6 <-3247370>:	xor    %edx,%edx
   0x000000000041acc8 <-3247368>:	mov    %r9d,%esi
   0x000000000041accb <-3247365>:	add    $0x28,%rsp
   0x000000000041accf <-3247361>:	jmp    0x41f6f4 <zend_wrong_parameter_error>
   0x000000000041acd4 <-3247356>:	mov    $0x1,%edi
   0x000000000041acd9 <-3247351>:	mov    $0x1,%esi
   0x000000000041acde <-3247346>:	call   0x41f1c2 <zend_wrong_parameters_count_error>
   0x000000000041ace3 <-3247341>:	mov    $0x1,%edi
   0x000000000041ace8 <-3247336>:	xor    %ecx,%ecx
   0x000000000041acea <-3247334>:	xor    %r8d,%r8d
   0x000000000041aced <-3247331>:	xor    %r9d,%r9d
   0x000000000041acf0 <-3247328>:	jmp    0x41acc6 <zif_zend_test_is_string_marked_as_valid_utf8-3247370>

For reference we have 772 usages of Z_PARAM_STR (found using git grep -o "Z_PARAM_STR" | wc -l)

This approach should also work for the "path" and "C string" version, and more generally for the pure HashTable and object FZPP specifier

@Girgias Girgias force-pushed the zpp-code-bloat branch 2 times, most recently from 4cee049 to a434a77 Compare August 4, 2025 03:50
Zend/zend_API.h Outdated
Comment on lines 2321 to 2329
static zend_always_inline zend_string *zend_parse_arg_str_no_null(zval *arg, uint32_t arg_num)
{
if (EXPECTED(Z_TYPE_P(arg) == IS_STRING)) {
return Z_STR_P(arg);
} else {
return zend_parse_arg_str_slow(arg, arg_num);
}
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I agree with getting rid of the dest param for the _slow functions, does this particular extra function actually change anything? Please check the asm; given that it's force inlined it ought to be equivalent to zend_parse_arg_str_ex.

@Girgias
Copy link
Member Author

Girgias commented Aug 4, 2025

@bwoebi Applying the following patch:

diff --git a/Zend/zend_API.h b/Zend/zend_API.h
index e6d17cf29bb..b1f3202b4db 100644
--- a/Zend/zend_API.h
+++ b/Zend/zend_API.h
@@ -2088,13 +2088,7 @@ ZEND_API ZEND_COLD void zend_class_redeclaration_error_ex(int type, zend_string
                }
 
 #define Z_PARAM_STR(dest) \
-       Z_PARAM_PROLOGUE(false, false); \
-       dest = zend_parse_arg_str_no_null(_arg, _i); \
-       if (UNEXPECTED(dest == NULL)) { \
-               _expected_type = Z_EXPECTED_STRING; \
-               _error_code = ZPP_ERROR_WRONG_ARG; \
-               break; \
-       }
+       Z_PARAM_STR_EX(dest, 0, 0)
 
 #define Z_PARAM_STR_OR_NULL(dest) \
        Z_PARAM_STR_EX(dest, 1, 0)
@@ -2318,15 +2312,6 @@ static zend_always_inline bool zend_parse_arg_str(zval *arg, zend_string **dest,
        return zend_parse_arg_str_ex(arg, dest, check_null, arg_num, /* frameless */ false);
 }
 
-static zend_always_inline zend_string *zend_parse_arg_str_no_null(zval *arg, uint32_t arg_num)
-{
-       if (EXPECTED(Z_TYPE_P(arg) == IS_STRING)) {
-               return Z_STR_P(arg);
-       } else {
-               return zend_parse_arg_str_slow(arg, arg_num);
-       }
-}
-
 static zend_always_inline bool zend_parse_arg_string(zval *arg, char **dest, size_t *dest_len, bool check_null, uint32_t arg_num)
 {
        zend_string *str;

gives me the following assembly for ZEND_FUNCTION(zend_test_is_string_marked_as_valid_utf8):

Address range 0x733e00 to 0x733e71:
   0x0000000000733e00 <+0>:	push   %rbx
   0x0000000000733e01 <+1>:	sub    $0x10,%rsp
   0x0000000000733e05 <+5>:	mov    0x2c(%rdi),%r9d
   0x0000000000733e09 <+9>:	cmp    $0x1,%r9d
   0x0000000000733e0d <+13>:	jne    0x41aeda <zif_zend_test_is_string_marked_as_valid_utf8-3247910>
   0x0000000000733e13 <+19>:	mov    %rsi,%rdx
   0x0000000000733e16 <+22>:	cmpb   $0x6,0x58(%rdi)
   0x0000000000733e1a <+26>:	jne    0x733e40 <zif_zend_test_is_string_marked_as_valid_utf8+64>
   0x0000000000733e1c <+28>:	mov    0x50(%rdi),%rax
   0x0000000000733e20 <+32>:	testb  $0x2,0x5(%rax)
   0x0000000000733e24 <+36>:	setne  %al
   0x0000000000733e27 <+39>:	movzbl %al,%eax
   0x0000000000733e2a <+42>:	add    $0x2,%eax
   0x0000000000733e2d <+45>:	mov    %eax,0x8(%rdx)
   0x0000000000733e30 <+48>:	add    $0x10,%rsp
   0x0000000000733e34 <+52>:	pop    %rbx
   0x0000000000733e35 <+53>:	ret
   0x0000000000733e36 <+54>:	cs nopw 0x0(%rax,%rax,1)
   0x0000000000733e40 <+64>:	mov    %rsi,0x8(%rsp)
   0x0000000000733e45 <+69>:	lea    0x50(%rdi),%rbx
   0x0000000000733e49 <+73>:	mov    $0x1,%esi
   0x0000000000733e4e <+78>:	add    $0x50,%rdi
   0x0000000000733e52 <+82>:	mov    %r9d,0x4(%rsp)
   0x0000000000733e57 <+87>:	call   0x7af5c0 <zend_parse_arg_str_slow>
   0x0000000000733e5c <+92>:	mov    0x4(%rsp),%r9d
   0x0000000000733e61 <+97>:	test   %rax,%rax
   0x0000000000733e64 <+100>:	je     0x41aebe <zif_zend_test_is_string_marked_as_valid_utf8.cold>
   0x0000000000733e6a <+106>:	mov    0x8(%rsp),%rdx
   0x0000000000733e6f <+111>:	jmp    0x733e20 <zif_zend_test_is_string_marked_as_valid_utf8+32>
Address range 0x41aebe to 0x41aef7:
   0x000000000041aebe <-3247938>:	mov    $0x9,%edi
   0x000000000041aec3 <-3247933>:	mov    $0x4,%ecx
   0x000000000041aec8 <-3247928>:	add    $0x10,%rsp
   0x000000000041aecc <-3247924>:	mov    %rbx,%r8
   0x000000000041aecf <-3247921>:	xor    %edx,%edx
   0x000000000041aed1 <-3247919>:	mov    %r9d,%esi
   0x000000000041aed4 <-3247916>:	pop    %rbx
   0x000000000041aed5 <-3247915>:	jmp    0x41f8d1 <zend_wrong_parameter_error>
   0x000000000041aeda <-3247910>:	mov    $0x1,%edi
   0x000000000041aedf <-3247905>:	mov    $0x1,%esi
   0x000000000041aee4 <-3247900>:	xor    %ebx,%ebx
   0x000000000041aee6 <-3247898>:	call   0x41f39f <zend_wrong_parameters_count_error>
   0x000000000041aeeb <-3247893>:	xor    %r9d,%r9d
   0x000000000041aeee <-3247890>:	mov    $0x1,%edi
   0x000000000041aef3 <-3247885>:	xor    %ecx,%ecx
   0x000000000041aef5 <-3247883>:	jmp    0x41aec8 <zif_zend_test_is_string_marked_as_valid_utf8-3247928>

Arguably I don't know anything about codegen and assembly, but it doesn't seem equivalent to me?

@bwoebi
Copy link
Member

bwoebi commented Aug 4, 2025

The new code has one branch less in the hot path, but does an extra push/pop (no idea why). I'd say the saved branch is better though.
So I'd go for that, given that it's less code.

@nielsdos
Copy link
Member

nielsdos commented Aug 4, 2025

The motivation in my original PR was to get rid of the stack protector overhead, something not showcased in the assembly here.

@Girgias Girgias marked this pull request as ready for review September 22, 2025 13:37
@Girgias Girgias requested a review from dstogov as a code owner September 22, 2025 13:37
This idea is based on @nielsdos previous PR to reduce codebloat: php#18436
To do so we create a specialized function to handle parameters that only accept strings and not null such that we can assign the zend_string to the destination directly.
@dstogov
Copy link
Member

dstogov commented Sep 29, 2025

please, run size php with and without the patch to show the benefit.

@Girgias
Copy link
Member Author

Girgias commented Sep 30, 2025

On current master (commit 44714a325475197c4301d25fd813bd4240c126c8) with the following build instructions:

./buildconf
./configure -C --disable-all --prefix=/home/girgias/Dev/custom-php/
make -j22
size sapi/cli/php --format=SysV

I get the following output:

sapi/cli/php  :
section                     size       addr
.note.gnu.build-id            36    4195096
.init                         27    4195132
.plt                        4928    4195168
.text                    4909661    4200128
.remap_stub                  358   10485760
.fini                         13   10486120
.interp                       28   12582912
.gnu.hash                  26708   12582944
.dynsym                    93240   12609656
.dynstr                    95096   12702896
.gnu.version                7770   12797992
.gnu.version_r               352   12805768
.rela.dyn                    264   12806120
.rela.plt                   7368   12806384
.rodata                  2932484   12813760
.eh_frame_hdr             101724   15746244
.eh_frame                 559176   15847968
.note.gnu.property            64   16407144
.note.ABI-tag                 32   16407208
.init_array                   16   18873768
.fini_array                    8   18873784
.data.rel.ro                  16   18873792
.dynamic                     496   18873808
.got                          16   18874304
.got.plt                    2480   18874344
.data                     780944   18876832
.bss                      126672   19657792
.comment                      46          0
.annobin.notes               335          0
.gnu.build.attributes        396   23978768
.debug_aranges             29456          0
.debug_info             12449856          0
.debug_abbrev             588177          0
.debug_line              5552219          0
.debug_str                742017          0
.debug_line_str            41301          0
.debug_loclists          8100704          0
.debug_rnglists          1028428          0
.debug_gdb_scripts         48541          0
Total                   38231453

Rebasing this commit on top of commit 44714a325475197c4301d25fd813bd4240c126c8 and running the same commands as above after a make distclean I get:

sapi/cli/php  :
section                     size       addr
.note.gnu.build-id            36    4195096
.init                         27    4195132
.plt                        4928    4195168
.text                    4907549    4200128
.remap_stub                  358   10485760
.fini                         13   10486120
.interp                       28   12582912
.gnu.hash                  26708   12582944
.dynsym                    93240   12609656
.dynstr                    95096   12702896
.gnu.version                7770   12797992
.gnu.version_r               352   12805768
.rela.dyn                    264   12806120
.rela.plt                   7368   12806384
.rodata                  2932484   12813760
.eh_frame_hdr             101724   15746244
.eh_frame                 562480   15847968
.note.gnu.property            64   16410448
.note.ABI-tag                 32   16410512
.init_array                   16   18873768
.fini_array                    8   18873784
.data.rel.ro                  16   18873792
.dynamic                     496   18873808
.got                          16   18874304
.got.plt                    2480   18874344
.data                     780944   18876832
.bss                      126672   19657792
.comment                      46          0
.annobin.notes               335          0
.gnu.build.attributes        396   23978768
.debug_aranges             29456          0
.debug_info             12462504          0
.debug_abbrev             588601          0
.debug_line              5553301          0
.debug_str                742017          0
.debug_line_str            41301          0
.debug_loclists          8182021          0
.debug_rnglists          1034676          0
.debug_gdb_scripts         48541          0
Total                   38334364

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants