-
-
Notifications
You must be signed in to change notification settings - Fork 33.4k
Open
Labels
extension-modules (C modules in the Modules dir), performance (Performance or resource usage), topic-SSL, topic-free-threading, type-bug (An unexpected behavior, bug, or error)
Description
On the free-threaded build there is a large (~20%) performance regression in the asyncio_tcp_ssl benchmark. A large part of the slowdown comes from #124993, which added critical sections and locks for thread safety; however, 20% is a large slowdown for the important single-threaded use case.
Critical sections are slow, especially for dynamically loaded extensions, because accessing the thread state is slow and there are multiple function calls even on the uncontended fast path of acquiring a critical section.
Compare the disassembly of _ssl_RAND_status in the free-threaded build with that of the normal build:
- free-threading:
Dump of assembler code for function _ssl_RAND_status:
0x00007ffff7b97090 <+0>: push %rbx
0x00007ffff7b97091 <+1>: sub $0x10,%rsp
0x00007ffff7b97095 <+5>: lea 0xa(%rdi),%rbx
0x00007ffff7b97099 <+9>: mov $0x1,%cl
0x00007ffff7b9709b <+11>: xor %eax,%eax
0x00007ffff7b9709d <+13>: lock cmpxchg %cl,0xa(%rdi)
0x00007ffff7b970a2 <+18>: jne 0x7ffff7b970c5 <_ssl_RAND_status+53>
0x00007ffff7b970a4 <+20>: call 0x7ffff7b950e0 <_PyThreadState_GetCurrent@plt>
0x00007ffff7b970a9 <+25>: mov %rbx,0x8(%rsp)
0x00007ffff7b970ae <+30>: mov 0xb0(%rax),%rcx
0x00007ffff7b970b5 <+37>: mov %rcx,(%rsp)
0x00007ffff7b970b9 <+41>: mov %rsp,%rcx
0x00007ffff7b970bc <+44>: mov %rcx,0xb0(%rax)
0x00007ffff7b970c3 <+51>: jmp 0x7ffff7b970d0 <_ssl_RAND_status+64>
0x00007ffff7b970c5 <+53>: mov %rsp,%rdi
0x00007ffff7b970c8 <+56>: mov %rbx,%rsi
0x00007ffff7b970cb <+59>: call 0x7ffff7b95cc0 <_PyCriticalSection_BeginSlow@plt>
0x00007ffff7b970d0 <+64>: call 0x7ffff7b95c30 <RAND_status@plt>
0x00007ffff7b970d5 <+69>: movslq %eax,%rdi
0x00007ffff7b970d8 <+72>: call 0x7ffff7b96560 <PyBool_FromLong@plt>
0x00007ffff7b970dd <+77>: mov %rax,%rbx
0x00007ffff7b970e0 <+80>: mov 0x8(%rsp),%rdi
0x00007ffff7b970e5 <+85>: test %rdi,%rdi
0x00007ffff7b970e8 <+88>: je 0x7ffff7b97116 <_ssl_RAND_status+134>
0x00007ffff7b970ea <+90>: xor %ecx,%ecx
0x00007ffff7b970ec <+92>: mov $0x1,%al
0x00007ffff7b970ee <+94>: lock cmpxchg %cl,(%rdi)
0x00007ffff7b970f2 <+98>: je 0x7ffff7b970f9 <_ssl_RAND_status+105>
0x00007ffff7b970f4 <+100>: call 0x7ffff7b954a0 <PyMutex_Unlock@plt>
0x00007ffff7b970f9 <+105>: call 0x7ffff7b950e0 <_PyThreadState_GetCurrent@plt>
0x00007ffff7b970fe <+110>: mov (%rsp),%rcx
0x00007ffff7b97102 <+114>: mov %rcx,0xb0(%rax)
0x00007ffff7b97109 <+121>: test $0x1,%cl
0x00007ffff7b9710c <+124>: je 0x7ffff7b97116 <_ssl_RAND_status+134>
0x00007ffff7b9710e <+126>: mov %rax,%rdi
0x00007ffff7b97111 <+129>: call 0x7ffff7b95f90 <_PyCriticalSection_Resume@plt>
0x00007ffff7b97116 <+134>: mov %rbx,%rax
0x00007ffff7b97119 <+137>: add $0x10,%rsp
0x00007ffff7b9711d <+141>: pop %rbx
0x00007ffff7b9711e <+142>: ret
End of assembler dump.
- normal build:
0x00007ffff773bd00 <+0>: push %rax
0x00007ffff773bd01 <+1>: call 0x7ffff773ac20 <RAND_status@plt>
0x00007ffff773bd06 <+6>: movslq %eax,%rdi
0x00007ffff773bd09 <+9>: pop %rax
0x00007ffff773bd0a <+10>: jmp 0x7ffff773b520 <PyBool_FromLong@plt>
Linked PRs
Metadata
Metadata
Assignees
Labels
extension-modules (C modules in the Modules dir), performance (Performance or resource usage), topic-SSL, topic-free-threading, type-bug (An unexpected behavior, bug, or error)