Skip to content

Commit

Permalink
pythongh-119247: Add macros to use PySequence_Fast safely in free-thr…
Browse files Browse the repository at this point in the history
…eaded build (pythonGH-119315)

Add `Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST` and
`Py_END_CRITICAL_SECTION_SEQUENCE_FAST` macros and update `str.join` to use
them. Also add a regression test that would crash reliably without this
patch.
(cherry picked from commit baf347d)

Co-authored-by: Josh {*()} Rosenberg <26495692+MojoVampire@users.noreply.github.com>
  • Loading branch information
MojoVampire authored and miss-islington committed May 22, 2024
1 parent a463cd8 commit 5860dc7
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 3 deletions.
22 changes: 22 additions & 0 deletions Include/internal/pycore_critical_section.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,26 @@ extern "C" {
_PyCriticalSection2_End(&_cs2); \
}

// Specialized version of critical section locking to safely use
// PySequence_Fast APIs without the GIL. For performance, the argument *to*
// PySequence_Fast() is provided to the macro, not the *result* of
// PySequence_Fast(), which would require an extra test to determine if the
// lock must be acquired.
# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) \
{ \
PyObject *_orig_seq = _PyObject_CAST(original); \
const bool _should_lock_cs = PyList_CheckExact(_orig_seq); \
_PyCriticalSection _cs; \
if (_should_lock_cs) { \
_PyCriticalSection_Begin(&_cs, &_orig_seq->ob_mutex); \
}

# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() \
if (_should_lock_cs) { \
_PyCriticalSection_End(&_cs); \
} \
}

// Asserts that the mutex is locked. The mutex must be held by the
// top-most critical section otherwise there's the possibility
// that the mutex would be swalled out in some code paths.
Expand Down Expand Up @@ -137,6 +157,8 @@ extern "C" {
# define Py_END_CRITICAL_SECTION()
# define Py_BEGIN_CRITICAL_SECTION2(a, b)
# define Py_END_CRITICAL_SECTION2()
# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original)
# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST()
# define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex)
# define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op)
#endif /* !Py_GIL_DISABLED */
Expand Down
75 changes: 75 additions & 0 deletions Lib/test/test_free_threading/test_str.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import sys
import unittest

from itertools import cycle
from threading import Event, Thread
from unittest import TestCase

from test.support import threading_helper

@threading_helper.requires_working_threading()
class TestStr(TestCase):
def test_racing_join_extend(self):
'''Test joining a string being extended by another thread'''
l = []
ITERS = 100
READERS = 10
done_event = Event()
def writer_func():
for i in range(ITERS):
l.extend(map(str, range(i)))
l.clear()
done_event.set()
def reader_func():
while not done_event.is_set():
''.join(l)
writer = Thread(target=writer_func)
readers = []
for x in range(READERS):
reader = Thread(target=reader_func)
readers.append(reader)
reader.start()

writer.start()
writer.join()
for reader in readers:
reader.join()

def test_racing_join_replace(self):
'''
Test joining a string of characters being replaced with ephemeral
strings by another thread.
'''
l = [*'abcdefg']
MAX_ORDINAL = 1_000
READERS = 10
done_event = Event()

def writer_func():
for i, c in zip(cycle(range(len(l))),
map(chr, range(128, MAX_ORDINAL))):
l[i] = c
done_event.set()

def reader_func():
while not done_event.is_set():
''.join(l)
''.join(l)
''.join(l)
''.join(l)

writer = Thread(target=writer_func)
readers = []
for x in range(READERS):
reader = Thread(target=reader_func)
readers.append(reader)
reader.start()

writer.start()
writer.join()
for reader in readers:
reader.join()


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Added ``Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST`` and
``Py_END_CRITICAL_SECTION_SEQUENCE_FAST`` macros to make it possible to use
PySequence_Fast APIs safely when free-threaded, and update str.join to work
without the GIL using them.
8 changes: 5 additions & 3 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "pycore_bytesobject.h" // _PyBytes_Repeat()
#include "pycore_ceval.h" // _PyEval_GetBuiltin()
#include "pycore_codecs.h" // _PyCodec_Lookup()
#include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST
#include "pycore_format.h" // F_LJUST
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_interp.h" // PyInterpreterState.fs_codec
Expand Down Expand Up @@ -9559,13 +9560,14 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
return NULL;
}

/* NOTE: the following code can't call back into Python code,
* so we are sure that fseq won't be mutated.
*/
Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq);

items = PySequence_Fast_ITEMS(fseq);
seqlen = PySequence_Fast_GET_SIZE(fseq);
res = _PyUnicode_JoinArray(separator, items, seqlen);

Py_END_CRITICAL_SECTION_SEQUENCE_FAST();

Py_DECREF(fseq);
return res;
}
Expand Down

0 comments on commit 5860dc7

Please sign in to comment.