Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

gh-100227: Make the Global Interned Dict Safe for Isolated Interpreters #102858

6 changes: 6 additions & 0 deletions Include/internal/pycore_unicodeobject.h
Expand Up @@ -33,6 +33,12 @@ struct _Py_unicode_runtime_ids {
};

/* Unicode state stored on the runtime; the code reaches it through
_PyRuntime.unicode_state. */
struct _Py_unicode_runtime_state {
/* Bookkeeping for the global interned-strings dict. */
struct {
/* Held by PyUnicode_InternInPlace() around its call to
store_interned(), i.e. around the update of the interned dict. */
PyThread_type_lock lock;
/* Thread state read by get_interned_tstate() and released (and reset
to NULL) by clear_interned_tstate(). */
PyThreadState *tstate;
/* The actual interned dict is at
_PyRuntime.cached_objects.interned_strings. */
} interned;
/* Identifier state; init_runtime() fills in its next_index and lock. */
struct _Py_unicode_runtime_ids ids;
};

Expand Down
82 changes: 70 additions & 12 deletions Objects/unicodeobject.c
Expand Up @@ -14533,13 +14533,12 @@ _PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
return _PyStatus_OK();
}

// Initialize the global interned dict
/* Initialize the global interned dict. */
PyObject *interned = PyDict_New();
if (interned == NULL) {
PyErr_Clear();
return _PyStatus_ERR("failed to create interned dict");
}

set_interned_dict(interned);

/* Intern statically allocated string identifiers and deepfreeze strings.
Expand Down Expand Up @@ -14585,6 +14584,63 @@ _PyUnicode_InitTypes(PyInterpreterState *interp)
}


/* Return the thread state used to update the global interned dict on
   behalf of a non-main interpreter.

   The thread state is created for the main interpreter on first use and
   then cached in _PyRuntime.unicode_state.interned.tstate, so later calls
   reuse it and clear_interned_tstate() can find and release it.

   PyUnicode_InternInPlace() reaches this function (through
   store_interned()) while holding the interned lock, which serializes the
   write to the cache.

   Returns NULL if the thread state could not be created; any error raised
   by the failed creation is cleared. */
static PyThreadState *
get_interned_tstate(void)
{
    PyThreadState *tstate = _PyRuntime.unicode_state.interned.tstate;
    if (tstate != NULL) {
        return tstate;
    }

    PyInterpreterState *main_interp = _PyInterpreterState_Main();
    /* We do not "bind" the thread state here. */
    tstate = _PyThreadState_New(main_interp);
    if (tstate == NULL) {
        PyErr_Clear();
        return NULL;
    }

    /* Cache the new thread state.  Without this every call would create
       (and leak) a fresh one, and clear_interned_tstate() would never
       have anything to release. */
    _PyRuntime.unicode_state.interned.tstate = tstate;
    return tstate;
}

/* Release the thread state stored in
   _PyRuntime.unicode_state.interned.tstate, if there is one.

   The runtime field is reset to NULL before the thread state is cleared
   and deleted.  Does nothing when no thread state is stored. */
static void
clear_interned_tstate(void)
{
    PyThreadState *cached = _PyRuntime.unicode_state.interned.tstate;
    if (cached == NULL) {
        return;
    }

    /* Detach it from the runtime first, then tear it down. */
    _PyRuntime.unicode_state.interned.tstate = NULL;
    PyThreadState_Clear(cached);
    PyThreadState_Delete(cached);
}

/* Insert obj into the global interned dict unless an equal key is already
   present, and return the dict's entry for it (the result of
   PyDict_SetDefault(), which is either obj or the existing entry).

   When the caller is not running in the main interpreter, the current
   thread state is swapped for the one returned by get_interned_tstate()
   for the duration of the dict operation and swapped back afterwards.

   Returns NULL, with any error cleared, if the dict update fails.  Also
   returns NULL if no thread state could be obtained. */
static inline PyObject *
store_interned(PyObject *obj)
{
    PyObject *table = get_interned_dict();
    assert(table != NULL);

    /* Non-NULL only while we are running on the borrowed thread state. */
    PyThreadState *saved_tstate = NULL;
    int in_main = _Py_IsMainInterpreter(_PyInterpreterState_GET());
    if (!in_main) {
        PyThreadState *dict_tstate = get_interned_tstate();
        if (dict_tstate == NULL) {
            return NULL;
        }
        saved_tstate = PyThreadState_Swap(dict_tstate);
        assert(saved_tstate != NULL);
    }

    PyObject *entry = PyDict_SetDefault(table, obj, obj);
    if (entry == NULL) {
        /* Interning is best-effort: drop the error and report failure
           through the NULL return value only. */
        PyErr_Clear();
    }

    /* Restore the caller's thread state if we replaced it above. */
    if (saved_tstate != NULL) {
        PyThreadState_Swap(saved_tstate);
    }

    return entry;
}

void
PyUnicode_InternInPlace(PyObject **p)
{
Expand All @@ -14608,20 +14664,20 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}

PyObject *interned = get_interned_dict();
assert(interned != NULL);

PyObject *t = PyDict_SetDefault(interned, s, s);
if (t == NULL) {
PyErr_Clear();
return;
}

PyThread_acquire_lock(_PyRuntime.unicode_state.interned.lock, WAIT_LOCK);
PyObject *t = store_interned(s);
PyThread_release_lock(_PyRuntime.unicode_state.interned.lock);
if (t != s) {
Py_SETREF(*p, Py_NewRef(t));
if (t != NULL) {
Py_SETREF(*p, Py_NewRef(t));
}
return;
}

/* Immortalize the object. */
// XXX Uncomment this once the PEP 683 implementation has landed.
//_Py_SetImmortal(s);

/* The two references in interned dict (key and value) are not counted by
refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of
this. */
Expand Down Expand Up @@ -14696,6 +14752,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
PyDict_Clear(interned);
Py_DECREF(interned);
set_interned_dict(NULL);

clear_interned_tstate();
}


Expand Down
23 changes: 19 additions & 4 deletions Python/pystate.c
Expand Up @@ -356,7 +356,8 @@ _Py_COMP_DIAG_POP

static int
alloc_for_runtime(PyThread_type_lock *plock1, PyThread_type_lock *plock2,
PyThread_type_lock *plock3, PyThread_type_lock *plock4)
PyThread_type_lock *plock3, PyThread_type_lock *plock4,
PyThread_type_lock *plock5)
{
/* Force default allocator, since _PyRuntimeState_Fini() must
use the same allocator as this function. */
Expand Down Expand Up @@ -389,12 +390,22 @@ alloc_for_runtime(PyThread_type_lock *plock1, PyThread_type_lock *plock2,
return -1;
}

PyThread_type_lock lock5 = PyThread_allocate_lock();
/* Check the lock that was just allocated (lock5, not lock4).  On failure,
   free the four locks obtained earlier so that none of them leak. */
if (lock5 == NULL) {
    PyThread_free_lock(lock1);
    PyThread_free_lock(lock2);
    PyThread_free_lock(lock3);
    PyThread_free_lock(lock4);
    return -1;
}

PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);

*plock1 = lock1;
*plock2 = lock2;
*plock3 = lock3;
*plock4 = lock4;
*plock5 = lock5;
return 0;
}

Expand All @@ -404,6 +415,7 @@ init_runtime(_PyRuntimeState *runtime,
_Py_AuditHookEntry *audit_hook_head,
Py_ssize_t unicode_next_index,
PyThread_type_lock unicode_ids_mutex,
PyThread_type_lock interned_mutex,
PyThread_type_lock interpreters_mutex,
PyThread_type_lock xidregistry_mutex,
PyThread_type_lock getargs_mutex)
Expand Down Expand Up @@ -435,6 +447,7 @@ init_runtime(_PyRuntimeState *runtime,

runtime->unicode_state.ids.next_index = unicode_next_index;
runtime->unicode_state.ids.lock = unicode_ids_mutex;
runtime->unicode_state.interned.lock = interned_mutex;

runtime->_initialized = 1;
}
Expand All @@ -452,8 +465,8 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
// is called multiple times.
Py_ssize_t unicode_next_index = runtime->unicode_state.ids.next_index;

PyThread_type_lock lock1, lock2, lock3, lock4;
if (alloc_for_runtime(&lock1, &lock2, &lock3, &lock4) != 0) {
PyThread_type_lock lock1, lock2, lock3, lock4, lock5;
if (alloc_for_runtime(&lock1, &lock2, &lock3, &lock4, &lock5) != 0) {
return _PyStatus_NO_MEMORY();
}

Expand All @@ -474,7 +487,7 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
}

init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head,
unicode_next_index, lock1, lock2, lock3, lock4);
unicode_next_index, lock1, lock2, lock3, lock4, lock5);

return _PyStatus_OK();
}
Expand Down Expand Up @@ -530,6 +543,7 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex);
int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex);
int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_state.ids.lock);
int reinit_interned = _PyThread_at_fork_reinit(&runtime->unicode_state.interned.lock);
int reinit_getargs = _PyThread_at_fork_reinit(&runtime->getargs.mutex);

PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
Expand All @@ -542,6 +556,7 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
|| reinit_main_id < 0
|| reinit_xidregistry < 0
|| reinit_unicode_ids < 0
|| reinit_interned < 0
|| reinit_getargs < 0)
{
return _PyStatus_ERR("Failed to reinitialize runtime locks");
Expand Down