Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-47084: Clear cached representations on finalization #32032

Merged
merged 6 commits into from Mar 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_unicodeobject.h
Expand Up @@ -18,6 +18,7 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
extern void _PyUnicode_Fini(PyInterpreterState *);
extern void _PyUnicode_FiniTypes(PyInterpreterState *);
extern void _PyStaticUnicode_Dealloc(PyObject *);


/* other API */
Expand Down
9 changes: 9 additions & 0 deletions Lib/__hello__.py
@@ -1,5 +1,14 @@
initialized = True

class TestFrozenUtf8_1:
"""\u00b6"""

class TestFrozenUtf8_2:
"""\u03c0"""

class TestFrozenUtf8_4:
"""\U0001f600"""

def main():
print("Hello world!")

Expand Down
41 changes: 23 additions & 18 deletions Lib/test/test_embed.py
Expand Up @@ -1645,24 +1645,29 @@ def test_frozenmain(self):
'-X showrefcount requires a Python debug build')
def test_no_memleak(self):
# bpo-1635741: Python must release all memory at exit
cmd = [sys.executable, "-I", "-X", "showrefcount", "-c", "pass"]
proc = subprocess.run(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True)
self.assertEqual(proc.returncode, 0)
out = proc.stdout.rstrip()
match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
if not match:
self.fail(f"unexpected output: {out!a}")
refs = int(match.group(1))
blocks = int(match.group(2))
self.assertEqual(refs, 0, out)
if not MS_WINDOWS:
self.assertEqual(blocks, 0, out)
else:
# bpo-46857: on Windows, Python still leaks 1 memory block at exit
self.assertIn(blocks, (0, 1), out)
tests = (
('off', 'pass'),
('on', 'pass'),
('off', 'import __hello__'),
('on', 'import __hello__'),
)
for flag, stmt in tests:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please use with self.subTest(flag=flag, stmt=stmt):? Otherwise, it will be more complicated to debug if a test fails. With that, you can omit f"{xopt}, stmt='{stmt}' -> {out}") below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't know about TestCase.subTest(), thanks.

xopt = f"frozen_modules={flag}"
cmd = [sys.executable, "-I", "-X", "showrefcount", "-X", xopt, "-c", stmt]
proc = subprocess.run(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True)
self.assertEqual(proc.returncode, 0)
out = proc.stdout.rstrip()
match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
if not match:
self.fail(f"unexpected output: {out!a}")
refs = int(match.group(1))
blocks = int(match.group(2))
with self.subTest(frozen_modules=flag, stmt=stmt):
self.assertEqual(refs, 0, out)
self.assertEqual(blocks, 0, out)


class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase):
Expand Down
44 changes: 44 additions & 0 deletions Objects/unicodeobject.c
Expand Up @@ -16057,6 +16057,35 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
}


static void unicode_static_dealloc(PyObject *op)
{
PyASCIIObject* ascii = (PyASCIIObject*)op;

assert(ascii->state.compact);

if (ascii->state.ascii) {
if (ascii->wstr) {
PyObject_Free(ascii->wstr);
ascii->wstr = NULL;
}
}
else {
PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op;
void* data = (void*)(compact + 1);
if (ascii->wstr && ascii->wstr != data) {
PyObject_Free(ascii->wstr);
ascii->wstr = NULL;
compact->wstr_length = 0;
}
if (compact->utf8) {
PyObject_Free(compact->utf8);
compact->utf8 = NULL;
compact->utf8_length = 0;
}
}
}


void
_PyUnicode_Fini(PyInterpreterState *interp)
{
Expand All @@ -16070,6 +16099,21 @@ _PyUnicode_Fini(PyInterpreterState *interp)
_PyUnicode_FiniEncodings(&state->fs_codec);

unicode_clear_identifiers(state);

// Clear the single character singletons
for (int i = 0; i < 128; i++) {
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]);
}
for (int i = 0; i < 128; i++) {
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]);
}
}


void
_PyStaticUnicode_Dealloc(PyObject *op)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to share code between this function and unicode_dealloc()? Maybe add an unicode_dealloc_common() function?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there is the duplication of the PyObect_Free() calls, but not really to the level of what I would consider "in common". I did, however, refactor out the duplication of the 1-character singletons and frozen strings deallocation into a standalone static function.

{
unicode_static_dealloc(op);
}


Expand Down
1 change: 1 addition & 0 deletions Tools/scripts/deepfreeze.py
Expand Up @@ -185,6 +185,7 @@ def generate_unicode(self, name: str, s: str) -> str:
else:
self.write("PyCompactUnicodeObject _compact;")
self.write(f"{datatype} _data[{len(s)+1}];")
self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
with self.block(f"{name} =", ";"):
if ascii:
with self.block("._ascii =", ","):
Expand Down