New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bpo-36694: Do not memoize temporary objects in the C implementation of pickle. #13036
base: main
Are you sure you want to change the base?
Changes from 1 commit
7936816
edcd39c
20aab10
66979e2
7b1d188
cf82909
6444a90
9d1b10d
b39126a
646fe43
31eab15
48c12c7
15e2399
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
The C implementation of :mod:`pickle` no longer memoizes temporary objects. | ||
This produces more optimal pickle data and reduces memory consumption on | ||
pickling and unpickling. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1601,15 +1601,15 @@ memo_get(PicklerObject *self, PyObject *key) | |
/* Store an object in the memo, assign it a new unique ID based on the number | ||
of objects currently stored in the memo and generate a PUT opcode. */ | ||
static int | ||
memo_put(PicklerObject *self, PyObject *obj) | ||
memo_put(PicklerObject *self, PyObject *obj, int opt) | ||
{ | ||
char pdata[30]; | ||
Py_ssize_t len; | ||
Py_ssize_t idx; | ||
|
||
const char memoize_op = MEMOIZE; | ||
|
||
if (self->fast) | ||
if (self->fast || (opt && Py_REFCNT(obj) == 1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Requiring There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It mostly affects temporary objects. For example, if Another example: if you have a list of unique numbers, strings, tuples, etc, then items of the list will not be memoized as the only reference to the item is from the list. |
||
return 0; | ||
|
||
idx = PyMemoTable_Size(self->memo); | ||
|
@@ -2289,7 +2289,7 @@ save_bytes(PicklerObject *self, PyObject *obj) | |
return -1; | ||
} | ||
|
||
if (memo_put(self, obj) < 0) | ||
if (memo_put(self, obj, 1) < 0) | ||
return -1; | ||
|
||
return 0; | ||
|
@@ -2457,7 +2457,7 @@ save_unicode(PicklerObject *self, PyObject *obj) | |
if (_Pickler_Write(self, "\n", 1) < 0) | ||
return -1; | ||
} | ||
if (memo_put(self, obj) < 0) | ||
if (memo_put(self, obj, 1) < 0) | ||
return -1; | ||
|
||
return 0; | ||
|
@@ -2583,7 +2583,7 @@ save_tuple(PicklerObject *self, PyObject *obj) | |
} | ||
|
||
memoize: | ||
if (memo_put(self, obj) < 0) | ||
if (memo_put(self, obj, 1) < 0) | ||
return -1; | ||
|
||
return 0; | ||
|
@@ -2784,7 +2784,7 @@ save_list(PicklerObject *self, PyObject *obj) | |
if ((len = PyList_Size(obj)) < 0) | ||
goto error; | ||
|
||
if (memo_put(self, obj) < 0) | ||
if (memo_put(self, obj, len == 0) < 0) | ||
goto error; | ||
|
||
if (len != 0) { | ||
|
@@ -3041,10 +3041,11 @@ save_dict(PicklerObject *self, PyObject *obj) | |
if (_Pickler_Write(self, header, len) < 0) | ||
goto error; | ||
|
||
if (memo_put(self, obj) < 0) | ||
len = PyDict_GET_SIZE(obj); | ||
if (memo_put(self, obj, len == 0) < 0) | ||
goto error; | ||
|
||
if (PyDict_GET_SIZE(obj)) { | ||
if (len != 0) { | ||
/* Save the dict items. */ | ||
if (PyDict_CheckExact(obj) && self->proto > 0) { | ||
/* We can take certain shortcuts if we know this is a dict and | ||
|
@@ -3119,10 +3120,10 @@ save_set(PicklerObject *self, PyObject *obj) | |
if (_Pickler_Write(self, &empty_set_op, 1) < 0) | ||
return -1; | ||
|
||
if (memo_put(self, obj) < 0) | ||
set_size = PySet_GET_SIZE(obj); | ||
if (memo_put(self, obj, set_size == 0) < 0) | ||
return -1; | ||
|
||
set_size = PySet_GET_SIZE(obj); | ||
if (set_size == 0) | ||
return 0; /* nothing to do */ | ||
|
||
|
@@ -3224,7 +3225,7 @@ save_frozenset(PicklerObject *self, PyObject *obj) | |
|
||
if (_Pickler_Write(self, &frozenset_op, 1) < 0) | ||
return -1; | ||
if (memo_put(self, obj) < 0) | ||
if (memo_put(self, obj, 1) < 0) | ||
return -1; | ||
|
||
return 0; | ||
|
@@ -3533,7 +3534,7 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name) | |
goto error; | ||
} | ||
/* Memoize the object. */ | ||
if (memo_put(self, obj) < 0) | ||
if (memo_put(self, obj, 1) < 0) | ||
goto error; | ||
} | ||
|
||
|
@@ -3922,7 +3923,7 @@ save_reduce(PicklerObject *self, PyObject *args, PyObject *obj) | |
|
||
return 0; | ||
} | ||
else if (memo_put(self, obj) < 0) | ||
else if (memo_put(self, obj, 1) < 0) | ||
return -1; | ||
} | ||
|
||
|
@@ -4113,6 +4114,7 @@ save(PicklerObject *self, PyObject *obj, int pers_save) | |
} | ||
} | ||
} | ||
Py_DECREF(reduce_func); | ||
|
||
if (reduce_value == NULL) | ||
goto error; | ||
|
@@ -4138,7 +4140,6 @@ save(PicklerObject *self, PyObject *obj, int pers_save) | |
done: | ||
|
||
Py_LeaveRecursiveCall(); | ||
Py_XDECREF(reduce_func); | ||
Py_XDECREF(reduce_value); | ||
|
||
return status; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand how you decide whether
opt
should be 0 or 1. What is the heuristic?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 on this. This is critical behavior for
cloudpickle
:)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is an interesting question. The rule is opt=0 for objects which save its content after saving itself. They are non-empty lists, sets, dicts and general objects with non-trivial elements 2-4 of the tuple returned by
__reduce__()
. They should be memoized to allow detecting reference loops.