Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-27015: Save kwargs given to exceptions constructor #11580

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions Doc/library/exceptions.rst
Expand Up @@ -87,6 +87,10 @@ The following exceptions are used mostly as base classes for other exceptions.
assign a special meaning to the elements of this tuple, while others are
usually called only with a single string giving an error message.

.. attribute:: kwargs

The dictionnary of keyword arguments given to the exception constructor.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo: dictionnary -> dictionary


.. method:: with_traceback(tb)

This method sets *tb* as the new traceback for the exception and returns
Expand Down
2 changes: 1 addition & 1 deletion Include/cpython/pyerrors.h
Expand Up @@ -10,7 +10,7 @@ extern "C" {

/* PyException_HEAD defines the initial segment of every exception class. */
#define PyException_HEAD PyObject_HEAD PyObject *dict;\
PyObject *args; PyObject *traceback;\
PyObject *args; PyObject *kwargs; PyObject *traceback;\
PyObject *context; PyObject *cause;\
char suppress_context;

Expand Down
44 changes: 44 additions & 0 deletions Lib/test/test_exceptions.py
Expand Up @@ -1299,6 +1299,43 @@ def g():
next(i)
next(i)

def test_get_kwargs(self):
self.assertEqual(BaseException().kwargs, {})
remilapeyre marked this conversation as resolved.
Show resolved Hide resolved
self.assertEqual(NaiveException(x=1).kwargs, {'x': 1})

def test_set_kwargs(self):
b = BaseException()
b.kwargs = {'x': 1}
self.assertEqual(b.kwargs, {'x': 1})

b = NaiveException(x=1)
b.kwargs = {'x': 2}
self.assertEqual(b.kwargs, {'x': 2})

def test_del_args_kwargs(self):
b = BaseException()

with self.assertRaisesRegex(TypeError, "args may not be deleted"):
del b.args

with self.assertRaisesRegex(TypeError, "kwargs may not be deleted"):
del b.kwargs

def test_repr(self):
class MixedArgsKwargs(Exception):
def __init__(*args, **kwargs):
pass

self.assertEqual(repr(BaseException()), "BaseException()")
self.assertEqual(repr(BaseException(1)), "BaseException(1)")
self.assertEqual(repr(NaiveException(1)), "NaiveException(1)")
self.assertEqual(repr(NaiveException(x=1)), "NaiveException(x=1)")
self.assertEqual(repr(MixedArgsKwargs(1, b=2)), "MixedArgsKwargs(1, b=2)")

class NoKwargs(Exception):
def __init__(self, foo,):
self.args = (foo,)


class ImportErrorTests(unittest.TestCase):

Expand Down Expand Up @@ -1376,6 +1413,13 @@ def test_copy_pickle(self):
self.assertEqual(exc.name, orig.name)
self.assertEqual(exc.path, orig.path)

def test_pickle_overriden_init(self):
# Issue #27015
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
orig = NaiveException(x='foo')
exc = pickle.loads(pickle.dumps(orig, proto))
self.assertEqual(orig.x, exc.x)


if __name__ == '__main__':
unittest.main()
9 changes: 9 additions & 0 deletions Lib/test/test_subprocess.py
Expand Up @@ -18,6 +18,7 @@
import threading
import gc
import textwrap
import pickle
from test.support import FakePath

try:
Expand Down Expand Up @@ -1690,6 +1691,14 @@ def test_CalledProcessError_str_non_zero(self):
error_string = str(err)
self.assertIn("non-zero exit status 2.", error_string)

def test_CalledProcessError_picklable(self):
# Issue #27015
err = subprocess.CalledProcessError(returncode=2, cmd='foo')
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
new = pickle.loads(pickle.dumps(err, proto))
self.assertEqual(err.returncode, new.returncode)
self.assertEqual(err.cmd, new.cmd)

def test_preexec(self):
# DISCLAIMER: Setting environment variables is *not* a good use
# of a preexec_fn. This is merely a test.
Expand Down
8 changes: 4 additions & 4 deletions Lib/test/test_sys.py
Expand Up @@ -1004,13 +1004,13 @@ def inner():
class C(object): pass
check(C.__dict__, size('P'))
# BaseException
check(BaseException(), size('5Pb'))
check(BaseException(), size('6Pb'))
# UnicodeEncodeError
check(UnicodeEncodeError("", "", 0, 0, ""), size('5Pb 2P2nP'))
check(UnicodeEncodeError("", "", 0, 0, ""), size('6Pb 2P2nP'))
# UnicodeDecodeError
check(UnicodeDecodeError("", b"", 0, 0, ""), size('5Pb 2P2nP'))
check(UnicodeDecodeError("", b"", 0, 0, ""), size('6Pb 2P2nP'))
# UnicodeTranslateError
check(UnicodeTranslateError("", 0, 1, ""), size('5Pb 2P2nP'))
check(UnicodeTranslateError("", 0, 1, ""), size('6Pb 2P2nP'))
# ellipses
check(Ellipsis, size(''))
# EncodingMap
Expand Down
@@ -0,0 +1,3 @@
Exceptions now save the keyword arguments given to their constructor in their
``kwargs`` attribute. Exceptions with overridden ``__init__`` and using keyword
arguments are now picklable. Contributed by Rémi Lapeyre.
188 changes: 173 additions & 15 deletions Objects/exceptions.c
Expand Up @@ -41,20 +41,23 @@ BaseException_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return NULL;
/* the dict is created on the fly in PyObject_GenericSetAttr */
self->dict = NULL;
self->kwargs = NULL;
self->traceback = self->cause = self->context = NULL;
self->suppress_context = 0;

if (args) {
self->args = args;
Py_INCREF(args);
return (PyObject *)self;
} else {
self->args = PyTuple_New(0);
if (!self->args) {
Py_DECREF(self);
return NULL;
}
}

self->args = PyTuple_New(0);
if (!self->args) {
Py_DECREF(self);
return NULL;
}
self->kwargs = kwds;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assignment should only be done if kwds is a non-empty dict, otherwise we're risking increasing the size of exception instances by sys.getsizeof({}) bytes for no compelling reason (that's an extra 288 bytes on my machine, vs the current 88 bytes for a BaseException instance).

While calls like SomeException() would get NULL for kwds, consider code like super().__new__(*some_args, **maybe_some_kwds) in exception subclasses.

If kwds is NULL, or an empty dict, then self->kwargs should be set to the None singleton.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be good to do that for args too?

Py_XINCREF(kwds);

return (PyObject *)self;
}
Expand All @@ -76,6 +79,7 @@ BaseException_clear(PyBaseExceptionObject *self)
{
Py_CLEAR(self->dict);
Py_CLEAR(self->args);
Py_CLEAR(self->kwargs);
Py_CLEAR(self->traceback);
Py_CLEAR(self->cause);
Py_CLEAR(self->context);
Expand All @@ -95,6 +99,7 @@ BaseException_traverse(PyBaseExceptionObject *self, visitproc visit, void *arg)
{
Py_VISIT(self->dict);
Py_VISIT(self->args);
Py_VISIT(self->kwargs);
Py_VISIT(self->traceback);
Py_VISIT(self->cause);
Py_VISIT(self->context);
Expand All @@ -118,21 +123,153 @@ static PyObject *
BaseException_repr(PyBaseExceptionObject *self)
{
const char *name = _PyType_Name(Py_TYPE(self));
if (PyTuple_GET_SIZE(self->args) == 1)
return PyUnicode_FromFormat("%s(%R)", name,
PyTuple_GET_ITEM(self->args, 0));
else
return PyUnicode_FromFormat("%s%R", name, self->args);
PyObject *separator = NULL;
PyObject *args = NULL;
PyObject *kwargs = NULL;
PyObject *seq = NULL;
PyObject *repr = NULL;
PyObject *item = NULL;
PyObject *items = NULL;
PyObject *it = NULL;
PyObject *key = NULL;
PyObject *value = NULL;
PyObject *result = NULL;

separator = PyUnicode_FromString(", ");

if (PyTuple_Check(self->args)) {
const Py_ssize_t len = PyTuple_Size(self->args);
seq = PyTuple_New(len);
if (seq == NULL) {
goto fail;
}
for (Py_ssize_t i=0; i < len; i++) {
repr = PyObject_Repr(PyTuple_GET_ITEM(self->args, i));
if (repr == NULL) {
goto fail;
}
PyTuple_SET_ITEM(seq, i, repr);
}
args = PyUnicode_Join(separator, seq);
Py_DECREF(seq);
}

if (PyMapping_Check(self->kwargs)) {
const Py_ssize_t len = PyMapping_Length(self->kwargs);
if (len == -1) {
goto fail;
}
seq = PyTuple_New(len);
items = PyMapping_Items(self->kwargs);
if (seq == NULL || items == NULL) {
goto fail;
}
it = PyObject_GetIter(items);
if (it == NULL) {
goto fail;
}
Py_ssize_t i = 0;
while ((item = PyIter_Next(it)) != NULL) {
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
goto fail;
}
key = PyTuple_GET_ITEM(item, 0);
value = PyTuple_GET_ITEM(item, 1);
repr = PyUnicode_FromFormat("%S=%R", key, value);
if (repr == NULL) {
goto fail;
}
PyTuple_SET_ITEM(seq, i, repr);
i++;
Py_DECREF(item);
}
kwargs = PyUnicode_Join(separator, seq);
Py_DECREF(seq);
Py_DECREF(items);
Py_DECREF(it);
}
Py_DECREF(separator);

if (args == NULL && kwargs == NULL) {
result = PyUnicode_FromFormat("%s()", name, kwargs);
remilapeyre marked this conversation as resolved.
Show resolved Hide resolved
} else if (kwargs == NULL || PyUnicode_GET_LENGTH(kwargs) == 0) {
result = PyUnicode_FromFormat("%s(%S)", name, args);
} else if (args == NULL || PyUnicode_GET_LENGTH(args) == 0) {
result = PyUnicode_FromFormat("%s(%S)", name, kwargs);
} else {
result = PyUnicode_FromFormat("%s(%S, %S)", name, args, kwargs);
}
Py_XDECREF(args);
Py_XDECREF(kwargs);
return result;

fail:
Py_XDECREF(separator);
Py_XDECREF(args);
Py_XDECREF(kwargs);
Py_XDECREF(seq);
Py_XDECREF(repr);
Py_XDECREF(item);
Py_XDECREF(items);
Py_XDECREF(it);
Py_XDECREF(key);
Py_XDECREF(value);
return NULL;
}

/* Pickling support */
static PyObject *
BaseException_reduce(PyBaseExceptionObject *self, PyObject *Py_UNUSED(ignored))
{
if (self->args && self->dict)
return PyTuple_Pack(3, Py_TYPE(self), self->args, self->dict);
else
return PyTuple_Pack(2, Py_TYPE(self), self->args);
PyObject *functools;
PyObject *partial;
PyObject *constructor;
PyObject *result;
PyObject **newargs;

_Py_IDENTIFIER(partial);
functools = PyImport_ImportModule("functools");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This reduce implementation concerns me, as it looks like it will make everything much slower, even for exception instances where self->kwargs isn't set.

Instead, I'd recommend migrating BaseException away from implementing __reduce__ directly, and instead have it implement __getnewargs_ex__: https://docs.python.org/3/library/pickle.html#object.__getnewargs_ex__

That way the pickle machinery will take care of calling __new__ with the correct arguments, and you wouldn't need to introduce a weird dependency from a core builtin into a standard library module.

(That would have potential backwards compatibility implications for subclasses implementing reduce based on the parent class implementation, but the same would hold true for introduce a partial object in place of a direct reference to the class - either way, there'll need to be a note in the Porting section of the What's New guide, and switching to __get_newargs_ex__ will at least have the virtue of simplifying the code rather than making it more complicated)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to do that, I removed __reduce__ and added __getnewargs_ex__ to the methods as:

static PyObject *
BaseException_getnewargs_ex(PyBaseExceptionObject *self, PyObject *Py_UNUSED(ignored))
{
    PyObject *args = PyObject_GetAttrString((PyObject *) self, "args");
    PyObject *kwargs = PyObject_GetAttrString((PyObject *) self, "kwargs");

    if (args == NULL || kwargs == NULL) {
        return NULL;
    }

    return Py_BuildValue("(OO)", args, kwargs);
}

but it brocke pickling. Did I miss something?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, found my mistake, using __getnewargs_ex__ broke pickling for protocols 0 and 1. Is this expected?

I don't think this happened when using a partial reference on the the constructor of the class.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's ok to broke pickling support for protocols 0 and 1 since it was broken for keyword args anyway?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Defining __reduce_ex__ would let you restore the old behaviour for those protocols, but I'm not sure __getnewargs_ex__ will still be called if you do that (we're reaching the limits of my own pickle knowledge).

@pitrou Do you have any recommendations here? (Context: trying to get BaseException to pickle keyword args properly, wanting to use __getnewargs_ex__ for more recent pickle protocols, but wondering how to handle the older protocols that don't use that)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How should I call object.__reduce_ex__?

It seems to me that calling the builtin super is not done anywhere in the source code but I don't find the right way to do it.

Do I need to call object___reduce_ex__ directly?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ncoghlan Well, I'm not sure why you wouldn't implement the entire logic in __reduce_ex__, instead of also defining __getnewargs_ex__?

Or, rather, you could just define __getnewargs_ex__ and stop caring about protocols 0 and 1 (which are extremely obsolete by now, so we want to maintain compatibility, but fixing bugs in them is not important).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pitrou I only suggested delegating to __getnewargs_ex__ because I wasn't sure how to mimic that behaviour from inside a custom __reduce_ex__ implementation.

But if __reduce__ still gets called for protocols 0 and 1 even when __getnewargs_ex__ is defined, then that's even better.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @pitrou @ncoghlan, thanks for you input. I pushed a new commit that implement __getnewargs_ex__ but it seems that __reduce_ex__ does not check it and call __reduce__ no matter what the protocol is:

>>> BaseException().__reduce_ex__(0)
(<class 'BaseException'>, ())
>>> BaseException().__reduce_ex__(1)
(<class 'BaseException'>, ())
>>> BaseException().__reduce_ex__(2)
(<class 'BaseException'>, ())
>>> BaseException().__reduce_ex__(3)
(<class 'BaseException'>, ())
>>> BaseException().__reduce_ex__(4)
(<class 'BaseException'>, ())
>>> BaseException().__getnewargs_ex__()
((), {})

If I remove the __reduce__, then it breaks pickling for protocols 0 and 1:

>>> BaseException().__reduce_ex__(0)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users/remi/src/cpython/Lib/copyreg.py", line 66, in _reduce_ex
    raise TypeError(f"cannot pickle {cls.__name__!r} object")
TypeError: cannot pickle 'BaseException' object
>>> BaseException().__reduce_ex__(1)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users/remi/src/cpython/Lib/copyreg.py", line 66, in _reduce_ex
    raise TypeError(f"cannot pickle {cls.__name__!r} object")
TypeError: cannot pickle 'BaseException' object
>>> BaseException().__reduce_ex__(2)
(<function __newobj__ at 0x105c63040>, (<class 'BaseException'>,), None, None, None)
>>> BaseException().__reduce_ex__(3)
(<function __newobj__ at 0x105c63040>, (<class 'BaseException'>,), None, None, None)
>>> BaseException().__reduce_ex__(4)
(<function __newobj__ at 0x105c63040>, (<class 'BaseException'>,), None, None, None)

Do I need to define a custom __reduce_ex__ as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dug further and it seems my issue comes from https://github.com/python/cpython/blob/master/Lib/copyreg.py#L66, I will look into the details tomorrow.

if (functools == NULL) {
return NULL;
}
partial = _PyObject_GetAttrId(functools, &PyId_partial);
Py_DECREF(functools);
if (partial == NULL) {
return NULL;
}

Py_ssize_t len = 1;
if (PyTuple_Check(self->args)) {
len += PyTuple_GET_SIZE(self->args);
}
newargs = PyMem_New(PyObject *, len);
if (newargs == NULL) {
PyErr_NoMemory();
return NULL;
}
newargs[0] = (PyObject *)Py_TYPE(self);

for (Py_ssize_t i=1; i < len; i++) {
newargs[i] = PyTuple_GetItem(self->args, i-1);
}
constructor = _PyObject_FastCallDict(partial, newargs, len, self->kwargs);
PyMem_Free(newargs);

Py_DECREF(partial);

PyObject *args = PyTuple_New(0);
if (args == NULL) {
return NULL;
}
if (self->args && self->dict){
result = PyTuple_Pack(3, constructor, args, self->dict);
} else {
result = PyTuple_Pack(2, constructor, args);
}
Py_DECREF(constructor);
Py_DECREF(args);
return result;
}

/*
Expand Down Expand Up @@ -206,6 +343,26 @@ BaseException_set_args(PyBaseExceptionObject *self, PyObject *val, void *Py_UNUS
return 0;
}

static PyObject *
BaseException_get_kwargs(PyBaseExceptionObject *self, void *Py_UNUSED(ignored)) {
if (self->kwargs == NULL) {
self->kwargs = PyDict_New();
remilapeyre marked this conversation as resolved.
Show resolved Hide resolved
}
Py_XINCREF(self->kwargs);
return self->kwargs;
}

static int
BaseException_set_kwargs(PyBaseExceptionObject *self, PyObject *val, void *Py_UNUSED(ignored)) {
if (val == NULL) {
PyErr_SetString(PyExc_TypeError, "kwargs may not be deleted");
return -1;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

val needs to be checked to make sure it is a mapping (with PyMapping_Check) or None.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about doing that but it seems to me that PyMapping_Check is only a loose check, for example, wouldn't we be able to do:

>>> b = BaseException()
>>> b.kwargs = ()

since tuples support slicing?

If so, is the check still worth it?

Py_INCREF(val);
self->kwargs = val;
remilapeyre marked this conversation as resolved.
Show resolved Hide resolved
return 0;
}

static PyObject *
BaseException_get_tb(PyBaseExceptionObject *self, void *Py_UNUSED(ignored))
{
Expand Down Expand Up @@ -296,6 +453,7 @@ BaseException_set_cause(PyObject *self, PyObject *arg, void *Py_UNUSED(ignored))
static PyGetSetDef BaseException_getset[] = {
{"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict},
{"args", (getter)BaseException_get_args, (setter)BaseException_set_args},
{"kwargs", (getter)BaseException_get_kwargs, (setter)BaseException_set_kwargs},
{"__traceback__", (getter)BaseException_get_tb, (setter)BaseException_set_tb},
{"__context__", BaseException_get_context,
BaseException_set_context, PyDoc_STR("exception context")},
Expand Down