Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions msgpack/_unpacker.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ cdef extern from "unpack.h":
Py_ssize_t max_array_len
Py_ssize_t max_map_len
Py_ssize_t max_ext_len
PyObject* memo

ctypedef struct unpack_context:
msgpack_user user
Expand All @@ -61,7 +62,7 @@ cdef inline init_ctx(unpack_context *ctx,
const char* encoding, const char* unicode_errors,
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
Py_ssize_t max_ext_len):
Py_ssize_t max_ext_len, object d):
unpack_init(ctx)
ctx.user.use_list = use_list
ctx.user.raw = raw
Expand Down Expand Up @@ -101,6 +102,9 @@ cdef inline init_ctx(unpack_context *ctx,

ctx.user.encoding = encoding
ctx.user.unicode_errors = unicode_errors
Py_INCREF(d)
ctx.user.memo = <PyObject*>d
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK <object>d will enable refcounting in Cython. Otherwise object and PyObject* are identical.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, I didn't manage to follow this suggestion, because the memo field is in a C struct, which doesn't support object fields.



def default_read_extended_type(typecode, data):
raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode)
Expand Down Expand Up @@ -195,9 +199,10 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
max_ext_len = buf_len

try:
memo = {}
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
use_list, raw, strict_map_key, cenc, cerr,
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len, memo)
ret = unpack_construct(&ctx, buf, buf_len, &off)
finally:
if new_protocol:
Expand Down Expand Up @@ -404,7 +409,7 @@ cdef class Unpacker(object):
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
ext_hook, use_list, raw, strict_map_key, cenc, cerr,
max_str_len, max_bin_len, max_array_len,
max_map_len, max_ext_len)
max_map_len, max_ext_len, {})

def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer."""
Expand Down
6 changes: 5 additions & 1 deletion msgpack/fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, strict_
self._max_ext_len = max_ext_len
self._stream_offset = 0

self._memo = {} # used to memoize keys to reduce memory consumption

if list_hook is not None and not callable(list_hook):
raise TypeError('`list_hook` is not callable')
if object_hook is not None and not callable(object_hook):
Expand Down Expand Up @@ -654,7 +656,9 @@ def _unpack(self, execute=EX_CONSTRUCT):
ret = {}
for _ in xrange(n):
key = self._unpack(EX_CONSTRUCT)
if self._strict_map_key and type(key) not in (unicode, bytes):
if type(key) in (unicode, bytes):
key = self._memo.setdefault(key, key)
elif self._strict_map_key:
raise ValueError("%s is not allowed for map key" % str(type(key)))
ret[key] = self._unpack(EX_CONSTRUCT)
if self._object_hook is not None:
Expand Down
18 changes: 17 additions & 1 deletion msgpack/unpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ typedef struct unpack_user {
const char *encoding;
const char *unicode_errors;
Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len;
PyObject *memo;
} unpack_user;

typedef PyObject* msgpack_unpack_object;
Expand Down Expand Up @@ -189,7 +190,22 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_un

static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v)
{
if (u->strict_map_key && !PyUnicode_CheckExact(k) && !PyBytes_CheckExact(k)) {
if (PyUnicode_CheckExact(k) || PyBytes_CheckExact(k)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should not make a big difference, but this can be combined with the previous condition in l.194 :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the review! I fixed this.

PyObject *memokey = PyDict_GetItem(u->memo, k);
if (memokey != NULL) {
Py_INCREF(memokey);
Py_DECREF(k);
k = memokey;
}
else {
if (PyDict_SetItem(u->memo, k, k) < 0) {
Py_DECREF(k);
Py_DECREF(v);
return -1;
}
}
}
else if (u->strict_map_key) {
PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key", Py_TYPE(k)->tp_name);
return -1;
}
Expand Down
1 change: 1 addition & 0 deletions msgpack/unpack_template.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ static inline PyObject* unpack_data(unpack_context* ctx)
static inline void unpack_clear(unpack_context *ctx)
{
Py_CLEAR(ctx->stack[0].obj);
Py_CLEAR(ctx->user.memo);
}

template <bool construct>
Expand Down
9 changes: 9 additions & 0 deletions test/test_unpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ def _hook(self, code, data):
assert unpacker.unpack() == {'a': ExtType(2, b'321')}


def test_unpacker_shares_stringly_keys():
f = BytesIO(packb([{" a ?!": 1}, {" a ?!": 2}]))
unpacker = Unpacker(f)
d1, d2 = unpacker.unpack()
key1, = d1
key2, = d2
assert key1 is key2


if __name__ == '__main__':
test_unpack_array_header_from_file()
test_unpacker_hook_refcnt()
Expand Down