diff --git a/ChangeLog.rst b/ChangeLog.rst index d71e0f5c..cc6b5e4f 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -18,6 +18,8 @@ If you need to handle large data, you need to specify limits manually. Other changes -------------- +Add ``Packer.getbuffer()`` method. + 0.5.6 diff --git a/docs/Makefile b/docs/Makefile index b09d8844..831a6a7f 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -153,7 +153,7 @@ doctest: "results in $(BUILDDIR)/doctest/output.txt." serve: html - cd _build/html && python3 -m http.server + python3 -m http.server -d _build/html zip: html cd _build/html && zip -r ../../../msgpack-doc.zip . diff --git a/docs/advanced.rst b/docs/advanced.rst new file mode 100644 index 00000000..38370088 --- /dev/null +++ b/docs/advanced.rst @@ -0,0 +1,32 @@ +Advanced usage +=============== + +Packer +------ + +autoreset +~~~~~~~~~ + +When you use the ``autoreset=False`` option of :class:`~msgpack.Packer`, +the ``pack()`` method doesn't return packed ``bytes``. + +You can use :meth:`~msgpack.Packer.bytes` or :meth:`~msgpack.Packer.getbuffer` to +get packed data. + +``bytes()`` returns a ``bytes`` object. ``getbuffer()`` returns some bytes-like +object. Its concrete type is an implementation detail and it may change in future +versions. + +You can avoid creating a temporary bytes object by using ``Packer.getbuffer()``. + +.. code-block:: python + + packer = Packer(use_bin_type=True, autoreset=False) + + packer.pack([1, 2]) + packer.pack([3, 4]) + + with open('data.bin', 'wb') as f: + f.write(packer.getbuffer()) + + packer.reset() # reset internal buffer diff --git a/docs/index.rst b/docs/index.rst index dcdab4f5..e9c2ce83 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,3 +8,4 @@ language data exchange. 
:maxdepth: 1 api + advanced diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 225f24ae..fd05ae06 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -41,6 +41,9 @@ cdef extern from "pack.h": int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) +cdef extern from "buff_converter.h": + object buff_to_buff(char *, Py_ssize_t) + cdef int DEFAULT_RECURSE_LIMIT=511 cdef long long ITEM_LIMIT = (2**32)-1 @@ -349,9 +352,16 @@ cdef class Packer(object): return buf def reset(self): - """Clear internal buffer.""" + """Reset internal buffer. + + This method is useful only when autoreset=False. + """ self.pk.length = 0 def bytes(self): - """Return buffer content.""" + """Return internal buffer contents as bytes object""" return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + + def getbuffer(self): + """Return view of internal buffer.""" + return buff_to_buff(self.pk.buf, self.pk.length) diff --git a/msgpack/buff_converter.h b/msgpack/buff_converter.h new file mode 100644 index 00000000..bc7227ae --- /dev/null +++ b/msgpack/buff_converter.h @@ -0,0 +1,28 @@ +#include "Python.h" + +/* cython does not support this preprocessor check => write it in raw C */ +#if PY_MAJOR_VERSION == 2 +static PyObject * +buff_to_buff(char *buff, Py_ssize_t size) +{ + return PyBuffer_FromMemory(buff, size); +} + +#elif (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION >= 3) +static PyObject * +buff_to_buff(char *buff, Py_ssize_t size) +{ + return PyMemoryView_FromMemory(buff, size, PyBUF_READ); +} +#else +static PyObject * +buff_to_buff(char *buff, Py_ssize_t size) +{ + Py_buffer pybuf; + if (PyBuffer_FillInfo(&pybuf, NULL, buff, size, 1, PyBUF_FULL_RO) == -1) { + return NULL; + } + + return PyMemoryView_FromBuffer(&pybuf); +} +#endif diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 895864ed..5b4d6cef 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -860,43 +860,35 @@ 
def pack(self, obj): except: self._buffer = StringIO() # force reset raise - ret = self._buffer.getvalue() if self._autoreset: + ret = self._buffer.getvalue() self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + return ret def pack_map_pairs(self, pairs): self._pack_map_pairs(len(pairs), pairs) - ret = self._buffer.getvalue() if self._autoreset: + ret = self._buffer.getvalue() self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + return ret def pack_array_header(self, n): if n >= 2**32: raise PackValueError self._pack_array_header(n) - ret = self._buffer.getvalue() if self._autoreset: + ret = self._buffer.getvalue() self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + return ret def pack_map_header(self, n): if n >= 2**32: raise PackValueError self._pack_map_header(n) - ret = self._buffer.getvalue() if self._autoreset: + ret = self._buffer.getvalue() self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + return ret def pack_ext_type(self, typecode, data): if not isinstance(typecode, int): @@ -976,7 +968,19 @@ def _pack_bin_header(self, n): raise PackValueError('Bin is too large') def bytes(self): + """Return internal buffer contents as bytes object""" return self._buffer.getvalue() def reset(self): + """Reset internal buffer. + + This method is useful only when autoreset=False. 
+ """ self._buffer = StringIO() + + def getbuffer(self): + """Return view of internal buffer.""" + if USING_STRINGBUILDER or not PY3: + return memoryview(self.bytes()) + else: + return self._buffer.getbuffer() diff --git a/test/test_pack.py b/test/test_pack.py index b447f9c3..46080832 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -5,7 +5,7 @@ import struct from pytest import raises, xfail -from msgpack import packb, unpackb, Unpacker, Packer +from msgpack import packb, unpackb, Unpacker, Packer, pack from collections import OrderedDict from io import BytesIO @@ -148,3 +148,13 @@ def test_pairlist(): packed = packer.pack_map_pairs(pairlist) unpacked = unpackb(packed, object_pairs_hook=list) assert pairlist == unpacked + +def test_get_buffer(): + packer = Packer(autoreset=0, use_bin_type=True) + packer.pack([1, 2]) + strm = BytesIO() + strm.write(packer.getbuffer()) + written = strm.getvalue() + + expected = packb([1, 2], use_bin_type=True) + assert written == expected