From 2facd5cbdcaa08b61270c0c0760a39cd03acc007 Mon Sep 17 00:00:00 2001 From: Julian Taylor Date: Mon, 3 Feb 2014 20:01:07 +0100 Subject: [PATCH] ENH: add tobytes and stop using tostring in documentation tostring returns bytes which are not equal to string, so provide a tobytes function alias. tostring does not emit a deprecation warning yet so rdepends do not need to check two names to support older versions of numpy without warnings. --- doc/numpybook/capi.lyx | 2 +- doc/numpybook/numpybook.lyx | 12 ++++----- doc/release/1.9.0-notes.rst | 8 ++++++ doc/source/reference/arrays.ndarray.rst | 1 + doc/source/reference/c-api.array.rst | 2 +- .../reference/maskedarray.baseclass.rst | 1 + doc/source/reference/routines.ma.rst | 2 +- numpy/add_newdocs.py | 25 +++++++++++++------ numpy/core/src/multiarray/methods.c | 7 ++++-- numpy/core/src/multiarray/scalartypes.c.src | 9 ++++--- numpy/core/tests/test_defchararray.py | 2 +- numpy/core/tests/test_multiarray.py | 8 +++--- numpy/core/tests/test_regression.py | 16 ++++++------ numpy/doc/byteswapping.py | 8 +++--- numpy/f2py/tests/test_array_from_pyobj.py | 3 ++- numpy/lib/format.py | 4 +-- numpy/ma/core.py | 22 +++++++++++----- numpy/ma/mrecords.py | 4 +-- numpy/matrixlib/tests/test_defmatrix.py | 4 +-- 19 files changed, 88 insertions(+), 52 deletions(-) diff --git a/doc/numpybook/capi.lyx b/doc/numpybook/capi.lyx index 94f18362a1f8..a5c835523739 100644 --- a/doc/numpybook/capi.lyx +++ b/doc/numpybook/capi.lyx @@ -9384,7 +9384,7 @@ self \emph default . 
\series bold -tostring +tobytes \series default ( \emph on diff --git a/doc/numpybook/numpybook.lyx b/doc/numpybook/numpybook.lyx index 59f51b4ebab1..65185a2be114 100644 --- a/doc/numpybook/numpybook.lyx +++ b/doc/numpybook/numpybook.lyx @@ -6062,9 +6062,9 @@ name "ndarray!methods!itemset" \end_layout \begin_layout Description -tostring +tobytes \begin_inset LatexCommand index -name "ndarray!methods!tostring" +name "ndarray!methods!tobytes" \end_inset @@ -6134,7 +6134,7 @@ format the shape, type, or endianness of an array. When written in binary mode, tofile is functionally equivalent to \family typewriter -fid.write(self.tostring()) +fid.write(self.tobytes()) \family default . \end_layout @@ -6836,7 +6836,7 @@ Insert scalar (last argument) into array \begin_layout Standard \series bold -tostring +tobytes \end_layout \end_inset @@ -9450,7 +9450,7 @@ name "ndarray!special methods!setstate" ). In this tuple, isfortran is a Bool stating whether the following flattened data is in Fortran order or not, and string_or_list is a string formed - by self.tostring() if the data type is not object. + by self.tobytes() if the data type is not object. If the data type of self is an object array, then string_or_list is a flat list equivalent to self.ravel().tolist(). @@ -17719,7 +17719,7 @@ __reduce__ () \InsetSpace ~ This is called to pickle an array scalar. It returns a tuple of (numpy.core.multiarray.scalar, self.dtypestr, obj or - self.tostring()) which can be used to reconstruct the scalar on unpickling. + self.tobytes()) which can be used to reconstruct the scalar on unpickling. Notice that no state is written, because the entire scalar can be constructed from just the string. 
Also, if this is an object array scalar, then the Python object being reference diff --git a/doc/release/1.9.0-notes.rst b/doc/release/1.9.0-notes.rst index 1d382ff0390e..8047fbd3a55f 100644 --- a/doc/release/1.9.0-notes.rst +++ b/doc/release/1.9.0-notes.rst @@ -74,6 +74,14 @@ Dtype parameter added to `np.linspace` and `np.logspace` The returned data type from the `linspace` and `logspace` functions can now be specificed using the dtype parameter. + +`tobytes` alias for `tostring` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`ndarray.tobytes` and `MaskedArray.tobytes` have been added as aliases for +`tostring` which exports arrays as `bytes`. This is more consistent in Python 3 +where `str` and `bytes` are not the same. + + Improvements ============ diff --git a/doc/source/reference/arrays.ndarray.rst b/doc/source/reference/arrays.ndarray.rst index 85e41c2157a4..e9c0a6d87369 100644 --- a/doc/source/reference/arrays.ndarray.rst +++ b/doc/source/reference/arrays.ndarray.rst @@ -294,6 +294,7 @@ Array conversion ndarray.itemset ndarray.setasflat ndarray.tostring + ndarray.tobytes ndarray.tofile ndarray.dump ndarray.dumps diff --git a/doc/source/reference/c-api.array.rst b/doc/source/reference/c-api.array.rst index 6e68a9a0e47a..323ca0655cf0 100644 --- a/doc/source/reference/c-api.array.rst +++ b/doc/source/reference/c-api.array.rst @@ -1574,7 +1574,7 @@ Conversion .. cfunction:: PyObject* PyArray_ToString(PyArrayObject* self, NPY_ORDER order) - Equivalent to :meth:`ndarray.tostring` (*self*, *order*). Return the bytes + Equivalent to :meth:`ndarray.tobytes` (*self*, *order*). Return the bytes of this array in a Python string. .. 
cfunction:: PyObject* PyArray_ToFile(PyArrayObject* self, FILE* fp, char* sep, char* format) diff --git a/doc/source/reference/maskedarray.baseclass.rst b/doc/source/reference/maskedarray.baseclass.rst index fd1fd7ae611a..a1c90a45dc28 100644 --- a/doc/source/reference/maskedarray.baseclass.rst +++ b/doc/source/reference/maskedarray.baseclass.rst @@ -200,6 +200,7 @@ Conversion MaskedArray.tolist MaskedArray.torecords MaskedArray.tostring + MaskedArray.tobytes Shape manipulation diff --git a/doc/source/reference/routines.ma.rst b/doc/source/reference/routines.ma.rst index 6eda37578995..5cb38e83f41a 100644 --- a/doc/source/reference/routines.ma.rst +++ b/doc/source/reference/routines.ma.rst @@ -251,7 +251,7 @@ Conversion operations ma.MaskedArray.tofile ma.MaskedArray.tolist ma.MaskedArray.torecords - ma.MaskedArray.tostring + ma.MaskedArray.tobytes Pickling and unpickling diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 3e115c84580b..be343f79da87 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -4386,7 +4386,7 @@ def luf(lamdaexpr, *args, **kwargs): sep : str Separator between array items for text output. If "" (empty), a binary file is written, equivalent to - ``file.write(a.tostring())``. + ``file.write(a.tobytes())``. format : str Format string for text file output. Each entry in the array is formatted to text by first converting @@ -4440,8 +4440,7 @@ def luf(lamdaexpr, *args, **kwargs): """)) -add_newdoc('numpy.core.multiarray', 'ndarray', ('tostring', - """ +tobytesdoc = """ a.tostring(order='C') Construct a Python string containing the raw data bytes in the array. @@ -4452,9 +4451,11 @@ def luf(lamdaexpr, *args, **kwargs): unless the F_CONTIGUOUS flag in the array is set, in which case it means 'Fortran' order. + {deprecated} + Parameters ---------- - order : {'C', 'F', None}, optional + order : {{'C', 'F', None}}, optional Order of the data for multidimensional arrays: C, Fortran, or the same as for the original array. 
@@ -4466,15 +4467,23 @@ def luf(lamdaexpr, *args, **kwargs): Examples -------- >>> x = np.array([[0, 1], [2, 3]]) - >>> x.tostring() + >>> x.tobytes() '\\x00\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x03\\x00\\x00\\x00' - >>> x.tostring('C') == x.tostring() + >>> x.tobytes('C') == x.tobytes() True - >>> x.tostring('F') + >>> x.tobytes('F') '\\x00\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x01\\x00\\x00\\x00\\x03\\x00\\x00\\x00' - """)) + """ +add_newdoc('numpy.core.multiarray', 'ndarray', + ('tostring', tobytesdoc.format(deprecated= + 'This function is a compatibility ' + 'alias for tobytes. Despite its ' + 'name it returns bytes not ' + 'strings.'))) +add_newdoc('numpy.core.multiarray', 'ndarray', + ('tobytes', tobytesdoc.format(deprecated='.. versionadded:: 1.9.0'))) add_newdoc('numpy.core.multiarray', 'ndarray', ('trace', """ diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 37b7d3c18285..bf717c1f563e 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -542,7 +542,7 @@ array_tolist(PyArrayObject *self, PyObject *args) static PyObject * -array_tostring(PyArrayObject *self, PyObject *args, PyObject *kwds) +array_tobytes(PyArrayObject *self, PyObject *args, PyObject *kwds) { NPY_ORDER order = NPY_CORDER; static char *kwlist[] = {"order", NULL}; @@ -2456,6 +2456,9 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = { {"take", (PyCFunction)array_take, METH_VARARGS | METH_KEYWORDS, NULL}, + {"tobytes", + (PyCFunction)array_tobytes, + METH_VARARGS | METH_KEYWORDS, NULL}, {"tofile", (PyCFunction)array_tofile, METH_VARARGS | METH_KEYWORDS, NULL}, @@ -2463,7 +2466,7 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = { (PyCFunction)array_tolist, METH_VARARGS, NULL}, {"tostring", - (PyCFunction)array_tostring, + (PyCFunction)array_tobytes, METH_VARARGS | METH_KEYWORDS, NULL}, {"trace", (PyCFunction)array_trace, diff --git a/numpy/core/src/multiarray/scalartypes.c.src 
b/numpy/core/src/multiarray/scalartypes.c.src index 9c081f150539..c8e66ac42f09 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -1499,9 +1499,9 @@ gentype_wraparray(PyObject *NPY_UNUSED(scalar), PyObject *args) */ /**begin repeat * - * #name = tolist, item, tostring, astype, copy, __deepcopy__, searchsorted, - * view, swapaxes, conj, conjugate, nonzero, flatten, ravel, fill, - * transpose, newbyteorder# + * #name = tolist, item, tostring, tobytes, astype, copy, __deepcopy__, + * searchsorted, view, swapaxes, conj, conjugate, nonzero, flatten, + * ravel, fill, transpose, newbyteorder# */ static PyObject * gentype_@name@(PyObject *self, PyObject *args) @@ -1845,6 +1845,9 @@ static PyMethodDef gentype_methods[] = { {"itemset", (PyCFunction)gentype_itemset, METH_VARARGS, NULL}, + {"tobytes", + (PyCFunction)gentype_tobytes, + METH_VARARGS, NULL}, {"tofile", (PyCFunction)gentype_tofile, METH_VARARGS | METH_KEYWORDS, NULL}, diff --git a/numpy/core/tests/test_defchararray.py b/numpy/core/tests/test_defchararray.py index 09fcff0d0666..fe0e02a6da75 100644 --- a/numpy/core/tests/test_defchararray.py +++ b/numpy/core/tests/test_defchararray.py @@ -138,7 +138,7 @@ def setUp(self): def test_it(self): assert_equal(self.A.shape, (4,)) - assert_equal(self.A.upper()[:2].tostring(), asbytes('AB')) + assert_equal(self.A.upper()[:2].tobytes(), asbytes('AB')) class TestComparisons(TestCase): def setUp(self): diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index c2ac00923e22..b5e6fdf57afc 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -2308,7 +2308,7 @@ def test_datetime(self): class TestIO(object): - """Test tofile, fromfile, tostring, and fromstring""" + """Test tofile, fromfile, tobytes, and fromstring""" def setUp(self): shape = (2, 4, 3) @@ -2357,11 +2357,11 @@ def test_roundtrip_filename(self): assert_array_equal(y, self.x.flat) def 
test_roundtrip_binary_str(self): - s = self.x.tostring() + s = self.x.tobytes() y = np.fromstring(s, dtype=self.dtype) assert_array_equal(y, self.x.flat) - s = self.x.tostring('F') + s = self.x.tobytes('F') y = np.fromstring(s, dtype=self.dtype) assert_array_equal(y, self.x.flatten('F')) @@ -2567,7 +2567,7 @@ def test_ip_basic(self): for dtype in [float, int, np.complex]: dt = np.dtype(dtype).newbyteorder(byteorder) x = (np.random.random((4, 7))*5).astype(dt) - buf = x.tostring() + buf = x.tobytes() yield self.tst_basic, buf, x.flat, {'dtype':dt} def test_empty(self): diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index e35bbb320cbd..6b0d58e22550 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -286,11 +286,11 @@ def test_unicode_string_comparison(self,level=rlevel): b = np.array('world') a == b - def test_tostring_FORTRANORDER_discontiguous(self,level=rlevel): + def test_tobytes_FORTRANORDER_discontiguous(self,level=rlevel): """Fix in r2836""" # Create discontiguous Fortran-ordered array x = np.array(np.random.rand(3, 3), order='F')[:, :2] - assert_array_almost_equal(x.ravel(), np.fromstring(x.tostring())) + assert_array_almost_equal(x.ravel(), np.fromstring(x.tobytes())) def test_flat_assignment(self,level=rlevel): """Correct behaviour of ticket #194""" @@ -1200,7 +1200,7 @@ def test_void_scalar_constructor(self): #that void scalar contains original data. test_string = np.array("test") test_string_void_scalar = np.core.multiarray.scalar( - np.dtype(("V", test_string.dtype.itemsize)), test_string.tostring()) + np.dtype(("V", test_string.dtype.itemsize)), test_string.tobytes()) assert_(test_string_void_scalar.view(test_string.dtype) == test_string) @@ -1208,7 +1208,7 @@ def test_void_scalar_constructor(self): #reconstructed scalar is correct. 
test_record = np.ones((), "i,i") test_record_void_scalar = np.core.multiarray.scalar( - test_record.dtype, test_record.tostring()) + test_record.dtype, test_record.tobytes()) assert_(test_record_void_scalar == test_record) @@ -1378,10 +1378,10 @@ def test_byteswap_complex_scalar(self): y = x.byteswap() if x.dtype.byteorder == z.dtype.byteorder: # little-endian machine - assert_equal(x, np.fromstring(y.tostring(), dtype=dtype.newbyteorder())) + assert_equal(x, np.fromstring(y.tobytes(), dtype=dtype.newbyteorder())) else: # big-endian machine - assert_equal(x, np.fromstring(y.tostring(), dtype=dtype)) + assert_equal(x, np.fromstring(y.tobytes(), dtype=dtype)) # double check real and imaginary parts: assert_equal(x.real, y.real.byteswap()) assert_equal(x.imag, y.imag.byteswap()) @@ -1527,7 +1527,7 @@ def test_fromfile_tofile_seeks(self): # file handle out of sync f0 = tempfile.NamedTemporaryFile() f = f0.file - f.write(np.arange(255, dtype='u1').tostring()) + f.write(np.arange(255, dtype='u1').tobytes()) f.seek(20) ret = np.fromfile(f, count=4, dtype='u1') @@ -1904,7 +1904,7 @@ def test_complex64_alignment(self): # 2D array arr2 = np.reshape(arr, (2, 5)) # Fortran write followed by (C or F) read caused bus error - data_str = arr2.tostring('F') + data_str = arr2.tobytes('F') data_back = np.ndarray(arr2.shape, arr2.dtype, buffer=data_str, diff --git a/numpy/doc/byteswapping.py b/numpy/doc/byteswapping.py index ffefe3168366..430683d308d4 100644 --- a/numpy/doc/byteswapping.py +++ b/numpy/doc/byteswapping.py @@ -101,7 +101,7 @@ Note the the array has not changed in memory: ->>> fixed_end_dtype_arr.tostring() == big_end_str +>>> fixed_end_dtype_arr.tobytes() == big_end_str True Data and type endianness don't match, change data to match dtype @@ -117,7 +117,7 @@ Now the array *has* changed in memory: ->>> fixed_end_mem_arr.tostring() == big_end_str +>>> fixed_end_mem_arr.tobytes() == big_end_str False Data and dtype endianness match, swap data and dtype @@ -131,7 +131,7 @@ 
>>> swapped_end_arr = big_end_arr.byteswap().newbyteorder() >>> swapped_end_arr[0] 1 ->>> swapped_end_arr.tostring() == big_end_str +>>> swapped_end_arr.tobytes() == big_end_str False An easier way of casting the data to a specific dtype and byte ordering @@ -140,7 +140,7 @@ >>> swapped_end_arr = big_end_arr.astype('>> swapped_end_arr[0] 1 ->>> swapped_end_arr.tostring() == big_end_str +>>> swapped_end_arr.tobytes() == big_end_str False """ diff --git a/numpy/f2py/tests/test_array_from_pyobj.py b/numpy/f2py/tests/test_array_from_pyobj.py index 09d613293816..3a148e72c735 100644 --- a/numpy/f2py/tests/test_array_from_pyobj.py +++ b/numpy/f2py/tests/test_array_from_pyobj.py @@ -227,7 +227,8 @@ def __init__(self, typ, dims, intent, obj): assert_(self.arr_attr[2]==self.pyarr_attr[2]) # dimensions if self.arr_attr[1]<=1: assert_(self.arr_attr[3]==self.pyarr_attr[3],\ - repr((self.arr_attr[3], self.pyarr_attr[3], self.arr.tostring(), self.pyarr.tostring()))) # strides + repr((self.arr_attr[3], self.pyarr_attr[3], + self.arr.tobytes(), self.pyarr.tobytes()))) # strides assert_(self.arr_attr[5][-2:]==self.pyarr_attr[5][-2:],\ repr((self.arr_attr[5], self.pyarr_attr[5]))) # descr assert_(self.arr_attr[6]==self.pyarr_attr[6],\ diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 4cfbbe05da3c..631e92959b11 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -407,7 +407,7 @@ def write_array(fp, array, version=(1, 0)): for chunk in numpy.nditer( array, flags=['external_loop', 'buffered', 'zerosize_ok'], buffersize=buffersize, order='F'): - fp.write(chunk.tostring('C')) + fp.write(chunk.tobytes('C')) else: if isfileobj(fp): array.tofile(fp) @@ -415,7 +415,7 @@ def write_array(fp, array, version=(1, 0)): for chunk in numpy.nditer( array, flags=['external_loop', 'buffered', 'zerosize_ok'], buffersize=buffersize, order='C'): - fp.write(chunk.tostring('C')) + fp.write(chunk.tobytes('C')) def read_array(fp): diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 
42787e3c70ac..e4116fbd8001 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -5381,11 +5381,21 @@ def tolist(self, fill_value=None): # return result #........................ def tostring(self, fill_value=None, order='C'): + """ + This function is a compatibility alias for tobytes. Despite its name it + returns bytes not strings. + """ + + return self.tobytes(fill_value, order=order) + #........................ + def tobytes(self, fill_value=None, order='C'): """ Return the array data as a string containing the raw bytes in the array. The array is filled with a fill value before the string conversion. + .. versionadded:: 1.9.0 + Parameters ---------- fill_value : scalar, optional @@ -5401,22 +5411,22 @@ def tostring(self, fill_value=None, order='C'): See Also -------- - ndarray.tostring + ndarray.tobytes tolist, tofile Notes ----- - As for `ndarray.tostring`, information about the shape, dtype, etc., + As for `ndarray.tobytes`, information about the shape, dtype, etc., but also about `fill_value`, will be lost. Examples -------- >>> x = np.ma.array(np.array([[1, 2], [3, 4]]), mask=[[0, 1], [1, 0]]) - >>> x.tostring() + >>> x.tobytes() '\\x01\\x00\\x00\\x00?B\\x0f\\x00?B\\x0f\\x00\\x04\\x00\\x00\\x00' """ - return self.filled(fill_value).tostring(order=order) + return self.filled(fill_value).tobytes(order=order) #........................ 
def tofile(self, fid, sep="", format="%s"): """ @@ -5498,9 +5508,9 @@ def __getstate__(self): self.shape, self.dtype, self.flags.fnc, - self._data.tostring(cf), + self._data.tobytes(cf), #self._data.tolist(), - getmaskarray(self).tostring(cf), + getmaskarray(self).tobytes(cf), #getmaskarray(self).tolist(), self._fill_value, ) diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py index a2380d8131f6..e66596509f63 100644 --- a/numpy/ma/mrecords.py +++ b/numpy/ma/mrecords.py @@ -426,8 +426,8 @@ def __getstate__(self): self.shape, self.dtype, self.flags.fnc, - self._data.tostring(), - self._mask.tostring(), + self._data.tobytes(), + self._mask.tobytes(), self._fill_value, ) return state diff --git a/numpy/matrixlib/tests/test_defmatrix.py b/numpy/matrixlib/tests/test_defmatrix.py index d1a4e4ab5a9b..a06a564aa8ba 100644 --- a/numpy/matrixlib/tests/test_defmatrix.py +++ b/numpy/matrixlib/tests/test_defmatrix.py @@ -285,8 +285,8 @@ def test_instance_methods(self): 'getA', 'getA1', 'item', 'nonzero', 'put', 'putmask', 'resize', 'searchsorted', 'setflags', 'setfield', 'sort', 'partition', 'argpartition', - 'take', 'tofile', 'tolist', 'tostring', 'all', 'any', 'sum', - 'argmax', 'argmin', 'min', 'max', 'mean', 'var', 'ptp', + 'take', 'tofile', 'tolist', 'tostring', 'tobytes', 'all', 'any', + 'sum', 'argmax', 'argmin', 'min', 'max', 'mean', 'var', 'ptp', 'prod', 'std', 'ctypes', 'itemset', 'setasflat' ] for attrib in dir(a):