Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Merge pull request #452 from 'batterseapower/master'

BUG: io/matlab: Fix byte order used to interpret strings in Matlab reader
  • Loading branch information...
commit 8e83bc9e4cc0e79752e631da42856cd65417801e 2 parents cff6eb2 + 8715dce
@pv pv authored
View
8 scipy/io/matlab/mio5.py
@@ -520,6 +520,14 @@ def write_element(self, arr, mdtype=None):
''' write tag and data '''
if mdtype is None:
mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]
+
+ # We are writing a little-endian Matlab file but our incoming arrays may
+ # be big-endian. In particular, they might be big-endian because we originally
+ # *read* them from a big-endian Matlab file
+ byte_order = arr.dtype.byteorder
+ if byte_order == '>' or (byte_order == '=' and not boc.sys_is_le):
+ arr = arr.byteswap().newbyteorder()
+
byte_count = arr.size*arr.itemsize
if byte_count <= 4:
self.write_smalldata_element(arr, mdtype, byte_count)
View
9 scipy/io/matlab/mio5_utils.pyx
@@ -203,11 +203,10 @@ cdef class VarReader5:
if isinstance(key, str):
continue
self.class_dtypes[key] = <PyObject*>dt
- # cache correctly byte ordered dtypes
- if self.little_endian:
- self.U1_dtype = np.dtype('<U1')
- else:
- self.U1_dtype = np.dtype('>U1')
+ # Always use U1 rather than <U1 or >U1 for interpreting string
+ # data because the strings are created by the Python runtime
+ # by .decode() and hence use native byte order rather than the mat file's
+ self.U1_dtype = np.dtype('U1')
bool_dtype = np.dtype('bool')
def set_stream(self, fobj):
View
BIN  scipy/io/matlab/tests/data/big_endian.mat
Binary file not shown
View
28 scipy/io/matlab/tests/test_mio.py
@@ -818,6 +818,34 @@ def test_empty_string():
stream.close()
+def test_read_big_endian():
+ # make sure big-endian data is read correctly
+ estring_fname = pjoin(test_data_path, 'big_endian.mat')
+ fp = open(estring_fname, 'rb')
+ rdr = MatFile5Reader_future(fp)
+ d = rdr.get_variables()
+ fp.close()
+ assert_array_equal(d['strings'], np.array([[u'hello'],
+ [u'world']], dtype=np.object))
+ assert_array_equal(d['floats'], np.array([[ 2., 3.],
+ [ 3., 4.]], dtype=np.float32))
+
+
+def test_write_big_endian():
+ # we don't support writing actual big-endian .mat files, but we need to
+ # behave correctly if the user supplies a big-endian numpy array to write out
+ stream = BytesIO()
+ savemat_future(stream, {'a': np.array([[ 2., 3.],
+ [ 3., 4.]], dtype='>f4'),
+ 'b': np.array([u'hello', u'world'], dtype='>U')})
+ rdr = MatFile5Reader_future(stream)
+ d = rdr.get_variables()
+ assert_array_equal(d['a'], np.array([[ 2., 3.],
+ [ 3., 4.]], dtype='f4'))
+ assert_array_equal(d['b'], np.array([u'hello', u'world'], dtype='U'))
+ stream.close()
+
+
def test_mat4_3d():
# test behavior when writing 3D arrays to matlab 4 files
stream = BytesIO()
Please sign in to comment.
Something went wrong with that request. Please try again.