From 5ace2d2c46554ac85bda4011ced542b5b97bdc83 Mon Sep 17 00:00:00 2001 From: tbeu Date: Thu, 25 Jan 2018 20:48:57 +0100 Subject: [PATCH] Fix writing of Unicode encoded character array to v7 MAT file As reported by https://github.com/tbeu/matio/issues/79 --- src/mat5.c | 59 ++----------------- test/Makefile.am | 3 + test/mat5_compressed_write.at | 6 +- test/results/readvar-write_char-a-utf8.out | 11 ++++ .../readvar-write_empty_struct-var4-utf8.out | 35 +++++++++++ test/results/write_struct_char-utf8.out | 39 ++++++++++++ 6 files changed, 97 insertions(+), 56 deletions(-) create mode 100644 test/results/readvar-write_char-a-utf8.out create mode 100644 test/results/readvar-write_empty_struct-var4-utf8.out create mode 100644 test/results/write_struct_char-utf8.out diff --git a/src/mat5.c b/src/mat5.c index edbf0638..a706393c 100644 --- a/src/mat5.c +++ b/src/mat5.c @@ -533,10 +533,15 @@ WriteCompressedCharData(mat_t *mat,z_streamp z,void *data,int N, return 0; switch ( data_type ) { + case MAT_T_INT8: + case MAT_T_UINT8: + case MAT_T_INT16: case MAT_T_UINT16: case MAT_T_UTF8: + case MAT_T_UTF16: data_size = Mat_SizeOf(data_type); - data_tag[0] = data_type; + data_tag[0] = (MAT_T_INT8 == data_type || MAT_T_UINT8 == data_type) ? + MAT_T_UTF8 : data_type; data_tag[1] = N*data_size; z->next_in = ZLIB_BYTE_PTR(data_tag); z->avail_in = 8; @@ -571,58 +576,6 @@ WriteCompressedCharData(mat_t *mat,z_streamp z,void *data,int N, } while ( z->avail_out == 0 ); } break; - case MAT_T_INT8: - case MAT_T_UINT8: - { - mat_uint8_t *ptr; - mat_uint16_t c; - int i; - - /* Matlab can't read MAT_C_CHAR as uint8, needs uint16 */ - data_size = 2; - data_tag[0] = MAT_T_UINT16; - data_tag[1] = N*data_size; - z->next_in = ZLIB_BYTE_PTR(data_tag); - z->avail_in = 8; - do { - z->next_out = buf; - z->avail_out = buf_size; - deflate(z,Z_NO_FLUSH); - byteswritten += fwrite(buf,1,buf_size-z->avail_out,(FILE*)mat->fp); - } while ( z->avail_out == 0 ); - - /* exit early if this is an empty data */ - if ( NULL == data || N < 1 ) - break; - - z->next_in = (Bytef*)data; - z->avail_in = data_size*N; - ptr = (mat_uint8_t*)data; - for ( i = 0; i < N; i++ ) { - c = (mat_uint16_t)*(char *)ptr; - z->next_in = ZLIB_BYTE_PTR(&c); - z->avail_in = 2; - do { - z->next_out = buf; - z->avail_out = buf_size; - deflate(z,Z_NO_FLUSH); - byteswritten += fwrite(buf,1,buf_size-z->avail_out,(FILE*)mat->fp); - } while ( z->avail_out == 0 ); - ptr++; - } - /* Add/Compress padding to pad to 8-byte boundary */ - if ( N*data_size % 8 ) { - z->next_in = pad; - z->avail_in = 8 - (N*data_size % 8); - do { - z->next_out = buf; - z->avail_out = buf_size; - deflate(z,Z_NO_FLUSH); - byteswritten += fwrite(buf,1,buf_size-z->avail_out,(FILE*)mat->fp); - } while ( z->avail_out == 0 ); - } - break; - } case MAT_T_UNKNOWN: /* Sometimes empty char data will have MAT_T_UNKNOWN, so just write a data tag */ data_size = 2; diff --git a/test/Makefile.am b/test/Makefile.am index 1a1e9663..83881e2f 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -267,6 +267,7 @@ TEST_DATAFILES = $(srcdir)/datasets/matio_test_cases.m \ $(srcdir)/results/read-x.out \ $(srcdir)/results/readvar-write_cell_empty_struct-var1.out \ $(srcdir)/results/readvar-write_char-a.out \ + $(srcdir)/results/readvar-write_char-a-utf8.out \ $(srcdir)/results/readvar-write_complex_sparse-sparse_matrix-double.out \ $(srcdir)/results/readvar-write_complex_sparse-sparse_matrix-int8.out \ $(srcdir)/results/readvar-write_complex_sparse-sparse_matrix-int16.out \ @@ -284,6 +285,7 @@ TEST_DATAFILES = $(srcdir)/datasets/matio_test_cases.m \ $(srcdir)/results/readvar-write_empty_struct-var2.out \ $(srcdir)/results/readvar-write_empty_struct-var3.out \ $(srcdir)/results/readvar-write_empty_struct-var4.out \ + $(srcdir)/results/readvar-write_empty_struct-var4-utf8.out \ $(srcdir)/results/readvar-write_sparse-sparse_matrix-double.out \ $(srcdir)/results/readvar-write_sparse-sparse_matrix-int8.out \ $(srcdir)/results/readvar-write_sparse-sparse_matrix-int16.out \ @@ -367,6 +369,7 @@ TEST_DATAFILES = $(srcdir)/datasets/matio_test_cases.m \ $(srcdir)/results/write_struct_2d_numeric-uint32.out \ $(srcdir)/results/write_struct_2d_numeric-uint64.out \ $(srcdir)/results/write_struct_char.out \ + $(srcdir)/results/write_struct_char-utf8.out \ $(srcdir)/results/write_struct_complex_2d_numeric-double.out \ $(srcdir)/results/write_struct_complex_2d_numeric-int8.out \ $(srcdir)/results/write_struct_complex_2d_numeric-int16.out \ diff --git a/test/mat5_compressed_write.at b/test/mat5_compressed_write.at index 4fe82847..ab04cd77 100644 --- a/test/mat5_compressed_write.at +++ b/test/mat5_compressed_write.at @@ -298,7 +298,7 @@ AT_CLEANUP AT_SETUP([Write character array]) AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1]) AT_CHECK([$builddir/test_mat -v 5 -z write_char],[0],[],[]) -AT_CHECK([cp $srcdir/results/readvar-write_char-a.out expout +AT_CHECK([cp $srcdir/results/readvar-write_char-a-utf8.out expout $builddir/test_mat readvar test_write_char.mat a],[0],[expout],[]) AT_SKIP_IF([test -z "$MATLABEXE"]) AT_CHECK([cp $srcdir/matlab/test_write_char.m . @@ -316,7 +316,7 @@ AT_CHECK([cp $srcdir/results/readvar-write_empty_struct-var2.out expout $builddir/test_mat readvar test_write_empty_struct.mat var2],[0],[expout],[]) AT_CHECK([cp $srcdir/results/readvar-write_empty_struct-var3.out expout $builddir/test_mat readvar test_write_empty_struct.mat var3],[0],[expout],[]) -AT_CHECK([cp $srcdir/results/readvar-write_empty_struct-var4.out expout +AT_CHECK([cp $srcdir/results/readvar-write_empty_struct-var4-utf8.out expout $builddir/test_mat readvar test_write_empty_struct.mat var4],[0],[expout],[]) AT_SKIP_IF([test -z "$MATLABEXE"]) AT_CHECK([cp $srcdir/matlab/test_write_empty_struct.m . @@ -563,7 +563,7 @@ AT_SETUP([Write struct with character fields]) AT_KEYWORDS([bug30]) AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1]) AT_CHECK([$builddir/test_mat -v 5 -z write_struct_char],[0],[],[]) -AT_CHECK([cp $srcdir/results/write_struct_char.out expout +AT_CHECK([cp $srcdir/results/write_struct_char-utf8.out expout $builddir/test_mat readvar test_write_struct_char.mat a],[0],[expout],[]) AT_SKIP_IF([test -z "$MATLABEXE"]) AT_CHECK([cp $srcdir/matlab/test_write_struct_char.m . diff --git a/test/results/readvar-write_char-a-utf8.out b/test/results/readvar-write_char-a-utf8.out new file mode 100644 index 00000000..d72313a0 --- /dev/null +++ b/test/results/readvar-write_char-a-utf8.out @@ -0,0 +1,11 @@ + Name: a + Rank: 2 +Dimensions: 4 x 26 +Class Type: Character Array + Data Type: Unicode UTF-8 Encoded Character Data +{ +abcdefghijklmnopqrstuvwxyz +ABCDEFGHIJKLMNOPQRSTUVWXYZ +1234567890!@#$%^&*()-_=+`~ +[{]}\|;:'",<.>/? +} diff --git a/test/results/readvar-write_empty_struct-var4-utf8.out b/test/results/readvar-write_empty_struct-var4-utf8.out new file mode 100644 index 00000000..30b5a43e --- /dev/null +++ b/test/results/readvar-write_empty_struct-var4-utf8.out @@ -0,0 +1,35 @@ + Name: var4 + Rank: 2 +Dimensions: 2 x 1 +Class Type: Structure + Data Type: Structure +Fields[4] { + Name: field1 + Rank: 2 +Dimensions: 0 x 1 +Class Type: Double Precision Array + Data Type: IEEE 754 double-precision +{ +} + Name: field2 + Rank: 2 +Dimensions: 0 x 1 +Class Type: Character Array + Data Type: Unicode UTF-8 Encoded Character Data +{ +} + Name: field1 + Rank: 2 +Dimensions: 0 x 1 +Class Type: Structure + Data Type: Structure +Fields[0] { +} + Name: field2 + Rank: 2 +Dimensions: 0 x 1 +Class Type: Cell Array + Data Type: Cell Array +{ +} +} diff --git a/test/results/write_struct_char-utf8.out b/test/results/write_struct_char-utf8.out new file mode 100644 index 00000000..b7fae22d --- /dev/null +++ b/test/results/write_struct_char-utf8.out @@ -0,0 +1,39 @@ + Name: a + Rank: 2 +Dimensions: 2 x 1 +Class Type: Structure + Data Type: Structure +Fields[4] { + Name: field1 + Rank: 2 +Dimensions: 0 x 0 +Class Type: Double Precision Array + Data Type: IEEE 754 double-precision +{ +} + Name: field2 + Rank: 2 +Dimensions: 0 x 0 +Class Type: Double Precision Array + Data Type: IEEE 754 double-precision +{ +} + Name: field1 + Rank: 2 +Dimensions: 0 x 0 +Class Type: Double Precision Array + Data Type: IEEE 754 double-precision +{ +} + Name: field2 + Rank: 2 +Dimensions: 4 x 26 +Class Type: Character Array + Data Type: Unicode UTF-8 Encoded Character Data +{ +abcdefghijklmnopqrstuvwxyz +ABCDEFGHIJKLMNOPQRSTUVWXYZ +1234567890!@#$%^&*()-_=+`~ +[{]}\|;:'",<.>/? +} +}