Skip to content

Commit

Permalink
Fix writing of Unicode-encoded character arrays to v7 MAT files
Browse files Browse the repository at this point in the history
As reported in issue #79.
  • Loading branch information
tbeu committed Jan 25, 2018
1 parent a5795ef commit 5ace2d2
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 56 deletions.
59 changes: 6 additions & 53 deletions src/mat5.c
Expand Up @@ -533,10 +533,15 @@ WriteCompressedCharData(mat_t *mat,z_streamp z,void *data,int N,
return 0;

switch ( data_type ) {
case MAT_T_INT8:
case MAT_T_UINT8:
case MAT_T_INT16:
case MAT_T_UINT16:
case MAT_T_UTF8:
case MAT_T_UTF16:
data_size = Mat_SizeOf(data_type);
data_tag[0] = data_type;
data_tag[0] = (MAT_T_INT8 == data_type || MAT_T_UINT8 == data_type) ?
MAT_T_UTF8 : data_type;
data_tag[1] = N*data_size;
z->next_in = ZLIB_BYTE_PTR(data_tag);
z->avail_in = 8;
Expand Down Expand Up @@ -571,58 +576,6 @@ WriteCompressedCharData(mat_t *mat,z_streamp z,void *data,int N,
} while ( z->avail_out == 0 );
}
break;
case MAT_T_INT8:
case MAT_T_UINT8:
{
mat_uint8_t *ptr;
mat_uint16_t c;
int i;

/* Matlab can't read MAT_C_CHAR as uint8, needs uint16 */
data_size = 2;
data_tag[0] = MAT_T_UINT16;
data_tag[1] = N*data_size;
z->next_in = ZLIB_BYTE_PTR(data_tag);
z->avail_in = 8;
do {
z->next_out = buf;
z->avail_out = buf_size;
deflate(z,Z_NO_FLUSH);
byteswritten += fwrite(buf,1,buf_size-z->avail_out,(FILE*)mat->fp);
} while ( z->avail_out == 0 );

/* exit early if this is an empty data */
if ( NULL == data || N < 1 )
break;

z->next_in = (Bytef*)data;
z->avail_in = data_size*N;
ptr = (mat_uint8_t*)data;
for ( i = 0; i < N; i++ ) {
c = (mat_uint16_t)*(char *)ptr;
z->next_in = ZLIB_BYTE_PTR(&c);
z->avail_in = 2;
do {
z->next_out = buf;
z->avail_out = buf_size;
deflate(z,Z_NO_FLUSH);
byteswritten += fwrite(buf,1,buf_size-z->avail_out,(FILE*)mat->fp);
} while ( z->avail_out == 0 );
ptr++;
}
/* Add/Compress padding to pad to 8-byte boundary */
if ( N*data_size % 8 ) {
z->next_in = pad;
z->avail_in = 8 - (N*data_size % 8);
do {
z->next_out = buf;
z->avail_out = buf_size;
deflate(z,Z_NO_FLUSH);
byteswritten += fwrite(buf,1,buf_size-z->avail_out,(FILE*)mat->fp);
} while ( z->avail_out == 0 );
}
break;
}
case MAT_T_UNKNOWN:
/* Sometimes empty char data will have MAT_T_UNKNOWN, so just write a data tag */
data_size = 2;
Expand Down
3 changes: 3 additions & 0 deletions test/Makefile.am
Expand Up @@ -267,6 +267,7 @@ TEST_DATAFILES = $(srcdir)/datasets/matio_test_cases.m \
$(srcdir)/results/read-x.out \
$(srcdir)/results/readvar-write_cell_empty_struct-var1.out \
$(srcdir)/results/readvar-write_char-a.out \
$(srcdir)/results/readvar-write_char-a-utf8.out \
$(srcdir)/results/readvar-write_complex_sparse-sparse_matrix-double.out \
$(srcdir)/results/readvar-write_complex_sparse-sparse_matrix-int8.out \
$(srcdir)/results/readvar-write_complex_sparse-sparse_matrix-int16.out \
Expand All @@ -284,6 +285,7 @@ TEST_DATAFILES = $(srcdir)/datasets/matio_test_cases.m \
$(srcdir)/results/readvar-write_empty_struct-var2.out \
$(srcdir)/results/readvar-write_empty_struct-var3.out \
$(srcdir)/results/readvar-write_empty_struct-var4.out \
$(srcdir)/results/readvar-write_empty_struct-var4-utf8.out \
$(srcdir)/results/readvar-write_sparse-sparse_matrix-double.out \
$(srcdir)/results/readvar-write_sparse-sparse_matrix-int8.out \
$(srcdir)/results/readvar-write_sparse-sparse_matrix-int16.out \
Expand Down Expand Up @@ -367,6 +369,7 @@ TEST_DATAFILES = $(srcdir)/datasets/matio_test_cases.m \
$(srcdir)/results/write_struct_2d_numeric-uint32.out \
$(srcdir)/results/write_struct_2d_numeric-uint64.out \
$(srcdir)/results/write_struct_char.out \
$(srcdir)/results/write_struct_char-utf8.out \
$(srcdir)/results/write_struct_complex_2d_numeric-double.out \
$(srcdir)/results/write_struct_complex_2d_numeric-int8.out \
$(srcdir)/results/write_struct_complex_2d_numeric-int16.out \
Expand Down
6 changes: 3 additions & 3 deletions test/mat5_compressed_write.at
Expand Up @@ -298,7 +298,7 @@ AT_CLEANUP
AT_SETUP([Write character array])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([$builddir/test_mat -v 5 -z write_char],[0],[],[])
AT_CHECK([cp $srcdir/results/readvar-write_char-a.out expout
AT_CHECK([cp $srcdir/results/readvar-write_char-a-utf8.out expout
$builddir/test_mat readvar test_write_char.mat a],[0],[expout],[])
AT_SKIP_IF([test -z "$MATLABEXE"])
AT_CHECK([cp $srcdir/matlab/test_write_char.m .
Expand All @@ -316,7 +316,7 @@ AT_CHECK([cp $srcdir/results/readvar-write_empty_struct-var2.out expout
$builddir/test_mat readvar test_write_empty_struct.mat var2],[0],[expout],[])
AT_CHECK([cp $srcdir/results/readvar-write_empty_struct-var3.out expout
$builddir/test_mat readvar test_write_empty_struct.mat var3],[0],[expout],[])
AT_CHECK([cp $srcdir/results/readvar-write_empty_struct-var4.out expout
AT_CHECK([cp $srcdir/results/readvar-write_empty_struct-var4-utf8.out expout
$builddir/test_mat readvar test_write_empty_struct.mat var4],[0],[expout],[])
AT_SKIP_IF([test -z "$MATLABEXE"])
AT_CHECK([cp $srcdir/matlab/test_write_empty_struct.m .
Expand Down Expand Up @@ -563,7 +563,7 @@ AT_SETUP([Write struct with character fields])
AT_KEYWORDS([bug30])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([$builddir/test_mat -v 5 -z write_struct_char],[0],[],[])
AT_CHECK([cp $srcdir/results/write_struct_char.out expout
AT_CHECK([cp $srcdir/results/write_struct_char-utf8.out expout
$builddir/test_mat readvar test_write_struct_char.mat a],[0],[expout],[])
AT_SKIP_IF([test -z "$MATLABEXE"])
AT_CHECK([cp $srcdir/matlab/test_write_struct_char.m .
Expand Down
11 changes: 11 additions & 0 deletions test/results/readvar-write_char-a-utf8.out
@@ -0,0 +1,11 @@
Name: a
Rank: 2
Dimensions: 4 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
1234567890!@#$%^&*()-_=+`~
[{]}\|;:'",<.>/?
}
35 changes: 35 additions & 0 deletions test/results/readvar-write_empty_struct-var4-utf8.out
@@ -0,0 +1,35 @@
Name: var4
Rank: 2
Dimensions: 2 x 1
Class Type: Structure
Data Type: Structure
Fields[4] {
Name: field1
Rank: 2
Dimensions: 0 x 1
Class Type: Double Precision Array
Data Type: IEEE 754 double-precision
{
}
Name: field2
Rank: 2
Dimensions: 0 x 1
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
}
Name: field1
Rank: 2
Dimensions: 0 x 1
Class Type: Structure
Data Type: Structure
Fields[0] {
}
Name: field2
Rank: 2
Dimensions: 0 x 1
Class Type: Cell Array
Data Type: Cell Array
{
}
}
39 changes: 39 additions & 0 deletions test/results/write_struct_char-utf8.out
@@ -0,0 +1,39 @@
Name: a
Rank: 2
Dimensions: 2 x 1
Class Type: Structure
Data Type: Structure
Fields[4] {
Name: field1
Rank: 2
Dimensions: 0 x 0
Class Type: Double Precision Array
Data Type: IEEE 754 double-precision
{
}
Name: field2
Rank: 2
Dimensions: 0 x 0
Class Type: Double Precision Array
Data Type: IEEE 754 double-precision
{
}
Name: field1
Rank: 2
Dimensions: 0 x 0
Class Type: Double Precision Array
Data Type: IEEE 754 double-precision
{
}
Name: field2
Rank: 2
Dimensions: 4 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
1234567890!@#$%^&*()-_=+`~
[{]}\|;:'",<.>/?
}
}

0 comments on commit 5ace2d2

Please sign in to comment.