Skip to content

Commit

Permalink
Fix reading of Unicode-encoded character arrays from v7 MAT files
Browse files Browse the repository at this point in the history
As reported by #79
  • Loading branch information
tbeu committed Jan 25, 2018
1 parent 76e04df commit a5795ef
Show file tree
Hide file tree
Showing 11 changed files with 291 additions and 38 deletions.
23 changes: 13 additions & 10 deletions src/mat5.c
Original file line number Diff line number Diff line change
Expand Up @@ -3298,18 +3298,21 @@ Read5(mat_t *mat, matvar_t *matvar)
nBytes = tag[1];
}
}
/* FIXME: */
matvar->data_type = MAT_T_UINT8;
if ( nBytes == 0 ) {
matvar->nbytes = 0;
matvar->data = calloc(1,1);
break;
if ( matvar->compression == MAT_COMPRESSION_NONE ) {
matvar->data_type = MAT_T_UINT8;
matvar->data_size = (int)Mat_SizeOf(MAT_T_UINT8);
matvar->nbytes = len*matvar->data_size;
} else {
matvar->data_type = packed_type;
matvar->data_size = (int)Mat_SizeOf(matvar->data_type);
matvar->nbytes = nBytes;
}
matvar->data_size = sizeof(char);
matvar->nbytes = len*matvar->data_size;
matvar->data = calloc(matvar->nbytes+1,1);
matvar->data = calloc(matvar->nbytes+1,1);
if ( NULL == matvar->data ) {
Mat_Critical("Failed to allocate %d bytes",matvar->nbytes);
Mat_Critical("Failed to allocate %d byte(s)",matvar->nbytes + 1);
break;
}
if ( 0 == matvar->nbytes ) {
break;
}
if ( matvar->compression == MAT_COMPRESSION_NONE ) {
Expand Down
48 changes: 32 additions & 16 deletions src/read_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,7 @@ int
ReadCompressedCharData(mat_t *mat,z_streamp z,char *data,
enum matio_types data_type,int len)
{
int nBytes = 0, i;
int nBytes = 0;
unsigned int data_size;

if ( mat == NULL || data == NULL || mat->fp == NULL )
Expand All @@ -1363,24 +1363,24 @@ ReadCompressedCharData(mat_t *mat,z_streamp z,char *data,
InflateData(mat,z,data,len*data_size);
break;
case MAT_T_UTF16:
case MAT_T_INT16:
case MAT_T_UINT16:
{
mat_uint16_t i16;

InflateData(mat,z,data,len*data_size);
if ( mat->byteswap ) {
int i;
for ( i = 0; i < len; i++ ) {
InflateData(mat,z,&i16,data_size);
data[i] = (char)Mat_uint16Swap(&i16);
Mat_uint16Swap((mat_uint16_t*)&data[2*i]);
}
} else {
}
break;
case MAT_T_INT16:
InflateData(mat,z,data,len*data_size);
if ( mat->byteswap ) {
int i;
for ( i = 0; i < len; i++ ) {
InflateData(mat,z,&i16,data_size);
data[i] = (char)i16;
Mat_int16Swap((mat_int16_t*)&data[2*i]);
}
}
break;
}
default:
Mat_Warning("ReadCompressedCharData: %d is not a supported data "
"type for character data", data_type);
Expand All @@ -1394,7 +1394,7 @@ ReadCompressedCharData(mat_t *mat,z_streamp z,char *data,
int
ReadCharData(mat_t *mat,char *data,enum matio_types data_type,int len)
{
int bytesread = 0, i;
int bytesread = 0;
unsigned int data_size;

if ( (mat == NULL) || (data == NULL) || (mat->fp == NULL) )
Expand All @@ -1409,15 +1409,31 @@ ReadCharData(mat_t *mat,char *data,enum matio_types data_type,int len)
bytesread += fread(data,data_size,len,(FILE*)mat->fp);
break;
case MAT_T_UTF16:
case MAT_T_INT16:
case MAT_T_UINT16:
{
mat_uint16_t i16;

mat_uint16_t ui16;
int i;
if ( mat->byteswap ) {
for ( i = 0; i < len; i++ ) {
bytesread += fread(&ui16,data_size,1,(FILE*)mat->fp);
data[i] = (char)Mat_uint16Swap(&ui16);
}
} else {
for ( i = 0; i < len; i++ ) {
bytesread += fread(&ui16,data_size,1,(FILE*)mat->fp);
data[i] = (char)ui16;
}
}
break;
}
case MAT_T_INT16:
{
mat_int16_t i16;
int i;
if ( mat->byteswap ) {
for ( i = 0; i < len; i++ ) {
bytesread += fread(&i16,data_size,1,(FILE*)mat->fp);
data[i] = (char)Mat_uint16Swap(&i16);
data[i] = (char)Mat_int16Swap(&i16);
}
} else {
for ( i = 0; i < len; i++ ) {
Expand Down
6 changes: 6 additions & 0 deletions test/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,11 @@ TEST_DATAFILES = $(srcdir)/datasets/matio_test_cases.m \
$(srcdir)/results/read-var22.out \
$(srcdir)/results/read-var23.out \
$(srcdir)/results/read-var24.out \
$(srcdir)/results/read-var24-utf8.out \
$(srcdir)/results/read-var25.out \
$(srcdir)/results/read-var26.out \
$(srcdir)/results/read-var27.out \
$(srcdir)/results/read-var27-utf8.out \
$(srcdir)/results/read-var28.out \
$(srcdir)/results/read-var29.out \
$(srcdir)/results/read-var30.out \
Expand All @@ -236,6 +238,7 @@ TEST_DATAFILES = $(srcdir)/datasets/matio_test_cases.m \
$(srcdir)/results/read-var48.out \
$(srcdir)/results/read-var49.out \
$(srcdir)/results/read-var50.out \
$(srcdir)/results/read-var50-utf8.out \
$(srcdir)/results/read-var51.out \
$(srcdir)/results/read-var52.out \
$(srcdir)/results/read-var53.out \
Expand All @@ -251,10 +254,13 @@ TEST_DATAFILES = $(srcdir)/datasets/matio_test_cases.m \
$(srcdir)/results/read-var63.out \
$(srcdir)/results/read-var64.out \
$(srcdir)/results/read-var65.out \
$(srcdir)/results/read-var65-utf8.out \
$(srcdir)/results/read-var66.out \
$(srcdir)/results/read-var66-utf8.out \
$(srcdir)/results/read-var67.out \
$(srcdir)/results/read-var68.out \
$(srcdir)/results/read-var69.out \
$(srcdir)/results/read-var69-utf8.out \
$(srcdir)/results/read-var90.out \
$(srcdir)/results/read-var91.out \
$(srcdir)/results/read-var92.out \
Expand Down
12 changes: 6 additions & 6 deletions test/mat5_compressed_read_be.at
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ AT_CLEANUP

AT_SETUP([Read 2D character array])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var24.out expout
AT_CHECK([cp $srcdir/results/read-var24-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_be.mat var24],[0],[expout],[])
AT_CLEANUP

Expand All @@ -194,7 +194,7 @@ AT_CLEANUP
AT_SETUP([Read struct with empty fields])
AT_KEYWORDS([empty])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var27.out expout
AT_CHECK([cp $srcdir/results/read-var27-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_be.mat var27],[0],[expout],[])
AT_CLEANUP

Expand Down Expand Up @@ -338,7 +338,7 @@ AT_CLEANUP

AT_SETUP([Read struct with character fields])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var50.out expout
AT_CHECK([cp $srcdir/results/read-var50-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_be.mat var50],[0],[expout],[])
AT_CLEANUP

Expand Down Expand Up @@ -436,14 +436,14 @@ AT_CLEANUP

AT_SETUP([Read cell array with character arrays])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var65.out expout
AT_CHECK([cp $srcdir/results/read-var65-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_be.mat var65],[0],[expout],[])
AT_CLEANUP

AT_SETUP([Read cell array with empty structs])
AT_KEYWORDS([empty])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var66.out expout
AT_CHECK([cp $srcdir/results/read-var66-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_be.mat var66],[0],[expout],[])
AT_CLEANUP

Expand All @@ -461,7 +461,7 @@ AT_CLEANUP

AT_SETUP([Read cell array with structs (character fields)])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var69.out expout
AT_CHECK([cp $srcdir/results/read-var69-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_be.mat var69],[0],[expout],[])
AT_CLEANUP

Expand Down
12 changes: 6 additions & 6 deletions test/mat5_compressed_read_le.at
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ AT_CLEANUP

AT_SETUP([Read 2D character array])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var24.out expout
AT_CHECK([cp $srcdir/results/read-var24-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_le.mat var24],[0],[expout],[])
AT_CLEANUP

Expand All @@ -194,7 +194,7 @@ AT_CLEANUP
AT_SETUP([Read struct with empty fields])
AT_KEYWORDS([empty])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var27.out expout
AT_CHECK([cp $srcdir/results/read-var27-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_le.mat var27],[0],[expout],[])
AT_CLEANUP

Expand Down Expand Up @@ -338,7 +338,7 @@ AT_CLEANUP

AT_SETUP([Read struct with character fields])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var50.out expout
AT_CHECK([cp $srcdir/results/read-var50-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_le.mat var50],[0],[expout],[])
AT_CLEANUP

Expand Down Expand Up @@ -436,14 +436,14 @@ AT_CLEANUP

AT_SETUP([Read cell array with character arrays])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var65.out expout
AT_CHECK([cp $srcdir/results/read-var65-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_le.mat var65],[0],[expout],[])
AT_CLEANUP

AT_SETUP([Read cell array with empty structs])
AT_KEYWORDS([empty])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var66.out expout
AT_CHECK([cp $srcdir/results/read-var66-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_le.mat var66],[0],[expout],[])
AT_CLEANUP

Expand All @@ -461,7 +461,7 @@ AT_CLEANUP

AT_SETUP([Read cell array with structs (character fields)])
AT_SKIP_IF([test $COMPRESSION_ZLIB -ne 1])
AT_CHECK([cp $srcdir/results/read-var69.out expout
AT_CHECK([cp $srcdir/results/read-var69-utf8.out expout
$builddir/test_mat readvar $srcdir/datasets/matio_test_cases_compressed_le.mat var69],[0],[expout],[])
AT_CLEANUP

Expand Down
11 changes: 11 additions & 0 deletions test/results/read-var24-utf8.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Name: var24
Rank: 2
Dimensions: 4 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
1234567890!@#$%^&*()-_=+`~
[{]}\|;:'",<.>/?
}
35 changes: 35 additions & 0 deletions test/results/read-var27-utf8.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Name: var27
Rank: 2
Dimensions: 1 x 2
Class Type: Structure
Data Type: Structure
Fields[4] {
Name: field1
Rank: 2
Dimensions: 0 x 1
Class Type: Double Precision Array
Data Type: IEEE 754 double-precision
{
}
Name: field2
Rank: 2
Dimensions: 0 x 1
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
}
Name: field1
Rank: 2
Dimensions: 0 x 1
Class Type: Structure
Data Type: Structure
Fields[0] {
}
Name: field2
Rank: 2
Dimensions: 0 x 1
Class Type: Cell Array
Data Type: Cell Array
{
}
}
39 changes: 39 additions & 0 deletions test/results/read-var50-utf8.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
Name: var50
Rank: 2
Dimensions: 2 x 1
Class Type: Structure
Data Type: Structure
Fields[4] {
Name: field1
Rank: 2
Dimensions: 1 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
abcdefghijklmnopqrstuvwxyz
}
Name: field2
Rank: 2
Dimensions: 1 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
ABCDEFGHIJKLMNOPQRSTUVWXYZ
}
Name: field1
Rank: 2
Dimensions: 1 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
1234567890!@#$%^&*()-_=+`~
}
Name: field2
Rank: 2
Dimensions: 1 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
[{]}\|;:'",<.>/?
}
}
35 changes: 35 additions & 0 deletions test/results/read-var65-utf8.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Name: var65
Rank: 2
Dimensions: 2 x 2
Class Type: Cell Array
Data Type: Cell Array
{
Rank: 2
Dimensions: 1 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
abcdefghijklmnopqrstuvwxyz
}
Rank: 2
Dimensions: 1 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
ABCDEFGHIJKLMNOPQRSTUVWXYZ
}
Rank: 2
Dimensions: 1 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
1234567890!@#$%^&*()-_=+`~
}
Rank: 2
Dimensions: 1 x 26
Class Type: Character Array
Data Type: Unicode UTF-8 Encoded Character Data
{
[{]}\|;:'",<.>/?
}
}

0 comments on commit a5795ef

Please sign in to comment.