Skip to content

Commit

Permalink
fixed invalid read in Parser, CSVFile constructor is shortened
Browse files Browse the repository at this point in the history
  • Loading branch information
gsomix committed Jul 29, 2013
1 parent e6788a0 commit 5798840
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 71 deletions.
87 changes: 42 additions & 45 deletions src/shogun/io/CSVFile.cpp
Expand Up @@ -19,16 +19,18 @@ CCSVFile::CCSVFile()
init();
}

CCSVFile::CCSVFile(FILE* f, const char* name, char delimiter, char quote) :
CCSVFile::CCSVFile(FILE* f, const char* name) :
CFile(f, name)
{
init(delimiter, quote);
init();
m_line_reader=new CLineReader(file, m_line_tokenizer);
}

CCSVFile::CCSVFile(const char* fname, char rw, const char* name, char delimiter, char quote) :
CCSVFile::CCSVFile(const char* fname, char rw, const char* name) :
CFile(fname, rw, name)
{
init(delimiter, quote);
init();
m_line_reader=new CLineReader(file, m_line_tokenizer);
}

CCSVFile::~CCSVFile()
Expand All @@ -39,14 +41,17 @@ CCSVFile::~CCSVFile()
SG_UNREF(m_line_reader);
}

void CCSVFile::set_fortran_order()
void CCSVFile::set_order(csv_data_order order)
{
m_fortran_order=true;
m_order=order;
}

void CCSVFile::set_c_order()
void CCSVFile::set_delimiter(char delimiter)
{
m_fortran_order=false;
m_tokenizer->delimiters[m_delimiter]=0;

m_delimiter=delimiter;
m_tokenizer->delimiters[m_delimiter]=1;
}

void CCSVFile::skip_lines(int32_t num_lines)
Expand All @@ -57,18 +62,10 @@ void CCSVFile::skip_lines(int32_t num_lines)

void CCSVFile::init()
{
m_tokenizer=new CDelimiterTokenizer();
m_line_tokenizer=new CDelimiterTokenizer();
m_parser=new CParser();
m_line_reader=new CLineReader();
}

void CCSVFile::init(char delimiter, char quote)
{
m_delimiter=delimiter;
m_order=FORTRAN_ORDER;

m_tokenizer=new CDelimiterTokenizer(true);
m_tokenizer->delimiters[delimiter]=1;
m_tokenizer->delimiters[m_delimiter]=1;
m_tokenizer->delimiters[' ']=1;

m_line_tokenizer=new CDelimiterTokenizer(true);
Expand All @@ -77,7 +74,7 @@ void CCSVFile::init(char delimiter, char quote)
m_parser=new CParser();
m_parser->set_tokenizer(m_tokenizer);

m_line_reader=new CLineReader(file, m_line_tokenizer);
m_line_reader=new CLineReader();
}

#define GET_VECTOR(fname, read_func, sg_type) \
Expand Down Expand Up @@ -150,7 +147,7 @@ void CCSVFile::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
nlines++; \
} \
\
if (m_fortran_order) \
if (m_order==FORTRAN_ORDER) \
{ \
num_feat=nlines; \
num_vec=ntokens; \
Expand Down Expand Up @@ -188,24 +185,24 @@ void CCSVFile::fname(const sg_type* vector, int32_t len) \
fprintf(file, "\n"); \
}

SET_VECTOR(set_vector, %hhd, int8_t)
SET_VECTOR(set_vector, %hhu, uint8_t)
SET_VECTOR(set_vector, %hhd, char)
SET_VECTOR(set_vector, %d, int32_t)
SET_VECTOR(set_vector, %u, uint32_t)
SET_VECTOR(set_vector, %ld, int64_t)
SET_VECTOR(set_vector, %lu, uint64_t)
SET_VECTOR(set_vector, %e, float32_t)
SET_VECTOR(set_vector, %e, float64_t)
SET_VECTOR(set_vector, %Le, floatmax_t)
SET_VECTOR(set_vector, %hd, int16_t)
SET_VECTOR(set_vector, %hu, uint16_t)
SET_VECTOR(set_vector, "%" #SCNi8, int8_t)
SET_VECTOR(set_vector, "%" #SCNu8, uint8_t)
SET_VECTOR(set_vector, "%" #SCNu8, char)
SET_VECTOR(set_vector, "%" #SCNi32, int32_t)
SET_VECTOR(set_vector, "%" #SCNu32, uint32_t)
SET_VECTOR(set_vector, "%" #SCNi64, int64_t)
SET_VECTOR(set_vector, "%" #SCNu64, uint64_t)
SET_VECTOR(set_vector, %g, float32_t)
SET_VECTOR(set_vector, %lg, float64_t)
SET_VECTOR(set_vector, %Lg, floatmax_t)
SET_VECTOR(set_vector, "%" #SCNi16, int16_t)
SET_VECTOR(set_vector, "%" #SCNu16, uint16_t)
#undef SET_VECTOR

#define SET_MATRIX(fname, format, sg_type) \
void CCSVFile::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
{ \
if (m_fortran_order) \
if (m_order==FORTRAN_ORDER) \
{ \
for (int32_t i=0; i<num_feat; i++) \
{ \
Expand All @@ -225,16 +222,16 @@ void CCSVFile::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
} \
}

SET_MATRIX(set_matrix, %hhd, int8_t)
SET_MATRIX(set_matrix, %hhu, uint8_t)
SET_MATRIX(set_matrix, %hhd, char)
SET_MATRIX(set_matrix, %d, int32_t)
SET_MATRIX(set_matrix, %u, uint32_t)
SET_MATRIX(set_matrix, %ld, int64_t)
SET_MATRIX(set_matrix, %lu, uint64_t)
SET_MATRIX(set_matrix, %e, float32_t)
SET_MATRIX(set_matrix, %e, float64_t)
SET_MATRIX(set_matrix, %Le, floatmax_t)
SET_MATRIX(set_matrix, %hd, int16_t)
SET_MATRIX(set_matrix, %hu, uint16_t)
SET_MATRIX(set_matrix, SCNi8, int8_t)
SET_MATRIX(set_matrix, SCNu8, uint8_t)
SET_MATRIX(set_matrix, SCNu8, char)
SET_MATRIX(set_matrix, SCNi32, int32_t)
SET_MATRIX(set_matrix, SCNu32, uint32_t)
SET_MATRIX(set_matrix, SCNi64, int64_t)
SET_MATRIX(set_matrix, SCNu64, uint64_t)
SET_MATRIX(set_matrix, %g, float32_t)
SET_MATRIX(set_matrix, %lg, float64_t)
SET_MATRIX(set_matrix, %Lg, floatmax_t)
SET_MATRIX(set_matrix, SCNi16, int16_t)
SET_MATRIX(set_matrix, SCNu16, uint16_t)
#undef SET_MATRIX
25 changes: 14 additions & 11 deletions src/shogun/io/CSVFile.h
Expand Up @@ -22,6 +22,12 @@
namespace shogun
{

/** order in which matrix data is stored in a CSV file */
enum csv_data_order
{
	/** column-major (Fortran) order */
	FORTRAN_ORDER=0,
	/** row-major (C) order */
	C_ORDER=1
};

/** @brief Class CSVFile reads and writes vectors and matrices stored as
 * delimiter-separated text.
 */
class CCSVFile : public CFile
Expand All @@ -31,19 +37,19 @@ class CCSVFile : public CFile
CCSVFile();

/** */
CCSVFile(FILE* f, const char* name=NULL, char delimiter=',', char quote='"');
CCSVFile(FILE* f, const char* name=NULL);

/** */
CCSVFile(const char* fname, char rw='r', const char* name=NULL, char delimiter=',', char quote='"');
CCSVFile(const char* fname, char rw='r', const char* name=NULL);

/** destructor */
virtual ~CCSVFile();

/** set column-major order for data in file */
void set_fortran_order();
/** set order for data in file */
void set_order(csv_data_order order);

/** set row-major order for data in file */
void set_c_order();
/** set delimiting character */
void set_delimiter(char delimiter);

/** skip lines
*
Expand Down Expand Up @@ -344,9 +350,6 @@ class CCSVFile : public CFile
/** class initialization */
void init();

/** class initialization */
void init(char delimiter, char quote);

private:
/** object for reading lines from file */
CLineReader* m_line_reader;
Expand All @@ -360,8 +363,8 @@ class CCSVFile : public CFile
/** tokenizer for parser */
CDelimiterTokenizer* m_tokenizer;

/** column-major order? */
bool m_fortran_order;
/** data order */
csv_data_order m_order;

/** delimiter */
char m_delimiter;
Expand Down
23 changes: 20 additions & 3 deletions src/shogun/io/Parser.cpp
Expand Up @@ -60,9 +60,26 @@ SGVector<char> CParser::read_string()
return result;
}

/** read the next token as a zero-terminated string
 *
 * Asks the tokenizer for the bounds of the next token, copies those
 * characters out of the parsed text and appends a terminating '\0', so
 * the result can be handed to C string functions such as strtol().
 * The vector is created with one element more than the token has
 * characters.
 *
 * NOTE(review): because of the extra terminator slot the vector is created
 * with length >= 1 even for an empty token, so a `vlen>0` test on the
 * result presumably never detects an empty token - confirm in callers.
 *
 * @return vector holding the token characters followed by '\0'
 */
SGVector<char> CParser::read_cstring()
{
	// token_begin is handed to the tokenizer, which returns the end index
	// (presumably it also updates token_begin in place - TODO confirm)
	index_t token_begin=0;
	const index_t token_end=m_tokenizer->next_token_idx(token_begin);
	const index_t token_len=token_end-token_begin;

	// one extra element for the terminating zero
	SGVector<char> cstring(token_len+1);
	for (index_t offset=0; offset<token_len; offset++)
		cstring[offset]=m_text[token_begin+offset];

	cstring[token_len]='\0';
	return cstring;
}

bool CParser::read_bool()
{
SGVector<char> token=read_string();
SGVector<char> token=read_cstring();

if (token.vlen>0)
return (bool) strtol(token.vector, NULL, 10);
Expand All @@ -73,7 +90,7 @@ bool CParser::read_bool()
#define READ_INT_METHOD(fname, convf, sg_type) \
sg_type CParser::fname(int32_t base) \
{ \
SGVector<char> token=read_string(); \
SGVector<char> token=read_cstring(); \
\
if (token.vlen>0) \
return (sg_type) convf(token.vector, NULL, base); \
Expand All @@ -94,7 +111,7 @@ READ_INT_METHOD(read_ulong, strtoull, uint64_t)
#define READ_REAL_METHOD(fname, convf, sg_type) \
sg_type CParser::fname() \
{ \
SGVector<char> token=read_string(); \
SGVector<char> token=read_cstring(); \
\
if (token.vlen>0) \
return (sg_type) convf(token.vector, NULL); \
Expand Down
3 changes: 3 additions & 0 deletions src/shogun/io/Parser.h
Expand Up @@ -44,6 +44,9 @@ class CParser : public CSGObject
/** read string */
virtual SGVector<char> read_string();

/** read zero-terminated string */
virtual SGVector<char> read_cstring();

/** read one of the several base data types. */
//@{
virtual bool read_bool();
Expand Down
30 changes: 18 additions & 12 deletions tests/unit/io/CSVFile_unittest.cc
Expand Up @@ -40,9 +40,10 @@ TEST(CSVFileTest, read_matrix)
CCSVFile* fin;

// try read in fortran order
fin=new CCSVFile("csvfile_test.csv",'r', NULL, '|', '"');
fin=new CCSVFile("csvfile_test.csv",'r', NULL);
fin->set_delimiter('|');
fin->skip_lines(2);
fin->set_fortran_order();
fin->set_order(FORTRAN_ORDER);

fin->get_matrix(tmp.matrix, tmp.num_cols, tmp.num_rows);
EXPECT_EQ(tmp.num_rows, nfeats);
Expand All @@ -58,9 +59,10 @@ TEST(CSVFileTest, read_matrix)
SG_UNREF(fin);

// try read in c order
fin=new CCSVFile("csvfile_test.csv",'r', NULL, '|', '"');
fin=new CCSVFile("csvfile_test.csv",'r', NULL);
fin->set_delimiter('|');
fin->skip_lines(2);
fin->set_c_order();
fin->set_order(C_ORDER);

fin->get_matrix(tmp.matrix, tmp.num_cols, tmp.num_rows);
EXPECT_EQ(tmp.num_rows, nvecs);
Expand Down Expand Up @@ -93,13 +95,15 @@ TEST(CSVFileTest, write_matrix)
SGMatrix<float64_t> tmp(true);

// try write/read in fortran order
fout=new CCSVFile("csvfile_test.csv",'w', NULL, '|', '"');
fout->set_fortran_order();
fout=new CCSVFile("csvfile_test.csv",'w', NULL);
fout->set_delimiter('|');
fout->set_order(FORTRAN_ORDER);
fout->set_matrix(fortran_order_data, nvecs, nfeats);
SG_UNREF(fout);

fin=new CCSVFile("csvfile_test.csv",'r', NULL, '|', '"');
fin->set_fortran_order();
fin=new CCSVFile("csvfile_test.csv",'r', NULL);
fin->set_delimiter('|');
fin->set_order(FORTRAN_ORDER);

fin->get_matrix(tmp.matrix, tmp.num_cols, tmp.num_rows);
EXPECT_EQ(tmp.num_rows, nfeats);
Expand All @@ -115,13 +119,15 @@ TEST(CSVFileTest, write_matrix)
SG_UNREF(fin);

// try write/read in c order
fout=new CCSVFile("csvfile_test.csv",'w', NULL, '|', '"');
fout->set_c_order();
fout=new CCSVFile("csvfile_test.csv",'w', NULL);
fout->set_delimiter('|');
fout->set_order(C_ORDER);
fout->set_matrix(fortran_order_data, nvecs, nfeats);
SG_UNREF(fout);

fin=new CCSVFile("csvfile_test.csv",'r', NULL, '|', '"');
fin->set_c_order();
fin=new CCSVFile("csvfile_test.csv",'r', NULL);
fin->set_delimiter('|');
fin->set_order(C_ORDER);

fin->get_matrix(tmp.matrix, tmp.num_cols, tmp.num_rows);
EXPECT_EQ(tmp.num_rows, nfeats);
Expand Down

0 comments on commit 5798840

Please sign in to comment.