Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/bugfix/8509_Fix_LoadAscii_Header…
Browse files Browse the repository at this point in the history
…_Skip'
  • Loading branch information
martyngigg committed Nov 25, 2013
2 parents 9cc47b2 + 939ae3b commit 76a0da3
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ namespace Mantid
//wirte the spectra to the workspace
void writeToWorkspace(API::MatrixWorkspace_sptr & localWorkspace, const size_t & numSpectra) const;
//Process the header information. This implementation just skips it entirely.
int processHeader(std::ifstream & file) const;
void processHeader(std::ifstream & file);
/// The column separator
std::string m_columnSep;

Expand Down
193 changes: 133 additions & 60 deletions Code/Mantid/Framework/DataHandling/src/LoadAscii2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,12 @@ namespace Mantid
}
else if (badLine(line))
{
throw std::runtime_error("Line " + boost::lexical_cast<std::string>(m_lineNo) + ": Unexpected character found at beginning of line. Lines must either be a single integer, a list of numeric values, blank, or a text line beginning with the specified comment indicator:" + m_comment +".");
throw std::runtime_error("Line " + boost::lexical_cast<std::string>(m_lineNo) + ": Unexpected character found at beginning of line. Lines must either be a single integer, a list of numeric values, blank, or a text line beginning with the specified comment indicator: " + m_comment +".");
}
else
{
//strictly speaking this should never be hit, but just being sure
throw std::runtime_error("Line " + boost::lexical_cast<std::string>(m_lineNo) + ": Unknown format at line. Lines must either be a single integer, a list of numeric values, blank, or a text line beginning with the specified comment indicator:" + m_comment +".");
throw std::runtime_error("Line " + boost::lexical_cast<std::string>(m_lineNo) + ": Unknown format at line. Lines must either be a single integer, a list of numeric values, blank, or a text line beginning with the specified comment indicator: " + m_comment +".");
}
}

Expand Down Expand Up @@ -250,109 +250,175 @@ namespace Mantid
*/
void LoadAscii2::setcolumns(std::ifstream & file, std::string & line, std::list<std::string> & columns)
{
size_t lineno = 0;
m_lineNo = 0;
std::vector<double> values;
//first find the first data set and set that as the template for the number of data collumns we expect from this file
while( getline(file,line) && m_baseCols == 0)
//processheader will also look for a base number of columns, to save time here if possible
//but if the user specifies a number of lines to skip that check won't happen in processheader
processHeader(file);
if (m_baseCols == 0 || m_baseCols > 4 || m_baseCols < 2)
{
lineno++;
std::string templine = line;
boost::trim(templine);
if (!templine.empty())
//first find the first data set and set that as the template for the number of data columns we expect from this file
while( getline(file,line) && (m_baseCols == 0 || m_baseCols > 4 || m_baseCols < 2))
{
if (std::isdigit(templine.at(0)))
//std::string line = line;
boost::trim(line);
if (!line.empty())
{
const int cols = splitIntoColumns(columns, templine);
//we might have the first set of values but there can't be more than 3 commas if it is
//int values = std::count(line.begin(), line.end(), ',');
if (cols > 4 || cols < 1)
{
//there were more separators than there should have been, which isn't right, or something went rather wrong
throw std::runtime_error("Sets of values must have between 1 and 3 delimiters. Found " + boost::lexical_cast<std::string>(cols) + ".");
}
else if (cols != 1)
if (std::isdigit(line.at(0)) || line.at(0) == '-' || line.at(0) == '+')
{
try
const int cols = splitIntoColumns(columns, line);
//we might have the first set of values but there can't be more than 3 commas if it is
//int values = std::count(line.begin(), line.end(), ',');
if (cols > 4 || cols < 1)
{
fillInputValues(values, columns);
//there were more separators than there should have been, which isn't right, or something went rather wrong
throw std::runtime_error("Sets of values must have between 1 and 3 delimiters. Found " + boost::lexical_cast<std::string>(cols) + ".");
}
catch(boost::bad_lexical_cast&)
else if (cols != 1)
{
continue;
try
{
fillInputValues(values, columns);
}
catch(boost::bad_lexical_cast&)
{
continue;
}
//a size of 1 is most likely a spectra ID so ignore it, a value of 2, 3 or 4 is a valid data set
m_baseCols = cols;
}
//a size of 1 is most likely a spectra ID so ignore it, a value of 2, 3 or 4 is a valid data set
m_baseCols = cols;
}
}
}
}
//make sure some valid data has been found to set the amount of columns, and the file isn't at EOF
if (m_baseCols > 4 || m_baseCols < 2 || file.eof())
{
throw std::runtime_error("No valid data in file, check separator settings or number of collumns per bin.");
}
//make sure some valid data has been found to set the amount of columns, and the file isn't at EOF
if (m_baseCols > 4 || m_baseCols < 2 || file.eof())
{
throw std::runtime_error("No valid data in file, check separator settings or number of columns per bin.");
}

//start from the top again, this time filling in the list
file.seekg(0,std::ios_base::beg);

m_lineNo = processHeader(file);
//start from the top again, this time filling in the list
file.seekg(0,std::ios_base::beg);
for(int i = 0;i < m_lineNo;i++)
{
getline(file,line);
}
}
}

/**
* Process the header information. This implementation just skips it entirely.
* @param file :: A reference to the file stream
*/
int LoadAscii2::processHeader(std::ifstream & file) const
void LoadAscii2::processHeader(std::ifstream & file)
{

// Most files will have some sort of header. If we've haven't been told how many lines to
// skip then try and guess
int numToSkip = getProperty("SkipNumLines");
size_t numCols = 0;
if( numToSkip == EMPTY_INT() )
{
const int rowsToMatch(5);
// Have a guess where the data starts. Basically say, when we have say "rowsToMatch" lines of pure numbers
// in a row then the line that started block is the top of the data
int matchingRows = 0;
size_t matchingRows = 0;
int validRows = 0;
size_t blankRows = 0;
int row = 0;
size_t numCols = 0;
std::string line;
std::vector<double> values;
while( getline(file,line) )
while( getline(file,line) && matchingRows < rowsToMatch)
{
++row;
boost::trim(line);
if( skipLine(line,true) )
{
continue;
}

std::list<std::string> columns;
size_t lineCols = this->splitIntoColumns(columns, line);
if (lineCols != 1)
size_t lineCols = 0;

if (!line.empty())
{
try
if (badLine(line))
{
matchingRows = 0;
validRows = 0;
continue;
}

//a skipped line is a valid non-data line this shouldn't be counted as a matching line
//but neither should it reset the matching counter
if (skipLine(line,true))
{
++validRows;
continue;
}
if (std::isdigit(line.at(0)) || line.at(0) == '-' || line.at(0) == '+')
{
fillInputValues(values, columns);
lineCols = this->splitIntoColumns(columns, line);
//we might have the first set of values but there can't be more than 3 delimiters if it is
if (lineCols > 4 || lineCols < 1)
{
//there were more separators than there should have been,
//which isn't right, or something went rather wrong
matchingRows = 0;
validRows = 0;
continue;
}
else if (lineCols != 1)
{
try
{
fillInputValues(values, columns);
}
catch(boost::bad_lexical_cast&)
{
matchingRows = 0;
validRows = 0;
continue;
}
//a size of 1 is most likely a spectra ID so ignore it, a value of 2, 3 or 4 is a valid data set
}
}
catch(boost::bad_lexical_cast&)
else
{
//line wasn't valid
matchingRows = 0;
validRows = 0;
continue;
}
}
if( numCols == 0 )
else
{
//an empty line is legitimate but make sure there aren't too many in sucession as we need to see data
++matchingRows;
++validRows;
++blankRows;
if (blankRows >= rowsToMatch)
{
matchingRows = 1;
validRows = 1;
}
continue;
}

if (numCols == 0 && lineCols != 1)
{
numCols = lineCols;
}
if( lineCols == m_baseCols || (lineCols == 1))

//to reduce the chance of finding problems later,
//the concurrent data should also have the same nubmer of columns
//if the data has a different number of columns to the previous lines
//start the coutner again assuming those previous lines were header info
if ( lineCols == numCols || lineCols == 1)
{
//line is valid increment the counter
++matchingRows;
if( matchingRows == rowsToMatch ) break;
++validRows;
}
else
{
numCols = lineCols;
matchingRows = 1;
validRows = 1;
}
}
// if the file does not have more than rowsToMatch + skipped lines, it will stop
Expand All @@ -361,22 +427,29 @@ namespace Mantid
if (file.eof()){
file.clear(file.eofbit);
}

//save some time in setcolumns as we've found the base columns
if (numCols > 4 || numCols < 2)
{
throw std::runtime_error("No valid data in file, check separator settings or number of columns per bin.");
}
m_baseCols = numCols;
// Seek the file pointer back to the start.
// NOTE: Originally had this as finding the stream position of the data and then moving the file pointer
// back to the start of the data. This worked when a file was read on the same platform it was written
// but failed when read on a different one due to underlying differences in the stream translation.
file.seekg(0,std::ios::beg);
// We've read the header plus the number of rowsToMatch
numToSkip = row - rowsToMatch;
// We've read the header plus a number of validRows
numToSkip = row - validRows;
}
int i(0);
m_lineNo = 0;
std::string line;
while( i < numToSkip && getline(file, line) )
while( m_lineNo < numToSkip && getline(file, line) )
{
++i;
++m_lineNo;
}
g_log.information() << "Skipped " << numToSkip << " line(s) of header information()\n";
return numToSkip;
}


Expand All @@ -395,7 +468,7 @@ namespace Mantid
//check for E and DX
switch (m_baseCols)
{
// if only 2 collumns X and Y in file, E = 0 is implicit when constructing workspace, omit DX
// if only 2 columns X and Y in file, E = 0 is implicit when constructing workspace, omit DX
case 3:
{
//E in file, include it, omit DX
Expand Down Expand Up @@ -494,12 +567,12 @@ namespace Mantid
/**
* Return true if the line doesn't start wiht a valid character.
* @param[in] line :: The line to be checked
* @return :: True if the line doesn't start wiht a valid character.
* @return :: True if the line doesn't start with a valid character.
*/
bool LoadAscii2::badLine(const std::string & line) const
{
// Empty or comment
return (!std::isdigit(line.at(0)) && line.at(0) != m_comment.at(0));
return (!(std::isdigit(line.at(0)) || line.at(0) == '-' || line.at(0) == '+') && line.at(0) != m_comment.at(0));
}

/**
Expand Down

0 comments on commit 76a0da3

Please sign in to comment.