Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added TSVSerialiser utility class. Refs #9970
This class provides useful deserialisation routines for loading and saving project files. Currently it only supports loading but it will be given support for saving in the near future. Hopefully this will help to seriously reduce the technical debt of loading and saving project files.
- Loading branch information
Harry Jeffery
committed
Jul 24, 2014
1 parent
2aa1a7a
commit d71d564
Showing
3 changed files
with
303 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,233 @@ | ||
#include "TSVSerialiser.h" | ||
|
||
#include "MantidKernel/Logger.h" | ||
|
||
#include <boost/algorithm/string.hpp> | ||
#include <boost/regex.hpp> | ||
|
||
// File-local logger named "TSVSerialiser"; parseLines() uses it to report each
// recognised line/section and any line it could not identify.
namespace | ||
{ | ||
Mantid::Kernel::Logger g_log("TSVSerialiser"); | ||
} | ||
|
||
/// Builds an empty serialiser: nothing is parsed and the read cursor sits at index 0.
TSVSerialiser::TSVSerialiser()
  : m_curIndex(0)
{}
|
||
/**
 * Builds a serialiser and immediately parses the supplied text.
 *
 * The read cursor is initialised to 0 so that using operator>> before any
 * call to selectLine() reads a well-defined index rather than an
 * indeterminate one.
 *
 * @param lines :: newline-separated text to hand to parseLines()
 */
TSVSerialiser::TSVSerialiser(std::string lines) : m_curIndex(0)
{
  parseLines(lines);
}
|
||
void TSVSerialiser::parseLines(std::string lines) | ||
{ | ||
std::vector<std::string> lineVec; | ||
boost::split(lineVec, lines, boost::is_any_of("\n")); | ||
|
||
//Clear out any old data. | ||
m_lines.clear(); | ||
m_sections.clear(); | ||
|
||
boost::regex valueLineRegex("([a-zA-Z0-9]+)\\b.*"); | ||
boost::regex closedSectionRegex("<([a-zA-Z0-9]+)>(.*)</\\1>"); | ||
boost::regex openSectionRegex("<([a-zA-Z0-9]+)>(.*)"); | ||
|
||
for(auto lineIt = lineVec.begin(); lineIt != lineVec.end(); ++lineIt) | ||
{ | ||
std::string line = *lineIt; | ||
|
||
if(line.length() == 0) | ||
continue; | ||
|
||
//Stores matched sections of a regex | ||
boost::smatch matches; | ||
|
||
//Check if this is a value line | ||
if(boost::regex_match(line, matches, valueLineRegex)) | ||
{ | ||
std::string name = matches[1].str(); | ||
|
||
m_lines[name].push_back(line); | ||
|
||
g_log.information() << "found value line with name '" << name << "'" << std::endl; | ||
continue; | ||
} | ||
|
||
//Look for lines which open and close a section in one line: <section>data</section> | ||
if(boost::regex_match(line, matches, closedSectionRegex)) | ||
{ | ||
std::string name = matches[1].str(); | ||
std::string contents = matches[2].str(); | ||
|
||
m_sections[name].push_back(contents); | ||
|
||
g_log.information() << "found closed section '" << name << "' with contents='" << contents << "'" << std::endl; | ||
continue; | ||
} | ||
|
||
//Check if this is the start of a multiline section, if so, consume the whole section. | ||
if(boost::regex_match(line, matches, openSectionRegex)) | ||
{ | ||
std::stringstream sectionSS; | ||
|
||
std::string name = matches[1].str(); | ||
std::string firstLine = matches[2].str(); | ||
|
||
//firstLine exists because of a legacy edgecase: the <folder> section keeps values on the same line as | ||
//the opening tag, so we have to be able to read that. | ||
if(firstLine.length() > 0) | ||
sectionSS << firstLine << "\n"; | ||
|
||
std::stringstream openSS; | ||
openSS << "<" << name << ">.*"; | ||
boost::regex openRegex(openSS.str()); | ||
|
||
std::stringstream closeSS; | ||
closeSS << "</" << name << ">"; | ||
boost::regex closeRegex(closeSS.str()); | ||
|
||
//Next line, to begin parsing | ||
lineIt++; | ||
|
||
//Search for opening and closing tags, counting depth and building the section string. | ||
for(int depth = 1; depth > 0 && lineIt != lineVec.end(); ++lineIt) | ||
{ | ||
line = *lineIt; | ||
//Are we going down? | ||
if(boost::regex_match(line, openRegex)) | ||
{ | ||
depth++; | ||
} else if(boost::regex_match(line, closeRegex)) | ||
{ | ||
depth--; | ||
} | ||
|
||
if(depth > 0) | ||
sectionSS << line << "\n"; | ||
} | ||
|
||
//Back to start of next line; | ||
lineIt--; | ||
|
||
std::string sectionStr = sectionSS.str(); | ||
|
||
//We drop the last character because it's a spare newline | ||
if(sectionStr.size() > 0) | ||
sectionStr.resize(sectionStr.size() - 1); | ||
|
||
m_sections[name].push_back(sectionStr); | ||
|
||
g_log.information() << "read <" << name << ">:\n---------------------------\n" << sectionSS.str() << "----------------------------" << std::endl; | ||
continue; | ||
} | ||
|
||
//If we've made it here then we don't know what kind of line this is. | ||
g_log.error() << "Unable to identify line in TSVSerialiser::parseLines(): '" << line << "'" << std::endl; | ||
} | ||
} | ||
|
||
bool TSVSerialiser::hasLine(std::string name) const | ||
{ | ||
return ( m_lines.find(name) != m_lines.end() ); | ||
} | ||
|
||
bool TSVSerialiser::hasSection(std::string name) const | ||
{ | ||
return ( m_sections.find(name) != m_sections.end() ); | ||
} | ||
|
||
std::vector<std::string> TSVSerialiser::values(std::string name, size_t i) const | ||
{ | ||
//Select correct line with lineAsString, parse it, then return values | ||
std::vector<std::string> ret; | ||
|
||
std::string line = lineAsString(name, i); | ||
boost::split(ret, line, boost::is_any_of("\t")); | ||
|
||
return ret; | ||
} | ||
|
||
std::vector<std::string> TSVSerialiser::sections(std::string name) const | ||
{ | ||
if(!hasSection(name)) | ||
return std::vector<std::string>(); | ||
|
||
return m_sections.at(name); | ||
} | ||
|
||
std::string TSVSerialiser::lineAsString(const std::string name, const size_t i) const | ||
{ | ||
if(!hasLine(name)) | ||
return ""; | ||
|
||
auto lines = m_lines.at(name); | ||
|
||
return lines[i]; | ||
} | ||
|
||
bool TSVSerialiser::selectLine(std::string name, const size_t i) | ||
{ | ||
if(!hasLine(name)) | ||
return false; | ||
|
||
if(i >= m_lines[name].size()) | ||
return false; | ||
|
||
m_curValues = values(name, i); | ||
m_curIndex = 1; //1 because we want to start on the values, not the name | ||
return true; | ||
} | ||
|
||
int TSVSerialiser::asInt(const size_t i) const | ||
{ | ||
if(i >= m_curValues.size()) | ||
return 0; | ||
|
||
std::string valStr = m_curValues.at(i); | ||
|
||
std::stringstream valSS(valStr); | ||
int ret; | ||
valSS >> ret; | ||
|
||
return ret; | ||
} | ||
|
||
double TSVSerialiser::asDouble(const size_t i) const | ||
{ | ||
if(i >= m_curValues.size()) | ||
return 0.00; | ||
|
||
std::string valStr = m_curValues.at(i); | ||
|
||
std::stringstream valSS(valStr); | ||
double ret; | ||
valSS >> ret; | ||
|
||
return ret; | ||
} | ||
|
||
std::string TSVSerialiser::asString(const size_t i) const | ||
{ | ||
if(i >= m_curValues.size()) | ||
return ""; | ||
|
||
return m_curValues.at(i); | ||
} | ||
|
||
/**
 * Reads the field under the cursor as an int, then advances the cursor.
 * @param val :: receives the value produced by asInt()
 * @return this serialiser, so extractions can be chained
 */
TSVSerialiser& TSVSerialiser::operator>>(int& val)
{
  val = asInt(m_curIndex);
  ++m_curIndex;
  return *this;
}
|
||
/**
 * Reads the field under the cursor as a double, then advances the cursor.
 * @param val :: receives the value produced by asDouble()
 * @return this serialiser, so extractions can be chained
 */
TSVSerialiser& TSVSerialiser::operator>>(double& val)
{
  val = asDouble(m_curIndex);
  ++m_curIndex;
  return *this;
}
|
||
/**
 * Reads the field under the cursor as a string, then advances the cursor.
 * @param val :: receives the value produced by asString()
 * @return this serialiser, so extractions can be chained
 */
TSVSerialiser& TSVSerialiser::operator>>(std::string& val)
{
  val = asString(m_curIndex);
  ++m_curIndex;
  return *this;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#ifndef MANTID_TSVSERIALISER_H_ | ||
#define MANTID_TSVSERIALISER_H_ | ||
|
||
#include <map> | ||
#include <string> | ||
#include <vector> | ||
|
||
/** Parses the formatting used in MantidPlot project files | ||
@author Harry Jeffery, ISIS, RAL | ||
@date 23/07/2014 | ||
Copyright © 2007-2014 ISIS Rutherford Appleton Laboratory & NScD Oak Ridge National Laboratory | ||
This file is part of Mantid. | ||
Mantid is free software; you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation; either version 3 of the License, or | ||
(at your option) any later version. | ||
Mantid is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
File change history is stored at: <https://github.com/mantidproject/mantid> | ||
*/ | ||
class TSVSerialiser
{
public:

  /// Creates an empty serialiser; call parseLines() to load data.
  TSVSerialiser();

  /// Creates a serialiser and parses the given newline-separated text.
  TSVSerialiser(std::string lines);

  /// Discards previously parsed lines and sections, then parses the given newline-separated text.
  void parseLines(std::string lines);

  /// @return true if at least one value line with this name was parsed
  bool hasLine(const std::string name) const;
  /// @return true if at least one section with this name was parsed
  bool hasSection(const std::string name) const;

  /// @return the tab-separated fields of the i-th value line stored under name
  std::vector<std::string> values(const std::string name, const size_t i = 0) const;
  /// @return the contents of every section stored under name (empty if there are none)
  std::vector<std::string> sections(const std::string name) const;

  /// @return the raw text of the i-th value line stored under name (empty string if name is unknown)
  std::string lineAsString(const std::string name, const size_t i = 0) const;

  /// Selects the i-th value line stored under name for asInt/asDouble/asString and operator>>.
  /// @return false if no such line exists
  bool selectLine(const std::string name, const size_t i = 0);

  /// @return field i of the selected line read as an int (0 if i is out of range)
  int asInt(const size_t i) const;
  /// @return field i of the selected line read as a double (0.0 if i is out of range)
  double asDouble(const size_t i) const;
  /// @return field i of the selected line (empty string if i is out of range)
  std::string asString(const size_t i) const;

  /// Reads the field under the cursor into val and advances the cursor by one.
  TSVSerialiser& operator>>(int& val);
  /// Reads the field under the cursor into val and advances the cursor by one.
  TSVSerialiser& operator>>(double& val);
  /// Reads the field under the cursor into val and advances the cursor by one.
  TSVSerialiser& operator>>(std::string& val);

private:
  /// Section contents keyed by section name, one entry per occurrence.
  std::map<std::string,std::vector<std::string> > m_sections;
  /// Raw value lines keyed by their leading name, one entry per occurrence.
  std::map<std::string,std::vector<std::string> > m_lines;

  /// Tab-separated fields of the line chosen by the last successful selectLine().
  std::vector<std::string> m_curValues;
  /// Index into m_curValues of the next field operator>> will read; unsigned
  /// because it is only ever used as a size_t index.
  size_t m_curIndex;
};
|
||
#endif |