Skip to content

Commit

Permalink
Merge pull request #960 from leethomason/kcsaul-pedantic-whitespace
Browse files Browse the repository at this point in the history
Integrate branch with Pedantic whitespace
  • Loading branch information
leethomason committed Nov 21, 2023
2 parents 4c27b08 + 8d3cdf5 commit bfc4ac4
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 15 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Expand Up @@ -6,7 +6,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ windows-2019, macos-10.15, ubuntu-20.04 ]
os: [ windows-2019, macos-latest, ubuntu-20.04 ]
cmake: [ 3.15, 3.x ]
include:
- os: windows-2019
Expand All @@ -17,7 +17,7 @@ jobs:
- os: ubuntu-20.04
tree: tree

- os: macos-10.15
- os: macos-latest
tree: find

- cmake: 3.15
Expand Down
18 changes: 12 additions & 6 deletions readme.md
@@ -1,9 +1,7 @@
TinyXML-2
=========

![Build](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg)

![TinyXML-2 Logo](http://www.grinninglizard.com/tinyxml2/TinyXML2_small.png)
[![Test](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg)](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml)

TinyXML-2 is a simple, small, efficient, C++ XML parser that can be
easily integrated into other programs.
Expand Down Expand Up @@ -93,7 +91,7 @@ by the Document. When the Document is deleted, so are all the nodes it contains.

### White Space

#### Whitespace Preservation (default)
#### Whitespace Preservation (default, PRESERVE_WHITESPACE)

Microsoft has an excellent article on white space: http://msdn.microsoft.com/en-us/library/ms256097.aspx

Expand Down Expand Up @@ -125,7 +123,7 @@ valuable. TinyXML-2 sees these as the same XML:

<document><data>1</data><data>2</data><data>3</data></document>

#### Whitespace Collapse
#### Whitespace Collapse (COLLAPSE_WHITESPACE)

For some applications, it is preferable to collapse whitespace. Collapsing
whitespace gives you "HTML-like" behavior, which is sometimes more suitable
Expand All @@ -143,7 +141,15 @@ However, you may also use COLLAPSE_WHITESPACE, which will:
Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE.
It essentially causes the XML to be parsed twice.

#### Error Reporting
#### Pedantic Whitespace (PEDANTIC_WHITESPACE)

For applications that need to know about text nodes that are composed entirely of
whitespace, PEDANTIC_WHITESPACE is available. PEDANTIC_WHITESPACE maintains all the
whitespace between elements.

PEDANTIC_WHITESPACE is a new mode and is not as thoroughly tested as the other whitespace modes.

### Error Reporting

TinyXML-2 reports the line number of any errors in an XML document that
cannot be parsed correctly. In addition, all nodes (elements, declarations,
Expand Down
22 changes: 17 additions & 5 deletions tinyxml2.cpp
Expand Up @@ -715,7 +715,7 @@ bool XMLUtil::ToUnsigned64(const char* str, uint64_t* value) {
}


char* XMLDocument::Identify( char* p, XMLNode** node )
char* XMLDocument::Identify( char* p, XMLNode** node, bool first )
{
TIXMLASSERT( node );
TIXMLASSERT( p );
Expand Down Expand Up @@ -767,9 +767,19 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
p += dtdHeaderLen;
}
else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
returnNode = CreateUnlinkedNode<XMLElement>( _elementPool );
returnNode->_parseLineNum = _parseCurLineNum;
p += elementHeaderLen;

// Preserve whitespace pedantically before closing tag, when it's immediately after opening tag
if (WhitespaceMode() == PEDANTIC_WHITESPACE && first && p != start && *(p + elementHeaderLen) == '/') {
returnNode = CreateUnlinkedNode<XMLText>(_textPool);
returnNode->_parseLineNum = startLine;
p = start; // Back it up, all the text counts.
_parseCurLineNum = startLine;
}
else {
returnNode = CreateUnlinkedNode<XMLElement>(_elementPool);
returnNode->_parseLineNum = _parseCurLineNum;
p += elementHeaderLen;
}
}
else {
returnNode = CreateUnlinkedNode<XMLText>( _textPool );
Expand Down Expand Up @@ -1098,14 +1108,16 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEndTag, int* curLineNumPtr )
if (_document->Error())
return 0;

bool first = true;
while( p && *p ) {
XMLNode* node = 0;

p = _document->Identify( p, &node );
p = _document->Identify( p, &node, first );
TIXMLASSERT( p );
if ( node == 0 ) {
break;
}
first = false;

const int initialLineNum = node->_parseLineNum;

Expand Down
5 changes: 3 additions & 2 deletions tinyxml2.h
Expand Up @@ -1710,7 +1710,8 @@ class TINYXML2_LIB XMLElement : public XMLNode

enum Whitespace {
PRESERVE_WHITESPACE,
COLLAPSE_WHITESPACE
COLLAPSE_WHITESPACE,
PEDANTIC_WHITESPACE
};


Expand Down Expand Up @@ -1921,7 +1922,7 @@ class TINYXML2_LIB XMLDocument : public XMLNode
void DeepCopy(XMLDocument* target) const;

// internal
char* Identify( char* p, XMLNode** node );
char* Identify( char* p, XMLNode** node, bool first );

// internal
void MarkInUse(const XMLNode* const);
Expand Down
172 changes: 172 additions & 0 deletions xmltest.cpp
Expand Up @@ -1869,6 +1869,178 @@ int main( int argc, const char ** argv )
XMLTest( "Whitespace all space", true, 0 == doc.FirstChildElement()->FirstChild() );
}

// ----------- Preserve Whitespace ------------
{
const char* xml = "<element>This is &apos; \n\n text &apos;</element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", "This is ' \n\n text '", doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> This \nis &apos; text &apos; </element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", " This \nis ' text ' ", doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> \n This is &apos; text &apos; \n</element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", " \n This is ' text ' \n", doc.FirstChildElement()->GetText());
}

// The following cases cover text that is entirely whitespace, which is intentionally not preserved
{
const char* xml = "<element> </element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> </element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element>\n\n</element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> \n</element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> \n \n </element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}

// ----------- Pedantic Whitespace ------------
{
const char* xml = "<element>This is &apos; \n\n text &apos;</element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", "This is ' \n\n text '", doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> This \nis &apos; text &apos; </element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " This \nis ' text ' ", doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> \n This is &apos; text &apos; \n</element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " \n This is ' text ' \n", doc.FirstChildElement()->GetText());
}

// Following cases are for text that is all whitespace which is preserved with pedantic mode
{
const char* xml = "<element> </element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> </element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element>\n\n</element>\n";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", "\n\n", doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> \n</element> \n ";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " \n", doc.FirstChildElement()->GetText());
}

{
const char* xml = "<element> \n \n </element> ";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " \n \n ", doc.FirstChildElement()->GetText());
}

// Following cases are for checking nested elements are still parsed with pedantic whitespace
{
const char* xml = "<element>\n\t<a> This is nested text </a>\n</element> ";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " This is nested text ", doc.RootElement()->FirstChildElement()->GetText());
}

{
const char* xml = "<element> <b> </b> </element>\n";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " ", doc.RootElement()->FirstChildElement()->GetText());
}

{
const char* xml = "<element> <c attribute=\"test\"/> </element>\n ";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", true, 0 == doc.RootElement()->FirstChildElement()->GetText());
}

// Check sample xml can be parsed with pedantic mode
{
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.LoadFile("resources/dream.xml");
XMLTest("Load dream.xml with pedantic whitespace mode", false, doc.Error());

XMLTest("Dream", "xml version=\"1.0\"",
doc.FirstChild()->ToDeclaration()->Value());
XMLTest("Dream", true, doc.FirstChild()->NextSibling()->ToUnknown() != 0);
XMLTest("Dream", "DOCTYPE PLAY SYSTEM \"play.dtd\"",
doc.FirstChild()->NextSibling()->ToUnknown()->Value());
XMLTest("Dream", "And Robin shall restore amends.",
doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText());
}

{
// An assert should not fire.
const char* xml = "<element/>";
Expand Down

0 comments on commit bfc4ac4

Please sign in to comment.