Skip to content

Commit

Permalink
svg_loader: fix data parsing
Browse files Browse the repository at this point in the history
XML entities appearing in the SVG file
were already handled by the SVG parser, but they may also
be present in the embedded data. Thus, while decoding
base64, these entities must be skipped.

@issue: thorvg#2273
  • Loading branch information
mgrudzinska committed May 20, 2024
1 parent f02fccf commit cf2352a
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 49 deletions.
4 changes: 2 additions & 2 deletions src/common/tvgCompressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ uint8_t* lzwEncode(const uint8_t* uncompressed, uint32_t uncompressedSizeBytes,
/************************************************************************/


size_t b64Decode(const char* encoded, const size_t len, char** decoded)
size_t b64Decode(const char* encoded, size_t len, char** decoded, const CustomDecoder customizer)
{
static constexpr const char B64_INDEX[256] =
{
Expand All @@ -438,8 +438,8 @@ size_t b64Decode(const char* encoded, const size_t len, char** decoded)
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};


if (!decoded || !encoded || len == 0) return 0;
if (customizer) return (*customizer)(encoded, len, decoded, B64_INDEX);

auto reserved = 3 * (1 + (len >> 2)) + 1;
auto output = static_cast<char*>(malloc(reserved * sizeof(char)));
Expand Down
4 changes: 3 additions & 1 deletion src/common/tvgCompressor.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@

namespace tvg
{
//Signature of an alternative decoding routine that b64Decode() can delegate to.
//It is handed the encoded input, its length, the output pointer to fill in
//and the base64 index table; its return value is returned by b64Decode() as is.
typedef size_t (*CustomDecoder)(const char* start, size_t len, char** decoded, const char* b64Index);

uint8_t* lzwEncode(const uint8_t* uncompressed, uint32_t uncompressedSizeBytes, uint32_t* compressedSizeBytes, uint32_t* compressedSizeBits);
uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes);
size_t b64Decode(const char* encoded, const size_t len, char** decoded);
//Decodes base64 data. When 'customizer' is not null (and the arguments are valid),
//the whole decoding is forwarded to it; otherwise the built-in decoder is used.
size_t b64Decode(const char* encoded, size_t len, char** decoded, const CustomDecoder customizer = nullptr);
}

#endif //_TVG_COMPRESSOR_H_
4 changes: 3 additions & 1 deletion src/loaders/svg/tvgSvgSceneBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#include "tvgSvgSceneBuilder.h"
#include "tvgSvgPath.h"
#include "tvgSvgUtil.h"
#include "tvgXmlParser.h"


/************************************************************************/
/* Internal Class Implementation */
Expand Down Expand Up @@ -570,7 +572,7 @@ static unique_ptr<Picture> _imageBuildHelper(SvgLoaderData& loaderData, SvgNode*
if (!_isValidImageMimeTypeAndEncoding(&href, &mimetype, &encoding)) return nullptr; //not allowed mime type or encoding
char *decoded = nullptr;
if (encoding == imageMimeTypeEncoding::base64) {
auto size = b64Decode(href, strlen(href), &decoded);
auto size = b64Decode(href, strlen(href), &decoded, &b64DecodeSkipXML);
if (picture->load(decoded, size, mimetype) != Result::Success) {
free(decoded);
TaskScheduler::async(true);
Expand Down
127 changes: 86 additions & 41 deletions src/loaders/svg/tvgXmlParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,56 @@

#include "tvgXmlParser.h"
#include "tvgStr.h"
#include "tvgCompressor.h"

/************************************************************************/
/* Internal Class Implementation */
/************************************************************************/

bool _isIgnoreUnsupportedLogAttributes(TVG_UNUSED const char* tagAttribute, TVG_UNUSED const char* tagValue)
//XML entities recognized (and skipped) by the parser.
//xmlEntityLength[i] holds the length of xmlEntity[i]; both arrays have NUMBER_OF_XML_ENTITIES items.
#define NUMBER_OF_XML_ENTITIES 9
const char* const xmlEntity[] = {"&#10;", "&quot;", "&nbsp;", "&apos;", "&amp;", "&lt;", "&gt;", "&#035;", "&#039;"};
const int xmlEntityLength[] = {5, 6, 6, 6, 5, 4, 4, 6, 6};


/*
 * Moves 'start' forward over every known XML entity placed back-to-back
 * at the current position.
 *
 * start: in/out - current position, advanced past the skipped entities.
 * end:   one past the last character that may be examined.
 *
 * Returns the number of characters skipped (0 when the text at 'start' is not
 * a known entity). An entity that does not fit entirely in [start, end) is not
 * skipped, so 'start' never ends up beyond 'end'.
 */
static uint32_t _skipXmlEntities(const char*& start, const char* end)
{
    const auto org = start;

    while (start < end && *start == '&') {
        const auto before = start;
        for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) {
            const auto length = xmlEntityLength[i];
            //Too close to 'end' for this entity - matching it would overshoot the range.
            if (end - start < length) continue;
            if (strncmp(start, xmlEntity[i], length) == 0) {
                start += length;
                break;
            }
        }
        //'&' not followed by a known entity - nothing more to skip.
        if (start == before) break;
    }
    return static_cast<uint32_t>(start - org);
}


/*
 * Moves 'end' backwards over every known XML entity that directly precedes it.
 *
 * end:   in/out - one past the last character of the range, moved back past the entities.
 * start: beginning of the range; an entity is only dropped when it begins
 *        strictly after 'start'.
 *
 * Returns the number of characters 'end' was moved back by.
 */
static uint32_t _unskipXmlEntities(const char*& end, const char* start)
{
    const char* const initial = end;

    while (end > start && end[-1] == ';') {
        const char* const before = end;
        for (int idx = 0; idx < NUMBER_OF_XML_ENTITIES; ++idx) {
            const int len = xmlEntityLength[idx];
            if (end - len <= start) continue;
            if (strncmp(end - len, xmlEntity[idx], len) != 0) continue;
            end -= len;
            break;
        }
        //The trailing ';' does not close a known entity - stop here.
        if (end == before) break;
    }
    return initial - end;
}


static bool _isIgnoreUnsupportedLogAttributes(TVG_UNUSED const char* tagAttribute, TVG_UNUSED const char* tagValue)
{
#ifdef THORVG_LOG_ENABLED
const auto attributesNum = 6;
Expand Down Expand Up @@ -100,47 +144,12 @@ static const char* _simpleXmlUnskipWhiteSpace(const char* itr, const char* itrSt
}


/*
 * Returns the first position at or after 'itr' that is not the beginning of a
 * known XML entity; consecutive entities are all stepped over.
 * Scanning stops as soon as 'itr' reaches 'itrEnd'.
 */
static const char* _simpleXmlSkipXmlEntities(const char* itr, const char* itrEnd)
{
    for (;;) {
        if (itr >= itrEnd || *itr != '&') return itr;

        const char* const before = itr;
        for (int idx = 0; idx < NUMBER_OF_XML_ENTITIES; ++idx) {
            const int len = xmlEntityLength[idx];
            if (strncmp(itr, xmlEntity[idx], len) != 0) continue;
            itr += len;
            break;
        }
        //'&' that does not open a known entity - leave it in place.
        if (itr == before) return itr;
    }
}


/*
 * Walks backwards from 'itr' over every known XML entity that directly
 * precedes it and returns the resulting position.
 * An entity is only stepped over when it begins strictly after 'itrStart'.
 */
static const char* _simpleXmlUnskipXmlEntities(const char* itr, const char* itrStart)
{
    for (;;) {
        if (itr <= itrStart || itr[-1] != ';') return itr;

        const char* const before = itr;
        for (int idx = 0; idx < NUMBER_OF_XML_ENTITIES; ++idx) {
            const int len = xmlEntityLength[idx];
            if (itr - len <= itrStart) continue;
            if (strncmp(itr - len, xmlEntity[idx], len) != 0) continue;
            itr -= len;
            break;
        }
        //The ';' does not terminate a known entity - nothing more to step over.
        if (itr == before) return itr;
    }
}


static const char* _skipWhiteSpacesAndXmlEntities(const char* itr, const char* itrEnd)
{
itr = _simpleXmlSkipWhiteSpace(itr, itrEnd);
auto p = itr;
while (true) {
if (p != (itr = _simpleXmlSkipXmlEntities(itr, itrEnd))) p = itr;
if (_skipXmlEntities(itr, itrEnd)) p = itr;
else break;
if (p != (itr = _simpleXmlSkipWhiteSpace(itr, itrEnd))) p = itr;
else break;
Expand All @@ -154,7 +163,7 @@ static const char* _unskipWhiteSpacesAndXmlEntities(const char* itr, const char*
itr = _simpleXmlUnskipWhiteSpace(itr, itrStart);
auto p = itr;
while (true) {
if (p != (itr = _simpleXmlUnskipXmlEntities(itr, itrStart))) p = itr;
if (_unskipXmlEntities(itr, itrStart)) p = itr;
else break;
if (p != (itr = _simpleXmlUnskipWhiteSpace(itr, itrStart))) p = itr;
else break;
Expand Down Expand Up @@ -325,7 +334,7 @@ bool simpleXmlParseAttributes(const char* buf, unsigned bufLength, simpleXMLAttr
if (!value) goto error;
value++;
}
keyEnd = _simpleXmlUnskipXmlEntities(keyEnd, key);
_unskipXmlEntities(keyEnd, key);

value = _skipWhiteSpacesAndXmlEntities(value, itrEnd);
if (value == itrEnd) goto error;
Expand All @@ -349,7 +358,7 @@ bool simpleXmlParseAttributes(const char* buf, unsigned bufLength, simpleXMLAttr
tval = tmpBuf + (keyEnd - key) + 1;
int i = 0;
while (value < valueEnd) {
value = _simpleXmlSkipXmlEntities(value, valueEnd);
_skipXmlEntities(value, valueEnd);
tval[i++] = *value;
value++;
}
Expand Down Expand Up @@ -579,11 +588,47 @@ const char* simpleXmlFindAttributesTag(const char* buf, unsigned bufLength)
//User skip tagname and already gave it the attributes.
if (*itr == '=') return buf;
} else {
itr = _simpleXmlUnskipXmlEntities(itr, buf);
_unskipXmlEntities(itr, buf);
if (itr == itrEnd) return nullptr;
return itr;
}
}

return nullptr;
}


/*
 * Base64 decoder that additionally ignores XML entities found in the input.
 *
 * encoded:  base64 text to decode.
 * len:      number of characters of 'encoded' that may be read; decoding also
 *           stops at a NUL character.
 * decoded:  out - receives a malloc()'ed, NUL-terminated buffer with the result;
 *           left untouched when 0 is returned.
 * b64Index: 256-entry table translating a character into its 6-bit value.
 *
 * Returns the number of bytes written including the terminating NUL,
 * or 0 on invalid arguments / allocation failure.
 *
 * Note: characters <= 0x20 and XML entities are skipped only when they appear
 * at the beginning of a 4-character group, not in the middle of one.
 */
size_t b64DecodeSkipXML(const char* encoded, size_t len, char** decoded, const char* b64Index)
{
    if (!encoded || !decoded || !b64Index || len == 0) return 0;

    auto reserved = 3 * (1 + (len >> 2)) + 1;
    auto output = static_cast<char*>(malloc(reserved * sizeof(char)));
    if (!output) return 0;

    size_t idx = 0;
    const char* end = encoded + len;

    //The output buffer is sized from 'len', hence the input must never be read past 'end'.
    while (end - encoded > 1 && encoded[0] && encoded[1]) {
        if (*encoded <= 0x20) {
            ++encoded;
            continue;
        }
        if (_skipXmlEntities(encoded, end)) continue;

        //The table is indexed through unsigned char: a negative char must not turn into a huge index.
        auto value1 = b64Index[static_cast<unsigned char>(encoded[0])];
        auto value2 = b64Index[static_cast<unsigned char>(encoded[1])];
        output[idx++] = (value1 << 2) + ((value2 & 0x30) >> 4);

        if (end - encoded <= 2 || !encoded[2] || encoded[2] < 0 || encoded[2] == '=' || encoded[2] == '.') break;
        auto value3 = b64Index[static_cast<unsigned char>(encoded[2])];
        output[idx++] = ((value2 & 0x0f) << 4) + ((value3 & 0x3c) >> 2);

        if (end - encoded <= 3 || !encoded[3] || encoded[3] < 0 || encoded[3] == '=' || encoded[3] == '.') break;
        auto value4 = b64Index[static_cast<unsigned char>(encoded[3])];
        output[idx++] = ((value3 & 0x03) << 6) + value4;
        encoded += 4;
    }
    output[idx] = '\0';
    *decoded = output;

    return idx + 1;
}
5 changes: 1 addition & 4 deletions src/loaders/svg/tvgXmlParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@

#include "tvgSvgLoaderCommon.h"

#define NUMBER_OF_XML_ENTITIES 8
const char* const xmlEntity[] = {"&quot;", "&nbsp;", "&apos;", "&amp;", "&lt;", "&gt;", "&#035;", "&#039;"};
const int xmlEntityLength[] = {6, 6, 6, 5, 4, 4, 6, 6};

enum class SimpleXMLType
{
Open = 0, //!< \<tag attribute="value"\>
Expand All @@ -54,5 +50,6 @@ const char* simpleXmlParseCSSAttribute(const char* buf, unsigned bufLength, char
const char* simpleXmlFindAttributesTag(const char* buf, unsigned bufLength);
bool isIgnoreUnsupportedLogElements(const char* tagName);
const char* simpleXmlNodeTypeToString(SvgNodeType type);
size_t b64DecodeSkipXML(const char* encoded, size_t len, char** decoded, const char* b64Index);

#endif //_TVG_SIMPLE_XML_PARSER_H_

0 comments on commit cf2352a

Please sign in to comment.